This is the first time I'm posting here.
I am new to PyTorch, and I am trying to train my first image-recognition network on MNIST.
I noticed that when I don't use transforms.Normalize((0.5,), (0.5,)), the network weights don't change and the gradient stays zero, but when I add the normalization the weights are updated and the gradients are nonzero. Why is that?
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

batch_size = 64  # assumed value; not shown in the original snippet

transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,)),
                                ])
mnist_train = datasets.MNIST("./", train=True, download=True, transform=transform)
train_loader = DataLoader(mnist_train, batch_size=batch_size, shuffle=False)
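For context, the only difference between my two runs is the input range the pipeline produces: ToTensor alone gives pixels in [0, 1], and adding Normalize((0.5,), (0.5,)) shifts them to [-1, 1]. A quick check of that (sketch, using the loader above):

# Sanity check on the input range produced by the transform pipeline
images, _ = next(iter(train_loader))
print(images.min().item(), images.max().item())  # about -1.0 and 1.0 with Normalize
# without Normalize, ToTensor alone yields values in [0.0, 1.0]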
input_size = 784
hidden_size = 100
output_size = 10
network = nn.Sequential(
    nn.Linear(input_size, hidden_size),  # first (hidden) layer: takes the flattened image and reduces it to 100 dimensions
    nn.ReLU(),
    nn.Linear(hidden_size, output_size),  # second (output) layer: maps those 100 dimensions to estimated values for each digit
    nn.Softmax(dim=1)
)
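(As an aside, most classification examples I've seen skip the Softmax layer and use nn.CrossEntropyLoss on the raw logits with integer class labels, since it applies log-softmax internally. A minimal sketch of that variant, in case it matters for my problem:)

# Common alternative setup (sketch): raw logits + CrossEntropyLoss
alt_network = nn.Sequential(
    nn.Linear(input_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, output_size),  # no Softmax here
)
alt_loss_fn = nn.CrossEntropyLoss()  # takes integer labels, no one-hot needed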
images, labels = next(iter(train_loader))
images = images.view(-1, 784)  # flatten each 28x28 image into a 784-dim vector
labels = torch.eye(10)[labels]  # one-hot encode the integer labels
output = network(images)
loss_fn = nn.MSELoss()
loss = loss_fn(output, labels)  # calculate the loss
print('Before backward pass: \n', network[0].weight.grad)
loss.backward()  # calculate gradients of the parameters
print('After backward pass: \n', network[0].weight.grad)
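Printing the whole gradient tensor makes it hard to distinguish "exactly zero" from "very small"; a norm summary might be clearer (sketch):

# Summarize the gradient magnitude instead of printing the full tensor
print('Grad norm:', network[0].weight.grad.norm().item())
print('Grad abs max:', network[0].weight.grad.abs().max().item())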
optimizer = optim.SGD(network.parameters(), lr=0.01, momentum=0.9)
print('Initial weights - ', network[0].weight)
images, labels = next(iter(train_loader))
images = images.view(-1,784)
# Clear the gradients, because gradients are accumulated across backward passes
optimizer.zero_grad()
# Forward pass
output = network(images)
labels = torch.eye(10)[labels]
loss = loss_fn(output, labels)
# Backward pass and weight update
loss.backward()
print('Gradient -', network[0].weight.grad)
optimizer.step()  # apply the update; without this call the weights never change
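If it helps with diagnosing, here is an extra probe of the intermediate activations (sketch, using the network and flattened images above):

# Probe where the signal might vanish (sketch)
with torch.no_grad():
    hidden = network[1](network[0](images))  # activations after the ReLU
    probs = network(images)                  # softmax outputs
print('Fraction of active hidden units:', (hidden > 0).float().mean().item())
print('Softmax output range:', probs.min().item(), probs.max().item())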