I am a beginner with PyTorch. I am following its quickstart guide and have replicated its code almost verbatim, but in my output, the model's weights are not changing. I took a look at some other posts about optimizer.step() not working, but their suggestions didn't seem to work for me. Would someone be able to point me in the right direction, please?
Definition of custom module:
import torch
from torch import nn

# Get hardware for training
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),  # Fully connected layer: each output is a weighted sum of the inputs plus a bias
            nn.ReLU(),              # Activation between layers: elementwise max(input, 0)
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.flatten(x)  # Flatten each 28x28 image into a 784-element vector
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
print(model)
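As a side note, here is a quick shape check one could run with a random batch (a sketch with made-up data rather than the FashionMNIST loader the tutorial uses):

# Shape check with a random batch (random data, not the real dataset)
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
print(logits.shape)  # should be torch.Size([1, 10])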
Declaration of loss function and optimizer:
# Loss function
loss_fn = nn.CrossEntropyLoss()
# Stochastic Gradient Descent
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
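For context, my understanding is that SGD updates each weight as w ← w − lr · grad, so with lr=1e-3 each step should only change a weight by about a thousandth of its gradient. A minimal sketch of that update on a toy tensor (made-up values, nothing from the actual model):

# Toy illustration of one manual SGD step (hypothetical values)
w = torch.tensor([0.5, -0.3], requires_grad=True)
loss = (w ** 2).sum()      # toy loss; its gradient is 2 * w = [1.0, -0.6]
loss.backward()
with torch.no_grad():
    w -= 1e-3 * w.grad     # the same rule optimizer.step() applies: w = w - lr * grad
print(w)                   # roughly tensor([0.4990, -0.2994]) -- only a tiny change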
Definition of training function (with additional lines printing the weights before and after):
def train(dataloader, model, loss_fn, optimizer):
    # Number of samples
    size = len(dataloader.dataset)
    # Put module in training mode
    model.train()
    # Iterate through batches
    for batch, (X, y) in enumerate(dataloader):
        # Transfer data to the device to do calculations
        X, y = X.to(device), y.to(device)
        # Calculate predicted y values
        pred = model(X)
        # Get loss on predicted y values
        loss = loss_fn(pred, y)
        # Backpropagate to compute the gradients
        loss.backward()
        before_step = list(model.parameters())[0].clone().detach().cpu().numpy()
        # Update the weights in the NN
        optimizer.step()
        after_step = list(model.parameters())[0].clone().detach().cpu().numpy()
        # Reset the gradients
        optimizer.zero_grad()
        print("Before: ", before_step)
        print("After: ", after_step)
        # For every hundred batches, report the loss
        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"Loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
Output when training:
Before: [[-0.00293945 -0.03357641 0.02126637 ... -0.03217686 0.02163239
0.01577387]
[ 0.01675193 0.00785605 0.00159624 ... 0.02838659 0.01042882
-0.00520851]
[ 0.02488738 0.0200601 0.02569237 ... -0.01135333 0.00391812
-0.00557524]
...
[ 0.03510774 0.02370638 -0.03260146 ... 0.02252392 0.01040222
0.01905873]
[-0.03008673 -0.0211468 0.01441458 ... -0.0064997 -0.01389741
0.01241069]
[-0.02991394 -0.02001241 -0.01785357 ... -0.03501913 0.01994138
0.00311066]]
After: [[-0.00293945 -0.03357641 0.02126622 ... -0.03219151 0.02162728
0.01577382]
[ 0.01675189 0.00785571 0.00159275 ... 0.02837904 0.01042525
-0.00520895]
[ 0.02488733 0.02005999 0.02569141 ... -0.01130284 0.00391492
-0.00557806]
...
[ 0.03510772 0.02370632 -0.03260311 ... 0.02265108 0.01045562
0.01906238]
[-0.03008673 -0.0211468 0.01441456 ... -0.00650601 -0.01389765
...
As seen in the output above, the weights do not appear to change from before optimizer.step() to after it, and this stays the same across all iterations of training.
Any help would be greatly appreciated!