I am trying to train a simple CNN classifier for a binary version of MNIST (.png files containing 0’s and 1’s). My code is as follows:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class NetTwo(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(6 * 14 * 14, 84)
        self.fc2 = nn.Linear(84, 1)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))  # ensures range between 0 and 1
        return x
net = NetTwo()
# confirm model works
for mini_batch in trainloader:
    # predict on one batch
    # mini_batch is a size 2 list where
    # [0] is the samples and [1] is the labels
    # batch size 32
    sample_points = mini_batch[0]
    sample_labels = mini_batch[1]
    logits = net(sample_points)
    print("Raw logit: ", logits[21])
    print("Label (round logit): ", torch.round(logits[21]))
    break
# Define a Loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
# Train the net
idx = 1
for epoch in range(20):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs = data[0]
        labels = data[1]

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = torch.squeeze(net(inputs))
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print("Loss (epoch " + str(idx) + "): ", running_loss)
    idx += 1

print('Finished Training')
# testing
num_correct = 0
num_total = 0
for mini_batch in testloader:
    sample_points = mini_batch[0]
    sample_labels = mini_batch[1]
    logits = net(sample_points)
    logits = logits[0]
    loss_output = criterion(logits, sample_labels)
    for i in range(len(logits)):
        pred = torch.round(logits[i]).item()
        real = sample_labels[i].item()
        if pred == real:
            num_correct += 1
        num_total += 1
print("accuracy: ", num_correct/num_total)
But the loss printed out during training is:
Loss (epoch 1): 169.0617936849594
Loss (epoch 2): 169.0617936849594
Loss (epoch 3): 169.0617936849594
Loss (epoch 4): 169.0617936849594
Loss (epoch 5): 169.0617936849594
Loss (epoch 6): 169.0617936849594
Loss (epoch 7): 169.0617936849594
Loss (epoch 8): 169.0617936849594
Loss (epoch 9): 169.0617936849594
Loss (epoch 10): 169.0617936849594
Loss (epoch 11): 169.0617936849594
Loss (epoch 12): 169.0617936849594
Loss (epoch 13): 169.0617936849594
Loss (epoch 14): 169.0617936849594
Loss (epoch 15): 169.0617936849594
Loss (epoch 16): 169.0617936849594
Loss (epoch 17): 169.0617936849594
Loss (epoch 18): 169.0617936849594
Loss (epoch 19): 169.0617936849594
Loss (epoch 20): 169.0617936849594
Finished Training
And the accuracy on the test set is the same whether I run the evaluation before or after training. So it seems clear that training is not doing anything to update the model parameters. Why is this? I feel pretty directionless when it comes to debugging this. Any tips on what may be wrong?
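One check I've been thinking about adding (it isn't in the script above, just a rough sketch reusing the same net, trainloader, criterion, and optimizer objects) is to snapshot a weight tensor, run a single training step, and then look at whether the gradients are non-zero and the weights actually moved:

# rough sanity check (not part of my current script):
# does a single training step change conv1's weights at all?
before = net.conv1.weight.detach().clone()

inputs, labels = next(iter(trainloader))
optimizer.zero_grad()
outputs = torch.squeeze(net(inputs))
loss = criterion(outputs, labels)  # same call as in my training loop
loss.backward()

# gradients should be non-zero if the loss actually depends on the weights
print("conv1 grad abs sum:", net.conv1.weight.grad.abs().sum().item())

optimizer.step()
after = net.conv1.weight.detach().clone()
print("conv1 weights changed:", not torch.equal(before, after))

Would something like this be a sensible way to narrow down where things go wrong, or is there a more standard debugging approach?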