I am trying to fine-tune a ResNet-152 model on the CIFAR-100 dataset (from torchvision) and to track the training and validation accuracy, but the reported accuracy always stays at 0.1. That cannot be right, because when I test the model separately it gets an average of 54% on the 100 images.
I thought I was tracking it correctly, but now I am not so sure.
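For reference, this is the kind of per-epoch accuracy I expected my counters to produce (a minimal standalone sketch with made-up counts, not my actual code):

```python
# Minimal sketch of the running accuracy I expected to see each epoch.
# `correct` and `total` are made-up stand-ins for my running counters.
correct = 540   # correctly classified samples so far
total = 1000    # samples seen so far

accuracy = correct / total        # fraction in [0, 1] -> 0.54
accuracy_pct = 100.0 * accuracy   # as a percentage    -> 54.0
print(f"accuracy: {accuracy:.2f} ({accuracy_pct:.1f}%)")
```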
Here is my training code:
valid_loss_min = np.Inf
for epoch in range(10):
    train_loss = 0.0
    valid_loss = 0.0
    train_total = 0
    val_total = 0
    train_correct = 0
    val_correct = 0

    # Training
    model.train()
    for idx, (images, labels) in enumerate(train_loader):
        if use_gpu:
            images, labels = images.cuda(), labels.cuda()
        optimizer.zero_grad()  # reset gradients from the previous step
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        # Accumulate loss and accuracy
        train_loss += loss.item() * images.size(0)
        scores, predictions = torch.max(output.data, 1)
        train_total += labels.size(0)
        train_correct += int(sum(predictions == labels))
        acc = round((train_correct / train_total) / 100, 2)

    # Validate model
    model.eval()
    for idx, (images, labels) in enumerate(val_loader):
        if use_gpu:
            images, labels = images.cuda(), labels.cuda()
        output = model(images)
        loss = criterion(output, labels)
        valid_loss += loss.item() * images.size(0)
        scores, predictions = torch.max(output.data, 1)
        val_total += labels.size(0)
        val_correct += int(sum(predictions == labels))
        val_acc = round((val_correct / val_total) / 100, 2)

    # Now display the model's accuracy and loss
    train_loss = train_loss / len(train_loader.sampler)
    valid_loss = valid_loss / len(val_loader.sampler)
    print("Epoch:{} TL:{} TA:{} VL:{} VA:{}".format(epoch,
                                                    train_loss,
                                                    acc,
                                                    valid_loss,
                                                    val_acc))

    # Save the best model so far
    if valid_loss < valid_loss_min:
        print("Saving Model")
        torch.save(model.state_dict(), 'CLIRF100_Weights.pt')
        valid_loss_min = valid_loss
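For context, the separate test check that gives me the ~54% figure looks roughly like this (a sketch reconstructed from memory, not my exact code; `test_loader` holds the images I test with):

```python
# Rough sketch of my separate test check (the one reporting ~54%).
model.eval()
test_correct = 0
test_total = 0
with torch.no_grad():  # no gradients needed at test time
    for images, labels in test_loader:
        if use_gpu:
            images, labels = images.cuda(), labels.cuda()
        output = model(images)
        _, predictions = torch.max(output, 1)
        test_correct += (predictions == labels).sum().item()
        test_total += labels.size(0)
print("Test accuracy: {:.1f}%".format(100.0 * test_correct / test_total))
```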
What am I doing wrong here?
Thank you for any help with this.