I am working on a PyTorch ResNet model for image segmentation. While I am getting fairly high accuracy values, both my training and validation losses are zero. The shapes of all inputs seem right, but when I inspect my tensors they are all zeros (or very small values). My training inputs are images and binary masks (two classes). I have been stuck on this for a while, so any help would be much appreciated!
I have attached the model-training part of my code below. Thank you in advance!
# Training / validation loop.
#
# For every epoch, each phase in `phases` is run once over its data loader.
# In the 'train' phase the optimizer is stepped after every batch; in any
# other phase gradients are disabled. After a 'val' phase the accuracy is
# appended to `val_acc_history`, and the weights with the best validation
# accuracy so far are snapshotted and finally loaded back into `model`.
#
# Expects these names to be defined earlier in the file: num_epochs, phases,
# device, model, model_ft, dataLoader, criterion, optim, best_acc,
# val_acc_history, since (plus the torch / numpy / copy / time imports).
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)

    # Per-epoch containers kept from the original script. Nothing in this
    # loop updates them; they are left in place in case later code reads them.
    all_acc = {key: 0 for key in phases}
    all_loss = {key: torch.zeros(0).to(device) for key in phases}
    cmatrix = {key: np.zeros((2, 2)) for key in phases}

    for phase in phases:  # iterate through both training and validation
        if phase == 'train':
            model.train()  # training mode (dropout / batch-norm updates on)
        else:
            model.eval()  # evaluation mode

        running_loss = 0.0
        running_corrects = 0

        for X, y, y_weight in dataLoader[phase]:  # one batch at a time
            X = X.to(device)  # [Nbatch, 3, H, W] per the original comment
            # y_weight is moved to the device as before, but it is not used
            # by the loss or the metrics below.
            y_weight = y_weight.type('torch.FloatTensor').to(device)
            # [Nbatch, H, W] with class indices (0, 1) per the original comment
            y = y.type('torch.LongTensor').to(device)

            # NOTE(review): only the top-left pixel of each mask is used as
            # the label for the whole image. If that corner is almost always
            # background (class 0), the model can reach near-zero loss and
            # high accuracy by always predicting 0. For real segmentation the
            # model should presumably output [N, Nclass, H, W] and be trained
            # against the full mask - confirm against the model definition.
            target = y[:, 0, 0]

            optim.zero_grad()

            # Gradients are only tracked in the training phase.
            with torch.set_grad_enabled(phase == 'train'):
                # NOTE(review): the forward pass uses `model_ft`, while
                # train()/eval()/state_dict() are called on `model`. Confirm
                # that both names refer to the same module.
                prediction = model_ft(X)  # [N, Nclass]
                loss = criterion(prediction, target)
                # .mean() is a no-op for an already-reduced (scalar) loss and
                # averages a per-sample loss.
                batch_loss = loss.mean()

                # Predicted class = argmax over the class dimension of the
                # MODEL OUTPUT (the original took it over the input image X).
                _, preds = torch.max(prediction, 1)

                if phase == 'train':  # back-propagate and update weights
                    batch_loss.backward()
                    optim.step()

            # Weight the batch-mean loss by the batch size so that dividing
            # by the dataset size below yields a true per-sample average.
            # (Assumes criterion is mean-reduced or unreduced - TODO confirm.)
            running_loss += batch_loss.item() * X.size(0)
            running_corrects += torch.sum(preds == target)

        epoch_loss = running_loss / len(dataLoader[phase].dataset)
        epoch_acc = running_corrects.double() / len(dataLoader[phase].dataset)
        print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

        # Snapshot the weights whenever validation accuracy improves.
        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())
        if phase == 'val':
            val_acc_history.append(epoch_acc)

    print()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))

# Restore the best-performing weights seen during validation.
model.load_state_dict(best_model_wts)