Hi guys, I noticed a weird behavior today.
If I load some weights into my model and compute my validation score, I get the same value as the previously trained model. However, if I ever compute a self.forward(im)
in the train loop (I don't even need to compute the loss or step the optimizer), it messes up my loaded weights. For example:
self.set_mode('train')
# Begin epoch loop
for i, (index, im, mask) in enumerate(train_loader):
    self.step += 1
    self.optimizer.zero_grad()
    im = im.cuda()
    mask = mask.cuda()
    # Forward propagation
    self.do_validation(val_loader)  # <--- if I do it here it's all ok
    # do_validation() calls set_mode('valid') and never restores 'train',
    # so the mode must be switched back before the training forward pass.
    self.set_mode('train')
    # NOTE(review): a forward pass in train mode updates BatchNorm running
    # statistics with the current batch — if the model contains BN layers,
    # this likely explains why validation right after this line degrades;
    # confirm against the model definition.
    logit = self.forward(im)
    self.do_validation(val_loader)  # <--- if I do it here I get all 0 in score
    self.set_mode('train')  # restore training mode after validation
    loss = self.criterion(logit, mask)
    loss.backward()
    self.optimizer.step()
For reference:
def do_validation(self, val_loader):
    '''Run a full validation pass after an epoch ends.

    Switches the model to 'valid' (eval) mode, accumulates per-batch loss,
    IoU (dice accuracy) and Kaggle-metric score over the whole loader with
    autograd disabled, then appends the epoch means to ``self.val_log``
    via ``self.update_log``.

    NOTE(review): this leaves the model in 'valid' mode on return — the
    caller must call ``self.set_mode('train')`` before resuming training.

    Parameters:
        val_loader: iterable yielding (index, im, mask, ind_mask) batches;
            presumably a torch DataLoader — confirm against the caller.
    '''
    self.set_mode('valid')
    val_loss = []
    val_iou = []
    val_score = []
    for i, (index, im, mask, ind_mask) in enumerate(val_loader):
        im = im.cuda()
        mask = mask.cuda()
        # No gradient graph is needed for metric computation.
        with torch.no_grad():
            logit = self.forward(im)
            pred = torch.sigmoid(logit)
            loss = self.criterion(logit, mask)
        # Hoist the device->host transfer: the original called
        # pred.cpu().numpy() twice per batch.
        pred_np = pred.cpu().numpy()
        # NOTE(review): ``eval`` here shadows the builtin — presumably a
        # project metrics module imported elsewhere in this file.
        iou = eval.dice_accuracy(pred_np, mask.cpu().numpy(), is_average=False)
        score_i = eval.do_kaggle_metric(pred_np, ind_mask)[0]
        val_loss.append(loss.item())
        val_iou.extend(iou)
        val_score.extend(score_i)
    # Inference stop here
    out = dict(loss=val_loss, iou=val_iou, score=val_score)
    # Append epoch data to metrics dict
    for metric, value in out.items():
        self.update_log(self.val_log, metric, np.mean(value))
I observed this because every time I start a new epoch my metrics decrease considerably. Any help is appreciated.
Kind regards