I get a sample batch of data from the DataLoader with the batch size set to 1. The image shape is
1 x 3 x 224 x 224 and the label shape is 1 x 7 x 7 x 5 (confidence, x, y, w, h per grid cell). When I calculate the loss for this one image, I get NaN. Why? I also tried training the network on whole batches, and the loss is still NaN. Thank you for reading.
import torch
from torch.utils.data import DataLoader
from torchvision import transforms

face_data = FaceAnnoDataset(root_dir=path, img_dir='image', anno_dir='label',
                            txtfile='image.txt', transform=transforms.Compose([
                                transforms.ToPILImage(),
                                transforms.Resize((224, 224)),
                                transforms.ToTensor(),
                                transforms.Normalize([0.2341, 0.2388, 0.2622],
                                                     [0.2210, 0.2150, 0.2543])]))
train_loader = DataLoader(face_data, batch_size=1, shuffle=False, pin_memory=True,
                          num_workers=2, collate_fn=collate_fn)
# fetch one batch sample, image shape: 1 x 3 x 224 x 224
train_sample = next(iter(train_loader))
image = train_sample[0]
print(image.size())
anno = train_sample[1]
print(anno.size())
model = DetectionNet()
# move the model and the data to the GPU
model.cuda()
image = image.cuda()
anno = anno.cuda()
y_pred = model(image)
# reorder from batch x 5 x grid x grid to batch x grid x grid x 5
y_pred = y_pred.permute(0, 2, 3, 1)
print(y_pred.size())
loss = loss_fn(y_pred, anno)
print(loss)
output:
torch.Size([1, 3, 224, 224])
torch.Size([1, 7, 7, 5])
torch.Size([1, 7, 7, 5])
tensor(nan, device='cuda:0', grad_fn=<AddBackward0>)
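
To narrow down where the NaN first appears, one could check the raw network output and each loss term separately (a quick debugging sketch, reusing y_pred and anno from above; the loss functions are defined below):

# Debugging sketch: does the NaN come from the forward pass itself
# or from one of the two loss terms?
print(torch.isnan(y_pred).any())           # True if the network already outputs NaN
print(conf_regression_loss(y_pred, anno))  # confidence term in isolation
print(bbox_regression_loss(y_pred, anno))  # box regression term in isolation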
def loss_fn(y_pred, y):
    loss = conf_regression_loss(y_pred, y) + bbox_regression_loss(y_pred, y)
    return loss
def conf_regression_loss(y_pred, y, lamda=0.5):
    '''
    y_pred: output of forward propagation, shape: batch x grid x grid x 5
    y: ground truth, shape: batch x grid x grid x 5
    lamda: weight for loss_no_obj; as no-obj cells dominate, we scale their
           loss down, otherwise the no-obj term overpowers the total loss
    return: confidence loss
    '''
    # mask of active grid cells, i.e. cells that contain an object
    # according to the ground-truth label
    mask = y[:, :, :, 0]  # shape: batch x grid x grid
    y_pred_c = y_pred[:, :, :, 0]
    # if an object exists in the cell
    # (mask equals the target confidence, so confidence is equivalent to mask)
    loss_obj = torch.sum((mask * y_pred_c - mask)**2)
    # if no object exists in the cell, we scale the loss down, as the number
    # of cells without an object is much larger than the number with one
    # mask of no-object cells, where 1 indicates no object
    mask_no_obj = mask.clone()
    mask_no_obj[mask == 0] = 1
    mask_no_obj[mask == 1] = 0
    # target confidence is 0 in cells without an object
    loss_no_obj = torch.sum((y_pred_c * mask_no_obj)**2)
    loss_no_obj = loss_no_obj * lamda
    loss = loss_obj + loss_no_obj
    return loss
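
As a sanity check, the confidence term can be probed on its own with dummy tensors; with predictions in [0, 1] it should stay finite (a minimal sketch, the cell index 3,3 is arbitrary):

# Standalone check of the confidence term with random, well-behaved inputs
dummy_pred = torch.rand(1, 7, 7, 5)   # predictions in [0, 1]
dummy_y = torch.zeros(1, 7, 7, 5)
dummy_y[0, 3, 3, 0] = 1.0             # exactly one cell contains an object
print(conf_regression_loss(dummy_pred, dummy_y))  # expected: finite, no NaN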
def bbox_regression_loss(y_pred, y, lamda=5):
    # mask of cells that contain an object
    mask = y[:, :, :, 0]
    # loss of offsets x, y
    loss_offset = torch.sum((mask * y_pred[:, :, :, 1] - y[:, :, :, 1])**2 +
                            (mask * y_pred[:, :, :, 2] - y[:, :, :, 2])**2)
    # loss of width (index 3) and height (index 4)
    loss_w_h = torch.sum((mask * torch.sqrt(y_pred[:, :, :, 3]) - torch.sqrt(y[:, :, :, 3]))**2 +
                         (mask * torch.sqrt(y_pred[:, :, :, 4]) - torch.sqrt(y[:, :, :, 4]))**2)
    loss = lamda * (loss_offset + loss_w_h)
    return loss
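
One hypothesis I want to rule out: torch.sqrt returns NaN for negative inputs, and the raw network output is not constrained to be non-negative, so the width/height terms could be the source (a small check under that assumption, reusing y_pred from above):

# Hypothesis check: sqrt of a negative prediction yields NaN
print(torch.sqrt(torch.tensor(-0.1)))      # tensor(nan)
print((y_pred[:, :, :, 3] < 0).any())      # any negative predicted widths?
print((y_pred[:, :, :, 4] < 0).any())      # any negative predicted heights?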