My loss and accuracy is constant. My forward function is:

def forward(self, inp):

# Preprocessing

out = self.conv3d_1a_7x7(inp)

skip1 = out

out = self.maxPool3d_2a_3x3(out)

out = self.dropout(out)

out = self.conv3d_2b_1x1(out)

out = self.conv3d_2c_3x3(out)

out = self.maxPool3d_3a_3x3(out)

out = self.dropout(out)

out = self.mixed_3b(out)

skip2 = out

out = self.mixed_3c(out)

out = self.maxPool3d_4a_3x3(out)

out = self.dropout(out)

out = self.mixed_4b(out)

out = self.mixed_4c(out)

out = self.dropout(out)

out = self.mixed_4d(out)

skip3 = out

out = self.dropout(out)

out = self.mixed_4e(out)

out = self.mixed_4f(out)

out = self.maxPool3d_5a_2x2(out)

out = self.dropout(out)

out = self.mixed_5b(out)

out = self.mixed_5c(out)

out = self.dropout(out)

out = self.tconv6(out, skip1,skip2,skip3)

out = self.sigmoid(out)

print(“Before permutation”, out.shape)

out = out.permute(0,1,3,4,2)

out_logits = out

return out, out_logits

My train function is:

misc,out_logits[stream] = modelsstream

gt = gt.to(device, dtype=torch.float)

gt = gt.squeeze(1)

gt = gt.squeeze(1)

out_softmax = torch.nn.functional.softmax(out_logits[stream], 1).requires_grad_()

val, preds = torch.max(out_logits[stream].data, 1)

preds = preds.to(device, dtype=torch.float)

gt = torch.round(gt)

gt_avg = torch.mean(gt)

gt[gt>gt_avg] = 1

gt[gt<=gt_avg] = 0

val_avg = torch.mean(val)

val[val>val_avg] = 1

val[val<=val_avg] = 0

out_logits[stream] = out_logits[stream].squeeze(1)

losses[stream] = criterion(val.cpu(), gt.cpu()).requires_grad_()

# backward + optimize only if in training phase

if phase == ‘train’:

optimizers[stream].zero_grad()

losses[stream].backward(retain_graph=True)

optimizers[stream].step()

gt_c = gt.squeeze(1)

running_losses[stream] += losses[stream].item() * data[stream].shape[0]

print(“Current Loss is”, running_losses[stream])

running_corrects[stream] += torch.sum(val.cpu() == gt_c.data.cpu()).item()

correct_t = torch.sum(preds==gt_c).item()

total_t = gt_c.shape[0]*gt_c.shape[1]*gt_c.shape[2]*gt_c.shape[3]
acc_epc = 100*correct_t/total_t

for scheduler in schedulers.values():

scheduler.step()

The loss function is defined as:

criterion = torch.nn.BCEWithLogitsLoss()

I am trying for binary images by converting ground truth and output of model. But model is not converging. I have tried with different optimizers but still no improvement. Currently its SGD

optimizers[stream] = optim.SGD(filter(lambda p: p.requires_grad, models[stream].parameters()), lr=1e-8,

momentum=2, nesterov=True)