When i predict without using validation it’s working fine, but If i use validation loop, my model just shows prediction for first class(0) only. can you guys please have look at my codes?
if __name__ == '__main__':
random.seed(118)
np.random.seed(118)
torch.manual_seed(118)
torch.cuda.manual_seed(118)
torch.cuda.manual_seed_all(118)
# dataset
img_root = '/content/drive/MyDrive/MIDD/working_MIDD_CODES2/DataSet'
save_path = './model'
if not os.path.exists(save_path): os.mkdir(save_path)
lr = 0.001
batch_size = 4
epoch = 15
lr_dec=[21,51]
#%%
data = Data(img_root)
ttrain1, validd = random_split(data,[5420,1352]) #GT_Sub3
ttrain, ttest = random_split(ttrain1,[4740,680])
loader = DataLoader(ttrain, batch_size=batch_size, shuffle=True, num_workers=1)
loader_V=DataLoader(validd, batch_size=batch_size, shuffle=True, num_workers=1)
#%%
net = Mnet().cuda()
#%%
net.load_pretrained_modell()
optimizer = optim.SGD(filter(lambda p: p.requires_grad, net.parameters()), lr=lr, weight_decay=0.0005,momentum=0.9)
# Dd=smp.losses.DiceLoss('multilabel')
iter_num = len(loader)
iter_num_k = len(loader_V)
loss=[]
lossf=[]
lossf_V=[]
loss1=[]
loss2=[]
loss1_V=[]
loss2_V=[]
loss_V=[]
overall_accuracy=[]
overall_accuracy_V=[]
net.train()
# weights=torch.tensor([ 0.26960305, 14.45963237, 17.68206829, 6.05581275])
# aa=np.array([ 0.26, 14.45, 17.68, 6.05])
# weights=torch.from_numpy(aa)
accuracy=[]
# class0_accuracy=[]
# class1_accuracy=[]
# class2_accuracy=[]
# class3_accuracy=[]
#%%
for epochi in range(0, epoch):
if epochi in lr_dec :
lr=lr/10
optimizer = optim.SGD(filter(lambda p: p.requires_grad, net.parameters()), lr=lr, weight_decay=0.0005,momentum=0.9)
print(lr)
prefetcher = DataPrefetcher(loader)
rgb, t, label = prefetcher.next()
r_sal_loss = 0
sal_loss1_=0
sal_loss2_=0
sal_loss3_=0
sal_loss1V_=0
sal_loss2V_=0
sal_loss3V_=0
r_sal_loss_V = 0
total_size=0
correct=0
total_size_V=0
correct_V=0
net.zero_grad()
i = 0
for i in range(0, iter_num):
i+=1
# rgb=rgb.cpu()
# t=t.cpu()
score, score1, score2,g= net(rgb.float(), t.float())
# label = label.permute(0,3,1,2)
# sal_loss= my_loss1( score.cuda(),score1.cuda(),score2.cuda(),g.cuda(),label)
# Dd=torch.nn.CrossEntropyLoss(weight=weights.to('cuda'))
# Dd=DiceLoss(weight=weights.to('cuda'))
# nn.CrossEntropyLoss
Dd=smp.losses.DiceLoss('multilabel')
sal_loss1=Dd(score,label)
sal_loss2=Dd(score1,label)
sal_loss3=Dd(score2,label)
sal_loss=sal_loss1+sal_loss2+sal_loss3
r_sal_loss += sal_loss.data
sal_loss1_+=sal_loss1.data
sal_loss2_+=sal_loss2.data
sal_loss3_+=sal_loss3.data
sal_loss.backward()
optimizer.step()
optimizer.zero_grad()
score_acc=torch.argmax(score,dim=1)
label_acc1=torch.argmax(label,dim=1)
correct += (score_acc == label_acc1).float().sum()
# correct+=(score_acc[0]==label_acc1[0]).sum()+(score_acc[1]==label_acc1[1]).sum()+(score_acc[2]==label_acc1[2]).sum()+(score_acc[3]==label_acc1[3]).sum()
total_size+=label_acc1.cpu().numpy().size
if i % 250 == 0:
print('epoch: [%2d/%2d], iter: [%5d/%5d] || loss : %5.4f || accuracy : %5.2f' % (
epochi, epoch, i, iter_num, r_sal_loss / 100,100*correct/total_size))
# r_sal_loss = 0
rgb, t, label = prefetcher.next()
#validation
loss.append(r_sal_loss.cpu().numpy()/len(loader))
lossf.append(sal_loss1_.cpu().numpy()/len(loader))
loss1.append(sal_loss2_.cpu().numpy()/len(loader))
loss2.append(sal_loss3_.cpu().numpy()/len(loader))
overall_accuracy.append(100*correct.cpu().numpy()/total_size)
prefetcher_V = DataPrefetcher(loader_V)
rgb, t, label = prefetcher_V.next()
net.eval()
# print(correct_V,total_size_V)
k=0
for k in range(0,iter_num_k):
k+=1
score, score1, score2,g= net(rgb.float(), t.float())
Dd=smp.losses.DiceLoss('multilabel')
sal_loss1_V=Dd(score,label)
sal_loss2_V=Dd(score1,label)
sal_loss3_V=Dd(score2,label)
sal_loss=sal_loss1_V+sal_loss2_V+sal_loss3_V
# r_sal_loss += sal_loss.data
r_sal_loss_V += sal_loss.data
score_acc=torch.argmax(score,dim=1)
label_acc1=torch.argmax(label,dim=1)
total_size_V+=label_acc1.cpu().numpy().size
correct_V += (score_acc == label_acc1).float().sum()
sal_loss1V_+=sal_loss1_V.data
sal_loss2V_+=sal_loss2_V.data
sal_loss3V_+=sal_loss3_V.data
# print(correct,total_size)
if k % 100 == 0:
print('epoch: [%2d/%2d], iter: [%5d/%5d] || loss : %5.4f || accuracy_Val: %5.2f' % (
epochi, epoch, k, iter_num_k, r_sal_loss_V / 100,100*correct_V/total_size_V))
rgb, t, label = prefetcher_V.next()
# loss_V.append(r_sal_loss_V/len(loader_V))
loss_V.append(r_sal_loss_V.cpu().numpy()/len(loader_V))
lossf_V.append(sal_loss1V_.cpu().numpy()/len(loader))
loss1_V.append(sal_loss2V_.cpu().numpy()/len(loader))
loss2_V.append(sal_loss3V_.cpu().numpy()/len(loader))
overall_accuracy_V.append(100*correct_V.cpu().numpy()/total_size_V)
if epochi %30==0:
torch.save(net.state_dict(), '%s/epoch_%d.pth' % (save_path, epochi))
torch.save(net.state_dict(), '%s/final_batch1_try18wg.pth' % (save_path))