# Same process, different results

I tried to replace the double-loop structure (marked as ‘A’) in my loss function with a generator-based version (marked as ‘B’). After the first calculation, A and B produce values at the same level, e.g. 44.123 (A) and 44.231 (B). After the second loss calculation, A drops to 20.456, but B is still 44.xxx. After the third, A falls to 10.xxx, while B still shows no obvious change. The two networks are exactly the same except for the loss function.
It feels as if backpropagation never succeeds in B.
As a last resort, the following was added at the end of B, but it didn't help:

```python
if IS_USE_GPU:
    ...  # body elided in the original post
else:
    ...
```
Has anyone been in a similar situation? Thanks in advance for your guidance.
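One way to test the suspicion that no backpropagation is happening is to check whether the loss tensor still carries a `grad_fn`. This minimal sketch (independent of the code below) shows the general PyTorch behavior: rebuilding values with `torch.tensor(list(...))` copies them out of the autograd graph, while `torch.stack` keeps them attached.

```python
import torch

x = torch.randn(5, requires_grad=True)

# Building a new tensor with torch.stack keeps the computation graph...
connected = torch.stack([xi * 2 for xi in x]).sum()
# ...while torch.tensor(list(...)) copies raw values and cuts it.
detached = torch.tensor([float(xi * 2) for xi in x]).sum()

print(connected.grad_fn)  # an autograd node: gradients can flow back to x
print(detached.grad_fn)   # None: the graph is cut here
```

If the final loss prints `grad_fn=None`, no gradient can reach the network parameters, whatever the optimizer does.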

A:

```python
def MyLoss(pred, label):
    if IS_USE_GPU:
        ...  # body elided in the original post
    else:
        ...

    tpred1, tpred2 = pred
    for inxi, di in enumerate(tpred2, 0):
        tempLd1 = torch.norm(torch.abs(di) - 1., p=1)
        if inxi == len(label):
            break
        else:
            for inxj, dj in enumerate(tpred2[inxi+1:], inxi+1):
                tempLd = tempLd1 + torch.norm(torch.abs(dj) - 1., p=1)
                Ld = Ld + tempLd
                if label[inxi] == label[inxj]:
                    Rij = 1.0
                else:
                    Rij = 0.0
                tempLh1 = Rij * torch.sum(torch.pow(di - dj, 2))
                tempLh2 = (1.0 - Rij) * max(48.0 - torch.sum(torch.pow(di - dj, 2)), 0.)
                tempLh = 0.5 * (tempLh1 + tempLh2)
                Lh = Lh + tempLh
    Lh = Lh / comb(len(label), 2)
    Ld = Ld / comb(len(label), 2)
    temp_loss = Lh + 0.01 * Ld
    # release GPU memory
    del tempLh1, tempLh2, tempLd, tpred1, tpred2
    torch.cuda.empty_cache()

    return temp_loss, Lh, Ld
```
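For reference, the pairwise terms in a loss of this shape can also be computed in a vectorized way that stays on the autograd graph. This is only a sketch under assumed shapes (`tpred2` as an `(N, D)` float tensor, `label` as an `(N,)` tensor; `pairwise_contrastive` is a hypothetical name), not the poster's code:

```python
import torch

def pairwise_contrastive(tpred2, label, margin=48.0):
    # Squared Euclidean distances between all pairs, differentiable end to end.
    d2 = torch.cdist(tpred2, tpred2, p=2) ** 2
    same = (label.unsqueeze(0) == label.unsqueeze(1)).float()
    lh = 0.5 * (same * d2 + (1.0 - same) * torch.clamp(margin - d2, min=0.0))
    # Keep each unordered pair once (upper triangle, excluding the diagonal).
    iu = torch.triu_indices(len(label), len(label), offset=1)
    return lh[iu[0], iu[1]].mean()

emb = torch.randn(8, 16, requires_grad=True)
lbl = torch.randint(0, 3, (8,))
loss = pairwise_contrastive(emb, lbl)
loss.backward()
print(emb.grad is not None)  # True: gradients reach the embeddings
```

Because no intermediate result is rewrapped in `torch.tensor(...)`, the loss keeps its `grad_fn` and gradients reach the embeddings.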

B:

```python
def MyLoss(pred, label):
    Ld = torch.tensor(0.)
    Lh = torch.tensor(0.)
    tpred1, tpred2 = pred
    for indx, di in enumerate(tpred2, 0):
        tempLd1 = torch.norm(torch.abs(di) - 1., p=1)
        tempLd = torch.tensor(list(tempLd1 + torch.norm(torch.abs(dj) - 1., p=1) for dj in tpred2[indx+1:]))
        tempLd = torch.sum(tempLd)
        Rij = torch.tensor(list(label[indx] == label[indx+1:])).float()
        Rij_rev = 1.0 - Rij
        temp_dis1 = torch.tensor(list(torch.sum(torch.pow(di - dj, 2)) for dj in tpred2[indx+1:])).float()
        temp_dis2 = torch.tensor(list(max(48.0 - torch.sum(torch.pow(di - dj, 2)), 0.) for dj in tpred2[indx+1:])).float()
        tempLh = 0.5 * (torch.sum(temp_dis1.mul(Rij)) + torch.sum(temp_dis2.mul(Rij_rev)))
        Lh = Lh + tempLh
        Ld = Ld + tempLd

    Lh = Lh / comb(len(label), 2)
    Ld = Ld / comb(len(label), 2)
    temp_loss = Lh + 0.01 * Ld
    if IS_USE_GPU: