Can I use loss.item() for backward?

My loss values are:

loss_iden_12t = IdentLoss(l1[0], l1[1], l1[2])
loss_iden_rcs1s1t = IdentLoss(l2[0], l2[1], l2[2])
loss_iden_rcs2s2t = IdentLoss(l3[0], l3[1], l3[2])

loss_identy = loss_iden_12t.item() + loss_iden_rcs1s1t.item() + loss_iden_rcs2s2t.item()

p_pred_recon = p_d(recon)
g_pred_recon = g_d(recon)

loss_G_g = CELoss(p_pred_recon, torch.ones_like(p_pred_recon))
loss_G_p = CELoss(g_pred_recon, torch.ones_like(g_pred_recon))

loss_G = 20 * loss_G_g + 30 * loss_G_p + 100 * loss_identy

IdentLoss is my custom loss function.

I know that to call loss.backward() the object should be a Tensor,

but in a composite loss, can one of the loss values be a float?

I tried it with the float value loss_identy and there was no error, but I'm concerned about backpropagation for IdentLoss.

Even though I used a float loss value, did it still backpropagate?

Hi,

If you use .item(), no gradient will flow back on that side.
This “works” because you later add that Python number to a Tensor that requires grad, which gives you a final loss that is a Tensor that requires grad.

But no, the gradient won't flow back to the Python numbers!
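As a quick illustration (a minimal sketch, not your model), you can check that a term reduced to a Python float with .item() contributes nothing to the gradient:

import torch

w = torch.randn(3, requires_grad=True)
loss_a = (w ** 2).sum()          # stays in the autograd graph
loss_b = w.exp().sum()           # will be cut out of the graph below

total = loss_a + loss_b.item()   # .item() turns loss_b into a plain Python float
total.backward()

print(w.grad)                    # equals 2 * w: only loss_a contributed, loss_b did not

If you only want the identity losses for logging, keep them as Tensors in the sum you call backward() on and use .item() afterwards, e.g. loss_identy = loss_iden_12t + loss_iden_rcs1s1t + loss_iden_rcs2s2t, then log loss_identy.item().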

Thanks for replying!

I have one more question.

When I run it without .item(), the GPU memory usage keeps increasing continuously, so one epoch can't even finish.

import torch
import torch.nn as nn


class TripletLoss(nn.Module):
    def __init__(self, a1, a2, a3):
        super(TripletLoss, self).__init__()
        self.a1 = a1
        self.a2 = a2
        self.a3 = a3
        
    def forward(self, anchor, positive, negative):
        distance_positive = (anchor - positive).pow(2).sum(1)
        distance_negative = (anchor - negative).pow(2).sum(1)
        loss1 = distance_positive - distance_negative + self.a1
        loss2 = self.a3 * (distance_positive)
        losses = loss1 + loss2
        
        return losses.mean()


class TripletNet(nn.Module):
    def __init__(self, device):
        super(TripletNet, self).__init__()
        
        self.embedding_net = Encoder_id(in_channel = 3, n_ker = 32, device=device).to(device)
        
    def forward(self, s1, s2, t):
        s1 = self.embedding_net(s1)
        s2 = self.embedding_net(s2)
        t = self.embedding_net(t)
        
        return s1, s2, t


class Encoder_id(nn.Module):
    def __init__(self, in_channel, n_ker, device):
        super(Encoder_id, self).__init__()
        
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channel, n_ker, 4, 2),
            nn.LeakyReLU(),
            nn.Conv2d(n_ker, n_ker * 2, 4, 2),
            nn.LeakyReLU(),
            nn.Conv2d(n_ker * 2, n_ker * 4, 4, 2),
            nn.LeakyReLU(),
            nn.Conv2d(n_ker * 4, n_ker * 8, 4, 2),
            nn.LeakyReLU(),
            nn.Conv2d(n_ker * 8, n_ker * 16, 4, 2),
            nn.LeakyReLU()
        )
        
        self.device = device
        
    def forward(self, x):
        batch_size = x.size(0)
        
        enc = self.encoder(x)

        enc = enc.view(batch_size, -1)

        fc = nn.Linear(enc.size(1), 128).to(self.device)
        enc = fc(enc)
        enc = torch.sigmoid(enc)
        
        return enc

This is my IdentLoss function, along with the encoder and embedding network that produce the IdentLoss inputs.

Is there anything wrong with my code?

You should post your training loop. The growing memory usage likely occurs because you do not clear the gradients anywhere (zero_grad).
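For reference, the usual per-batch pattern looks like this (a generic sketch with hypothetical loader / model / criterion / optimizer names, not your code):

for batch, target in loader:
    optimizer.zero_grad()              # clear gradients accumulated in the previous iteration
    output = model(batch)              # forward pass builds the autograd graph
    loss = criterion(output, target)
    loss.backward()                    # frees the graph and writes gradients into .grad
    optimizer.step()                   # update the parameters

The important part is clearing the gradients once per iteration, before the next backward() call.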

origin = x[0].to(device)
lm = x[3].to(device)
m = x[1].to(device)
f = x[2].to(device)

rec_m, (rec_f, mean_f, logvar_f, latent_f), (rec_l, mean_l, logvar_l, latent_l) = model(origin)

# Calc loss
concat_s1t = torch.cat((latent_l[:10], latent_f[20:]), dim = 1)
concat_s2t = torch.cat((latent_l[10:20], latent_f[20:]), dim = 1)
image_without_face_st = origin[20:] - (1 - rec_m[20:])

size = torch.cat((latent_l, latent_f), dim = 1).size()
rand_latent = torch.rand(size).to(device)
img_without_face = origin * (1 - rec_m)

recon_s1t = netG(image_without_face_st, torch.cat((latent_l[:10], latent_f[20:]), dim = 1))
recon_s2t = netG(image_without_face_st, torch.cat((latent_l[10:20], latent_f[20:]), dim = 1))

recon = netG(img_without_face, torch.cat((latent_l, latent_f), dim = 1))
rand_recon = netG(img_without_face, rand_latent)

set_requires_grad(g_d, True)
optim_g_D.zero_grad()

g_pred_real = g_d(origin)
g_pred_recon = g_d(recon.detach())
g_pred_rand_recon = g_d(rand_recon.detach())

# print(g_pred_real.size(), g_pred_recon.size(), g_pred_rand_recon.size())

loss_G_d_real = CELoss(g_pred_real, torch.ones_like(g_pred_real))
loss_G_d_recon = CELoss(g_pred_recon, torch.zeros_like(g_pred_recon))
loss_G_d_rand = CELoss(g_pred_rand_recon, torch.zeros_like(g_pred_rand_recon))
loss_G_d = (loss_G_d_rand + loss_G_d_recon + loss_G_d_real) * (1/3)

loss_G_d.backward()
optim_g_D.step()

set_requires_grad(p_d, True)
optim_p_D.zero_grad()

p_pred_real = p_d(origin)
p_pred_recon = p_d(recon.detach())
p_pred_rand_recon = p_d(rand_recon.detach())

loss_P_d_real = CELoss(p_pred_real, torch.ones_like(p_pred_real))
loss_P_d_recon = CELoss(p_pred_recon, torch.zeros_like(p_pred_recon))
loss_P_d_rand = CELoss(p_pred_rand_recon, torch.zeros_like(p_pred_rand_recon))

loss_P_d = loss_P_d_real + loss_P_d_recon + loss_P_d_rand
loss_P_d = loss_P_d * (1/3)

loss_P_d.backward()
optim_p_D.step()

set_requires_grad([p_d, g_d], False)
set_requires_grad([netG, triplet], True)
optim_G.zero_grad()

l1 = triplet(origin[:10], origin[10:20], origin[20:])
l2 = triplet(recon_s1t, origin[:10], origin[20:])
l3 = triplet(recon_s2t, origin[10:20], origin[20:])

loss_iden_12t = IdentLoss(l1[0], l1[1], l1[2])
loss_iden_rcs1s1t = IdentLoss(l2[0], l2[1], l2[2])
loss_iden_rcs2s2t = IdentLoss(l3[0], l3[1], l3[2])

loss_identy = loss_iden_12t.item() + loss_iden_rcs1s1t.item() + loss_iden_rcs2s2t.item()

p_pred_recon = p_d(recon)
g_pred_recon = g_d(recon)

loss_G_g = CELoss(p_pred_recon, torch.ones_like(p_pred_recon))
loss_G_p = CELoss(g_pred_recon, torch.ones_like(g_pred_recon))

loss_G = 20 * loss_G_g + 30 * loss_G_p + 100 * loss_identy

loss_G.backward()
optim_G.step()

This is my training code.
Do you mean optim_g_D.zero_grad(), optim_p_D.zero_grad(), and optim_G.zero_grad()?

No offence, but that is quite messy code to dig through :smile: Yes, you can set all the optimizers to zero grad, but I suspect it will be easier if you just call zero_grad on the model - that way you only have to do it once.
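Something like this, for example (a sketch using the module names from your code; nn.Module.zero_grad() resets the gradients of every parameter registered inside that module):

# at the start of each training iteration, before any backward()
for net in (model, netG, triplet, g_d, p_d):
    net.zero_grad()

If all of these sub-networks were registered inside one top-level nn.Module, a single zero_grad() call on that module would cover them all.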

Sorry for the dirty code :sweat_smile:

I set zero_grad on the model, and it has an effect on the initial GPU memory usage,

but it still keeps increasing.

Thanks for the reply!