How to update weights using an intermediate loss?

My network is constructed as follows:

import torch
import torch.nn as nn

class GetFilter(nn.Module):
    def __init__(self, Fin, K, Fout):
        super(GetFilter, self).__init__()
        self.Fin, self.K, self.Fout = Fin, K, Fout
        # weight shape inferred from forward(): (Fin * K) input features -> Fout output features
        self.W = nn.Parameter(torch.Tensor(Fin * K, Fout))
        nn.init.normal_(self.W, mean=0, std=0.2)
        self.B = nn.Parameter(torch.Tensor(self.Fout))
        nn.init.normal_(self.B, mean=0, std=0.2)
        self.relu = nn.ReLU()

    def forward(self, x, L):
        N, M, Fin = list(x.size())
        K = self.K
        x0 = x.clone()
        x = x0.unsqueeze(0)

        def concat(x, x_):
            x_ = x_.unsqueeze(0)
            return torch.cat((x, x_), dim=0)

        # Chebyshev-style recurrence: x_k = 2 * L @ x_{k-1} - x_{k-2}
        if K > 1:
            x1 = torch.matmul(L, x0)
            x = concat(x, x1)
        for k in range(2, K):
            x2 = 2 * torch.matmul(L, x1) - x0
            x = concat(x, x2)
            x0, x1 = x1, x2
        x = x.permute(1, 2, 3, 0)  # stacked shape (K, N, M, Fin) -> (N, M, Fin, K)
        x = x.reshape(N * M, Fin * K)
        x = torch.matmul(x, self.W)
        x = torch.add(x, self.B)
        x = self.relu(x)
        return x.reshape(N, M, self.Fout)

class MyNet(nn.Module):
    def forward(self, x, cat):
        losses = []
        for i in range(len(self.F)):
            x = getattr(self, 'filter%d' % i)(x, L)  # filter%d is a GetFilter module as above; L comes from the surrounding scope (not shown)
            # intermediate regularization term x^T L x for this layer
            losses.append(torch.matmul(torch.matmul(x.permute(0, 2, 1), L), x).requires_grad_())
            # losses.append(x.detach().requires_grad_())
        return x, losses

I want to update the weights with both an L2 loss and a cross-entropy loss. The cross entropy works well, but I can’t get the weights updated by the L2 loss. I assumed the returned losses are leaf tensors with requires_grad set, so I tried updating the network with this test code:

L2Loss = nn.MSELoss()
outputs, losses = model(batch_data, batch_cat)
optimizer = torch.optim.Adam(model.parameters(), lr=1)
los = L2Loss(losses[0], torch.zeros_like(losses[0]))
print(los)
los.backward()
optimizer.step()
print(los)

In the first iteration I observe that the two printed losses are identical (which suggests the parameters don’t change), and in the second iteration this error appears: “Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.”
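As far as I understand, the retain_graph part of that message is standard autograd behaviour rather than something specific to my model: the graph built by a forward pass is freed after the first backward(). A minimal sketch, independent of my network:

import torch

w = torch.nn.Parameter(torch.randn(3))
loss = (w ** 2).sum()
loss.backward()    # fine: gradients are computed, then the graph is freed
# loss.backward()  # calling it again on the same graph raises the "backward through the graph a second time" error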

In TensorFlow, my code works:

# network
self.regularizers.append(tf.nn.l2_loss(tf.matmul(tf.matmul(tf.transpose(x, perm=[0, 2, 1]), L), x)))

# loss function
with tf.name_scope('regularization'):
    regularization *= tf.add_n(self.regularizers)
    loss = cross_entropy + regularization

# optimizer
if momentum == 0:
    # optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
else:
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
grads = optimizer.compute_gradients(loss)
optimizer.apply_gradients(grads, global_step=global_step)
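For comparison, I believe the per-layer regularizer above would translate to PyTorch roughly as follows (just a sketch, with x and L being the same tensors as in my forward pass; tf.nn.l2_loss(t) is sum(t ** 2) / 2):

# rough PyTorch counterpart of the TF regularizer term above
term = torch.matmul(torch.matmul(x.permute(0, 2, 1), L), x)
regularizer = 0.5 * term.pow(2).sum()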

Excuse me… could anyone help me with this problem?

What happens if you just add the losses and backprop the sum, like in the TensorFlow snippet you posted? Most people use that quite successfully.
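Roughly like this, reusing the losses list your model already returns (criterion and reg_weight are placeholders for your cross-entropy criterion and a weighting factor, so adapt the names and shapes to your setup):

optimizer.zero_grad()
outputs, losses = model(batch_data, batch_cat)
cross_entropy = criterion(outputs, batch_cat)                       # your existing classification loss
regularization = reg_weight * sum(l.pow(2).sum() for l in losses)   # roughly tf.nn.l2_loss on each term
loss = cross_entropy + regularization                               # combine, as in your TF code
loss.backward()                                                     # one backward through both terms
optimizer.step()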

Best regards

Thomas

I use a Jupyter notebook to inspect the result:

# Test code
L2Loss = nn.MSELoss()
outputs, losses = model(batch_data, batch_cat)
optimizer = torch.optim.Adam(model.parameters(), lr=1)
los = L2Loss(losses[0], torch.zeros_like(losses[0]))
print(los)
los.backward()
optimizer.step()
print(los)

The first time I run this cell, both print(los) calls show the same value and the model parameters don’t change. When I run the cell again, the error appears: “Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.” Is it because the gradient doesn’t back-propagate correctly? I just want the weights updated with respect to the L2 loss.

Yeah, you have to redo the calculation to backprop again.
I’m not sure I see where you add the losses.
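For the first point, I mean something along these lines: call the model again before every backward() so a fresh graph is built each iteration (num_steps is just a placeholder):

for step in range(num_steps):
    optimizer.zero_grad()
    outputs, losses = model(batch_data, batch_cat)        # fresh forward pass -> fresh graph
    los = L2Loss(losses[0], torch.zeros_like(losses[0]))
    los.backward()
    optimizer.step()
    print(los.item())    # compare values across iterations, not before/after step()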

Best regards

Thomas