My network is constructed as follows:

```
import torch
import torch.nn as nn

class GetFilter(nn.Module):
    def __init__(self, Fin, K, Fout):
        super(GetFilter, self).__init__()
        self.Fin, self.K, self.Fout = Fin, K, Fout
        # W mixes the K Chebyshev terms per node; shape matches the reshape in forward()
        self.W = nn.Parameter(torch.Tensor(Fin * K, Fout))
        nn.init.normal_(self.W, mean=0, std=0.2)
        self.B = nn.Parameter(torch.Tensor(Fout))
        nn.init.normal_(self.B, mean=0, std=0.2)
        self.relu = nn.ReLU()

    def forward(self, x, L):
        N, M, Fin = list(x.size())
        K = self.K
        x0 = x.clone()
        x = x0.unsqueeze(0)  # stack of Chebyshev terms, starts as (1, N, M, Fin)

        def concat(x, x_):
            x_ = x_.unsqueeze(0)
            return torch.cat((x, x_), dim=0)

        # Chebyshev recurrence: T_k(L)x = 2 L T_{k-1}(L)x - T_{k-2}(L)x
        if K > 1:
            x1 = torch.matmul(L, x0)
            x = concat(x, x1)
        for k in range(2, K):
            x2 = 2 * torch.matmul(L, x1) - x0
            x = concat(x, x2)
            x0, x1 = x1, x2
        x = x.permute(1, 2, 3, 0)        # (K, N, M, Fin) -> (N, M, Fin, K)
        x = x.reshape(N * M, Fin * K)
        x = torch.matmul(x, self.W)
        x = torch.add(x, self.B)
        x = self.relu(x)
        return x.reshape(N, M, self.Fout)

class MyNet(nn.Module):
    # __init__ (which registers self.F, the filter%d modules, and the Laplacian L) is omitted here
    def forward(self, x, cat):
        losses = []
        for i in range(len(self.F)):
            x = getattr(self, 'filter%d' % i)(x, L)  # filter%d is a GetFilter from above
            losses.append(torch.matmul(torch.matmul(x.permute(0, 2, 1), L), x).requires_grad_())
            # losses.append(x.detach().requires_grad_())
        return x, losses
```
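For reference, this is roughly how the filter gets called; the sizes here are made-up placeholders, not my real data:

```
# Made-up shapes just to show the expected layout:
N, M, Fin, K, Fout = 4, 10, 6, 3, 16      # batch, nodes, in-features, Chebyshev order, out-features
x = torch.randn(N, M, Fin)                # node features
L = torch.randn(M, M)                     # (rescaled) graph Laplacian
f = GetFilter(Fin, K, Fout)
print(f(x, L).shape)                      # torch.Size([4, 10, 16])
```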

I want to update the weights with both an L2 loss and cross entropy. The cross entropy part works well, but I can't get the weights to update from the L2 loss. I assumed the returned losses are leaf tensors with requires_grad set, so I tried updating the network with this test code:

```
L2Loss = nn.MSELoss()
outputs, losses = model(batch_data, batch_cat)
optimizer = torch.optim.Adam(model.parameters(), lr=1)
los = L2Loss(losses[0], torch.zeros_like(losses[0]))
print(los)
los.backward()
optimizer.step()
print(los)  # printed again to see whether the step changed anything
```
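To see whether any gradient reaches the filter weights at all, I also dump the gradients right after `backward()` (just a diagnostic):

```
# Diagnostic: None means no gradient flowed into that parameter
for name, p in model.named_parameters():
    print(name, None if p.grad is None else p.grad.norm().item())
```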

In the first iteration I observe that the two printed losses are identical (which suggests the parameters don't change), and in the second iteration this error appears: "Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time."
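The error message itself is easy to reproduce in isolation by calling `backward()` twice on one graph, so I suspect my second iteration is somehow reusing the first iteration's graph (minimal sketch, unrelated to my model):

```
w = torch.randn(3, requires_grad=True)
loss = (w * 2).sum()
loss.backward()    # frees the graph's intermediate buffers
# loss.backward()  # uncommenting raises the same "backward through the graph a second time" error
```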

In TensorFlow, the same approach works for me:

```
# network
self.regularizers.append(tf.nn.l2_loss(
    tf.matmul(tf.matmul(tf.transpose(x, perm=[0, 2, 1]), L), x)))

# loss function
with tf.name_scope('regularization'):
    regularization *= tf.add_n(self.regularizers)
loss = cross_entropy + regularization

# optimizer
if momentum == 0:
    # optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
else:
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
grads = optimizer.compute_gradients(loss)
optimizer.apply_gradients(grads, global_step=global_step)
```
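If it helps, this is what I think the PyTorch equivalent of that TF snippet should look like: one combined loss per iteration and a single `backward()`. Here `criterion`, `regularization` (the weight), and `loader` are assumptions, since I don't have this version working:

```
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
for batch_data, batch_cat in loader:
    optimizer.zero_grad()
    outputs, losses = model(batch_data, batch_cat)
    cross_entropy = criterion(outputs, batch_cat)
    reg = sum((l ** 2).sum() / 2 for l in losses)      # mirrors tf.nn.l2_loss + tf.add_n
    (cross_entropy + regularization * reg).backward()  # one backward over the combined graph
    optimizer.step()
```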