I cannot get the gradient (SGD) to work in the example below. Similar posts and the autograd documentation have not helped. In the code below, L.grad is always None. Your help is appreciated.

import numpy as np

import torch

import torch.nn as nn

import torch.nn.functional as F

import torch.optim as optim

from torch.autograd import Variable

def NN(w=None, x=None):
    """Build a 2-layer ReLU network from a flat weight vector and run one forward pass.

    Args:
        w: flat array of 135 parameters, split as 36 (W1) + 9 (b1) + 81 (W2) + 9 (b2).
           A fresh random vector is drawn when None.
        x: input vector of 4 features. A fresh random vector is drawn when None.

    Returns:
        ([W1, B1, W2, B2], Y2): the leaf parameter tensors (requires_grad=True),
        and the (9, 1) output tensor of the forward pass.
    """
    # The original used np.random.rand(...) directly as default arguments; defaults
    # are evaluated once at definition time, so every no-arg call silently reused
    # the same weights/input. Use None sentinels instead.
    w = np.random.rand(135) if w is None else w
    x = np.random.rand(4) if x is None else x

    X = torch.Tensor(np.asarray(x).reshape(4, 1))

    # Leaf parameter tensors: after backward(), gradients accumulate in their
    # .grad attribute. torch.tensor(..., requires_grad=True) replaces the
    # deprecated Variable wrapper.
    W1 = torch.tensor(np.asarray(w[0:36], dtype=np.float32).reshape(9, 4), requires_grad=True)
    B1 = torch.tensor(np.asarray(w[36:45], dtype=np.float32).reshape(9, 1), requires_grad=True)
    W2 = torch.tensor(np.asarray(w[45:126], dtype=np.float32).reshape(9, 9), requires_grad=True)
    B2 = torch.tensor(np.asarray(w[126:135], dtype=np.float32).reshape(9, 1), requires_grad=True)

    # Forward pass: 4 -> 9 -> 9 with ReLU activations.
    Y1 = F.relu(torch.mm(W1, X) + B1)
    Y2 = F.relu(torch.mm(W2, Y1) + B2)
    return [W1, B1, W2, B2], Y2

def loss(Y, T):
    """Sum-of-squared-errors between prediction Y and target T.

    Both tensors are flattened first, so any shapes with the same number of
    elements are accepted. Returns a scalar tensor, differentiable w.r.t. Y.
    """
    # enable_grad() is redundant when grad mode is already on (the default),
    # but kept so the loss still builds a graph inside a torch.no_grad() caller.
    with torch.enable_grad():
        diff = Y.reshape(-1) - T.reshape(-1)
        return diff.dot(diff)

# Build the network once; pars are the leaf tensors the optimizer updates.
pars, Y = NN()

T = torch.randn(9, 1)

optimizer = optim.SGD(pars, lr=0.1, momentum=0.9)

for j in range(1):
    optimizer.zero_grad()
    L = loss(Y, T)
    # retain_graph is needed only because Y is computed once, outside the loop;
    # in real training, rerun the forward pass (NN) every iteration instead.
    L.backward(retain_graph=True)
    # FIX: L.grad is always None because L is a *non-leaf* tensor — autograd
    # accumulates gradients on the leaves (the parameters), not on the loss.
    # (The original also used curly quotes here, which is a SyntaxError.)
    for p in pars:
        print('grad', p.grad)
    optimizer.step()