I cannot get the gradient (SGD) to work in the below example. Similar posts and autograd documentation have not helped. In the below L.grad is always none. Your help is appreciated.
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
def NN( w=np.random.rand(135), x=np.random.rand(4) ):
X = torch.Tensor(x.reshape(4,1))
M1 = torch.Tensor(np.array(w[0:36]).reshape(9,4))
b1 = torch.Tensor(np.array(w[36:45]).reshape(9,1))
M2 = torch.Tensor(np.array(w[45:126]).reshape(9,9))
b2 = torch.Tensor(np.array(w[126:135]).reshape(9,1))
nIn = 4
nH1 = 9
nH2 = 9
W1 = Variable(M1, requires_grad=True)
B1 = Variable(b1, requires_grad=True)
Y1 = torch.mm(W1, X) + B1
Y1 = F.relu(Y1)
W2 = Variable(M2, requires_grad=True)
B2 = Variable(b2, requires_grad=True)
Y2 = torch.mm(W2, Y1) + B2
Y2 = F.relu(Y2)
print(Y2.shape)
print(Y2)
return [W1,B1,W2,B2], Y2
def loss(Y, T):
with torch.enable_grad():
diff = Y.reshape(-1)-T.reshape(-1)
print('loss type', type(diff))
return diff.dot(diff)
pars, Y = NN()
T = torch.randn(9,1)
optimizer = optim.SGD(pars, lr=0.1, momentum=0.9)
for j in range(1):
optimizer.zero_grad()
L = loss(Y, T)
L.backward(retain_graph=True)
print(‘grad’, L.grad)
optimizer.step()