Hi, I started studying pytorch recently and I’m stuck in a problem. The codes below trains w[0], …, w[8] which are the 2 * 2 elements of a weight matrix. The weight matrix is in the function named forward. I want to train w[1], …, w[8] to produce y from a. But after operating the codes, an error occurred. It is ‘RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn.’ I had tried to find causes but I can’t find and understand them. The original code is much more long, the code below is a simplified version. Would you help me how to fix the problem? Any help would be highly appreciated.
import torch
eta_n = torch.arange(1.0, 0, - 0.1)
#eta_n = torch.cat([torch.arange(1.0, 0.2, -0.1), torch.arange(0.2, 0, -0.01)], dim = 0)
delta_eta = eta_n[1:] - eta_n[0:-1] #Difference between the elements of eta_n. In this case, every element is -0.1.
eta_fin = eta_n[-1]
print('eta_n:', eta_n)
print('len(eta_n):', len(eta_n))
print('delta_eta:', delta_eta)
print('len(delta_eta) which is the no. of forward propagations:', len(delta_eta))
print('eta_fin:', eta_fin)
eta_n: tensor([1.0000, 0.9000, 0.8000, 0.7000, 0.6000, 0.5000, 0.4000, 0.3000, 0.2000, 0.1000])
len(eta_n): 10
delta_eta: tensor([-0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000])
len(delta_eta) which is the no. of forward propagations: 9
eta_fin: tensor(0.1000)
#Activation function in hidden layers
def acti1(coordinate, layer_no):#layer_no from 0 to '(the length of eta_n) - 1'
x = coordinate[0]
y = coordinate[1]
return torch.cat([x.reshape(1, -1), (y + delta_eta[layer_no] * x ** 3).reshape(1, -1)], dim = 0)
#Activation function on the output layer
def acti2(coordinate):
y = coordinate[1]
F = y
return (torch.tanh(100 * (F - 0.1)) - torch.tanh(100 * (F + 0.1)) + 2) / 2
#The result is 0 when F is between -0.5 and 0.5 and for the other F, the result is 1
#This function is steep but continuous and differentiable
def forward(coordinate, w, layer_no):#layer_no from 0 to '(the length of eta_n) - 1'
matrix = torch.Tensor([[1, delta_eta[layer_no]], [- delta_eta[layer_no], 1 - delta_eta[layer_no] * w]])
return acti1(matrix.mm(coordinate), layer_no)
y = torch.Tensor([0, 0, 1, 1]) #I want y_pred(later mentioned) to be this value
the_no_of_iterations = 10
learning_rate = 0.01
a = torch.Tensor([[0.5528, 0.8563, 1.0779, 0.5932], [-0.1109, -0.0569, 0.0904, 0.1435]])
torch.manual_seed(1)
w = torch.randn(9, requires_grad = True) #What I want to train to produce y from a
print('w:', w)
print('w.requires_grad:', w.requires_grad)
optimizer = torch.optim.Adam([w], lr = learning_rate)
for i in range(the_no_of_iterations):
#Forward propagation
for j in range(9):
a = acti1(forward(a, w[j], j), j)
print('a:', a)
y_pred =acti2(a)
print('y_pred:', y_pred)
#Loss calculation
loss = (y - y_pred).abs().sum()
print('loss:', loss.item())
print('loss.grad_fn:', loss.grad_fn)
#Autograd
optimizer.zero_grad()
loss.backward()
optimizer.step()
print('w.grad:', w.grad)
w: tensor([ 0.6614, 0.2669, 0.0617, 0.6213, -0.4519, -0.1661, -1.5228, 0.3817, -1.0276], requires_grad=True)
w.requires_grad: True
a: tensor([[ 0.5961, 1.1220, 1.6414, 0.3710],
[ 0.0363, -0.7368, -2.2987, 0.3278]])
y_pred: tensor([2.9206e-06, 1.0000e+00, 1.0000e+00, 1.0000e+00])
loss: 1.0000028610229492
loss.grad_fn: None