Origin
July 27, 2023, 7:43pm
1
class Lag(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(5, 200),
            nn.ReLU(),
            nn.Linear(200, 1)
        )

    def forward(self, x_dot, x):
        q_dots = x_dot[0:2]
        q_dots_transpose = q_dots.t()
        mass_matrix = m(x[0], x[1])
        KE = 0.5 * torch.matmul(q_dots_transpose, torch.matmul(mass_matrix, q_dots))
        ke_variabels = torch.square(torch.cat([x[0:2], x[4:]]))
        qc_x = (x[5] * x[0]).view(1, 1)
        x_new = torch.cat((ke_variabels, qc_x), 0).view(1, -1)
        L_wo_KE = self.linear_relu_stack(x_new)
        L = L_wo_KE + KE
        return L
When I checked the gradients with the following code

for name, param in L.named_parameters():
    print(name, param.grad)

I got

linear_relu_stack.2.bias None

I am confused. Why is my network training even though some of the gradients are None? Please help me out here. Thanks in advance.
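For reference, a minimal standalone sketch (a placeholder layer, not my actual model) of the behaviour I would expect: .grad is None until a backward() call has populated it, and it only stays None afterwards for parameters that never enter the forward graph, while the rest of the network can still train.

import torch
import torch.nn as nn

# Placeholder layer, only to illustrate when .grad is None.
layer = nn.Linear(5, 1)
print(layer.bias.grad)      # None: no backward() has run yet

out = layer(torch.randn(1, 5))
out.sum().backward()
print(layer.bias.grad)      # now populated, e.g. tensor([1.])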
Origin
July 27, 2023, 8:55pm
2
@ptrblck @KFrank, could you please help me with it? Thanks.
The bias gradient is not None using your code:
class Lag(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(5, 200),
            nn.ReLU(),
            nn.Linear(200, 1)
        )

    def forward(self, x_dot, x):
        q_dots = x_dot[0:2]
        q_dots_transpose = q_dots.t()
        mass_matrix = torch.randn(2, 2)
        KE = 0.5 * torch.matmul(q_dots_transpose, torch.matmul(mass_matrix, q_dots))
        ke_variabels = torch.square(torch.cat([x[0:2], x[4:]]))
        qc_x = (x[5] * x[0]).view(1, 1)
        x_new = torch.cat((ke_variabels, qc_x), 0).view(1, -1)
        L_wo_KE = self.linear_relu_stack(x_new)
        L = L_wo_KE + KE
        return L

model = Lag()
x_dot = torch.randn(6, 1)
x = torch.randn(6, 1)
out = model(x_dot, x)
out.mean().backward()
for name, param in model.named_parameters():
    print(name, param.grad.sum())
# linear_relu_stack.0.weight tensor(-1.8680)
# linear_relu_stack.0.bias tensor(-0.6111)
# linear_relu_stack.2.weight tensor(39.8814)
# linear_relu_stack.2.bias tensor(1.)
Note that I needed to replace the undefined mass_matrix with a random tensor, but I don't see how this would affect the gradients.
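Purely as an illustration (the real definition of m was not posted, so the entries below are placeholders), any mass matrix built from differentiable torch ops would keep the kinetic-energy term on the autograd graph; it changes the gradient values flowing into x, but not whether the parameter gradients exist:

import torch

def m(q1, q2):
    # q1, q2 are the 1-element tensors x[0] and x[1] from the snippet above.
    # Placeholder entries; only the use of differentiable ops matters here.
    c = torch.cos(q1 - q2)                 # shape (1,)
    one = torch.ones_like(c)
    row1 = torch.cat([2 * one, c])         # shape (2,)
    row2 = torch.cat([c, one])             # shape (2,)
    return torch.stack([row1, row2])       # shape (2, 2), differentiable w.r.t. q1, q2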
Also, I would recommend avoiding tagging specific users, as it could discourage others from posting a valid response.
Origin
July 28, 2023, 7:15pm
4
Got it. Thanks. I will recheck it.
Origin
July 28, 2023, 7:39pm
5
from torch.autograd import grad

def EOM(L, D, x, x_dot, x_dd):
    # x = [q1, q2, I, qc], x_dot = [q1_dot, q2_dot, I_dot, qc_dot], x_dd = [q1_dd, q2_dd]
    # First derivatives of L and D w.r.t. the velocities and states.
    dL_dxdot = grad(L, x_dot, create_graph=True, retain_graph=True)
    dL_q1dot_xdot = grad(dL_dxdot[0][0], x_dot, create_graph=True, retain_graph=True)
    dD_xdot = grad(D, x_dot, create_graph=True, retain_graph=True)
    dD_x = grad(D, x, create_graph=True, retain_graph=True)
    dL_dx = grad(L, x, create_graph=True, retain_graph=True)
    dL_I_x = grad(dL_dx[0][4], x, create_graph=True, retain_graph=True)
    dL_q2dot_xdot = grad(dL_dxdot[0][1], x_dot, create_graph=True, retain_graph=True)
    # Assemble the individual equations of motion.
    q1_eom = dL_q1dot_xdot[0][0] * x_dd[0] + grad(L, x, retain_graph=True)[0][0] + dD_xdot[0][0]
    q2_eom = dL_q2dot_xdot[0][1] * x_dd[1] + grad(L, x, retain_graph=True)[0][1] + dD_xdot[0][1]
    i_eom = dL_I_x[0][4] * x_dot[4] + dD_x[0][4]
    qc_eom = -dL_dx[0][5] + dD_xdot[0][5]
    return q1_eom, q2_eom, i_eom, qc_eom
I feel that I am making some mistakes in calculating the gradients. Can anyone spot where the computational graph is getting detached, or confirm that the code is fine as it is?
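For reference, a quick way to check for a detached graph (sketch only; L, D, x and x_dot are assumed to already exist, with requires_grad=True set on x and x_dot):

import torch
from torch.autograd import grad

def check_graph(L, D, x, x_dot):
    # A tensor whose grad_fn is None is not attached to any autograd graph.
    print("L grad_fn:", L.grad_fn)
    print("D grad_fn:", D.grad_fn)

    # allow_unused=True returns None (instead of raising) for inputs that
    # do not actually influence L, which exposes a silent detach.
    dL = grad(L, (x, x_dot), retain_graph=True, allow_unused=True)
    print("dL/dx missing:", dL[0] is None, "| dL/dx_dot missing:", dL[1] is None)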
Origin
July 30, 2023, 5:42pm
6
Please help me out here. I could give more information if needed. Thanks.