I have two PyTorch modules like this:
import torch
import torch.nn as nn

class Module1(nn.Module):
    def __init__(self, opt):
        super(Module1, self).__init__()
        self.num_features = opt.num_features
        self.output_size = opt.output_size
        # Four trainable weights defined manually
        self.w_t = torch.nn.Parameter(data=torch.Tensor(self.num_features, self.output_size), requires_grad=True)
        self.w_t.data.uniform_(-1, 1)
        self.w_r = torch.nn.Parameter(data=torch.Tensor(self.num_features, self.output_size), requires_grad=True)
        self.w_r.data.uniform_(-1, 1)
        self.w_l = torch.nn.Parameter(data=torch.Tensor(self.num_features, self.output_size), requires_grad=True)
        self.w_l.data.uniform_(-1, 1)
        self.b_conv = torch.nn.Parameter(data=torch.Tensor(self.output_size), requires_grad=True)
        self.b_conv.data.uniform_(-1, 1)

    def forward(self, param_1, param_2):
        # some_function combines the inputs with the four weights
        return some_function(param_1, param_2, self.w_t, self.w_r, self.w_l, self.b_conv)
.........
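(As a side note, I think an equivalent way to define one of these weights would be with torch.empty and nn.init instead of an uninitialized torch.Tensor; a minimal sketch, which I assume registers the parameter the same way:)

    # Sketch: same parameter, created from torch.empty and initialized
    # in place with nn.init instead of via .data.uniform_
    self.w_t = torch.nn.Parameter(torch.empty(self.num_features, self.output_size))
    torch.nn.init.uniform_(self.w_t, a=-1.0, b=1.0)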
class Module2(nn.Module):
    def __init__(self, opt):
        super(Module2, self).__init__()
        self.module_1 = Module1(opt)

    def forward(self, param_1, param_2):
        result = self.module_1(param_1, param_2)
        return result
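To check that all four weights are actually registered (and therefore visible to the optimizer), I can iterate over named_parameters() on the module_2 instance constructed below; a quick sketch:

    # Each manually defined nn.Parameter should show up here,
    # prefixed with the sub-module name, e.g. "module_1.w_t"
    for name, p in module_2.named_parameters():
        print(name, p.shape, p.requires_grad)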
# N, D_in, D_out, and opt are defined elsewhere in my code
x_1 = torch.randn(N, D_in)
x_2 = torch.randn(N, D_in)
y = torch.randn(N, D_out)

module_2 = Module2(opt)
learning_rate = 1e-4
loss_fn = torch.nn.MSELoss()  # example loss
optimizer = torch.optim.Adam(module_2.parameters(), lr=learning_rate)

for t in range(500):
    y_pred = module_2(x_1, x_2)
    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
In this case, Module1 is a sub-module of Module2, and Module2 is the main module that gets called in the training loop. Because of my specific task, I need to define the four weights w_t, w_r, w_l, and b_conv manually in Module1.
When I try to check whether the gradient affects any of these four weights, by adding print(self.w_t) in the forward pass of Module1, it looks like loss.backward() is not working.
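Concretely, the check I have in mind is printing the .grad attribute right after the backward call; my understanding is that .grad is only populated once loss.backward() has run, so printing the weight inside forward would not show anything about the gradient:

    optimizer.zero_grad()
    loss.backward()
    # .grad should be non-None here if the graph reaches these parameters
    print(module_2.module_1.w_t.grad)
    print(module_2.module_1.b_conv.grad)
    optimizer.step()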
So my question is: how can I backpropagate the loss correctly in this case?
And if loss.backward() is actually fine in this case, I guess I need to change the place where I define the Variable; I'm not sure what good practice is here.
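For completeness, another sanity check I could run is comparing a weight's values before and after one optimizer step, along these lines:

    # Snapshot one weight, run a single training step, and see if it moved
    w_before = module_2.module_1.w_t.detach().clone()
    y_pred = module_2(x_1, x_2)
    loss = loss_fn(y_pred, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(torch.equal(w_before, module_2.module_1.w_t.detach()))  # False once it updates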