When I build a custom `Module` and try to perform manual parameter updates, the update fails because `param.grad` is not defined for some parameters. I'm not exactly sure what's wrong.
The class is defined as follows:
```python
import torch
import torch.nn as nn

class vanillaLargeRNN(nn.Module):
    def __init__(self, input_shape, hidden_shape, output_shape):
        super(vanillaLargeRNN, self).__init__()
        self.hidden_shape = hidden_shape
        # layer 1: raw input concatenated with hidden1
        self.input2hidden = nn.Linear(input_shape + hidden_shape, hidden_shape)
        self.input2output = nn.Linear(input_shape + hidden_shape, output_shape)
        self.output2output = nn.Linear(output_shape + hidden_shape, output_shape)
        # layer 2: layer 1's output concatenated with hidden2
        # (so forward() below assumes output_shape == input_shape)
        self.input2hidden2 = nn.Linear(input_shape + hidden_shape, hidden_shape)
        self.input2output2 = nn.Linear(input_shape + hidden_shape, output_shape)
        self.output2output2 = nn.Linear(output_shape + hidden_shape, output_shape)
        # layer 3: layer 2's output concatenated with hidden3
        self.input2hidden3 = nn.Linear(input_shape + hidden_shape, hidden_shape)
        self.input2output3 = nn.Linear(input_shape + hidden_shape, output_shape)
        self.output2output3 = nn.Linear(output_shape + hidden_shape, output_shape)
        self.dropout1 = nn.Dropout(0.3)
        self.dropout2 = nn.Dropout(0.3)
        self.dropout3 = nn.Dropout(0.3)

    def forward(self, input, hidden1, hidden2, hidden3):
        # layer 1
        input_comb = torch.cat([input, hidden1], 1)
        hidden = self.input2hidden(input_comb)  # NOTE: kept in a local `hidden`, never used again or returned
        output_temp = self.input2output(input_comb)
        output_comb = torch.cat([output_temp, hidden1], 1)
        output = self.output2output(output_comb)
        output = self.dropout1(output)
        # layer 2
        input_comb2 = torch.cat([output, hidden2], 1)
        hidden2 = self.input2hidden2(input_comb2)
        output_temp2 = self.input2output2(input_comb2)
        output_comb2 = torch.cat([output_temp2, hidden2], 1)
        output2 = self.output2output2(output_comb2)
        output2 = self.dropout2(output2)
        # layer 3
        input_comb3 = torch.cat([output2, hidden3], 1)
        hidden3 = self.input2hidden3(input_comb3)
        output_temp3 = self.input2output3(input_comb3)
        output_comb3 = torch.cat([output_temp3, hidden3], 1)
        output3 = self.output2output3(output_comb3)
        output3 = self.dropout3(output3)
        return output3, hidden1, hidden2, hidden3
```
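For reference, here is a minimal sketch of how the module can be instantiated and stepped once. The concrete sizes, the batch size of 1, and the zero-initialized hidden states are illustrative assumptions, not necessarily my real setup:

```python
import torch

# illustrative sizes (assumptions); layers 2/3 concatenate layer 1's
# output where the input would go, so output_shape must equal input_shape
input_shape, hidden_shape, output_shape = 64, 128, 64

rnn = vanillaLargeRNN(input_shape, hidden_shape, output_shape)

batch = 1
x = torch.randn(batch, input_shape)
hidden1 = torch.zeros(batch, hidden_shape)
hidden2 = torch.zeros(batch, hidden_shape)
hidden3 = torch.zeros(batch, hidden_shape)

out, hidden1, hidden2, hidden3 = rnn(x, hidden1, hidden2, hidden3)
print(out.shape)  # torch.Size([1, 64])
```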
And I am performing the updates like this:
```python
for i in range(min(input_line_vec.size(0), 150)):
    output, hidden1, hidden2, hidden3 = rnn(input_line_vec[i], hidden1, hidden2, hidden3)
    loss += criterion(output, target_line_vec[i])

loss.backward()
for param in rnn.parameters():
    # this line fails: param.grad is not defined (None) for some parameters
    param.data.add_(param.grad.data, alpha=-learning_rate)
```
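For comparison, the usual pattern for a manual SGD step performs the update under `torch.no_grad()`, skips parameters whose `.grad` is still `None`, and resets the gradients afterwards. A minimal sketch, assuming `loss`, `rnn`, and `learning_rate` are the ones accumulated above:

```python
import torch

loss.backward()

with torch.no_grad():  # don't record the update itself in the autograd graph
    for param in rnn.parameters():
        if param.grad is None:
            # parameters that never contributed to the loss have no gradient;
            # skipping them avoids the error on param.grad.data
            continue
        param -= learning_rate * param.grad

rnn.zero_grad()  # clear accumulated gradients before the next sequence
```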