I have implemented a custom RNN layer that is a little different from the standard RNN layer. I noticed that the official LSTM/GRU implementation doesn't use a Python for loop. My question: since an RNN's backpropagation is BPTT, will autograd compute the correct BPTT gradients if my model's forward uses a for loop over the time steps?

```
import torch
import torch.nn as nn


class MyRNN(nn.Module):
    def __init__(self, n_words, batch_size):
        super(MyRNN, self).__init__()
        self.hidden_dim = 8
        self.embedding_dim = 8
        self.embedding = nn.Embedding(n_words, self.embedding_dim)
        # fixed initial hidden state (not a learned parameter)
        self.h = torch.randn(batch_size, self.hidden_dim)
        self.linear_x = nn.Linear(self.embedding_dim, self.hidden_dim)
        self.linear_h = nn.Linear(self.hidden_dim, self.hidden_dim)

    def forward(self, x):
        x = self.embedding(x)            # (batch, steps, embedding_dim)
        h = self.h
        for i in range(x.shape[1]):      # iterate over time steps
            linear_x = self.linear_x(x[:, i, :])
            h = self.linear_h(h)
            h = torch.tanh(linear_x + h)
        return h
```
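
For reference, here is a minimal gradient check sketching how I would test whether gradients actually flow back through every loop iteration. The sizes (n_words = 100, batch_size = 4, 5 time steps) are made up for illustration:

```
import torch

# Hypothetical sizes, just for the check.
model = MyRNN(n_words=100, batch_size=4)
x = torch.randint(0, 100, (4, 5))  # (batch, steps) of word indices

out = model(x)
out.sum().backward()

# linear_h is applied once per time step, so if BPTT works its grad
# accumulates contributions from all 5 steps.
print(model.linear_h.weight.grad is not None)    # True -> grads flowed through the loop
print(model.embedding.weight.grad.abs().sum())   # nonzero -> BPTT reached the embeddings
```

My understanding is that autograd records every operation inside the loop, so backward() would traverse the unrolled graph, but I'd like to confirm this is equivalent to BPTT.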