Can someone tell me why the below “Manual RNN” does not tie out to the “nn.RNN” function?
class RNN(nn.Module):
def __init__(self, batchSize, inputSize, outputSize):
super(RNN, self).__init__()
self.batchSize = batchSize
self.inputSize = inputSize
self.hiddenSize = 10
self.rnn = nn.RNN(self.inputSize, self.hiddenSize)
self.hidden = nn.Linear(self.hiddenSize + self.inputSize, self.hiddenSize)
self.linear = nn.Linear(self.hiddenSize, outputSize)
self.tanh = nn.Tanh()
def forward(self, x):
#x = a single name, as one-hot encoded vector (31 possible chars)
#MANUAL RNN
hidden = torch.zeros(1, self.hiddenSize)
for letter in x:
letter = letter.view(1, -1)
hidden = hidden.view(1, -1)
combined = torch.cat( (letter, hidden), dim=1)
hidden = self.tanh(self.hidden(combined))
output1 = self.linear(hidden)
#nn.RNN from pytorch
hidden = torch.zeros(1, self.hiddenSize)
x = x.view(5, 1, -1)
hidden = hidden.view(1, 1, -1)
output, hidden = self.rnn(x, hidden)
hidden = hidden.view(1, -1)
output2 = self.linear(hidden)
if (output1 != output2).any():
print(output1)
print(output2)
raise Exception('Manual RNN does not match nn.RNN')
return output, hidden
Result:
tensor([[ 0.0006, -0.1234, -0.1572, 0.3339, -0.2149, 0.0967, -0.0302, -0.0470,
-0.1969, 0.0827, -0.0481, 0.3200, -0.1252, 0.1596, 0.0969, 0.0946,
0.3446, -0.1893]], grad_fn=<ThAddmmBackward>)
tensor([[-0.0289, -0.1587, -0.1367, 0.1682, -0.1722, 0.2661, 0.1075, -0.2680,
-0.1853, 0.1767, 0.0378, 0.2075, -0.1701, 0.1740, 0.2489, 0.0971,
0.1929, -0.2300]], grad_fn=<ThAddmmBackward>)
Exception: Manual RNN does not match nn.RNN
What am I doing wrong? I tried looking into the underlying code for nn.RNN but the interesting part disappears in their backend function.
Thanks!