Hi. I’m really new to pytorch. I was experimenting with code I found here:
I’m trying to replace the EncoderRNN with a bidirectional version. Here’s my code.
class EncoderBiRNN(nn.Module):
def __init__(self, input_size, hidden_size):
super(EncoderBiRNN, self).__init__()
self.hidden_size = hidden_size
self.embedding = nn.Embedding(input_size, hidden_size)
self.bi_gru = nn.GRU(hidden_size, hidden_size, num_layers=1, batch_first=False,bidirectional=True)
self.reverse_gru = nn.GRU(hidden_size,hidden_size, num_layers=1,batch_first=False,bidirectional=False)
self.reverse_gru.weight_ih_l0 = self.bi_gru.weight_ih_l0_reverse
self.reverse_gru.weight_hh_l0 = self.bi_gru.weight_hh_l0_reverse
self.reverse_gru.bias_ih_l0 = self.bi_gru.bias_ih_l0_reverse
self.reverse_gru.bias_hh_l0 = self.bi_gru.bias_hh_l0_reverse
def forward(self, input, hidden):
embedded = self.embedding(input).view(1, 1, -1)
output = embedded
#output, hidden = self.gru(output, hidden)
bi_output, bi_hidden = self.bi_gru(output,hidden)
reverse_output, reverse_hidden = self.reverse_gru(output,hidden)
#return output, hidden
return torch.cat((bi_output,reverse_output)), torch.cat((bi_hidden, reverse_hidden))
def initHidden(self):
result = Variable(torch.zeros(1, 1, self.hidden_size))
if use_cuda:
return result.cuda()
else:
return result
Here’s the error.
Traceback (most recent call last):
File “pytorch.py”, line 744, in
n.trainIters(None, None, 75000, print_every=n.print_every)
File “pytorch.py”, line 646, in trainIters
decoder, encoder_optimizer, decoder_optimizer, criterion)
File “pytorch.py”, line 574, in train
input_variable[ei], encoder_hidden)
File “/home/dave/.local/lib/python3.6/site-packages/torch/nn/modules/module.py”, line 357, in call
result = self.forward(*input, **kwargs)
File “pytorch.py”, line 85, in forward
bi_output, bi_hidden = self.bi_gru(output,hidden)
File “/home/dave/.local/lib/python3.6/site-packages/torch/nn/modules/module.py”, line 357, in call
result = self.forward(*input, **kwargs)
File “/home/dave/.local/lib/python3.6/site-packages/torch/nn/modules/rnn.py”, line 190, in forward
self.check_forward_args(input, hx, batch_sizes)
File “/home/dave/.local/lib/python3.6/site-packages/torch/nn/modules/rnn.py”, line 162, in check_forward_args
check_hidden_size(hidden, expected_hidden_size)
File “/home/dave/.local/lib/python3.6/site-packages/torch/nn/modules/rnn.py”, line 154, in check_hidden_size
raise RuntimeError(msg.format(expected_hidden_size, tuple(hx.size())))
RuntimeError: Expected hidden size (2, 1, 256), got (1, 1, 256)
this is the link to where I read that bi directional RNNs needed to be put together in such a way.
https://towardsdatascience.com/understanding-bidirectional-rnn-in-pytorch-5bd25a5dd66
What I’m looking for is advice on my code, how to write it so that it works.