I’m getting this error too. I would love to see a solution.
Code snippet (I’m wrapping the model in nn.DataParallel):
class SimpleLSTM(nn.Module):
def init(self, vocab_size, emb_dim, pad_idx, hid_dim, lstm_layers):
super(SimpleLSTM, self).init()
self.hid_dim = hid_dim #128
self.lstm_layers = lstm_layers #2
self.embeddings = nn.Embedding(vocab_size, emb_dim, padding_idx=pad_idx)
self.lstm = nn.LSTM(emb_dim, hid_dim, lstm_layers)
def forward(self, inputs, targets, input_mask, target_mask):
#batch size is dynamic
batch_size = inputs.size()[1] #inputs = seq_len x batch
emb_inputs = self.embeddings(inputs) # seq_len x batch x emb_dim
lstm_hid_state = (torch.zeros(self.lstm_layers, batch_size, self.hid_dim),
torch.zeros(self.lstm_layers, batch_size, self.hid_dim))
lstm_out, lstm_hid_state = self.lstm(emb_inputs, lstm_hid_state)
return lstm_out[-1]
Error msg (truncated):
in forward(self, inputs, targets, input_mask, target_mask)
45 lstm_hid_state = (torch.zeros(self.lstm_layers, batch_size, self.hid_dim),
46 torch.zeros(self.lstm_layers, batch_size, self.hid_dim))
—> 47 lstm_out, lstm_hid_state = self.lstm(emb_inputs, lstm_hid_state)
48 print(‘last lstm out’, lstm_out[-1].size())
49 #lstm_out, recovered_lengths = nn.utils.rnn.pad_packed_sequence(lstm_out)
~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
487 result = self._slow_forward(*input, **kwargs)
488 else:
–> 489 result = self.forward(*input, **kwargs)
490 for hook in self._forward_hooks.values():
491 hook_result = hook(self, input, result)
~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
177 if batch_sizes is None:
178 result = _impl(input, hx, self._flat_weights, self.bias, self.num_layers,
–> 179 self.dropout, self.training, self.bidirectional, self.batch_first)
180 else:
181 result = _impl(input, batch_sizes, hx, self._flat_weights, self.bias,
TypeError: lstm() received an invalid combination of arguments - got (Tensor, tuple, list, float, int, int, bool, bool, bool), but expected one of:
- (Tensor data, Tensor batch_sizes, tuple of Tensors hx, tuple of Tensors params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional)
didn’t match because some of the arguments have invalid types: (Tensor, tuple, list, float, int, int, bool, bool, bool)
- (Tensor input, tuple of Tensors hx, tuple of Tensors params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first)
didn’t match because some of the arguments have invalid types: (Tensor, tuple, list, float, int, int, bool, bool, bool)
Thanks!