I’m trying to deploy a simple LSTM network, but I keep getting an error whenever I set nlayers = 2:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
class RNN_ENCODER(nn.Module):
    def __init__(self, ntoken, ninput=300, drop_prob=0.5,
                 nhidden=128, nlayers=2, bidirectional=False):
        super(RNN_ENCODER, self).__init__()
        self.n_steps = 10
        self.ntoken = ntoken            # size of the dictionary
        self.ninput = ninput            # size of each embedding vector
        self.drop_prob = drop_prob      # probability of an element to be zeroed
        self.nlayers = nlayers          # number of recurrent layers
        self.bidirectional = bidirectional
        self.rnn_type = 'LSTM'
        if bidirectional:
            self.num_directions = 2
        else:
            self.num_directions = 1
        # number of features in the hidden state
        self.nhidden = nhidden // self.num_directions
        self.define_module()
        self.init_weights()

    def define_module(self):
        self.encoder = nn.Embedding(self.ntoken, self.ninput)
        self.drop = nn.Dropout(self.drop_prob)
        if self.rnn_type == 'LSTM':
            # dropout: if non-zero, introduces a dropout layer on
            # the outputs of each RNN layer except the last layer
            self.rnn = nn.LSTM(self.ninput, self.nhidden,
                               self.nlayers, batch_first=True,
                               dropout=self.drop_prob,
                               bidirectional=self.bidirectional)
        elif self.rnn_type == 'GRU':
            self.rnn = nn.GRU(self.ninput, self.nhidden,
                              self.nlayers, batch_first=True,
                              dropout=self.drop_prob,
                              bidirectional=self.bidirectional)
        else:
            raise NotImplementedError

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        # No need to initialize the RNN parameters; nn.LSTM/nn.GRU initialize themselves:
        # http://pytorch.org/docs/master/_modules/torch/nn/modules/rnn.html#LSTM
        # self.decoder.weight.data.uniform_(-initrange, initrange)
        # self.decoder.bias.data.fill_(0)

    def init_hidden(self, bsz):
        weight = next(self.parameters()).data
        if self.rnn_type == 'LSTM':
            return (weight.new(self.nlayers * self.num_directions,
                               bsz, self.nhidden).zero_(),
                    weight.new(self.nlayers * self.num_directions,
                               bsz, self.nhidden).zero_())
        else:
            return weight.new(self.nlayers * self.num_directions,
                              bsz, self.nhidden).zero_()

    def forward(self, captions, cap_lens, hidden, mask=None):
        # captions: torch.LongTensor of size batch x n_steps
        # --> emb: batch x n_steps x ninput
        emb = self.drop(self.encoder(captions))
        # pack_padded_sequence returns a PackedSequence object
        cap_lens = cap_lens.data
        emb = pack_padded_sequence(emb, cap_lens, batch_first=True)
        # hidden and memory (num_layers * num_directions, batch, hidden_size):
        #   tensors containing the initial hidden state for each element in the batch
        # output (batch, seq_len, hidden_size * num_directions), or a PackedSequence:
        #   tensor containing the output features (h_t) from the last layer of the RNN
        output, hidden = self.rnn(emb, hidden)
        # PackedSequence --> (batch, seq_len, hidden_size * num_directions)
        output = pad_packed_sequence(output, batch_first=True)[0]
        # output = self.drop(output)
        # --> batch x hidden_size*num_directions x seq_len
        words_emb = output.transpose(1, 2)
        # --> batch x num_directions*hidden_size
        if self.rnn_type == 'LSTM':
            sent_emb = hidden[0].transpose(0, 1).contiguous()
        else:
            sent_emb = hidden.transpose(0, 1).contiguous()
        sent_emb = sent_emb.view(-1, self.nhidden * self.num_directions)
        return words_emb, sent_emb

model = RNN_ENCODER(27297)
captions = torch.empty(48, 15, dtype=torch.long).random_(27297)
cap_lens = torch.sort(torch.empty(48, dtype=torch.long).random_(1, 15), descending=True)[0]
hidden = (torch.randn(1, 48, 128), torch.randn(1, 48, 128))
traced_script_module = torch.jit.trace(model, (captions, cap_lens, hidden), check_trace=False)
traced_script_module.save("lstm.pt")
This is the traceback I get:

Traceback (most recent call last):
  File "lstm.py", line 98, in <module>
    traced_script_module = torch.jit.trace(model, (captions, cap_lens, hidden), check_trace=False)
  File "/Users/user/anaconda2/lib/python2.7/site-packages/torch/jit/__init__.py", line 565, in trace
    module._create_method_from_trace('forward', func, example_inputs)
  File "/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/module.py", line 475, in __call__
    result = self._slow_forward(*input, **kwargs)
  File "/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/module.py", line 465, in _slow_forward
    result = self.forward(*input, **kwargs)
  File "lstm.py", line 76, in forward
    output, hidden = self.rnn(emb, hidden)
  File "/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/module.py", line 475, in __call__
    result = self._slow_forward(*input, **kwargs)
  File "/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/module.py", line 465, in _slow_forward
    result = self.forward(*input, **kwargs)
  File "/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/rnn.py", line 175, in forward
    self.check_forward_args(input, hx, batch_sizes)
  File "/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/rnn.py", line 152, in check_forward_args
    'Expected hidden[0] size {}, got {}')
  File "/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/rnn.py", line 148, in check_hidden_size
    raise RuntimeError(msg.format(expected_hidden_size, tuple(hx.size())))
RuntimeError: Expected hidden[0] size (2, 48, 128), got (tensor(1), tensor(48), tensor(128))
However, if I set nlayers = 1 I only get a warning, which I think I can live with, but I still want to know why nlayers = 2 is an issue for my code:
/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/rnn.py:46: UserWarning: dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.5 and num_layers=1
  "num_layers={}".format(dropout, num_layers))