I’m trying out torch.jit.trace on a basic LSTM program and I keep getting warnings I’m not familiar with. There are no errors, but I’d like to understand what the warnings are telling me and how to fix them. Here is the script:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
class RNN_ENCODER(nn.Module):
    def __init__(self, ntoken, ninput=300, drop_prob=0.5,
                 nhidden=128, nlayers=1, bidirectional=False):
        super(RNN_ENCODER, self).__init__()
        self.n_steps = 10
        self.ntoken = ntoken          # size of the dictionary
        self.ninput = ninput          # size of each embedding vector
        self.drop_prob = drop_prob    # probability of an element to be zeroed
        self.nlayers = nlayers        # number of recurrent layers
        self.bidirectional = bidirectional
        self.rnn_type = 'LSTM'
        if bidirectional:
            self.num_directions = 2
        else:
            self.num_directions = 1
        # number of features in the hidden state
        self.nhidden = nhidden // self.num_directions
        self.define_module()
        self.init_weights()

    def define_module(self):
        self.encoder = nn.Embedding(self.ntoken, self.ninput)
        self.drop = nn.Dropout(self.drop_prob)
        if self.rnn_type == 'LSTM':
            # dropout: if non-zero, introduces a dropout layer on
            # the outputs of each RNN layer except the last layer
            self.rnn = nn.LSTM(self.ninput, self.nhidden,
                               self.nlayers, batch_first=True,
                               dropout=self.drop_prob,
                               bidirectional=self.bidirectional)
        elif self.rnn_type == 'GRU':
            self.rnn = nn.GRU(self.ninput, self.nhidden,
                              self.nlayers, batch_first=True,
                              dropout=self.drop_prob,
                              bidirectional=self.bidirectional)
        else:
            raise NotImplementedError

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        # No need to initialize the RNN parameters; they are initialized by default:
        # http://pytorch.org/docs/master/_modules/torch/nn/modules/rnn.html#LSTM
        # self.decoder.weight.data.uniform_(-initrange, initrange)
        # self.decoder.bias.data.fill_(0)

    def init_hidden(self, bsz):
        weight = next(self.parameters()).data
        if self.rnn_type == 'LSTM':
            return (weight.new(self.nlayers * self.num_directions,
                               bsz, self.nhidden).zero_(),
                    weight.new(self.nlayers * self.num_directions,
                               bsz, self.nhidden).zero_())
        else:
            return weight.new(self.nlayers * self.num_directions,
                              bsz, self.nhidden).zero_()

    def forward(self, captions, cap_lens, hidden, mask=None):
        # captions: torch.LongTensor of size batch x n_steps
        # --> emb: batch x n_steps x ninput
        emb = self.drop(self.encoder(captions))
        # pack_padded_sequence returns a PackedSequence object
        cap_lens = cap_lens.data.tolist()
        emb = pack_padded_sequence(emb, cap_lens, batch_first=True)
        # hidden and memory (num_layers * num_directions, batch, hidden_size):
        # tensors containing the initial hidden state for each element in the batch
        # output (batch, seq_len, hidden_size * num_directions), or a PackedSequence:
        # output features (h_t) from the last layer of the RNN
        output, hidden = self.rnn(emb, hidden)
        # PackedSequence --> (batch, seq_len, hidden_size * num_directions)
        output = pad_packed_sequence(output, batch_first=True)[0]
        # output = self.drop(output)
        # --> batch x hidden_size*num_directions x seq_len
        words_emb = output.transpose(1, 2)
        # --> batch x num_directions*hidden_size
        if self.rnn_type == 'LSTM':
            sent_emb = hidden[0].transpose(0, 1).contiguous()
        else:
            sent_emb = hidden.transpose(0, 1).contiguous()
        sent_emb = sent_emb.view(-1, self.nhidden * self.num_directions)
        return words_emb, sent_emb
model = RNN_ENCODER(27297)
captions = torch.empty(48, 15, dtype=torch.long).random_(27297)
cap_lens = torch.sort(torch.empty(48, dtype=torch.long).random_(1, 15), descending=True)[0]
hidden = (torch.randn(1, 48, 128), torch.randn(1, 48, 128))
# output = model(captions, cap_lens, hidden)
traced_script_module = torch.jit.trace(model, (captions, cap_lens, hidden))
Running this produces the following warnings:

/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/rnn.py:46: UserWarning: dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.5 and num_layers=1
"num_layers={}".format(dropout, num_layers))
lstm.py:72: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
cap_lens = cap_lens.data.tolist()
lstm.py:73: TracerWarning: pack_padded_sequence has been called with a Python list of sequence lengths. The tracer cannot track the data flow of Python values, and it will treat them as constants, likely rendering the trace incorrect for any other combination of lengths.
emb = pack_padded_sequence(emb, cap_lens, batch_first=True)
/Users/user/anaconda2/lib/python2.7/site-packages/torch/jit/__init__.py:572: TracerWarning: Trace had nondeterministic nodes. Nodes:
%16 : Float(48, 15, 300) = aten::dropout(%13, %14, %15), scope: RNN_ENCODER/Dropout[drop]
This may cause errors in trace checking. To disable trace checking, pass check_trace=False to torch.jit.trace()
_check_trace([example_inputs], func, executor_options, module, check_tolerance)
/Users/user/anaconda2/lib/python2.7/site-packages/torch/jit/__init__.py:572: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error:
Not within tolerance rtol=1e-05 atol=1e-05 at input[2, 99, 0] (-0.168488845229 vs. -0.0769025087357) and 36851 other locations (42.00%)
_check_trace([example_inputs], func, executor_options, module, check_tolerance)
/Users/user/anaconda2/lib/python2.7/site-packages/torch/jit/__init__.py:572: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. Detailed error:
Not within tolerance rtol=1e-05 atol=1e-05 at input[11, 12] (-0.0212909094989 vs. 0.0580021962523) and 6142 other locations (99.00%)
_check_trace([example_inputs], func, executor_options, module, check_tolerance)
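From reading the warnings, my guess is that the first one comes from passing dropout=0.5 to nn.LSTM while nlayers=1, and that the trace-check mismatches come from the Dropout layer being active (and therefore nondeterministic) during tracing, plus cap_lens being converted to a Python list before pack_padded_sequence. Is the fix something along these lines? This is only a sketch of what I'm considering (it reuses the setup code from above, just with dropout disabled), not something I'm sure about:

# Sketch of my attempted fix -- assumes the RNN_ENCODER class defined above
model = RNN_ENCODER(27297, drop_prob=0.0)  # dropout=0 with a single layer should avoid the UserWarning
model.eval()                               # turn off the Dropout module so the trace is deterministic

captions = torch.empty(48, 15, dtype=torch.long).random_(27297)
cap_lens = torch.sort(torch.empty(48, dtype=torch.long).random_(1, 15), descending=True)[0]
hidden = (torch.randn(1, 48, 128), torch.randn(1, 48, 128))

traced_script_module = torch.jit.trace(model, (captions, cap_lens, hidden))

Inside forward I would also remove the cap_lens.data.tolist() line and pass the cap_lens tensor directly to pack_padded_sequence, which I think is what the two TracerWarnings are asking for, though I'm not sure whether the tracer can generalize over other length combinations even then. Is that the right way to read these warnings?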