Torch.jit.trace (TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect)

I’m trying out torch.jit.trace on a basic LSTM program and I keep getting warnings I’m not familiar with. There are no errors, but I want to understand and fix them.

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

class RNN_ENCODER(nn.Module):
    def __init__(self, ntoken, ninput=300, drop_prob=0.5,
                 nhidden=128, nlayers=1, bidirectional=False):
        super(RNN_ENCODER, self).__init__()
        self.n_steps = 10
        self.ntoken = ntoken  # size of the dictionary
        self.ninput = ninput  # size of each embedding vector
        self.drop_prob = drop_prob  # probability of an element to be zeroed
        self.nlayers = nlayers  # Number of recurrent layers
        self.bidirectional = bidirectional
        self.rnn_type = 'LSTM'
        if bidirectional:
            self.num_directions = 2
        else:
            self.num_directions = 1
        # number of features in the hidden state
        self.nhidden = nhidden // self.num_directions

        self.define_module()
        self.init_weights()

    def define_module(self):
        self.encoder = nn.Embedding(self.ntoken, self.ninput)
        self.drop = nn.Dropout(self.drop_prob)
        if self.rnn_type == 'LSTM':
            # dropout: If non-zero, introduces a dropout layer on
            # the outputs of each RNN layer except the last layer
            self.rnn = nn.LSTM(self.ninput, self.nhidden,
                               self.nlayers, batch_first=True,
                               dropout=self.drop_prob,
                               bidirectional=self.bidirectional)
        elif self.rnn_type == 'GRU':
            self.rnn = nn.GRU(self.ninput, self.nhidden,
                              self.nlayers, batch_first=True,
                              dropout=self.drop_prob,
                              bidirectional=self.bidirectional)
        else:
            raise NotImplementedError

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        # Do not need to initialize RNN parameters, which have been initialized
        # http://pytorch.org/docs/master/_modules/torch/nn/modules/rnn.html#LSTM
        # self.decoder.weight.data.uniform_(-initrange, initrange)
        # self.decoder.bias.data.fill_(0)

    def init_hidden(self, bsz):
        weight = next(self.parameters()).data
        if self.rnn_type == 'LSTM':
            return (weight.new(self.nlayers * self.num_directions,
                                        bsz, self.nhidden).zero_(),
                    weight.new(self.nlayers * self.num_directions,
                                        bsz, self.nhidden).zero_())
        else:
            return weight.new(self.nlayers * self.num_directions,
                                       bsz, self.nhidden).zero_()

    def forward(self, captions, cap_lens, hidden, mask=None):
        # input: torch.LongTensor of size batch x n_steps
        # --> emb: batch x n_steps x ninput
        emb = self.drop(self.encoder(captions))
        #
        # Returns: a PackedSequence object
        cap_lens = cap_lens.data.tolist()
        emb = pack_padded_sequence(emb, cap_lens, batch_first=True)
        # #hidden and memory (num_layers * num_directions, batch, hidden_size):
        # tensor containing the initial hidden state for each element in batch.
        # #output (batch, seq_len, hidden_size * num_directions)
        # #or a PackedSequence object:
        # tensor containing output features (h_t) from the last layer of RNN
        output, hidden = self.rnn(emb, hidden)
        # PackedSequence object
        # --> (batch, seq_len, hidden_size * num_directions)
        output = pad_packed_sequence(output, batch_first=True)[0]
        # output = self.drop(output)
        # --> batch x hidden_size*num_directions x seq_len
        words_emb = output.transpose(1, 2)
        # --> batch x num_directions*hidden_size
        if self.rnn_type == 'LSTM':
            sent_emb = hidden[0].transpose(0, 1).contiguous()
        else:
            sent_emb = hidden.transpose(0, 1).contiguous()
        sent_emb = sent_emb.view(-1, self.nhidden * self.num_directions)
        return words_emb, sent_emb


model = RNN_ENCODER(27297)
captions = torch.empty(48, 15, dtype=torch.long).random_(27297)
cap_lens = torch.sort(torch.empty(48, dtype=torch.long).random_(1, 15), descending=True)[0]
hidden = (torch.randn(1, 48, 128), torch.randn(1, 48, 128))

# output = model(captions, cap_lens, hidden)

traced_script_module = torch.jit.trace(model, (captions, cap_lens, hidden))
/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/rnn.py:46: UserWarning: dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.5 and num_layers=1
  "num_layers={}".format(dropout, num_layers))

lstm.py:72: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  cap_lens = cap_lens.data.tolist()

lstm.py:73: TracerWarning: pack_padded_sequence has been called with a Python list of sequence lengths. The tracer cannot track the data flow of Python values, and it will treat them as constants, likely rendering the trace incorrect for any other combination of lengths.
  emb = pack_padded_sequence(emb, cap_lens, batch_first=True)

/Users/user/anaconda2/lib/python2.7/site-packages/torch/jit/__init__.py:572: TracerWarning: Trace had nondeterministic nodes. Nodes:
	%16 : Float(48, 15, 300) = aten::dropout(%13, %14, %15), scope: RNN_ENCODER/Dropout[drop]
This may cause errors in trace checking. To disable trace checking, pass check_trace=False to torch.jit.trace()
  _check_trace([example_inputs], func, executor_options, module, check_tolerance)

/Users/user/anaconda2/lib/python2.7/site-packages/torch/jit/__init__.py:572: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error:
Not within tolerance rtol=1e-05 atol=1e-05 at input[2, 99, 0] (-0.168488845229 vs. -0.0769025087357) and 36851 other locations (42.00%)
  _check_trace([example_inputs], func, executor_options, module, check_tolerance)

/Users/user/anaconda2/lib/python2.7/site-packages/torch/jit/__init__.py:572: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. Detailed error:
Not within tolerance rtol=1e-05 atol=1e-05 at input[11, 12] (-0.0212909094989 vs. 0.0580021962523) and 6142 other locations (99.00%)
  _check_trace([example_inputs], func, executor_options, module, check_tolerance)

Hi,

Let’s go in steps:

lstm.py:72: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  cap_lens = cap_lens.data.tolist()

This warning indicates that you are converting a tensor to a Python list. The tracer can only record the data flow of Tensors, not Python values, so the list of lengths would be baked into the trace as a constant. You can fix this by removing the .tolist() in that line (it’s not necessary) and passing the lengths tensor directly to pack_padded_sequence.
This will also fix the other warning just below.
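
For example, a minimal sketch of that change inside forward (assuming cap_lens arrives as a 1-D LongTensor of lengths sorted in descending order):

cap_lens = cap_lens.data  # keep the lengths as a tensor so the tracer can follow them
emb = pack_padded_sequence(emb, cap_lens, batch_first=True)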

The other warnings you saw happen because of Dropout nodes, which are non-deterministic. By default, the generated trace is checked by running the computation twice and comparing the results (a mismatch can also indicate control flow in your code that should be handled with torch.jit.script instead).
In your case, as you only have a dropout, I’d say you can disable the check by doing the following:

traced_script_module = torch.jit.trace(model, (captions, cap_lens, hidden), check_trace=False)

I believe those two points should fix all your issues. You can inspect the generated trace via traced_script_module.graph.
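
For example (the exact printout varies between PyTorch versions):

print(traced_script_module.graph)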


Thank you for the help! After debugging, the only warning I have left is about having a single layer (nlayers=1):

/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/rnn.py:46: UserWarning: dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.5 and num_layers=1
  "num_layers={}".format(dropout, num_layers))

I set nlayers = 2 and get the following error:

Traceback (most recent call last):
  File "lstm.py", line 101, in <module>
    traced_script_module = torch.jit.trace(model, (captions, cap_lens, hidden), check_trace=False)
  File "/Users/user/anaconda2/lib/python2.7/site-packages/torch/jit/__init__.py", line 565, in trace
    module._create_method_from_trace('forward', func, example_inputs)
  File "/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/module.py", line 475, in __call__
    result = self._slow_forward(*input, **kwargs)
  File "/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/module.py", line 465, in _slow_forward
    result = self.forward(*input, **kwargs)
  File "lstm.py", line 76, in forward
    output, hidden = self.rnn(emb, hidden)
  File "/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/module.py", line 475, in __call__
    result = self._slow_forward(*input, **kwargs)
  File "/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/module.py", line 465, in _slow_forward
    result = self.forward(*input, **kwargs)
  File "/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/rnn.py", line 175, in forward
    self.check_forward_args(input, hx, batch_sizes)
  File "/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/rnn.py", line 152, in check_forward_args
    'Expected hidden[0] size {}, got {}')
  File "/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/rnn.py", line 148, in check_hidden_size
    raise RuntimeError(msg.format(expected_hidden_size, tuple(hx.size())))
RuntimeError: Expected hidden[0] size (2, 48, 128), got (tensor(1), tensor(48), tensor(128))

It’s not clear to me that this warning is specific to the JIT; it comes from nn.LSTM itself.
But it looks like you changed the size of hidden?
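
For reference, with nlayers=2 and num_directions=1 the LSTM expects initial states of shape (nlayers * num_directions, batch, nhidden) = (2, 48, 128). A sketch of matching states (assuming batch size 48), built directly or via the init_hidden helper you already have:

hidden = (torch.randn(2, 48, 128), torch.randn(2, 48, 128))
# or, reuse the helper already defined on the module:
hidden = model.init_hidden(48)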

I don’t remember changing hidden directly. So far this is what my code looks like:

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

class RNN_ENCODER(nn.Module):
    def __init__(self, ntoken, ninput=300, drop_prob=0.5,
                 nhidden=128, nlayers=2, bidirectional=False):
        super(RNN_ENCODER, self).__init__()
        self.n_steps = 10
        self.ntoken = ntoken  # size of the dictionary
        self.ninput = ninput  # size of each embedding vector
        self.drop_prob = drop_prob  # probability of an element to be zeroed
        self.nlayers = nlayers  # Number of recurrent layers
        self.bidirectional = bidirectional
        self.rnn_type = 'LSTM'
        if bidirectional:
            self.num_directions = 2
        else:
            self.num_directions = 1
        # number of features in the hidden state
        self.nhidden = nhidden // self.num_directions

        self.define_module()
        self.init_weights()

    def define_module(self):
        self.encoder = nn.Embedding(self.ntoken, self.ninput)
        self.drop = nn.Dropout(self.drop_prob)
        if self.rnn_type == 'LSTM':
            # dropout: If non-zero, introduces a dropout layer on
            # the outputs of each RNN layer except the last layer
            self.rnn = nn.LSTM(self.ninput, self.nhidden,
                               self.nlayers, batch_first=True,
                               dropout=self.drop_prob,
                               bidirectional=self.bidirectional)
        elif self.rnn_type == 'GRU':
            self.rnn = nn.GRU(self.ninput, self.nhidden,
                              self.nlayers, batch_first=True,
                              dropout=self.drop_prob,
                              bidirectional=self.bidirectional)
        else:
            raise NotImplementedError

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        # Do not need to initialize RNN parameters, which have been initialized
        # http://pytorch.org/docs/master/_modules/torch/nn/modules/rnn.html#LSTM
        # self.decoder.weight.data.uniform_(-initrange, initrange)
        # self.decoder.bias.data.fill_(0)

    def init_hidden(self, bsz):
        weight = next(self.parameters()).data
        if self.rnn_type == 'LSTM':
            return (weight.new(self.nlayers * self.num_directions,
                                        bsz, self.nhidden).zero_(),
                    weight.new(self.nlayers * self.num_directions,
                                        bsz, self.nhidden).zero_())
        else:
            return weight.new(self.nlayers * self.num_directions,
                                       bsz, self.nhidden).zero_()

    def forward(self, captions, cap_lens, hidden, mask=None):
        # input: torch.LongTensor of size batch x n_steps
        # --> emb: batch x n_steps x ninput
        emb = self.drop(self.encoder(captions))
        #
        # Returns: a PackedSequence object
        cap_lens = cap_lens.data
        emb = pack_padded_sequence(emb, cap_lens, batch_first=True)
        # #hidden and memory (num_layers * num_directions, batch, hidden_size):
        # tensor containing the initial hidden state for each element in batch.
        # #output (batch, seq_len, hidden_size * num_directions)
        # #or a PackedSequence object:
        # tensor containing output features (h_t) from the last layer of RNN
        output, hidden = self.rnn(emb, hidden)
        # PackedSequence object
        # --> (batch, seq_len, hidden_size * num_directions)
        output = pad_packed_sequence(output, batch_first=True)[0]
        # output = self.drop(output)
        # --> batch x hidden_size*num_directions x seq_len
        words_emb = output.transpose(1, 2)
        # --> batch x num_directions*hidden_size
        if self.rnn_type == 'LSTM':
            sent_emb = hidden[0].transpose(0, 1).contiguous()
        else:
            sent_emb = hidden.transpose(0, 1).contiguous()
        sent_emb = sent_emb.view(-1, self.nhidden * self.num_directions)
        return words_emb, sent_emb


model = RNN_ENCODER(27297)
captions = torch.empty(48, 15, dtype=torch.long).random_(27297)
cap_lens = torch.sort(torch.empty(48, dtype=torch.long).random_(1, 15), descending=True)[0]
hidden = (torch.randn(1, 48, 128), torch.randn(1, 48, 128))


traced_script_module = torch.jit.trace(model, (captions, cap_lens, hidden), check_trace=False)

traced_script_module.save("lstm.pt")

If nlayers=1, I just get the following warning:

/Users/user/anaconda2/lib/python2.7/site-packages/torch/nn/modules/rnn.py:46: UserWarning: dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.5 and num_layers=1
  "num_layers={}".format(dropout, num_layers))