Problem with Packed RNN with DataParallel

Hi, I am developing a machine translator in PyTorch, and I have a problem with DataParallel and the "pack_padded_sequence" function. When I run my code, I get this error:

in forward(self, input)
     21         print(input_lengths)
     22
---> 23         packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
     24         outputs, hidden = self.gru(packed, hidden)
     25

/home/a20112128/anaconda3/lib/python3.6/site-packages/torch/nn/utils/rnn.py in pack_padded_sequence(input, lengths, batch_first)
     68         if l > prev_l:
     69             c_batch_size = batch_size - i
---> 70             steps.append(input[prev_l:l, :c_batch_size].contiguous().view(-1, *input.size()[2:]))
     71             batch_sizes.extend([c_batch_size] * (l - prev_l))
     72             prev_l = l

/home/a20112128/anaconda3/lib/python3.6/site-packages/torch/autograd/variable.py in __getitem__(self, key)
     76             return IndexSelect.apply(self, 0, key)
     77         # else fall through and raise an error in Index
---> 78         return Index.apply(self, key)
     79
     80     def __setitem__(self, key, value):

/home/a20112128/anaconda3/lib/python3.6/site-packages/torch/autograd/_functions/tensor.py in forward(ctx, i, index)
     87             result = i.index(ctx.index)
     88         else:
---> 89             result = i.index(ctx.index)
     90         ctx.mark_shared_storage((i, result))
     91         return result

ValueError: result of slicing is an empty tensor

The error comes from my encoder module:

import torch
import torch.nn as nn

class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, n_layers=1, dropout=0.1):
        super(EncoderRNN, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.dropout = dropout

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=self.dropout, bidirectional=True)

    def forward(self, input):
        # Note: we run this all at once (over multiple batches of multiple sequences).
        # Everything arrives as a single tuple so that DataParallel can scatter it.
        input_seqs, input_lengths, hidden = input

        embedded = self.embedding(input_seqs)

        # input_lengths must be sorted in decreasing order
        packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
        outputs, hidden = self.gru(packed, hidden)
        outputs, output_lengths = torch.nn.utils.rnn.pad_packed_sequence(outputs)  # unpack (back to padded)

        padded_output = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]  # sum bidirectional outputs
        return padded_output, hidden



# Initialize models
encoder = EncoderRNN(sentence.n_words, hidden_size, n_layers, dropout=dropout)

# Initialize optimizers and criterion
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Move models to GPU
if USE_CUDA:
    encoder = nn.DataParallel(encoder.cuda(), device_ids=[0,1])

Sounds like it could be a bug. Could you please provide some random inputs for the forward pass so that I can run it and see what’s up?
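For example, a minimal harness like this would be enough (every size below is a made-up placeholder, not taken from your code):

# Minimal repro harness -- all sizes are made-up placeholders.
import torch
from torch.autograd import Variable

MAX_LEN, BATCH, VOCAB, HIDDEN, N_LAYERS = 10, 4, 100, 8, 1

encoder = EncoderRNN(VOCAB, HIDDEN, N_LAYERS, dropout=0)

# (max_len, batch) LongTensor of token indices, seq-first
input_seqs = Variable(torch.LongTensor(MAX_LEN, BATCH).random_(0, VOCAB))
# lengths sorted in decreasing order, as pack_padded_sequence requires
input_lengths = [10, 8, 5, 3]
# (n_layers * num_directions, batch, hidden) initial hidden state
hidden = Variable(torch.zeros(N_LAYERS * 2, BATCH, HIDDEN))

outputs, hidden = encoder((input_seqs, input_lengths, hidden))
print(outputs.size())  # expected: (max_len, batch, hidden)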

I'm curious whether the problem is in how DataParallel works or in the model itself. What happens if you remove the DataParallel? My guess is DataParallel: it scatters each tensor input along dim 0, so your seq-first input_seqs of shape (max_len, batch) would be split across the time dimension rather than the batch, while input_lengths would not be split the same way. pack_padded_sequence would then try to slice time steps its chunk no longer has, which would explain the empty-tensor error.
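To illustrate what I mean (a rough CPU-only sketch of the dim-0 split, not DataParallel's actual implementation):

import torch

x = torch.zeros(10, 4)  # seq-first: (max_len=10, batch=4)
# With device_ids=[0, 1], DataParallel splits each input tensor
# along dim 0, roughly like this:
a, b = torch.chunk(x, 2, dim=0)
print(a.size(), b.size())  # (5, 4) and (5, 4): split across time, not batch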

Thanks for your reply. I changed my code, and it now works perfectly.

class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, n_layers=1, dropout=0.1):
        super(EncoderRNN, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.dropout = dropout
        
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=self.dropout, bidirectional=True)
        
    def forward(self, input):
        # Note: we run this all at once (over multiple batches of multiple sequences)
        input_seqs, input_lengths, hidden = input
        
        embedded = self.embedding(input_seqs)
        
        packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
        outputs, hidden = self.gru(packed, hidden)      
        outputs, output_lengths = torch.nn.utils.rnn.pad_packed_sequence(outputs) # unpack (back to padded)
        
        padded_output = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]  # sum bidirectional outputs
        return padded_output, hidden

I changed this part:

# Initialize models
encoder = EncoderRNN(sentence.n_words, hidden_size, n_layers, dropout=dropout)

# Initialize optimizers and criterion
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Move models to GPU
if USE_CUDA:
    encoder = encoder.cuda()
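In case anyone wants DataParallel back with this encoder, one approach that should work (an untested sketch, not code from this project) is to make everything batch-first so the dim-0 scatter splits the batch, pass the lengths as a tensor so they are scattered consistently with the sequences, and let each replica create its own hidden state:

# Untested sketch: batch-first variant so DataParallel's dim-0 scatter
# splits the batch dimension instead of the time dimension.
import torch
import torch.nn as nn

class EncoderRNNBatchFirst(nn.Module):
    def __init__(self, input_size, hidden_size, n_layers=1, dropout=0.1):
        super(EncoderRNNBatchFirst, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers,
                          dropout=dropout, bidirectional=True,
                          batch_first=True)

    def forward(self, input_seqs, input_lengths):
        # input_seqs: (batch, max_len), so DataParallel splits the batch.
        # input_lengths: 1-D LongTensor, scattered along with the batch;
        # a batch sorted by decreasing length stays sorted in each chunk.
        embedded = self.embedding(input_seqs)
        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, input_lengths.data.tolist(), batch_first=True)
        # No initial hidden is passed in: each replica starts from zeros,
        # so no (layers, batch, hidden) tensor has to be scattered.
        outputs, hidden = self.gru(packed)
        outputs, _ = nn.utils.rnn.pad_packed_sequence(outputs, batch_first=True)
        # Sum the forward and backward directions.
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]
        # Transpose hidden to batch-first so DataParallel's dim-0 gather
        # concatenates batches rather than layers.
        return outputs, hidden.transpose(0, 1).contiguous()

With that, nn.DataParallel(encoder, device_ids=[0, 1]) can be called as encoder(input_seqs, input_lengths), with two positional arguments instead of a single tuple.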