Hi, I am developing a machine translator in PyTorch. I have a problem using DataParallel together with the "pack_padded_sequence" function. When I run my code, I get this error:
in forward(self, input)
21 print(input_lengths)
22
—> 23 packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
24 outputs, hidden = self.gru(packed, hidden)
25
/home/a20112128/anaconda3/lib/python3.6/site-packages/torch/nn/utils/rnn.py in pack_padded_sequence(input, lengths, batch_first)
68 if l > prev_l:
69 c_batch_size = batch_size - i
—> 70 steps.append(input[prev_l:l, :c_batch_size].contiguous().view(-1, *input.size()[2:]))
71 batch_sizes.extend([c_batch_size] * (l - prev_l))
72 prev_l = l
/home/a20112128/anaconda3/lib/python3.6/site-packages/torch/autograd/variable.py in __getitem__(self, key)
76 return IndexSelect.apply(self, 0, key)
77 # else fall through and raise an error in Index
—> 78 return Index.apply(self, key)
79
80 def __setitem__(self, key, value):
/home/a20112128/anaconda3/lib/python3.6/site-packages/torch/autograd/_functions/tensor.py in forward(ctx, i, index)
87 result = i.index(ctx.index)
88 else:
—> 89 result = i.index(ctx.index)
90 ctx.mark_shared_storage((i, result))
91 return result
ValueError: result of slicing is an empty tensor
The error is raised inside my encoder module:
class EncoderRNN(nn.Module):
    """Bidirectional GRU encoder that sums the two directions' outputs.

    Token ids are embedded, packed with their lengths, run through a
    bidirectional GRU, unpacked back to a padded tensor, and the forward and
    backward halves of the feature dimension are added together so the
    returned features have size ``hidden_size``.
    """

    def __init__(self, input_size, hidden_size, n_layers=1, dropout=0.1):
        """Build the embedding table and the bidirectional GRU.

        Args:
            input_size: Number of rows in the embedding table.
            hidden_size: Embedding width and GRU hidden width per direction.
            n_layers: Number of stacked GRU layers.
            dropout: Dropout value handed to ``nn.GRU``.
        """
        super(EncoderRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.dropout = dropout
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(
            hidden_size,
            hidden_size,
            n_layers,
            dropout=self.dropout,
            bidirectional=True,
        )

    def forward(self, input):
        """Encode a padded batch of sequences.

        Args:
            input: A 3-tuple ``(input_seqs, input_lengths, hidden)``.
                ``input_seqs`` holds token ids and is packed without
                ``batch_first``, i.e. laid out as (seq_len, batch).
                ``input_lengths`` gives each sequence's true length.
                ``hidden`` is the initial GRU state and is passed through
                to the GRU unchanged.

        Returns:
            ``(padded_output, hidden)``: the padded GRU outputs with both
            directions summed (last dimension ``hidden_size``), and the
            final GRU hidden state.
        """
        # Note: we run this all at once (over multiple batches of multiple sequences)
        # NOTE(review): when wrapped in nn.DataParallel the tensors in `input`
        # are split along DataParallel's `dim` (0 by default), which here is
        # the time axis; the lengths must match each replica's slice -- confirm
        # how the caller sets this up.
        input_seqs, input_lengths, hidden = input
        embedded = self.embedding(input_seqs)
        packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
        outputs, hidden = self.gru(packed, hidden)
        # Unpack back to a padded tensor. The original bound this result to
        # `outputs` and then read an unassigned `padded_output`.
        padded_output, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs)
        # Sum bidirectional outputs: the first hidden_size features are the
        # forward direction, the remaining ones the backward direction.
        forward_half = padded_output[:, :, :self.hidden_size]
        backward_half = padded_output[:, :, self.hidden_size:]
        padded_output = forward_half + backward_half
        return padded_output, hidden
# Build the encoder from the vocabulary size and the configured hyperparameters.
encoder = EncoderRNN(sentence.n_words, hidden_size, n_layers, dropout=dropout)

# Optimizer over the encoder's parameters, plus the training criterion.
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# On GPU: move the encoder to CUDA, then replicate it across devices 0 and 1.
# NOTE(review): nn.DataParallel scatters tensor inputs along dim 0 by default,
# while the encoder packs its input without batch_first (time-major) -- confirm
# that the split dimension and the per-replica lengths are consistent.
if USE_CUDA:
    encoder = encoder.cuda()
    encoder = nn.DataParallel(encoder, device_ids=[0, 1])