Hi everyone. I’m using a GRU encoder wrapped in DataParallel. I have already re-padded my sequences to the total length of the input, and that works for most of my data, but the exception still occurs on the last batch, whose batch size differs from that of the other batches.
Here is the code of the encoder:
`
class BatchEncoderRNN(nn.Module):
    """Embedding + GRU encoder for padded, batch-first token sequences.

    The module embeds integer token ids, packs the padded batch, runs it
    through a (possibly multi-layer) GRU and pads the result back out to a
    fixed ``total_length`` so that every replica of a data-parallel wrapper
    produces outputs with the same time dimension.

    Args:
        input_size: Number of rows in the embedding table (vocabulary size).
        hidden_size: Embedding width and GRU hidden width.
        num_layers: Number of stacked GRU layers.
        dropout: Dropout probability stored on the module and handed to the
            GRU when it has more than one layer.
    """

    def __init__(self, input_size, hidden_size, num_layers=1, dropout=0.1):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.embedding = nn.Embedding(input_size, hidden_size)
        # nn.GRU applies dropout only between stacked layers; with a single
        # layer a non-zero value has no effect and only triggers a warning,
        # so pass 0.0 in that case.
        self.gru = nn.GRU(
            hidden_size,
            hidden_size,
            num_layers,
            dropout=self.dropout if num_layers > 1 else 0.0,
            batch_first=True,
        )

    def forward(self, input_seqs, input_lengths, total_length, hidden=None,
                batch_first_hidden=False):
        """Encode a padded batch of token ids.

        Args:
            input_seqs: Integer tensor indexed as ``(batch, time)``; it is fed
                to the embedding and then packed with ``batch_first=True``.
            input_lengths: Per-sequence lengths accepted by
                ``pack_padded_sequence`` (sorted in decreasing order, as that
                function requires by default). A tensor is moved to the CPU
                before packing.
            total_length: Time dimension the padded output is expanded to.
            hidden: Optional initial GRU state laid out as
                ``(num_layers, batch, hidden_size)``. Zeros are used when it
                is ``None``.
            batch_first_hidden: When true, the returned hidden state is
                transposed to ``(batch, num_layers, hidden_size)``. Use this
                when the module is wrapped in ``nn.DataParallel``: the wrapper
                concatenates replica outputs along dim 0, which for the
                default layer-first layout is the layer axis and fails with
                "Gather got an input of invalid size" as soon as replicas
                receive chunks of different sizes. The caller can restore the
                GRU layout with ``hidden.transpose(0, 1).contiguous()``.

        Returns:
            ``(output, hidden)`` where ``output`` is
            ``(batch, total_length, hidden_size)`` and ``hidden`` is
            ``(num_layers, batch, hidden_size)``, or batch-first when
            ``batch_first_hidden`` is true.
        """
        batch_size = input_seqs.size()[0]
        print("inside batch encoder")
        print("input_seq len", input_seqs.size()[1])
        print("batch size", batch_size)

        embedded = self.embedding(input_seqs)

        if hidden is None:
            # Allocate on the device/dtype of this replica's activations
            # instead of a module-level global, so every replica gets a state
            # that lives next to its own input.
            hidden = torch.zeros(
                self.num_layers,
                batch_size,
                self.hidden_size,
                device=embedded.device,
                dtype=embedded.dtype,
            )

        # A lengths tensor that went through a scatter may sit on the GPU;
        # packing expects the lengths on the CPU.
        if torch.is_tensor(input_lengths):
            input_lengths = input_lengths.cpu()

        packed = torch.nn.utils.rnn.pack_padded_sequence(
            embedded, input_lengths, batch_first=True
        )
        # A PackedSequence always exposes ``.data``, so no fallback is needed.
        print("packed", packed.data.size())

        self.gru.flatten_parameters()
        output, hidden = self.gru(packed, hidden)
        print("output", output.data.size())

        # Unpack back to a padded tensor of a fixed width.
        output, _ = torch.nn.utils.rnn.pad_packed_sequence(
            output, batch_first=True, total_length=total_length
        )

        if batch_first_hidden:
            hidden = hidden.transpose(0, 1).contiguous()
        return output, hidden
`
Here is the code calling the encoder:
# Move the batch inputs to DEVICE and run the encoder; the third argument is
# dim 1 of the inputs tensor, i.e. the padded width of this batch, which the
# encoder receives as total_length.
# NOTE(review): self.encoder is presumably the DataParallel-wrapped
# BatchEncoderRNN (per the traceback) - confirm.
encoder_outputs, encoder_hidden = self.encoder(batch['inputs'].to(device=DEVICE), batch['input_lens'], batch['inputs'].size()[1])
`batch` is drawn from a DataLoader.
And here is the error:
Traceback (most recent call last): File "/mnt/nfs/work1/miyyer/wyou/rnn_nmt_syntax/main.py", line 108, in <module> main() File "/mnt/nfs/work1/miyyer/wyou/rnn_nmt_syntax/main.py", line 95, in main trainer.train(args.train_size) File "/mnt/nfs/work1/miyyer/wyou/rnn_nmt_syntax/actions/train.py", line 185, in train self.train_epoch(epoch, train_size) File "/mnt/nfs/work1/miyyer/wyou/rnn_nmt_syntax/actions/train.py", line 141, in train_epoch loss = self.train_batch3(batch) File "/mnt/nfs/work1/miyyer/wyou/rnn_nmt_syntax/actions/train.py", line 64, in train_batch3 encoder_outputs, encoder_hidden = self.encoder(batch['inputs'].to(device=DEVICE), batch['input_lens'], batch['inputs'].size()[1]) File "/mnt/nfs/work1/miyyer/wyou/py36/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__ result = self.forward(*input, **kwargs) File "/mnt/nfs/work1/miyyer/wyou/py36/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 144, in forward return self.gather(outputs, self.output_device) File "/mnt/nfs/work1/miyyer/wyou/py36/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 156, in gather return gather(outputs, output_device, dim=self.dim) File "/mnt/nfs/work1/miyyer/wyou/py36/lib/python3.6/site-packages/torch/nn/parallel/scatter_gather.py", line 67, in gather return gather_map(outputs) File "/mnt/nfs/work1/miyyer/wyou/py36/lib/python3.6/site-packages/torch/nn/parallel/scatter_gather.py", line 62, in gather_map return type(out)(map(gather_map, zip(*outputs))) File "/mnt/nfs/work1/miyyer/wyou/py36/lib/python3.6/site-packages/torch/nn/parallel/scatter_gather.py", line 54, in gather_map return Gather.apply(target_device, dim, *outputs) File "/mnt/nfs/work1/miyyer/wyou/py36/lib/python3.6/site-packages/torch/nn/parallel/_functions.py", line 68, in forward return comm.gather(inputs, ctx.dim, ctx.target_device) File "/mnt/nfs/work1/miyyer/wyou/py36/lib/python3.6/site-packages/torch/cuda/comm.py", line 166, in gather return 
torch._C._gather(tensors, dim, destination) RuntimeError: Gather got an input of invalid size: got [1, 4, 512], but expected [1, 6, 512] (gather at /pytorch/torch/csrc/cuda/comm.cpp:239) frame #0: std::function<std::string ()>::operator()() const + 0x11 (0x2aab29483021 in /mnt/nfs/work1/miyyer/wyou/py36/lib/python3.6/site-packages/torch/lib/libc10.so) frame #1: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x2a (0x2aab294828ea in /mnt/nfs/work1/miyyer/wyou/py36/lib/python3.6/site-packages/torch/lib/libc10.so) frame #2: torch::cuda::gather(c10::ArrayRef<at::Tensor>, long, c10::optional<int>) + 0x4e8 (0x2aaaf0ccdea8 in /mnt/nfs/work1/miyyer/wyou/py36/lib/python3.6/site-packages/torch/lib/libtorch_python.so) frame #3: <unknown function> + 0x4eb74c (0x2aaaf0cd074c in /mnt/nfs/work1/miyyer/wyou/py36/lib/python3.6/site-packages/torch/lib/libtorch_python.so) frame #4: <unknown function> + 0x11642e (0x2aaaf08fb42e in /mnt/nfs/work1/miyyer/wyou/py36/lib/python3.6/site-packages/torch/lib/libtorch_python.so) <omitting python frames> frame #15: THPFunction_apply(_object*, _object*) + 0x581 (0x2aaaf0af7ab1 in /mnt/nfs/work1/miyyer/wyou/py36/lib/python3.6/site-packages/torch/lib/libtorch_python.so)