I posted this issue about 5 months ago; now I’m back at this project and was eagerly awaiting 0.3, but the issue remains.
This code works with a single GPU, fails with more than one (specifically 8):
import torch
import torch.nn as nn
from torch.autograd import Variable
class StepRNN(nn.Module):
    """Single-step RNN: embed a batch of token ids, run one LSTM step, project to logits.

    NOTE(review): ``init_hidden`` returns ``(h, c)`` with shape
    ``(num_layers, batch, hidden_size)`` — the batch dimension is dim 1.
    ``nn.DataParallel(..., dim=0)`` scatters every argument along dim 0,
    so the hidden tuple is NOT split across devices along the batch,
    which is the source of the "Expected hidden size" error reported below.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        # Maps token ids in [0, input_size) to hidden_size-dim vectors.
        self.encoder = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.LSTM(input_size=hidden_size,
                           hidden_size=hidden_size,
                           num_layers=num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Run a single time step.

        input:  LongTensor of shape (batch,) holding token indices.
        hidden: (h, c) tuple, each of shape (num_layers, batch, hidden_size).
        Returns (logits of shape (batch, output_size), updated hidden tuple).
        """
        batch_size = input.size(0)
        encoded = self.encoder(input)
        # seq_len == 1: LSTM (batch_first=False) expects (seq, batch, feature).
        output, hidden = self.rnn(encoded.view(1, batch_size, -1), hidden)
        output = self.decoder(output.view(batch_size, -1))
        return output, hidden

    def init_hidden(self, batch_size):
        """Return a zeroed (h, c) hidden tuple on the GPU.

        Fix: the original applied .cuda() inconsistently — outside the
        Variable for h but inside it for c. Apply it uniformly.
        """
        h = Variable(torch.zeros(self.num_layers, batch_size, self.hidden_size))
        c = Variable(torch.zeros(self.num_layers, batch_size, self.hidden_size))
        return h.cuda(), c.cuda()
# Build the model on a vocabulary of 100 tokens.
decoder = StepRNN(
    input_size=100,
    hidden_size=8,
    output_size=100,
    num_layers=1)
# dim=0 scatters each *argument* along its dim 0. The input batch lives on
# dim 0, but the hidden tuple has batch on dim 1, so it is not split
# correctly across the 8 replicas.
decoder_dist = nn.DataParallel(decoder, device_ids=[0, 1, 2, 3, 4, 5, 6, 7], dim=0)
decoder_dist.cuda()

batch_size = 16
hidden = decoder.init_hidden(batch_size)
# Fix: torch.LongTensor(batch_size, 10) leaves its memory uninitialized, so
# garbage indices may be >= input_size and crash the Embedding lookup.
# Draw valid token ids in [0, 100) instead.
input_ = Variable(torch.LongTensor(batch_size, 10).random_(0, 100)).cuda()
target = Variable(torch.LongTensor(batch_size, 10).random_(0, 100)).cuda()

# Feed one column (one time step) per iteration.
for c in range(10):
    decoder_dist(input_[:, c].contiguous(), hidden)
RuntimeError: Expected hidden size (1, 2, 8), got (1, 16, 8)
pytorch 0.3
CUDA 9
cuDNN 7
It obviously has something to do with batch_first being set to False (the default): the LSTM hidden state then has shape (num_layers, batch, hidden_size), so its batch dimension is dim 1, while DataParallel with dim=0 scatters every argument along dim 0 — the input gets split per device but the hidden state does not. Is this a bug? I’ll be happy to file it unless I’m missing something.