My code for batching is:
train_data = torch.utils.data.TensorDataset(seq_tensor.cuda(),target.data.cuda())
trainloader = torch.utils.data.DataLoader(train_data, batch_size=100, shuffle=True)
I am using the inputs(seq_tensor) and targets(target) as LongTensors (1-D of size 13k each), since the data first gets passed into the embedding layer.
model = model.LSTM1(args,embeddings, num_layers).cuda()
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9, weight_decay=1e-5)
for epoch in range(1000):
for i, data in enumerate(trainloader,0):
seq, target = data
seq_var, target = autograd.Variable(seq.cuda()), autograd.Variable(target.cuda())
optimizer.zero_grad()
model.hidden=model.init_hidden()
score=model(seq_var)
loss = criterion(score,target)
loss.backward()
optimizer.step()
The init_hidden function for lstm:
def init_hidden(self):
# The axes semantics are (num_layers, minibatch_size, hidden_dim)
return (autograd.Variable(torch.zeros(self.num_layers,100, self.hidden_dim).cuda()),
autograd.Variable(torch.zeros(self.num_layers,100, self.hidden_dim).cuda()))
def forward(self,sentence):
embeds = self.embed(sentence)
lstm_output, self.hidden = self.lstm(embeds.view(len(sentence), 1, -1), self.hidden)
tag_space = self.hidden2tag(lstm_output.view(len(sentence), -1))
scores = F.log_softmax(tag_space)
return scores
I have passed the minibatch size in the init_hidden function as 100, but it throws the error:
File “/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py”, line 224, in call
result = self.forward(*input, **kwargs)
File “/home/kuntadey/acad/ritvik/lstm_simple_paraphrase/model2.py”, line 33, in forward
lstm_output, self.hidden = self.lstm(embeds.view(len(sentence), 1, -1), self.hidden)
File “/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py”, line 224, in call
result = self.forward(*input, **kwargs)
File “/usr/local/lib/python2.7/dist-packages/torch/nn/modules/rnn.py”, line 162, in forward
output, hidden = func(input, self.all_weights, hx)
File “/usr/local/lib/python2.7/dist-packages/torch/nn/_functions/rnn.py”, line 351, in forward
return func(input, *fargs, **fkwargs)
File “/usr/local/lib/python2.7/dist-packages/torch/autograd/function.py”, line 284, in _do_forward
flat_output = super(NestedIOFunction, self)._do_forward(*flat_input)
File “/usr/local/lib/python2.7/dist-packages/torch/autograd/function.py”, line 306, in forward
result = self.forward_extended(*nested_tensors)
File “/usr/local/lib/python2.7/dist-packages/torch/nn/_functions/rnn.py”, line 293, in forward_extended
cudnn.rnn.forward(self, input, hx, weight, output, hy)
File “/usr/local/lib/python2.7/dist-packages/torch/backends/cudnn/rnn.py”, line 266, in forward
hidden_size, tuple(hx.size())))
RuntimeError: Expected hidden size (10, 1L, 300), got (10L, 100L, 300L)
It works on passing the minibatch size as 1, inside init_hidden.
Is there a dimension detail while making the batches that I am missing out on?