Facing an issue while using batching in LSTM

Please see the code below and guide me, as I am new to deep learning and PyTorch.
The code runs fine if I feed the whole dataset at once, but it throws an error when I use batches of 32.
class LSTMTagger(nn.Module):
    """LSTM tagger as originally posted: embedding -> LSTM -> linear -> log-softmax.

    This is the version that raises the RuntimeError shown in the traceback
    below when it is fed mini-batches. The failing reshape in ``forward`` is
    kept exactly as posted, because the traceback and the replies quote it.

    Args:
        embedding_dim: Size of each word embedding vector.
        hidden_dim: Size of the LSTM hidden state.
        vocab_size: Number of rows in the embedding table.
        tagset_size: Number of output tags (width of the final linear layer).
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first = True)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        """Return log-softmax tag scores with one row per entry of ``sentence``.

        Only a 1-D tensor of word indices works here; see the note on the
        reshape below for why a 2-D batch fails.
        """
        embeds = self.word_embeddings(sentence)
        # NOTE: this is the line the traceback points at. For a 1-D input of
        # N indices, embeds is (N, embedding_dim) and the view yields
        # (N, 1, embedding_dim), which the LSTM accepts. For a 2-D batch,
        # len(sentence) is the batch size, so the view folds
        # seq_len * embedding_dim into the last axis (9952 in the reported
        # error, where 32 was expected) and nn.LSTM rejects it.
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

# Hyperparameters for the model and the training run.
EMBEDDING_DIM = 32
HIDDEN_DIM = 32
EPOCHS = 10

# word_to_ix and tag_to_ix are built outside this snippet; their sizes are
# passed as the model's vocab_size and tagset_size.
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
loss_function = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(EPOCHS):
    # train_loader is defined outside this snippet; each item unpacks into
    # an input tensor and its target tags.
    for sentence, tags in train_loader:
        # Gradients accumulate across backward() calls, so clear them
        # before each step.
        model.zero_grad()

        # This call is where the traceback below originates.
        tag_scores = model(sentence)

        loss = loss_function(tag_scores, tags)
        loss.backward()
        optimizer.step()

Error:

RuntimeError Traceback (most recent call last)
in
18 model.zero_grad()
19
--> 20 tag_scores = model(sentence)
21
22 loss = loss_function(tag_scores, tags)

~\anaconda3\envs\tensorflow_gpu\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)

in forward(self, sentence)
16 def forward(self, sentence):
17 embeds = self.word_embeddings(sentence)
--> 18 lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
19 tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
20 tag_scores = F.log_softmax(tag_space, dim=1)

~\anaconda3\envs\tensorflow_gpu\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)

~\anaconda3\envs\tensorflow_gpu\lib\site-packages\torch\nn\modules\rnn.py in forward(self, input, hx)
565 hx = self.permute_hidden(hx, sorted_indices)
566
--> 567 self.check_forward_args(input, hx, batch_sizes)
568 if batch_sizes is None:
569 result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,

~\anaconda3\envs\tensorflow_gpu\lib\site-packages\torch\nn\modules\rnn.py in check_forward_args(self, input, hidden, batch_sizes)
517 def check_forward_args(self, input, hidden, batch_sizes):
518 # type: (Tensor, Tuple[Tensor, Tensor], Optional[Tensor]) -> None
--> 519 self.check_input(input, batch_sizes)
520 expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes)
521

~\anaconda3\envs\tensorflow_gpu\lib\site-packages\torch\nn\modules\rnn.py in check_input(self, input, batch_sizes)
168 raise RuntimeError(
169 'input.size(-1) must be equal to input_size. Expected {}, got {}'.format(
--> 170 self.input_size, input.size(-1)))
171
172 def get_expected_hidden_size(self, input, batch_sizes):

RuntimeError: input.size(-1) must be equal to input_size. Expected 32, got 9952

Hi @samsumankumar. Have you accounted for the batch dimension when passing the input to the model?
You can check by printing the shape of each batch.

I see several issues:

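  • You create your nn.LSTM with batch_first=True, but then you reshape your input as if the sequence length were the first dimension: embeds.view(len(sentence), 1, -1). With a batched (batch, seq_len) input, len(sentence) is the batch size, so this view folds seq_len × embedding_dim into the last dimension, which is why the LSTM reports 9952 instead of the expected 32.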

  • Your view() commands might be a problem anyway; please see this post of mine. The following change should actually work:

    lstm_out, _ = self.lstm(embeds)

class LSTMTagger(nn.Module):
    """Batched LSTM tagger: embedding -> LSTM -> linear -> log-softmax.

    Args:
        embedding_dim: Size of each word embedding vector.
        hidden_dim: Size of the LSTM hidden state.
        vocab_size: Number of rows in the embedding table.
        tagset_size: Number of output tags (width of the final linear layer).
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super().__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim. batch_first=True means it expects
        # input laid out as (batch, seq_len, embedding_dim).
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        """Score every token of every sequence in the batch.

        Args:
            sentence: Tensor of word indices laid out as (batch, seq_len).
                It is cast to int64 before the embedding lookup.

        Returns:
            Log-probabilities of shape (batch * seq_len, tagset_size). Rows
            are in batch-major order, so they line up with the flattened
            (batch, seq_len) targets.
        """
        # Embedding lookups need integer indices; cast in case the loader
        # yields another dtype.
        embeds = self.word_embeddings(sentence.long())
        # The embeddings are already (batch, seq_len, embedding_dim), which
        # is what a batch_first LSTM expects, so no view() is needed here.
        lstm_out, _ = self.lstm(embeds)
        # Collapse batch and time into one axis so each row is one token.
        # reshape() copies only if the tensor is not already contiguous.
        tag_space = self.hidden2tag(lstm_out.reshape(-1, self.hidden_dim))
        return F.log_softmax(tag_space, dim=1)

# Hyperparameters for the model and the training run.
EMBEDDING_DIM = 32
HIDDEN_DIM = 32
EPOCHS = 10

# len(word_to_ix) = vocab_size, len(tag_to_ix) = tagset_size
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
loss_function = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(EPOCHS):
    # train_loader is defined outside this snippet; each item unpacks into
    # a batch of inputs and the matching batch of target tags.
    for sentence, tags in train_loader:
        # Gradients accumulate across backward() calls, so clear them
        # before each step.
        model.zero_grad()

        tag_scores = model(sentence)
        # The model returns one row per token, so the targets are flattened
        # to a 1-D tensor of the same length.
        # NOTE(review): if the batches are padded to a common length, the
        # padded positions are scored here too. NLLLoss(ignore_index=...)
        # can exclude them. Confirm how train_loader builds its batches.
        loss = loss_function(tag_scores, tags.flatten())
        loss.backward()
        optimizer.step()

The above code works now. Please look it over and tell me whether there is anything wrong with it.