ValueError: Expected input batch_size (165) to match target batch_size (32)

This is my model architecture.

# defining model
class LSTMModel(nn.Module):
    def __init__(self, vocab_size, output_size, embedding_dim, hidden_size, n_layers, drop_prob=0.5):
        super().__init__()
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_size

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, self.hidden_dim, n_layers, dropout=drop_prob,
                            batch_first=True)
        self.dropout = nn.Dropout(0.3)

        self.fc1 = nn.Linear(self.hidden_dim, 64)
        self.fc2 = nn.Linear(64, 16)
        self.fc3 = nn.Linear(16, output_size)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x, hidden):
        batch_size = x.size(0)

        # Embedding and LSTM output
        embedd = self.embedding(x)
        out, hidden = self.lstm(embedd, hidden)

        # dropout and fully connected layers
        out = self.dropout(out)
        out = self.fc1(out)
        out = self.dropout(out)
        out = self.fc2(out)
        out = self.dropout(out)
        out = self.fc3(out)
        return self.softmax(out)
    
    def init_hidden(self, batch_size):
        return (torch.zeros(self.n_layers, batch_size, self.hidden_dim),
                torch.zeros(self.n_layers, batch_size, self.hidden_dim))

This is my model instantiation.

vocab_size = 10000 # +1 for the 0 padding
output_size = 5
embedding_dim = 100
hidden_dim = 256
n_layers = 2

model = LSTMModel(vocab_size, output_size, embedding_dim, hidden_dim, n_layers).to(device)

This is my training loop.

train_loss = []
train_accuracies = []
val_loss = []
val_accuracies = []

for i in range(10):

    y_pred= []
    y_true = []
    print(f'Epoch: _________*****{i}*****_______')
    train_epoch_losses, train_epoch_accuracies = [], []
    val_epoch_accuracies, val_epoch_losses = [], []



    for ix, batch in enumerate(trn_ldr):
        x, y = batch
        batch_size = x.size(0)

        h0, c0 = model.init_hidden(batch_size)
        h0 = h0.to(device)
        c0 = c0.to(device)
        k = (h0, c0)

        y = y.to(device)
        train_epoch_losses.append(train_batch(x, k, y, model, loss_fn, optimizer))
    train_epoch_loss = np.array(train_epoch_losses).mean()
    print(f'Epoch: _________*****{i} Training Loss : {train_epoch_loss} *****_______')



    for ix, batch in enumerate(trn_ldr):
        x, y = batch
        batch_size = x.size(0)

        h0, c0 = model.init_hidden(batch_size)
        h0 = h0.to(device)
        c0 = c0.to(device)
        k = (h0, c0)

        y = y.to(device)
        train_epoch_accuracies.append(sum(accuracy(x, y, model)) / len(y))
    train_epoch_accuracy = np.array(train_epoch_accuracies).mean()
    print(f'Epoch: _________*****{i} Training Accuracy: {train_epoch_accuracy} *****_______')


    for ix, batch in enumerate(test_ldr):
        x, y = batch
        batch_size = x.size(0)

        h0, c0 = model.init_hidden(batch_size)
        h0 = h0.to(device)
        c0 = c0.to(device)
        k = (h0, c0)

        y = y.to(device)
        val_epoch_losses.append(val_loss_trn(x, y, model, loss_fn))
    val_epoch_loss = np.array(val_epoch_losses).mean()
    print(f'Epoch: _________*****{i} Validation Loss : {val_epoch_loss} *****_______')

    for ix, batch in enumerate(test_ldr):
        x, y = batch
        batch_size = x.size(0)

        h0, c0 = model.init_hidden(batch_size)
        h0 = h0.to(device)
        c0 = c0.to(device)
        k = (h0, c0)

        y_true.append(y)
        y = y.to(device)
        acc_return = accuracy(x, y, model, test=True)
        test_acc_return = acc_return[0].tolist()
        y_pred.append(acc_return[1])
        val_epoch_accuracies.append(sum(test_acc_return) / len(y))
    val_epoch_accuracy = np.array(val_epoch_accuracies).mean()
    print(f'Epoch: _________*****{i} Validation Accuracy: {val_epoch_accuracy} *****_______')
    print('\n')

    train_loss.append(train_epoch_loss)
    train_accuracies.append(train_epoch_accuracy)
    val_accuracies.append(val_epoch_accuracy)
    val_loss.append(val_epoch_loss)

And this is my error.

ValueError                                Traceback (most recent call last)
Cell In [251], line 33
     31     y = torch.tensor(y).to(device)
     32     # x = x.permute(0, 3,1,2)
---> 33     train_epoch_losses.append(train_batch(x, k, y, model, loss_fn, optimizer))
     34 train_epoch_loss = np.array(train_epoch_losses).mean()
     35 print(f'Epoch: _________*****{i} Training Loss : {train_epoch_loss} *****_______')

Cell In [217], line 5, in train_batch(x, k, y, model, loss_fn, opt)
      3 prediction = model(x, k)
      4 y = torch.tensor(y)
----> 5 batch_loss = loss_fn(torch.tensor(prediction, requires_grad=True), y)
      6 batch_loss.backward()
      7 optimizer.step()

File ~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File ~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/loss.py:1174, in CrossEntropyLoss.forward(self, input, target)
   1173 def forward(self, input: Tensor, target: Tensor) -> Tensor:
-> 1174     return F.cross_entropy(input, target, weight=self.weight,
   1175                            ignore_index=self.ignore_index, reduction=self.reduction,
   1176                            label_smoothing=self.label_smoothing)

File ~/Library/Python/3.9/lib/python/site-packages/torch/nn/functional.py:3029, in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
   3027 if size_average is not None or reduce is not None:
   3028     reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3029 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)

ValueError: Expected input batch_size (165) to match target batch_size (32).

Can anyone help me solve this issue? I am new to NLP, torchtext, and PyTorch.

Since you don't mention what you're trying to do or what your data actually looks like, I assume you're trying to build a classifier. In that case, note that after

out, hidden = self.lstm(embedd, hidden)

out has a shape of (batch_size, seq_len, hidden_dim) -- that is, out contains the hidden states for all time steps. Again, I assume you only want to use the last hidden state, in which case you need

out = out[:,-1,:]

to get that last hidden state. This out has a shape of (batch_size, hidden_dim), which is arguably what you want. The alternative is to use hidden[0], which contains only the last hidden states, but for each layer. More specifically, hidden[0] will have a shape of (num_layers, batch_size, hidden_dim), so here you need

h = hidden[0][-1]

to get the same tensor as above. nn.LSTM returns the hidden and cell states as a tuple, hence the hidden[0].
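
You can verify this equivalence with a quick standalone check (the sizes below are made up for illustration and are not taken from your setup):

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=100, hidden_size=256, num_layers=2, batch_first=True)
x = torch.randn(32, 10, 100)                   # (batch_size, seq_len, input_size)
out, (h_n, c_n) = lstm(x)                      # hidden state defaults to zeros

print(out.shape)                               # torch.Size([32, 10, 256])
print(h_n.shape)                               # torch.Size([2, 32, 256])
print(torch.allclose(out[:, -1, :], h_n[-1]))  # True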

Note that this only works as long as bidirectional=False (the default); otherwise, correctly handling out and hidden[0] is a bit trickier.
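
Putting this into your model, here is a minimal sketch of the forward pass with the last time step selected (I haven't run it against your data, so treat it as a starting point rather than a drop-in fix):

def forward(self, x, hidden):
    # Embedding and LSTM output
    embedd = self.embedding(x)               # (batch_size, seq_len, embedding_dim)
    out, hidden = self.lstm(embedd, hidden)  # (batch_size, seq_len, hidden_dim)

    # keep only the last time step -> (batch_size, hidden_dim)
    out = out[:, -1, :]

    # dropout and fully connected layers
    out = self.dropout(out)
    out = self.fc1(out)
    out = self.dropout(out)
    out = self.fc2(out)
    out = self.dropout(out)
    out = self.fc3(out)                      # (batch_size, output_size)

    # Side note: nn.CrossEntropyLoss applies log-softmax internally and
    # expects raw logits, so you would typically return out here and keep
    # self.softmax only where you need actual probabilities.
    return out

With the output reduced to (batch_size, output_size), the input and target batch sizes seen by the loss function will match.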