Mismatching target size in criterion

I’m trying to use PyTorch on the IMDB dataset to classify reviews as positive or negative. When I get to the training stage, the criterion function raises the following error:

ValueError: Target size (torch.Size([64])) must be the same as input size (torch.Size([1136, 64, 1]))

After some research, I saw that the error occurs because the model returns a tensor of size [1136, 64, 1] (one score per token, per example), while the criterion expects only one prediction per batch element.
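To show where the extra dimensions come from, here is a standalone snippet with the same layer sizes (1136 is just the padded length of that particular batch, and 25000 stands in for the vocabulary size):

import torch
import torch.nn as nn
import torch.nn.functional as F

text = torch.randint(0, 25000, (1136, 64))   # [sent len, batch size]
embedding = nn.Embedding(25000, 100)
hidden_fc = nn.Linear(100, 256)
fc = nn.Linear(256, 1)

embedded = embedding(text)                   # [1136, 64, 100]
h_1 = F.relu(hidden_fc(embedded))            # [1136, 64, 256]
out = fc(h_1.squeeze(0))                     # [1136, 64, 1]
print(out.shape)                             # torch.Size([1136, 64, 1])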

However, I don’t know how to solve this error.

My code:

import torch
import spacy
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchtext.legacy import data
from torchtext.legacy.data import Field
from torchtext.legacy import datasets
from torchtext import vocab
import sys
import csv
import re
import nltk
from nltk.corpus import stopwords
import pandas as pd
import random

SEED = 1234

torch.manual_seed(SEED) # For reproducibility
torch.backends.cudnn.deterministic = True


class MLP(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.hidden_fc = nn.Linear(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text):
        #text = [sent len, batch size]
        embedded = self.embedding(text)

        h_1 = F.relu(self.hidden_fc(embedded))

        # [batch size, output dim]
        return self.fc(h_1.squeeze(0))


def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """

    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds)) # e.g. 0.75 --> 1, 0.4 --> 0
    correct = (rounded_preds == y).float() #convert into float for division 
    acc = correct.sum() / len(correct)
    return acc

def train(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train() #Train mode is on
    
    for batch in iterator:
        
        optimizer.zero_grad() #Reset the gradients
        predictions = model(batch.text) ## forward propagation
        print(predictions.shape)
        loss = criterion(predictions, batch.label)
        acc = binary_accuracy(predictions, batch.label)
        loss.backward() ## backward propagation / calculate gradients
        optimizer.step() ## update parameters
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

def evaluate(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval() #Evaluation mode is on
    
    with torch.no_grad():

        for batch in iterator:
            predictions = model(batch.text).squeeze(1) 
            loss = criterion(predictions, batch.label)
            acc = binary_accuracy(predictions, batch.label)
            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)



TEXT = data.Field(tokenize = 'spacy',
                  tokenizer_language = 'en_core_web_sm',
                  lower = True)

LABEL = data.LabelField(dtype = torch.float)

train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)  ## IMDB reviews dataset
train_data, valid_data = train_data.split(random_state = random.seed(SEED)) 

MAX_VOCAB_SIZE = 25_000 

TEXT.build_vocab(train_data, max_size = MAX_VOCAB_SIZE) #Build the vocabulary from the 25K most frequent words
LABEL.build_vocab(train_data)
BATCH_SIZE = 64
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_size = BATCH_SIZE)


INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1

model = MLP(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)

optimizer = optim.SGD(model.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()


N_EPOCHS = 5

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')

Hello, based on BCEWithLogitsLoss from PyTorch, your target shape should be torch.Size([64, 1]) and the output of the model should also be torch.Size([64, 1]), so output_dim should be 1.
This means each of the 64 outputs of the model is a score for the review being positive.
So make sure you pass the proper dimensions through the model so that the output has a shape of [64, 1].

The thing is that I don’t know how to reshape the output of the model to be [64, 1], nor why the output of the model is [1136, 64, 1] in the first place.

Hi, could it be that you are trying to calculate the loss for all batches at once? Maybe try calculating the loss for each batch and then taking the average.

I don’t think that’s the problem here.
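For what it’s worth, one way to make the shapes line up (following the suggestion above) is to collapse the sequence dimension inside forward before the linear layers, for example by averaging the embedded tokens, and then squeeze the trailing dimension in train() the same way evaluate() already does. The mean-pooling here is just one possible choice, not the only one:

import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.hidden_fc = nn.Linear(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text):
        #text = [sent len, batch size]
        embedded = self.embedding(text)        # [sent len, batch size, emb dim]
        pooled = embedded.mean(dim=0)          # [batch size, emb dim], averaged over the tokens
        h_1 = F.relu(self.hidden_fc(pooled))   # [batch size, hidden dim]
        return self.fc(h_1)                    # [batch size, output dim] = [64, 1]

and in train():

        predictions = model(batch.text).squeeze(1)   # [64], same shape as batch.label
        loss = criterion(predictions, batch.label)

That way predictions and batch.label are both [64] and BCEWithLogitsLoss no longer complains (alternatively, keep the output as [64, 1] and unsqueeze the labels instead). Note that the mean also averages over the pad tokens; for a quick sanity check that is usually fine.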