I’m trying to use pytorch in for the IMBD dataset, to predict the positive and negative reviews. When I get to the training state, the following error is given by the criterion
function:
ValueError: Target size (torch.Size([64])) must be the same as input size (torch.Size([1136, 64, 1]))
After some research, I saw that the error is because the output of the model is returning a tensor of size [1136, 64, 1]
, and criterion
is expecting only batch
results.
Howerver, I don’t know how to solve this error.
My code:
import torch
import spacy
import torch.nn as nn
from torchtext.legacy import data
import sys
import csv
import torch.optim as optim
import re
import nltk
from nltk.corpus import stopwords
from torchtext import vocab
from torchtext.legacy.data import Field
from torchtext.legacy import datasets
import pandas as pd
import re
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import random
SEED = 1234
torch.manual_seed(SEED) # For reproducibility
torch.backends.cudnn.deterministic = True
import torch.nn.functional as F
import torch.nn as nn
class MLP(nn.Module):
def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
super().__init__()
self.embedding = nn.Embedding(input_dim, embedding_dim)
self.hidden_fc = nn.Linear(embedding_dim, hidden_dim)
self.fc = nn.Linear(hidden_dim, output_dim)
def forward(self, text):
#text = [sent len, batch size]
embedded = self.embedding(text)
h_1 = F.relu(self.hidden_fc(embedded))
# assert torch.equal(output[-1,:,:], h_1.squeeze(0))
# [batch size, output dim]
return self.fc(h_1.squeeze(0))
def binary_accuracy(preds, y):
"""
Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
"""
#round predictions to the closest integer
rounded_preds = torch.round(torch.sigmoid(preds)) # 0.75 --> 1 0.4 --> 0
correct = (rounded_preds == y).float() #convert into float for division
acc = correct.sum() / len(correct)
return acc
def train(model, iterator, optimizer, criterion):
epoch_loss = 0
epoch_acc = 0
model.train() #Train mode is on
for batch in iterator:
optimizer.zero_grad() #Reset the gradients
predictions = model(batch.text) ## forward propagation
print(predictions.shape)
loss = criterion(predictions, batch.label)
acc = binary_accuracy(predictions, batch.label)
loss.backward() ## backward propagation / calculate gradients
optimizer.step() ## update parameters
epoch_loss += loss.item()
epoch_acc += acc.item()
return epoch_loss / len(iterator), epoch_acc / len(iterator)
def evaluate(model, iterator, criterion):
epoch_loss = 0
epoch_acc = 0
model.eval() #Evaluation mode is on
with torch.no_grad():
for batch in iterator:
predictions = model(batch.text).squeeze(1)
loss = criterion(predictions, batch.label)
acc = binary_accuracy(predictions, batch.label)
epoch_loss += loss.item()
epoch_acc += acc.item()
return epoch_loss / len(iterator), epoch_acc / len(iterator)
TEXT = data.Field(tokenize = 'spacy',
tokenizer_language = 'en_core_web_sm',
lower = True)
LABEL = data.LabelField(dtype = torch.float)
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL) ## IMDB reviews dataset
train_data, valid_data = train_data.split(random_state = random.seed(SEED))
MAX_VOCAB_SIZE = 25_000
TEXT.build_vocab(train_data, max_size = MAX_VOCAB_SIZE) #Build the vocabulary using the top frequent 25K words
LABEL.build_vocab(train_data)
BATCH_SIZE = 64
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
(train_data, valid_data, test_data),
batch_size = BATCH_SIZE)
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
model = MLP(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
import torch.optim as optim
optimizer = optim.SGD(model.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()
N_EPOCHS = 5
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')