Hi, I need help with this value error when running PyTorch for a binary classification problem.
I’m using a kaggle dataset, News Category Dataset | Kaggle.
and am implementing the code from here (which is for a multi-class classfication: Google Colab)
# Importing the libraries needed
import pandas as pd
import numpy as np
import pickle
import json
import torch
import transformers
from torch.utils.data import Dataset, DataLoader
from transformers import DistilBertModel, DistilBertTokenizer
# load labels
with open('.../News_Category_Dataset_v3.json','r') as f:
jdata = f.read()
jdata2 = [json.loads(line) for line in jdata.split('\n') if line]
df = pd.DataFrame.from_records(jdata2)
df["category"] = np.where(df["category"] == "CRIME", 1, 0) # binomial classification
y = df['category'].astype(int)
X = df['short_description']
# Defining some key variables that will be used later on in the training
MAX_LEN = 512
TRAIN_BATCH_SIZE = 4
VALID_BATCH_SIZE = 2
EPOCHS = 5
LEARNING_RATE = 1e-05
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
#class Triage(Dataset):
def __init__(self, dataframe, tokenizer, max_len):
self.len = len(dataframe)
self.data = dataframe
self.tokenizer = tokenizer
self.max_len = max_len
def __getitem__(self, index):
title = str(self.data.short_description[index])
title = " ".join(title.split())
inputs = self.tokenizer.encode_plus(
title,
None,
add_special_tokens=True,
max_length=self.max_len,
padding='max_length',
return_token_type_ids=True,
truncation=True
)
ids = inputs['input_ids']
mask = inputs['attention_mask']
return {
'ids': torch.tensor(ids, dtype=torch.long),
'mask': torch.tensor(mask, dtype=torch.long),
'targets': torch.tensor(self.data.category[index], dtype=torch.long)
}
def __len__(self):
return self.len
# Creating the dataset and dataloader for the neural network
train_size = 0.8
train_dataset=df.sample(frac=train_size,random_state=200)
test_dataset=df.drop(train_dataset.index).reset_index(drop=True)
train_dataset = train_dataset.reset_index(drop=True)
print("FULL Dataset: {}".format(df.shape))
print("TRAIN Dataset: {}".format(train_dataset.shape))
print("TEST Dataset: {}".format(test_dataset.shape))
training_set = Triage(train_dataset, tokenizer, MAX_LEN)
testing_set = Triage(test_dataset, tokenizer, MAX_LEN)
# Creating the dataset and dataloader for the neural network
train_size = 0.8
train_dataset=df.sample(frac=train_size,random_state=200)
test_dataset=df.drop(train_dataset.index).reset_index(drop=True)
train_dataset = train_dataset.reset_index(drop=True)
print("FULL Dataset: {}".format(df.shape))
print("TRAIN Dataset: {}".format(train_dataset.shape))
print("TEST Dataset: {}".format(test_dataset.shape))
training_set = Triage(train_dataset, tokenizer, MAX_LEN)
testing_set = Triage(test_dataset, tokenizer, MAX_LEN)
# Creating the customized model
class DistillBERTClass(torch.nn.Module):
def __init__(self):
super(DistillBERTClass, self).__init__()
self.l1 = DistilBertModel.from_pretrained("distilbert-base-uncased")
self.pre_classifier = torch.nn.Linear(768, 768)
self.dropout = torch.nn.Dropout(0.3)
self.classifier = torch.nn.Linear(768, 4)
def forward(self, input_ids, attention_mask):
output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask)
hidden_state = output_1[0]
pooler = hidden_state[:, 0]
pooler = self.pre_classifier(pooler)
pooler = torch.nn.ReLU()(pooler)
pooler = self.dropout(pooler)
output = self.classifier(pooler)
return output
model = DistillBERTClass()
model.to(device)
# Creating the loss function and optimizer
loss_function = torch.nn.BCELoss()
optimizer = torch.optim.Adam(params = model.parameters(), lr=LEARNING_RATE)
# Function to calcuate the accuracy of the model
def calculate_accu(big_idx, targets):
n_correct = (big_idx==targets).sum().item()
return n_correct
# Defining the training function on the 80% of the dataset for tuning the distilbert model
def train(epoch):
tr_loss = 0
n_correct = 0
nb_tr_steps = 0
nb_tr_examples = 0
model.train()
for _,data in enumerate(training_loader, 0):
ids = data['ids'].to(device, dtype = torch.long)
mask = data['mask'].to(device, dtype = torch.long)
targets = data['targets'].to(device, dtype = torch.long)
outputs = model(ids, mask)
loss = loss_function(outputs, targets)
tr_loss += loss.item()
big_val, big_idx = torch.max(outputs.data, dim=1)
n_correct += calcuate_accu(big_idx, targets)
nb_tr_steps += 1
nb_tr_examples+=targets.size(0)
if _%5000==0:
loss_step = tr_loss/nb_tr_steps
accu_step = (n_correct*100)/nb_tr_examples
print(f"Training Loss per 5000 steps: {loss_step}")
print(f"Training Accuracy per 5000 steps: {accu_step}")
optimizer.zero_grad()
loss.backward()
# # When using GPU
optimizer.step()
print(f'The Total Accuracy for Epoch {epoch}: {(n_correct*100)/nb_tr_examples}')
epoch_loss = tr_loss/nb_tr_steps
epoch_accu = (n_correct*100)/nb_tr_examples
print(f"Training Loss Epoch: {epoch_loss}")
print(f"Training Accuracy Epoch: {epoch_accu}")
return
for epoch in range(EPOCHS):
train(epoch)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [36], in <cell line: 1>()
1 for epoch in range(EPOCHS):
----> 2 train(epoch)
Input In [35], in train(epoch)
12 targets = data['targets'].to(device, dtype = torch.long)
14 outputs = model(ids, mask)
---> 15 loss = loss_function(outputs, targets)
16 tr_loss += loss.item()
17 big_val, big_idx = torch.max(outputs.data, dim=1)
.....
File ~\anaconda3\lib\site-packages\torch\nn\functional.py:3089, in binary_cross_entropy(input, target, weight, size_average, reduce, reduction)
3087 reduction_enum = _Reduction.get_enum(reduction)
3088 if target.size() != input.size():
-> 3089 raise ValueError(
3090 "Using a target size ({}) that is different to the input size ({}) is deprecated. "
3091 "Please ensure they have the same size.".format(target.size(), input.size())
3092 )
3094 if weight is not None:
3095 new_size = _infer_size(target.size(), weight.size())
ValueError: Using a target size (torch.Size([4])) that is different to the input size (torch.Size([4, 4])) is deprecated. Please ensure they have the same size.
Can someone help me to understand this error please?
In addition, if someone could direct me to a pytorch tutorial as I’d also like to know how to get the precision/recall metrics in pytorch, I would be really grateful…