This is my first time working on multilabel text classification with BERT, and the model I am using is BertForSequenceClassification.
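For context, the model is created roughly like this (a sketch; the checkpoint name is a placeholder, and 54 matches my number of classes):

from transformers import BertForSequenceClassification

# 54 output units, one per class; checkpoint name is a placeholder
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=54,
    output_attentions=False,
    output_hidden_states=False,
)
model = model.to(device)  # same torch.device used in the loop below

Here is my training loop: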
for epoch in tqdm(range(1, epochs + 1)):
    model.train()
    loss_train_total = 0
    progress_bar = tqdm(dataloader_train, desc='Epoch {:1d}'.format(epoch), leave=False, disable=False)
    for batch in progress_bar:
        model.zero_grad()
        batch = tuple(b.to(device) for b in batch)
        # print(batch[0].size())
        # print(batch[1].size())
        # print(batch[2].size())
        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2],
                 }
        outputs = model(**inputs)
        loss = outputs[0]
        loss_train_total += loss.item()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
        progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item()/len(batch))})
    torch.save(model.state_dict(), f'data_volume/finetuned_BERT_epoch_{epoch}.model')
    tqdm.write(f'\nEpoch {epoch}')
    loss_train_avg = loss_train_total/len(dataloader_train)
    tqdm.write(f'Training loss: {loss_train_avg}')
    val_loss, predictions, true_vals = evaluate(dataloader_validation)
    val_f1 = f1_score_func(predictions, true_vals)
    tqdm.write(f'Validation loss: {val_loss}')
    tqdm.write(f'F1 Score (Weighted): {val_f1}')
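For reference, evaluate returns the averaged validation loss plus the stacked logits and true labels, and f1_score_func is along these lines (a sketch; the sigmoid-plus-threshold binarization and the 0.5 cutoff are assumptions about how the logits are turned into predictions):

import numpy as np
from sklearn.metrics import f1_score

def f1_score_func(preds, labels, threshold=0.5):
    # preds: raw logits, shape (num_samples, num_classes)
    # labels: multi-hot ground truth of the same shape
    probs = 1 / (1 + np.exp(-preds))           # sigmoid per class
    y_pred = (probs >= threshold).astype(int)  # independent per-class decision
    return f1_score(labels, y_pred, average='weighted', zero_division=0)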
But this results in an error about mismatched batch sizes on the line outputs = model(**inputs).
When printing out the dimensions with
print(batch[0].size())
print(batch[1].size())
print(batch[2].size())
I got
torch.Size([3, 133])
torch.Size([3, 133])
torch.Size([3, 54])
where 54 is the number of classes I have (3 is the batch size and 133 the sequence length).
I saw some solutions saying that the target batch_size in the error comes from the label tensor being flattened, and that the number 162 comes from 3 * 54.
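That matches what I get if I flatten the label tensor myself:

print(batch[2].size())           # torch.Size([3, 54])
print(batch[2].view(-1).size())  # torch.Size([162]), i.e. 3 * 54

so it looks like the loss function inside the model flattens the labels as if each row contained a single class index.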
I also saw the suggestion “Use target = torch.argmax(target, dim=1) to create the expected target tensor,” but I am not sure how or where to do this in my code.
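My guess is that it would go right before building inputs, something like this, but I am not sure it is correct for multilabel data, since argmax keeps only one class per sample:

inputs = {'input_ids':      batch[0],
          'attention_mask': batch[1],
          # collapse each multi-hot row to a single class index;
          # this silences the shape error but discards all labels except one
          'labels':         torch.argmax(batch[2], dim=1),
         }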
Thank you in advance.