# Load the modern Greek BERT tokenizer and encoder from the same checkpoint,
# so the vocabulary always matches the pretrained weights.
_GREEK_BERT = "nlpaueb/bert-base-greek-uncased-v1"
tokenizer = AutoTokenizer.from_pretrained(_GREEK_BERT)
bert = AutoModel.from_pretrained(_GREEK_BERT)
Then I use the code from the previous link to train and evaluate the model:
# Lowest validation loss seen so far; starting at infinity means the first
# epoch always counts as an improvement and gets checkpointed.
best_valid_loss = float('inf')
epochs = 5

# Per-epoch training and validation losses, appended in epoch order.
train_losses = []
valid_losses = []

for epoch in tqdm.tqdm(range(epochs)):
    print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))

    # Train for one epoch, then evaluate. Only the loss (first return value)
    # is used here; the second value of each call is discarded.
    train_loss, _ = train()
    valid_loss, _ = evaluate()

    # Save the weights only when the validation loss improves, so
    # 'saved_weights.pt' always holds the best model seen so far.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'saved_weights.pt')

    # Record this epoch's losses.
    train_losses.append(train_loss)
    valid_losses.append(valid_loss)

    print(f'\nTraining Loss: {train_loss:.3f}')
    print(f'Validation Loss: {valid_loss:.3f}')
and I get the following error:
RuntimeError: weight tensor should be defined either for all 2 classes or no classes but got weight tensor of shape: [5]
Based on the error message it seems that a weighted loss is used. While logits (or log probabilities) for 2 classes are apparently given as the output of the model (could you confirm this?), the weight tensor contains 5 values (i.e. for 5 classes).
It’s unclear how the criterion is created, as it seems to be hidden in the train() method, so you should make sure to define weights for the two valid classes only.
I have 6 classes. The following is the code for the cross-entropy loss and the train function:
# Compute 'balanced' class weights from the training labels.
# NOTE(review): the weight vector gets one entry per distinct label present
# in authors_train (np.unique). If a class is missing from the training
# split, the vector is shorter than the real number of classes -- confirm
# its length matches the number of outputs of the model.
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(authors_train),
    y=authors_train,
)

# Convert the class weights to a float tensor and move it to the target device.
weights = torch.tensor(class_weights, dtype=torch.float).to(device)

# Weighted negative log-likelihood loss. The deprecated `size_average` and
# `reduce` arguments were passed as None (their default) and are dropped;
# the reduction stays at the default.
cross_entropy = nn.NLLLoss(weight=weights)
----------------------------------------------------------------------------------------------------------------
# function to train the model
def train():
    """Run one training epoch over ``train_dataloader``.

    Relies on the module-level ``model``, ``optimizer``, ``cross_entropy``,
    ``device`` and ``train_dataloader``.

    Returns:
        A ``(avg_loss, total_preds)`` tuple: the mean per-batch loss of the
        epoch, and the model outputs of all batches concatenated along
        axis 0 as a NumPy array.
    """
    model.train()

    num_batches = len(train_dataloader)
    total_loss = 0

    # model outputs of every batch, collected as NumPy arrays
    total_preds = []

    for step, batch in enumerate(train_dataloader):
        # progress update after every 50 batches (but not for batch 0)
        if step % 50 == 0 and step != 0:
            print(' Batch {:>5,} of {:>5,}.'.format(step, num_batches))

        # move every tensor of the batch to the target device
        sent_id, mask, labels = [r.to(device) for r in batch]

        # clear gradients left over from the previous step
        model.zero_grad()

        # forward pass for the current batch
        preds = model(sent_id, mask)

        # The loss raises a RuntimeError when the class dimension of `preds`
        # does not match the length of its weight tensor.
        loss = cross_entropy(preds, labels)
        total_loss += loss.item()

        # backward pass, then clip the gradient norm to 1.0 to guard against
        # exploding gradients before the parameter update
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        # detach from the graph and copy to CPU before converting to NumPy
        total_preds.append(preds.detach().cpu().numpy())

    # mean loss over the batches of this epoch
    avg_loss = total_loss / num_batches

    # flatten the list of per-batch arrays into (number of samples, ...)
    total_preds = np.concatenate(total_preds, axis=0)

    return avg_loss, total_preds
So my model's output should have 6 values (one per class), since we have 6 classes.
Yes, this should be the case, but did you also verify it by checking the shape?
Based on the error message, this doesn’t seem to be the case as seen here:
# Minimal reproduction of the error: the loss is given 6 class weights ...
criterion = nn.CrossEntropyLoss(weight=torch.randn(6))
# ... but the "model output" is one sample with scores for only 2 classes
x = torch.randn(1, 2)
# one target label drawn from the 6-class range [0, 6)
target = torch.randint(0, 6, (1,))
# raises the RuntimeError shown below: 2 classes in the input, 6 weights
out = criterion(x, target)
> RuntimeError: weight tensor should be defined either for all 2 classes or no classes but got weight tensor of shape: [6]