 # Calculate accuracy for Transformer model

I am having problems calculating the training accuracy of my model. I extract the logits and apply softmax to them before calculating the accuracy. The accuracy increases every epoch, but the values are 1182.91, 2409.33, 3661.98, 4927.61, and 6197.20 over 5 epochs, which obviously cannot be accuracies. The function for measuring the accuracy is as follows:

```python
import numpy as np

def flat_accuracy(preds, labels):
    # Take the index of the highest logit/probability as the predicted class.
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    # Fraction of predictions that match the labels, in the 0-1 range.
    return np.sum(pred_flat == labels_flat) / len(labels_flat)
```
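
For context, the function itself seems to behave as expected on a toy input (the arrays below are made up just to sanity-check it):

```python
import numpy as np

# Hypothetical softmaxed predictions for 3 examples and 2 classes.
preds = np.array([[0.9, 0.1],
                  [0.2, 0.8],
                  [0.6, 0.4]])
labels = np.array([0, 1, 1])

# argmax per row gives [0, 1, 0]; two of three match the labels.
print(flat_accuracy(preds, labels))  # 0.666...
```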

And the training epoch code is as follows:

```python
import time
from collections import defaultdict

import torch
import torch.nn.functional as F

loss_values = []
train_acc = 0

# For each epoch...
for epoch_i in range(0, epochs):
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    # Measure how long the training epoch takes.
    t0 = time.time()

    # Reset the total loss for this epoch.
    total_loss = 0

    model.train()
    history = defaultdict(list)

    for step, batch in enumerate(train_dataloader):
        # Progress update every 50 batches.
        if step % 50 == 0 and not step == 0:
            # Calculate elapsed time in minutes.
            elapsed = format_time(time.time() - t0)
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(
                step, len(train_dataloader), elapsed))

        # `batch` contains three PyTorch tensors: input ids, attention masks, labels.
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        # Clear any previously calculated gradients.
        model.zero_grad()

        # Evaluate the model on this training batch.
        outputs = model(b_input_ids,
                        token_type_ids=None,
                        attention_mask=b_input_mask,
                        labels=b_labels)
        loss = outputs.loss

        # Accumulate the training loss over all of the batches.
        total_loss += loss.item()

        loss.backward()

        # Clip the norm of the gradients to 1.0 to prevent the
        # "exploding gradients" problem.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()

        # Update the learning rate.
        scheduler.step()

        # Softmax the logits before measuring accuracy.
        logits = outputs.logits
        logits = F.softmax(logits, dim=-1)
        logits = logits.detach().cpu().numpy()

        labels = b_labels.to('cpu').numpy()
        tmp_train_accuracy = flat_accuracy(logits, labels)
        train_acc += tmp_train_accuracy

    # Calculate the average loss over the training data.
    avg_train_loss = total_loss / len(train_dataloader)

    history['train_loss'].append(avg_train_loss)
    history['train_acc'].append(train_acc)

    # Store the loss value for plotting the learning curve.
    loss_values.append(avg_train_loss)

    print("")
    print("Training accuracy: {}".format(train_acc))
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epoch took: {:}".format(format_time(time.time() - t0)))

print("")
print("Training complete!")
```

Are there any mistakes in my logit calculation or in the accuracy function? Any thoughts?

Hi @the_coder,
It looks like the `flat_accuracy` function returns an accuracy scaled to the 0-1 range, but `train_acc` is accumulated with `train_acc += tmp_train_accuracy` over every batch of every epoch and is never divided by the number of batches, so the printed total just keeps growing.
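
For example, a minimal sketch of the fix (assuming the running total is reset each epoch and `train_dataloader` is your training DataLoader):

```python
for epoch_i in range(epochs):
    train_acc = 0  # reset the running total at the start of each epoch
    for step, batch in enumerate(train_dataloader):
        # ... your existing forward pass, loss, and optimizer step ...
        logits = F.softmax(outputs.logits, dim=-1).detach().cpu().numpy()
        labels = b_labels.to('cpu').numpy()
        train_acc += flat_accuracy(logits, labels)
    # Divide by the number of batches so the result is back in the 0-1 range.
    avg_train_acc = train_acc / len(train_dataloader)
    print("Training accuracy: {0:.2f}".format(avg_train_acc))
```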

Thanks for your input @yoshitomo-matsubara, you are right, this was causing the problem.