# Unexpected F1 score result compared to BLEU score for NMT task

Dear @all,

I’m trying to calculate the F1 score for an NMT task. I found some good resources online.

I have used `ignite.metrics` as follows:

```python
import torch
from ignite.metrics import Precision, Recall

def eph_precision(output, trg):
    # output: predicted logits, trg: target token ids
    # thresholded_output_transform is defined elsewhere
    precision = Precision(output_transform=thresholded_output_transform, average=True)
    precision.update((output, trg))
    return precision.compute()

def eph_recall(output, trg):
    recall = Recall(output_transform=thresholded_output_transform, average=True)
    recall.update((output, trg))
    return recall.compute()

def evaluate(model, iterator, criterion):
    model.eval()

    epoch_loss = 0
    epoch_precision = 0
    epoch_recall = 0

    with torch.no_grad():
        for i, batch in enumerate(iterator):
            src = batch.src
            trg = batch.trg

            output, _ = model(src, trg[:, :-1])

            # flatten to [batch * trg_len, vocab_size] and [batch * trg_len]
            output = output.contiguous().view(-1, output.shape[-1])
            trg = trg[:, 1:].contiguous().view(-1)

            loss = criterion(output, trg)
            precision = eph_precision(output, trg)
            recall = eph_recall(output, trg)

            epoch_loss += loss.item()
            epoch_precision += precision
            epoch_recall += recall

    return (epoch_loss / len(iterator),
            epoch_precision / len(iterator),
            epoch_recall / len(iterator))
```
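For comparison, here is what I understand token-level precision/recall to mean for NMT: take the argmax prediction at each target position, skip padding, and average per-class scores over the vocabulary. This is only a plain-Python sketch with a hypothetical helper name (`macro_precision_recall` and the `ignore_id` padding argument are my own, not part of ignite):

```python
from collections import Counter

def macro_precision_recall(pred_ids, true_ids, ignore_id=None):
    """Macro-averaged token-level precision/recall over vocabulary classes.

    pred_ids / true_ids: flat sequences of token ids.
    Positions where the target equals ignore_id (e.g. padding) are skipped.
    """
    tp, fp, fn = Counter(), Counter(), Counter()
    for p, t in zip(pred_ids, true_ids):
        if t == ignore_id:
            continue
        if p == t:
            tp[t] += 1          # correct token for class t
        else:
            fp[p] += 1          # predicted class p wrongly
            fn[t] += 1          # missed class t
    classes = set(tp) | set(fp) | set(fn)
    precisions = [tp[c] / (tp[c] + fp[c]) if tp[c] + fp[c] else 0.0
                  for c in classes]
    recalls = [tp[c] / (tp[c] + fn[c]) if tp[c] + fn[c] else 0.0
               for c in classes]
    n = max(len(classes), 1)
    return sum(precisions) / n, sum(recalls) / n
```

For example, `macro_precision_recall([1, 2, 3, 3], [1, 2, 2, 3])` averages per-class scores over classes 1, 2 and 3.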

Printing the epoch output:

```python
valid_loss, valid_prce, valid_recall = evaluate(model, vali_iterator, criterion)
vali_f1score = valid_prce * valid_recall * 2 / (valid_prce + valid_recall + 1e-20)
```
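As a sanity check, the harmonic-mean formula itself behaves as expected on toy values (the `1e-20` epsilon only guards against division by zero when both scores are 0):

```python
def f1(precision, recall, eps=1e-20):
    # harmonic mean of precision and recall
    return 2 * precision * recall / (precision + recall + eps)

print(f1(0.5, 0.5))  # 0.5
print(f1(0.0, 0.0))  # 0.0, no ZeroDivisionError thanks to eps
```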

I got an unexpectedly low F1 score of `0.1245`, compared with a BLEU score of `49.31`.

Any suggestions on how to fix this issue? Also, what is the recommended way to apply the F1 score to an NMT task?

Kind regards,