Hey everyone!
What is the best way to find the probabilities of predictions?
I have a custom dataset trained on 'bert-base-german-cased'. The prediction function looks like this:
def get_predictions(model, data_loader):
    """Run the model over every batch of *data_loader* and collect results.

    Returns a 4-tuple:
        (passage texts, predicted class indices,
         per-class softmax probabilities, ground-truth targets)
    The three tensor results are stacked and moved to the CPU.
    """
    model = model.eval()

    texts_seen = []
    pred_labels = []
    pred_scores = []
    true_labels = []

    # Inference only — no gradient tracking needed.
    with torch.no_grad():
        for batch in data_loader:
            batch_texts = batch["passage_text"]
            ids = batch["input_ids"].to(device)
            mask = batch["attention_mask"].to(device)
            labels = batch["targets"].to(device)

            logits = model(
                input_ids=ids,
                attention_mask=mask
            )

            # Predicted class = argmax over the class dimension;
            # softmax turns the raw logits into probabilities.
            _, batch_preds = torch.max(logits, dim=1)
            batch_probs = F.softmax(logits, dim=1)

            texts_seen.extend(batch_texts)
            pred_labels.extend(batch_preds)
            pred_scores.extend(batch_probs)
            true_labels.extend(labels)

    pred_labels = torch.stack(pred_labels).cpu()
    pred_scores = torch.stack(pred_scores).cpu()
    true_labels = torch.stack(true_labels).cpu()
    return texts_seen, pred_labels, pred_scores, true_labels
# Collect predictions (and their softmax probabilities) for the whole test set.
y_passage_text, y_pred, y_pred_probs, y_test = get_predictions(
    model,
    test_data_loader
)

# --- Single-passage inference -------------------------------------------
# NOTE(review): `passage_text` must be a raw string defined before this
# point (the passage you want to classify) — it is not set above.
encoded_custom_entity = tokenizer.encode_plus(
    passage_text,
    max_length=MAX_LEN,
    add_special_tokens=True,
    return_token_type_ids=False,
    padding='max_length',   # `pad_to_max_length=True` is deprecated
    return_attention_mask=True,
    return_tensors='pt',    # straight quotes — curly quotes are a syntax error
)

input_ids = encoded_custom_entity['input_ids'].to(device)
attention_mask = encoded_custom_entity['attention_mask'].to(device)

# Inference only — skip gradient tracking.
with torch.no_grad():
    output = model(input_ids, attention_mask)

# Softmax over the logits gives the per-class probabilities; taking max of
# the probabilities yields the same argmax as max over the raw logits,
# plus the confidence of that prediction.
probs = F.softmax(output, dim=1)
confidence, prediction = torch.max(probs, dim=1)

print(f'custom_entity: {passage_text}')
# .item() converts the 1-element tensor to a plain int so it can index a list.
print(f'entity_id : {class_names[prediction.item()]}')
print(f'probability : {confidence.item():.4f}')
How do I find the prediction probability, given a passage?