These two scripts run the inference and then calculate the top-5/top-1 accuracy.
The first one performs the inference:
import time
import json
from collections import defaultdict

import torch
import torch.nn.functional as F

from utils import AverageMeter


def get_video_results(outputs, class_names, output_topk):
    # Keep the top-k class names and scores for one video (or one segment).
    sorted_scores, locs = torch.topk(outputs,
                                     k=min(output_topk, len(class_names)))

    video_results = []
    for i in range(sorted_scores.size(0)):
        video_results.append({
            'label': class_names[locs[i].item()],
            'score': sorted_scores[i].item()
        })

    return video_results


def inference(data_loader, model, result_path, class_names, no_average,
              output_topk):
    print('inference')

    model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    results = {'results': defaultdict(list)}

    end_time = time.time()
    with torch.no_grad():
        for i, (inputs, targets) in enumerate(data_loader):
            data_time.update(time.time() - end_time)

            # Each target carries the clip's video id and segment boundaries.
            video_ids, segments = zip(*targets)
            outputs = model(inputs)
            outputs = F.softmax(outputs, dim=1).cpu()

            for j in range(outputs.size(0)):
                results['results'][video_ids[j]].append({
                    'segment': segments[j],
                    'output': outputs[j]
                })

            batch_time.update(time.time() - end_time)
            end_time = time.time()

            print('[{}/{}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                      i + 1,
                      len(data_loader),
                      batch_time=batch_time,
                      data_time=data_time))

    inference_results = {'results': {}}
    if not no_average:
        # Average the segment-level softmax scores into one video-level score.
        for video_id, video_results in results['results'].items():
            video_outputs = [
                segment_result['output'] for segment_result in video_results
            ]
            video_outputs = torch.stack(video_outputs)
            average_scores = torch.mean(video_outputs, dim=0)
            inference_results['results'][video_id] = get_video_results(
                average_scores, class_names, output_topk)
    else:
        # Keep one top-k result per segment.
        for video_id, video_results in results['results'].items():
            inference_results['results'][video_id] = []
            for segment_result in video_results:
                segment = segment_result['segment']
                result = get_video_results(segment_result['output'],
                                           class_names, output_topk)
                inference_results['results'][video_id].append({
                    'segment': segment,
                    'result': result
                })

    with result_path.open('w') as f:
        json.dump(inference_results, f)
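For context, here is a rough sketch of how the inference step could be driven and what the saved file then contains; the loader, model, and file names below are placeholders I am assuming, not part of the original code:

from pathlib import Path

# Assumed setup: val_loader yields (inputs, targets) where each target is a
# (video_id, segment) pair, model is the trained network, and class_names
# maps class index -> class name.
inference(val_loader, model, Path('val.json'), class_names,
          no_average=False, output_topk=5)

# With no_average=False the saved JSON holds, per video, the top-k class
# names with their averaged softmax scores, roughly:
# {"results": {"video_x": [{"label": "abseiling", "score": 0.61}, ...]}}

Note that load_result in the second script reads a 'label' and a 'score' from every entry, so it expects this averaged (no_average=False) format rather than the per-segment one.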
The second one evaluates the accuracy based on the results produced by the first one:
import json
import argparse
from pathlib import Path


def get_class_labels(data):
    # Map each class name in the annotation file to an integer index.
    class_labels_map = {}
    index = 0
    for class_label in data['labels']:
        class_labels_map[class_label] = index
        index += 1

    return class_labels_map


def load_ground_truth(ground_truth_path, subset):
    with ground_truth_path.open('r') as f:
        data = json.load(f)

    class_labels_map = get_class_labels(data)

    # Collect (video_id, true label index) pairs for the requested subset.
    ground_truth = []
    for video_id, v in data['database'].items():
        if subset != v['subset']:
            continue
        this_label = v['annotations']['label']
        ground_truth.append((video_id, class_labels_map[this_label]))

    return ground_truth, class_labels_map


def load_result(result_path, top_k, class_labels_map):
    with result_path.open('r') as f:
        data = json.load(f)

    # For each video, keep the indices of its top-k predicted classes.
    result = {}
    for video_id, v in data['results'].items():
        labels_and_scores = []
        for this_result in v:
            label = class_labels_map[this_result['label']]
            score = this_result['score']
            labels_and_scores.append((label, score))
        labels_and_scores.sort(key=lambda x: x[1], reverse=True)
        result[video_id] = list(zip(*labels_and_scores[:top_k]))[0]

    return result


def remove_nonexistent_ground_truth(ground_truth, result):
    exist_ground_truth = [line for line in ground_truth if line[0] in result]

    return exist_ground_truth


def evaluate(ground_truth_path, result_path, subset, top_k, ignore):
    print('load ground truth')
    ground_truth, class_labels_map = load_ground_truth(ground_truth_path,
                                                       subset)
    print('number of ground truth: {}'.format(len(ground_truth)))

    print('load result')
    result = load_result(result_path, top_k, class_labels_map)
    print('number of result: {}'.format(len(result)))

    n_ground_truth = len(ground_truth)
    ground_truth = remove_nonexistent_ground_truth(ground_truth, result)

    if ignore:
        n_ground_truth = len(ground_truth)

    print('calculate top-{} accuracy'.format(top_k))
    # A video counts as correct if its true label is among the top-k predictions.
    correct = [1 if line[1] in result[line[0]] else 0 for line in ground_truth]
    accuracy = sum(correct) / n_ground_truth

    print('top-{} accuracy: {}'.format(top_k, accuracy))

    return accuracy


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('ground_truth_path', type=Path)
    parser.add_argument('result_path', type=Path)
    parser.add_argument('-k', type=int, default=1)
    parser.add_argument('--subset', type=str, default='validation')
    parser.add_argument('--save', action='store_true')
    parser.add_argument(
        '--ignore',
        action='store_true',
        help='ignore nonexistent videos in result')
    args = parser.parse_args()

    accuracy = evaluate(args.ground_truth_path, args.result_path, args.subset,
                        args.k, args.ignore)

    if args.save:
        with (args.result_path.parent / 'top{}.txt'.format(
                args.k)).open('w') as f:
            f.write(str(accuracy))
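For reference, calling the evaluation directly would look something like this (the file names are just placeholders for whatever annotation and result files you actually use); the same thing can be done from the shell by passing the two paths as positional arguments plus -k, --subset and --ignore:

from pathlib import Path

# Top-5 accuracy on the validation subset, ignoring videos that are
# missing from the result file.
evaluate(Path('kinetics.json'), Path('val.json'),
         subset='validation', top_k=5, ignore=True)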
I wanted to add a function that calculates the overall precision and recall of the model.
In the evaluate function I added this:
from sklearn.metrics import precision_score, recall_score

ground_truth = get_class_labels()
y_true = []
y_pred = []
for video_id, video_results in inference_results['results'].items():
    for result in video_results:
        predicted_label = result['label']
        true_label = ground_truth[video_id]
        y_true.append(true_label)
        y_pred.append(predicted_label)

precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')

print("Overall Precision: {:.4f}".format(precision))
print("Overall Recall: {:.4f}".format(recall))
But I am confused whether the results from the inference are the right ones to take as the predicted labels.
Can someone help me?