I need to calculate precision and recall to evaluate my model performance,so I am using this code that perform inference,annotate the images with the resulted class
and calculates the precision and recall
this is the script I am using
import torch
import numpy as np
import cv2
import os
import torch.nn.functional as F
import torchvision.transforms as transforms
import glob
import argparse
import pathlib
from model import build_model
from class_names import class_names as CLASS_NAMES
import pandas as pd
from sklearn.metrics import precision_score, recall_score, classification_report, confusion_matrix
# Argument parser
parser = argparse.ArgumentParser()
parser.add_argument('-w', '--weights', default='../action_recognition/outputs/best_model.pth',
help='path to the model weights')
parser.add_argument('-c', '--csv_file', default='/action_recognition/input/testing.csv',
help='path to the CSV file containing ground truth labels')
args = parser.parse_args()
# Constants and configurations
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
IMAGE_RESIZE = 224
# Define transforms
def get_test_transform(image_size):
return transforms.Compose([
transforms.ToPILImage(),
transforms.Resize((image_size, image_size)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# Function to denormalize and annotate images
def annotate_image(image, output_class):
image = image.squeeze(0).permute((1, 2, 0)).cpu().numpy() # Permute dimensions for cv2
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # Convert to BGR for cv2
class_name = CLASS_NAMES[int(output_class)]
cv2.putText(image, class_name, (5, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, lineType=cv2.LINE_AA)
return image
# Function to perform inference
def inference(model, image, device):
model.eval()
with torch.no_grad():
image = image.to(device)
outputs = model(image)
predictions = F.softmax(outputs, dim=1).cpu().numpy()
output_class = np.argmax(predictions)
return output_class
# Function to load ground truth labels
def load_ground_truth(csv_path):
df = pd.read_csv(csv_path)
df['label'] = df['label'].map(lambda x: CLASS_NAMES.index(x))
return df['label'].values
# Function to calculate precision and recall
def calculate_metrics(true_labels, pred_labels):
precision = precision_score(true_labels, pred_labels, average='weighted')
recall = recall_score(true_labels, pred_labels, average='weighted')
return precision, recall
if __name__ == '__main__':
# Load model
weights_path = pathlib.Path(args.weights)
checkpoint = torch.load(weights_path, map_location=DEVICE)
model = build_model(fine_tune=False, num_classes=len(CLASS_NAMES)).to(DEVICE)
model.load_state_dict(checkpoint['model_state_dict'])
# Load images and ground truth labels
all_image_paths = glob.glob('action_recognition/input/test/*')
ground_truth_labels = load_ground_truth(args.csv_file)
y_true = ground_truth_labels
y_pred = []
infer_result_path = os.path.join(
'../action_recognition', 'outputs', 'inference_results', 'image_outputs'
)
os.makedirs(infer_result_path, exist_ok=True)
# Perform inference and save annotated images
transform = get_test_transform(IMAGE_RESIZE)
for i, image_path in enumerate(all_image_paths):
image = cv2.imread(image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image_tensor = transform(image)
image_tensor = torch.unsqueeze(image_tensor, 0)
# Inference
predicted_class = inference(model, image_tensor, DEVICE)
y_pred.append(predicted_class)
# Annotate and save result image
annotated_image = annotate_image(image_tensor, predicted_class)
image_name = os.path.basename(image_path)
cv2.imwrite(os.path.join(infer_result_path, image_name), annotated_image)
if len(y_true) != len(y_pred):
raise ValueError(f'Number of samples in y_true ({len(y_true)}) and y_pred ({len(y_pred)}) do not match.')
print("Ground Truth Labels:", y_true)
print("******************************")
print("Predicted Labels:", y_pred)
# Calculate precision and recall
y_pred = np.array(y_pred)
precision, recall = calculate_metrics(y_true, y_pred)
print(f'Precision: {precision:.4f}, Recall: {recall:.4f}')
print("Classification Report:\n", classification_report(y_true, y_pred, target_names=CLASS_NAMES))
print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))
I have two directories traning_set and testing_set
and two csv files train.csv and test csv
both files are structured as follow
filename,label
0_0_image.jpg,CallCellphone
0_101_image.jpg,CallCellphone
0_104_image.jpg,CallCellphone
0_110_image.jpg,CallCellphone
0_117_image.jpg,CallCellphone
0_125_image.jpg,CallCellphone
…
the results of the precision and recall were very poor Precision: 0.0637, Recall: 0.0638
but I think it does’t make sense cause the training results were very good reaching 99% for the training accuracy and 96% for the validation
I think I made a mistake somewhere in the script But I can’t figure it out
can someone help me ?