Hello all, I have inistalised the resnet50 model and default weights, and have trained/validated/tested the model to distinguish 2 classes using the following code
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, Dataset
from torchvision.datasets import CocoDetection
from torchvision.transforms import ToTensor, Compose, RandomHorizontalFlip, RandomResizedCrop, Normalize, Resize
import torchvision.models as models
import matplotlib.pyplot as plt
from pycocotools.coco import COCO
from PIL import Image
import os
class CocoDetection(Dataset):
def __init__(self, root, ann_file, transform=None):
"""
Args:
root (str): Root directory where images are stored.
ann_file (str): Path to the annotation file in COCO format.
transform (callable, optional): A function/transform to apply to the images.
"""
self.root = root
self.coco = COCO(ann_file)
self.ids = list(self.coco.imgs.keys())
self.transform = transform
def __getitem__(self, index):
"""
Args:
index (int): Index of the item to fetch.
Returns:
tuple: (image, target) where target is the class label.
"""
coco = self.coco
img_id = self.ids[index]
img_info = coco.loadImgs(img_id)[0]
path = img_info['file_name']
img = Image.open(os.path.join(self.root, path)).convert('RGB')
# Get annotations for the image
ann_ids = coco.getAnnIds(imgIds=img_id)
annotations = coco.loadAnns(ann_ids)
# Extract class ids from annotations
class_ids = [ann['category_id'] for ann in annotations]
# Assuming only one class per image; adjust if necessary
label = int(class_ids[0] - 1) if class_ids else -1
if self.transform is not None:
img = self.transform(img)
return img, torch.tensor(label, dtype=torch.long)
def __len__(self):
return len(self.ids)
img_dir = "datasets/maybegotthisright/images/default"
ann_file = "datasets/maybegotthisright/annotations/instances_default.json"
num_classes = 2
transform = Compose([
# RandomResizedCrop(224),
RandomResizedCrop(448),
RandomHorizontalFlip(),
ToTensor(),
Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# no random trasnform
# transform = Compose([
# Resize((224, 224)),
# ToTensor(),
# ])
dataset = CocoDetection(img_dir, ann_file, transform=transform)
num_samples = len(dataset)
train_size = int(0.75 * num_samples) # 75% for training
val_size = int(0.15 * num_samples) # 15% for validation
test_size = num_samples - train_size - val_size # 10% for testing
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
train_losses = []
val_losses = []
num_epochs = 125
for epoch in range(num_epochs):
model.train()
running_loss = 0.0
for inputs, labels in train_loader:
# print("INPUTS")
# print(inputs)
# print("LABELS")
# print(labels)
inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
train_losses.append(running_loss / len(train_loader))
print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}')
model.eval()
val_loss = 0.0
correct = 0
total = 0
with torch.no_grad():
for inputs, labels in val_loader:
inputs, labels = inputs.to(device), labels.to(device)
outputs = model(inputs)
loss = criterion(outputs, labels)
val_loss += loss.item()
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
val_losses.append(val_loss / len(val_loader))
print(f'Validation Loss: {val_loss / len(val_loader):.4f}, Accuracy: {100 * correct / total:.2f}%')
# scheduler.step()
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label='Training Loss')
plt.plot(val_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss over Epochs')
plt.legend()
plt.savefig("loss_graph.png")
plt.show()
model.eval()
correct = 0
total = 0
with torch.no_grad():
for inputs, labels in test_loader:
inputs, labels = inputs.to(device), labels.to(device)
outputs = model(inputs)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print(f'Accuracy of the model on the test set: {100 * correct / total:.2f}%')
torch.save(model, 'models/model.pt')
this gives a great result after ~40 epochs, here is a snippet of the terminal output.
Epoch [102/125], Loss: 0.0573
Validation Loss: 0.0557, Accuracy: 98.11%
Epoch [103/125], Loss: 0.0789
Validation Loss: 0.0785, Accuracy: 96.23%
Epoch [104/125], Loss: 0.0650
Validation Loss: 0.2081, Accuracy: 92.45%
Epoch [105/125], Loss: 0.1045
Validation Loss: 0.0638, Accuracy: 98.11%
Epoch [106/125], Loss: 0.0852
Validation Loss: 0.0914, Accuracy: 98.11%
Epoch [107/125], Loss: 0.0682
Validation Loss: 0.0433, Accuracy: 98.11%
Epoch [108/125], Loss: 0.0801
Validation Loss: 0.0335, Accuracy: 98.11%
Epoch [109/125], Loss: 0.1045
Validation Loss: 0.0615, Accuracy: 98.11%
Epoch [110/125], Loss: 0.0748
Validation Loss: 0.0561, Accuracy: 98.11%
Epoch [111/125], Loss: 0.0646
Validation Loss: 0.0196, Accuracy: 100.00%
Epoch [112/125], Loss: 0.0842
Validation Loss: 0.2006, Accuracy: 86.79%
Epoch [113/125], Loss: 0.0603
Validation Loss: 0.0755, Accuracy: 96.23%
Epoch [114/125], Loss: 0.0832
Validation Loss: 0.0231, Accuracy: 100.00%
Epoch [115/125], Loss: 0.0733
Validation Loss: 0.0333, Accuracy: 100.00%
Epoch [116/125], Loss: 0.1071
Validation Loss: 0.0361, Accuracy: 98.11%
Epoch [117/125], Loss: 0.0858
Validation Loss: 0.0154, Accuracy: 100.00%
Epoch [118/125], Loss: 0.0508
Validation Loss: 0.1709, Accuracy: 90.57%
Epoch [119/125], Loss: 0.0678
Validation Loss: 0.0307, Accuracy: 100.00%
Epoch [120/125], Loss: 0.1346
Validation Loss: 0.1028, Accuracy: 96.23%
Epoch [121/125], Loss: 0.0780
Validation Loss: 0.0486, Accuracy: 98.11%
Epoch [122/125], Loss: 0.0649
Validation Loss: 0.0445, Accuracy: 98.11%
Epoch [123/125], Loss: 0.0357
Validation Loss: 0.1038, Accuracy: 96.23%
Epoch [124/125], Loss: 0.0707
Validation Loss: 0.1060, Accuracy: 98.11%
Epoch [125/125], Loss: 0.0881
Validation Loss: 0.0482, Accuracy: 96.23%
Accuracy of the model on the test set: 97.30%
However, when i load the model (have tried both state_dict and full model methods), the model is unable to make any reasonable inferences on an image from the dataset used to train the model, it tends to get it right ~20% of the time, i believe i am handling loading the model correctly, but how would the performance be so poor, if both validation and testing provided great results, and the images used for inferencing are from the complete dataset used to train the model?
Here is my inference code.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.datasets import CocoDetection
from torchvision.transforms import ToTensor
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from enum import Enum
class ClassLabel(Enum):
redacted1 = 0
redacted2 = 1
@staticmethod
def get_label(index):
print(index)
for label in ClassLabel:
if label.value == index:
return label.name.replace('_', ' ')
return "Unknown"
# model_path = "models/model.pth"
# num_classes = 2
# model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
# num_ftrs = model.fc.in_features
# model.fc = nn.Linear(num_ftrs, num_classes)
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# model = model.to(device)
# model.load_state_dict(torch.load(model_path))
# model.eval() # Set model to evaluation mode
model = torch.load("models/model.pt")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()
def infer_image(image_path):
image = Image.open(image_path).convert("RGB")
transform = transforms.Compose([
# transforms.Resize((224, 224)),
transforms.Resize((448, 448)),
transforms.ToTensor(),
])
image = transform(image)
image = image.unsqueeze(0)
image = image.to(device)
# Perform inference
with torch.no_grad():
outputs = model(image)
probabilities = torch.softmax(outputs, dim=1)
_, predicted = torch.max(outputs.data, 1)
print(f'Raw output: {outputs}')
predicted_class_index = predicted.item()
probability = probabilities[0, predicted_class_index].item()
print(f'Probability: {probability:.2f}')
class_label = ClassLabel.get_label(predicted_class_index)
return class_label
Any help would be greatly appreciated, I sadly cannot provide the dataset due to legal constraints, but I hope the code provides enough detail.