I am trying to create an image classification model for architectural styles of buildings. I use transfer learning (ResNet50 and PyTorch), but the validation accuracy does not go above 80 percent. Did I do something incorrectly, or how can I fine-tune it? My dataset consists of the following classes (150 images per class):
- American Craftsman
- Art Deco
- Art Nouveau
- Baroque
- Beaux-Arts
- Georgian
- Japanese Traditional
- Queen Annes
- Russian Revival
- Tudor Revival
```
import torch
from torchvision import transforms, datasets
import yaml
import torch.nn as nn
from torchvision import models
import os


def build_model(num_classes, pretrained=True):
    """Build ResNet-50 model with a custom classifier."""
    # Load pretrained ResNet-50
    model = models.resnet50(
        weights=models.ResNet50_Weights.IMAGENET1K_V2 if pretrained else None
    )
    # Freeze all layers first
    for param in model.parameters():
        param.requires_grad = False
    # Unfreeze layer3
    for param in model.layer3.parameters():
        param.requires_grad = True
    # Unfreeze layer4
    for param in model.layer4.parameters():
        param.requires_grad = True
    # Replace the final fully connected layer
    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, 512),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(512, num_classes),
    )
    return model
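

# A minimal sketch (an assumption, not code I currently run) of an optimizer with
# per-group learning rates: the unfrozen backbone layers (layer3/layer4) get a
# smaller rate than the freshly initialized head. The helper name `make_optimizer`
# and the learning-rate values are placeholders, not tuned settings.
def make_optimizer(model, head_lr=1e-3, backbone_lr=1e-4):
    return torch.optim.AdamW(
        [
            {"params": model.layer3.parameters(), "lr": backbone_lr},
            {"params": model.layer4.parameters(), "lr": backbone_lr},
            {"params": model.fc.parameters(), "lr": head_lr},
        ],
        weight_decay=1e-4,
    )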


def load_data(config_path="dataset_config.yaml"):
    with open(config_path) as f:
        config = yaml.safe_load(f)
    # Data augmentation
    train_transform = transforms.Compose(
        [
            transforms.RandomResizedCrop(config["dataset"]["image_size"]),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(15),  # Rotates image by ±15 degrees
            transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # Small shifts
            transforms.RandomApply(
                [transforms.GaussianBlur(kernel_size=3)], p=0.5
            ),  # Blurs 50% of images
            transforms.ColorJitter(
                brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1
            ),  # Stronger jitter
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    )
    test_transform = transforms.Compose(
        [
            transforms.Resize(256),
            transforms.CenterCrop(config["dataset"]["image_size"]),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    )
    # Load datasets
    train_path = (
        f"{config['dataset']['root_dir']}/{config['dataset']['set_type']}/training_set"
    )
    test_path = (
        f"{config['dataset']['root_dir']}/{config['dataset']['set_type']}/testing_set"
    )
    train_data = datasets.ImageFolder(train_path, train_transform)
    test_data = datasets.ImageFolder(test_path, test_transform)
    # Create dataloaders
    dataloaders = {
        "train": torch.utils.data.DataLoader(
            train_data,
            batch_size=config["dataset"]["batch_size"],
            shuffle=True,
            num_workers=config["dataset"]["num_workers"],
        ),
        "test": torch.utils.data.DataLoader(
            test_data,
            batch_size=config["dataset"]["batch_size"],
            shuffle=False,
            num_workers=config["dataset"]["num_workers"],
        ),
    }
    return dataloaders, train_data.classes
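

# For reference: the keys read above assume a dataset_config.yaml shaped roughly
# like this (the values below are placeholders, not my real settings):
#
#   dataset:
#     root_dir: /path/to/dataset
#     set_type: 150_set
#     image_size: 224
#     batch_size: 32
#     num_workers: 4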


def load_model():
    # Define the path to the dataset root (where the train folder is located)
    dataset_root = "…/architecture_classifier/ADrepo-main/150_set/Training_Set"
    train_dir = os.path.join(dataset_root, "")  # Or whatever your train folder is named
    # Get class names directly from the folder names in the 'train' directory;
    # filter out non-directories (like foo.txt) and only keep folders
    class_names = sorted(
        [d for d in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, d))]
    )
    # Initialize the model (build_model creates the model architecture)
    model = build_model(
        len(class_names)
    )  # Number of output classes is taken from class_names
    # Optionally load model weights if they exist
    model.load_state_dict(torch.load("../architecture_classifier/best_model.pth"))
    return model, class_names
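

# Usage sketch (an assumption, not my exact evaluation code): classify one image
# with the loaded model. The image path and the 224 crop size are placeholders
# (224 should match image_size in dataset_config.yaml).
if __name__ == "__main__":
    from PIL import Image

    model, class_names = load_model()
    model.eval()
    preprocess = transforms.Compose(
        [
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    )
    image = Image.open("some_building.jpg").convert("RGB")
    batch = preprocess(image).unsqueeze(0)  # add a batch dimension
    with torch.no_grad():
        probs = torch.softmax(model(batch), dim=1)[0]
    pred = int(torch.argmax(probs))
    print(f"Predicted style: {class_names[pred]} ({probs[pred].item():.2%})")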