Training fcn_resnet50 from scratch on PASCAL VOC

Hi everyone!

Thanks for dropping by, and apologies if this is a dumb post; this is my first big project in deep learning and computer vision. I'm trying to train fcn_resnet50 from scratch on the PASCAL VOC 2012 SegmentationClass dataset, but I keep getting a low mIoU score. I've also visualized the masks the model produces, and they seem to reflect the poor performance.
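
For what it's worth, the mask visualization is just an argmax over the model's logits, roughly like this (a trimmed-down sketch; show_prediction is an illustrative helper, not my exact plotting code):

import torch
import matplotlib.pyplot as plt

@torch.no_grad()
def show_prediction(model, image, device):
    # image: a (3, H, W) tensor, normalized the same way as the training data
    model.eval()
    logits = model(image.unsqueeze(0).to(device))["out"]  # (1, 21, H, W)
    pred = logits.argmax(dim=1).squeeze(0).cpu().numpy()  # (H, W) class indices
    plt.imshow(pred, cmap="tab20", vmin=0, vmax=20)
    plt.axis("off")
    plt.show()

Here is the full training script: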

import torch
import os
import csv
from torch import optim
from dataloader import CustomVOCDataset
from torch.utils.data import DataLoader
from torchvision.models.segmentation import fcn_resnet50
from eval import model_eval

train = CustomVOCDataset("VOCtrainval_11-May-2012", "train", True)
val = CustomVOCDataset("VOCtrainval_11-May-2012", "val", False)

train_loader = DataLoader(train, batch_size=16, shuffle=True, num_workers=12)
val_loader = DataLoader(val, batch_size=16, shuffle=True, num_workers=12)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

wsss_model = fcn_resnet50(weights=None, weights_backbone=None, num_classes=21)
wsss_model.to(device)

optimizer = optim.Adam(wsss_model.parameters(), lr=1e-4)
num_epochs = 200
criterion = torch.nn.CrossEntropyLoss(ignore_index=255).to(device)

batches = len(train_loader)

best_miou = 0.0
os.makedirs("model_weights", exist_ok=True)  # make sure the checkpoint directory exists

for epoch in range(num_epochs):
    wsss_model.train()
    total_loss = 0.0

    for batch_idx, (data, target) in enumerate(train_loader):
        if (batch_idx + 1) % 30 == 0:
            print(f"Now on batch - {batch_idx+1} / {batches}")
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = wsss_model(data)
        loss = criterion(output["out"], target)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    loss_epoch = total_loss / batches

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss_epoch}")

    # if (epoch + 1) % 3 == 0:
    mean_iou = model_eval(wsss_model, val_loader, 21, device)
    print("Mean IoU on evaluation data: {}".format(mean_iou))
    # Append the epoch number, loss, and mIoU to the training log.
    with open("model_train_log.csv", "a", newline="") as file:
        writer = csv.writer(file)
        writer.writerow([epoch + 1, loss_epoch, mean_iou.item()])
        if mean_iou > best_miou:  # save a checkpoint whenever validation mIoU improves
            best_miou = mean_iou
            torch.save(
                wsss_model.state_dict(),
                "model_weights/voc_model_epoch_{}_weights.pth".format(epoch + 1),
            )
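
In case the data pipeline is where I went wrong, CustomVOCDataset boils down to something like this (a condensed sketch of the idea behind dataloader.py; the resize size, normalization constants, and flip augmentation here are placeholders, not necessarily what I actually use):

import os
import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset
from torchvision.transforms import InterpolationMode
import torchvision.transforms.functional as TF

class CustomVOCDataset(Dataset):
    def __init__(self, root, image_set, train, size=(256, 256)):
        voc_root = os.path.join(root, "VOCdevkit", "VOC2012")
        split_file = os.path.join(voc_root, "ImageSets", "Segmentation", image_set + ".txt")
        with open(split_file) as f:
            self.ids = [line.strip() for line in f if line.strip()]
        self.image_dir = os.path.join(voc_root, "JPEGImages")
        self.mask_dir = os.path.join(voc_root, "SegmentationClass")
        self.train = train
        self.size = size

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, idx):
        name = self.ids[idx]
        image = Image.open(os.path.join(self.image_dir, name + ".jpg")).convert("RGB")
        mask = Image.open(os.path.join(self.mask_dir, name + ".png"))  # palette PNG: classes 0-20 plus 255 for void

        # Bilinear resize for the image, nearest for the mask so class indices stay valid.
        image = TF.resize(image, list(self.size))
        mask = TF.resize(mask, list(self.size), interpolation=InterpolationMode.NEAREST)

        # The train flag gates augmentation; a random horizontal flip is just an example.
        if self.train and torch.rand(1).item() < 0.5:
            image = TF.hflip(image)
            mask = TF.hflip(mask)

        image = TF.normalize(TF.to_tensor(image), [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        mask = torch.as_tensor(np.array(mask), dtype=torch.long)
        return image, mask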

The training script is above, and for some reason it always plateaus at around 18% mIoU on the validation set after roughly 140 epochs. Is that expected when training from scratch, or have I royally screwed up somewhere?
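
And in case the metric itself is off, model_eval boils down to something like this (a condensed sketch of eval.py, assuming a confusion-matrix mIoU over the 21 classes with the 255 void label ignored):

import torch

@torch.no_grad()
def model_eval(model, loader, num_classes, device):
    model.eval()
    conf = torch.zeros(num_classes, num_classes, dtype=torch.long, device=device)
    for data, target in loader:
        data, target = data.to(device), target.to(device)
        pred = model(data)["out"].argmax(dim=1)
        keep = target != 255  # drop the void label
        idx = target[keep] * num_classes + pred[keep]
        conf += torch.bincount(idx, minlength=num_classes ** 2).reshape(num_classes, num_classes)
    inter = conf.diag().float()
    union = conf.sum(dim=0).float() + conf.sum(dim=1).float() - inter
    iou = inter / union.clamp(min=1)
    return iou.mean()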

Thanks for all your help!