Multiclass semantic segmentation DeepLabV3

I’m trying to do multi-class semantic segmentation on a modified Cityscapes dataset. The masks have been modified so that each pixel’s color (shade of gray) matches the ID of its class. Since I’m new to PyTorch, I don’t know if the setup of my project is any good.

This is my dataloader (note: I’m resizing images to 100x100 just so I can test training before running it on dedicated machines):

import glob
import random

import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision.transforms.functional as TF
from PIL import Image
from torch.utils.data import DataLoader
from torch.utils.data import Dataset  # For custom data-sets
from torchvision import transforms

# glob.glob() returns matches in arbitrary order. Images and masks are paired
# purely by list index, so both lists are sorted to make the pairing
# deterministic.
# NOTE(review): this assumes an image and its mask sort to the same position
# (e.g. identical file names in both folders) -- confirm against the data.
train_image_path = sorted(glob.glob("/home/filip/diplomski-code/data/rvc_uint8/images/train/cityscapes-34/*.png"))
train_mask_path = sorted(glob.glob("/home/filip/diplomski-code/data/rvc_uint8/annotations/train/cityscapes-34/*.png"))

val_image_path = sorted(glob.glob("/home/filip/diplomski-code/data/rvc_uint8/images/val/cityscapes-34/*.png"))
val_mask_path = sorted(glob.glob("/home/filip/diplomski-code/data/rvc_uint8/annotations/val/cityscapes-34/*.png"))

class MyDataset(Dataset):
    """Dataset of (image, label mask) pairs for semantic segmentation.

    Item ``i`` is built from ``image_path[i]`` and ``mask_path[i]``, so the two
    lists must be ordered so that matching files share an index.

    Args:
        image_path: list of image file paths.
        mask_path: list of mask file paths, same length and order as
            ``image_path``.
        train: when true, apply the random crop / random flip augmentation;
            when false, apply a deterministic center crop of the same size.
        mapping: optional ``{raw_mask_value: class_id}`` dict applied to every
            mask. ``None`` (the default) leaves mask values untouched.

    Raises:
        ValueError: if the two path lists differ in length.
    """

    def __init__(self, image_path, mask_path, train=True, mapping=None):
        if len(image_path) != len(mask_path):
            raise ValueError(
                "image_path and mask_path must have the same length, got "
                f"{len(image_path)} and {len(mask_path)}"
            )
        self.image_path = image_path
        self.mask_path = mask_path
        self.train = train
        self.mapping = {} if mapping is None else mapping
        self.transformT = transforms.ToTensor()

    def mask_to_class(self, mask):
        """Remap the values of ``mask`` in place according to ``self.mapping``.

        Comparisons are made against a snapshot of the original values, so a
        pixel that has just been remapped cannot be remapped a second time
        when a later key equals an earlier target value.

        Returns:
            The same tensor object, modified in place.
        """
        if not self.mapping:
            return mask
        original = mask.clone()
        for raw_value, class_id in self.mapping.items():
            mask[original == raw_value] = class_id
        return mask

    def transform(self, image, mask):
        """Resize to 100x100, crop to 90x90 and (in training) flip at random.

        The same crop window and flip decision are applied to the image and
        the mask so that they stay aligned.
        """
        # The image uses Resize's default interpolation. The mask must use
        # nearest-neighbour so that resizing never produces label values that
        # are not present in the original mask.
        image = transforms.Resize(size=(100, 100))(image)
        mask = transforms.Resize(size=(100, 100), interpolation=Image.NEAREST)(mask)

        if not self.train:
            # Deterministic crop keeps the output size equal to the training
            # size without randomising evaluation samples.
            return TF.center_crop(image, (90, 90)), TF.center_crop(mask, (90, 90))

        # Random crop
        i, j, h, w = transforms.RandomCrop.get_params(image, output_size=(90, 90))
        image = TF.crop(image, i, j, h, w)
        mask = TF.crop(mask, i, j, h, w)
        # Random vertical flip
        # NOTE(review): a vertical flip turns the scene upside down; confirm
        # this is intended rather than a horizontal flip (TF.hflip).
        if random.random() > 0.5:
            image = TF.vflip(image)
            mask = TF.vflip(mask)
        return image, mask

    def __getitem__(self, index):
        """Return ``(image, mask)``: a ToTensor() image and an int64 mask."""
        image = Image.open(self.image_path[index])
        mask = Image.open(self.mask_path[index])
        image, mask = self.transform(image, mask)
        image = self.transformT(image)
        # Convert to int64 before remapping so mapped class ids are not
        # limited to the uint8 range; int64 is also the dtype the training
        # script's loss receives as target.
        mask = torch.from_numpy(np.array(mask, dtype=np.uint8)).long()
        mask = self.mask_to_class(mask)
        return image, mask

    def __len__(self):
        """Return the number of image/mask pairs."""
        return len(self.image_path)

train_dataset = MyDataset(train_image_path, train_mask_path, train=True)
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# The validation set is built with train=False: it is not the training split
# and should not be requested with the training-time (augmenting) setting.
val_dataset = MyDataset(val_image_path, val_mask_path, train=False)
val_dataloader = DataLoader(val_dataset, batch_size=16, shuffle=False)

# Sanity check: pull one validation batch and print its shapes and labels.
# NOTE(review): this runs at import time, i.e. also when the training script
# imports train_dataset / val_dataset from this module.
batch_x, batch_y = next(iter(val_dataloader))

print('y shape    ', batch_y.shape)
print('x shape    ', batch_x.shape)
#print('unique values of x rgb    ', torch.unique(batch_x[0]))
print('unique values y     ', torch.unique(batch_y))

This is the example of image and of the mask:

This is how I defined pixel accuracy and mIoU, which I use in the training script:

import torch
import numpy as np 
import torch.nn.functional as F

@torch.no_grad()
def pixel_accuracy(output, mask):
    """Return the fraction of positions where the predicted class equals ``mask``.

    The predicted class is the argmax over dim 1 of ``softmax(output)``.
    Gradients are not tracked. The result is a plain Python float.
    """
    predicted = F.softmax(output, dim=1).argmax(dim=1)
    hits = (predicted == mask).int()
    return float(hits.sum()) / float(hits.numel())

def mIoU(pred_mask, mask, smooth=1e-10, n_classes=256):
    """Return the mean intersection-over-union over the classes present in ``mask``.

    Args:
        pred_mask: per-class scores with the class axis at dim 1; the
            predicted class of each position is the argmax over that axis.
        mask: ground-truth class ids, one per predicted position.
        smooth: small constant added to numerator and denominator of each IoU.
        n_classes: class ids ``0 .. n_classes - 1`` are evaluated.

    Returns:
        The mean of the per-class IoU values, ignoring classes that do not
        occur in ``mask``. NaN if none of the evaluated classes occurs.
    """
    with torch.no_grad():
        # Softmax is monotonic along the class axis, so the argmax of the raw
        # scores selects the same class without the extra pass.
        pred_mask = torch.argmax(pred_mask, dim=1)
        pred_mask = pred_mask.contiguous().view(-1)
        mask = mask.contiguous().view(-1)

        iou_per_class = []
        for clas in range(0, n_classes):  # loop per pixel class
            true_class = pred_mask == clas
            true_label = mask == clas

            if true_label.long().sum().item() == 0:
                # Class absent from the ground truth: record NaN so that
                # nanmean leaves it out of the average.
                iou_per_class.append(np.nan)
                continue

            intersect = torch.logical_and(true_class, true_label).sum().float().item()
            union = torch.logical_or(true_class, true_label).sum().float().item()
            iou_per_class.append((intersect + smooth) / (union + smooth))
        return np.nanmean(iou_per_class)

And this is the script I use for training (note: I’m preparing it for later use with different args because I later want to train the same model on multiple datasets for the Robust Vision Challenge):

import torch
import argparse
from torch import nn
import numpy as np
from model.model import custom_DeepLabv3
from model.metrics import mIoU, pixel_accuracy
from dataloader.dataloader2 import train_dataset, val_dataset
import matplotlib.pyplot as plt

def get_argparser():
    """Build the command-line parser for the training script.

    Returns:
        An ``argparse.ArgumentParser`` with the options ``--dataset``,
        ``--epochs``, ``--learning_rate``, ``--weight_decay``,
        ``--batch_size`` and ``--loss_function``.
    """
    parser = argparse.ArgumentParser()
    # Dataset options
    # NOTE(review): the start of this argument was lost in the source; the
    # option name, type and default below are reconstructed -- confirm.
    parser.add_argument("--dataset", type=str, default='cityscapes',
                        choices=['cityscapes','kitti','mapillary','viper','wilddash'],help="Name of dataset")
    parser.add_argument("--epochs", type=int, default = 100,
                        help = "number of epochs to train for(default = 100)")
    parser.add_argument("--learning_rate", type=float, default=1e-3,
                        help="set the learning rate(default = 1e-3)")
    # In case it ends up being used
    parser.add_argument("--weight_decay", type=float, default=1e-4,
                        help='weight decay (default: 1e-4)')
    parser.add_argument("--batch_size",type=int,default=16,help="set the batch size(default=16)")
    # Add more loss functions here if they are going to be experimented with
    parser.add_argument("--loss_function",type=str, default="CrossEntropyLoss",choices=["CrossEntropyLoss"],help="define loss type")

    return parser

opts = get_argparser().parse_args()
learning_rate = opts.learning_rate
batch_size = opts.batch_size
epochs = opts.epochs

# Pick the device first so the model and every batch end up on the same one.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

# NOTE(review): the statement that builds the model is missing from this
# script. The constructor signature of custom_DeepLabv3 is not visible here;
# a single "number of classes" argument of 256 is assumed -- confirm.
model = custom_DeepLabv3(256).to(device)

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

loss_fn = nn.CrossEntropyLoss()
# The learning rate now comes from --learning_rate (same default, 1e-3).
# NOTE(review): --weight_decay is parsed but not used; its default (1e-4)
# differs from the 0.01 kept here, so wiring it in would change training.
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0.01)

train_losses = []
val_losses = []
val_iou = []; val_acc = []
train_iou = []; train_acc = []
min_loss = np.inf
decrease = 1 ; not_improve=0

for epoch in range(epochs):
    running_loss = 0
    accuracy = 0
    iou_score = 0
    print(f"Epoch {epoch+1}\n-------------------------------")

    # Training pass
    model.train()
    for i, data in enumerate(train_dataloader):
        # Compute prediction and loss
        x, y = data
        x, y = x.to(device), y.to(device)
        pred = model(x)
        loss = loss_fn(pred['out'], y)
        # eval metrics
        iou_score += mIoU(pred['out'], y)
        accuracy += pixel_accuracy(pred['out'], y)
        # Backpropagation: clear old gradients, compute new ones, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Validation pass: evaluation mode and no gradient tracking.
    val_loss = 0
    val_accuracy = 0
    val_iou_score = 0
    model.eval()
    for i, data in enumerate(val_dataloader):
        with torch.no_grad():
            x, y = data
            x, y = x.to(device), y.to(device)
            pred = model(x)
            # eval metrics
            val_iou_score += mIoU(pred['out'], y)
            val_accuracy += pixel_accuracy(pred['out'], y)
            loss = loss_fn(pred['out'], y)
            val_loss += loss.item()

    # Record the per-epoch averages; these lists feed the history dict.
    train_losses.append(running_loss / len(train_dataloader))
    val_losses.append(val_loss / len(val_dataloader))
    train_iou.append(iou_score / len(train_dataloader))
    val_iou.append(val_iou_score / len(val_dataloader))
    train_acc.append(accuracy / len(train_dataloader))
    val_acc.append(val_accuracy/ len(val_dataloader))

    #metrics report
    print("Epoch:{}/{}..".format(epoch+1, epochs),
            "Train Loss: {:.3f}..".format(running_loss/len(train_dataloader)),
            "Val Loss: {:.3f}..".format(val_loss/len(val_dataloader)),
            "Train mIoU:{:.3f}..".format(iou_score/len(train_dataloader)),
            "Val mIoU: {:.3f}..".format(val_iou_score/len(val_dataloader)),
            "Train Acc:{:.3f}..".format(accuracy/len(train_dataloader)),
            "Val Acc:{:.3f}..".format(val_accuracy/len(val_dataloader)))
    #saving model with min loss
    if min_loss > (val_loss/len(val_dataloader)):
        print('Loss Decreasing.. {:.3f} >> {:.3f} '.format(min_loss, (val_loss/len(val_dataloader))))
        min_loss = (val_loss/len(val_dataloader))
        decrease += 1
        # A checkpoint is written whenever the improvement counter reaches a
        # multiple of 5. The 'model_folder' directory must already exist.
        if decrease % 5 == 0:
            print('saving model...')
            torch.save(model, 'model_folder/DeepLabV3_mIoU-{:.3f}.pth'.format(val_iou_score/len(val_dataloader)))

# Gather the per-epoch curves under the keys read by the plotting helpers.
history = dict(
    train_loss=train_losses, val_loss=val_losses,
    train_miou=train_iou, val_miou=val_iou,
    train_acc=train_acc, val_acc=val_acc,
)

def plot_loss(history):
    """Plot the validation and training loss curves with pyplot.

    Reads ``history['val_loss']`` and ``history['train_loss']``. The function
    only draws; it does not call ``plt.show()`` or save the figure.
    """
    for key, legend_name in (('val_loss', 'val'), ('train_loss', 'train')):
        plt.plot(history[key], label=legend_name, marker='*')
    plt.title('Loss per epoch')
    plt.ylabel('loss')
    plt.legend()
    plt.grid()
def plot_score(history):
    """Plot the training and validation mean-IoU curves with pyplot.

    Reads ``history['train_miou']`` and ``history['val_miou']``. The function
    only draws; it does not call ``plt.show()`` or save the figure.
    """
    for key, legend_name in (('train_miou', 'train_mIoU'), ('val_miou', 'val_mIoU')):
        plt.plot(history[key], label=legend_name, marker='*')
    plt.title('Score per epoch')
    plt.ylabel('mean IoU')
    plt.legend()
    plt.grid()
def plot_acc(history):
    """Plot the training and validation pixel-accuracy curves with pyplot.

    Reads ``history['train_acc']`` and ``history['val_acc']``. The function
    only draws; it does not call ``plt.show()`` or save the figure.
    """
    for key, legend_name in (('train_acc', 'train_accuracy'), ('val_acc', 'val_accuracy')):
        plt.plot(history[key], label=legend_name, marker='*')
    plt.title('Accuracy per epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid()


My training seems to run and I get mIoU scores as shown in the picture below, which does indicate overfitting, but I just want to make sure everything is set up properly before optimizing and experimenting. There is no point in experimenting if something is fundamentally wrong.

Thanks for reading ! All comments are welcome!

Hey @jur123 , very interesting. Did you ever resolve this issue? I would be keen to know how you improved it.

1 Like

I would also be interested in knowing how this turned out. Do you have any updates @jur123 ?