My predicted output ends up being a list of outputs, and the mask shapes don’t match my targets

In this loop, I’m trying to calculate mean IoU and Dice, but I’m getting an error saying the shapes don’t match:

        for images, targets in dataloader_val:
            images = [i.to(device) for i in images]
            outs = model(images)
            
            for out, target in zip(outs, targets):
                #print("=== Predicted Mask ===\n", out['masks'])
               #print("=== True Mask ===\n", target['masks'])
                print("Predicted Class: ", out['labels'])
                print("True Class: ", target['labels'])
                
                masks_pred = out['masks']     # > 0.5
                masks_true = target['masks']  # .bool()
                for mp, mt in zip(masks_pred, masks_true):
                    iou_metric.update(mp, mt)
                    dice_metric.update(mp, mt)
        print(f"IoU (macro): {iou_metric.compute():.4f}")
        print(f"Dice Score (macro): {dice_metric.compute():.4f}")

RuntimeError: Predictions and targets are expected to have the same shape, but got torch.Size([1, 100, 100]) and torch.Size([256, 256]).

torch.Size([1, 100, 100]) for the predicted output
torch.Size([240, 240]) for the ground truth from my validation set
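
For reference, those shapes came from printing the masks’ .shape inside the loop, roughly like this:

for out, target in zip(outs, targets):
    print("pred masks:", out['masks'].shape)     # torch.Size([N, 1, 100, 100])
    print("true masks:", target['masks'].shape)  # torch.Size([M, H, W]); H and W vary per sample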

When I print the labels, I get:

Predicted Class:  tensor([4, 2, 1, 3], device='cuda:0')
True Class:  tensor([3])

I’m using PyTorch’s maskrcnn_resnet50_fpn, and I’m not sure what I’m doing wrong when I pass the images.
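
For reference, my understanding of the model’s call contract (a sketch based on the torchvision docs, not my actual script):

model.train()
loss_dict = model(images, targets)  # dict of losses: loss_classifier, loss_box_reg,
                                    # loss_mask, loss_objectness, loss_rpn_box_reg

model.eval()
with torch.no_grad():
    outs = model(images)  # list of dicts, one per image, with keys
                          # 'boxes' [N, 4], 'labels' [N], 'scores' [N],
                          # 'masks' [N, 1, H, W] float probabilities in [0, 1]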

Terminal Output

Predicted Class:  tensor([], device='cuda:0', dtype=torch.int64)
True Class:  tensor([2])
Predicted Class:  tensor([], device='cuda:0', dtype=torch.int64)
True Class:  tensor([1])
Predicted Class:  tensor([], device='cuda:0', dtype=torch.int64)
True Class:  tensor([1])
Predicted Class:  tensor([], device='cuda:0', dtype=torch.int64)
True Class:  tensor([3])
Predicted Class:  tensor([], device='cuda:0', dtype=torch.int64)
True Class:  tensor([1])
Predicted Class:  tensor([], device='cuda:0', dtype=torch.int64)
True Class:  tensor([2])
Predicted Class:  tensor([], device='cuda:0', dtype=torch.int64)
True Class:  tensor([4])
Predicted Class:  tensor([], device='cuda:0', dtype=torch.int64)
True Class:  tensor([4])
Predicted Class:  tensor([4, 2, 1, 3], device='cuda:0')
True Class:  tensor([3])
Traceback (most recent call last):
  File "/run/media/amrut/4.0 TB HDD/GT Masters/5CS7643/CS7643-Project-Brain-Tumor-Segmentation/main_maskrcnn_pytorch.py", line 220, in <module>
    iou_metric.update(mp, mt)
  File "/home/amrut/anaconda3/envs/cs7643-final-project/lib/python3.12/site-packages/torchmetrics/metric.py", line 559, in wrapped_func
    raise err
  File "/home/amrut/anaconda3/envs/cs7643-final-project/lib/python3.12/site-packages/torchmetrics/metric.py", line 549, in wrapped_func
    update(*args, **kwargs)
  File "/home/amrut/anaconda3/envs/cs7643-final-project/lib/python3.12/site-packages/torchmetrics/segmentation/mean_iou.py", line 156, in update
    intersection, union = _mean_iou_update(
                          ^^^^^^^^^^^^^^^^^
  File "/home/amrut/anaconda3/envs/cs7643-final-project/lib/python3.12/site-packages/torchmetrics/functional/segmentation/mean_iou.py", line 77, in _mean_iou_update
    _check_same_shape(preds, target)
  File "/home/amrut/anaconda3/envs/cs7643-final-project/lib/python3.12/site-packages/torchmetrics/utilities/checks.py", line 39, in _check_same_shape
    raise RuntimeError(
RuntimeError: Predictions and targets are expected to have the same shape, but got torch.Size([1, 100, 100]) and torch.Size([256, 256]).

Dataset file

import os
import numpy as np
from PIL import Image
from natsort import natsorted
import torch
from torch.utils.data import Dataset
from torchvision import tv_tensors
from torchvision.transforms import v2
from torchvision.io import read_image
from torchvision.ops import masks_to_boxes

class BrainTumorDataset(Dataset):
    def __init__(self, root_dir, transforms=None):
        self.root_dir = root_dir
        self.transforms = transforms

        self.tumor_class_codes = {
            # 'Normal': 0,
            'Astrocytoma': 1,
            'Glioblastoma': 2,
            'Gliomas': 3,
            'Meningioma': 4
        }

        self.image_paths = []
        self.mask_paths = []
        self.labels = []

        self.image_type = '.png'

        tumor_types = os.listdir(self.root_dir)
        # self.class_name_to_idx = {name: idx+1 for idx, name in enumerate(tumor_types)}

        for tumor_type in tumor_types:
            images = list(natsorted(os.listdir(os.path.join(self.root_dir, tumor_type, 'images'))))
            # print(images[500])
            masks = list(natsorted(os.listdir(os.path.join(self.root_dir, tumor_type, 'masks'))))
            # print(masks[500])
            for image in images:
                # print(image)
                if image.endswith(self.image_type):
                    self.image_paths.append(os.path.join(self.root_dir, tumor_type, 'images', image))
            for mask in masks:
                # print(mask)
                if mask.endswith(self.image_type):
                    self.mask_paths.append(os.path.join(self.root_dir, tumor_type, 'masks', mask))
                    self.labels.append(self.tumor_class_codes[tumor_type])  # keep labels aligned with mask_paths

    def __getitem__(self, idx):
        # Reference: https://github.com/DatumLearning/Mask-RCNN-finetuning-PyTorch/blob/main/notebook.ipynb
        image_path = self.image_paths[idx]
        mask_path = self.mask_paths[idx]
        label = self.labels[idx]
        '''
        image = read_image(image_path)
        mask = read_image(mask_path)

        obj_ids = torch.unique(mask)
        obj_ids = obj_ids[1:]
        masks = mask == obj_ids[:, None, None]

        boxes = masks_to_boxes(masks)
        '''
        image = Image.open(image_path).convert('RGB')
        mask = Image.open(mask_path)

        mask = np.array(mask)
        obj_ids = np.unique(mask)
        obj_ids = obj_ids[1:]  # drop the background value
        num_objs = len(obj_ids)
        # build one binary mask per object id
        masks = np.zeros((num_objs, mask.shape[0], mask.shape[1]))
        for i in range(num_objs):
            masks[i][mask == obj_ids[i]] = True
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        labels = torch.tensor([label], dtype=torch.int64)

        image_id = idx

        image = tv_tensors.Image(image)

        target = {
            'boxes': boxes,
            'labels': labels,
            'masks': masks,
            'image_id': image_id,
            #'area': area,
            #'iscrowd': iscrowd
        }

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def __len__(self):
        return len(self.image_paths)
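
(A quick sanity check on one sample; hypothetical snippet, with the dataset root shortened to a placeholder path:)

# Hypothetical sanity check of a single sample; 'Brain_Tumor_2D_Dataset' is a placeholder path.
ds = BrainTumorDataset(root_dir='Brain_Tumor_2D_Dataset')
image, target = ds[0]
print(image.shape)            # [3, H, W] image tensor
print(target['masks'].shape)  # [num_objs, H, W] per-instance binary masks
print(target['boxes'].shape)  # [num_objs, 4] boxes as [xmin, ymin, xmax, ymax]
print(target['labels'])       # tumor class code for this image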

Model file

import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor


def get_model_instance_segmentation(num_classes=5, pretrained = False):
    # load an instance segmentation model pre-trained on COCO
    if pretrained:
        model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT", weights_backbone='DEFAULT')
    else:
        model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights=None, weights_backbone='DEFAULT')

    in_features = model.roi_heads.box_predictor.cls_score.in_features

    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    
    model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask,
        hidden_layer,
        num_classes
    )

    return model

Main file

from models.maskrcnn_resnet50 import BrainTumorDataset as btd
import numpy as np
from PIL import Image
import sys
import os
from loader import ImageData
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import torch
import matplotlib.pyplot as plt
from torchvision.transforms import v2
import torch.nn.functional as F
import torchvision
from torchvision import tv_tensors
from losses import reweight, FocalLoss
from models.maskrcnn_resnet50 import pytorch_maskrcnn as pm
from torchvision.utils import draw_segmentation_masks
import torchmetrics
from torchmetrics.segmentation import MeanIoU, DiceScore
from torchmetrics.classification import MulticlassF1Score, MulticlassPrecision, MulticlassRecall

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def get_transforms():
    transforms = v2.Compose([
        v2.Resize((100,100)),
        v2.ConvertImageDtype(torch.float)
    ])
    return transforms

def collate_fn(batch):
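    # detection targets have a variable number of objects per image, so keep images and targets as lists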
    inputs, targets = zip(*batch)  # tuples of inputs and targets
    return list(inputs), list(targets)

if __name__=="__main__":
    train_model = True

    model_tag = None
    if len(sys.argv) < 2:
        print('Enter arg to train or test network')
        sys.exit()
    else:
        model_tag = sys.argv[1]

    if model_tag == 'train':
        train_model = True
    elif model_tag == 'test':
        train_model = False

    print("device: ", device)

    transforms = get_transforms()

    dataset = btd.BrainTumorDataset(
        root_dir='/run/media/amrut/4.0 TB HDD/GT Masters/5CS7643/CS7643-Project-Brain-Tumor-Segmentation/Brain_Tumor_2D_Dataset',
        transforms=transforms
    )

    train_ds, val_ds = random_split(dataset=dataset, lengths=[1006, 252])
    print('training set length: ', train_ds.__len__())
    print('validation set length: ', val_ds.__len__())

    dataloader_train = DataLoader(train_ds, batch_size=16, shuffle=True, collate_fn=collate_fn, num_workers=4, pin_memory=True)
    dataloader_val = DataLoader(val_ds, batch_size=16, shuffle=False, collate_fn=collate_fn, num_workers=4, pin_memory=True)


    classes = ['Astrocytoma', 'Glioblastoma', 'Gliomas', 'Meningioma']

    num_classes = len(classes) + 1 # includes background class

    weights = torch.tensor([0.1, 1.0, 1.0, 1.0, 1.0], device=device) #manual weights assigned
    criterion = FocalLoss(weight=weights, gamma=0)

    if train_model:
        print('==== Training ====')
        total_loss_per_epoch = []
        split_loss_per_epoch = []
        total_val_per_epoch = []
        split_val_per_epoch = []

        model = pm.get_model_instance_segmentation(num_classes=num_classes, pretrained=False).to(device)

        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=1e-5,
            weight_decay=0  # no L2 regularization
        )

        num_epochs = 30
        for epoch in range(num_epochs):
            print("Start of Epoch ", epoch)
            train_epoch_loss = 0
            val_epoch_loss = 0
            model.train()
            for images, targets in dataloader_train:
                images = [i.to(device) for i in images]

                targets = [{k: (v.to(device) if torch.is_tensor(v) else v) for k, v in t.items()} for t in targets]

                loss_dict = model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
                # train_epoch_loss += losses.cpu().detach().numpy()
                optimizer.zero_grad()
                losses.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
                optimizer.step()
                # del loss_dict, losses
            total_loss_per_epoch.append(losses.cpu().detach().numpy())
            split_loss_per_epoch.append(loss_dict)
            with torch.no_grad():  # model stays in train() mode, so the forward pass still returns the loss dict
                for img, tgt in dataloader_val:
                    img = [i.to(device) for i in img]
                    tgt = [{k: (v.to(device) if torch.is_tensor(v) else v) for k, v in t.items()} for t in tgt]
                    val_dict = model(img, tgt)
                    val_losses = sum(val for val in val_dict.values())
                    #val_epoch_loss += losses.cpu().detach().numpy()
            total_val_per_epoch.append(val_losses.cpu().detach().numpy())
            split_val_per_epoch.append(val_dict)
            print("Loss: ", total_loss_per_epoch[-1], 'Val: ', total_val_per_epoch[-1], "\nEnd of Epoch ", epoch, "\n")
        torch.save(model.state_dict(), 'maskrcnn_best_model.pth')

        plt.title('Total Loss (Lcls + Lbox + Lmask) vs Epochs')
        plt.plot(total_loss_per_epoch, label='Loss')
        plt.plot(total_val_per_epoch, label='Validation')
        plt.xlabel('Epochs')
        plt.ylabel('Total Loss')
        plt.legend()
        plt.savefig('Total_Loss_vs_Epoch.png')
        plt.clf()

        bbox_loss = []
        bbox_val = []
        mask_loss = []
        mask_val = []
        label_loss = []
        label_val = []
        for i in range(len(split_loss_per_epoch)):
            bbox_loss.append(split_loss_per_epoch[i]['loss_box_reg'].item())
            mask_loss.append(split_loss_per_epoch[i]['loss_mask'].item())
            label_loss.append(split_loss_per_epoch[i]['loss_classifier'].item())  # class-label loss ('loss_objectness' is the RPN term)

        for i in range(len(split_val_per_epoch)):
            bbox_val.append(split_val_per_epoch[i]['loss_box_reg'].item())
            mask_val.append(split_val_per_epoch[i]['loss_mask'].item())
            label_val.append(split_val_per_epoch[i]['loss_classifier'].item())  # class-label loss ('loss_objectness' is the RPN term)
        
        plt.title('Bounding Box Loss vs Epochs')
        plt.plot(bbox_loss, label='Loss')
        plt.plot(bbox_val, label='Validation')
        plt.xlabel('Epochs')
        plt.ylabel('Bounding Box Loss')
        plt.legend()
        plt.savefig('Box_Loss_vs_Epoch.png')
        plt.clf()

        plt.title('Mask Loss vs Epochs')
        plt.plot(mask_loss, label='Loss')
        plt.plot(mask_val, label='Validation')
        plt.xlabel('Epochs')
        plt.ylabel('Mask Loss')
        plt.legend()
        plt.savefig('Mask_Loss_vs_Epoch.png')
        plt.clf()

        plt.title('Class Label Loss vs Epochs')
        plt.plot(label_loss, label='Loss')
        plt.plot(label_val, label='Validation')
        plt.xlabel('Epochs')
        plt.ylabel('Label Loss')
        plt.legend()
        plt.savefig('Label_Loss_vs_Epoch.png')
        plt.clf()
    else:
        print('==== Testing ====')
        model = pm.get_model_instance_segmentation(num_classes=num_classes, pretrained=False).to(device)
        saved_weights = torch.load('maskrcnn_best_model.pth')
        model.load_state_dict(saved_weights)
        model.eval()

        transforms = get_transforms()

        Astro_img = Image.open('/run/media/amrut/4.0 TB HDD/GT Masters/5CS7643/CS7643-Project-Brain-Tumor-Segmentation/Brain_Tumor_2D_Dataset/Astrocytoma/images/Astrocytoma_T1_004.png')
        Glioblast_img = Image.open('/run/media/amrut/4.0 TB HDD/GT Masters/5CS7643/CS7643-Project-Brain-Tumor-Segmentation/Brain_Tumor_2D_Dataset/Glioblastoma/images/Glioblastoma_flair_004.png')
        Glioma_img = Image.open('/run/media/amrut/4.0 TB HDD/GT Masters/5CS7643/CS7643-Project-Brain-Tumor-Segmentation/Brain_Tumor_2D_Dataset/Gliomas/images/Glioma_4.png')
        Menin_img = Image.open('/run/media/amrut/4.0 TB HDD/GT Masters/5CS7643/CS7643-Project-Brain-Tumor-Segmentation/Brain_Tumor_2D_Dataset/Meningioma/images/Meningioma_4.png')

        Astro_img = transforms(Astro_img)
        Glioblast_img = transforms(Glioblast_img)
        Glioma_img = transforms(Glioma_img)
        Menin_img = transforms(Menin_img)

        iou_metric = MeanIoU(num_classes=num_classes, per_class=True)
        dice_metric = DiceScore(num_classes=num_classes)
        precision_metric = MulticlassPrecision(num_classes=num_classes)
        recall_metric = MulticlassRecall(num_classes=num_classes)
        f1_metric = MulticlassF1Score(num_classes=num_classes)

        '''with torch.no_grad():
            pred_Astr = model([tv_tensors.Image(Astro_img).to(device)])
            pred_Gliob = model([tv_tensors.Image(Glioblast_img).to(device)])
            pred_Glio = model([tv_tensors.Image(Glioma_img).to(device)])
            pred_Menin = model([tv_tensors.Image(Menin_img).to(device)])'''

        for images, targets in dataloader_val:
            images = [i.to(device) for i in images]
            outs = model(images)
            print(outs[1])

            for out, target in zip(outs, targets):
                #print("=== Predicted Mask ===\n", out['masks'])
               #print("=== True Mask ===\n", target['masks'])
                print("Predicted Class: ", out['labels'])
                print("True Class: ", target['labels'])

                masks_pred = out['masks']     # > 0.5
                masks_true = target['masks']  # .bool()
                for mp, mt in zip(masks_pred, masks_true):
                    iou_metric.update(mp, mt)
                    dice_metric.update(mp, mt)
        print(f"IoU (macro): {iou_metric.compute():.4f}")
        print(f"Dice Score (macro): {dice_metric.compute():.4f}")