I am facing an IndexError when training Mask R-CNN and applying transforms with Albumentations.

This is my dataset class:

import os
import numpy as np
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import transforms
from PIL import Image, ImageDraw
import glob
import json
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models.detection._utils import Matcher
from torch.autograd import Variable
import torch.nn.functional as F
import pytorch_lightning as pl
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import Callback, ModelCheckpoint
from pytorch_lightning.callbacks import LearningRateMonitor
from torch import nn
import torch.utils.data


class Cityscapes(Dataset):
    def __init__(self, img_dir, ann_dir, split, transforms=None):
        self.categories = {'person': 1, 'car': 2, 'rider': 3, 'bus': 4, 'train': 5,
                           'truck': 6, 'motorcycle': 7, 'bicycle': 8}
        assert split in ["train", "val", "test"]
        img_dir = os.path.abspath(os.path.join(img_dir, split))
        ann_dir = os.path.abspath(os.path.join(ann_dir, split))
        self.ann_dir = ann_dir
        # image paths: <img_dir>/<split>/<city>/*_leftImg8bit.png
        img_name = os.path.join(img_dir, "*", "*_leftImg8bit.png")
        self.img_paths = sorted(glob.glob(img_name))
        # matching annotation paths (the *_polygons.json files)
        ann_name = os.path.join(ann_dir, "*", "*_polygons.json")
        self.annots_paths = sorted(glob.glob(ann_name))
        assert len(self.img_paths) == len(self.annots_paths)
        self.transform = transforms
        
    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, index):
        annots_file = self.annots_paths[index]
        img_path = self.img_paths[index]
        img_read = cv2.imread(img_path)
        # OpenCV loads images in BGR order, so convert to RGB
        rgb = cv2.cvtColor(img_read, cv2.COLOR_BGR2RGB)
        with open(annots_file, 'r') as f:
            data = json.load(f)
        height = data['imgHeight']
        width = data['imgWidth']
        bboxes = []
        labels = []
        masks = []
        for item in data['objects']:
            label = item['label']
            if label not in self.categories:
                continue
            label = self.categories[label]
            # derive the bounding box from the polygon vertices
            poly = np.array(item['polygon'], dtype=np.int32)
            x_min = np.min(poly[:, 0])
            y_min = np.min(poly[:, 1])
            x_max = np.max(poly[:, 0])
            y_max = np.max(poly[:, 1])
            bboxes.append([x_min, y_min, x_max, y_max])
            labels.append(label)
            # draw the instance mask for this object with cv2.fillPoly
            mask = np.zeros((height, width), dtype=np.uint8)
            cv2.fillPoly(mask, pts=[poly], color=255)
            masks.append(mask)
        bboxes = np.array(bboxes)
        transformed = self.transform(image=rgb, bboxes=bboxes, class_labels=labels, masks=masks)
        image_tr = transformed["image"] / 255.0
        bboxes = transformed["bboxes"]
        masks = transformed['masks']
        # stack the transformed masks and scale 255 -> 1 so they are binary
        masks = torch.tensor(np.stack(masks, axis=0)) // 255

        if len(bboxes) > 0:
            bboxes = torch.stack([torch.tensor(item) for item in bboxes])
            labels = torch.stack([torch.tensor(item) for item in labels])
        else:
            bboxes = torch.zeros(0, 4)
        return image_tr, masks, bboxes, labels


# Augmentation pipelines built with Albumentations
train_transform = A.Compose(
    [
        A.Resize(height=600, width=1200, always_apply=True, p=1.0),
        A.HorizontalFlip(p=0.5),
        ToTensorV2(p=1.0),
    ],
    p=1.0,
    bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels'], min_area=0)
)
val_transform = A.Compose(
    [
        A.Resize(height=600, width=1200, always_apply=True, p=1.0),
        ToTensorV2(p=1.0),
    ],
    p=1.0,
    bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels'])
)


img_dir = '/home/computing/cityscapes/leftImg8bit'
ann_dir = '/home/computing/cityscapes/gtFine'
cityscapes_train = Cityscapes(img_dir, ann_dir, split='train', transforms=train_transform)
cityscapes_val = Cityscapes(img_dir, ann_dir, split='val', transforms=val_transform)


def collate_fn(batch):
    """
    Each image may have a different number of objects, so the DataLoader needs
    a custom collate function that keeps the per-image targets as lists
    instead of trying to stack them.
    """
    images = list()
    masks = list()
    bboxes = list()
    labels = list()
    for img, m, bb, lbl in batch:
        images.append(img)
        masks.append(m)
        bboxes.append(bb)
        labels.append(lbl)
    images = torch.stack(images, dim=0)
    return images, masks, bboxes, labels

data_loader_train = torch.utils.data.DataLoader(cityscapes_train, batch_size=2, shuffle=True, collate_fn=collate_fn)

The Error

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:67: UserWarning: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
  warning_cache.warn(
/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:615: UserWarning: Checkpoint directory /home/computing/PycharmProjects/cityscapes_FRCNN_MRCNN exists and is not empty.
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type     | Params
--------------------------------------
0 | detector | MaskRCNN | 44.0 M
--------------------------------------
43.7 M    Trainable params
222 K     Non-trainable params
44.0 M    Total params
175.840   Total estimated model params size (MB)
/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:432: PossibleUserWarning: The dataloader, val_dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 4 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.
  rank_zero_warn(
Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:01<00:00,  1.97it/s]Validation IOU:  tensor(0., device='cuda:0')
Epoch 0:   0%|          | 0/1487 [00:00<?, ?it/s] /home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:432: PossibleUserWarning: The dataloader, train_dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 4 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.
  rank_zero_warn(
Epoch 0:   6%|▌         | 82/1487 [00:56<16:11,  1.45it/s, v_num=94]Traceback (most recent call last):
  File "/home/computing/PycharmProjects/cityscapes_FRCNN_MRCNN/cityscapes.py", line 346, in <module>
    trainer.fit(detector, val_dataloaders=val_dataloader)
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/pytorch_lightning/trainer/trainer.py", line 531, in fit
    call._call_and_handle_interrupt(
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/pytorch_lightning/trainer/call.py", line 42, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/pytorch_lightning/trainer/trainer.py", line 570, in _fit_impl
    self._run(model, ckpt_path=ckpt_path)
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/pytorch_lightning/trainer/trainer.py", line 975, in _run
    results = self._run_stage()
              ^^^^^^^^^^^^^^^^^
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/pytorch_lightning/trainer/trainer.py", line 1018, in _run_stage
    self.fit_loop.run()
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py", line 201, in run
    self.advance()
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py", line 354, in advance
    self.epoch_loop.run(self._data_fetcher)
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/pytorch_lightning/loops/training_epoch_loop.py", line 133, in run
    self.advance(data_fetcher)
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/pytorch_lightning/loops/training_epoch_loop.py", line 189, in advance
    batch = next(data_fetcher)
            ^^^^^^^^^^^^^^^^^^
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/pytorch_lightning/loops/fetchers.py", line 136, in __next__
    self._fetch_next_batch(self.dataloader_iter)
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/pytorch_lightning/loops/fetchers.py", line 150, in _fetch_next_batch
    batch = next(iterator)
            ^^^^^^^^^^^^^^
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/pytorch_lightning/utilities/combined_loader.py", line 284, in __next__
    out = next(self._iterator)
          ^^^^^^^^^^^^^^^^^^^^
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/pytorch_lightning/utilities/combined_loader.py", line 65, in __next__
    out[i] = next(self.iterators[i])
             ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 633, in __next__
    data = self._next_data()
           ^^^^^^^^^^^^^^^^^
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 677, in _next_data
    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
            ~~~~~~~~~~~~^^^^^
  File "/home/computing/PycharmProjects/cityscapes_FRCNN_MRCNN/cityscapes.py", line 98, in __getitem__
    transformed = self.transform(image= rgb, bboxes=bboxes, class_labels=labels, masks = masks)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/albumentations/core/composition.py", line 195, in __call__
    self._check_args(**data)
  File "/home/computing/miniconda3/envs/Cityscapes/lib/python3.11/site-packages/albumentations/core/composition.py", line 282, in _check_args
    if not isinstance(data[0], np.ndarray):
                      ~~~~^^^
IndexError: list index out of range
Epoch 0:   6%|▌         | 82/1487 [00:57<16:29,  1.42it/s, v_num=94]

Process finished with exit code 1

Also, I am using the polygons.json files of the Cityscapes dataset to create masks for the selected classes. How can these masks be used as instance-segmentation targets for Mask R-CNN? My current, untested guess at the target format is sketched below.
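
From the torchvision documentation I understand that MaskRCNN expects one target dict per image with "boxes", "labels" and "masks" keys; this is my rough attempt at building it from one sample of the dataset defined above (untested):

import torch

image, masks, bboxes, labels = cityscapes_train[0]

# My guess at the per-image target dict for torchvision's MaskRCNN:
target = {
    "boxes": bboxes.float(),         # FloatTensor[N, 4] in (x1, y1, x2, y2)
    "labels": labels.long(),         # Int64Tensor[N] with ids from self.categories
    "masks": masks.to(torch.uint8),  # UInt8Tensor[N, H, W], one binary mask per instance
}
# In training mode the model would then be called as:
# loss_dict = model([image.float()], [target])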
When I visualize the masks they look correct, so why is Albumentations throwing an IndexError? My suspicion is that it happens for images that contain none of my selected categories, because the minimal call below (my assumption about the trigger) raises the same error.
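
A minimal reproduction of what I believe is the trigger (the dummy image and the pipeline here are just for illustration):

import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2

t = A.Compose(
    [A.Resize(height=600, width=1200), ToTensorV2()],
    bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels']),
)
dummy = np.zeros((1024, 2048, 3), dtype=np.uint8)
# An image with none of my categories produces empty bboxes/labels/masks,
# and Compose._check_args then indexes data[0] on the empty masks list:
t(image=dummy, bboxes=np.array([]), class_labels=[], masks=[])
# -> IndexError: list index out of range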
How should I handle images that contain none of the objects/instances listed in self.categories in the code above? A rough, untested idea for __getitem__ is sketched below.
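
This is only a sketch of what I have in mind, not a verified fix: run the full pipeline only when instances exist, and otherwise transform the image alone and return zero-length targets. I am not sure whether torchvision's MaskRCNN accepts zero-instance targets during training, so skipping such indices entirely might be necessary instead.

if len(bboxes) > 0:
    transformed = self.transform(image=rgb, bboxes=bboxes,
                                 class_labels=labels, masks=masks)
    image_tr = transformed["image"] / 255.0
    bboxes = torch.stack([torch.tensor(b) for b in transformed["bboxes"]])
    labels = torch.tensor(transformed["class_labels"], dtype=torch.int64)
    masks = torch.tensor(np.stack(transformed["masks"], axis=0)) // 255
else:
    # No instances: apply only the image part of the pipeline and emit
    # zero-length targets so collate_fn still receives tensors.
    image_only = A.Compose([A.Resize(height=600, width=1200), ToTensorV2()])
    image_tr = image_only(image=rgb)["image"] / 255.0
    bboxes = torch.zeros((0, 4), dtype=torch.float32)
    labels = torch.zeros((0,), dtype=torch.int64)
    masks = torch.zeros((0, 600, 1200), dtype=torch.uint8)
return image_tr, masks, bboxes, labels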