TorchVision -- IndexError: Dimension out of range

I'm using the TorchVision Object Detection Finetuning tutorial for instance segmentation with a very small data set (24 images total), and I get the following error:

IndexError: Dimension out of range (expected to be in range of [-3, 2], but got 3)

This is the stacktrace:

/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:477: UserWarning: This DataLoader will create 4 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
cpuset_checked))


IndexError                                Traceback (most recent call last)

<ipython-input-...> in <module>()
      4 for epoch in range(num_epochs):
      5     # train for one epoch, printing every 10 iterations
----> 6     train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
      7     # update the learning rate
      8     lr_scheduler.step()

8 frames

/content/engine.py in train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq)
     28         targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
     29
---> 30         loss_dict = model(images, targets)
     31
     32         losses = sum(loss for loss in loss_dict.values())

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

/usr/local/lib/python3.7/dist-packages/torchvision/models/detection/generalized_rcnn.py in forward(self, images, targets)
     96             features = OrderedDict([('0', features)])
     97         proposals, proposal_losses = self.rpn(images, features, targets)
---> 98         detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)
     99         detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)
    100

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

/usr/local/lib/python3.7/dist-packages/torchvision/models/detection/roi_heads.py in forward(self, features, proposals, image_shapes, targets)
    808                 rcnn_loss_mask = maskrcnn_loss(
    809                     mask_logits, mask_proposals,
--> 810                     gt_masks, gt_labels, pos_matched_idxs)
    811                 loss_mask = {
    812                     "loss_mask": rcnn_loss_mask

/usr/local/lib/python3.7/dist-packages/torchvision/models/detection/roi_heads.py in maskrcnn_loss(mask_logits, proposals, gt_masks, gt_labels, mask_matched_idxs)
    115     mask_targets = [
    116         project_masks_on_boxes(m, p, i, discretization_size)
--> 117         for m, p, i in zip(gt_masks, proposals, mask_matched_idxs)
    118     ]
    119

/usr/local/lib/python3.7/dist-packages/torchvision/models/detection/roi_heads.py in <listcomp>(.0)
    115     mask_targets = [
    116         project_masks_on_boxes(m, p, i, discretization_size)
--> 117         for m, p, i in zip(gt_masks, proposals, mask_matched_idxs)
    118     ]
    119

/usr/local/lib/python3.7/dist-packages/torchvision/models/detection/roi_heads.py in project_masks_on_boxes(gt_masks, boxes, matched_idxs, M)
     96     rois = torch.cat([matched_idxs[:, None], boxes], dim=1)
     97     gt_masks = gt_masks[:, None].to(rois)
---> 98     return roi_align(gt_masks, rois, (M, M), 1.)[:, 0]
     99
    100

/usr/local/lib/python3.7/dist-packages/torchvision/ops/roi_align.py in roi_align(input, boxes, output_size, spatial_scale, sampling_ratio, aligned)
     53     return torch.ops.torchvision.roi_align(input, rois, spatial_scale,
     54                                            output_size[0], output_size[1],
---> 55                                            sampling_ratio, aligned)
     56
     57

IndexError: Dimension out of range (expected to be in range of [-3, 2], but got 3)
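While poking at the last frame, I noticed that roi_align seems to want a 4D [N, C, H, W] input (the error message suggests something indexed dimension 3 of a 3D tensor), and I seem to get the same message by handing it a 3D tensor directly (a minimal sketch I tried, separate from my real data):

import torch
from torchvision.ops import roi_align

# roi_align expects a 4D input [N, C, H, W]; rois are [K, 5] with a batch
# index in the first column.
rois = torch.tensor([[0.0, 10.0, 10.0, 50.0, 50.0]])
ok = roi_align(torch.rand(2, 1, 480, 640), rois, output_size=(28, 28), spatial_scale=1.0)

# A 3D input raises the same "Dimension out of range ... but got 3" for me.
bad = roi_align(torch.rand(2, 480, 640), rois, output_size=(28, 28), spatial_scale=1.0)

So I suspect one of my target tensors ends up a dimension short by the time it reaches roi_align, but I can't see where.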

I tried to follow the trace, but PyTorch is new to me and I'm not very good with Python to begin with, lol. I copied most of the tutorial and modified things where I thought I needed to. I had a heck of a time getting my masks' colors into a space suitable for the dataset code, but I figured out colors that work, checked them, and hard-coded their values.

Here is a copy of what one of my image masks looks like (I can't share the actual image data, though):
https://drive.google.com/file/d/1Ky00zGz5_ChmbG6kbr2ZkVNIk_ag_SEy/view?usp=sharing
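In case it's relevant, this is roughly how I checked which pixel values appear in a mask before hard-coding them (a quick sketch; the filename is just a placeholder):

import numpy as np
from PIL import Image

# List the distinct pixel values in one mask file; 0 should be background
# and every other value one instance.
mask = np.array(Image.open("/content/drive/MyDrive/setforJohn/four_chambers/masks/example_mask.png"))
print(np.unique(mask))  # for my masks: 0, 7, 8, 11, 14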

At the risk of supplying too much info, here is all of my code; it's most of what appears in the Colab notebook for this tutorial. If this is too much, let me know and I'll include just the lines I edited for my purposes:

import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from google.colab import drive
drive.mount('/content/drive', force_remount=True)

class four_chs(torch.utils.data.Dataset):
    def __init__(self, root, transforms=None, target_transform=None):
        self.root = root
        self.transforms = transforms
        self.target_transform = target_transform
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "imgs"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "masks"))))

    def __getitem__(self, idx):
        # load images and masks
        img_path = os.path.join(self.root, "imgs", self.imgs[idx])
        mask_path = os.path.join(self.root, "masks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)

        # Convert from image object to array
        mask1 = np.array(mask)
        
        # instances are encoded as different colors;
        # hard-coded because of the colors associated with my masks
        obj_ids = np.array([7, 8, 11, 14], dtype='uint8')

        # split the color-encoded mask into a set of binary masks
        masks = mask1 == obj_ids[:, None, None]

        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(mask1, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["background"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.imgs)
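As a sanity check, I can load one raw sample and print the target shapes (a small sketch; from the TorchVision docs I gather Mask R-CNN expects boxes as [N, 4], labels as [N], and masks as [N, H, W]):

# Inspect the target tensors of a single sample (no transforms applied).
check_ds = four_chs(root="/content/drive/MyDrive/setforJohn/four_chambers")
img, target = check_ds[0]
for k in ("boxes", "labels", "masks"):
    print(k, tuple(target[k].shape))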

%%shell

# Download TorchVision repo to use some files from
# references/detection
git clone https://github.com/pytorch/vision.git
cd vision
git checkout v0.3.0

cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../

dataset = four_chs(root="/content/drive/MyDrive/setforJohn/four_chambers")

import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
 
# load a model pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
 
# replace the classifier with a new one that has a
# user-defined number of classes
num_classes = 5  # 4 shapes + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 

import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor


def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)

    return model

from engine import train_one_epoch, evaluate
import utils
import transforms as T

def get_transform(train):
    transforms = []
    # converts the image, a PIL image, into a PyTorch Tensor
    transforms.append(T.ToTensor())
    if train:
        # during training, randomly flip the training images
        # and ground-truth for data augmentation
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
dataset = four_chs(root='/content/drive/MyDrive/APHIS Farm Bill (2020Milestones)/Protocols/For John/images/New set for John/collection/four_chambers',
                   transforms=get_transform(train=True))
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=2,
    collate_fn=utils.collate_fn)
# For Training
images, targets = next(iter(data_loader))
images = list(image for image in images)
targets = [{k: v for k, v in t.items()} for t in targets]
output = model(images, targets)   # Returns losses and detections
# For inference
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x)
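(One thing I notice writing this up: this quick check uses the Faster R-CNN model from the cell above, so it never exercises the mask head or my "masks" targets, which might be why it runs fine while train_one_epoch on the Mask R-CNN model does not.)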

# use our dataset and defined transformations
dataset = four_chs(root='/content/drive/MyDrive/setforJohn/four_chambers', transforms=get_transform(train=True))
dataset_test = four_chs(root='/content/drive/MyDrive/setforJohn/four_chambers', transforms=get_transform(train=False))

# split the dataset in train and test set
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-5])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-5:])


# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# our dataset has five classes
num_classes = 5

# get the model using our helper function
model = get_model_instance_segmentation(num_classes)
# move model to the right device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

num_epochs = 10

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)