IndexError when training faster RCNN

Hello all,

I would appreciate it if anyone wants to help me out with the following issue. I’ve been following this guide in order to create a Faster R-CNN model. I’ve managed to do this successfully once before and wanted to reuse the code I had written, but fit it on a different dataset. However, this time I get the following error thrown at me:

  File "", line 122, in <module>
  File "", line 81, in train
    loss_dict = model(images, targets)
  File "/Users/Luuk/Desktop/thesisvenv/venv/lib/python3.7/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/Users/Luuk/Desktop/thesisvenv/venv/lib/python3.7/site-packages/torchvision/models/detection/generalized_rcnn.py", line 71, in forward
    detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)
  File "/Users/Luuk/Desktop/thesisvenv/venv/lib/python3.7/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/Users/Luuk/Desktop/thesisvenv/venv/lib/python3.7/site-packages/torchvision/models/detection/roi_heads.py", line 748, in forward
    proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)
  File "/Users/Luuk/Desktop/thesisvenv/venv/lib/python3.7/site-packages/torchvision/models/detection/roi_heads.py", line 659, in select_training_samples
    sampled_inds = self.subsample(labels)
  File "/Users/Luuk/Desktop/thesisvenv/venv/lib/python3.7/site-packages/torchvision/models/detection/roi_heads.py", line 610, in subsample
    sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
  File "/Users/Luuk/Desktop/thesisvenv/venv/lib/python3.7/site-packages/torchvision/models/detection/_utils.py", line 67, in __call__
    pos_idx_per_image = positive[perm1]
    pos_idx_per_image = positive[perm1]
IndexError: index 1 is out of bounds for dimension 0 with size 1

This is what my train function looks like up until the point of the error:

def train():

    # Train on GPU or CPU
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print(f'Running on: {device}')

    # Initiate training and test datasets
    train_data = FacesDataset(df, IMG_DIR, transform=get_transforms(train=False))
    val_data = FacesDataset(df, IMG_DIR, transform=get_transforms(train=False))

    # Split dataset in train & test (use 250 images for test)
    indices = torch.randperm(len(train_data)).tolist()
    train_data =, indices[:-250])
    val_data =, indices[-250:])

    print(f'Number of training examples: {len(train_data)}, Number of validation examples: {len(val_data)}')

    # Initiate training and test data loaders
    data_loader_train =, batch_size=BATCH_SIZE,
                                                    shuffle=True, num_workers=NUM_WORKERS, collate_fn=collate_fn)

    data_loader_val =, batch_size=BATCH_SIZE,
                                                  shuffle=False, num_workers=NUM_WORKERS, collate_fn=collate_fn)
    # Initiate model
    model = initiate_model(3)

    # Get the learnable parameters of our model
    params = [p for p in model.parameters() if p.requires_grad]
    # Initaite optimizer on learnable parameters
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    # Set up a dynamic learning rate in order to converage faser
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,

    # Initiate averager class to keep track of losses
    loss_hist = Averager()
    val_loss_hist = Averager()
    # Set to high value on purpose
    least_loss = 10e6
    # Allow model to learn parameters

    print(f'Starting training....')

    for epoch in range(NUM_EPOCS):

        itr = 1

        for images, targets, img_id in data_loader_train:

            images = list( for image in images)
            targets = [{k: for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)

In addition, this is what my dataset class looks like:

class FacesDataset(Dataset):
    """Detection dataset yielding one image and its box annotations per item.

    The annotations are read from a dataframe that holds one row per bounding
    box, with the columns ``image_id``, ``x0``, ``y0``, ``x1``, ``y1`` and
    ``label``. Items are indexed by unique ``image_id``.
    """

    def __init__(self, dataframe, img_dir, transform=None):
        """Store the annotation table and where the images live.

        Args:
            dataframe: Table with one row per annotated box.
            img_dir: Directory the ``image_id`` values are relative to.
            transform: Optional callable applied to the RGB image array.
        """
        self.dataframe = dataframe
        self.img_dir = img_dir
        self.images = dataframe['image_id'].unique()
        self.transform = transform

    def __getitem__(self, idx):
        """Return ``(image, target, image_id)`` for the ``idx``-th unique image.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_id = self.images[idx]
        img_path = f'{self.img_dir}/{img_id}'

        img_arr = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img_arr is None:
            # cv2.imread signals a missing/unreadable file by returning None
            # instead of raising; fail here with a message naming the file.
            raise FileNotFoundError(f'Could not read image: {img_path}')
        # OpenCV loads BGR; convert to RGB. Values stay uint8 (no /255 scaling).
        img_arr = cv2.cvtColor(img_arr, cv2.COLOR_BGR2RGB).astype(np.uint8)

        target = self._build_target(img_id)

        if self.transform is not None:
            img_arr = self.transform(img_arr)

        return img_arr, target, img_id

    def _build_target(self, img_id):
        """Build the target dict for all annotation rows of ``img_id``.

        Returns:
            dict with ``boxes`` (float32, shape (N, 4)), ``area`` (float32,
            shape (N,)), ``labels`` (int64, shape (N,)) and ``iscrowd``
            (int64 zeros, shape (N,)), where N is the number of rows.
        """
        # Filter once and reuse the rows for every field.
        rows = self.dataframe.loc[self.dataframe['image_id'] == img_id]

        boxes = torch.tensor(
            np.array(rows[['x0', 'y0', 'x1', 'y1']]), dtype=torch.float32
        ).reshape(-1, 4)

        target = {}
        target['boxes'] = boxes
        # width * height for every box at once.
        target['area'] = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        # Select the column by name (a Series), not by list (a DataFrame):
        # a list selector yields an (N, 1) array, and 2-D labels make the
        # detection model's sampling step fail with an IndexError.
        target['labels'] = torch.tensor(np.array(rows['label']), dtype=torch.int64)
        # No annotation is treated as a crowd region.
        target['iscrowd'] = torch.zeros((len(rows),), dtype=torch.int64)
        return target

    def __len__(self):
        """Return the number of unique images."""
        return len(self.images)

The issue looks similar to this post; however, implementing that solution does not seem to resolve the issue for me.

The error was in the following line of code:

 # NOTE: the list selector ['label'] makes .loc return a DataFrame, so
 # labels_array is 2-D with shape (N, 1) rather than a flat (N,) array,
 # and the labels tensor built from it keeps that extra dimension.
 labels_array = np.array(self.dataframe.loc[self.dataframe['image_id'] == img_id, ['label']])
        labels = torch.tensor(labels_array, dtype=torch.int64)
        target['labels'] = labels

This resulted in a nested list of labels (one single-element list per box) instead of a flat list. Changing it to

        # The ['label'] list selector yields an (N, 1) array; flatten it so the
        # labels tensor is 1-D with one class id per box.
        final_labels = []
        labels_array = np.array(self.dataframe.loc[self.dataframe['image_id'] == img_id, ['label']])
        for value in labels_array:
            # Each `value` is a single-element row; add its content, not the row.
            final_labels.extend(value)

        labels = torch.tensor(final_labels, dtype=torch.int64)
        target['labels'] = labels

solved it for me. The solution above could probably be done more elegantly but this workaround worked for me.

This issue seems to be quite weird, as these lines of code shouldn’t create out of bounds indices:

# Removing some code for better readability
# nonzero() returns one row of coordinates per matching element; squeeze(1)
# only collapses that into a 1-D index vector when the input was 1-D.
positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1)

num_pos = int(self.batch_size_per_image * self.positive_fraction)
# protect against not enough positive examples
num_pos = min(positive.numel(), num_pos)

# randomly select positive and negative examples
# (`negative` and `num_neg` come from the code removed from this excerpt)
perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos]
perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg]

# perm1 holds values in [0, positive.numel()); they are valid row indices only
# while `positive` is 1-D. NOTE(review): with 2-D labels of shape (N, 1),
# `positive` would presumably stay 2-D, so numel() exceeds its first dimension
# and this lookup raises the IndexError from the question — confirm.
pos_idx_per_image = positive[perm1]

It seems the last line of code is creating this issue.
My best guess is that the shapes of some tensors are not what is expected.
Did you change anything in the model implementation or could you explain your use case a bit?

EDIT: Apparently I’m too late. :stuck_out_tongue:

No worries Patrick, thanks for looking at the issue either way :slight_smile: