Index 1 out of bounds

Hello, I'm confused about why I'm getting this index error for my model when the output for my labels in the dataloader is:
DATALOADER OUTPUT

 {'area': tensor(1006000),
  'boxes': tensor([[ 52,   7, 948, 999]]),
  'image_id': tensor(9),
  'iscrowd': tensor([0]),
  'labels': tensor([1])}

ERROR

IndexError                                Traceback (most recent call last)
<ipython-input-16-36289658a075> in <module>()
     12         targets = [{k: v.cpu() for k, v in t.items()} for t in targets] # sending targets to the GPU
     13         bs = BATCH_SIZE
---> 14         loss_dict = model(images, targets) # passing our model a single batch of images with repective targets
     15         totalLoss = sum(loss for loss in loss_dict.values()) # adds up all the losses from the models output
     16         lossValue = totalLoss.item() # Converts tensor loss to interger Loss

5 frames
/usr/local/lib/python3.7/dist-packages/torchvision/models/detection/roi_heads.py in assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels)
    586                 clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0)
    587 
--> 588                 labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image]
    589                 labels_in_image = labels_in_image.to(dtype=torch.int64)
    590 

IndexError: index 1 is out of bounds for dimension 0 with size 1

DATASET CLASS

# Create Custom DataSet
from pathlib import Path
import xml.etree.ElementTree as ET

import torch
from PIL import Image
from torchvision import transforms

class ObjectDetectionDataset(torch.utils.data.Dataset):
  def __init__(self,phase:str,transforms=None):
    image_dir=f"/content/drive/My Drive/{phase} set/images"
    annotations_dir=f"/content/drive/My Drive/{phase} set/labels"
    self.image_dir = Path(image_dir) # image directory path
    self.annotations_dir = Path(annotations_dir) # annotations directory path
    # self.transforms = transforms # Transforms 
  
    self.imgs_names = list(sorted(self.image_dir.iterdir()))  # create list of path objects to photos, sorted()
    self.imgs_names = [Path(self.image_dir/img_name)for img_name in self.imgs_names]
  
    self.annotation_names = list(sorted(self.annotations_dir.iterdir()))
    self.annotation_names = [Path(annotations_dir/ann_name)for ann_name in self.annotation_names]

  # What happens when __getitem__ is used on the object, example dataset_object[0]
  def __getitem__(self,index):
    # Grabbing path to image at "index" and transforming it to a tensor with values between 0-1
    img_path = self.imgs_names[index]  # Getting Image path object at idx and display the image
    image_pic = Image.open(img_path)
    rows = image_pic.size[0]
    columns = image_pic.size[1]
    resize_amount = (1024,1024)
    image_resized = transforms.Resize(resize_amount)(image_pic)
    image = transforms.ToTensor()(image_resized)
    
    #----------------------------------------------------------------------------------------------------

    # Grabbing path to the annotation at "index" and reading its contents
    annotation_path = self.annotation_names[index]
    annotation_tree = ET.parse(annotation_path)
    bounding_box = []
    for element in annotation_tree.findall("object"):
      bound_box_obj = element.find("bndbox")    
      resize_ratio_rows = resize_amount[0]/ rows
      resize_ratio_columns = resize_amount[1]/columns
      x_max = int(bound_box_obj.find('xmax').text) 
      x_min = int(bound_box_obj.find('xmin').text)
      y_max = int(bound_box_obj.find('ymax').text)
      y_min = int(bound_box_obj.find('ymin').text)
      x2 = round(x_max * resize_ratio_rows)
      x1 = round(x_min * resize_ratio_rows)
      y2 = round(y_max * resize_ratio_columns)
      y1 = round(y_min * resize_ratio_columns)
      blist = [x1,y1,x2,y2]
      bounding_box.append(blist)
      #-------------------------------------------
    bounding_box = torch.tensor(bounding_box)

    #----------------------------------------------------------------------------------------------------

    # Getting Label
    label_list =["raccoon"]
    annotation_path = self.annotation_names[index]
    annotation_tree = ET.parse(annotation_path)
    label_name = annotation_tree.find("object").find("name").text
    
    if label_name in label_list:
      label = (label_list.index(label_name)+1) 
      label = torch.tensor([label],dtype=torch.int64)
  #----------------------------------------------------------------------------------------------------

  # Calculating Area
    area = torch.tensor((x1+x2)*(y1+y2))
  #----------------------------------------------------------------------------------------------------
   
  # Creating Image_Ids
    image_id = torch.tensor(index)

  #----------------------------------------------------------------------------------------------------
  # Setting "iscrowd to zero"
    iscrowd = torch.zeros(1,dtype=torch.int64)
  #----------------------------------------------------------------------------------------------------

  # Creating Targets Dictionary
    target = {}
    target["boxes"] = bounding_box
    target["labels"] = label
    target["image_id"] = image_id
    target["area"] = area
    target["iscrowd"] = iscrowd      

    return image,target

  def __len__(self):
    return len(self.imgs_names)

TRAINING LOOP

import datetime
from time import time

import pandas as pd
from tqdm import tqdm

lossHist = LossAverager()
valLossHist = LossAverager()
column_names = ["Epoch","Train_loss","Valid_loss","Error_rate","Duration"]
df  = pd.DataFrame(columns = column_names)
for epoch in tqdm(range(1,EPOCHS)):
    
    start_time = time()
    model.train() # setting our model to train mode
    lossHist.reset()  # resets our values,averages,sums,counts in lossHist
    for images, targets in dl_test:
        images = torch.stack(images).cpu() 
        targets = [{k: v.cpu() for k, v in t.items()} for t in targets] # moving the target tensors to the CPU
        bs = BATCH_SIZE
        loss_dict = model(images, targets) # passing our model a single batch of images with respective targets
        totalLoss = sum(loss for loss in loss_dict.values()) # adds up all the losses from the model's output
        lossValue = totalLoss.item() # converts the loss tensor to a Python number
        lossHist.update(lossValue,bs)
        optimizer.zero_grad() # zeroes out any previous gradients from our training
        totalLoss.backward() # computes the gradients of the total loss
        optimizer.step() # optimizer takes a step based on the gradients
    if lr_scheduler is not None:
      lr_scheduler.step(totalLoss)

    df = df.append({"Epoch":epoch,"Train_loss":lossHist.avg,"Duration":str(datetime.timedelta(seconds = time() - start_time))[2:7],"Valid_loss":valLossHist.avg},ignore_index=True)
torch.save(model.state_dict(), r'model_raccoonsv2.pth')
df
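The loop above assumes dl_test yields a tuple of images and a tuple of per-image target dicts, which normally requires a detection-style collate_fn on the DataLoader. A minimal sketch of such a setup, with the phase string as an assumption:

from torch.utils.data import DataLoader

# Detection targets are dicts whose tensors differ in size from image to image,
# so the default collate cannot batch them; zipping keeps each sample's dict intact.
def collate_fn(batch):
    return tuple(zip(*batch))

dl_test = DataLoader(
    ObjectDetectionDataset(phase="test"),  # phase name assumed here
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_fn,
)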

Based on the stack trace, labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image] fails, and I guess you might be trying to index the labels tensor, which currently only holds a single element along dimension 0:

labels = torch.tensor([1])
labels[0] # works

labels[1]
# IndexError: index 1 is out of bounds for dimension 0 with size 1

Check which input shapes the model expects and make sure your current inputs are indeed compatible.

For fasterrcnn_resnet50_fpn, the targets passed to the model during training should be formatted as follows (a sketch of a valid target follows the list).

  • boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
  • labels (Int64Tensor[N]): the class label for each ground-truth box
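
In other words, boxes and labels must agree on N: one class id per ground-truth box. A minimal sketch of a target that satisfies this, using made-up values for an image with two raccoons:

import torch

# Two ground-truth boxes -> boxes has shape [2, 4] and labels has shape [2]
boxes = torch.tensor([[ 52.,   7., 948., 999.],
                      [100., 120., 300., 400.]], dtype=torch.float32)
labels = torch.tensor([1, 1], dtype=torch.int64)  # one class id per box (0 is background)

target = {
    "boxes": boxes,
    "labels": labels,
    "image_id": torch.tensor(0),
    "area": (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]),
    "iscrowd": torch.zeros(boxes.shape[0], dtype=torch.int64),
}

# This is the invariant the RoI heads rely on when they index gt_labels
assert target["boxes"].shape[0] == target["labels"].shape[0]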

This is the same output from the Dataset class at the same index as the multi-box dataset class, so I'm just confused: since the dataset classes have identical output, the dataloader batches that get passed on should be the same.

(tensor([[[0.7961, 0.7922, 0.7843,  ..., 0.5608, 0.5608, 0.5608],
         [0.8039, 0.8000, 0.7922,  ..., 0.5529, 0.5529, 0.5529],
         [0.8235, 0.8235, 0.8157,  ..., 0.5294, 0.5294, 0.5294],
         ...,
         [0.5255, 0.5255, 0.5255,  ..., 0.4431, 0.4392, 0.4392],
         [0.5137, 0.5137, 0.5137,  ..., 0.4392, 0.4353, 0.4353],
         [0.5098, 0.5098, 0.5098,  ..., 0.4392, 0.4353, 0.4353]],

        [[0.7843, 0.7804, 0.7725,  ..., 0.5333, 0.5333, 0.5333],
         [0.7922, 0.7882, 0.7804,  ..., 0.5255, 0.5255, 0.5255],
         [0.8118, 0.8118, 0.8039,  ..., 0.5020, 0.5020, 0.5020],
         ...,
         [0.4980, 0.4980, 0.4980,  ..., 0.4471, 0.4431, 0.4431],
         [0.4863, 0.4863, 0.4863,  ..., 0.4431, 0.4392, 0.4392],
         [0.4824, 0.4824, 0.4824,  ..., 0.4431, 0.4392, 0.4392]],

        [[0.7176, 0.7137, 0.7059,  ..., 0.5020, 0.5020, 0.5020],
         [0.7255, 0.7216, 0.7137,  ..., 0.4941, 0.4941, 0.4941],
         [0.7451, 0.7451, 0.7373,  ..., 0.4706, 0.4706, 0.4706],
         ...,
         [0.4235, 0.4235, 0.4235,  ..., 0.4235, 0.4196, 0.4196],
         [0.4118, 0.4118, 0.4118,  ..., 0.4196, 0.4157, 0.4157],
         [0.4078, 0.4078, 0.4078,  ..., 0.4196, 0.4157, 0.4157]]]),
{'area': tensor(1006000),
 'boxes': tensor([[ 52,   7, 948, 999]]),
 'image_id': tensor(9),
 'iscrowd': tensor([0]),
 'labels': tensor([1])})
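
One way to confirm that every sample keeps boxes and labels the same length (and to find the image that trips the RoI heads) is to scan the whole dataset rather than a single index. A quick sketch, with the phase string as an assumption:

dataset = ObjectDetectionDataset(phase="train")  # phase name assumed here

# Flag any sample whose annotation produced more boxes than labels (or vice versa);
# the posted __getitem__ collects a box for every <object> but only the first <object> name.
for idx in range(len(dataset)):
    _, target = dataset[idx]
    n_boxes = target["boxes"].shape[0]
    n_labels = target["labels"].shape[0]
    if n_boxes != n_labels:
        print(f"sample {idx}: {n_boxes} boxes but {n_labels} labels")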