How did adding a for loop in my dataset class break my model?

Hello, I am getting the error below. My model was working before, and I have not changed anything except that in my dataset class I added a for loop to grab every bounding box when there is more than one in the XML file. I indexed both the original dataset class and the newer one (with the for loop) at the same place, and both returned the same data. Yet when I run the newer code that extracts more than one bounding box, I get this error.

Error Code

    586                 clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0)
    587 
--> 588                 labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image]
    589                 labels_in_image = labels_in_image.to(dtype=torch.int64)
    590 

RuntimeError: CUDA error: device-side assert triggered

Original Code


# Create Custom DataSet
class ObjectDetectionDataset(torch.utils.data.Dataset):
  def __init__(self,phase:str,transforms=None):
    image_dir=f"/content/drive/My Drive/{phase} set/images"
    annotations_dir=f"/content/drive/My Drive/{phase} set/labels"
    self.image_dir = Path(image_dir) # image directory path
    self.annotations_dir = Path(annotations_dir) # annotations directory path
    # self.transforms = transforms # Transforms 
  
    self.imgs_names = list(sorted(self.image_dir.iterdir()))  # create list of path objects to photos, sorted()
    self.imgs_names = [Path(self.image_dir/img_name)for img_name in self.imgs_names]
  
    self.annotation_names = list(sorted(self.annotations_dir.iterdir()))
    self.annotation_names = [Path(annotations_dir/ann_name)for ann_name in self.annotation_names]

  # What happens when __getitem__ is used on the object, example dataset_object[0]
  def __getitem__(self,index):
    # Grabing path to image at "index" and transforming to tensor between 0-1
    img_path = self.imgs_names[index]  # Getting Image path object at idx and display the image
    image_pic = Image.open(img_path)
    rows = image_pic.size[0]
    columns = image_pic.size[1]
    resize_amount = (1024,1024)
    image_resized = transforms.Resize(resize_amount)(image_pic)
    image = transforms.ToTensor()(image_resized)
    
    #----------------------------------------------------------------------------------------------------

    # Grabing path to bounding box at "index" and grabbing its contents 
    annotation_path = self.annotation_names[index]
    annotation_tree = ET.parse(annotation_path)
    bound_box_obj = annotation_tree.find("object").find("bndbox") # find and returns contents in bndbox
    #----------------------------------------------------------------------------------------------------

    #Getting the x and y values of the bounding box
    # ---------- Going from larger image to small ----------
    resize_ratio_rows = resize_amount[0]/ rows
    resize_ratio_columns = resize_amount[1]/columns
    x_max = int(bound_box_obj.find('xmax').text) 
    x_min = int(bound_box_obj.find('xmin').text)
    y_max = int(bound_box_obj.find('ymax').text)
    y_min = int(bound_box_obj.find('ymin').text)
    x2 = round(x_max * resize_ratio_rows)
    x1 = round(x_min * resize_ratio_rows)
    y2 = round(y_max * resize_ratio_columns)
    y1 = round(y_min * resize_ratio_columns)
    bounding_box = torch.tensor((x1,y1,x2,y2))
    bounding_box = bounding_box.unsqueeze(0)

   # ---------- Going from smaller image to larger ----------
  



   #----------------------------------------------------------------------------------------------------

   # Getting Label
    # label_list =["Truck","Car","Jeep"]
    label_list =["raccoon"]
    annotation_path = self.annotation_names[index]
    annotation_tree = ET.parse(annotation_path)
    label_name = annotation_tree.find("object").find("name").text
    
    if label_name in label_list:
      label = (label_list.index(label_name)+1) 
      label = torch.tensor([label],dtype=torch.int64)
  #----------------------------------------------------------------------------------------------------

  # Calculating Area
    area = torch.tensor((x1+x2)*(y1+y2))
  #----------------------------------------------------------------------------------------------------
   
  # Creating Image_Ids
    image_id = torch.tensor(index)

  #----------------------------------------------------------------------------------------------------
  # Setting "iscrowd to zero"
    iscrowd = torch.zeros(1,dtype=torch.int64)
  #----------------------------------------------------------------------------------------------------

  # Creating Tagets Dictionary 
    target = {}
    target["boxes"] = bounding_box
    target["labels"] = label
    target["image_id"] = image_id
    target["area"] = area
    target["iscrowd"] = iscrowd      

    return image,target

  def __len__(self):
    return len(self.imgs_names)

New version: the same class with a for loop added to grab multiple boxes from an XML file


# Create Custom DataSet
class ObjectDetectionDataset(torch.utils.data.Dataset):
  def __init__(self,phase:str,transforms=None):
    image_dir=f"/content/drive/My Drive/{phase} set/images"
    annotations_dir=f"/content/drive/My Drive/{phase} set/labels"
    self.image_dir = Path(image_dir) # image directory path
    self.annotations_dir = Path(annotations_dir) # annotations directory path
    # self.transforms = transforms # Transforms 
  
    self.imgs_names = list(sorted(self.image_dir.iterdir()))  # create list of path objects to photos, sorted()
    self.imgs_names = [Path(self.image_dir/img_name)for img_name in self.imgs_names]
  
    self.annotation_names = list(sorted(self.annotations_dir.iterdir()))
    self.annotation_names = [Path(annotations_dir/ann_name)for ann_name in self.annotation_names]

  # What happens when __getitem__ is used on the object, example dataset_object[0]
  def __getitem__(self,index):
    # Grabing path to image at "index" and transforming to tensor between 0-1
    img_path = self.imgs_names[index]  # Getting Image path object at idx and display the image
    image_pic = Image.open(img_path)
    rows = image_pic.size[0]
    columns = image_pic.size[1]
    resize_amount = (1024,1024)
    image_resized = transforms.Resize(resize_amount)(image_pic)
    image = transforms.ToTensor()(image_resized)
    
    #----------------------------------------------------------------------------------------------------

    # Grabing path to bounding box at "index" and grabbing its contents 
    annotation_path = self.annotation_names[index]
    annotation_tree = ET.parse(annotation_path)
    bounding_box = []
    for element in annotation_tree.findall("object"):
      bound_box_obj = element.find("bndbox")    
      resize_ratio_rows = resize_amount[0]/ rows
      resize_ratio_columns = resize_amount[1]/columns
      x_max = int(bound_box_obj.find('xmax').text) 
      x_min = int(bound_box_obj.find('xmin').text)
      y_max = int(bound_box_obj.find('ymax').text)
      y_min = int(bound_box_obj.find('ymin').text)
      x2 = round(x_max * resize_ratio_rows)
      x1 = round(x_min * resize_ratio_rows)
      y2 = round(y_max * resize_ratio_columns)
      y1 = round(y_min * resize_ratio_columns)
      blist = [x1,y1,x2,y2]
      bounding_box.append(blist)
      #-------------------------------------------
    bounding_box = torch.tensor(bounding_box)
  



   #----------------------------------------------------------------------------------------------------

   # Getting Label
    # label_list =["Truck","Car","Jeep"]
    label_list =["background","raccoon"]
    annotation_path = self.annotation_names[index]
    annotation_tree = ET.parse(annotation_path)
    label_name = annotation_tree.find("object").find("name").text
    
    if label_name in label_list:
      label = (label_list.index(label_name)) 
      label = torch.tensor([label],dtype=torch.int64)
  #----------------------------------------------------------------------------------------------------

  # Calculating Area
    area = torch.tensor((x1+x2)*(y1+y2))
  #----------------------------------------------------------------------------------------------------
   
  # Creating Image_Ids
    image_id = torch.tensor(index)

  #----------------------------------------------------------------------------------------------------
  # Setting "iscrowd to zero"
    iscrowd = torch.zeros(1,dtype=torch.int64)
  #----------------------------------------------------------------------------------------------------

  # Creating Tagets Dictionary 
    target = {}
    target["boxes"] = bounding_box
    target["labels"] = label
    target["image_id"] = image_id
    target["area"] = area
    target["iscrowd"] = iscrowd      

    return image,target

  def __len__(self):
    return len(self.imgs_names)

Indexing both datasets at the same index returns identical values

(tensor([[[0.7961, 0.7922, 0.7843,  ..., 0.5608, 0.5608, 0.5608],
          [0.8039, 0.8000, 0.7922,  ..., 0.5529, 0.5529, 0.5529],
          [0.8235, 0.8235, 0.8157,  ..., 0.5294, 0.5294, 0.5294],
          ...,
          [0.5255, 0.5255, 0.5255,  ..., 0.4431, 0.4392, 0.4392],
          [0.5137, 0.5137, 0.5137,  ..., 0.4392, 0.4353, 0.4353],
          [0.5098, 0.5098, 0.5098,  ..., 0.4392, 0.4353, 0.4353]],
 
         [[0.7843, 0.7804, 0.7725,  ..., 0.5333, 0.5333, 0.5333],
          [0.7922, 0.7882, 0.7804,  ..., 0.5255, 0.5255, 0.5255],
          [0.8118, 0.8118, 0.8039,  ..., 0.5020, 0.5020, 0.5020],
          ...,
          [0.4980, 0.4980, 0.4980,  ..., 0.4471, 0.4431, 0.4431],
          [0.4863, 0.4863, 0.4863,  ..., 0.4431, 0.4392, 0.4392],
          [0.4824, 0.4824, 0.4824,  ..., 0.4431, 0.4392, 0.4392]],
 
         [[0.7176, 0.7137, 0.7059,  ..., 0.5020, 0.5020, 0.5020],
          [0.7255, 0.7216, 0.7137,  ..., 0.4941, 0.4941, 0.4941],
          [0.7451, 0.7451, 0.7373,  ..., 0.4706, 0.4706, 0.4706],
          ...,
          [0.4235, 0.4235, 0.4235,  ..., 0.4235, 0.4196, 0.4196],
          [0.4118, 0.4118, 0.4118,  ..., 0.4196, 0.4157, 0.4157],
          [0.4078, 0.4078, 0.4078,  ..., 0.4196, 0.4157, 0.4157]]]),
 {'area': tensor(1006000),
  'boxes': tensor([[ 52,   7, 948, 999]]),
  'image_id': tensor(9),
  'iscrowd': tensor([0]),
  'labels': tensor([1])})

You could rerun the script via CUDA_LAUNCH_BLOCKING=1 python script.py args to check where the device assert is triggered. Depending on the failure you might then want to check the input shapes, values etc. of the operation (e.g. an indexing operation might be failing).
Alternatively, you could also run your use case on the CPU to get a better stacktrace.