Mask_b_box errors

I have been extracting bounding boxes from masks to prompt and train the Segment Anything Model (SAM). My images are uint16, and the masks are uint16 as well.

import numpy as np
import pandas as pd
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.ops import masks_to_boxes


class segmentationdataset(Dataset):
    def __init__(self, csv, augmentation=None, transform_image=None, transform_label=None, transform_bbox=None):
        self.df = pd.read_csv(csv)
        self.ids = self.df["file_ids"]
        self.transform_image = transform_image
        self.transform_label = transform_label
        self.transform_bbox = transform_bbox
        self.augmentation = augmentation

    def __getitem__(self, idx):
        image = np.array(Image.open("/kaggle/input/nucleus-data-c-elegans/nucleus_data/features/F" + self.ids[idx]))
        mask = np.array(Image.open("/kaggle/input/nucleus-data-c-elegans/nucleus_data/segmentation_maps/L" + self.ids[idx]))
        # the bounding boxes are derived from the same instance map as the mask
        b_box = np.array(Image.open("/kaggle/input/nucleus-data-c-elegans/nucleus_data/segmentation_maps/L" + self.ids[idx]))
        if self.augmentation is not None:
            augmented = self.augmentation(image=image, mask=mask, b_box=b_box)
            image = augmented["image"]
            mask = augmented["mask"]
            b_box = augmented["b_box"]

        image = self.transform_image(image)
        mask = self.transform_label(mask)
        b_box = self.transform_bbox(b_box)
        b_box = np.array(b_box, dtype=np.uint8)  # back to a NumPy array (the transform returns a tensor)
        b_box = torch.as_tensor(b_box)
        obj_ids = torch.unique(b_box)  # instance IDs present in the map
        obj_ids = obj_ids[1:]          # drop the background ID (0)
        b_boxes = b_box == obj_ids[:, None, None]  # one boolean mask per instance
        boxes = masks_to_boxes(b_boxes)            # one (xmin, ymin, xmax, ymax) box per instance
        return image.float(), mask.float(), boxes.float()

    def __len__(self):
        return len(self.ids)
              

This is my dataset class.

Here are the data augmentations I am applying:

from albumentations import (Compose, HorizontalFlip, OpticalDistortion, RandomRotate90,
                            ShiftScaleRotate, Transpose, VerticalFlip)
from torchvision import transforms
from torchvision.transforms import ToTensor

full_dataset = segmentationdataset(
    csv="file_ids.csv",
    augmentation=Compose([
        # GridDistortion(p=0.5),
        Transpose(p=0.5),
        VerticalFlip(p=0.5),
        HorizontalFlip(p=0.5),
        RandomRotate90(p=0.5),
        ShiftScaleRotate(p=0.1),
        OpticalDistortion(distort_limit=0.3, shift_limit=0.3, p=1),
    ]),
    transform_image=transforms.Compose([
        transforms.ToPILImage(),
        ToTensor(),
        # AddGaussianNoise is a custom transform (see the sketch below)
        transforms.RandomApply([AddGaussianNoise(mean=0.5, std=0.05)], p=0.5),
    ]),
    transform_label=transforms.Compose([
        transforms.ToPILImage(),
        ToTensor(),
    ]),
    transform_bbox=transforms.Compose([
        transforms.ToPILImage(),
        ToTensor(),
    ]))
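
AddGaussianNoise is not a built-in torchvision transform, so it is defined elsewhere in my notebook. For completeness, a typical implementation of such a transform looks like this (a sketch only; the actual class may differ):

import torch

class AddGaussianNoise:
    """Adds element-wise Gaussian noise to an already-tensorized image."""
    def __init__(self, mean=0.0, std=1.0):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        return tensor + torch.randn_like(tensor) * self.std + self.mean

    def __repr__(self):
        return f"{self.__class__.__name__}(mean={self.mean}, std={self.std})"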
train_batch_size = 32  # note: unused below; the loaders hard-code their own batch sizes
train_size = int(0.8 * len(full_dataset))  # 80/20 split
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(dataset=test_dataset, batch_size=4, shuffle=True)

print(len(train_loader), "train batches")
print(len(val_loader), "val batches")

for image, mask, bboxes in train_loader:
    # img_embed: (B, 256, 64, 64), gt2D: (B, 1, 256, 256), bboxes: (B, 4)
    print(f"{image.shape=}, {mask.shape=}, {bboxes.shape=}")
    break


When I try to print the shapes of the image, mask, and bounding box, I run into this error. What does the error mean?

This is what my mask looks like!
[screenshot of the mask]

Thanks in advance!

Based on the error message, it seems the bounding boxes are empty for some samples (indicated by the shape [0, 4]), while other samples contain a lot of bounding boxes (indicated by the shape [114, 4]). The DataLoader's default collate function then tries to stack these tensors into a single batch tensor and fails, because stacking requires every sample to have the same shape.
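
You can see why the first dimension varies with a small standalone example (the toy instance map below is made up for illustration): masks_to_boxes returns one box per instance ID, so the number of boxes depends on how many nuclei each mask contains.

import torch
from torchvision.ops import masks_to_boxes

# toy 5x5 instance map: instances 1 and 2, background 0
instance_map = torch.tensor([
    [0, 0, 0, 0, 0],
    [0, 1, 1, 0, 0],
    [0, 1, 1, 0, 2],
    [0, 0, 0, 0, 2],
    [0, 0, 0, 0, 0],
])

obj_ids = torch.unique(instance_map)[1:]               # drop background -> tensor([1, 2])
binary_masks = instance_map == obj_ids[:, None, None]  # (2, 5, 5), one boolean mask per instance
boxes = masks_to_boxes(binary_masks)                   # (2, 4) in (xmin, ymin, xmax, ymax)
print(boxes)
# tensor([[1., 1., 2., 2.],
#         [4., 2., 4., 3.]])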
The object detection tutorial might be helpful, as it shows how such a dataset is implemented and how a dict containing all the needed data is returned for each sample.
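
Alternatively, you can keep your current return format and pass a custom collate_fn to the DataLoader, so the variable-length box tensors are kept in a list rather than stacked. A minimal sketch, assuming the (image, mask, boxes) tuples your __getitem__ returns:

import torch
from torch.utils.data import DataLoader

def collate_fn(batch):
    # batch is a list of (image, mask, boxes) tuples from __getitem__
    images, masks, boxes = zip(*batch)
    images = torch.stack(images)  # all images share one shape, so stacking is safe
    masks = torch.stack(masks)    # same for the masks
    # boxes have shape (N_i, 4) with a different N_i per sample, so keep them as a list
    return images, masks, list(boxes)

train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True, collate_fn=collate_fn)

In the training loop, bboxes is then a Python list of length batch_size, holding one (N_i, 4) tensor per sample, which your prompting code can iterate over.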