Calculating bounding box area, IndexError: too many indices for tensor of dimension 1

Hello,

I am trying to calculate bounding boxes from masks on the EgoHands dataset. I keep getting this error:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-48-9622c067c627> in <module>
      1 t_path = '/home/jovyan/shared/EgoHands/_LABELLED_SAMPLES'
      2 dataset = EgoHands(t_path)
----> 3 dataset[0]

<ipython-input-47-ad8f0b861342> in __getitem__(self, index)
     75         image_id = torch.tensor([index])
     76 
---> 77         area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
     78 
     79         # suppose all instances are not crowd

IndexError: too many indices for tensor of dimension 1

Below is my code for the dataset. I am not sure why this is occurring, because I thought I was calculating the area correctly:

class EgoHands(torch.utils.data.Dataset):
    def __init__(self, path, transform=None):
        self.path = path
#         self.path = '/home/jovyan/shared/EgoHands/'
        folders = sorted(glob(os.path.join(self.path, "*")))
        self.imgs = []
        self.polygons = []
        for folder in folders:
            # Add images
            self.imgs += sorted(glob(os.path.join(folder, "*.jpg")))

            # Add polygons
            polygon_path = glob(os.path.join(folder, "*.mat"))[0]
            polygon = loadmat(polygon_path)['polygons'][0]
            for i in range(len(polygon)):
                self.polygons.append(polygon[i])

        # TODO: use suitable transformations
        
#         self.transform = transforms.Compose([
#             transforms.ToTensor(),
#             transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
#         ])
        
        
        self.transform = transform
        
        transform = transforms.Compose([
            transforms.Resize(256),
            transforms.RandomCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])

    def __getitem__(self, index):
        # Load image
        img = np.array(Image.open(self.imgs[index]))

        # Compute mask
        polygons = self.polygons[index]
        gt_mask = []
        x, y = np.meshgrid(
            np.arange(img.shape[1]), np.arange(img.shape[0]))
        x, y = x.flatten(), y.flatten()
        points = np.vstack((x, y)).T
        for i, polygon in enumerate(polygons):
            if polygon.size == 0:
                continue
            path = Path(polygon)
            grid = path.contains_points(points)
            grid = grid.reshape(img.shape[:2])
            gt_mask.append(np.expand_dims(grid, axis=-1))
        gt_mask = np.concatenate(gt_mask, axis=-1)

        # TODO: compute minimal bounding boxes
        boxes = []
        num_objs = len(boxes)
        for i in range(num_objs):
            pos = np.where(gt_mask[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        gt_mask = torch.as_tensor(gt_mask, dtype=torch.uint8)

        image_id = torch.tensor([index])
        
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        


        target = None

        if self.transform:
            img = transform(img)

        return img, target

    def __len__(self):
        return len(self.imgs)

thanks!

Could you check the shape of boxes?
While your indexing assumes boxes has two dimensions, the error message states that it only contains a single dimension.

In your __getitem__, boxes is created via boxes = torch.as_tensor(boxes, dtype=torch.float32), which also works for a single object:

boxes = []
boxes.append([0, 0, 0, 0])
boxes = torch.as_tensor(boxes, dtype=torch.float32)
print(boxes.shape)
> torch.Size([1, 4])
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) # works
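
If boxes is empty, however, the resulting tensor only has a single dimension, and the same indexing raises exactly your error:

boxes = []
boxes = torch.as_tensor(boxes, dtype=torch.float32)
print(boxes.shape)
> torch.Size([0])
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) # IndexError: too many indices for tensor of dimension 1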

Thank you for your help! I think some images in the dataset have no bounding boxes, so boxes ends up as an empty array.

Ah, that makes sense.
I’m not familiar with your use case, but adding a check for empty boxes should get rid of the error.
However, I’m not sure if your model will work with these empty bboxes or if you would have to remove those samples from the dataset.
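
If you want to keep those samples, one option would be to create the box tensor with an explicit (0, 4) shape when there are no objects, so the slicing for the area still works. Something like this (just a sketch; the num_objs check would sit right after you build the boxes list):

num_objs = len(boxes)
if num_objs == 0:
    # no hands in this frame: keep an explicit (0, 4) box tensor
    boxes = torch.zeros((0, 4), dtype=torch.float32)
else:
    boxes = torch.as_tensor(boxes, dtype=torch.float32)

# boxes now always has two dimensions, so the area computation no longer raises
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])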