Hello,
I am trying to calculate bounding boxes from masks on the EgoHands dataset. I keep getting this error:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-48-9622c067c627> in <module>
1 t_path = '/home/jovyan/shared/EgoHands/_LABELLED_SAMPLES'
2 dataset = EgoHands(t_path)
----> 3 dataset[0]
<ipython-input-47-ad8f0b861342> in __getitem__(self, index)
75 image_id = torch.tensor([index])
76
---> 77 area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
78
79 # suppose all instances are not crowd
IndexError: too many indices for tensor of dimension 1
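If I am reading the message right, it is complaining that the tensor being indexed only has one dimension. This tiny snippet (my guess at a minimal reproduction, not taken from my actual run) raises the identical error when the tensor is built from an empty list:

import torch

# Guessed minimal repro: as_tensor on an empty list gives a 1-D tensor
# of shape (0,), so indexing it along two axes fails
boxes = torch.as_tensor([], dtype=torch.float32)
print(boxes.dim())  # 1
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
# IndexError: too many indices for tensor of dimension 1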
Below is my code for the dataset. I am not sure why this is occurring, because I thought I was calculating the area correctly; a standalone sketch of what I intended is at the bottom of this post.
import os
from glob import glob

import numpy as np
import torch
from matplotlib.path import Path
from PIL import Image
from scipy.io import loadmat
from torchvision import transforms


class EgoHands(torch.utils.data.Dataset):
    def __init__(self, path, transform=None):
        self.path = path
        # self.path = '/home/jovyan/shared/EgoHands/'
        folders = sorted(glob(os.path.join(self.path, "*")))
        self.imgs = []
        self.polygons = []
        for folder in folders:
            # Add images
            self.imgs += sorted(glob(os.path.join(folder, "*.jpg")))
            # Add polygons
            polygon_path = glob(os.path.join(folder, "*.mat"))[0]
            polygon = loadmat(polygon_path)['polygons'][0]
            for i in range(len(polygon)):
                self.polygons.append(polygon[i])
        # TODO: use suitable transformations
        # self.transform = transforms.Compose([
        #     transforms.ToTensor(),
        #     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        # ])
        self.transform = transform
        transform = transforms.Compose([
            transforms.Resize(256),
            transforms.RandomCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])

    def __getitem__(self, index):
        # Load image
        img = np.array(Image.open(self.imgs[index]))

        # Compute one boolean mask per polygon by testing every pixel
        # coordinate against the polygon outline
        polygons = self.polygons[index]
        gt_mask = []
        x, y = np.meshgrid(np.arange(img.shape[1]), np.arange(img.shape[0]))
        x, y = x.flatten(), y.flatten()
        points = np.vstack((x, y)).T
        for i, polygon in enumerate(polygons):
            if polygon.size == 0:
                continue
            path = Path(polygon)
            grid = path.contains_points(points)
            grid = grid.reshape(img.shape[:2])
            gt_mask.append(np.expand_dims(grid, axis=-1))
        gt_mask = np.concatenate(gt_mask, axis=-1)

        # TODO: compute minimal bounding boxes
        boxes = []
        num_objs = len(boxes)
        for i in range(num_objs):
            pos = np.where(gt_mask[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)

        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        gt_mask = torch.as_tensor(gt_mask, dtype=torch.uint8)
        image_id = torch.tensor([index])

        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = gt_mask
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transform:
            img = self.transform(img)
        return img, target

    def __len__(self):
        return len(self.imgs)
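For reference, this is the standalone version of the box-and-area computation I thought I was writing. It is only a sketch under my assumption that the stacked masks end up with shape (H, W, N), one boolean mask per hand along the last axis; the toy masks are made up for illustration:

import numpy as np
import torch

# Toy stand-in for gt_mask with assumed shape (H, W, N): two fake objects
masks = np.zeros((10, 10, 2), dtype=bool)
masks[2:5, 3:8, 0] = True
masks[6:9, 1:4, 1] = True

boxes = []
for i in range(masks.shape[-1]):
    pos = np.where(masks[..., i])  # pos[0] holds rows (y), pos[1] holds columns (x)
    boxes.append([pos[1].min(), pos[0].min(), pos[1].max(), pos[0].max()])

boxes = torch.as_tensor(boxes, dtype=torch.float32)  # shape (N, 4)
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
print(boxes.shape, area)  # torch.Size([2, 4]) tensor([8., 4.])

Since this standalone sketch produces an (N, 4) tensor, I suspect the problem is in how boxes gets filled inside __getitem__, but I cannot spot it.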
Thanks!