Hello, I am new to this field and I am wondering if someone can help me identify whether I am on the right track. I am writing the code for a custom dataset; I annotate the images with the VIA annotation tool and export the annotations in COCO format.
class MaskRCnnDataset(torch.utils.data.Dataset):
    """Dataset that serves ``(image, target)`` pairs from a COCO-format file.

    Each item is an RGB image together with a target dict holding the
    ``boxes``, ``labels``, ``masks``, ``image_id``, ``area`` and ``iscrowd``
    entries, plus the per-annotation ``lengths`` and ``widths`` values read
    from the annotation file.

    Args:
        image_root: Directory that the ``file_name`` of every image entry is
            joined onto.
        anno_file: Path to the COCO-format annotation file.
        transform: Optional callable applied to the image only.
    """

    def __init__(self, image_root, anno_file, transform=None):
        self.image_root = image_root
        self.anno_file = anno_file
        self.transform = transform

        # Load the annotations.
        self.coco = COCO(self.anno_file)
        self.category_ids = self.coco.getCatIds()

        # Collect the ids of every image that contains at least one category.
        # An image holding several categories is reported once per category,
        # so duplicates are dropped while the first-seen order is kept.
        collected_ids = []
        for category_id in self.category_ids:
            collected_ids.extend(self.coco.getImgIds(catIds=[category_id]))
        self.image_ids = list(dict.fromkeys(collected_ids))

    def __getitem__(self, idx):
        """Return the ``(image, target)`` pair stored at position ``idx``.

        Raises:
            IndexError: If ``idx`` is outside ``self.image_ids``.
            KeyError: If an annotation lacks one of the fields read by
                ``_build_target``.
        """
        image_id = self.image_ids[idx]

        # loadImgs returns a list even for a single id, hence the [0].
        image_info = self.coco.loadImgs(image_id)[0]
        image_path = os.path.join(self.image_root, image_info['file_name'])
        image = Image.open(image_path).convert('RGB')
        # Remember the size before any transform; PIL reports (width, height).
        image_size = image.size

        annotation_ids = self.coco.getAnnIds(
            imgIds=image_id, catIds=self.category_ids, iscrowd=None
        )
        annotations = self.coco.loadAnns(annotation_ids)
        masks = [self.coco.annToMask(ann) for ann in annotations]

        target = self._build_target(image_id, annotations, masks, image_size)

        # The transform receives the image only. NOTE(review): a geometric
        # transform (flip, resize, crop) would leave boxes and masks out of
        # sync with the image - confirm only photometric transforms are used.
        if self.transform is not None:
            image = self.transform(image)

        return image, target

    def __len__(self):
        """Return the number of distinct images in the dataset."""
        return len(self.image_ids)

    @staticmethod
    def _build_target(image_id, annotations, masks, image_size):
        """Assemble the target dict for one image from plain annotation data.

        Args:
            image_id: Id of the image the annotations belong to.
            annotations: Annotation dicts; each must provide ``bbox`` as
                ``[x, y, w, h]``, ``category_id``, ``length`` and ``width``.
                NOTE(review): ``length`` and ``width`` are not standard COCO
                fields - presumably custom attributes exported from VIA.
            masks: One 2-D binary mask per annotation, in the same order.
            image_size: ``(width, height)`` of the image; only used to shape
                the empty mask tensor when there are no annotations.

        Returns:
            Dict with ``boxes`` (float32, N x 4, ``[x1, y1, x2, y2]``),
            ``labels`` (int64, N), ``masks`` (uint8, N x H x W),
            ``image_id`` (1-element tensor), ``area`` (float32, N),
            ``iscrowd`` (int64 zeros, N), and the Python lists ``lengths``
            and ``widths``.
        """
        boxes = []
        labels = []
        lengths = []
        widths = []
        for ann in annotations:
            x, y, w, h = ann['bbox']
            # COCO stores [x, y, w, h]; convert to corner format.
            boxes.append([x, y, x + w, y + h])
            labels.append(ann['category_id'])
            lengths.append(ann['length'])
            widths.append(ann['width'])

        # The explicit row count keeps the (0, 4) shape when there are no boxes.
        box_tensor = torch.as_tensor(boxes, dtype=torch.float32)
        box_tensor = box_tensor.reshape(len(boxes), 4)

        if masks:
            mask_tensor = torch.stack(
                [torch.as_tensor(mask, dtype=torch.uint8) for mask in masks]
            )
        else:
            img_width, img_height = image_size
            mask_tensor = torch.zeros(
                (0, img_height, img_width), dtype=torch.uint8
            )

        target = {}
        target['boxes'] = box_tensor
        target['labels'] = torch.as_tensor(labels, dtype=torch.int64)
        target['masks'] = mask_tensor
        target['image_id'] = torch.tensor([image_id])
        target['area'] = (
            (box_tensor[:, 3] - box_tensor[:, 1])
            * (box_tensor[:, 2] - box_tensor[:, 0])
        )
        target['iscrowd'] = torch.zeros((len(annotations),), dtype=torch.int64)
        target['lengths'] = lengths
        target['widths'] = widths
        return target