Creating a custom dataset for tilapia length and width measurement using Mask R-CNN

Orekih · May 21, 2023, 9:36am

Hello, I am new to this field and I am wondering if someone can help me check whether I am on the right track. I am writing the code for a custom dataset. I annotate the images with the VIA annotation tool and export the annotations in COCO format.

class MaskRCnnDataset(torch.utils.data.Dataset):
  """COCO-format dataset yielding ``(image, target)`` pairs for Mask R-CNN.

  The class relies on the names ``os``, ``torch``, ``Image`` and ``COCO``
  being importable at module level (presumably ``PIL.Image`` and
  ``pycocotools.coco.COCO`` -- confirm against the file's imports).

  Besides the standard COCO fields, every annotation is expected to carry
  two custom numeric fields, ``'length'`` and ``'width'`` (the measured
  fish dimensions); a missing field raises ``KeyError``.

  Args:
    image_root: Directory that the ``file_name`` entries of the annotation
      file are relative to.
    anno_file: Path to the COCO-format annotation JSON file.
    transform: Optional callable applied to the PIL image only. It is NOT
      applied to boxes or masks, so it must not change the image geometry
      (no resize/flip/crop) unless the target is adjusted elsewhere.
  """

  def __init__(self, image_root, anno_file, transform=None):
    self.image_root = image_root
    self.anno_file = anno_file
    self.transform = transform

    # load the annotations
    self.coco = COCO(self.anno_file)

    # get the category ids
    self.category_ids = self.coco.getCatIds()

    # Collect the ids of all images that contain at least one category.
    # A set is used because an image holding several categories would
    # otherwise be listed (and sampled) once per category.
    unique_image_ids = set()
    for category_id in self.category_ids:
      unique_image_ids.update(self.coco.getImgIds(catIds=[category_id]))
    # sorted so that the index -> image mapping is deterministic
    self.image_ids = sorted(unique_image_ids)

  def __getitem__(self, idx):
    """Return the image and target dict for dataset index ``idx``.

    Returns:
      A tuple ``(image, target)``. ``image`` is an RGB PIL image, or
      whatever ``transform`` returns when a transform is set. ``target``
      is the dict described in ``_build_target``.
    """
    image_id = self.image_ids[idx]

    # loadImgs always returns a list, even for a single id
    image_info = self.coco.loadImgs(image_id)[0]
    image_path = os.path.join(self.image_root, image_info['file_name'])
    image = Image.open(image_path).convert('RGB')
    # PIL reports (width, height); taken before the transform so that empty
    # mask tensors match the size of the masks decoded by annToMask
    img_width, img_height = image.size

    # get the annotations of this image from the annotation file
    annotation_ids = self.coco.getAnnIds(
        imgIds=image_id, catIds=self.category_ids, iscrowd=None)
    annotations = self.coco.loadAnns(annotation_ids)

    target = self._build_target(image_id, annotations, img_height, img_width)

    # after extracting the target, apply the image transform
    if self.transform is not None:
      image = self.transform(image)

    return image, target

  def _build_target(self, image_id, annotations, height, width):
    """Convert a list of COCO annotation dicts into a tensor target dict.

    Args:
      image_id: COCO id of the image the annotations belong to.
      annotations: List of annotation dicts as returned by ``loadAnns``.
      height: Image height in pixels (used only when there are no masks).
      width: Image width in pixels (used only when there are no masks).

    Returns:
      A dict with, for N annotations:
        'boxes'    float32 (N, 4), each row ``[x1, y1, x2, y2]``
        'labels'   int64   (N,), the COCO category ids
        'masks'    uint8   (N, H, W), one binary mask per annotation
        'image_id' int64   (1,)
        'area'     float32 (N,), box area
        'iscrowd'  int64   (N,), taken from the annotation, default 0
        'lengths'  float32 (N,), custom 'length' field
        'widths'   float32 (N,), custom 'width' field

    Raises:
      KeyError: If an annotation lacks 'bbox', 'category_id', 'length'
        or 'width'.
    """
    boxes = []
    labels = []
    crowd_flags = []
    lengths = []
    widths = []
    for ann in annotations:
      # COCO stores boxes as [x, y, w, h]; torchvision wants corner format
      x, y, w, h = ann['bbox']
      boxes.append([x, y, x + w, y + h])
      labels.append(ann['category_id'])
      crowd_flags.append(ann.get('iscrowd', 0))
      # float() also accepts numeric strings, in case the annotation tool
      # exported the custom attributes as text
      lengths.append(float(ann['length']))
      widths.append(float(ann['width']))

    # reshape keeps the (N, 4) layout even when N == 0
    boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

    if annotations:
      masks = torch.stack([
          torch.as_tensor(self.coco.annToMask(ann), dtype=torch.uint8)
          for ann in annotations
      ])
    else:
      masks = torch.zeros((0, height, width), dtype=torch.uint8)

    target = {}
    target['boxes'] = boxes
    target['labels'] = torch.as_tensor(labels, dtype=torch.int64)
    target['masks'] = masks
    target['image_id'] = torch.tensor([image_id])
    target['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
    target['iscrowd'] = torch.as_tensor(crowd_flags, dtype=torch.int64)
    target['lengths'] = torch.as_tensor(lengths, dtype=torch.float32)
    target['widths'] = torch.as_tensor(widths, dtype=torch.float32)
    return target

  def __len__(self):
    """Return the number of distinct annotated images."""
    return len(self.image_ids)