How to Re-Train keypointrcnn model on custom dataset

Hi,

I have some questions about the pre-trained model keypointrcnn_resnet50_fpn from torchvision. I would like to fine-tune it on my own dataset with 12 keypoints to do single-person pose keypoint detection. I’ve followed some code provided by the official documentation here, but I am not able to implement my own dataset and training loop because the example is mostly about segmentation.

My dataset only contains keypoints without information about visibility, so my first question is, can I train on this kind of data?

UPDATE: My code and the error are in the next comment.

Thank you,
Medhy

My custom dataset looks like this now:

class FotDataset(object):
    """Single-person keypoint dataset described by a CSV file.

    Each CSV row holds an image file name in the ``img`` column; every column
    after the first is read as a flat run of ``x, y, visibility`` triplets,
    one triplet per keypoint. Images are loaded from the ``imgFot`` directory.

    Items are ``(image, target)`` pairs where ``target`` follows the
    torchvision detection convention: ``boxes`` is ``[N, 4]`` float,
    ``labels`` is ``[N]`` int64 and ``keypoints`` is ``[N, K, 3]`` float,
    with ``N == 1`` because every image contains exactly one person.
    """

    def __init__(self, csv_name):
        """Load the annotation table.

        Args:
            csv_name: Path of the annotation CSV. If parsing with the default
                separator yields fewer than three columns, the file is parsed
                again with ``;`` as the separator.
        """
        # NOTE: on a CUDA machine the float targets are created directly on
        # the GPU, exactly as before; keep DataLoader's num_workers at 0 then.
        self.dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
        self.to_tensor = T.ToTensor()

        self.df = pd.read_csv(csv_name)
        if len(self.df.columns) < 3:
            self.df = pd.read_csv(csv_name, sep=';')

        # Normalise column labels to strings so 'img' can be looked up by name.
        self.df.columns = [str(i) for i in self.df.columns]

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the ``(image_tensor, target_dict)`` pair for row ``idx``.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        img_name = self.df.loc[idx, 'img']
        img_path = os.path.join('imgFot', img_name)

        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError('Could not read image: {}'.format(img_path))
        # OpenCV decodes to BGR; the pre-trained model expects RGB input.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        kpts = np.array(self.df.iloc[idx, 1:]).astype(np.float32).reshape([-1, 3])  # [x, y, visibility]

        orig_h, orig_w = image.shape[:2]
        if orig_w < 640:
            image = imutils.resize(image, width=640)
        new_h, new_w = image.shape[:2]

        target = self._build_target(kpts, (orig_w, orig_h), (new_w, new_h))
        img = self.to_tensor(image)

        return img, target

    def _build_target(self, kpts, orig_size, new_size):
        """Assemble the detection target for one single-person image.

        Args:
            kpts: ``[K, 3]`` array of ``x, y, visibility`` in the coordinates
                of the image as stored on disk.
            orig_size: ``(width, height)`` of the image before resizing.
            new_size: ``(width, height)`` of the image handed to the model.

        Returns:
            Dict with ``boxes`` ``[1, 4]``, ``labels`` ``[1]`` (int64) and
            ``keypoints`` ``[1, K, 3]``.
        """
        orig_w, orig_h = orig_size
        new_w, new_h = new_size

        # Work on a copy so the caller's array is left untouched, and move the
        # annotations into the coordinate frame of the (possibly resized) image.
        kpts = np.array(kpts, dtype=np.float32).reshape(-1, 3)
        kpts[:, 0] *= new_w / orig_w
        kpts[:, 1] *= new_h / orig_h

        # The single person is assumed to fill the frame: one full-image box.
        boxes = np.array([[0, 0, new_w, new_h]], dtype=np.float32)

        target = {}
        target["boxes"] = torch.from_numpy(boxes).type(self.dtype)
        # Class 0 is reserved for background, so the person class is 1.
        target["labels"] = torch.ones((1,), dtype=torch.int64, device=target["boxes"].device)
        target["keypoints"] = torch.from_numpy(kpts[np.newaxis]).type(self.dtype)
        return target

And I’m getting the model:

def get_model(num_keypoints=12):
    """Build a pre-trained Keypoint R-CNN with a fresh keypoint predictor.

    All pre-trained parameters are frozen; only the newly created keypoint
    predictor is left trainable.

    Args:
        num_keypoints: Number of keypoints the new predictor outputs.

    Returns:
        Tuple ``(model, device, dtype)``: the model (already moved to
        ``device``), the selected ``torch.device`` and the matching float
        tensor type.
    """
    # Local import so the snippet does not depend on an extra top-level import.
    from torchvision.models.detection.keypoint_rcnn import KeypointRCNNPredictor

    is_available = torch.cuda.is_available()
    device = torch.device('cuda:0' if is_available else 'cpu')
    dtype = torch.cuda.FloatTensor if is_available else torch.FloatTensor
    model = torchvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=True, min_size=640)

    # Freeze every pre-trained weight; the replacement head below is created
    # afterwards and therefore keeps requires_grad=True.
    for param in model.parameters():
        param.requires_grad = False

    # Replace the whole predictor rather than only its deconvolution layer, so
    # the predictor's own bookkeeping (out_channels) and weight initialisation
    # match the new keypoint count.
    in_channels = model.roi_heads.keypoint_predictor.kps_score_lowres.in_channels
    model.roi_heads.keypoint_predictor = KeypointRCNNPredictor(in_channels, num_keypoints)

    # Keep the model on the same device the caller is told to move inputs to.
    model.to(device)

    return model, device, dtype

To test the training pipeline:

# Smoke test of the training pipeline: push every batch produced by
# train_loader through the model once and print what the model returns.
for img, target in train_loader:
    # Cast the images to the working tensor type and move them to `device`.
    img = img.type(dtype).to(device)
    
    print(img.size())
    
    # NOTE(review): these three locals are never used below; the model call
    # receives the whole `target` dict instead.
    boxes = target['boxes']
    labels = target['labels']
    keypoints = target['keypoints']

    # NOTE(review): the batch's `target` is wrapped in a one-element list.
    # The detection models presumably expect one target dict per image, so
    # this depends on how train_loader collates samples - confirm.
    print(model(img, [target]))

But I got this error:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-25-62c932082619> in <module>
      8     keypoints = target['keypoints']
      9 
---> 10     print(model(img, [target]))
     11 

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    491             result = self._slow_forward(*input, **kwargs)
    492         else:
--> 493             result = self.forward(*input, **kwargs)
    494         for hook in self._forward_hooks.values():
    495             hook_result = hook(self, input, result)

~/anaconda3/lib/python3.7/site-packages/torchvision-0.3.0a0+427633a-py3.7-macosx-10.7-x86_64.egg/torchvision/models/detection/generalized_rcnn.py in forward(self, images, targets)
     50             features = OrderedDict([(0, features)])
     51         proposals, proposal_losses = self.rpn(images, features, targets)
---> 52         detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)
     53         detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)
     54 

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    491             result = self._slow_forward(*input, **kwargs)
    492         else:
--> 493             result = self.forward(*input, **kwargs)
    494         for hook in self._forward_hooks.values():
    495             hook_result = hook(self, input, result)

~/anaconda3/lib/python3.7/site-packages/torchvision-0.3.0a0+427633a-py3.7-macosx-10.7-x86_64.egg/torchvision/models/detection/roi_heads.py in forward(self, features, proposals, image_shapes, targets)
    527         """
    528         if self.training:
--> 529             proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)
    530 
    531         box_features = self.box_roi_pool(features, proposals, image_shapes)

~/anaconda3/lib/python3.7/site-packages/torchvision-0.3.0a0+427633a-py3.7-macosx-10.7-x86_64.egg/torchvision/models/detection/roi_heads.py in select_training_samples(self, proposals, targets)
    452         matched_idxs, labels = self.assign_targets_to_proposals(proposals, gt_boxes, gt_labels)
    453         # sample a fixed proportion of positive-negative proposals
--> 454         sampled_inds = self.subsample(labels)
    455         matched_gt_boxes = []
    456         num_images = len(proposals)

~/anaconda3/lib/python3.7/site-packages/torchvision-0.3.0a0+427633a-py3.7-macosx-10.7-x86_64.egg/torchvision/models/detection/roi_heads.py in subsample(self, labels)
    417 
    418     def subsample(self, labels):
--> 419         sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
    420         sampled_inds = []
    421         for img_idx, (pos_inds_img, neg_inds_img) in enumerate(

~/anaconda3/lib/python3.7/site-packages/torchvision-0.3.0a0+427633a-py3.7-macosx-10.7-x86_64.egg/torchvision/models/detection/_utils.py in __call__(self, matched_idxs)
     54 
     55             pos_idx_per_image = positive[perm1]
---> 56             neg_idx_per_image = negative[perm2]
     57 
     58             # create binary mask from indices

IndexError: index 3764 is out of bounds for dimension 0 with size 2001

I really need some help to fix this and be able to retrain this model on my data. Even if you are stuck too, any thoughts will be appreciated.

Adding this collate function to the data loaders (passed as `collate_fn=collate_fn` when constructing each `DataLoader`) solved my problem:

def collate_fn(batch):
    """Transpose a batch of samples into per-field tuples.

    A batch such as ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; nothing is stacked, so samples of
    different sizes can share a batch.
    """
    per_field = zip(*batch)
    return tuple(per_field)