Need some help using the new KeypointRCNN

Hi!

I’m trying to create a model for hand keypoint detection, using this dataset.

To start, I just wanted to test the pipeline, so here’s what I’m doing:

import torch 
import torchvision 


# create model
model = torchvision.models.detection.keypointrcnn_resnet50_fpn(
    pretrained=True, num_classes=0, num_keypoints=21, pretrained_backbone=True)

# create image placeholder
im = torch.rand(3, 64, 64, dtype=torch.float32)

# create keypoints placeholder
kp = torch.rand(1, 21, 3, dtype=torch.float32)
kp[:, :, -1] = 1.

# Build the targets dict.
# I just want to focus on keypoint detection, so I'm setting the box to
# the full image size and making it belong to the single class.
targets = {'boxes': torch.tensor([[0, 0, 64, 64]], dtype=torch.float32),
           'labels': torch.zeros(1, 1, dtype=torch.int64),
           'keypoints': kp}


model([im], [targets])

I get the following error:
IndexError: index 3037 is out of bounds for dimension 0 with size 2001

Could someone explain?
Thanks a lot!

I’m having some problems re-training this model too. Any changes in your code since this post?

Unfortunately, no. I moved on to another topic while waiting for this to be fixed.

I don’t know if this will help you, but here is a piece of my code, which hits a different error.

I’m getting the model for predicting 12 keypoints like this:

import torch
import torchvision
from torch import nn

def get_model():
    is_available = torch.cuda.is_available()
    device = torch.device('cuda:0' if is_available else 'cpu')
    dtype = torch.cuda.FloatTensor if is_available else torch.FloatTensor

    model = torchvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=True)

    # freeze all the pretrained weights
    for param in model.parameters():
        param.requires_grad = False

    # replace the final deconv layer so the head predicts 12 keypoint heatmaps
    # (512 is the channel width of the default KeypointRCNNPredictor)
    out = nn.ConvTranspose2d(512, 12, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    model.roi_heads.keypoint_predictor.kps_score_lowres = out

    return model, device, dtype

I’m getting data from a custom dataset class like this:

def __getitem__(self, idx):
    img_name = self.df.loc[idx, 'img']
    img_path = os.path.join('imgFot', img_name)

    # image = PIL.Image.open(img_path)
    image = cv2.imread(img_path)

    # if the image has an alpha channel, get rid of it
    if image.shape[2] == 4:
        image = image[:, :, 0:3]

    kpts = np.array(self.df.iloc[idx, 1:]).astype(np.float32).reshape([-1, 3])  # [x, y, visibility]

    # crop the image to the keypoints' bounding box, with a 10 px margin
    xmin = np.min(kpts[:, 0])
    ymin = np.min(kpts[:, 1])
    xmax = np.max(kpts[:, 0])
    ymax = np.max(kpts[:, 1])
    image = image[int(ymin - 10):int(ymax + 10), int(xmin - 10):int(xmax + 10)]
    h, w = image.shape[:2]

    boxes = np.array([0, 0, w, h])  # take the whole image

    target = {}
    target["boxes"] = torch.from_numpy(boxes)
    target["labels"] = torch.ones((1,), dtype=torch.int64)
    target["keypoints"] = torch.from_numpy(kpts)

    # convert BGR (OpenCV) to RGB, then to a tensor
    img_copy = image.copy()
    img_copy = cv2.cvtColor(img_copy, cv2.COLOR_BGR2RGB)
    img = PIL.Image.fromarray(img_copy)
    img = self.to_tensor(img)

    return img, target

Then, to test the pipeline:

model, device, dtype = get_model()
model.to(device)
train_set = FotDataset(csv_name='test.csv')
train_loader = DataLoader(train_set, batch_size=1, num_workers=2, shuffle=True)

test = train_set[0]


img = test[0].to(device).type(dtype)
target = test[1]

model([img], [target])

I got this error:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-116-63d81987447a> in <module>
      2 target = test[1]
      3 
----> 4 model([img], [target])

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    491             result = self._slow_forward(*input, **kwargs)
    492         else:
--> 493             result = self.forward(*input, **kwargs)
    494         for hook in self._forward_hooks.values():
    495             hook_result = hook(self, input, result)

~/anaconda3/lib/python3.7/site-packages/torchvision-0.3.0a0+427633a-py3.7-macosx-10.7-x86_64.egg/torchvision/models/detection/generalized_rcnn.py in forward(self, images, targets)
     45             raise ValueError("In training mode, targets should be passed")
     46         original_image_sizes = [img.shape[-2:] for img in images]
---> 47         images, targets = self.transform(images, targets)
     48         features = self.backbone(images.tensors)
     49         if isinstance(features, torch.Tensor):

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    491             result = self._slow_forward(*input, **kwargs)
    492         else:
--> 493             result = self.forward(*input, **kwargs)
    494         for hook in self._forward_hooks.values():
    495             hook_result = hook(self, input, result)

~/anaconda3/lib/python3.7/site-packages/torchvision-0.3.0a0+427633a-py3.7-macosx-10.7-x86_64.egg/torchvision/models/detection/transform.py in forward(self, images, targets)
     38                                  "of shape [C, H, W], got {}".format(image.shape))
     39             image = self.normalize(image)
---> 40             image, target = self.resize(image, target)
     41             images[i] = image
     42             if targets is not None:

~/anaconda3/lib/python3.7/site-packages/torchvision-0.3.0a0+427633a-py3.7-macosx-10.7-x86_64.egg/torchvision/models/detection/transform.py in resize(self, image, target)
     72 
     73         bbox = target["boxes"]
---> 74         bbox = resize_boxes(bbox, (h, w), image.shape[-2:])
     75         target["boxes"] = bbox
     76 

~/anaconda3/lib/python3.7/site-packages/torchvision-0.3.0a0+427633a-py3.7-macosx-10.7-x86_64.egg/torchvision/models/detection/transform.py in resize_boxes(boxes, original_size, new_size)
    133     ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(new_size, original_size))
    134     ratio_height, ratio_width = ratios
--> 135     xmin, ymin, xmax, ymax = boxes.unbind(1)
    136     xmin = xmin * ratio_width
    137     xmax = xmax * ratio_width

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

Try building a DataLoader and adding this collate function:

def collate_fn(batch):
    return tuple(zip(*batch))

If you look at the output of the DataLoader in this Colab notebook from the PyTorch docs, the batch comes out as one big tuple: a tuple of images and a tuple of targets.
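For example, here is a minimal sketch of wiring it up, reusing the FotDataset, model, and device from the posts above:

from torch.utils.data import DataLoader

def collate_fn(batch):
    # [(img1, tgt1), (img2, tgt2)] -> ((img1, img2), (tgt1, tgt2))
    return tuple(zip(*batch))

train_set = FotDataset(csv_name='test.csv')
train_loader = DataLoader(train_set, batch_size=2, num_workers=2,
                          shuffle=True, collate_fn=collate_fn)

model.train()
images, targets = next(iter(train_loader))
images = [img.to(device) for img in images]
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
loss_dict = model(images, targets)  # in training mode this returns a dict of losses

As an aside, the boxes.unbind(1) error in the traceback above happens because target["boxes"] is a 1-D tensor: torchvision expects boxes of shape [N, 4], so it needs an extra dimension, e.g. np.array([[0, 0, w, h]]).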

Check out my code for fine-tuning the keypointrcnn_resnet50_fpn model for detecting 2 keypoints.
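
In short, the fine-tuning loop looks roughly like this (a sketch, not the exact linked code; the optimizer settings are placeholders, and get_model / train_loader come from the posts above):

import torch

model, device, dtype = get_model()
model.to(device)
model.train()

# after get_model(), only the new deconv layer has requires_grad=True
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9)

for epoch in range(10):
    for images, targets in train_loader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        loss = sum(loss_dict.values())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()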


It works!