Data formats for training FasterRCNN detection

I am trying to train a FasterRCNN detection model.

As per the docs, I am feeding a list of images and an associated list of target dicts (each containing boxes and labels) into the forward pass.

My model was created as:

import torch
import torchvision.models.detection as models

model_test = models.__dict__['fasterrcnn_resnet50_fpn'](pretrained=False)
model_test.train()

For the images and targets I created some dummy data:

x = [torch.rand(3, 300, 400), torch.rand(3, 300, 400)]
a = torch.tensor([[100, 100, 200, 200], [100, 100, 250, 200]], dtype=torch.float64)
b = torch.tensor([1, 2], dtype=torch.int64)
targets = [{'boxes': a, 'labels': b},
           {'boxes': a, 'labels': b}]

When I pass this data into the model:

losses = model_test(images = x, targets=targets)

I get:

    RuntimeError                              Traceback (most recent call last)
<ipython-input-31-fbfe5b543968> in <module>
----> 1 losses = model_test(images = x, targets=targets)

/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    491             result = self._slow_forward(*input, **kwargs)
    492         else:
--> 493             result = self.forward(*input, **kwargs)
    494         for hook in self._forward_hooks.values():
    495             hook_result = hook(self, input, result)

/opt/conda/lib/python3.6/site-packages/torchvision/models/detection/generalized_rcnn.py in forward(self, images, targets)
     49         if isinstance(features, torch.Tensor):
     50             features = OrderedDict([(0, features)])
---> 51         proposals, proposal_losses = self.rpn(images, features, targets)
     52         detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)
     53         detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)

/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    491             result = self._slow_forward(*input, **kwargs)
    492         else:
--> 493             result = self.forward(*input, **kwargs)
    494         for hook in self._forward_hooks.values():
    495             hook_result = hook(self, input, result)

/opt/conda/lib/python3.6/site-packages/torchvision/models/detection/rpn.py in forward(self, images, features, targets)
    413         losses = {}
    414         if self.training:
--> 415             labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets)
    416             regression_targets = self.box_coder.encode(matched_gt_boxes, anchors)
    417             loss_objectness, loss_rpn_box_reg = self.compute_loss(

/opt/conda/lib/python3.6/site-packages/torchvision/models/detection/rpn.py in assign_targets_to_anchors(self, anchors, targets)
    272         for anchors_per_image, targets_per_image in zip(anchors, targets):
    273             gt_boxes = targets_per_image["boxes"]
--> 274             match_quality_matrix = self.box_similarity(gt_boxes, anchors_per_image)
    275             matched_idxs = self.proposal_matcher(match_quality_matrix)
    276             # get the targets corresponding GT for each proposal

/opt/conda/lib/python3.6/site-packages/torchvision/ops/boxes.py in box_iou(boxes1, boxes2)
    130     area2 = box_area(boxes2)
    131 
--> 132     lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
    133     rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]
    134 

RuntimeError: Expected object of scalar type Double but got scalar type Float for argument #2 'other'

Am I missing a field or a dimension?

Try using

a = torch.tensor([[100, 100, 200, 200], [100, 100, 250, 200]], dtype=torch.float)

The image tensors and the anchors generated by the RPN are float32, so the ground-truth boxes have to be float32 as well; passing them as torch.float64 triggers the dtype mismatch inside box_iou.
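For reference, here is a minimal end-to-end sketch with the dtype fixed (a sketch, assuming the torchvision detection API shown in the traceback; in training mode the forward pass returns a dict of losses):

import torch
import torchvision.models.detection as models

model_test = models.__dict__['fasterrcnn_resnet50_fpn'](pretrained=False)
model_test.train()

# Images: float32 tensors in [0, 1]; boxes: float32 in (x1, y1, x2, y2) format
x = [torch.rand(3, 300, 400), torch.rand(3, 300, 400)]
a = torch.tensor([[100, 100, 200, 200], [100, 100, 250, 200]], dtype=torch.float32)
b = torch.tensor([1, 2], dtype=torch.int64)
targets = [{'boxes': a, 'labels': b},
           {'boxes': a, 'labels': b}]

# In training mode the model returns the loss dict
# (loss_classifier, loss_box_reg, loss_objectness, loss_rpn_box_reg)
losses = model_test(x, targets)
print({k: v.item() for k, v in losses.items()})

In eval mode (model_test.eval()), the same call with just the images returns a list of per-image dicts with boxes, labels, and scores instead of losses.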