I need to know which feature location every anchor box corresponds to, so I modified
torchvision.models.detection.rpn.AnchorGenerator in this way:
class AnchorGeneratorMP(AnchorGenerator):
    """AnchorGenerator that also returns, per feature-map level, how many
    anchors that level contributed, so each anchor can be mapped back to its
    feature location.

    NOTE(review) on the count question: with the torchvision FasterRCNN
    defaults (sizes=((32,), (64,), (128,), (256,), (512,)) and 3 aspect
    ratios per level), each FPN level uses exactly ONE size tuple, so every
    level produces len(sizes[lvl]) * len(aspect_ratios[lvl]) = 1 * 3 = 3
    anchors per location — not 15. The 15 anchor shapes are spread across
    the 5 pyramid levels, which is why each anchor_len entry is 3x the
    number of locations of its feature map.
    """

    def __init__(self, sizes=(128, 256, 512), aspect_ratios=(0.5, 1.0, 2.0)):
        super(AnchorGeneratorMP, self).__init__(sizes, aspect_ratios)

    def forward(self, image_list, feature_maps):
        # Local import keeps this block self-contained; the original paste
        # used `t.cat`, presumably `torch` imported as `t`.
        import torch

        # Spatial size (H, W) of every feature-map level.
        grid_sizes = tuple(feature_map.shape[-2:] for feature_map in feature_maps)
        image_size = image_list.tensors.shape[-2:]
        # BUG FIX: the pasted code divided a torch.Size by a tuple
        # (`image_size / g`); the stride must be computed per dimension.
        strides = tuple(
            (image_size[0] // g[0], image_size[1] // g[1]) for g in grid_sizes
        )
        # BUG FIX: feature_maps is a list of tensors and has no `.device`;
        # use the device of the first feature map.
        self.set_cell_anchors(feature_maps[0].device)
        anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides)
        # BUG FIX: the pasted code dropped the `[]` list literals on the next
        # two accumulators.
        anchors = []
        for i, (image_height, image_width) in enumerate(image_list.image_sizes):
            anchors_in_image = []
            for anchors_per_feature_map in anchors_over_all_feature_maps:
                anchors_in_image.append(anchors_per_feature_map)
            anchors.append(anchors_in_image)
        # here is the only place where I modified, except the returns
        # anchor_len[lvl] == len(sizes[lvl]) * len(aspect_ratios[lvl]) * H * W
        anchor_len = [len(anchor) for anchor in anchors_over_all_feature_maps]
        anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors]
        return anchors, anchor_len
Following the
FasterRCNN implementation in PyTorch, the defaults are
# Default FasterRCNN anchor configuration: five single-size pyramid levels,
# each paired with the same three aspect ratios.
anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
aspect_ratios = tuple((0.5, 1.0, 2.0) for _ in anchor_sizes)
So I expected it to output 15 anchors per feature location. What confuses me is that each
anchor_len is only 3 times the total number of locations of the corresponding feature map. What am I missing?