I'm trying to implement Faster R-CNN with a ResNet-50 backbone. This is my model class:
import torch.nn as nn
import torchvision
from torchvision.models import resnet50
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.anchor_utils import AnchorGenerator
from torchvision.models.detection.backbone_utils import BackboneWithFPN

class MyFasterRCNN(nn.Module):
    def __init__(self, num_classes=5):
        super(MyFasterRCNN, self).__init__()
        backbone = resnet50(pretrained=False)
        # Single-channel input: replace the stock 3-channel stem convolution
        backbone.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        backbone_with_fpn = BackboneWithFPN(
            backbone,
            return_layers={'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'},
            in_channels_list=[256, 512, 1024, 2048],
            out_channels=256
        )
        # Define the Region Proposal Network (RPN) anchor generator
        rpn_anchor_generator = AnchorGenerator(
            sizes=((16, 64, 128, 256, 512),),  # added a larger size
            aspect_ratios=((0.1, 0.5, 1.0, 2.0, 4.0),) * 5  # added a larger aspect ratio for extremely tall objects
        )
        # Define the RoI pooling feature extractor
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(
            featmap_names=['0', '1', '2', '3'],
            output_size=7,
            sampling_ratio=2
        )
        # Create the Faster R-CNN model
        self.model = FasterRCNN(
            backbone_with_fpn,
            num_classes=num_classes,
            rpn_anchor_generator=rpn_anchor_generator,
            box_roi_pool=roi_pooler
        )

    def forward(self, x):
        # Training: x is an (images, targets) pair; inference: x is a list of images
        if len(x) == 2:
            images = [img for img in x[0]]
            targets = [element for element in x[1]]
            return self.model(images, targets)
        else:
            return self.model(x)
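During training the batch reaches forward as an (images, targets) pair; my DataLoader builds it with a custom collate function roughly like this (a simplified sketch; train_dataset and the helper name are illustrative, not the exact code):

from torch.utils.data import DataLoader

def collate_fn(batch):
    # Repack a list of (image, target) pairs into (list_of_images, list_of_targets)
    # so MyFasterRCNN.forward receives a 2-tuple during training
    images = [sample[0] for sample in batch]
    targets = [sample[1] for sample in batch]
    return images, targets

train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)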
My images are single-channel, so I have updated the first layer of the ResNet model accordingly.
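As a quick sanity check (a standalone snippet, not part of the model), the modified stem does accept single-channel input when called directly; BackboneWithFPN stores the ResNet body as .body:

import torch

m = MyFasterRCNN(num_classes=5)
dummy = torch.randn(1, 1, 512, 512)       # one single-channel 512x512 image
out = m.model.backbone.body.conv1(dummy)  # call the modified stem directly
print(out.shape)                          # torch.Size([1, 64, 256, 256])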
The images are resized to 512x512 before being fed to the model. This debug log shows the input values for a batch with batch size 2 (as far as I can tell, this matches the format the model expects):
images
[tensor([[[-2.1194, -2.1779, -2.2365, …, -1.7095, -1.3972, -1.1435], [-2.3…7532, …, -0.4799, -0.4995, -0.5190]]]), tensor([[[-0.5775, -0.5580, -0.5190, …, -0.7337, -0.7532, -0.7922], [0.5…6705, …, -2.2560, -2.2365, -2.2169]]])]
len(images)
2
images[0].shape
torch.Size([1, 512, 512])
images[1].shape
torch.Size([1, 512, 512])
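Those shapes and values come out of my dataset transform, which looks roughly like this (a simplified sketch assuming standard torchvision transforms; the normalization stats shown are placeholders, not the values I actually use):

from torchvision import transforms

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # keep a single channel
    transforms.Resize((512, 512)),                # shrink to 512x512
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),  # placeholder stats for the 1-channel data
])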
targets
[{'boxes': tensor([[310., 131., 378., 133.]]), 'labels': tensor([4])}, {'boxes': tensor([[104., 67., 126., 438.], [376., 105., 387., 437.]]), 'labels': tensor([1, 1])}]
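As far as I can tell this matches the target format torchvision's FasterRCNN documents: boxes as a FloatTensor[N, 4] in (x1, y1, x2, y2) order and labels as an Int64Tensor[N]. A quick check along these lines passes on the batch above (sketch):

import torch

for t in targets:
    boxes, labels = t['boxes'], t['labels']
    assert boxes.dtype == torch.float32 and boxes.ndim == 2 and boxes.shape[1] == 4
    assert labels.dtype == torch.int64 and labels.shape[0] == boxes.shape[0]
    # every box must satisfy x1 < x2 and y1 < y2
    assert (boxes[:, 2] > boxes[:, 0]).all() and (boxes[:, 3] > boxes[:, 1]).all()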
This is the error I get:
File "C:\Users\hidri\MLA\AM04\src\models\fasterrcnn.py", line 55, in forward
return self.model(images, targets)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\hidri\MLA\AM04\src\modules\object_detection_module.py", line 35, in forward
return self.model(x)
^^^^^^^^^^^^^
File "C:\Users\hidri\MLA\AM04\src\modules\object_detection_module.py", line 39, in training_step
predictions = self((images, targets))
^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\hidri\MLA\AM04\src\train_object_detection.py", line 83, in <module> trainer.fit(model=module, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)
RuntimeError: Given groups=1, weight of size [64, 1, 7, 7], expected input[2, 3, 800, 800] to have 1 channels, but got 3 channels instead
Can someone help me understand what I'm missing?