ResNet50 + multi-input model + torch.cat concatenation

Hello everyone,

I am working on a model that converts a monocular frontal-view image into a bird's-eye view.
My plan is to use ResNet50 as a feature extractor for each object and to combine those features with the coordinates of the object's bounding box; the output would be the bounding-box coordinates in the bird's-eye view. I've built my model using the following code:

class DriverBEV(nn.Module):
    """Estimate object locations in the bird's-eye view.

    The model takes an image together with the coordinates of an object in
    the frontal view. A frozen, ImageNet-pretrained ResNet50 (with its final
    classification layer removed) encodes the image, a small fully connected
    stack encodes the 4 input coordinates, and the two encodings are
    concatenated and decoded into 4 output coordinates.
    """

    def __init__(self):
        super().__init__()
        # we are using ResNet50 to extract object features
        backbone = models.resnet50(pretrained=True)
        # drop the final fc layer so the network ends at the global average
        # pool; its output is (N, 2048, 1, 1)
        self.resnet50 = nn.Sequential(*list(backbone.children())[:-1])
        # disable gradient computation so the backbone is not fine-tuned
        # NOTE: BatchNorm layers inside the backbone still update their
        # running statistics in train mode; call ``self.resnet50.eval()``
        # after ``model.train()`` if the backbone must stay fully fixed.
        for param in self.resnet50.parameters():
            param.requires_grad = False
        # fully connected encoder for the 4 bounding-box coordinates
        self.fc1 = nn.Linear(4, 64)
        self.fc2 = nn.Linear(64, 256)
        self.fc3 = nn.Linear(256, 500)
        # decoder over the merged vector: 2048 image features + 500
        # coordinate features = 2548 inputs
        self.fc4 = nn.Linear(2548, 1024)
        self.fc5 = nn.Linear(1024, 512)
        self.fc6 = nn.Linear(512, 256)
        self.fc7 = nn.Linear(256, 128)
        self.fc8 = nn.Linear(128, 4)
        self.dropout = nn.Dropout(0.25)

    def forward(self, image, coordinates):
        """Predict 4 output coordinates for each sample in the batch.

        Args:
            image: batch of images fed to the ResNet50 backbone
                (presumably shaped (N, 3, H, W) -- confirm against the
                data loader).
            coordinates: tensor whose last dimension is 4 (the input size
                of ``fc1``), expected to be shaped (N, 4).

        Returns:
            Tensor of shape (N, 4) produced by ``fc8`` (no activation).
        """
        # extract img's features and collapse the trailing 1x1 spatial dims:
        # (N, 2048, 1, 1) -> (N, 2048), so they can be concatenated with the
        # 2-D coordinate encoding
        features = torch.flatten(self.resnet50(image), 1)

        coord_enco = self.dropout(F.relu(self.fc1(coordinates)))
        coord_enco = self.dropout(F.relu(self.fc2(coord_enco)))
        coord_enco = self.dropout(F.relu(self.fc3(coord_enco)))

        # join image and coordinate encodings along the feature dimension
        merge = torch.cat((features, coord_enco), 1)

        cood_dec = self.dropout(F.relu(self.fc4(merge)))
        cood_dec = self.dropout(F.relu(self.fc5(cood_dec)))
        cood_dec = self.dropout(F.relu(self.fc6(cood_dec)))
        cood_dec = self.dropout(F.relu(self.fc7(cood_dec)))
        # final layer is linear: raw coordinate regression output
        cood_dec = self.fc8(cood_dec)
        return cood_dec

I am not sure whether my model is correct; any suggestions or corrections would be appreciated.