RuntimeError: Sizes of tensors must match except in dimension 1. Got 2208 and 512 (The offending index is 0)

Hi!

I am trying to combine two pre-trained models. Unfortunately, I get the error message specified in the title, and I do not understand it. Could you please help me figure out what I am doing wrong?

Thank you very much!

class Net(nn.Module):
    """Attempt to fuse two pretrained backbones into a single classifier.

    Both backbones are loaded with pretrained weights and frozen, each is
    re-wrapped in an ``nn.Sequential`` built from all of its child modules
    except the last one, and a two-layer head (256 -> 128 -> 81) is placed
    on top of the concatenated features.

    NOTE(review): this is the version that raises the RuntimeError quoted at
    the top of the post; it is kept verbatim as the reproduction case.
    """

    def __init__(self):
        """Load and freeze both backbones, then build the extractors and head."""
        super(Net,self).__init__()
        
        # presumably `models` is torchvision.models -- confirm against the imports.
        self.model_1 = models.resnet18(pretrained = True)
        self.model_2 = models.densenet161(pretrained = True)
        # Freeze every parameter of both backbones so they receive no gradients.
        for param in self.model_1.parameters():
            param.requires_grad = False
        for param in self.model_2.parameters():
            param.requires_grad = False
            
        # Keep every child module except the last one. Only registered child
        # modules survive this re-wrapping; anything the original model does
        # directly inside its own forward() is not carried over.
        self.feature1=nn.Sequential(*list(self.model_1.children())[:-1])
        self.feature2=nn.Sequential(*list(self.model_2.children())[:-1])
        # Head: expects a flattened input of width 256 and emits 81 outputs.
        # NOTE(review): 256 does not obviously match the combined width of the
        # two extractors' outputs -- confirm.
        self.fc1=nn.Linear(256,128)
        self.act = nn.ReLU()
        self.fc2=nn.Linear(128,81)
    
    def forward (self,x):
        """Run ``x`` through both extractors, concatenate, and classify.

        Returns the output of ``fc2`` applied to the fused features.
        """
        y1=self.feature1(x)
        y2=self.feature2(x)
        # Earlier attempt at post-processing the second extractor's output,
        # left disabled by the author:
#         y2 = F.relu(y2)
#         y2 = F.adaptive_avg_pool2d(y2, (1, 1))
#         y2 = y2.view(y2.size(0), -1)
        # Concatenate along dimension 2; torch.cat needs every other dimension
        # of y1 and y2 to agree.
        # NOTE(review): the quoted error reports sizes 2208 and 512, so this
        # call is presumably where it is raised -- confirm.
        y3=torch.cat((y1,y2),2)
        # Flatten everything after the batch dimension before the linear head.
        y3=y3.view(y3.size(0),-1)
        y3=self.fc1(y3)
        y3=self.act(y3)
        y3=self.fc2(y3)
        return y3
    
# Instantiate the combined model.
final_model = Net()

# Move the model to the GPU *before* constructing the optimizer, as the
# torch.optim documentation recommends, so the optimizer is built against the
# parameters that will actually be trained.
if torch.cuda.is_available():
    final_model.cuda()

# Multi-class classification loss and an Adam optimizer over the model's
# parameters (frozen parameters never receive gradients and are skipped).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(final_model.parameters(), lr=0.001)
        

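I would not recommend wrapping arbitrary models in an nn.Sequential container, since you would lose every operation that the model performs through the functional API inside its forward method.
While your approach might work for the resnet, the densenet would be missing these ops (e.g. the relu, adaptive average pooling, and flatten steps that its forward applies after the feature extractor).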

A better way would be to either derive custom modules from the original models or replace the classifier layer with, e.g., nn.Identity.

This code should work:

class Net(nn.Module):
    """Classifier that fuses features from two frozen pretrained backbones.

    A ResNet-18 and a DenseNet-161 are loaded with pretrained weights and
    frozen. Their final classification layers are replaced with
    ``nn.Identity`` so that each model's own ``forward`` (including any
    functional ops it applies) is kept intact and returns features instead
    of logits. The two feature tensors are concatenated along dimension 1
    and passed through a small trainable head.

    Args:
        num_classes: Number of outputs of the final linear layer. Defaults
            to 81, the value that was previously hard-coded.
    """

    def __init__(self, num_classes: int = 81):
        super(Net, self).__init__()

        self.model_1 = models.resnet18(pretrained=True)
        self.model_2 = models.densenet161(pretrained=True)

        # Freeze both backbones; only the head defined below is trainable.
        for backbone in (self.model_1, self.model_2):
            for param in backbone.parameters():
                param.requires_grad = False

        # Read the feature widths from the original heads *before* replacing
        # them, instead of hard-coding 512 + 2208. This keeps fc1 correct if
        # either backbone is swapped for another variant.
        width_1 = self.model_1.fc.in_features
        width_2 = self.model_2.classifier.in_features

        # Drop the classification layers; Identity passes features through.
        self.model_1.fc = nn.Identity()
        self.model_2.classifier = nn.Identity()

        # Aliases kept so existing attribute names (and state_dict keys)
        # remain available.
        self.feature1 = self.model_1
        self.feature2 = self.model_2

        self.fc1 = nn.Linear(width_1 + width_2, 128)
        self.act = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the head's output for the fused features of ``x``.

        ``x`` is fed to both backbones; their outputs are concatenated along
        dimension 1 and passed through ``fc1`` -> ReLU -> ``fc2``.
        """
        y1 = self.feature1(x)
        y2 = self.feature2(x)
        fused = torch.cat((y1, y2), 1)
        hidden = self.act(self.fc1(fused))
        return self.fc2(hidden)
    
# Smoke test: build the combined model and push one random batch through it.
final_model = Net()
# Two random 3-channel 224x224 inputs.
x = torch.randn((2, 3, 224, 224))
out = final_model(x)

Thank you very much @ptrblck!! As always, you’re super helpful! Have a great day!