Classification model with mixed input types

Good afternoon,

I’m building a binary classification model based on 2 inputs: images and numeric data.
I’ve encountered a PyTorch tutorial on building such a model and built my model in a similar way:

class MixedNetwork(nn.Module):
    """Two-branch binary classifier over an image and a numeric landmark vector.

    Branches:
        image_features: every child module of ``models.resnet50()`` except the
            last one, chained in an ``nn.Sequential``. The ResNet is built with
            default arguments, i.e. no pretrained weights are requested here.
        landmark_features: bias-free MLP mapping 96 -> 192 -> 1000 with ReLU
            and dropout (p=0.25) after each linear layer.
        combined_features: MLP head mapping 3048 -> 512 -> 32 -> 1.

    The head's input width of 3048 is the 1000 landmark features plus the
    flattened image features (presumably 2048 from the truncated ResNet-50 --
    confirm if the backbone is ever swapped).
    """

    def __init__(self):
        super().__init__()

        # Image branch: the ResNet-50 stack with its final child removed.
        backbone = models.resnet50()
        backbone_layers = list(backbone.children())[:-1]
        self.image_features = nn.Sequential(*backbone_layers)

        # Numeric branch: two bias-free linear layers, each followed by
        # in-place ReLU and dropout.
        landmark_layers = [
            nn.Linear(in_features=96, out_features=192, bias=False),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.25),
            nn.Linear(in_features=192, out_features=1000, bias=False),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.25),
        ]
        self.landmark_features = nn.Sequential(*landmark_layers)

        # Fusion head operating on the concatenated branch outputs.
        head_layers = [
            nn.Linear(3048, 512),
            nn.ReLU(),
            nn.Linear(512, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        ]
        self.combined_features = nn.Sequential(*head_layers)

    def forward(self, image, landmarks):
        """Return the sigmoid of the fused head's output for each sample.

        Args:
            image: input passed to the image branch.
            landmarks: input passed to the landmark branch.

        Returns:
            Tensor produced by applying ``torch.sigmoid`` to the head output.
        """
        image_out = self.image_features(image)
        landmark_out = self.landmark_features(landmarks)

        # Collapse everything after the batch dimension before concatenating.
        flat_image = image_out.view(image_out.size(0), -1)
        flat_landmarks = landmark_out.view(landmark_out.size(0), -1)
        fused = torch.cat((flat_image, flat_landmarks), dim=1)

        return torch.sigmoid(self.combined_features(fused))

How can I make sure that the model is using pre-trained resnet50 weights?

Pass the desired set of pretrained weights to models.resnet50 via its weights argument (e.g. weights=models.ResNet50_Weights.DEFAULT), or use the older, now-deprecated pretrained=True argument to load the pretrained resnet.

Thanks a lot!

One more question - I tried another model, vgg16, but got the following error:

RuntimeError                              Traceback (most recent call last)
<ipython-input-17-8fdc3c23039e> in <module>
     11 
     12 trainer = Trainer(criterion,optimizer)
---> 13 trainer.fit(model, train_dataloader, val_dataloader,epochs = epochs)

7 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py in forward(self, input)
    112 
    113     def forward(self, input: Tensor) -> Tensor:
--> 114         return F.linear(input, self.weight, self.bias)
    115 
    116     def extra_repr(self) -> str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x26088 and 3048x32)

What needs to be changed to make it work?

torchvision.models.vgg16 uses an adaptive pooling layer to avoid shape mismatch errors in the linear layer. Could you describe what, if anything, you’ve changed in the model that raises the shape mismatch?

I didn’t change anything but the model name.
The line:

image_modules = list(models.resnet50().children())[:-1]

I just put vgg16 in place of resnet50 and tried to run it without any other changes.

Data dimensions are:
images = torch.Size([1, 3, 224, 224])
numeric data = torch.float32 torch.Size([1, 96])

It’s unclear to me how you are executing the model, so please post a minimal and executable code snippet to reproduce the error.
Based on the already posted code snippet I would assume you are breaking the forward pass by trying to wrap some modules into an nn.Sequential container without making sure the functional API calls from the original model’s forward method are also used.

Here are the training functions:

class Trainer():
    """Train/validate loop for a model called as ``model(images, landmarks)``.

    Batches are expected to be mappings with the keys ``"image"``,
    ``"landmarks"`` and ``"labels"``. The class reads two module-level names
    that must exist when its methods run: ``device`` (the target torch device)
    and ``BinaryAccuracy`` (the accuracy metric class).
    """

    def __init__(self, criterion=None, optimizer=None, schedular=None):
        """Store the collaborators used by the loops.

        Args:
            criterion: loss callable invoked as ``criterion(outputs, labels)``.
            optimizer: optimizer whose ``zero_grad()``/``step()`` drive training.
            schedular: stored as given. NOTE(review): nothing in this class
                ever steps it -- confirm whether that is intended.
        """
        self.criterion = criterion
        self.optimizer = optimizer
        self.schedular = schedular

    @staticmethod
    def _load_batch(sample):
        """Return ``(images, landmarks, labels)`` as float64 tensors on ``device``.

        Uses ``Tensor.to`` instead of ``.type(torch.cuda.DoubleTensor)`` so the
        same code works when ``device`` is the CPU.
        """
        return tuple(
            sample[key].to(device=device, dtype=torch.double)
            for key in ("image", "landmarks", "labels")
        )

    def train_batch_loop(self, model, train_dataloader):
        """Run one optimisation pass over ``train_dataloader``.

        Returns:
            ``(mean_loss, mean_accuracy)`` averaged over the number of batches.
        """
        train_loss = 0.0
        train_acc = 0.0
        metric = BinaryAccuracy().to(device)

        for sample in train_dataloader:
            # Move the batch to the training device as float64.
            images, landmarks, labels = self._load_batch(sample)

            self.optimizer.zero_grad()
            outputs = model(images, landmarks)
            loss = self.criterion(outputs, labels)

            loss.backward()
            self.optimizer.step()

            train_loss += loss.item()
            train_acc += metric(outputs, labels)

        num_batches = len(train_dataloader)
        return train_loss / num_batches, train_acc / num_batches

    def valid_batch_loop(self, model, val_dataloader):
        """Evaluate ``model`` over ``val_dataloader`` without updating it.

        Runs under ``torch.no_grad()`` so no autograd graph is built, and does
        not touch the optimizer (there are no gradients to clear here).

        Returns:
            ``(mean_loss, mean_accuracy)`` averaged over the number of batches.
        """
        valid_loss = 0.0
        valid_acc = 0.0
        metric = BinaryAccuracy().to(device)

        with torch.no_grad():
            for sample in val_dataloader:
                # Move the batch to the evaluation device as float64.
                images, landmarks, labels = self._load_batch(sample)

                outputs = model(images, landmarks)
                loss = self.criterion(outputs, labels)

                valid_loss += loss.item()
                valid_acc += metric(outputs, labels)

        num_batches = len(val_dataloader)
        return valid_loss / num_batches, valid_acc / num_batches

    def fit(self, model, trainloader, validloader, epochs):
        """Alternate training and validation for ``epochs`` epochs.

        Whenever the mean validation loss is less than or equal to the best
        seen so far, the model's ``state_dict`` is written to
        ``'ColabCatPainModel.pt'``. Per-epoch metrics are printed.
        """
        # float("inf") rather than np.Inf: the latter alias was removed in NumPy 2.0.
        valid_min_loss = float("inf")

        for i in range(epochs):

            model.train()  # training mode (enables dropout)
            avg_train_loss, avg_train_acc = self.train_batch_loop(model, trainloader)

            model.eval()  # evaluation mode (disables dropout, freezes batch-norm statistics)
            avg_valid_loss, avg_valid_acc = self.valid_batch_loop(model, validloader)

            if avg_valid_loss <= valid_min_loss:
                print("Valid_loss decreased {} --> {}".format(valid_min_loss, avg_valid_loss))
                torch.save(model.state_dict(), 'ColabCatPainModel.pt')
                valid_min_loss = avg_valid_loss

            print("Epoch : {} Train Loss : {:.6f} Train Acc : {:.6f}".format(i+1, avg_train_loss, avg_train_acc))
            print("Epoch : {} Valid Loss : {:.6f} Valid Acc : {:.6f}".format(i+1, avg_valid_loss, avg_valid_acc))

Execution:

# Build the network in float64, then place it on the GPU when one is available.
# `device` stays a module-level name because Trainer's loops read it.
model = MixedNetwork().double()
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)
print(device)

# Loss on probabilities (the model already applies a sigmoid) and Adam optimiser.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
epochs = 10
batch_size = 10  # not referenced in this snippet

trainer = Trainer(criterion=criterion, optimizer=optimizer)
trainer.fit(model, train_dataloader, val_dataloader, epochs=epochs)

@ptrblck

Your code is unfortunately not executable, so please update it so that I could run it without any dataset dependencies to reproduce the issue and debug it further.

1 Like