RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x24057 and 33x512)

Hi people,
I'm Vimal and I'm new to PyTorch. I'm manually converting a model from TensorFlow to PyTorch, so I constructed the model skeleton and loaded the weights. During prediction I got the error mentioned in the title. Kindly help me out, people.

I've attached the model structure for reference:

# construct the skeleton for the PyTorch model
import torch
import torch.nn as nn

class Senic(nn.Module):
    def __init__(self):
        super().__init__()

        # initialize network layers
        self.conv2d_1 = nn.Conv2d(in_channels=3, out_channels=10, kernel_size=(5, 5))
        self.activation1 = nn.Tanh()
        self.maxpool1 = nn.MaxPool2d(kernel_size=(2, 2))
        self.dropout1 = nn.Dropout2d(p=0.4)

        self.conv2d_2 = nn.Conv2d(in_channels=10, out_channels=96, kernel_size=(4, 4))
        self.activation2 = nn.SELU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=(2, 2))
        self.dropout2 = nn.Dropout2d(p=0.2)

        self.conv2d_3 = nn.Conv2d(in_channels=96, out_channels=33, kernel_size=(3, 3))
        self.activation3 = nn.SELU()
        self.maxpool3 = nn.MaxPool2d(kernel_size=(2, 2))
        self.dropout3 = nn.Dropout2d(p=0.2)

        self.dense1 = nn.Linear(in_features=33, out_features=512)
        self.activation4 = nn.ReLU()
        self.dropout4 = nn.Dropout(p=0.2)

        self.dense2 = nn.Linear(in_features=512, out_features=3)
        self.output = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.conv2d_1(x)
        x = self.activation1(x)
        x = self.maxpool1(x)
        x = self.dropout1(x)

        x = self.conv2d_2(x)
        x = self.activation2(x)
        x = self.maxpool2(x)
        x = self.dropout2(x)

        x = self.conv2d_3(x)
        x = self.activation3(x)
        x = self.maxpool3(x)
        x = self.dropout3(x)

        x = nn.Flatten()(x)

        x = self.dense1(x)
        x = self.activation4(x)
        x = self.dropout4(x)

        x = self.dense2(x)
        x = self.output(x)

        return x

# pack the model
senic_model = Senic()

What's the input shape of your model?
Based on the error message it seems self.dense1's in_features do not match the number of features in the incoming activation.

I took one image for testing; its shape is [1, 3, 240, 240].
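
A quick way to see where the mismatch comes from with that input shape is to run just the convolutional stack on a dummy tensor and print the activation right before flattening. A minimal sketch, reusing the Senic class defined above:

import torch

model = Senic()
model.eval()  # dropout becomes a no-op in eval mode
dummy = torch.randn(1, 3, 240, 240)  # same shape as the test image

with torch.no_grad():
    x = model.maxpool1(model.activation1(model.conv2d_1(dummy)))
    x = model.maxpool2(model.activation2(model.conv2d_2(x)))
    x = model.maxpool3(model.activation3(model.conv2d_3(x)))

print(x.shape)             # torch.Size([1, 33, 27, 27])
print(x.flatten(1).shape)  # torch.Size([1, 24057]) -> dense1 needs in_features=24057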

I figured out one way: using nn.LazyLinear. With it you do not need to pass the input feature size if you do not know it. Mind that, even though the feature has existed for a long time, it still emits a warning:

Warning: Lazy modules are a new feature under heavy development so changes to the API or functionality can happen at any moment.

# construct the skeleton for PyTorch model
import torch
import torch.nn as nn
from torchinfo import summary


class Senic(nn.Module):
    def __init__(self):
        super().__init__()
        # initialize network layers
        self.conv2d_1 = nn.Conv2d(in_channels=3, out_channels=10, kernel_size=(5, 5))
        self.activation1 = nn.Tanh()
        self.maxpool1 = nn.MaxPool2d(kernel_size=(2, 2))
        self.dropout1 = nn.Dropout2d(p=0.4)

        self.conv2d_2 = nn.Conv2d(in_channels=10, out_channels=96, kernel_size=(4, 4))
        self.activation2 = nn.SELU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=(2, 2))
        self.dropout2 = nn.Dropout2d(p=0.2)

        self.conv2d_3 = nn.Conv2d(in_channels=96, out_channels=33, kernel_size=(3, 3))
        self.activation3 = nn.SELU()
        self.maxpool3 = nn.MaxPool2d(kernel_size=(2, 2))
        self.dropout3 = nn.Dropout2d(p=0.2)

        self.dense1 = nn.LazyLinear(out_features=512)  # <--- this is new
        self.activation4 = nn.ReLU()
        self.dropout4 = nn.Dropout(p=0.2)

        self.dense2 = nn.Linear(in_features=512, out_features=3)
        self.output = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.conv2d_1(x)
        x = self.activation1(x)
        x = self.maxpool1(x)
        x = self.dropout1(x)

        x = self.conv2d_2(x)
        x = self.activation2(x)
        x = self.maxpool2(x)
        x = self.dropout2(x)
        x = self.conv2d_3(x)
        x = self.activation3(x)
        x = self.maxpool3(x)
        x = self.dropout3(x)

        x = nn.Flatten()(x)

        x = self.dense1(x)
        x = self.activation4(x)
        x = self.dropout4(x)

        x = self.dense2(x)
        x = self.output(x)

        return x

# example: pack the model and summarize it with a dummy input
image = torch.randn(1, 3, 224, 224)
senic_model = Senic()
summary(senic_model, input_data=image, verbose=2)

@Mah_Neh’s suggestion is valid, but alternatively you could also manually check the activation shape before the Flatten() module is used, which will show [batch_size, 33, 27, 27]. Your self.dense1 layer currently expects an activation with 33 input features, which will cause the error, since the activation has 33 * 27 * 27 = 24057 features. Use either the lazy layer or set in_features=24057 in self.dense1.
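
For example, a minimal sketch of the second option (the exact in_features value assumes the [1, 3, 240, 240] input mentioned above):

import torch
import torch.nn as nn

# dense1 with the correct input size for 240x240 images: 33 * 27 * 27 = 24057
dense1 = nn.Linear(in_features=33 * 27 * 27, out_features=512)

flat = torch.randn(1, 24057)  # stands in for the flattened conv activation
print(dense1(flat).shape)     # torch.Size([1, 512]) -- no shape mismatch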


@ptrblck and @Mah_Neh, thanks for your suggestions, people. I will make those changes and test again.