RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x13056 and 153600x2048)

I have changed in_features=73984 in the first linear layer, but it still doesn't work. Please help me find where the problem is.

class AlexNet(nn.Module):
    def __init__(self, num_classes=2, init_weights=False):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=3, stride=3, padding=2),  # input[3, 32, 32]  output[96, 12, 12]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=1),  # output[96, 10, 10]
            nn.BatchNorm2d(96),
            nn.Conv2d(96, 256, kernel_size=5, padding=2),  # output[256, 10, 10]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=1),  # output[256, 8, 8]
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 384, kernel_size=3, padding=1),  # output[384, 8, 8]
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),  # output[384, 8, 8]
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),  # output[256, 8, 8]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=1),  # output[256, 6, 6],
        )
        self.classifier = nn.Sequential(
            nn.Linear(in_features=73984, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

Are you passing samples with varying input shapes to the model?
The new error now complains about an activation shape of [256, 9216], which fits the previous model architecture (while previously your activation was [24, 73984]).
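You can check the expected in_features with a dummy forward pass (a minimal sketch, assuming the 3x32x32 input from your code comments):

import torch

model = AlexNet()
dummy = torch.randn(1, 3, 32, 32)      # one sample in the commented input shape
out = model.features(dummy)
print(out.shape)                       # torch.Size([1, 256, 6, 6])
print(torch.flatten(out, 1).shape[1])  # 256*6*6 = 9216 -> in_features of the first linear layer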

Thanks pal, I made a stupid mistake.

Hi @ptrblck,
I solved it like that. Thank you!

Hi @ptrblck

I received the same error: mat1 and mat2 shapes cannot be multiplied (65536x8 and 18432x1024). Can you please let me know where the issue is?

Below is my discriminator

class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        # self.input_shape = input_shape
        # in_height, in_width = self.input_shape
        # patch_h, patch_w = int(in_height / 2 ** 4), int(in_width / 2 ** 4)
        # self.output_shape = (1, patch_h, patch_w)
        self.linear_size = ((96 // (2 ** (3 + 1))) ** 2) * (64 * (2 ** 3))
        self.features = nn.Sequential(
            # input size. (3) x 96 x 96
            nn.Conv2d(3, 64, (3, 3), (1, 1), (1, 1), bias=True),
            nn.LeakyReLU(0.2, True),
            # state size. (64) x 48 x 48
            nn.Conv2d(64, 64, (3, 3), (2, 2), (1, 1), bias=False),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, True),
            nn.Conv2d(64, 128, (3, 3), (1, 1), (1, 1), bias=False),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, True),
            # state size. (128) x 24 x 24
            nn.Conv2d(128, 128, (3, 3), (2, 2), (1, 1), bias=False),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, True),
            nn.Conv2d(128, 256, (3, 3), (1, 1), (1, 1), bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, True),
            # state size. (256) x 12 x 12
            nn.Conv2d(256, 256, (3, 3), (2, 2), (1, 1), bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, True),
            nn.Conv2d(256, 512, (3, 3), (1, 1), (1, 1), bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, True),
            # state size. (512) x 6 x 6
            nn.Conv2d(512, 512, (3, 3), (2, 2), (1, 1), bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, True),

            # nn.Linear(512 * 8 * 8, 1024),
            # nn.AdaptiveAvgPool2d((8,8)),
            nn.Linear(512 * 6 * 6, 1024),
            nn.LeakyReLU(0.2),
            nn.Linear(1024, 1),
            # nn.Conv2d(1024, 1, kernel_size=1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        out = self.features(x)
        # out = torch.flatten(out, 1)
        # out = self.classifier(out)
        return out

The error is raised in:

nn.Linear(512 * 6 * 6, 1024)

as this layer expects an activation input with 18432 input features.
Based on your code I guess you are missing an nn.Flatten module before this linear layer, which might directly solve the issue.
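For example, a minimal sketch of that fix (using the 512x6x6 activation shape implied by your in_features):

import torch
import torch.nn as nn

# nn.Flatten() turns the 4D conv activation [N, 512, 6, 6] into [N, 18432]
# before the first linear layer.
head = nn.Sequential(
    nn.Flatten(),
    nn.Linear(512 * 6 * 6, 1024),
    nn.LeakyReLU(0.2),
    nn.Linear(1024, 1),
    nn.Sigmoid(),
)

x = torch.randn(4, 512, 6, 6)   # last conv output for a 96x96 input
print(head(x).shape)            # torch.Size([4, 1])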

Yeah, thank you @ptrblck. It's working now.

But now the discriminator loss decreases to zero. Can you please let me know how I can fix this?

@ptrblck I trained the model with BCEWithLogitsLoss as the loss function, with a learning rate of 0.4 for the generator and 0.2 for the discriminator, using the Adam optimizer. But the discriminator loss goes to zero after 18 epochs. Any suggestions on how I can fix this issue? I have commented out the sigmoid layer in the discriminator.

No, unfortunately I don't have any specific tips to share for GAN training or for improving the generator in your case.

Hello, I’m working on a project and received the same error:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1 and 512x32)
I’m new to PyTorch and DL. Any help would be appreciated.

import torch
import torch.nn as nn

class MLP(nn.Module):
    def __init__(self, num_features, num_classes, num_hidden):
        super(MLP, self).__init__()

        #### YOUR CODE STARTS HERE ####

        # define a linear layer with output channels as 32
        self.hidden = nn.Linear(num_hidden, 32)
        # Define a ReLU activation
        self.relu = torch.nn.ReLU()
        # define a linear layer with output features corresponding to the number of classes
        self.classifier = nn.Linear(32, num_classes)

        #### YOUR CODE ENDS HERE ####

    def forward(self, x):
        # Use the layers defined above in a sequential way (follow the same as the layer definitions above) and
        # write the forward pass, use a relu activation after the hidden layer

        #### YOUR CODE STARTS HERE ####
        x = torch.tensor(x).unsqueeze(dim=0)
        self.layers = nn.Sequential(
            self.hidden,
            self.relu,
            self.classifier
        )
        out = self.layers(x)
        #### YOUR CODE ENDS HERE ####

        return out

The error message is most likely raised in a linear layer, as was already described earlier in this topic. Check which layer is raising the error, make sure the right activation is passed to it, and change the in_features of the linear layer in case they are set to a wrong value.
Based on your code it also seems like an assignment/homework, so I would be hesitant to solve it for you directly.

From what I’ve read on the internet, an MLP consists of 3 or more layers: 1 input, 1 hidden, and 1 output. But in this case, there is only 1 hidden layer and 1 output layer. I am a bit confused regarding the in_features and out_features of the hidden layer: should it be (num_hidden, num_features*num_samples*32) or just (num_hidden, 32)?

The number of samples (from your dataset) does not define the input or output features of the model or any other part of the model architecture.
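For example (a small sketch; the feature size 10 here is made up for illustration):

import torch
import torch.nn as nn

# in_features is the number of features per sample, not the number of samples:
# the same layer works for any batch size.
layer = nn.Linear(10, 32)
print(layer(torch.randn(1, 10)).shape)    # torch.Size([1, 32])
print(layer(torch.randn(128, 10)).shape)  # torch.Size([128, 32])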

I am still confused regarding the input and output features of the hidden layer. And what is a 1x1 matrix in the error? Any help would be appreciated.

The 1x1 shape in the error message is the shape of the activation tensor you are passing to the linear layer, i.e. a tensor with a single sample and a feature dimension of 1, while the linear layer expects an incoming feature dimension of 512.
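You can reproduce it directly (a minimal sketch using the shapes from your error message):

import torch
import torch.nn as nn

layer = nn.Linear(512, 32)   # the weight shows up as 512x32 (transposed) in the error message
x = torch.randn(1, 1)        # one sample with a single feature -> 1x1
layer(x)                     # RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1 and 512x32)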

This is the code and I am getting the error mat1 and mat2 shapes cannot be multiplied (128x18432 and 4608x256). Please help.

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2))

        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2))

        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2))

        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 256, 3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2))

        self.conv5 = nn.Sequential(
            nn.Conv2d(256, 512, 3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2))

        self.fc = nn.Sequential(
            nn.Linear(512*3*3, 256),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(256),
            nn.Dropout(0.4),
            nn.Linear(256, num_classes))

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        print(x.shape)
        x = x.view(x.shape[0], -1)
        x = self.fc(x)
        return x

The error is raised in the first linear layer of self.fc which expects 512*3*3=4608 features in the input activation, while your input activation x has 18432 features after the flattening operation via view.
Change the in_features of the first nn.Linear layer to 18432 and it should work.
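Alternatively, you could let PyTorch infer the value for you (a sketch, assuming a PyTorch version >= 1.8, where nn.LazyLinear is available):

import torch
import torch.nn as nn

# nn.LazyLinear infers in_features from the first forward pass,
# so the flattened size doesn't need to be hard-coded.
fc = nn.Sequential(
    nn.LazyLinear(256),
    nn.ReLU(inplace=True),
)
x = torch.randn(128, 18432)   # flattened activation from the error message
print(fc(x).shape)            # torch.Size([128, 256])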


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2))

        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2))

        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2))

        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 256, 3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2))

        self.conv5 = nn.Sequential(
            nn.Conv2d(256, 512, 3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2))

        self.fc = nn.Sequential(
            nn.Linear(512*6*6, 256),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(256),
            nn.Dropout(0.4),
            nn.Linear(256, num_classes))

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        print(x.shape)
        x = x.view(x.shape[0], -1)
        x = self.fc(x)
        return x

from torch.autograd import Variable

num_epochs = 10

def train(num_epochs, cnn, loaders):
    cnn.train()

    # Train the model
    total_step = len(loaders['training'])

    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(loaders['training']):

            # gives batch data, normalize x when iterating train_loader
            b_x = Variable(images)   # batch x
            b_y = Variable(labels)   # batch y
            b_x, b_y = b_x.cuda(), b_y.cuda()
            output = cnn(b_x)[0]
            print(output)
            loss = loss_func(output, b_y)

            # clear gradients for this training step
            optimizer.zero_grad()

            # backpropagation, compute gradients
            loss.backward()
            # apply gradients
            optimizer.step()

            if (i+1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))


train(num_epochs, cnn, loaders)

RuntimeError: size mismatch (got input: [2], target: [128])

Batch size = 128 and the input shape is 192x192x3.
Please help.

Batch size = 128 and the input shape is 192x192x3.

Please post the exact line that gives the error.