RuntimeError: size mismatch, m1: [20 x 2048], m2: [25088 x 4096] at /opt/conda/conda-bld/pytorch_1565272269120/work/aten/src/THC/generic/THCTensorMathBlas.cu:273

import os
from PIL import Image
# import numpy as np
from torchvision import transforms
import torch.nn.functional as F
import pandas as pd
import torch
import torch.utils.data as utils
import torch.nn as nn
from torchsummary import summary



## list of images
Images_train = []
Images_val = []

############### IF IT'S A CAT, it's '0' and IF IT'S A DOG, it's '1' #######################

# Labels = []



## defining required transforms or preprocessing on the images
data_transforms = transforms.Compose([
        transforms.RandomResizedCrop(64),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])


## Get the list of all images 
        # files = os.listdir("dataset/val/cats")
        # files.remove(".DS_Store")
        # print(files[0])



## reading the images applying the transformations, converting each of them to pytorch tensors and storing them in images list
        # for i in files:
        # 	image = os.path.join("dataset/val/cats",i)
        # 	im = Image.open(image)
        # 	# imm = np.asarray(im)
        # 	im = data_transforms(im)
        # 	# Images.append(im)
        # 	# Labels.append(0)
        # 	Images_val.append([im, 0])

# print(len(Images_val[0]))
# print(len(Images_val[0][0]))
# print(len(Images_val[0][0][0]))

#### after applying the transforms, input shape: 32*32*3


files = os.listdir("dataset/train/cats")
files.remove(".DS_Store")
print(files[0])


for i in files:
	image = os.path.join("dataset/train/cats",i)
	im = Image.open(image)
	# imm = np.asarray(im)
	im = data_transforms(im)
	# Images.append(im)
	# Labels.append('cat')
	Images_train.append([im, 0])


        # files = os.listdir("dataset/val/dogs")
        # files.remove(".DS_Store")
        # print(files[0])


        # for i in files:
        # 	image = os.path.join("dataset/val/dogs",i)
        # 	im = Image.open(image)
        # 	# imm = np.asarray(im)
        # 	im = data_transforms(im)
        # 	# Images.append(im)
        # 	# Labels.append('dog')
        # 	Images_val.append([im, 1])


files = os.listdir("dataset/train/dogs")
files.remove(".DS_Store")
print(files[0])



for i in files:
	image = os.path.join("dataset/train/dogs",i)
	im = Image.open(image)
	# imm = np.asarray(im)
	im = data_transforms(im)
	# Images.append(im)
	# Labels.append('dog')
	Images_train.append([im, 1])




# df = pd.DataFrame(Images, columns=['Image', 'Label'])
# print(df.head())

# print(Images_val[0])
# print(Images_val[0][0])
# print(Images_val[0][0][0])

print("Number of training samples: ", len(Images_train), "\n")


batch_size = 20
# n_iters = 800
# num_epochs = n_iters / (len(Images_train) / batch_size)
# num_epochs = 10
#tensor_x = torch.stack([torch.Tensor(i) for i in Images_val])
train_loader = torch.utils.data.DataLoader(dataset=Images_train, 
                                           batch_size=batch_size, 
                                           shuffle=True)

        # test_loader = torch.utils.data.DataLoader(dataset=Images_val, 
        #                                           batch_size=batch_size, 
        #                                           shuffle=False)


class VGG(torch.nn.Module):
    
    #Our batch shape for input x is (3, 64, 64)
    
    def __init__(self):
        super(VGG, self).__init__()
        
        #Input channels = 3, output channels = 96
        self.conv1 = torch.nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.conv2 = torch.nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.pool1 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.conv3 = torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv4 = torch.nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.pool2 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.conv5 = torch.nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv6 = torch.nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.conv7 = torch.nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.pool3 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.conv8 = torch.nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.conv9 = torch.nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv10 = torch.nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.pool4 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.conv11 = torch.nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv12 = torch.nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv13 = torch.nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.pool5 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.fc1 = torch.nn.Linear(512*7*7, 4096)
        self.fc2 = torch.nn.Linear(4096, 4096)
        self.fc3 = torch.nn.Linear(4096, 2)

        
    
    def forward(self, x):        
        
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool1(x)

        x = self.conv3(x)
        x = F.relu(x)
        x = self.conv4(x)
        x = F.relu(x)
        x = self.pool2(x)

        x = self.conv5(x)
        x = F.relu(x)
        x = self.conv6(x)
        x = F.relu(x)
        x = self.conv7(x)
        x = F.relu(x)
        x = self.pool3(x)

        x = self.conv8(x)
        x = F.relu(x)
        x = self.conv9(x)
        x = F.relu(x)
        x = self.conv10(x)
        x = F.relu(x)
        x = self.pool4(x)

        x = self.conv11(x)
        x = F.relu(x)
        x = self.conv12(x)
        x = F.relu(x)
        x = self.conv13(x)
        x = F.relu(x)
        x = self.pool5(x)

        x = torch.flatten(x, 1)

        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)

        return(x)


model = VGG()



device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)


# summary(model, (3, 224, 224))


criterion = nn.CrossEntropyLoss()

learning_rate = 0.1

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

iter = 0
for epoch in range(0, 20):
    print("epoch: ",epoch)
    for i, (images, labels) in enumerate(train_loader):
        # Load images
        # images = images.requires_grad_()
        images = images.requires_grad_().to(device)
        labels = labels.to(device)

        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass to get output/logits
        outputs = model(images)

        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()

        iter += 1
        # print(iter)

        if iter % 200 == 0:
            # Calculate Accuracy         
            correct = 0
            total = 0
            # Iterate through test dataset
            for images, labels in test_loader:
                # Load images
                # images = images.requires_grad_()
                images = images.requires_grad_().to(device)
                labels = labels.to(device)

                # Forward pass only to get logits/output
                outputs = model(images)

                # Get predictions from the maximum value
                _, predicted = torch.max(outputs.data, 1)

                # Total number of labels
                total += labels.size(0)

                # Total correct predictions
                # correct += (predicted == labels).sum()
                if torch.cuda.is_available():
                    correct += (predicted.cpu() == labels.cpu()).sum()
                else:
                    correct += (predicted == labels).sum()


            accuracy = 100 * correct / total

            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))
            # print("hello")

RuntimeError:
as explained in the heading.
I have no clue about the error. I am a student, learning. Please help me to understand where did I go wrong. Thank You.

The problem comes from your first nn.Linear layer, which has input dimension 512 * 7 * 7 = 25088 and should have 2048, according to your error at least.

Where does the 512 * 7 * 7 come from?

I not an expert but I think the first linear layer takes input from the last pooling layer. Now, I’ve hand calculated the output of last pooling layer and it is coming out as 512 * 7 * 7. Size of each feature map is 7 * 7 and 512 such feature maps.
Input to first layer is : 224 * 224 * 3

Maybe I’m wrong, but this is my calculation:

Output dimension of each layer:
Conv L1: 224 * 224 * 64
Conv L2: 224 * 224 * 64
Pool L1: 112 * 112 * 64

Conv L3: 112 * 112 * 128
Conv L4: 112 * 112 * 128
Pool L2: 56 * 56 * 128

Conv L5: 56 * 56 * 256
Conv L6: 56 * 56 * 256
Conv L7: 56 * 56 * 256
Pool L3: 28 * 28 * 256

Conv L8: 28 * 28 * 512
Conv L9: 28 * 28 * 512
Conv L10: 28 * 28 * 512
Pool L4: 14 * 14 * 512

Conv L11: 14 * 14 * 512
Conv L12: 14 * 14 * 512
Conv L13: 14 * 14 * 512
Pool L5: 7 * 7 * 512

Output of Conv Layer: (I + 2*P - K)/S + 1 ; I: input width(assuming height = width), P: padding, K: Kernel size, S: Stride.
Output of Pooling Layer: (I - M)/S + 1; I: input width, M: Pooling Kernel size; S: Stride.

Please let me know where am I wrong. And how should I decide the input for the first Linear Layer. Thank You.

Well, as you see, it’s a bit hard to make the computation by hand :sweat_smile:

The quickest way to have working code is just swapping the 512 * 7 * 7 out by 2048… However, that does not really tell you if your network is doing what you want it to.

I recommend printing the shape of every layer’s input and ouput to verify your calculations, which look fine after a very quick look. Then you’ll know what happens and probably why as well!

Ok. I had never thought about printing the shape :yum: Thanks, let me analyze again.

I’m so sorry, I was using wrong input dimension.
Thank You so much for showing the support.
Problem has been immediately identified after printing the shape of x.

1 Like

Hi, I have the same problem. My input tesnor has this shape : torch.Size([1, 3, 224, 224]) and the shape of the last pooling layer is torch.Size([1, 512, 7, 7]). But I couldnot figure out where is the problem. thanks.

Most likely the shape mismatch is raised by the first linear layer after you are flattening the activation via:

x = x.view(x.size(0), -1)

If your last activation output has a shape of [1, 512, 7, 7], the number of input features of the following linear layer would have to be [512*7*7 = 25088].

Feel free to post the model definition in case you get stuck. :wink:

Thanks for your help. Yeah the problem was that I forgot to add this x = x.view(x.size(0), -1) to my model. :expressionless::see_no_evil:

Hi friend,

Sorry to bother you under this question. I have met similar error as below:

Traceback (most recent call last):
  File "D:/Code/Pconv/calculate param.py", line 64, in <module>
    flops7, params7 = profile(model7, inputs=(input,),verbose=False)
  File "C:\Users\zhy34\anaconda3\lib\site-packages\thop\profile.py", line 188, in profile
    model(*inputs)
  File "C:\Users\zhy34\anaconda3\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "D:\Code\Pconv\models\resnet_AC.py", line 161, in forward
    out = self.linear(out)
  File "C:\Users\zhy34\anaconda3\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "C:\Users\zhy34\anaconda3\lib\site-packages\torch\nn\modules\linear.py", line 91, in forward
    return F.linear(input, self.weight, self.bias)
  File "C:\Users\zhy34\anaconda3\lib\site-packages\torch\nn\functional.py", line 1674, in linear
    ret = torch.addmm(bias, input, weight.t())
RuntimeError: size mismatch, m1: [1 x 25088], m2: [512 x 7] at ..\aten\src\TH/generic/THTensorMath.cpp:41

Process finished with exit code 1

The error comes from the condition when I calculate the FLOPs of my model. I used this code:

model7 = ResNet18_AC()
input = torch.randn(1, 3, 224, 224)
flops7, params7 = profile(model7, inputs=(input,),verbose=False)
print(params7)
print(flops7)

And my model is here:

from ACNet_master.acnet.acb import ACBlock

def conv(in_channels, out_channels, kernel_size=3, stride=1, padding=1, dilation=1, groups=1):
    """replace 3*3 to AC block"""
    return ACBlock(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups)

def conv3x3(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1, groups=1):
    """standard convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False)

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = conv(in_planes, planes, kernel_size=3, stride=stride, padding=1)
        self.conv2 = conv(planes, planes, kernel_size=3, stride=1, padding=1)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = self.conv1(x)
        #out = self.bn1(out)
        out = F.relu(out)

        out = self.conv2(out)

        out += self.shortcut(x)
        out = F.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = conv(planes, planes, kernel_size=3, stride=stride, padding=1)
        self.bn2 = nn.BatchNorm2d(planes)

        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)

        out = self.conv2(out)
        out = F.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out += self.shortcut(x)
        out = F.relu(out)

        return out


class resnet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=7, zero_init_residual=False):
        super(resnet, self).__init__()
        self.in_planes = 64

        self.conv1 = conv(3, 64, kernel_size=3, stride=1, padding=1)

        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        #out = self.bn1(out)
        out = F.relu(out)

        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)

        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = F.dropout(out, p=0.5, training=self.training)
        out = self.linear(out)

        return out


def ResNet18_AC():
    return resnet(BasicBlock, [2,2,2,2])

def ResNet34_AC():
    return resnet(BasicBlock, [3,4,6,3])

def ResNet50_AC():
    return resnet(Bottleneck, [3,4,6,3])

I just use ResNet, but change 33 block to an asymmetric convolution (a sequence of 33, 31, and 13). I don’t wanna post too much code here, the library I import to the model “from ACNet_master.acnet.acb import ACBlock” is from this paper “https://openaccess.thecvf.com/content_ICCV_2019/papers/Ding_ACNet_Strengthening_the_Kernel_Skeletons_for_Powerful_CNN_via_Asymmetric_ICCV_2019_paper.pdf”. And the code is this: “ACNet/acb.py at master · DingXiaoH/ACNet · GitHub

I will be very appreciated if you can give me some suggestions!

Based on the shape information in the error message, the error seems to be raised in self.linear, which expects the input activation to have 512 features.
However, out seems to have 25088 features and you could verify it by printing the shape after the flattening of the tensor in:

        ...
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = F.dropout(out, p=0.5, training=self.training)
        print(out.shape)
        out = self.linear(out)

        return out

To fix this you would have to change the in_features in self.linear to 25088.

I have a similar error but I’m unable to resolve the mismatch. I have shared the model as follows

import torch
import torch.nn as nn
from .quant import QuantizeConv2d, QuantizeLinear

cfg = {
    'VGG': [128, 128, 'M', 256, 256, 'M', 512, 512, 'M'],
    'VGGS': [128, 128, 'M', 256, 256, 'M', 256, 256, 'M'],
    'VGGT': [128, 128, 'M', 128, 256, 'M', 256, 256, 'M'],
    'VGGD': [128, 128, 'M', 256, 256, 'M', 512, 512, 'M'],
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


class VGG_quant(nn.Module):
    def __init__(self, vgg_name, a_bits=1, w_bits=1, fc=1024):
        super(VGG_quant, self).__init__()
        self.a_bits = a_bits
        self.w_bits = w_bits
        self.features = self._make_layers(cfg[vgg_name])
        num_maxpooling_layers = cfg[vgg_name].count('M')
        if 'S' in vgg_name or 'T' in vgg_name:
            last_conv_layer_output_dim = 256 * (4 ** (5 - num_maxpooling_layers))
        elif 'D' in vgg_name:
            last_conv_layer_output_dim = 512 * (4 ** (5 - num_maxpooling_layers))
        else:
            last_conv_layer_output_dim = 512 * (4 ** (5 - num_maxpooling_layers))
        self.classifier = nn.Sequential(
                QuantizeLinear(last_conv_layer_output_dim, fc, w_bits=w_bits, a_bits=a_bits),
                nn.BatchNorm1d(fc),
                QuantizeLinear(fc, fc, w_bits=w_bits, a_bits=a_bits),
                nn.BatchNorm1d(fc),
                QuantizeLinear(fc, 10, w_bits=w_bits, a_bits=a_bits),
                )
       #self.regime = {
       #    0: {'optimizer': 'Adam', 'betas': (0.9, 0.999),'lr': 5e-3},
       #    40: {'lr': 1e-3},
       #    80: {'lr': 5e-4},
       #    100: {'lr': 1e-4},
       #    120: {'lr': 5e-5},
       #    140: {'lr': 1e-5}
       #}
        
    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if in_channels == 3:
                layers += [QuantizeConv2d(in_channels, x, kernel_size=3, padding=1, w_bits=self.w_bits, a_bits=self.a_bits)]
                layers += [nn.BatchNorm2d(x)]
                in_channels = x
            else:
                if x == 'M':
                    layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
                else:
                    layers += [QuantizeConv2d(in_channels, x, kernel_size=3, padding=1, w_bits=self.w_bits, a_bits=self.a_bits),
                               nn.BatchNorm2d(x)]
                    in_channels = x
        return nn.Sequential(*layers)


def test():
    net = VGG_quant('VGG16')
    y = net(torch.randn(2,3,32,32))
    print(y.size())