RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x13056 and 153600x2048)

Based on the error message, it seems you are not flattening the input tensor before passing it to the model.
Add:

image = image.view(image.size(0), -1)
output = model(image)

and I think it should work.

Also, Variables are deprecated since PyTorch 0.4, so you can use tensors directly in newer versions. :wink:
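For anyone hitting this: the two shapes in the message can be read directly (the shapes below are taken from the error above; the fix itself is an assumption about your model):

# mat1 (64x13056)    -> the activation that reached the layer: batch of 64, 13056 features each
# mat2 (153600x2048) -> the nn.Linear weight viewed as in_features x out_features
# the feature count of mat1 must match the in_features of mat2, so after
image = image.view(image.size(0), -1)   # -> [64, num_features]
# the first linear layer needs in_features equal to that num_features.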

This is the error I am getting.
mat1 and mat2 shapes cannot be multiplied (87852x10 and 87852x10)

import torch
import torch.nn as nn
import torch.nn.functional as F

class ConvModel(nn.Module):
    def __init__(self):
        super(ConvModel, self).__init__()
        self.conv1 = nn.Conv1d(50, 10, kernel_size=2, stride=1, padding=1)
        self.conv2 = nn.Conv1d(10, 5, kernel_size=2, stride=2, padding=1)
        self.maxpool1 = nn.MaxPool1d(kernel_size=2, stride=2, padding=1)
        self.dropout1 = nn.Dropout(0.3)
        self.dropout2 = nn.Dropout(0.3)

        self.fc1 = nn.Linear(87852, 10)
        self.fc2 = nn.Linear(10, 6)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.maxpool1(x)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x) 
        
        return x


model = ConvModel()
optimizer = torch.optim.SGD(model.parameters(), lr=0.05)

The input size is [87852, 50, 3]

As the error message indicates, the number of input features in the first linear layer is wrong and should be set to 10:

self.fc1 = nn.Linear(10, 10)
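A quick way to verify the fix is to push a small dummy batch through the model and check the shapes (a sketch; the batch size of 2 is arbitrary, and fc1 is assumed to be changed as above):

import torch

x = torch.randn(2, 50, 3)    # same [batch, 50, 3] layout as the real input
model = ConvModel()          # with self.fc1 = nn.Linear(10, 10)
# conv1 -> [2, 10, 4], conv2 -> [2, 5, 3], maxpool1 -> [2, 5, 2]
# torch.flatten(x, 1) -> [2, 10], which is where in_features=10 comes from
print(model(x).shape)        # torch.Size([2, 6])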

Hi, I think I’m getting a similar error, but I can’t figure it out. Can someone help? :slight_smile:

class Generator_1(nn.Module):
  def __init__(self):
    super(Generator_1,self).__init__()
    
    self.main = nn.Sequential(
      #Dense, 256*16*16 nodes
      nn.Linear(100,256), #Latent space vector size = 100
      #Activation
      nn.ReLU(True),
      #Maybe need reshape here ?
      #Up sampling 2D ; (2,2) size
      nn.Upsample((2,2)),
      #Convolution 2D ; 128 filters, 4*4 kernel
      nn.Conv2d(256,128,4),
      #Activation
      nn.ReLU(True),
      #Up sampling 2D ; (2,2) size
      nn.Upsample((2,2)),
      #Convolution 2D ; 64 Filters, 4*4 kernel
      nn.Conv2d(128,64,4),
      #Activation
      nn.ReLU(True),
      #Up sampling 2D ; (2,2) size
      nn.Upsample((2,2)),
      #Convolution 2D ; 32 filters, 4*4 kernel
      nn.Conv2d(64,32,4),
      #Activation
      nn.ReLU(True),
      #Convolution 2D ; 1 filters, 4*4 kernel
      nn.Conv2d(32,1,4),
      #Activation
      nn.Tanh()
    )
    
  def forward(self,x):
    return self.main(x)

for epoch in range(num_epoch_first_stage):
  for i, data_mask in enumerate(dataloader_mask, 0):


    D1.zero_grad()

    #The 1st Stage GAN is fed with Real Mask
    real_data = data_mask[0].to(device)
    b_size = real_data.size(0)

    noise = torch.randn(b_size, z_size, 1, 1, device=device)

    fake_data = G1(noise).detach()

    D1_real = D1(real_data).view(-1)
    D1_fake = D1(fake_data).view(-1)  #.detach ?????

    gp = gradient_penalty(D1, real_data, fake_data, device=device)

    loss_D1 = (-(torch.mean(D1_real) - torch.mean(D1_fake)) + LAMBDA_GP * gp)

    loss_D1.backward()
    optimizer_D1.step()
    
    #Train the generator every n_critic iterations
    if i % n_critic == 0:
      G1.zero_grad()

      img_gen = G1(noise)
      loss_G1 = -torch.mean(D1(img_gen))
      
      loss_G1.backward()
      optimizer_G1.step()

and here is the error message I get :

  File "GAN_Pandey_Implementation.py", line 207, in <module>
    fake_data = G1(noise).detach()

...

  File "/env/lib/python3.6/site-packages/torch/nn/functional.py", line 1847, in linear
    return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (3200x1 and 100x256)

I don’t understand what’s wrong :confused:
Thanks !

Your noise tensor has 4 dimensions at the moment ([b_size, z_size, 1, 1]), while the first layer is nn.Linear, which operates on the last dimension and thus sees only 1 input feature instead of the expected 100.
I assume you want to flatten the input tensor (or just remove the dimensions with size 1) before passing it to the model.
Also, your comment is correct:

#Maybe need reshape here ?

and you would need to reshape the 2D output of nn.Linear to a 4D tensor before passing it to the nn.Conv2d layer.
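A minimal sketch of both fixes together (the 256*16*16 size comes from your own comment, so treat it as an assumption about what you intended):

import torch
import torch.nn as nn

noise = torch.randn(8, 100, 1, 1)
z = noise.view(noise.size(0), -1)   # [8, 100]: flatten before the nn.Linear
fc = nn.Linear(100, 256*16*16)      # "Dense, 256*16*16 nodes", as in your comment
x = fc(z)                           # -> [8, 256*16*16]
x = x.view(-1, 256, 16, 16)         # -> [8, 256, 16, 16], 4D for the first nn.Conv2d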


Hello,
I am getting the following error message: “mat1 and mat2 shapes cannot be multiplied (448x4 and 576x10)”.

I am trying to do Layer Wise Relevance Propagation according to this tutorial: Layer Wise Relevance Propagation In Pytorch – GiorgioML – Ph.D. Student in Computer Science, MSU

Here is the NN class:

class Cnn(nn.Module):
    def __init__(self):
        super(Cnn, self).__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=0, stride=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, padding=0, stride=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=0, stride=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.fc1 = nn.Linear(3*3*64, 10)
        self.fc2 = nn.Linear(10, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)
        out = self.relu(self.fc1(out))
        out = self.fc2(out)
        return out

I have used this to train a network to do binary classification, and am now trying to run Layer Wise Relevance Propagation on it. Here is the part of the LRP code where I am getting the error:

def LRP_individual(model, X, device):
    # Get the list of layers of the network
    layers = [module for module in model.modules() if not isinstance(module, torch.nn.Sequential)][1:]

    # Propagate the input
    L = len(layers)
    A = [X] + [X] * L  # Create a list to store the activation produced by each layer

    for layer in range(L):
        A[layer + 1] = layers[layer].forward(A[layer])

    # Rest of the LRP function

I would be very grateful for an explanation or solution to this problem.
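One thing worth checking here (a guess from the posted code, not a confirmed diagnosis): iterating model.modules() collects the layers but not the out.view(out.size(0), -1) call from forward(), so the conv activation would reach fc1 un-flattened. A sketch of a workaround inside the propagation loop:

for layer in range(L):
    # replay the flatten from forward() when the walk reaches a linear layer
    if isinstance(layers[layer], torch.nn.Linear) and A[layer].dim() > 2:
        A[layer] = A[layer].view(A[layer].size(0), -1)
    A[layer + 1] = layers[layer].forward(A[layer])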

Hi @ptrblck, I’m a newbie here with a similar error. Any idea how to fix it?

import torch.nn as nn

class _ResidualBlock(nn.Module):
  def __init__(self, in_channels, out_channels, kernel_size, stride, padding=1, bias=True):
    super(_ResidualBlock, self).__init__()
    self.conv_1 = nn.Conv2d(in_channels, out_channels,
                  kernel_size=kernel_size, stride=stride, padding=padding, bias=bias)
    self.leaky_relu_1 = nn.LeakyReLU()
    self.conv_2 = nn.Conv2d(out_channels, out_channels,
                  kernel_size=kernel_size, stride=stride, padding=padding, bias=bias)
    self.leaky_relu_2 = nn.LeakyReLU()

  def forward(self, tensor):
    r_tensor = tensor
    output = self.conv_1(tensor)
    output = self.leaky_relu_1(output)
    output = self.conv_2(output)
    output += r_tensor
    output = self.leaky_relu_2(output)
    return output


class _Block(nn.Module):
  def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=True, conv_stride=2, conv_kernel_size=4):
    super(_Block, self).__init__()
    self.conv_1 = nn.Conv2d(in_channels, out_channels,
                    kernel_size=conv_kernel_size, stride=conv_stride, padding=padding, bias=bias)
    self.leaky_relu_1 = nn.LeakyReLU()
    self.residual_block_1 = _ResidualBlock(out_channels, out_channels, kernel_size, stride)
    self.residual_block_2 = _ResidualBlock(out_channels, out_channels, kernel_size, stride)

  def forward(self, tensor):
    output = self.conv_1(tensor)
    output = self.leaky_relu_1(output)
    output = self.residual_block_1(output)
    output = self.residual_block_2(output)
    return output


class D(nn.Module):
  def __init__(self, tag=34, ngpu=0):
    super(D, self).__init__()
    self.ngpu = ngpu
    self.reduce_block_1 = _Block(3, 32, conv_kernel_size=4)
    self.reduce_block_2 = _Block(32, 64, conv_kernel_size=4)
    self.reduce_block_3 = _Block(64, 128, conv_kernel_size=4)
    self.reduce_block_4 = _Block(128, 256, conv_kernel_size=3)
    self.reduce_block_5 = _Block(256, 512, conv_kernel_size=3)
    self.conv_1 = nn.Conv2d(512, 1024, kernel_size=3, stride=2, padding=1, bias=True)
    self.leaky_relu_1 = nn.LeakyReLU()
    self.dense_1 = nn.Linear(2*2*1024, 1)
    self.dense_tag = nn.Linear(2*2*1024, tag)
    # self.sigmoid = nn.Sigmoid()

  def forward(self, tensor):
    output = self.reduce_block_1(tensor)
    output = self.reduce_block_2(output)
    output = self.reduce_block_3(output)
    output = self.reduce_block_4(output)
    output = self.reduce_block_5(output)
    output = self.conv_1(output)
    output = self.leaky_relu_1(output)
    output = output.view(output.size(0), -1)
    output1 = self.dense_1(output)
    output2 = self.dense_tag(output)
    return output1, output2

Here is my error

Traceback (most recent call last):
  File ".\gans.py", line 185, in <module>
    main()
  File ".\gans.py", line 140, in main
    output = netD(input)
  File "C:\Users\Akila\anaconda3\lib\site-packages\torch\nn\modules\module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "G:\Python_Projects\computer-vision\Module 3 - GANs\discriminator.py", line 64, in forward
    output1 = self.dense_1(output)
  File "C:\Users\Akila\anaconda3\lib\site-packages\torch\nn\modules\module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "C:\Users\Akila\anaconda3\lib\site-packages\torch\nn\modules\linear.py", line 96, in forward
    return F.linear(input, self.weight, self.bias)
  File "C:\Users\Akila\anaconda3\lib\site-packages\torch\nn\functional.py", line 1847, in linear
    return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x1024 and 4096x1)

I guess the shape mismatch is raised in:

    output = output.view(output.size(0), -1)
    output1 = self.dense_1(output)
    output2 = self.dense_tag(output)

as output seems to have the shape [batch_size=64, 1024], while both linear layers expect 4096 input features. You could verify it by printing the shape via print(output.shape) before passing it to these layers, and if my assumption is correct, change the in_features of both layers to 1024.
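If you are on PyTorch 1.8 or newer, an alternative to hard-coding the number is nn.LazyLinear, which infers in_features from the first batch it sees (a sketch, replacing the two layers above):

self.dense_1 = nn.LazyLinear(1)      # in_features inferred on the first forward pass
self.dense_tag = nn.LazyLinear(tag)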

Hi @ptrblck, I’m a newbie here and I have a similar error. Can you give me a way to figure it out, please?
class CNN(nn.Module):

    def __init__(self):
        super(CNN, self).__init__()

        self.nn = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Flatten(),
            nn.Linear(in_features=2304, out_features=512),
            nn.Dropout2d(0.25),
            nn.Linear(in_features=512, out_features=128),
        )

        self.linear = nn.Linear(in_features=128, out_features=6)

    def forward(self, x):
        embedding = self.nn(x)
        x = self.linear(embedding)
        return embedding, x

here is my error

  1845     if has_torch_function_variadic(input, weight):
  1846         return handle_torch_function(linear, (input, weight), input, weight, bias=bias)
→ 1847     return torch._C._nn.linear(input, weight, bias)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (501x61504 and 2304x512)

What is the shape of the input x to the forward() function?

Sorry, but I don’t know what you mean.

If you add

print(x.shape)

as the first line of your forward() function, what does it print?


You are most likely running into the same issue and could follow the same debugging steps as mentioned previously: nn.Linear(in_features=2304, out_features=512) expects an activation input with 2304 features, while you are passing an activation with 61504 features.
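For reference, both numbers factor cleanly, which hints at the cause (an estimate assuming square inputs, not a confirmed diagnosis):

import torch

# 2304  = 64 * 6 * 6   -> nn.Linear(2304, 512) was sized for roughly 28x28 inputs
# 61504 = 64 * 31 * 31 -> the conv stack actually received roughly 128x128 inputs
model = CNN()
x = torch.randn(1, 3, 128, 128)   # dummy batch at the assumed resolution
print(model.nn[:-3](x).shape)     # runs everything up to and including nn.Flatten() -> torch.Size([1, 61504])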


Hi, I ran into a similar problem, but I can’t understand it. Could anyone give me some suggestions?

mat1 and mat2 shapes cannot be multiplied (1x1867776 and 1048576x256)

x = torch.cat([gap, gmp], 1)
x = self.relu(self.conv1x1(x))

if self.light > 0:
    x_ = torch.nn.functional.adaptive_avg_pool2d(x, self.light)
    x_ = self.FC(x_.view(x_.shape[0], -1))
else:
    x_ = self.FC(x.view(x.shape[0], -1))

The shape mismatch is most likely created in the self.FC(x_...) operation so make sure the feature dimension shape of x_ matches the in_features of self.FC.
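If the else branch is the one being hit, one way to make it shape-stable as well (a sketch; self.FC and the channel count C are from the quoted snippet):

# pooling to a fixed spatial size makes the flattened feature count independent of H and W
x_ = torch.nn.functional.adaptive_avg_pool2d(x, 1)   # -> [N, C, 1, 1] for any input size
x_ = self.FC(x_.view(x_.shape[0], -1))               # self.FC then needs in_features == C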

Nice to see you guys. How about adding my WeChat so we can learn together?

mat1 and mat2 shapes cannot be multiplied (19800x10000 and 784x10000)

for this code:

def classify(images):
    classifications = torch.zeros((images.shape[0], D)).to(device)
    for i in range(images.shape[0]):
        for j in range(digit_vectors.shape[0]):
            classifications[i, j] = F.cosine_similarity(images[i], digit_vectors[j], dim=0)
    #print(classifications)
    return torch.tensor(classifications).to(device)

print("Train accuracy:")

acc = 0
for i in range(X_train.shape[0]):
    predictions = classify(X_train[i:i+1])
    #print(y_train[i:i+1], predictions)
    if y_train[i:i+1].cpu() == predictions.argmax().cpu():
        acc += 1

acc /= X_train.shape[0]

#acc = accuracy_score(y_train[:X_train.shape[0]].cpu(), predictions.cpu())
print(acc)

print("Test accuracy:")
X_test = get_scenes(X_test, proj)

acc = 0
for i in range(X_test.shape[0]):
    predictions = classify(X_test[i:i+1])
    if y_test[i:i+1].cpu() == predictions.argmax().cpu():
        acc += 1

acc /= X_test.shape[0]
print(acc)

Please help me to solve my error as well.
My code is below


I’m getting RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x119072 and 800x300)

Based on the shapes reported in the error message, the shape mismatch is raised in the first linear layer of self.fc_model. Print the shape of x in the forward method before feeding it into self.fc_model and make sure its feature dimension matches the in_features of the linear layer, or just set it to in_features=119072.
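If editing the forward method is inconvenient, a forward pre-hook prints the incoming shape as well (a sketch; self.fc_model is the name from the post above, and model stands for your model instance):

# prints the activation shape every time fc_model is called
model.fc_model.register_forward_pre_hook(lambda mod, inp: print(inp[0].shape))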

Please help me, I get this error:

return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x22 and 18x2048)
import torch
import torch.nn as nn

class residualBlock(nn.Module):
    def __init__(self, in_channels=64, k=3, n=64, s=1):
        super(residualBlock, self).__init__()

        self.conv1 = nn.Conv2d(in_channels, n, k, stride=s, padding=1)
        self.bn1 = nn.BatchNorm2d(n)
        self.conv2 = nn.Conv2d(n, n, k, stride=s, padding=1)
        self.bn2 = nn.BatchNorm2d(n)

        self.ac = nn.ReLU()

    def forward(self, x):
        y = self.ac(self.bn1(self.conv1(x)))
        return self.bn2(self.conv2(y)) + x

class upsampleBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(upsampleBlock, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, 3, stride=1, padding=1)
        self.shuffler = nn.PixelShuffle(2)
        self.bn = nn.BatchNorm2d(in_channels)
        self.ac = nn.ReLU()

    def forward(self, x):
        return self.ac(self.bn(self.shuffler(self.conv(x))))

class ResBlock(nn.Module):
    def __init__(self, in_channels=64, k=3, n=64, s=1):
        super(ResBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, n, k, stride=s, padding=1)
        self.conv2 = nn.Conv2d(n, n, k, stride=s, padding=1)
        self.ac = nn.LeakyReLU()

    def forward(self, x):
        y = self.ac(self.conv1(x))
        return self.ac(self.conv2(y) + x)

class DBlock(nn.Module):
    def __init__(self, n=64, k=3, s=1):
        super(DBlock, self).__init__()
        self.block1 = ResBlock(n, k, n, s)
        self.block2 = ResBlock(n, k, n, s)
        self.conv1 = nn.Conv2d(n, 2*n, 4, stride=2, padding=1)
        self.ac = nn.LeakyReLU()

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        return self.ac(self.conv1(x))

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

class Generator(nn.Module):
    def __init__(self, n_residual_blocks=8, upsample_factor=5, tag_num=19):  # rb=16
        super(Generator, self).__init__()
        self.n_residual_blocks = n_residual_blocks
        self.upsample_factor = upsample_factor

        self.conv1 = nn.Conv2d(3, 32, 4, stride=2, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2, padding=1)

        self.dense = nn.Linear(tag_num, 8*16*16)
        self.conv3 = nn.Conv2d(72, 64, 3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(True)

        for i in range(self.n_residual_blocks):
            self.add_module('residual_block' + str(i+1), residualBlock())

        self.bn2 = nn.BatchNorm2d(64)

        for i in range(self.upsample_factor//2):
            self.add_module('upsample' + str(i+1), upsampleBlock(64, 256))

        self.conv4 = nn.Conv2d(64, 3, 9, stride=1, padding=4)
        self.tanh = nn.Tanh()
        self.ac = nn.LeakyReLU()

    def forward(self, x, c):
        x = self.ac(self.conv1(x))
        x = self.ac(self.conv2(x))

        c = self.dense(c)
        c = c.view(-1, 8, 16, 16)
        x = torch.cat((x, c), 1)
        x = self.conv3(x)
        x = self.relu(self.bn1(x))

        y = x.clone()
        for i in range(self.n_residual_blocks):
            y = self.__getattr__('residual_block' + str(i+1))(y)

        x = self.relu(self.bn2(y)) + x

        for i in range(self.upsample_factor//2):
            x = self.__getattr__('upsample' + str(i+1))(x)

        return self.tanh(self.conv4(x))

class Discriminator(nn.Module):
    def __init__(self, hair_tag_num=1, eyes_tag_num=1):
        super(Discriminator, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 4, stride=2, padding=1)

        self.block1 = DBlock(n=32)
        self.block2 = DBlock(n=64)
        self.block3 = DBlock(n=128)
        self.block4 = DBlock(n=256)

        self.head1 = nn.Sequential(
            nn.Linear(512*2*2, 512*2),
            nn.ReLU(True),
            nn.Linear(512*2, 512),
            nn.ReLU(True),
            nn.Linear(512, 128),
            nn.ReLU(True),
            nn.Linear(128, 1),
        )
        self.head2 = nn.Sequential(
            nn.Linear(512*2*2, 512*2),
            nn.ReLU(True),
            nn.Linear(512*2, 512),
            nn.ReLU(True),
            nn.Linear(512, 128),
            nn.ReLU(True),
            nn.Linear(128, hair_tag_num),
        )
        self.head3 = nn.Sequential(
            nn.Linear(512*2*2, 512*2),
            nn.ReLU(True),
            nn.Linear(512*2, 512),
            nn.ReLU(True),
            nn.Linear(512, 128),
            nn.ReLU(True),
            nn.Linear(128, eyes_tag_num),
        )

        self.ac = nn.LeakyReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.ac(self.conv1(x))

        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = x.view(x.size()[0], -1)

        return self.sigmoid(self.head1(x)), self.head2(x), self.head3(x)

This is my StarGAN testing script:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import models
from torch.autograd import Variable, grad

import sys
import argparse
import os
from os import listdir
from os.path import isfile, join
import numpy as np
import pickle
import random
import math
import matplotlib.pyplot as plt
from PIL import Image

from models import *
from utils import *

parser = argparse.ArgumentParser()
parser.add_argument('--model_dir', required=True, help='path to model')
parser.add_argument('--input_dir', required=True, help='path to model')
parser.add_argument('--cuda', action='store_true', help='enables cuda')
parser.add_argument('--hair_tag_num', type=int, default=12, help='total hair color number')
parser.add_argument('--eyes_tag_num', type=int, default=10, help='total eyes color number')
parser.add_argument('--output', default='./output.jpg', help='folder to output images and model checkpoints')

opt = parser.parse_args()
print(opt)

cuda = opt.cuda
hair_tag_num = opt.hair_tag_num
eyes_tag_num = opt.eyes_tag_num
tag_num = hair_tag_num + eyes_tag_num

test_x = []
test_dir = opt.input_dir
test_files = [f for f in listdir(test_dir) if isfile(join(test_dir, f))]

for test_file in test_files:
    img = Image.open(join(test_dir, test_file))
    img = np.array(img, dtype='float')
    img = img.transpose((2,0,1))  # (512, 512, 3) --> (3, 512, 512)
    img = np.expand_dims(img, axis=0)
    img = torch.FloatTensor(img/255)*2 - 1
    test_x.append(img)

tp_tags = torch.zeros(hair_tag_num*len(test_files), tag_num)
hair_color_index = 0
eyes_color_index = 0
for i in range(hair_tag_num*len(test_files)):
    tp_tags[i][hair_color_index] = 1
    tp_tags[i][hair_tag_num + eyes_color_index] = 1
    hair_color_index += 1
    if hair_color_index == hair_tag_num:
        hair_color_index = 0
        eyes_color_index += 1
        if eyes_color_index == eyes_tag_num:
            eyes_color_index = 0

tp_tags = Variable(tp_tags)
if cuda:
    tp_tags = tp_tags.cuda()
tp_X = Variable(torch.cat([img for img in test_x for i in range(hair_tag_num)], 0)).cuda()

generator = torch.load(opt.model_dir)
generator.eval()
generated = []
for i in range(len(test_x)):
    generated_row = []
    for j in range(hair_tag_num):
        output = generator.forward(tp_X[i*hair_tag_num+j:i*hair_tag_num+j+1], tp_tags[i*hair_tag_num+j:i*hair_tag_num+j+1]).detach()
        img = np.squeeze(output.data.cpu().numpy())
        img = ((img+1)/2*255).astype(np.uint8)
        img = img.transpose((1,2,0))
        generated_row.append(img)
    generated.append(np.concatenate([img for img in generated_row], axis=1))
concat_img = np.concatenate([img for img in generated], axis=0)
plt.imsave(opt.output, concat_img, vmin=0, vmax=255)
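Reading the error the same way as in the earlier replies (an inference from the posted defaults, not a confirmed diagnosis):

# mat1 (1x22)    -> the tag vector c has 12 + 10 = 22 entries (the argparse defaults)
# mat2 (18x2048) -> the checkpoint's dense layer is nn.Linear(18, 8*16*16), i.e. tag_num=18
# hair_tag_num + eyes_tag_num at test time must match the values the model was trained with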