RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x13056 and 153600x2048)

It seems your input activation contains 13056 features while the first linear layer (self.fc1) expects an input with 153600 features.
You could fix it by changing the in_features argument of self.fc1 to 13056 or by making sure the flattened input contains 153600 features.
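
One common way to find the right value is to flatten the activation, print its shape directly before the linear layer, and use that size for in_features. A minimal sketch (the conv layer and input size below are made up for illustration and are not your actual model):

```
import torch
import torch.nn as nn

class SmallNet(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        # hypothetical feature extractor -- replace with your own layers
        self.features = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        # in_features must equal the flattened activation size (16 * 32 * 32 here)
        self.fc1 = nn.Linear(16 * 32 * 32, num_classes)

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)   # keep the batch dim, flatten the rest
        # print(x.shape)          # debug: the last dim must equal fc1.in_features
        return self.fc1(x)

model = SmallNet()
out = model(torch.randn(8, 3, 32, 32))  # works because 16*32*32 matches in_features
print(out.shape)                        # torch.Size([8, 10])
```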


Hi, I have the same error: RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1 and 768x32)

Hope you can help @ptrblck
The model notebook:
[Google Colab](https://POP model)

Your notebook doesn’t show any code besides executing a Python script from another folder.
However, the error:

   word_embeddings = self.dropout(self.fc(word_embeddings))
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1 and 768x32)

points to a shape mismatch in self.fc which expects 768 input features while the activation only contains 1 feature.

Here is the code:
https://github.com/HumaticsLAB/POP-Mining-POtential-Performance
How can I solve it?

Check the aforementioned layer specs and the corresponding input activation as described before.
In case you get stuck, please create a minimal and executable code snippet to reproduce the issue.
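
For reference, such a minimal snippet for a shape-mismatch error usually needs only the failing layer and a tensor with the problematic shape; this sketch (not taken from the POP repository) reproduces the reported error:

```
import torch
import torch.nn as nn

fc = nn.Linear(768, 32)               # layer expects 768 input features
word_embeddings = torch.randn(1, 1)   # activation only provides 1 feature

out = fc(word_embeddings)
# -> RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1 and 768x32)
```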

Sorry, I don't get it. Can you explain more?

Hi, I get the same runtime error with my code (mat1 and mat2 shapes cannot be multiplied (1x25088 and 15x10)):

```
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

class NetMLP(torch.nn.Module):
    def __init__(self, input_features, size_hidden, n_output):
        super(NetMLP, self).__init__()
        self.hidden1 = torch.nn.Linear(input_features, size_hidden, bias=True, device=None, dtype=None)
        self.hidden2 = torch.nn.Linear(size_hidden, size_hidden, bias=True, device=None, dtype=None) 
        self.out = torch.nn.Linear(size_hidden, n_output, bias=True, device=None, dtype=None) 

    def forward(self, x):
        x = x.view(-1,)
        x = self.out(x)       # linear output
        return x
input_features = 28*28
hidden_neurons = 15
num_outputs = 10

batch_size = 50

learning_rate = 0.0001

device = torch.device('cuda:0')

net = NetMLP(input_features, hidden_neurons, num_outputs)
net.to(device)

optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

criterion = nn.CrossEntropyLoss()

loader = DataLoader(dataset=mnist_train, batch_size=32, shuffle=True)

num_epochs = 5

loss_list = []

for i in range(num_epochs):

    for x, y in loader:
        optimizer.zero_grad()

        x = x.to(device)
        y = y.to(device)

        loss = criterion(net(x), y)
        
        loss.backward()
        
        optimizer.step()

        loss_list.append(loss.data.item())
  
    #if i > 0 and i % 100 == 0:
    print('Epoch %d, loss = %g' % (i, loss))
```

Many thanks!

In your model you are using only the last linear layer via self.out while I would guess self.hidden1 and self.hidden2 should also be used.
Additionally, you are flattening the input tensor and are mixing the batch with the feature dimension in:

x = x.view(-1)

Take a look at this tutorial to see how a simple neural network is implemented and used.
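
For illustration, here is a sketch of how the forward pass could look with both issues fixed (your layer names and sizes are kept; the ReLU activations are an assumption):

```
import torch
import torch.nn.functional as F

class NetMLP(torch.nn.Module):
    def __init__(self, input_features, size_hidden, n_output):
        super().__init__()
        self.hidden1 = torch.nn.Linear(input_features, size_hidden)
        self.hidden2 = torch.nn.Linear(size_hidden, size_hidden)
        self.out = torch.nn.Linear(size_hidden, n_output)

    def forward(self, x):
        x = x.view(x.size(0), -1)      # flatten the features only, keep the batch dim
        x = F.relu(self.hidden1(x))    # actually use the hidden layers
        x = F.relu(self.hidden2(x))
        return self.out(x)             # raw logits for nn.CrossEntropyLoss

net = NetMLP(28 * 28, 15, 10)
print(net(torch.randn(32, 1, 28, 28)).shape)  # torch.Size([32, 10])
```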

Hi, I am a newbie and I have a similar error.

```
import torch

import torch.nn as nn
import torch.nn.functional as F
class Auxiliary(nn.Module):
    def __init__(self, input_channels, n_classes):
        super(Auxiliary, self).__init__()
        self.Conv2 = nn.Conv2d(input_channels, 128, kernel_size=1)
        self.FC1 = nn.Linear(2048, 1024)
        self.FC2 = nn.Linear(1024, n_classes)
    def forward(self, x):
        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
        x = F.adaptive_avg_pool2d(x, (4, 4))
        # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
        x = self.Conv2(x)
        # N x 128 x 4 x 4
        x = x.view(x.size(0), -1)
        # N x 2048
        x = F.relu(self.FC1(x), inplace=True)
        # N x 1024
        x = F.dropout(x, 0.7, training=self.training)
        # N x 1024
        x = self.FC2(x)
        # N x n_classes
        return x


class Inception(nn.Module):
    def __init__(self, input_channels, n1x1, n3x3_reduce, n3x3, n5x5_reduce, n5x5, pool_proj):
        super(Inception, self).__init__()


        # 1x1conv branch
        self.inception_1 = nn.Sequential(
            nn.Conv2d(input_channels, n1x1, kernel_size=1)
        )

        self.inception_2 = nn.Sequential(
            nn.Conv2d(input_channels, n3x3_reduce, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(n3x3_reduce, n3x3, kernel_size=1),
            nn.ReLU(inplace=True)
        )

        self.inception_3 = nn.Sequential(
            nn.Conv2d(input_channels, n5x5_reduce, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(n5x5_reduce, n5x5, kernel_size=1),
            nn.ReLU(inplace=True)
        )

        self.inception_4 = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            nn.Conv2d(input_channels, pool_proj, kernel_size=1),
            nn.ReLU(inplace=True)
        )


    def forward(self, x):
        return torch.cat((self.inception_1(x),self.inception_2(x),self.inception_3(x),self.inception_4(x)), dim=1)




class Net(nn.Module):
    def __init__(self, n_classes):
        super(Net, self).__init__()

        self.pre_layer = nn.Sequential(
            # N x 3 x 224 x 224
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            # N x 64 x 112 x 112
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            # N x 64 x 56 x 56
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            # N x 64 x 56 x 56
            nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(192),
            nn.ReLU(inplace=True),
            # N x 192 x 56 x 56
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True),
        )

        # N x 192 x 28 x 28
        self.inception_3a = Inception(192, 64, 96, 128, 16, 32, 32)
        # N x 256 x 28 x 28
        self.inception_3b = Inception(256, 128, 128, 192, 32, 96, 64)
        # N x 480 x 28 x 28
        self.maxPool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # N x 480 x 14 x 14
        self.inception_4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self.aux1 = Auxiliary(512, n_classes)
        # N x 512 x 14 x 14
        self.inception_4b = Inception(512, 160, 112, 224, 24, 64, 64)
        # N x 512 x 14 x 14
        self.inception_4c = Inception(512, 128, 128, 256, 24, 64, 64)
        # N x 512 x 14 x 14
        self.inception_4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self.aux2 = Auxiliary(528, n_classes)
        # N x 528 x 14 x 14
        self.inception_4e = Inception(528, 256, 160, 320, 32, 128, 128)
        # N x 832 x 14 x 14
        self.maxPool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # N x 832 x 7 x 7
        self.inception_5a = Inception(832, 256, 160, 320, 32, 128, 128)
        # N x 832 x 7 x 7
        self.inception_5b = Inception(832, 384, 192, 384, 48, 128, 128)
        # N x 1024 x 7 x 7
        self.avgPool5 = nn.AvgPool2d(kernel_size=7, stride=1)

        # N x 1024 x 1 x 1
        self.dropout = nn.Dropout(p=0.4)
        # N x 1024
        self.linear = nn.Linear(in_features=1024, out_features=n_classes)



    def forward(self, x, mode=True):
        x = self.pre_layer(x)

        x = self.inception_3a(x)
        x = self.inception_3b(x)
        x = self.maxPool3(x)

        x = self.inception_4a(x)

        if mode:
            aux1 = self.aux1(x)
        else:
            aux1 = None

        x = self.inception_4b(x)
        x = self.inception_4c(x)
        x = self.inception_4d(x)

        if mode:
            aux2 = self.aux2(x)
        else:
            aux2 = None

        x = self.inception_4e(x)
        x = self.maxPool4(x)

        x = self.inception_5a(x)
        x = self.inception_5b(x)

        x = self.avgPool5(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.linear(x)
        x = F.softmax(x, dim=1)

        return x, aux1, aux2

```

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-123-6378d8f2f25e> in <module>
     11         labels=labels.to(device)
     12         total+=labels.size(0)
---> 13         output,aux1,aux2=model(images)
     14         aux1_loss=criterion(aux1, labels)
     15         aux2_loss=criterion(aux2, labels)

3 frames
/usr/local/lib/python3.9/dist-packages/torch/nn/modules/linear.py in forward(self, input)
    112 
    113     def forward(self, input: Tensor) -> Tensor:
--> 114         return F.linear(input, self.weight, self.bias)
    115 
    116     def extra_repr(self) -> str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x16384 and 1024x2)

I guess self.linear is raising the error in Net, so you should also check the shape of its input activation in Net.forward and change its in_features to 16384, as described in the previous posts.

Also, assuming you are working on a multi-class classification use case with nn.CrossEntropyLoss as the criterion, remove the last F.softmax activation, since raw logits are expected.
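
For example, here is a quick sanity check of both fixes (the 32x16384 activation shape and the 2 output classes are taken from your error message; adapt them to your actual setup):

```
import torch
import torch.nn as nn

linear = nn.Linear(in_features=16384, out_features=2)   # was in_features=1024
criterion = nn.CrossEntropyLoss()

x = torch.randn(32, 16384)    # flattened activation shape from the error message
logits = linear(x)            # no F.softmax here: CrossEntropyLoss expects raw logits
loss = criterion(logits, torch.randint(0, 2, (32,)))
print(logits.shape, loss.item())
```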