RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x13056 and 153600x2048)

who can help me?

import numpy as np
from PIL import Image
import glob
import torch
import torch.nn as nn
import torch.optim
from torch.autograd import Variable
from import DataLoader, Dataset
import torch.nn.functional as F
from torchvision import transforms as T

def read_data():
    read img_file data
    :return: img_path, img_name
    train_data = glob.glob('../train-images/*.jpeg')
    train_label = np.array(
        [train_data[index].split('/')[-1].split('.')[0].split('_')[0] for index in
    return train_data, train_label

out_place = (
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q',
    'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z')
transform = T.Compose([
   # T.Resize((128,128)),
    T.Normalize(std=[0.5, 0.5, 0.5], mean=[0.5, 0.5, 0.5])

def one_hot(word: str) -> np.array:
    tmp = []
    # for i in range(len(word)):
    for i in range(4):
        item_tmp = [0 for x in range(144)]   # [0 for x in range(len(word) * len(out_place))]
        word_idx = out_place.index(word[i].lower())
        item_tmp[i*36+word_idx] = 1
    return np.array(tmp)

class DataSet(Dataset):
    def __init__(self):
        self.img_path, self.label = read_data()

    def __getitem__(self, index):
        img_path = self.img_path[index]
        img ="RGB")
        img = transform(img)
        label = torch.from_numpy(one_hot(self.label[index])).float()
        return img, label

    def __len__(self):
        return len(self.img_path)

data = DataSet()
data_loader = DataLoader(data, shuffle=True, batch_size=64, drop_last=True)

class CNN_Network(nn.Module):
    def __init__(self):
        super(CNN_Network, self).__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, stride=1, kernel_size=3, padding=1),

        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, stride=1, kernel_size=3, padding=1),
            nn.MaxPool2d(stride=2, kernel_size=2),  # 30 80
        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, stride=1, kernel_size=3, padding=1),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),   # 15 40

        self.fc = nn.Sequential(
            nn.Linear(256 * 15 * 40, 2048),
            nn.Linear(2048, 1024),
            nn.Linear(1024, 40)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = x.view(x.size(0), -1)
        x = self.fc(x)
        x = F.softmax(x, dim=-1)
        return x

model = CNN_Network()

optimizer = torch.optim.Adam(model.parameters(), lr=0.002)

error = nn.MultiLabelSoftMarginLoss()

for i in range(5):
    for (batch_x, batch_y) in data_loader:
        image = Variable(batch_x)
        label = Variable(batch_y)

        out = model(image)

        loss = error(out, label)


        optimizer.step(), "model.pth")

my image size [70, 26] and one of images name is okzi.jpeg
I am first touch pytorch so i don`t understand this question


This text will be hidden

The activation input x to self.fc doesn’t have the expected number of features, so you would need to change the in_features of the first nn.Linear layer in self.fc to 13056.

Also, unrelated to this issue, but Variables are deprecated since PyTorch 0.4.0, so you can use tensors now. :wink:

1 Like

thanks for you :wink:

maybe i make a mistation, this is difficut for me :thinking:

The error points to a shape mismatch in the model output and target.
It seems one of these tensors has a batch size of 144 while the other has only 4 samples.
Print the shapes of the output and target before passing them to the criterion and make sure they have the same number of samples.

I’m getting RuntimeError: mat1 and mat2 shapes cannot be multiplied (836x35344 and 836x35344).

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(836, 16*47*47)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x.float())))
        x = self.pool(F.relu(self.conv2(x.float())))
        x = x.view(836, 16 * 47 * 47)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

The linear layer in self.fc1 expects 836 input features, while you are reshaping the activation to [batch_size=836, features=16*47*47=35344].
Use x = x.view(x.size(0), -1) instead and make sure the in_features of self.fc1 are matching the number of features of the incoming activation.

I’m brand new to deep learning and I’m curious as to the principles that inform manipulation of the CNN architecture as well as tuning hyperparameters. I have a test case of 11383 (200x200x3) training samples of 5 classes which I am modifying to 200x200x1. What would be the ideal starting point for architecture design/parameters as well as batch size and other hyperparameters based on information presented herein and my prior post?

You could start with some known architectures and either play around with them or use them as a base line to create your custom model. The same applies for all hyperparameters.
Also, I would recommend to take a look at the PyTorch tutorials and/or some courses such as FastAI.

class Net(nn.Module):

def __init__(self):

    super(Net, self).__init__()
    self.conv1 = nn.Conv2d(1, 32, 5)

    self.pool = nn.MaxPool2d(2, 2)

    self.conv2 = nn.Conv2d(32, 32, 5)

    self.fc = nn.Linear(86528 ,136)

def forward(self, x):

    x = self.pool(F.relu(self.conv1(x)))

    x = self.pool(F.relu(self.conv2(x)))

    x = x.view(x.size(0), -1)

    x = self.fc(x) 

    return x

I’m getting RuntimeError: mat1 and mat2 shapes cannot be multiplied (10x89888 and 86528x136)

This error seems to be raised, as x = x.view(x.size(0), -1) creates a tensor in the shape [batch_size, 89888], while 86528 features are expected.
You could either make sure the input to this layer has the right shape or change the in_features of self.fc to 89888.