CNN for image classification - mat1 and mat2 cannot be multiplied

Hi, I’m trying to run a cnn for image classification. Very new to this, so using a mashup of tutorial code and my own inputs. I get this error when I try to run:
mat1 and mat2 shapes cannot be multiplied (12x403328 and 1266750x120)

All my image inputs are 750x563 jpgs.

Torch size is:

Shape of X [N, C, H, W]:  torch.Size([12, 3, 563, 750])
Shape of y:  torch.Size([12]) torch.int64

Any help would be appreciated - I’ve seen similar posts, but can’t figure out how to apply those answers to my code :frowning:

if __name__ == '__main__':
    import torch
    from torchvision import datasets, transforms

    import numpy as np # for transformation

    import torch # PyTorch package
    import torchvision # load datasets
    import torchvision.transforms as transforms # transform data
    import torch.nn as nn # basic building block for neural neteorks
    import torch.nn.functional as F # import convolution functions like Relu
    import torch.optim as optim # optimzer

    train_transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])
    train_dataset = datasets.ImageFolder('H:/01 - Tech Refs/data2/train', transform=train_transform)
    train_dataloader =, batch_size=12, shuffle=True, num_workers=4)

    test_transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])
    test_dataset = datasets.ImageFolder('H:/01 - Tech Refs/data2/test', transform=test_transform)
    test_dataloader =, batch_size=12, shuffle=False,num_workers=4)

    for X, y in test_dataloader:
        print("Shape of X [N, C, H, W]: ", X.shape)
        print("Shape of y: ", y.shape, y.dtype)

            #self.fc1 = nn.Linear(3*563*750, 120)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using {device} device")

    class NeuralNetwork(nn.Module):
        def __init__(self):
            self.conv1 = nn.Conv2d(3, 6, 5)
            self.pool = nn.MaxPool2d(2, 2)
            self.conv2 = nn.Conv2d(6, 16, 5)
            self.fc1 = nn.Linear(3*563*750, 120)
            self.fc2 = nn.Linear(120, 84)
            self.fc3 = nn.Linear(84, 17)

        def forward(self, x):
            x = self.pool(F.relu(self.conv1(x)))
            x = self.pool(F.relu(self.conv2(x)))
            x = torch.flatten(x, 1) # flatten all dimensions except batch
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    model = NeuralNetwork().to(device)

    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    def train(dataloader, model, loss_fn, optimizer):
        size = len(dataloader.dataset)
        for batch, (X, y) in enumerate(dataloader):
            X, y =,

            # Compute prediction error
            pred = model(X)
            loss = loss_fn(pred, y)

            # Backpropagation

            if batch % 100 == 0:
                loss, current = loss.item(), batch * len(X)
                print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

    def test(dataloader, model, loss_fn):
        size = len(dataloader.dataset)
        num_batches = len(dataloader)
        test_loss, correct = 0, 0
        with torch.no_grad():
            for X, y in dataloader:
                X, y =,
                pred = model(X)
                test_loss += loss_fn(pred, y).item()
                correct += (pred.argmax(1) == y).type(torch.float).sum().item()
        test_loss /= num_batches
        correct /= size
        print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

    epochs = 5

    for t in range(epochs):
        print(f"Epoch {t+1}\n-------------------------------")
        train(train_dataloader, model, loss_fn, optimizer)
        test(test_dataloader, model, loss_fn)

Hi Luke!

By the time you get to fc1 – where the error is occurring – your images
are no longer of the same size as when passed into forward().

Conv2d changes (reduces) the size of your image.

MaxPool2d also reduces the size of your image.

In forward(), print out the shape of x (initially your input image), first
at the beginning, after the application of conv1(), after the first application
of pool(), after conv2(), after the second pool(), and then again after
flatten(). You will see the height and width of x shrinking. The shape
of x after flatten will tell you the number in_features you need for fc1
and it will (presumably) be 403328 rather than 1266750.


K. Frank

1 Like

Thank you! I’ll give that a try.