RuntimeError: Given input size: (6x253x1). Calculated output size: (6x126x0). Output size is too small

The following is my code, when i run it it gives me the error written in the title:

import os
import random
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from matplotlib import pyplot as plt
from tqdm import tqdm

# Hyper-parameters
num_epochs = 5
batch_size = 4
learning_rate = 0.001


# Device will determine whether to run the training on GPU or CPU.
use_cuda = torch.cuda.is_available()
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if __name__ == '__main__':
    # Use transforms.compose method to reformat images for modeling and save to variable all_transforms for later use dataset has PILImage images of range [0, 1].  We transform them to Tensors of normalized range [-1, 1]
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    #give paths to train and test datasets
    ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
    test_dataset = torchvision.datasets.ImageFolder(root=ROOT_DIR + "/Face_Mask_Dataset/Test/", transform=transform)
    classes = ('WithoutMask', 'Clothmask', 'Surgicalmask', 'N95Mask')
    imgSize = 255
    train_data = []
    test_data = []


    def imshow(img):
        img = img / 2 + 0.5  # unnormalize
        npimg = img.numpy()
        plt.imshow(np.transpose(npimg, (1, 2, 0)))
        plt.show()

    def create_training_data():
        counter = 0
        rnd = random.randrange(0, 1001)
        for category in classes:  # cycle through categories
            path = os.path.join(ROOT_DIR + "/Face_Mask_Dataset/Train/", category)  # create path to categories
            class_num = classes.index(category)  # get the classification by index per category
            for img in tqdm(os.listdir(path)):  # iterate over each image per category
                try:
                    img_array = cv2.imread(os.path.join(path, img))  # convert to array
                    new_array = cv2.resize(img_array, (imgSize, imgSize)) # resize to normalize data size
                    train_data.append([new_array, class_num])  # add this to our training_data
                    counter += 1
                    if counter == rnd:
                        plt.imshow(new_array, cmap='gray')  # graph it
                        plt.show()

                except Exception as e:
                    pass

    train_dataset = train_data
    create_training_data()
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    class CNN(nn.Module):

        def __init__(self):
            super(CNN, self).__init__()
            self.conv1 = nn.Conv2d(255, 6, 3)
            self.pool = nn.MaxPool2d(2, 2)
            self.conv2 = nn.Conv2d(6, 16, 5)
            self.fc1 = nn.Linear(16 * 5 * 5, 120)
            self.fc2 = nn.Linear(120, 84)
            self.fc3 = nn.Linear(84, 10)

        def forward(self, x):
            # -> n, 3, 32, 32
            x = self.pool(F.relu(self.conv1(x)))  # -> n, 6, 14, 14
            x = self.pool(F.relu(self.conv2(x)))  # -> n, 16, 5, 5
            x = x.view(-1, 16 * 5 * 5)  # -> n, 400
            x = F.relu(self.fc1(x))  # -> n, 120
            x = F.relu(self.fc2(x))  # -> n, 84
            x = self.fc3(x)  # -> n, 10
            return x

    model = CNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    n_total_steps = len(train_loader)

    for epoch in range(num_epochs):

        for i, (images, labels) in enumerate(train_loader):
            # origin shape: [6, 3, 5, 5] = 6, 3, 25
            # input_layer: 3 input channels, 6 output channels, 5 kernel size
            images = images.to(device)
            labels = labels.to(device)
            # Forward pass
            images = images.float()
            outputs = model(images)
            loss = criterion(outputs, labels)
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i + 1) % 2000 == 0:
                print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{n_total_steps}], Loss: {loss.item():.4f}')

    print('Finished Training')
    PATH = './cnn.pth'
    torch.save(model.state_dict(), PATH)

    with torch.no_grad():
        n_correct = 0
        n_samples = 0
        n_class_correct = [0 for i in range(10)]
        n_class_samples = [0 for i in range(10)]

        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # max returns (value ,index)
            _, predicted = torch.max(outputs, 1)
            n_samples += labels.size(0)
            n_correct += (predicted == labels).sum().item()

            for i in range(batch_size):
                label = labels[i]
                pred = predicted[i]

                if (label == pred):
                    n_class_correct[label] += 1
                n_class_samples[label] += 1

        acc = 100.0 * n_correct / n_samples
        print(f'Accuracy of the network: {acc} %')

        for i in range(10):
            acc = 100.0 * n_class_correct[i] / n_class_samples[i]
            print(f'Accuracy of {classes[i]}: {acc} %')


The point of this code is to take in a training dataset that recognizes if someone in a picture is wearing a mask or not, and if they are what kind of mask. it contains around 1500 pictures in the training set and around 400 in the test. please advice on how to fix the issue. Also i’m pretty sure the values in the CNN are wrong since i’m a beginner in AI so any advice on how to get the right values would also be appreciated!

The issue is raised if the output of a layer would be empty, which could be the case in e.g. conv or pooling layers.
You could try to increase the spatial size of the input or use a less aggressive pooling.

so do i modify those values?
sorry if it seems like as simple question, i’m still a beginner at AI

Most likely, but it also seems you are passing the input tensor in the shape [batch_size, height, width, channels] which is wrong as the channels-first memory layout is expected by default (you can call to(memory_format=torch.channels_last) but the returned shape will still be shown in the default NCHW format).

This code snippet uses a random input tensor, fixes the wrong view operation, and the feature mismatch in self.fc1 for an input of [batch_size, 3, 255, 255]:

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(59536, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
    
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = CNN()
x = torch.randn(1, 3, 255, 255)
out = model(x)

Thank you very much, it has solved the issue, however right now another issue arose. it the below code it give me this issue:
“ValueError: optimizer got an empty parameter list”
My guess is that the hyperparameters are never initialized in the class CNN but i may be wrong. Please advice!

 model = CNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters, lr=learning_rate)
    n_total_steps = len(train_loader)

.parameters is a method and you would need to call it:

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

oh yeah my mistake, i was trying something before i copy and pasted haha. but the error is given even when i have this piece of code written:

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

It works for me, so I guess you might be running another code:

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(59536, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
    
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = CNN()
x = torch.randn(1, 3, 255, 255)
out = model(x)

optimizer = torch.optim.SGD(model.parameters(), lr=1.)
optimizer.param_groups

sorry, i’ve run into another issue, im running this code:

    class CNN(nn.Module):
        def __init__(self):
            super(CNN, self).__init__()
            self.conv1 = nn.Conv2d(3, 6, 3)
            self.pool = nn.MaxPool2d(2, 2)
            self.conv2 = nn.Conv2d(6, 16, 5)
            self.fc1 = nn.Linear(59536, 120)
            self.fc2 = nn.Linear(120, 84)
            self.fc3 = nn.Linear(84, 10)

        def forward(self, x):
            x = self.pool(F.relu(self.conv1(x)))
            x = self.pool(F.relu(self.conv2(x)))
            x = x.view(x.size(0), -1)
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    model = CNN()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    optimizer.param_groups
    criterion = nn.CrossEntropyLoss()
    n_total_steps = len(train_loader)

    for epoch in range(num_epochs):

        for i, (images, labels) in enumerate(train_loader):
            # origin shape: [6, 3, 5, 5] = 6, 3, 25
            # input_layer: 3 input channels, 6 output channels, 5 kernel size
            images = images.to(device)
            labels = labels.to(device)
            # Forward pass
            images = images.float()
            **error happens here------>** outputs = model(images)
            loss = criterion(outputs, labels)
            # Backward and optimize
            optimizer.zero_grad()
            torch.autograd.set_detect_anomaly(True)
            loss.backward(retain_graph=True)
            optimizer.step()

            if (i + 1) % 2000 == 0:
                print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{n_total_steps}], Loss: {loss.item():.4f}')

    print('Finished Training')
    PATH = './cnn.pth'
    torch.save(model.state_dict(), PATH)

however i get this error:

RuntimeError: Given groups=1, weight of size [6, 3, 3, 3], expected input[4, 255, 255, 3] to have 3 channels, but got 255 channels instead

sorry again, I’m really confused as to what’s happening

Check my previous post here which explains the expected memory format and also uses an example input to show how to run the model.
In your code you would most likely need to permute the input tensor to match the size.
x = x.permute(0, 3, 1, 2) should work.