RuntimeError: Given groups=1, weight of size [32, 1, 3, 3], expected input[1, 32, 300, 300] to have 1 channels, but got 32 channels instead

This is going to be a long question. Below is my neural network.

class CarBrand(nn.Module):
    def __init__(self, num_classes):
        super(CarBrand, self).__init__()
        
        # 'in_channels': number of color channels; the dataset images are greyscale, so in_channels starts at 1
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)

        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # 128: output channels of conv3; each of the three max-pools halves the spatial size, hence // 8
        self.fc1 = nn.Linear(128 * (image_height // 8) * (image_width // 8), 256)  # Adjust the input size
        #self.fc2_class = nn.Linear(512, num_classes)         
        self.fc2_class = nn.Linear(256, num_classes) 
           
        
    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.maxpool(x)       
        x = self.relu(self.conv2(x))
        x = self.maxpool(x)        
        x = self.relu(self.conv3(x))
        x = self.maxpool(x)
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        brand_pred = self.fc2_class(x)   
        return brand_pred

And this is my Dataset class:

class CarBrandDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = [filename for filename in os.listdir(data_dir) if filename.endswith('.jpg')]
    
    def __len__(self):
        return len(self.image_paths)
        
    def __getitem__(self, index):
        image_path = os.path.join(self.data_dir, self.image_paths[index])
        
        xml_path = os.path.splitext(image_path)[0] + '.xml'
        
        image = Image.open(image_path).convert('L') # convert to greyscale
        if self.transform:
            data = (2, 0.5, -10, -0.5, 1.5, 20)
            image = image.transform(image.size, Image.AFFINE, data)
        
        # parse XML
        tree = ET.parse(xml_path)
        root = tree.getroot()
        
        # extract label information from XML 
        brand_label_mapping = {
            'bmw': 0,
            'citroen': 1,
            'mercedes': 2,
            'maserati': 3,
            'audi': 4,
            'honda': 5
        }
        
        brand_label_str = root.text
        
        brand_label =  brand_label_mapping.get(brand_label_str, -1)
        
        image_np = np.asarray(image, dtype=np.float32)
        
        return image_np, brand_label

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor() 
])
data_dir = "splitted_data/train"
dataset = CarBrandDataset(data_dir, transform=transform)
batch_size = 32
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
car_brands: int = 6
num_epochs: int = 100

And finally, this is my training loop:

model = CarBrand(num_classes=car_brands)
criterion_class = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(num_epochs):
    running_loss = 0.0
    for images, brand_labels in dataloader:
        optimizer.zero_grad()

        brand_pred = model(images)

        loss_brand = criterion_class(brand_pred, brand_labels)
 
        loss_brand.backward()
        optimizer.step()

        running_loss += loss_brand.item()
        
    print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {running_loss / len(dataloader)}")

But I get this error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[38], line 12
      9 for images, brand_labels in dataloader:
     10     optimizer.zero_grad()
---> 12     brand_pred = model(images)
     14     loss_brand = criterion_class(brand_pred, brand_labels)
     16     loss_brand.backward()

File ~\OneDrive\Masaüstü\plate\pytorchNN\Lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

Cell In[33], line 20, in CarBrand.forward(self, x)
     18 def forward(self, x):
---> 20     x = self.relu(self.conv1(x))
     21     x = self.maxpool(x)
     23     x = self.relu(self.conv2(x))

File ~\OneDrive\Masaüstü\plate\pytorchNN\Lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File ~\OneDrive\Masaüstü\plate\pytorchNN\Lib\site-packages\torch\nn\modules\conv.py:463, in Conv2d.forward(self, input)
    462 def forward(self, input: Tensor) -> Tensor:
--> 463     return self._conv_forward(input, self.weight, self.bias)

File ~\OneDrive\Masaüstü\plate\pytorchNN\Lib\site-packages\torch\nn\modules\conv.py:459, in Conv2d._conv_forward(self, input, weight, bias)
    455 if self.padding_mode != 'zeros':
    456     return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
    457                     weight, bias, self.stride,
    458                     _pair(0), self.dilation, self.groups)
--> 459 return F.conv2d(input, weight, bias, self.stride,
    460                 self.padding, self.dilation, self.groups)

RuntimeError: Given groups=1, weight of size [32, 1, 3, 3], expected input[1, 32, 300, 300] to have 1 channels, but got 32 channels instead

I searched Stack Overflow and asked ChatGPT, Bard, and Bing AI.
I am really about to lose my mind; I just want to finish this and get some peaceful sleep, so please help me solve this annoying problem. Sorry it's so long, but it's my whole code.

I guess your data is missing the channel dimension, so the conv layer assumes an implicit batch size of 1 and treats your batch size of 32 as the channel dimension.
Code is easier to understand, so take a look at this:

conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)

# default shape as [batch_size, channels, height, width]
x = torch.randn(32, 1, 300, 300)
# works
out = conv1(x)

# input missing the batch dimension as [channels, height, width]
x = torch.randn(1, 300, 300)
# works
out = conv1(x)

# input missing the channel dimension as [batch_size, height, width]
x = torch.randn(32, 300, 300)
# breaks as the second case is assumed
out = conv1(x)
# RuntimeError: Given groups=1, weight of size [32, 1, 3, 3], expected input[1, 32, 300, 300] to have 1 channels, but got 32 channels instead

Add the channel dimension via x = x.unsqueeze(1) and it should work.

I have tried to put that code piece somewhere “logical”, but I can’t figure out where. Excuse my inexperience. Can you add it to my code?

Try: brand_pred = model(images.unsqueeze(1)).
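
For context, here is a minimal sketch of your existing training loop with just that one change applied. Since your Dataset currently returns a plain (H, W) numpy array per image, the DataLoader yields images of shape [batch_size, height, width], and unsqueeze(1) inserts the missing channel dimension:

for epoch in range(num_epochs):
    running_loss = 0.0
    for images, brand_labels in dataloader:
        optimizer.zero_grad()

        # images comes out as [batch_size, height, width];
        # unsqueeze(1) makes it [batch_size, 1, height, width], matching conv1's in_channels=1
        brand_pred = model(images.unsqueeze(1))

        loss_brand = criterion_class(brand_pred, brand_labels)
        loss_brand.backward()
        optimizer.step()

        running_loss += loss_brand.item()

    print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {running_loss / len(dataloader)}")

Alternatively (if I'm reading your Dataset right), you could fix it at the source instead: your transforms.Compose with ToTensor() is never actually applied in __getitem__ (you return a raw numpy array), and ToTensor() on a greyscale PIL image would already give you a [1, H, W] tensor, so the unsqueeze would not be needed.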
