RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[3, 1, 224, 224] to have 3 channels, but got 1 channels instead

I have seen a couple of similar posts on the forum, but I have a hard time generalizing them to my own problem. Here’s the error:

torch.Size([3, 1, 224, 224])
Traceback (most recent call last):
  File "test_loocv.py", line 245, in <module>
    output = model_ft(test_data)
  File "/scratch/sjn-p3/anaconda/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 477, in __call__
    result = self.forward(*input, **kwargs)
  File "/scratch/sjn-p3/anaconda/anaconda3/lib/python3.6/site-packages/torchvision-0.2.1-py3.6.egg/torchvision/models/resnet.py", line 139, in forward
  File "/scratch/sjn-p3/anaconda/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 477, in __call__
    result = self.forward(*input, **kwargs)
  File "/scratch/sjn-p3/anaconda/anaconda3/lib/python3.6/site-packages/torch/nn/modules/conv.py", line 301, in forward
    self.padding, self.dilation, self.groups)
RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[3, 1, 224, 224] to have 3 channels, but got 1 channels instead

and here is the entire code:

from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

import torch.utils.data as data_utils
from torch.utils import data


data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(20),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}


data_dir = "test_images"

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def imshow(inp, title=None):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated



def train_model(model, criterion, optimizer, scheduler, dataloader, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train']:
            if phase == 'train':
                scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            #for inputs, labels in dataloaders[phase]:
            for inputs, labels in dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            # if phase == 'val' and epoch_acc > best_acc:
            #     best_acc = epoch_acc
            #     best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
#    print('Best val Acc: {:4f}'.format(best_acc))

#    model.load_state_dict(best_model_wts)
    return model


def visualize_model(model, num_images=6):
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()

    with torch.no_grad():
        #for i, (inputs, labels) in enumerate(dataloaders['test']):
        for i, (inputs, labels) in enumerate(dataloaders['train']):

            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images//2, 2, images_so_far)
                ax.axis('off')
                ax.set_title('predicted: {}'.format(class_names[preds[j]]))
                imshow(inputs.cpu().data[j])

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)



######################################################################
# Finetuning the convnet
# ----------------------
#
# Load a pretrained model and reset final fully connected layer.
#

#model_ft = models.resnet18(pretrained=True)
model_ft = models.resnet50(pretrained=True)

num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 2)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)



#model_ft = model_ft.cuda()
nb_samples = 10
nb_classes = 2


'''val_loader = data.DataLoader(
        image_datasets['train'],
        num_workers=2,
        batch_size=1
    )
val_loader = iter(val_loader)'''

image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train']}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train']}
class_names = image_datasets['train'].classes

# LOOCV
loocv_preds = []
loocv_targets = []
for idx in range(nb_samples):
    
    print('Using sample {} as test data'.format(idx))
    
    # Get all indices and remove test sample
    train_indices = list(range(len(image_datasets['train']))) 
    del train_indices[idx]
    
    # Create new sampler
    sampler = data.SubsetRandomSampler(train_indices)

    dataloader = data.DataLoader(
        image_datasets['train'],
        num_workers=2,
        batch_size=1,
        sampler=sampler
    )
    
    # Train model
    for batch_idx, (samples, target) in enumerate(dataloader):
        print('Batch {}'.format(batch_idx))
        model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, dataloader, num_epochs=2) # do I add this line here?
                
    # Test on LOO sample
    model_ft.eval()
    test_data, test_target = image_datasets['train'][idx]
    test_data = test_data.cuda()
    #test_target = test_target.cuda()
    test_target = torch.tensor(test_target)
    test_target = test_target.cuda()
    test_data.unsqueeze_(1)
    test_target.unsqueeze_(0)
    print(test_data.shape)
    output = model_ft(test_data)
    pred = torch.argmax(output, 1)
    loocv_preds.append(pred)
    loocv_targets.append(test_target.item())

As shown above, test_data has the shape: torch.Size([3, 1, 224, 224])


Why do you unsqueeze dimension 1 of the test data? The error says the input should have shape [b, 3, w, h], but yours is [3, 1, 224, 224], so the model does not accept it. Try using transpose to swap the first two dimensions.
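For example, a quick sketch of what that would look like with your shapes (keeping your variable name):

test_data = test_data.transpose(0, 1)  # [3, 1, 224, 224] -> [1, 3, 224, 224], i.e. [b, 3, w, h]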

Thanks for the reply. If I comment out the unsqueeze parts, I get this other error instead. Could you please write the code for the correct transpose? Also, do I eventually need the unsqueeze part or not?
The entire code can be seen here: https://pastebin.com/8p51Zz2A

Training complete in 0m 1s
torch.Size([3, 224, 224])
Traceback (most recent call last):
  File "test_loocv.py", line 244, in <module>
    output = model_ft(test_data)
  File "/scratch/sjn-p3/anaconda/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 477, in __call__
    result = self.forward(*input, **kwargs)
  File "/scratch/sjn-p3/anaconda/anaconda3/lib/python3.6/site-packages/torchvision-0.2.1-py3.6.egg/torchvision/models/resnet.py", line 139, in forward
  File "/scratch/sjn-p3/anaconda/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 477, in __call__
    result = self.forward(*input, **kwargs)
  File "/scratch/sjn-p3/anaconda/anaconda3/lib/python3.6/site-packages/torch/nn/modules/conv.py", line 301, in forward
    self.padding, self.dilation, self.groups)
RuntimeError: Expected 4-dimensional input for 4-dimensional weight [64, 3, 7, 7], but got input of size [3, 224, 224] instead

The code is:

    model_ft.eval()
    test_data, test_target = image_datasets['train'][idx]
    test_data = test_data.cuda()
    #test_target = test_target.cuda()
    test_target = torch.tensor(test_target)
    test_target = test_target.cuda()
    ##test_data.unsqueeze_(1)
    ##test_target.unsqueeze_(0)
    print(test_data.shape)
    output = model_ft(test_data)

test_data.unsqueeze_(0)
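That is, add the missing batch dimension at position 0 instead of unsqueezing dimension 1. A minimal sketch of the test block with this fix (same variable names as your code): the sample from ImageFolder is [3, 224, 224], and unsqueezing dimension 0 yields the 4-D [1, 3, 224, 224] NCHW input the first convolution expects.

model_ft.eval()
test_data, test_target = image_datasets['train'][idx]
test_data = test_data.cuda()
test_target = torch.tensor(test_target).cuda()
test_data.unsqueeze_(0)        # [3, 224, 224] -> [1, 3, 224, 224]
test_target.unsqueeze_(0)      # scalar target -> [1]
output = model_ft(test_data)   # 4-D input with 3 channels: no RuntimeError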


Your input images have only one channel, which is inconsistent with the three-channel RGB images the ResNet50 network expects. You should either change the input dimension of ResNet50’s first convolution or convert your images to three channels.

In my case only one or two of my images were like that, so I manually fixed them:

import cv2
import numpy as np

img = cv2.imread('110249.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# replicate the single gray channel into all three channels
img2 = np.zeros_like(img)
img2[:, :, 0] = gray
img2[:, :, 1] = gray
img2[:, :, 2] = gray
cv2.imwrite('110249.jpg', img2)
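As an alternative to rewriting the files on disk, the conversion can also happen at load time. A sketch, assuming your loader yields PIL images (torchvision’s ImageFolder does): PIL’s convert('RGB') replicates a single gray channel and leaves RGB images untouched.

from torchvision import transforms

data_transforms = {
    'train': transforms.Compose([
        transforms.Lambda(lambda img: img.convert('RGB')),  # gray -> 3 channels, RGB unchanged
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(20),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}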

I tried the above, but I am still getting a “Given groups=1, weight of size [64, 3, 7, 7], expected input[4, 1, 224, 224] to have 3 channels, but got 1 channels instead” error.
Did you find any other way to go about it?
This is my model architecture.


What are the dimensions in your [4, 1, 224, 224] input? They should be NCHW (batch size, number of channels, height, width).

If that’s the case, then it means you have 4 examples in your batch, 1 channel (grayscale image) and 224x224 images. If you want the code to work, you need to change either your input to have 3 channels (duplicate the gray channel for RGB) or change the model to accept 1 channel images. For the second solution you need to modify the model code (e.g. by copying the source in your own codebase and changing what you need).
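For the first solution at the tensor level, a small sketch with a hypothetical grayscale batch x of shape [4, 1, 224, 224]:

import torch

x = torch.randn(4, 1, 224, 224)   # N, C=1, H, W
x3 = x.repeat(1, 3, 1, 1)         # [4, 3, 224, 224]: gray copied into R, G, B
# or, as a view without copying memory:
x3 = x.expand(-1, 3, -1, -1)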

As seen in the Densenet source code, the first convolution expects 3 channels as input.
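For the second solution, a minimal sketch using torchvision’s ResNet50 (the same idea applies to DenseNet’s features.conv0): swap the first convolution for a 1-channel version, optionally folding the pretrained RGB filters in by summing over the channel dimension so they are not thrown away.

import torch
import torch.nn as nn
from torchvision import models

model = models.resnet50(pretrained=True)
old_conv = model.conv1   # Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
new_conv = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
with torch.no_grad():
    # collapse the pretrained RGB filters into a single-channel filter
    new_conv.weight.copy_(old_conv.weight.sum(dim=1, keepdim=True))
model.conv1 = new_conv

x = torch.randn(4, 1, 224, 224)   # grayscale NCHW batch
out = model(x)                    # no channel mismatch now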


I faced the same problem, and alex has explained it properly.

Use this to convert an [H, W, C] array into [C, H, W]:

image = image.transpose((2, 0, 1))

To include the batch size (N), use a DataLoader; it adds the batch dimension automatically.
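Putting it together, a small sketch from a NumPy H×W×C image to a model-ready batch (the array here is illustrative); note that transforms.ToTensor() performs the same HWC-to-CHW conversion for you.

import numpy as np
import torch

img = np.random.rand(224, 224, 3).astype(np.float32)   # H, W, C
chw = torch.from_numpy(img.transpose((2, 0, 1)))        # C, H, W
batch = chw.unsqueeze(0)                                # [1, 3, 224, 224], NCHW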


This one worked for me.

Given groups=1, weight of size 64 1 7 7, expected input[64, 3, 160, 160] to have 1 channels, but got 3 channels instead

I am getting the above error even though I am giving 3 input channels.
This is my architecture code:

class Dnet_1ch(nn.Module):
    def __init__(self, arch=arch, n=nunique, pre=True, ps=0.5):
        super().__init__()
        m = arch(True) if pre else arch()

        conv = nn.Conv2d(3, 64, kernel_size=5, stride=2, padding=3, bias=False)
        w = (m.features.conv0.weight.sum(1)).unsqueeze(1)
        conv.weight = nn.Parameter(w)

        self.layer0 = nn.Sequential(conv, m.features.norm0, nn.ReLU(inplace=True))
        self.layer1 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False),
            m.features.denseblock1)
        self.layer2 = nn.Sequential(m.features.transition1, m.features.denseblock2)
        self.layer3 = nn.Sequential(m.features.transition2, m.features.denseblock3)
        self.layer4 = nn.Sequential(m.features.transition3, m.features.denseblock4,
                                    m.features.norm5)

        nc = self.layer4[-1].weight.shape[0]
        self.head1 = Head(nc, n)
        #to_Mish(self.layer0), to_Mish(self.layer1), to_Mish(self.layer2)
        #to_Mish(self.layer3), to_Mish(self.layer4)

    def forward(self, x):
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x1 = self.head1(x)

        return x1

Can you figure out what the problem is?

You are setting the input channels of the first convolution to a single channel in these lines of code:

conv = nn.Conv2d(3, 64, kernel_size=5, stride=2, padding=3, bias=False)
w = (m.features.conv0.weight.sum(1)).unsqueeze(1)
conv.weight = nn.Parameter(w)

Summing the pretrained conv0 weight over dimension 1 and unsqueezing produces a [64, 1, 7, 7] tensor, so after the assignment the convolution effectively expects a single input channel, even though it was declared with in_channels=3, while you are passing an input with 3 channels.
Either leave the convolution as it is with in_channels=3, or change the number of channels in your input tensor to a single channel, e.g. by slicing.
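Concretely, a sketch of the two consistent variants (note the original snippet declares kernel_size=5 but then assigns 7x7 conv0 weights; replacing the weight Parameter silently changes the effective kernel size, so declaring kernel_size=7 to match is cleaner):

# Option 1: keep a 3-channel first conv for RGB input; the pretrained
# conv0 weights can be copied in directly, since the shapes match.
conv = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
conv.weight = nn.Parameter(m.features.conv0.weight.clone())

# Option 2: a 1-channel first conv for grayscale input, reusing the
# pretrained weights summed over the channel dimension.
conv = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
w = m.features.conv0.weight.sum(1).unsqueeze(1)   # [64, 3, 7, 7] -> [64, 1, 7, 7]
conv.weight = nn.Parameter(w)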


Thanks for the response. I corrected it.

When does “CUDA error: device-side assert triggered” generally occur?

Generally, whenever an assert statement is violated.
E.g., if you are using nn.NLLLoss as the criterion and pass a target with out-of-bounds values, this line of code will raise the issue.

Note that CUDA operations are asynchronous, so whenever you encounter an error running with CUDA, I would suggest first making sure the code works fine on the CPU (as this might give you a better error message), and if that’s the case, run the code with CUDA_LAUNCH_BLOCKING=1 python script.py args.
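A minimal sketch that reproduces this class of error (the values are illustrative): with two output classes, any target outside {0, 1} violates the loss function’s bounds check.

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
output = torch.randn(4, 2)              # logits for 2 classes
target = torch.tensor([0, 1, 1, 2])     # class index 2 is out of bounds

loss = criterion(output, target)
# on the CPU this fails with a clear message naming the invalid target;
# on the GPU it surfaces as "CUDA error: device-side assert triggered",
# often at a later, unrelated line, because CUDA kernels run asynchronously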

Thanks for the help. I rectified it.

data_preprocess = transforms.Compose([
        #transforms.RandomResizedCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
def preprocess_input(img):
  x = data_preprocess(img)
  return x.unsqueeze_(0).to(device)
def display_img_pred_and_heatmap(path,model,label=None):
  original_img = cv.imread(path)
  original_img = cv.resize(original_img, (224,224))
  #print("original_img: " , original_img.shape)
  row_size = original_img.shape[0]
  col_size = original_img.shape[1]
  plt.imshow(cv.cvtColor(original_img, cv.COLOR_BGR2RGB))
  plt.show()
  preprocessed_img = preprocess_input(original_img)
  sub_model = nn.Sequential(*list(model.children())[:-1])
  print("sub_model: ",sub_model)
  print("sub_model(preprocessed_img).shape: ",sub_model(preprocessed_img).shape)

I am getting a similar error when I try to get output from an intermediate layer. I don’t get any errors when I try to get output from the complete model. How should I fix it?

RuntimeError: Given groups=1, weight of size [1024, 512, 3, 3], expected input[1, 200, 14, 14] to have 512 channels, but got 200 channels instead

Update: I don’t get any error when I change sub_model = nn.Sequential(*list(model.children())[:-1]) to sub_model = nn.Sequential(*list(model.children())[:-6])

This is my model:

class myModel5(nn.Module):

    def __init__(self, features, num_classes=200, **kwargs):
        super(myModel5, self).__init__()
        self.features = features
        self.conv6 = nn.Conv2d(512,  1024, kernel_size=3, padding=1) 
        self.conv7 = nn.Conv2d(1024, num_classes, kernel_size=1)
        self.conv8 = nn.Conv2d(512,  1024, kernel_size=3, padding=1) 
        self.conv9 = nn.Conv2d(1024, num_classes, kernel_size=1)
        self.relu = nn.ReLU(inplace=False)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        #self.fc = nn.Linear(1024, num_classes)
        initialize_weights(self.modules(), init_mode='he')
    def forward(self, x, labels=None, return_cam=False):
        batch_size = x.shape[0]
        x1 = self.features(x)
        x1 = self.conv6(x1)
        x1 = self.relu(x1)
        x1 = self.conv7(x1)
        x1 = self.relu(x1)
        x2 = self.features(x)
        x2 = self.conv8(x2)
        x2 = self.relu(x2)
        x2 = self.conv9(x2)
        x2 = self.relu(x2)
        x = x1 + x2

Well, nn.Sequential(*list(model.children())[:-1]) chains the child modules in the order they were registered in __init__: features, conv6, conv7, conv8, conv9, relu, avgpool. You are therefore asking it to feed the 200-channel output of conv7 into conv8, which expects 512 channels, and that is clearly unintended. So think about what you actually want to do.

How do I get outputs of conv7 and conv9 then?

There are many ways to accomplish that; one way would be:

submodel = nn.Sequential(
    model.features,
    model.conv6,
    model.relu,
    model.conv7,
    model.relu
)

This gives you the output of relu(conv7). children() returns the registered submodules in the order they were added in __init__; since relu is applied multiple times in forward, you can’t simply take the children.

Another way would be to just return the intermediate values along with the final value in forward.
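For completeness, a sketch of that second approach applied to the myModel5 forward above (only the bookkeeping and the return change); registering forward hooks on conv7 and conv9 via register_forward_hook would be a third option that leaves forward untouched.

def forward(self, x, labels=None, return_cam=False):
    x1 = self.features(x)
    x1 = self.relu(self.conv6(x1))
    out7 = self.relu(self.conv7(x1))    # intermediate output you want

    x2 = self.features(x)
    x2 = self.relu(self.conv8(x2))
    out9 = self.relu(self.conv9(x2))    # intermediate output you want

    return out7 + out9, out7, out9      # final value plus both intermediates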
