Given groups=1, weight[16, 1, 5, 5], so expected input[100, 3, 64, 64] to have 1 channels, but got 3 channels instead

I have a grayscale image dataset. This is my code:

class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel,self).__init__()
        #Convo 1
        self.cnn1=nn.Conv2d(in_channels=1,out_channels=16,kernel_size=5,stride=1,padding=2)
        self.relu1=nn.ReLU()
        #Maxpool_1
        self.maxpool1=nn.MaxPool2d(kernel_size=2)
        #Convo_2
        self.cnn2=nn.Conv2d(in_channels=16,out_channels=32,kernel_size=5,stride=1,padding=2)
        self.relu2=nn.ReLU()
        self.maxpool2=nn.MaxPool2d(kernel_size=2)
        self.fc1=nn.Linear(32*16*16,2) # two 2x2 max pools halve the 64x64 input to 16x16, giving 32*16*16 features
    def forward(self,x):
        #Convo_1
        out=self.cnn1(x)
        out=self.relu1(out)
        #Max_pool1
        out=self.maxpool1(out)
        #Convo_2
        out=self.cnn2(out)
        out=self.relu2(out)
        out=self.maxpool2(out)
        out=out.view(out.size(0),-1)#Flattening out 
        out=self.fc1(out)
        return out

I am getting:

RuntimeError                              Traceback (most recent call last)
<ipython-input-159-7279b8172617> in <module>
      5         labels=Variable(labels)
      6         optimizer.zero_grad()
----> 7         outputs=model(img)
      8         loss=criterion(outputs,labels)
      9         loss.backward()

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    489             result = self._slow_forward(*input, **kwargs)
    490         else:
--> 491             result = self.forward(*input, **kwargs)
    492         for hook in self._forward_hooks.values():
    493             hook_result = hook(self, input, result)

<ipython-input-154-0291a955352f> in forward(self, x)
     14     def forward(self,x):
     15         #Convo_1
---> 16         out=self.cnn1(x)
     17         out=self.relu1(out)
     18         #Max_pool1

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    489             result = self._slow_forward(*input, **kwargs)
    490         else:
--> 491             result = self.forward(*input, **kwargs)
    492         for hook in self._forward_hooks.values():
    493             hook_result = hook(self, input, result)

~\Anaconda3\lib\site-packages\torch\nn\modules\conv.py in forward(self, input)
    299     def forward(self, input):
    300         return F.conv2d(input, self.weight, self.bias, self.stride,
--> 301                         self.padding, self.dilation, self.groups)
    302 
    303 

RuntimeError: Given groups=1, weight[16, 1, 5, 5], so expected input[100, 3, 64, 64] to have 1 channels, but got 3 channels instead

Please tell me where I went wrong.


You are giving your network an image encoded over 3 channels instead of 1.
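
For example, you can quickly check what the model receives (a sketch, assuming your DataLoader is named train_loader):

imgs, labels = next(iter(train_loader))
print(imgs.shape)  # [batch_size, 1, 64, 64] for grayscale; [batch_size, 3, 64, 64] means RGB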

Thanks. I saved all the images as grayscale, but when I read the shape of those images I am getting (64, 64, 3).
How should I work with grayscale images in PyTorch?

How do you open your images? You have to check that the opened image has the correct shape, and that depends on how you open it.

I saved the images using OpenCV and opened them with:

for i, F in enumerate(os.listdir(directory)):
    img = cv2.imread(directory + '/' + F)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    cv2.imwrite(f"dogs/{i}.jpg", img)

This is how you save your images, but how do you open them for training?

Root = 'train_set'
train_data = ImageFolder(root=Root, transform=ToTensor())

root = 'test_set'
test_data = ImageFolder(root=root, transform=ToTensor())

To check the shape I used
cv2.imread(path).shape
I also tried

from torchvision.transforms import Grayscale
train_data = ImageFolder(root=Root,transform=[ToTensor(),Grayscale()])

It shows,

TypeError                                 Traceback (most recent call last)
<ipython-input-197-7279b8172617> in <module>
      1 iter=0
      2 for epoch in range(num_epochs):
----> 3     for img, labels in train_loader:
      4         img=Variable(img)
      5         labels=Variable(labels)

~\Anaconda3\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
    262         if self.num_workers == 0:  # same-process loading
    263             indices = next(self.sample_iter)  # may raise StopIteration
--> 264             batch = self.collate_fn([self.dataset[i] for i in indices])
    265             if self.pin_memory:
    266                 batch = pin_memory_batch(batch)

~\Anaconda3\lib\site-packages\torch\utils\data\dataloader.py in <listcomp>(.0)
    262         if self.num_workers == 0:  # same-process loading
    263             indices = next(self.sample_iter)  # may raise StopIteration
--> 264             batch = self.collate_fn([self.dataset[i] for i in indices])
    265             if self.pin_memory:
    266                 batch = pin_memory_batch(batch)

~\Anaconda3\lib\site-packages\torchvision\datasets\folder.py in __getitem__(self, index)
    101         sample = self.loader(path)
    102         if self.transform is not None:
--> 103             sample = self.transform(sample)
    104         if self.target_transform is not None:
    105             target = self.target_transform(target)

TypeError: 'list' object is not callable

Please help me with this.

Can someone please help me with this?

Use torchvision.transforms.Compose instead of the list for your transformations.
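
For example, something along these lines (a sketch; Grayscale defaults to a single output channel in current torchvision, so adjust to your version):

from torchvision import transforms
from torchvision.datasets import ImageFolder

transform = transforms.Compose([
    transforms.Grayscale(),   # convert the loaded PIL image to single-channel
    transforms.ToTensor(),    # then turn it into a [1, 64, 64] float tensor
])
train_data = ImageFolder(root=Root, transform=transform)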

Thanks for the response. It worked, but training is slower than with the colored images; I thought it would be very fast. I also tried to convert all the images to grayscale, but it showed me an error. Why is that?

How did you measure the time using RGB and gray images?
Are you using multiple workers for the preprocessing?
I’m not sure you’ll see a huge performance gain using grayscale images.

Which error do you get if you convert all images?

I converted all the train and test images using both OpenCV and PIL.
This is the PIL code:

for F in os.listdir(directory):
    img = Image.open(directory + '/' + F)
    img = img.resize((64, 64))
    img = img.convert('LA')
    name = F.split('.')[1]
    img.save(final_url + '/' + '{}.jpg'.format(name))

This is the OpenCV code for converting all the images to grayscale and saving them to a directory:

for F in os.listdir(directory):
    img = cv2.imread(directory + '/' + F)
    img = cv2.resize(img, (64, 64))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    name = F.split('.')[1]
    cv2.imwrite(final_url + '/' + '{}.jpg'.format(name), img)

(The model is the same CNNModel definition posted above.)

I am getting this error:
RuntimeError: Given groups=1, weight[16, 1, 5, 5], so expected input[100, 3, 64, 64] to have 1 channels, but got 3 channels instead.

There is less information in grayscale than in RGB images, so I thought it would take less time.

It seems you are loading the images in RGB format again, although probably all channels contain the same information.
I’m wondering how you’ve managed to save the grayscale images as jpg, as I thought this format needs three channels.
However, could you check the number of channels after loading the grayscale images?
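
For example (a sketch; the file name is just a placeholder for one of your saved images):

from PIL import Image
import numpy as np

img = Image.open('dogs/0.jpg')
print(img.mode)             # 'L' means single-channel, 'RGB' means three channels
print(np.array(img).shape)  # (64, 64) vs. (64, 64, 3)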

Yeah, I checked the shape of the image and it is (64, 64, 3)… Should I save it in PNG format?

Yes, try that. Usually PIL should give you a warning about saving single-channel images as jpg.

You are right regarding the speedup; however, I think the workload in the first layer might be small compared to the deeper layers, so a potential speed gain might not be noticeable.

Actually, while converting jpg images to grayscale, we have to use img.convert('LA') instead of img.convert('L').
I read that when reading a grayscale image in OpenCV, it gives us 3 layers of grayscale. I tried the PNG format, but it still shows the same error.

I’ve used PIL and it’s working using the L format:

from PIL import Image

img = Image.open('PATH')
img = img.convert('L')
img.save('tmp.png')
gray = Image.open('tmp.png')

gray won’t have a color channel after loading, so you might need to unsqueeze(0) the tensor.
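
For instance (a sketch, assuming gray was loaded as above):

import numpy as np
import torch

x = torch.from_numpy(np.array(gray)).float() / 255.0  # shape [64, 64], no channel dim
x = x.unsqueeze(0)                                     # add the channel dim -> [1, 64, 64]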

Thanks, I saved it as PNG using 'L'.
I used unsqueeze(0) in this part:

iter=0
for epoch in range(num_epochs):
    for img, labels in train_loader:
        img=img.unsqueeze(0)
        print(img.size())
        img=Variable(img)
        labels=Variable(labels)
        optimizer.zero_grad()
        outputs=model(img)
        loss=criterion(outputs,labels)
        loss.backward()
        optimizer.step()
        iter+=1
        if(iter%100==0):
            correct=0
            total=0
            for Images,labels in test_loader:
                Images=Variable(Images)
                outputs=model(Images)
                _,predicted=torch.max(outputs.data,1)
                total+=labels.size(0)
                correct+=(predicted==labels).sum()
            accuracy=100*correct/total
            print('Iterations: {}. Loss: {}, Accuracy: {}'.format(iter,loss.data[0],accuracy))

It shows,

torch.Size([1, 100, 3, 64, 64])
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-90-59c079bda59c> in <module>
      7         labels=Variable(labels)
      8         optimizer.zero_grad()
----> 9         outputs=model(img)
     10         loss=criterion(outputs,labels)
     11         loss.backward()

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    489             result = self._slow_forward(*input, **kwargs)
    490         else:
--> 491             result = self.forward(*input, **kwargs)
    492         for hook in self._forward_hooks.values():
    493             hook_result = hook(self, input, result)

<ipython-input-82-0291a955352f> in forward(self, x)
     14     def forward(self,x):
     15         #Convo_1
---> 16         out=self.cnn1(x)
     17         out=self.relu1(out)
     18         #Max_pool1

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    489             result = self._slow_forward(*input, **kwargs)
    490         else:
--> 491             result = self.forward(*input, **kwargs)
    492         for hook in self._forward_hooks.values():
    493             hook_result = hook(self, input, result)

~\Anaconda3\lib\site-packages\torch\nn\modules\conv.py in forward(self, input)
    299     def forward(self, input):
    300         return F.conv2d(input, self.weight, self.bias, self.stride,
--> 301                         self.padding, self.dilation, self.groups)
    302 
    303 

RuntimeError: expected stride to be a single integer value or a list of 3 values to match the convolution dimensions, but got stride=[1, 1]

Are you using an older PyTorch version?
If so, this RuntimeError might be an old error message stating that your batch dimension is missing.
Try to unsqueeze your tensor twice to add the channel and batch dimension.
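
For a single grayscale image tensor that would look roughly like this (a sketch, assuming x has shape [64, 64]):

x = x.unsqueeze(0)   # add the channel dim -> [1, 64, 64]
x = x.unsqueeze(0)   # add the batch dim   -> [1, 1, 64, 64]
outputs = model(x)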