RuntimeError: Given groups=1, weight[64, 3, 3, 3], so expected input[16, 64, 256, 256] to have 3 channels, but got 64 channels instead

Hello, I’m trying to compute gradients for a ResNet with the following code. It is not shown here, but my input images are resized to 224x224.

import torch
import torch.nn as nn
from torchvision.models import resnet50

class ResNet(nn.Module):
    def __init__(self):
        super(ResNet, self).__init__()
        
        # get the pretrained ResNet network
        self.resnet = resnet50(pretrained=True)
        
        # access its last convolutional layer.
        # 4th layer is the last layer and fc is the classifier
        self.features_conv = self.resnet.layer4
        
        # get the max pool of the features stem
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=0, dilation=1, ceil_mode=False)
        
        # get the classifier for Resnet
        self.classifier = self.resnet.fc
        
        # placeholder for the gradients
        self.gradients = None
    
    # hook for the gradients of the activations
    def activations_hook(self, grad):
        self.gradients = grad
        
    def forward(self, x):
        x = self.features_conv(x)
        
        # register the hook
        h = x.register_hook(self.activations_hook)
        
        # apply the remaining pooling
        x = self.max_pool(x)
        x = x.view((1, -1))
        x = self.classifier(x)
        return x
    
    # method for the gradient extraction
    def get_activations_gradient(self):
        return self.gradients
    
    # method for the activation extraction
    def get_activations(self, x):
        return self.features_conv(x)

When I initialize the model and try to get pred (the most probable class tensor) using the code below, I get the following error. I cannot figure out where the size [512, 1024, 1, 1] came from or how to make the model work with an input of [1, 3, 224, 224].


# initialize the model
resnet = ResNet()

# set the evaluation mode
resnet.eval()

# get the image and target from the dataloader
img, target = next(iter(dataloader))

# get the most likely prediction of the model
pred = resnet(img).argmax(dim=1)

>>>Given groups=1, weight of size [512, 1024, 1, 1], expected input[1, 3, 224, 224] to have 1024 channels, but got 3 channels instead

Any ideas? Thanks.

The shape mismatch is caused by resnet.layer4: you are feeding the input of shape [1, 3, 224, 224] directly into this layer, while it expects feature maps with 1024 input channels.
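
One way to fix it (a minimal sketch, not the only possible structure; the ResNetCAM name is just for this example) is to run the input through the ResNet stem and the earlier residual blocks first, so that layer4 receives the 1024-channel feature maps it expects:

import torch
import torch.nn as nn
from torchvision.models import resnet50

class ResNetCAM(nn.Module):
    def __init__(self):
        super().__init__()
        resnet = resnet50(pretrained=True)
        # everything up to and including layer4 produces the conv feature maps
        self.features_conv = nn.Sequential(
            resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool,
            resnet.layer1, resnet.layer2, resnet.layer3, resnet.layer4
        )
        self.avgpool = resnet.avgpool
        self.classifier = resnet.fc
        self.gradients = None

    def activations_hook(self, grad):
        self.gradients = grad

    def forward(self, x):
        x = self.features_conv(x)                # [N, 2048, 7, 7] for a 224x224 input
        x.register_hook(self.activations_hook)   # fires when gradients reach these activations
        x = self.avgpool(x)                      # [N, 2048, 1, 1]
        x = torch.flatten(x, 1)
        return self.classifier(x)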

Hello, I get the same error: Given groups=1, weight of size [8, 1, 7, 7], expected input[128, 3, 48, 48] to have 1 channels, but got 3 channels instead
Here is my code:

from __future__ import print_function
import argparse
import pandas as pd
import numpy  as np
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms
import cv2
import matplotlib.pyplot as plt

from data_loaders import Plain_Dataset, eval_data_dataloader
from deep_emotion import Deep_Emotion
from generate_data import Generate_data

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def Train(epochs,train_loader,val_loader,criterion,optmizer,device):
    '''
    Training Loop
    '''
    print("===================================Start Training===================================")
    for e in range(epochs):
        train_loss = 0
        validation_loss = 0
        train_correct = 0
        val_correct = 0
        # Train the model  #
        net.train()
        for data, labels in train_loader:
            data, labels = data.to(device), labels.to(device)
            optmizer.zero_grad()
            outputs = net(data)
            loss = criterion(outputs,labels)
            loss.backward()
            optmizer.step()
            train_loss += loss.item()
            _, preds = torch.max(outputs,1)
            train_correct += torch.sum(preds == labels.data)

        #validate the model#
        net.eval()
        for data,labels in val_loader:
            data, labels = data.to(device), labels.to(device)
            val_outputs = net(data)
            val_loss = criterion(val_outputs, labels)
            validation_loss += val_loss.item()
            _, val_preds = torch.max(val_outputs,1)
            val_correct += torch.sum(val_preds == labels.data)

        train_loss = train_loss/len(train_dataset)
        train_acc = train_correct.double() / len(train_dataset)
        validation_loss =  validation_loss / len(validation_dataset)
        val_acc = val_correct.double() / len(validation_dataset)
        print('Iteration: {} \tTraining Loss: {:.8f} \tValidation Loss {:.8f} \tTraining Accuracy {:.3f}% \tValidation Accuracy {:.3f}%'
                                                           .format(e+1, train_loss, validation_loss, train_acc * 100, val_acc * 100))

    torch.save(net.state_dict(),'deep_emotion-{}-{}-{}.pt'.format(epochs,batchsize,lr))
    print("===================================Training Finished===================================")

epochs = 500
lr = 0.005
batchsize = 128

net = Deep_Emotion()
net.to(device)
print("Model archticture: ", net)
traincsv_file = 'dataset_final'+'/'+'train_aug.csv'
validationcsv_file = 'dataset_final'+'/'+'val_aug.csv'
train_img_dir = 'dataset_final'+'/'+'train_aug/'
validation_img_dir = 'dataset_final'+'/'+'val_aug/'

transformation= transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,),(0.5,))])
train_dataset= Plain_Dataset(csv_file=traincsv_file, img_dir = train_img_dir, datatype = 'train', transform = transformation)
validation_dataset= Plain_Dataset(csv_file=validationcsv_file, img_dir = validation_img_dir, datatype = 'val', transform = transformation)
train_loader= DataLoader(train_dataset,batch_size=batchsize,shuffle = True,num_workers=0)
val_loader=   DataLoader(validation_dataset,batch_size=batchsize,shuffle = True,num_workers=0)

criterion= nn.CrossEntropyLoss()
optmizer= optim.Adam(net.parameters())
Train(epochs, train_loader, val_loader, criterion, optmizer, device)


And this is the code for the model:

import torch
import torch.nn as nn
import torch.nn.functional as F

class Deep_Emotion(nn.Module):
    def __init__(self):
        '''
        Deep_Emotion class contains the network architecture.
        '''
        super(Deep_Emotion,self).__init__()
        self.conv1 = nn.Conv2d(1,10,3)
        self.conv2 = nn.Conv2d(10,10,3)
        self.pool2 = nn.MaxPool2d(2,2)

        self.conv3 = nn.Conv2d(10,10,3)
        self.conv4 = nn.Conv2d(10,10,3)
        self.pool4 = nn.MaxPool2d(2,2)

        self.norm = nn.BatchNorm2d(10)

        self.fc1 = nn.Linear(810,50)
        self.fc2 = nn.Linear(50,7)
        

        self.localization = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=7),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True),
            nn.Conv2d(8, 10, kernel_size=5),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True)
        )

        self.fc_loc = nn.Sequential(
            nn.Linear(640, 32),
            nn.ReLU(True),
            nn.Linear(32, 3 * 2)
        )
        self.fc_loc[2].weight.data.zero_()
        self.fc_loc[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    def stn(self, x):
        xs = self.localization(x)
        xs = xs.view(-1, 640)
        theta = self.fc_loc(xs)
        theta = theta.view(-1, 2, 3)

        grid = F.affine_grid(theta, x.size())
        x = F.grid_sample(x, grid)
        return x

    def forward(self,input):
        out = self.stn(input)

        out = F.relu(self.conv1(out))
        out = self.conv2(out)
        out = F.relu(self.pool2(out))

        out = F.relu(self.conv3(out))
        out = self.norm(self.conv4(out))
        out = F.relu(self.pool4(out))

        out = F.dropout(out)
        out = out.view(-1, 810)
        out = F.relu(self.fc1(out))
        out = self.fc2(out)

        return out

Any help would be appreciated; sorry for my bad English.

The first conv layer in self.localization expects an input with a single channel, while your input to the model has 3 channels. Set in_channels=3 in this layer and it should work.
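
Note that self.conv1 is also defined with in_channels=1, so it would need the same change. Alternatively, you could keep the model single-channel and convert the images to grayscale in the transform pipeline. A minimal sketch, assuming Plain_Dataset applies the transform to PIL images:

from PIL import Image
from torchvision import transforms

transformation = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # collapse RGB to a single channel
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

img = Image.new("RGB", (48, 48))  # stand-in for one 48x48 RGB image from the dataset
x = transformation(img)
print(x.shape)  # torch.Size([1, 48, 48])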

@ptrblck_de Hi! Firstly, thank you so much for helping out all these people in this forum and especially for your fast/patient replies!

I am trying to fine-tune a 3D GAN model. The training dataset consists of voxel grids of size 30x30x30. However, this resolution is too low, so I have replaced the dataset with voxels of size 100x100x100. Now I am getting this error (with the original code):

    out = x.view(-1, 1, self.cube_len, self.cube_len, self.cube_len)
RuntimeError: shape '[-1, 1, 32, 32, 32]' is invalid for input of size 2122416

Here is the code I am trying to run:

class net_D(torch.nn.Module):
    def __init__(self, args):
        super(net_D, self).__init__()
        self.args = args
        self.cube_len = params.cube_len
        self.leak_value = params.leak_value
        self.bias = params.bias

        padd = (0,0,0)
        if self.cube_len == 32:
            padd = (1,1,1)

        self.f_dim = 32

        self.layer1 = self.conv_layer(1, self.f_dim, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias)
        self.layer2 = self.conv_layer(self.f_dim, self.f_dim*2, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias)
        self.layer3 = self.conv_layer(self.f_dim*2, self.f_dim*4, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias)
        self.layer4 = self.conv_layer(self.f_dim*4, self.f_dim*8, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias)
        # self.layer5 = self.conv_layer(self.f_dim*8, self.f_dim*16, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias )

        self.layer5 = torch.nn.Sequential(
            torch.nn.Conv3d(self.f_dim*8, 1, kernel_size=4, stride=2, bias=self.bias, padding=padd),
            torch.nn.Sigmoid()
        )

        # self.layer5 = torch.nn.Sequential(
        #     torch.nn.Linear(256*2*2*2, 1),
        #     torch.nn.Sigmoid()
        # )

    def conv_layer(self, input_dim, output_dim, kernel_size=4, stride=2, padding=(1,1,1), bias=False):
        layer = torch.nn.Sequential(
            torch.nn.Conv3d(input_dim, output_dim, kernel_size=kernel_size, stride=stride, bias=bias, padding=padding),
            torch.nn.BatchNorm3d(output_dim),
            torch.nn.LeakyReLU(self.leak_value, inplace=True)
        )
        return layer

    def forward(self, x):
        # out = torch.unsqueeze(x, dim=1)
        print(x.shape)
        out = x.view(-1, 1, self.cube_len, self.cube_len, self.cube_len)
        # print(out.size()) # torch.Size([32, 1, 32, 32, 32])
        out = self.layer1(out)
        # print(out.size())  # torch.Size([32, 32, 16, 16, 16])
        out = self.layer2(out)
        # print(out.size())  # torch.Size([32, 64, 8, 8, 8])
        out = self.layer3(out)
        # print(out.size())  # torch.Size([32, 128, 4, 4, 4])
        out = self.layer4(out)
        # print(out.size())  # torch.Size([32, 256, 2, 2, 2])
        # out = out.view(-1, 256*2*2*2)
        # print (out.size())
        out = self.layer5(out)
        # print(out.size())  # torch.Size([32, 1, 1, 1, 1])
        out = torch.squeeze(out)
        return out

I tried playing around with the parameters and here are the errors I got with those as well:

out = x.view(2, 1, 102, 102, 102)

RuntimeError: Given transposed=1, weight of size [32, 1, 4, 4, 4], expected input[2, 64, 16, 16, 16] to have 32 channels, but got 64 channels instead

out = x.view(1, 1, 102, 102, 102)

RuntimeError: shape '[1, 1, 102, 102, 102]' is invalid for input of size 2122416

out = x.view(-1, 1, 102, 102, 102)

RuntimeError: Given transposed=1, weight of size [32, 1, 4, 4, 4], expected input[2, 64, 16, 16, 16] to have 32 channels, but got 64 channels instead

out = x.view(2, 102, 102, 102)

RuntimeError: Given groups=1, weight of size [32, 1, 4, 4, 4], expected input[1, 2, 102, 102, 102] to have 1 channels, but got 2 channels instead

I also wanted to mention that the input's torch.Size is (2, 102, 102, 102), but I'm not sure how to use that information. Thank you so much for the help!

This doesn’t seem to fit the error messages, as the view operation indicates your input has 2122416 elements, which corresponds to e.g. [2, 1, 102, 102, 102] (since 2 * 102 * 102 * 102 = 2122416).

The posted errors are either caused by a wrong view operation or by a shape mismatch in a conv kernel.
However, your code seems to work fine:

import torch
import torch.nn as nn

class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.f_dim = 32
        self.bias = True
        self.leak_value = 0.01
        padd = (0,0,0)

        self.layer1 = self.conv_layer(1, self.f_dim, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias)
        self.layer2 = self.conv_layer(self.f_dim, self.f_dim*2, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias)
        self.layer3 = self.conv_layer(self.f_dim*2, self.f_dim*4, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias)
        self.layer4 = self.conv_layer(self.f_dim*4, self.f_dim*8, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias)

        self.layer5 = torch.nn.Sequential(
            torch.nn.Conv3d(self.f_dim*8, 1, kernel_size=4, stride=2, bias=self.bias, padding=padd),
            torch.nn.Sigmoid()
        )

    def conv_layer(self, input_dim, output_dim, kernel_size=4, stride=2, padding=(1,1,1), bias=False):
        layer = torch.nn.Sequential(
            torch.nn.Conv3d(input_dim, output_dim, kernel_size=kernel_size, stride=stride, bias=bias, padding=padding),
            torch.nn.BatchNorm3d(output_dim),
            torch.nn.LeakyReLU(self.leak_value, inplace=True)
        )
        return layer

    def forward(self, x):
        print(x.shape)
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = torch.squeeze(out)
        return out
    
model = MyModel()
x = torch.randn(2, 1, 102, 102, 102)
out = model(x)
print(out.shape)
# torch.Size([2, 2, 2, 2])
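
If your DataLoader really yields tensors of shape [2, 102, 102, 102] (as posted), you could also add the missing channel dimension with unsqueeze, as in the commented-out line in your forward, instead of hard-coding the cube length in a view. A minimal sketch:

x = torch.randn(2, 102, 102, 102)  # shape reported in your post
x = x.unsqueeze(1)                 # -> torch.Size([2, 1, 102, 102, 102])
print(x.shape)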

Hi, I have the same problem:

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=1, padding=1)
        self.batchnorm1 = nn.BatchNorm2d(8)
        self.relu = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        self.cnn2 = nn.Conv2d(in_channels=8, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.batchnorm2 = nn.BatchNorm2d(32)

        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        self.fc1 = nn.Linear(in_features=1568, out_features=600)
        self.fc2 = nn.Linear(in_features=600, out_features=10)

RuntimeError: Given groups=1, weight of size [1, 8, 5, 5], expected input[100, 1, 28, 28] to have 8 channels, but got 1 channels instead

Could you post a minimal, executable code snippet reproducing the issue by wrapping it into three backticks ```, please?

Hello Sir, @ptrblck Can you also help me with my code?
I too have the same error: "RuntimeError: Given groups=1, weight of size [3, 1, 1, 11], expected input[1, 3, 1014, 1024] to have 1 channels, but got 3 channels instead".

Here is my code:
#For combining the data from Belgium for moisture content of 0.78
import numpy as np
import matplotlib.pyplot as plt
import skimage.io as skio
from sklearn.model_selection import train_test_split
import glob
Scan1_22_09_2022_Box1_ref1 = glob.glob(path)
Scan1_22_09_2022_Box2_ref1 = glob.glob(path)
Scan1_30_09_2022_Box3_ref1 = glob.glob(path)

listOfAllImages = Scan1_22_09_2022_Box1_ref1 + Scan1_22_09_2022_Box2_ref1 + Scan1_30_09_2022_Box3_ref1
len(listOfAllImages)

from skimage import transform

def getData(path):
    ArryImage = skio.imread(path, plugin='pil')
    ArryImage = ArryImage/np.max(ArryImage)
    ArryImage = transform.resize(ArryImage, (1024, 1024))
    return ArryImage

#Splitting of listOfAllImages into training and validation set
train_list, val_list = train_test_split(listOfAllImages, test_size = 0.2,random_state=42)
len(train_list)
len(val_list)
type(val_list)

#Defining the Neural network
import torch
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

import torch
import torch.nn as nn

class Net(nn.Module):
    '''
    Model that takes one 2D image as input and one 2D image as output.

    The inputs are processed using 2 encoders, one for the 3D data and the other for the
    2D image. The representations of the both inputs are concatenated.
    Thereafter, the merged representations are upsampled in a decoder into a 2D image.
    '''
    def __init__(self):
        super(Net, self).__init__()
        # Layers going down
        ## Encoder 2D input branch
        self.pool2d = nn.MaxPool2d(2)  # 2D pooling layer, max pooling
        self.encoder2d_lvl_1 = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),  # in_channels; out_channels: number of filters; kernel size
            nn.BatchNorm2d(32),
            nn.ReLU(True),
            nn.Conv2d(32, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(True)
        )
        self.encoder2d_lvl_2 = nn.Sequential(
            nn.Conv2d(32, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(True),
            nn.Conv2d(32, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(True),
        )
        self.encoder2d_lvl_3 = nn.Sequential(
            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True)
        )
        self.encoder2d_lvl_4 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.Conv2d(128, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True)
        )
        self.encoder2d_lvl_5 = nn.Sequential(
            nn.Conv2d(128, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True)
        )
        self.encoder2d_lvl_6 = nn.Sequential(
            nn.Conv2d(256, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True)
        )
        # Layers going up
        self.decoder2d_lvl_5 = nn.Sequential(
            nn.ConvTranspose2d(512, 512, 4, stride=2, groups=512),  # groups=in_channels -> each input channel is convolved with its own set of filters of size (out_channels/in_channels)
            nn.Conv2d(512, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 256, 3, padding=0),
            nn.BatchNorm2d(256),
            nn.ReLU(True)
        )
        self.decoder2d_lvl_4 = nn.Sequential(
            nn.ConvTranspose2d(256, 256, 4, stride=2, groups=256),  # groups=in_channels -> each input channel is convolved with its own set of filters of size (out_channels/in_channels)
            nn.Conv2d(256, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.Conv2d(256, 128, 3, padding=0),
            nn.BatchNorm2d(128),
            nn.ReLU(True)
        )
        self.decoder2d_lvl_3 = nn.Sequential(
            nn.ConvTranspose2d(128, 128, 4, stride=2, groups=128),  # groups=in_channels -> each input channel is convolved with its own set of filters of size (out_channels/in_channels)
            nn.Conv2d(128, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.Conv2d(128, 64, 3, padding=0),
            nn.BatchNorm2d(64),
            nn.ReLU(True)
        )

        self.decoder2d_lvl_2 = nn.Sequential(
            nn.ConvTranspose2d(64, 64, 4, stride=2, groups=64),  # groups=in_channels -> each input channel is convolved with its own set of filters of size (out_channels/in_channels)
            nn.Conv2d(64, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.Conv2d(64, 32, 3, padding=0),
            nn.BatchNorm2d(32),
            nn.ReLU(True)
        )

        self.decoder2d_lvl_1 = nn.Sequential(
            nn.ConvTranspose2d(32, 32, 4, stride=2, groups=32),
            nn.Conv2d(32, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(True),
            nn.Conv2d(32, 16, 3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(True),
            nn.Conv2d(16, 1, 3, padding=0)
        )

    def forward(self, x):
        # Encoding 2D input data                       # input [1, 1, 128, 128]
        out = self.encoder2d_lvl_1(x)
        #print(out.size())                             # torch.Size([1, 32, 128, 128])
        out = self.encoder2d_lvl_2(self.pool2d(out))
        #print(out.size())                             # torch.Size([1, 128, 64, 64])
        out = self.encoder2d_lvl_3(self.pool2d(out))
        #print(out.size())                             # torch.Size([1, 256, 32, 32])
        out = self.encoder2d_lvl_4(self.pool2d(out))
        #print(out.size())                             # torch.Size([1, 512, 16, 16])
        out = self.encoder2d_lvl_5(self.pool2d(out))
        #print(out.size())                             # torch.Size([1, 1024, 8, 8])
        out = self.encoder2d_lvl_6(self.pool2d(out))
        #print(out.size())

        # Decoding to 2D data
        out = self.decoder2d_lvl_5(out)
        #print(out.size())
        out = self.decoder2d_lvl_4(out)
        #print(out.size())                             # torch.Size([1, 512, 16, 16])
        out = self.decoder2d_lvl_3(out)
        #print(out.size())                             # torch.Size([1, 256, 32, 32])
        out = self.decoder2d_lvl_2(out)
        #print(out.size())                             # torch.Size([1, 128, 66, 66])
        out = self.decoder2d_lvl_1(out)
        #print(out.size())                             # torch.Size([1, 1, 128, 128])
        out = torch.sigmoid(out)

        #print(out.size())                             # torch.Size([1, 1, 128, 128])
        return out

#loss function defining
from pytorch_msssim import SSIM
model = Net()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
criterion = SSIM()
model = model.to(device)
criterion = criterion.to(device)

#training the autoencoder network and print loss statistics
trainLos = []
ValLos = []

lenTrainingData = len(train_list)
lenValData = len(val_list)

counterTrain = 0
counterVal = 0

counter = 0

for iteration in range(100 * lenTrainingData):
    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    sampleGT = getData(train_list[counterTrain])
    sampleGT = torch.from_numpy(sampleGT).float().to(device)
    sample = sampleGT.unsqueeze_(0).unsqueeze_(0)
    outputs = model(sample)
    loss = 1 - criterion(outputs, sampleGT)
    loss.backward()
    optimizer.step()

    counterTrain = counterTrain + 1

    if counterTrain == lenTrainingData:
        counterTrain = 0

    # print statistics

    trainLos.append(loss.item())

    sampleGT = getData(val_list[counterVal])
    sampleGT = torch.from_numpy(sampleGT).float().to(device)
    sample = sampleGT.unsqueeze_(0).unsqueeze_(0)
    outputs = model(sample)
    loss = 1 - criterion(outputs, sampleGT)
    ValLos.append(loss.item())

    counterVal = counterVal + 1
    counter = counter + 1

    if counterVal == lenValData:
        counterVal = 0

    if counter == 100:
        plt.figure()
        plt.plot(trainLos, label="Training loss")
        plt.plot(ValLos, label="Validation loss")
        plt.xlabel('Iterations')
        plt.ylabel('Loss')
        plt.legend()
        #plt.show()
        plt.savefig('./autoEncouder.png')
        plt.close()
        torch.save(model.state_dict(), "./autoEncouder.pth")
        counter = 0
        print(iteration)
        print(loss.item())

The first conv layer in your model is expecting an input with 1 channel as it’s defined via: nn.Conv2d(1, 32, 3, padding=1) while the input tensor has 3 channels.
Either set in_channels to 3 in:

	self.encoder2d_lvl_1 = nn.Sequential(
		nn.Conv2d(1, 32, 3, padding=1), 

or pass inputs with a single channel to the model.

@ptrblck I did check the input tensor,

ArryImage = getData(listOfAllImages[0])
tensor = torch.from_numpy(ArryImage)
print(tensor.shape)

Its shape is torch.Size([1024, 1024]), meaning it has 1 channel (as my images are grayscale).

This doesn’t match the error message:

RuntimeError: Given groups=1, weight of size [3, 1, 1, 11], expected input[1, 3, 1014, 1024] to have 1 channels, but got 3 channels instead

which indicates 3 channels are used, so narrow down which operation is creating these additional channels.
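
For example, you could check the raw array shapes for a few files (a quick sketch using your own loading call); skimage returns (H, W) for grayscale files and (H, W, 3) for RGB files:

import skimage.io as skio

for p in listOfAllImages[:5]:
    print(p, skio.imread(p, plugin='pil').shape)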

So I should change my input channel to 3 in the first layer instead of 1?

Thank you sir @ptrblck

@ptrblck But after changing the in_channels to 3, I get this error: "RuntimeError: Given groups=1, weight of size [32, 3, 3, 3], expected input[1, 1, 1024, 1024] to have 3 channels, but got 1 channels instead".

This would mean that some of your input images use a single channel while others use 3.
You could transform the images in the __getitem__ method of your Dataset to make sure each of the images has 3 channels e.g. by repeating the single-channel images 3 times in their channel dimension. Something like this might work:

x = torch.randn(1, 224, 224)
# single-channel image
if x.size(0) == 1:
    x = x.repeat(3, 1, 1)
print(x.shape)
# torch.Size([3, 224, 224])

Thank you sir I will try @ptrblck

@ptrblck sir, I did make the changes you suggested by modifying the getData() function, but I am still getting the same error.

Are you using a custom Dataset and did you add the posted change to the __getitem__ method or into the DataLoader loop?
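
For reference, here is a minimal sketch of applying it inside __getitem__ (the ImageDataset class and its names are a hypothetical example, not your code). Also note that for a 2D array of shape (1024, 1024), x.size(0) is 1024, so the if x.size(0) == 1 check can only trigger after a channel dimension has been added:

import numpy as np
import skimage.io as skio
import torch
from skimage import transform
from torch.utils.data import Dataset

class ImageDataset(Dataset):  # hypothetical example, not the posted code
    def __init__(self, paths):
        self.paths = paths

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        arr = skio.imread(self.paths[idx], plugin='pil')
        arr = arr / np.max(arr)
        arr = transform.resize(arr, (1024, 1024))
        x = torch.from_numpy(arr).float()
        if x.dim() == 2:            # grayscale file: (H, W) -> (1, H, W)
            x = x.unsqueeze(0)
        else:                       # RGB file: (H, W, 3) -> (3, H, W)
            x = x.permute(2, 0, 1)
        if x.size(0) == 1:          # repeat single-channel images to 3 channels
            x = x.repeat(3, 1, 1)
        return x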

Actually I am very much a beginner at this, and I fail to understand what exactly you are telling me. Could you please explain it in a bit more detail?

This is how I changed my getData() function in my code.
def getData(path):
    ArryImage = skio.imread(path, plugin='pil')
    ArryImage = ArryImage/np.max(ArryImage)
    ArryImage = transform.resize(ArryImage, (1024, 1024))
    x = torch.from_numpy(ArryImage).float()
    # single-channel image
    if x.size(0) == 1:
        x = x.repeat(3, 1, 1)
    return x

Thank you in advance @ptrblck