RuntimeError: Given groups=1, weight[64, 3, 3, 3], so expected input[16, 64, 256, 256] to have 3 channels, but got 64 channels instead

Hello, I’m trying to compute gradients for a ResNet with the following code. It is not shown here, but my input images are resized to 224x224.

import torch
import torch.nn as nn
from torchvision.models import resnet50

class ResNet(nn.Module):
    def __init__(self):
        super(ResNet, self).__init__()
        
        # get the pretrained ResNet network
        self.resnet = resnet50(pretrained=True)
        
        # access its last convolutional layer.
        # 4th layer is the last layer and fc is the classifier
        self.features_conv = self.resnet.layer4
        
        # get the max pool of the features stem
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=0, dilation=1, ceil_mode=False)
        
        # get the classifier for Resnet
        self.classifier = self.resnet.fc
        
        # placeholder for the gradients
        self.gradients = None
    
    # hook for the gradients of the activations
    def activations_hook(self, grad):
        self.gradients = grad
        
    def forward(self, x):
        x = self.features_conv(x)
        
        # register the hook
        h = x.register_hook(self.activations_hook)
        
        # apply the remaining pooling
        x = self.max_pool(x)
        x = x.view((1, -1))
        x = self.classifier(x)
        return x
    
    # method for the gradient extraction
    def get_activations_gradient(self):
        return self.gradients
    
    # method for the activation extraction
    def get_activations(self, x):
        return self.features_conv(x)

When I initialize the model and try to get pred (the most probable class tensor) using the code below, I get the following error. I cannot figure out where the size [512, 1024, 1, 1] came from or how to make the model work with an input of [1, 3, 224, 224].


# initialize the model
resnet = ResNet()

# set the evaluation mode
resnet.eval()

# get the image and target from the dataloader
img, target = next(iter(dataloader))

# get the most likely prediction of the model
pred = resnet(img).argmax(dim=1)

>>>Given groups=1, weight of size [512, 1024, 1, 1], expected input[1, 3, 224, 224] to have 1024 channels, but got 3 channels instead

Any ideas? Thanks.

The shape mismatch is caused by resnet.layer4: you are feeding the input of shape [1, 3, 224, 224] directly into this layer, while it expects feature maps with 1024 input channels.
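
One way to fix it (a minimal sketch, not the only possible structure; the ResNetCAM name is just for this example) is to run the input through the ResNet stem and the earlier residual blocks first, so that layer4 receives the 1024-channel feature maps it expects:

import torch
import torch.nn as nn
from torchvision.models import resnet50

class ResNetCAM(nn.Module):
    def __init__(self):
        super().__init__()
        resnet = resnet50(pretrained=True)
        # everything up to and including layer4 produces the conv feature maps
        self.features_conv = nn.Sequential(
            resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool,
            resnet.layer1, resnet.layer2, resnet.layer3, resnet.layer4
        )
        self.avgpool = resnet.avgpool
        self.classifier = resnet.fc
        self.gradients = None

    def activations_hook(self, grad):
        self.gradients = grad

    def forward(self, x):
        x = self.features_conv(x)                # [N, 2048, 7, 7] for a 224x224 input
        x.register_hook(self.activations_hook)   # fires when gradients reach these activations
        x = self.avgpool(x)                      # [N, 2048, 1, 1]
        x = torch.flatten(x, 1)
        return self.classifier(x)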

Hello, I get the same error: Given groups=1, weight of size [8, 1, 7, 7], expected input[128, 3, 48, 48] to have 1 channels, but got 3 channels instead
Here is my code:

from __future__ import print_function
import argparse
import pandas as pd
import numpy  as np
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms
import cv2
import matplotlib.pyplot as plt

from data_loaders import Plain_Dataset, eval_data_dataloader
from deep_emotion import Deep_Emotion
from generate_data import Generate_data

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def Train(epochs,train_loader,val_loader,criterion,optmizer,device):
    '''
    Training Loop
    '''
    print("===================================Start Training===================================")
    for e in range(epochs):
        train_loss = 0
        validation_loss = 0
        train_correct = 0
        val_correct = 0
        # Train the model  #
        net.train()
        for data, labels in train_loader:
            data, labels = data.to(device), labels.to(device)
            optmizer.zero_grad()
            outputs = net(data)
            loss = criterion(outputs,labels)
            loss.backward()
            optmizer.step()
            train_loss += loss.item()
            _, preds = torch.max(outputs,1)
            train_correct += torch.sum(preds == labels.data)

        #validate the model#
        net.eval()
        for data,labels in val_loader:
            data, labels = data.to(device), labels.to(device)
            val_outputs = net(data)
            val_loss = criterion(val_outputs, labels)
            validation_loss += val_loss.item()
            _, val_preds = torch.max(val_outputs,1)
            val_correct += torch.sum(val_preds == labels.data)

        train_loss = train_loss/len(train_dataset)
        train_acc = train_correct.double() / len(train_dataset)
        validation_loss =  validation_loss / len(validation_dataset)
        val_acc = val_correct.double() / len(validation_dataset)
        print('Iteration: {} \tTraining Loss: {:.8f} \tValidation Loss {:.8f} \tTraining Accuracy {:.3f}% \tValidation Accuracy {:.3f}%'
                                                           .format(e+1, train_loss, validation_loss, train_acc * 100, val_acc * 100))

    torch.save(net.state_dict(),'deep_emotion-{}-{}-{}.pt'.format(epochs,batchsize,lr))
    print("===================================Training Finished===================================")

epochs = 500
lr = 0.005
batchsize = 128

net = Deep_Emotion()
net.to(device)
print("Model archticture: ", net)
traincsv_file = 'dataset_final'+'/'+'train_aug.csv'
validationcsv_file = 'dataset_final'+'/'+'val_aug.csv'
train_img_dir = 'dataset_final'+'/'+'train_aug/'
validation_img_dir = 'dataset_final'+'/'+'val_aug/'

transformation= transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,),(0.5,))])
train_dataset= Plain_Dataset(csv_file=traincsv_file, img_dir = train_img_dir, datatype = 'train', transform = transformation)
validation_dataset= Plain_Dataset(csv_file=validationcsv_file, img_dir = validation_img_dir, datatype = 'val', transform = transformation)
train_loader= DataLoader(train_dataset,batch_size=batchsize,shuffle = True,num_workers=0)
val_loader=   DataLoader(validation_dataset,batch_size=batchsize,shuffle = True,num_workers=0)

criterion= nn.CrossEntropyLoss()
optmizer= optim.Adam(net.parameters())
Train(epochs, train_loader, val_loader, criterion, optmizer, device)


And this is the code for the model:

import torch
import torch.nn as nn
import torch.nn.functional as F

class Deep_Emotion(nn.Module):
    def __init__(self):
        '''
        Deep_Emotion class contains the network architecture.
        '''
        super(Deep_Emotion,self).__init__()
        self.conv1 = nn.Conv2d(1,10,3)
        self.conv2 = nn.Conv2d(10,10,3)
        self.pool2 = nn.MaxPool2d(2,2)

        self.conv3 = nn.Conv2d(10,10,3)
        self.conv4 = nn.Conv2d(10,10,3)
        self.pool4 = nn.MaxPool2d(2,2)

        self.norm = nn.BatchNorm2d(10)

        self.fc1 = nn.Linear(810,50)
        self.fc2 = nn.Linear(50,7)
        

        self.localization = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=7),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True),
            nn.Conv2d(8, 10, kernel_size=5),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True)
        )

        self.fc_loc = nn.Sequential(
            nn.Linear(640, 32),
            nn.ReLU(True),
            nn.Linear(32, 3 * 2)
        )
        self.fc_loc[2].weight.data.zero_()
        self.fc_loc[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    def stn(self, x):
        xs = self.localization(x)
        xs = xs.view(-1, 640)
        theta = self.fc_loc(xs)
        theta = theta.view(-1, 2, 3)

        grid = F.affine_grid(theta, x.size())
        x = F.grid_sample(x, grid)
        return x

    def forward(self,input):
        out = self.stn(input)

        out = F.relu(self.conv1(out))
        out = self.conv2(out)
        out = F.relu(self.pool2(out))

        out = F.relu(self.conv3(out))
        out = self.norm(self.conv4(out))
        out = F.relu(self.pool4(out))

        out = F.dropout(out)
        out = out.view(-1, 810)
        out = F.relu(self.fc1(out))
        out = self.fc2(out)

        return out

Any help would be appreciated; sorry for my bad English.

The first conv layer in self.localization expects an input with a single channel, while your input to the model has 3 channels. Set in_channels=3 in this layer and it should work.
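
Note that self.conv1 is also defined with in_channels=1, so it would need the same change. Alternatively, you could keep the model single-channel and convert the images to grayscale in the transform pipeline. A minimal sketch, assuming Plain_Dataset applies the transform to PIL images:

from PIL import Image
from torchvision import transforms

transformation = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # collapse RGB to a single channel
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

img = Image.new("RGB", (48, 48))  # stand-in for one 48x48 RGB image from the dataset
x = transformation(img)
print(x.shape)  # torch.Size([1, 48, 48])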

@ptrblck_de Hi! Firstly, thank you so much for helping out all these people in this forum and especially for your fast/patient replies!

I am trying to fine-tune a 3D GAN model. The training dataset consists of voxel grids of size 30x30x30. However, this resolution is too low, so I have replaced the dataset with voxels of size 100x100x100. Now I am getting this error (with the original code):

    out = x.view(-1, 1, self.cube_len, self.cube_len, self.cube_len)
RuntimeError: shape '[-1, 1, 32, 32, 32]' is invalid for input of size 2122416

Here is the code I am trying to run:

class net_D(torch.nn.Module):
    def __init__(self, args):
        super(net_D, self).__init__()
        self.args = args
        self.cube_len = params.cube_len
        self.leak_value = params.leak_value
        self.bias = params.bias

        padd = (0,0,0)
        if self.cube_len == 32:
            padd = (1,1,1)

        self.f_dim = 32

        self.layer1 = self.conv_layer(1, self.f_dim, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias)
        self.layer2 = self.conv_layer(self.f_dim, self.f_dim*2, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias)
        self.layer3 = self.conv_layer(self.f_dim*2, self.f_dim*4, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias)
        self.layer4 = self.conv_layer(self.f_dim*4, self.f_dim*8, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias)
        # self.layer5 = self.conv_layer(self.f_dim*8, self.f_dim*16, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias )

        self.layer5 = torch.nn.Sequential(
            torch.nn.Conv3d(self.f_dim*8, 1, kernel_size=4, stride=2, bias=self.bias, padding=padd),
            torch.nn.Sigmoid()
        )

        # self.layer5 = torch.nn.Sequential(
        #     torch.nn.Linear(256*2*2*2, 1),
        #     torch.nn.Sigmoid()
        # )

    def conv_layer(self, input_dim, output_dim, kernel_size=4, stride=2, padding=(1,1,1), bias=False):
        layer = torch.nn.Sequential(
            torch.nn.Conv3d(input_dim, output_dim, kernel_size=kernel_size, stride=stride, bias=bias, padding=padding),
            torch.nn.BatchNorm3d(output_dim),
            torch.nn.LeakyReLU(self.leak_value, inplace=True)
        )
        return layer

    def forward(self, x):
        # out = torch.unsqueeze(x, dim=1)
        print(x.shape)
        out = x.view(-1, 1, self.cube_len, self.cube_len, self.cube_len)
        # print(out.size()) # torch.Size([32, 1, 32, 32, 32])
        out = self.layer1(out)
        # print(out.size())  # torch.Size([32, 32, 16, 16, 16])
        out = self.layer2(out)
        # print(out.size())  # torch.Size([32, 64, 8, 8, 8])
        out = self.layer3(out)
        # print(out.size())  # torch.Size([32, 128, 4, 4, 4])
        out = self.layer4(out)
        # print(out.size())  # torch.Size([32, 256, 2, 2, 2])
        # out = out.view(-1, 256*2*2*2)
        # print (out.size())
        out = self.layer5(out)
        # print(out.size())  # torch.Size([32, 1, 1, 1, 1])
        out = torch.squeeze(out)
        return out

I tried playing around with the parameters and here are the errors I got with those as well:

out = x.view(2, 1, 102, 102, 102)

RuntimeError: Given transposed=1, weight of size [32, 1, 4, 4, 4], expected input[2, 64, 16, 16, 16] to have 32 channels, but got 64 channels instead

out = x.view(1, 1, 102, 102, 102)

RuntimeError: shape '[1, 1, 102, 102, 102]' is invalid for input of size 2122416

out = x.view(-1, 1, 102, 102, 102)

RuntimeError: Given transposed=1, weight of size [32, 1, 4, 4, 4], expected input[2, 64, 16, 16, 16] to have 32 channels, but got 64 channels instead

out = x.view(2, 102, 102, 102)

RuntimeError: Given groups=1, weight of size [32, 1, 4, 4, 4], expected input[1, 2, 102, 102, 102] to have 1 channels, but got 2 channels instead

I also wanted to mention that the input's torch.Size is (2, 102, 102, 102), but I'm not sure how to use that information. Thank you so much for the help!

This doesn’t seem to fit the error messages, as the view operation indicates your input has 2122416 elements, which corresponds to e.g. [2, 1, 102, 102, 102] (since 2 * 102 * 102 * 102 = 2122416).

The posted errors are either caused by a wrong view operation or by a shape mismatch in a conv kernel.
However, your code seems to work fine:

import torch
import torch.nn as nn

class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.f_dim = 32
        self.bias = True
        self.leak_value = 0.01
        padd = (0,0,0)

        self.layer1 = self.conv_layer(1, self.f_dim, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias)
        self.layer2 = self.conv_layer(self.f_dim, self.f_dim*2, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias)
        self.layer3 = self.conv_layer(self.f_dim*2, self.f_dim*4, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias)
        self.layer4 = self.conv_layer(self.f_dim*4, self.f_dim*8, kernel_size=4, stride=2, padding=(1,1,1), bias=self.bias)

        self.layer5 = torch.nn.Sequential(
            torch.nn.Conv3d(self.f_dim*8, 1, kernel_size=4, stride=2, bias=self.bias, padding=padd),
            torch.nn.Sigmoid()
        )

    def conv_layer(self, input_dim, output_dim, kernel_size=4, stride=2, padding=(1,1,1), bias=False):
        layer = torch.nn.Sequential(
            torch.nn.Conv3d(input_dim, output_dim, kernel_size=kernel_size, stride=stride, bias=bias, padding=padding),
            torch.nn.BatchNorm3d(output_dim),
            torch.nn.LeakyReLU(self.leak_value, inplace=True)
        )
        return layer

    def forward(self, x):
        print(x.shape)
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = torch.squeeze(out)
        return out
    
model = MyModel()
x = torch.randn(2, 1, 102, 102, 102)
out = model(x)
print(out.shape)
# torch.Size([2, 2, 2, 2])
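
If your DataLoader really yields tensors of shape [2, 102, 102, 102] (as posted), you could also add the missing channel dimension with unsqueeze, as in the commented-out line in your forward, instead of hard-coding the cube length in a view. A minimal sketch:

x = torch.randn(2, 102, 102, 102)  # shape reported in your post
x = x.unsqueeze(1)                 # -> torch.Size([2, 1, 102, 102, 102])
print(x.shape)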

Hi, I have the same problem:

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=1, padding=1)
        self.batchnorm1 = nn.BatchNorm2d(8)
        self.relu = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        self.cnn2 = nn.Conv2d(in_channels=8, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.batchnorm2 = nn.BatchNorm2d(32)

        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        self.fc1 = nn.Linear(in_features=1568, out_features=600)
        self.fc2 = nn.Linear(in_features=600, out_features=10)

RuntimeError: Given groups=1, weight of size [1, 8, 5, 5], expected input[100, 1, 28, 28] to have 8 channels, but got 1 channels instead

Could you post a minimal, executable code snippet reproducing the issue by wrapping it into three backticks ```, please?

Hello Sir, @ptrblck Can you also help me with my code?
I too have the same error: "RuntimeError: Given groups=1, weight of size [3, 1, 1, 11], expected input[1, 3, 1014, 1024] to have 1 channels, but got 3 channels instead".

Here is my code:
#For combining the data from Belgium for moisture content of 0.78
import numpy as np
import matplotlib.pyplot as plt
import skimage.io as skio
from sklearn.model_selection import train_test_split
import glob
Scan1_22_09_2022_Box1_ref1 = glob.glob(path)
Scan1_22_09_2022_Box2_ref1 = glob.glob(path)
Scan1_30_09_2022_Box3_ref1 = glob.glob(path)

listOfAllImages = Scan1_22_09_2022_Box1_ref1 + Scan1_22_09_2022_Box2_ref1 + Scan1_30_09_2022_Box3_ref1
len(listOfAllImages)

from skimage import transform

def getData(path):
    ArryImage = skio.imread(path, plugin='pil')
    ArryImage = ArryImage/np.max(ArryImage)
    ArryImage = transform.resize(ArryImage, (1024, 1024))
    return ArryImage

#Splitting of listOfAllImages into training and validation set
train_list, val_list = train_test_split(listOfAllImages, test_size = 0.2,random_state=42)
len(train_list)
len(val_list)
type(val_list)

#Defining the Neural network
import torch
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

import torch
import torch.nn as nn

class Net(nn.Module):
    '''
    Model that takes one 2D image as input and one 2D image as output.

    The inputs are processed using 2 encoders, one for the 3D data and the other for the
    2D image. The representations of the both inputs are concatenated.
    Thereafter, the merged representations are upsampled in a decoder into a 2D image.
    '''
    def __init__(self):
        super(Net, self).__init__()
        # Layers going down
        ## Encoder 2D input branch
        self.pool2d = nn.MaxPool2d(2)  # 2D pooling layer, max pooling
        self.encoder2d_lvl_1 = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),  # in_channels; out_channels: number of filters; kernel size
            nn.BatchNorm2d(32),
            nn.ReLU(True),
            nn.Conv2d(32, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(True)
        )
        self.encoder2d_lvl_2 = nn.Sequential(
            nn.Conv2d(32, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(True),
            nn.Conv2d(32, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(True),
        )
        self.encoder2d_lvl_3 = nn.Sequential(
            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True)
        )
        self.encoder2d_lvl_4 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.Conv2d(128, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True)
        )
        self.encoder2d_lvl_5 = nn.Sequential(
            nn.Conv2d(128, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True)
        )
        self.encoder2d_lvl_6 = nn.Sequential(
            nn.Conv2d(256, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True)
        )
        # Layers going up
        self.decoder2d_lvl_5 = nn.Sequential(
            nn.ConvTranspose2d(512, 512, 4, stride=2, groups=512),  # groups=in_channels -> each input channel is convolved with its own set of filters of size (out_channels/in_channels)
            nn.Conv2d(512, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 256, 3, padding=0),
            nn.BatchNorm2d(256),
            nn.ReLU(True)
        )
        self.decoder2d_lvl_4 = nn.Sequential(
            nn.ConvTranspose2d(256, 256, 4, stride=2, groups=256),  # groups=in_channels -> each input channel is convolved with its own set of filters of size (out_channels/in_channels)
            nn.Conv2d(256, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.Conv2d(256, 128, 3, padding=0),
            nn.BatchNorm2d(128),
            nn.ReLU(True)
        )
        self.decoder2d_lvl_3 = nn.Sequential(
            nn.ConvTranspose2d(128, 128, 4, stride=2, groups=128),  # groups=in_channels -> each input channel is convolved with its own set of filters of size (out_channels/in_channels)
            nn.Conv2d(128, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.Conv2d(128, 64, 3, padding=0),
            nn.BatchNorm2d(64),
            nn.ReLU(True)
        )

        self.decoder2d_lvl_2 = nn.Sequential(
            nn.ConvTranspose2d(64, 64, 4, stride=2, groups=64),  # groups=in_channels -> each input channel is convolved with its own set of filters of size (out_channels/in_channels)
            nn.Conv2d(64, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.Conv2d(64, 32, 3, padding=0),
            nn.BatchNorm2d(32),
            nn.ReLU(True)
        )

        self.decoder2d_lvl_1 = nn.Sequential(
            nn.ConvTranspose2d(32, 32, 4, stride=2, groups=32),
            nn.Conv2d(32, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(True),
            nn.Conv2d(32, 16, 3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(True),
            nn.Conv2d(16, 1, 3, padding=0)
        )

    def forward(self, x):
        # Encoding 2D input data                       # input [1, 1, 128, 128]
        out = self.encoder2d_lvl_1(x)
        #print(out.size())                             # torch.Size([1, 32, 128, 128])
        out = self.encoder2d_lvl_2(self.pool2d(out))
        #print(out.size())                             # torch.Size([1, 128, 64, 64])
        out = self.encoder2d_lvl_3(self.pool2d(out))
        #print(out.size())                             # torch.Size([1, 256, 32, 32])
        out = self.encoder2d_lvl_4(self.pool2d(out))
        #print(out.size())                             # torch.Size([1, 512, 16, 16])
        out = self.encoder2d_lvl_5(self.pool2d(out))
        #print(out.size())                             # torch.Size([1, 1024, 8, 8])
        out = self.encoder2d_lvl_6(self.pool2d(out))
        #print(out.size())

        # Decoding to 2D data
        out = self.decoder2d_lvl_5(out)
        #print(out.size())
        out = self.decoder2d_lvl_4(out)
        #print(out.size())                             # torch.Size([1, 512, 16, 16])
        out = self.decoder2d_lvl_3(out)
        #print(out.size())                             # torch.Size([1, 256, 32, 32])
        out = self.decoder2d_lvl_2(out)
        #print(out.size())                             # torch.Size([1, 128, 66, 66])
        out = self.decoder2d_lvl_1(out)
        #print(out.size())                             # torch.Size([1, 1, 128, 128])
        out = torch.sigmoid(out)

        #print(out.size())                             # torch.Size([1, 1, 128, 128])
        return out

#loss function defining
from pytorch_msssim import SSIM
model = Net()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
criterion = SSIM()
model = model.to(device)
criterion = criterion.to(device)

#training the autoencoder network and print loss statistics
trainLos = []
ValLos = []

lenTrainingData = len(train_list)
lenValData = len(val_list)

counterTrain = 0
counterVal = 0

counter = 0

for iteration in range(100 * lenTrainingData):
    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    sampleGT = getData(train_list[counterTrain])
    sampleGT = torch.from_numpy(sampleGT).float().to(device)
    sample = sampleGT.unsqueeze_(0).unsqueeze_(0)
    outputs = model(sample)
    loss = 1 - criterion(outputs, sampleGT)
    loss.backward()
    optimizer.step()

    counterTrain = counterTrain + 1

    if counterTrain == lenTrainingData:
        counterTrain = 0

    # print statistics

    trainLos.append(loss.item())

    sampleGT = getData(val_list[counterVal])
    sampleGT = torch.from_numpy(sampleGT).float().to(device)
    sample = sampleGT.unsqueeze_(0).unsqueeze_(0)
    outputs = model(sample)
    loss = 1 - criterion(outputs, sampleGT)
    ValLos.append(loss.item())

    counterVal = counterVal + 1
    counter = counter + 1

    if counterVal == lenValData:
        counterVal = 0

    if counter == 100:
        plt.figure()
        plt.plot(trainLos, label="Training loss")
        plt.plot(ValLos, label="Validation loss")
        plt.xlabel('Iterations')
        plt.ylabel('Loss')
        plt.legend()
        #plt.show()
        plt.savefig('./autoEncouder.png')
        plt.close()
        torch.save(model.state_dict(), "./autoEncouder.pth")
        counter = 0
        print(iteration)
        print(loss.item())

The first conv layer in your model is expecting an input with 1 channel as it’s defined via: nn.Conv2d(1, 32, 3, padding=1) while the input tensor has 3 channels.
Either set in_channels to 3 in:

	self.encoder2d_lvl_1 = nn.Sequential(
		nn.Conv2d(1, 32, 3, padding=1), 

or pass inputs with a single channel to the model.

@ptrblck I did check the input tensor,

ArryImage = getData(listOfAllImages[0])
tensor = torch.from_numpy(ArryImage)
print(tensor.shape)

Its shape is torch.Size([1024, 1024]), meaning it has 1 channel (as my images are grayscale).

This doesn’t match the error message:

RuntimeError: Given groups=1, weight of size [3, 1, 1, 11], expected input[1, 3, 1014, 1024] to have 1 channels, but got 3 channels instead

which indicates 3 channels are used, so narrow down which operation is creating these additional channels.
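
For example, you could check the raw array shapes for a few files (a quick sketch using your own loading call); skimage returns (H, W) for grayscale files and (H, W, 3) for RGB files:

import skimage.io as skio

for p in listOfAllImages[:5]:
    print(p, skio.imread(p, plugin='pil').shape)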

So I should change my input channel to 3 in the first layer instead of 1?

Thank you sir @ptrblck

@ptrblck But after changing the in_channels to 3, I get this error: "RuntimeError: Given groups=1, weight of size [32, 3, 3, 3], expected input[1, 1, 1024, 1024] to have 3 channels, but got 1 channels instead".

This would mean that some of your input images use a single channel while others use 3.
You could transform the images in the __getitem__ method of your Dataset to make sure each of the images has 3 channels e.g. by repeating the single-channel images 3 times in their channel dimension. Something like this might work:

x = torch.randn(1, 224, 224)
# single-channel image
if x.size(0) == 1:
    x = x.repeat(3, 1, 1)
print(x.shape)
# torch.Size([3, 224, 224])

Thank you sir I will try @ptrblck

@ptrblck sir, I did make the changes you suggested by modifying the getData() function, but I am still getting the same error.

Are you using a custom Dataset and did you add the posted change to the __getitem__ method or into the DataLoader loop?
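
For reference, here is a minimal sketch of applying it inside __getitem__ (the ImageDataset class and its names are a hypothetical example, not your code). Also note that for a 2D array of shape (1024, 1024), x.size(0) is 1024, so the if x.size(0) == 1 check can only trigger after a channel dimension has been added:

import numpy as np
import skimage.io as skio
import torch
from skimage import transform
from torch.utils.data import Dataset

class ImageDataset(Dataset):  # hypothetical example, not the posted code
    def __init__(self, paths):
        self.paths = paths

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        arr = skio.imread(self.paths[idx], plugin='pil')
        arr = arr / np.max(arr)
        arr = transform.resize(arr, (1024, 1024))
        x = torch.from_numpy(arr).float()
        if x.dim() == 2:            # grayscale file: (H, W) -> (1, H, W)
            x = x.unsqueeze(0)
        else:                       # RGB file: (H, W, 3) -> (3, H, W)
            x = x.permute(2, 0, 1)
        if x.size(0) == 1:          # repeat single-channel images to 3 channels
            x = x.repeat(3, 1, 1)
        return x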

Actually I am very much a beginner at this, and I fail to understand what exactly you are telling me. Could you please explain it in a bit more detail?

This is how I changed my getData() function in my code.
def getData(path):
    ArryImage = skio.imread(path, plugin='pil')
    ArryImage = ArryImage/np.max(ArryImage)
    ArryImage = transform.resize(ArryImage, (1024, 1024))
    x = torch.from_numpy(ArryImage).float()
    # single-channel image
    if x.size(0) == 1:
        x = x.repeat(3, 1, 1)
    return x

Thank you in advance @ptrblck