RuntimeError: Given groups=1, weight[64, 3, 3, 3], so expected input[16, 64, 256, 256] to have 3 channels, but got 64 channels instead

I believe skimage.io.imread loads images in channels-last format (HWC), so you might need to either transpose the numpy array to channels-first before feeding it to the model, or, for a grayscale image, add and repeat the channel dimension as in this snippet:

import skimage.io

img = skimage.io.imread("image.jpeg", as_gray=True)
print(img.shape) 
# (800, 800)

if img.ndim == 2:
    img = img[None, :, :]
    img = img.repeat(3, axis=0)

print(img.shape)
# (3, 800, 800)
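
For an RGB image loaded in channels-last layout you could instead move the channel axis to the front before creating the tensor. A minimal sketch (the random array just stands in for the loaded image):

import numpy as np
import torch

img = np.random.rand(800, 800, 3)                # stand-in for skimage.io.imread(...)
img = np.transpose(img, (2, 0, 1))               # HWC -> CHW
x = torch.from_numpy(img).float().unsqueeze(0)   # add the batch dim -> [1, 3, 800, 800]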

Hello, I'm getting the below error:
Given groups=1, weight of size [64, 6, 4, 4], expected input[128, 3, 128, 320] to have 6 channels, but got 3 channels instead

Below is the code for the Generator and Discriminator:

class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()

        self.label_conditioned_generator = nn.Sequential(nn.Embedding(3, 100),
                      nn.Linear(100, 16))

        self.latent = nn.Sequential(nn.Linear(100, 4*4*512),
                                    nn.LeakyReLU(0.2, inplace=True))

        self.model = nn.Sequential(nn.ConvTranspose2d(512, 64*8, 4, 2, 1, bias=False),
                      nn.BatchNorm2d(64*8, momentum=0.1, eps=0.8),
                      nn.ReLU(True),
                      nn.ConvTranspose2d(64*8, 64*4, 4, 2, 1, bias=False),
                      nn.BatchNorm2d(64*4, momentum=0.1, eps=0.8),
                      nn.ReLU(True),
                      nn.ConvTranspose2d(64*4, 64*2, 4, 2, 1, bias=False),
                      nn.BatchNorm2d(64*2, momentum=0.1, eps=0.8),
                      nn.ReLU(True),
                      nn.ConvTranspose2d(64*2, 64*1, 4, 2, 1, bias=False),
                      nn.BatchNorm2d(64*1, momentum=0.1, eps=0.8),
                      nn.ReLU(True),
                      nn.ConvTranspose2d(64*1, 3, 4, 2, 1, bias=False),
                      nn.Tanh())

    def forward(self, inputs):
        noise_vector, label = inputs
        label_output = self.label_conditioned_generator(label)
        label_output = label_output.view(-1, 1, 4, 4)
        latent_output = self.latent(noise_vector)
        latent_output = latent_output.view(-1, 512, 4, 4)
        concat = torch.cat((latent_output, label_output), dim=1)
        image = self.model(concat)
        #print(image.size())
        return image

class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()

        self.label_condition_disc = nn.Sequential(nn.Embedding(3, 100),
                      nn.Linear(100, 3*128*128))

        self.model = nn.Sequential(nn.Conv2d(6, 64, 4, 2, 1, bias=False),
                      nn.LeakyReLU(0.2, inplace=True),
                      nn.Conv2d(64, 64*2, 4, 3, 2, bias=False),
                      nn.BatchNorm2d(64*2, momentum=0.1, eps=0.8),
                      nn.LeakyReLU(0.2, inplace=True),
                      nn.Conv2d(64*2, 64*4, 4, 3, 2, bias=False),
                      nn.BatchNorm2d(64*4, momentum=0.1, eps=0.8),
                      nn.LeakyReLU(0.2, inplace=True),
                      nn.Conv2d(64*4, 64*8, 4, 3, 2, bias=False),
                      nn.BatchNorm2d(64*8, momentum=0.1, eps=0.8),
                      nn.LeakyReLU(0.2, inplace=True),
                      nn.Flatten(),
                      nn.Dropout(0.4),
                      nn.Linear(4608, 1),
                      nn.Sigmoid()
                     )

    def forward(self, inputs):
        img, label = inputs
        label_output = self.label_condition_disc(label)
        label_output = label_output.view(-1, 3, 128, 128)
        print("Image Sollu :", len(img))
        print("Latent Sollu :", len(label_output))
        concat = torch.cat((img, label_output), dim=-1)
        #print(concat.size())
        output = self.model(concat)
        return output

I guess your Discriminator raises the error as its first nn.Conv2d layer expects an input with 6 channels.
I don't know what print(concat.size()) returns, but I assume it's an input activation with 3 channels.
If so, you could fix the issue by setting in_channels to 3 in the first conv layer of self.model.
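
For example (a minimal sketch of the suggested change, not your full model):

import torch.nn as nn

# the first conv layer of self.model would then accept the 3-channel activation
# that the error message reports
first_conv = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=4, stride=2, padding=1, bias=False)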

Thanks for your inputs on this…
I'm trying to generate fake images that replicate the Rock/Paper/Scissors hand gestures. Please find my code below:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd.variable import Variable
from torchvision import datasets, transforms
from torchvision.utils import make_grid
from torch.utils.data import DataLoader
import imageio

train_transform = transforms.Compose([
    transforms.Resize(128),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
train_dataset = datasets.ImageFolder(root='./data/rps', transform=train_transform)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        torch.nn.init.normal_(m.weight, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        torch.nn.init.normal_(m.weight, 1.0, 0.02)
        torch.nn.init.zeros_(m.bias)

class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()

        self.label_conditioned_generator = nn.Sequential(nn.Embedding(3, 100),
                      nn.Linear(100, 16))

        self.latent = nn.Sequential(nn.Linear(100, 4*4*512),
                                    nn.LeakyReLU(0.2, inplace=True))

        self.model = nn.Sequential(nn.ConvTranspose2d(512, 64*8, 4, 2, 1, bias=False),
                      nn.BatchNorm2d(64*8, momentum=0.1, eps=0.8),
                      nn.ReLU(True),
                      nn.ConvTranspose2d(64*8, 64*4, 4, 2, 1, bias=False),
                      nn.BatchNorm2d(64*4, momentum=0.1, eps=0.8),
                      nn.ReLU(True),
                      nn.ConvTranspose2d(64*4, 64*2, 4, 2, 1, bias=False),
                      nn.BatchNorm2d(64*2, momentum=0.1, eps=0.8),
                      nn.ReLU(True),
                      nn.ConvTranspose2d(64*2, 64*1, 4, 2, 1, bias=False),
                      nn.BatchNorm2d(64*1, momentum=0.1, eps=0.8),
                      nn.ReLU(True),
                      nn.ConvTranspose2d(64*1, 3, 4, 2, 1, bias=False),
                      nn.Tanh())

    def forward(self, inputs):
        noise_vector, label = inputs
        label_output = self.label_conditioned_generator(label)
        label_output = label_output.view(-1, 1, 4, 4)
        latent_output = self.latent(noise_vector)
        latent_output = latent_output.view(-1, 512, 4, 4)
        concat = torch.cat((latent_output, label_output), dim=1)
        image = self.model(concat)
        #print(image.size())
        return image

class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()

        self.label_condition_disc = nn.Sequential(nn.Embedding(3, 100),
                      nn.Linear(100, 3*128*128))

        self.model = nn.Sequential(nn.Conv2d(6, 64, 4, 2, 1, bias=False),
                      nn.LeakyReLU(0.2, inplace=True),
                      nn.Conv2d(64, 64*2, 4, 3, 2, bias=False),
                      nn.BatchNorm2d(64*2, momentum=0.1, eps=0.8),
                      nn.LeakyReLU(0.2, inplace=True),
                      nn.Conv2d(64*2, 64*4, 4, 3, 2, bias=False),
                      nn.BatchNorm2d(64*4, momentum=0.1, eps=0.8),
                      nn.LeakyReLU(0.2, inplace=True),
                      nn.Conv2d(64*4, 64*8, 4, 3, 2, bias=False),
                      nn.BatchNorm2d(64*8, momentum=0.1, eps=0.8),
                      nn.LeakyReLU(0.2, inplace=True),
                      nn.Flatten(),
                      nn.Dropout(0.4),
                      nn.Linear(4608, 1),
                      nn.Sigmoid()
                     )

    def forward(self, inputs):
        img, label = inputs
        label_output = self.label_condition_disc(label)
        label_output = label_output.view(-1, 3, 128, 128)
        print("Image Sollu :", len(img))
        print("Latent Sollu :", len(label_output))
        concat = torch.cat((img, label_output), dim=-1)
        #print(concat.size())
        output = self.model(concat)
        return output

device = 'cuda'

generator = Generator().to(device)
generator.apply(weights_init)
discriminator = Discriminator().to(device)
discriminator.apply(weights_init)

learning_rate = 0.0002
G_optimizer = optim.Adam(generator.parameters(), lr = learning_rate, betas=(0.5, 0.999))
D_optimizer = optim.Adam(discriminator.parameters(), lr = learning_rate, betas=(0.5, 0.999))

images = []

adversarial_loss = nn.BCELoss()

def generator_loss(fake_output, label):
    gen_loss = adversarial_loss(fake_output, label)
    #print(gen_loss)
    return gen_loss

def discriminator_loss(output, label):
    disc_loss = adversarial_loss(output, label)
    return disc_loss

num_epochs = 200
for epoch in range(1, num_epochs+1):

    D_loss_list, G_loss_list = [], []

    for index, (real_images, labels) in enumerate(train_loader):
        D_optimizer.zero_grad()
        real_images = real_images.to(device)
        labels = labels.to(device)
        labels = labels.unsqueeze(1).long()

        real_target = Variable(torch.ones(real_images.size(0), 1).to(device))
        fake_target = Variable(torch.zeros(real_images.size(0), 1).to(device))

        D_real_loss = discriminator_loss(discriminator((real_images, labels)), real_target)
        # print(discriminator(real_images))
        #D_real_loss.backward()

        noise_vector = torch.randn(real_images.size(0), 100, device=device)
        noise_vector = noise_vector.to(device)

        generated_image = generator((noise_vector, labels))
        output = discriminator((generated_image.detach(), labels))
        D_fake_loss = discriminator_loss(output, fake_target)

        # train with fake
        #D_fake_loss.backward()

        D_total_loss = (D_real_loss + D_fake_loss) / 2
        D_loss_list.append(D_total_loss)

        D_total_loss.backward()
        D_optimizer.step()

        # Train generator with real labels
        G_optimizer.zero_grad()
        G_loss = generator_loss(discriminator((generated_image, labels)), real_target)
        G_loss_list.append(G_loss)

        G_loss.backward()
        G_optimizer.step()

After making the changes (I've changed in_channels to 3 in the Discriminator's first conv layer), I'm getting the below error:
mat1 and mat2 shapes cannot be multiplied (128x10752 and 4608x1)

The last linear layer raises the issue as it expects an input with 4608 features, while the flattened input activation has 10752 features. Similar to before, you could change the in_features argument of the linear layer to 10752, which should fix the issue.
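
Instead of hard-coding the value, you could also read it off from a dummy forward pass through the conv part of the model. A rough sketch (flattened_size is just a helper name I made up; pass the layers before nn.Flatten and the shape that print(concat.size()) reports):

import torch
import torch.nn as nn

def flattened_size(conv_layers: nn.Module, input_shape) -> int:
    # run a dummy tensor through the conv layers and count the flattened features
    with torch.no_grad():
        dummy = torch.zeros(1, *input_shape)
        return conv_layers(dummy).flatten(1).shape[1]

# usage sketch: conv_part = the discriminator layers before nn.Flatten,
# input_shape = the activation shape without the batch dimension, e.g. (6, 128, 128)
# in_features = flattened_size(conv_part, (6, 128, 128))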

After changing the in_features of the linear layer to 10752, I'm getting the below error:
"mat1 and mat2 shapes cannot be multiplied (128x9216 and 10752x1)"

class Generator(nn.Module):

    def __init__(self):
        super(Generator, self).__init__()

        self.label_conditioned_generator = nn.Sequential(nn.Embedding(3, 100),
                      nn.Linear(100, 16))

        self.latent = nn.Sequential(nn.Linear(100, 4*4*512),
                                    nn.LeakyReLU(0.2, inplace=True))

        self.model = nn.Sequential(nn.ConvTranspose2d(513, 64*8, 4, 2, 1, bias=False),
                      nn.BatchNorm2d(64*8, momentum=0.1, eps=0.8),
                      nn.ReLU(True),
                      nn.ConvTranspose2d(64*8, 64*4, 4, 2, 1, bias=False),
                      nn.BatchNorm2d(64*4, momentum=0.1, eps=0.8),
                      nn.ReLU(True),
                      nn.ConvTranspose2d(64*4, 64*2, 4, 2, 1, bias=False),
                      nn.BatchNorm2d(64*2, momentum=0.1, eps=0.8),
                      nn.ReLU(True),
                      nn.ConvTranspose2d(64*2, 64*1, 4, 2, 1, bias=False),
                      nn.BatchNorm2d(64*1, momentum=0.1, eps=0.8),
                      nn.ReLU(True),
                      nn.ConvTranspose2d(64*1, 3, 4, 2, 1, bias=False),
                      nn.Tanh())

    def forward(self, inputs):
        noise_vector, label = inputs
        label_output = self.label_conditioned_generator(label)
        label_output = label_output.view(-1, 1, 4, 4)
        latent_output = self.latent(noise_vector)
        latent_output = latent_output.view(-1, 512, 4, 4)
        concat = torch.cat((latent_output, label_output), dim=1)
        image = self.model(concat)
        #print(image.size())
        return image

class Discriminator(nn.Module):

    def __init__(self):
        super(Discriminator, self).__init__()

        self.label_condition_disc = nn.Sequential(nn.Embedding(3, 100),
                      nn.Linear(100, 3*128*128))

        self.model = nn.Sequential(nn.Conv2d(3, 64, 4, 2, 1, bias=False),
                      nn.LeakyReLU(0.2, inplace=True),
                      nn.Conv2d(64, 64*2, 4, 3, 2, bias=False),
                      nn.BatchNorm2d(64*2, momentum=0.1, eps=0.8),
                      nn.LeakyReLU(0.2, inplace=True),
                      nn.Conv2d(64*2, 64*4, 4, 3, 2, bias=False),
                      nn.BatchNorm2d(64*4, momentum=0.1, eps=0.8),
                      nn.LeakyReLU(0.2, inplace=True),
                      nn.Conv2d(64*4, 64*8, 4, 3, 2, bias=False),
                      nn.BatchNorm2d(64*8, momentum=0.1, eps=0.8),
                      nn.LeakyReLU(0.2, inplace=True),
                      nn.Flatten(),
                      nn.Dropout(0.4),
                      nn.Linear(10752, 1),
                      nn.Sigmoid()
                     )

    def forward(self, inputs):
        img, label = inputs
        label_output = self.label_condition_disc(label)
        label_output = label_output.view(-1, 3, 128, 128)
        print("Image Sollu :", len(img))
        print("Latent Sollu :", len(label_output))
        concat = torch.cat((img, label_output), dim=-1)
        #print(concat.size())
        output = self.model(concat)
        return output

Please use the below URL for reference of the above code

It seems you are working with inputs of variable shapes. In this case you would need to create a fixed activation shape, either by resizing the inputs to the same shape or by using e.g. adaptive pooling layers before the first linear layer.
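
A rough sketch of the adaptive pooling approach (the layer sizes are made up): an nn.AdaptiveAvgPool2d before nn.Flatten produces a fixed spatial size regardless of the input resolution, so the in_features of the linear layer no longer depends on the image size.

import torch
import torch.nn as nn

model = nn.Sequential(
    nn.Conv2d(3, 64, 4, 2, 1, bias=False),
    nn.LeakyReLU(0.2, inplace=True),
    nn.Conv2d(64, 128, 4, 3, 2, bias=False),
    nn.LeakyReLU(0.2, inplace=True),
    nn.AdaptiveAvgPool2d((4, 4)),   # always outputs 128 x 4 x 4
    nn.Flatten(),
    nn.Linear(128 * 4 * 4, 1),
    nn.Sigmoid(),
)

print(model(torch.randn(2, 3, 128, 128)).shape)  # torch.Size([2, 1])
print(model(torch.randn(2, 3, 160, 320)).shape)  # torch.Size([2, 1])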

Hi, I'm slightly new to PyTorch and I've got the same error:
Given groups=1, weight of size [512, 256, 3, 3], expected input[1, 64, 128, 128] to have 256 channels, but got 64 channels instead
My input shape is (1, 256, 256).

My model:
class DoubleConv(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.step = torch.nn.Sequential(torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                                        torch.nn.ReLU(),
                                        torch.nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
                                        torch.nn.ReLU())

    def forward(self, X):
        return self.step(X)

class Unet(torch.nn.Module):
    def __init__(self):
        super().__init__()

        #Encoder
        self.layer1 = DoubleConv(1, 64)
        self.layer2 = DoubleConv(64, 128)
        self.layer2 = DoubleConv(128, 256)
        self.layer2 = DoubleConv(256, 512)

        #Decoder
        self.layer5 = DoubleConv(512+256, 256)
        self.layer6 = DoubleConv(256+128, 128)
        self.layer7 = DoubleConv(128+64, 64)
        self.layer8 = torch.nn.Conv2d(64, 1, 1)

        self.maxpool = torch.nn.MaxPool2d(2)

    def forward(self, x):

        #Encoder
        x1 = self.layer1(x)
        x1m = self.maxpool(x1)

        x2 = self.layer2(x1m)
        x2m = self.maxpool(x2)

        x3 = self.layer3(x2m)
        x3m = self.maxpool(x3)

        x4 = self.layer4(x3m)

        #Decoder
        x5 = torch.nn.Upsample(scale_factor=2, mode="bilinear")(x4)
        x5 = torch.cat([x5, x3], dim=1)
        x5 = self.layer5(x5)

        x6 = torch.nn.Upsample(scale_factor=2, mode="bilinear")(x5)
        x6 = torch.cat([x6, x2], dim=1)
        x6 = self.layer5(x6)

        x7 = torch.nn.Upsample(scale_factor=2, mode="bilinear")(x6)
        x7 = torch.cat([x7, x1], dim=1)
        x7 = self.layer7(x7)

        ret = self.layer8(x7)
        return ret

How can I fix this?

You are replacing layer2 multiple times:

        self.layer2 = DoubleConv(64, 128)
        self.layer2 = DoubleConv(128, 256)
        self.layer2 = DoubleConv(256, 512)

while I assume different names should be used.
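
Presumably the encoder was meant to define four separate blocks so that self.layer3 and self.layer4, which forward already uses, actually exist. A sketch of the assumed intent, reusing your DoubleConv:

# inside Unet.__init__
self.layer1 = DoubleConv(1, 64)     # encoder block 1
self.layer2 = DoubleConv(64, 128)   # encoder block 2
self.layer3 = DoubleConv(128, 256)  # encoder block 3
self.layer4 = DoubleConv(256, 512)  # encoder block 4

The decoder also calls self.layer5 twice (x6 = self.layer5(x6)), where the second call was presumably meant to be self.layer6.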

Hello @ptrblck, I am also having the same issue. I am trying to incorporate an ASPP module into FC-HarDNet, but when I try running the model I get this RuntimeError: Given groups=1, weight of size [18, 10, 3, 3], expected input[1, 58, 64, 64] to have 10 channels, but got 58 channels instead

Here is the code I am working with:

class ConvLayer(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel=3, stride=1, dropout=0.1):
        super().__init__()
        self.add_module('conv', nn.Conv2d(in_channels, out_channels, kernel_size=kernel,
                                          stride=stride, padding=kernel//2, bias=False))
        self.add_module('norm', nn.BatchNorm2d(out_channels))
        self.add_module('relu', nn.ReLU6(inplace=True))

    def forward(self, x):
        return super().forward(x)

# define the bottleneck residual layer class
class BRLayer(nn.Sequential):
    def __init__(self, in_channels):
        super().__init__()
        self.add_module('norm', nn.BatchNorm2d(in_channels))
        self.add_module('relu', nn.ReLU(True))

    def forward(self, x):
        return super().forward(x)
    
# define a HarDBlock v2 class
class HarDBlock_v2(nn.Module):
    def get_link(self, layer, base_ch, growth_rate, grmul):
        if layer == 0:
          return base_ch, 0, []
        out_channels = growth_rate
        link = []
        for i in range(10):
          dv = 2 ** i
          if layer % dv == 0:
            k = layer - dv
            link.append(k)
            if i > 0:
                out_channels *= grmul
        out_channels = int(int(out_channels + 1) / 2) * 2
        in_channels = 0
        for i in link:
          ch,_,_ = self.get_link(i, base_ch, growth_rate, grmul)
          in_channels += ch
        return out_channels, in_channels, link
    
    # a helper function to get the number of output channels
    def get_out_ch(self):
        return self.out_channels

    def __init__(self, in_channels, growth_rate, grmul, n_layers, keepBase=False, residual_out=False, dwconv=False, list_out=False):
        super().__init__()
        self.in_channels = in_channels
        self.growth_rate = growth_rate
        self.grmul = grmul
        self.n_layers = n_layers
        self.keepBase = keepBase
        self.links = []
        self.list_out = list_out
        layers_ = []
        self.out_channels = 0

        for i in range(n_layers):
            outch, inch, link = self.get_link(i+1, in_channels, growth_rate, grmul)
            self.links.append(link)
            use_relu = residual_out
            # add CatConv2d layer
            layers_.append(CatConv2d(inch, outch, (3,3), relu=True))

            if (i % 2 == 0) or (i == n_layers - 1):
                self.out_channels += outch
        print("Blk out =",self.out_channels)
        self.layers = nn.ModuleList(layers_)
    
    def transform(self, blk):
    # Loop through the layers of the current block
        for i in range(len(self.layers)):
            self.layers[i].weight[:,:,:,:] = blk.layers[i][0].weight[:,:,:,:] # Copy the weights and biases of the corresponding layers from block
            self.layers[i].bias[:] = blk.layers[i][0].bias[:]
        
    def forward(self, x):
        layers_ = [x] # The input layer is added to the layers list
        for layer in range(len(self.layers)):
            link = self.links[layer]
            tin = []
            for i in link:
                tin.append(layers_[i]) 
            out = self.layers[layer](tin)
            layers_.append(out) # Append the output of the current layer to layers_ list
            
        t = len(layers_)
        out_ = []
        for i in range(t):
            if (i == 0 and self.keepBase) or \
            (i == t-1) or (i%2 == 1):
                out_.append(layers_[i])
                
        # Check if the output needs to be returned as a list
        if self.list_out:
            return out_
        else:
            # Concatenate the output layers along the channel dimension and return as a tensor
            return torch.cat(out_, 1)
    
class HarDBlock(nn.Module):
    # Get the link between the layers in the block
    def get_link(self, layer,base_ch, growth_rate, grmul): 
        if layer == 0:
            return base_ch, 0 , []
        out_channels = growth_rate

        link = []
        for i in range(10):
            dv = 2**i
            if layer % dv == 0:
                k = layer - dv
                link.append(k)
                if i > 0:
                    out_channels *=grmul
        
        #compute the number of output channels for the current layer
        out_channels = int(int(out_channels + 1)/ 2) * 2

        in_channels = 0
        for i in link: 
            ch, _, _ = self.get_link(i, base_ch, growth_rate, grmul)
            in_channels += ch
            return out_channels, in_channels, link
        
    
    # Get output channels for the block
    def get_out_ch(self):
        return self.out_channels
    
    def __init__(self, in_channels, growth_rate, grmul, n_layers, keepBase=False, residual_out=False):
        super().__init__()
        self.in_channels = in_channels
        self.growth_rate = growth_rate
        self.grmul = grmul
        self.n_layers = n_layers
        self.keepBase  = keepBase
        self.links = []
        layers_ = []
        self.out_channels = 0 # if upsample else in_channels

        for i in range(n_layers):
            # create layers of the block
            outch, inch, link = self.get_link(i+1, in_channels, growth_rate, grmul)
            self.links.append(link)
            use_relu = residual_out
            layers_.append(ConvLayer(inch, outch))
            
            if (i % 2 == 0) or  (i == n_layers - 1):
                self.out_channels += outch

        print('Blk out =', self.out_channels)
        self.layers = nn.ModuleList(layers_)

    # forward pass of the block
    def forward(self, x):
        layers_ = [x]

        for layer in range(len(self.layers)):
            link = self.links[layer]
            tin = []
            for i in link:
                tin.append(layers_[i])
            if len(tin)> 1:
                x = torch.cat(tin, 1)
            else:
                x = tin[0]
            out = self.layers[layer](x)
            layers_.append(out)
        
        t = len(layers_)
        out_ = []
        for i in range(t):
            # concatenation of the first layer and all odd numbered layers to get the output of the block
            if (i == 0 and self.keepBase) or \
                (i == t-1) or (i%2 == 1):
                out_.append(layers_[i])
        out = torch.cat(out_, 1)
        return out

class TransitionUp(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
            
        print("upsample",in_channels, out_channels)

    def forward(self, x, skip, concat=True):

        is_v2 = type(skip) is list
        if is_v2:
            skip_x = skip[0]
        else:
            skip_x = skip
        
        out = F.interpolate(
                x,
                size=(skip_x.size(2), skip_x.size(3)),
                mode="bilinear",
                align_corners=True,)
        if concat:  
            if is_v2:
                    out = [out] + skip
            else:
                out = torch.cat([out, skip], 1)
        
        return out 

class ASPP(nn.Module):

    def __init__(self, in_channels, out_channels=256, rates=[6, 12, 18]):
        super(ASPP, self).__init__()
        self.conv1x1_1 = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        self.conv3x3_1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, dilation=rates[0], padding=rates[0])
        self.conv3x3_2 = nn.Conv2d(in_channels, out_channels, kernel_size=3, dilation=rates[1], padding=rates[1])
        self.conv3x3_3 = nn.Conv2d(in_channels, out_channels, kernel_size=3, dilation=rates[2], padding=rates[2])
        self.conv1x1_2 = nn.Conv2d(out_channels * 4, out_channels, kernel_size=1)
        
    def forward(self, x):
        conv1x1_1 = self.conv1x1_1(x)
        conv3x3_1 = self.conv3x3_1(x)
        conv3x3_2 = self.conv3x3_2(x)
        conv3x3_3 = self.conv3x3_3(x)
        out = torch.cat([conv1x1_1, conv3x3_1, conv3x3_2, conv3x3_3], dim=1)
        out = self.conv1x1_2(out)
        return out

class Hardnet(nn.Module):
    def __init__(self, n_classes=19):
        super(Hardnet, self).__init__()

        first_ch = [16, 24, 32, 48]
        ch_list = [64, 96, 160, 224, 320]
        grmul = 1.7
        gr = [10, 16, 18, 24, 32]
        n_layers = [4, 4, 8, 8, 8]
        blks = len(n_layers)
        self.shortcut_layers = []
        self.base = nn.ModuleList([])

        self.base.append(ConvLayer(in_channels=3, out_channels=first_ch[0], kernel=3, stride=2))
        self.base.append(ConvLayer(first_ch[0], first_ch[1], kernel=3))
        self.base.append(ConvLayer(first_ch[1], first_ch[2], kernel=3, stride=2))
        self.base.append(ConvLayer(first_ch[2], first_ch[3], kernel=3))

        skip_connection_channel_counts = []
        ch = first_ch[3]

        for i in range(blks):
            blk = HarDBlock(ch, gr[i], grmul, n_layers[i])
            ch = blk.get_out_ch()
            skip_connection_channel_counts.append(ch)
            self.base.append(blk)

            if i < blks - 1:
                self.shortcut_layers.append(len(self.base) - 1)

            self.base.append(ConvLayer(ch, ch_list[i], kernel=1))
            ch = ch_list[i]

            if i < blks - 1:
                self.base.append(nn.AvgPool2d(kernel_size=2, stride=2))

        cur_channels_count = ch
        prev_block_channels = ch
        n_blocks = blks - 1
        self.n_blocks = n_blocks

        self.aspp = ASPP(ch_list[-1])
        
        #################################
        #        upsampling             #
        #################################
        self.transUpBlocks = nn.ModuleList([])
        self.denseBlocksUp = nn.ModuleList([])
        self.conv1x1_up = nn.ModuleList([])

        for i in range(n_blocks - 1, -1, -1):
            self.transUpBlocks.append(TransitionUp(prev_block_channels, prev_block_channels))
            cur_channels_count = prev_block_channels + skip_connection_channel_counts[i]
            self.conv1x1_up.append(ConvLayer(cur_channels_count, cur_channels_count // 2, kernel=1))
            cur_channels_count = cur_channels_count // 2
            blk = HarDBlock(cur_channels_count, gr[i], grmul, n_layers[i])
            self.denseBlocksUp.append(blk)
            prev_block_channels = blk.get_out_ch()
            cur_channels_count = prev_block_channels

        self.finalConv = nn.Conv2d(in_channels=cur_channels_count, out_channels=n_classes, 
                                   kernel_size=1, stride=1, padding=0, bias=True)
    

    # transform HarDBlock to HarDBlock_v2. 
    def v2_transform(self):
        for i in range(len(self.base)):
            if isinstance(self.base[i], HarDBlock):
                blk = self.base[i]
                self.base[i] = HarDBlock_v2(blk.in_channels, blk.growth_rate, blk.grmul, blk.n_layers, list_out=True)
                self.base[i].transform(blk)# Transferring the weights and biases of the original HarDBlock to the new HarDBlock_v2
    
            elif isinstance(self.base[i], nn.Sequential):
                blk = self.base[i]
                sz = blk[0].weight.shape
                if sz[2] == 1:
                    self.base[i] = CatConv2d(sz[1], sz[0], (1,1), relu=True)
                    self.base[i].weight[:,:,:,:] = blk[0].weight[:,:,:,:]
                    self.base[i].bias[:] = blk[0].bias[:]

        for i in range(self.n_blocks):
            blk = self.denseBlocksUp[i]
            self.denseBlocksUp[i] = HarDBlock_v2(blk.in_channels, blk.growth_rate, blk.grmul, blk.n_layers, list_out=False)
            self.denseBlocksUp[i].transform(blk)

        # Looping through the 1x1 convolutional layers of the model
        for i in range(len(self.conv1x1_up)):
            blk = self.conv1x1_up[i]
            sz = blk[0].weight.shape
            if sz[2] == 1:
                self.conv1x1_up[i] = CatConv2d(sz[1], sz[0], (1,1), relu=True)
                self.conv1x1_up[i].weight[:,:,:,:] = blk[0].weight[:,:,:,:]
    
    def forward(self, x):
        # Encoder
        skip_connections = []

        size_in = x.size()

        for i in range(len(self.base)):
            x = self.base[i](x)
            if i in self.shortcut_layers:
                skip_connections.append(x)

        # ASPP module
        aspp_output = self.aspp(x)

        # Decoder
        # Concatenate ASPP output with the original encoder output
        decoder_input = torch.cat([aspp_output, x], dim=1)

        out = decoder_input  # Use the concatenated input for the decoder

        for i in range(self.n_blocks):
            skip = skip_connections.pop()
            out = self.transUpBlocks[i](out, skip, True)
            out = self.conv1x1_up[i](out)
            out = self.denseBlocksUp[i](out)

        out = self.finalConv(out)
        # Upsample the output to the input size
        out = F.interpolate(
            out,
            size=(size_in[2], size_in[3]),
            mode="bilinear",
            align_corners=True)
        return out

The issue is raised in the 5th iteration in self.base[4](x), where x has a shape of [batch_size, 48, 56, 56].
Inside the self.base[4] module the error is then raised in HarDBlock.forward in out = self.layers[layer](x).

Check all loops, conditions etc. and try to narrow down why the activation has the wrong shape.
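
One way to narrow it down is to print the output shape of every conv and pooling module via forward hooks. A sketch (model is a placeholder for your Hardnet instance):

import torch
import torch.nn as nn

def print_shape_hook(name):
    def hook(module, inputs, output):
        if isinstance(output, torch.Tensor):
            print(f"{name}: {tuple(output.shape)}")
    return hook

# 'model' is a placeholder for your Hardnet instance
for name, module in model.named_modules():
    if isinstance(module, (nn.Conv2d, nn.AvgPool2d)):
        module.register_forward_hook(print_shape_hook(name))

# the next forward pass will then print the shape after each conv / pooling layer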

PS: I've also formatted your code to be able to copy/paste it. Post code snippets by wrapping them into three backticks.

I want to change the current backbone of YOLOv5 from Darknet to ResNet. For that I added these 2 classes in common.py:

class MaxPooling2D(nn.Module):
    # MaxPooling2D layer with args(kernel, stride, padding)
    def __init__(self, k=2, s=2, p=0):
        super().__init__()
        self.maxpool = nn.MaxPool2d(k, s, p)

    def forward(self, x):
        return self.maxpool(x)

class ResBlock(nn.Module):
    def __init__(self, c1, num_repeats):
        super().__init__()
        self.blocks = nn.Sequential(*[
            nn.Identity() if i == 0 else Conv(c1, c1, k=3, s=1, act=True) for i in range(num_repeats)
        ])

    def forward(self, x):
        return torch.cat(x, self.blocks(x))

The backbone is given below:

Backbone

backbone: [
    [-1, 1, "Conv", [64, 6, 2, 2]],  # 0 - Conv1
    [-1, 1, "MaxPooling2D", [3, 2]],   # 1 - MaxPool
    [-1, 3, "ResBlock", [64]],         # 2 - Res2a, Res2b, Res2c
    [-1, 1, "Conv", [128, 3, 2]], # 3 - Conv3
    [-1, 4, "ResBlock", [128]],        # 4 - Res3a, Res3b, Res3c, Res3d
    [-1, 1, "Conv", [256, 3, 2]], # 5 - Conv4
    [-1, 6, "ResBlock", [256]],        # 6 - Res4a, Res4b, Res4c, Res4d, Res4e, Res4f
    [-1, 1, "Conv", [512, 3, 2]], # 7 - Conv5
    [-1, 3, "ResBlock", [512]],        # 8 - Res5a, Res5b, Res5c
    [-1, 1, "SPPF", [1024, 5]]         # 9 - SPPF
    ]
I am getting this shape error after adding the MaxPooling2D and ResBlock class names in yolo.py. The model summary is printed on screen successfully; after that this error occurs.
Traceback (most recent call last):
  File "train.py", line 647, in <module>
    main(opt)
  File "train.py", line 536, in main
    train(opt.hyp, opt, device, callbacks)
  File "train.py", line 130, in train
    model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
  File "/home/dev/Documents/yolov5/models/yolo.py", line 195, in __init__
    m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))])  # forward
  File "/home/dev/Documents/yolov5/models/yolo.py", line 194, in <lambda>
    forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
  File "/home/dev/Documents/yolov5/models/yolo.py", line 209, in forward
    return self._forward_once(x, profile, visualize)  # single-scale inference, train
  File "/home/dev/Documents/yolov5/models/yolo.py", line 121, in _forward_once
    x = m(x)  # run
  File  "/home/dev/Documents/yolov5/models/common.py", line 90, in forward
    return torch.cat(x, self.blocks(x))
  File "/home/dev/Documents/yolov5/models/common.py", line 68, in forward
    return self.act(self.bn(self.conv(x)))
  File "/home/dev/.cache/pypoetry/virtualenvs/yolov5-FT1Hnn5N-py3.8/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/dev/.cache/pypoetry/virtualenvs/yolov5-FT1Hnn5N-py3.8/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 463, in forward
    return self._conv_forward(input, self.weight, self.bias)
 RuntimeError: Given groups=1, weight of size [8, 8, 3, 3], expected input[1, 32, 13, 13] to have 8 channels, but got 32 channels instead

When I run my main code, Visual Studio returns this error message:

RuntimeError: Given groups=1, weight of size [32, 12, 3, 3], expected input[1, 48, 16, 16] to have 12 channels, but got 48 channels instead

I examined the same errors posted by other users on Stack Overflow and GitHub and saw the same problems, but the common file was not written by me; it was installed through packages. That's why I could not understand how to fix this problem. The common.py file is over 30000 characters, so I shared the code as Replit links:
Main code
Common
Returned the line:

# when I run main.py, it prints the debug.py
def forward_fuse(self, x):
    return self.act(self.conv(x))


class ConvBlock(nn.Module):
    def __init__(self, in_ch, out_ch, time_emb_dim, up=False):
        super().__init__()

        self.time_mlp =  nn.Linear(time_emb_dim, out_ch)
        if up:
            self.conv1 = nn.Conv2d(2*in_ch, out_ch, 3, padding=1)
            self.transform = nn.ConvTranspose2d(out_ch, out_ch, 4, 2, 1)
        else:
            self.conv1 = nn.Conv2d(in_ch, out_ch, 3, padding=1)
            self.transform = nn.Conv2d(out_ch, out_ch, 4, 2, 1)

        self.conv2 = nn.Conv2d(out_ch, out_ch, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_ch)
        self.bn2 = nn.BatchNorm2d(out_ch)
        self.relu  = nn.ReLU()

    def forward(self, x, t):

        h = self.bn1(self.relu(self.conv1(x)))
        # Time embedding
        time_emb = self.relu(self.time_mlp(t))
        # Extend last 2 dimensions
        time_emb = time_emb[(..., ) + (None, ) * 2]
        # Add time channel
        h = h + time_emb

        h = self.bn2(self.relu(self.conv2(h)))
        # Down or Upsample
        return self.transform(h)


class PositionalEncoding(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.dim = dim

    def forward(self, time):
        device = time.device
        half_dim = self.dim // 2
        embeds = math.log(10000) / (half_dim - 1)
        embeds = torch.exp(torch.arange(half_dim, device=device) * -embeds)
        embeds = time[:, None] * embeds[None, :]
        embeds = torch.cat((embeds.sin(), embeds.cos()), dim=-1)
        return embeds


class Unet(nn.Module):
    """
    A simplified Unet architecture.
    """
    def __init__(self):
        super().__init__()
        image_channels = 3
        down_channels = (64, 128, 256, 512, 1024)
        up_channels = (1024, 512, 256, 128, 64)
        out_dim = 1
        time_emb_dim = 32

        # Time embedding
        self.time_mlp = nn.Sequential(
                PositionalEncoding(time_emb_dim),
                nn.Linear(time_emb_dim, time_emb_dim),
                nn.ReLU()
            )


        self.conv0 = nn.Conv2d(image_channels, down_channels[0], 3, padding=1)

        # Downsample
        self.downs = nn.ModuleList([ConvBlock(down_channels[i], down_channels[i+1],
                                    time_emb_dim) for i in range(len(down_channels)-1)])
        # Upsample
        self.ups = nn.ModuleList([ConvBlock(up_channels[i], up_channels[i+1],
                                        time_emb_dim, up=True) for i in range(len(up_channels)-1)])

        self.output = nn.Conv2d(up_channels[-1], 3, out_dim)

    def forward(self, x, timestep):

        # Embedd time
        t = self.time_mlp(timestep)
        x = self.conv0(x)

        # Unet
        residual_inputs = []
        for down in self.downs:
            x = down(x, t)
            residual_inputs.append(x)
        for up in self.ups:
            residual_x = residual_inputs.pop()
            x = torch.cat((x, residual_x), dim=1)
            x = up(x, t)
        return self.output(x)

model = Unet()
model

I am getting an error here:
Given groups=1, weight of size [64, 3, 3, 3], expected input[64, 1, 150, 150] to have 3 channels, but got 1 channels instead

I can't find where the error is. I tweaked many things here, but can't find the solution.

The input channels are defined as:

image_channels = 3

while your input image apparently uses a single channel and thus raises the error.
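
As a sketch, you could either set image_channels = 1 in the Unet or expand the grayscale input to three channels before the forward pass (the shapes are taken from your error message):

import torch

x = torch.randn(64, 1, 150, 150)  # grayscale batch as reported in the error
x = x.repeat(1, 3, 1, 1)          # -> [64, 3, 150, 150], matching image_channels = 3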

I also have the same kind of error: RuntimeError: Given groups=1, weight of size [16, 256, 1, 1], expected input[1, 1024, 1, 1] to have 256 channels, but got 1024 channels instead. My code is given below:
class Attention(nn.Module):
    def __init__(self, in_planes, out_planes, kernel_size, groups=1, reduction=0.0625, kernel_num=4, min_channel=16):
        super(Attention, self).__init__()
        attention_channel = max(int(in_planes * reduction), min_channel)
        self.kernel_size = kernel_size
        self.kernel_num = kernel_num
        self.temperature = 1.0

        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Conv2d(in_planes, attention_channel, 1, bias=False)
        self.bn = nn.BatchNorm2d(attention_channel)
        self.relu = nn.ReLU(inplace=True)

        self.channel_fc = nn.Conv2d(attention_channel, in_planes, 1, bias=True)
        self.func_channel = self.get_channel_attention

        if in_planes == groups and in_planes == out_planes:  # depth-wise convolution
            self.func_filter = self.skip
        else:
            self.filter_fc = nn.Conv2d(attention_channel, out_planes, 1, bias=True)
            self.func_filter = self.get_filter_attention

        if kernel_size == 1:  # point-wise convolution
            self.func_spatial = self.skip
        else:
            self.spatial_fc = nn.Conv2d(attention_channel, kernel_size * kernel_size, 1, bias=True)
            self.func_spatial = self.get_spatial_attention

        if kernel_num == 1:
            self.func_kernel = self.skip
        else:
            self.kernel_fc = nn.Conv2d(attention_channel, kernel_num, 1, bias=True)
            self.func_kernel = self.get_kernel_attention

        self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            if isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def update_temperature(self, temperature):
        self.temperature = temperature

    @staticmethod
    def skip(_):
        return 1.0

    def get_channel_attention(self, x):
        channel_attention = torch.sigmoid(self.channel_fc(x).view(x.size(0), -1, 1, 1) / self.temperature)
        return channel_attention

    def get_filter_attention(self, x):
        filter_attention = torch.sigmoid(self.filter_fc(x).view(x.size(0), -1, 1, 1) / self.temperature)
        return filter_attention

    def get_spatial_attention(self, x):
        spatial_attention = self.spatial_fc(x).view(x.size(0), 1, 1, 1, self.kernel_size, self.kernel_size)
        spatial_attention = torch.sigmoid(spatial_attention / self.temperature)
        return spatial_attention

    def get_kernel_attention(self, x):
        kernel_attention = self.kernel_fc(x).view(x.size(0), -1, 1, 1, 1, 1)
        kernel_attention = F.softmax(kernel_attention / self.temperature, dim=1)
        return kernel_attention

    def forward(self, x):
        x = self.avgpool(x)
        x = self.fc(x)
        x = self.bn(x)
        x = self.relu(x)
        return self.func_channel(x), self.func_filter(x), self.func_spatial(x), self.func_kernel(x)

class ODConv2d(nn.Module):
    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=None, dilation=1, groups=1,
                 reduction=0.0625, kernel_num=4):
        super(ODConv2d, self).__init__()
        self.in_planes = in_planes
        self.out_planes = out_planes
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.groups = groups
        self.kernel_num = kernel_num
        self.attention = Attention(in_planes, out_planes, kernel_size, groups=groups,
                                   reduction=reduction, kernel_num=kernel_num)
        self.weight = nn.Parameter(torch.randn(kernel_num, out_planes, in_planes//groups, kernel_size, kernel_size),
                                   requires_grad=True)
        self._initialize_weights()

        if self.kernel_size == 1 and self.kernel_num == 1:
            self._forward_impl = self._forward_impl_pw1x
        else:
            self._forward_impl = self._forward_impl_common

    def _initialize_weights(self):
        for i in range(self.kernel_num):
            nn.init.kaiming_normal_(self.weight[i], mode='fan_out', nonlinearity='relu')

    def update_temperature(self, temperature):
        self.attention.update_temperature(temperature)

    def _forward_impl_common(self, x):
        # Multiplying channel attention (or filter attention) to weights and feature maps are equivalent,
        # while we observe that when using the latter method the models will run faster with less gpu memory cost.
        channel_attention, filter_attention, spatial_attention, kernel_attention = self.attention(x)
        batch_size, in_planes, height, width = x.size()
        x = x * channel_attention
        x = x.reshape(1, -1, height, width)
        aggregate_weight = spatial_attention * kernel_attention * self.weight.unsqueeze(dim=0)
        aggregate_weight = torch.sum(aggregate_weight, dim=1).view(
            [-1, self.in_planes // self.groups, self.kernel_size, self.kernel_size])
        output = F.conv2d(x, weight=aggregate_weight, bias=None, stride=self.stride, padding=self.padding,
                          dilation=self.dilation, groups=self.groups * batch_size)
        output = output.view(batch_size, self.out_planes, output.size(-2), output.size(-1))
        output = output * filter_attention
        print(output)
        return output

    def _forward_impl_pw1x(self, x):
        channel_attention, filter_attention, spatial_attention, kernel_attention = self.attention(x)
        x = x * channel_attention
        output = F.conv2d(x, weight=self.weight.squeeze(dim=0), bias=None, stride=self.stride, padding=self.padding,
                          dilation=self.dilation, groups=self.groups)
        output = output * filter_attention
        return output

    def forward(self, x):
        return self._forward_impl(x)

def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p

def ODautopad(kernel_size, padding=None):  # kernel, padding
    # Pad to 'same'
    if padding is None:
        padding = kernel_size // 2 if isinstance(kernel_size, int) else [x // 2 for x in kernel_size]  # auto-pad
    return padding

class Conv(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def fuseforward(self, x):
        return self.act(self.conv(x))

class ODConvBNReLU(nn.Module):
    def __init__(self, in_planes, out_planes, kernel_size=1, stride=1, padding=None, groups=1, norm_layer=nn.BatchNorm2d,
                 reduction=0.0625, kernel_num=1):
        super(ODConvBNReLU, self).__init__()
        self.conv = ODConv2d(in_planes, out_planes, kernel_size, stride, ODautopad(kernel_size, padding), groups=groups,
                             reduction=reduction, kernel_num=kernel_num)
        self.bn = norm_layer(out_planes)
        self.relu = nn.ReLU6(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x

The yaml file is:

# yolov7 backbone

backbone:

  # [from, number, module, args]

[[-1, 1, Conv, [32, 3, 1]], # 0

[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Conv, [64, 3, 1]],

[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 1, Conv, [64, 1, 1]],
[-2, 1, Conv, [64, 1, 1]],
[-1, 1, Conv, [64, 3, 1]],
[-1, 1, Conv, [64, 3, 1]],
[-1, 1, Conv, [64, 3, 1]],
[-1, 1, Conv, [64, 3, 1]],
[[-1, -3, -5, -6], 1, Concat, [1]],
[-1, 1, Conv, [256, 1, 1]], # 11

[-1, 1, MP, ],
[-1, 1, Conv, [128, 1, 1]],
[-3, 1, Conv, [128, 1, 1]],
[-1, 1, Conv, [128, 3, 2]],
[[-1, -3], 1, Concat, [1]], # 16-P3/8
[-1, 1, Conv, [128, 1, 1]],
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, Conv, [128, 3, 1]],
[[-1, -3, -5, -6], 1, Concat, [1]],
[-1, 1, Conv, [512, 1, 1]], # 24

[-1, 1, MP, ],
[-1, 1, Conv, [256, 1, 1]],
[-3, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [256, 3, 2]],
[[-1, -3], 1, Concat, [1]], # 29-P4/16
[-1, 1, Conv, [256, 1, 1]],
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, Conv, [256, 3, 1]],
[[-1, -3, -5, -6], 1, Concat, [1]],
[-1, 1, Conv, [1024, 1, 1]], # 37

[-1, 1, MP, ],
[-1, 1, Conv, [512, 1, 1]],
[-3, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [512, 3, 2]],
[[-1, -3], 1, Concat, [1]], # 42-P5/32
[-1, 1, Conv, [256, 1, 1]],
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, Conv, [256, 3, 1]],
[[-1, -3, -5, -6], 1, Concat, [1]],
[-1, 1, Conv, [1024, 1, 1]], # 50
]

# yolov7 head

head:
[[-1, 1, SPPCSPC, [512]], # 51

[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[37, 1, ODConvBNReLU, [256, 1, 1]], # route backbone P4
[[-1, -2], 1, Concat, [1]],

[-1, 1, Conv, [256, 1, 1]],
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, Conv, [128, 3, 1]],
[[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
[-1, 1, Conv, [256, 1, 1]], # 63

[-1, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[24, 1, ODConvBNReLU, [128, 1, 1]], # route backbone P3
[[-1, -2], 1, Concat, [1]],

[-1, 1, Conv, [128, 1, 1]],
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, Conv, [64, 3, 1]],
[-1, 1, Conv, [64, 3, 1]],
[-1, 1, Conv, [64, 3, 1]],
[-1, 1, Conv, [64, 3, 1]],
[[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
[-1, 1, Conv, [128, 1, 1]], # 75

[-1, 1, MP, ],
[-1, 1, Conv, [128, 1, 1]],
[-3, 1, Conv, [128, 1, 1]],
[-1, 1, Conv, [128, 3, 2]],
[[-1, -3, 63], 1, Concat, [1]],

[-1, 1, Conv, [256, 1, 1]],
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, Conv, [128, 3, 1]],
[[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
[-1, 1, Conv, [256, 1, 1]], # 88

[-1, 1, MP, ],
[-1, 1, Conv, [256, 1, 1]],
[-3, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [256, 3, 2]],
[[-1, -3, 51], 1, Concat, [1]],

[-1, 1, Conv, [512, 1, 1]],
[-2, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, Conv, [256, 3, 1]],
[[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
[-1, 1, Conv, [512, 1, 1]], # 101

[75, 1, RepConv, [256, 3, 1]],
[88, 1, RepConv, [512, 3, 1]],
[101, 1, RepConv, [1024, 3, 1]],

[[102,103,104], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
]
When I change Conv to ODConvBNReLU, the above error appears. I have tried everything but could not understand it. In my understanding ODConvBNReLU should pass 256 channels, but it is passing 1024 channels.

Your code is not properly formatted and hard to understand. Could you post a minimal, executable, and properly formatted code snippet so we could reproduce the issue?

Sorry sir, but I cannot understand how I can post minimal code. What I can say is that I am working on the official YOLOv7 repo. In the YOLO architecture I want to replace a regular convolution layer with a custom convolution layer; the details of the custom conv layer I have already provided. What I understood from the error is that whatever input I give to the custom conv, it always provides 1024 output channels, even if I specify that I want, say, 256 or 128 output channels. If it is possible to see the whole project, I can share my Google Colab link.

A minimal and executable code snippet provides the minimal code needed to reproduce the issue simply by copy/pasting it into a new Python script. Stack Overflow explains it here in more detail.
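
For this issue, a minimal snippet could look roughly like the following sketch, which reuses the ODConvBNReLU, ODConv2d, Attention, and ODautopad definitions you already posted (the channel sizes and input shape are placeholders):

import torch

layer = ODConvBNReLU(in_planes=256, out_planes=256, kernel_size=1)
x = torch.randn(1, 256, 32, 32)   # placeholder input shape
out = layer(x)
print(out.shape)                  # expected: torch.Size([1, 256, 32, 32])

If that standalone snippet behaves as expected, the mismatch most likely comes from the channel sizes the yaml parser passes to ODConvBNReLU, so printing in_planes and out_planes inside its __init__ would be a reasonable next step.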