Custom convolution layer in AlexNet

Hello!
I want to implement my own convolutional layer and use it in the AlexNet network. I have used this layer in a LeNet network with the MNIST dataset, and it worked fine. However, when I use it in the AlexNet network with the CIFAR10 dataset, I encounter the following error.

RuntimeError: Given output_size=(62, 62), kernel_size=(1, 1), dilation=(1, 1), padding=(0, 0), stride=(1, 1), expected size of input's dimension 2 to match the calculated number of sliding blocks 62 * 62 = 3844, but got input.size(2)=60516.

The entire implementation of my code is provided below.

import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.quantization import QuantStub, DeQuantStub  # used when q=True

# `net` is the original AlexNet model, trained and loaded beforehand
layer_weights1 = net.state_dict()['conv1.weight']
layer_bias1 = net.state_dict()['conv1.bias']

class MyConv2d1(nn.Conv2d):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super(MyConv2d1, self).__init__(in_channels, out_channels, kernel_size, stride, padding)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

        # Initialize weights and biases from the pretrained conv1 layer
        self.weights = layer_weights1  # previously: torch.randn(out_channels, in_channels, kernel_size, kernel_size)
        self.biases = layer_bias1      # previously: torch.zeros((out_channels, 1))

    def forward(self, input):
        batch_size, in_channels, in_height, in_width = input.shape

        # Calculate output dimensions
        out_height = int((in_height + 2 * self.padding - self.kernel_size) / self.stride + 1)
        out_width = int((in_width + 2 * self.padding - self.kernel_size) / self.stride + 1)

        # Pad input
        input_padded = torch.nn.functional.pad(input, (self.padding, self.padding, self.padding, self.padding))

        # Perform convolution via unfold -> matmul -> fold
        unfoldedconv1 = torch.nn.functional.unfold(input_padded, (self.kernel_size, self.kernel_size))

        conv1_output = unfoldedconv1.transpose(1, 2).matmul(self.weights.view(self.weights.size(0), -1).t()).transpose(1, 2)

        output = torch.nn.functional.fold(conv1_output, (out_height, out_width), (1, 1))

        return output

class myAlexNet(nn.Module):
    def __init__(self, num_classes=10,  q = False):
        super().__init__()

        self.conv1 = MyConv2d1(in_channels=3, out_channels=96, kernel_size=11, stride=4)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)

        self.conv2 = nn.Conv2d(96, 256, 5, stride=1, padding=2)
        self.relu2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)

        self.conv3 = nn.Conv2d(256, 384, 3, stride=1, padding=1)
        self.relu3 = nn.ReLU(inplace=True)

        self.conv4 = nn.Conv2d(384, 384, 3, stride=1, padding=1)
        self.relu4 = nn.ReLU(inplace=True)

        self.conv5 = nn.Conv2d(384, 256, 5, stride=1, padding=2)
        self.relu5 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2)

        self.avgpool= nn.AvgPool2d(6)

        self.drop1   = nn.Dropout()
        self.linear1 = nn.Linear(in_features=(9216), out_features=4096)
        self.relu6   = nn.ReLU(inplace=True)
        self.drop2   = nn.Dropout()
        self.linear2 = nn.Linear(in_features=4096, out_features=10)
        self.q = q
        if q:
          self.quant = QuantStub()
          self.dequant = DeQuantStub()


    def forward(self, x):
        if self.q:
          x = self.quant(x)
        x = self.conv1(x)
        print(x.shape)  # debug: output shape of conv1
        x = self.relu1(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)

        x = self.conv3(x)
        x = self.relu3(x)

        x = self.conv4(x)
        x = self.relu4(x)

        x = self.conv5(x)
        x = self.relu5(x)
        x = self.pool3(x)

        x=x.reshape(x.size(0), 256*6*6)
        x = self.drop1(x)
        x = self.linear1(x)
        x = self.relu6(x)
        x = self.drop2(x)

        x = self.linear2(x)
        if self.q:
          x = self.dequant(x)
        return x

How can I resolve it?

Could you post a minimal, executable, and properly formatted code snippet to reproduce the issue, please?

Hi, I think it would help if you could post the full error exactly as printed. I also tried running your code and ran into some issues, such as having to index `kernel_size` and `padding`, which your code uses as integers but which were stored as tuples. I fixed whatever issues came up, but I got stuck on your forward function, since I didn't understand what you wanted to achieve with it.
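
For context, nn.Conv2d normalizes those constructor arguments to tuples internally, which is what I ran into. A quick check shows it:

import torch.nn as nn

conv = nn.Conv2d(3, 96, kernel_size=11, stride=4)
# Scalar arguments are stored as tuples by nn.Conv2d:
print(conv.kernel_size, conv.stride, conv.padding)  # (11, 11) (4, 4) (0, 0)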

Yes, certainly.

import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.quantization import QuantStub, DeQuantStub  # used when q=True

# `net` is the original AlexNet model, trained and loaded beforehand
layer_weights1 = net.state_dict()['conv1.weight']
layer_bias1 = net.state_dict()['conv1.bias']

class MyConv2d1(nn.Conv2d):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super(MyConv2d1, self).__init__(in_channels, out_channels, kernel_size, stride, padding)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

        # Initialize weights and biases from the pretrained conv1 layer
        self.weights = layer_weights1  # previously: torch.randn(out_channels, in_channels, kernel_size, kernel_size)
        self.biases = layer_bias1      # previously: torch.zeros((out_channels, 1))

    def forward(self, input):
        batch_size, in_channels, in_height, in_width = input.shape

        # Calculate output dimensions
        out_height = int((in_height + 2 * self.padding - self.kernel_size) / self.stride + 1)
        out_width = int((in_width + 2 * self.padding - self.kernel_size) / self.stride + 1)

        # Pad input
        input_padded = torch.nn.functional.pad(input, (self.padding, self.padding, self.padding, self.padding))

        # Perform convolution via unfold -> matmul -> fold
        unfoldedconv1 = torch.nn.functional.unfold(input_padded, (self.kernel_size, self.kernel_size))

        conv1_output = unfoldedconv1.transpose(1, 2).matmul(self.weights.view(self.weights.size(0), -1).t()).transpose(1, 2)

        output = torch.nn.functional.fold(conv1_output, (out_height, out_width), (1, 1))

        return output

class myAlexNet(nn.Module):
    def __init__(self, num_classes=10,  q = False):
        super().__init__()

        self.conv1 = MyConv2d1(in_channels=3, out_channels=96, kernel_size=11, stride=4)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)

        self.conv2 = nn.Conv2d(96, 256, 5, stride=1, padding=2)
        self.relu2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)

        self.conv3 = nn.Conv2d(256, 384, 3, stride=1, padding=1)
        self.relu3 = nn.ReLU(inplace=True)

        self.conv4 = nn.Conv2d(384, 384, 3, stride=1, padding=1)
        self.relu4 = nn.ReLU(inplace=True)

        self.conv5 = nn.Conv2d(384, 256, 5, stride=1, padding=2)
        self.relu5 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2)

        self.avgpool= nn.AvgPool2d(6)

        self.drop1   = nn.Dropout()
        self.linear1 = nn.Linear(in_features=(9216), out_features=4096)
        self.relu6   = nn.ReLU(inplace=True)
        self.drop2   = nn.Dropout()
        self.linear2 = nn.Linear(in_features=4096, out_features=10)
        self.q = q
        if q:
          self.quant = QuantStub()
          self.dequant = DeQuantStub()


    def forward(self, x):
        if self.q:
          x = self.quant(x)
        x = self.conv1(x)
        print(x.shape)  # debug: output shape of conv1
        x = self.relu1(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)

        x = self.conv3(x)
        x = self.relu3(x)

        x = self.conv4(x)
        x = self.relu4(x)

        x = self.conv5(x)
        x = self.relu5(x)
        x = self.pool3(x)

        x=x.reshape(x.size(0), 256*6*6)
        x = self.drop1(x)
        x = self.linear1(x)
        x = self.relu6(x)
        x = self.drop2(x)

        x = self.linear2(x)
        if self.q:
          x = self.dequant(x)

        return x

But I am facing the following error.

RuntimeError: Given output_size=(62, 62), kernel_size=(1, 1), dilation=(1, 1), padding=(0, 0), stride=(1, 1), expected size of input's dimension 2 to match the calculated number of sliding blocks 62 * 62 = 3844, but got input.size(2)=60516.

Thank you for your quick responses.
I want to use my custom convolutional layer in the AlexNet network and compare the obtained accuracy with the original AlexNet. To do this, I initially trained the AlexNet network, saved the model, loaded it into the ‘myAlexNet’ model, transferred the weights and biases from the original model to ‘myAlexNet’, and then performed inference.
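
The transfer step looks roughly like this (a minimal sketch; `alexnet.pth` is a placeholder for my checkpoint path, and `AlexNet` stands for the original network class):

# Load the trained original model, then copy its parameters into myAlexNet.
net = AlexNet()
net.load_state_dict(torch.load('alexnet.pth'))

my_net = myAlexNet()
my_net.load_state_dict(net.state_dict())  # layer names match, so weights and biases transfer
my_net.eval()                             # inference mode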
But I am facing the following error.

RuntimeError: Given output_size=(62, 62), kernel_size=(1, 1), dilation=(1, 1), padding=(0, 0), stride=(1, 1), expected size of input's dimension 2 to match the calculated number of sliding blocks 62 * 62 = 3844, but got input.size(2)=60516.

Hi, I was able to fix the issue. When unfolding, you forgot to pass the stride parameter that you had used to calculate the output size of the convolution, so the number of sliding blocks did not match what fold expected at the end of the operation.
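
You can see the mismatch directly from the numbers in the error. Assuming the 256x256 input they imply, unfold with its default stride of 1 produces 246 * 246 = 60516 sliding blocks, while fold expects 62 * 62 = 3844:

import torch
import torch.nn.functional as F

x = torch.randn(1, 3, 256, 256)  # input size assumed from the error message

# Default stride=1: (256 - 11 + 1)^2 = 246^2 = 60516 sliding blocks
print(F.unfold(x, (11, 11)).shape)                  # torch.Size([1, 363, 60516])

# stride=4, as used for the output-size calculation: 62^2 = 3844 blocks
print(F.unfold(x, (11, 11), stride=(4, 4)).shape)   # torch.Size([1, 363, 3844])

Here is the fixed code.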

class MyConv2d1(nn.Conv2d):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super(MyConv2d1, self).__init__(in_channels, out_channels, kernel_size, stride, padding)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

        # Initialize weights and biases (note: forward never applies the biases)
        self.weights = torch.randn(out_channels, in_channels, kernel_size, kernel_size)
        self.biases = torch.zeros((out_channels, 1))

    def forward(self, input):
        batch_size, in_channels, in_height, in_width = input.shape

        # Calculate output dimensions with floor division, matching nn.Conv2d
        out_height = (in_height + 2 * self.padding - self.kernel_size) // self.stride + 1
        out_width = (in_width + 2 * self.padding - self.kernel_size) // self.stride + 1

        # Pad input
        input_padded = torch.nn.functional.pad(input, (self.padding, self.padding, self.padding, self.padding))

        # Unfold with the same stride used for the output-size calculation,
        # so the number of sliding blocks matches what fold expects
        unfoldedconv1 = torch.nn.functional.unfold(input_padded, (self.kernel_size, self.kernel_size), stride=(self.stride, self.stride))

        conv1_output = unfoldedconv1.transpose(1, 2).matmul(self.weights.view(self.weights.size(0), -1).t()).transpose(1, 2)

        output = torch.nn.functional.fold(conv1_output, (out_height, out_width), (1, 1))

        return output

Just a side note: I ran into issues reshaping your tensor before passing it to the linear layer. Instead of using

x = x.reshape(x.size(0), 256*6*6)

you can use

x = torch.flatten(x, 1)

which is much more flexible, since it doesn't hard-code the flattened feature size.
