Hello!
I want to implement my own convolutional layer and use it in the AlexNet network. I have used this layer in a Lenet network with the MNIST dataset, and it worked fine. However, when I use it in the AlexNet network with the CIFAR10 dataset, I encounter the following error.
RuntimeError: Given output_size=(62, 62), kernel_size=(1, 1), dilation=(1, 1), padding=(0, 0), stride=(1, 1), expected size of input's dimension 2 to match the calculated number of sliding blocks 62 * 62 = 3844, but got input.size(2)=60516.
The entire implementation of my code is provided below.
layer_weights1 = net.state_dict()['conv1.weight']
layer_bias1 = net.state_dict()['conv1.bias']
class MyConv2d1(nn.Conv2d):
def __init__(self,in_channels, out_channels, kernel_size, stride=1, padding=0):
super(MyConv2d1, self).__init__(in_channels, out_channels, kernel_size, stride, padding)
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
# Initialize weights and biases
self.weights = layer_weights1#torch.randn(out_channels, in_channels, kernel_size, kernel_size)
self.biases = layer_bias1#torch.zeros((out_channels, 1))
def forward(self, input):
batch_size, in_channels, in_height, in_width = input.shape
#print("self.weights1=",self.weights)
#print("self.biases1=",self.biases)
# Calculate output dimensions
out_height = int((in_height + 2 * self.padding - self.kernel_size) / self.stride + 1)
out_width = int((in_width + 2 * self.padding - self.kernel_size) / self.stride + 1)
# Pad input
input_padded = torch.nn.functional.pad(input, (self.padding, self.padding, self.padding, self.padding))
# Initialize output
output = torch.zeros((batch_size, self.out_channels, out_height, out_width))
# Perform convolution
unfoldedconv1=torch.nn.functional.unfold(input_padded,(self.kernel_size,self.kernel_size))
conv1_output=unfoldedconv1.transpose(1,2).matmul(self.weights.view(self.weights.size(0),-1).t()).transpose(1,2)
output=torch.nn.functional.fold(conv1_output,(out_height,out_width),(1,1))
return output
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data
import torchvision.datasets as datasets
import torchvision.transforms as transforms
class myAlexNet(nn.Module):
def __init__(self, num_classes=10, q = False):
super().__init__()
self.conv1 = MyConv2d1(in_channels=3, out_channels=96, kernel_size=11, stride=4)
self.relu1 = nn.ReLU(inplace=True)
self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
self.conv2 = nn.Conv2d(96, 256, 5, stride=1, padding=2)
self.relu2 = nn.ReLU(inplace=True)
self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
self.conv3 = nn.Conv2d(256, 384, 3, stride=1, padding=1)
self.relu3 = nn.ReLU(inplace=True)
self.conv4 = nn.Conv2d(384, 384, 3, stride=1, padding=1)
self.relu4 = nn.ReLU(inplace=True)
self.conv5 = nn.Conv2d(384, 256, 5, stride=1, padding=2)
self.relu5 = nn.ReLU(inplace=True)
self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2)
self.avgpool= nn.AvgPool2d(6)
self.drop1 = nn.Dropout()
self.linear1 = nn.Linear(in_features=(9216), out_features=4096)
self.relu6 = nn.ReLU(inplace=True)
self.drop2 = nn.Dropout()
self.linear2 = nn.Linear(in_features=4096, out_features=10)
self.q = q
if q:
self.quant = QuantStub()
self.dequant = DeQuantStub()
def forward(self, x):
if self.q:
x = self.quant(x)
x = self.conv1(x)
print(self.conv1.shape)
x = self.relu1(x)
x = self.pool1(x)
x = self.conv2(x)
x = self.relu2(x)
x = self.pool2(x)
x = self.conv3(x)
x = self.relu3(x)
x = self.conv4(x)
x = self.relu4(x)
x = self.conv5(x)
x = self.relu5(x)
x = self.pool3(x)
x=x.reshape(x.size(0), 256*6*6)
x = self.drop1(x)
x = self.linear1(x)
x = self.relu6(x)
x = self.drop2(x)
x = self.linear2(x)
if self.q:
x = self.dequant(x)
return x
How can I resolve it?