Replicate a Keras CNN in PyTorch

I am trying to replicate the following Keras model in PyTorch:

model = models.Sequential()
model.add(layers.Conv2D(64, (3, 3), activation='relu', input_shape=(224, 224, 3), kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.3))
model.add(layers.Conv2D(128, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(256, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.3))
model.add(layers.Conv2D(256, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(512, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(512, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dense(9, activation='softmax'))

This is my PyTorch code:

height = 224
width = 224
kernel_size = 3
stride = 1

class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
    
        # Convolution 1
        self.cnn1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=kernel_size, stride=1)
        self.relu1 = nn.ReLU() 
        
        # Max pool 1
        self.maxpool1 = nn.MaxPool2d(kernel_size=2) #64*112*112
     
        # Convolution 2
        self.cnn2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=kernel_size, stride=stride)
        self.relu2 = nn.ReLU()
        
        # Max pool 2
        self.maxpool2 = nn.MaxPool2d(kernel_size=2) #128 * 56 * 56

        # Convolution 3
        self.cnn3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=kernel_size, stride=stride)
        self.relu3 = nn.ReLU()
        
        # Max pool 3
        self.maxpool3 = nn.MaxPool2d(kernel_size=2) #256 * 28 * 28

        # Convolution 4
        self.cnn4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=kernel_size, stride=stride)
        self.relu4 = nn.ReLU()
        
        # Max pool 4
        self.maxpool4 = nn.MaxPool2d(kernel_size=2) #256 * 14 * 14

        # Convolution 5
        self.cnn5 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=kernel_size, stride=stride)
        self.relu5 = nn.ReLU()
        
        # Max pool 5
        self.maxpool5 = nn.MaxPool2d(kernel_size=2) #512 * 7 * 7
        
        # Fully connected 1 (readout)
        self.fc1 = nn.Linear(int(512 * 7* 7), 9) # 9 represents the number of labels
        
    def forward(self, x):
        # Convolution 1
        out = self.cnn1(x)
        out = self.relu1(out)
        
        # Max pool 1
        out = self.maxpool1(out)
        
        # Convolution 2 
        out = self.cnn2(out)
        out = self.relu2(out)
        
        # Max pool 2 
        out = self.maxpool2(out)
	
        # Convolution 3
        out = self.cnn3(out)
        out = self.relu3(out)
        
        # Max pool 3 
        out = self.maxpool3(out)

        # Convolution 4
        out = self.cnn4(out)
        out = self.relu4(out)
        
        # Max pool 4 
        out = self.maxpool4(out)
        
        # Convolution 5
        out = self.cnn5(out)
        out = self.relu5(out)
        
        # Max pool 5 
        out = self.maxpool5(out)

        out = out.view(out.size(0), -1)

        # Linear function (readout)
        out = self.fc1(out)
        
        return out

I keep getting the following error, though:
RuntimeError: Calculated padded input size per channel: (1 x 1). Kernel size: (3 x 3). Kernel size can't be greater than actual input size

Any idea why this is happening?

The error message seems wrong for an input of [batch_size, 3, 224, 224], but you will run into a shape mismatch in the linear layer, because your shape calculation is a bit off.
Since you are not using any padding in your conv layers, the spatial size is smaller than your comments suggest. These are the shapes after each pooling layer for a single 3 x 224 x 224 input:

torch.Size([1, 64, 111, 111])
torch.Size([1, 128, 54, 54])
torch.Size([1, 256, 26, 26])
torch.Size([1, 256, 12, 12])
torch.Size([1, 512, 5, 5])
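
Without padding, each 3x3 convolution with stride 1 shrinks height and width by 2 (out = in - 2), and each 2x2 max pool halves the spatial size, rounding down. You can confirm the shapes above with a quick dummy forward pass, for example (a minimal sketch reusing the CNNModel defined above):

import torch

model = CNNModel()
out = torch.randn(1, 3, 224, 224)  # dummy input

# Run each conv/ReLU/pool block by hand and print the shape after every pooling layer
for cnn, relu, pool in [
    (model.cnn1, model.relu1, model.maxpool1),
    (model.cnn2, model.relu2, model.maxpool2),
    (model.cnn3, model.relu3, model.maxpool3),
    (model.cnn4, model.relu4, model.maxpool4),
    (model.cnn5, model.relu5, model.maxpool5),
]:
    out = pool(relu(cnn(out)))
    print(out.shape)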

Changing the number of input features in self.fc1 to 512*5*5 seems to work.
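
That is, either change the line in __init__ to self.fc1 = nn.Linear(512 * 5 * 5, 9), or, as a quick sanity check, patch the layer after construction (a minimal sketch reusing the CNNModel defined above):

import torch

model = CNNModel()
# Readout layer corrected so in_features match the flattened 512 x 5 x 5 activation
model.fc1 = torch.nn.Linear(512 * 5 * 5, 9)

x = torch.randn(2, 3, 224, 224)  # dummy batch of two images
print(model(x).shape)            # torch.Size([2, 9])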