Shape mismatch while implementing VGG11*: RuntimeError

import torch
import torch.nn as nn

img_rows, img_cols = 64, 64
in_channels = 3
batch_size = 1
input_batch = (batch_size, in_channels, img_rows, img_cols)

filter_amt = 64
kernel_size = 3
pool_size = 2
num_output_classes = 200
padding = 1
stride = 1

pool_dim = (pool_size, pool_size)
conv_filter = (kernel_size, kernel_size)


class VGG9(nn.Module):
    def __init__(self, in_channels, num_output_classes):
        super(VGG9, self).__init__()
        self.in_channels = in_channels
        self.num_classes = num_output_classes

        self.conv_layers = nn.Sequential(
            nn.Conv2d(in_channels=self.in_channels, out_channels=64, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256, 512, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.linear_layers = nn.Sequential(
            nn.Linear(in_features=512 * 8 * 8, out_features=4096),  # 64 / 2^3 = 8: assumes the 64x64 input is halved three times; the 4 blocks each double the channel count except the first
            nn.ReLU(),
            nn.Dropout2d(0.25),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Dropout2d(0.25),
            nn.Linear(in_features=4096, out_features=self.num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)
        x = self.linear_layers(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg9 = VGG9(in_channels = in_channels, num_output_classes = num_output_classes).to(device)
total_params = sum(p.numel() for p in vgg9.parameters())
print(total_params)

image_tensor = torch.randn(input_batch).to(device)
outputs = vgg9(image_tensor)
print(outputs.shape)


Output:

21818952
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-14-21c3dc6880f6> in <module>()
      5 
      6 image_tensor = torch.randn(input_batch).to(device)
----> 7 outputs = vgg9(image_tensor)
      8 print(outputs.shape)

6 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight, bias)
    441                             _pair(0), self.dilation, self.groups)
    442         return F.conv2d(input, weight, bias, self.stride,
--> 443                         self.padding, self.dilation, self.groups)
    444 
    445     def forward(self, input: Tensor) -> Tensor:

RuntimeError: Given groups=1, weight of size [256, 128, 3, 3], expected input[1, 256, 16, 16] to have 128 channels, but got 256 channels instead
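
One way to pinpoint where the shapes diverge is to walk the input through conv_layers one module at a time; a small debugging sketch, reusing vgg9 and device from the script above:

# Debugging sketch: apply each module of conv_layers in turn and print the
# output shape; the shape printed just before the failure is the activation
# that the failing layer rejects.
x = torch.randn(1, 3, 64, 64).to(device)
for i, layer in enumerate(vgg9.conv_layers):
    try:
        x = layer(x)
    except RuntimeError as err:
        print(f"module {i} ({type(layer).__name__}) failed on input {tuple(x.shape)}: {err}")
        break
    print(i, type(layer).__name__, tuple(x.shape))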

The shape mismatch is raised in these layers:

            nn.Conv2d(128, 256, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=kernel_size, stride=stride, padding=padding),

The former conv layer produces an activation with 256 channels, while the latter expects an input with only 128 channels.
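
The model seems intended to follow the usual VGG pattern, in which the second conv of a block keeps the channel count produced by the first. Under that assumption, the fix is to give the second layer 256 input channels:

            nn.Conv2d(128, 256, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=kernel_size, stride=stride, padding=padding),  # in_channels now matches the 256-channel activation

With that change, a second mismatch will surface at the classifier: the network contains four MaxPool2d layers, so a 64x64 input is halved four times, down to 4x4, and the first Linear layer needs in_features = 512 * 4 * 4 = 8192 rather than 512 * 8 * 8. A quick sanity check after applying the fix:

with torch.no_grad():
    feats = vgg9.conv_layers(torch.randn(1, 3, 64, 64).to(device))
print(feats.shape)  # torch.Size([1, 512, 4, 4])

As a side note, nn.Dropout is the usual choice after flattening; nn.Dropout2d zeroes entire channels and is meant for 4-D activations.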