Turning AlexNet into a single sequential model gives me an error

I am getting a dimension mismatch when I load the AlexNet model and rewrite it so that all of its submodules sit in a single nn.Sequential. For example, the code below raises the following error. Why is this happening? I have done nothing except merge the submodules into one list.

RuntimeError: size mismatch, m1: [98304 x 6], m2: [9216 x 4096] at /pytorch/aten/src/THC/generic/THCTensorMathBlas.cu:266
import torch.nn as nn
from torchvision import models

class ALEXNET(nn.Module):
    def __init__(self):
        super(ALEXNET, self).__init__()

        # load the pretrained AlexNet and merge features, avgpool and
        # classifier into a single nn.Sequential
        self.model = models.alexnet(pretrained=True)
        self.model = nn.Sequential(*(list(self.model.features.children())
                                     + [self.model.avgpool]
                                     + list(self.model.classifier.children())))

    def forward(self, images):
        images = images.cuda()  # images.size() == torch.Size([64, 3, 224, 224])
        scores = self.model(images)
        return scores

Original AlexNet

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace)
    (3): Dropout(p=0.5)
    (4): Linear(in_features=4096, out_features=4096, bias=True)
    (5): ReLU(inplace)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)

AlexNet merged into a single nn.Sequential

Sequential(
  (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
  (1): ReLU(inplace)
  (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU(inplace)
  (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU(inplace)
  (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): ReLU(inplace)
  (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace)
  (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (13): AdaptiveAvgPool2d(output_size=(6, 6))
  (14): Dropout(p=0.5)
  (15): Linear(in_features=9216, out_features=4096, bias=True)
  (16): ReLU(inplace)
  (17): Dropout(p=0.5)
  (18): Linear(in_features=4096, out_features=4096, bias=True)
  (19): ReLU(inplace)
  (20): Linear(in_features=4096, out_features=1000, bias=True)
)

Since you are wrapping all submodules into an nn.Sequential module, you are losing the flatten operation which is performed in the forward method. Without it, the first linear layer receives the unflattened [64, 256, 6, 6] activation instead of a flattened [64, 9216] tensor, which is exactly the m1: [98304 x 6] vs. m2: [9216 x 4096] mismatch in your error (98304 = 64 * 256 * 6).
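For reference, torchvision's AlexNet.forward looks roughly like this (a sketch of the upstream source; older releases use x.view(x.size(0), 256 * 6 * 6) instead of torch.flatten):

def forward(self, x):
    x = self.features(x)
    x = self.avgpool(x)
    x = torch.flatten(x, 1)  # the step that is lost when wrapping everything in nn.Sequential
    x = self.classifier(x)
    return x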
Similar to this issue, you can add a custom Flatten module and it should work:

import torch.nn as nn
from torchvision import models

class Flatten(nn.Module):
    def forward(self, x):
        # collapse all dimensions except the batch dimension
        return x.view(x.size(0), -1)


class ALEXNET(nn.Module):
    def __init__(self):
        super(ALEXNET, self).__init__()
        # set pretrained=True if you need the trained weights
        self.model = models.alexnet(pretrained=False)
        # keep the original adaptive avg pooling and insert Flatten before the classifier
        self.model = nn.Sequential(*(list(self.model.features.children())
                                     + [self.model.avgpool, Flatten()]
                                     + list(self.model.classifier.children())))

    def forward(self, images):
        scores = self.model(images)
        return scores
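
A quick sanity check on a dummy batch (a minimal sketch; assumes the class above and a random CPU tensor with batch size 64 as in the question):

import torch

model = ALEXNET()
x = torch.randn(64, 3, 224, 224)
print(model(x).shape)  # torch.Size([64, 1000])

As a side note, PyTorch 1.2+ ships a built-in nn.Flatten module, so on newer releases you can use nn.Flatten() instead of defining the custom class.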