I have been adding more & more convolutional layers to my model to see how they effect model size & accuracy. It was all going fine until I added my 6th convolutional layer.
class MyCNN(nn.Module):
def __init__(self):
super().__init__()
# convolution layers
self._body = nn.Sequential(
# Conv - 1
nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3),
nn.ReLU(inplace=True),
nn.BatchNorm2d(64, momentum=0.3),
nn.MaxPool2d(kernel_size=2),
# Conv - 2
nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3),
nn.ReLU(inplace=True),
nn.BatchNorm2d(128, momentum=0.3),
nn.MaxPool2d(kernel_size=2),
# Conv - 3
nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3),
nn.ReLU(inplace=True),
nn.BatchNorm2d(256, momentum=0.3),
nn.MaxPool2d(kernel_size=2),
# Conv - 4
nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3),
nn.ReLU(inplace=True),
nn.BatchNorm2d(512, momentum=0.3),
nn.MaxPool2d(kernel_size=2),
# Conv - 5
nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3),
nn.ReLU(inplace=True),
nn.BatchNorm2d(1024, momentum=0.3),
nn.MaxPool2d(kernel_size=2),
# Conv - 6
nn.Conv2d(in_channels=1024, out_channels=2048, kernel_size=3),
nn.ReLU(inplace=True),
nn.BatchNorm2d(2048, momentum=0.3),
nn.MaxPool2d(kernel_size=2),
)
# Fully connected layers
self._head = nn.Sequential(
# nn.Linear(in_features=128*54*54, out_features=512),
# nn.Linear(in_features=512*12*12, out_features=512),
# nn.Linear(in_features=1024*5*5, out_features=512),
nn.Linear(in_features=2048*2*2, out_features=512),
nn.ReLU(inplace=True),
nn.Linear(in_features=512, out_features=7)
)
def forward(self, x):
x = self._body(x)
x = x.view(x.size()[0], -1)
x = self._head(x)
return x
As can be seen in the code above I have been adding a conv-layer then recalculating the length of the vector to put into the fully connected layers. For each successive addition the calculations have been right & the model trains. But now I’ve added the 6th layer I get the following error,
RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x2048 and 8192x512)
Why has it decided that my input feature vector is 32*2048
and not 2048*2*2
that I have coded?