img_rows, img_cols = 64, 64
in_channels = 3
batch_size = 1
input_batch = (batch_size, in_channels, img_rows, img_cols)
filter_amt = 64
kernel_size = 3
pool_size = 2
num_output_classes = 200
padding = 1
stride = 1
pool_dim = (pool_size, pool_size)
conv_filter = (kernel_size, kernel_size)
import torch
import torch.nn as nn
class VGG9(nn.Module):
def __init__(self, in_channels, num_output_classes):
super(VGG9, self).__init__()
self.in_channels = in_channels
self.num_classes = num_output_classes
self.conv_layers = nn.Sequential(
nn.Conv2d(in_channels = self.in_channels, out_channels = 64, kernel_size = kernel_size, stride = stride, padding = padding),
nn.ReLU(),
nn.MaxPool2d(kernel_size = 2, stride = 2),
nn.Conv2d(64, 128, kernel_size = kernel_size, stride = stride, padding = padding),
nn.ReLU(),
nn.MaxPool2d(kernel_size = 2, stride = 2),
nn.Conv2d(128, 256, kernel_size = kernel_size, stride = stride, padding = padding),
nn.ReLU(),
nn.Conv2d(128, 256, kernel_size = kernel_size, stride = stride, padding = padding),
nn.ReLU(),
nn.MaxPool2d(kernel_size = 2, stride = 2),
nn.Conv2d(256, 512, kernel_size = kernel_size, stride = stride, padding = padding),
nn.ReLU(),
nn.Conv2d(512, 512, kernel_size = kernel_size, stride = stride, padding = padding),
nn.ReLU(),
nn.MaxPool2d(kernel_size = 2, stride = 2)
)
self.linear_layers = nn.Sequential(
nn.Linear(in_features = 512 * 8 * 8, out_features = 4096), # 64/(2^3) - 4 'blocks', each increasing the output by a power of 2, except the first block
nn.ReLU(),
nn.Dropout2d(0.25),
nn.Linear(in_features = 4096, out_features = 4096),
nn.ReLU(),
nn.Dropout2d(0.25),
nn.Linear(in_features = 4096, out_features = self.num_classes)
)
def forward(self, x):
x = self.conv_layers(x)
x = x.view(x.size(0), -1)
x = self.linear_layers(x)
return x
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg9 = VGG9(in_channels = in_channels, num_output_classes = num_output_classes).to(device)
total_params = sum(p.numel() for p in vgg9.parameters())
print(total_params)
image_tensor = torch.randn(input_batch).to(device)
outputs = vgg9(image_tensor)
print(outputs.shape)
21818952
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-14-21c3dc6880f6> in <module>()
5
6 image_tensor = torch.randn(input_batch).to(device)
----> 7 outputs = vgg9(image_tensor)
8 print(outputs.shape)
6 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight, bias)
441 _pair(0), self.dilation, self.groups)
442 return F.conv2d(input, weight, bias, self.stride,
--> 443 self.padding, self.dilation, self.groups)
444
445 def forward(self, input: Tensor) -> Tensor:
RuntimeError: Given groups=1, weight of size [256, 128, 3, 3], expected input[1, 256, 16, 16] to have 128 channels, but got 256 channels instead
The shape mismatch is raised in these layers:
nn.Conv2d(128, 256, kernel_size = kernel_size, stride = stride, padding = padding),
nn.ReLU(),
nn.Conv2d(128, 256, kernel_size = kernel_size, stride = stride, padding = padding),
as the former conv layer creates an output activation with 256
channels while the latter expects 128
input channels.