A network with five convolutional layers, each followed by a ReLU activation function:
class ConvNet(nn.Module):
    """Image classifier: five conv layers with ReLU, then a 3-layer MLP head.

    The feature extractor takes 3-channel input and ends with 256 channels.
    An adaptive average pool then resizes the feature map to 6x6, so the
    classifier always receives ``256 * 6 * 6`` features regardless of the
    spatial size of the input image (as long as the image is large enough
    to pass through the convolution and max-pooling stages).

    Args:
        num_classes: Number of output units of the final linear layer.
    """

    def __init__(self, num_classes: int = 110) -> None:
        # Must be the dunder ``__init__`` (and call the parent's ``__init__``)
        # so that nn.Module registers the sub-modules built below.
        super().__init__()
        self.features = nn.Sequential(
            # Conv block 1: 3 input channels -> 224 output channels.
            nn.Conv2d(3, 224, kernel_size=11, stride=2, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Conv block 2: 224 -> 192 channels.
            nn.Conv2d(224, 192, kernel_size=5, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Conv blocks 3-5: 192 -> 384 -> 256 -> 256 channels.
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Forces a 6x6 spatial output whatever the incoming feature-map size.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            # output of last conv layer * target output h * w
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the network on a batch of images.

        Args:
            x: Batched input of shape ``(N, 3, H, W)``.

        Returns:
            Tensor of shape ``(N, num_classes)`` with the raw outputs of the
            last linear layer (no softmax is applied here).
        """
        x = self.features(x)
        x = self.avgpool(x)
        # Keep the batch dimension, flatten channels and spatial dims.
        x = torch.flatten(x, 1)
        out = self.classifier(x)
        return out
If I understand correctly, in the first convolutional layer the number of input channels is 3, and the number of output channels produced by the convolution is 224. But what is the input image?