I have been experimenting with the base VGG16 model to improve its performance, and I ended up changing the ‘features’ module so that printing it shows the following:
(features): Sequential(
  (0): Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (1): ReLU(inplace=True)
  (2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): ReLU(inplace=True)
  (5): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): ReLU(inplace=True)
  (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (15): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (16): ReLU(inplace=True)
  (17): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (18): ReLU(inplace=True)
  (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (20): ReLU(inplace=True)
  (21): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (22): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (23): ReLU(inplace=True)
  (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (25): ReLU(inplace=True)
  (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (27): ReLU(inplace=True)
  (28): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
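For reference, the listing above is just what PyTorch prints for the module; with the class defined below, something like the following reproduces it (the class count here is a placeholder):

model = Vgg(n_classes=10)     # placeholder class count
print(model.model.features)   # prints the Sequential block shown above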
To get the above, I made the following edits in my Vgg class:
import torch
import torch.nn as nn
from torchvision import models

class Vgg(nn.Module):
    def __init__(self, n_classes, bias=None, dropout=0.3):
        super().__init__()
        self.model = models.vgg16()
        # change the first conv to allow 4-channel input
        self.model.features[0] = nn.Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
        del self.model.features[2]
        del self.model.features[2]
        self.model.features[2] = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        # change the pooling layer
        self.model.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        # set output to correct num classes
        self.model.classifier = nn.Linear(in_features=512, out_features=n_classes, bias=True)

    def forward(self, x):
        x = self.model.features(x)
        x = self.model.avgpool(x)
        x = torch.squeeze(x)
        x = self.model.classifier(x)
        return x
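As a quick sanity check (the input size and class count here are placeholders; my real data has 4 channels), a dummy forward pass runs cleanly and produces the expected shapes:

model = Vgg(n_classes=10)
x = torch.randn(2, 4, 224, 224)   # dummy batch of two 4-channel images
out = model(x)                    # note: with a batch of 1, torch.squeeze would also drop the batch dim
print(out.shape)                  # torch.Size([2, 10])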
When I run this code it works fine. But then I tried to rewrite it in a more straightforward, easier-to-follow way:
class Vgg2(nn.Module):
    def __init__(self, n_classes, bias=None, dropout=0.3):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.r = nn.ReLU(inplace=True)
        modules = nn.Sequential(
            nn.Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3)),
            # self.bn64,
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=True),
            # self.bn128,
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=True),
            # self.bn128,
            nn.ReLU(inplace=True),
            # self.dropout,
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1, bias=True),
            # self.bn256,
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
            # self.bn256,
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
            # self.bn256,
            nn.ReLU(inplace=True),
            # self.dropout,
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1, bias=True),
            # self.bn512,
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
            # self.bn512,
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
            # self.bn512,
            nn.ReLU(inplace=True),
            # self.dropout,
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
            # self.bn512,
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
            # self.bn512,
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
            # self.bn512,
            nn.ReLU(inplace=True),
            # self.dropout,
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
        )
        self.get_feat = modules
        # change the pooling layer
        self.pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        # set output to correct num classes
        self.classify = nn.Linear(in_features=512, out_features=n_classes, bias=True)

    def forward(self, x):
        x = self.get_feat(x)
        x = self.pool(x)
        x = torch.squeeze(x)
        x = self.classify(x)
        return x
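To convince myself the two builds really are architecturally identical, I compare layer types and parameter counts; assuming both classes above are defined, this should print True twice:

m1, m2 = Vgg(n_classes=10), Vgg2(n_classes=10)
print([type(m).__name__ for m in m1.model.features] ==
      [type(m).__name__ for m in m2.get_feat])       # same layer sequence
print(sum(p.numel() for p in m1.parameters()) ==
      sum(p.numel() for p in m2.parameters()))       # same parameter count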
However, when I train with this second version, the model does not learn at all; all predictions remain zero.
The only difference I can see between the two is the index numbering of the Sequential module that holds the features:
With Vgg (1st method):
(features): Sequential(
  (0): Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (1): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (18): ReLU(inplace=True)
  (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (20): ReLU(inplace=True)
  (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (22): ReLU(inplace=True)
  (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (25): ReLU(inplace=True)
  (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (27): ReLU(inplace=True)
  (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (29): ReLU(inplace=True)
  (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
And with Vgg2 (2nd method):
(get_feat): Sequential(
  (0): Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (1): ReLU(inplace=True)
  (2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): ReLU(inplace=True)
  (5): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): ReLU(inplace=True)
  (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (15): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (16): ReLU(inplace=True)
  (17): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (18): ReLU(inplace=True)
  (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (20): ReLU(inplace=True)
  (21): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (22): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (23): ReLU(inplace=True)
  (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (25): ReLU(inplace=True)
  (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (27): ReLU(inplace=True)
  (28): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
The only difference I see is that in the first method the indices of ‘features’ go 0, 1, 4, 5, … (because of the layers I deleted from the original list), whereas the 2nd method’s indices run in order (0, 1, 2, 3, 4, 5, …). I don’t see how this would make any difference, but it really is the only difference I can find.
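As far as I understand, the gap in the printed indices should be harmless, because nn.Sequential simply runs its registered children in order, regardless of how they are numbered. A minimal check of that assumption:

seq = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 8))
del seq[1]                    # remove the ReLU; depending on the torch version, the printed keys may keep a gap
print(seq)
out = seq(torch.randn(1, 8))  # the forward pass still runs the remaining modules in order
print(out.shape)              # torch.Size([1, 8])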
Maybe someone else can see where I am going wrong?