I’ve followed this discussion, especially the recommendation from @ptrblck, to create the following model:
class VGG(nn.Module):
def __init__(self, features, output_dim):
super().__init__()
self.features = features
self.avgpool = nn.AdaptiveAvgPool2d(7)
self.classifier = nn.Sequential(
nn.Linear(512 * 7 * 7, 4096),
nn.LeakyReLU(inplace = True),
nn.Dropout(0.5),
nn.LeakyReLU(4096, 4096),
nn.ReLU(inplace = True),
nn.Dropout(0.5),
nn.Linear(4096, output_dim),
)
def forward(self, x):
x = self.features(x)
x = self.avgpool(x)
h = x.view(x.shape[0], -1)
x = self.classifier(h)
return x
class TwoPagesModule(nn.Module):
def __init__(self, prevPageVGG, targPageVGG):
super(TwoPagesModule, self).__init__()
self.prevPageVGG = prevPageVGG
self.targPageVGG = targPageVGG
self.classifier = nn.Sequential(
nn.LeakyReLU(inplace = True),
nn.Dropout(0.5),
nn.Linear(2*256, 2)
)
def forward(self, x1, x2):
x1 = self.prevPageVGG(x1)
x2 = self.targPageVGG(x2)
x = torch.cat((x1, x2), dim=1)
x = self.classifier(x)
return x
The final model would be like this. After loading the VGG16 pre-trained, I set autograd as false from the CNN layers that come before the 28.
TwoPagesModule(
(prevPageVGG): VGG(
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace=True)
(2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace=True)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(6): ReLU(inplace=True)
(7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): ReLU(inplace=True)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): ReLU(inplace=True)
(14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): ReLU(inplace=True)
(16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(18): ReLU(inplace=True)
(19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(20): ReLU(inplace=True)
(21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(22): ReLU(inplace=True)
(23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(25): ReLU(inplace=True)
(26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(27): ReLU(inplace=True)
(28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(29): ReLU(inplace=True)
(30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(avgpool): AdaptiveAvgPool2d(output_size=7)
(classifier): Sequential(
(0): Linear(in_features=25088, out_features=256, bias=True)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): Dropout(p=0.5, inplace=False)
)
)
(targPageVGG): VGG(
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace=True)
(2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace=True)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(6): ReLU(inplace=True)
(7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): ReLU(inplace=True)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): ReLU(inplace=True)
(14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): ReLU(inplace=True)
(16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(18): ReLU(inplace=True)
(19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(20): ReLU(inplace=True)
(21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(22): ReLU(inplace=True)
(23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(25): ReLU(inplace=True)
(26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(27): ReLU(inplace=True)
(28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(29): ReLU(inplace=True)
(30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(avgpool): AdaptiveAvgPool2d(output_size=7)
(classifier): Sequential(
(0): Linear(in_features=25088, out_features=256, bias=True)
(1): LeakyReLU(negative_slope=0.01, inplace=True)
(2): Dropout(p=0.5, inplace=False)
)
)
(classifier): Sequential(
(0): LeakyReLU(negative_slope=0.01, inplace=True)
(1): Dropout(p=0.5, inplace=False)
(2): Linear(in_features=512, out_features=2, bias=True)
)
)
I want to set different learning rates for the VGG model parameters and for the ensemble ones. I was trying to use something like the following but it does not work.
params = [
{'params': model.prevPageVGG.features.parameters(), 'lr': FOUND_LR / 10},
{'params': model.targPageVGG.features.parameters(), 'lr': FOUND_LR / 10},
{'params': model.prevPageVGG.classifier.parameters()},
{'params': model.targPageVGG.classifier.parameters()},
{'params': model.classifier.parameters(),},
]
optimizer = optim.Adam(params, lr = FOUND_LR)
the error:
ValueError: some parameters appear in more than one parameter group
Can anyone help me find the best way to set the learning rate for the problem exposed?
Thanks in advance!