I want to seperate vgg16 layers in to different block so that the learning rate of parameters of different block can be different, so I rewrite the class, here is the code:
class vgg16Net(nn.Module):
def __init__(self):
super(vgg16Net, self).__init__()
self.block1 = nn.Sequential(nn.Conv2d(3, 64, kernel_size=3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(True),
nn.Conv2d(64, 64, kernel_size=3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(True),
nn.MaxPool2d(kernel_size=2, stride=2))
self.block2 = nn.Sequential(nn.Conv2d(64, 128, kernel_size=3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(True),
nn.Conv2d(128, 128, kernel_size=3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(True),
nn.MaxPool2d(kernel_size=2, stride=2))
self.block3 = nn.Sequential(nn.Conv2d(128, 256, kernel_size=3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(True),
nn.Conv2d(256, 256, kernel_size=3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(True),
nn.Conv2d(256, 256, kernel_size=3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(True),
nn.MaxPool2d(kernel_size=2, stride=2))
self.block4 = nn.Sequential(nn.Conv2d(256, 512, kernel_size=3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.BatchNorm2d(2512),
nn.ReLU(True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(True),
nn.MaxPool2d(kernel_size=2, stride=2))
self.block5 = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.BatchNorm2d(2512),
nn.ReLU(True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(True),
nn.MaxPool2d(kernel_size=2, stride=2))
self.classifier = nn.Sequential(nn.AdaptiveAvgPool2d(1),
nn.Linear(512, 10))
def forward(self, x):
x = self.block1(x)
x = self.block2(x)
x = self.block3(x)
x = self.block4(x)
x = self.block5(x)
x = self.classifier(x)
Then I want to load pretrained weights:
vggmodel = vgg16Net()
model_urls = ‘https://download.pytorch.org/models/vgg16_bn-6c64b313.pth’
weights = model_zoo.load_url(model_urls)
vggmodel.load_state_dict(weights, False)
but I found it doesn’t work:
(vggmodel.block1[0].weight.data == weights[‘features.0.weight’]).sum() #the result is 0
anyone can help?
or is there a simple way to set different learning rate in different layer but in the same Sequential, since the source code of vgg16 of pytorch just have two Sequential