Fail to load weight by setting strict to fasle

dawn · July 3, 2018, 3:36pm

I want to seperate vgg16 layers in to different block so that the learning rate of parameters of different block can be different, so I rewrite the class, here is the code:

class vgg16Net(nn.Module):

def __init__(self):
    super(vgg16Net, self).__init__()
    self.block1 = nn.Sequential(nn.Conv2d(3, 64, kernel_size=3, padding=1), 
                                nn.BatchNorm2d(64), 
                                nn.ReLU(True),
                                nn.Conv2d(64, 64, kernel_size=3, padding=1),
                                nn.BatchNorm2d(64),
                                nn.ReLU(True),
                                nn.MaxPool2d(kernel_size=2, stride=2))
    self.block2 = nn.Sequential(nn.Conv2d(64, 128, kernel_size=3, padding=1),
                                nn.BatchNorm2d(128),
                                nn.ReLU(True),
                                 nn.Conv2d(128, 128, kernel_size=3, padding=1),
                                nn.BatchNorm2d(128),
                                nn.ReLU(True),
                                nn.MaxPool2d(kernel_size=2, stride=2))
    self.block3 = nn.Sequential(nn.Conv2d(128, 256, kernel_size=3, padding=1),
                                nn.BatchNorm2d(256),
                                nn.ReLU(True),
                                nn.Conv2d(256, 256, kernel_size=3, padding=1),
                                nn.BatchNorm2d(256),
                                nn.ReLU(True),
                                nn.Conv2d(256, 256, kernel_size=3, padding=1),
                                nn.BatchNorm2d(256),
                                nn.ReLU(True),
                                nn.MaxPool2d(kernel_size=2, stride=2))
    self.block4 = nn.Sequential(nn.Conv2d(256, 512, kernel_size=3, padding=1),
                                nn.BatchNorm2d(512),
                                nn.ReLU(True),
                                nn.Conv2d(512, 512, kernel_size=3, padding=1),
                                nn.BatchNorm2d(2512),
                                nn.ReLU(True),
                                nn.Conv2d(512, 512, kernel_size=3, padding=1),
                                nn.BatchNorm2d(512),
                                nn.ReLU(True),
                                nn.MaxPool2d(kernel_size=2, stride=2))
    self.block5 = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, padding=1),
                                nn.BatchNorm2d(512),
                                nn.ReLU(True),
                                nn.Conv2d(512, 512, kernel_size=3, padding=1),
                                nn.BatchNorm2d(2512),
                                nn.ReLU(True),
                                nn.Conv2d(512, 512, kernel_size=3, padding=1),
                                nn.BatchNorm2d(512),
                                nn.ReLU(True), 
                                nn.MaxPool2d(kernel_size=2, stride=2))
    self.classifier = nn.Sequential(nn.AdaptiveAvgPool2d(1),
                                    nn.Linear(512, 10))
    
def forward(self, x):
    x = self.block1(x)
    x = self.block2(x)
    x = self.block3(x)
    x = self.block4(x)
    x = self.block5(x)
    x = self.classifier(x)

Then I want to load pretrained weights:

vggmodel = vgg16Net()
model_urls = ‘https://download.pytorch.org/models/vgg16_bn-6c64b313.pth’
weights = model_zoo.load_url(model_urls)
vggmodel.load_state_dict(weights, False)

but I found it doesn’t work:
(vggmodel.block1[0].weight.data == weights[‘features.0.weight’]).sum() #the result is 0

anyone can help?
or is there a simple way to set different learning rate in different layer but in the same Sequential, since the source code of vgg16 of pytorch just have two Sequential