Loading weights from a pretrained layer to a bigger layer

I have MDNet with 3 conv layers and 3 fc layers (in online learning; in offline learning the last fc layer is the branches). I added another input feature (4 features in another version) to the second fc layer (fc5), so I had to initialize that whole layer and load only conv1-conv3 and fc4.
The thing is, I now want to load the pretrained fc5 from the saved model and only add the one (or four) newly initialized input(s) to it. Instead of re-initializing the whole (512, 513) fc5 weight, I want to load the pretrained (512, 512) part and initialize only the weights for the extra input.
I'm not sure it's possible to do that, is it?

The model class is attached; my question concerns load_model, which appears at the end, so you can basically skip to the last method.

import os
from collections import OrderedDict

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


class MDNet(nn.Module):
    def __init__(self, model_path=None, K=1, use_gpu=True):
        super(MDNet, self).__init__()
        self.use_gpu = use_gpu
        self.K = K
        self.layers = nn.Sequential(OrderedDict([
            ('conv1', nn.Sequential(nn.Conv2d(3, 96, kernel_size=7, stride=2),
                                    nn.ReLU(inplace=True),
                                    nn.LocalResponseNorm(2),
                                    nn.MaxPool2d(kernel_size=3, stride=2))),
            ('conv2', nn.Sequential(nn.Conv2d(96, 256, kernel_size=5, stride=2),
                                    nn.ReLU(inplace=True),
                                    nn.LocalResponseNorm(2),
                                    nn.MaxPool2d(kernel_size=3, stride=2))),
            ('conv3', nn.Sequential(nn.Conv2d(256, 512, kernel_size=3, stride=1),
                                    nn.ReLU(inplace=True))),
            ('fc4', nn.Sequential(nn.Linear(512 * 3 * 3, 512),
                                  nn.ReLU(inplace=True))),
            ('fc5', nn.Sequential(nn.Dropout(0.5),
                                  nn.Linear(512 + 1, 512),  # 513 inputs: 512 from fc4 + the extra feature
                                  nn.ReLU(inplace=True)))]))

        self.branches = nn.ModuleList([nn.Sequential(nn.Dropout(0.5),
                                                     nn.Linear(512, 2 + 1)) for _ in range(K)])  ### 2 --> 3

        # print('The Net: ')
        # for k in self.layers.state_dict(): print("Module Layer", k)

        # print('The Branches: ')
        # for k in self.branches.state_dict(): print("Branches Layer", k)

        for m in self.layers.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0.1)
        for m in self.branches.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

        if model_path is not None:
            print('the model path: ', model_path)
            if os.path.splitext(model_path)[1] == '.pth':
                self.load_model(model_path)
            elif os.path.splitext(model_path)[1] == '.mat':
                self.load_mat_model(model_path)
            else:
                raise RuntimeError('Unknown model format: {:s}'.format(model_path))
        self.build_param_dict()

    def build_param_dict(self):
        self.params = OrderedDict()
        for name, module in self.layers.named_children():
            append_params(self.params, module, name)  # append_params is a helper defined elsewhere in the MDNet code
        for k, module in enumerate(self.branches):
            append_params(self.params, module, 'fc6_{:d}'.format(k))

    def set_learnable_params(self, layers):
        for k, p in self.params.items():
            if any([k.startswith(l) for l in layers]):
                p.requires_grad = True
            else:
                p.requires_grad = False

    def get_learnable_params(self):
        params = OrderedDict()
        for k, p in self.params.items():  # k - keys, p - weights
            if p.requires_grad:
                params[k] = p
        return params

    def get_all_params(self):
        params = OrderedDict()
        for k, p in self.params.items():  # k - keys, p - weights
            params[k] = p
        return params

    def forward(self, x, iou, k=0, in_layer='conv1', out_layer='fc6'):  ### add current iou

        iou = iou.astype(np.float32)

        run = False
        ### prepare to concat the iou to the feature vector that will be fed to fc5
        iou = torch.from_numpy(np.array(iou))  # solves the problem of expected ndarray in train_mdnet
        # iou = torch.from_numpy(iou) #.type(torch.FloatTensor)  no need
        iou_tensor = torch.Tensor(x.shape[0], 1)  # shape: [~256, 1] (could be smaller, depends on current batch)

        if self.use_gpu:
            iou_tensor = iou_tensor.cuda()

        iou_tensor = iou_tensor.fill_(iou.data[0])  # fill the tensor with the iou

        for name, module in self.layers.named_children():
            if name == in_layer:
                run = True
            if run:
                x = module(x)
                if name == 'conv3':
                    x = x.view(x.size(0), -1)  # flatten
                if name == 'fc4':
                    x = torch.cat((x, iou_tensor), 1)  ### add the iou to the feature vector of the current batch
                if name == out_layer:
                    return x

        x = self.branches[k](x)

        if out_layer == 'fc6':
            return x
        elif out_layer == 'fc6_softmax':
            return F.softmax(x, dim=1)

    def load_model(self, model_path):
        model_weights = torch.load(model_path)
        d = model_weights['shared_layers']
        # for now the whole fc5 is re-initialized, because the pretrained weight is (512, 512) and the new layer needs (512, 513)
        d['fc5.1.weight'] = torch.randn((512, 513)) * 0.01
        d['fc5.1.bias'] = torch.zeros(512)
        self.layers.load_state_dict(d)
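
What I would like load_model to do instead is roughly the following (just a sketch, assuming the checkpoint's 'shared_layers' still holds the original fc5.1.weight of shape (512, 512); with four extra features the new weight would be (512, 516) instead of (512, 513)):

def load_model(self, model_path):
    model_weights = torch.load(model_path)
    d = model_weights['shared_layers']
    old_w = d['fc5.1.weight']                           # pretrained fc5 weight, shape (512, 512)
    new_w = old_w.new_empty(512, 513).normal_(0, 0.01)  # same init scheme as in __init__, for the extra column
    new_w[:, :512] = old_w                              # keep the 512 pretrained input columns
    d['fc5.1.weight'] = new_w                           # only the extra column stays newly initialized
    # the bias still has 512 entries, so the pretrained 'fc5.1.bias' can be loaded unchanged
    self.layers.load_state_dict(d)

Is something like that valid, or is there a cleaner way to load a layer only partially?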

Not sure it's exactly what you're looking for, but I use this:

def load_partial_state_dict(model, state_dict):
    own_state = model.state_dict()
    for name, param in state_dict.items():
        if name in own_state:
            if own_state[name].size() == param.size():
                own_state[name].copy_(param)
            else:
                print(
                    f'Size mismatch for {name}, copying available weights and setting rest to zero')
                own_state[name].zero_()
                slices = tuple(slice(0, min(a, b))
                               for a, b in zip(own_state[name].size(), param.size()))
                own_state[name][slices] = param[slices]
        else:
            print(f'Layer {name} not found in current model')
    model.load_state_dict(own_state, strict=False)
    return model

model = load_partial_state_dict(model, loaded_modules)  # loaded_modules is the pretrained state_dict (e.g. torch.load(...)['shared_layers'])

You might want to change it if I didn't understand you correctly and you need the other way around. :)
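
For your specific case (fc5 going from nn.Linear(512, 512) to nn.Linear(513, 512)), the function above copies the overlapping (512, 512) block and zeroes the weights of the extra input column. If you would rather keep your normal(0, 0.01) initialization for that new column instead of zeros, you could re-initialize just that slice after loading; a rough sketch, assuming the MDNet class from your post (so model.layers.fc5[1] is the nn.Linear):

model = load_partial_state_dict(model, loaded_modules)
with torch.no_grad():
    fc5_linear = model.layers.fc5[1]              # the nn.Linear inside the fc5 Sequential
    fc5_linear.weight[:, 512:].normal_(0, 0.01)   # re-init only the columns with no pretrained counterpart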