I have an MDNet with 3 conv layers and 3 fc layers (in online learning; in offline learning the last fc layer is a set of branches). I added another input feature (4 features in another version) to the second fc layer (fc5), so I had to re-initialize that whole layer and load only conv1–conv3 and fc4 from the pretrained model.
The thing is, now I want to load the pretrained fc5 layer from the saved model and add the one (or four) newly initialized neuron(s) to it. Instead of initializing all 513 neurons in fc5, I want to load the weights of the 512 pretrained neurons and add one neuron with freshly initialized weights.
I’m not sure it’s possible to do that, is it?
The model class is attached below. My question concerns load_model, which appears at the end, so you can skip straight to the last method.
class MDNet(nn.Module):
    """MDNet variant whose ``fc5`` layer receives one extra input (an IoU value).

    The shared layers are ``conv1``-``conv3``, ``fc4`` and ``fc5``; ``K``
    branches (referred to as ``fc6``) sit on top of them.  In ``forward`` the
    512-wide output of ``fc4`` is concatenated with a single IoU column, which
    is why ``fc5`` is declared with ``512 + 1`` inputs.

    Args:
        model_path: Optional path to pretrained weights.  A ``.pth`` file is
            read by ``load_model``; a ``.mat`` file is passed to
            ``load_mat_model``.
        K: Number of branches to create.
        use_gpu: Stored on the instance as ``self.use_gpu``.

    Raises:
        RuntimeError: If ``model_path`` has an extension other than ``.pth``
            or ``.mat``.
    """

    def __init__(self, model_path=None, K=1, use_gpu=True):
        super(MDNet, self).__init__()
        self.use_gpu = use_gpu
        self.K = K
        self.layers = nn.Sequential(OrderedDict([
            ('conv1', nn.Sequential(nn.Conv2d(3, 96, kernel_size=7, stride=2),
                                    nn.ReLU(inplace=True),
                                    nn.LocalResponseNorm(2),
                                    nn.MaxPool2d(kernel_size=3, stride=2))),
            ('conv2', nn.Sequential(nn.Conv2d(96, 256, kernel_size=5, stride=2),
                                    nn.ReLU(inplace=True),
                                    nn.LocalResponseNorm(2),
                                    nn.MaxPool2d(kernel_size=3, stride=2))),
            ('conv3', nn.Sequential(nn.Conv2d(256, 512, kernel_size=3, stride=1),
                                    nn.ReLU(inplace=True))),
            ('fc4', nn.Sequential(nn.Linear(512 * 3 * 3, 512),
                                  nn.ReLU(inplace=True))),
            # 512 features from fc4 plus the IoU column appended in forward().
            ('fc5', nn.Sequential(nn.Dropout(0.5),
                                  nn.Linear(512 + 1, 512),
                                  nn.ReLU(inplace=True))),
        ]))
        # Each branch produces 2 + 1 = 3 outputs (changed from 2 to 3).
        self.branches = nn.ModuleList([
            nn.Sequential(nn.Dropout(0.5), nn.Linear(512, 2 + 1))
            for _ in range(K)
        ])

        # Default initialisation; pretrained weights (if any) overwrite the
        # shared layers afterwards.
        for m in self.layers.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0.1)
        for m in self.branches.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

        if model_path is not None:
            print('the model path: ', model_path)
            extension = os.path.splitext(model_path)[1]
            if extension == '.pth':
                self.load_model(model_path)
            elif extension == '.mat':
                # NOTE(review): load_mat_model is not defined in the part of
                # the class shown here -- confirm it exists elsewhere.
                self.load_mat_model(model_path)
            else:
                raise RuntimeError('Unkown model format: {:s}'.format(model_path))
        self.build_param_dict()

    def build_param_dict(self):
        """Rebuild ``self.params`` from the shared layers and the branches.

        Shared layers are passed to ``append_params`` under their own name,
        branch ``k`` under the name ``fc6_<k>``.
        """
        # presumably append_params stores each parameter under a key that
        # starts with the given name; set_learnable_params relies on that.
        self.params = OrderedDict()
        for name, module in self.layers.named_children():
            append_params(self.params, module, name)
        for k, module in enumerate(self.branches):
            append_params(self.params, module, 'fc6_{:d}'.format(k))

    def set_learnable_params(self, layers):
        """Enable gradients only for parameters whose key starts with one of ``layers``.

        Args:
            layers: Iterable of key prefixes (e.g. layer names).  Every
                parameter in ``self.params`` that matches none of them gets
                ``requires_grad = False``.
        """
        for k, p in self.params.items():
            p.requires_grad = any(k.startswith(prefix) for prefix in layers)

    def get_learnable_params(self):
        """Return an ``OrderedDict`` of the parameters with ``requires_grad`` set."""
        return OrderedDict(
            (k, p) for k, p in self.params.items() if p.requires_grad
        )

    def get_all_params(self):
        """Return a new ``OrderedDict`` holding every entry of ``self.params``."""
        return OrderedDict(self.params)

    def forward(self, x, iou, k=0, in_layer='conv1', out_layer='fc6'):
        """Run ``x`` through the network from ``in_layer`` to ``out_layer``.

        Args:
            x: Input to ``in_layer``.
            iou: IoU value appended to the output of ``fc4``.  May be a
                Python/NumPy scalar, a NumPy array or a tensor; when it holds
                several values only the first one is used, and that same
                value is appended to every row of the batch.
            k: Index of the branch to apply after the shared layers.
            in_layer: Name of the first shared layer to execute.
            out_layer: Name of the layer whose output is returned.  Besides
                the shared layer names, ``'fc6'`` returns the branch output
                and ``'fc6_softmax'`` its softmax over dimension 1.

        Returns:
            The output of ``out_layer``.  If ``out_layer`` is ``'fc4'`` the
            returned tensor already contains the appended IoU column.
            ``None`` is returned when ``out_layer`` names no known layer.
        """
        if torch.is_tensor(iou):
            iou = iou.detach().cpu().numpy()
        # Scalars and arrays are handled alike: flatten, then take element 0.
        iou_value = float(np.asarray(iou, dtype=np.float32).reshape(-1)[0])

        run = False
        for name, module in self.layers.named_children():
            if name == in_layer:
                run = True
            if not run:
                continue
            x = module(x)
            if name == 'conv3':
                x = x.view(x.size(0), -1)  # flatten for the fc layers
            elif name == 'fc4':
                # Build the column from x so it shares x's dtype and device.
                iou_column = x.new_full((x.size(0), 1), iou_value)
                x = torch.cat((x, iou_column), 1)
            if name == out_layer:
                return x

        x = self.branches[k](x)
        if out_layer == 'fc6':
            return x
        if out_layer == 'fc6_softmax':
            return F.softmax(x, dim=1)
        return None

    def load_model(self, model_path):
        """Load the shared layers from a ``.pth`` checkpoint.

        The checkpoint must contain a ``'shared_layers'`` state dict.  If a
        2-D weight in the checkpoint has the same number of rows but fewer
        columns than the matching layer of this model (e.g. a 512-input
        ``fc5`` loaded into the 513-input ``fc5``), the pretrained columns are
        copied into the leading columns and the remaining column(s) keep the
        values they currently have in the model.  All other entries, biases
        included, are loaded unchanged.

        Args:
            model_path: Path to the checkpoint file.

        Raises:
            RuntimeError: From ``load_state_dict`` when keys are missing or
                unexpected, or when a shape mismatch cannot be handled as
                described above.
        """
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        checkpoint = torch.load(model_path, map_location='cpu')
        pretrained = checkpoint['shared_layers']

        for key, target in self.layers.state_dict().items():
            source = pretrained.get(key)
            if source is None or source.shape == target.shape:
                continue
            is_narrower_linear = (
                source.dim() == 2
                and target.dim() == 2
                and source.size(0) == target.size(0)
                and source.size(1) < target.size(1)
            )
            if is_narrower_linear:
                # forward() appends the extra feature(s) after the original
                # ones, so the pretrained columns go first.
                widened = target.clone()
                widened[:, :source.size(1)] = source.to(
                    device=target.device, dtype=target.dtype)
                pretrained[key] = widened

        self.layers.load_state_dict(pretrained)