Hi,
I’m using MDNet, a network with 3 conv layers and 2 fc layers, plus domain-specific branches in offline mode or a single fc layer in online mode (that last layer isn’t pretrained; it’s learned during the offline/online training).
The thing is, I changed fc5 to add another feature, so I want to load only the weights of the three conv layers (conv1-conv3) and the first fc layer (fc4) for online learning. I saw the two solutions mentioned in How to load part of pre trained model?,
but I’m still not sure how to change the load_model function so the net loads only conv1, conv2, conv3, and fc4 from the .pth file (no need to load the .mat file).
Any idea what exactly I should do?
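For reference, here’s roughly what I had in mind, but I’m not sure it’s right. It’s just a sketch, and I’m assuming the saved state dict uses nn.Sequential’s key naming (e.g. 'conv1.0.weight', 'fc4.1.bias'):

    def load_model(self, model_path):
        states = torch.load(model_path)
        shared_layers = states['shared_layers']
        # keep only conv1-conv3 and fc4; drop fc5 since I changed it
        wanted = ('conv1', 'conv2', 'conv3', 'fc4')
        filtered = OrderedDict((k, v) for k, v in shared_layers.items()
                               if k.startswith(wanted))
        # strict=False so the fc5 entries missing from 'filtered' don't raise
        self.layers.load_state_dict(filtered, strict=False)

Would something like this work, or will strict=False end up hiding real key mismatches?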
Also, what exactly does this line do? I can’t find where the 'shared_layers' key is defined anywhere else:
shared_layers = states['shared_layers']
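My guess is that the offline training script saved the checkpoint with something like the line below, so 'shared_layers' would just be a key in the saved dict, but that’s an assumption on my part; I haven’t checked the training code:

    # assumed save call in the offline training script (not verified):
    torch.save({'shared_layers': model.layers.state_dict()}, model_path)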
Thanks!
Here’s the MDNet class (the forward pass also handles the new feature; that part isn’t shown here, but it’s irrelevant to the question):
class MDNet(nn.Module):
    def __init__(self, model_path=None, K=1):
        super(MDNet, self).__init__()
        self.K = K
        self.layers = nn.Sequential(OrderedDict([
            ('conv1', nn.Sequential(nn.Conv2d(3, 96, kernel_size=7, stride=2),
                                    nn.ReLU(),
                                    LRN(),
                                    nn.MaxPool2d(kernel_size=3, stride=2))),
            ('conv2', nn.Sequential(nn.Conv2d(96, 256, kernel_size=5, stride=2),
                                    nn.ReLU(),
                                    LRN(),
                                    nn.MaxPool2d(kernel_size=3, stride=2))),
            ('conv3', nn.Sequential(nn.Conv2d(256, 512, kernel_size=3, stride=1),
                                    nn.ReLU())),
            ('fc4', nn.Sequential(nn.Dropout(0.5),
                                  nn.Linear(512 * 3 * 3, 512),
                                  nn.ReLU())),
            ('fc5', nn.Sequential(nn.Dropout(0.5),
                                  nn.Linear(512, 512),
                                  nn.ReLU()))]))
        self.branches = nn.ModuleList([nn.Sequential(nn.Dropout(0.5),
                                                     nn.Linear(512, 2)) for _ in range(K)])
        if model_path is not None:
            if os.path.splitext(model_path)[1] == '.pth':
                self.load_model(model_path)
            elif os.path.splitext(model_path)[1] == '.mat':
                self.load_mat_model(model_path)
            else:
                raise RuntimeError("Unknown model format: %s" % (model_path))
        self.build_param_dict()
    def build_param_dict(self):
        self.params = OrderedDict()
        for name, module in self.layers.named_children():
            append_params(self.params, module, name)
        for k, module in enumerate(self.branches):
            append_params(self.params, module, 'fc6_%d' % (k))

    def set_learnable_params(self, layers):
        for k, p in self.params.items():
            if any([k.startswith(l) for l in layers]):
                p.requires_grad = True
            else:
                p.requires_grad = False

    def get_learnable_params(self):
        params = OrderedDict()
        for k, p in self.params.items():
            if p.requires_grad:
                params[k] = p
        return params
    def forward(self, x, k=0, in_layer='conv1', out_layer='fc6'):
        # run the model from in_layer to out_layer
        run = False
        for name, module in self.layers.named_children():
            if name == in_layer:
                run = True
            if run:
                x = module(x)
                if name == 'conv3':
                    x = x.view(x.size(0), -1)
                if name == out_layer:
                    return x
        x = self.branches[k](x)
        if out_layer == 'fc6':
            return x
        elif out_layer == 'fc6_softmax':
            return F.softmax(x, dim=1)
    def load_model(self, model_path):
        states = torch.load(model_path)
        shared_layers = states['shared_layers']
        self.layers.load_state_dict(shared_layers)

    def load_mat_model(self, matfile):
        mat = scipy.io.loadmat(matfile)
        mat_layers = list(mat['layers'])[0]
        # copy conv weights (MatConvNet stores them as H x W x in x out,
        # so transpose to PyTorch's out x in x H x W)
        for i in range(3):
            weight, bias = mat_layers[i * 4]['weights'].item()[0]
            self.layers[i][0].weight.data = torch.from_numpy(np.transpose(weight, (3, 2, 0, 1)))
            self.layers[i][0].bias.data = torch.from_numpy(bias[:, 0])
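By the way, in case it matters for the filtering above, this is how I planned to check which keys the .pth checkpoint actually contains ('mdnet.pth' is just a placeholder path):

    states = torch.load('mdnet.pth')
    for k in states['shared_layers']:
        print(k)  # expecting keys like conv1.0.weight, conv1.0.bias, ..., fc5.1.weight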