Loading pretrained weights into new model

ptrblck · September 21, 2021, 9:37am

I don’t know exactly which layers are new, but since the model architecture was changed, you could create a mapping between the old and new layer names and load each layer separately.
E.g., something like this should work:

class MyModel(nn.Module):
    """Original ("pretrained") example model with one linear layer, ``fc1``.

    No ``forward`` is defined; the class exists only so that its
    ``state_dict`` can be loaded into another model.
    """

    def __init__(self):
        super().__init__()
        # 10 -> 10 linear layer; contributes "fc1.weight" and "fc1.bias".
        self.fc1 = nn.Linear(in_features=10, out_features=10)

class MyNewModel(nn.Module):
    """Changed example model: the same ``fc1`` plus an extra ``new_layer``.

    No ``forward`` is defined; the class exists only to show how weights
    from a model without ``new_layer`` can be loaded into it.
    """

    def __init__(self):
        super().__init__()
        # Same layer as in the old model, so its parameters can be reused.
        self.fc1 = nn.Linear(in_features=10, out_features=10)
        # Layer with no counterpart in the old model's state_dict.
        self.new_layer = nn.Linear(in_features=1, out_features=1)

# Demo: three ways of loading the old model's weights into the new model.
model = MyModel()
new_model = MyNewModel()

# Strict loading (the default) fails, because the old state_dict has no
# entries for `new_layer`. The error is caught and printed so that the rest
# of the example still runs when executed as a script.
try:
    new_model.load_state_dict(model.state_dict())
except RuntimeError as err:
    print(err)
# > Error(s) in loading state_dict for MyNewModel:
#    Missing key(s) in state_dict: "new_layer.weight", "new_layer.bias".

# strict=False works, but could be dangerous if you are not careful: keys
# that do not match are skipped instead of raising. Always inspect the
# returned missing/unexpected keys to confirm only the expected ones differ.
load_result = new_model.load_state_dict(model.state_dict(), strict=False)
print(load_result)
# > _IncompatibleKeys(missing_keys=['new_layer.weight', 'new_layer.bias'], unexpected_keys=[])

# Check that the shared layer really received the old weights.
print((new_model.fc1.weight == model.fc1.weight).all())
# > tensor(True)

# Explicit alternative: map each new-model attribute to the old-model
# attribute it should be loaded from, as (new_name, old_name) pairs, and load
# layer by layer. Each per-layer load is strict, so a mismatch still raises.
# NOTE: getattr only resolves direct attributes, not dotted nested paths.
mapping = [('fc1', 'fc1')]
for new_name, old_name in mapping:
    print('loading {} to {}'.format(old_name, new_name))
    source_layer = getattr(model, old_name)
    target_layer = getattr(new_model, new_name)
    target_layer.load_state_dict(source_layer.state_dict())