I don’t know exactly which layers are new, but since the model architecture was changed, you could create a mapping between the old and new layer names and load each layer separately.
E.g., something like this should work:
class MyModel(nn.Module):
    """Original architecture: a single 10 -> 10 linear layer (``fc1``)."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10, 10)
class MyNewModel(nn.Module):
    """Updated architecture: keeps ``fc1`` and adds ``new_layer`` (1 -> 1)."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10, 10)
        self.new_layer = nn.Linear(1, 1)
# Build one instance of each architecture.
model = MyModel()
new_model = MyNewModel()

# A strict load fails because the old checkpoint has no entries for the
# newly added layer. Wrapped in try/except so the demo can continue:
# > RuntimeError: Error(s) in loading state_dict for MyNewModel:
# >   Missing key(s) in state_dict: "new_layer.weight", "new_layer.bias".
try:
    new_model.load_state_dict(model.state_dict())
except RuntimeError as err:
    print(err)

# this works, but could be dangerous, if you are not careful:
# strict=False silently skips any mismatched keys.
new_model.load_state_dict(model.state_dict(), strict=False)
# > _IncompatibleKeys(missing_keys=['new_layer.weight', 'new_layer.bias'], unexpected_keys=[])

# check that the shared layer really was copied over
print((new_model.fc1.weight == model.fc1.weight).all())
# > tensor(True)

# create mapping: [new_model attribute, model attribute] pairs, then load
# each sub-module's state dict individually.
mapping = [['fc1', 'fc1']]
for new_name, old_name in mapping:
    print('loading {} to {}'.format(old_name, new_name))
    getattr(new_model, new_name).load_state_dict(getattr(model, old_name).state_dict())