By adding a few layers to a pretrained model (PretrainedModel), I want to train a new model (MyNewModel) in which the existing pretrained layers (starting from the pretrained weights) and the newly added layers are trained simultaneously:
import torch
import torch.nn as nn
from torch.optim import Adam

class PretrainedModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer_s = PretrainedModule(...)

    def forward(self, ...):
        outputs = self.layer_s(...)
        return outputs

class MyNewModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer_s = PretrainedModule(...)
        self.new_layer_s = NewModule(...)

    def forward(self, ...):
        outputs = self.layer_s(...)
        outputs = self.new_layer_s(outputs)
        return outputs
net = MyNewModel(...)
net.load_state_dict(....)  # load the parameters that belong to PretrainedModule
optim = Adam(net.parameters(), lr=1e-4)

for idx, batch in enumerate(training_set):
    optim.zero_grad()
    output = net(...)
    loss = loss_fn(output)
    loss.backward()
    optim.step()
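The load_state_dict(....) call is where only the pretrained part is loaded. As a minimal sketch (assuming the checkpoint 'pretrained.pt' used below stores the PretrainedModule weights under the same layer_s key prefix), it could look like:

pretrained_state = torch.load('pretrained.pt')
# strict=False skips the new_layer_s keys, which are not in the checkpoint
missing, unexpected = net.load_state_dict(pretrained_state, strict=False)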
Is it okay to train MyNewModel by splitting it into two models and using two optimizers? For example:
class PretrainedModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer_s = PretrainedModule(...)

    def forward(self, ...):
        outputs = self.layer_s(...)
        return outputs

class AddedLayers(nn.Module):
    def __init__(self):
        super().__init__()
        self.new_layer_s = NewModule(...)

    def forward(self, ...):
        outputs = self.new_layer_s(...)
        return outputs
pretrained_net = PretrainedModel(...)
pretrained_net.load_state_dict(torch.load('pretrained.pt'))
optimizer_pretrained = Adam(pretrained_net.parameters(), lr=1e-4)

added_net = AddedLayers(...)
optimizer_added = Adam(added_net.parameters(), lr=1e-4)

for idx, batch in enumerate(training_set):
    optimizer_pretrained.zero_grad()
    optimizer_added.zero_grad()
    output = pretrained_net(...)
    output = added_net(output)
    loss = loss_fn(output)
    loss.backward()
    optimizer_pretrained.step()
    optimizer_added.step()
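As a sanity check (a sketch based on the loop above), after loss.backward() the gradients should be populated in both modules, since the forward pass connects them in a single autograd graph:

for name, p in pretrained_net.named_parameters():
    assert p.grad is not None, f"no gradient for {name}"
for name, p in added_net.named_parameters():
    assert p.grad is not None, f"no gradient for {name}"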
Or do I have to train it as a single model, as in the first approach above?