Hi, thanks for the comment.

If I load the model like this:

`model.load_state_dict(torch.load(path_to_saved_model))`, there is no problem.

But if I load the tensors from a file after instantiating the model, I don't get the same inference performance: the loss is higher than it would be in the first epoch of training if I had started from random weights. It also seems like the weights are not updating. The model is structured similarly to this:

```
class SmallNet(nn.Module):
    def __init__(self, ...):
        self.layers = nn.ModuleList()
        self.normLayers = nn.ModuleList()
        ...

    def setParams(self, start_ind, v_ParamList):
        for i in range(len(self.layers)):
            self.layers[i].weight.data = v_ParamList[start_ind].cuda()
            self.layers[i].weight.requires_grad = True
            self.layers[i].bias.data = v_ParamList[start_ind + 1].cuda()
            self.layers[i].bias.requires_grad = True
            start_ind += 2
        for i in range(len(self.normLayers)):
            if self.normLayers[i] is not None:
                self.normLayers[i].weight.data = v_ParamList[start_ind].cuda()
                self.normLayers[i].bias.data = v_ParamList[start_ind + 1].cuda()
                self.normLayers[i].running_mean.data = v_ParamList[start_ind + 2].cuda()
                self.normLayers[i].running_var.data = v_ParamList[start_ind + 3].cuda()
                self.normLayers[i].num_batches_tracked.data = v_ParamList[start_ind + 4].cuda()
                start_ind += 5
        return start_ind
```

```
class BigNet(nn.Module):
    def __init__(self, ...):
        self.Layer_1 = SmallNet(...)
        self.Layer_2 = SmallNet(...)
        self.Layer_3 = nn.ModuleList()
        for i in range(10):
            self.Layer_3.append(SmallNet(...))

    def setParams(self, start_ind, v_ParamList):
        start_ind = self.Layer_1.setParams(start_ind, v_ParamList)
        start_ind = self.Layer_2.setParams(start_ind, v_ParamList)
        for i in range(len(self.Layer_3)):
            start_ind = self.Layer_3[i].setParams(start_ind, v_ParamList)
        return start_ind

    def loadModelParams(self, path):
        container = torch.jit.load(path)
        v_tensors = getattr(container, "weights")
        start_ind = self.setParams(0, v_tensors)
        ...
```

```
model = BigNet()
model = model.float()
device = utils.get_device("cuda")
if cuda == 'cuda':
    model.to(device)
model.loadModelParams("…pt")
```

If I use one SmallNet to train on the MNIST dataset, save the weights & biases to a file, and reload them, I don't have any problems. But loading the weights & biases of BigNet from a file doesn't work. I compared the tensors when the model is loaded with `load_state_dict` versus from a tensors file, and they are the same. I don't understand what BigNet changes when I want to load the tensors directly from a file.

Thanks a lot!