Empty model.state_dict()

protoneqt · August 12, 2020, 2:36pm

I have the following model:

class DepthV1Acceleration(torch.nn.Module):
    def __init__(self, prediction_steps):
        super(DepthV1Acceleration, self).__init__()
        t = 1 / 30
        self.initial_P = torch.tensor([[0, 0, 0],
                                       [0, 1000, 0],
                                       [0, 0, 1000]], dtype=torch.float32)
        self.P = self.initial_P.clone()
        self.F = torch.tensor([[1, t, 0.5 * t ** 2],
                               [0, 1, t],
                               [0, 0, 0.9]])
        self.H = torch.tensor([[1, 0, 0]], dtype=torch.float32)
        self.R = torch.tensor([[0.01]], requires_grad=True)
        self.prediction_steps = prediction_steps


    def forward(self, measurements):
        # self.F[2, 2] = self.depth_acceleration
        output = torch.zeros(measurements.shape[0], measurements.shape[1] - self.prediction_steps, measurements.shape[2])
        for i in range(measurements.shape[0]):
            self.P = self.initial_P.clone()
            x = torch.zeros(3, 1)
            x[0, 0] = measurements[i, 0, :]

            for j in range(measurements.shape[1] - 1 * self.prediction_steps):
                z = measurements[i, j, :]
                y = z - mm(self.H, x)
                S = mm(mm(self.H, self.P), torch.transpose(self.H, 0, 1)) + self.R
                K = mm(mm(self.P, torch.transpose(self.H, 0, 1)), torch.inverse(S))
                x = x + mm(K, y)
                self.P = mm((torch.eye(3) - mm(K, self.H)), self.P)
                # prediction
                output_element = x.clone()
                x = mm(self.F, x)
                self.P = mm(mm(self.F, self.P), torch.transpose(self.F, 0, 1))
                for _ in range(self.prediction_steps):
                    output_element = mm(self.F, output_element)
                output_element = output_element[0]
                output_element = output_element.unsqueeze(0)
                if j == 0:
                    sequence_output = output_element
                else:
                    sequence_output = torch.cat((sequence_output, output_element), 0)
            sequence_output = sequence_output.unsqueeze(0)
            if i == 0:
                output = sequence_output
            else:
                output = torch.cat((output, sequence_output), 0)

        return output

    def parameters(self, only_trainable=True):
        return [self.R]

I want to continue training of this model. I save and load the model in the following way:

        torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, checkpoint_path)
checkpoint = torch.load(last_checkpoint_path)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

But my model.state_dict() is empty (outputs OrderedDict()). What do I do wrong?

mariosasko · August 12, 2020, 4:56pm

Replace the line self.R = torch.tensor([[0.01]], requires_grad=True) with self.R = torch.nn.Parameter(torch.tensor([[0.01]])). After that modification, the parameters will be properly registered, so they will reside in the model’s state dict. And you should remove your own implementation of the method parameters as well.

Note that by default, a tensor that is wrapped in a torch.nn.Parameter has requires_grad=True.