Optimization problem in PyTorch 0.4

Hi guys, I’m trying to optimize the parameters below while keeping the L parameter lower triangular with a positive diagonal (i.e. L = tril(A, -1) + diag(exp(d))) and the noise parameter diagonal with positive entries (noise = diag(exp(v))), but they are not updating correctly across training iterations. I suspect I’m doing something wrong with the autograd mechanism. Any help appreciated. Here is a sample snippet.

import torch
from torch.nn.parameter import Parameter

class Model(torch.nn.Module):
    def __init__(self, dim):
        """
        Constructor.
        """
        super(Model, self).__init__()
   
        # Unconstrained noise vector; noise is its positive diagonal matrix.
        self.noise_vector = Parameter(torch.zeros(dim).cuda())
        self.noise = Parameter(torch.diag(torch.exp(self.noise_vector.data)))

        self.L_chol_cov_theta = Parameter(torch.randn(dim, dim).cuda())
        self.log_diag_L_chol_cov_theta = Parameter(torch.randn(dim).cuda())
        self.L = Parameter(torch.randn(dim, dim).cuda())
        # Keep only the strictly lower-triangular part, then add a positive
        # diagonal built from the exponential of the log-diagonal.
        self.L_chol_cov_theta.data = torch.tril(self.L_chol_cov_theta.data, diagonal=-1)
        self.L.data = self.L_chol_cov_theta.data + torch.diag(torch.exp(self.log_diag_L_chol_cov_theta.data))

    def forward(self):
        # Re-impose the constraints on the raw parameter values (note that
        # assignments to .data are not tracked by autograd).
        self.L_chol_cov_theta.data = torch.tril(self.L_chol_cov_theta.data, diagonal=-1)
        self.L.data = self.L_chol_cov_theta.data + torch.diag(torch.exp(self.log_diag_L_chol_cov_theta.data))

        self.noise.data = torch.diag(torch.exp(self.noise_vector.data))

        return torch.mm(self.L, self.noise_vector.view(-1, 1))

custom_net = Model(5)
optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, custom_net.parameters()), lr=1e-3)

for epoch in range(10):
    optimizer.zero_grad()
    forward_pass_something = custom_net()
    loss = calc_likelihood(forward_pass_something, a_ground_truth)  # calc a custom loss on the output
    loss.backward()
    optimizer.step()
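
For reference, this is the direction I was considering instead: keep only the unconstrained quantities as Parameters and rebuild the constrained L and noise inside forward(), so the constraint construction itself is tracked by autograd rather than patched in through .data. This is just a minimal sketch of the idea (ConstrainedModel is an illustrative name, and the loss would still be my placeholder calc_likelihood), not something I have verified:

import torch
from torch.nn.parameter import Parameter

class ConstrainedModel(torch.nn.Module):
    def __init__(self, dim):
        super(ConstrainedModel, self).__init__()
        # Only the unconstrained quantities are Parameters.
        self.noise_vector = Parameter(torch.zeros(dim))
        self.L_chol_cov_theta = Parameter(torch.randn(dim, dim))
        self.log_diag_L_chol_cov_theta = Parameter(torch.randn(dim))

    def forward(self):
        # Strictly lower-triangular part (zero diagonal), tracked by autograd.
        strict_lower = torch.tril(self.L_chol_cov_theta, diagonal=-1)
        # Positive diagonal via exp of the unconstrained log-diagonal.
        L = strict_lower + torch.diag(torch.exp(self.log_diag_L_chol_cov_theta))
        # Diagonal noise matrix with guaranteed-positive entries.
        noise = torch.diag(torch.exp(self.noise_vector))
        return torch.mm(L, self.noise_vector.view(-1, 1)), noise

custom_net = ConstrainedModel(5).cuda()
optimizer = torch.optim.SGD(custom_net.parameters(), lr=1e-3)

Would this be the right way to express the constraints, or is there a recommended pattern for constrained parameters in 0.4?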