No gradient on the covariance matrix of torch.distributions.MultivariateNormal?

I created a MultivariateNormal object and set its mean and covariance matrix to require gradients so that I could optimize them, but after backward() there is no gradient on the covariance matrix. What am I doing wrong?

import numpy as np
import torch as th
import torch.distributions as pr

def mvlgauss(mu, cov, x):
    """Hand-written multivariate Gaussian log-density:
    log N(x; mu, cov) = -d/2*log(2*pi) - 1/2*log det(cov) - 1/2*(x-mu)^T cov^{-1} (x-mu)
    """
    dim = cov.shape[0]
    return -np.log(2*np.pi)*dim/2 - 0.5*th.logdet(cov) \
        - 0.5*(x-mu) @ cov.inverse() @ (x-mu).view(-1, 1)


if __name__ == '__main__':
    mu = th.rand(2)
    cov = th.rand(2,2)
    cov = (cov+cov.transpose(0,1))/2 + th.diag(th.ones(2)) # ensure positive-definite

    G = pr.MultivariateNormal(loc=mu, covariance_matrix=cov)
    # requires_grad is set on the distribution's attributes *after* construction
    G.mean.requires_grad_(True)
    G.covariance_matrix.requires_grad_(True)

    x = th.rand(2)
    z = G.log_prob(x)
    z.backward()
    print(G.mean.grad, G.covariance_matrix.grad)  # PROBLEM HERE: no grad on the covariance matrix???

    # same mean and covariance as above, but as fresh leaf tensors with requires_grad=True
    mu1 = mu.clone().detach_().requires_grad_(True)
    cov1 = cov.clone().detach_().requires_grad_(True)
    z1 = mvlgauss(mu1, cov1, x)
    z1.backward()
    print(mu1.grad, cov1.grad)  # show the grads

Output of the two print calls above:

tensor([-0.1538,  0.4517]) None
tensor([-0.1538,  0.4517]) tensor([[-0.2717, -0.0090],
        [-0.0090, -0.2344]])
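
For comparison, here is a minimal sketch of the variant I would expect to work (this is my assumption, not something from the run above): make the leaf tensors require grad before constructing the distribution, so that the Cholesky factor the constructor derives from covariance_matrix is part of the autograd graph. The names mu2/cov2/G2 are only for this sketch.

import torch as th
import torch.distributions as pr

# leaf parameters that require grad *before* the distribution is built
mu2 = th.rand(2).requires_grad_(True)
c = th.rand(2, 2)
cov2 = ((c + c.transpose(0, 1))/2 + th.diag(th.ones(2))).requires_grad_(True)  # symmetric positive-definite leaf

G2 = pr.MultivariateNormal(loc=mu2, covariance_matrix=cov2)

x = th.rand(2)
z2 = G2.log_prob(x)
z2.backward()
print(mu2.grad, cov2.grad)  # I would expect both to be non-None here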