OK — as Simon pointed out, your covariance matrix needs to be positive (semi-)definite (PyTorch's `MultivariateNormal` in fact requires it to be strictly positive definite, since it takes a Cholesky factor internally). As the docs at https://pytorch.org/docs/stable/distributions.html#multivariatenormal point out, you can achieve that by multiplying your lower-triangular matrix by its own transpose:
import torch
from torch.distributions.multivariate_normal import MultivariateNormal
from torch.distributions import kl_divergence
# Demo: KL divergence between two multivariate normals.
# q's covariance is built as sigma = L @ L.T from a random lower-triangular
# factor L, which is positive semi-definite by construction; a small diagonal
# jitter makes it strictly positive definite so MultivariateNormal's
# Cholesky-based validation cannot fail even if a diagonal entry of L is
# (near) zero.

torch.manual_seed(0)  # make the demo output reproducible

# Reference distribution p ~ N(0, I_5).
p = MultivariateNormal(torch.zeros(5), torch.eye(5))
print('p.sample()', p.sample())

q_mean = torch.randn(1, 5)
L = torch.tril(torch.randn(5, 5))
# L @ L.T is PSD; the 1e-6 * I jitter guarantees strict positive definiteness.
q_cov = L @ L.transpose(0, 1) + 1e-6 * torch.eye(5)
print('q_mean', q_mean)
print('q_cov', q_cov)

q = MultivariateNormal(q_mean, q_cov)
print('q.sample()', q.sample())

# KL(p || q); q has a batch dimension of 1, so the result has shape (1,).
kl = kl_divergence(p, q)
print('kl', kl)
Output:
p.sample() tensor([ 0.1836, -0.6165, 0.7646, -0.9500, -1.9736])
q_mean tensor([[ 0.7356, -1.4405, 0.4172, 0.2697, 1.2461]])
q_cov tensor([[ 3.7545, -0.4939, 0.2997, 0.3735, 0.6791],
[-0.4939, 0.1115, 0.4387, -0.1818, 0.0683],
[ 0.2997, 0.4387, 7.0361, -2.1879, 2.3761],
[ 0.3735, -0.1818, -2.1879, 1.1052, -0.5114],
[ 0.6791, 0.0683, 2.3761, -0.5114, 1.1063]])
q.sample() tensor([[ 4.9458, -1.7962, 1.3363, 0.2006, 1.5021]])
kl tensor([ 117.6113])