Computing the Hessian

I’m trying to compute the full Hessian of the loss w.r.t. the weights of a linear layer. I am calling autograd.grad on the gradient, specifying the weights as inputs. The result I get contains the column sums of the correct Hessian rather than the Hessian itself. What am I doing wrong here?


import torch
import torch.nn.functional as functional
import torch.autograd as autograd

def main():
    min_max = (-3, 3)
    ni, no = 5, 3

    # randint defaults to int64; autograd needs floating-point tensors
    x = torch.randint(*min_max, (1, ni), dtype=torch.float)
    t = torch.randint(*min_max, (1, no), dtype=torch.float)
    W = torch.randint(*min_max, (no, ni), dtype=torch.float, requires_grad=True)
    b = torch.randint(*min_max, (no,), dtype=torch.float, requires_grad=True)

    y = functional.linear(x, W, b)
    loss = functional.mse_loss(y, t)

    grad_W, = autograd.grad(loss, W, create_graph=True, retain_graph=True)
    print("Grad torch:")
    print(grad_W)

    # grad_W is not a scalar, so grad_outputs is required; a vector of
    # ones gives a Hessian-vector product (here: the Hessian's column sums)
    hess_W, = autograd.grad(grad_W, W, grad_outputs=torch.ones_like(grad_W),
                            create_graph=True, retain_graph=True)
    print("Hess torch:")
    print(hess_W)

    print("Grad manual:")
    print(2 / no * torch.mm((y - t).view(-1, 1), x.view(1, -1)))

    print("Hess manual:")
    print(2 / no * torch.mm(x.view(-1, 1), x.view(1, -1)))

if __name__ == "__main__":
    main()

Ok, figured it out. autograd.grad computes vector–Jacobian products, so only Hessian-vector products are supported directly; contracting the Hessian with a vector of ones is what produced the column sums I was seeing.
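For anyone who still wants the full matrix: here is a sketch of two ways to build it, assuming PyTorch >= 1.5 for the `torch.autograd.functional.hessian` helper. The loop variant just takes one Hessian-vector product per basis direction, i.e. one autograd.grad call per entry of grad_W.

```python
import torch
import torch.nn.functional as functional
from torch.autograd.functional import hessian

torch.manual_seed(0)
ni, no = 5, 3
x = torch.randint(-3, 3, (1, ni), dtype=torch.float)
t = torch.randint(-3, 3, (1, no), dtype=torch.float)
b = torch.randint(-3, 3, (no,), dtype=torch.float)
W = torch.randint(-3, 3, (no, ni), dtype=torch.float, requires_grad=True)

def loss_fn(W):
    return functional.mse_loss(functional.linear(x, W, b), t)

# Option 1: built-in helper; result has shape (no, ni, no, ni)
H = hessian(loss_fn, W)

# Option 2: one Hessian-vector product per entry of grad_W,
# stacked row by row into the full (no*ni, no*ni) matrix
grad_W, = torch.autograd.grad(loss_fn(W), W, create_graph=True)
rows = [torch.autograd.grad(g, W, retain_graph=True)[0].reshape(-1)
        for g in grad_W.reshape(-1)]
H_loop = torch.stack(rows)

print(torch.allclose(H.reshape(no * ni, no * ni), H_loop))  # True
```

Since the loss is quadratic in W, the Hessian is constant and block-diagonal: one (2 / no) * x xᵀ block per output unit, which is a handy sanity check for either method.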