I implement a logistic regression model with random initialization and random input. I calculate the Hessian matrix using torch.autograd.grad
. Here is the code:
import torch
from torch import nn
class LogisticRegression(nn.Module):
def __init__(self, input_dim):
super(LogisticRegression, self).__init__()
self.fc = nn.Linear(input_dim, 1, bias=False)
self.sigmoid = nn.Sigmoid()
with torch.no_grad():
# random initialization
self.fc.weight = nn.Parameter(torch.rand((1, input_dim)).sub(0.25))
def forward(self, x):
x = self.sigmoid(self.fc(x))
return x.squeeze()
input_ = torch.rand((100, 100))
target = torch.rand(100)
model = LogisticRegression(input_dim=100)
output = model(input_)
criterion = nn.BCELoss(reduction="none")
loss = torch.sum(criterion(output, target))
""" Computing the hessian matrix """
hess = torch.zeros((100, 100))
grad = torch.autograd.grad(outputs=loss, inputs=model.fc.weight, create_graph=True)
grad = grad[0].squeeze()
for i, g in enumerate(grad):
model.zero_grad()
second_order_grad = torch.autograd.grad(outputs=g, inputs=model.fc.weight, retain_graph=True)
second_order_grad = second_order_grad[0].squeeze()
hess[i, :] = second_order_grad
""" Check if hessian is positive semi-definite """
eigenvalues, _ = torch.linalg.eig(hess)
print(eigenvalues)
By running the code, I got the following results
tensor([ 1.2445e-03+0.0000e+00j, -4.1439e-04+0.0000e+00j,
1.8660e-04+0.0000e+00j, 1.1943e-04+0.0000e+00j,
-1.1723e-04+0.0000e+00j, -9.6112e-05+0.0000e+00j,
8.6769e-05+0.0000e+00j, -6.3600e-05+0.0000e+00j,
-6.2202e-05+0.0000e+00j, 7.0490e-05+0.0000e+00j,
-4.6572e-05+0.0000e+00j, 6.4383e-05+0.0000e+00j,
5.7739e-05+0.0000e+00j, 5.6093e-05+0.0000e+00j,
4.4553e-05+0.0000e+00j, -4.2349e-05+0.0000e+00j,
-3.3346e-05+0.0000e+00j, 4.6737e-05+0.0000e+00j,
4.0225e-05+0.0000e+00j, 3.7023e-05+0.0000e+00j,
3.2834e-05+0.0000e+00j, -2.8623e-05+0.0000e+00j,
-2.3274e-05+0.0000e+00j, 3.1192e-05+0.0000e+00j,
3.0093e-05+0.0000e+00j, 2.7552e-05+0.0000e+00j,
-1.9698e-05+0.0000e+00j, 2.4640e-05+0.0000e+00j,
2.3299e-05+0.0000e+00j, 2.0421e-05+0.0000e+00j,
-1.5103e-05+0.0000e+00j, -1.3587e-05+0.0000e+00j,
1.8011e-05+0.0000e+00j, 1.6778e-05+0.0000e+00j,
1.5867e-05+0.0000e+00j, 1.4337e-05+0.0000e+00j,
-1.1166e-05+0.0000e+00j, -1.0755e-05+0.0000e+00j,
-9.8075e-06+0.0000e+00j, 1.2454e-05+0.0000e+00j,
1.3076e-05+6.5081e-08j, 1.3076e-05-6.5081e-08j,
-7.7048e-06+0.0000e+00j, 1.0609e-05+0.0000e+00j,
1.1109e-05+0.0000e+00j, -7.4331e-06+0.0000e+00j,
-6.7556e-06+0.0000e+00j, 9.6333e-06+0.0000e+00j,
8.8359e-06+0.0000e+00j, 7.9485e-06+0.0000e+00j,
-5.9216e-06+0.0000e+00j, -4.6393e-06+0.0000e+00j,
-4.6548e-06+0.0000e+00j, 7.1046e-06+0.0000e+00j,
6.9616e-06+0.0000e+00j, 5.9614e-06+0.0000e+00j,
5.6388e-06+0.0000e+00j, 5.1294e-06+0.0000e+00j,
-4.1212e-06+0.0000e+00j, 4.6867e-06+0.0000e+00j,
3.9201e-06+0.0000e+00j, 3.5539e-06+6.5330e-08j,
3.5539e-06-6.5330e-08j, -3.1284e-06+0.0000e+00j,
-3.0800e-06+0.0000e+00j, -2.3478e-06+0.0000e+00j,
-2.1333e-06+0.0000e+00j, 2.6568e-06+0.0000e+00j,
2.4638e-06+0.0000e+00j, 2.1477e-06+0.0000e+00j,
-1.3528e-06+0.0000e+00j, -1.1852e-06+0.0000e+00j,
1.7787e-06+0.0000e+00j, 1.6337e-06+0.0000e+00j,
1.4714e-06+0.0000e+00j, -9.1909e-07+0.0000e+00j,
-8.1863e-07+0.0000e+00j, 1.1196e-06+0.0000e+00j,
1.0123e-06+0.0000e+00j, 9.0910e-07+0.0000e+00j,
-5.4653e-07+0.0000e+00j, 7.3642e-07+0.0000e+00j,
5.7726e-07+0.0000e+00j, -4.1951e-07+0.0000e+00j,
3.9256e-07+0.0000e+00j, -1.7926e-07+0.0000e+00j,
3.3471e-07+0.0000e+00j, 2.6827e-07+0.0000e+00j,
2.1603e-07+0.0000e+00j, -1.0832e-07+0.0000e+00j,
1.1932e-07+1.2010e-08j, 1.1932e-07-1.2010e-08j,
-6.6019e-08+0.0000e+00j, 7.9518e-08+0.0000e+00j,
6.2423e-08+0.0000e+00j, 2.2673e-08+0.0000e+00j,
6.9920e-09+0.0000e+00j, 1.1066e-09+1.8414e-09j,
1.1066e-09-1.8414e-09j, 4.6437e-12+0.0000e+00j])
Apparently, some negative value exists in the eigenvalues. However, the Hessian matrix of logistic regression is supposed to be positive semi-definite, where all eigenvalues should be larger or equal to zero with an arbitrary value of weight and input.
May I know what causes this issue? Thank you!