Hi all I am trying to calculate Jacobian matrix in the form of batch mode as my input ‘x’ has 128x1000 dimension where 128 is the batch size and 1000 is the number of classes. When I used torch.autograd.functional.jacobian it gave me ‘cuda out of memory’ error. So the batch mode was preferred. the output of the torch.autograd.functional.jacobian function in lower size is like [batch_size, n_classes, batch_size, n_classes] so I need to change my output like this 4D while my output is [batch_size, n_classes, n_classes].

my code is as follows;

```
import torch
import torch.nn.functional as F
def gradient(y, x, grad_outputs=None):
"""Compute dy/dx @ grad_outputs"""
if grad_outputs is None:
grad_outputs = torch.ones_like(y)
grad = torch.autograd.grad(y, [x], grad_outputs = grad_outputs, create_graph=True)[0]
return grad
def jacobian(y, x):
"""Compute dy/dx = dy/dx @ grad_outputs;
for grad_outputs in [1, 0, ..., 0], [0, 1, 0, ..., 0], ...., [0, ..., 0, 1]"""
jac = torch.zeros(y.shape[0], x.shape[0])
for i in range(y.shape[0]):
grad_outputs = torch.zeros_like(y)
grad_outputs[i] = 1
jac[i] = gradient(y, x, grad_outputs = grad_outputs)
return jac
def divergence(y, x):
div = 0.
for i in range(y.shape[-1]):
div += torch.autograd.grad(y[..., i], x, torch.ones_like(y[..., i]), create_graph=True)[0][..., i:i+1]
return div
def laplace(y, x):
grad = gradient(y, x)
return divergence(grad, x)
x = torch.rand(128,1000)
w = torch.rand(1000, 1000)
b = torch.rand(1000)
j=[]
for i in range(128):
x_p = torch.tensor(x[i,:], requires_grad=True)
y = F.linear(x_p, w, b)
j_p=jacobian(y,x_p)
j.append(j_p)
j=torch.stack(j,0)
```

Thanks.