How do I solve "One of the differentiated Tensors appears to not have been used in the graph" when taking gradients inside a function?

I need to calculate some partial derivatives for a custom loss function, defined below:

import torch


class BaseLoss:

  # Base class for all losses

  def __init__(self, model, grad, p):

    self._model = model  # network mapping the stacked inputs to u_hat
    self._grad = grad    # whether the cloned inputs should track gradients
    self._p = p          # order of the norm applied to the residual

  def _norm(self, func, variables):

    # Clone each input so requires_grad_() acts on a fresh leaf tensor,
    # leaving the original (possibly batched) data untouched.
    v_input = [v.clone().requires_grad_(self._grad) for v in variables]
    u_hat = self._model(torch.stack(v_input, dim=1))

    # Batch mean of the squared p-norm of the residual.
    return torch.linalg.norm(func(u_hat, *v_input),
                             ord=self._p,
                             dim=1,
                             keepdim=True).square().mean()

class ResLoss(BaseLoss):

  def __init__(self, model, res_grad, res_funcs, res_p=2):

    # Residual loss: one residual function per group of input variables

    super().__init__(model, res_grad, res_p)

    self._res_funcs = res_funcs

  def __call__(self, res_variables):

    return sum(self._norm(f, v) for f, v in zip(self._res_funcs, res_variables))
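In words, _norm(f, v) clones the inputs, evaluates the model on them stacked along dim=1, and returns the batch mean of the squared p-norm of the residual, i.e. mean_i ||f(u_hat_i, x_i, t_i)||_p^2; ResLoss then sums this quantity over all residual functions.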

I want to define my data, say x and t, in global scope and give them the requires_grad property later, so that they can be used by u_res in the gradient calculation. I want to do this with PyTorch's in-place requires_grad_() method inside _norm(), since my data may come in batches. If I defined x and t with requires_grad=True in global scope beforehand, slicing them into batches or otherwise manipulating them before feeding the model would create extra nodes in the computational graph.

To avoid this, I copy the data with clone() in a list comprehension inside _norm() and run the model on the copies, so that I can pass both the model output and the cloned inputs to func(u_hat, *v_input). However, when I run it, I get an error. Is there a way to achieve what I have described above?
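For clarity, this is the cloning pattern I am relying on, shown in isolation (a minimal sketch with a toy tensor, not my actual data): cloning a plain data tensor and calling requires_grad_(True) on the clone gives a fresh leaf per batch, so gradients flow to the clone while the global tensor stays free of graph nodes.

import torch

x = torch.linspace(-1, 1, 5)           # plain data tensor, no grad history
x_in = x.clone().requires_grad_(True)  # fresh leaf: grads flow to x_in, not x

y = (x_in ** 2).sum()
print(torch.autograd.grad(y, x_in)[0])  # dy/dx_in = 2 * x_in
print(x.requires_grad)                  # False: the global tensor is untouched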

For reproducibility, I provide the minimal code and the related error message below.

Code:

import numpy as np
import torch

PI = np.pi
SEED = 7
torch.manual_seed(SEED)

x = torch.linspace(-1, 1, 50)
t = torch.linspace(0, 1, 50)

def u_res(u, x, t):

  # Residual of the viscous Burgers' equation: u_t + u * u_x - (0.01 / pi) * u_xx
  du_dt = torch.autograd.grad(torch.sum(u), t, retain_graph=True, create_graph=True)[0]
  du_dx = torch.autograd.grad(torch.sum(u), x, retain_graph=True, create_graph=True)[0]
  d2u_dx2 = torch.autograd.grad(torch.sum(du_dx), x, retain_graph=True, create_graph=True)[0]

  return du_dt + u * du_dx - (0.01 / PI) * d2u_dx2

model = torch.nn.Linear(2, 1)

res_loss = ResLoss(model, True, [u_res])

for i in [[x, t]]:

  print(res_loss([i]))

Error:

RuntimeError                              Traceback (most recent call last)
<ipython-input-3-a3315b149764> in <cell line: 65>()
     65 for i in [[x, t]]:
     66 
---> 67   print(res_loss([i]))

4 frames
/usr/local/lib/python3.10/dist-packages/torch/autograd/__init__.py in grad(outputs, inputs, grad_outputs, retain_graph, create_graph, only_inputs, allow_unused, is_grads_batched, materialize_grads)
    392         )
    393     else:
--> 394         result = Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
    395             t_outputs,
    396             grad_outputs_,

RuntimeError: One of the differentiated Tensors appears to not have been used in the graph. Set allow_unused=True if this is the desired behavior.
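For what it's worth, the same error can be reproduced outside my classes in just a few lines. Both first-order gradients go through; it is only the second-order call on x that raises (a stripped-down sketch of my setup, using the same bare nn.Linear model):

import torch

model = torch.nn.Linear(2, 1)

x = torch.linspace(-1, 1, 50).clone().requires_grad_(True)
t = torch.linspace(0, 1, 50).clone().requires_grad_(True)

u = model(torch.stack([x, t], dim=1))

du_dt = torch.autograd.grad(torch.sum(u), t, retain_graph=True, create_graph=True)[0]  # fine
du_dx = torch.autograd.grad(torch.sum(u), x, retain_graph=True, create_graph=True)[0]  # fine
d2u_dx2 = torch.autograd.grad(torch.sum(du_dx), x)[0]  # raises the RuntimeError above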