I need to compute some partial derivatives for a custom loss function that I defined below:
    class BaseLoss:
        # Initialize a base class for all losses
        def __init__(self, model, grad, p):
            self._model = model
            self._grad = grad
            self._p = p

        def _norm(self, func, variables):
            v_input = [v.clone().requires_grad_(self._grad) for v in variables]
            u_hat = self._model(torch.stack(v_input, dim=1))
            return torch.linalg.norm(
                func(u_hat, *v_input), ord=self._p, dim=1, keepdim=True
            ).square().mean()

    class ResLoss(BaseLoss):
        def __init__(self, model, res_grad, res_funcs, res_p=2):
            # Initialize a residual loss class
            super(ResLoss, self).__init__(model, res_grad, res_p)
            self._res_funcs = res_funcs

        def __call__(self, res_variables):
            return sum(self._norm(f, v) for f, v in zip(self._res_funcs, res_variables))
I want to define my data, say t, in global scope and give it the requires_grad property so that it can be used in the gradient computations inside u_res. I want to do this with PyTorch's in-place requires_grad_() method inside the _norm function, since my data may come in batches. If I set requires_grad in global scope beforehand, slicing the data into batches or otherwise transforming it before feeding it to the model would create extra nodes in the computational graph used for the gradient calculation. To avoid this, I copy the data with the clone() method in a list comprehension inside _norm(), evaluate the model on the clones there as well, and pass both the prediction and the cloned inputs into func(u_hat, *v_input). However, when I run it, I get a RuntimeError. Is there a way to achieve what I have described?
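To make the batching concern concrete, here is a minimal sketch (names are illustrative) of the global-scope pattern I am trying to avoid:

    import torch

    t = torch.linspace(0, 1, 50, requires_grad=True)  # leaf tensor defined in global scope
    batch = t[:10]          # taking a batch inserts a slicing node into the graph
    print(batch.is_leaf)    # False: gradients must now flow back through the slice
    print(batch.grad_fn)    # <SliceBackward0 object at ...>

Cloning inside _norm() instead should give me a fresh leaf per batch: clone() on a tensor that does not require grad is detached from any graph, and requires_grad_(True) then turns that copy into a leaf.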
For reproducibility, I provide a minimal example and the related error message below.
    import numpy as np
    import torch

    PI = np.pi
    SEED = 7
    torch.manual_seed(SEED)

    x = torch.linspace(-1, 1, 50)
    t = torch.linspace(0, 1, 50)

    def u_res(u, x, t):
        # torch.autograd.grad returns a tuple, hence the [0] indexing
        du_dt = torch.autograd.grad(torch.sum(u), t, retain_graph=True, create_graph=True)[0]
        du_dx = torch.autograd.grad(torch.sum(u), x, retain_graph=True, create_graph=True)[0]
        d2u_dx2 = torch.autograd.grad(torch.sum(du_dx), x, retain_graph=True, create_graph=True)[0]
        return du_dt + u * du_dx - (0.01 / PI) * d2u_dx2

    model = torch.nn.Linear(2, 1)
    res_loss = ResLoss(model, True, [u_res])

    for i in [[x, t]]:
        print(res_loss([i]))
    RuntimeError                              Traceback (most recent call last)
    <ipython-input-3-a3315b149764> in <cell line: 65>()
         65 for i in [[x, t]]:
         66
    ---> 67     print(res_loss([i]))

    4 frames
    /usr/local/lib/python3.10/dist-packages/torch/autograd/__init__.py in grad(outputs, inputs, grad_outputs, retain_graph, create_graph, only_inputs, allow_unused, is_grads_batched, materialize_grads)
        392         )
        393     else:
    --> 394         result = Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
        395             t_outputs,
        396             grad_outputs_,

    RuntimeError: One of the differentiated Tensors appears to not have been used in the graph. Set allow_unused=True if this is the desired behavior.
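For completeness, my understanding of the allow_unused=True suggestion in the message is that it does not repair the graph connectivity; it merely returns None for inputs the output does not depend on. A small sketch (reusing x and t from above) of what I mean:

    w = x.clone().requires_grad_(True)
    v = t.clone().requires_grad_(True)
    out = (w * 2.0).sum()  # depends on w only; v never enters the graph
    g_w, g_v = torch.autograd.grad(out, [w, v], allow_unused=True)
    print(g_v)             # None -- without allow_unused=True this raises the same RuntimeError

So I am looking for a way to make the cloned tensors actually participate in the graph, not just to silence the error.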