I need to calculate some partial derivatives for the custom loss function I define below:
import torch


class BaseLoss:
    # Base class shared by all loss terms
    def __init__(self, model, grad, p):
        self._model = model
        self._grad = grad
        self._p = p

    def _norm(self, func, variables):
        # Clone each input and toggle requires_grad so autograd can track it
        v_input = [v.clone().requires_grad_(self._grad) for v in variables]
        u_hat = self._model(torch.stack(v_input, dim=1))
        # Mean of the squared p-norm of the residual returned by func
        return torch.linalg.norm(func(u_hat, *v_input),
                                 ord=self._p,
                                 dim=1,
                                 keepdim=True).square().mean()


class ResLoss(BaseLoss):
    # Residual loss: one residual function per group of variables
    def __init__(self, model, res_grad, res_funcs, res_p=2):
        super().__init__(model, res_grad, res_p)
        self._res_funcs = res_funcs

    def __call__(self, res_variables):
        return sum(self._norm(f, v) for f, v in zip(self._res_funcs, res_variables))
I want to define my data, say x and t, in global scope and give them the requires_grad property inside _norm, so that u_res can use them in the gradient calculation. I want to do this with PyTorch's in-place requires_grad_() method inside _norm, because my data may come in batches. If I defined x and t with requires_grad in global scope beforehand, every batching or other manipulation performed before feeding the data to the model would add extra nodes to the computational graph. To avoid this, I copy the data with clone() in a list comprehension inside _norm() and pass the clones, together with the model output, to func(u_hat, *v_input). However, when I run it, I get an error. Is there a way to achieve what I have described above?
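To make the "extra graph nodes" concern concrete, here is a minimal sketch of the two setups (the names x_global, x_batch, x_plain and x_input are only for illustration):

import torch

# (a) What I want to avoid: requires_grad on the global data. Slicing a batch
#     out of such a leaf happens inside the autograd graph, so the batch is a
#     non-leaf tensor with an extra SliceBackward node behind it.
x_global = torch.linspace(-1, 1, 50, requires_grad=True)
x_batch = x_global[:10]
print(x_batch.is_leaf, x_batch.grad_fn)   # False, <SliceBackward0 ...>

# (b) What _norm is meant to do: keep the global data plain and turn each batch
#     into a fresh leaf only at loss time via clone().requires_grad_().
x_plain = torch.linspace(-1, 1, 50)
x_input = x_plain[:10].clone().requires_grad_(True)
print(x_input.is_leaf, x_input.grad_fn)   # True, None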
For reproducibility, I provide the minimal code and the related error message below.
Code:
import numpy as np  # torch and the loss classes above are assumed to be in scope

PI = np.pi
SEED = 7
torch.manual_seed(SEED)

x = torch.linspace(-1, 1, 50)
t = torch.linspace(0, 1, 50)

def u_res(u, x, t):
    # Residual of the viscous Burgers' equation: u_t + u*u_x - (0.01/pi)*u_xx
    du_dt = torch.autograd.grad(torch.sum(u), t, retain_graph=True, create_graph=True)[0]
    du_dx = torch.autograd.grad(torch.sum(u), x, retain_graph=True, create_graph=True)[0]
    d2u_dx2 = torch.autograd.grad(torch.sum(du_dx), x, retain_graph=True, create_graph=True)[0]
    return du_dt + u * du_dx - (0.01 / PI) * d2u_dx2

model = torch.nn.Linear(2, 1)
res_loss = ResLoss(model, True, [u_res])

for i in [[x, t]]:
    print(res_loss([i]))
Error:
RuntimeError Traceback (most recent call last)
<ipython-input-3-a3315b149764> in <cell line: 65>()
65 for i in [[x, t]]:
66
---> 67 print(res_loss([i]))
4 frames
/usr/local/lib/python3.10/dist-packages/torch/autograd/__init__.py in grad(outputs, inputs, grad_outputs, retain_graph, create_graph, only_inputs, allow_unused, is_grads_batched, materialize_grads)
392 )
393 else:
--> 394 result = Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
395 t_outputs,
396 grad_outputs_,
RuntimeError: One of the differentiated Tensors appears to not have been used in the graph. Set allow_unused=True if this is the desired behavior.