How to predict the derivative of the output?

I want a neural network to predict a value and also the derivative of that value. Is the following code the correct way to do this?

import torch
from torch import nn
from torch.autograd import grad

class net(nn.Module):
    def __init__(self):
        super(net, self).__init__()
        self.lin1 = nn.Linear(3, 30)
        self.lin2 = nn.Linear(30, 1)

    def forward(self, p):
        x = self.lin1(p)
        x = nn.ReLU()(x)
        return self.lin2(x)

x = torch.randn(1000, 3)
y = (5 * torch.sin(x) + 3 * torch.cos(x)).sum(dim=-1).unsqueeze(-1)  # target value
z = (5 * torch.cos(x) - 3 * torch.sin(x)).sum(dim=-1).unsqueeze(-1)  # sum of the per-component derivatives dy/dx_j
model = net()
optimizer = torch.optim.Adam(model.parameters(), lr=3e-3)

for epoch in range(10000):
    model.train()
    x.requires_grad = True
    optimizer.zero_grad()
    output = model(x)
    grad_x = grad(output.sum(), x, retain_graph=True)[0]
    loss_y = nn.MSELoss()(output, y)
    loss_z = nn.MSELoss()(grad_x.sum(dim=-1).unsqueeze(-1), z)
    loss = loss_y + loss_z
    loss.backward(retain_graph=True)
    optimizer.step()
    print('Loss_y = {:.4f} | Loss_z = {:.4f}.'.format(loss_y.item(), loss_z.item()))

I checked the grad_fn of the two losses (see the snippet below) and found loss_y.grad_fn = <MseLossBackward object at 0x0000024F2AB8DF98>, but loss_z.grad_fn = None. So although loss_z decreases, the loss on the derivative of the output doesn't actually participate in gradient descent. Maybe the model simply predicts y so well that it happens to predict z well too. If the dataset is not as easy as this one, loss_z doesn't even decrease.
So how can I predict the derivative of the output correctly?
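
For reference, the check mentioned above was just printing the grad_fn attributes of the two losses (using the output, grad_x, y and z from the training loop above):

loss_y = nn.MSELoss()(output, y)
loss_z = nn.MSELoss()(grad_x.sum(dim=-1).unsqueeze(-1), z)
print(loss_y.grad_fn)  # <MseLossBackward object at 0x...>
print(loss_z.grad_fn)  # None, because grad_x was computed without create_graph=True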

Hi @sakuraiiiii,
TL;DR: You need to use the create_graph argument of the grad function.
That is, changing from the line below

grad_x = grad(output.sum(), x, retain_graph=True)[0]

to

grad_x = grad(output.sum(), x, create_graph=True)[0]

Explanation:
create_graph=True causes the graph of the derivative itself to be constructed, which allows you to compute higher-order derivatives. The returned gradient tensors therefore get a backward function (grad_fn) attached to them and can take part in further backpropagation.
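
Here is a minimal self-contained sketch (a toy function y = (x ** 3).sum(), not the model from your question) illustrating the difference: without create_graph the returned gradient has no grad_fn, with create_graph it does, and a loss defined on that gradient back-propagates second-order derivatives all the way to x:

import torch
from torch.autograd import grad

x = torch.randn(5, requires_grad=True)
y = (x ** 3).sum()                   # dy/dx = 3 * x**2

g = grad(y, x, retain_graph=True)[0]
print(g.grad_fn)                     # None -> nothing to backprop through

g = grad(y, x, create_graph=True)[0]
print(g.grad_fn)                     # not None -> part of the autograd graph

loss = (g ** 2).sum()                # a loss defined on the gradient itself
loss.backward()                      # fills x.grad with d(loss)/dx = 36 * x**3
print(x.grad)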

Thank you very much. But if I use a slightly more difficult target function, like the following:

import torch
from torch import nn
from torch.autograd import grad

class net(nn.Module):
    def __init__(self):
        super(net, self).__init__()
        self.lin1 = nn.Linear(3, 300)
        self.lin2 = nn.Linear(300, 1)

    def forward(self, p):
        x = self.lin1(p)
        x = nn.ReLU()(x)
        return self.lin2(x)

x = torch.randn(50000, 3)
y = (5 * torch.sin(x) + 3 * torch.cos(x) + 4 * torch.sin(2 * x)).sum(dim=-1).unsqueeze(-1)
z = (5 * torch.cos(x) - 3 * torch.sin(x) + 4 * torch.cos(2 * x)).sum(dim=-1).unsqueeze(-1)
model = net()
optimizer = torch.optim.Adam(model.parameters(), lr=3e-3)

for epoch in range(1000):
    model.train()
    x.requires_grad = True
    optimizer.zero_grad()
    output = model(x)
    grad_x = grad(output.sum(), x, retain_graph=True, create_graph=True)[0]
    # loss_y = nn.MSELoss()(output, y)
    # print(loss_y.grad_fn)  # <MseLossBackward object at 0x0000028D1D801400>
    loss_z = nn.MSELoss()(grad_x.sum(dim=-1).unsqueeze(-1), z)
    # print(loss_z.grad_fn)  # None
    # loss = loss_y + loss_z
    loss_z.backward()
    optimizer.step()
    print('Loss_z = {:.4f}.'.format(loss_z.item()))
    # print('Loss_y = {:.4f} | Loss_z = {:.4f}.'.format(loss_y.item(), loss_z.item()))

The result is:

Loss_z = 196.2179.
Loss_z = 190.8175.
Loss_z = 185.5588.
Loss_z = 180.4273.
Loss_z = 175.4291.
...
Loss_z = 37.6990.
Loss_z = 37.7005.
Loss_z = 37.7033.
Loss_z = 37.7058.
Loss_z = 37.7021.
Loss_z = 37.7042.
Loss_z = 37.7074.
Loss_z = 37.7154.
Loss_z = 37.7073.
Loss_z = 37.7076.
Loss_z = 37.7042.
Loss_z = 37.7040.
Loss_z = 37.7000.
Loss_z = 37.7041.
Loss_z = 37.7037.
Loss_z = 37.7032.
Loss_z = 37.7038.
Loss_z = 37.7071.
Loss_z = 37.6989.
Loss_z = 37.7091.
Loss_z = 37.7040.
Loss_z = 37.7026.
Loss_z = 37.7024.
Loss_z = 37.7001.
Loss_z = 37.7006.
Loss_z = 37.7027.
Loss_z = 37.7000.
Loss_z = 37.7022.
Loss_z = 37.7021.
Loss_z = 37.7042.
Loss_z = 37.7000.
Loss_z = 37.6954.
Loss_z = 37.6866.
Loss_z = 37.6955.
Loss_z = 37.6954.
Loss_z = 37.6983.
Loss_z = 37.6968.
Loss_z = 37.6983.
Loss_z = 37.6944.

loss_z stops decreasing. I don't know whether this is a problem with the code or whether I just need to tune the training hyperparameters.