I want a neural network to predict both a value and the derivative of that value. Is the following code the correct way to do it?
import torch
from torch import nn
from torch.autograd import grad

class net(nn.Module):
    def __init__(self):
        super(net, self).__init__()
        self.lin1 = nn.Linear(3, 30)
        self.lin2 = nn.Linear(30, 1)

    def forward(self, p):
        x = self.lin1(p)
        x = nn.ReLU()(x)
        return self.lin2(x)

x = torch.randn(1000, 3)
y = (5 * torch.sin(x) + 3 * torch.cos(x)).sum(dim=-1).unsqueeze(-1)
z = (5 * torch.cos(x) - 3 * torch.sin(x)).sum(dim=-1).unsqueeze(-1)

model = net()
optimizer = torch.optim.Adam(model.parameters(), lr=3e-3)

for epoch in range(10000):
    model.train()
    x.requires_grad = True
    optimizer.zero_grad()
    output = model(x)
    grad_x = grad(output.sum(), x, retain_graph=True)[0]
    loss_y = nn.MSELoss()(output, y)
    loss_z = nn.MSELoss()(grad_x.sum(dim=-1).unsqueeze(-1), z)
    loss = loss_y + loss_z
    loss.backward(retain_graph=True)
    optimizer.step()
    print('Loss_y = {:.4f} | Loss_z = {:.4f}.'.format(loss_y.item(), loss_z.item()))
I checked the grad_fn of the two losses and found loss_y.grad_fn = <MseLossBackward object at 0x0000024F2AB8DF98>, but loss_z.grad_fn = None. So although loss_z decreases, the loss on the derivative of the output doesn't actually participate in the gradient descent. Maybe the model just predicts y so well that it can also predict z well. If the dataset is not as easy as this one, loss_z doesn't even decrease.
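For reference, this is the check I ran inside the training loop, right after the two losses are computed:

    print(loss_y.grad_fn)  # <MseLossBackward object at 0x0000024F2AB8DF98>
    print(loss_z.grad_fn)  # None -> loss_z contributes nothing to backward()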
So how do I predict the derivative of the output correctly?
Hi @sakuraiiiii,
TL;DR: You need to use the create_graph option of the grad function.
That is, change

grad_x = grad(output.sum(), x, retain_graph=True)[0]

to

grad_x = grad(output.sum(), x, create_graph=True)[0]
Explanation:
Passing create_graph=True makes autograd construct the graph of the derivative, which allows you to compute higher-order derivatives. The gradient tensors returned by grad therefore get a backward function (grad_fn) attached to them, so a loss built from them can backpropagate into the network's parameters.
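To illustrate the difference on a tiny standalone example (not your model, just a sketch with a toy function):

import torch
from torch.autograd import grad

x = torch.randn(5, 3, requires_grad=True)
out = (x ** 2).sum()

# Without create_graph, the returned gradient is detached from the graph,
# so nothing computed from it can backpropagate.
g1 = grad(out, x, retain_graph=True)[0]
print(g1.requires_grad, g1.grad_fn)            # False None

# With create_graph, the gradient carries its own grad_fn, so a loss
# computed from it (e.g. an MSE against a target derivative) can
# backpropagate into the parameters.
g2 = grad(out, x, create_graph=True)[0]
print(g2.requires_grad, g2.grad_fn is not None)  # True True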
Thank you very much. But what if I use a slightly more difficult target, like the following:
import torch
from torch import nn
from torch.autograd import grad

class net(nn.Module):
    def __init__(self):
        super(net, self).__init__()
        self.lin1 = nn.Linear(3, 300)
        self.lin2 = nn.Linear(300, 1)

    def forward(self, p):
        x = self.lin1(p)
        x = nn.ReLU()(x)
        return self.lin2(x)

x = torch.randn(50000, 3)
y = (5 * torch.sin(x) + 3 * torch.cos(x) + 4 * torch.sin(2 * x)).sum(dim=-1).unsqueeze(-1)
z = (5 * torch.cos(x) - 3 * torch.sin(x) + 4 * torch.cos(2 * x)).sum(dim=-1).unsqueeze(-1)

model = net()
optimizer = torch.optim.Adam(model.parameters(), lr=3e-3)

for epoch in range(1000):
    model.train()
    x.requires_grad = True
    optimizer.zero_grad()
    output = model(x)
    grad_x = grad(output.sum(), x, retain_graph=True, create_graph=True)[0]
    # loss_y = nn.MSELoss()(output, y)
    # print(loss_y.grad_fn)  # <MseLossBackward object at 0x0000028D1D801400>
    loss_z = nn.MSELoss()(grad_x.sum(dim=-1).unsqueeze(-1), z)
    # print(loss_z.grad_fn)  # None
    # loss = loss_y + loss_z
    loss_z.backward()
    optimizer.step()
    print('Loss_z = {:.4f}.'.format(loss_z.item()))
    # print('Loss_y = {:.4f} | Loss_z = {:.4f}.'.format(loss_y.item(), loss_z.item()))
The result is:
Loss_z = 196.2179.
Loss_z = 190.8175.
Loss_z = 185.5588.
Loss_z = 180.4273.
Loss_z = 175.4291.
...
Loss_z = 37.6990.
Loss_z = 37.7005.
Loss_z = 37.7033.
Loss_z = 37.7058.
Loss_z = 37.7021.
Loss_z = 37.7042.
Loss_z = 37.7074.
Loss_z = 37.7154.
Loss_z = 37.7073.
Loss_z = 37.7076.
Loss_z = 37.7042.
Loss_z = 37.7040.
Loss_z = 37.7000.
Loss_z = 37.7041.
Loss_z = 37.7037.
Loss_z = 37.7032.
Loss_z = 37.7038.
Loss_z = 37.7071.
Loss_z = 37.6989.
Loss_z = 37.7091.
Loss_z = 37.7040.
Loss_z = 37.7026.
Loss_z = 37.7024.
Loss_z = 37.7001.
Loss_z = 37.7006.
Loss_z = 37.7027.
Loss_z = 37.7000.
Loss_z = 37.7022.
Loss_z = 37.7021.
Loss_z = 37.7042.
Loss_z = 37.7000.
Loss_z = 37.6954.
Loss_z = 37.6866.
Loss_z = 37.6955.
Loss_z = 37.6954.
Loss_z = 37.6983.
Loss_z = 37.6968.
Loss_z = 37.6983.
Loss_z = 37.6944.
loss_z stops decreasing and just fluctuates around 37.7. I don't know whether this is a problem with the code or whether I just need to tune the training hyperparameters.
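One experiment I might try (just a guess on my part, the class name below is my own and I have not verified that it fixes the plateau): with a single ReLU hidden layer the network is piecewise linear, so grad_x is piecewise constant in x, which might limit how well it can match a smooth target like z from the derivative loss alone. A sketch of the same setup with a smooth activation (Tanh) and both losses combined:

import torch
from torch import nn
from torch.autograd import grad

class SmoothNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin1 = nn.Linear(3, 300)
        self.act = nn.Tanh()  # smooth activation, so the input gradient is not piecewise constant
        self.lin2 = nn.Linear(300, 1)

    def forward(self, p):
        return self.lin2(self.act(self.lin1(p)))

x = torch.randn(50000, 3, requires_grad=True)
with torch.no_grad():  # targets should not track gradients
    y = (5 * torch.sin(x) + 3 * torch.cos(x) + 4 * torch.sin(2 * x)).sum(dim=-1, keepdim=True)
    z = (5 * torch.cos(x) - 3 * torch.sin(x) + 4 * torch.cos(2 * x)).sum(dim=-1, keepdim=True)

model = SmoothNet()
optimizer = torch.optim.Adam(model.parameters(), lr=3e-3)

for epoch in range(1000):
    optimizer.zero_grad()
    output = model(x)
    # create_graph=True so that the loss on the derivative reaches the parameters
    grad_x = grad(output.sum(), x, create_graph=True)[0]
    loss_y = nn.MSELoss()(output, y)
    loss_z = nn.MSELoss()(grad_x.sum(dim=-1, keepdim=True), z)
    loss = loss_y + loss_z
    loss.backward()
    optimizer.step()
    print('Loss_y = {:.4f} | Loss_z = {:.4f}.'.format(loss_y.item(), loss_z.item()))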