First- and second-order derivatives with respect to the input inside a custom loss function

Hi, I’m trying to approximate the solution of a PDE, and to do that I need to compute first- and second-order derivatives of the network output with respect to specific input values in my training batch.
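
To illustrate what I mean, here is a toy standalone example (not my actual code) of the kind of computation I’m after, using y = x**2 so that dy/dx = 2x and d2y/dx2 = 2:

import torch

x = torch.linspace(0., 1., 5, requires_grad=True)
y = x**2

# first derivative dy/dx (create_graph=True so it can be differentiated again)
dydx, = torch.autograd.grad(y, x, grad_outputs=torch.ones_like(y), create_graph=True)

# second derivative d2y/dx2
d2ydx2, = torch.autograd.grad(dydx, x, grad_outputs=torch.ones_like(dydx))

print(dydx)    # tensor([0.0000, 0.5000, 1.0000, 1.5000, 2.0000], grad_fn=...)
print(d2ydx2)  # tensor([2., 2., 2., 2., 2.])

This is the pattern I’m trying to use inside my loss function.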

I defined a custom loss function:

def ODE(x, y, f_init):
    # First derivative of the network output y w.r.t. the inputs x
    dydx, = torch.autograd.grad(y, x,
                                grad_outputs=torch.ones_like(y),
                                create_graph=True, retain_graph=True,
                                allow_unused=True)

    # Second derivative w.r.t. the inputs
    d2ydx2 = torch.autograd.grad(dydx, x,
                                 grad_outputs=torch.ones_like(dydx),
                                 create_graph=True, retain_graph=True,
                                 allow_unused=True)[0]

    # PDE residual: 4*dy/dt - d2y/dx2 (column 0 = x, column 1 = t)
    eq = 4*dydx[:, 1] - d2ydx2[:, 0]

    # Boundary condition at x = 0 (uses the globals X_train and model)
    bc1_inp = torch.zeros(X_train.shape)
    bc1_inp[:, 1] = X_train[:, 1]
    bc1 = model(bc1_inp)

    # Boundary condition at x = 2
    bc2_inp = torch.ones(X_train.shape)*2.
    bc2_inp[:, 1] = X_train[:, 1]
    bc2 = model(bc2_inp)

    # Initial condition at t = 0
    ic_inp = torch.zeros(X_train.shape)
    ic_inp[:, 0] = X_train[:, 0]
    ic = model(ic_inp) - f_init

    return torch.mean(eq**2 + bc1**2 + bc2**2 + ic**2), dydx

When I try to run the backprop, I get the error:

RuntimeError: leaf variable has been moved into the graph interior

Attaching my complete code below:

# Define the NN model to solve the problem
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.lin1 = nn.Linear(2,10)
        self.lin2 = nn.Linear(10,1)

    def forward(self, x):
        x = torch.sigmoid(self.lin1(x))
        x = self.lin2(x)
        return x

model = Model()

# Define the loss function from the PDE to solve
def ODE(x, y, f_init):
    dydx, = torch.autograd.grad(y, x,
                                grad_outputs=torch.ones_like(y),
                                create_graph=True, retain_graph=True,
                                allow_unused=True)

    d2ydx2 = torch.autograd.grad(dydx, x,
                                 grad_outputs=torch.ones_like(dydx),
                                 create_graph=True, retain_graph=True,
                                 allow_unused=True)[0]

    eq = 4*dydx[:, 1] - d2ydx2[:, 0]

    bc1_inp = torch.zeros(X_train.shape)
    bc1_inp[:, 1] = X_train[:, 1]
    bc1 = model(bc1_inp)

    bc2_inp = torch.ones(X_train.shape)*2.
    bc2_inp[:, 1] = X_train[:, 1]
    bc2 = model(bc2_inp)

    ic_inp = torch.zeros(X_train.shape)
    ic_inp[:, 0] = X_train[:, 0]
    ic = model(ic_inp) - f_init

    return torch.mean(eq**2 + bc1**2 + bc2**2 + ic**2), dydx

loss_func = ODE

# Define the optimization
# opt = optim.SGD(model.parameters(), lr=0.1, momentum=0.99,nesterov=True) # Equivalent to blog
opt = optim.Adam(model.parameters(),lr=0.1,amsgrad=True) # Got faster convergence with Adam using amsgrad

# Define reference grid 
x_data = X_train

# Iterative learning
epochs = 1000
for epoch in range(epochs):
    opt.zero_grad()
    y_trial = model(x_data)
    loss, dydx = loss_func(x_data, y_trial, f_init)

    loss.backward()
    opt.step()

    if epoch % 100 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))
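
For reference, the forward pass by itself doesn’t complain; it’s the loss.backward() call that raises the error. A plain shape check with dummy input runs fine:

out = model(torch.rand(16, 2))
print(out.shape)  # torch.Size([16, 1])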

X_train has shape (16, 2) and f_init has shape (16, 1). I created them with the following code:

import numpy as np
import torch

# x and t are the 1-D spatial and temporal grids defined earlier in my code
X_train = torch.zeros((16, 2), requires_grad=True)
for i in range(16):
    x_rand = np.random.randint(x.shape[0])  # random index into the spatial grid
    t_rand = np.random.randint(t.shape[0])  # random index into the temporal grid
    X_train[i, 0] = x[x_rand]
    X_train[i, 1] = t[t_rand]

f_init = 2*torch.sin(np.pi*X_train[:, 0]/2) - torch.sin(np.pi*X_train[:, 0]) + 4*torch.sin(2*np.pi*X_train[:, 0])
f_init = f_init.view(16, 1)  # reshape to (16, 1)
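
Just to confirm that the shapes match what the loss function expects:

print(X_train.shape)  # torch.Size([16, 2])
print(f_init.shape)   # torch.Size([16, 1])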

I can’t understand what I’m missing here. I hope someone can help.
Thanks