Please explain why I got this error: RuntimeError: One of the differentiated Tensors appears to not have been used in the graph

I am trying to solve the Fokker-Planck equation of a 2D Duffing oscillator with a neural network. Here is my code. The part that matters is where g1_x1 and g1_x2 are computed; I have marked it with ***.
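For context, the stationary Fokker-Planck equation that my residual term e1 is meant to encode (as far as I understand it) is

$$\partial_{x_1}\!\left(x_2\,p\right) + \partial_{x_2}\!\big(f(x_1,x_2)\,p\big) = \frac{\sigma^2}{2}\,\partial_{x_2}^2 p, \qquad f(x_1,x_2) = -\eta x_2 - \alpha x_1 - \beta x_1^3,$$

where p(x1, x2) is the stationary density the network should output.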


import numpy as np
from numpy.linalg import norm

import torch
from torch.utils.data import Dataset
from torch.autograd import grad
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm import tqdm
import argparse
import os
from matplotlib import pyplot as plt

import scipy
import pandas as pd

# Data generation for domain and boundary
def get_domain_data(dt=0.01):
    x1 = np.arange(-2 + dt, 2, dt)
    x2 = np.arange(-2 + dt, 2, dt)
    X1, X2 = np.meshgrid(x1, x2)
    return np.stack([X1.ravel(), X2.ravel()], axis=-1)

def get_boundary_data():
    x1 = np.array([-2, 2])
    x2 = np.array([-2, 2])
    boundary_data = np.array(np.meshgrid(x1, x2)).T.reshape(-1, 2)
    return np.unique(boundary_data, axis=0)


class PdeDataset(Dataset):
    def __init__(self, data):
        self.data = torch.tensor(data, dtype=torch.float32)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Neural Network definition
class NeuralNet(nn.Module):
    def __init__(self, input_size, node_num, output_size):
        super(NeuralNet, self).__init__()
        self.layer = nn.Sequential(
            nn.Linear(input_size, node_num),
            nn.Sigmoid(),
            nn.Linear(node_num, node_num),
            nn.Sigmoid(),
            nn.Linear(node_num, node_num),
            nn.Sigmoid(),
            nn.Linear(node_num, output_size)
        )

    def forward(self, x):
        return self.layer(x)

def grad_net(y, x1, x2, order=1):
    # Create weights tensor
    weights = torch.ones_like(y) # Set the all weights to be 1.

    if order == 1:
        # Compute first-order derivatives
        g1_x1 = grad(outputs=y, inputs=x1, grad_outputs=weights, create_graph=True)[0]
        g1_x2 = grad(outputs=y, inputs=x2, grad_outputs=weights, create_graph=True)[0]
        return g1_x1, g1_x2

    elif order == 2:
        # Compute second-order derivatives
        g1_x1 = grad(outputs=y, inputs=x1, grad_outputs=weights, create_graph=True)[0]
        g1_x2 = grad(outputs=y, inputs=x2, grad_outputs=weights, create_graph=True)[0]
        g2_x1x1 = grad(outputs=g1_x1, inputs=x1, grad_outputs=weights, create_graph=True)[0]
        #g2_x1x2 = grad(outputs=g1_x1, inputs=x2, grad_outputs=weights, create_graph=True)[0]
        g2_x2x2 = grad(outputs=g1_x2, inputs=x2, grad_outputs=weights, create_graph=True)[0]
        return g2_x1x1, g2_x2x2

    else:
        raise NotImplementedError

model_save_path = 'model_save'
if not os.path.exists(model_save_path):
    os.mkdir(model_save_path)

parser = argparse.ArgumentParser()
parser.add_argument('--lr', type=float, default=3e-2)
parser.add_argument('--batch_size', type=int, default=1)
parser.add_argument('--num_epoch', type=int, default=10000)
parser.add_argument('--node_num', type=int, default=20)
parser.add_argument('--dt', type=float, default=0.01)
parser.add_argument('--sigma', type=float, default=0.1) # in fact, sigma squared.
parser.add_argument('--alpha', type=float, default=1.0)
parser.add_argument('--beta', type=float, default=0.2)
parser.add_argument('--eta', type=float, default=0.2)     # eta
parser.add_argument('--phase', type=str, default='grad_with_adams')
parser.add_argument('-f')

config, _ = parser.parse_known_args()
print(config)



domain_data = get_domain_data()
boundary_data = get_boundary_data()

domain_dataset = PdeDataset(domain_data)
boundary_dataset = PdeDataset(boundary_data)

domain_loader = DataLoader(dataset=domain_dataset, batch_size=config.batch_size, shuffle=True)
boundary_loader = DataLoader(dataset=boundary_dataset, batch_size=config.batch_size, shuffle=False)

# Training setup
model = NeuralNet(2, config.node_num, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)


# Function to define the dynamics of the system
f_func = lambda x1, x2: - config.eta * x2 - config.alpha * x1 - config.beta * (x1**3)
d_f_func_x2 = lambda x2: - config.eta
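# In other words, the drift of the Duffing system is
#   dx1/dt = x2
#   dx2/dt = -eta*x2 - alpha*x1 - beta*x1**3   (plus noise of intensity sigma in x2),
# and d_f_func_x2 is the partial derivative of the x2-drift with respect to x2 (the constant -eta).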


for epoch in tqdm(range(config.num_epoch)):
    domain_loss = 0.0
    boundary_loss = 0.0
    for domain_data, boundary_data in zip(domain_loader, boundary_loader):
        var_domain = domain_data.requires_grad_()
        var_boundary = boundary_data.requires_grad_()


        optimizer.zero_grad()
        out_domain = model(var_domain)
        out_boundary = model(var_boundary)


        dx = config.dt  

        f_value = f_func(var_domain[:, 0],var_domain[:, 1])
        df_value = d_f_func_x2(var_domain[:, 1])

        # *** The part where the error occurs ***
        g1_x1, g1_x2 = grad_net(out_domain, var_domain[:, 0], var_domain[:, 1], order=1)
        g2_x1x1, g2_x2x2 = grad_net(out_domain, var_domain[:, 0], var_domain[:, 1], order=2)


        f_term = var_domain[:, 1] * g1_x1 + df_value*out_domain + g1_x2 * f_value
        diffusion_term = (config.sigma / 2) * g2_x2x2

        e1 = torch.mean((-f_term + diffusion_term) ** 2)

        # Normalization constraint: sum(p) * dx^2 should be approximately 1
        e2 = torch.abs((out_domain * dx**2).sum() - 1) ** 2


        e3 = ((out_boundary - 1)**2).mean()

        loss = 400*e1 + 0.1*e2 + e3
        loss.backward()
        optimizer.step()
        domain_loss += e1.item()
        boundary_loss += e3.item()

    if epoch % 100 == 0:
        print(f'Epoch {epoch}: Loss = {loss.item()}')

# Save the model
torch.save(model.state_dict(), 'duffing_oscillator_model.pth')

And I got the following error:


RuntimeError                              Traceback (most recent call last)
<ipython-input-19-697a79892dcc> in <cell line: 40>()
     77         df_value = d_f_func_x2(var_domain[:, 1])
     78 
---> 79         g1_x1, g1_x2 = grad_net(out_domain, var_domain[:, 0], var_domain[:, 1], order=1)
     80         g2_x1x1, g2_x2x2 = grad_net(out_domain, var_domain[:, 0], var_domain[:, 1], order=2)
     81 

1 frames
/usr/local/lib/python3.10/dist-packages/torch/autograd/__init__.py in grad(outputs, inputs, grad_outputs, retain_graph, create_graph, only_inputs, allow_unused, is_grads_batched, materialize_grads)
    409         )
    410     else:
--> 411         result = Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
    412             t_outputs,
    413             grad_outputs_,

RuntimeError: One of the differentiated Tensors appears to not have been used in the graph. Set allow_unused=True if this is the desired behavior.

I have no idea how to fix this. Can anyone troubleshoot my code?
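
One thing I noticed: the tiny snippet below, which has nothing to do with my model, raises the same error when I differentiate with respect to a column slice, so maybe the var_domain[:, 0] / var_domain[:, 1] slices are related, but I am not sure how I should restructure my code.

import torch

x = torch.randn(5, 2, requires_grad=True)
y = (x ** 2).sum()

# Differentiating y with respect to the column slice x[:, 0] raises
# "One of the differentiated Tensors appears to not have been used in the graph",
# even though y itself was computed from x.
torch.autograd.grad(y, x[:, 0])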