Bayesian PINN: error on autograd for physics loss computation

Hi all!
I’m having some problems in testing a PINN on a damped oscillator problem with Bayesian framework. I added the data and physics losses computation in the forward method. However, if I execute the code, I got ‘RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn’ on the dy_dx = torch.autograd.grad(y_pred_physics, xCP, torch.ones_like(y_pred_physics), create_graph=True)[0] line.

Here’s the code to reproduce the error:

import torch
import torch.nn as nn
import numpy as np
import numpy.random as npr
import pyro
import pyro.distributions as dist
from pyro.nn import PyroModule, PyroSample
from pyro.infer import MCMC, NUTS, Predictive

# algebra and data handle
from scipy.stats import norm
import numpy as np
import numpy.random as npr
import glob, time


# 0a. General BNN
class BFCN(PyroModule):
    """a. Defines the architecture of a fully connected network: N_INPUT, N_OUTPUT, N_HIDDEN, N_LAYERS
       b. Defines the activation function involved in each node_l_i to node_l_i+1 
    """
    
    def __init__(self, N_INPUT, N_OUTPUT, N_HIDDEN, N_LAYERS, PRIOR_SCALE=5.):
        super().__init__()
        self.activation = nn.Tanh()
        assert N_INPUT > 0 and N_OUTPUT > 0 and N_HIDDEN > 0 and N_LAYERS > 0  # make sure the dimensions are valid

        # Define the layer sizes and the PyroModule layer list
        self.layer_sizes = [N_INPUT] + N_LAYERS * [N_HIDDEN] + [N_OUTPUT]

        layer_list = [PyroModule[nn.Linear](self.layer_sizes[idx - 1], self.layer_sizes[idx]) for idx in
                      range(1, len(self.layer_sizes))]
        self.layers = PyroModule[torch.nn.ModuleList](layer_list)

        for layer_idx, layer in enumerate(self.layers):
            layer.weight = PyroSample(dist.Normal(0., PRIOR_SCALE * np.sqrt(2 / self.layer_sizes[layer_idx])).expand(
                [self.layer_sizes[layer_idx + 1], self.layer_sizes[layer_idx]]).to_event(2))
            layer.bias = PyroSample(dist.Normal(0., PRIOR_SCALE).expand([self.layer_sizes[layer_idx + 1]]).to_event(1))
        
        # Define sigma as a parameter with unique name
        self.sigma = PyroSample(dist.Gamma(0.5, 1))

    def _predict(self, x):
        """ Compute predictions with the model """
        x = x.reshape(-1, 1)
        x = self.activation(self.layers[0](x))  # input --> hidden
        for layer in self.layers[1:-1]:
            x = self.activation(layer(x))  # hidden --> hidden
        mu_pred = self.layers[-1](x).squeeze()  # hidden --> output
        #sigma = pyro.sample("sigma", dist.Gamma(.5, 1))  # infer the response noise     
        return mu_pred.view(-1,1)


    def forward(self, x, yD, xCP, d, w0):
        # Compute predictions for the data loss
        mu_pred = self._predict(x)

        # Compute data loss (MSE) if yD is provided
        data_loss = torch.tensor(0.0)
        if yD is not None:
            data_loss = torch.mean((mu_pred - yD) ** 2)
    
        # Compute physics-informed loss if xCP, d, and w0 are provided
        xCP = xCP.view(-1,1).requires_grad_(True).to('cpu')
        #y_pred_physics = y_pred_physics.requires_grad_(True)
        physics_loss = torch.tensor(0.0, requires_grad=True)
        if xCP is not None and d is not None and w0 is not None:
            y_pred_physics = self._predict(xCP)
            dy_dx          = torch.autograd.grad(y_pred_physics, xCP, torch.ones_like(y_pred_physics), create_graph=True)[0]
            d2y_dx2        = torch.autograd.grad(dy_dx, xCP, torch.ones_like(dy_dx), create_graph=True)[0]
            physics_residual = d2y_dx2 + 2 * d * dy_dx + w0**2 * y_pred_physics
            # Physics-informed loss (MSE of the residuals)
            physics_loss = (1e-4) * torch.mean(physics_residual**2)
        
        # Return the combined loss
        total_loss = data_loss + physics_loss
        
        return total_loss, mu_pred

def oscillator(d, w0, t):
    """Defines the analytical solution to the 1D underdamped harmonic oscillator problem. 
    Equations taken from: https://beltoforion.de/en/harmonic_oscillator/"""
    assert d < w0
    w = np.sqrt(w0**2-d**2)
    phi = np.arctan(-d/w)
    A = 1/(2*np.cos(phi))
    cos = torch.cos(phi+w*t)
    sin = torch.sin(phi+w*t)
    exp = torch.exp(-d*t)
    y  = exp*2*A*cos
    return y

def f_SOL(t,d,w0):
    return oscillator(d, w0, t)

torch.set_default_dtype(torch.float64)
device = 'cpu'

n_input, n_output, n_hidden, n_layers, prior_scale = 1,1,3,32,2
model = BFCN(n_input, n_output, n_hidden, n_layers, prior_scale)

# Define Hamiltonian Monte Carlo (HMC) kernel
# NUTS = "No-U-Turn Sampler" (https://arxiv.org/abs/1111.4246), gives HMC an adaptive step size
nuts_kernel = NUTS(model, jit_compile=False)  # jit_compile=True is faster but requires PyTorch 1.6+

# Define MCMC sampler, get 50 posterior samples
mcmc = MCMC(nuts_kernel, num_samples=50)

# Example values for mu and k
d, w0 = 2, 20
mu, k = 2*d, w0**2 
t = torch.linspace(0,1,1000).view(-1,1)
u = f_SOL(t,d,w0)

## DATA
M  = 32
e  = 0.1
lb, ub = 0, 0.60
tD = lb + (ub - lb) * npr.rand(M)
tD = torch.from_numpy(tD)
tD = tD.to(device).view(-1,1)
tD, _ = tD.sort(dim=0, descending=False)
yD = f_SOL(tD,d,w0) + norm(0, e).rvs(M).reshape(-1, 1)
yD = yD.to(device).view(-1,1)

## CONTROL POINTs
N   = 128
lb, ub  = 0., 1.
tCP = lb + (ub - lb) * torch.linspace(0.,1.,N)
tCP = tCP.to(device).view(-1,1)
tCP, _ = tCP.sort(dim=0, descending=False)
yCP = f_SOL(tCP,d,w0)
yCP = yCP.to(device).view(-1,1)

# Convert data to PyTorch tensors
x_train = tD
y_train = yD
x_physics = tCP; #x_physics = x_physics.view(-1, 1).requires_grad_(True)  # example physics points

# Run MCMC with physics-informed loss
mcmc.run(x_train, y_train, x_physics, d=d, w0=w0)

I try to force and check if xCP has ‘required_grad_(True)’ active (yes) but it does not resolve the issue. I was expecting to derive smoothly, as with simple PINNs.