PINN toy example with PyTorch

Hello to all,

I am trying to learn about physics-informed neural networks (PINNs).

I have created an example based on: Diffusion equation — DeepXDE 1.8.3.dev7+g4733e0e documentation, i.e. solving du/dt = d2u/dx2 - e^(-t)*(sin(pi*x) - pi^2*sin(pi*x)) on x in [-1, 1], t in [0, 1], with u(t, -1) = u(t, 1) = 0, u(0, x) = sin(pi*x), and exact solution u(t, x) = e^(-t)*sin(pi*x).

However, when I use the L-BFGS optimizer the loss does not decrease any further (it stays at exactly the same value). Is this normal? If not, can anyone give me a hand with debugging?

Suggestions regarding the code structure are also very welcome!

#%% modules
import torch
import torch.nn as nn

import numpy as np
import matplotlib.pyplot as plt 

from pyDOE import lhs 

#PyTorch random number generator
torch.manual_seed(1234)
np.random.seed(1234)

#%% Network
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f"Working on {device}")

class PINN(nn.Module):
    def __init__(self, layers, 
                 t0, x0, y0, 
                 t_lb, x_lb, y_lb, 
                 t_ub, x_ub, y_ub, 
                 t_domain, x_domain):
        
        super(PINN, self).__init__() 
              
        self.activation = nn.Tanh()
        
        self.layers = len(layers)
        self.linears = nn.ModuleList([nn.Linear(layers[i], layers[i+1]) for i in range(len(layers)-1)])
        
        self.t0 = t0
        self.x0 = x0
        self.y0 = y0
        
        self.t_lb = t_lb
        self.x_lb = x_lb
        self.y_lb = y_lb
        
        self.t_ub = t_ub
        self.x_ub = x_ub
        self.y_ub = y_ub
        
        self.t_domain = t_domain
        self.x_domain = x_domain
        
        self.optimizer = None
        self.train_loss_history = []


    def forward(self, X):
        # Accept NumPy arrays as well as tensors
        if not torch.is_tensor(X):
            X = torch.from_numpy(X)

        a = X.float()
        
        for i in range(self.layers - 2):           
            z = self.linears[i](a)                       
            a = self.activation(z)
            
        a = self.linears[-1](a)
        
        return a
    
    def network_prediction(self, t, x):
        return self.forward(torch.cat((t, x), 1))
    
    def PDE_prediction(self, t, x):
        # Residual of the diffusion equation: dN/dt - d2N/dx2 + e^(-t)*(sin(pi*x) - pi^2*sin(pi*x)) = 0
        N = self.network_prediction(t, x)
        dN_dt = self.get_derivative(N, t, 1)
        dN_dxx = self.get_derivative(N, x, 2)
        f = dN_dt - dN_dxx + torch.exp(-t)*(torch.sin(np.pi*x) - np.pi*np.pi*torch.sin(np.pi*x))
        return f
    
    def get_derivative(self, y, x, n):
        # General formula to compute the n-th order derivative of y = f(x) with respect to x
        if n == 0:
            return y
        else:
            dy_dx = torch.autograd.grad(y, x, torch.ones_like(y).to(device), create_graph=True, retain_graph=True, allow_unused=True)[0]
        return self.get_derivative(dy_dx, x, n - 1)
    

    def loss_IC(self):
        y_pred_IC = self.network_prediction(self.t0, self.x0)
        mse_IC = torch.mean((self.y0 - y_pred_IC)**2)
        return mse_IC
    
    def loss_BC(self):
        y_pred_BC1 = self.network_prediction(self.t_lb, self.x_lb)
        mse_BC1 = torch.mean((self.y_lb - y_pred_BC1)**2)
        
        y_pred_BC2 = self.network_prediction(self.t_ub, self.x_ub)
        mse_BC2 = torch.mean((self.y_ub - y_pred_BC2)**2)
        
        mse_BC = mse_BC1 + mse_BC2
        return mse_BC

    def loss_interior(self):
        y_pred = self.PDE_prediction(self.t_domain, self.x_domain)
        mse_interior = torch.mean((y_pred)**2)
        
        return mse_interior

    def loss_func(self):

        mse_IC = self.loss_IC()
        mse_BC = self.loss_BC()
        mse_domain = self.loss_interior()

        return mse_IC, mse_BC, mse_domain

    def closure(self):
        self.optimizer.zero_grad()
        mse_0, mse_b, mse_f = self.loss_func()
        total_loss = mse_0 + mse_b + mse_f
        total_loss.backward(retain_graph=True)
        return total_loss

    def train(self, epochs, optimizer='Adam', **kwargs):

        if optimizer=='Adam':
            self.optimizer = torch.optim.Adam(self.parameters(), **kwargs)

        elif optimizer=='L-BFGS':
            self.optimizer = torch.optim.LBFGS(self.parameters(), **kwargs)

        # Training loop
        for epoch in range(epochs):
            mse_IC, mse_BC, mse_domain = self.loss_func()
            total_loss = mse_IC + mse_BC + mse_domain
            
            self.train_loss_history.append([total_loss.item(), mse_IC.item(), mse_BC.item(), mse_domain.item()])

            self.optimizer.step(self.closure)

            if epoch % 100 == 0:
                print(f'Epoch ({optimizer}): {epoch}, Total Loss: {total_loss.detach().cpu().numpy()}')
            
    def get_training_history(self):
        loss_his = np.array(self.train_loss_history)
        total_loss, mse_IC, mse_BC, mse_domain = np.split(loss_his, 4, axis=1)
        return total_loss, mse_IC, mse_BC, mse_domain

#%% Data
def exactSolution(t, x):
    return np.exp(-t)*np.sin(np.pi*x)

x = np.linspace(-1, 1, 200).reshape(-1,1) # Space domain
t = np.linspace(0, 1, 100).reshape(-1,1) # Time domain

X, T = np.meshgrid(x[:, 0], t[:, 0]) # space-time domain

y_true = exactSolution(T, X)

# Lower and upper bound of the space-time domain
lb = np.zeros((1,2))
ub = np.zeros((1,2))
lb[0, 0] = t[0, 0]; lb[0, 1] = x[0, 0]
ub[0, 0] = t[-1, 0]; ub[0, 1] = x[-1, 0]

# Number of Points to sample
NPoints_t0 = 100
NPoints_bc = 100
NPoints_domain = 10000

idx_x = np.random.choice(x.shape[0], NPoints_t0, replace=False)
x0 = x[idx_x, :]
X0 = np.column_stack((x0[:, 0], np.zeros(len(x0))))
Y0 = np.sin(np.pi*x[:,0])[:, None]
Y0 = Y0[idx_x, :]

# Boundary Condition
idx_t = np.random.choice(t.shape[0], NPoints_bc, replace=False)

BC_1_x_t = np.column_stack((np.ones(t.shape[0])*x[0], t))[idx_t]
BC_1_y = np.zeros((t.shape[0], 1))[idx_t]

BC_2_x_t = np.column_stack((np.ones(t.shape[0])*x[-1], t))[idx_t]
BC_2_y = np.zeros((t.shape[0], 1))[idx_t]

# Create collocation points with latin hypercube sampling
X_T_domain = lb + (ub - lb) * lhs(2, NPoints_domain)


#%% Network

layers = [2, 32, 32, 1]

x0 = torch.tensor(X0[:, 0], requires_grad=True).view(-1,1).float().to(device)
t0 = torch.tensor(X0[:, 1], requires_grad=True).view(-1,1).float().to(device)
y0 = torch.tensor(Y0).float().to(device)

x_lb = torch.tensor(BC_1_x_t[:, 0], requires_grad=True).view(-1,1).float().to(device)
t_lb = torch.tensor(BC_1_x_t[:, 1], requires_grad=True).view(-1,1).float().to(device)
y_lb = torch.tensor(BC_1_y).float().to(device)

x_ub = torch.tensor(BC_2_x_t[:, 0], requires_grad=True).view(-1,1).float().to(device)
t_ub = torch.tensor(BC_2_x_t[:, 1], requires_grad=True).view(-1,1).float().to(device)
y_ub = torch.tensor(BC_2_y).float().to(device)

x_domain = torch.tensor(X_T_domain[:, 0], requires_grad=True).view(-1,1).float().to(device)
t_domain = torch.tensor(X_T_domain[:, 1], requires_grad=True).view(-1,1).float().to(device)

model = PINN(layers, t0, x0, y0, t_lb, x_lb, y_lb, t_ub, x_ub, y_ub, t_domain, x_domain).to(device)

#%% Train
model.train(2500, optimizer='Adam', lr=1e-3)
model.train(500, optimizer='L-BFGS')
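
#%% Loss history
# To see exactly where the loss flattens out, I plot the stored loss terms on a
# log scale afterwards (this only uses the get_training_history helper and the
# matplotlib import from above).
total_loss, mse_IC, mse_BC, mse_domain = model.get_training_history()

plt.semilogy(total_loss, label='total')
plt.semilogy(mse_IC, label='IC')
plt.semilogy(mse_BC, label='BC')
plt.semilogy(mse_domain, label='PDE')
plt.xlabel('epoch')
plt.ylabel('MSE')
plt.legend()
plt.show()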

Try these tips:

  1. The learning rate could be too small: Try experimenting with different learning rates for the L-BFGS optimizer. Classical L-BFGS is a quasi-Newton method that picks its step length with a line search rather than a fixed learning rate, but torch.optim.LBFGS still exposes a learning rate parameter (lr) that it uses as the (initial) step length. Increase the learning rate and see if the loss starts to decrease.
  2. Stuck in a local minimum: It is possible that the optimizer got stuck in a local minimum. This might be more likely if the loss has already decreased significantly during the Adam optimization phase.
  3. The L-BFGS optimizer may require more iterations: L-BFGS only approximates second-order (curvature) information, and it may need more iterations to make significant progress. Try increasing the number of epochs and the max_iter parameter (inner iterations per step) for the L-BFGS optimizer.
  4. Experiment with the history size: The L-BFGS optimizer takes a history_size parameter that controls how many past updates are kept for the curvature approximation. Try changing the history size and see if it affects the optimization process; the sketch after this list shows one way to pass these options in.
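
For example, here is a minimal sketch of how those options could be passed through your train method (it forwards **kwargs straight to torch.optim.LBFGS). The values are untested starting points, not tuned for your problem:

model.train(
    100,
    optimizer='L-BFGS',
    lr=1.0,                        # initial step length used by the line search
    max_iter=50,                   # inner L-BFGS iterations per optimizer.step(closure) call
    history_size=100,              # number of past updates kept for the curvature estimate
    line_search_fn='strong_wolfe', # torch.optim.LBFGS accepts 'strong_wolfe' or None (default)
)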