Hello all,
I am trying to learn physics-informed neural networks (PINNs).
I have created an example based on the Diffusion equation example in the DeepXDE documentation (DeepXDE 1.8.3.dev7+g4733e0e).
However, when I switch to the L-BFGS
optimizer, the loss function stops decreasing (it stays at exactly the same value). Is this normal? If not, can anyone give me a hand with debugging?
Suggestions regarding code structure are also very welcome!
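For reference, the problem being solved (taken from the DeepXDE example, and matching the residual in PDE_prediction below) is

    dy/dt = d2y/dx2 - exp(-t)*(sin(pi*x) - pi^2*sin(pi*x)),   x in [-1, 1], t in [0, 1],

with initial condition y(x, 0) = sin(pi*x), boundary conditions y(-1, t) = y(1, t) = 0, and exact solution y(x, t) = exp(-t)*sin(pi*x).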
#%% Modules
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from pyDOE import lhs
# Seed the PyTorch and NumPy random number generators for reproducibility
torch.manual_seed(1234)
np.random.seed(1234)
#%% Network
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Working on {device}")
class PINN(nn.Module):
    def __init__(self, layers,
                 t0, x0, y0,
                 t_lb, x_lb, y_lb,
                 t_ub, x_ub, y_ub,
                 t_domain, x_domain):
        super().__init__()
        self.activation = nn.Tanh()
        self.layers = len(layers)
        self.linears = nn.ModuleList([nn.Linear(layers[i], layers[i+1]) for i in range(len(layers)-1)])
        self.t0 = t0
        self.x0 = x0
        self.y0 = y0
        self.t_lb = t_lb
        self.x_lb = x_lb
        self.y_lb = y_lb
        self.t_ub = t_ub
        self.x_ub = x_ub
        self.y_ub = y_ub
        self.t_domain = t_domain
        self.x_domain = x_domain
        self.optimizer = None
        self.train_loss_history = []
    def forward(self, X):
        if not torch.is_tensor(X):
            X = torch.from_numpy(X)
        a = X.float()
        for i in range(self.layers - 2):
            z = self.linears[i](a)
            a = self.activation(z)
        a = self.linears[-1](a)
        return a
    def network_prediction(self, t, x):
        return self.forward(torch.cat((t, x), 1))

    def PDE_prediction(self, t, x):
        # Evaluate the PDE residual; it should be zero for the exact solution
        N = self.network_prediction(t, x)
        dN_dt = self.get_derivative(N, t, 1)
        dN_dxx = self.get_derivative(N, x, 2)
        f = dN_dt - dN_dxx + torch.exp(-t)*(torch.sin(np.pi*x) - np.pi*np.pi*torch.sin(np.pi*x))
        return f

    def get_derivative(self, y, x, n):
        # Recursively compute the n-th order derivative of y = f(x) with respect to x
        if n == 0:
            return y
        dy_dx = torch.autograd.grad(y, x, torch.ones_like(y).to(device),
                                    create_graph=True, retain_graph=True, allow_unused=True)[0]
        return self.get_derivative(dy_dx, x, n - 1)
    def loss_IC(self):
        y_pred_IC = self.network_prediction(self.t0, self.x0)
        mse_IC = torch.mean((self.y0 - y_pred_IC)**2)
        return mse_IC

    def loss_BC(self):
        y_pred_BC1 = self.network_prediction(self.t_lb, self.x_lb)
        mse_BC1 = torch.mean((self.y_lb - y_pred_BC1)**2)
        y_pred_BC2 = self.network_prediction(self.t_ub, self.x_ub)
        mse_BC2 = torch.mean((self.y_ub - y_pred_BC2)**2)
        mse_BC = mse_BC1 + mse_BC2
        return mse_BC

    def loss_interior(self):
        y_pred = self.PDE_prediction(self.t_domain, self.x_domain)
        mse_interior = torch.mean(y_pred**2)
        return mse_interior

    def loss_func(self):
        mse_IC = self.loss_IC()
        mse_BC = self.loss_BC()
        mse_domain = self.loss_interior()
        return mse_IC, mse_BC, mse_domain

    def closure(self):
        self.optimizer.zero_grad()
        mse_0, mse_b, mse_f = self.loss_func()
        total_loss = mse_0 + mse_b + mse_f
        total_loss.backward(retain_graph=True)
        return total_loss
    def train(self, epochs, optimizer='Adam', **kwargs):
        if optimizer == 'Adam':
            self.optimizer = torch.optim.Adam(self.parameters(), **kwargs)
        elif optimizer == 'L-BFGS':
            self.optimizer = torch.optim.LBFGS(self.parameters(), **kwargs)
        # Training loop (the logged loss is evaluated before the step)
        for epoch in range(epochs):
            mse_IC, mse_BC, mse_domain = self.loss_func()
            total_loss = mse_IC + mse_BC + mse_domain
            self.train_loss_history.append([total_loss.item(), mse_IC.item(), mse_BC.item(), mse_domain.item()])
            self.optimizer.step(self.closure)
            if epoch % 100 == 0:
                print(f'Epoch ({optimizer}): {epoch}, Total Loss: {total_loss.item():.6e}')

    def get_training_history(self):
        loss_his = np.array(self.train_loss_history)
        total_loss, mse_IC, mse_BC, mse_domain = np.split(loss_his, 4, axis=1)
        return total_loss, mse_IC, mse_BC, mse_domain
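To rule out the derivative helper as the culprit, here is a toy check of the autograd pattern used in get_derivative (just a sketch, not part of the training run): for y = x^2 the first derivative should be 2x and the second derivative should be 2 everywhere.
#%% Toy check of the autograd pattern (sketch)
xt = torch.linspace(-1, 1, 5).view(-1, 1).requires_grad_(True)
yt = xt**2
dy = torch.autograd.grad(yt, xt, torch.ones_like(yt), create_graph=True)[0]
d2y = torch.autograd.grad(dy, xt, torch.ones_like(dy))[0]
print(dy.flatten())   # expect tensor([-2., -1., 0., 1., 2.])
print(d2y.flatten())  # expect tensor([2., 2., 2., 2., 2.])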
#%% Data
def exactSolution(t, x):
    return np.exp(-t)*np.sin(np.pi*x)
x = np.linspace(-1, 1, 200).reshape(-1,1) # Space domain
t = np.linspace(0, 1, 100).reshape(-1,1) # Time domain
X, T = np.meshgrid(x[:, 0], t[:, 0]) # space-time domain
y_true = exactSolution(T, X)
# Lower and upper bound of the space-time domain
lb = np.zeros((1,2))
ub = np.zeros((1,2))
lb[0, 0] = t[0, 0]; lb[0, 1] = x[0, 0]
ub[0, 0] = t[-1, 0]; ub[0, 1] = x[-1, 0]
# Number of points to sample
NPoints_t0 = 100
NPoints_bc = 100
NPoints_domain = 10000
# Initial condition points
idx_x = np.random.choice(x.shape[0], NPoints_t0, replace=False)
x0 = x[idx_x, :]
X0 = np.column_stack((x0[:, 0], np.zeros(len(x0))))
Y0 = np.sin(np.pi*x[:, 0])[:, None]
Y0 = Y0[idx_x, :]
# Boundary condition points
idx_t = np.random.choice(t.shape[0], NPoints_bc, replace=False)
BC_1_x_t = np.column_stack((np.ones(t.shape[0])*x[0], t))[idx_t]
BC_1_y = np.zeros((t.shape[0], 1))[idx_t]
BC_2_x_t = np.column_stack((np.ones(t.shape[0])*x[-1], t))[idx_t]
BC_2_y = np.zeros((t.shape[0], 1))[idx_t]
# Create collocation points with latin hypercube sampling
X_T_domain = lb + (ub - lb) * lhs(2, NPoints_domain)
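To make sure the collocation points actually cover the domain, a quick sanity check like this works (sketch; pyDOE's lhs returns samples in the unit square, which are then scaled to the bounds):
assert X_T_domain.shape == (NPoints_domain, 2)
assert (X_T_domain >= lb).all() and (X_T_domain <= ub).all()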
#%% Convert training data to tensors
layers = [2, 32, 32, 1]
x0 = torch.tensor(X0[:, 0], requires_grad=True).view(-1,1).float().to(device)
t0 = torch.tensor(X0[:, 1], requires_grad=True).view(-1,1).float().to(device)
y0 = torch.tensor(Y0).float().to(device)
x_lb = torch.tensor(BC_1_x_t[:, 0], requires_grad=True).view(-1,1).float().to(device)
t_lb = torch.tensor(BC_1_x_t[:, 1], requires_grad=True).view(-1,1).float().to(device)
y_lb = torch.tensor(BC_1_y).float().to(device)
x_ub = torch.tensor(BC_2_x_t[:, 0], requires_grad=True).view(-1,1).float().to(device)
t_ub = torch.tensor(BC_2_x_t[:, 1], requires_grad=True).view(-1,1).float().to(device)
y_ub = torch.tensor(BC_2_y).float().to(device)
x_domain = torch.tensor(X_T_domain[:, 0], requires_grad=True).view(-1,1).float().to(device)
t_domain = torch.tensor(X_T_domain[:, 1], requires_grad=True).view(-1,1).float().to(device)
model = PINN(layers, t0, x0, y0, t_lb, x_lb, y_lb, t_ub, x_ub, y_ub, t_domain, x_domain).to(device)
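Before training, a quick smoke test (sketch) to confirm the shapes line up:
with torch.no_grad():
    print(model.network_prediction(t0, x0).shape)  # expect torch.Size([100, 1])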
#%% Train
model.train(2500, optimizer='Adam', lr=1e-3)
model.train(500, optimizer='L-BFGS')
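This is how the loss history can be plotted with the get_training_history helper (matplotlib is imported above but unused so far; the flat tail during the L-BFGS phase is exactly the problem I am describing):
#%% Plot loss history
total_loss, mse_IC, mse_BC, mse_domain = model.get_training_history()
fig, ax = plt.subplots()
ax.semilogy(total_loss, label='total')
ax.semilogy(mse_IC, label='IC')
ax.semilogy(mse_BC, label='BC')
ax.semilogy(mse_domain, label='PDE')
ax.set_xlabel('epoch')
ax.set_ylabel('MSE loss')
ax.legend()
plt.show()
For what it is worth, I also wondered whether passing line_search_fn='strong_wolfe' through the **kwargs of train (they go straight into torch.optim.LBFGS) would change anything, but I have not confirmed that this is the issue.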