This is my code file, and I can't seem to find the error in it. The code is for a complicated problem, but I believe the error can be fixed with a small change. Help on this is greatly appreciated!
import torch
from torch.optim.lr_scheduler import ExponentialLR
import numpy as np
import scipy.io
from scipy.interpolate import griddata
from scipy.spatial import distance
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from collections import OrderedDict
from matplotlib import cm
import os
from pyDOE import lhs
import time
np.random.seed(1234)
'''
if torch.cuda.is_available():
device = torch.device('cuda')
else:
device = torch.device('cpu')
'''
device = torch.device('cpu')
# The deep neural network
class DNN(torch.nn.Module):
def __init__(self, layers):
super(DNN, self).__init__()
# parameters
self.depth = len(layers) - 1
# set up layer order dict
self.activation = torch.nn.Tanh
layer_list = list()
for i in range(self.depth - 1):
layer_list.append(
('layer_%d' % i, torch.nn.Linear(layers[i], layers[i + 1]))
)
layer_list.append(('activation_%d' % i, self.activation()))
layer_list.append(
('layer_%d' % (self.depth - 1), torch.nn.Linear(layers[-2], layers[-1]))
)
layerDict = OrderedDict(layer_list)
# deploy layers
self.layers = torch.nn.Sequential(layerDict)
def forward(self, x):
out = self.layers(x)
return out
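# Note: the DNN above maps the 2-column input (x, t) to the single output u(x, t),
# with tanh activations on every hidden layer; with layers = [2, 20, ..., 20, 1] as
# defined in __main__, that is 8 hidden layers of width 20.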
# The physics-guided neural network
class PhysicsInformedNN():
def __init__(self, X, u, X_f, X_val, u_val, layers, lb, ub):
#added X_f, X_val, u_val
# Bounds
self.lb = torch.tensor(lb).float().to(device)
self.ub = torch.tensor(ub).float().to(device)
# Data
self.x = torch.tensor(X[:, 0:1], requires_grad=True).float().to(device)
self.t = torch.tensor(X[:, 1:2], requires_grad=True).float().to(device)
self.u = torch.tensor(u).float().to(device)
self.x_f = torch.tensor(X_f[:, 0:1], requires_grad=True).float().to(device)
self.t_f = torch.tensor(X_f[:, 1:2], requires_grad=True).float().to(device)
#added lines below
# Parameters and the network must be created before net_u / net_f are called below
self.lambda1 = torch.zeros([16, 1], requires_grad=True).to(device)
self.lambda1 = torch.nn.Parameter(self.lambda1)
# Deep neural network (lambda_1 is registered on it so one parameter list covers both)
self.dnn = DNN(layers).to(device)
self.dnn.register_parameter('lambda_1', self.lambda1)
# Weight on the residual loss (trainable here; set requires_grad=False if a fixed weight is intended)
self.loss_f_coeff = torch.nn.Parameter(torch.tensor(0.0, dtype=torch.float32), requires_grad=True)
# Initial forward passes and losses; these are recomputed every step in loss_func() and train()
self.u_pred = self.net_u(self.x, self.t)
self.f_pred, self.Phi_pred, self.u_t_pred = self.net_f(self.x_f, self.t_f, self.x_f.shape[0])
self.loss_u = torch.mean((self.u - self.u_pred) ** 2) #MSE on the measurements
self.loss_f = self.loss_f_coeff * torch.mean(self.f_pred ** 2) #weighted MSE on the PDE residual
self.loss_lambda = 1e-7 * torch.norm(self.lambda1, p=1)
self.loss = torch.log(self.loss_u + self.loss_f + self.loss_lambda)
self.x_val = torch.tensor(X_val[:, 0:1], requires_grad=True).float().to(device)
self.t_val = torch.tensor(X_val[:, 1:2], requires_grad=True).float().to(device)
self.u_val = torch.tensor(u_val).float().to(device)
self.u_val_pred = self.net_u(self.x_val, self.t_val)
self.f_val_pred, _, _ = self.net_f(self.x_val, self.t_val, self.x_val.shape[0])
self.loss_u_val = torch.mean((self.u_val - self.u_val_pred) ** 2)
self.loss_f_val = torch.mean(self.f_val_pred ** 2)
self.loss_val = torch.log(self.loss_u_val + self.loss_f_val)
# Counter used later by TrainSTRidge (self.it was referenced there but never initialised)
self.it = 0
#to adjust for var_list = var_list_1 and learning rate,
initial_lr = 1e-3
beta1 = 0.99
beta2 = 0.9
epsilon = 1e-8
# PyTorch optimizers expect the iterable of parameters as their first argument;
# passing self.loss there was a leftover from the TensorFlow version and raises an error.
self.optimizer = torch.optim.LBFGS(
self.dnn.parameters(),
lr=initial_lr, #starts at 1e-3 and decreases by 25% every 1000 iterations following an exponential decay
max_iter=1000,
max_eval=1000,
history_size=50,
#tolerance_grad=1e-5,
tolerance_change=1.0 * np.finfo(float).eps,
line_search_fn="strong_wolfe"
)
#Also to adjust for var_list = var_list_Pretrain and learning rate
self.optimizer_Pretrain = torch.optim.LBFGS(
self.dnn.parameters(),
lr=initial_lr, #starts at 1e-3 and decreases by 25% every 1000 iterations following an exponential decay
max_iter=1000,
max_eval=1000,
history_size=50,
#tolerance_grad=1e-5,
tolerance_change=1.0 * np.finfo(float).eps,
line_search_fn="strong_wolfe"
)
#to adjust for var_list = var_list_1
self.optimizer_Adam = torch.optim.Adam(
self.dnn.parameters(),
lr=initial_lr,
betas=(beta1, beta2),
eps=epsilon
)
self.iter = 0
# The x_pt/t_pt placeholder feed dict from the TensorFlow version is not needed here:
# the training, collocation and validation tensors are already stored on self above.
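# ExponentialLR is imported at the top but never used. If the decay described in the
# learning-rate comments above (a 25% decrease every 1000 iterations) is wanted, one
# possible sketch (assuming the scheduler is stepped once per Adam iteration) is:
#     self.scheduler_Adam = ExponentialLR(self.optimizer_Adam, gamma=0.75 ** (1.0 / 1000))
# followed by self.scheduler_Adam.step() after each self.optimizer_Adam.step() in train().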
def net_u(self, x, t):
u = self.dnn(torch.cat([x, t], dim=1))
return u
def net_f(self, x, t, N_f):
u = self.net_u(x, t)
u_t = torch.autograd.grad(
u, t,
grad_outputs=torch.ones_like(u),
retain_graph=True,
create_graph=True
)[0]
u_x = torch.autograd.grad(
u, x,
grad_outputs=torch.ones_like(u),
retain_graph=True,
create_graph=True
)[0]
u_xx = torch.autograd.grad(
u_x, x,
grad_outputs=torch.ones_like(u_x),
retain_graph=True,
create_graph=True
)[0]
u_xxx = torch.autograd.grad(
u_xx, x,
grad_outputs=torch.ones_like(u_xx),
retain_graph=True,
create_graph=True,
)[0]
Phi = torch.cat([torch.ones(N_f, 1, dtype=torch.float32, device=device),
u, u**2, u**3, u_x, u*u_x, u**2*u_x,
u**3*u_x, u_xx, u*u_xx, u**2*u_xx,
u**3*u_xx, u_xxx, u*u_xxx, u**2*u_xxx,
u**3*u_xxx], 1)
#print(Phi.shape) here gives N_f*16
self.library_description = ['1',
'u', 'u**2', 'u**3',
'u_x', 'u*u_x', 'u**2*u_x', 'u**3*u_x',
'u_xx', 'u*u_xx', 'u**2*u_xx', 'u**3*u_xx',
'u_xxx', 'u*u_xxx', 'u**2*u_xxx', 'u**3*u_xxx']
#Phi = torch.cat([u*u_x, u_xx], 1)
#But print(Phi.shape) here gives 250*2
f = torch.matmul(Phi, self.lambda1) - u_t
return f, Phi, u_t
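# In net_f above, Phi is the N_f x 16 library of candidate terms listed in
# library_description, lambda1 holds the corresponding coefficients, and
# f = Phi @ lambda1 - u_t is the residual of the candidate PDE u_t = Phi * lambda.
# TrainSTRidge/STRidge below perform sparse regression on exactly this (Phi, u_t) pair.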
def loss_func(self):
u_pred = self.net_u(self.x, self.t)
#f_pred, Phi, u_t = self.net_f(self.x, self.t)
f_pred, self.Phi_pred, self.u_t_pred = self.net_f(self.x_f, self.t_f, self.x_f.shape[0])
#loss = torch.mean(f_pred**2) + torch.mean((self.u - u_pred)**2)
#added lines below
loss_u = torch.mean((self.u - u_pred)**2)
loss_f = self.loss_f_coeff * torch.mean(f_pred**2) #residual loss weighted by self.loss_f_coeff (defined in __init__)
loss_lambda = 1e-7 * torch.norm(self.lambda1, p=1)
loss = torch.log(loss_u + loss_f + loss_lambda)
u_val_pred = self.net_u(self.x_val, self.t_val)
f_val_pred, _, _ = self.net_f(self.x_val, self.t_val, self.x_val.shape[0])
loss_u_val = torch.mean((self.u_val - u_val_pred)**2)
loss_f_val = torch.mean(f_val_pred**2)
loss_val = torch.log(loss_u_val + loss_f_val)
self.optimizer.zero_grad()
loss.backward()
# loss_val is monitored only; calling loss_val.backward() here would add validation
# gradients to the parameters before the LBFGS step, so it is not back-propagated
self.iter += 1
if self.iter % 100 == 0:
print(
'Iteration: %i, Loss: %e, l1: %.3f, l2: %.3f' %
(
self.iter,
loss.item(),
self.lambda1[0].item(),
self.lambda1[1].item(),
)
)
return loss
def train(self, nIter):
self.dnn.train()
for epoch in range(nIter):
u_pred = self.net_u(self.x, self.t)
f_pred, self.Phi_pred, self.u_t_pred = self.net_f(self.x_f, self.t_f, self.x_f.shape[0])
#f_pred, Phi, u_t = self.net_f(self.x, self.t)
#loss = torch.mean(f_pred**2) + torch.mean((self.u - u_pred)**2)
#added lines below
loss_u = torch.mean((self.u - u_pred)**2)
loss_f = self.loss_f_coeff * torch.mean(f_pred**2)
loss_lambda = 1e-7 * torch.norm(self.lambda1, p=1)
loss = torch.log(loss_u + loss_f + loss_lambda)
# Validation loss (monitoring only; computed once and not back-propagated)
u_val_pred = self.net_u(self.x_val, self.t_val)
f_val_pred, _, _ = self.net_f(self.x_val, self.t_val, self.x_val.shape[0])
loss_u_val = torch.mean((self.u_val - u_val_pred)**2)
loss_f_val = torch.mean(f_val_pred**2)
loss_val = torch.log(loss_u_val + loss_f_val)
self.optimizer_Adam.zero_grad()
loss.backward()
self.optimizer_Adam.step()
if epoch % 1 == 0:
print(
'Epoch: %i, Loss: %e' %
(
epoch,
loss.item()
)
)
self.optimizer.step(self.loss_func)
def predict(self, X_star):
x = torch.tensor(X_star[:, 0:1]).float().to(device)
t = torch.tensor(X_star[:, 1:2]).float().to(device)
self.dnn.eval()
u_star = self.net_u(x, t)
return u_star.detach().cpu().numpy()
def callTrainSTRidge(self):
lam = 1e-5 #regularization parameter
d_tol = 1
maxit = 100
STR_iters = 10
l0_penalty = None
normalize = 2
split = 0.8
print_best_tol = False
# Evaluate the library and u_t on the collocation points and hand them to STRidge as numpy arrays
_, Phi_pred, u_t_pred = self.net_f(self.x_f, self.t_f, self.x_f.shape[0])
Phi_pred = Phi_pred.detach().cpu().numpy()
u_t_pred = u_t_pred.detach().cpu().numpy()
lambda2 = self.TrainSTRidge(Phi_pred, u_t_pred, lam, d_tol, maxit, STR_iters, l0_penalty, normalize, split,
print_best_tol)
# Update the existing Parameter in place so the optimizers keep tracking it
self.lambda1.data = torch.tensor(lambda2, dtype=torch.float32).to(device)
def TrainSTRidge(self, R0, Ut, lam, d_tol, maxit, STR_iters = 10, l0_penalty = None, normalize = 2, split = 0.8,
print_best_tol = False):
# First normalize data
n,d = R0.shape
R = np.zeros((n,d), dtype=np.float32)
if normalize != 0:
Mreg = np.zeros((d,1))
for i in range(0,d):
Mreg[i] = 1.0/(np.linalg.norm(R0[:,i],normalize))
R[:,i] = Mreg[i]*R0[:,i]
normalize_inner = 0
else:
R = R0
Mreg = np.ones((d,1))*d
normalize_inner = 2
global lambda_normalized_history_STRidge
lambda_normalized_history_STRidge = np.append(lambda_normalized_history_STRidge, Mreg, axis = 1)
# Split data into 80% training and 20% test, then search for the best tolerance.
np.random.seed(0) # for consistency
n,_ = R.shape
train = np.random.choice(n, int(n*split), replace = False)
test = [i for i in np.arange(n) if i not in train]
TrainR = R[train,:]
TestR = R[test,:]
TrainY = Ut[train,:]
TestY = Ut[test,:]
# Set up the initial tolerance and l0 penalty
d_tol = float(d_tol)
if self.it == 0:
self.tol = d_tol
# Or inherit lambda1, dividing element-wise by Mreg (converted to numpy first)
w_best = self.lambda1.detach().cpu().numpy()/Mreg
# err_f = np.linalg.norm(TestY - TestR.dot(w_best), 2)
err_f = np.mean((TestY - TestR.dot(w_best))**2)
if l0_penalty is None and self.it == 0:
self.l0_penalty_0 = err_f
l0_penalty = self.l0_penalty_0
elif l0_penalty is None:
l0_penalty = self.l0_penalty_0
err_lambda = l0_penalty*np.count_nonzero(w_best)
err_best = err_f + err_lambda
tol_best = 0
global loss_history_STRidge
global loss_f_history_STRidge
global loss_lambda_history_STRidge
global tol_history_STRidge
loss_history_STRidge = np.append(loss_history_STRidge, err_best)
loss_f_history_STRidge = np.append(loss_f_history_STRidge, err_f)
loss_lambda_history_STRidge = np.append(loss_lambda_history_STRidge, err_lambda)
tol_history_STRidge = np.append(tol_history_STRidge, tol_best)
# Now increase tolerance until test performance decreases
for iter in range(maxit):
# Get a set of coefficients and error
w = self.STRidge(TrainR, TrainY, lam, STR_iters, self.tol, Mreg, normalize = normalize_inner)
# err_f = np.linalg.norm(TestY - TestR.dot(w), 2)
err_f = np.mean((TestY - TestR.dot(w))**2)
err_lambda = l0_penalty*np.count_nonzero(w)
err = err_f + err_lambda
# Has the accuracy improved?
if err <= err_best:
err_best = err
w_best = w
tol_best = self.tol
self.tol = self.tol + d_tol
loss_history_STRidge = np.append(loss_history_STRidge, err_best)
loss_f_history_STRidge = np.append(loss_f_history_STRidge, err_f)
loss_lambda_history_STRidge = np.append(loss_lambda_history_STRidge, err_lambda)
tol_history_STRidge = np.append(tol_history_STRidge, tol_best)
else:
self.tol = max([0,self.tol - 2*d_tol])
d_tol = d_tol/1.618
self.tol = self.tol + d_tol
if print_best_tol: print ("Optimal tolerance:", tol_best)
global optimaltol_history
optimaltol_history = np.append(optimaltol_history, tol_best)
return np.real(np.multiply(Mreg, w_best))
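# Summary: TrainSTRidge normalises the library columns, holds out 20% of the rows as a
# test set, and adapts the threshold tolerance so that the test residual plus the
# l0 penalty on the number of non-zero coefficients is minimised.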
def STRidge(self, X0, y, lam, maxit, tol, Mreg, normalize = 2, print_results = False):
n,d = X0.shape
X = np.zeros((n,d), dtype=np.complex64)
# First normalize data
if normalize != 0:
Mreg = np.zeros((d,1))
for i in range(0,d):
Mreg[i] = 1.0/(np.linalg.norm(X0[:,i],normalize))
X[:,i] = Mreg[i]*X0[:,i]
else:
X = X0
# Inherit lambda1 (as a numpy array) and divide element-wise by Mreg
w = self.lambda1.detach().cpu().numpy()/Mreg
biginds = np.where(abs(w) > tol)[0]
num_relevant = d
global ridge_append_counter_STRidge
ridge_append_counter = 0
global lambda_history_STRidge
lambda_history_STRidge = np.append(lambda_history_STRidge, np.multiply(Mreg, w), axis = 1)
ridge_append_counter += 1
# Threshold and continue
for j in range(maxit):
# Figure out which items to cut out
smallinds = np.where(abs(w) < tol)[0]
new_biginds = [i for i in range(d) if i not in smallinds]
# If nothing changes then stop
if num_relevant == len(new_biginds): break
else: num_relevant = len(new_biginds)
if len(new_biginds) == 0:
if j == 0:
if normalize != 0:
lambda_history_STRidge = np.append(lambda_history_STRidge, w*Mreg, axis = 1)
ridge_append_counter += 1
ridge_append_counter_STRidge = np.append(ridge_append_counter_STRidge, ridge_append_counter)
return np.multiply(Mreg, w)
else:
lambda_history_STRidge = np.append(lambda_history_STRidge, w*Mreg, axis = 1)
ridge_append_counter += 1
ridge_append_counter_STRidge = np.append(ridge_append_counter_STRidge, ridge_append_counter)
return w
else: break
biginds = new_biginds
# Otherwise get a new guess
w[smallinds] = 0
if lam != 0:
w[biginds] = np.linalg.lstsq(X[:, biginds].T.dot(X[:, biginds]) + lam*np.eye(len(biginds)), X[:, biginds].T.dot(y), rcond=None)[0]
lambda_history_STRidge = np.append(lambda_history_STRidge, np.multiply(Mreg,w), axis = 1)
ridge_append_counter += 1
else:
w[biginds] = np.linalg.lstsq(X[:, biginds], y, rcond=None)[0]
lambda_history_STRidge = np.append(lambda_history_STRidge, np.multiply(Mreg,w), axis = 1)
ridge_append_counter += 1
# Now that we have the sparsity pattern, use standard least squares to get w
if len(biginds) != 0: w[biginds] = np.linalg.lstsq(X[:, biginds], y, rcond=None)[0]
if normalize != 0:
lambda_history_STRidge = np.append(lambda_history_STRidge, w*Mreg, axis = 1)
ridge_append_counter += 1
ridge_append_counter_STRidge = np.append(ridge_append_counter_STRidge, ridge_append_counter)
return np.multiply(Mreg, w)
else:
lambda_history_STRidge = np.append(lambda_history_STRidge, w*Mreg, axis = 1)
ridge_append_counter += 1
ridge_append_counter_STRidge = np.append(ridge_append_counter_STRidge, ridge_append_counter)
return w
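# Summary: STRidge is the inner sequential threshold ridge regression. Coefficients whose
# magnitude falls below the tolerance are zeroed, the remaining terms are refit with ridge
# regression (or plain least squares when lam == 0), and the final sparsity pattern is
# refit by ordinary least squares before being rescaled by Mreg.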
if __name__ == "__main__":
start_time = time.time()
layers = [2, 20, 20, 20, 20, 20, 20, 20, 20, 1]
# load data
desktop_path = '/home/nbg22002/Desktop/Codes'
mat_file_path = os.path.join(desktop_path, 'burgers.mat')
# Load the MATLAB file
data = scipy.io.loadmat(mat_file_path)
t = np.real(data['t'].flatten()[:,None])
x = np.real(data['x'].flatten()[:,None])
Exact = np.real(data['usol']).T
X, T = np.meshgrid(x,t)
X_star = np.hstack((X.flatten()[:,None], T.flatten()[:,None]))
u_star = Exact.flatten()[:,None]
# Domain bounds
lb = X_star.min(0)
ub = X_star.max(0)
# Measurement data
# In this case, measurements come from N_u_s spatial points and are sampled at every time step.
N_u_s = 10
idx_s = np.random.choice(x.shape[0], N_u_s, replace=False)
X0 = X[:, idx_s]
T0 = T[:, idx_s]
Exact0 = Exact[:, idx_s]
N_u_t = int(t.shape[0]*1)
idx_t = np.random.choice(t.shape[0], N_u_t, replace=False)
X0 = X0[idx_t, :]
T0 = T0[idx_t, :]
Exact0 = Exact0[idx_t, :]
X_u_meas = np.hstack((X0.flatten()[:,None], T0.flatten()[:,None]))
u_meas = Exact0.flatten()[:,None]
# Training measurements, which are randomly sampled spatio-temporally
Split_TrainVal = 0.8
N_u_train = int(X_u_meas.shape[0]*Split_TrainVal)
idx_train = np.random.choice(X_u_meas.shape[0], N_u_train, replace=False)
X_u_train = X_u_meas[idx_train,:]
u_train = u_meas[idx_train,:]
# Validation measurements, which are the remaining measurements
idx_val = np.setdiff1d(np.arange(X_u_meas.shape[0]), idx_train, assume_unique=True)
X_u_val = X_u_meas[idx_val,:]
u_val = u_meas[idx_val,:]
# Collocation points
N_f = 50000
X_f_train = lb + (ub-lb)*lhs(2, N_f)
X_f_train = np.vstack((X_f_train, X_u_train))
# Option: Add noise
noise = 0.1
u_train = u_train + noise*np.std(u_train)*np.random.randn(u_train.shape[0], u_train.shape[1])
u_val = u_val + noise*np.std(u_val)*np.random.randn(u_val.shape[0], u_val.shape[1])
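# The history arrays accessed via `global` in TrainSTRidge/STRidge are never initialised
# anywhere in this file; the shapes below are assumptions inferred from how np.append is
# called on them (16-row columns for the lambda histories, flat arrays for the rest).
lambda_history_STRidge = np.zeros((16, 1))
lambda_normalized_history_STRidge = np.zeros((16, 1))
loss_history_STRidge = np.array([])
loss_f_history_STRidge = np.array([])
loss_lambda_history_STRidge = np.array([])
tol_history_STRidge = np.array([])
optimaltol_history = np.array([])
ridge_append_counter_STRidge = np.array([])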
# model
model = PhysicsInformedNN(X_u_train, u_train, X_f_train, X_u_val, u_val, layers, lb, ub)
model.train(6)
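# After training, the sparse-regression step and predictions could be invoked with the
# methods already defined above; a possible continuation (not part of the original run)
# would be:
# model.callTrainSTRidge()
# u_pred = model.predict(X_star)
# print('Elapsed time: %.2f s' % (time.time() - start_time))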