I am trying to use a neural net (NN), trained by supervised learning, as a function approximator for a policy function that solves my model equation. To start out, I tried to use an NN with 4 hidden layers of 100/75/50/25 neurons to approximate f(x) = x on the interval [1.07312, 20.38941].
With the supplied example I only reach a precision of around -3 for log10 of the root mean squared error (the quantity the example prints), and even that takes some thousand training episodes. When I use classical methods like formal value-/policy-function iteration, I usually get to numerical precision, with the log10 of the maximal error down to -6 or -10.
The universal approximation theorem states that a neural net can approximate any continuous function on a compact domain to arbitrary precision. Thus I hoped to get to numerical precision in PyTorch. I have experimented with various hyperparameters, such as the number of layers, number of neurons, learning rate, number of grid points, activation function, dropout layer, batch normalization, and weight initialization. None of those improved precision or convergence speed significantly.
I would appreciate any help on how to improve, primarily, the approximation precision and, if possible, also the convergence speed.
You will find the minimal working example below. It consists of just three lines in the Jupyter notebook, the approx class, which runs the training, and the neural network, which I would consider quite standard.
jupyter:
from mwe import approx
# Build the approximator: this sets up the grid, the network and the Adam optimizer.
approxlin = approx()
# Run the training loop; progress is printed every 100 episodes.
approxlin.train()
approx class:
import numpy as np
import math
import torch
import torch.optim as optim
from neural_net import neural_net
# Device on which all tensors created by ``ten`` are placed.
device = "cpu"


def ten(x):
    """Convert a NumPy array into a float32 torch tensor on ``device``."""
    as_tensor = torch.from_numpy(x)
    as_float32 = as_tensor.float()
    return as_float32.to(device)
class approx():
    """Fit a neural net to the identity map f(x) = x on a fixed 1-D grid.

    The grid holds 100 equally spaced points between ``grid_min`` and
    ``grid_max``; the network is trained with Adam to minimise the mean
    squared error between its output and the grid itself.
    """

    def __init__(self):
        """Create the training grid, the network and its optimizer."""
        self.grid_min = 1.07312
        self.grid_max = 20.38941
        # Column tensor of shape (100, 1): one scalar input per row.
        self.grid = ten(
            np.linspace(self.grid_min, self.grid_max, 100)
        ).unsqueeze(1)
        # create neural net: 1 input, first hidden width 100, 1 output,
        # dropout probability 1e-5
        self.policy_net = neural_net(1, 100, 1, 0.00001)
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=0.0002)

    def train(self, episodes=20000, log_every=100):
        """Run full-batch gradient steps on the grid and return the fit.

        Params
        ======
            episodes (int): Number of optimizer steps to take
            log_every (int): Print the log10 of the root mean squared error
                every this many episodes; 0 or None disables logging

        Returns
        =======
            1-D tensor with the network output on the grid, evaluated with
            the final parameters and without gradient tracking.
        """
        for episode in range(episodes):
            policy = self.policy_net(self.grid)
            loss = ((policy - self.grid) ** 2).mean()
            if log_every and episode % log_every == 0:
                rmse = math.sqrt(loss.item())
                # log10(0) raises ValueError; report an exact fit as -inf.
                losslog10 = float("-inf") if rmse == 0 else math.log10(rmse)
                print('episode {} -- losslog10 : {:8.6f}'.format(episode, losslog10))
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        # Re-evaluate after the last step so the returned policy reflects the
        # final parameters (and is defined even when episodes == 0).
        with torch.no_grad():
            policy = self.policy_net(self.grid)
        return policy.squeeze()
Neural Net:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
def hidden_init(layer):
    """Return the ``(low, high)`` range for uniform weight initialisation.

    The upper bound is ``1 / sqrt(fan_in)``, where ``fan_in`` is the number
    of inputs feeding each unit of ``layer``.

    Params
    ======
        layer: Module with a 2-D ``weight`` laid out as
            (out_features, in_features), as in ``nn.Linear``

    Returns
    =======
        Tuple ``(0, lim)`` suitable for unpacking into ``uniform_``.
    """
    # nn.Linear stores its weight as (out_features, in_features), so the
    # fan-in is dimension 1; dimension 0 is the fan-out.
    fan_in = layer.weight.data.size()[1]
    lim = 1. / np.sqrt(fan_in)
    # NOTE(review): the lower bound is 0, so every weight is non-negative;
    # a symmetric (-lim, lim) range is the more common choice -- confirm intent.
    return (0, lim)
class neural_net(nn.Module):
    """Actor (Policy) Model: a fully connected ReLU network.

    Four hidden layers of width ``hidden_size`` and 75 %, 50 % and 25 % of
    it, followed by a ReLU-activated output layer.
    """

    def __init__(self, num_states, hidden_size, num_policies, dropout):
        """Initialize parameters and build model.

        Params
        ======
            num_states (int): Number of states (input features)
            hidden_size (int): Number of nodes in first hidden layer,
                sequential ones will have proportionally less
            num_policies (int): Number of policies (output features)
            dropout (float): Dropout probability; ``forward`` only applies
                dropout when this exceeds 0.01
        """
        super().__init__()
        self.dropout = dropout

        three_quarters = int(hidden_size * 0.75)
        half = int(hidden_size * 0.5)
        quarter = int(hidden_size * 0.25)

        self.input_layer = nn.Linear(num_states, hidden_size)
        # Constructed but not used by ``forward``.
        self.batchnorm_layer = nn.BatchNorm1d(hidden_size)
        self.fullyconnected1 = nn.Linear(hidden_size, three_quarters)
        self.fullyconnected2 = nn.Linear(three_quarters, half)
        self.fullyconnected3 = nn.Linear(half, quarter)
        self.dropout_layer = nn.Dropout(p=dropout)
        self.output_layer = nn.Linear(quarter, num_policies)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw all linear-layer weights; biases are left untouched."""
        hidden_layers = (
            self.input_layer,
            self.fullyconnected1,
            self.fullyconnected2,
            self.fullyconnected3,
        )
        for hidden in hidden_layers:
            low, high = hidden_init(hidden)
            hidden.weight.data.uniform_(low, high)
        # The output layer starts with small non-negative weights.
        self.output_layer.weight.data.uniform_(0, 3e-3)

    def forward(self, state):
        """Build neural network that maps state values -> values"""
        hidden_layers = (
            self.input_layer,
            self.fullyconnected1,
            self.fullyconnected2,
            self.fullyconnected3,
        )
        x = state
        for hidden in hidden_layers:
            x = F.relu(hidden(x))
        # Dropout is skipped entirely for probabilities of 0.01 or less.
        if self.dropout > 0.01:
            x = self.dropout_layer(x)
        # NOTE(review): the final ReLU restricts outputs to be non-negative.
        return F.relu(self.output_layer(x))