I am trying to use a supervised learning neural net(nn) as a function approximator for a policy function, which solves my model equation. To start out I tried to use a nn to approximate f(x) = x in the interval [1.07312, 20.38941].with a nn having 4 hidden layers with 100/75/50/25 neurons.
(You can find the almost same mwe here with a different question)
As I approximate f(x) = x on only positive values, my neural net only needs to output positive values.
In this mwe the agent converges for both random weight initializations. Those with positive and those with negative outputs, Though in my original problem I cannot handle negative output values and thus need to enforce positive outputs on the net.
What is the correct way to enforce positive values on my net outputs? I tried to use a relu on the output-layer as:
return F.relu(self.output_layer(x))
instead of:
return self.output_layer(x)
Though when random weights produce negative output values, it gets stuck at 0, due to zero gradients, as mentioned in the first answer here: Almost same mwe with zero gradient answer
I also tried a softplus activation function on the output-layer. This worked at the cost of reducing precision in a way that is very undesirable and maybe also convergence speed.
I would appreciate any help on what is the correct or best way to restrict outputs to positive values and maintain efficiency as with a linear activation function?
You find the minimal working example below. Just few lines in the jupyter notebook, added a for loop to find negative outputs. The approx class which runs train and the neural network, which I would suggest to be quite standard.
jupyter:
for a in range(5):
from mwe import approx
approxlin = approx()
approxlin.train()
approx class:
import numpy as np
import math
import torch
import torch.optim as optim
from neural_net import neural_net
device = "cpu"
def ten(x): return torch.from_numpy(x).float().to(device)
class approx():
def __init__(self):
self.grid_min = 1.07312
self.grid_max = 20.38941
self.grid = ten(np.linspace(self.grid_min,self.grid_max,100)).unsqueeze(1)
# create neural net
self.policy_net = neural_net(1, 100, 1, 0.00001)
self.optimizer = optim.Adam(self.policy_net.parameters(), lr=0.0002)
def train(self):
policy = self.policy_net(self.grid)
# training loop
for episode in range(20000):
policy = self.policy_net(self.grid)
loss = ((policy-self.grid)**2).mean()
if episode % 100 == 0:
losslog10 = math.log10(math.sqrt(loss.squeeze().cpu().detach().item()))
print('episode {} -- losslog10 : {:8.6f}'.format(episode,losslog10))
self.optimizer.zero_grad()
loss.backward()
# torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1)
self.optimizer.step()
return policy.squeeze()
Neural Net:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
def hidden_init(layer):
fan_in = layer.weight.data.size()[0]
lim = 1. / np.sqrt(fan_in)
return (0, lim)
class neural_net(nn.Module):
"""Actor (Policy) Model."""
def __init__(self, num_states, hidden_size, num_policies, dropout): #, fc1_units=24, fc2_units=48):
"""Initialize parameters and build model.
Params
======
num_states (int): Number of states
num_policies (int): Number of policies
hidden_size (int): Number of nodes in first hidden layer, sequential ones will have proportionally less
"""
# Neural net has input, output, and two hidden layers
super(neural_net, self).__init__()
self.input_layer = nn.Linear(num_states, hidden_size)
self.fullyconnected1 = nn.Linear(hidden_size, int(hidden_size*0.75))
self.fullyconnected2 = nn.Linear(int(hidden_size*0.75), int(hidden_size*0.5))
self.fullyconnected3 = nn.Linear(int(hidden_size*0.5), int(hidden_size*0.25))
self.dropout_layer = nn.Dropout(p=dropout)
self.output_layer = nn.Linear(int(hidden_size*0.25), num_policies)
self.reset_parameters()
def reset_parameters(self):
self.input_layer.weight.data.uniform_(*hidden_init(self.input_layer))
self.fullyconnected1.weight.data.uniform_(*hidden_init(self.fullyconnected1))
self.fullyconnected2.weight.data.uniform_(*hidden_init(self.fullyconnected2))
self.fullyconnected3.weight.data.uniform_(*hidden_init(self.fullyconnected3))
self.output_layer.weight.data.uniform_(0, 3e-3)
def forward(self, state):
"""Build neural network that maps state values -> values"""
x = F.relu(self.input_layer(state))
x = F.relu(self.fullyconnected1(x))
x = F.relu(self.fullyconnected2(x))
x = F.relu(self.fullyconnected3(x))
return self.output_layer(x)```