I am trying to use the nn.BatchNorm1d in my linear layer. The problem is that the code for neural network creation is not as simple as the examples given for batchnorm1d implementaion and is part of bigger reinforcement learning program (taken from datahubbs) and the code is made in such a way to be flexible w.r.t number of neurons and number of layers.
class actorCriticNet(nn.Module):
def __init__(self, learning_rate, n_hidden_layers, n_hidden_nodes,
bias=False, device='cpu'):
super(actorCriticNet, self).__init__()
self.device = device
self.n_inputs = 23
self.n_outputs = 11
self.n_hidden_nodes = n_hidden_nodes
self.n_hidden_layers = n_hidden_layers
self.learning_rate = learning_rate
self.bias = bias
self.action_space = np.arange(self.n_outputs)
# Generate network according to hidden layer and node settings
self.layers = OrderedDict()
self.n_layers = 2 * self.n_hidden_layers
for i in range(self.n_layers + 1):
# Define single linear layer
if self.n_hidden_layers == 0:
self.layers[str(i)] = nn.Linear(
self.n_inputs,
self.n_outputs,
bias=self.bias)
# Define input layer for multi-layer network
elif i % 2 == 0 and i == 0 and self.n_hidden_layers != 0:
self.layers[str(i)] = nn.Linear(
self.n_inputs,
self.n_hidden_nodes,
bias=self.bias)
# Define intermediate hidden layers
elif i % 2 == 0 and i != 0:
self.layers[str(i)] = nn.Linear(
self.n_hidden_nodes,
self.n_hidden_nodes,
bias=self.bias)
else:
self.layers[str(i)] = nn.ReLU(nn.BatchNorm1d(self.n_hidden_nodes))
self.body = nn.Sequential(self.layers)
# Define policy head
self.policy = nn.Sequential(
nn.Linear(self.n_hidden_nodes,
self.n_hidden_nodes,
bias=self.bias),
nn.ReLU(),
nn.Linear(self.n_hidden_nodes,
self.n_outputs,
bias=self.bias))
# Define value head
self.value = nn.Sequential(
nn.Linear(self.n_hidden_nodes,
self.n_hidden_nodes,
bias=self.bias),
nn.ReLU(),
nn.Linear(self.n_hidden_nodes,
1,
bias=self.bias))
self.optimizer = torch.optim.Adam(self.parameters(),
lr=self.learning_rate)
def predict(self, state):
body_output = self.get_body_output(state)
probs = F.softmax(self.policy(body_output), dim=-1)
return probs, self.value(body_output)
def get_body_output(self, state):
state_t = torch.FloatTensor(state).to(device=self.device)
return self.body(state_t)
def get_action(self, state):
probs = self.predict(state)[0][0][0].detach().numpy()
action = np.random.choice(self.action_space, p=probs)
return action
def get_log_probs(self, state):
body_output = self.get_body_output(state)
logprobs = F.log_softmax(self.policy(body_output), dim=-1)
return logprobs
The line of interest being:
self.layers[str(i)] = nn.ReLU(nn.BatchNorm1d(self.n_hidden_nodes))
Is my code for the implementation of batchnorm1d correct? or should I modify the code?
I am newbie in python/pytorch so please point out if there are any obvious optimisation mistakes I have made.