Hello,
I’m having a lot of NaN values with the log_prob function.
See the code:
def forward(self, observation):
# Convert tuple into tensor
observation_as_list = []
observation_as_list.append(observation[0])
observation_as_list.append(observation[1])
observation_as_list = np.asarray(observation_as_list)
observation_as_list = observation_as_list.reshape(1,2)
observation = observation_as_list
state = torch.from_numpy(observation).float()
state = F.tanh(self.affine(state))
state_value = self.value_layer(state)
weight_prelu = torch.tensor([[5.0]])
action_parameters = (self.action_layer(state)) # A voir quelle activation function mettre
action_distribution = Normal(action_parameters[0][0], action_parameters[0][1])
action = action_distribution.sample() # Torch.tensor; action
self.logprobs.append(action_distribution.log_prob(action+ 1e-6)) # HERE ###
self.state_values.append(state_value)
#print(action.item())
return action.item() # Float element
The whole code is here: https://github.com/nbrosson/Actor-critic-MountainCar/blob/master/agent.py
Any ideas how I’m getting this problem?
Thanks !!