I am working on an Actor-Critic model with a continuous action output. The output should be a number between 0 and 1. In the actor part, I compute mu with a Sigmoid (so the output lies between 0 and 1) and the variance with a ReLU (so it is always non-negative).
The code is as follows:
```python
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# state_size, action_size and load_model are defined elsewhere in my script


class Policy(nn.Module):
    """Implements both actor and critic in one model."""

    def __init__(self):
        super(Policy, self).__init__()
        self.fc1 = nn.Linear(state_size, 128)
        self.fc2 = nn.Linear(128, 64)

        # actor's layer
        self.action_head = nn.Linear(64, action_size)
        self.mu = nn.Sigmoid()
        self.var = nn.ReLU()

        # critic's layer
        self.value_head = nn.Linear(64, 1)

    def forward(self, x):
        """Forward pass of both actor and critic."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))

        # actor: chooses the action to take from state s_t
        # by returning the mean and variance of the action distribution
        action_prob = self.action_head(x)
        mu = self.mu(action_prob)
        var = self.var(action_prob)

        # critic: evaluates being in the state s_t
        state_values = self.value_head(x)

        return mu, var, state_values


class Agent():
    def __init__(self, model, is_eval=False, model_name=""):
        self.model_name = model_name
        self.is_eval = is_eval
        self.model = load_model(model_name) if is_eval else model

    def act(self, state):
        mu, var, state_value = self.model(state)
        mu = mu.data.cpu().numpy()
        sigma = torch.sqrt(var).data.cpu().numpy()
        actions = np.random.normal(mu, sigma)
        actions = np.clip(actions, 0, 1)  # to have output ranging from 0 to 1
        actions = torch.from_numpy(actions)
        return actions, state_value
```
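For reference, here is a minimal sketch of how the sampling step in act could equivalently be written with torch.distributions.Normal, which also exposes the log-probability that the actor loss would eventually need; sample_action is just an illustrative helper, not part of my code:

```python
import torch
from torch.distributions import Normal

def sample_action(mu, var):
    # Build N(mu, sigma) from the actor outputs, with sigma = sqrt(var)
    dist = Normal(mu, torch.sqrt(var))
    action = dist.sample()
    log_prob = dist.log_prob(action)  # needed later for the actor loss
    action = action.clamp(0.0, 1.0)   # mirror the np.clip step to stay in [0, 1]
    return action, log_prob
```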
Now, let's say action_size = 1.
One output of print(mu, var) is: tensor([0.5266], grad_fn=<SigmoidBackward>), tensor([0.7478], grad_fn=<ReluBackward>)
The action sampled from this normal distribution comes out as 0 (it was actually a negative number before the clipping step).
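To see why negative samples are so common: plugging in the printed numbers, sigma = sqrt(0.7478) ≈ 0.86, so a normal distribution centered at 0.5266 puts a lot of mass below 0. A quick check (scipy is used here only to evaluate the normal CDF):

```python
import numpy as np
from scipy.stats import norm

mu, var = 0.5266, 0.7478
sigma = np.sqrt(var)                         # ≈ 0.8648
p_negative = norm.cdf(0.0, loc=mu, scale=sigma)  # P(raw sample < 0)
print(p_negative)                            # ≈ 0.27
```

So roughly a quarter of the raw samples are negative and get clipped to exactly 0.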
My question is whether this logic is correct for an Actor-Critic model with a continuous action output.