The model is here:
class Actor(nn.Module):
    """Gaussian policy head: maps a state to a per-action mean and std.

    The network is a three-stage MLP (Linear -> LayerNorm -> ReLU blocks)
    followed by two independent linear heads, one for the mean and one for
    the standard deviation.

    The std head is passed through a softplus and offset by ``MIN_STD`` so
    the returned value is always strictly positive. A raw linear output can
    be zero or negative, which is not a valid standard deviation.
    NOTE(review): if the caller feeds ``std`` to a distribution as its
    scale, a non-positive value is a likely source of NaN losses and, after
    one optimizer step, NaN weights and outputs -- confirm against the
    training loop.

    Args:
        state_size: Size of the last dimension of the input state tensor.
        action_size: Number of action dimensions (size of each output).
        hidden_size: Width of every hidden layer.
    """

    # Lower bound added to the softplus output; softplus underflows to
    # exactly 0.0 for very negative inputs, so the floor keeps std > 0.
    MIN_STD = 1e-6

    def __init__(self, state_size, action_size, hidden_size=512):
        super().__init__()
        self.state_size = state_size
        self.hidden_size = hidden_size
        self.action_size = action_size

        # Projects the raw state into the hidden width.
        self.block_state = nn.Sequential(
            nn.Linear(state_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.ReLU(),
        )
        # Two further hidden layers shared by both output heads.
        self.block_hidden = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.ReLU(),
        )
        self.block_mean = nn.Sequential(
            nn.Linear(hidden_size, action_size),
        )
        # Softplus has no parameters, so the state-dict keys of this head
        # are the same as for a bare Linear at index 0.
        self.block_std = nn.Sequential(
            nn.Linear(hidden_size, action_size),
            nn.Softplus(),
        )

    def forward(self, state):
        """Compute the action distribution parameters for ``state``.

        Args:
            state: Tensor whose last dimension has size ``state_size``.

        Returns:
            A ``(mean, std)`` tuple; each tensor has ``action_size`` as its
            last dimension, and ``std`` is strictly positive.
        """
        out = self.block_state(state)
        out = self.block_hidden(out)
        mean = self.block_mean(out)
        std = self.block_std(out) + self.MIN_STD
        return mean, std
The output is:
tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan]], grad_fn=<AddmmBackward>)
tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan]], grad_fn=<AddmmBackward>)
I’m sure the input doesn’t contain any NaN values.