I’m following an older tutorial on policy gradient RL for CartPole, seen here, and I’m getting an error in the select_action step when I start training.
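For completeness, the imports and setup come earlier in the notebook. The imports below match what the code uses; the environment name and the learning_rate/gamma values are my guess at what the tutorial uses, so they may not match it exactly:

import gym
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.distributions import Categorical

# NOTE: these values are my assumption, not copied from the tutorial
env = gym.make('CartPole-v1')
learning_rate = 0.01
gamma = 0.99

The policy network and the action selection code look like this: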
class Policy(nn.Module):
    def __init__(self):
        super(Policy, self).__init__()
        self.state_space = env.observation_space.shape[0]
        self.action_space = env.action_space.n

        self.l1 = nn.Linear(self.state_space, 128, bias=False)
        self.l2 = nn.Linear(128, self.action_space, bias=False)

        self.gamma = gamma

        # Episode policy and reward history
        self.policy_history = Variable(torch.Tensor())
        self.reward_episode = []
        # Overall reward and loss history
        self.reward_history = []
        self.loss_history = []

    def forward(self, x):
        model = torch.nn.Sequential(
            self.l1,
            nn.Dropout(p=0.6),
            nn.ReLU(),
            self.l2,
            nn.Softmax(dim=-1)
        )
        return model(x)

policy = Policy()
optimizer = optim.Adam(policy.parameters(), lr=learning_rate)
def select_action(state):
    # Select an action (0 or 1) by running the policy model and sampling
    # from the action probabilities it outputs for this state
    state = torch.from_numpy(state).type(torch.FloatTensor)
    state = policy(Variable(state))
    c = Categorical(state)  # turns the probabilities into a categorical distribution
    action = c.sample()

    # Add log probability of our chosen action to our history
    if policy.policy_history.dim() != 0:
        policy.policy_history = torch.cat([policy.policy_history, c.log_prob(action)])
    else:
        policy.policy_history = (c.log_prob(action))

    return action
I get the following error when I begin to train the network
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-21-c1f1eef9c02f> in <module>()
1 episodes = 1000
----> 2 main(episodes)
<ipython-input-20-2b725c0b3414> in main(episodes)
7
8 for time in range(1000):
----> 9 action = select_action(state)
10 # Step through environment using chosen action
11 state, reward, done, _ = env.step(action.data[0])
<ipython-input-18-f02a0e7d07a5> in select_action(state)
8 # Add log probability of our chosen action to our history
9 if policy.policy_history.dim() != 0:
---> 10 policy.policy_history = torch.cat([policy.policy_history, c.log_prob(action)])
11 else:
12 policy.policy_history = (c.log_prob(action))
RuntimeError: zero-dimensional tensor (at position 1) cannot be concatenated
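As far as I can tell, c.log_prob(action) returns a zero-dimensional tensor and torch.cat refuses to concatenate those, because I can reproduce the same error outside the training loop with this snippet (my own minimal repro, not from the tutorial):

import torch
from torch.distributions import Categorical

c = Categorical(torch.tensor([0.5, 0.5]))
action = c.sample()             # 0-dim tensor, e.g. tensor(1)
log_prob = c.log_prob(action)   # also a 0-dim tensor
print(log_prob.dim())           # prints 0

history = torch.Tensor()        # what policy_history starts out as; dim() == 1
torch.cat([history, log_prob])  # RuntimeError: zero-dimensional tensor cannot be concatenated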
How can I fix this error?