Issues with concatenating tensors for policy history

I'm following an older tutorial on policy gradient RL for CartPole, seen here, and I'm running into an error in the select-action step. Here is the relevant code:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.distributions import Categorical

# env, gamma and learning_rate are defined earlier in the tutorial
class Policy(nn.Module):
    def __init__(self):
        super(Policy, self).__init__()
        self.state_space = env.observation_space.shape[0]
        self.action_space = env.action_space.n
        
        self.l1 = nn.Linear(self.state_space, 128, bias=False)
        self.l2 = nn.Linear(128, self.action_space, bias=False)
        
        self.gamma = gamma
        
        # Episode policy and reward history 
        self.policy_history = Variable(torch.Tensor()) 
        self.reward_episode = []
        # Overall reward and loss history
        self.reward_history = []
        self.loss_history = []
    def forward(self, x):
        model = torch.nn.Sequential(
            self.l1,
            nn.Dropout(p=0.6),
            nn.ReLU(),
            self.l2,
            nn.Softmax(dim=-1)
        )
        return model(x)
policy = Policy()
optimizer = optim.Adam(policy.parameters(), lr=learning_rate)
def select_action(state):
    # Select an action (0 or 1) by running the policy network and sampling from the resulting probabilities
    state = torch.from_numpy(state).type(torch.FloatTensor)
    state = policy(Variable(state))
    c = Categorical(state)  # turn the action probabilities into a categorical distribution we can sample from
    action = c.sample()
    
    # Add log probability of our chosen action to our history    
    if policy.policy_history.dim() != 0:
        policy.policy_history = torch.cat([policy.policy_history, c.log_prob(action)])
    else:
        policy.policy_history = (c.log_prob(action))
    return action

I get the following error when I begin to train the network

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-21-c1f1eef9c02f> in <module>()
      1 episodes = 1000
----> 2 main(episodes)

<ipython-input-20-2b725c0b3414> in main(episodes)
      7 
      8         for time in range(1000):
----> 9             action = select_action(state)
     10             # Step through environment using chosen action
     11             state, reward, done, _ = env.step(action.data[0])

<ipython-input-18-f02a0e7d07a5> in select_action(state)
      8     # Add log probability of our chosen action to our history
      9     if policy.policy_history.dim() != 0:
---> 10         policy.policy_history = torch.cat([policy.policy_history, c.log_prob(action)])
     11     else:
     12         policy.policy_history = (c.log_prob(action))

RuntimeError: zero-dimensional tensor (at position 1) cannot be concatenated

How can I fix this error?
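
My guess is that c.log_prob(action) returns a zero-dimensional tensor (since action is a scalar), and torch.cat cannot concatenate zero-dimensional tensors. Would reshaping it to a one-element 1-D tensor before concatenating, roughly like the sketch below, be the right fix? This is just an idea, not something from the tutorial.

# hypothetical change inside select_action, not from the tutorial
log_prob = c.log_prob(action).unsqueeze(0)  # 0-d scalar -> 1-D tensor with one element

if policy.policy_history.dim() != 0:
    policy.policy_history = torch.cat([policy.policy_history, log_prob])
else:
    policy.policy_history = log_prob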

Hey there. I am facing the same issue. Were you able to solve it? 🙂