Hi,
Iam new to this forum and to pytorch.
Could someone help me with that error
RuntimeError: invalid argument 4: Index tensor must have same dimensions as input tensor at
def learn(self, batch, gamma):
“”"Update value parameters using given batch of experience tuples.
Params
======
experiences (Tuple[torch.Variable]): tuple of (s, a, r, s', done) tuples
gamma (float): discount factor
"""
#states, actions, rewards, next_states, dones = experiences
states = np.array([each[0][0] for each in batch], ndmin=3)
actions = np.array([each[0][1] for each in batch])
rewards = np.array([each[0][2] for each in batch])
next_states = np.array([each[0][3] for each in batch], ndmin=3)
dones = np.array([each[0][4] for each in batch])
#dones = np.array([map(lambda x: 1 if x else 0, dones)],dtype=np.float16)
dones = dones.astype(np.int16)
states = torch.from_numpy(states).float().cuda()
next_states = torch.from_numpy(next_states).float().cuda()
rewards = torch.from_numpy(rewards).float().cuda()
actions = torch.from_numpy(actions).long().cuda()
dones = torch.from_numpy(dones).float().cuda()
#next_states = torch.from_numpy(next_states).float().unsqueeze(0).cuda()
#print(states.shape)
states = states.view(32,8)
#print(states.shape)
next_states = next_states.view(32,8)
## TODO: compute and minimize the loss
"*** YOUR CODE HERE ***"
Q_targets_next = self.qnetwork_target(next_states).detach().max(1)[0].unsqueeze(1)
print(rewards.shape)
print(Q_targets_next)
Q_targets = rewards + (gamma * Q_targets_next * (1-dones))
Q_expected = self.qnetwork_local(states).gather(1, actions)
loss = F.mse_loss(Q_expected, Q_targets)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
# ------------------- update target network ------------------- #
self.soft_update(self.qnetwork_local, self.qnetwork_target, self.tau)
I don’t know if this gather is correct here found the code online and it worked
now I try to modify to my problem
Thank you very much for your help
If you need more information let me know
Best Regards
Chris