hub: uncaught exception: Traceback (most recent call last):
File "/home/ubuntu/.local/lib/python3.8/site-packages/ryu/lib/hub.py", line 60, in _launch
return func(*args, **kwargs)
File "/home/ubuntu/ryu/ryu/app/SACDynamicTimeout.py", line 130, in _monitor
self._request_stats(datapath)
File "/home/ubuntu/ryu/ryu/app/SACDynamicTimeout.py", line 308, in _request_stats
self.dynamic_timeout()
File "/home/ubuntu/ryu/ryu/app/SACDynamicTimeout.py", line 406, in dynamic_timeout
self.model.train_on_transition(self.prev_state, self.action, self.state, reward, done_bool)
File "/home/ubuntu/ryu/ryu/app/SAC/Discrete_SAC_Agent.py", line 70, in train_on_transition
self.train_networks(transition)
File "/home/ubuntu/ryu/ryu/app/SAC/Discrete_SAC_Agent.py", line 95, in train_networks
self.critic_loss(states_tensor, actions_tensor, rewards_tensor, next_states_tensor, done_tensor)
File "/home/ubuntu/ryu/ryu/app/SAC/Discrete_SAC_Agent.py", line 126, in critic_loss
soft_q_values = self.critic_local(states_tensor).gather(1, actions_tensor)
RuntimeError: index 10 is out of bounds for dimension 1 with size 10
def critic_loss(self, states_tensor, actions_tensor, rewards_tensor, next_states_tensor, done_tensor):
    """Compute the twin soft-Q (critic) losses for a batch of transitions.

    Parameters
    ----------
    states_tensor : float tensor, shape (batch, state_dim).
    actions_tensor : long tensor, shape (batch, 1), the action taken in each
        transition.  NOTE(review): the controller app encodes actions as
        1..ACTION_DIM (its constants say "Action Space: 1-10"), which made the
        raw values index one past the critic's last Q-column and raise
        "index 10 is out of bounds for dimension 1 with size 10"; they are
        shifted to 0-based column indices below.
    rewards_tensor : per-transition reward.
    next_states_tensor : successor states, same shape as ``states_tensor``.
    done_tensor : terminal flags.  Must be a *bool* tensor: ``~`` is logical
        negation only on bools — on an integer tensor it is bitwise NOT
        (``~1 == -2``) and would corrupt the Bellman target.

    Returns
    -------
    (critic_loss, critic2_loss)
        Scalar tensors: the MSE of each local critic's Q-estimate against the
        shared soft Bellman target.
    """
    with torch.no_grad():
        # Soft state value of the successor states under the current policy,
        # using the minimum of the two target critics (clipped double-Q).
        action_probabilities, log_action_probabilities = self.get_action_info(next_states_tensor)
        next_q_values_target = self.critic_target.forward(next_states_tensor)
        next_q_values_target2 = self.critic_target2.forward(next_states_tensor)
        soft_state_values = (action_probabilities * (
            torch.min(next_q_values_target, next_q_values_target2) - self.alpha * log_action_probabilities
        )).sum(dim=1)

        # Soft Bellman target; ~done_tensor zeroes the bootstrap term on
        # terminal transitions.
        next_q_values = rewards_tensor + ~done_tensor * self.DISCOUNT_RATE * soft_state_values

    # BUG FIX: actions arrive 1-based (1..ACTION_DIM) from the Ryu app, so
    # gathering with the raw values overflowed the Q-table width (see the
    # traceback: "index 10 is out of bounds for dimension 1 with size 10").
    # Shift to 0-based column indices before the gather.
    # TODO(review): the cleaner long-term fix is to store 0-based actions at
    # the point where transitions are recorded.
    action_indices = actions_tensor - 1
    soft_q_values = self.critic_local(states_tensor).gather(1, action_indices)
    soft_q_values2 = self.critic_local2(states_tensor).gather(1, action_indices)

    # Keep per-element squared errors (reduction="none") so they can be fed
    # back as prioritised-replay weights before being averaged into the loss.
    critic_square_error = torch.nn.MSELoss(reduction="none")(soft_q_values, next_q_values)
    critic2_square_error = torch.nn.MSELoss(reduction="none")(soft_q_values2, next_q_values)
    weight_update = [min(l1.item(), l2.item()) for l1, l2 in zip(critic_square_error, critic2_square_error)]
    self.replay_buffer.update_weights(weight_update)
    critic_loss = critic_square_error.mean()
    critic2_loss = critic2_square_error.mean()
    return critic_loss, critic2_loss
# Dimensions of the RL problem the discrete SAC agent is trained on.
STATE_DIM = 5 # 5-Dimensional State Space: [avg_PI_IAT, avg_fd, PIAT, action, avg_PIAT]
ACTION_DIM = 10 # 10-Dimensional Action Space: 1-10 (NOTE(review): 1-based values must be shifted to 0..9 before indexing Q-outputs)