Hi,
I am getting the above error when a second backward pass runs in my learn() function.
As I understand it, the usual suggestion for this is to use a clone() operation.
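From what I can tell, though, clone() copies a tensor's data but the copy stays attached to the same autograd graph, so it does not stop the graph from being freed by the first backward(). A minimal standalone sketch of what I mean (not from my code):

import torch

h = torch.ones(1, requires_grad=True)
out = (h * 3).exp()         # exp() saves its output for the backward pass
loss1 = out.clone().sum()   # clone() stays attached to the same graph
loss1.backward()            # frees the saved intermediate values of the graph
loss2 = out.sum()
loss2.backward()            # RuntimeError: Trying to backward through the graph a second time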
I have tried cloning everything in my code, but the issue still occurs.
Can someone please help me understand what the problem is?
def learn(self, state, action, reward, state_, done):
    s = state.reshape(1, -1)
    s_ = state_.reshape(1, -1)
    action = action.reshape(1, -1)

    # current and target value estimates (hidden states cloned on every call)
    value, (hvo, cvo) = self.value((s.clone(), (self.actor_hx.clone(), self.actor_cx.clone())))
    value_, (hvto, cvto) = self.target_value((s_.clone(), (self.actor_Hx.clone(), self.actor_Cx.clone())))
    if done:
        value_ = 0

    # value network update
    actions, log_probs, (_, _) = self.actor.sample_normal(s.clone(), (self.actor_hx.clone(), self.actor_cx.clone()), reparameterize=False)
    q1_new_policy, (_, _) = self.critic_1.forward((s.clone(), actions.clone(), (self.actor_hx.clone(), self.actor_cx.clone())))
    q2_new_policy, (_, _) = self.critic_2.forward((s.clone(), actions.clone(), (self.actor_hx.clone(), self.actor_cx.clone())))
    critic_value = T.min(q1_new_policy, q2_new_policy)

    self.value.optimizer.zero_grad()
    value_target = critic_value - log_probs
    value_target = value_target.reshape(value.shape)
    value_loss = 0.5 * F.mse_loss(value, value_target)
    value_loss.backward(retain_graph=True)
    self.value.optimizer.step()

    # actor update
    actions, log_probs, (hao, cao) = self.actor.sample_normal(s.clone(), (self.actor_hx.clone(), self.actor_cx.clone()), reparameterize=True)
    # actions, log_probs = self.actor.sample_mvnormal(s, reparameterize=False)
    q1_new_policy, (_, _) = self.critic_1.forward((s.clone(), actions.clone(), (self.actor_hx.clone(), self.actor_cx.clone())))
    q2_new_policy, (_, _) = self.critic_2.forward((s.clone(), actions.clone(), (self.actor_hx.clone(), self.actor_cx.clone())))
    critic_value = T.min(q1_new_policy, q2_new_policy)

    actor_loss = log_probs - critic_value
    self.actor.optimizer.zero_grad()
    actor_loss.backward(retain_graph=True)  # <-- the error is raised here
    self.actor.optimizer.step()

    # critic update
    self.critic_1.optimizer.zero_grad()
    self.critic_2.optimizer.zero_grad()
    q_hat = self.scale * reward + self.gamma * value_
    q1_old_policy, (_, _) = self.critic_1.forward((s.clone(), action.clone(), (self.actor_hx.clone(), self.actor_cx.clone())))
    q2_old_policy, (_, _) = self.critic_2.forward((s.clone(), action.clone(), (self.actor_hx.clone(), self.actor_cx.clone())))
    critic_1_loss = 0.5 * F.mse_loss(q1_old_policy, q_hat)
    critic_2_loss = 0.5 * F.mse_loss(q2_old_policy, q_hat)
    critic_loss = critic_1_loss + critic_2_loss
    critic_loss.backward()
    self.critic_1.optimizer.step()
    self.critic_2.optimizer.step()

    self.update_network_parameters()
    # carry the recurrent hidden state over to the next call
    self.actor_hx = self.actor_Hx.clone()
    self.actor_cx = self.actor_Cx.clone()
I am getting the error at the following line:
actor_loss.backward(retain_graph=True)
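In case it helps narrow things down: my guess is that self.actor_hx / self.actor_cx still carry the graph built in the previous learn() call, and since clone() keeps that attachment, backward() ends up traversing a graph that has already been freed. A standalone sketch of the detach() pattern I have been reading about (the LSTMCell here is only illustrative, not my actual network):

import torch
import torch.nn as nn

rnn = nn.LSTMCell(4, 8)
opt = torch.optim.SGD(rnn.parameters(), lr=1e-2)
hx, cx = torch.zeros(1, 8), torch.zeros(1, 8)

for step in range(3):
    hx, cx = rnn(torch.randn(1, 4), (hx, cx))
    loss = hx.pow(2).sum()
    opt.zero_grad()
    loss.backward()   # frees this step's graph
    opt.step()
    # detach() severs the graph so the next iteration starts fresh;
    # clone() alone would keep the old (already freed) graph attached
    hx, cx = hx.detach(), cx.detach()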
Can someone please help me figure out what I am missing?