Hi,
I am using the PyTorch version 1.9 in google’s colab. I tried using with torch.autograd.set_detect_anomly(True)
to find more information but the stack trace doesn’t change and it is not showing any other additional info.
def _a2c_update(self, value, batch_idx):
returns, advantages = self._discount_rewards(value, self.transitions[batch_idx])
for transition, _return, advantage in zip(self.transitions[batch_idx], returns, advantages):
reward, index, output, value, done = transition
if done:
continue
advantage = advantage.detach()
probs = F.softmax(output, dim=-1)
log_probs = torch.log(probs)
log_action_prob = log_probs[index]
policy_loss = -log_action_prob * advantage
value_loss = (.5 * (value - _return)**2)
entropy = (-log_probs * probs).mean()
# add up the loss over time
self.model_loss += policy_loss + 0.5 * value_loss - 0.1 * entropy
self.statistics.stats_episode_append(
reward=reward,
policy=policy_loss.item(),
value=value_loss.item(),
entropy=entropy.item(),
confidence=torch.mean(torch.exp(log_action_prob)).item()
)
self.model_updates += 1
self.transitions[batch_idx] = []
if self.model_loss == 0 or self.model_updates % self.batch_size != 0:
return
# Only if all of the agents in the batch have performed their update the backpropagation is invoked to reduce
# computational complexity
self.statistics.stats_episode_append(loss=self.model_loss.item())
self.optimizer.zero_grad()
self.model_loss.backward(retain_graph=True)
nn.utils.clip_grad_norm_(self.model.parameters(), self.config['training']['optimizer']['clip_grad_norm'])
self.optimizer.step()
self.model_loss = 0.
@albanD , Please let me know what steps should I take in order to resolve the issue.
I tried using self.model_loss.clone() right before the self.model_loss.backward(), but that didn’t work.