Hello,

I am encountering a runtime error during the backpropagation step of my training loop, specifically when calling `actor_loss.backward()`

. The error message, first with warning is:

[W anomaly_mode.cpp:54] Warning: Error detected in MmBackward0. Traceback of forward call that caused the error:

(function print_stack)

An exception occurred during backward pass: one of the variables needed for gradient computation has been modified by an inplace operation: [CPUFloatType [1, 9]] is at version 1216; expected version 1197 instead.

```
// Update the update_policy function to handle batched data
void update_policy(std::shared_ptr<Actor> actor, std::shared_ptr<Critic> critic,
const std::vector<torch::Tensor>& batch_states,
const std::vector<torch::Tensor>& batch_actions,
const std::vector<torch::Tensor>& batch_log_probs,
const std::vector<double>& batch_rewards,
const std::vector<torch::Tensor>& batch_next_states,
double clip_param = 0.01)
{
torch::autograd::AnomalyMode::set_enabled(true);
torch::Tensor states = torch::stack(batch_states);
torch::Tensor actions = torch::stack(batch_actions);
torch::Tensor old_log_probs = torch::stack(batch_log_probs);
torch::Tensor rewards = torch::tensor(batch_rewards);
torch::Tensor next_states = torch::stack(batch_next_states);
torch::Tensor returns = compute_returns(rewards, 1);
torch::Tensor values = critic->forward({ states }).squeeze(-1);
torch::Tensor advantages = returns - values.detach();
// Normalize advantages
advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-10);
// Evaluate current policy
auto [means, stds] = actor->forward(states);
torch::Tensor curr_log_probs = calculate_log_probs(actions, means, stds);
// Compute ratio
torch::Tensor ratios = torch::exp(curr_log_probs - old_log_probs);;
// Compute surrogate losses
torch::Tensor surr1 = ratios * advantages;
torch::Tensor surr2 = torch::clamp(ratios, 1.0 - clip_param, 1.0 + clip_param) * advantages;
// Compute actor and critic losses
torch::Tensor actor_loss = torch::min(surr1, surr2).mean();
// To be sure, values is not implemented below:
torch::Tensor critic_loss = torch::mse_loss(critic->forward({ states }).squeeze(-1), returns);
//std::cout << "actor_loss: " << actor_loss << std::endl;
//std::cout << "critic_loss: " << critic_loss << std::endl;
// Perform backpropagation
critic->optimizer.zero_grad();
critic_loss.backward();
critic->optimizer.step();
actor->optimizer.zero_grad();
actor_loss.backward(); // Error and warning here
actor->optimizer.step();
std::cout << "Update - Actor Loss: " << actor_loss.item<float>()
<< ", Critic Loss: " << critic_loss.item<float>() << std::endl;
}
```

I believe I use an unsupported action while calculating one of the tensors defining actor_loss, but I cannot find it. Any support is appreciated, thanks!