Cant' find : RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation

Hello everyone, I am trying to run the MADDPG alogrithm on my environement but I struggle with a Pytorch error during training concerning an inplace operation which I can’t seem to find in my code.

error :

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [64, 1]], which is output 0 of AsStridedBackward0, is at version 3; expected version 2 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

the backtrace:

RuntimeError                              Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_18392\3501804896.py in <module>
     41 
     42         if total_steps % 100 == 0 and not evaluate:
---> 43             maddpg_agents.learn(memory)
     44 
     45         obs = obs_

~\AppData\Local\Temp\ipykernel_18392\78932359.py in learn(self, memory)
     73                 critic_loss = F.mse_loss(target, critic_value)
     74                 agent.critic.optimizer.zero_grad()
---> 75                 critic_loss.backward(retain_graph=True)
     76                 agent.critic.optimizer.step()
     77 

c:\ProgramData\Anaconda3\lib\site-packages\torch\_tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
    361                 create_graph=create_graph,
    362                 inputs=inputs)
--> 363         torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
    364 
    365     def register_hook(self, hook):
c:\ProgramData\Anaconda3\lib\site-packages\torch\autograd\__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
    173     Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
...
--> 175         allow_unreachable=True, accumulate_grad=True)  # Calls into the C++ engine to run the backward pass
    176 
    177 def grad(

I can’t locate the inplace operation.

The network:

class CriticNetwork(nn.Module):
    def __init__(self, beta, input_dims, fc1_dims, fc2_dims,
            n_agents, n_actions, name, chkpt_dir):
        super(CriticNetwork, self).__init__()

        self.chkpt_file = os.path.join(chkpt_dir, name)

        #critic taking full state observation vector of the whole env
        #+ each action vectors of all agents
        self.fc1 = nn.Linear(input_dims+n_agents*n_actions, fc1_dims)
        self.fc2 = nn.Linear(fc1_dims, fc2_dims)
        self.q = nn.Linear(fc2_dims, 1)

        self.optimizer = optim.Adam(self.parameters(), lr=beta)
        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, state, action):
        x = F.relu(self.fc1(T.cat([state, action], dim=1)))
        x = F.relu(self.fc2(x))
        q = self.q(x)

        return q

And the learning function:

def learn(self, memory):
        if not memory.ready():
            return
        
        actor_states, states, actions, rewards, actor_new_states, states_, dones = memory.sample_buffer()
        
        device = self.agents[0].actor.device

        states = T.tensor(states, dtype=T.float).to(device)
        actions = T.tensor(actions, dtype=T.float).to(device)
        rewards = T.tensor(rewards, dtype=T.float).to(device)
        states_ = T.tensor(states_, dtype=T.float).to(device)
        dones = T.tensor(dones).to(device)

        all_agents_new_actions = []
        all_agents_new_mu_actions = []
        old_agents_actions = []
        
        for agent_idx, agent in enumerate(self.agents):
            new_states = T.tensor(actor_new_states[agent_idx], 
                                 dtype=T.float).to(device)

            new_act = agent.target_actor.forward(new_states)

            all_agents_new_actions.append(new_act)
            mu_states = T.tensor(actor_states[agent_idx], 
                                 dtype=T.float).to(device)
            acti = agent.actor.forward(mu_states)
            all_agents_new_mu_actions.append(acti)
            old_agents_actions.append(actions[agent_idx])
        
        new_actions = T.cat([acts for acts in all_agents_new_actions], dim=1)
        mu = T.cat([acts for acts in all_agents_new_mu_actions], dim=1)
        old_actions = T.cat([acts for acts in old_agents_actions],dim=1)

        for agent_idx, agent in enumerate(self.agents):
            with T.autograd.set_detect_anomaly(True):
                critic_value_ = agent.target_critic.forward(states_, new_actions).flatten()
                critic_value_[dones[:,0]] = 0.0
                critic_value = agent.critic.forward(states, old_actions).flatten()

                target = rewards[:,agent_idx] + agent.gamma*critic_value_
                critic_loss = F.mse_loss(target, critic_value)
                agent.critic.optimizer.zero_grad()
                critic_loss.backward(retain_graph=True)
                agent.critic.optimizer.step()

                actor_loss = agent.critic.forward(states, mu).flatten()
                actor_loss = -T.mean(actor_loss)
                agent.actor.optimizer.zero_grad()
                actor_loss.backward(retain_graph=True)
                agent.actor.optimizer.step()

                agent.update_network_parameters()

The fact that it is a multi-agent algorithm, during learning I have to make multiple passes through the graph (1 for each agent) so the graph needs to be retained.

Thank you all!

nb: I didn’t put my ful code here because I suspect the in-place operation to happend here, but I can give the whole code if necessary.

I think the same is discussed here: Encounter the RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation