RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [64, 6]], which is output 0 of AsStridedBackward0, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

My code is below:

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch import autograd
from torch.distributions import Categorical


class Policy_Network(nn.Module):
    def __init__(self, state_input_dim, action_dim, lr):
        super(Policy_Network, self).__init__()
        self.first_layer = nn.Linear(state_input_dim, 256)
        self.second_layer = nn.Linear(256, 64)
        #self.third_layer = nn.Linear(256, 64)
        self.final_layer = nn.Linear(64, action_dim)
        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        autograd.set_detect_anomaly(True)
        #self.eps = 0.00001

    def forward(self, x):
        # convert the raw state (list / np.ndarray) to a float tensor
        x1 = np.array(x)
        x2 = F.relu(self.first_layer(torch.tensor(x1, dtype=torch.float32)))
        x3 = F.relu(self.second_layer(x2))
        x4 = self.final_layer(x3)
        out = F.softmax(x4, dim=0).unsqueeze(dim=0)

        return out
    
    def select_action(self, probs):

        m = Categorical(probs)
        action = m.sample()

        return action.item(), m.log_prob(action)
    
    def update_PFA(self, eps_rewards, eps_log_probs, gamma):
        R = 0
        policy_loss = []
        rewards = []
        # compute discounted returns back-to-front
        for r in eps_rewards[::-1]:
            R = r + gamma * R
            rewards.insert(0, R)
        rewards = torch.tensor(rewards, dtype=torch.float)

        if rewards.std() == 0:
            return
        if rewards.mean() != 0:
            rewards = (rewards - rewards.mean()) / rewards.std()  # + self.eps
        
        for log_prob, reward in zip(eps_log_probs, rewards):
            # positive because we want to decrease cost,
            # not policy_loss.append(-log_prob * reward)
            policy_loss.append(log_prob * reward)

        self.optimizer.zero_grad()

        loss = torch.cat(policy_loss).sum()
        loss.backward(retain_graph=True)
        nn.utils.clip_grad_value_(self.parameters(), clip_value=1.0)

Why do I get the inplace error?

The backtrace is as follows:

/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/torch/autograd/__init__.py:200: UserWarning: Error detected in MmBackward0. Traceback of forward call that caused the error:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/spyder_kernels/console/__main__.py", line 24, in <module>
    start.main()
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/spyder_kernels/console/start.py", line 330, in main
    kernel.start()
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 736, in start
    self.io_loop.start()
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 195, in start
    self.asyncio_loop.run_forever()
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/asyncio/base_events.py", line 607, in run_forever
    self._run_once()
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/asyncio/base_events.py", line 1922, in _run_once
    handle._run()
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/asyncio/events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 516, in dispatch_queue
    await self.process_one()
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 505, in process_one
    await dispatch(*args)
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 412, in dispatch_shell
    await result
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 740, in execute_request
    reply_content = await reply_content
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 422, in do_execute
    res = shell.run_cell(
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 546, in run_cell
    return super().run_cell(*args, **kwargs)
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3009, in run_cell
    result = self._run_cell(
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3064, in _run_cell
    result = runner(coro)
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
    coro.send(None)
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3269, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3448, in run_ast_nodes
    if await self.run_code(code, result, async_=asy):
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/pm/p4cmsynd4sq4rg7h5_2nx6rh0000gn/T/ipykernel_61767/3283248021.py", line 1, in <module>
    runfile('/Users/prakashgawas/MyFiles/Merinio/Code/Simulation/Learn/DQN/Learn.py', wdir='/Users/prakashgawas/MyFiles/Merinio/Code/Simulation/Learn/DQN')
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/spyder_kernels/customize/spydercustomize.py", line 528, in runfile
    return _exec_file(
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/spyder_kernels/customize/spydercustomize.py", line 615, in _exec_file
    exec_code(file_code, filename, ns_globals, ns_locals,
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/spyder_kernels/customize/spydercustomize.py", line 473, in exec_code
    exec_fun(compile(ast_code, filename, 'exec'), ns_globals, ns_locals)
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/spyder_kernels/py3compat.py", line 356, in compat_exec
    exec(code, globals, locals)
  File "/Users/prakashgawas/MyFiles/Merinio/Code/Simulation/Learn/DQN/Learn.py", line 153, in <module>
    run_func()
  File "/Users/prakashgawas/MyFiles/Merinio/Code/Simulation/Learn/DQN/Learn.py", line 86, in run_func
    call = d.get_state_action(state)
  File "/Users/prakashgawas/MyFiles/Merinio/Code/Simulation/Learn/DQN/Learning_modules.py", line 250, in get_PFA_action
    probs = self.model(curr_state)
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/Users/prakashgawas/MyFiles/Merinio/Code/Simulation/Learn/DQN/Learning_modules.py", line 94, in forward
    x4 = self.final_layer(x3)
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/torch/nn/modules/linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
 (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/autograd/python_anomaly_mode.cpp:119.)

These issues are often caused by using retain_graph=True in the backward() call. The computation graph is then kept alive, and the next iteration tries to compute gradients through the previous iterations' graphs using stale forward activations (assuming a parameter update took place in between).
Could you explain why you are using this argument?
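
A minimal sketch of that failure mode (a toy two-layer net, not your code): the second linear layer's weight is saved for the backward pass, the optimizer updates it in place, and the retained graph then sees a stale version:

import torch

net = torch.nn.Sequential(torch.nn.Linear(2, 2), torch.nn.Linear(2, 2))
opt = torch.optim.SGD(net.parameters(), lr=0.1)

loss = net(torch.randn(1, 2)).sum()
loss.backward(retain_graph=True)  # keeps the graph and its saved tensors alive
opt.step()  # updates the weights in place, bumping their version counters

loss.backward()  # RuntimeError: one of the variables needed for gradient
                 # computation has been modified by an inplace operation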

If I don't do that, then I get the following error:

Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.
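
For reference, this second error can be reproduced in isolation by calling backward() twice on a graph whose saved tensors were freed by the first call:

import torch

x = torch.ones(3, requires_grad=True)
y = (x * x).sum()
y.backward()  # frees the graph's saved tensors
y.backward()  # RuntimeError: Trying to backward through the graph a second time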

I tried to search for the above issue; most people suggest setting retain_graph=True. I don't understand where the error is in either case.

That suggestion is unfortunately wrong most of the time, and on this discussion board we usually ask users to either explain why retain_graph=True is used (valid use cases certainly exist) or to fix the original issue first.

Based on the first error, you should check which tensor is being reused, since you are accumulating onto the computation graph. I don't know exactly what your training loop looks like, but check whether e.g. the output of a previous iteration is used as a new input. In that case, .detach() the tensor before starting the new forward pass.
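
As a sketch of how the outer loop could look so that each update only backpropagates through the current episode's graph (I'm using gymnasium's CartPole as a stand-in for your environment, and I'm assuming backward() is called without retain_graph=True; your actual env and hyperparameters will differ):

import gymnasium as gym

env = gym.make("CartPole-v1")
model = Policy_Network(state_input_dim=4, action_dim=2, lr=1e-3)
gamma = 0.99

for episode in range(500):
    eps_rewards, eps_log_probs = [], []  # fresh buffers: no stale graphs survive
    state, _ = env.reset()
    done = False
    while not done:
        probs = model(state)
        action, log_prob = model.select_action(probs)
        state, reward, terminated, truncated, _ = env.step(action)
        # if the next input were built from a model output, detach it here:
        # state = state.detach()
        done = terminated or truncated
        eps_log_probs.append(log_prob)
        eps_rewards.append(reward)
    model.update_PFA(eps_rewards, eps_log_probs, gamma)  # backward() once per episode
    model.optimizer.step()  # note: the posted update_PFA never calls step()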