RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.DoubleTensor [256, 3]],

I am implementing the PPO RL algorithm in PyTorch and I am getting the above error. However, I am unable to find why this error is occurring.

Below is my code.

# main loop for generating data into the replay buffer

while count<max_timesteps-1:
        episode_length += 1
        if done:
            cx = Variable(torch.zeros(params.lstm_layers, 1, params.lstm_size))
            hx = Variable(torch.zeros(params.lstm_layers, 1, params.lstm_size))
        else:
            cx = Variable(cx.data)
            hx = Variable(hx.data)

        values = []
        log_probs = []
        rewards = []
        entropies = []
        adv = []
        st = []
        rew = []
        act = []
        while count<max_timesteps-1:
            St = (Variable(state.unsqueeze(0)))
            st.append(St)
            value, action_values = model(St)
            prob = F.softmax(action_values - max(action_values), dim = -1)
            log_prob = torch.log(prob).reshape(-1,)
            entropy = -(log_prob * prob).sum(1, keepdim=True)
            entropies.append(entropy)
            m = categorical.Categorical(prob)
            action = m.sample().reshape(-1,)
            log_prob_a = log_prob.gather(0, Variable(action))
            act.append(action)
            state, reward, done = env.step(action)
            reward = max(min(reward, 1), -1)
            
            count +=1
            
            if done:
                episode_length = 0
                state = env.reset()
                
            
            values.append(value)
            log_probs.append(log_prob_a)
            rewards.append(reward)
            print("rank ",rank," action:",action, "reward ",reward)

            if done:
                break

        R = torch.zeros(1, 1)
        if not done:
            St = Variable(state.unsqueeze(0))
            value, _ = model(Variable(St))
            R = value.data
        values.append(Variable(R))
        R = Variable(R)
        gae = torch.zeros(1, 1)
        for i in reversed(range(len(rewards))):
            R = params.gamma * R + rewards[i]
            rew.insert(0,R)
            # advantage = R - values[i]
            TD = rewards[i] + params.gamma * values[i + 1].data - values[i].data
            gae = gae * params.gamma * params.tau + TD
            adv.insert(0,gae)

        for i in reversed(range(len(rewards))):
            transition = [st[i], adv[i], rew[i], act[i], log_probs[i], values[i]]
            r.add(transition)

#ActorCritic Class

class ActorCritic(torch.nn.Module):

    def __init__(self, params):
        super(ActorCritic, self).__init__()

        self.num_inputs = params.num_inputs
        self.action_space = params.action_dim
        self.hidden_size = params.hidden_size
        num_inputs = params.num_inputs
        self.lstm = nn.LSTM(num_inputs, 8,num_layers = params.lstm_layers)
        self.fc1 = nn.Linear(8, 256)
        self.fc1.apply(init_weights)
        self.fc2 = nn.Linear(256, 256)
        self.fc2.apply(init_weights)
        # self.fc3 = nn.Linear(256, 256)
        # self.fc3.apply(init_weights)
        self.critic_linear = nn.Linear(256, 1)
        self.critic_linear.apply(init_weights)
        self.actor_linear = nn.Linear(256, self.action_space)
        self.actor_linear.apply(init_weights)
        self.train()

    def forward(self, inputs):
        f1 = F.elu(self.fc1(inputs))
        f2 = F.elu(self.fc2(f1))
        # x = torch.tanh(self.fc3(x))
        critic = self.critic_linear(f2)
        actor = self.actor_linear(f2)
        return  critic, actor
    
    
    def get_state(self,inputs):
        inputs,(hx,cx) = inputs
        st, (hx,cx) = self.lstm(inputs,(hx,cx))
        return st, (hx,cx)

#code for training part

# sample a random 20% of the replay buffer
ind = np.random.randint(0, len(r.storage), size=int(0.2*len(r.storage)))
for i in ind:
    state, adv, reward, action, old_log_prob, value = r.storage[i]
    V, act_val = model(Variable(state))
    prob = F.softmax(act_val - max(act_val), dim = -1)
    log_prob = torch.log(prob).reshape(-1,)
    entropy = -(log_prob * prob).sum(1, keepdim=True)
    action_log_prob = log_prob.gather(0, Variable(action))

    # probability ratio between the current and the old policy
    ratio = torch.exp(action_log_prob - old_log_prob)

    # PPO clipped surrogate objective
    surr1 = ratio * adv
    surr2 = torch.clamp(ratio, 1.0 - 0.2, 1.0 + 0.2) * adv
    actor_loss = -torch.min(surr1, surr2).mean()
    value_loss = 0.5 * (reward - V).pow(2).mean()
    entr_loss = 0.01 * entropy.mean()

    optimizer.zero_grad()
    actor_loss.mean().backward(retain_graph = True)
    (0.5 * value_loss).mean().backward(retain_graph = True)
    entr_loss.mean().backward()

    torch.nn.utils.clip_grad_norm_(model.parameters(), 40)
    optimizer.step()

Full traceback:

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.DoubleTensor [256, 3]], which is output 0 of TBackward, is at version 3; expected version 2 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!


[W ..\torch\csrc\autograd\python_anomaly_mode.cpp:60] Warning: Error detected in AddmmBackward. Traceback of forward call that caused the error:
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\granthjain\AppData\Roaming\Python\Python37\site-packages\spyder_kernels\console\__main__.py", line 23, in <module>
    start.main()
  File "C:\Users\granthjain\AppData\Roaming\Python\Python37\site-packages\spyder_kernels\console\start.py", line 332, in main
    kernel.start()
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\kernelapp.py", line 612, in start
    self.io_loop.start()
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\platform\asyncio.py", line 149, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\asyncio\base_events.py", line 541, in run_forever
    self._run_once()
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\asyncio\base_events.py", line 1786, in _run_once
    handle._run()
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\ioloop.py", line 743, in _run_callback
    ret = callback()
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\gen.py", line 787, in inner
    self.run()
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\gen.py", line 748, in run
    yielded = self.gen.send(value)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\kernelbase.py", line 365, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\kernelbase.py", line 268, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\kernelbase.py", line 545, in execute_request
    user_expressions, allow_stdin,
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\ipkernel.py", line 306, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\interactiveshell.py", line 2877, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\interactiveshell.py", line 2922, in _run_cell
    return runner(coro)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\interactiveshell.py", line 3146, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\interactiveshell.py", line 3337, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-1-a2ecfc8e1e15>", line 1, in <module>
    runfile('C:/Users/granthjain/Desktop/startup_code/get_data/ppo_try.py', wdir='C:/Users/granthjain/Desktop/startup_code/get_data')
  File "C:\Users\granthjain\AppData\Roaming\Python\Python37\site-packages\spyder_kernels\customize\spydercustomize.py", line 541, in runfile
    post_mortem=post_mortem)
  File "C:\Users\granthjain\AppData\Roaming\Python\Python37\site-packages\spyder_kernels\customize\spydercustomize.py", line 440, in exec_code
    exec(compiled, ns_globals, ns_locals)
  File "C:\Users\granthjain\Desktop\get_data\ppo_try.py", line 376, in <module>
    train(0, params, model, optimizer, ticker, sc, r)
  File "C:\Users\granthjain\Desktop\get_data\ppo_try.py", line 128, in train
    value, action_values = model(St)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "C:\Users\granthjain\Desktop\get_data\ppo_try.py", line 226, in forward
    actor = self.actor_linear(f2)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\torch\nn\modules\linear.py", line 91, in forward
    return F.linear(input, self.weight, self.bias)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\torch\nn\functional.py", line 1674, in linear
    ret = torch.addmm(bias, input, weight.t())
 (function print_stack)

The traceback points to a DoubleTensor of shape [256, 3]; the actor_linear fully connected layer is of size 256 x 3.
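
As far as I can tell the shape matches the transposed weight of actor_linear: the last frames of the traceback show that F.linear runs torch.addmm(bias, input, weight.t()), and for a Linear(256, 3) layer weight.t() is exactly [256, 3]. A quick standalone check (not from my script):

import torch.nn as nn

layer = nn.Linear(256, 3)
print(layer.weight.shape)        # torch.Size([3, 256])
print(layer.weight.t().shape)    # torch.Size([256, 3]) -> matches the tensor in the error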

I am unable to figure out what is wrong with the code.

Try to add .clone() operations to tensors to isolate the offending line of code.
I cannot find anything suspicious by skimming through your code.
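
For example, in a standalone toy snippet (not your code) the pattern looks like this; if the error disappears once you clone the tensor that feeds a particular op, the in-place modification of that tensor is the culprit:

import torch

x = torch.randn(4, 3, requires_grad=True)
w = torch.randn(3, 2, requires_grad=True)

h = x * 2.0              # the backward of this op does not need `h` itself
out = h @ w              # the matmul saves `h` to compute the gradient w.r.t. `w`
h.add_(1.0)              # the in-place update bumps `h`'s version counter

out.sum().backward()     # raises the same "modified by an inplace operation" error

Replacing the matmul line with out = h.clone() @ w makes the error go away, which tells you h.add_(1.0) is the offending in-place operation.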

PS: Variables are deprecated since PyTorch 0.4, so you can use tensors now :wink:
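
E.g. a minimal sketch with placeholder sizes:

import torch

lstm_layers, lstm_size = 1, 8                 # placeholder sizes

cx = torch.zeros(lstm_layers, 1, lstm_size)   # instead of Variable(torch.zeros(...))
hx = torch.zeros(lstm_layers, 1, lstm_size)

cx, hx = cx.detach(), hx.detach()             # instead of Variable(cx.data) / Variable(hx.data)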

Hi,

I tried cloning everything and removed Variable, but I am still getting this error. I have encountered this error before as well, and in that case I was able to identify the variable that was being updated in place.

But here I am unable to tell which variable is getting updated in place.
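
As far as I understand, the version numbers in the error message come from the version counter that every in-place op bumps, e.g. in a standalone snippet (not from my script):

import torch

x = torch.randn(3, 3, requires_grad=True)
h = x * 2.0
print(h._version)   # 0
h.add_(1.0)         # any in-place op bumps the counter
print(h._version)   # 1

But I still cannot see which tensor in my script goes from version 2 to version 3.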

Please help