RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.DoubleTensor [256, 3]],

I am implementing the PPO RL algorithm in PyTorch and getting the above error. However, I am unable to find why this error is occurring.

Below is my code.

#main loop for generating data into replaybuffer.

# Rollout loop: steps the environment and assembles per-step transitions.
# NOTE(review): this excerpt is elided in places (unfinished statements, empty
# `if` bodies, no visible appends to the buffers), so it is not runnable as shown.
while count<max_timesteps-1:
        episode_length += 1
        # On episode end, rebuild zero tensors of shape (lstm_layers, 1, lstm_size)
        # (presumably the LSTM cell/hidden state -- confirm against the full file).
        if done:
            cx = Variable(torch.zeros(params.lstm_layers, 1, params.lstm_size))
            hx = Variable(torch.zeros(params.lstm_layers, 1, params.lstm_size))
            # NOTE(review): the next two statements are truncated in the excerpt.
            cx = Variable(
            hx = Variable(

        # Per-rollout buffers; where they are filled is not visible in this excerpt.
        values = []
        log_probs = []
        rewards = []
        entropies = []
        adv = []
        st = []
        rew = []
        act = []
        # Inner stepping loop; uses the same condition as the outer loop.
        while count<max_timesteps-1:
            St = (Variable(state.unsqueeze(0)))
            value, action_values = model(St)
            # NOTE(review): built-in max() iterates over the first dimension of the
            # tensor; if action_values is (1, n) this subtracts the row from itself
            # and the softmax becomes uniform -- confirm whether
            # action_values.max() was intended.
            prob = F.softmax(action_values - max(action_values), dim = -1)
            log_prob = torch.log(prob).reshape(-1,)
            entropy = -(log_prob * prob).sum(1, keepdim=True)
            # Sample an action from the categorical distribution defined by prob.
            m = categorical.Categorical(prob)
            action = m.sample().reshape(-1,)
            # Log-probability of the sampled action.
            log_prob_a = log_prob.gather(0, Variable(action))
            state, reward, done = env.step(action)
            # Clip the reward to [-1, 1].
            reward = max(min(reward, 1), -1)
            count +=1
            if done:
                episode_length = 0
                state = env.reset()
            print("rank ",rank," action:",action, "reward ",reward)

            # NOTE(review): the body of this branch is missing from the excerpt.
            if done:

        # Bootstrap value for the return: zero by default, otherwise taken from the
        # model's value estimate for the current state (assignment truncated below).
        R = torch.zeros(1, 1)
        if not done:
            St = Variable(state.unsqueeze(0))
            value, _ = model(Variable(St))
            R =
        R = Variable(R)
        gae = torch.zeros(1, 1)
        # Walk the rollout backwards, accumulating the discounted return R and the
        # gamma*tau-discounted sum of TD residuals in gae.
        for i in reversed(range(len(rewards))):
            R = params.gamma * R + rewards[i]
            # advantage = R - values[i]
            # TD residual computed from detached values (.data); indexing
            # values[i + 1] requires values to hold one more entry than rewards.
            TD = rewards[i] + params.gamma * values[i + 1].data - values[i].data
            gae = gae * params.gamma * params.tau + TD

        # NOTE(review): log_probs[i] and values[i] are placed in the transition as-is.
        # If they still carry autograd history from the rollout forward pass, a later
        # backward() that runs after the parameters were updated in place will raise
        # "modified by an inplace operation". Consider storing detached copies --
        # confirm against the code that appends to these lists.
        for i in reversed(range(len(rewards))):
            transition = [st[i], adv[i], rew[i], act[i], log_probs[i], values[i]]

#ActorCritic Class

class ActorCritic(torch.nn.Module):
    """Actor-critic network with a shared MLP trunk and a separate LSTM encoder.

    ``forward`` runs only the fully connected trunk and the two heads;
    the LSTM is exposed separately through ``get_state``.
    """

    def __init__(self, params):
        """Build the layers from ``params``.

        Reads ``params.num_inputs``, ``params.action_dim``,
        ``params.hidden_size`` and ``params.lstm_layers``.
        """
        super().__init__()

        input_dim = params.num_inputs
        self.num_inputs = input_dim
        self.action_space = params.action_dim
        self.hidden_size = params.hidden_size

        # Sub-modules are created in a fixed order (LSTM, trunk, critic head,
        # actor head) so that seeded weight initialisation stays reproducible.
        self.lstm = nn.LSTM(input_dim, 8, num_layers=params.lstm_layers)
        self.fc1 = nn.Linear(8, 256)
        self.fc2 = nn.Linear(256, 256)
        self.critic_linear = nn.Linear(256, 1)
        self.actor_linear = nn.Linear(256, self.action_space)

    def forward(self, inputs):
        """Return ``(critic, actor)`` head outputs for ``inputs``.

        ``inputs`` is fed straight into ``fc1``, so its last dimension must be 8.
        """
        trunk = F.elu(self.fc2(F.elu(self.fc1(inputs))))
        state_value = self.critic_linear(trunk)
        action_scores = self.actor_linear(trunk)
        return state_value, action_scores

    def get_state(self, inputs):
        """Run the LSTM on ``inputs``, given as ``(sequence, (hx, cx))``.

        Returns ``(output, (hx, cx))`` as produced by the LSTM.
        """
        sequence, (h_prev, c_prev) = inputs
        encoded, (h_next, c_next) = self.lstm(sequence, (h_prev, c_prev))
        return encoded, (h_next, c_next)

#code for training part

# Update step over a random sample of stored transitions (20% of the buffer length).
# NOTE(review): the buffer name is elided on the next line and in the unpacking
# below, so this excerpt is not runnable as shown.
ind = np.random.randint(0, len(, size=int(0.2*len(
                    for i in ind:
                        state, adv, reward, action, old_log_prob, value =[i]
                        # Re-evaluate the stored state with the current model.
                        V, act_val = model(Variable(state))
                        # NOTE(review): built-in max() iterates over the first dimension;
                        # confirm whether act_val.max() was intended.
                        prob = F.softmax(act_val - max(act_val), dim = -1)
                        log_prob = torch.log(prob).reshape(-1,)
                        entropy = -(log_prob * prob).sum(1, keepdim=True)
                        action_log_prob = log_prob.gather(0, Variable(action))
                        # Probability ratio between the current policy and the stored one.
                        # NOTE(review): if old_log_prob still carries autograd history from
                        # the rollout, backward() below also traverses that older graph;
                        # once parameters have been updated in place this raises the
                        # "modified by an inplace operation" error -- consider detaching it.
                        ratio = torch.exp(action_log_prob - old_log_prob)
                        # Clipped surrogate objective with a clip range of 0.2.
                        surr1 = ratio * adv
                        surr2 = torch.clamp(ratio, 1.0 - 0.2,1.0 + 0.2) * adv
                        actor_loss = -torch.min(surr1, surr2).mean()
                        value_loss = 0.5 * (reward - V).pow(2).mean()
                        # entr_loss is computed but not used by either backward() call below.
                        entr_loss = 0.01 * entropy.mean()
                        actor_loss.mean().backward(retain_graph = True)
                        # value_loss already includes a 0.5 factor, so 0.5 is applied twice here.
                        (0.5 * value_loss).mean().backward(retain_graph = True)

                        # Clip the total gradient norm to 40; no optimizer.zero_grad() or
                        # optimizer.step() call is visible in this excerpt.
                        torch.nn.utils.clip_grad_norm_(model.parameters(), 40)

Full traceback:

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.DoubleTensor [256, 3]], which is output 0 of TBackward, is at version 3; expected version 2 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

[W ..\torch\csrc\autograd\python_anomaly_mode.cpp:60] Warning: Error detected in AddmmBackward. Traceback of forward call that caused the error:
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\granthjain\AppData\Roaming\Python\Python37\site-packages\spyder_kernels\console\", line 23, in <module>
  File "C:\Users\granthjain\AppData\Roaming\Python\Python37\site-packages\spyder_kernels\console\", line 332, in main
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\", line 612, in start
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\platform\", line 149, in start
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\asyncio\", line 541, in run_forever
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\asyncio\", line 1786, in _run_once
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\asyncio\", line 88, in _run, *self._args)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\", line 743, in _run_callback
    ret = callback()
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\", line 787, in inner
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\", line 748, in run
    yielded = self.gen.send(value)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\", line 365, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\", line 209, in wrapper
    yielded = next(result)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\", line 268, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\", line 209, in wrapper
    yielded = next(result)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\", line 545, in execute_request
    user_expressions, allow_stdin,
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\", line 209, in wrapper
    yielded = next(result)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\", line 306, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\", line 2877, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\", line 2922, in _run_cell
    return runner(coro)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\", line 68, in _pseudo_sync_runner
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\", line 3146, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\", line 3337, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-1-a2ecfc8e1e15>", line 1, in <module>
    runfile('C:/Users/granthjain/Desktop/startup_code/get_data/', wdir='C:/Users/granthjain/Desktop/startup_code/get_data')
  File "C:\Users\granthjain\AppData\Roaming\Python\Python37\site-packages\spyder_kernels\customize\", line 541, in runfile
  File "C:\Users\granthjain\AppData\Roaming\Python\Python37\site-packages\spyder_kernels\customize\", line 440, in exec_code
    exec(compiled, ns_globals, ns_locals)
  File "C:\Users\granthjain\Desktop\get_data\", line 376, in <module>
    train(0, params, model, optimizer, ticker, sc, r)
  File "C:\Users\granthjain\Desktop\get_data\", line 128, in train
    value, action_values = model(St)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\torch\nn\modules\", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "C:\Users\granthjain\Desktop\get_data\", line 226, in forward
    actor = self.actor_linear(f2)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\torch\nn\modules\", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\torch\nn\modules\", line 91, in forward
    return F.linear(input, self.weight, self.bias)
  File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\torch\nn\", line 1674, in linear
    ret = torch.addmm(bias, input, weight.t())
 (function print_stack)

The traceback reports a DoubleTensor of shape [256, 3]; the actor_linear fully connected layer is of size 256 x 3.

I am unable to figure out what is wrong with the code.

Try to add .clone() operations to tensors to isolate the offending line of code.
I cannot find anything suspicious by skimming through your code.

PS: Variables are deprecated since PyTorch 0.4, so you can use tensors now :wink:


I tried cloning everything and removed Variable, but I am still getting this error. I have encountered this error before, but that time I was able to identify the variable that was updated in place.

Here, however, I am unable to find which variable is being updated in place.

Please help