One of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [32, 22]]

Rabee_Qasem · August 1, 2021, 8:15am

I’m getting this error but i don’t understand why it has a problem tried using set_detect_anomaly but still i can’t find the replace operation


class DQN(mp.Process):
  def __init__(self,episodes,eps_start,eps_end,gamma,min_replay_size,batch_size,Target_network,T_max,optimizer):
      super(DQN, self).__init__()
      self.episodes=episodes
      self.eps_start=eps_start
      self.eps_end=eps_end
      self.decay=episodes
      self.gamma=gamma
      self.min_replay_size=min_replay_size
      self.episode_reward=0
      self.rew_buffer=[0]
      self.batch_size=batch_size
      self.target_update_freq=1000
      self.device=T.device('cuda' if T.cuda.is_available() else 'cpu')
      self.T_max=T_max
      self.env=environment()
      self.action_list= self.env.action_list
      self.rb=replay_buffer(self.min_replay_size,self.action_list,self.env)
      num_actions=self.env.num_actions
      self.optimizer=optimizer

      #num_action*2=when we would like to convert the state into onehotvector we need to concatinate the two vector 22+22
      self.online=Network(num_actions*2,num_actions)
      self.target=Network(num_actions*2,num_actions)
      self.target.load_state_dict(self.online.state_dict())
      #self.optimizer=T.optim.Adam(self.online.parameters(),lr=1e-2)
      self.global_target=Target_network

      self.gamma_list=[]
      self.mean_reward=[]
      self.done_location=[]
      self.loss_list=[]
      self.number_of_episodes=[]
      self.stat_dict={'episodes':[],'epsilon':[],'explore_exploit':[],'time':[]}
  
  def plot(self):
    dec = {'number_of_episodes':self.number_of_episodes,'mean_reward':self.mean_reward,'gamma':self.gamma_list,'loss':self.loss_list,'explore_exploit':self.stat_dict['explore_exploit']}
    fig, ax =plt.subplots(1,3,figsize=(15,5))
    sns.lineplot(data=dec, x="number_of_episodes", y="mean_reward",ax=ax[0])
    sns.lineplot(data=dec, x="number_of_episodes", y="loss",ax=ax[1])
    sns.countplot(data=dec,x='explore_exploit', ax=ax[2])
    plt.show()

  def run(self):
    obs=self.env.reset()
    for i in range(self.episodes):
      
      itr=0
      epsilon=np.exp(-i/(self.episodes/2))
      rnd_sample=random.random()
      self.stat_dict['episodes'].append(i)
      self.stat_dict['epsilon'].append(epsilon)

      if rnd_sample <=epsilon:
        #action=np.random.choice(self.action_list)
        action=self.env.get_random_neighbors(obs,rnd_sample)
        self.stat_dict['explore_exploit'].append('explore')

      else:
        source,end=self.env.state_dec(obs)
        v_obs=self.env.state_to_vector(source,end)
        t_obs=T.tensor([v_obs])
        action=self.online.act(t_obs)
        self.stat_dict['explore_exploit'].append('exploit')

      #fill transition and append to replay buffer

      
      new_obs,rew,done=self.env.step(obs,action)

      transition=(obs,action,rew,done,new_obs)

      #replay_buffer.append(transition)
      self.rb.update(transition)
      

      obs=new_obs
      self.episode_reward+=rew


      if done:
        obs=self.env.reset()
        self.rew_buffer.append(self.episode_reward)
        self.episode_reward=0.0
        self.done_location.append(i)


      #start gradient step
      transitions=random.sample(self.rb.replay_buffer,self.batch_size)

      obses=np.asarray([t[0] for t in transitions])
      actions=np.asarray([t[1] for t in transitions])
      rews=np.asarray([t[2] for t in transitions])
      dones=np.asarray([t[3] for t in transitions])
      new_obses=np.asarray([t[4] for t in transitions])


      obses_t=T.as_tensor(obses,dtype=T.float32).to(self.device)
      actions_t=T.as_tensor(actions,dtype=T.int64).to(self.device)
      rews_t=T.as_tensor(rews,dtype=T.float32).to(self.device)
      dones_t=T.as_tensor(dones,dtype=T.float32).to(self.device)
      new_obses_t=T.as_tensor(new_obses,dtype=T.float32).to(self.device)
      actions_t=actions_t.unsqueeze(-1)

      
      list_new_obses_t=T.tensor(self.env.list_of_vecotrs(new_obses_t)).to(self.device)
      target_q_values=self.target(list_new_obses_t)##
      #target_q_values=target(obses_t)


      max_target_q_values=target_q_values.max(dim=1,keepdim=False)[0]
      targets=rews_t+gamma*(1-dones_t)*max_target_q_values
      targets=targets.unsqueeze(-1)
      
      list_obses_t=T.tensor(self.env.list_of_vecotrs(obses_t)).to(self.device)
      q_values=self.online(list_obses_t)
      #q_values=online(obses_t)
      action_q_values=T.gather(input=q_values,dim=1,index=actions_t)

      
      #warning UserWarning: Using a target size (torch.Size([24, 24])) that is different to the input size (torch.Size([24, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
      if i% self.T_max==0 or done:

        loss=nn.functional.mse_loss(action_q_values,targets)
        #loss=rmsle(action_q_values,targets)
        self.loss_list.append(loss.item())
        self.optimizer.zero_grad()
        for target_parm,global_param in zip(self.target.parameters(),
                                               self.global_target.parameters()):
            global_param._grad=target_parm.grad
        self.optimizer.step()
        self.target.load_state_dict(self.global_target.state_dict())

        
        loss.backward()
        #self.optimizer.step()

      #plot
      self.mean_reward.append(np.mean(self.rew_buffer))
      self.number_of_episodes.append(i)
      self.gamma_list.append(self.gamma)
      dec = {'number_of_episodes':self.number_of_episodes,'mean_reward':self.mean_reward,'gamma':self.gamma_list}
      #clear_output(wait=True)
      #sns.lineplot(data=dec, x="number_of_episodes", y="mean_reward")
      #plt.show()

      

      if i % self.target_update_freq==0:
        self.target.load_state_dict(self.online.state_dict())
      if i % 1000 ==0:
        print('step',i,'avg rew',round(np.mean(self.rew_buffer),2))
        pass
    self.plot()



    def test(self):
      obs=self.env.reset()
      done=False
      sp=[obs]
      while not done:
        current,end=self.state_dec(obs)
        vector=self.env.state_to_vector(current,end)
        t_vector=T.tensor(vector).to(device)
        action=self.online.act(t_vector)
        new_obs,rw,done=self.env.step(obs,action)
        sp.append(new_obs)
        obs=new_obs
      return sp

it gives me this error

RuntimeError: one of the variables needed for gradient computation has been modified by an in place operation: [torch.FloatTensor [32, 22]], which is output 0 of TBackward, is at version 3; expected version 2 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

this is the full trace

CPU count= 1
start
join
/usr/local/lib/python3.7/dist-packages/torch/autograd/__init__.py:149: UserWarning:

Error detected in AddmmBackward. Traceback of forward call that caused the error:
  File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.7/dist-packages/traitlets/config/application.py", line 845, in launch_instance
    app.start()
  File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelapp.py", line 499, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
    self._run_once()
  File "/usr/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
    handle._run()
  File "/usr/lib/python3.7/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "/usr/local/lib/python3.7/dist-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 535, in <lambda>
    self.io_loop.add_callback(lambda: self._handle_events(self.socket, 0))
  File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 451, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 434, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python3.7/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.7/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2822, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-227-c9ef4c7bc98a>", line 22, in <module>
    [w.start() for w in workers]
  File "<ipython-input-227-c9ef4c7bc98a>", line 22, in <listcomp>
    [w.start() for w in workers]
  File "/usr/lib/python3.7/multiprocessing/process.py", line 112, in start
    self._popen = self._Popen(self)
  File "/usr/lib/python3.7/multiprocessing/context.py", line 223, in _Popen
    return _default_context.get_context().Process._Popen(process_obj)
  File "/usr/lib/python3.7/multiprocessing/context.py", line 277, in _Popen
    return Popen(process_obj)
  File "/usr/lib/python3.7/multiprocessing/popen_fork.py", line 20, in __init__
    self._launch(process_obj)
  File "/usr/lib/python3.7/multiprocessing/popen_fork.py", line 74, in _launch
    code = process_obj._bootstrap()
  File "/usr/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "<ipython-input-226-350e591ef6b6>", line 110, in run
    target_q_values=self.target(list_new_obses_t)##
  File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "<ipython-input-221-9cf4c8531576>", line 17, in forward
    x=self.f4(x)
  File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py", line 96, in forward
    return F.linear(input, self.weight, self.bias)
  File "/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py", line 1847, in linear
    return torch._C._nn.linear(input, weight, bias)
 (Triggered internally at  /pytorch/torch/csrc/autograd/python_anomaly_mode.cpp:104.)

Process DQN-79:
Traceback (most recent call last):
  File "/usr/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "<ipython-input-226-350e591ef6b6>", line 138, in run
    loss.backward()
  File "/usr/local/lib/python3.7/dist-packages/torch/_tensor.py", line 255, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
  File "/usr/local/lib/python3.7/dist-packages/torch/autograd/__init__.py", line 149, in backward
    allow_unreachable=True, accumulate_grad=True)  # allow_unreachable flag
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [32, 22]], which is output 0 of TBackward, is at version 3; expected version 2 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck

tom · August 1, 2021, 12:50pm

You seem to have an inplace operation that affects either the inputs or the outputs of the f4 layer in your target module. From the code snippet you shared, it is not clear to me which.

Best regards

Thomas