I’m getting this error but i don’t understand why it has a problem tried using set_detect_anomaly
but still i can’t find the replace operation
class DQN(mp.Process):
def __init__(self,episodes,eps_start,eps_end,gamma,min_replay_size,batch_size,Target_network,T_max,optimizer):
super(DQN, self).__init__()
self.episodes=episodes
self.eps_start=eps_start
self.eps_end=eps_end
self.decay=episodes
self.gamma=gamma
self.min_replay_size=min_replay_size
self.episode_reward=0
self.rew_buffer=[0]
self.batch_size=batch_size
self.target_update_freq=1000
self.device=T.device('cuda' if T.cuda.is_available() else 'cpu')
self.T_max=T_max
self.env=environment()
self.action_list= self.env.action_list
self.rb=replay_buffer(self.min_replay_size,self.action_list,self.env)
num_actions=self.env.num_actions
self.optimizer=optimizer
#num_action*2=when we would like to convert the state into onehotvector we need to concatinate the two vector 22+22
self.online=Network(num_actions*2,num_actions)
self.target=Network(num_actions*2,num_actions)
self.target.load_state_dict(self.online.state_dict())
#self.optimizer=T.optim.Adam(self.online.parameters(),lr=1e-2)
self.global_target=Target_network
self.gamma_list=[]
self.mean_reward=[]
self.done_location=[]
self.loss_list=[]
self.number_of_episodes=[]
self.stat_dict={'episodes':[],'epsilon':[],'explore_exploit':[],'time':[]}
def plot(self):
dec = {'number_of_episodes':self.number_of_episodes,'mean_reward':self.mean_reward,'gamma':self.gamma_list,'loss':self.loss_list,'explore_exploit':self.stat_dict['explore_exploit']}
fig, ax =plt.subplots(1,3,figsize=(15,5))
sns.lineplot(data=dec, x="number_of_episodes", y="mean_reward",ax=ax[0])
sns.lineplot(data=dec, x="number_of_episodes", y="loss",ax=ax[1])
sns.countplot(data=dec,x='explore_exploit', ax=ax[2])
plt.show()
def run(self):
obs=self.env.reset()
for i in range(self.episodes):
itr=0
epsilon=np.exp(-i/(self.episodes/2))
rnd_sample=random.random()
self.stat_dict['episodes'].append(i)
self.stat_dict['epsilon'].append(epsilon)
if rnd_sample <=epsilon:
#action=np.random.choice(self.action_list)
action=self.env.get_random_neighbors(obs,rnd_sample)
self.stat_dict['explore_exploit'].append('explore')
else:
source,end=self.env.state_dec(obs)
v_obs=self.env.state_to_vector(source,end)
t_obs=T.tensor([v_obs])
action=self.online.act(t_obs)
self.stat_dict['explore_exploit'].append('exploit')
#fill transition and append to replay buffer
new_obs,rew,done=self.env.step(obs,action)
transition=(obs,action,rew,done,new_obs)
#replay_buffer.append(transition)
self.rb.update(transition)
obs=new_obs
self.episode_reward+=rew
if done:
obs=self.env.reset()
self.rew_buffer.append(self.episode_reward)
self.episode_reward=0.0
self.done_location.append(i)
#start gradient step
transitions=random.sample(self.rb.replay_buffer,self.batch_size)
obses=np.asarray([t[0] for t in transitions])
actions=np.asarray([t[1] for t in transitions])
rews=np.asarray([t[2] for t in transitions])
dones=np.asarray([t[3] for t in transitions])
new_obses=np.asarray([t[4] for t in transitions])
obses_t=T.as_tensor(obses,dtype=T.float32).to(self.device)
actions_t=T.as_tensor(actions,dtype=T.int64).to(self.device)
rews_t=T.as_tensor(rews,dtype=T.float32).to(self.device)
dones_t=T.as_tensor(dones,dtype=T.float32).to(self.device)
new_obses_t=T.as_tensor(new_obses,dtype=T.float32).to(self.device)
actions_t=actions_t.unsqueeze(-1)
list_new_obses_t=T.tensor(self.env.list_of_vecotrs(new_obses_t)).to(self.device)
target_q_values=self.target(list_new_obses_t)##
#target_q_values=target(obses_t)
max_target_q_values=target_q_values.max(dim=1,keepdim=False)[0]
targets=rews_t+gamma*(1-dones_t)*max_target_q_values
targets=targets.unsqueeze(-1)
list_obses_t=T.tensor(self.env.list_of_vecotrs(obses_t)).to(self.device)
q_values=self.online(list_obses_t)
#q_values=online(obses_t)
action_q_values=T.gather(input=q_values,dim=1,index=actions_t)
#warning UserWarning: Using a target size (torch.Size([24, 24])) that is different to the input size (torch.Size([24, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
if i% self.T_max==0 or done:
loss=nn.functional.mse_loss(action_q_values,targets)
#loss=rmsle(action_q_values,targets)
self.loss_list.append(loss.item())
self.optimizer.zero_grad()
for target_parm,global_param in zip(self.target.parameters(),
self.global_target.parameters()):
global_param._grad=target_parm.grad
self.optimizer.step()
self.target.load_state_dict(self.global_target.state_dict())
loss.backward()
#self.optimizer.step()
#plot
self.mean_reward.append(np.mean(self.rew_buffer))
self.number_of_episodes.append(i)
self.gamma_list.append(self.gamma)
dec = {'number_of_episodes':self.number_of_episodes,'mean_reward':self.mean_reward,'gamma':self.gamma_list}
#clear_output(wait=True)
#sns.lineplot(data=dec, x="number_of_episodes", y="mean_reward")
#plt.show()
if i % self.target_update_freq==0:
self.target.load_state_dict(self.online.state_dict())
if i % 1000 ==0:
print('step',i,'avg rew',round(np.mean(self.rew_buffer),2))
pass
self.plot()
def test(self):
obs=self.env.reset()
done=False
sp=[obs]
while not done:
current,end=self.state_dec(obs)
vector=self.env.state_to_vector(current,end)
t_vector=T.tensor(vector).to(device)
action=self.online.act(t_vector)
new_obs,rw,done=self.env.step(obs,action)
sp.append(new_obs)
obs=new_obs
return sp
it gives me this error
RuntimeError: one of the variables needed for gradient computation has been modified by an in place operation: [torch.FloatTensor [32, 22]], which is output 0 of TBackward, is at version 3; expected version 2 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
this is the full trace
CPU count= 1
start
join
/usr/local/lib/python3.7/dist-packages/torch/autograd/__init__.py:149: UserWarning:
Error detected in AddmmBackward. Traceback of forward call that caused the error:
File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.7/dist-packages/traitlets/config/application.py", line 845, in launch_instance
app.start()
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelapp.py", line 499, in start
self.io_loop.start()
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 132, in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
self._run_once()
File "/usr/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
handle._run()
File "/usr/lib/python3.7/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.7/dist-packages/tornado/ioloop.py", line 758, in _run_callback
ret = callback()
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 535, in <lambda>
self.io_loop.add_callback(lambda: self._handle_events(self.socket, 0))
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 451, in _handle_events
self._handle_recv()
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 434, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2822, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-227-c9ef4c7bc98a>", line 22, in <module>
[w.start() for w in workers]
File "<ipython-input-227-c9ef4c7bc98a>", line 22, in <listcomp>
[w.start() for w in workers]
File "/usr/lib/python3.7/multiprocessing/process.py", line 112, in start
self._popen = self._Popen(self)
File "/usr/lib/python3.7/multiprocessing/context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/usr/lib/python3.7/multiprocessing/context.py", line 277, in _Popen
return Popen(process_obj)
File "/usr/lib/python3.7/multiprocessing/popen_fork.py", line 20, in __init__
self._launch(process_obj)
File "/usr/lib/python3.7/multiprocessing/popen_fork.py", line 74, in _launch
code = process_obj._bootstrap()
File "/usr/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "<ipython-input-226-350e591ef6b6>", line 110, in run
target_q_values=self.target(list_new_obses_t)##
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "<ipython-input-221-9cf4c8531576>", line 17, in forward
x=self.f4(x)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py", line 96, in forward
return F.linear(input, self.weight, self.bias)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py", line 1847, in linear
return torch._C._nn.linear(input, weight, bias)
(Triggered internally at /pytorch/torch/csrc/autograd/python_anomaly_mode.cpp:104.)
Process DQN-79:
Traceback (most recent call last):
File "/usr/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "<ipython-input-226-350e591ef6b6>", line 138, in run
loss.backward()
File "/usr/local/lib/python3.7/dist-packages/torch/_tensor.py", line 255, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "/usr/local/lib/python3.7/dist-packages/torch/autograd/__init__.py", line 149, in backward
allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [32, 22]], which is output 0 of TBackward, is at version 3; expected version 2 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck