I am implementing PPO RL algorithm in pytorch and getting above error.However I am unable to find why this error is comming.
Below is my code.
#main loop for generating data into replaybuffer.
while count<max_timesteps-1:
episode_length += 1
if done:
cx = Variable(torch.zeros(params.lstm_layers, 1, params.lstm_size))
hx = Variable(torch.zeros(params.lstm_layers, 1, params.lstm_size))
else:
cx = Variable(cx.data)
hx = Variable(hx.data)
values = []
log_probs = []
rewards = []
entropies = []
adv = []
st = []
rew = []
act = []
while count<max_timesteps-1:
St = (Variable(state.unsqueeze(0)))
st.append(St)
value, action_values = model(St)
prob = F.softmax(action_values - max(action_values), dim = -1)
log_prob = torch.log(prob).reshape(-1,)
entropy = -(log_prob * prob).sum(1, keepdim=True)
entropies.append(entropy)
m = categorical.Categorical(prob)
action = m.sample().reshape(-1,)
log_prob_a = log_prob.gather(0, Variable(action))
act.append(action)
state, reward, done = env.step(action)
reward = max(min(reward, 1), -1)
count +=1
if done:
episode_length = 0
state = env.reset()
values.append(value)
log_probs.append(log_prob_a)
rewards.append(reward)
print("rank ",rank," action:",action, "reward ",reward)
if done:
break
R = torch.zeros(1, 1)
if not done:
St = Variable(state.unsqueeze(0))
value, _ = model(Variable(St))
R = value.data
values.append(Variable(R))
R = Variable(R)
gae = torch.zeros(1, 1)
for i in reversed(range(len(rewards))):
R = params.gamma * R + rewards[i]
rew.insert(0,R)
# advantage = R - values[i]
TD = rewards[i] + params.gamma * values[i + 1].data - values[i].data
gae = gae * params.gamma * params.tau + TD
adv.insert(0,gae)
for i in reversed(range(len(rewards))):
transition = [st[i], adv[i], rew[i], act[i], log_probs[i], values[i]]
r.add(transition)
#ActorCritic Class
class ActorCritic(torch.nn.Module):
def __init__(self, params):
super(ActorCritic, self).__init__()
self.num_inputs = params.num_inputs
self.action_space = params.action_dim
self.hidden_size = params.hidden_size
num_inputs = params.num_inputs
self.lstm = nn.LSTM(num_inputs, 8,num_layers = params.lstm_layers)
self.fc1 = nn.Linear(8, 256)
self.fc1.apply(init_weights)
self.fc2 = nn.Linear(256, 256)
self.fc2.apply(init_weights)
# self.fc3 = nn.Linear(256, 256)
# self.fc3.apply(init_weights)
self.critic_linear = nn.Linear(256, 1)
self.critic_linear.apply(init_weights)
self.actor_linear = nn.Linear(256, self.action_space)
self.actor_linear.apply(init_weights)
self.train()
def forward(self, inputs):
f1 = F.elu(self.fc1(inputs))
f2 = F.elu(self.fc2(f1))
# x = torch.tanh(self.fc3(x))
critic = self.critic_linear(f2)
actor = self.actor_linear(f2)
return critic, actor
def get_state(self,inputs):
inputs,(hx,cx) = inputs
st, (hx,cx) = self.lstm(inputs,(hx,cx))
return st, (hx,cx)
#code for training part
ind = np.random.randint(0, len(r.storage), size=int(0.2*len(r.storage)))
for i in ind:
state, adv, reward, action, old_log_prob, value = r.storage[i]
V, act_val = model(Variable(state))
prob = F.softmax(act_val - max(act_val), dim = -1)
log_prob = torch.log(prob).reshape(-1,)
entropy = -(log_prob * prob).sum(1, keepdim=True)
action_log_prob = log_prob.gather(0, Variable(action))
ratio = torch.exp(action_log_prob - old_log_prob)
surr1 = ratio * adv
surr2 = torch.clamp(ratio, 1.0 - 0.2,1.0 + 0.2) * adv
actor_loss = -torch.min(surr1, surr2).mean()
value_loss = 0.5 * (reward - V).pow(2).mean()
entr_loss = 0.01 * entropy.mean()
optimizer.zero_grad()
actor_loss.mean().backward(retain_graph = True)
(0.5 * value_loss).mean().backward(retain_graph = True)
entr_loss.mean().backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 40)
optimizer.step()
Full traceback:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.DoubleTensor [256, 3]], which is output 0 of TBackward, is at version 3; expected version 2 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
[W ..\torch\csrc\autograd\python_anomaly_mode.cpp:60] Warning: Error detected in AddmmBackward. Traceback of forward call that caused the error:
File "C:\Users\granthjain\anaconda\envs\env_full\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "C:\Users\granthjain\anaconda\envs\env_full\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Users\granthjain\AppData\Roaming\Python\Python37\site-packages\spyder_kernels\console\__main__.py", line 23, in <module>
start.main()
File "C:\Users\granthjain\AppData\Roaming\Python\Python37\site-packages\spyder_kernels\console\start.py", line 332, in main
kernel.start()
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\kernelapp.py", line 612, in start
self.io_loop.start()
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\platform\asyncio.py", line 149, in start
self.asyncio_loop.run_forever()
File "C:\Users\granthjain\anaconda\envs\env_full\lib\asyncio\base_events.py", line 541, in run_forever
self._run_once()
File "C:\Users\granthjain\anaconda\envs\env_full\lib\asyncio\base_events.py", line 1786, in _run_once
handle._run()
File "C:\Users\granthjain\anaconda\envs\env_full\lib\asyncio\events.py", line 88, in _run
self._context.run(self._callback, *self._args)
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\ioloop.py", line 690, in <lambda>
lambda f: self._run_callback(functools.partial(callback, future))
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\ioloop.py", line 743, in _run_callback
ret = callback()
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\gen.py", line 787, in inner
self.run()
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\gen.py", line 748, in run
yielded = self.gen.send(value)
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\kernelbase.py", line 365, in process_one
yield gen.maybe_future(dispatch(*args))
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\gen.py", line 209, in wrapper
yielded = next(result)
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\kernelbase.py", line 268, in dispatch_shell
yield gen.maybe_future(handler(stream, idents, msg))
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\gen.py", line 209, in wrapper
yielded = next(result)
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\kernelbase.py", line 545, in execute_request
user_expressions, allow_stdin,
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\tornado\gen.py", line 209, in wrapper
yielded = next(result)
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\ipkernel.py", line 306, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\interactiveshell.py", line 2877, in run_cell
raw_cell, store_history, silent, shell_futures)
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\interactiveshell.py", line 2922, in _run_cell
return runner(coro)
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
coro.send(None)
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\interactiveshell.py", line 3146, in run_cell_async
interactivity=interactivity, compiler=compiler, result=result)
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\interactiveshell.py", line 3337, in run_ast_nodes
if (await self.run_code(code, result, async_=asy)):
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\IPython\core\interactiveshell.py", line 3417, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-1-a2ecfc8e1e15>", line 1, in <module>
runfile('C:/Users/granthjain/Desktop/startup_code/get_data/ppo_try.py', wdir='C:/Users/granthjain/Desktop/startup_code/get_data')
File "C:\Users\granthjain\AppData\Roaming\Python\Python37\site-packages\spyder_kernels\customize\spydercustomize.py", line 541, in runfile
post_mortem=post_mortem)
File "C:\Users\granthjain\AppData\Roaming\Python\Python37\site-packages\spyder_kernels\customize\spydercustomize.py", line 440, in exec_code
exec(compiled, ns_globals, ns_locals)
File "C:\Users\granthjain\Desktop\get_data\ppo_try.py", line 376, in <module>
train(0, params, model, optimizer, ticker, sc, r)
File "C:\Users\granthjain\Desktop\get_data\ppo_try.py", line 128, in train
value, action_values = model(St)
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\granthjain\Desktop\get_data\ppo_try.py", line 226, in forward
actor = self.actor_linear(f2)
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\torch\nn\modules\linear.py", line 91, in forward
return F.linear(input, self.weight, self.bias)
File "C:\Users\granthjain\anaconda\envs\env_full\lib\site-packages\torch\nn\functional.py", line 1674, in linear
ret = torch.addmm(bias, input, weight.t())
(function print_stack)
The traceback is giving the DoubleTensor[256,3], actor_linear fully connected layer is is of size 256,3.
I am unable to figure it out what is wrong with the code.