My code is below:
class Policy_Network(nn.Module):
def __init__(self, state_input_dim, action_dim, lr):
super(Policy_Network, self).__init__()
self.first_layer = nn.Linear(state_input_dim, 256)
self.second_layer = nn.Linear(256, 64)
#self.third_layer = nn.Linear(256, 64)
self.final_layer = nn.Linear(64, action_dim)
self.optimizer = optim.Adam(self.parameters(), lr=lr)
autograd.set_detect_anomaly(True)
#self.eps = 0.00001
def forward(self, x):
x1 = np.array(x)
x2 = F.relu(self.first_layer(torch.tensor(x1, dtype = torch.float32)))
x3 = F.relu(self.second_layer(x2))
x4 = self.final_layer(x3)
out = F.softmax(x4, dim = 0).unsqueeze(dim=0)
return out
def select_action(self, probs):
m = Categorical(probs)
action = m.sample()
return action.item(), m.log_prob(action)
def update_PFA(self, eps_rewards, eps_log_probs, gamma):
R = 0
policy_loss = []
rewards = []
for r in eps_rewards[::-1]:
R = 1 * r + gamma * R
rewards.insert(0, R)
rewards = torch.tensor(rewards, dtype = torch.float)
if rewards.std() == 0:
return
if rewards.mean() != 0:
rewards = (rewards - rewards.mean()) / (rewards.std() )#+ self.eps
for log_prob, reward in zip(eps_log_probs, rewards):
#positive beacuse we want to decrease cost
#not policy_loss.append(-log_prob * reward)
policy_loss.append( log_prob * reward)
self.optimizer.zero_grad()
loss = torch.cat(policy_loss).sum()
loss.backward(retain_graph=True)
nn.utils.clip_grad_value_(self.parameters(), clip_value=1.0)
Why do I get the inplace error?
The backtrace is as follows:
/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/torch/autograd/__init__.py:200: UserWarning: Error detected in MmBackward0. Traceback of forward call that caused the error:
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/spyder_kernels/console/__main__.py", line 24, in <module>
start.main()
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/spyder_kernels/console/start.py", line 330, in main
kernel.start()
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 736, in start
self.io_loop.start()
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 195, in start
self.asyncio_loop.run_forever()
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/asyncio/base_events.py", line 607, in run_forever
self._run_once()
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/asyncio/base_events.py", line 1922, in _run_once
handle._run()
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/asyncio/events.py", line 80, in _run
self._context.run(self._callback, *self._args)
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 516, in dispatch_queue
await self.process_one()
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 505, in process_one
await dispatch(*args)
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 412, in dispatch_shell
await result
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 740, in execute_request
reply_content = await reply_content
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 422, in do_execute
res = shell.run_cell(
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 546, in run_cell
return super().run_cell(*args, **kwargs)
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3009, in run_cell
result = self._run_cell(
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3064, in _run_cell
result = runner(coro)
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
coro.send(None)
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3269, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3448, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "/var/folders/pm/p4cmsynd4sq4rg7h5_2nx6rh0000gn/T/ipykernel_61767/3283248021.py", line 1, in <module>
runfile('/Users/prakashgawas/MyFiles/Merinio/Code/Simulation/Learn/DQN/Learn.py', wdir='/Users/prakashgawas/MyFiles/Merinio/Code/Simulation/Learn/DQN')
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/spyder_kernels/customize/spydercustomize.py", line 528, in runfile
return _exec_file(
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/spyder_kernels/customize/spydercustomize.py", line 615, in _exec_file
exec_code(file_code, filename, ns_globals, ns_locals,
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/spyder_kernels/customize/spydercustomize.py", line 473, in exec_code
exec_fun(compile(ast_code, filename, 'exec'), ns_globals, ns_locals)
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/spyder_kernels/py3compat.py", line 356, in compat_exec
exec(code, globals, locals)
File "/Users/prakashgawas/MyFiles/Merinio/Code/Simulation/Learn/DQN/Learn.py", line 153, in <module>
run_func()
File "/Users/prakashgawas/MyFiles/Merinio/Code/Simulation/Learn/DQN/Learn.py", line 86, in run_func
call = d.get_state_action(state)
File "/Users/prakashgawas/MyFiles/Merinio/Code/Simulation/Learn/DQN/Learning_modules.py", line 250, in get_PFA_action
probs = self.model(curr_state)
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/Users/prakashgawas/MyFiles/Merinio/Code/Simulation/Learn/DQN/Learning_modules.py", line 94, in forward
x4 = self.final_layer(x3)
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/Users/prakashgawas/anaconda3/envs/MYENV/lib/python3.11/site-packages/torch/nn/modules/linear.py", line 114, in forward
return F.linear(input, self.weight, self.bias)
(Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/autograd/python_anomaly_mode.cpp:119.)