Hello everyone,
below is what I get at a certain episode when using the REINFORCE algorithm with PyTorch:
C:\Users\fusco\Anaconda3\lib\site-packages\torch\autograd\__init__.py:154: UserWarning: Error detected in SoftmaxBackward0. Traceback of forward call that caused the error:
File “C:\Users\fusco\Anaconda3\lib\runpy.py”, line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File “C:\Users\fusco\Anaconda3\lib\runpy.py”, line 87, in _run_code
exec(code, run_globals)
File "C:\Users\fusco\Anaconda3\lib\site-packages\spyder_kernels\console\__main__.py", line 23, in <module>
start.main()
File “C:\Users\fusco\Anaconda3\lib\site-packages\spyder_kernels\console\start.py”, line 328, in main
kernel.start()
File “C:\Users\fusco\Anaconda3\lib\site-packages\ipykernel\kernelapp.py”, line 677, in start
self.io_loop.start()
File “C:\Users\fusco\Anaconda3\lib\site-packages\tornado\platform\asyncio.py”, line 199, in start
self.asyncio_loop.run_forever()
File “C:\Users\fusco\Anaconda3\lib\asyncio\base_events.py”, line 570, in run_forever
self._run_once()
File “C:\Users\fusco\Anaconda3\lib\asyncio\base_events.py”, line 1859, in _run_once
handle._run()
File “C:\Users\fusco\Anaconda3\lib\asyncio\events.py”, line 81, in _run
self._context.run(self._callback, *self._args)
File “C:\Users\fusco\Anaconda3\lib\site-packages\ipykernel\kernelbase.py”, line 457, in dispatch_queue
await self.process_one()
File “C:\Users\fusco\Anaconda3\lib\site-packages\ipykernel\kernelbase.py”, line 446, in process_one
await dispatch(*args)
File “C:\Users\fusco\Anaconda3\lib\site-packages\ipykernel\kernelbase.py”, line 353, in dispatch_shell
await result
File “C:\Users\fusco\Anaconda3\lib\site-packages\ipykernel\kernelbase.py”, line 648, in execute_request
reply_content = await reply_content
File “C:\Users\fusco\Anaconda3\lib\site-packages\ipykernel\ipkernel.py”, line 353, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File “C:\Users\fusco\Anaconda3\lib\site-packages\ipykernel\zmqshell.py”, line 533, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File “C:\Users\fusco\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py”, line 2901, in run_cell
result = self._run_cell(
File “C:\Users\fusco\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py”, line 2947, in _run_cell
return runner(coro)
File “C:\Users\fusco\Anaconda3\lib\site-packages\IPython\core\async_helpers.py”, line 68, in pseudo_sync_runner
coro.send(None)
File “C:\Users\fusco\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py”, line 3172, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File “C:\Users\fusco\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py”, line 3364, in run_ast_nodes
if (await self.run_code(code, result, async_=asy)):
File “C:\Users\fusco\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py”, line 3444, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File “C:\Users\fusco\AppData\Local\Temp/ipykernel_17840/2349141103.py”, line 1, in
debugfile(‘C:/Users/fusco/Desktop/Ph.D/Reinforcement_Learning/Smart_Grid/REINFORCEMENT_LEARNING/SARL/Policy_Based_Methods/REINFORCE_BASELINE/Test_4/SARL_Main_REINFORCE_BASELINE_MRKI.py’, wdir=‘C:/Users/fusco/Desktop/Ph.D/Reinforcement_Learning/Smart_Grid/REINFORCEMENT_LEARNING/SARL/Policy_Based_Methods/REINFORCE_BASELINE/Test_4’)
File “C:\Users\fusco\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py”, line 607, in debugfile
enter_debugger(
File “C:\Users\fusco\Anaconda3\lib\site-packages\spyder_kernels\customize\spyderpdb.py”, line 776, in enter_debugger
debugger.run(code)
File “C:\Users\fusco\Anaconda3\lib\site-packages\spyder_kernels\customize\spyderpdb.py”, line 717, in run
super(SpyderPdb, self).run(cmd, globals, locals)
File “C:\Users\fusco\Anaconda3\lib\bdb.py”, line 580, in run
exec(cmd, globals, locals)
File “”, line 1, in
File “C:\Users\fusco\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py”, line 577, in runfile
exec_code(file_code, filename, ns_globals, ns_locals,
File “C:\Users\fusco\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py”, line 465, in exec_code
exec(compiled, ns_globals, ns_locals)
File “c:\users\fusco\desktop\ph.d\reinforcement_learning\smart_grid\reinforcement_learning\sarl\policy_based_methods\reinforce_baseline\test_4\sarl_main_reinforce_baseline_mrki.py”, line 184, in
Manager.run()
File “C:\Users\fusco\Desktop\Ph.D\Reinforcement_Learning\Smart_Grid\REINFORCEMENT_LEARNING\SARL\Policy_Based_Methods\REINFORCE_BASELINE\Test_4\SARL_System_Manager_REINFORCE_BASELINE_MRKI.py”, line 123, in run
policy_loss, value_loss = self.agent.train( state_list, action_list, reward_list, episode )
File “C:\Users\fusco\Desktop\Ph.D\Reinforcement_Learning\Smart_Grid\REINFORCEMENT_LEARNING\SARL\Policy_Based_Methods\REINFORCE_BASELINE\Test_4\SARL_Agent_REINFORCE_BASELINE_MRKI.py”, line 223, in train
selected_action_probs = self.policy_network(state_t).gather(1, action_t.long())
File “C:\Users\fusco\Anaconda3\lib\site-packages\torch\nn\modules\module.py”, line 1102, in _call_impl
return forward_call(*input, **kwargs)
File “C:\Users\fusco\Desktop\Ph.D\Reinforcement_Learning\Smart_Grid\REINFORCEMENT_LEARNING\SARL\Policy_Based_Methods\REINFORCE_BASELINE\Test_4\SARL_Agent_REINFORCE_BASELINE_MRKI.py”, line 52, in forward
out_data = torch.nn.functional.softmax(out_data, dim = -1) #-1 to take softmax of last dimension
File “C:\Users\fusco\Anaconda3\lib\site-packages\torch\nn\functional.py”, line 1680, in softmax
ret = input.softmax(dim)
(Triggered internally at …\torch\csrc\autograd\python_anomaly_mode.cpp:104.)
Variable._execution_engine.run_backward(
It looks like there is a problem with the SOFTMAX when BACKWARD is executed.
Could anyone give me a hint?
Thanks in advance.
Regards.