I am trying to train the TQC reinforcement learning model with PyTorch, and in the training loop I get the following error:
RuntimeError Traceback (most recent call last)
<ipython-input-18-3f97736972df> in <cell line: 4>()
20 # Train agent after collecting sufficient data
21 if t >= batch_size:
---> 22 trainer.train(replay_buffer, batch_size)
23
24 if done or t==(max_timesteps-1):
3 frames
/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py in _engine_run_backward(t_outputs, *args, **kwargs)
742 unregister_hooks = _register_logging_hooks_on_whole_graph(t_outputs)
743 try:
--> 744 return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
745 t_outputs, *args, **kwargs
746 ) # Calls into the C++ engine to run the backward pass
**RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [512, 25]], which is output 0 of AsStridedBackward0, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!**
I tried some advice found online to debug, like checking for inplace operations, adding .clone() where it seemed appropriate, and torch.autograd.set_detect_anomaly(True). I still can’t fix the error, and I’m not sure how to continue.
The entire traceback output for this error is as follows:
/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py:744: UserWarning: Error detected in AddmmBackward0. Traceback of forward call that caused the error:
File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>
ColabKernelApp.launch_instance()
File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
app.start()
File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
self.io_loop.start()
File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
self._run_once()
File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
handle._run()
File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 685, in <lambda>
lambda f: self._run_callback(functools.partial(callback, future))
File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 738, in _run_callback
ret = callback()
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 825, in inner
self.ctx_run(self.run)
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run
yielded = self.gen.send(value)
File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 377, in dispatch_queue
yield self.process_one()
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 250, in wrapper
runner = Runner(ctx_run, result, future, yielded)
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 748, in __init__
self.ctx_run(self.run)
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run
yielded = self.gen.send(value)
File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 361, in process_one
yield gen.maybe_future(dispatch(*args))
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper
yielded = ctx_run(next, result)
File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell
yield gen.maybe_future(handler(stream, idents, msg))
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper
yielded = ctx_run(next, result)
File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 539, in execute_request
self.do_execute(
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper
yielded = ctx_run(next, result)
File "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py", line 302, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.10/dist-packages/ipykernel/zmqshell.py", line 539, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell
result = self._run_cell(
File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell
return runner(coro)
File "/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner
coro.send(None)
File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes
if (await self.run_code(code, result, async_=asy)):
File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-18-3f97736972df>", line 22, in <cell line: 4>
trainer.train(replay_buffer, batch_size)
File "<ipython-input-13-1b71b7a07e51>", line 38, in train
actor_loss = (alpha * log_pi - self.critic(state, new_action).mean(2).mean(1, keepdim=True)).mean()
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "<ipython-input-11-96234079d43e>", line 101, in forward
quantiles = torch.stack(tuple(net(sa) for net in self.nets), dim=1)
File "<ipython-input-11-96234079d43e>", line 101, in <genexpr>
quantiles = torch.stack(tuple(net(sa) for net in self.nets), dim=1)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "<ipython-input-11-96234079d43e>", line 57, in forward
output = self.last_fc(h)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py", line 116, in forward
return F.linear(input, self.weight, self.bias)
(Triggered internally at ../torch/csrc/autograd/python_anomaly_mode.cpp:111.)
return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
which seems to indicate that the error originates here:
# Imports must come first: torch is used immediately below, so calling
# torch.autograd.set_detect_anomaly before `import torch` raises a NameError
# when this file is run top-to-bottom.
import numpy as np
import torch
from torch.nn import Module, Linear
from torch.distributions import Distribution, Normal
from torch.nn.functional import relu, logsigmoid
from gym import spaces
import gym

# Debugging aid: record forward-pass tracebacks so backward errors point at
# the offending op.  Noticeably slows training; disable once fixed.
torch.autograd.set_detect_anomaly(True)

# Clamp range for the policy's log-std head (standard SAC/TQC bounds).
LOG_STD_MIN_MAX = (-20, 2)
# Rescale action
class RescaleAction(gym.ActionWrapper):
    """Affinely rescale agent actions from [a, b] into the wrapped env's range."""

    def __init__(self, env, a, b):
        assert isinstance(env.action_space, spaces.Box), (
            "expected Box action space, got {}".format(type(env.action_space)))
        assert np.less_equal(a, b).all(), (a, b)
        super(RescaleAction, self).__init__(env)
        shape = env.action_space.shape
        dtype = env.action_space.dtype
        # Broadcast the (possibly scalar) bounds to the full action shape.
        self.a = np.zeros(shape, dtype=dtype) + a
        self.b = np.zeros(shape, dtype=dtype) + b
        self.action_space = spaces.Box(low=a, high=b, shape=shape, dtype=dtype)

    def action(self, action):
        # The incoming action must already lie in [a, b].
        assert np.all(np.greater_equal(action, self.a)), (action, self.a)
        assert np.all(np.less_equal(action, self.b)), (action, self.b)
        low = self.env.action_space.low
        high = self.env.action_space.high
        fraction = (action - self.a) / (self.b - self.a)
        rescaled = low + (high - low) * fraction
        # Guard against floating-point drift past the env's bounds.
        return np.clip(rescaled, low, high)
# Mlp
class Mlp(Module):
    """Fully connected network: ReLU hidden layers, linear output layer."""

    def __init__(
            self,
            input_size,
            hidden_sizes,
            output_size
    ):
        super().__init__()
        # TODO: initialization
        self.fcs = []
        in_size = input_size
        for i, next_size in enumerate(hidden_sizes):
            fc = Linear(in_size, next_size)
            # self.fcs is a plain list, so register each layer explicitly
            # to make its parameters visible to the optimizer.
            self.add_module(f'fc{i}', fc)
            self.fcs.append(fc)
            in_size = next_size
        self.last_fc = Linear(in_size, output_size)

    def forward(self, input):
        h = input
        for fc in self.fcs:
            # No .clone() needed: relu() is out-of-place, so nothing here
            # mutates a tensor the autograd graph depends on.  The clone in
            # the original was a debugging band-aid that only added overhead.
            h = relu(fc(h))
        output = self.last_fc(h)
        return output
# Replay Buffer features:
class ReplayBuffer(object):
    """Fixed-capacity FIFO replay buffer backed by pre-allocated numpy arrays."""

    def __init__(self, state_dim, action_dim, max_size=int(1e6)):
        self.max_size = max_size
        self.ptr = 0   # next write index (wraps around at max_size)
        self.size = 0  # number of valid entries currently stored
        self.transition_names = ('state', 'action', 'next_state', 'reward', 'not_done')
        sizes = (state_dim, action_dim, state_dim, 1, 1)
        for name, size in zip(self.transition_names, sizes):
            setattr(self, name, np.empty((max_size, size)))

    def add(self, state, action, next_state, reward, done):
        # Store 1 - done so Bellman targets can multiply by not_done directly.
        values = (state, action, next_state, reward, 1. - done)
        for name, value in zip(self.transition_names, values):
            getattr(self, name)[self.ptr] = value
        self.ptr = (self.ptr + 1) % self.max_size
        self.size = min(self.size + 1, self.max_size)

    def sample(self, batch_size):
        """Sample a random batch; returns a tuple of FloatTensors on `device`.

        Returns a tuple rather than a generator expression: a generator can
        only be consumed once and is evaluated lazily, which is an easy way
        to sample a different batch than intended.
        NOTE(review): relies on a module-level `device` being defined.
        """
        ind = np.random.randint(0, self.size, size=batch_size)
        return tuple(torch.FloatTensor(getattr(self, name)[ind]).to(device)
                     for name in self.transition_names)
# Critic features:
class Critic(Module):
    """Ensemble of quantile critics; forward returns (batch, n_nets, n_quantiles)."""

    def __init__(self, state_dim, action_dim, n_quantiles, n_nets):
        super().__init__()
        self.n_quantiles = n_quantiles  # quantiles per network (truncation happens in the trainer)
        self.n_nets = n_nets            # number of ensemble members
        self.nets = []
        for idx in range(n_nets):
            q_net = Mlp(state_dim + action_dim, [512, 512, 512], n_quantiles)
            # Register explicitly so the ensemble's parameters are tracked.
            self.add_module(f'qf{idx}', q_net)
            self.nets.append(q_net)

    def forward(self, state, action):
        # Every ensemble member sees the same concatenated (state, action) input.
        joint = torch.cat((state, action), dim=1)
        per_net = [q_net(joint) for q_net in self.nets]
        return torch.stack(per_net, dim=1)
# Actor features:
class Actor(Module):
    """Tanh-squashed Gaussian policy: stochastic in training, deterministic in eval."""

    def __init__(self, state_dim, action_dim):
        super().__init__()
        self.action_dim = action_dim
        # The net outputs mean and log-std concatenated, hence 2 * action_dim.
        self.net = Mlp(state_dim, [256, 256], 2 * action_dim)

    def forward(self, obs):
        # First half of the output is the mean, second half the log-std.
        mean, log_std = self.net(obs).split([self.action_dim, self.action_dim], dim=1)
        log_std = log_std.clamp(*LOG_STD_MIN_MAX)  # keep std in a numerically sane range
        if not self.training:
            # Deterministic evaluation action; no log-prob is computed.
            return torch.tanh(mean), None
        dist = TanhNormal(mean, torch.exp(log_std))
        action, pre_tanh = dist.rsample()
        log_prob = dist.log_prob(pre_tanh).sum(dim=1, keepdim=True)
        return action, log_prob

    def select_action(self, obs):
        # Wrap the single observation in a batch dimension for the network.
        obs = torch.FloatTensor(obs).to(device)[None, :]
        action, _ = self.forward(obs)
        return action[0].cpu().detach().numpy()
# The model uses a tanh-normal distribution. Why?
class TanhNormal(Distribution):
    """Distribution of tanh(X) with X ~ Normal(mean, std).

    rsample() returns both the squashed sample and the pre-tanh value so
    log_prob (which needs the pre-tanh value) can be computed without
    inverting tanh.
    """
    arg_constraints = {}

    def __init__(self, normal_mean, normal_std):
        super().__init__()
        self.normal_mean = normal_mean
        self.normal_std = normal_std
        # zeros_like / ones_like already inherit device (and dtype) from
        # their input tensor, so the explicit device= argument the original
        # passed was redundant and coupled this class to a module global.
        self.standard_normal = Normal(torch.zeros_like(normal_mean),
                                      torch.ones_like(normal_std))
        self.normal = Normal(normal_mean, normal_std)

    def log_prob(self, pre_tanh):
        # Change-of-variables correction, in numerically stable form:
        # log|d tanh(x)/dx| = log(1 - tanh(x)^2)
        #                   = 2*log(2) + logsigmoid(2x) + logsigmoid(-2x)
        log_det = 2 * np.log(2) + logsigmoid(2 * pre_tanh) + logsigmoid(-2 * pre_tanh)
        result = self.normal.log_prob(pre_tanh) - log_det
        return result

    def rsample(self):
        # Reparameterization trick: noise is drawn without gradient, so the
        # sample stays differentiable w.r.t. mean and std.  The .clone() the
        # original carried here was a no-op band-aid and has been removed.
        pretanh = self.normal_mean + self.normal_std * self.standard_normal.sample()
        return torch.tanh(pretanh), pretanh
I would really appreciate some advice! Let me know if more code is needed to determine what is going on.