In-place operation RuntimeError

I am trying to train a TQC (Truncated Quantile Critics) reinforcement-learning model with PyTorch, and the training loop fails with the following error:

RuntimeError                              Traceback (most recent call last)
<ipython-input-18-3f97736972df> in <cell line: 4>()
     20         # Train agent after collecting sufficient data
     21         if t >= batch_size:
---> 22             trainer.train(replay_buffer, batch_size)
     23 
     24         if done or t==(max_timesteps-1):

3 frames
/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py in _engine_run_backward(t_outputs, *args, **kwargs)
    742         unregister_hooks = _register_logging_hooks_on_whole_graph(t_outputs)
    743     try:
--> 744         return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
    745             t_outputs, *args, **kwargs
    746         )  # Calls into the C++ engine to run the backward pass

**RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [512, 25]], which is output 0 of AsStridedBackward0, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!**

I tried some debugging advice I found online: checking for in-place operations, adding .clone() where it seemed appropriate, and enabling torch.autograd.set_detect_anomaly(True). I still can’t fix the error and am not sure how to proceed.

The full traceback, produced with anomaly detection enabled, is as follows:

/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py:744: UserWarning: Error detected in AddmmBackward0. Traceback of forward call that caused the error:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
    self._run_once()
  File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
    handle._run()
  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 685, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 738, in _run_callback
    ret = callback()
  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 825, in inner
    self.ctx_run(self.run)
  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run
    yielded = self.gen.send(value)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 377, in dispatch_queue
    yield self.process_one()
  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 250, in wrapper
    runner = Runner(ctx_run, result, future, yielded)
  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 748, in __init__
    self.ctx_run(self.run)
  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run
    yielded = self.gen.send(value)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 361, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 539, in execute_request
    self.do_execute(
  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py", line 302, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/zmqshell.py", line 539, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell
    result = self._run_cell(
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell
    return runner(coro)
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner
    coro.send(None)
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-18-3f97736972df>", line 22, in <cell line: 4>
    trainer.train(replay_buffer, batch_size)
  File "<ipython-input-13-1b71b7a07e51>", line 38, in train
    actor_loss = (alpha * log_pi - self.critic(state, new_action).mean(2).mean(1, keepdim=True)).mean()
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "<ipython-input-11-96234079d43e>", line 101, in forward
    quantiles = torch.stack(tuple(net(sa) for net in self.nets), dim=1)
  File "<ipython-input-11-96234079d43e>", line 101, in <genexpr>
    quantiles = torch.stack(tuple(net(sa) for net in self.nets), dim=1)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "<ipython-input-11-96234079d43e>", line 57, in forward
    output = self.last_fc(h)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py", line 116, in forward
    return F.linear(input, self.weight, self.bias)
 (Triggered internally at ../torch/csrc/autograd/python_anomaly_mode.cpp:111.)
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass

which points at the last Linear layer (last_fc) in the forward pass of my critic’s Mlp. My full model code is below:

import numpy as np
import torch
from torch.nn import Module, Linear
from torch.distributions import Distribution, Normal
from torch.nn.functional import relu, logsigmoid
from gym import spaces
import gym

# Enable anomaly detection (must come after importing torch)
torch.autograd.set_detect_anomaly(True)

# `device` is referenced throughout; assuming the usual CUDA-if-available setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

LOG_STD_MIN_MAX = (-20, 2)


# Rescale action
class RescaleAction(gym.ActionWrapper):
    def __init__(self, env, a, b):
        assert isinstance(env.action_space, spaces.Box), (
            "expected Box action space, got {}".format(type(env.action_space)))
        assert np.less_equal(a, b).all(), (a, b)
        super(RescaleAction, self).__init__(env)
        self.a = np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + a
        self.b = np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + b
        self.action_space = spaces.Box(low=a, high=b, shape=env.action_space.shape, dtype=env.action_space.dtype)

    def action(self, action):
        assert np.all(np.greater_equal(action, self.a)), (action, self.a)
        assert np.all(np.less_equal(action, self.b)), (action, self.b)
        low = self.env.action_space.low
        high = self.env.action_space.high
        action = low + (high - low)*((action - self.a)/(self.b - self.a))
        action = np.clip(action, low, high)
        return action


# Mlp
class Mlp(Module):
    def __init__(
            self,
            input_size,
            hidden_sizes,
            output_size
    ):
        super().__init__()
        # TODO: initialization
        self.fcs = []
        in_size = input_size
        for i, next_size in enumerate(hidden_sizes):
            fc = Linear(in_size, next_size)
            self.add_module(f'fc{i}', fc)
            self.fcs.append(fc)
            in_size = next_size
        self.last_fc = Linear(in_size, output_size)

    def forward(self, input):
        h = input
        for fc in self.fcs:
            h = relu(fc(h).clone())
        output = self.last_fc(h)
        return output


# Replay Buffer features:
class ReplayBuffer(object):
    def __init__(self, state_dim, action_dim, max_size=int(1e6)):
        self.max_size = max_size
        self.ptr = 0
        self.size = 0

        self.transition_names = ('state', 'action', 'next_state', 'reward', 'not_done')
        sizes = (state_dim, action_dim, state_dim, 1, 1)
        for name, size in zip(self.transition_names, sizes):
            setattr(self, name, np.empty((max_size, size)))

    def add(self, state, action, next_state, reward, done):
        values = (state, action, next_state, reward, 1. - done)
        for name, value in zip(self.transition_names, values):
            getattr(self, name)[self.ptr] = value

        self.ptr = (self.ptr + 1) % self.max_size
        self.size = min(self.size + 1, self.max_size)

    def sample(self, batch_size):
        ind = np.random.randint(0, self.size, size=batch_size)
        names = self.transition_names
        return (torch.FloatTensor(getattr(self, name)[ind]).to(device) for name in names)


# Critic features:
class Critic(Module):
    def __init__(self, state_dim, action_dim, n_quantiles, n_nets):
        super().__init__()
        self.nets = []
        self.n_quantiles = n_quantiles # Quantiles for truncated quantile mechanism
        self.n_nets = n_nets # number of critic networks in the ensemble
        for i in range(n_nets):
            net = Mlp(state_dim + action_dim, [512, 512, 512], n_quantiles) # one Mlp (defined above) per critic network
            self.add_module(f'qf{i}', net)
            self.nets.append(net)

    def forward(self, state, action):
        sa = torch.cat((state, action), dim=1)
        quantiles = torch.stack(tuple(net(sa) for net in self.nets), dim=1)
        return quantiles


# Actor features:
class Actor(Module):
    def __init__(self, state_dim, action_dim):
        super().__init__()
        self.action_dim = action_dim
        self.net = Mlp(state_dim, [256, 256], 2 * action_dim)

    def forward(self, obs):
        mean, log_std = self.net(obs).split([self.action_dim, self.action_dim], dim=1)
        log_std = log_std.clamp(*LOG_STD_MIN_MAX) # LOG_STD_MIN_MAX is a parameter here

        if self.training:
            std = torch.exp(log_std)
            tanh_normal = TanhNormal(mean, std) # uses the TanhNormal distribution defined below
            action, pre_tanh = tanh_normal.rsample()
            log_prob = tanh_normal.log_prob(pre_tanh)
            log_prob = log_prob.sum(dim=1, keepdim=True)
        else:  # deterministic eval without log_prob computation
            action = torch.tanh(mean)
            log_prob = None
        return action, log_prob

    def select_action(self, obs):
        obs = torch.FloatTensor(obs).to(device)[None, :]
        action, _ = self.forward(obs)
        action = action[0].cpu().detach().numpy()
        return action


# The actor uses a tanh-squashed normal distribution so that sampled actions stay bounded in (-1, 1)
class TanhNormal(Distribution):
    arg_constraints = {}

    def __init__(self, normal_mean, normal_std):
        super().__init__()
        self.normal_mean = normal_mean
        self.normal_std = normal_std
        self.standard_normal = Normal(torch.zeros_like(self.normal_mean, device=device),
                                      torch.ones_like(self.normal_std, device=device))
        self.normal = Normal(normal_mean, normal_std)

    def log_prob(self, pre_tanh):
        log_det = 2 * np.log(2) + logsigmoid(2 * pre_tanh) + logsigmoid(-2 * pre_tanh)
        result = self.normal.log_prob(pre_tanh) - log_det
        return result

    def rsample(self):
        pretanh = self.normal_mean + self.normal_std * self.standard_normal.sample()
        t = torch.tanh(pretanh).clone()
        return t, pretanh

I would really appreciate some advice! Let me know if more code is needed to figure out what is going on.

Hi Elizabeth!

I don’t really understand actor-critic training, but such models seem to be hotbeds for inplace-modification errors. You might start by looking at this discussion of some of the ways such errors can arise.

Do you call .backward(retain_graph=True) anywhere? Doing so is sometimes (usually?) incorrect and can lead to inplace-modification errors.

Note that the shape of the problem tensor, [512, 25], is a useful piece of information; see below.

Based on the forward-call traceback and the shape reported for the problem tensor, it looks like the weight of last_fc in one of your Critic’s Mlps (a parameter presumably being optimized) is the cause of your problem: last_fc maps a 512-unit hidden layer to what appear to be 25 quantiles, so its weight has shape [25, 512], and the transposed view saved for the backward pass is exactly the [512, 25] tensor the error reports.

One possibility is that you are doing something like:

    loss.backward(retain_graph=True)   # leaves Critic.last_fc in what will become a stale computation graph
    ...
    opt.step()                         # counts as an inplace modification
    ...
    loss.backward(...)                 # backpropagates through the stale graph and hits the modified Critic.last_fc

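If it helps, here is a tiny self-contained sketch of that failure pattern (a throwaway two-layer model, nothing from your actual code) that reproduces the same RuntimeError:

    import torch
    from torch import nn, optim

    model = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
    opt = optim.SGD(model.parameters(), lr=0.1)

    loss = model(torch.randn(16, 4)).sum()
    loss.backward(retain_graph=True)  # keep the graph so it can be backpropagated again
    opt.step()                        # in-place parameter update bumps the weights' version counters
    loss.backward()                   # RuntimeError: the retained graph saved the (transposed)
                                      # weight of the second Linear, which opt.step() just modified

(The saved tensor that trips the version check here is the second Linear’s transposed weight, which is the same kind of tensor your error reports as an AsStridedBackward0 output.)
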
In any event, you can find various methods for finding inplace-modification errors in the following post (which happens to be about an actor-critic model):

Good luck!

K. Frank

EDIT: Hey!! I fixed the error by cloning the weights of the Linear layers before using them in the Mlp’s forward pass, as you advised in that discussion. Thank you so much!
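
For reference, the fixed Mlp.forward looks roughly like this (a sketch of the idea rather than my exact code), using torch.nn.functional.linear with cloned parameters so that autograd saves the clones instead of the live parameter tensors the optimizers update in place:

    from torch.nn.functional import linear, relu

    def forward(self, input):
        h = input
        for fc in self.fcs:
            # clone weight and bias so the graph saves copies, not the live parameters
            h = relu(linear(h, fc.weight.clone(), fc.bias.clone()))
        output = linear(h, self.last_fc.weight.clone(), self.last_fc.bias.clone())
        return output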

Hi Frank!

Thank you for your reply. I read that discussion but didn’t get any further. I’m now double-checking the Trainer class, since the error may originate there.

I’ll post the Trainer class here in case that helps:

    class Trainer:
      def __init__(self, actor, critic, critic_target, discount, tau, top_quantiles_to_drop, target_entropy):
          self.actor = actor
          self.critic = critic
          self.critic_target = critic_target
          self.log_alpha = torch.zeros((1,), requires_grad=True, device=device)
          self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=3e-4)
          self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=3e-4)
          self.alpha_optimizer = optim.Adam([self.log_alpha], lr=3e-4)
          self.discount = discount
          self.tau = tau
          self.top_quantiles_to_drop = top_quantiles_to_drop
          self.target_entropy = target_entropy
          self.quantiles_total = critic.n_quantiles * critic.n_nets
          self.total_it = 0

      def train(self, replay_buffer, batch_size=256):
          state, action, next_state, reward, not_done = replay_buffer.sample(batch_size)
          alpha = torch.exp(self.log_alpha)

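          # TQC target: sort all target-critic quantiles, drop the top ones to curb overestimation, add the entropy term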
          with torch.no_grad():
              new_next_action, next_log_pi = self.actor(next_state)
              next_z = self.critic_target(next_state, new_next_action)
              sorted_z, _ = torch.sort(next_z.reshape(batch_size, -1))
              sorted_z_part = sorted_z[:, :self.quantiles_total - self.top_quantiles_to_drop]
              target = reward + not_done * self.discount * (sorted_z_part - alpha * next_log_pi)

          cur_z = self.critic(state, action)
          critic_loss = quantile_huber_loss_f(cur_z, target)

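          # SAC-style actor and temperature (alpha) losses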
          new_action, log_pi = self.actor(state)
          alpha_loss = -self.log_alpha * (log_pi + self.target_entropy).detach().mean()
          actor_loss = (alpha * log_pi - self.critic(state, new_action).mean(2).mean(1, keepdim=True)).mean()

          self.critic_optimizer.zero_grad()
          critic_loss.backward()
          self.critic_optimizer.step()

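          # Polyak soft update of the target critic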
          with torch.no_grad():
              for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()):
                  target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data)

          self.actor_optimizer.zero_grad()
          actor_loss.backward()
          self.actor_optimizer.step()

          self.alpha_optimizer.zero_grad()
          alpha_loss.backward()
          self.alpha_optimizer.step()

          self.total_it += 1