Hello,
I am building a DDPG model with an RNN layer, but I get the following issue.
[W …\torch\csrc\autograd\python_anomaly_mode.cpp:104] Warning: Error detected in AddmmBackward. Traceback of forward call that caused the error:
File “train.py”, line 154, in
value_loss, policy_loss = agent.update_params(batch)
File “DDPG - RNN\ddpg.py”, line 113, in update_params
state_action_batch = self.critic(state_batch, action_batch)
File “venv\lib\site-packages\torch\nn\modules\module.py”, line 727, in _call_impl
result = self.forward(*input, **kwargs)
File “DDPG - RNN\utils\nets.py”, line 127, in forward
self.hx = self.rnn(x, self.hx)
File “venv\lib\site-packages\torch\nn\modules\module.py”, line 727, in _call_impl
result = self.forward(*input, **kwargs)
File “venv\lib\site-packages\torch\nn\modules\rnn.py”, line 885, in forward
self.bias_ih, self.bias_hh,
(function print_stack)
Traceback (most recent call last):
File “train.py”, line 154, in
value_loss, policy_loss = agent.update_params(batch)
File “DDPG - RNN\ddpg.py”, line 122, in update_params
policy_loss.backward()
File “venv\lib\site-packages\torch\tensor.py”, line 221, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph)
File "venv\lib\site-packages\torch\autograd_init.py", line 132, in backward
allow_unreachable=True) # allow_unreachable flag
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [300, 300]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
import gc
import logging
import os
import torch
import torch.nn.functional as F
from torch.optim import Adam
from utils.nets import Actor, Critic
logger = logging.getLogger('ddpg')
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())
torch.autograd.set_detect_anomaly(True)
# if gpu is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def soft_update(target, source, tau):
for target_param, param in zip(target.parameters(), source.parameters()):
target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau)
def hard_update(target, source):
for target_param, param in zip(target.parameters(), source.parameters()):
target_param.data.copy_(param.data)
class DDPG(object):
def __init__(self, gamma, tau, hidden_size, num_inputs, action_space, checkpoint_dir=None):
self.gamma = gamma
self.tau = tau
self.action_space = action_space
# Define the actor
self.actor = Actor(hidden_size, num_inputs, self.action_space).to(device)
self.actor_target = Actor(hidden_size, num_inputs, self.action_space).to(device)
# Define the critic
self.critic = Critic(hidden_size, num_inputs, self.action_space).to(device)
self.critic_target = Critic(hidden_size, num_inputs, self.action_space).to(device)
# Define the optimizers for both networks
self.actor_optimizer = Adam(self.actor.parameters(),
lr=1e-4) # optimizer for the actor network
self.critic_optimizer = Adam(self.critic.parameters(),
lr=1e-3,
weight_decay=1e-2
) # optimizer for the critic network
# Make sure both targets are with the same weight
hard_update(self.actor_target, self.actor)
hard_update(self.critic_target, self.critic)
# Set the directory to save the models
if checkpoint_dir is None:
self.checkpoint_dir = "./saved_models/"
else:
self.checkpoint_dir = checkpoint_dir
os.makedirs(self.checkpoint_dir, exist_ok=True)
logger.info('Saving all checkpoints to {}'.format(self.checkpoint_dir))
def calc_action(self, state, action_noise=None):
x = state.to(device)
# Get the continous action value to perform in the env
self.actor.eval() # Sets the actor in evaluation mode
mu = self.actor(x)
self.actor.train() # Sets the actor in training mode
mu = mu.data
# During training we add noise for exploration
if action_noise is not None:
noise = torch.Tensor(action_noise.noise()).to(device)
mu += noise
# Clip the output according to the action space of the env
mu = mu.clamp(self.action_space.low[0], self.action_space.high[0])
return mu
def update_params(self, batch):
"""
Updates the parameters/networks of the agent according to the given batch.
This means we ...
1. Compute the targets
2. Update the Q-function/critic by one step of gradient descent
3. Update the policy/actor by one step of gradient ascent
4. Update the target networks through a soft update
Arguments:
batch: Batch to perform the training of the parameters
"""
# Get tensors from the batch
state_batch = torch.cat(batch.state).to(device)
action_batch = torch.cat(batch.action).to(device)
reward_batch = torch.cat(batch.reward).to(device)
done_batch = torch.cat(batch.done).to(device)
next_state_batch = torch.cat(batch.next_state).to(device)
# Get the actions and the state values to compute the targets
next_action_batch = self.actor_target(next_state_batch)
next_state_action_values = self.critic_target(next_state_batch, next_action_batch.detach())
# Compute the target
reward_batch = reward_batch.unsqueeze(1)
done_batch = done_batch.unsqueeze(1)
expected_values = reward_batch + (1.0 - done_batch) * self.gamma * next_state_action_values
# Update the critic network
self.critic_optimizer.zero_grad()
state_action_batch = self.critic(state_batch, action_batch)
value_loss = F.mse_loss(state_action_batch, expected_values.detach())
value_loss.backward(retain_graph=True)
self.critic_optimizer.step()
# Update the actor network
self.actor_optimizer.zero_grad()
policy_loss = -self.critic(state_batch, self.actor(state_batch))
policy_loss = policy_loss.mean()
policy_loss.backward()
self.actor_optimizer.step()
# Update the target networks
soft_update(self.actor_target, self.actor, self.tau)
soft_update(self.critic_target, self.critic, self.tau)
return value_loss.item(), policy_loss.item()
def save_checkpoint(self, last_timestep, replay_buffer):
"""
Saving the networks and all parameters to a file in 'checkpoint_dir'
Arguments:
last_timestep: Last timestep in training before saving
replay_buffer: Current replay buffer
"""
checkpoint_name = self.checkpoint_dir + '/ep_{}.pth.tar'.format(last_timestep)
logger.info('Saving checkpoint...')
checkpoint = {
'last_timestep': last_timestep,
'actor': self.actor.state_dict(),
'critic': self.critic.state_dict(),
'actor_target': self.actor_target.state_dict(),
'critic_target': self.critic_target.state_dict(),
'actor_optimizer': self.actor_optimizer.state_dict(),
'critic_optimizer': self.critic_optimizer.state_dict(),
'replay_buffer': replay_buffer,
}
logger.info('Saving model at timestep {}...'.format(last_timestep))
torch.save(checkpoint, checkpoint_name)
gc.collect()
logger.info('Saved model at timestep {} to {}'.format(last_timestep, self.checkpoint_dir))
def get_path_of_latest_file(self):
"""
Returns the latest created file in 'checkpoint_dir'
"""
files = [file for file in os.listdir(self.checkpoint_dir) if (file.endswith(".pt") or file.endswith(".tar"))]
filepaths = [os.path.join(self.checkpoint_dir, file) for file in files]
last_file = max(filepaths, key=os.path.getctime)
return os.path.abspath(last_file)
def load_checkpoint(self, checkpoint_path=None):
"""
Saving the networks and all parameters from a given path. If the given path is None
then the latest saved file in 'checkpoint_dir' will be used.
Arguments:
checkpoint_path: File to load the model from
"""
if checkpoint_path is None:
checkpoint_path = self.get_path_of_latest_file()
if os.path.isfile(checkpoint_path):
logger.info("Loading checkpoint...({})".format(checkpoint_path))
key = 'cuda' if torch.cuda.is_available() else 'cpu'
checkpoint = torch.load(checkpoint_path, map_location=key)
start_timestep = checkpoint['last_timestep'] + 1
self.actor.load_state_dict(checkpoint['actor'])
self.critic.load_state_dict(checkpoint['critic'])
self.actor_target.load_state_dict(checkpoint['actor_target'])
self.critic_target.load_state_dict(checkpoint['critic_target'])
self.actor_optimizer.load_state_dict(checkpoint['actor_optimizer'])
self.critic_optimizer.load_state_dict(checkpoint['critic_optimizer'])
replay_buffer = checkpoint['replay_buffer']
gc.collect()
logger.info('Loaded model at timestep {} from {}'.format(start_timestep, checkpoint_path))
return start_timestep, replay_buffer
else:
raise OSError('Checkpoint not found')
def set_eval(self):
"""
Sets the model in evaluation mode
"""
self.actor.eval()
self.critic.eval()
self.actor_target.eval()
self.critic_target.eval()
def set_train(self):
"""
Sets the model in training mode
"""
self.actor.train()
self.critic.train()
self.actor_target.train()
self.critic_target.train()
def get_network(self, name):
if name == 'Actor':
return self.actor
elif name == 'Critic':
return self.critic
else:
raise NameError('name \'{}\' is not defined as a network'.format(name))
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
# if gpu is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
WEIGHTS_FINAL_INIT = 3e-3
BIAS_FINAL_INIT = 3e-4
def fan_in_uniform_init(tensor, fan_in=None):
"""Utility function for initializing actor and critic"""
if fan_in is None:
fan_in = tensor.size(-1)
w = 1. / np.sqrt(fan_in)
nn.init.uniform_(tensor, -w, w)
class Actor(nn.Module):
def __init__(self, hidden_size, num_inputs, action_space):
super(Actor, self).__init__()
self.action_space = action_space
num_outputs = action_space.shape[0]
self.hidden_size = hidden_size
self.hx = torch.randn(1, hidden_size[1]).to(device)
# Layer 1
self.linear1 = nn.Linear(num_inputs, hidden_size[0])
self.ln1 = nn.LayerNorm(hidden_size[0])
# Layer 2
self.linear2 = nn.Linear(hidden_size[0], hidden_size[1])
self.ln2 = nn.LayerNorm(hidden_size[1])
# Recurrent Layer (replaces relu since it has it build in)
self.rnn = nn.RNNCell(hidden_size[1], hidden_size[1])
# Output Layer
self.mu = nn.Linear(hidden_size[1], num_outputs)
# Weight Init
fan_in_uniform_init(self.linear1.weight)
fan_in_uniform_init(self.linear1.bias)
fan_in_uniform_init(self.linear2.weight)
fan_in_uniform_init(self.linear2.bias)
nn.init.uniform_(self.mu.weight, -WEIGHTS_FINAL_INIT, WEIGHTS_FINAL_INIT)
nn.init.uniform_(self.mu.bias, -BIAS_FINAL_INIT, BIAS_FINAL_INIT)
def forward(self, inputs):
x = inputs
if self.hx.size(0) != x.size(0):
self.hx = torch.randn(x.size(0), self.hidden_size[1]).to(device)
# Layer 1
x = self.linear1(x)
x = self.ln1(x)
x = F.relu(x)
# Layer 2
x = self.linear2(x)
x = self.ln2(x)
self.hx = self.rnn(x, self.hx)
x = self.hx
x = self.mu(x)
# Output
mu = torch.tanh(x)
return mu
class Critic(nn.Module):
def __init__(self, hidden_size, num_inputs, action_space):
super(Critic, self).__init__()
self.action_space = action_space
num_outputs = action_space.shape[0]
self.hidden_size = hidden_size
self.hx = torch.randn(1, hidden_size[1]).to(device)
# Layer 1
self.linear1 = nn.Linear(num_inputs, hidden_size[0])
self.ln1 = nn.LayerNorm(hidden_size[0])
# Layer 2
# In the second layer the actions will be inserted also
self.linear2 = nn.Linear(hidden_size[0] + num_outputs, hidden_size[1])
self.ln2 = nn.LayerNorm(hidden_size[1])
self.rnn = nn.RNNCell(hidden_size[1], hidden_size[1])
# Output layer (single value)
self.V = nn.Linear(hidden_size[1], 1)
# Weight Init
fan_in_uniform_init(self.linear1.weight)
fan_in_uniform_init(self.linear1.bias)
fan_in_uniform_init(self.linear2.weight)
fan_in_uniform_init(self.linear2.bias)
nn.init.uniform_(self.V.weight, -WEIGHTS_FINAL_INIT, WEIGHTS_FINAL_INIT)
nn.init.uniform_(self.V.bias, -BIAS_FINAL_INIT, BIAS_FINAL_INIT)
def forward(self, inputs, actions):
x = inputs
if self.hx.size(0) != x.size(0):
self.hx = torch.randn(x.size(0), self.hidden_size[1]).to(device)
# Layer 1
x = self.linear1(x)
x = self.ln1(x)
x = F.relu(x)
# Layer 2
x = torch.cat((x, actions), 1) # Insert the actions
x = self.linear2(x)
x = self.ln2(x)
self.hx = self.rnn(x, self.hx)
x = self.hx
# Output
V = self.V(x)
return V
I have looked at similar posts, but I can’t see any obvious inplace operations.
Thank you in advance for your help!