Can't find: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation

Hello everyone, I am trying to run the MADDPG algorithm on my environment, but during training I hit a PyTorch error about an in-place operation which I can't seem to locate in my code.

The error:

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [64, 1]], which is output 0 of AsStridedBackward0, is at version 3; expected version 2 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
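From what I understand of the mechanism, autograd keeps a version counter on every tensor it saves for the backward pass, and backward() fails if such a tensor was modified in place after the forward pass. A minimal snippet (not from my code, just my understanding of the error class, with made-up shapes) that reproduces it:

import torch

x = torch.randn(64, 1, requires_grad=True)
y = x * 2            # intermediate tensor, saved for the backward pass
z = (y ** 2).sum()   # pow needs y to compute its gradient
y += 1               # in-place op bumps y's version counter
z.backward()         # RuntimeError: ... modified by an inplace operation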

The backtrace:

RuntimeError                              Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_18392\ in <module>
     42         if total_steps % 100 == 0 and not evaluate:
---> 43             maddpg_agents.learn(memory)
     45         obs = obs_

~\AppData\Local\Temp\ipykernel_18392\ in learn(self, memory)
     73                 critic_loss = F.mse_loss(target, critic_value)
     74                 agent.critic.optimizer.zero_grad()
---> 75                 critic_loss.backward(retain_graph=True)
     76                 agent.critic.optimizer.step()

c:\ProgramData\Anaconda3\lib\site-packages\torch\ in backward(self, gradient, retain_graph, create_graph, inputs)
    361                 create_graph=create_graph,
    362                 inputs=inputs)
--> 363         torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
    365     def register_hook(self, hook):
c:\ProgramData\Anaconda3\lib\site-packages\torch\autograd\ in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
    173     Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
--> 175         allow_unreachable=True, accumulate_grad=True)  # Calls into the C++ engine to run the backward pass
    177 def grad(

I can’t locate the inplace operation.

The network:

import os
import torch as T
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class CriticNetwork(nn.Module):
    def __init__(self, beta, input_dims, fc1_dims, fc2_dims,
                 n_agents, n_actions, name, chkpt_dir):
        super(CriticNetwork, self).__init__()

        self.chkpt_file = os.path.join(chkpt_dir, name)

        # critic takes the full state observation vector of the whole env
        # + the action vectors of all agents
        self.fc1 = nn.Linear(input_dims + n_agents*n_actions, fc1_dims)
        self.fc2 = nn.Linear(fc1_dims, fc2_dims)
        self.q = nn.Linear(fc2_dims, 1)

        self.optimizer = optim.Adam(self.parameters(), lr=beta)
        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, state, action):
        # concatenate full state with all agents' actions along the batch's feature dim
        x = F.relu(self.fc1(T.cat([state, action], dim=1)))
        x = F.relu(self.fc2(x))
        q = self.q(x)

        return q
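For clarity, this is the input layout I expect the critic to receive (a sketch with hypothetical dimensions: 3 agents, 5-dim actions, a 64-dim full state, batch size 32):

critic = CriticNetwork(beta=0.01, input_dims=64, fc1_dims=64, fc2_dims=64,
                       n_agents=3, n_actions=5, name='critic', chkpt_dir='tmp')

state = T.rand(32, 64)       # batch of full environment states
actions = T.rand(32, 3 * 5)  # concatenated actions of all 3 agents
q = critic.forward(state.to(critic.device), actions.to(critic.device))
print(q.shape)               # torch.Size([32, 1])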

And the learning function:

def learn(self, memory):
    if not memory.ready():
        return

    actor_states, states, actions, rewards, actor_new_states, states_, dones = memory.sample_buffer()
    device = self.agents[0].actor.device

    states = T.tensor(states, dtype=T.float).to(device)
    actions = T.tensor(actions, dtype=T.float).to(device)
    rewards = T.tensor(rewards, dtype=T.float).to(device)
    states_ = T.tensor(states_, dtype=T.float).to(device)
    dones = T.tensor(dones).to(device)

    all_agents_new_actions = []
    all_agents_new_mu_actions = []
    old_agents_actions = []
    for agent_idx, agent in enumerate(self.agents):
        # target actor's actions for the next states
        new_states = T.tensor(actor_new_states[agent_idx],
                              dtype=T.float).to(device)
        new_act = agent.target_actor.forward(new_states)
        all_agents_new_actions.append(new_act)

        # current actor's actions for the sampled states
        mu_states = T.tensor(actor_states[agent_idx],
                             dtype=T.float).to(device)
        acti = agent.actor.forward(mu_states)
        all_agents_new_mu_actions.append(acti)

        # actions actually taken, from the replay buffer
        old_agents_actions.append(actions[agent_idx])

    new_actions = T.cat([acts for acts in all_agents_new_actions], dim=1)
    mu = T.cat([acts for acts in all_agents_new_mu_actions], dim=1)
    old_actions = T.cat([acts for acts in old_agents_actions], dim=1)

    for agent_idx, agent in enumerate(self.agents):
        with T.autograd.set_detect_anomaly(True):
            critic_value_ = agent.target_critic.forward(states_, new_actions).flatten()
            critic_value_[dones[:,0]] = 0.0
            critic_value = agent.critic.forward(states, old_actions).flatten()

            target = rewards[:,agent_idx] + agent.gamma*critic_value_
            critic_loss = F.mse_loss(target, critic_value)
            agent.critic.optimizer.zero_grad()
            critic_loss.backward(retain_graph=True)
            agent.critic.optimizer.step()

            actor_loss = agent.critic.forward(states, mu).flatten()
            actor_loss = -T.mean(actor_loss)
            agent.actor.optimizer.zero_grad()
            actor_loss.backward(retain_graph=True)
            agent.actor.optimizer.step()
Since this is a multi-agent algorithm, during learning I have to make multiple backward passes through the same graph (one per agent), so the graph needs to be retained.
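To illustrate what I mean (a toy sketch, not my actual code): the concatenated new_actions / mu tensors are shared by every agent's loss, so the first backward() would free the shared graph unless I retain it:

import torch

shared = torch.randn(4, requires_grad=True)
feat = shared * 3                   # graph shared by both losses

loss_a = (feat ** 2).sum()
loss_b = (feat + 1).sum()

loss_a.backward(retain_graph=True)  # keep the graph for the second pass
loss_b.backward()                   # without retain_graph this would raise
                                    # "Trying to backward through the graph a second time"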

Thank you all!

NB: I didn't paste my full code here because I suspect the in-place operation happens in the parts shown above, but I can share the whole code if necessary.

I think the same issue is discussed here: Encounter the RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation