Hello, I'm trying to implement DDPG, but every time I update my networks I get this error:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
Can you help me find the in-place operation that causes it?
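I understand what the error means in a trivial case like the following (a toy example, not from my code):

import torch

a = torch.ones(3, requires_grad=True)
b = a.exp()         # backward of exp() reuses its saved output
b.add_(1)           # the in-place op bumps b's version counter
b.sum().backward()  # raises the same RuntimeError

but I can't spot any explicit in-place operation like add_() in my own update.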
This is my update code:
# sample a batch of transitions from the replay buffer
batch = self.memory.extract_batch(self.batch_size)
states, actions, next_states, rews, dones = [], [], [], [], []
for exp in batch:
    states.append(torch.FloatTensor(exp.state).unsqueeze(0))
    actions.append(torch.FloatTensor(exp.action).unsqueeze(0))
    next_states.append(torch.FloatTensor(exp.next_obs).unsqueeze(0))
    rews.append(torch.tensor([exp.reward], dtype=torch.float32).unsqueeze(0))
    dones.append(torch.tensor([exp.done], dtype=torch.float32).unsqueeze(0))
# stack the per-transition tensors into batched tensors
states = torch.cat(states)
actions = torch.cat(actions)
next_states = torch.cat(next_states)
rews = torch.cat(rews)
dones = torch.cat(dones)
# TD target: y = r + (1 - done) * gamma * Q_target(s', mu_target(s'))
next_actions = self.target_policy(next_states)
target_qvals = self.target_qf(next_states, next_actions.detach())
y = rews + (1.0 - dones) * self.discount * target_qvals
# critic loss: mean squared error against the TD target
qval = self.qf(states, actions)
qval_loss = (qval - y).pow(2).mean()
# actor loss: negative Q-value of the policy's own actions
actions = self.policy(states)
action_loss = -1 * self.qf(states, actions).mean()
# optimize qf
self.qf_optimizer.zero_grad()
qval_loss.backward(retain_graph=True)
self.qf_optimizer.step()
# optimize policy
self.policy_optimizer.zero_grad()
action_loss.backward()
self.policy_optimizer.step()
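For reference, here is another minimal toy sketch (made-up model, nothing to do with my real networks) that raises the same error when an optimizer step happens between building a graph and calling backward on it:

import torch
import torch.nn as nn

net = nn.Linear(3, 1)
opt = torch.optim.SGD(net.parameters(), lr=0.01)

x = torch.randn(8, 3)
out = net(x)
loss_a = out.pow(2).mean()  # stand-in for a first loss
loss_b = -out.mean()        # stand-in for a second loss on the same graph

opt.zero_grad()
loss_a.backward(retain_graph=True)
opt.step()         # updates net's parameters in place
loss_b.backward()  # raises the RuntimeError: the retained graph still
                   # needs the parameter values from before the step

Is my update above doing something equivalent somewhere? I can't see it.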
Thank you in advance!