Hi,
I cannot solve this memory leak problem:
My code is:
def trex_reward_update(self, all_traj_pairs): batch_loss = 0 error = 0 for k in range(self.n_rewards): L = T.zeros(1).to(self.device) for traj_pair in all_traj_pairs: with T.no_grad(): pos_traj, neg_traj = [(traj[0].tolist(), traj[1]) for traj in traj_pair[0]], [(traj[0].tolist(), traj[1]) for traj in traj_pair[1]] traj_sample_length = min(min(len(pos_traj), len(neg_traj)), self.traj_sample_length) pos_traj, neg_traj = [T.tensor(traj[0]).to(self.device) for traj in pos_traj if traj not in neg_traj], [T.tensor(traj[0]).to(self.device) for traj in neg_traj if traj not in pos_traj] pos_traj = random.sample(pos_traj, traj_sample_length) neg_traj = random.sample(neg_traj, traj_sample_length) pos_input_tensor = T.stack(pos_traj).to(self.device) neg_input_tensor = T.stack(neg_traj).to(self.device) pos_output = self.reward[k].forward(pos_input_tensor) neg_output = self.reward[k].forward(neg_input_tensor) for error_iter in range(len(pos_output)): if pos_output[error_iter] < neg_output[error_iter]: error += 1 L -= T.log(T.exp(pos_output.sum()) / (T.exp(pos_output.sum()) + T.exp(neg_output.sum()))) self.reward_optimizer[k].zero_grad() L.backward(retain_graph = False) self.reward_optimizer[k].step() self.scheduler[k].step() batch_loss += L.detach().item() del L return batch_loss, error
If I don’t run line of L.backward()
, then it is fine. Running L.backward()
gives memory leak. Can anyone please advise me? What is happening and how to solve this.