Learning time increases over time

Hello,

I am training several networks at the same time, and as the episodes go by, training takes longer and longer. From the 10th to the 90th episode, the total training time across all networks goes from 6.42 seconds to 13.7 seconds. What could be the reason for this increase? Thank you in advance.

# dqn class
    def updateSARSD(self, state, action, reward, next_state, done):
        self.remember(state, action, reward, next_state, int(done) )
        # take one learning step (this optimizes the eval network;
        # the target network is only synced periodically inside learn())
        self.learn()

    def learn(self):

        if self.memory.mem_cntr < self.batch_size * 2:
            return

        self.q_eval.optimizer.zero_grad()
        self.replace_target_network()

        states, actions, rewards, next_states, dones = self.sample_memory()

        indices = np.arange(self.batch_size)
        # Q-values predicted by the online network for the actions actually taken
        q_pred = self.q_eval.forward(states)[indices, actions]
        # greedy Q-values of the target network for the next states
        q_next = self.q_next.forward(next_states).max(dim=1)[0]
        # no bootstrapping from terminal states
        q_next[dones] = 0.0
        q_target = rewards + self.gamma * q_next

        loss = self.q_eval.loss(q_target, q_pred).to(self.q_eval.device)
        loss.backward()
        self.q_eval.optimizer.step()

        self.learn_step_counter += 1
        self.decrement_epsilon()

        return loss

    def replace_target_network(self):

        if self.learn_step_counter % self.replace_target_cnt == 0:
            self.q_next.load_state_dict(self.q_eval.state_dict())

    def remember(self, *args):
        self.memory.push(*args)

    def sample_memory(self):

        state, action, reward, new_state, done = self.memory.sample(self.batch_size)

        states = torch.tensor(state, dtype=torch.float, device=self.q_eval.device)
        rewards = torch.tensor(reward, dtype=torch.float, device=self.q_eval.device)
        dones = torch.tensor(done)
        actions = torch.tensor(action)
        next_states = torch.tensor(new_state, dtype=torch.float, device=self.q_eval.device)

        return states, actions, rewards, next_states, dones

# memory_buffer class

    def push(self, state, action, reward, new_state, done):

        index = self.mem_cntr % self.mem_size
        self.state_memory[index] = state
        self.action_memory[index] = action
        self.reward_memory[index] = reward
        self.new_state_memory[index] = new_state
        self.terminal_memory[index] = done
        self.mem_cntr += 1

    def sample(self, batch_size):

        max_mem = min(self.mem_cntr, self.mem_size)
        batch = np.random.choice(max_mem, batch_size, replace=False)  # replace=False so the same transition is not sampled twice

        states = self.state_memory[batch]
        actions = self.action_memory[batch]
        rewards = self.reward_memory[batch]
        next_states = self.new_state_memory[batch]
        dones = self.terminal_memory[batch]

        return states, actions, rewards, next_states, dones
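
For context, the per-episode figures above come from a loop roughly like the following simplified sketch; it is illustrative only, and agents, envs, n_episodes and choose_action are placeholder names rather than the actual code:

# training loop (simplified, timing only)

import time

for episode in range(n_episodes):
    learn_time = 0.0
    for agent, env in zip(agents, envs):
        state = env.reset()
        done = False
        while not done:
            action = agent.choose_action(state)
            next_state, reward, done, _ = env.step(action)
            start = time.perf_counter()
            agent.updateSARSD(state, action, reward, next_state, done)
            learn_time += time.perf_counter() - start
            state = next_state
    print(f"episode {episode}: time spent in updateSARSD/learn = {learn_time:.2f}s")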

Have you detached the “action” tensor before storing it with remember, which is called from updateSARSD?
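
If action (or any of the other values passed to remember) is still a tensor carrying its autograd graph, every transition pushed into the buffer keeps that graph and its intermediate tensors alive, so memory use grows with every stored transition, and if those graphs end up connected to the loss, the backward pass has more and more to traverse. Either effect would match the gradual slowdown you describe. A minimal sketch of detaching before storing, to slot into the dqn class (the _to_numpy helper is made up for illustration and assumes the same torch/numpy imports as the rest of your code):

# dqn class (sketch of the storing step only)

    def _to_numpy(self, x):
        # drop any autograd graph and convert the data to plain numpy
        # before it goes into the replay buffer
        if torch.is_tensor(x):
            return x.detach().cpu().numpy()
        return np.asarray(x)

    def updateSARSD(self, state, action, reward, next_state, done):
        # store graph-free copies so the buffer never keeps references
        # to intermediate tensors from earlier forward passes
        self.remember(self._to_numpy(state),
                      self._to_numpy(action),
                      float(reward),
                      self._to_numpy(next_state),
                      int(done))
        self.learn()

If everything going into the buffer is already a plain numpy array or Python number, another common cause of the same symptom is accumulating the loss tensor returned by learn() in a Python list instead of storing loss.item().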