Hello,
I am training several networks at the same time. As episodes pass, each training step takes longer: from the 10th to the 90th episode, the total training time across all networks grows from 6.42 seconds to 13.7 seconds. What could be the reason for this increase? Thank you in advance.
# dqn class
def updateSARSD(self, state, action, reward, next_state, done):
    """Store one (state, action, reward, next_state, done) transition and run a learning step."""
    # The terminal flag is normalized to 0/1 before it reaches the buffer.
    self.remember(state, action, reward, next_state, int(done))
    # One optimization step on the evaluation network.
    self.learn()
def learn(self):
    """Run one DQN optimization step on q_eval against the q_next target network.

    Returns the loss tensor, or None when the replay buffer does not yet
    hold at least 2 * batch_size transitions.
    """
    # Warm-up guard: skip learning until the buffer has enough samples.
    if self.memory.mem_cntr < self.batch_size * 2:
        return
    self.q_eval.optimizer.zero_grad()
    # Periodically copy q_eval weights into the target network.
    self.replace_target_network()
    states, actions, rewards, next_states, dones = self.sample_memory()
    indices = np.arange(self.batch_size)
    # Q(s, a) for the actions actually taken in the batch.
    q_pred = self.q_eval.forward(states)[indices,actions]
    # max_a' Q_target(s', a') from the frozen target network.
    q_next = self.q_next.forward(next_states).max(dim=1)[0]
    # Zero the bootstrap term for terminal transitions.
    # NOTE(review): this assumes `dones` is a boolean tensor — if it carries
    # integer dtype, this line index-selects instead of masking; confirm the
    # dtype produced by sample_memory/terminal_memory.
    q_next[dones] = 0.0
    q_target = rewards + self.gamma * q_next
    # NOTE(review): argument order is (target, pred); harmless for MSE, but
    # reversed from the usual (input, target) convention — confirm the loss
    # in use is symmetric. The trailing .to(device) on the scalar loss is a
    # no-op when all inputs already live on that device.
    loss = self.q_eval.loss(q_target, q_pred).to(self.q_eval.device)
    loss.backward()
    self.q_eval.optimizer.step()
    self.learn_step_counter += 1
    self.decrement_epsilon()
    return loss
def replace_target_network(self):
    """Sync the target network with q_eval every replace_target_cnt learn steps."""
    due = self.learn_step_counter % self.replace_target_cnt == 0
    if due:
        self.q_next.load_state_dict(self.q_eval.state_dict())
def remember(self, *args):
    """Forward a transition tuple straight into the replay buffer."""
    self.memory.push(*args)
def sample_memory(self):
    """Sample a batch from the replay buffer and wrap it in torch tensors.

    States, rewards and next states are cast to float and moved to the
    q_eval device; actions and done flags keep their source dtypes and
    stay on the default device.
    """
    raw = self.memory.sample(self.batch_size)
    raw_states, raw_actions, raw_rewards, raw_next_states, raw_dones = raw
    dev = self.q_eval.device
    states = torch.tensor(raw_states, dtype=torch.float, device=dev)
    next_states = torch.tensor(raw_next_states, dtype=torch.float, device=dev)
    rewards = torch.tensor(raw_rewards, dtype=torch.float, device=dev)
    actions = torch.tensor(raw_actions)
    dones = torch.tensor(raw_dones)
    return states, actions, rewards, next_states, dones
# memory_buffer class
def push(self, state, action, reward, new_state, done):
    """Write one transition into the circular replay buffer.

    Once mem_cntr exceeds mem_size, the oldest slot is overwritten.
    """
    slot = self.mem_cntr % self.mem_size  # wrap-around index
    self.state_memory[slot] = state
    self.new_state_memory[slot] = new_state
    self.action_memory[slot] = action
    self.reward_memory[slot] = reward
    self.terminal_memory[slot] = done
    self.mem_cntr += 1
def sample(self, batch_size):
    """Return a uniform random batch of transitions, without repeats.

    Performance fix: np.random.choice(max_mem, batch_size, replace=False)
    internally permutes ALL max_mem indices, so each call costs O(max_mem).
    As the replay buffer fills over the course of training, every learn
    step gets progressively slower — which matches the reported increase
    in per-episode training time. random.sample(range(max_mem), batch_size)
    draws the same uniform sample without replacement in O(batch_size),
    independent of how full the buffer is. (Note: this changes the RNG
    stream from numpy's to the stdlib's, so np.random.seed alone no longer
    makes sampling reproducible.)
    """
    import random  # stdlib; local import keeps the file's import block untouched

    # Only the slots that have actually been written are valid to sample.
    max_mem = min(self.mem_cntr, self.mem_size)
    batch = np.array(random.sample(range(max_mem), batch_size))
    states = self.state_memory[batch]
    actions = self.action_memory[batch]
    rewards = self.reward_memory[batch]
    next_states = self.new_state_memory[batch]
    dones = self.terminal_memory[batch]
    return states, actions, rewards, next_states, dones