Hi guys, three weeks ago I ported my “Dueling DDQN” system from Keras (with Theano/TensorFlow backend) to PyTorch. Reason: speed issues. I was hoping to get faster run times with PyTorch. Currently I’m only using my CPU, so speed really matters to me.
I’m using simple test data:
- 2000 episodes over 2000 data points
- on every data point I do one training step (on a random sample) with batch size 32 and 5 inputs (32x5)
I would assume that this should be really fast… but it is not:
Keras/Theano backend: [3.38 s/episode] << fast
Keras/TensorFlow backend: [12.71 s/episode] << slower
PyTorch: [15.13 s/episode] << slowest
Maybe one problem is that the data is currently spread over, and copied between, a deque, NumPy arrays, and PyTorch tensors? Does it make sense to copy ALL the data (2000 x 5) into one tensor up front and take slices from it?
1.) So my first question is: which data has to live in tensors and which doesn’t, and why?
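To make that concrete, here is an untested sketch of what I mean by “one big tensor plus slices”: preallocate the whole replay storage as tensors once and only index into it, instead of rebuilding NumPy arrays from a deque on every step. `TensorMemory` and all names in it are made up for illustration:

```python
import torch

class TensorMemory(object):
    """hypothetical ring buffer that keeps everything in preallocated tensors"""
    def __init__(self, max_size, state_size):
        self.max_size = max_size
        self.pos = 0          # next write position
        self.full = False     # True once the buffer has wrapped around
        self.c_states = torch.zeros(max_size, state_size, dtype=torch.float32)
        self.actions  = torch.zeros(max_size, dtype=torch.int64)
        self.rewards  = torch.zeros(max_size, dtype=torch.float32)
        self.n_states = torch.zeros(max_size, state_size, dtype=torch.float32)
        self.dones    = torch.zeros(max_size, dtype=torch.float32)

    def add(self, c_state, action, reward, n_state, done):
        i = self.pos
        self.c_states[i] = torch.as_tensor(c_state, dtype=torch.float32)
        self.actions[i]  = int(action)
        self.rewards[i]  = float(reward)
        self.n_states[i] = torch.as_tensor(n_state, dtype=torch.float32)
        self.dones[i]    = float(done)
        self.pos = (i + 1) % self.max_size
        if self.pos == 0: self.full = True

    def get_batch(self, batch_size):
        high = self.max_size if self.full else self.pos
        idx = torch.randint(0, high, (batch_size,))  # random row indices
        return (self.c_states[idx], self.actions[idx], self.rewards[idx],
                self.n_states[idx], self.dones[idx])
```

Indexing with an index tensor still copies the selected rows, but it would avoid the deque → list → np.array → torch.from_numpy round trip that my train() below does on every single training step.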
2.) I already asked this on Stack Overflow, but got no answer:
# How does a (py)torch DDQN know, which action it is updating?
The code is quite complex, but I can post parts of it to work this out. Just tell me what else you need ;)…
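To make question 2 concrete: my current understanding is that the network always outputs Q values for ALL actions, and gather() is what selects only the Q value of the action that was actually taken, so only those entries contribute to the loss. A tiny standalone example of what I mean (is this the right mental model?):

```python
import torch

q_all = torch.tensor([[1.0, 2.0, 3.0],   # Q values for 3 actions, batch of 2 states
                      [4.0, 5.0, 6.0]])
actions = torch.tensor([2, 0])           # action index taken in each state

# gather() picks Q(s, a) of the taken action in every row
q_taken = q_all.gather(1, actions.unsqueeze(1)).squeeze(1)
print(q_taken)  # tensor([3., 4.])

# the loss only sees the gathered entries; in the real code q_all comes from
# the model, so backward() only pushes gradients into those two Q outputs
targets = torch.tensor([2.5, 4.5])
loss = torch.nn.functional.mse_loss(q_taken, targets)
```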
Many thanks!
DDQN module:
##################################################################################
import torch
import torch.nn as nn
#////////////////////////////////////////////////
LINE = "#" * 80  # print separator; defined elsewhere in my real code
##################################################################################
class DQNTorch(nn.Module):
    #////////////////////////////////////////////////
    def __init__(self, num_inputs, num_outputs, num_neurons, dueling):
        #////////////////////////////////////////////////
        super(DQNTorch, self).__init__()
        #////////////////////////////////////////////////
        self.feature = None
        self.value = None
        self.advantage = None
        self.dueling = dueling
        #////////////////////////////////////////////////
        if(self.dueling):
            #////////////////////////////////////////////////
            # dueling: separate value/advantage streams, combined in forward()
            #////////////////////////////////////////////////
            self.feature = nn.Sequential(
                nn.Linear(num_inputs, num_neurons),
                nn.ReLU(),
                nn.Linear(num_neurons, num_neurons),
                nn.ReLU()
            )
            self.value = nn.Sequential(
                nn.Linear(num_neurons, num_neurons),
                nn.ReLU(),
                nn.Linear(num_neurons, 1)
            )
            self.advantage = nn.Sequential(
                nn.Linear(num_neurons, num_neurons),
                nn.ReLU(),
                nn.Linear(num_neurons, num_outputs)
            )
        #////////////////////////////////////////////////
        else:
            #////////////////////////////////////////////////
            # non-dueling: one more linear layer as output
            #////////////////////////////////////////////////
            self.feature = nn.Sequential(
                nn.Linear(num_inputs, num_neurons),
                nn.ReLU(),
                nn.Linear(num_neurons, num_neurons),
                nn.ReLU(),
                nn.Linear(num_neurons, num_outputs)
            )
    #////////////////////////////////////////////////
    def forward(self, state):
        #////////////////////////////////////////////////
        # model(x) == model.forward(x)
        #////////////////////////////////////////////////
        res = None
        #////////////////////////////////////////////////
        if(self.dueling):
            #////////////////////////////////////////////////
            fea = self.feature(state)
            val = self.value(fea)
            adv = self.advantage(fea)
            #////////////////////////////////////////////////
            # combine per sample: mean over the ACTION dim (dim=1), not over
            # everything -- adv.mean() without dim mixes samples across the batch
            res = val + (adv - adv.mean(dim=1, keepdim=True))
        #////////////////////////////////////////////////
        else:
            #////////////////////////////////////////////////
            res = self.feature(state)
        #////////////////////////////////////////////////
        return(res)
##################################################################################
class DQNetwork(object):
    #////////////////////////////////////////////////
    """DQNetwork - Deep Q Network"""
    #////////////////////////////////////////////////
    def __init__(self, SYS, target):
        #////////////////////////////////////////////////
        """DQNetwork - Deep Q Network"""
        #////////////////////////////////////////////////
        self.class_name = self.__class__.__name__
        #////////////////////////////////////////////////
        self.num_inputs = SYS.NET['STATE_SIZE']
        self.num_neurons = SYS.NET['NEURONS']
        self.num_outputs = SYS.NET['ACTION_SIZE']
        self.batch_size = SYS.NET["FIT_BATCH_SIZE"]
        self.learn_rate = SYS.NET['LEARNING_RATE']
        self.dueling = (SYS.NET['DUELING'] == 1)
        self.verbose = SYS.NET["VERBOSE"]
        self.target = target
        #////////////////////////////////////////////////
        # self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        #////////////////////////////////////////////////
        self.model = DQNTorch(self.num_inputs, self.num_outputs, self.num_neurons, self.dueling)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learn_rate)
        self.get_loss = nn.MSELoss()
        #////////////////////////////////////////////////
        print(LINE)
        print(f"Init {self.class_name}({SYS.NET['NAME']}) with params: {self.__dict__}")
    #////////////////////////////////////////////////
    def fit(self, current_q, expected_q):
        #////////////////////////////////////////////////
        """fit the model aka minimize the loss"""
        #////////////////////////////////////////////////
        self.optimizer.zero_grad()
        #////////////////////////////////////////////////
        loss = self.get_loss(current_q, expected_q)
        #////////////////////////////////////////////////
        loss.backward()
        #////////////////////////////////////////////////
        self.optimizer.step()
        #////////////////////////////////////////////////
        res = loss.item()
        #////////////////////////////////////////////////
        return(res)
    #////////////////////////////////////////////////
    def predict(self, state):
        #////////////////////////////////////////////////
        """predict Q values (for actions) on ONE state or a BATCH of states"""
        #////////////////////////////////////////////////
        res = self.model(state)
        #////////////////////////////////////////////////
        return(res)
##################################################################################
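For reference, a quick shape check of the net above (toy numbers for the sizes, just to show what forward() returns):

```python
net = DQNTorch(num_inputs=5, num_outputs=3, num_neurons=64, dueling=True)
states = torch.zeros(32, 5)          # batch of 32 states with 5 features
print(net(states).shape)             # torch.Size([32, 3]) -> one Q value per action
print(net(torch.zeros(1, 5)).shape)  # torch.Size([1, 3]) -> a single state also works
```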
MEMORY module and parts of the AGENT module:
##################################################################################
import random
import numpy as np
import torch
from collections import deque
##################################################################################
class MEMORY_D(object):
    #////////////////////////////////////////////////
    """memory in a deque"""
    #////////////////////////////////////////////////
    def __init__(self, max_size):
        #////////////////////////////////////////////////
        """memory"""
        #////////////////////////////////////////////////
        self.buffer = deque(maxlen=max_size)  # deque from collections
    #////////////////////////////////////////////////
    def add(self, obj):
        #////////////////////////////////////////////////
        self.buffer.append(obj)
    #////////////////////////////////////////////////
    def get_batch(self, batch_size):
        #////////////////////////////////////////////////
        res = random.sample(self.buffer, k=batch_size)
        #////////////////////////////////////////////////
        return(res)
    #////////////////////////////////////////////////
    def get_size(self):
        #////////////////////////////////////////////////
        return(len(self.buffer))
##################################################################################
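Each entry I store is a plain tuple, which is what the item[0] … item[4] indexing in train() below relies on. Roughly like this (the values here are made up for the example):

```python
MEM = MEMORY_D(max_size=10000)
# one transition = (current state, action index, reward, next state, done flag)
MEM.add((np.zeros(5, dtype=np.float32), 1, 0.5, np.ones(5, dtype=np.float32), 0))
batch = MEM.get_batch(batch_size=1)  # list of sampled transition tuples
```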
class DQNAgent(object):
    #////////////////////////////////////////////////
    """DDQNAgent"""
    #////////////////////////////////////////////////
    def __init__(self, SYS):
        #////////////////////////////////////////////////
        """load env and init agent"""
        #////////////////////////////////////////////////
        SYS.AGT["NAME"] = self.__class__.__name__
        #////////////////////////////////////////////////
        self.MEM = MEMORY_D(max_size=SYS.AGT["MEMORY_MAX"])
        self.RAM = None  # todo: preallocate?
        #////////////////////////////////////////////////
        self.DQN = DQNetwork(SYS, target=False)
        self.TAR = DQNetwork(SYS, target=True)
        #////////////////////////////////////////////////
        self.UDTAU = SYS.AGT['TAR_UPD_TAU']
        self.GAMMA = SYS.AGT["GAMMA"]
        #////////////////////////////////////////////////
        self.state_size = SYS.NET['STATE_SIZE']
        self.action_size = SYS.NET['ACTION_SIZE']
        #////////////////////////////////////////////////
        self.target_mode = SYS.AGT["TARGET_MODE"]
        self.batch_size = SYS.AGT["MEM_BATCH_SIZE"]
        #////////////////////////////////////////////////
        self.update_every = SYS.AGT['TAR_UPD_EVERY']
        self.update_count = 0
        #////////////////////////////////////////////////
        # preallocate vars (note: train() re-creates these from numpy every
        # step anyway, so the shapes here just mirror what train() assigns)
        #////////////////////////////////////////////////
        self.state = torch.zeros(1, self.state_size, dtype=torch.float32)
        self.action = 0
        #////////////////////////////////////////////////
        self.c_states = torch.zeros(self.batch_size, self.state_size, dtype=torch.float32)
        self.actions = torch.zeros(self.batch_size, dtype=torch.int64)
        self.rewards = torch.zeros(self.batch_size, dtype=torch.float32)
        self.n_states = torch.zeros(self.batch_size, self.state_size, dtype=torch.float32)
        self.dones = torch.zeros(self.batch_size, dtype=torch.int64)
        #////////////////////////////////////////////////
        self.DQN.model.eval()
        self.TAR.model.eval()
        #////////////////////////////////////////////////
        self.update_TAR_hard()  # copy weights from DQN to TAR
        #////////////////////////////////////////////////
        print(LINE)
        print("Init", SYS.AGT["NAME"], "with params: {}".format(self.__dict__))
#////////////////////////////////////////////////
    def train(self, SYS):
        #////////////////////////////////////////////////
        """train the agent on a MINIBATCH"""
        #////////////////////////////////////////////////
        loss = 0.0
        #////////////////////////////////////////////////
        if(self.MEM.get_size() <= self.batch_size): return(0)
        #////////////////////////////////////////////////
        # possible bottleneck: deque -> list -> np.array -> tensor on EVERY step
        # (a list is ~3.3x faster than a deque for random access!)
        #////////////////////////////////////////////////
        self.RAM = self.MEM.get_batch(batch_size=self.batch_size)
        #////////////////////////////////////////////////
        self.c_states = torch.from_numpy(np.array([item[0] for item in self.RAM], dtype=np.float32))
        self.actions = torch.from_numpy(np.array([item[1] for item in self.RAM], dtype=np.int64))
        self.rewards = torch.from_numpy(np.array([item[2] for item in self.RAM], dtype=np.float32))
        self.n_states = torch.from_numpy(np.array([item[3] for item in self.RAM], dtype=np.float32))
        self.dones = torch.from_numpy(np.array([item[4] for item in self.RAM], dtype=np.int64))
        #////////////////////////////////////////////////
        # online net: Q(s, a) of the TAKEN actions; gradients are needed here
        q_values = self.DQN.predict(self.c_states).gather(1, self.actions.unsqueeze(1)).squeeze(1)
        #////////////////////////////////////////////////
        # target side: no gradients needed, so keep it out of the graph
        with torch.no_grad():
            dqn_next = self.DQN.predict(self.n_states)
            q_action = torch.argmax(dqn_next, dim=1)  # DDQN: online net picks the action,
            tar_next = self.TAR.predict(self.n_states).gather(1, q_action.unsqueeze(1)).squeeze(1)
            q_target = self.rewards + (self.GAMMA * tar_next * (1 - self.dones))  # target net evaluates it
        #////////////////////////////////////////////////
        self.DQN.model.train()
        #////////////////////////////////////////////////
        loss = self.DQN.fit(current_q=q_values, expected_q=q_target)
        #////////////////////////////////////////////////
        self.DQN.model.eval()
        #////////////////////////////////////////////////
        self.update_TAR()  # update TAR from DQN every N steps
        #////////////////////////////////////////////////
        return(loss)
##################################################################################
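update_TAR_hard() and update_TAR() are part of the AGENT module I did not paste. Simplified versions so the snippet above is complete (in my real code TARGET_MODE selects between the hard copy and a soft/Polyak update with UDTAU):

```python
    def update_TAR_hard(self):
        """copy ALL weights from DQN to TAR"""
        self.TAR.model.load_state_dict(self.DQN.model.state_dict())

    def update_TAR_soft(self):
        """Polyak averaging: TAR = TAU * DQN + (1 - TAU) * TAR"""
        for tar_p, dqn_p in zip(self.TAR.model.parameters(), self.DQN.model.parameters()):
            tar_p.data.copy_(self.UDTAU * dqn_p.data + (1.0 - self.UDTAU) * tar_p.data)

    def update_TAR(self):
        """hard-copy DQN to TAR every N training steps"""
        self.update_count += 1
        if self.update_count % self.update_every == 0:
            self.update_TAR_hard()
```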