I am implementing the A3C algorithm in PyTorch and getting the error shown in the traceback below. I construct a shared model and a shared optimizer, both of which start out on the CPU.
Then, in each worker, I move the model and all tensors to CUDA, but the error still occurs. Can someone please explain the right way to implement a GPU version of A3C in PyTorch?
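For context, my SharedAdam is modelled on the usual pytorch-a3c pattern, where the Adam state buffers are created from the CPU parameters at construction time and placed in shared memory. A simplified sketch of that pattern (not my exact class):

import torch
import torch.optim as optim

class SharedAdam(optim.Adam):
    # Simplified sketch of the common pytorch-a3c SharedAdam: the Adam state is
    # pre-allocated from the (CPU) parameters at construction time and put into
    # shared memory. The real implementation also overrides step() to work with
    # the tensor-valued step counter; that part is omitted here.
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        super(SharedAdam, self).__init__(params, lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = torch.zeros(1)
                state['exp_avg'] = torch.zeros_like(p.data)      # created on the CPU
                state['exp_avg_sq'] = torch.zeros_like(p.data)   # created on the CPU
                state['step'].share_memory_()
                state['exp_avg'].share_memory_()
                state['exp_avg_sq'].share_memory_()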
Below is my code.
model = ActorCritic(params)
optimizer = SharedAdam(model.parameters(), lr=params.lr)
model.share_memory()

batch = []
jobs = []
for ep in range(100):
    filename = "ind_A3C_" + str(hidden_size) + "_" + str(layers) + "_" + str(state_dim) + "_" + str(ep)
    print("epoch:", ep)
    f.write("epoch:" + str(ep) + "\n")
    for rank in range(params.num_processes):
        try:
            p = Process(target=train, args=(rank, params, model, optimizer, indices, sc, device, filename))
            jobs.append(p)
            p.start()
        except Exception as e:
            print(e)
            traceback.print_exc()
            var = traceback.format_exc()
            f.write("exception:\n" + str(var))
    for p in jobs:
        p.join()
    batch = []
    jobs = []
    f.flush()
    model.save(filename, directory="./pytorch_models")
The train function run by each worker:
def train(rank, params, model, optimizer, indices, scaler, device, filename):
    model.to(device)
    for ticker in indices:
        data = pd.read_csv(ticker + '.csv')
        data = data.iloc[int(data.shape[0] * 0.4):, :]
        data = Process_Data(data, scaler)
        data = torch.DoubleTensor(np.asarray(data))
        env = ENV(params.num_inputs, params.action_dim, data)
        # init training variables
        max_timesteps = data.shape[0] - 2
        state = env.reset().to(device)
        done = True
        episode_length = 0
        count = 0
        while count < max_timesteps - 1:
            episode_length += 1
            values = []
            log_probs = []
            rewards = []
            entropies = []
            # collect a rollout
            while count < max_timesteps - 1:
                if done:
                    # reset the LSTM hidden state at episode boundaries
                    cx = torch.zeros(params.lstm_layers, 1, params.lstm_size).to(device)
                    hx = torch.zeros(params.lstm_layers, 1, params.lstm_size).to(device)
                else:
                    cx = cx.to(device)
                    hx = hx.to(device)
                value, action_values, (hx, cx) = model((state.unsqueeze(0).to(device), (hx, cx)))
                prob = F.softmax(action_values - max(action_values), dim=-1).to(device)
                log_prob = torch.log(prob).reshape(-1,)
                entropy = -(log_prob * prob).sum(1, keepdim=True).to(device)
                entropies.append(entropy)
                m = categorical.Categorical(prob)
                action = m.sample().reshape(-1,)
                log_prob_a = log_prob.gather(0, action).to(device)
                state, reward, done = env.step(action)
                reward = reward.to(device)
                value = value.to(device)
                count += 1
                if done:
                    episode_length = 0
                    state = env.reset()
                values.append(value)
                log_probs.append(log_prob_a)
                rewards.append(reward)
                # print(ticker, "rank ", rank, " action:", action, "reward ", reward)
                if done:
                    break
            # bootstrap the return for an unfinished episode
            R = torch.zeros(1, 1).to(device)
            if not done:
                value, _, _ = model((state.unsqueeze(0).to(device), (hx.to(device), cx.to(device))))
                R = value.data.to(device)
            values.append(R)
            policy_loss = torch.DoubleTensor([0]).to(device)
            value_loss = torch.DoubleTensor([0]).to(device)
            R = R.to(device)
            gae = torch.zeros(1, 1).to(device)
            # A3C loss with generalized advantage estimation over the rollout
            for i in reversed(range(len(rewards))):
                R = params.gamma * R.to(device) + rewards[i].to(device)
                advantage = R.to(device) - values[i].to(device)
                value_loss = value_loss.to(device) + 0.5 * advantage.pow(2).to(device)
                TD = rewards[i].to(device) + params.gamma * values[i + 1].data.to(device) - values[i].data.to(device)
                gae = gae.to(device) * params.gamma * params.tau + TD.to(device)
                policy_loss = policy_loss.to(device) - (log_probs[i] * gae).to(device) - 0.01 * entropies[i].to(device)
            optimizer.zero_grad()
            loss = (policy_loss.to(device) + 0.5 * value_loss.to(device)).mean()
            print("loss: ", loss.is_cuda)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 40)
            optimizer.step()
        model.save(filename, directory="./pytorch_models")
Traceback:
Traceback (most recent call last):
File "D:\get_data\ind_a3g.py", line 166, in train
optimizer.step()
File "C:\Users\grant\miniconda3\lib\site-packages\torch\autograd\grad_mode.py", line 15, in decorate_context
return func(*args, **kwargs)
File "C:\Users\grant\miniconda3\lib\site-packages\torch\optim\adam.py", line 99, in step
exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!
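From the traceback it looks like the optimizer's exp_avg buffer is still on the CPU while the gradients are on cuda:0. One direction I can think of is to move the optimizer state onto the device in each worker before calling step(), roughly like the hypothetical helper below, but I am not sure whether that breaks the shared-memory updates that A3C relies on (the state tensors would no longer be the shared CPU ones):

import torch

def optimizer_state_to(optimizer, device):
    # hypothetical helper, not part of my current code: move every tensor in
    # the optimizer state (step, exp_avg, exp_avg_sq) onto the given device
    for state in optimizer.state.values():
        for k, v in state.items():
            if torch.is_tensor(v):
                state[k] = v.to(device)

Or is the right approach to keep the shared model and optimizer on the CPU, train a local copy of the model on the GPU in each worker, and copy the gradients back to the shared CPU model before stepping? Any guidance would be appreciated.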