Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu

I am implementing the A3C algorithm in PyTorch and getting the above error. I construct a shared model and a shared optimizer, which are initially on the CPU.

Then, in each worker, I move the model and all tensors to CUDA, but the error still occurs. Can someone please help with the right way to implement a GPU version of A3C in PyTorch?

Below is my code.

model = ActorCritic(params)
optimizer = SharedAdam(model.parameters(), lr=params.lr)
model.share_memory()
batch = []
jobs = []
for ep in range(100):
    filename = "ind_A3C_" + str(hidden_size) + "_"+ str(layers) + "_" + str(state_dim) + "_" + str(ep)
    print("epoch:",ep)
    f.write("epoch:"+str(ep)+"\n")
    for rank in range(params.num_processes):
        try:
            p = Process(target=train, args=(rank, params, model, optimizer, indices, sc, device, filename))
            jobs.append(p)
            p.start()              
        except Exception as e:
            print(e)
            traceback.print_exc()
            var = traceback.format_exc()
            f.write("exception:\n"+str(var))
    for p in jobs:
        p.join()
    batch = []
    jobs = []
    f.flush()
    model.save(filename, directory="./pytorch_models")

Main training loop:



def train(rank, params, model, optimizer, indices, scaler, device, filename):
    model.to(device)
    
    for ticker in indices:
        data =  pd.read_csv(ticker + '.csv')
        data = data.iloc[int(data.shape[0]*0.4):,:]
        data = Process_Data(data, scaler)
        data = torch.DoubleTensor(np.asarray(data))

        env = ENV(params.num_inputs, params.action_dim, data)
        
        # init training variables
        max_timesteps = data.shape[0] - 2
        state = env.reset().to(device)
        count = 0
        done = True
        episode_length = 0
        while count<max_timesteps-1:
            episode_length += 1
            values = []
            log_probs = []
            rewards = []
            entropies = []
            while count<max_timesteps-1:
                if done:
                    cx = (torch.zeros(params.lstm_layers, 1, params.lstm_size)).to(device)
                    hx = (torch.zeros(params.lstm_layers, 1, params.lstm_size)).to(device)
                else:
                    cx = (cx).to(device)
                    hx = (hx).to(device)

                value, action_values, (hx, cx) = model((state.unsqueeze(0).to(device), (hx, cx)))
                prob = F.softmax(action_values - max(action_values), dim = -1).to(device)
                log_prob = torch.log(prob).reshape(-1,)
                entropy = -(log_prob * prob).sum(1, keepdim=True).to(device)
                entropies.append(entropy)
                m = categorical.Categorical(prob)
                action = m.sample().reshape(-1,)
                log_prob_a = log_prob.gather(0, (action)).to(device)

                state, reward, done = env.step(action)
                reward = reward.to(device)
                value = value.to(device)
                
                count +=1
                
                if done:
                    episode_length = 0
                    state = env.reset()
                    
                
                values.append(value)
                log_probs.append(log_prob_a)
                rewards.append(reward)
                # print(ticker, "rank ",rank," action:",action, "reward ",reward)

                if done:
                    break
                
            R = torch.zeros(1, 1).to(device)
            if not done:
                value, _, _ = model((state.unsqueeze(0).to(device), (hx.to(device), cx.to(device))))
                R = value.data.to(device)
            values.append(R)
            policy_loss = torch.DoubleTensor([0]).to(device)
            value_loss = torch.DoubleTensor([0]).to(device)
            R = (R).to(device)
            gae = torch.zeros(1, 1).to(device)
            for i in reversed(range(len(rewards))):
                R = params.gamma * R.to(device) + rewards[i].to(device)
                advantage = R.to(device) - values[i].to(device)
                value_loss = value_loss.to(device) + 0.5 * advantage.pow(2).to(device)
                TD = rewards[i].to(device) + params.gamma * values[i + 1].data.to(device) - values[i].data.to(device)
                gae = gae.to(device) * params.gamma * params.tau + TD.to(device)
                policy_loss = policy_loss.to(device) - (log_probs[i] * gae).to(device) - 0.01 * entropies[i].to(device)

            optimizer.zero_grad()
            loss = (policy_loss.to(device) + 0.5 * value_loss.to(device)).mean()
            print("loss: ",loss.is_cuda)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 40)
            optimizer.step()
        model.save(filename, directory="./pytorch_models")


    

Traceback:

Traceback (most recent call last):
  File "D:\get_data\ind_a3g.py", line 166, in train
    optimizer.step()
  File "C:\Users\grant\miniconda3\lib\site-packages\torch\autograd\grad_mode.py", line 15, in decorate_context
    return func(*args, **kwargs)
  File "C:\Users\grant\miniconda3\lib\site-packages\torch\optim\adam.py", line 99, in step
    exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

You need to move your data to the same device as your model.
Use input.to(dev), replacing input with the input to your model.
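
For illustration, a minimal, self-contained sketch of that pattern with a toy nn.Linear model (dev, model, and x are placeholder names, not from the code above):

import torch
import torch.nn as nn

dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = nn.Linear(4, 2).to(dev)   # model parameters now live on dev
x = torch.randn(1, 4)             # tensors are created on the CPU by default
out = model(x.to(dev))            # move the input to the model's device before the forward pass
print(out.device)                 # same device as the model

In the train function above, this would mean that state, hx, cx, and anything returned by env.step must be on the same device as the model before being fed into it.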


Hi,

I have changed the implementation as above and I am still getting the same error. Does that mean my model is on the CPU?

There might be some other tensors in the computation graph that are on a different device.
There are a few ways to avoid this; please see Which device is model / tensor stored on?
One way to check which device a tensor is on is tensor.device; you can verify that all of your tensors are on the same device.
You only need to set the device for your inputs and model; the other tensors created during the computation, e.g. the loss, will automatically be on the same device.
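
A minimal, self-contained sketch of that kind of device check, using a toy model and plain torch.optim.Adam rather than the SharedAdam from the question; the optimizer-state check is an assumption prompted by the traceback, which fails inside the Adam update:

import torch
import torch.nn as nn

dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = nn.Linear(4, 2).to(dev)
optimizer = torch.optim.Adam(model.parameters())  # stand-in for SharedAdam

loss = model(torch.randn(1, 4, device=dev)).sum()
loss.backward()
optimizer.step()

# Parameters and their gradients should all report the same device.
for name, p in model.named_parameters():
    print(name, p.device, p.grad.device)

# The traceback fails in exp_avg.mul_(...), i.e. inside the optimizer's state
# buffers, so it is worth printing their devices as well (exp_avg / exp_avg_sq
# for Adam-style optimizers).
for p, state in optimizer.state.items():
    for k, v in state.items():
        if torch.is_tensor(v):
            print(k, v.device)

If the parameters print cuda:0 but the optimizer state prints cpu, the state buffers were created on the CPU (for example by a shared optimizer constructed before the model was moved), which would be consistent with the traceback above.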