Runtime error: MSELoss expects the same dtype

Hey there,

I get the following runtime error:

Traceback (most recent call last):
  File "ppo_witches_multi2.py", line 420, in <module>
    learn_single(ppo1, update_timestep, eps_decay, env)
  File "ppo_witches_multi2.py", line 269, in learn_single
    ppo.my_update(memory)
  File "ppo_witches_multi2.py", line 174, in my_update
    loss.mean().backward()
  File "/home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/tensor.py", line 198, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/autograd/__init__.py", line 100, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: expected dtype Double but got dtype Float (validate_dtype at /pytorch/aten/src/ATen/native/TensorIterator.cpp:143)
frame #0: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x46 (0x7fa5570a6536 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libc10.so)
frame #1: at::TensorIterator::compute_types() + 0xce3 (0x7fa5947c6183 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #2: at::TensorIterator::build() + 0x44 (0x7fa5947c8b64 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #3: at::native::mse_loss_backward_out(at::Tensor&, at::Tensor const&, at::Tensor const&, at::Tensor const&, long) + 0x193 (0x7fa594616b93 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #4: <unknown function> + 0x10b7db7 (0x7fa594a42db7 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #5: at::native::mse_loss_backward(at::Tensor const&, at::Tensor const&, at::Tensor const&, long) + 0x172 (0x7fa59461f2d2 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #6: <unknown function> + 0x109e76f (0x7fa594a2976f in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #7: <unknown function> + 0x10c3c76 (0x7fa594a4ec76 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #8: <unknown function> + 0x2a9eceb (0x7fa596429ceb in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #9: <unknown function> + 0x10c3c76 (0x7fa594a4ec76 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #10: torch::autograd::generated::MseLossBackward::apply(std::vector<at::Tensor, std::allocator<at::Tensor> >&&) + 0x1f7 (0x7fa596231787 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #11: <unknown function> + 0x2d89c05 (0x7fa596714c05 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #12: torch::autograd::Engine::evaluate_function(std::shared_ptr<torch::autograd::GraphTask>&, torch::autograd::Node*, torch::autograd::InputBuffer&) + 0x16f3 (0x7fa596711f03 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #13: torch::autograd::Engine::thread_main(std::shared_ptr<torch::autograd::GraphTask> const&, bool) + 0x3d2 (0x7fa596712ce2 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #14: torch::autograd::Engine::thread_init(int) + 0x39 (0x7fa59670b359 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #15: torch::autograd::python::PythonEngine::thread_init(int) + 0x38 (0x7fa5a2e4a4d8 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_python.so)
frame #16: <unknown function> + 0xbd6df (0x7fa5a3f1b6df in /usr/lib/x86_64-linux-gnu/libstdc++.so.6)
frame #17: <unknown function> + 0x76db (0x7fa5a64b06db in /lib/x86_64-linux-gnu/libpthread.so.0)
frame #18: clone + 0x3f (0x7fa5a67e988f in /lib/x86_64-linux-gnu/libc.so.6)

After using .float(), this error occurs instead:

Traceback (most recent call last):
  File "ppo_witches_multi2.py", line 424, in <module>
    learn_single(ppo1, update_timestep, eps_decay, env)
  File "ppo_witches_multi2.py", line 273, in learn_single
    ppo.my_update(memory)
  File "ppo_witches_multi2.py", line 178, in my_update
    loss.mean().backward()
  File "/home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/tensor.py", line 198, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/autograd/__init__.py", line 100, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: [enforce fail at CPUAllocator.cpp:64] . DefaultCPUAllocator: can't allocate memory: you tried to allocate 517246976 bytes. Error code 12 (Cannot allocate memory)
frame #0: c10::ThrowEnforceNotMet(char const*, int, char const*, std::string const&, void const*) + 0x47 (0x7f4d434294f7 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libc10.so)
frame #1: c10::alloc_cpu(unsigned long) + 0x1f1 (0x7f4d43411201 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libc10.so)
frame #2: <unknown function> + 0x17f4a (0x7f4d43412f4a in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libc10.so)
frame #3: THStorage_resize + 0x3b (0x7f4d80ed867b in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #4: at::native::resize_(at::Tensor&, c10::ArrayRef<long>, c10::optional<c10::MemoryFormat>) + 0x4ab (0x7f4d80acad3b in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #5: <unknown function> + 0xdbe5d3 (0x7f4d80acb5d3 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #6: at::native::mm_cpu_out(at::Tensor&, at::Tensor const&, at::Tensor const&) + 0x27b (0x7f4d80887ceb in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #7: at::native::mm_cpu(at::Tensor const&, at::Tensor const&) + 0x177 (0x7f4d808892f7 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #8: <unknown function> + 0x107fdb8 (0x7f4d80d8cdb8 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #9: <unknown function> + 0x10c3ec0 (0x7f4d80dd0ec0 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #10: <unknown function> + 0x2c9b6fe (0x7f4d829a86fe in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #11: <unknown function> + 0x10c3ec0 (0x7f4d80dd0ec0 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #12: at::Tensor c10::Dispatcher::callUnboxed<at::Tensor, at::Tensor const&, at::Tensor const&>(c10::OperatorHandle const&, at::Tensor const&, at::Tensor const&) const + 0xb3 (0x7f4d8ef08ca3 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_python.so)
frame #13: <unknown function> + 0x28ac337 (0x7f4d825b9337 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #14: torch::autograd::generated::AddmmBackward::apply(std::vector<at::Tensor, std::allocator<at::Tensor> >&&) + 0x134 (0x7f4d825f4004 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #15: <unknown function> + 0x2d89c05 (0x7f4d82a96c05 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #16: torch::autograd::Engine::evaluate_function(std::shared_ptr<torch::autograd::GraphTask>&, torch::autograd::Node*, torch::autograd::InputBuffer&) + 0x16f3 (0x7f4d82a93f03 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #17: torch::autograd::Engine::thread_main(std::shared_ptr<torch::autograd::GraphTask> const&, bool) + 0x3d2 (0x7f4d82a94ce2 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #18: torch::autograd::Engine::thread_init(int) + 0x39 (0x7f4d82a8d359 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #19: torch::autograd::python::PythonEngine::thread_init(int) + 0x38 (0x7f4d8f1cc4d8 in /home/mlamprecht/Documents/witches_env/lib/python3.6/site-packages/torch/lib/libtorch_python.so)
frame #20: <unknown function> + 0xbd6df (0x7f4d9029d6df in /usr/lib/x86_64-linux-gnu/libstdc++.so.6)
frame #21: <unknown function> + 0x76db (0x7f4d928326db in /lib/x86_64-linux-gnu/libpthread.so.0)
frame #22: clone + 0x3f (0x7f4d92b6b88f in /lib/x86_64-linux-gnu/libc.so.6)

To me, this error seems to be related to the other one. I could somehow work around it using .float():

        # Optimize policy for K epochs:
        for _ in range(self.K_epochs):
            # Evaluating old actions and values :
            logprobs, state_values, dist_entropy = self.policy.evaluate(old_states, old_actions)
            advantages = rewards - state_values.detach()

            rewards    = rewards.float()
            advantages = advantages.float()
            loss       =  self.calculate_total_loss(state_values, logprobs, old_logprobs, advantages, rewards, dist_entropy)


            # take gradient step
            self.optimizer.zero_grad()
            loss.mean().backward()
            self.optimizer.step()

However, using this method the error just pops up an hour later in training. I do not know why, but I think it is because I use numpy rewards and somehow they are not float anymore…

Could it be a rounding problem? What else did I miss converting with .float()?

Another strange thing is that this problem did not occur for four weeks, but now it is there.
On another PC the process just got killed, with no error message at all.

Any ideas on debugging would be great :slight_smile:

The second error message seems to be a bit weird, as you are “only” trying to allocate ~500MB.
Could you check the RAM usage and verify that you are really running out of memory?

The default data type of numpy is float64, so these float/double mismatches are often raised if you don’t convert the numpy array to a FloatTensor (PyTorch uses float32 by default).
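
For example, a minimal sketch with made-up values:

import numpy as np
import torch

rewards_np = np.array([1.0, -0.5, 2.0])         # numpy creates float64 by default
rewards = torch.from_numpy(rewards_np).float()  # cast to float32 to match the model parameters
print(rewards_np.dtype, rewards.dtype)          # float64 torch.float32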

(Cannot allocate memory)

That means I do not have enough RAM left… (I still have enough disk space: 3.6 GB)

My free RAM decreases during training from 28141 MB down to 5000 MB and lower.

# at the start:
 free -m
              total        used        free      shared  buff/cache   available
Mem:          31981        1862       28141         668        1977       29026
Swap:           976           0         976

@ptrblck I have now included:

torch.from_numpy(rewards["final_rewards"]).float()

Still, the memory error remains…

How can I force PyTorch not to use so much memory?

You would have to lower the memory footprint, e.g. by reducing the batch size during training or by using a smaller model.
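
E.g. something along these lines, as a rough sketch with made-up sizes (not your actual model), iterating over minibatches of the stored data instead of the full batch:

import torch
import torch.nn as nn

model = nn.Linear(303, 1)                      # hypothetical model / sizes
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

states = torch.randn(4096, 303)                # stand-in for the stored rollout
targets = torch.randn(4096, 1)

batch_size = 256
for start in range(0, states.size(0), batch_size):
    batch = slice(start, start + batch_size)   # backward over a minibatch only,
    optimizer.zero_grad()                      # so peak memory stays smaller
    loss = criterion(model(states[batch]), targets[batch])
    loss.backward()
    optimizer.step()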

However, let’s first make sure you are not leaking memory.
Are you running out of memory in the first iteration or are you seeing an increase in the memory usage during training?
In the latter case, check if you are storing any tensors that might still be attached to the computation graph.
E.g. storing the loss for printing purposes without detaching it will increase the memory usage:

output = model(input)
loss = criterion(output, target)
losses.append(loss)  # keeps the computation graph alive; use losses.append(loss.detach()) instead
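
Alternatively, loss.item() returns a plain Python float, which also avoids keeping the graph alive:

losses.append(loss.item())  # stores a Python number instead of a tensor attached to the graph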

@ptrblck
I see an increase in the memory usage during training.

OK, so I guess in this case I have to check that I am not storing any tensors…

Hm, I only made small modifications to this minimal PPO implementation.

So far I have not found any such storage problem in my code…


import numpy as np
import torch
import torch.nn as nn
from torch.distributions import Categorical


class Memory:
    def __init__(self):
        self.actions = []
        self.states = []
        self.logprobs = []
        self.rewards = []
        self.is_terminals = []

    def clear_memory(self):
        del self.actions[:]
        del self.states[:]
        del self.logprobs[:]
        del self.rewards[:]
        del self.is_terminals[:]

#Actor Model:
class ActorMod(nn.Module):
    def __init__(self, state_dim, action_dim, n_latent_var):
        super(ActorMod, self).__init__()
        self.a_dim   = action_dim
        self.l1      = nn.Linear(state_dim, n_latent_var)
        self.l1_tanh = nn.PReLU()
        self.l2      = nn.Linear(n_latent_var, n_latent_var)
        self.l2_tanh = nn.PReLU()
        self.l3      = nn.Linear(n_latent_var+action_dim, action_dim)

    def forward(self, input):
        x = self.l1(input)
        x = self.l1_tanh(x)
        x = self.l2(x)
        out1 = self.l2_tanh(x) # 64x1
        if len(input.shape)==1:
            out2 = input[self.a_dim*3:self.a_dim*4]   # 60x1 this are the available options of the active player!
            output = torch.cat([out1, out2], 0)
        else:
            out2 = input[:, self.a_dim*3:self.a_dim*4]
            output = torch.cat([out1, out2], 1)
        x = self.l3(output)
        return x.softmax(dim=-1)


class ActorCritic(nn.Module):
    def __init__(self, state_dim, action_dim, n_latent_var):
        super(ActorCritic, self).__init__()

        # actor
        #TODO see question: https://discuss.pytorch.org/t/pytorch-multiple-inputs-in-sequential/74040
        self.action_layer = ActorMod(state_dim, action_dim, n_latent_var)

        # critic
        self.value_layer = nn.Sequential(
                nn.Linear(state_dim, n_latent_var),
                nn.PReLU(),#prelu
                nn.Linear(n_latent_var, n_latent_var),
                nn.PReLU(),
                nn.Linear(n_latent_var, 1)
                )

    def forward(self, state_input):
        he = self.act(state_input, None)
        returned_tensor = torch.zeros(1, 2)
        returned_tensor[:, 0] = he#.item()
        return returned_tensor

    def act(self, state, memory):
        if type(state) is np.ndarray:
            state = torch.from_numpy(state).float()
        action_probs = self.action_layer(state)
        # here make a filter for only possible actions!
        #action_probs = action_probs *state[120:180]
        dist = Categorical(action_probs)
        action = dist.sample()

        if memory is not None:
            memory.states.append(state)
            memory.actions.append(action)
            memory.logprobs.append(dist.log_prob(action))

        return action.item()

    def evaluate(self, state, action):
        action_probs = self.action_layer(state)
        dist = Categorical(action_probs)

        action_logprobs = dist.log_prob(action)
        dist_entropy = dist.entropy()

        state_value = self.value_layer(state)

        return action_logprobs, torch.squeeze(state_value), dist_entropy

class PPO:
    def __init__(self, state_dim, action_dim, n_latent_var, lr, betas, gamma, K_epochs, eps_clip, lr_decay=1000000):
        self.lr = lr
        self.betas = betas
        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs

        self.policy = ActorCritic(state_dim, action_dim, n_latent_var)
        self.optimizer = torch.optim.Adam(self.policy.parameters(), lr=lr, betas=betas, eps=1e-5) # no eps before!
        self.policy_old = ActorCritic(state_dim, action_dim, n_latent_var)
        self.policy_old.load_state_dict(self.policy.state_dict())
        #TO decay learning rate during training:
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=lr_decay, gamma=0.9)
        self.MseLoss = nn.MSELoss() # MSELossFlat # SmoothL1Loss

    def monteCarloRewards(self, memory):
        # Monte Carlo estimate of state rewards:
        # see: https://medium.com/@zsalloum/monte-carlo-in-reinforcement-learning-the-easy-way-564c53010511
        rewards = []
        discounted_reward = 0
        for reward, is_terminal in zip(reversed(memory.rewards), reversed(memory.is_terminals)):
            if is_terminal:
                discounted_reward = 0
            discounted_reward = reward + (self.gamma * discounted_reward)
            rewards.insert(0, discounted_reward)

        # Normalizing the rewards:
        rewards = torch.tensor(rewards)   # note: becomes float64 (Double) if memory.rewards holds numpy float64 values
        rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-5)  # normalize
        return rewards

    def calculate_total_loss(self, state_values, logprobs, old_logprobs, advantage, rewards, dist_entropy):
        # 1. Calculate how much the policy has changed                # Finding the ratio (pi_theta / pi_theta__old):
        ratios = torch.exp(logprobs - old_logprobs.detach())
        # 2. Calculate Actor loss as minimum of 2 functions
        surr1       = ratios * advantage
        surr2       = torch.clamp(ratios, 1-self.eps_clip, 1+self.eps_clip) * advantage
        actor_loss  = -torch.min(surr1, surr2)
        # 3. Critic loss
        critic_discount = 0.5
        critic_loss = critic_discount * self.MseLoss(state_values, rewards)
        # 4. Total Loss
        beta = 0.01  # encourages exploring different policies
        total_loss = critic_loss + actor_loss - beta * dist_entropy
        return total_loss

    def my_update(self, memory):
        # My rewards: (learns the moves!)
        rewards = torch.tensor(memory.rewards)
        #rewards = rewards/100
        rewards = self.monteCarloRewards(memory)

        # convert list to tensor
        old_states   = torch.stack(memory.states).detach()
        old_actions  = torch.stack(memory.actions).detach()
        old_logprobs = torch.stack(memory.logprobs).detach()

        # Optimize policy for K epochs:
        for _ in range(self.K_epochs):
            # Evaluating old actions and values :
            logprobs, state_values, dist_entropy = self.policy.evaluate(old_states, old_actions)
            advantages = rewards - state_values.detach()

            #rewards    = rewards.float()
            #advantages = advantages.float()
            loss       =  self.calculate_total_loss(state_values, logprobs, old_logprobs, advantages, rewards, dist_entropy)


            # take gradient step
            self.optimizer.zero_grad()
            loss.mean().backward()
            self.optimizer.step()

        # Copy new weights into old policy:
        self.policy_old.load_state_dict(self.policy.state_dict())

I also guess the error should be contained in the PPO / model / update code and not inside my gym environment.

Are there any more ways to debug this?

The RAM decreases very slowly.

I think I should use a DataLoader as well (however, it should also work without one as long as I do not store any tensors…).
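
To narrow it down, I will log the resident memory of the process after every update, roughly like this (a quick sketch using psutil, which is not part of my original code):

import os
import psutil

def log_rss(tag=""):
    # print the resident set size (RSS) of this process in MB
    rss_mb = psutil.Process(os.getpid()).memory_info().rss / 1024 ** 2
    print(tag, "RSS: %.1f MB" % rss_mb)

# e.g. call log_rss("before update") and log_rss("after update") around ppo.my_update(memory)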

Just found my solution…

The problem was that I kept appending to my memory whenever I made a correct move, so during training the memory was getting bigger and bigger…
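
In other words, the rollout memory has to stay bounded, e.g. by clearing it right after every update, as the minimal PPO example does (a sketch of the training loop, names assumed from my code above):

# sketch: empty the rollout buffer right after each update so the stored
# states/actions/logprobs/rewards cannot accumulate across updates
if timestep % update_timestep == 0:
    ppo.my_update(memory)
    memory.clear_memory()
    timestep = 0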
