I am getting a RuntimeError. I am fairly certain it is not the batch size (after looking at other posts), since the algorithm runs for quite a few iterations before crashing. Perhaps there is a memory leak somewhere? My suspicion is that it happens when I transfer data to the GPU in the compute_td_loss function. I sincerely apologize for posting this entire block of code -
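To narrow it down, this is the kind of check I want to run alongside training; a minimal sketch (torch.cuda.memory_allocated and torch.cuda.max_memory_allocated are standard PyTorch calls, but the helper name and logging interval here are my own):

def log_gpu_memory(step, interval=500):
    # Report bytes currently held by tensors on the active CUDA device; a steady
    # climb across episodes would point to a leak rather than one oversized batch.
    if torch.cuda.is_available() and step % interval == 0:
        alloc = torch.cuda.memory_allocated() / 2**20
        peak = torch.cuda.max_memory_allocated() / 2**20
        print(f"step {step}: allocated {alloc:.1f} MiB, peak {peak:.1f} MiB")

Calling this with frame_index inside the while loop below should show whether GPU memory actually grows over time.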
# Here we import all libraries
import numpy as np
#pip install gym[atari,accept-rom-license]
import gym
import matplotlib.pyplot as plt
import os
import torch
import random
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from collections import deque
import torchvision
import torch.nn.functional as F
import sys
env = gym.make("ALE/Pong-v5")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#Hyperparameters
episodes = 20000*10
eps = 1.0
learning_rate = 0.001
tot_rewards = []
tot_loss = []
decay_val = 0.0001
mem_size = 1000000
batch_size = 300
gamma = 0.99
update_target = 100
max_steps = 200
PATH = "./saved_models/pong"
class NeuralNetwork(nn.Module):
    def __init__(self, state_size, action_size):
        super(NeuralNetwork, self).__init__()
        self.state_size = state_size
        self.action_size = action_size
        self.conv1 = nn.Conv2d(state_size, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(29008, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, action_size)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
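(For reference, the 29008 in self.fc1 is just the flattened conv output for a 3x210x160 Pong frame; my own shape arithmetic:)

# conv1 (5x5, no padding): 210x160 -> 206x156; pool (2x2): -> 103x78
# conv2 (5x5, no padding): 103x78  -> 99x74;   pool (2x2): -> 49x37
# flatten: 16 channels * 49 * 37 = 29008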
model = NeuralNetwork(env.observation_space.shape[2], env.action_space.n).to(device)
target = NeuralNetwork(env.observation_space.shape[2], env.action_space.n).to(device)
opt = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
replay_buffer = deque(maxlen=mem_size)
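A back-of-envelope number on the buffer, in case it is relevant (my own arithmetic, based on the shapes I store below):

# Each stored float32 state is 3 * 210 * 160 * 4 bytes ~ 0.4 MB, and each
# transition also holds next_state as a uint8 numpy array (~0.1 MB), so
# ~10,000 transitions already occupy roughly 5 GB of host RAM, and a full
# mem_size = 1,000,000 buffer would be on the order of 500 GB.
bytes_per_transition = 3 * 210 * 160 * 4 + 3 * 210 * 160  # ~ 0.5 MB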
# # Testing code
# state = torch.tensor(env.reset(), dtype=torch.float32).unsqueeze(0)
# state = state.reshape(1, 3, 210, 160)
# print("state = ", state.shape)
# out = model(state)
# print("out = ", out)
def compute_td_loss(batch_size):
    state, next_state, reward, done, action = zip(*random.sample(replay_buffer, batch_size))
    # state = torch.stack(list(state), dim=0).reshape(batch_size, -1)
    # print("Shape of state = ", torch.stack(list(state), dim=0).squeeze(1).shape)
    state = torch.stack(list(state), dim=0).squeeze(1)
    state = state.reshape(batch_size, 3, 210, 160).to(device)
    # print("next state shape ", torch.from_numpy(np.array(next_state)).reshape(batch_size, 3, 210, 160).shape)
    # next_state = torch.from_numpy(np.array(next_state).reshape(batch_size, -1)).type(torch.float32)
    next_state = torch.from_numpy(np.array(next_state)).reshape(batch_size, 3, 210, 160).type(torch.float32).to(device)
    reward = torch.from_numpy(np.array(reward)).to(device)
    done = torch.from_numpy(np.array(done)).long().to(device)
    action = torch.from_numpy(np.array(action)).type(torch.int64).to(device)
    q_values = model(state)
    next_q_values = target(next_state)
    q_vals = q_values.gather(dim=-1, index=action.reshape(-1, 1))
    max_next_q_values = torch.max(next_q_values, -1)[0].detach()
    loss = ((reward + gamma * max_next_q_values * (1 - done) - q_vals.squeeze()) ** 2).mean()
    opt.zero_grad()
    loss.backward()
    opt.step()
    return loss
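One variant I am considering, in case the target pass is keeping graphs alive: run the target network under torch.no_grad() and return a plain Python float instead of the loss tensor. A minimal sketch (torch.no_grad and .item() are standard PyTorch; the _v2 name is my own, and this is otherwise the same function as above):

def compute_td_loss_v2(batch_size):
    state, next_state, reward, done, action = zip(*random.sample(replay_buffer, batch_size))
    state = torch.stack(list(state), dim=0).squeeze(1).reshape(batch_size, 3, 210, 160).to(device)
    reward = torch.from_numpy(np.array(reward)).float().to(device)
    done = torch.from_numpy(np.array(done)).long().to(device)
    action = torch.from_numpy(np.array(action)).long().to(device)
    with torch.no_grad():
        # No autograd graph is built for the target network's forward pass.
        next_state = torch.from_numpy(np.array(next_state)).reshape(batch_size, 3, 210, 160).float().to(device)
        max_next_q_values = target(next_state).max(-1)[0]
    q_vals = model(state).gather(dim=-1, index=action.reshape(-1, 1)).squeeze()
    loss = ((reward + gamma * max_next_q_values * (1 - done) - q_vals) ** 2).mean()
    opt.zero_grad()
    loss.backward()
    opt.step()
    return loss.item()  # a float, so the caller cannot accidentally keep the graph alive

(With this version the training loop would accumulate eps_loss += loss directly instead of calling loss.cpu().detach().numpy().)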
if os.path.exists(PATH):
    model.load_state_dict(torch.load(PATH))
else:
    frame_index = 0
    for i in range(episodes):
        state = torch.tensor(env.reset(), dtype=torch.float32).unsqueeze(0)
        state = state.reshape(1, 3, 210, 160)
        done = False
        steps = 0
        eps_rew = 0
        eps_loss = 0
        while not done and steps < max_steps:
            print("frame_index = ", frame_index, "episode = ", i)
            if np.random.uniform(0, 1) < eps:
                action = env.action_space.sample()
            else:
                # action = env.action_space.sample()
                action = torch.argmax(model(state.to(device))).cpu().detach().numpy()
            next_state, reward, done, info = env.step(action)
            replay_buffer.append((state, next_state, reward, done, action))
            if len(replay_buffer) > batch_size and steps % 4 == 0:
                loss = compute_td_loss(batch_size)
                eps_loss += loss.cpu().detach().numpy()
            eps = eps / (1 + decay_val)
            eps_rew += reward
            if steps % 50 == 0:
                target.load_state_dict(model.state_dict())
            if done:
                tot_rewards.append(eps_rew)
                break
            state = torch.tensor(next_state, dtype=torch.float32).unsqueeze(0)
            state = state.reshape(1, 3, 210, 160)
            steps += 1
            frame_index += 1
        tot_rewards.append(eps_rew)
        tot_loss.append(eps_loss)
        if (i % 100) == 0:
            plt.scatter(np.arange(len(tot_rewards)), tot_rewards)
            plt.show()
    torch.save(model.state_dict(), PATH)
Please let me know if you have any questions. Thank you very much.