Hello,
I am running into a RAM leak when saving my frames as PyTorch tensors for a simple DQN implementation (inspired by link). Here is a quick example without the learning loop, trying to isolate the issue:
import resource
import random
from collections import namedtuple

import gym
import numpy as np
import torch
import cv2
# Class
class ReplayMemory(object):
    '''
    A simple class to wrap around the concept of replay memory;
    it helps manage how much data is kept around.
    '''
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, *args):
        """Saves a transition, overwriting the oldest one once full."""
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)
# Functions
def ProcessState(state, torchOutput=True):
    '''
    Convert a raw Atari frame to a grayscale (110, 80) float32 image
    scaled to roughly [-1, 1]. Returns a torch tensor if torchOutput
    is True, otherwise a numpy array.
    '''
    img = cv2.cvtColor(state, cv2.COLOR_BGR2GRAY)
    # cv2.resize takes (width, height), hence the swapped indices
    img = cv2.resize(img, (imageShape[1], imageShape[0])).astype('float32')
    if torchOutput:
        img = torch.from_numpy(img)
    img /= 255
    img -= 0.5
    img *= 2
    return img
# Variables
Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward', 'done'))
imageShape = (110, 80)  # (height, width)
env = gym.make('PongDeterministic-v3')
action = 0
memory = ReplayMemory(32)
# Example with pytorch
for i_episode in range(25):
    # break  # uncomment to skip the torch loop and run only the numpy one
    print('Pytorch: Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
    obser = env.reset()
    obser = ProcessState(obser)
    state = torch.ones((3, imageShape[0], imageShape[1]))
    state = torch.cat((state, obser.view(1, imageShape[0], imageShape[1])), 0)
    for t in range(10000):
        obser, reward, done, _ = env.step(0)
        # process the new observation and append it to the frame stack
        obser = ProcessState(obser)
        state = torch.cat((state, obser.view(1, imageShape[0], imageShape[1])), 0)
        memory.push(state[:-1], action, state[1:], reward, done)
        state = state[1:]
        if done:
            break
# quit()
# memory = ReplayMemory(32)
# Numpy
for i_episode in range(25):
    print('Numpy: Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
    obser = env.reset()
    obser = ProcessState(obser, False)
    state = np.zeros((3, imageShape[0], imageShape[1]))
    state = np.concatenate([state, obser.reshape((1, imageShape[0], imageShape[1]))])
    for t in range(10000):
        obser, reward, done, _ = env.step(0)
        # process the new observation and append it to the frame stack
        obser = ProcessState(obser, False)
        state = np.concatenate([state, obser.reshape((1, imageShape[0], imageShape[1]))])
        memory.push(state[:-1], action, state[1:], reward, done)
        state = state[1:]
        if done:
            break
Here is the output I get from running the first loop (pytorch) vs. the second one, which saves numpy arrays:
jtremblay@office:~/code/Personal-git/dqn$ python memory_issue.py
[2017-03-06 12:38:30,254] Making new env: PongDeterministic-v3
Pytorch: Memory usage: 113432 (kb)
Pytorch: Memory usage: 226380 (kb)
Pytorch: Memory usage: 323796 (kb)
Pytorch: Memory usage: 410124 (kb)
Pytorch: Memory usage: 490116 (kb)
Pytorch: Memory usage: 565884 (kb)
Pytorch: Memory usage: 637428 (kb)
Pytorch: Memory usage: 704220 (kb)
Pytorch: Memory usage: 760188 (kb)
Pytorch: Memory usage: 815892 (kb)
Pytorch: Memory usage: 861828 (kb)
Pytorch: Memory usage: 905388 (kb)
Pytorch: Memory usage: 938916 (kb)
Pytorch: Memory usage: 966900 (kb)
Pytorch: Memory usage: 993036 (kb)
Pytorch: Memory usage: 1001484 (kb)
Pytorch: Memory usage: 1001484 (kb)
Pytorch: Memory usage: 1001484 (kb)
Pytorch: Memory usage: 1001484 (kb)
Pytorch: Memory usage: 1001484 (kb)
Pytorch: Memory usage: 1001484 (kb)
Pytorch: Memory usage: 1001484 (kb)
Pytorch: Memory usage: 1001484 (kb)
Pytorch: Memory usage: 1001484 (kb)
Pytorch: Memory usage: 1001484 (kb)
jtremblay@office:~/code/Personal-git/dqn$ python memory_issue.py
[2017-03-06 12:39:22,433] Making new env: PongDeterministic-v3
Numpy: Memory usage: 113936 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
Numpy: Memory usage: 130988 (kb)
As you can see, the numpy version is much more stable. The difference may not look like much here, but when I run my script with a replay size of one million frames it crashes quickly.
Should I avoid storing torch tensors? To be honest, I quite like keeping everything as torch tensors; it saves me a few torch.from_numpy calls. Is there a way to release memory used by torch? I was not able to find anything on that subject in the documentation.
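In case it helps narrow things down, here is a minimal sketch of one workaround I am considering, based on my (unconfirmed) guess that state[:-1] and state[1:] are views that keep the entire concatenated storage alive, so I clone them before pushing:

    # guess: clone the slices so each stored transition owns its own memory
    # instead of sharing storage with the growing `state` tensor
    memory.push(state[:-1].clone(), action, state[1:].clone(), reward, done)
    state = state[1:].clone()

I have not verified whether this is the right approach, though, and it does not explain why the numpy version behaves so differently.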
I can provide more examples with the learning loop included if needed.