I have created a simple iterable dataset that extracts frames from a video and provides them for training. It works fine, but when I iterate over the dataset I observe that CPU RAM usage increases after every iteration.
def frame_stacking(dpath, T, overlap=True):
    """Yield stacks of T consecutive preprocessed grayscale frames from every
    video file found in *dpath*.

    Each frame is cropped 40 px on the left/right, converted to grayscale,
    resized to 128x128 and scaled to [0, 1].

    Parameters
    ----------
    dpath : str
        Directory containing video files readable by OpenCV.
    T : int
        Number of frames per stack.
    overlap : bool, optional
        If True (default), yield a sliding window that advances one frame at
        a time; if False, yield disjoint, non-overlapping windows.

    Yields
    ------
    numpy.ndarray
        Array of shape (T, 128, 128) with float values in [0, 1].
    """
    for filename in os.listdir(dpath):
        # BUG FIX: the original built the path from the undefined global
        # `data_path` instead of the `dpath` parameter.
        vidcap = cv2.VideoCapture(os.path.join(dpath, filename))
        try:
            frame_set = []
            success = True
            while success:
                success, image = vidcap.read()
                if not success:
                    continue
                gray = cv2.cvtColor(image[:, 40:-40, :], cv2.COLOR_BGR2GRAY)
                gray = cv2.resize(gray, (128, 128)).astype(float) / 255.0
                frame_set.append(gray)
                if len(frame_set) == T:
                    # BUG FIX: the original only yielded a window when a
                    # *subsequent* frame arrived, so the final full window of
                    # every video was silently dropped.
                    yield np.stack(frame_set, axis=0)
                    if overlap:
                        # Slide the window forward by one frame.
                        frame_set = frame_set[1:]
                    else:
                        # BUG FIX: the original reset frame_set on every frame
                        # when overlap was False, so frames never accumulated
                        # and nothing was ever yielded in that mode.
                        frame_set = []
        finally:
            # BUG FIX: release the capture handle. Leaking VideoCapture
            # objects (one per file, each holding decoder buffers) makes
            # process RAM grow as iteration proceeds.
            vidcap.release()
class MyDataset(IterableDataset):
    """Iterable dataset that endlessly cycles over frame stacks produced by
    ``frame_stacking``.

    WARNING: ``itertools.cycle`` stores a copy of *every* item the underlying
    generator yields so it can replay them.  Memory therefore grows until one
    full pass over the data has been cached — this cache, not the generator
    itself, is the dominant source of the ever-increasing RAM usage.
    """

    def __init__(self, data_root, NumFrames, isOverlapAllowed=True):
        # BUG FIX: the original passed a hard-coded 5 here, silently
        # ignoring the NumFrames constructor argument.
        self.samples = cycle(
            frame_stacking(data_root, NumFrames, isOverlapAllowed)
        )

    def __iter__(self):
        # NOTE(review): every call returns the *same* iterator object, so
        # multiple epochs (or DataLoader workers) share one stream instead
        # of each getting a fresh pass — confirm this is intended.
        return self.samples
Does anyone know why this happens? Am I making a mistake in how I create the Dataset? I know the dataset is responsible, because the memory growth also occurs in a minimal loop like this:
# Minimal reproduction: RAM usage climbs while merely iterating the loader.
for batch_idx, data in enumerate(dataloader):
    print("PERCENTAGE RAM USED", psutil.virtual_memory().percent)