I don’t really understand the iterable dataset. I would like to stream different videos in different processes, so that every video is present in each batch, like this:
#batch1:
video0_frame0
video1_frame0
…
videoB_frame0
#batch2:
video0_frame1
video1_frame1
…
videoB_frame1
You get the picture.
I am trying it like this:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
from torch.utils.data import Dataset, IterableDataset, DataLoader
import gym
import numpy as np
import cv2
class GymEnv(object):
    """Iterable wrapper that plays a Gym environment with random actions.

    Every pass over the object resets the environment and then yields the
    ``(observation, reward, done, info)`` tuple returned by ``env.step`` for
    exactly ``niter`` consecutive steps.

    Args:
        env_name: Environment id passed to ``gym.make``.
        niter: Number of environment steps (and yielded transitions) per pass.
    """

    def __init__(self, env_name, niter=1000):
        self.env = gym.make(env_name)
        self.niter = niter

    def __iter__(self):
        """Yield ``niter`` transitions, resetting whenever an episode ends."""
        self.env.reset()
        for _ in range(self.niter):
            # Uniform random policy; assumes a discrete action space that
            # exposes its size as ``.n``.
            action = np.random.randint(0, self.env.action_space.n)
            observation, reward, done, info = self.env.step(action)
            if done:
                # Reset before handing control back so the environment is
                # never stepped again after reporting ``done``.
                self.env.reset()
            # Step exactly once per iteration and always yield the result
            # (including the terminal transition), so the stream has a
            # predictable length of ``niter``.
            yield observation, reward, done, info
class Gym(IterableDataset):
    """Iterable dataset that streams raw observations from a Gym environment.

    Args:
        env_name: Environment id handed to ``GymEnv`` when iteration starts.
            It is read at iteration time, so it can be reassigned on the
            dataset object beforehand.
        niter: Number of environment steps requested from ``GymEnv``.
    """

    def __init__(self, env_name='SpaceInvaders-v0', niter=10000):
        super(Gym, self).__init__()
        self.env_name = env_name
        self.niter = niter
        # Overall step range of the stream (0 .. niter).
        self.start, self.end = 0, niter

    def preprocess(self, step):
        """Return only the observation of one environment step.

        Args:
            step: An ``(observation, reward, done, info)`` 4-tuple.

        Returns:
            The ``observation`` element, unchanged.
        """
        observation, _reward, _done, _info = step
        return observation

    def __iter__(self):
        """Build a new ``GymEnv`` and map ``preprocess`` over its steps."""
        # The environment is created here rather than in __init__, so it
        # uses whatever ``env_name`` is set when iteration begins.
        env_iter = GymEnv(self.env_name, self.niter)
        return map(self.preprocess, env_iter)
def worker_env_init(worker_id):
    """Assign a Gym environment name to the current DataLoader worker.

    Meant to be passed as ``worker_init_fn`` to a ``DataLoader``. It
    overwrites ``env_name`` on the dataset copy owned by the worker it runs
    in and prints the chosen name.

    Args:
        worker_id: Worker index supplied by the caller. Unused; the index is
            read from ``torch.utils.data.get_worker_info()`` instead.
    """
    env_names = ['SpaceInvaders-v0', 'Pong-v0']
    worker_info = torch.utils.data.get_worker_info()
    if worker_info is None:
        # No worker info is available (e.g. called directly from the main
        # process): there is no per-worker dataset copy to configure.
        return
    dataset = worker_info.dataset  # the dataset copy in this worker process
    # Wrap around so that using more workers than environment names reuses
    # the names instead of raising IndexError.
    dataset.env_name = env_names[worker_info.id % len(env_names)]
    print('dataset: ', dataset.env_name)
if __name__ == '__main__':
    ds = Gym()
    # Two workers; worker_env_init gives each one its own environment name.
    dl = DataLoader(ds, batch_size=2, num_workers=2,
                    worker_init_fn=worker_env_init)

    # Single-process sanity check, kept for debugging:
    # for data in ds:
    #     cv2.imshow('test', data[..., ::-1])
    #     cv2.waitKey(5)

    for idx, data in enumerate(dl):
        print('batch_idx: ', idx)
        # Show every element of the batch in its own window.
        for i, frame in enumerate(data):
            img = frame.cpu().numpy()
            # Reverse the last axis before display (presumably RGB -> BGR
            # channel order for OpenCV -- confirm).
            cv2.imshow('env#' + str(i), img[..., ::-1])
        # Wait for a key press before moving on to the next batch.
        cv2.waitKey(0)
But each batch basically contains the same image several times, and the videos alternate between batches (one batch comes from one video, the next from the other); they never appear together in the same batch.