Hi, I am using IterableDataset for Loading my videos using the following code.
class RetinaFaceDatasetVideoNew(torch.utils.data.IterableDataset):
def __init__(self,name:str):
self.name = name
self.cap = cv2.VideoCapture(str(name))
if not self.cap.isOpened():
raise IOError(f"Cannot open video {name}")
self.scale = 0.5
def __iter__(self):
frame_idx = 0
while True:
try:
ret, image = self.cap.read()
if not ret:
# no frames has been grabbed
break
img = cv2.resize(image,
None,
None,
fx=self.scale,
fy=self.scale,
interpolation=cv2.INTER_LINEAR,
)
yield image,self.name
except StopIteration:
return
Problem is that this process is slow.
To tackle this issue I tried multiprocessing over different videos as it is faster.
For that I wrote
class RandomDatasetM(torch.utils.data.IterableDataset):
def __init__(self,name: list):
self.name = name
self.cap = []
self.cap.append(cv2.VideoCapture(name[0]))
self.cap.append(cv2.VideoCapture(name[1]))
def __iter__(self):
frame_idx = 0
worker_info = torch.utils.data.get_worker_info()
if worker_info is None: # single-process data loading, return the full iterator
while True:
try:
ret, image = self.cap[0].read()
if not ret:
# no frames has been grabbed
break
yield image, self.name[worker_id]
except StopIteration:
return
else: # in a worker process
# split workload
worker_id = worker_info.id
while True:
try:
ret, image = self.cap[worker_id].read()
if not ret:
# no frames has been grabbed
break
yield image,self.name[worker_id]
except StopIteration:
return
But this is not running and going in deadlock. Can someone suggest me approach to achieve multiprocessing here?