Hello everyone.
I’m not sure if I understand MultiProcessingReadingService correctly.
Below is a small test with a custom IterDataPipe which should be spawned to 4 workers.
Without MultiProcessingReadingService it takes ~ 110ms on my PC.
With MultiProcessingReadingService it takes minutes!
I mean it’s not just a little bit slower — it’s drastically slower.
My versions: torchdata 0.6.0, Python 3.9.16
I’m testing on a Win11 machine.
Am I doing it wrong?
Is the slowdown caused by IPC, meaning multiprocessing should only be used for very CPU-heavy work?
Is it Windows related?
Thank you for any help!
from io import IOBase
import time
from typing import IO, Tuple
from torchdata.dataloader2 import DataLoader2, MultiProcessingReadingService
from torchdata.datapipes.iter import IterDataPipe
class TestIterDataPipe(IterDataPipe[Tuple[str, IOBase]]):
    """Toy datapipe that yields 100000 small ('foo', index) tuples.

    The source datapipe is stored but never read by ``__iter__`` — it exists
    only so the class matches the usual IterDataPipe constructor shape.
    """

    def __init__(self, source_datapipe: IterDataPipe[Tuple[str, IO]]) -> None:
        # Kept for API symmetry; the iterator below does not consume it.
        self.source_datapipe: IterDataPipe[Tuple[str, IO]] = source_datapipe

    def __iter__(self):
        # Synthetic stream: 100000 tiny tuples, no real I/O.
        for index in range(100000):
            yield 'foo', index
if __name__ == '__main__':
    # All timestamps are in milliseconds.
    tStep1 = time.perf_counter() * 1000.0
    tStart = tStep1

    # NOTE(review): TestIterDataPipe.__iter__ never reads source_datapipe,
    # so passing None here is harmless for this repro.
    pipe = TestIterDataPipe(None)
    # NOTE(review): each worker is a separate process; on Windows workers are
    # started via spawn (no fork), and every yielded item is serialized and
    # sent back over IPC. With 100000 tiny items, per-item IPC overhead
    # presumably dominates and explains the extreme slowdown — confirm by
    # batching items or increasing per-item work.
    mpReader = MultiProcessingReadingService(num_workers=4)
    dl2 = DataLoader2(pipe, reading_service=mpReader)

    tStep2 = time.perf_counter() * 1000.0
    tDiff1 = tStep2 - tStep1  # pipeline/reading-service construction time
    tStep1 = tStep2  # loop timing starts here

    ndx = 0
    for dt1, dt2 in dl2:
        ndx += 1
        if not ndx % 123:
            # Cumulative time since the loop started (tStep1 is deliberately
            # not reset inside the loop, so this is not a per-chunk interval).
            tStep2 = time.perf_counter() * 1000.0
            tDiff2 = tStep2 - tStep1
            print(f'{ndx}: {dt2}, {tDiff2} ms')

    tStep2 = time.perf_counter() * 1000.0
    tDiff2 = tStep2 - tStep1  # total loop time
    # Fix: removed a dead `tStep1 = tStep2` assignment here — the value was
    # never read after this point.
    print(f'pipeline: {tDiff1} ms, loop: {tDiff2} ms')
    print(f'total: {tStep2 - tStart} ms')
    # Tear down the worker processes explicitly.
    dl2.shutdown()