Datapipe TFRecordLoader is extremely slow


I’m trying to use a datapipe with DataLoader2 to read from TFRecord files. When I increase the batch_size (e.g., to 32), the data loading process becomes extremely slow. Any suggestions on how I can optimise the pipeline so that it also works well with larger batch sizes?

def build_datapipes(path, batch_size=32):
    """Build a datapipe that yields collated batches read from TFRecord files.

    Pipeline stages, in order: list the entries under ``path``, keep those
    accepted by ``filter_tf_records``, shuffle, shard, open each file in
    binary mode, prefetch, decode with ``TFRecordLoader``, batch, and collate.

    Args:
        path: Root location handed to ``FSSpecFileLister``.
        batch_size: Number of decoded records per batch. Defaults to 32,
            the value that was previously hard-coded. The last incomplete
            batch is always dropped.

    Returns:
        The final (collated) datapipe, ready to pass to ``DataLoader2``.
    """
    datapipe = FSSpecFileLister([path])
    datapipe = datapipe.filter(filter_fn=filter_tf_records)
    # Shuffle and shard file names *before* opening anything, so the
    # sharding decision is made on cheap path strings rather than on
    # open streams or decoded records.
    datapipe = datapipe.shuffle().sharding_filter()
    datapipe = datapipe.open_files_by_fsspec(mode="rb")
    # NOTE(review): at this point the pipe carries opened file streams, so
    # this buffers up to 1024 open files, not 1024 records -- confirm this
    # is intended.
    datapipe = datapipe.prefetch(1024)
    # spec=None: no explicit feature spec is supplied to the loader.
    datapipe = TFRecordLoader(datapipe=datapipe, spec=None)
    datapipe = datapipe.batch(batch_size=batch_size, drop_last=True)
    datapipe = Collator(datapipe)
    return datapipe

# Benchmark: time one full pass over the pipeline with 8 worker processes.
datapipe = build_datapipes(path_to_tfrecord_files)
# The original call was missing its closing parenthesis (SyntaxError).
ms = MultiProcessingReadingService(num_workers=8)
dl2 = DataLoader2(datapipe=datapipe, reading_service=ms)
start = default_timer()
count = 0  # number of batches consumed
try:
    for _ in dl2:
        count += 1
    end = default_timer()
finally:
    # Release the reading service's worker processes even if iteration fails.
    dl2.shutdown()
print(end - start)