I’m trying to decode a large number of jpeg images directly onto my GPU via a DataLoader, but am running into issues.
I’m running with `batch_size=64` and `num_workers=8`.
The code runs as expected when I pass `device="cpu"` to `decode_jpeg()`.
My dataset is defined as follows:
class MyDataset(Dataset):
    """Map-style dataset yielding JPEG files from ``my_dir`` as float tensors.

    Files matching ``my_dir/*.jpeg`` are collected once, in sorted order, at
    construction time. Each item is decoded on ``self.device`` ("cuda" when
    CUDA is available, otherwise "cpu") and scaled by 1/255.

    NOTE(review): when this dataset is used with ``DataLoader(num_workers>0)``
    and ``self.device == "cuda"``, ``__getitem__`` runs inside worker
    processes and returns CUDA tensors. The PyTorch data-loading docs advise
    against returning CUDA tensors from workers; the default collate function
    tries to place the batch in shared memory, which is likely what fails
    there. Consider decoding on the CPU in workers (and moving the batch to
    the GPU in the main process), or using ``num_workers=0`` for GPU decoding.
    """

    def __init__(self) -> None:
        # Zero-argument super() is bound to this instance. The one-argument
        # form ``super(MyDataset)`` creates an *unbound* super object, so
        # calling ``__init__`` on it never reaches the parent initializer.
        super().__init__()
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # Sorted so that index -> file mapping is deterministic across runs.
        self.files = sorted(glob.glob("my_dir/*.jpeg"))

    def __getitem__(self, idx):
        """Return the decoded image at position ``idx`` as a float tensor."""
        # Raw, still-encoded file bytes.
        im_u8 = read_file(self.files[idx])
        # Decode on the selected device, then convert to float and divide by
        # 255 (presumably mapping 8-bit pixel values into [0, 1]).
        im_nv = decode_jpeg(im_u8, device=self.device).float() / 255
        return im_nv

    def __len__(self) -> int:
        """Return the number of JPEG files found at construction time."""
        return len(self.files)
Initially I was running into a CUDA initialization error, which I fixed by calling `mp.set_start_method("spawn")`. I am now running into the following cryptic RuntimeError:
Traceback (most recent call last):
File “/code/scripts/predict-noncapture-hauls.py”, line 161, in <module>
run()
File “/usr/local/lib/python3.9/dist-packages/click/core.py”, line 1130, in __call__
return self.main(*args, **kwargs)
File “/usr/local/lib/python3.9/dist-packages/click/core.py”, line 1055, in main
rv = self.invoke(ctx)
File “/usr/local/lib/python3.9/dist-packages/click/core.py”, line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
File “/usr/local/lib/python3.9/dist-packages/click/core.py”, line 760, in invoke
return __callback(*args, **kwargs)
File “/code/scripts/predict-noncapture-hauls.py”, line 147, in run
predictions, metadata = evaluate_event_yolov5(
File “/code/scripts/predict-noncapture-hauls.py”, line 75, in evaluate_event_yolov5
for batch in dataloader:
File “/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py”, line 530, in __next__
data = self._next_data()
File “/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py”, line 1224, in _next_data
return self._process_data(data)
File “/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py”, line 1250, in _process_data
data.reraise()
File “/usr/local/lib/python3.9/dist-packages/torch/_utils.py”, line 457, in reraise
raise exception
RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File “/usr/local/lib/python3.9/dist-packages/torch/utils/data/_utils/worker.py”, line 287, in _worker_loop
data = fetcher.fetch(index)
File “/usr/local/lib/python3.9/dist-packages/torch/utils/data/_utils/fetch.py”, line 52, in fetch
return self.collate_fn(data)
File “/usr/local/lib/python3.9/dist-packages/torch/utils/data/_utils/collate.py”, line 136, in default_collate
storage = elem.storage()._new_shared(numel)
File “/usr/local/lib/python3.9/dist-packages/torch/_tensor.py”, line 181, in storage
storage = self._storage()
RuntimeError: it != attype_to_py_storage_type.end() INTERNAL ASSERT FAILED at “../torch/csrc/DynamicTypes.cpp”:69, please report a bug to PyTorch. Failed to get the Python type of `_UntypedStorage`.