I am using the following custom dataset:
class MyDataset(Dataset):
def __init__(self, names, X):
self.names = names
self.X = X
def __len__(self):
return len(self.names)
def __getitem__(self, idx):
name = self.names[idx]
sample_x = self.X[name]
return torch.tensor(sample_x)
where X
is a loaded .npz
file and names are the keys in this file. That is I create my dataset as following:
X = np.load('npzifle.npz', mmap_mode='r')
names = X.files
ds = MyDataset(names, X)
I have a problem when creating a dataloader with num_workers > 1
:
from torch.utils.data import DataLoader
dl = DataLoader(ds, batch_size=4, num_workers=2)
where I get the following error:
Traceback (most recent call last):
File "/home/ansar/temp/foo.py", line 11, in <module>
for x in dl:
File "/home/ansar/venvir/ml/lib64/python3.11/site-packages/torch/utils/data/dataloader.py", line 631, in __next__
data = self._next_data()
^^^^^^^^^^^^^^^^^
File "/home/ansar/venvir/ml/lib64/python3.11/site-packages/torch/utils/data/dataloader.py", line 1346, in _next_data
return self._process_data(data)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ansar/venvir/ml/lib64/python3.11/site-packages/torch/utils/data/dataloader.py", line 1372, in _process_data
data.reraise()
File "/home/ansar/venvir/ml/lib64/python3.11/site-packages/torch/_utils.py", line 722, in reraise
raise exception
zipfile.BadZipFile: Caught BadZipFile in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/ansar/venvir/ml/lib64/python3.11/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
data = fetcher.fetch(index)
^^^^^^^^^^^^^^^^^^^^
File "/home/ansar/venvir/ml/lib64/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ansar/venvir/ml/lib64/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
~~~~~~~~~~~~^^^^^
File "/home/ansar/foo/src/foo/data.py", line 242, in __getitem__
sample_x = self.X[name]
~~~~~~^^^^^^
File "/home/ansar/venvir/ml/lib64/python3.11/site-packages/numpy/lib/npyio.py", line 252, in __getitem__
magic = bytes.read(len(format.MAGIC_PREFIX))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib64/python3.11/zipfile.py", line 955, in read
data = self._read1(n)
^^^^^^^^^^^^^^
File "/usr/lib64/python3.11/zipfile.py", line 1045, in _read1
self._update_crc(data)
File "/usr/lib64/python3.11/zipfile.py", line 973, in _update_crc
raise BadZipFile("Bad CRC-32 for file %r" % self.name)
zipfile.BadZipFile: Bad CRC-32 for file 'arr_0.npy'