I am following the official PyTorch tutorial "Transfer Learning for Computer Vision Tutorial" (PyTorch Tutorials 2.1.0+cu121 documentation).
I subclassed datasets.ImageFolder with a custom Dataset, like so:
class CustomDataset(datasets.ImageFolder):
    """ImageFolder variant whose items also carry the sample's file name.

    Each item is ``(image, label, fname)`` instead of the parent's
    ``(image, label)``. ``__len__`` is inherited unchanged from the parent.

    NOTE(review): when used with ``DataLoader(num_workers > 0)`` on Windows,
    worker processes presumably need to import this class by name, so it
    likely has to live in an importable ``.py`` module rather than in a
    notebook cell -- confirm against the PyTorch DataLoader platform notes.
    """

    def __init__(self, path, transforms):
        """Create the dataset rooted at ``path``.

        Args:
            path: Root directory, forwarded to ``ImageFolder``.
            transforms: Forwarded as the second positional argument of
                ``ImageFolder`` (presumably its per-image ``transform``).
        """
        self.path = path
        super().__init__(path, transforms)

    def __getitem__(self, i):
        """Return ``(image, label, fname)`` for the sample at index ``i``."""
        image, label = super().__getitem__(i)
        # ``samples`` holds (file name, class index) pairs; only the name is new.
        fname, _ = self.samples[i]
        return (image, label, fname)
Then I replaced datasets.ImageFolder with the new class as shown here:
# Build one dataset and one DataLoader per split. Both dicts are keyed by the
# split name, and each entry is indexed with [x] so that every split gets its
# own transform and its own dataset rather than the whole dict.
image_datasets = {x: CustomDataset(os.path.join(data_dir, x),
                                   data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                              shuffle=True, num_workers=4)
               for x in ['train', 'val']}
If I try to iterate over the validation loader:
# Pull a single batch from the validation loader.
it = iter(dataloaders['val'])
next(it)
I get the following error:
Empty Traceback (most recent call last)
File ~\AppData\Local\r-miniconda\envs\r-reticulate\Lib\site-packages\torch\utils\data\dataloader.py:1132, in _MultiProcessingDataLoaderIter._try_get_data(self, timeout)
1131 try:
→ 1132 data = self._data_queue.get(timeout=timeout)
1133 return (True, data)
File ~\AppData\Local\r-miniconda\envs\r-reticulate\Lib\multiprocessing\queues.py:114, in Queue.get(self, block, timeout)
113 if not self._poll(timeout):
→ 114 raise Empty
115 elif not self._poll():
Empty:
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
Cell In[91], line 2
1 it = iter(dataloaders['val'])
----> 2 next(it)
File ~\AppData\Local\r-miniconda\envs\r-reticulate\Lib\site-packages\torch\utils\data\dataloader.py:630, in _BaseDataLoaderIter.__next__(self)
627 if self._sampler_iter is None:
628 # TODO(https://github.com/pytorch/pytorch/issues/76750)
629 self._reset() # type: ignore[call-arg]
→ 630 data = self._next_data()
631 self._num_yielded += 1
632 if self._dataset_kind == _DatasetKind.Iterable and \
633 self._IterableDataset_len_called is not None and \
634 self._num_yielded > self._IterableDataset_len_called:
File ~\AppData\Local\r-miniconda\envs\r-reticulate\Lib\site-packages\torch\utils\data\dataloader.py:1328, in _MultiProcessingDataLoaderIter._next_data(self)
1325 return self._process_data(data)
1327 assert not self._shutdown and self._tasks_outstanding > 0
→ 1328 idx, data = self._get_data()
1329 self._tasks_outstanding -= 1
1330 if self._dataset_kind == _DatasetKind.Iterable:
1331 # Check for _IterableDatasetStopIteration
File ~\AppData\Local\r-miniconda\envs\r-reticulate\Lib\site-packages\torch\utils\data\dataloader.py:1294, in _MultiProcessingDataLoaderIter._get_data(self)
1290 # In this case, `self._data_queue` is a `queue.Queue`,. But we don't
1291 # need to call `.task_done()` because we don't use `.join()`.
1292 else:
1293 while True:
→ 1294 success, data = self._try_get_data()
1295 if success:
1296 return data
File ~\AppData\Local\r-miniconda\envs\r-reticulate\Lib\site-packages\torch\utils\data\dataloader.py:1145, in _MultiProcessingDataLoaderIter._try_get_data(self, timeout)
1143 if len(failed_workers) > 0:
1144 pids_str = ', '.join(str(w.pid) for w in failed_workers)
→ 1145 raise RuntimeError(f'DataLoader worker (pid(s) {pids_str}) exited unexpectedly') from e
1146 if isinstance(e, queue.Empty):
1147 return (False, None)
RuntimeError: DataLoader worker (pid(s) 26204, 30776, 26256, 32236) exited unexpectedly
The code works fine if either:
- the num_workers parameter is set to 0, or
- CustomDataset is replaced with datasets.ImageFolder and num_workers remains set to 4.
Any idea what the problem is?