I’m on Linux. I implemented the `if __name__ == '__main__':` guard, but it didn’t change the behavior of the code.
Behavior with torch.multiprocessing.set_start_method('fork'):
prints 100x
Error! being run by __main__
Behavior with torch.multiprocessing.set_start_method('spawn'):
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
~/.conda/envs/torch_env/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _try_get_data(self, timeout)
871 try:
--> 872 data = self._data_queue.get(timeout=timeout)
873 return (True, data)
~/.conda/envs/torch_env/lib/python3.7/multiprocessing/queues.py in get(self, block, timeout)
103 timeout = deadline - time.monotonic()
--> 104 if not self._poll(timeout):
105 raise Empty
~/.conda/envs/torch_env/lib/python3.7/multiprocessing/connection.py in poll(self, timeout)
256 self._check_readable()
--> 257 return self._poll(timeout)
258
~/.conda/envs/torch_env/lib/python3.7/multiprocessing/connection.py in _poll(self, timeout)
413 def _poll(self, timeout):
--> 414 r = wait([self], timeout)
415 return bool(r)
~/.conda/envs/torch_env/lib/python3.7/multiprocessing/connection.py in wait(object_list, timeout)
920 while True:
--> 921 ready = selector.select(timeout)
922 if ready:
~/.conda/envs/torch_env/lib/python3.7/selectors.py in select(self, timeout)
414 try:
--> 415 fd_event_list = self._selector.poll(timeout)
416 except InterruptedError:
~/.conda/envs/torch_env/lib/python3.7/site-packages/torch/utils/data/_utils/signal_handling.py in handler(signum, frame)
65 # Python can still get and update the process status successfully.
---> 66 _error_if_any_worker_fails()
67 if previous_handler is not None:
RuntimeError: DataLoader worker (pid 17676) exited unexpectedly with exit code 1. Details are lost due to multiprocessing. Rerunning with num_workers=0 may give better error trace.
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
<ipython-input-7-b7dbc62fb341> in <module>
5
6 if __name__ == '__main__':
----> 7 main()
<ipython-input-7-b7dbc62fb341> in main()
1 def main():
----> 2 for batch_num, (inputs, labels_y, labels_w) in enumerate(training_dataloader):
3 (inputs, labels_y, labels_w) =(inputs.cuda(), labels_y.cuda(), labels_w.cuda())
4 #print(inputs)
5
~/.conda/envs/torch_env/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
433 if self._sampler_iter is None:
434 self._reset()
--> 435 data = self._next_data()
436 self._num_yielded += 1
437 if self._dataset_kind == _DatasetKind.Iterable and \
~/.conda/envs/torch_env/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
1066
1067 assert not self._shutdown and self._tasks_outstanding > 0
-> 1068 idx, data = self._get_data()
1069 self._tasks_outstanding -= 1
1070 if self._dataset_kind == _DatasetKind.Iterable:
~/.conda/envs/torch_env/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _get_data(self)
1032 else:
1033 while True:
-> 1034 success, data = self._try_get_data()
1035 if success:
1036 return data
~/.conda/envs/torch_env/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _try_get_data(self, timeout)
883 if len(failed_workers) > 0:
884 pids_str = ', '.join(str(w.pid) for w in failed_workers)
--> 885 raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str)) from e
886 if isinstance(e, queue.Empty):
887 return (False, None)
RuntimeError: DataLoader worker (pid(s) 17676) exited unexpectedly
Behavior with torch.multiprocessing.set_start_method('forkserver'):
---------------------------------------------------------------------------
Empty Traceback (most recent call last)
~/.conda/envs/torch_env/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _try_get_data(self, timeout)
871 try:
--> 872 data = self._data_queue.get(timeout=timeout)
873 return (True, data)
~/.conda/envs/torch_env/lib/python3.7/multiprocessing/queues.py in get(self, block, timeout)
104 if not self._poll(timeout):
--> 105 raise Empty
106 elif not self._poll():
Empty:
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
<ipython-input-7-b7dbc62fb341> in <module>
5
6 if __name__ == '__main__':
----> 7 main()
<ipython-input-7-b7dbc62fb341> in main()
1 def main():
----> 2 for batch_num, (inputs, labels_y, labels_w) in enumerate(training_dataloader):
3 (inputs, labels_y, labels_w) =(inputs.cuda(), labels_y.cuda(), labels_w.cuda())
4 #print(inputs)
5
~/.conda/envs/torch_env/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
433 if self._sampler_iter is None:
434 self._reset()
--> 435 data = self._next_data()
436 self._num_yielded += 1
437 if self._dataset_kind == _DatasetKind.Iterable and \
~/.conda/envs/torch_env/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
1066
1067 assert not self._shutdown and self._tasks_outstanding > 0
-> 1068 idx, data = self._get_data()
1069 self._tasks_outstanding -= 1
1070 if self._dataset_kind == _DatasetKind.Iterable:
~/.conda/envs/torch_env/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _get_data(self)
1032 else:
1033 while True:
-> 1034 success, data = self._try_get_data()
1035 if success:
1036 return data
~/.conda/envs/torch_env/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _try_get_data(self, timeout)
883 if len(failed_workers) > 0:
884 pids_str = ', '.join(str(w.pid) for w in failed_workers)
--> 885 raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str)) from e
886 if isinstance(e, queue.Empty):
887 return (False, None)
RuntimeError: DataLoader worker (pid(s) 17746, 17747, 17748, 17749, 17750, 17751, 17752, 17753, 17754, 17755, 17756, 17757, 17758, 17759, 17760) exited unexpectedly
If anyone has suggestions for getting more specific error messages, that would also be very helpful. Right now I don’t know how to make things print from inside the worker process. It would also be very helpful if someone had a toy example of a DataLoader with num_workers > 0 that works correctly.