I’m trying to train multiple models in parallel from one parent process.
Using the multiprocessing library’s ‘spawn’ start method, I get the error “[Errno 9] Bad file descriptor” when I try to start the process.
Has anyone run into this before?
Here’s the stacktrace:
/usr/lib/python3.5/multiprocessing/process.py in start(self)
103 'daemonic processes are not allowed to have children'
104 _cleanup()
--> 105 self._popen = self._Popen(self)
106 self._sentinel = self._popen.sentinel
107 _children.add(self)
/usr/lib/python3.5/multiprocessing/context.py in _Popen(process_obj)
272 def _Popen(process_obj):
273 from .popen_spawn_posix import Popen
--> 274 return Popen(process_obj)
275
276 class ForkServerProcess(process.BaseProcess):
/usr/lib/python3.5/multiprocessing/popen_spawn_posix.py in __init__(self, process_obj)
31 def __init__(self, process_obj):
32 self._fds = []
---> 33 super().__init__(process_obj)
34
35 def duplicate_for_child(self, fd):
/usr/lib/python3.5/multiprocessing/popen_fork.py in __init__(self, process_obj)
18 sys.stderr.flush()
19 self.returncode = None
---> 20 self._launch(process_obj)
21
22 def duplicate_for_child(self, fd):
/usr/lib/python3.5/multiprocessing/popen_spawn_posix.py in _launch(self, process_obj)
46 try:
47 reduction.dump(prep_data, fp)
---> 48 reduction.dump(process_obj, fp)
49 finally:
50 context.set_spawning_popen(None)
/usr/lib/python3.5/multiprocessing/reduction.py in dump(obj, file, protocol)
57 def dump(obj, file, protocol=None):
58 '''Replacement for pickle.dump() using ForkingPickler.'''
---> 59 ForkingPickler(file, protocol).dump(obj)
60
61 #
/usr/local/lib/python3.5/dist-packages/torch/multiprocessing/reductions.py in reduce_storage(storage)
116 else:
117 df = multiprocessing.reduction.DupFd(fd)
--> 118 cache_key = fd_id(fd)
119 metadata = (df, size)
120 rebuild = rebuild_storage_fd
/usr/local/lib/python3.5/dist-packages/torch/multiprocessing/reductions.py in fd_id(fd)
53 # this doesn't work with shared memory handles, which is why we don't
54 # support the "file_descriptor" sharing method on that platform.
---> 55 stat = os.fstat(fd)
56 return (stat.st_ino, stat.st_dev)
57
OSError: [Errno 9] Bad file descriptor