My code includes the following:
# Make torch.multiprocessing create its worker processes with the 'spawn'
# start method.
# NOTE(review): presumably chosen because the workers touch CUDA (the
# traceback further down goes through rebuild_storage_cuda) -- confirm.
from torch.multiprocessing import set_start_method
set_start_method('spawn')
…
from torch.multiprocessing import Pool
# Run self._forward_i once per input in a two-process pool and add the
# per-input results together into a single embedding loss.
# NOTE(review): every worker result is pickled back into this process; the
# traceback shows that hand-off going through rebuild_storage_cuda, so
# _forward_i presumably returns CUDA tensors -- consider returning CPU
# values from it instead (confirm against _forward_i).
input_length = len(input_hashes)
# The n-gram embeddings and latest words are the same for every task, so
# repeat them once per input to line up with the per-input sequences.
shared_ngrams = [ngram_embeddings] * input_length
shared_words = [latest_words] * input_length
with Pool(2) as worker_pool:
    task_args = zip(
        input_hashes,
        input_embeddings,
        shared_ngrams,
        shared_words,
    )
    per_input_losses = worker_pool.starmap(self._forward_i, task_args)
    embedding_loss = sum(per_input_losses)
Before the end of the first epoch, I get the following error:
Exception in thread Thread-19521:
Traceback (most recent call last):
File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_inner
self.run()
File "/usr/lib/python3.6/threading.py", line 864, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.6/multiprocessing/pool.py", line 463, in _handle_results
task = get()
File "/usr/lib/python3.6/multiprocessing/connection.py", line 251, in recv
return _ForkingPickler.loads(buf.getbuffer())
File "/home/plvaudry/virtualenvs/aml/lib/python3.6/site-packages/torch/multiprocessing/reductions.py", line 96, in rebuild_storage_cuda
storage = cls._new_shared_cuda(device, handle, size, offset, view_size)
RuntimeError: cuda runtime error (30) : unknown error at /pytorch/torch/csrc/generic/StorageSharing.cpp:304