When I use torch==1.9.0, the following code runs fine.
import torch
from multiprocessing import Process
import multiprocessing
def run():
    """Worker for the 'spawn'-context child process.

    Prints whether CUDA is already initialized and whether this process is
    flagged as a "bad fork", then moves a small tensor onto the GPU
    (which lazily initializes CUDA in this child).
    """
    print('in proc', torch.cuda.is_initialized())
    print('in proc', torch.cuda._is_in_bad_fork())
    t = torch.zeros((5,))
    t.cuda()
def fk_run():
    """Worker for the 'fork'-context child process.

    Identical to ``run`` except for the log prefix: prints CUDA init /
    bad-fork state, then attempts a GPU transfer — the call that is
    reported to fail on newer torch versions in a forked child.
    """
    print('in fork proc', torch.cuda.is_initialized())
    print('in fork proc', torch.cuda._is_in_bad_fork())
    t = torch.zeros((5,))
    t.cuda()
if __name__ == "__main__":
    # Show CUDA state in the parent before any child is launched.
    print('in main', torch.cuda.is_initialized())
    print('in main', torch.cuda._is_in_bad_fork())

    # Child 1: 'spawn' start method — fresh interpreter, CUDA init is safe.
    spawn_ctx = multiprocessing.get_context('spawn')
    spawn_proc = spawn_ctx.Process(target=run)
    spawn_proc.start()

    # Child 2: 'fork' start method — inherits the parent's memory image;
    # this is the child whose CUDA use reportedly errors on torch 1.12.
    fork_ctx = multiprocessing.get_context('fork')
    fork_proc = fork_ctx.Process(target=fk_run)
    fork_proc.start()

    # Wait for both children in the same order they were started.
    spawn_proc.join()
    fork_proc.join()
However, when I run it with torch==1.12, I get a RuntimeError:
RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
I checked the source code of 1.9 and 1.12 and found no relevant difference.
Besides, the demo code never initializes CUDA in the main process, so why does it raise "Cannot re-initialize CUDA in forked subprocess"?