You can use share_memory_()
and torch.multiprocessing.SimpleQueue
to implement IPC. E.g.:
import numpy as np
import torch
import torch.multiprocessing as mp
def func(rank, x, p2c, c2p):
    """Child worker for the CUDA-IPC demo.

    Moves the shared CPU tensor `x` to CUDA device `rank`, raises it to the
    `rank`-th power, and ships the resulting CUDA tensor to the parent via
    the `c2p` queue. It then blocks on `p2c` until the parent signals that
    it has dropped its copy, and only then exits.

    NOTE(review): `x.to(rank)` targets CUDA device index `rank`, so this
    assumes one visible GPU per child — confirm against the launch config.
    """
    powered = x.to(rank) ** rank
    c2p.put(powered)
    # Citing the multiprocessing doc: unlike CPU tensors, the sending
    # process is required to keep the original tensor as long as the
    # receiving process retains a copy of the tensor. The refcounting is
    # implemented under the hood but requires users to follow the next
    # best practices.
    p2c.get()  # block here so this process outlives the parent's copy
    print(f"child-{rank} done")
if __name__ == '__main__':
    nprocs = 2
    x = torch.ones(2, 2)
    # Place the CPU tensor in shared memory so every child sees the same storage.
    x.share_memory_()
    # 'spawn' is the start method required when child processes use CUDA.
    ctx = mp.get_context('spawn')
    c2p, p2c = ctx.SimpleQueue(), ctx.SimpleQueue()
    ps = [ctx.Process(target=func, args=(rank, x, p2c, c2p)) for rank in range(nprocs)]
    # Plain loop, not a side-effect list comprehension (the original built a
    # throwaway list of None values).
    for p in ps:
        p.start()
    # Receive one tensor from each child.
    tensors = [c2p.get() for _ in range(nprocs)]
    print(tensors)
    # Release our references BEFORE telling the children they may exit --
    # the sender must stay alive while the receiver holds a copy (CUDA IPC
    # refcounting caveat cited in func).
    del tensors
    for p in ps:
        p2c.put(0)  # signal: parent no longer holds the tensor
        p.join()
    print("parent done")