Hello!
I try to run the code below:
import torch.multiprocessing as mp
import torch
from torch import nn
import numpy as np
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.vars = nn.ParameterList()
print("Net init 0")
weight = nn.Parameter(torch.nn.init.orthogonal_(torch.zeros([64, 64]), 1.4))
print("Net init 1")
bias = nn.Parameter(torch.nn.init.constant_(torch.zeros(64), 1.4))
print("Net init 2")
self.vars.extend([weight, bias])
print("Net init 3")
def f():
net = Net()
if __name__ == "__main__":
agent = Net()
processes = [mp.Process(target=f) for _ in range(2)]
for p in processes:
p.start()
for p in processes:
p.join()
It seems to have encountered a deadlock. The init of Net in subprocesses doesn’t continue to print “Net init 1” and the program is stuck:
# python3 test.py
Net init 0
Net init 1
Net init 2
Net init 3
Net init 0
Net init 0
However, if I comment the 1st line in main:
# agent = Net()
It works fine.
# python3 test.py
Net init 0
Net init 0
Net init 1
Net init 2
Net init 3
Net init 1
Net init 2
Net init 3
This problem is too strange!