Multiprocessing is blocked after training the model in the main process!

I want to use multiprocessing to train the model, but I found that there may be a bug in the multiprocessing setup.
Here is what I found:
when I train the model in the main process, the model's forward pass in the sub-process blocks.
Here is the code:
Something surprising happens: when self.fc1's out_features is set to 1, the following code works. But if out_features is even a little larger, the code does not work; it blocks.

import torch
from torch import optim
from torch import multiprocessing
from torch import nn
from torch.autograd import Variable
from nets.nets import Actor
import torch.nn.functional as F


class Actor(nn.Module):
    """Network whose forward pass runs only its three convolution layers.

    Two fully connected layers are also constructed so that the module owns
    their parameters, but ``forward`` never calls them.

    Args:
        num_actions: ``out_features`` of the (unused in ``forward``)
            ``out_layer``.
    """

    def __init__(self, num_actions=2):
        super(Actor, self).__init__()
        # Layers are created in a fixed order: conv1, conv2, conv3, fc1,
        # out_layer. This is also the order in which parameters() yields them.
        self.conv1 = nn.Conv2d(4, 32, kernel_size=8, stride=4, padding=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=1)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)

        # Only allocated, never applied in forward(); their size is what the
        # surrounding experiment varies.
        hidden_units = 512
        self.fc1 = nn.Linear(10 * 10 * 64, hidden_units)
        self.out_layer = nn.Linear(hidden_units, num_actions)

    def forward(self, inputs):
        """Print the input size, then apply conv1..conv3, each followed by ReLU.

        Args:
            inputs: batch with 4 channels (required by ``conv1``).

        Returns:
            The output of ``conv3`` after an in-place ReLU.
        """
        print('inputs', inputs.size())
        features = inputs
        for conv in (self.conv1, self.conv2, self.conv3):
            # In-place ReLU overwrites the fresh conv output, not the input.
            features = F.relu(conv(features), inplace=True)
        return features

def iteration():
    """Worker entry point: build an Actor and run forward passes forever.

    Prints "hello world" after every completed forward pass. This function
    never returns.
    """
    model = Actor()
    while True:
        # torch.FloatTensor(sizes...) allocates without initialising; only
        # the shape matters for this experiment.
        frame = torch.FloatTensor(1, 4, 84, 84)
        # The reported hang occurs inside this call, at the first conv layer.
        model(Variable(frame))
        print("hello world")


if __name__ == '__main__':
    # Train-step simulation in the parent, then run `iteration` in a child
    # process and wait for it (the child loops forever, so join() blocks).
    demonet = Actor()

    ccc = optim.Adam(params=demonet.parameters())
    # Simulate a backward pass: give every parameter an all-zero gradient so
    # the optimizer step below has something to apply.
    for para in demonet.parameters():
        para._grad = Variable(para.data.new().resize_as_(para.data).zero_(), volatile=True)
    ccc.step()

    # Start the child with the 'spawn' start method instead of the platform
    # default. With fork, the child inherits the parent's state as it is
    # after ccc.step(); the reported symptom is that the child's first conv
    # layer then blocks forever once the tensors are large enough.
    # NOTE(review): presumably the parent's intra-op thread pool is copied
    # without its worker threads -- confirm on the target platform.
    # A spawned child starts from a fresh interpreter and re-imports this
    # module, which is why the code above must stay under the __main__ guard.
    ctx = multiprocessing.get_context('spawn')
    p = ctx.Process(target=iteration)
    p.start()
    p.join()

Hi Keith. Actually I didn’t get any error when running this code.
Meanwhile, if I run forward pass in main process after the child process starts, it also works quite well.