Is there a way to share network parameters and gradients across multiple processes?

I am trying to run network training in multiple processes. However, the gradients computed when the loss is back-propagated are available only inside the worker process that computed them; they are not visible in the parent process, even after the worker processes have been joined.

Here is a minimal reproducible example:

import torch
import torch.nn as nn
import torch.multiprocessing as mp

class Net(nn.Module):
    """Small feed-forward network: Linear(3 -> 4), ReLU, Linear(4 -> 1)."""

    def __init__(self):
        super().__init__()

        # Layers are created in the order they are applied, so the
        # sub-module indices inside ``self.net`` are 0, 1 and 2.
        hidden = nn.Linear(3, 4)
        activation = nn.ReLU()
        head = nn.Linear(4, 1)
        self.net = nn.Sequential(hidden, activation, head)

    def forward(self, x):
        """Pass ``x`` through the sequential stack and return its output."""
        out = self.net(x)
        return out

def compute_something(network: nn.Module, pid: int):
    """Run one forward/backward pass on random data and print the results.

    Args:
        network: Module that is called on a random ``(5, 3)`` input batch.
        pid: Identifier used only in the ``Running PID`` log line.

    After ``backward()``, every parameter of ``network`` is printed together
    with its ``.grad`` between the ``INSIDE##########`` and ``##########``
    marker lines.
    """
    print(f'Running PID: {pid}')

    batch = torch.randn(5, 3)
    squared_outputs = network(batch) ** 2
    # The objective is the negated mean of the squared outputs.
    loss = -squared_outputs.mean()
    loss.backward()

    print('INSIDE' + '#' * 10)
    for param in network.parameters():
        print(f'Param: {param}', f'Grad: {param.grad}', sep='\n')
    print('#' * 10)
    
def main():
    """Run ``compute_something`` in two processes and print the parent's view.

    A ``Net`` is created and ``share_memory()`` is called on it. Two
    ``mp.Process`` workers, each given that network and its index as ``pid``,
    are then started and joined. Finally the parameters and their ``.grad``
    attributes are printed as seen from this (parent) process.
    """
    net = Net().share_memory()

    workers = [
        mp.Process(target=compute_something, args=(net, pid))
        for pid in range(2)
    ]

    # Start every worker before waiting on any of them, so they can run
    # concurrently.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # Parent-side view of the network once all workers have finished.
    for param in net.parameters():
        print(f'Param: {param}')
        print(f'Grad: {param.grad}')
        
# Script entry point: run the reproduction only when this file is executed
# directly, so that merely importing the module does not start worker processes.
if __name__ == '__main__':
    main()

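Inside each worker process (the output printed between the `INSIDE##########` and `##########` markers), the gradients are computed and assigned to each parameter's `.grad`. In the parent process, however, after the workers have been joined, every `param.grad` is still `None`. I appreciate any help.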

@ptrblck Do you have any pointers on this? Any help is appreciated.