Hello I am trying to play around with the multi process implementation (link and I ran into a problem with the grad being None when I initialize my model. Here is a simple example:
from __future__ import print_function
import argparse
import os
import sys
import torch
import torch.multiprocessing as mp
import torch.nn as nn
import torch.nn.functional as F
class MyModel(torch.nn.Module):
def __init__(self):
super(MyModel, self).__init__()
self.main = nn.Sequential(
nn.Linear(256,6),nn.Softmax())
def forward(self,x):
return self.main(x)
def train(model):
# This for loop will break sharing of gradient buffers. It's not
# necessary but it reduces the contention, and has a small memory cost
# (equal to the total size of parameters).
for param in model.parameters():
param.grad.data = param.grad.data.clone()
# Construct data_loader, optimizer, etc.
for data, labels in data_loader:
input1 = torch.ones(256)
optimizer.zero_grad()
loss_fn(model(input1), torch.zeros(6)).backward()
optimizer.step() # This will update the shared parameters
if __name__ == '__main__':
num_processes = 4
model = MyModel()
# NOTE: this is required for the ``fork`` method to work
model.share_memory()
processes = []
for rank in range(num_processes):
p = mp.Process(target=train, args=(model,))
p.start()
processes.append(p)
for p in processes:
p.join()
I get the following error:
Process Process-4:
Traceback (most recent call last):
File "/home/jtremblay/anaconda2/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/home/jtremblay/anaconda2/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/home/jtremblay/code/pytorch-a3c/grad_example.py", line 25, in train
param.grad.data = param.grad.data.clone()
AttributeError: 'NoneType' object has no attribute 'data'
Process Process-2:
Traceback (most recent call last):
File "/home/jtremblay/anaconda2/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/home/jtremblay/anaconda2/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/home/jtremblay/code/pytorch-a3c/grad_example.py", line 25, in train
param.grad.data = param.grad.data.clone()
AttributeError: 'NoneType' object has no attribute 'data'
Process Process-3:
Traceback (most recent call last):
File "/home/jtremblay/anaconda2/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/home/jtremblay/anaconda2/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/home/jtremblay/code/pytorch-a3c/grad_example.py", line 25, in train
param.grad.data = param.grad.data.clone()
AttributeError: 'NoneType' object has no attribute 'data'
Process Process-1:
Traceback (most recent call last):
File "/home/jtremblay/anaconda2/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/home/jtremblay/anaconda2/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/home/jtremblay/code/pytorch-a3c/grad_example.py", line 25, in train
param.grad.data = param.grad.data.clone()
AttributeError: 'NoneType' object has no attribute 'data'
I know I could create them manually but I am not sure this is a wanted behaviour.