I started using PyTorch yesterday and it works pretty well.
Today I tried to use DataParallel, but I ran into some errors.
Here is a simple piece of code that reproduces the error:
import torch
import torch.nn as nn
from torch.autograd import Variable
import sys
import numpy as np
class Test(nn.Module):
    def __init__(self):
        super(Test, self).__init__()
        # A constant tensor, moved to GPU 0 by hand
        self.const = Variable(torch.from_numpy(np.zeros((5, 5), dtype=np.float32))).cuda(0)

    def forward(self, x, y):
        bat = x.size(0)
        return self.const.unsqueeze(0).expand(bat, 5, 5) + x + y
model = Test()
# The first command-line argument gives the number of GPUs to use, e.g. python test.py 2
model = torch.nn.DataParallel(model, device_ids=range(int(sys.argv[1])))
inp1 = Variable(torch.from_numpy(np.zeros((6, 5, 5), dtype=np.float32))).cuda()
inp2 = Variable(torch.from_numpy(np.zeros((6, 5, 5), dtype=np.float32))).cuda()
print(inp1)
print(model(inp1, inp2))
The error message is:

RuntimeError: arguments are located on different GPUs at /b/wheel/pytorch-src/torch/lib/THC/generated/../generic/THCTensorMathPointwise.cu:214
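For context on where the error seems to come from: self.const is a plain Variable attribute that is moved to GPU 0 by hand, and DataParallel only replicates parameters and buffers to the other devices, not arbitrary attributes. Replicas running on device 1 and above therefore end up adding a GPU-0 tensor to inputs living on their own GPU. A minimal sketch of one possible fix, assuming the constant is not meant to be trained, is to register it as a buffer so each replica gets its own on-device copy (TestFixed is just an illustrative name):

import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np

class TestFixed(nn.Module):
    def __init__(self):
        super(TestFixed, self).__init__()
        # Buffers, unlike plain attributes, are copied to every replica by DataParallel
        self.register_buffer('const', torch.from_numpy(np.zeros((5, 5), dtype=np.float32)))

    def forward(self, x, y):
        bat = x.size(0)
        # Wrap the buffer in a Variable so it can be added to Variable inputs
        const = Variable(self.const)
        return const.unsqueeze(0).expand(bat, 5, 5) + x + y

Wrapping TestFixed() in torch.nn.DataParallel as above should then run on multiple GPUs, since every replica sees the constant on its own device.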