Hi, I have got a problem abount multi-gpus training. The code is from https://github.com/chaoqichen/HTCN
. Here is the key code:
class RandomLayer(nn.Module):
def __init__(self, input_dim_list=[], output_dim=1024):
super(RandomLayer, self).__init__()
self.input_num = len(input_dim_list)# 2
self.output_dim = output_dim
self.random_matrix = [torch.rand(input_dim_list[i], output_dim) for i in range(self.input_num)]
def forward(self, input_list):
return_list = [torch.mm(input_list[i], self.random_matrix[i]) for i in range(self.input_num)]
return_tensor = return_list[0] / math.pow(float(self.output_dim), 1.0 / len(return_list))
for single in return_list[1:]:
return_tensor = torch.mul(return_tensor, single)
return return_tensor
def cuda(self):
super(RandomLayer, self).cuda()
self.random_matrix = [val.cuda() for val in self.random_matrix]
class vgg16(_fasterRCNN):
def __init__(self, classes, pretrained=False, class_agnostic=False,lc=False,gc=False, la_attention = False, mid_attention = False):
self.model_path = cfg.VGG_PATH
self.dout_base_model = 512
self.pretrained = pretrained
self.class_agnostic = class_agnostic
self.lc = lc
self.gc = gc
_fasterRCNN.__init__(self, classes, class_agnostic,lc,gc, la_attention, mid_attention)
def _init_modules(self):
vgg = models.vgg16()
if self.pretrained:
print("Loading pretrained weights from %s" %(self.model_path))
state_dict = torch.load(self.model_path)
vgg.load_state_dict({k:v for k,v in state_dict.items() if k in vgg.state_dict()})
vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1])
# not using the last maxpool layer
#print(vgg.features)
self.RCNN_base1 = nn.Sequential(*list(vgg.features._modules.values())[:14])
self.RCNN_base2 = nn.Sequential(*list(vgg.features._modules.values())[14:21])
self.RCNN_base3 = nn.Sequential(*list(vgg.features._modules.values())[21:-1])
#print(self.RCNN_base1)
#print(self.RCNN_base2)
self.netD = netD(context=self.gc)
self.netD_pixel = netD_pixel(context=self.lc)
self.netD_mid = netD_mid(context=self.gc)
feat_d = 4096
feat_d2 = 384
feat_d3 = 2048
self.RandomLayer = RandomLayer([feat_d, feat_d2], feat_d3)
self.RandomLayer.cuda()
# Fix the layers before conv3:
self.netD_da = netD_da(feat_d3)
for layer in range(10):
for p in self.RCNN_base1[layer].parameters(): p.requires_grad = False
# self.RCNN_base = _RCNN_base(vgg.features, self.classes, self.dout_base_model)
self.RCNN_top = vgg.classifier
self.RCNN_cls_score = nn.Linear(feat_d+feat_d2, self.n_classes)
if self.class_agnostic:
self.RCNN_bbox_pred = nn.Linear(feat_d+feat_d2, 4)
else:
self.RCNN_bbox_pred = nn.Linear(feat_d+feat_d2, 4 * self.n_classes)
def _head_to_tail(self, pool5):
pool5_flat = pool5.view(pool5.size(0), -1)
fc7 = self.RCNN_top(pool5_flat)
return fc7
RandomLayer cannot be assigned to multi devices using the .cuda()
method using the code self.RandomLayer.cuda()
. The error message is as follows:
return_list = [torch.mm(input_list[i], self.random_matrix[i]) for i in range(self.input_num)]
RuntimeError: Expected tensor for ‘out’ to have the same device as tensor for argument #3 ‘mat2’; but device 1 dose not equal 0(while checking arguments for addmm)
And I print the device of input_list[i]
and self.random_matrix[i]
when using two gpus, the result is as follows (self.input_num == 2
):
gpu id 0:
input_list0: cuda:0 self.random_matrix0: cuda:0
input_list1: cuda:0 self.random_matrix1: cuda:0
gpu id 1:
input_list0: cuda:1 self.random_matrix0: cuda:0
input_list1: cuda:1 self.random_matrix1: cuda:0
Is there any advice to fix the problem?