Pytorch 0.4.0 / Cuda 9.0 / cudnn 7.1 / V100 x 4
Traceback (most recent call last):
File "main.py", line 135, in <module>
model.train(train_loader, valid_loader)
File "/home/rplab/workspace/DW/nucleus/nuclues_segmentation/trainers/CNNTrainer.py", line 55, in train
output_ = self.G(input_)
File "/home/rplab/workspace/DW/nucleus/pytorch040/lib/python3.5/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/home/rplab/workspace/DW/nucleus/pytorch040/lib/python3.5/site-packages/torch/nn/parallel/data_parallel.py", line 114, in forward
outputs = self.parallel_apply(replicas, inputs, kwargs)
File "/home/rplab/workspace/DW/nucleus/pytorch040/lib/python3.5/site-packages/torch/nn/parallel/data_parallel.py", line 124, in parallel_apply
return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
File "/home/rplab/workspace/DW/nucleus/pytorch040/lib/python3.5/site-packages/torch/nn/parallel/parallel_apply.py", line 65, in parallel_apply
raise output
File "/home/rplab/workspace/DW/nucleus/pytorch040/lib/python3.5/site-packages/torch/nn/parallel/parallel_apply.py", line 41, in _worker
output = module(*input, **kwargs)
File "/home/rplab/workspace/DW/nucleus/pytorch040/lib/python3.5/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/home/rplab/workspace/DW/nucleus/nuclues_segmentation/models/unet_nonlocal.py", line 55, in forward
nonlocal1 = self.nonlocal1(maxpool1)
File "/home/rplab/workspace/DW/nucleus/pytorch040/lib/python3.5/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/home/rplab/workspace/DW/nucleus/nuclues_segmentation/models/nonlocal_layer.py", line 111, in forward
output = self.operation_function(x)
File "/home/rplab/workspace/DW/nucleus/nuclues_segmentation/models/nonlocal_layer.py", line 119, in _embedded_gaussian
g_x = self.g(x).view(batch_size, self.inter_channels, -1)
File "/home/rplab/workspace/DW/nucleus/pytorch040/lib/python3.5/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/home/rplab/workspace/DW/nucleus/pytorch040/lib/python3.5/site-packages/torch/nn/modules/container.py", line 91, in forward
input = module(input)
File "/home/rplab/workspace/DW/nucleus/pytorch040/lib/python3.5/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/home/rplab/workspace/DW/nucleus/pytorch040/lib/python3.5/site-packages/torch/nn/modules/conv.py", line 301, in forward
self.padding, self.dilation, self.groups)
RuntimeError: Expected tensor for argument #1 'input' to have the same device as tensor for argument #2 'weight'; but device 1 does not equal 0 (while checking arguments for cudnn_convolution)
This error causes under code
class Foo(nn.Module):
def __init(self, .....):
.....
# Define the operation
if mode == 'concatenation':
self.operation_function = self._concatenation
elif mode == 'concatenation_debug':
self.operation_function = self._concatenation_debug
elif mode == 'concatenation_residual':
self.operation_function = self._concatenation_residual
else:
raise NotImplementedError('Unknown operation function.')
def forward(self, x, g):
output = self.operation_function(x, g)
return output
self.operation_function
is defined in init block use if condition
But in forward output = self.__concatenation(x,g)
, above error is not occur
why if condition in __init__
+ nn.Dataparallel
is occur Runtime error?