I have 4 GPUs:
gpus = list(config.GPUS)
gradient_loss = GradientLoss
model = nn.DataParallel(model, device=gpus).cuda()
Gradient loss is computed as below:
class GradientLoss(nn.Module):
def __init__(self, channels=3):
super(GradientLoss, self).__init__()
pos = torch.from_numpy(np.identity(channels, dtype=np.float32))
neg = -1 * pos
self.filter_x = torch.stack((neg, pos)).unsqueeze(0).permute(3, 2, 0, 1).cuda()
self.filter_y = torch.stack((pos.unsqueeze(0), neg.unsqueeze(0))).permute(3, 2, 0, 1).cuda()
def forward(self, prediction, target):
gen_frames_x = nn.functional.pad(prediction, [0, 1, 0, 0])
gen_frames_y = nn.functional.pad(prediction, [0, 0, 0, 1])
gt_frames_x = nn.functional.pad(target, [0, 1, 0, 0])
gt_frames_y = nn.functional.pad(target, [0, 0, 0, 1])
gen_dx = torch.abs(nn.functional.conv2d(gen_frames_x, self.filter_x))
gen_dy = torch.abs(nn.functional.conv2d(gen_frames_y, self.filter_y))
gt_dx = torch.abs(nn.functional.conv2d(gt_frames_x, self.filter_x))
gt_dy = torch.abs(nn.functional.conv2d(gt_frames_y, self.filter_y))
grad_diff_x = torch.abs(gt_dx - gen_dx)
grad_diff_y = torch.abs(gt_dy - gen_dy)
return torch.mean(grad_diff_x + grad_diff_y)
The error:
line 32, in forward
gen_dx = torch.abs(nn.functional.conv2d(gen_frames_x, self.filter_x))
RuntimeError: Expected tensor for argument #1 ‘input’ to have the same device as tensor for argument #2 ‘weight’; but device 1 does not equal 0 (while checking arguments for cudnn_convolution)
The reason is that the self.filter_x uses a GPU device (eg: cuda(0)) but the prediction and target images use another GPU device (eg: cuda(1)). How to solve this issue? Tks.