Hi, I am using a custom network for super-resolution together with a perceptual loss on 3 GPUs, so I have parallelized my network with DataParallel(net), and I can do the same with VGGPerceptualLoss() as well.
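For context, a minimal sketch of how the network itself is wrapped (the Conv2d here is just a stand-in for my actual super-resolution model):

import torch

# Stand-in for my actual super-resolution network.
net = torch.nn.Conv2d(3, 3, kernel_size=3, padding=1)
net = torch.nn.DataParallel(net)  # replicas are created on all visible GPUs at each forward
net = net.to("cuda" if torch.cuda.is_available() else "cpu")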
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1,2,3"
import torch
import torchvision
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
class VGGPerceptualLoss(torch.nn.Module):
    def __init__(self, resize=True, normalize_mean_std=True, device='cpu'):
        super(VGGPerceptualLoss, self).__init__()
        # Build the VGG16 feature extractor once and slice it into blocks,
        # rather than constructing the pretrained model four times.
        features = torchvision.models.vgg16(pretrained=True).features
        blocks = [
            features[:4].eval(),
            features[4:9].eval(),
            features[9:16].eval(),
            features[16:23].eval(),
        ]
        # Freeze all VGG weights; the loss network is never trained.
        for bl in blocks:
            for p in bl.parameters():
                p.requires_grad = False
        self.blocks = torch.nn.ModuleList(blocks)
        self.transform = torch.nn.functional.interpolate
        self.normalize_mean_std = normalize_mean_std
        if self.normalize_mean_std:
            # ImageNet normalization constants; requires_grad=False so they
            # are never updated by an optimizer.
            self.mean = torch.nn.Parameter(
                torch.tensor([0.485, 0.456, 0.406], device=device).view(1, 3, 1, 1),
                requires_grad=False)
            self.std = torch.nn.Parameter(
                torch.tensor([0.229, 0.224, 0.225], device=device).view(1, 3, 1, 1),
                requires_grad=False)
        self.resize = resize

    def forward(self, input, target):
        if input.shape[1] != 3:
            # Replicate single-channel inputs to fake RGB for VGG.
            input = input.repeat(1, 3, 1, 1)
            target = target.repeat(1, 3, 1, 1)
        if self.normalize_mean_std:
            input = (input - self.mean) / self.std
            target = (target - self.mean) / self.std
        if self.resize:
            input = self.transform(input, mode='bilinear', size=(224, 224), align_corners=False)
            target = self.transform(target, mode='bilinear', size=(224, 224), align_corners=False)
        loss = 0.0
        for block in self.blocks:
            input = block(input)
            target = block(target)
            loss += torch.nn.functional.l1_loss(input, target)
        return loss
## TEST CASE
loss = VGGPerceptualLoss()
in_ = torch.rand(6, 3, 300, 300)
out = torch.rand(6, 3, 300, 300)
loss = torch.nn.DataParallel(loss)
loss.to(device)
# DataParallel gathers one scalar loss per replica, so this prints a
# tensor with one entry per GPU rather than a single scalar.
print(loss(in_, out))
So my question is: when the object is created by loss = VGGPerceptualLoss(), I expect mean and std to be created on the CPU, since that is the default device. But once I wrap the loss in DataParallel(loss), will mean and std be replicated onto all three devices? How can I verify that?
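The only idea I have so far is a debugging probe like the sketch below, subclassing the loss just to print devices from inside forward (since, as far as I understand, the DataParallel replicas only exist during the forward call and cannot be inspected afterwards):

# Debugging probe (my own sketch): print where mean and the input live
# inside each replica's forward.
class VGGPerceptualLossDebug(VGGPerceptualLoss):
    def forward(self, input, target):
        if self.normalize_mean_std:
            print("mean on:", self.mean.device, "| input on:", input.device)
        return super().forward(input, target)

loss_dbg = torch.nn.DataParallel(VGGPerceptualLossDebug()).to(device)
loss_dbg(in_, out)  # should print one line per GPU: cuda:0, cuda:1, cuda:2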
And how can I make this movement of data from the CPU to the different GPUs efficient? I run into this problem often. Thanks a lot.
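For reference, this is the direction I was considering (pinned host memory plus asynchronous copies; the dataset below is just a random stand-in for my real low-res/high-res pairs):

import torch
from torch.utils.data import DataLoader, TensorDataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Random stand-in for my real dataset of (low-res, high-res) pairs.
dataset = TensorDataset(torch.rand(100, 3, 300, 300),
                        torch.rand(100, 3, 300, 300))
# pin_memory=True puts batches in page-locked host memory, so the
# .to(device, non_blocking=True) copies below can run asynchronously.
loader = DataLoader(dataset, batch_size=6, num_workers=4, pin_memory=True)

for lr, hr in loader:
    lr = lr.to(device, non_blocking=True)
    hr = hr.to(device, non_blocking=True)

Is this the right approach, or is there something better when the batch is then scattered by DataParallel?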