I’m having trouble using nn.DataParallel with a custom model. I followed the guidance in this tutorial, but when I run the model it doesn’t actually use both GPUs, and my first GPU fills up. I’m also not sure how to incorporate the advice from this forum into my code. Here is my model and the code I use to make it parallel, taken from the tutorial:
import torch
import torch.nn as nn
import torch.nn.functional as F

# specify the segmentation network
class SegNet(nn.Module):
    def __init__(self, params):
        super(SegNet, self).__init__()
        C_in, H_in, W_in = params["input_shape"]
        init_f = params["initial_filters"]
        num_outputs = params["num_outputs"]

        # encoder: double the number of filters at each stage
        self.conv1 = nn.Conv2d(C_in, init_f, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(init_f, 2 * init_f, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(2 * init_f, 4 * init_f, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(4 * init_f, 8 * init_f, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(8 * init_f, 16 * init_f, kernel_size=3, padding=1)

        # decoder: bilinear upsampling, then convolutions that halve the filters
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        self.conv_up1 = nn.Conv2d(16 * init_f, 8 * init_f, kernel_size=3, padding=1)
        self.conv_up2 = nn.Conv2d(8 * init_f, 4 * init_f, kernel_size=3, padding=1)
        self.conv_up3 = nn.Conv2d(4 * init_f, 2 * init_f, kernel_size=3, padding=1)
        self.conv_up4 = nn.Conv2d(2 * init_f, init_f, kernel_size=3, padding=1)
        self.conv_out = nn.Conv2d(init_f, num_outputs, kernel_size=3, padding=1)

    def forward(self, x):
        # encoder: conv + ReLU + 2x2 max pooling at each stage
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv4(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv5(x))

        # decoder: upsample + conv + ReLU at each stage
        x = self.upsample(x)
        x = F.relu(self.conv_up1(x))
        x = self.upsample(x)
        x = F.relu(self.conv_up2(x))
        x = self.upsample(x)
        x = F.relu(self.conv_up3(x))
        x = self.upsample(x)
        x = F.relu(self.conv_up4(x))

        output = self.conv_out(x)
        return output
# specify model parameters (h and w are the input image height and width, defined earlier)
params_model = {
    "input_shape": (3, h, w),
    "initial_filters": 16,
    "num_outputs": 1,
}
model = SegNet(params_model)

# tell PyTorch to use both GPUs
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = nn.DataParallel(model)
model.to(device)
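For context, my training loop moves each batch to the device and calls the wrapped model directly, roughly like this (a simplified sketch: train_loader, criterion, and optimizer stand in for my actual DataLoader, loss, and optimizer objects):

# simplified training step; train_loader, criterion, and optimizer are placeholders for my real objects
for inputs, targets in train_loader:
    inputs = inputs.to(device)          # DataParallel should scatter this batch across the available GPUs
    targets = targets.to(device)

    optimizer.zero_grad()
    outputs = model(inputs)             # forward pass runs a replica of the model on each GPU
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()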
But when I start training on my dataset, I get the error below, which suggests only one GPU is actually being used. When I apply the same strategy to one of the torchvision models, it runs fine on both GPUs.
RuntimeError: CUDA out of memory. Tried to allocate 28.00 MiB (GPU 0; 11.00 GiB total capacity; 9.90 GiB already allocated; 23.01 MiB free; 9.98 GiB reserved in total by PyTorch)
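One thing I could add, borrowing the data parallelism tutorial's trick of printing tensor sizes inside forward, is a quick sanity check like this (a hypothetical snippet, not currently in my script) to confirm whether the batch is actually being split across replicas:

import torch
import torch.nn as nn

# hypothetical sanity check: DataParallel splits the input along the batch dimension,
# so each replica's forward() should see batch_size / num_gpus samples
class SizeCheck(nn.Module):
    def forward(self, x):
        print("replica input size:", x.size(), "on", x.device)
        return x

if torch.cuda.is_available():
    check = nn.DataParallel(SizeCheck()).to("cuda:0")
    dummy = torch.randn(8, 3, 64, 64).to("cuda:0")  # batch of 8 dummy images
    check(dummy)                                    # with 2 GPUs, each replica should report a batch of 4

If both GPUs show up in a check like this but not during training, I'd know the problem is in my training setup rather than in the model definition.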