Freezing layers issue with nn.DataParallel (multi-GPU)

Here is a minimal reproduction. On a machine with multiple GPUs the error occurs; if you comment out the model = nn.DataParallel(model) line, it does not:

from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

class mynet(torch.nn.Module):
    def __init__(self):
        super(mynet, self).__init__()
        self.fc = nn.Linear(2, 1, bias=False)

    def forward(self, x):
        # freeze the fully connected layer from inside forward()
        for param in self.fc.parameters():
            param.requires_grad = False

        return self.fc(x)

model = mynet()
model = nn.DataParallel(model)  # comment out this line to make the error go away
model.cuda()

# dummy input of shape (1, 1, 1, 2) and a target (unused in this snippet)
data = np.array([0.0, 1.0])
x = torch.from_numpy(data).float().unsqueeze(0).unsqueeze(0).unsqueeze(0)
gt = torch.from_numpy(np.array([0.0])).float().unsqueeze(0).unsqueeze(0).unsqueeze(0)

# model.train()
result = model(x=x.cuda())  # the error is raised here when the DataParallel wrapper is active
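
For comparison, here is a minimal sketch of freezing the layer outside forward(), i.e. setting requires_grad on the parameters of the original module before wrapping it in nn.DataParallel. The class name mynet_frozen_outside and the variable names below are just illustrative, and this assumes the only goal is to keep self.fc frozen; the flag should then carry over to the per-GPU copies without forward() having to modify any parameters:

# Variant of mynet whose forward() does not touch requires_grad;
# the freezing happens once, before the DataParallel wrap.
class mynet_frozen_outside(torch.nn.Module):
    def __init__(self):
        super(mynet_frozen_outside, self).__init__()
        self.fc = nn.Linear(2, 1, bias=False)

    def forward(self, x):
        return self.fc(x)

model2 = mynet_frozen_outside()
for param in model2.fc.parameters():
    param.requires_grad = False  # set on the original (leaf) parameters

model2 = nn.DataParallel(model2)
model2.cuda()

result2 = model2(x=x.cuda())  # forward pass without modifying requires_grad inside forward()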