RuntimeError: Tensor for argument #2 'weight' is on CPU, but expected it to be on GPU (while checking arguments for cudnn_batch_norm)

  • Many thanks for your prompt reply!
    However, after I modified the model-training code, the same error persisted.
    The full error message is shown below:
Traceback (most recent call last):
  File "make_train.py", line 44, in <module>
    output = net(pressure)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/data_parallel.py", line 152, in forward
    outputs = self.parallel_apply(replicas, inputs, kwargs)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/data_parallel.py", line 162, in parallel_apply
    return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/parallel_apply.py", line 83, in parallel_apply
    raise output
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/parallel_apply.py", line 59, in _worker
    output = module(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/data/lyf/pytorch/make_net.py", line 92, in forward
    x = F.elu(nn.BatchNorm2d(num_features=self.f_dim*8)(x))
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/batchnorm.py", line 83, in forward
    exponential_average_factor, self.eps)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py", line 1697, in batch_norm
    training, momentum, eps, torch.backends.cudnn.enabled
RuntimeError: Tensor for argument #2 'weight' is on CPU, but expected it to be on GPU (while checking arguments for cudnn_batch_norm)
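
For reference, if I read the message correctly, the mismatch boils down to calling a CPU-resident BatchNorm module on a CUDA tensor. A tiny standalone repro (my own snippet, assuming a CUDA device is available):

import torch
import torch.nn as nn

bn = nn.BatchNorm2d(8)              # parameters are allocated on the CPU
x = torch.randn(2, 8, 4, 4).cuda()  # input lives on the GPU
out = bn(x)                         # raises a device-mismatch RuntimeError like the one above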

  • Please allow me to show you the code for the network definition.
import torch
import torch.nn as nn
import torch.nn.functional as F
from make_ops import conv_out_size_same
from make_data import batch_size


s_h, s_w = 403, 640
# 403,640
s_h2, s_w2 = conv_out_size_same(s_h, 2), conv_out_size_same(s_w, 2)
# 202, 320
s_h4, s_w4 = conv_out_size_same(s_h2, 2), conv_out_size_same(s_w2, 2)
# 101, 160
s_h8, s_w8 = conv_out_size_same(s_h4, 2), conv_out_size_same(s_w4, 2)
# 51, 80
s_h16, s_w16 = conv_out_size_same(s_h8, 2), conv_out_size_same(s_w8, 2)
# 25, 40
s_h32, s_w32 = conv_out_size_same(s_h16, 2), conv_out_size_same(s_w16, 2)
# 12, 20
s_h64, s_w64 = conv_out_size_same(s_h32, 2), conv_out_size_same(s_w32, 2)
# 6, 10
s_h128, s_w128 = conv_out_size_same(s_h64, 2), conv_out_size_same(s_w64, 2)
# 3,5
s_h256, s_w256 = conv_out_size_same(s_h128, 2), conv_out_size_same(s_w128, 2)
# 2, 3
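# conv_out_size_same comes from make_ops (not shown here); presumably it is
# the DCGAN-style helper that ceil-divides a size by the stride:
#     int(math.ceil(float(size) / float(stride)))
# If so, the width comments above check out, but the later heights would be
# 26, 13, 7, 4, 2 rather than 25, 12, 6, 3, 2.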


class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()

        self.CONV1_DEPTH = 2
        self.CONV2_DEPTH = 4
        self.CONV3_DEPTH = 8
        self.CONV4_DEPTH = 16
        self.CONV5_DEPTH = 32
        self.CONV6_DEPTH = 64
        self.CONV7_DEPTH = 128
        self.CONV8_DEPTH = 256
        self.f_dim = 32
        self.channel_dim = 1
        self.FC_NODE = 512
        self.IMG_HEIGHT = 403
        self.IMG_WIDTH = 640
        self.batch_size = batch_size

        self.fc1 = nn.Linear(in_features=10, out_features=self.f_dim*8)
        self.fc2 = nn.Linear(in_features=self.f_dim*8, out_features=self.f_dim*8*s_w256*s_h256)

        self.deconv1 = nn.ConvTranspose2d(in_channels=self.f_dim*8, out_channels=self.f_dim*4,
                                          kernel_size=2, stride=2)
        self.deconv2 = nn.ConvTranspose2d(in_channels=self.f_dim*4, out_channels=self.f_dim*2,
                                          kernel_size=2, stride=2)
        self.deconv3 = nn.ConvTranspose2d(in_channels=self.f_dim*2, out_channels=self.f_dim,
                                          kernel_size=2, stride=2)
        self.deconv4 = nn.ConvTranspose2d(in_channels=self.f_dim, out_channels=self.f_dim//2,
                                          kernel_size=2, stride=2)
        self.deconv5 = nn.ConvTranspose2d(in_channels=self.f_dim//2, out_channels=self.f_dim//4,
                                          kernel_size=2, stride=2)
        self.deconv6 = nn.ConvTranspose2d(in_channels=self.f_dim//4, out_channels=self.f_dim//8,
                                          kernel_size=2, stride=2)
        self.deconv7 = nn.ConvTranspose2d(in_channels=self.f_dim//8, out_channels=self.f_dim//16,
                                          kernel_size=2, stride=2)
        self.deconv8 = nn.ConvTranspose2d(in_channels=self.f_dim//16, out_channels=self.channel_dim,
                                          kernel_size=2, stride=2)

        self.conv1 = nn.Conv2d(in_channels=self.channel_dim, out_channels=self.CONV1_DEPTH,
                               kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=self.CONV1_DEPTH, out_channels=self.CONV2_DEPTH,
                               kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(in_channels=self.CONV2_DEPTH, out_channels=self.CONV3_DEPTH,
                               kernel_size=2, stride=2)
        self.conv4 = nn.Conv2d(in_channels=self.CONV3_DEPTH, out_channels=self.CONV4_DEPTH,
                               kernel_size=2, stride=2)
        self.conv5 = nn.Conv2d(in_channels=self.CONV4_DEPTH, out_channels=self.CONV5_DEPTH,
                               kernel_size=2, stride=2)
        self.conv6 = nn.Conv2d(in_channels=self.CONV5_DEPTH, out_channels=self.CONV6_DEPTH,
                               kernel_size=2, stride=2)
        self.conv7 = nn.Conv2d(in_channels=self.CONV6_DEPTH, out_channels=self.CONV7_DEPTH,
                               kernel_size=2, stride=2)
        self.conv8 = nn.Conv2d(in_channels=self.CONV7_DEPTH, out_channels=self.CONV8_DEPTH,
                               kernel_size=2, stride=2)

        self.avg_pool = nn.AvgPool2d(kernel_size=2, stride=1)
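
        # Note: no BatchNorm2d layers (and none of the final two Linear
        # layers) are registered here; they are built on the fly in forward().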

    def forward(self, input_tensor):

        x = self.fc1(input_tensor)
        x = self.fc2(x)
        x = x.view(-1, self.f_dim*8, s_h256, s_w256)
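        # Each nn.BatchNorm2d below is constructed anew on every call, so its
        # weight/bias are allocated on the CPU and are never moved by
        # net.to(device) or replicated by DataParallel.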
        x = F.elu(nn.BatchNorm2d(num_features=self.f_dim*8)(x))

        x = F.elu(nn.BatchNorm2d(num_features=self.f_dim*4)(self.deconv1(x)))
        x = F.elu(nn.BatchNorm2d(num_features=self.f_dim*2)(self.deconv2(x)))
        x = F.elu(nn.BatchNorm2d(num_features=self.f_dim)(self.deconv3(x)))
        x = F.elu(nn.BatchNorm2d(num_features=self.f_dim//2)(self.deconv4(x)))
        x = F.elu(nn.BatchNorm2d(num_features=self.f_dim//4)(self.deconv5(x)))
        x = F.elu(nn.BatchNorm2d(num_features=self.f_dim//8)(self.deconv6(x)))
        x = F.elu(nn.BatchNorm2d(num_features=self.f_dim//16)(self.deconv7(x)))
        x = F.tanh(nn.BatchNorm2d(num_features=self.channel_dim)(self.deconv8(x)))

        x = F.elu(nn.BatchNorm2d(num_features=self.CONV1_DEPTH)(self.conv1(x)))
        x = F.elu(nn.BatchNorm2d(num_features=self.CONV2_DEPTH)(self.conv2(x)))
        x = F.elu(nn.BatchNorm2d(num_features=self.CONV3_DEPTH)(self.conv3(x)))
        x = F.elu(nn.BatchNorm2d(num_features=self.CONV4_DEPTH)(self.conv4(x)))
        x = F.elu(nn.BatchNorm2d(num_features=self.CONV5_DEPTH)(self.conv5(x)))
        x = F.elu(nn.BatchNorm2d(num_features=self.CONV6_DEPTH)(self.conv6(x)))
        x = F.elu(nn.BatchNorm2d(num_features=self.CONV7_DEPTH)(self.conv7(x)))
        x = F.elu(nn.BatchNorm2d(num_features=self.CONV8_DEPTH)(self.conv8(x)))

        x = self.avg_pool(x)
        x = x.view(-1, self.num_flat_features(x))
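        # These nn.Linear layers are likewise built per call: freshly
        # initialized on the CPU every forward pass, never registered, and
        # therefore invisible to net.to(device) and to the optimizer.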
        x = F.elu(nn.Linear(in_features=x.size()[-1], out_features=self.FC_NODE)(x))
        x = nn.Linear(in_features=self.FC_NODE, out_features=self.IMG_HEIGHT*self.IMG_WIDTH)(x)
        x = x.view(-1, self.IMG_HEIGHT, self.IMG_WIDTH)

        return x

    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

As I read it, the error says the argument check inside cuDNN's batch norm fails because the BatchNorm weight tensor is still on the CPU while the input is on the GPU.
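
I suspect the cause: every nn.BatchNorm2d in forward() is constructed at call time, so its parameters are allocated on the CPU, and net.to(device) (which runs once, before training) can never move them. A minimal sketch of the failing pattern versus the working one (the Broken/Fixed class names are mine, not from the real code):

import torch
import torch.nn as nn
import torch.nn.functional as F


class Broken(nn.Module):
    def __init__(self):
        super(Broken, self).__init__()
        self.conv = nn.Conv2d(1, 8, kernel_size=2, stride=2)

    def forward(self, x):
        # A brand-new BatchNorm2d is built on every call; its weight/bias
        # live on the CPU, so a CUDA input trips cudnn_batch_norm.
        return F.elu(nn.BatchNorm2d(8)(self.conv(x)))


class Fixed(nn.Module):
    def __init__(self):
        super(Fixed, self).__init__()
        self.conv = nn.Conv2d(1, 8, kernel_size=2, stride=2)
        # Registered once, so net.to(device) moves it, DataParallel
        # replicates it, and its weights actually get trained.
        self.bn = nn.BatchNorm2d(8)

    def forward(self, x):
        return F.elu(self.bn(self.conv(x)))

Applying the same change to the full Net would mean one registered BatchNorm2d per deconv/conv stage (matching that layer's out_channels) and registering the two trailing Linear layers in __init__ as well.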

  • My modified training code is shown below.
import torch
import torch.nn as nn
import torch.optim as optim
from make_data import train_dataloader, test_dataloader
from make_net import Net


num_epochs = 30
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
train_loader = train_dataloader

criterion = nn.MSELoss()
net = Net()
optimizer = optim.Adam(net.parameters(), lr=0.1, betas=(0.9, 0.99))

if torch.cuda.is_available():
    print("Let's use", torch.cuda.device_count(), "GPUs")
    net = nn.DataParallel(net)
net.to(device)


for epoch in range(num_epochs):
    running_loss = 0.0
    print("Epoch {}/{}".format(epoch, num_epochs-1))
    print("-" * 10)

    for i, sample in enumerate(train_loader, 0):
        image, pressure = sample['image'], sample['pressure']

        image = image.float()
        image = image.to(device)

        pressure = pressure.float()
        pressure = pressure.to(device)

        optimizer.zero_grad()
        output = net(pressure)
        loss = criterion(output, image)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if (i+1) % 100 == 0:
            print("%d, %5d, loss: %.3f" % (epoch, i, running_loss/100))
            running_loss = 0.0
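
As a quick sanity check (my addition, not part of the script above), printing where each registered parameter lives after net.to(device) should report a CUDA device for every entry:

for name, p in net.named_parameters():
    print(name, p.device)  # expect cuda:0 for every registered parameter

The BatchNorm and Linear modules created inside forward() never appear in this list, which is exactly why net.to(device) cannot move them.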


  • Many thanks again for your prompt reply. Thank you very much!