Variable seen on CPU while moved onto GPU

neon29 · December 14, 2018, 1:41pm

Hi!

I have been working with Pytorch for a while but as the training time was an obstacle, I build myself a new computer partially dedicated to deep learning.

Today, I was trying to modify some of my existing scripts in order to run them on GPU. To do so,
I have read I should add .cuda() when instantiating my models and also on the input tensors.

When modifying a simple vanilla GAN, I get the following error:

Traceback (most recent call last):

File “”, line 1, in
runfile(‘/home/florian/DeepLearning/GAN/GAN_pytorch.py’, wdir=‘/home/florian/DeepLearning/GAN’)

File “/home/florian/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py”, line 668, in runfile
execfile(filename, namespace)

File “/home/florian/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py”, line 108, in execfile
exec(compile(f.read(), filename, ‘exec’), namespace)

File “/home/florian/DeepLearning/GAN/GAN_pytorch.py”, line 145, in
fake_data = generator(Variable(sample_z(batch_size, hidden_size)))

File “/home/florian/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py”, line 489, in call
result = self.forward(*input, **kwargs)

File “/home/florian/DeepLearning/GAN/GAN_pytorch.py”, line 33, in forward
x = F.leaky_relu(self.layer1(z))

File “/home/florian/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py”, line 489, in call
result = self.forward(*input, **kwargs)

File “/home/florian/anaconda3/lib/python3.7/site-packages/torch/nn/modules/linear.py”, line 67, in forward
return F.linear(input, self.weight, self.bias)

File “/home/florian/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py”, line 1352, in linear
ret = torch.addmm(torch.jit._unwrap_optional(bias), input, weight.t())

RuntimeError: Expected object of backend CUDA but got backend CPU for argument #4 ‘mat1’

Thus, it seems a variable is still on CPU while I thought I’ve moved my data on the GPU. Here is the full code:

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

class Generator(nn.Module):
def init(self, hidden_size):
super(Generator, self).init()
self.layer1 = nn.Linear(hidden_size, 256)
torch.nn.init.xavier_normal(self.layer1.weight)
self.layer2 = nn.Linear(256, 512)
torch.nn.init.xavier_normal(self.layer2.weight)
self.layer3 = nn.Linear(512, 1024)
torch.nn.init.xavier_normal(self.layer3.weight)
self.layer4 = nn.Linear(1024, 28*28)
torch.nn.init.xavier_normal(self.layer4.weight)

def forward(self, z):
    x = F.leaky_relu(self.layer1(z))
    x = F.leaky_relu(self.layer2(x))
    x = F.leaky_relu(self.layer3(x))
    x = F.tanh(self.layer4(x))
    return x

class Discriminator(nn.Module):
def init(self):
super(Discriminator, self).init()
self.layer1 = nn.Linear(784, 1024)
torch.nn.init.xavier_normal(self.layer1.weight)
self.layer2 = nn.Linear(1024, 512)
torch.nn.init.xavier_normal(self.layer2.weight)
self.layer3 = nn.Linear(512, 256)
torch.nn.init.xavier_normal(self.layer3.weight)
self.layer4 = nn.Linear(256, 1)
torch.nn.init.xavier_normal(self.layer4.weight)

def forward(self, x):
    x = F.leaky_relu(self.layer1(x))
    #x = F.dropout(x, p=0.2)
    x = F.leaky_relu(self.layer2(x))
    #x = F.dropout(x, p=0.2)
    x = F.sigmoid(self.layer3(x))
    #x = F.dropout(x, p=0.2)
    x = F.sigmoid(self.layer4(x))

    return x

def sample_z(batch_size, size):
‘’’
Sample the generator input from a uniform (rand) or normal (randn) distribution
‘’’
#return 2 * torch.rand(batch_size, size) - 1
return torch.randn(batch_size, size)

def plot(samples):
fig = plt.figure(figsize=(4, 4))
gs = gridspec.GridSpec(4, 4)
gs.update(wspace=0.05, hspace=0.05)

for i, sample in enumerate(samples):
    ax = plt.subplot(gs[i])
    plt.axis('off')
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.set_aspect('equal')
    plt.imshow(sample.data.numpy().reshape(28, 28), cmap='Greys_r')
    
return fig

if name == ‘main’:

# parameters
lr = 2e-4
optim_betas = (0.9, 0.999)

batch_size = 128
hidden_size = 100
nb_epochs = 100
train_loader = torch.utils.data.DataLoader(
                               datasets.MNIST('../MNIST-PyTorch', 
                               train=True, 
                               download=True, 
                               transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=(0.5,), std=(0.5,))])), 
                                batch_size=batch_size, shuffle=True, 
                                drop_last=True) 
                           
ones_label = Variable(torch.ones((batch_size, 1)), requires_grad=False)
ones_label = ones_label.cuda()
zeros_label = Variable(torch.zeros((batch_size, 1)), requires_grad=False)
zeros_label = zeros_label.cuda()
                                      
    
generator = Generator(hidden_size=hidden_size)
generator = generator.cuda()
discriminator = Discriminator()
discriminator = discriminator.cuda()

# binary cross-entropy loss
criterion = nn.BCELoss()

# optimizer
G_optim = torch.optim.Adam(generator.parameters(), lr=lr, betas=optim_betas)
D_optim = torch.optim.Adam(discriminator.parameters(), lr=lr, betas=optim_betas)


for epoch_idx in range(nb_epochs):        
    
    print('------------------------')
    print('epoch %i' % (epoch_idx+1) )
    
    d_loss_sum = 0
    g_loss_sum = 0

    for batch_idx, (data, labels) in enumerate(train_loader):            
        
        '''
        Train the discriminator (50/50 real/fake) with two different 
        mini-batches, one for real images and another one for fake images
        '''                     
        D_optim.zero_grad()

        # sample real data
        real_data = Variable(data).view(batch_size, -1)
        real_data = real_data.cuda()
        # sample fake data
        fake_data = generator(Variable(sample_z(batch_size, hidden_size)))
        fake_data = fake_data.cuda()
        
        # discriminator must classify real data as 1's and fake ones as 0's
        real_out = discriminator(real_data)
        real_loss = criterion(real_out, ones_label)
        real_loss.backward()
        
        fake_out = discriminator(fake_data)
        fake_loss = criterion(fake_out, zeros_label)
        fake_loss.backward()
        
        D_optim.step()
        
        d_loss_sum += real_loss.item() + fake_loss.item()
                                
        
        '''
        Train the generator
        '''
        G_optim.zero_grad()

        # sample fake data
        fake_data = generator(Variable(sample_z(batch_size, hidden_size)))
        
        fake_out = discriminator(fake_data)
        
        # generator want the discriminator to classify fake data as 1's
        
        g_loss = criterion(fake_out, ones_label)
        
        g_loss_sum += g_loss.item()
        
        g_loss.backward()
        
        G_optim.step()
        

        
        
    print('discriminator loss = %f' % d_loss_sum)
    print('generator loss = %f' % g_loss_sum)
    print('------------------------')

    
    fig = plot(generator(Variable(sample_z(16, hidden_size))))
    plt.savefig('out_vanilla_GAN/%i.png' % epoch_idx)
    plt.close(fig)

ptrblck · December 15, 2018, 9:38am

The sample_z method returns a CPU tensor, which should also be moved onto the GPU before being passed to the generator.

neon29 · January 4, 2019, 1:22pm

Indeed, I forgot to modify this method. Thanks!