Hello,
I am currently working on the customization of the cDCGAN for the MNIST Database.
I found some problems with the use of the Binary Cross Entropy Loss when running the model.
It seems that the dimension of the tensors feeding the Loss are different, and I don’t really know why.
I got the following error :
ValueError: Target and input must have the same number of elements. target nelement (128) != input nelement (1280)
Here’s the code used
# -*- coding: utf-8 -*-
"""tme11_advanced.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1mB2YseGg-3V6LtmR14qQV_dxErTi9hCA
"""
googlecolab = True
if googlecolab:
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision
!pip install Pillow==4.1.1
# Commented out IPython magic to ensure Python compatibility.
# %matplotlib inline
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
"""## Hyperpameters
Define the hyperparameters. You can play with those later.
"""
## Data loading
workers = 4 # Number of workers for dataloader (/!\ set to 4 when you're done debugging)
## Architecture
arch = 'cDCGAN' # or cGAN
nz = 100 # Size of z latent vector (i.e. size of generator input)µ
ndf = 32 # Base size of feature maps in discriminator
ngf = 32 # Base size of feature maps in generator
## Optimization
lrD = 0.0002 # Learning rate for the discriminator
lrG = 0.0002 # Learning rate for the generator
beta1G = 0.5 # Momentum beta1 for the discriminator
beta1D = 0.5 # Momentum beta1 for the generator
## Training
batch_size = 128 # Images per batch
nb_update_D = 1 # Number of sub-steps of discriminator optim. at each step
nb_update_G = 1 # Number of sub-steps of generator optim. at each step
#steps = 8000 # Number of global steps in the training loop
nb_epochs = 20 # Number of epochs, leave "None" if you want to set the number of "steps" (i.e. batches)
if nb_epochs is None:
nb_epochs = (steps * batch_size) / (nb_update_D * 50000)
else:
steps = int(nb_epochs * nb_update_D * 50000 / batch_size)
print("Doing %.1f epochs in %d steps" % (nb_epochs, steps))
steps_per_epoch = int(steps / nb_epochs)
"""# Dataset
Download and load the dataset. Nothing to do here.
"""
# We can use an image folder dataset the way we have it setup.
# Create the dataset
dataroot = '/tmp/mnist'
transform = transforms.Compose([
transforms.Pad(2),
transforms.ToTensor(),
transforms.Normalize(mean=(0.5,), std=(0.5,))
])
dataset = dset.MNIST(dataroot, train=True, download=True, transform=transform)
# Create the dataloader
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
shuffle=True, num_workers=workers)
# Decide which device we want to run on
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Plot some training images
real_batch = next(iter(dataloader))
plt.figure(figsize=(8,8))
plt.axis("off")
plt.title("Training Images")
plt.imshow(np.transpose(vutils.make_grid(real_batch[0].to(device)[:64], padding=2, normalize=True).cpu(),(1,2,0)))
plt.show()
"""# Model architectures
## Discriminator
Input: Image $x \in \mathbb{R}^{32\times 32\times 1}$
Output: "Real" image probability $\in [0,1]$
## Generator
Input: Random "noise" $z \in \mathbb{R}^{\text{nz}}$
Output: Generated image $\tilde x \in \mathbb{R}^{32\times 32\times 1}$
"""
# cDCGAN
if arch == 'cDCGAN':
class Discriminator(nn.Module):
def __init__(self):
super(Discriminator, self).__init__()
self.emb_x = nn.Sequential(
# TODO
nn.Conv2d(1,2*ndf,kernel_size = 4, stride=2, padding=1, bias=False),
nn.LeakyReLU(0.2)
)
self.emb_y = nn.Sequential(
# TODO
nn.Conv2d(10,2*ndf, kernel_size=4, stride=2, padding=1, bias=False),
nn.LeakyReLU(0.2)
)
self.emb_xy = nn.Sequential(
# TODO
nn.Conv2d(4*ndf,8*ndf,kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(8*ndf),
nn.LeakyReLU(0.2),
nn.Conv2d(8*ndf, 16*ndf, kernel_size=4, stride = 2, padding=1,bias=False),
nn.BatchNorm2d(16*ndf),
nn.LeakyReLU(0.2),
nn.Conv2d(16*ndf,1,kernel_size=4, stride=2, padding=0, bias=False),
nn.Sigmoid()
)
def forward(self, x, y):
#TODO
h1 = self.emb_x(x)
h2 = self.emb_y(y.expand(-1,-1,32,32))
x = torch.cat([h1,h2],1)
o = self.emb_xy(x)
return o
class Generator(nn.Module):
def __init__(self):
super(Generator, self).__init__()
self.emb_z = nn.Sequential(
# TODO
nn.ConvTranspose2d(nz,8*ngf,kernel_size=4,stride=1,padding=0, bias=False),
nn.BatchNorm2d(8*ndf),
nn.ReLU()
)
self.emb_y = nn.Sequential(
# TODO
nn.ConvTranspose2d(10,ndf*8, kernel_size=4, stride=1,padding=0,bias=False),
nn.BatchNorm2d(8*ngf),
nn.ReLU()
)
self.emb_zy = nn.Sequential(
# TODO
nn.ConvTranspose2d(ngf*16,ngf*8,kernel_size=4,stride=2,padding=1,bias=False),
nn.BatchNorm2d(8*ngf),
nn.ReLU(),
nn.ConvTranspose2d(ngf*8,ngf*4,kernel_size=4,stride=2,padding=1,bias=False),
nn.BatchNorm2d(4*ngf),
nn.ReLU(),
nn.ConvTranspose2d(4*ngf,1,kernel_size=4,stride=2,padding=1),
nn.Tanh()
)
def forward(self, z, y):
# TODO
h1 = self.emb_z(z)
h2 = self.emb_y(y)
x = torch.cat([h1,h2],1)
o = self.emb_zy(x)
return o
# cGAN
if arch == 'cGAN':
class Discriminator(nn.Module):
def __init__(self):
super(Discriminator, self).__init__()
self.emb_x = nn.Sequential(
# TODO
nn.Linear(784,1024), # 794 ?
nn.LeakyReLU(0.2)
)
self.emb_y = nn.Sequential(
# TODO
nn.Linear(10,1024), # self.label_emb = nn.Embedding(10, 10) ?
nn.LeakyReLU(0.2)
)
self.emb_xy = nn.Sequential(
# TODO
nn.Linear(2048,1024,bias=False),
nn.BatchNorm1d(1024)
nn.LeakyReLU(0.2),
nn.Linear(1024,512,bias=False),
nn.BatchNorm1d(512)
nn.LeakyReLU(0.2),
nn.Linear(512,1),
nn.Sigmoid()
)
def forward(self, x, y):
#TODO
h1 = self.emb_x(x)
h2 = self.emb_y(y)
x = torch.cat([h1,h2],1)
o = self.emb_xy(x)
return o
class Generator(nn.Module):
def __init__(self):
super(Generator, self).__init__()
self.emb_z = nn.Sequential(
# TODO
nn.Linear(110,256, bias=False),
nn.BatchNorm1d(256),
nn.ReLU()
)
self.emb_y = nn.Sequential(
# TODO
nn.Linear(10,256),
nn.BatchNorm1d(256),
nn.ReLU()
)
self.emb_zy = nn.Sequential(
# TODO
nn.Linear(512,1024),
nn.BatchNorm1d(1024),
nn.ReLU(),
nn.Linear(1024,1024, bias=False),
nn.BatchNorm1d(1024),
nn.ReLU(),
nn.Linear(1024,1024),
nn.Tanh()
)
def forward(self, z, y):
# Remove unused dimensions in non-conv model
z = z.view(z.shape[0], -1) z[:, :, 0, 0]
y = y.view(y.shape[0], -1)
# TODO
h1 = self.emb_z(z)
h2 = self.emb_y(y)
x = torch.cat([h1,h2],1)
o = self.emb_zy(x)
return o
# Create the models
netG = Generator().to(device)
netD = Discriminator().to(device)
# Apply the weights_init function to randomly initialize all weights to mean=0, stdev=0.2
def weights_init(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
nn.init.normal_(m.weight.data, 0.0, 0.02)
elif classname.find('BatchNorm') != -1:
nn.init.normal_(m.weight.data, 1.0, 0.02)
nn.init.constant_(m.bias.data, 0)
netG.apply(weights_init)
netD.apply(weights_init)
# Print the models
print(netG)
print(netD)
# TODO test you models to check if they
z = torch.zeros(10, nz, 1, 1).to(device)
x = torch.zeros(10, 1, 32, 32).to(device)
y = torch.randn(10, 10, 1, 1).to(device)
#y.shape
#torch.cat([x, y.expand(-1,-1,32,32)], 1).shape
#y.shape
#y.expand(10,10,32,32).shape
#net_test = nn.Sequential(nn.Conv2d(10,ndf, kernel_size=4, stride=2, padding=1))
#net_test(y)
print(netG(z, y).shape) # expected: [10, 1, 32, 32]
print(netD(x, y).shape) # expected: [10, 1, 1, 1]
"""# Optimization and training
Here we will define:
* Our prior $P(z)$ that we use to sample random "noise". We will use a Gaussian distribution.
* The criterion that will be used to train the discriminator, and indirectly the generator. We will use the binary cross-entropy.
* The optimizers of both models. We will use the Adam optimizer.
"""
# Prior P(z). Returns a Gaussian random tensor of shape (batch_size, nz, 1, 1)
def get_noise(batch_size):
noise = torch.randn((batch_size, nz, 1, 1), dtype=torch.float32) # TODO
return noise
# Returns a random vector of one-hot encoding of shape (batch_size, 10, 1, 1)
def get_fixed_y(batch_size):
y = torch.zeros(batch_size, 10, 1, 1, device=device)
n = batch_size // 10
for i in range(10):
y[i*n:i*n+n, i, :, :].fill_(1)
return y
# Create the criterion function that will take (y_hat, y) as input
criterion = nn.BCELoss() # TODO
# Setup Adam optimizers for D and G
optimizerD = optim.Adam(netD.parameters(), lr=lrD, betas=(beta1D, 0.999)) # TODO
optimizerG = optim.Adam(netG.parameters(), lr=lrG, betas=(beta1G, 0.999)) # TODO
"""### Data format / batch creation functions
`r_real` and `r_fake` are targets for the disriminator's criterion
"""
# Data format / batch creation functions
fixed_noise = get_noise(100) # Create a fixed random vector sampled from a Gaussian, will be used during train for viz
fixed_y = get_fixed_y(100)
real_label = 1
fake_label = 0
iterator = iter(dataloader)
def onehot(data, nclass=10):
bsize = data.shape[0]
y = torch.zeros(bsize, nclass)
y.scatter_(1, data.unsqueeze(1), 1)
y = y.unsqueeze(2).unsqueeze(3)
return y
# returns a batch of real images from the dataset (iterates infinitely on the dataset)
def get_batch_real():
global iterator
try:
x_real, y_real = next(iterator)
except:
iterator = iter(dataloader)
x_real, y_real = next(iterator)
x_real = x_real.to(device)
y_real = onehot(y_real).to(device)
r_real = torch.full((x_real.size(0),), real_label, device=device)
print('x_real_size',x_real.size())
print('y_real_size',y_real.size())
print('r_real_size',r_real.size())
return x_real, y_real, r_real
# returns a batch of generated images and training targets y_fake
# Note that the targets y_fake will be different is train_G is True or False
def get_batch_fake(y_real, train_G=False):
z = torch.randn(y_real.shape[0], nz, 1, 1, device=device)
x_fake = netG(z, y_real)
#
if train_G:
r_fake = None # TODO
else:
r_fake = torch.full((x_fake.size(0),), fake_label, device=device) # TODO
print('x_fake_size',x_fake.size())
#print('y_fake_size',y_fake.size())
print('r_fake_size',r_fake.size())
print('test', r_fake[0])
#print(r_fake)
#print(y_fake)
return x_fake, y_real, r_fake
"""### Training Loop
**Reminder:** when your training loop starts to work, change the `workers` variable to 4 and rerun your notebook
"""
img_list = []
G_losses = []
D_losses = []
# Commented out IPython magic to ensure Python compatibility.
for i in range(steps):
if i == steps_per_epoch * 11:
optimizerG.param_groups[0]['lr'] /= 10
optimizerD.param_groups[0]['lr'] /= 10
print("learning rate change!")
if i == steps_per_epoch * 16:
optimizerG.param_groups[0]['lr'] /= 10
optimizerD.param_groups[0]['lr'] /= 10
print("learning rate change!")
########
# Update D network: maximize log(D(x)) + log(1 - D(G(z)))
for _ in range(nb_update_D):
netD.zero_grad()
# Create batches
x_real, y_real, r_real = get_batch_real()
x_fake, y_fake, r_fake = get_batch_fake(y_real)
# Forward
r_hat_real = netD(x_real,y_real) # TODO
r_hat_fake = netD(x_fake,y_fake) # TODO
errD = criterion(y_real, r_hat_real) + criterion(y_fake, r_hat_fake) # TODO sum of criterion of real and fake samples
# Backward
# TODO backward & optimization step on D
optimizerD.zero_grad()
errD.backward(retain_graph=True)
optimizerD.step()
# Compute / save metrics
avg_output_for_real = r_hat_real.mean().item()
avg_output_for_fake = r_hat_fake.mean().item()
D_losses.append(errD.item())
########
# Update G network: maximize log(D(G(z)))
for _ in range(nb_update_G):
netG.zero_grad()
# TODO: forward + backward
# NOTE: use errG as name for your loss variable, like errD above
errG = criterion(y_hat_fake, y_fake)
optimizerG.zero_grad()
errG.backward(retain_graph=True)
optimizerG.step()
# Compute / save metrics
G_losses.append(errG.item())
########
# Logs
if i % 25 == 0:
print('[%5d/%5d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f'
# % (i, steps, errD.item(), errG.item(), avg_output_for_real, avg_output_for_fake))
if i % 100 == 0:
with torch.no_grad():
x_fake = netG(fixed_noise, fixed_y).detach().cpu()
img_list.append(vutils.make_grid(x_fake, padding=2, normalize=True, nrow=10))
plt.figure(figsize=(10,10))
plt.imshow(np.transpose(img_list[-1],(1,2,0)))
plt.axis("off")
plt.show()
"""# Display training evolution"""
# Show generations
fig = plt.figure(figsize=(8,8))
plt.axis("off")
ims = [[plt.imshow(np.transpose(i,(1,2,0)), animated=True)] for i in img_list]
ani = animation.ArtistAnimation(fig, ims, interval=1000, repeat_delay=1000, blit=True)
HTML(ani.to_jshtml())
# Loss evolution
plt.figure(figsize=(10,8))
plt.subplot(2,1,1)
plt.title("Generator Trainig Loss")
plt.plot(G_losses)
plt.xlabel("iterations")
plt.ylabel("Loss")
plt.subplot(2,1,2)
plt.title("Generator Trainig Loss")
plt.plot(D_losses)
plt.xlabel("iterations")
plt.ylabel("Loss")
plt.show()
Can anyone help me ?
Thanks in advance !