Hey guys, I’ve been working with DCGANs for one or two months and, although I’ve got some results, they’re still far from the decent results I usually see in the tutorials.
Currently, I’m trying to reproduce NVIDIA’s Progressive Growing DCGAN in PyTorch using the exact same parameters they describe in the paper (Adam optimizer with lr = 0.001, beta1 = 0 and beta2 = 0.99 for both the generator and the discriminator). I didn’t apply smoothing to the weights during the transitions.
However, even though I’ve used the exact parameters from the paper, the only thing I could get as a result was random noise (even after 500,000 epochs with 4x4 images, then 8x8 images, and even now with 16x16 images).
I’ve also tried the classic DCGAN from PyTorch’s tutorial, but not with the CelebA dataset. I’m using a custom one made up of fan art of a fictional character.
After training for 100,000 epochs, these were the best images I could get:
I suppose that, since I’m using a different dataset (and one in which the art style may vary greatly from one image to another), perhaps I should change my optimizers’ parameters. However, do I need to do this manually, or can I simply apply a hyperparameter tuning technique? I got confused because DCGANs seem so unstable that I fear that trying to use something like a genetic algorithm would make them collapse. I even tried using a scheduler, but I remember that this made my model collapse after a few scheduler updates.
Here’s the code for my Progressive DCGAN in PyTorch:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def weights_init(m):
    """Re-initialise a layer in place, DCGAN style.

    Meant to be passed to ``Module.apply``. The layer kind is decided from
    the class name:

    * name contains ``'Conv'``: weights drawn from N(0, 0.02)
    * name contains ``'BatchNorm'``: weights drawn from N(1, 0.02) and
      biases set to 0
    * anything else is left untouched
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
        return
    if 'BatchNorm' in layer_kind:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)
class Generator(nn.Module):
    """Transposed-convolution generator for the 16x16 stage.

    Maps a latent tensor of shape (N, 100, 1, 1) to an image of shape
    (N, 3, 16, 16) squashed into [-1, 1] by a tanh:

        (N, 100, 1, 1) -> (N, 75, 4, 4) -> (N, 50, 8, 8) -> (N, 3, 16, 16)
    """

    def __init__(self, ):
        super().__init__()
        self.transconv1 = nn.ConvTranspose2d(100, 75, 4, 2, 0, bias=False)  # 1x1 -> 4x4
        # NOTE(review): in PyTorch, ``momentum`` is the weight given to the
        # *new* batch statistic (default 0.1). 0.8 looks like a Keras-style
        # value, where the meaning is reversed - confirm this is intended.
        self.batchnorm1 = nn.BatchNorm2d(75, momentum=0.8)
        self.LeakyReLU = nn.LeakyReLU(0.2, inplace=True)
        self.transconv2 = nn.ConvTranspose2d(75, 50, 4, 2, 1, bias=False)  # 4x4 -> 8x8
        self.batchnorm2 = nn.BatchNorm2d(50, momentum=0.8)
        self.transconv3 = nn.ConvTranspose2d(50, 3, 4, 2, 1, bias=False)  # 8x8 -> 16x16
        # forward() uses self.tanh, but it was never created, so every forward
        # pass raised AttributeError. Tanh has no parameters, so the
        # state_dict keys (and existing checkpoints) are unaffected.
        self.tanh = nn.Tanh()

    def forward(self, input):
        """Generate a batch of images from a batch of latent vectors."""
        x = self.transconv1(input)
        x = self.batchnorm1(x)
        x = self.LeakyReLU(x)
        x = self.transconv2(x)
        x = self.batchnorm2(x)
        x = self.LeakyReLU(x)
        x = self.transconv3(x)
        output = self.tanh(x)
        return output
# Build the generator on the selected device, then run weights_init over it
# and each of its submodules.
netG = Generator().to(device)
netG.apply(weights_init)
class Discriminator(nn.Module):
    """Convolutional discriminator for the 16x16 stage.

    Maps an image batch of shape (N, 3, 16, 16) to a probability per image,
    returned with shape (N, 1, 1, 1):

        (N, 3, 16, 16) -> (N, 100, 8, 8) -> (N, 75, 4, 4) -> (N, 1, 1, 1)

    Unit-variance Gaussian noise is added after the first two convolutions,
    and dropout follows each LeakyReLU.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 100, 6, 2, 2, bias=False)  # 16x16 -> 8x8
        self.LeakyRelu = nn.LeakyReLU(0.2, inplace=True)
        self.dropout = nn.Dropout(0.4, inplace=False)
        self.conv2 = nn.Conv2d(100, 75, 4, 2, 1, bias=False)  # 8x8 -> 4x4
        # NOTE(review): in PyTorch, ``momentum`` is the weight given to the
        # *new* batch statistic (default 0.1). 0.8 looks like a Keras-style
        # value, where the meaning is reversed - confirm this is intended.
        self.batchnorm2 = nn.BatchNorm2d(75, momentum=0.8)
        self.conv3 = nn.Conv2d(75, 1, 4, 2, 0, bias=False)  # 4x4 -> 1x1
        self.sigmoid = nn.Sigmoid()

    def forward(self, input):
        """Return the probability that each image in ``input`` is real."""
        x = self.conv1(input)
        # Adding random noise, trick suggested by OpenAI. randn_like samples
        # directly on x's device and dtype, so there is no CPU allocation and
        # copy on every call, and no dependency on a module-level `device`.
        x = x + torch.randn_like(x)
        x = self.LeakyRelu(x)
        x = self.dropout(x)
        x = self.conv2(x)
        x = x + torch.randn_like(x)
        x = self.batchnorm2(x)
        x = self.LeakyRelu(x)
        x = self.dropout(x)
        x = self.conv3(x)
        output = self.sigmoid(x)
        return output
# Build the discriminator on the selected device, then run weights_init over
# it and each of its submodules.
netD = Discriminator().to(device)
netD.apply(weights_init)
# Smoothed targets for the real and fake batches.
# NOTE(review): this was described as one-sided label smoothing, but
# fake_label is also moved away from 0, so both sides are smoothed - confirm
# which variant is intended.
real_label = 0.9 # Label smoothing, another trick suggested by OpenAI
fake_label = 0.1
# One Adam optimizer per network, both with lr=0.001 and betas=(0, 0.99).
optimizerD = optim.Adam(netD.parameters(), lr=0.001, betas=(0, 0.99))
optimizerG = optim.Adam(netG.parameters(), lr=0.001, betas=(0, 0.99))
The training loop:
def _restore_discriminator(path):
    """Load the checkpoint at ``path`` into ``netD``; return True if it had to be widened."""
    saved = torch.load(path, map_location=device)
    try:
        netD.load_state_dict(saved)  # Checkpoint of the current architecture
    except RuntimeError:
        # The checkpoint belongs to the previous, smaller model: start from
        # the current state and transplant the layers that can be reused.
        merged = netD.state_dict()
        # conv1 has the same shape in the previous and the current model.
        merged['conv1.weight'] = saved['conv1.weight']
        # conv2 is tiled 75 times (5 * 5 * 3) along the output-channel axis.
        merged['conv2.weight'] = torch.cat((saved['conv2.weight'],) * 75, dim=0)
        # Rebinding keys of the dict returned by state_dict() does not touch
        # the module; the merged dict has to be loaded back explicitly.
        netD.load_state_dict(merged)
        return True
    return False


def _restore_generator(path):
    """Load the checkpoint at ``path`` into ``netG``; return True if it had to be widened."""
    saved = torch.load(path, map_location=device)
    try:
        netG.load_state_dict(saved)  # Checkpoint of the current architecture
    except RuntimeError:
        merged = netG.state_dict()
        # The first transposed convolution and its batch norm are reused as-is.
        reused_keys = (
            'transconv1.weight',
            'batchnorm1.weight',
            'batchnorm1.bias',
            'batchnorm1.running_mean',
            'batchnorm1.running_var',
            'batchnorm1.num_batches_tracked',
        )
        for key in reused_keys:
            merged[key] = saved[key]
        # transconv2 is tiled 16 times (4 * 4) along dim 1, then padded with
        # two all-zero channels.
        tiled = torch.cat((saved['transconv2.weight'],) * 16, dim=1)
        padding = torch.zeros(75, 2, 4, 4, dtype=tiled.dtype, device=tiled.device)
        merged['transconv2.weight'] = torch.cat((tiled, padding), dim=1)
        # As for the discriminator, the merged dict must be loaded explicitly.
        netG.load_state_dict(merged)
        return True
    return False


def _snapshot_state(model):
    """Return an independent copy of ``model``'s state dict."""
    import copy

    # state_dict() hands out references to the live tensors; without a copy
    # the "best" snapshot would silently follow every later optimizer step.
    return copy.deepcopy(model.state_dict())


def train(data=None, epochs=1000, batch_size=6,loss=nn.BCELoss(), optimizerD=optimizerD, optimizerG=optimizerG, save_point=100, checkpoint=5000, model_name='default_Cocogoat'):
    """Train the module-level ``netD`` / ``netG`` pair.

    Checkpoints in ``Cocogoat/`` are loaded first when they exist. A
    checkpoint that does not fit the current architecture is treated as
    coming from the previous, smaller model and its weights are transplanted.

    Args:
        data: Image tensor indexed as ``data[indices, :, :, :]``. Required.
        epochs: Number of training iterations. Each one draws a single random
            batch (with replacement); it is not a full pass over ``data``.
        batch_size: Images per real batch and latent vectors per fake batch.
        loss: Criterion applied to the discriminator output and the labels.
        optimizerD: Optimizer stepping the discriminator.
        optimizerG: Optimizer stepping the generator.
        save_point: Currently unused; kept for interface compatibility.
        checkpoint: Print statistics and save both models every this many
            iterations.
        model_name: Suffix of the checkpoint file names.

    Raises:
        ValueError: If ``data`` is None.
        RuntimeError: If a checkpoint matches neither the current nor the
            previous architecture.
    """
    if data is None:
        raise ValueError("train() requires the training images in `data`")

    discriminator_path = f'Cocogoat/discriminator_{model_name}.pth'
    generator_path = f'Cocogoat/generator_{model_name}.pth'
    # torch.save does not create missing directories.
    os.makedirs('Cocogoat', exist_ok=True)

    weights_updated = False
    if os.path.isfile(discriminator_path):
        weights_updated = _restore_discriminator(discriminator_path) or weights_updated
    if os.path.isfile(generator_path):
        weights_updated = _restore_generator(generator_path) or weights_updated
    if weights_updated:
        print("Weights Updated!")

    # The best-so-far trackers must outlive a single iteration; resetting them
    # inside the loop made every step look like a new best.
    # NOTE(review): GAN losses are a weak proxy for sample quality, and the
    # best G and best D may come from different steps - consider saving the
    # latest weights instead.
    best_gen_loss, best_disc_loss = float('inf'), float('inf')
    best_generator_parameters = None
    best_discriminator_parameters = None

    for epoch in range(epochs):
        netD.zero_grad()
        # Format batch
        indices = np.random.randint(0, data.shape[0], size=batch_size)
        real_cpu = data[indices, :, :, :].to(device)
        b_size = real_cpu.shape[0]
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        # Forward pass real batch through D
        output = netD(real_cpu).view(-1)  # Produces a tensor of shape (batch_size,)
        # Calculate loss on all-real batch
        errD_real = loss(output, label)
        # Calculate gradients for D in backward pass
        errD_real.backward()
        D_x = output.mean().item()

        ## Train with all-fake batch
        # Generate batch of latent vectors
        noise = torch.randn(b_size, 100, 1, 1, device=device)
        # Generate fake image batch with G
        fake = netG(noise)
        label.fill_(fake_label)
        # Classify all fake batch with D; detach so no gradient reaches G here
        output = netD(fake.detach()).view(-1)
        # Calculate D's loss on the all-fake batch
        errD_fake = loss(output, label)
        # Calculate the gradients for this batch, accumulated (summed) with previous gradients
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        # Compute error of D as sum over the fake and the real batches
        errD = errD_real + errD_fake
        # Update D
        optimizerD.step()

        netG.zero_grad()
        label.fill_(real_label)
        output = netD(fake).view(-1)
        # Calculate G's loss based on this output
        errG = loss(output, label)
        # Calculate gradients for G
        errG.backward()
        D_G_z2 = output.mean().item()
        # Update G
        optimizerG.step()

        # Getting and saving best parameters:
        generator_loss, discriminator_loss = errG.item(), errD.item()
        if generator_loss < best_gen_loss:
            best_gen_loss = generator_loss
            best_generator_parameters = _snapshot_state(netG)
        if discriminator_loss < best_disc_loss:
            best_disc_loss = discriminator_loss
            best_discriminator_parameters = _snapshot_state(netD)

        # Output training stats
        if epoch % checkpoint == 0:
            print('[%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                  % (epoch, epochs,
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))
            # No snapshot exists if every loss so far was NaN; fall back to
            # the current weights rather than writing None to disk.
            if best_generator_parameters is None:
                generator_to_save = netG.state_dict()
            else:
                generator_to_save = best_generator_parameters
            if best_discriminator_parameters is None:
                discriminator_to_save = netD.state_dict()
            else:
                discriminator_to_save = best_discriminator_parameters
            torch.save(generator_to_save, generator_path)
            torch.save(discriminator_to_save, discriminator_path)
            print("Models saved!")