Tuning and adjusting DCGANs

Martyn · January 24, 2022, 6:39pm

Hey guys, I’ve been working with DCGANs for one or two months and, although I got some results, they’re still far from the decent results I usually see in tutorials.

Currently, I’m trying to reproduce NVIDIA’s Progressive Growing DCGAN in PyTorch using the exact same parameters they describe in the paper (Adam optimizer with lr = 0.001, beta1 = 0 and beta2 = 0.99 for both generator and discriminator). I didn’t apply smoothing to the weights during the transitions.
However, even though I’ve used the exact parameters from the paper, the only thing I could get as a result was random noise (even after 500.000 epochs with 4x4 images, then 8x8 images and even now with 16x16 images).

I’ve also tried the classic DCGAN from Pytorch’s tutorial, but not with Celeba dataset. I’m using a custom one that uses fanarts of a fictional character.

After training for 100.000 epochs, these were the best images I could get:

I suppose that, since I’m using a different dataset (and one in which the art style may vary greatly from one image to another), perhaps I should change my optimizers’ parameters. However, do I need to do this manually, or can I simply apply a hyperparameter tuning technique? I got confused because DCGANs seem so unstable that I fear that using something like a genetic algorithm would make training collapse. I even tried using a learning-rate scheduler, but I remember that it made my model collapse after a few scheduler updates.

Here’s the code for my Progressive DCGAN in Pytorch:

# Run on the first CUDA GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

def weights_init(m):
    """Initialise one layer in place; intended for use with ``Module.apply``.

    Layers whose class name contains ``Conv`` get weights drawn from
    N(0, 0.02). Layers whose class name contains ``BatchNorm`` get weights
    drawn from N(1, 0.02) and a zero bias. Every other layer is left untouched.
    """
    layer_type = type(m).__name__

    if 'Conv' in layer_type:
        nn.init.normal_(m.weight.data, mean=0.0, std=0.02)
        return

    if 'BatchNorm' in layer_type:
        nn.init.normal_(m.weight.data, mean=1.0, std=0.02)
        nn.init.constant_(m.bias.data, val=0)

class Generator(nn.Module):
    """DCGAN generator for the 16x16 stage.

    Maps a latent tensor of shape (N, 100, 1, 1) to an image batch of shape
    (N, 3, 16, 16) with values in [-1, 1] (tanh output).
    """

    def __init__(self):
        super().__init__()

        # Spatial size per stage: 1x1 -> 4x4 -> 8x8 -> 16x16.
        self.transconv1 = nn.ConvTranspose2d(100, 75, 4, 2, 0, bias=False)  # 1x1 -> 4x4
        # NOTE(review): in PyTorch, `momentum` is the weight given to the *new*
        # batch statistic, so 0.8 makes the running stats follow the latest
        # batch closely. Confirm this is intended and not a port of a
        # Keras-style momentum=0.8 (which would be 0.2 here).
        self.batchnorm1 = nn.BatchNorm2d(75, momentum=0.8)
        self.LeakyReLU = nn.LeakyReLU(0.2, inplace=True)
        self.transconv2 = nn.ConvTranspose2d(75, 50, 4, 2, 1, bias=False)  # 4x4 -> 8x8
        self.batchnorm2 = nn.BatchNorm2d(50, momentum=0.8)
        self.transconv3 = nn.ConvTranspose2d(50, 3, 4, 2, 1, bias=False)  # 8x8 -> 16x16
        # forward() uses self.tanh; it was never defined, which raised
        # AttributeError on every call. Tanh has no parameters, so existing
        # checkpoints still load unchanged.
        self.tanh = nn.Tanh()

    def forward(self, input):
        """Generate images from latent vectors of shape (N, 100, 1, 1)."""
        x = self.transconv1(input)
        x = self.batchnorm1(x)
        x = self.LeakyReLU(x)
        x = self.transconv2(x)
        x = self.batchnorm2(x)
        x = self.LeakyReLU(x)
        x = self.transconv3(x)
        output = self.tanh(x)
        return output

# Build the generator, move it to the selected device, then apply the custom
# weight initialisation to every sub-module.
netG = Generator()
netG = netG.to(device)
netG.apply(weights_init)

class Discriminator(nn.Module):
    """DCGAN discriminator for the 16x16 stage.

    Maps an image batch of shape (N, 3, 16, 16) to a probability tensor of
    shape (N, 1, 1, 1) (sigmoid output). Gaussian noise is added to the
    activations after the first two convolutions as a regulariser.
    """

    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(3, 100, 6, 2, 2, bias=False)  # 16x16 -> 8x8
        self.LeakyRelu = nn.LeakyReLU(0.2, inplace=True)
        self.dropout = nn.Dropout(0.4, inplace=False)
        self.conv2 = nn.Conv2d(100, 75, 4, 2, 1, bias=False)  # 8x8 -> 4x4
        self.batchnorm2 = nn.BatchNorm2d(75, momentum=0.8)
        self.conv3 = nn.Conv2d(75, 1, 4, 2, 0, bias=False)  # 4x4 -> 1x1
        self.sigmoid = nn.Sigmoid()

    def forward(self, input):
        """Return the per-sample probability that ``input`` is a real image."""
        x = self.conv1(input)
        # Adding random noise, trick suggested by OpenAI. randn_like samples
        # directly on x's device with x's dtype, so there is no CPU allocation
        # and host-to-device copy per call, and the module no longer depends
        # on a global `device`.
        # NOTE(review): the noise is also applied in eval mode (unlike the
        # dropout layers) — confirm that this is intended.
        x = torch.randn_like(x) + x
        x = self.LeakyRelu(x)
        x = self.dropout(x)
        x = self.conv2(x)
        x = torch.randn_like(x) + x
        x = self.batchnorm2(x)
        x = self.LeakyRelu(x)
        x = self.dropout(x)
        x = self.conv3(x)
        output = self.sigmoid(x)

        return output

# Build the discriminator, move it to the selected device, then apply the
# custom weight initialisation to every sub-module.
netD = Discriminator()
netD = netD.to(device)
netD.apply(weights_init)

# One-sided label smoothing, another trick suggested by OpenAI: only the
# target for real samples is softened. The target for fake samples stays at 0;
# it was 0.1, which smooths both sides and contradicts the "one-sided" intent.
real_label = 0.9
fake_label = 0.0

# Both networks are optimised with Adam using lr=0.001, beta1=0 and beta2=0.99.
optimizerD = optim.Adam(
    netD.parameters(),
    lr=0.001,
    betas=(0, 0.99),
)
optimizerG = optim.Adam(
    netG.parameters(),
    lr=0.001,
    betas=(0, 0.99),
)

The training loop:


def _snapshot_state(module):
    """Return an independent copy of ``module.state_dict()``."""
    # state_dict() exposes the live parameter tensors, so a "best" snapshot
    # must be cloned or later optimizer steps silently overwrite it.
    return {name: tensor.detach().clone() for name, tensor in module.state_dict().items()}


def _restore_discriminator(path):
    """Load ``netD`` from ``path``, growing a previous-stage checkpoint if needed."""
    previous_discriminator = torch.load(path)
    try:
        netD.load_state_dict(previous_discriminator)  # Checkpoint
    except RuntimeError:
        # The checkpoint has a different layout (previous model), so carry
        # over what can be reused.
        current_discriminator = netD.state_dict()
        # The weights size from previous_d conv1 and current_d conv1 is the same.
        current_discriminator['conv1.weight'] = previous_discriminator['conv1.weight']

        # Conv2, however, requires manipulation: tile the previous filters
        # 5 * 5 * 3 = 75 times along the output-channel axis.
        # NOTE(review): assumes the previous conv2.weight is (1, 100, 4, 4)
        # so that the result matches the current (75, 100, 4, 4) — confirm.
        weights = previous_discriminator['conv2.weight']
        current_discriminator['conv2.weight'] = weights.repeat(75, 1, 1, 1)

        # Without this the edited dict was discarded and the model kept its
        # fresh initialisation.
        netD.load_state_dict(current_discriminator)


def _restore_generator(path):
    """Load ``netG`` from ``path``, growing a previous-stage checkpoint if needed."""
    previous_generator = torch.load(path)
    try:
        netG.load_state_dict(previous_generator)
    except RuntimeError:
        # Previous model: reuse the first block as-is.
        current_generator = netG.state_dict()
        for key in (
            'transconv1.weight',
            'batchnorm1.weight',
            'batchnorm1.bias',
            'batchnorm1.running_mean',
            'batchnorm1.running_var',
            'batchnorm1.num_batches_tracked',
        ):
            current_generator[key] = previous_generator[key]

        # Tile the previous transconv2 filters 4 * 4 = 16 times along dim 1,
        # then pad with two zero channels.
        # NOTE(review): assumes the previous transconv2.weight is (75, 3, 4, 4)
        # so that 3 * 16 + 2 = 50 channels result — confirm.
        weights = previous_generator['transconv2.weight'].repeat(1, 16, 1, 1)
        # new_zeros keeps the padding on the same device/dtype as the weights.
        zeros = weights.new_zeros(75, 2, 4, 4)
        current_generator['transconv2.weight'] = torch.cat((weights, zeros), dim=1)

        # Actually apply the transferred weights to the model.
        netG.load_state_dict(current_generator)

        print("Weights Updated!")


def train(
    data=None,
    epochs=1000,
    batch_size=6,
    loss=nn.BCELoss(),
    optimizerD=optimizerD,
    optimizerG=optimizerG,
    save_point=100,
    checkpoint=5000,
    model_name='default_Cocogoat',
):
    """Train ``netD`` and ``netG`` adversarially on randomly sampled batches.

    Existing checkpoints under ``Cocogoat/`` are loaded first. If a checkpoint
    does not fit the current architecture, selected weights from it are
    transferred instead.

    Args:
        data: Indexable 4-D image tensor; batches are drawn with replacement
            along its first axis. Required.
        epochs: Number of training iterations (one random batch each).
        batch_size: Number of real and of generated samples per iteration.
        loss: Criterion applied to the discriminator output and the labels.
        optimizerD: Optimizer stepping the discriminator.
        optimizerG: Optimizer stepping the generator.
        save_point: Unused; kept for interface compatibility.
        checkpoint: Log and save every ``checkpoint`` iterations.
        model_name: Suffix of the checkpoint file names.

    Raises:
        ValueError: If ``data`` is None.
    """
    if data is None:
        raise ValueError("train() requires `data`: a tensor of training images.")

    discriminator_path = f'Cocogoat/discriminator_{model_name}.pth'
    generator_path = f'Cocogoat/generator_{model_name}.pth'

    if os.path.isfile(discriminator_path):
        _restore_discriminator(discriminator_path)

    if os.path.isfile(generator_path):
        _restore_generator(generator_path)

    # Best-so-far tracking lives outside the loop; resetting it every
    # iteration made "best" always equal to the current step.
    # NOTE(review): GAN losses are not a reliable measure of sample quality,
    # and the best G and best D may come from different iterations.
    best_gen_loss, best_disc_loss = float('inf'), float('inf')
    best_generator_parameters = None
    best_discriminator_parameters = None

    for epoch in range(epochs):
        ## Train D with an all-real batch
        netD.zero_grad()
        # Format batch
        b_size = batch_size
        real_cpu = data[np.random.randint(0, data.shape[0], size=batch_size), :, :, :].to(device)
        label = torch.full((real_cpu.shape[0],), real_label, dtype=torch.float, device=device)
        # Forward pass real batch through D
        output = netD(real_cpu).view(-1)  # Produces a tensor of shape (batch_size,)
        # Calculate loss on all-real batch
        errD_real = loss(output, label)
        # Calculate gradients for D in backward pass
        errD_real.backward()
        D_x = output.mean().item()

        ## Train with all-fake batch
        # Generate batch of latent vectors
        noise = torch.randn(b_size, 100, 1, 1, device=device)
        # Generate fake image batch with G
        fake = netG(noise)
        label.fill_(fake_label)
        # Classify all fake batch with D (detached: no gradients flow into G here)
        output = netD(fake.detach()).view(-1)
        # Calculate D's loss on the all-fake batch
        errD_fake = loss(output, label)
        # Calculate the gradients for this batch, accumulated (summed) with previous gradients
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        # Compute error of D as sum over the fake and the real batches
        errD = errD_real + errD_fake
        # Update D
        optimizerD.step()

        ## Train G: it is rewarded when D labels its samples as real
        netG.zero_grad()
        label.fill_(real_label)
        output = netD(fake).view(-1)
        # Calculate G's loss based on this output
        errG = loss(output, label)
        # Calculate gradients for G
        errG.backward()
        D_G_z2 = output.mean().item()
        # Update G
        optimizerG.step()

        # Getting and saving best parameters:
        generator_loss, discriminator_loss = errG.item(), errD.item()

        if generator_loss < best_gen_loss:
            best_gen_loss = generator_loss
            best_generator_parameters = _snapshot_state(netG)

        if discriminator_loss < best_disc_loss:
            best_disc_loss = discriminator_loss
            best_discriminator_parameters = _snapshot_state(netD)

        # Output training stats
        if epoch % checkpoint == 0:
            print('[%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                    % (epoch, epochs,
                        errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

            # Both snapshots exist unless a loss was NaN from the first step;
            # in that case nothing is saved, so a good checkpoint is not overwritten.
            if best_generator_parameters is not None and best_discriminator_parameters is not None:
                os.makedirs('Cocogoat', exist_ok=True)
                torch.save(best_generator_parameters, generator_path)
                torch.save(best_discriminator_parameters, discriminator_path)
                print("Models saved!")

anantguptadbl · January 25, 2022, 11:51am

@Martyn I would suggest picking up a much simpler dataset like

geometric shape data : Four Shapes | Kaggle
MNIST Fashion

Once your code works for these, you can easily extend it to complicated datasets. But you will have ironed out many issues by this time

Martyn · February 4, 2022, 1:44pm

Would using a simpler dataset make that much difference? I mean… I’d probably need to adjust my entire neural network and my optimizer when I change my dataset, right?