GAN -- DCGAN : Loss Function weird behaviour

alex-le-trex · June 20, 2021, 4:01pm

Hello everyone, I’m pretty new to the PyTorch game and need some help. I hope you can help me!

Let me explain my problem. As a newbie, I’m practising on the MNIST dataset by implementing a DCGAN. I took very basic networks for my Generator and Discriminator (as you can see in my code below), and then I train my model. The problem is that I think the loss function of my Generator is broken: its value is either 100 or 0 at the first iteration, and I think that this then breaks the whole network. Here is what I get:

iteration 0 / 329 – epoch 0 ---- Loss_D : 2.2673 Loss_G : 0.0 D(x) : 0.2363 D(G(z1)) : 0.35204 D(G(z2)) : 1.0
iteration 50 / 329 – epoch 0 ---- Loss_D : 100.0 Loss_G : 0.0 D(x) : 1.0 D(G(z1)) : 1.0 D(G(z2)) : 1.0

Can you help me with that and maybe explain to me what’s wrong with my code? I have included the interesting sections of my code just below (Generator, Discriminator and training loop). If you think you need more to figure it out, let me know.

Generator and Discriminator :

class Generator(nn.Module):
    """DCGAN generator: upsamples a latent tensor into an image with Tanh output.

    The layer sizes are read from the module-level names ``nz`` (latent
    channels), ``ngf`` (base feature-map width) and ``nc`` (output channels).
    For a latent input of spatial size 1 x 1 the successive transposed
    convolutions produce 4x4, 8x8, 16x16, 32x32 and finally 64x64 maps.

    Args:
        ngpu: stored on the instance as ``self.ngpu``; not otherwise used
            by this class.
    """

    def __init__(self, ngpu):
        super().__init__()
        self.ngpu = ngpu

        # One row per hidden stage: (in_channels, out_channels, stride, padding).
        # Every stage is ConvTranspose2d(kernel 4, no bias) -> BatchNorm2d -> ReLU.
        hidden_stages = (
            (nz, ngf * 8, 1, 0),       # Z -> (ngf*8) x 4 x 4
            (ngf * 8, ngf * 4, 2, 1),  # -> (ngf*4) x 8 x 8
            (ngf * 4, ngf * 2, 2, 1),  # -> (ngf*2) x 16 x 16
            (ngf * 2, ngf, 2, 1),      # -> (ngf) x 32 x 32
        )

        layers = []
        for in_ch, out_ch, stride, padding in hidden_stages:
            layers += [
                nn.ConvTranspose2d(in_ch, out_ch, 4, stride, padding, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(True),
            ]

        # Output stage: (ngf) x 32 x 32 -> (nc) x 64 x 64, squashed by Tanh.
        layers += [
            nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
            nn.Tanh(),
        ]

        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Run ``input`` through the sequential stack and return the result."""
        generated = self.main(input)
        return generated



class Discriminator(nn.Module):
    """DCGAN discriminator: downsamples an image to a single Sigmoid score.

    The layer sizes are read from the module-level names ``nc`` (input
    channels) and ``ndf`` (base feature-map width). For a 64 x 64 input the
    strided convolutions produce 32x32, 16x16, 8x8 and 4x4 maps, and the
    last convolution collapses the 4x4 map to 1x1.

    Args:
        ngpu: stored on the instance as ``self.ngpu``; not otherwise used
            by this class.
    """

    def __init__(self, ngpu):
        super().__init__()
        self.ngpu = ngpu

        # Input stage has no batch norm: (nc) x 64 x 64 -> (ndf) x 32 x 32.
        stack = [
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
        ]

        # Three stages that each double the channels and halve the resolution:
        # ndf -> ndf*2 -> ndf*4 -> ndf*8, i.e. 32x32 -> 16x16 -> 8x8 -> 4x4.
        width = ndf
        for _ in range(3):
            stack += [
                nn.Conv2d(width, width * 2, 4, 2, 1, bias=False),
                nn.BatchNorm2d(width * 2),
                nn.LeakyReLU(0.2, inplace=True),
            ]
            width = width * 2

        # Output stage: (ndf*8) x 4 x 4 -> one value per sample, in (0, 1).
        stack += [
            nn.Conv2d(width, 1, 4, 1, 0, bias=False),
            nn.Sigmoid(),
        ]

        self.main = nn.Sequential(*stack)

    def forward(self, input):
        """Run ``input`` through the sequential stack and return the result."""
        score = self.main(input)
        return score

Weight init, Loss Function, Optimizer :

def weights_init(m):
    """Re-initialise the parameters of ``m`` in place, based on its class name.

    Intended to be passed to ``Module.apply``. Modules whose class name
    contains ``'Conv'`` (this also matches ``ConvTranspose2d``) get weights
    drawn from N(0.0, 0.02). Modules whose class name contains
    ``'BatchNorm'`` get weights drawn from N(1.0, 0.02) and a zero bias.
    Any other module is left untouched.
    """
    layer_kind = type(m).__name__

    if 'Conv' in layer_kind:
        nn.init.normal_(m.weight.data, mean=0.0, std=0.02)
        return

    if 'BatchNorm' in layer_kind:
        nn.init.normal_(m.weight.data, mean=1.0, std=0.02)
        nn.init.constant_(m.bias.data, val=0)



# Build both networks (ngpu=1) and move them to the target device.
G = Generator(ngpu=1).to(device)
D = Discriminator(ngpu=1).to(device)

# Overwrite the default parameter initialisation of every sub-module.
G.apply(weights_init)
D.apply(weights_init)

# Binary cross-entropy on probabilities (the discriminator ends in a Sigmoid).
BCE_loss = nn.BCELoss()

# One Adam optimizer per network, sharing the same hyper-parameters.
G_optimizer = optim.Adam(
    G.parameters(),
    lr=learning_rate,
    betas=(beta1, 0.999),
)
D_optimizer = optim.Adam(
    D.parameters(),
    lr=learning_rate,
    betas=(beta1, 0.999),
)

# Target values used to fill the label tensor for real / generated batches.
real_label, fake_label = 1, 0

# Per-iteration loss histories, appended to by the training loop.
G_loss_l, D_loss_l = [], []

The Training Loop :

print("Starting Training Loop...")

for epoch in range(num_epoch):
    for i, data in enumerate(dataloader, 0):

        # ---------- Discriminator: maximize log(D(x)) + log(1 - D(G(z))) ----------

        # Real batch: D should output values close to real_label.
        D.zero_grad()
        real = data[0].to(device)  # first element of the batch, moved to device
        b_size = real.size(0)
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        outputD_real = D(real).view(-1)  # flatten to one score per sample
        loss_D_real = BCE_loss(outputD_real, label)
        loss_D_real.backward()  # accumulate gradients for the real batch
        # Mean score on real data. This was printed below but never assigned,
        # which raised NameError on the first logged iteration.
        D_x = outputD_real.mean().item()

        # Fake batch: D should output values close to fake_label.
        noise = torch.randn(b_size, nz, 1, 1, device=device)  # latent noise tensor
        fake = G(noise)
        label.fill_(fake_label)
        # detach() so this backward pass does not propagate into G.
        outputD_fake = D(fake.detach()).view(-1)
        loss_D_fake = BCE_loss(outputD_fake, label)
        loss_D_fake.backward()  # gradients add to those from the real batch
        D_G_z1 = outputD_fake.mean().item()  # mean score on fakes before D's update

        # Sum is for logging only; backward() already ran on each term.
        # NOTE: nn.BCELoss clamps its log terms at -100, so a value of exactly
        # 100 here means D's sigmoid output is saturated on the wrong side.
        loss_D = loss_D_real + loss_D_fake
        D_optimizer.step()

        # ---------- Generator: maximize log(D(G(z))) ----------

        G.zero_grad()
        label.fill_(real_label)  # G is trained to make D answer "real"
        outputD2 = D(fake).view(-1)  # re-score the same fakes with the updated D
        loss_G = BCE_loss(outputD2, label)
        loss_G.backward()

        D_G_z2 = outputD2.mean().item()  # mean score on fakes after D's update
        G_optimizer.step()  # update G

        G_loss_l.append(loss_G.item())
        D_loss_l.append(loss_D.item())

        if i % 50 == 0:
            print("iteration ", i, "/", len(dataloader),"-- epoch", epoch, "---- Loss_D : ", loss_D.item(), " Loss_G : ", loss_G.item(),
                  " D(x) : ", D_x, " D(G(z1)) : ", D_G_z1, " D(G(z2)) : ", D_G_z2)