Hello everyone, I’m pretty new to PyTorch and need some help — I hope you can help me!
Let me explain my problem. As a newbie, I’m practicing on the MNIST dataset by implementing a DCGAN. I took very basic networks for my Generator and Discriminator (as you can see in my code below) and then trained my model. The problem is that I think the loss of my Generator is broken: its value is either 100 or 0 at the first iteration, and I think this then breaks the whole network. Here is what I get:
iteration 0 / 329 – epoch 0 ---- Loss_D : 2.2673 Loss_G : 0.0 D(x) : 0.2363 D(G(z1)) : 0.35204 D(G(z2)) : 1.0
iteration 50 / 329 – epoch 0 ---- Loss_D : 100.0 Loss_G : 0.0 D(x) : 1.0 D(G(z1)) : 1.0 D(G(z2)) : 1.0
Can you help me with that and maybe explain what’s wrong with my code? Below are the relevant sections of my code (Generator, Discriminator and training loop). If you think you need more to figure it out, let me know.
Generator and Discriminator :
class Generator(nn.Module):
    """Transposed-convolution generator mapping a latent tensor to an image.

    The network is a stack of ``ConvTranspose2d -> BatchNorm2d -> ReLU``
    stages followed by a final ``ConvTranspose2d -> Tanh``.  Channel widths
    are taken from the module-level ``nz`` (latent channels), ``ngf``
    (base feature width) and ``nc`` (output channels).

    Args:
        ngpu: Stored on the instance as ``self.ngpu``; not otherwise used
            by this class.
    """

    def __init__(self, ngpu):
        super().__init__()
        self.ngpu = ngpu

        # First stage: kernel 4, stride 1, no padding.  For a
        # (N, nz, 1, 1) input this yields (N, ngf*8, 4, 4).
        layers = [
            nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
        ]

        # Three stages with kernel 4 / stride 2 / padding 1: each one
        # doubles the spatial size (4 -> 8 -> 16 -> 32) while the channel
        # width goes ngf*8 -> ngf*4 -> ngf*2 -> ngf.
        for mult_in, mult_out in ((8, 4), (4, 2), (2, 1)):
            layers += [
                nn.ConvTranspose2d(ngf * mult_in, ngf * mult_out, 4, 2, 1, bias=False),
                nn.BatchNorm2d(ngf * mult_out),
                nn.ReLU(True),
            ]

        # Output stage: (ngf) x 32 x 32 -> (nc) x 64 x 64, squashed to
        # [-1, 1] by Tanh.
        layers += [
            nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
            nn.Tanh(),
        ]

        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Run ``input`` through the layer stack and return the result."""
        return self.main(input)
class Discriminator(nn.Module):
    """Strided-convolution discriminator ending in a sigmoid.

    The network is a stack of ``Conv2d -> (BatchNorm2d) -> LeakyReLU(0.2)``
    stages followed by a final ``Conv2d -> Sigmoid``.  Channel widths are
    taken from the module-level ``nc`` (input channels) and ``ndf`` (base
    feature width).

    Args:
        ngpu: Stored on the instance as ``self.ngpu``; not otherwise used
            by this class.
    """

    def __init__(self, ngpu):
        super().__init__()
        self.ngpu = ngpu

        # Spatial sizes in the comments below assume a (nc) x 64 x 64 input.
        # First stage has no batch norm: (nc) x 64 x 64 -> (ndf) x 32 x 32.
        layers = [
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
        ]

        # Three stages with kernel 4 / stride 2 / padding 1: each one halves
        # the spatial size (32 -> 16 -> 8 -> 4) while the channel width goes
        # ndf -> ndf*2 -> ndf*4 -> ndf*8.
        for mult_in, mult_out in ((1, 2), (2, 4), (4, 8)):
            layers += [
                nn.Conv2d(ndf * mult_in, ndf * mult_out, 4, 2, 1, bias=False),
                nn.BatchNorm2d(ndf * mult_out),
                nn.LeakyReLU(0.2, inplace=True),
            ]

        # Final stage: (ndf*8) x 4 x 4 -> 1 x 1 x 1, mapped into (0, 1) by
        # the sigmoid.
        layers += [
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid(),
        ]

        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Run ``input`` through the layer stack and return the result."""
        return self.main(input)
Weight init, Loss Function, Optimizer :
def weights_init(m):
    """Re-initialise the parameters of a single module in place.

    Meant to be passed to ``Module.apply`` so that it is called once for
    every sub-module of a network.  The module is matched by its class
    name:

    * name contains ``"Conv"``: ``weight`` is drawn from N(0.0, 0.02);
    * name contains ``"BatchNorm"``: ``weight`` is drawn from N(1.0, 0.02)
      and ``bias`` is set to 0;
    * anything else is left untouched.

    Parameters that are ``None`` (for example a batch-norm layer built with
    ``affine=False``) are skipped instead of raising ``AttributeError``.

    Args:
        m: The module to initialise.

    Returns:
        None. The module is modified in place.
    """
    classname = m.__class__.__name__
    weight = getattr(m, "weight", None)
    bias = getattr(m, "bias", None)

    if "Conv" in classname:
        if weight is not None:
            # nn.init functions already run under torch.no_grad(), so the
            # parameter can be passed directly instead of going via `.data`.
            nn.init.normal_(weight, 0.0, 0.02)
    elif "BatchNorm" in classname:
        if weight is not None:
            nn.init.normal_(weight, 1.0, 0.02)
        if bias is not None:
            nn.init.constant_(bias, 0)
# Build both networks and move them to the target device.  The constructor
# argument is stored by each class as `ngpu`.
G = Generator(1).to(device)
D = Discriminator(1).to(device)

# Overwrite the default parameter initialisation of every sub-module.
G.apply(weights_init)
D.apply(weights_init)

# Binary cross-entropy on the discriminator's sigmoid output.
BCE_loss = nn.BCELoss()

# One Adam optimizer per network, sharing the same hyper-parameters.
G_optimizer = optim.Adam(
    G.parameters(),
    lr=learning_rate,
    betas=(beta1, 0.999),
)
D_optimizer = optim.Adam(
    D.parameters(),
    lr=learning_rate,
    betas=(beta1, 0.999),
)

# Target values used to fill the label tensor for real / generated batches.
real_label, fake_label = 1, 0

# Per-iteration loss history for the generator and the discriminator.
G_loss_l, D_loss_l = [], []
The Training Loop :
# Alternating GAN training: for every batch, first update the discriminator
# on a real batch and a generated batch, then update the generator against
# the freshly updated discriminator.
print("Starting Training Loop...")
for epoch in range(num_epoch):
    for i, data in enumerate(dataloader, 0):
        # ---- Discriminator: maximize log(D(x)) + log(1 - D(G(z))) ----
        # Real batch.
        D.zero_grad()
        real = data[0].to(device)
        b_size = real.size(0)
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        outputD_real = D(real).view(-1)
        loss_D_real = BCE_loss(outputD_real, label)
        loss_D_real.backward()
        # Mean discriminator output on real data.  This value is printed
        # below; it was previously never assigned inside the loop, which
        # raises NameError on a fresh run (or silently prints a stale value
        # left over from an earlier session).
        D_x = outputD_real.mean().item()

        # Generated batch.
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        fake = G(noise)
        label.fill_(fake_label)
        # detach() keeps this backward pass from reaching the generator.
        outputD_fake = D(fake.detach()).view(-1)
        loss_D_fake = BCE_loss(outputD_fake, label)
        loss_D_fake.backward()
        D_G_z1 = outputD_fake.mean().item()

        # Gradients of both halves were accumulated by the two backward()
        # calls above; the sum is kept only for reporting.
        loss_D = loss_D_real + loss_D_fake
        D_optimizer.step()

        # ---- Generator: maximize log(D(G(z))) ----
        G.zero_grad()
        # The generator is trained with "real" targets on its own samples.
        label.fill_(real_label)
        # Second pass through D, after its update; no detach here so the
        # gradient reaches G.
        outputD2 = D(fake).view(-1)
        loss_G = BCE_loss(outputD2, label)
        loss_G.backward()
        D_G_z2 = outputD2.mean().item()
        G_optimizer.step()  # update G

        G_loss_l.append(loss_G.item())
        D_loss_l.append(loss_D.item())

        if i % 50 == 0:
            print("iteration ", i, "/", len(dataloader),"-- epoch", epoch, "---- Loss_D : ", loss_D.item(), " Loss_G : ", loss_G.item(),
                  " D(x) : ", D_x, " D(G(z1)) : ", D_G_z1, " D(G(z2)) : ", D_G_z2)