While training a simple GAN, I found that the output of my Discriminator is not normal: it rapidly decreases toward negative infinity, and torch.log(1 - d_out) always returns NaN.
Did I do something wrong?
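For reference, torch.log returns NaN for any negative input, which matches the negative discriminator outputs below. A standalone check (not from my training code):

import torch

print(torch.log(torch.tensor([-0.1123])))  # tensor([nan]) -- negative input
print(torch.log(torch.tensor([0.5])))      # tensor([-0.6931]) -- valid input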
Here’s the output:
The first few:
tensor([[[[-0.0000]]],
[[[-0.0000]]],
[[[-0.0000]]],
[[[-0.0000]]]], device='cuda:0', grad_fn=<CudnnConvolutionBackward>)
tensor([[[[-0.1123]]],
[[[-0.1064]]],
[[[-0.1147]]],
[[[-0.1123]]]], device='cuda:0', grad_fn=<CudnnConvolutionBackward>)
tensor([[[[-0.8551]]],
[[[-0.8135]]],
[[[-0.8382]]],
[[[-0.8800]]]], device='cuda:0', grad_fn=<CudnnConvolutionBackward>)
tensor([[[[-3.9393]]],
[[[-3.8927]]],
[[[-3.8870]]],
[[[-3.6732]]]], device='cuda:0', grad_fn=<CudnnConvolutionBackward>)
tensor([[[[-12.6282]]],
[[[-13.2028]]],
[[[-12.6427]]],
[[[-13.1222]]]], device='cuda:0', grad_fn=<CudnnConvolutionBackward>)
After 50 steps:
tensor([[[[-3858527.5000]]],
[[[-3400833.5000]]],
[[[-4068995.5000]]],
[[[-4414499.0000]]]],
device='cuda:0', grad_fn=<CudnnConvolutionBackward>)
tensor([[[[-4434020.]]],
[[[-5257153.]]],
[[[-4943982.]]],
[[[-4949389.]]]], device='cuda:0', grad_fn=<CudnnConvolutionBackward>)
tensor([[[[-5265078.]]],
[[[-5186647.]]],
[[[-5505139.]]],
[[[-5414633.]]]], device='cuda:0', grad_fn=<CudnnConvolutionBackward>)
tensor([[[[-4643945.5000]]],
[[[-5820869.0000]]],
[[[-5961802.0000]]],
[[[-5756706.5000]]]],
device='cuda:0', grad_fn=<CudnnConvolutionBackward>)
tensor([[[[-5991412.5000]]],
[[[-6428053.0000]]],
[[[-5671416.5000]]],
[[[-5646565.0000]]]],
device='cuda:0', grad_fn=<CudnnConvolutionBackward>)
tensor([[[[-6527784.5000]]],
[[[-5977119.5000]]],
[[[-6244153.0000]]],
[[[-6483654.0000]]]],
device='cuda:0', grad_fn=<CudnnConvolutionBackward>)
Here is my Discriminator network:
import torch.nn as nn


class NormalBlock(nn.Module):
    """Conv block that halves the spatial size (kernel 4, stride 2, pad 1)."""
    def __init__(self, input_c, output_c):
        super(NormalBlock, self).__init__()
        self.main = nn.Sequential(
            nn.Conv2d(input_c, output_c, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.01)
        )

    def forward(self, x):
        return self.main(x)


class Discriminator(nn.Module):
    def __init__(self, num_filters, num_bottleneck):
        super(Discriminator, self).__init__()
        layer = []
        layer.append(NormalBlock(3, num_filters))  # RGB input
        curr_num_filters = num_filters
        # each bottleneck block doubles the channels and halves the resolution
        for i in range(num_bottleneck):
            layer.append(NormalBlock(curr_num_filters, curr_num_filters * 2))
            curr_num_filters = curr_num_filters * 2
        # final 1-channel score map; no activation follows this conv
        layer.append(nn.Conv2d(curr_num_filters, 1, kernel_size=3, padding=1, bias=False))
        self.main = nn.Sequential(*layer)

    def forward(self, x):
        return self.main(x)
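For context, the last layer is a bare Conv2d, so the network emits an unbounded raw score rather than a probability. A minimal shape check (the filter counts and the 16x16 input size here are illustrative, not necessarily my real config):

import torch

D = Discriminator(num_filters=64, num_bottleneck=3)
x = torch.randn(4, 3, 16, 16)  # batch of 4 RGB images
out = D(x)
print(out.shape)  # torch.Size([4, 1, 1, 1])
# out is the raw activation of the final Conv2d; it can take any real
# value and is not squashed into (0, 1) by a sigmoid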
Here is my training process:
# score the real images; torch.log(d_out) is only finite for d_out > 0
d_out = self.D(ori_image)
d_loss_real = torch.log(d_out).mean()

# score the generated images (detached so only D receives gradients);
# 0.9 in place of 1 (label smoothing)
gen_image = self.G(ori_image, target_label)
d_out = self.D(gen_image.detach())
d_loss_fake = torch.log(0.9 - d_out).mean()

# maximize both terms by minimizing their negated sum
d_loss = -d_loss_real - d_loss_fake
self.reset_grad()
d_loss.backward()
self.D_optimizer.step()
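Plugging the printed discriminator outputs into the two loss terms shows where the NaN comes from (values copied from the output above):

import torch

d_out = torch.tensor([-3858527.5, -3400833.5])  # from the log after 50 steps
print(torch.log(d_out).mean())        # tensor(nan) -- log of negative values
print(torch.log(0.9 - d_out).mean())  # ~15.1, finite but grows with |d_out|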
I use the Adam optimizer with this config:
self.D_optimizer = torch.optim.Adam(
    self.D.parameters(),
    lr=self.D_lr,
    betas=(0.5, 0.999)
)