I’m trying to train my own DCGAN for a project, and I can’t seem to get the generator to work right. I’m prototyping on MNIST to keep the problem simple while I experiment.
I’ve tried giving the discriminator the same number of layers as the generator, but in that case the discriminator loss quickly converges to 0.01 or lower, and the generator just produces random shapes that start to look vaguely like numbers (if we are being generous) but don’t fool the discriminator.
I’ve tried lowering the number of conv layers in the discriminator. This actually seems to produce convergence, but it’s awful and the generated images are not numbers; the generator loss sits around 0.7.
I’ve tried label smoothing on the positive labels for the generator. This lowers the generator loss (which doesn’t mean much), but the images still suck.
I’ve tried different loss functions (MSE, and BCELoss).
I’ve implemented DCGAN (somewhat) successfully in TensorFlow before, but I’m new to PyTorch.
Questions:
Is there a way in PyTorch for me to check whether the discriminator can see the generator’s gradients? Maybe I screwed something up, and that’s why the discriminator can overfit so heavily.
how many epochs should i have to train dcgan to get decent results? to converge? I’ve looked around and seen different numbers.
What exactly should I be calling `.cuda()` on? I tried to read up on it, but I’m not getting a lot of clear answers. Right now I’m calling it on pretty much everything.
here’s my training code:
# Alternating GAN training: each batch performs one discriminator update
# (real batch + generated batch) followed by one generator update.
for epoch in range(n_epochs):
    for i, image_batch in enumerate(data_loader):
        # Only the first element of each loader item is used
        # (presumably the images of an (images, labels) pair -- confirm).
        image_batch = image_batch[0]

        # --- train discriminator ---
        # Real images are scored against a target of 1.
        real_images = to_variable(image_batch)
        outputs = discriminator(real_images)
        pos_labels = to_variable(torch.ones(outputs.data.shape))
        real_loss = loss_func(outputs, pos_labels)

        # Generated images are scored against a target of 0.
        # The result must be bound to `fake_images`: the original bound it
        # to a misspelled name, so the discriminator was never shown the
        # freshly generated batch.
        noise = to_variable(torch.randn(batch_size, noise_dim))
        fake_images = generator(noise)
        # detach() cuts the graph at the generator output, so this backward
        # pass only computes gradients for the discriminator.
        outputs = discriminator(fake_images.detach())
        neg_labels = to_variable(torch.zeros(outputs.data.shape))
        fake_loss = loss_func(outputs, neg_labels)

        # Backpropagate the combined real + fake loss and update the
        # discriminator only.
        total_loss = real_loss + fake_loss
        discriminator.zero_grad()
        total_loss.backward()
        d_optimizer.step()

        # --- train generator ---
        # The generator loss measures how close the discriminator's output
        # on a fresh fake batch is to 1 (the "real" label). No detach here:
        # the gradient has to flow through the discriminator into the
        # generator.
        noise = to_variable(torch.randn(batch_size, noise_dim))
        fake_images = generator(noise)
        outputs = discriminator(fake_images)
        gen_labels = to_variable(torch.ones(outputs.data.shape))
        gen_loss = loss_func(outputs, gen_labels)

        # generator.zero_grad() clears only the generator's gradients;
        # the discriminator's stale gradients are cleared at its next step.
        generator.zero_grad()
        gen_loss.backward()
        g_optimizer.step()

        # Every 250 batches: report losses and preview one sample.
        if i % 250 == 0:
            print('epoch {}'.format(epoch))
            print('image {}'.format(i))
            # NOTE: `.data[0]` matches the Variable-era API used in this
            # file; newer PyTorch releases use `.item()` instead.
            print('generator loss: {}, discriminator loss: {}'.format(gen_loss.data[0], total_loss.data[0]))
            fake_img = generator(test_noise)
            # Render the de-normalised image. The original computed `img`
            # but then displayed the raw tanh output instead.
            # Assumes denorm_mnist returns a tensor in a range ToPILImage
            # accepts -- TODO confirm against its definition.
            img = denorm_mnist(fake_img)
            result = transforms.ToPILImage()(img.cpu().data[0])
            plt.figure()
            plt.imshow(result)
            plt.axis('off')
            plt.show()
here is my generator:
class Generator(nn.Module):
    """Transposed-convolution generator.

    The network is a stack of five ``ConvTranspose2d`` layers (all without
    bias). The first uses kernel 4, stride 1, padding 0; the remaining four
    use ``g_kernel_size``, stride 2, padding 1. Every layer except the last
    is followed by ``BatchNorm2d`` and ``LeakyReLU(0.2)``; the last is
    followed by ``Tanh``. Channel widths go
    ``noise_dim -> 8g -> 4g -> 2g -> g -> d_filter_depth_in`` where
    ``g = g_filter_depth``.
    """

    def __init__(self):
        super(Generator, self).__init__()

        # Projection layer: consumes the noise vector viewed as a 1x1 map.
        layers = [
            nn.ConvTranspose2d(noise_dim, g_filter_depth * 8,
                               kernel_size=4, stride=1, padding=0,
                               bias=False),
            nn.BatchNorm2d(g_filter_depth * 8),
            nn.LeakyReLU(0.2),
        ]

        # Three stride-2 stages, each halving the channel count.
        for multiplier in (8, 4, 2):
            channels_in = g_filter_depth * multiplier
            channels_out = g_filter_depth * (multiplier // 2)
            layers.append(nn.ConvTranspose2d(channels_in, channels_out,
                                             kernel_size=g_kernel_size,
                                             stride=2, padding=1,
                                             bias=False))
            layers.append(nn.BatchNorm2d(channels_out))
            layers.append(nn.LeakyReLU(0.2))

        # Output stage: map to the image channel count, squash with tanh.
        layers.append(nn.ConvTranspose2d(g_filter_depth, d_filter_depth_in,
                                         kernel_size=g_kernel_size,
                                         stride=2, padding=1,
                                         bias=False))
        layers.append(nn.Tanh())

        self.main = nn.Sequential(*layers)

    def forward(self, inputs):
        """Reshape ``inputs`` to (size(0), size(1), 1, 1) and run the stack."""
        n_samples = inputs.size(0)
        n_features = inputs.size(1)
        as_feature_map = inputs.view(n_samples, n_features, 1, 1)
        return self.main(as_feature_map)
here’s my discriminator:
class Discriminator(nn.Module):
    """Strided-convolution discriminator.

    Four ``Conv2d`` layers with kernel 4, stride 2, padding 1 (no bias),
    each followed by ``LeakyReLU(0.2)``; all but the first also have a
    ``BatchNorm2d`` before the activation. A final ``Conv2d`` with
    kernel 4, stride 1, padding 0 reduces to a single channel, followed by
    ``Sigmoid``. Channel widths go
    ``d_filter_depth_in -> d -> 2d -> 4d -> 8d -> 1`` where
    ``d = d_filter_depth``.
    """

    def __init__(self):
        super(Discriminator, self).__init__()

        # Input stage: no batch norm directly on the first conv output.
        layers = [
            nn.Conv2d(d_filter_depth_in, d_filter_depth,
                      kernel_size=4, stride=2, padding=1, bias=False),
            nn.LeakyReLU(0.2),
        ]

        # Three stride-2 stages, each doubling the channel count.
        for multiplier in (1, 2, 4):
            channels_in = d_filter_depth * multiplier
            channels_out = d_filter_depth * (multiplier * 2)
            layers.append(nn.Conv2d(channels_in, channels_out,
                                    kernel_size=4, stride=2, padding=1,
                                    bias=False))
            layers.append(nn.BatchNorm2d(channels_out))
            layers.append(nn.LeakyReLU(0.2))

        # Output stage: collapse to one channel and squash with a sigmoid.
        layers.append(nn.Conv2d(d_filter_depth * 8, 1,
                                kernel_size=4, stride=1, padding=0,
                                bias=False))
        layers.append(nn.Sigmoid())

        self.main = nn.Sequential(*layers)

    def forward(self, inputs):
        """Run ``inputs`` through the convolutional stack and return it."""
        return self.main(inputs)
Any feedback, or pointing out any errors or rookie PyTorch mistakes, would be greatly appreciated.