I am trying to implement a BiGAN (Bidirectional Generative Adversarial Network) in PyTorch. My images are high-resolution fundus images. Because of the high resolution, I cannot simply unroll each image into a vector and feed it to the deconv network (the discriminator architecture suggested by the paper). So I divided my discriminator into two sub-models. The first is a CNN that takes the image as input and produces a vector as output; the second is a linear model that takes the noise as input and produces a vector. The outputs of both sub-models are then concatenated and fed to a linear network with two hidden layers, which produces a single output (i.e. the discriminator output, a probability). My encoder follows the AlexNet architecture, but I have tweaked it a little to match my dataset. The generator follows the Radford architecture.
While training, I am facing an issue: half of my discriminator's gradient values are zero. I am not sure what the cause is; maybe it is due to the discriminator architecture.
If anyone has any idea regarding this, please share it; I will be really grateful.
class Discriminator(nn.Module):
    """Joint BiGAN discriminator over (image, latent) pairs.

    The image passes through a convolutional stack (``features``) followed by
    a small MLP (``flatten``); the latent code passes through its own MLP
    (``inference_z``).  The two 128-dim embeddings are concatenated and
    scored by ``inference_joint``, which ends in a sigmoid.

    ``features`` must reduce the image to ``128 x 1 x 1`` so that the
    following ``Linear(128, 128)`` accepts it; the size arithmetic in the
    comments below assumes a ``3 x 128 x 128`` input.

    Args:
        z_dim: Number of values in the latent vector fed to ``inference_z``.
    """

    def __init__(self, z_dim=100):
        super(Discriminator, self).__init__()

        # Image branch (AlexNet-like).  Spatial sizes assume a 128x128 input.
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=8, stride=2, padding=2),  # (128 + 4 - 8) / 2 + 1 = 63
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, True),
            nn.Dropout2d(p=0.5),
            nn.MaxPool2d(kernel_size=3, stride=2),  # (63 - 3) / 2 + 1 = 31
            nn.Conv2d(64, 192, kernel_size=5, padding=2),  # (31 + 4 - 5) / 1 + 1 = 31
            nn.BatchNorm2d(192),
            nn.LeakyReLU(0.2, True),
            nn.Dropout2d(p=0.5),
            nn.MaxPool2d(kernel_size=3, stride=2),  # (31 - 3) / 2 + 1 = 15
            nn.Conv2d(192, 384, kernel_size=3, padding=1),  # (15 + 2 - 3) / 1 + 1 = 15
            nn.BatchNorm2d(384),
            nn.LeakyReLU(0.2, True),
            nn.Dropout2d(p=0.5),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),  # 15
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, True),
            nn.Dropout2d(p=0.5),
            # NOTE: unlike the other convs, this one has no BatchNorm after it.
            # Left as-is so the Sequential indices (and therefore state_dict
            # keys) stay unchanged.
            nn.Conv2d(256, 256, kernel_size=3, padding=1),  # 15
            nn.LeakyReLU(0.2, True),
            nn.Dropout2d(p=0.5),
            nn.MaxPool2d(kernel_size=3, stride=2),  # (15 - 3) / 2 + 1 = 7
            nn.Conv2d(256, 128, kernel_size=7),  # 7x7 kernel on 7x7 map -> 128 x 1 x 1
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, True),
            nn.Dropout2d(p=0.5),
        )

        # MLP applied to the flattened image features: (N, 128) -> (N, 128).
        self.flatten = nn.Sequential(
            nn.Linear(128, 128, bias=True),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(128, 128, bias=True),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(p=0.5),
        )

        # Latent branch: (N, z_dim) -> (N, 128).
        # Plain nn.Dropout is used here because the activations are 2-D
        # (N, C); nn.Dropout2d is meant for 3-D/4-D feature maps and warns
        # (deprecated) when given 2-D input.
        self.inference_z = nn.Sequential(
            nn.Linear(z_dim, 128, bias=True),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(128, 128, bias=True),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(p=0.5),
        )

        # Joint head over the concatenated embeddings: (N, 256) -> (N, 1).
        self.inference_joint = nn.Sequential(
            nn.Linear(256, 256, bias=True),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(256, 256, bias=True),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(256, 1, bias=True),
            nn.Sigmoid(),
        )

    def forward(self, x, z):
        """Score a batch of (image, latent) pairs.

        Args:
            x: Image batch of shape ``(N, 3, H, W)``; ``H`` and ``W`` must be
                such that ``features`` produces a ``1 x 1`` map (e.g. 128).
            z: Latent batch with ``z_dim`` values per sample; any trailing
                dimensions are flattened.

        Returns:
            Tuple ``(output, feature)`` where ``output`` has shape ``(N, 1)``
            (sigmoid output of the joint head) and ``feature`` is the raw
            output of ``features``.
        """
        feature = self.features(x)
        x = feature.view(feature.size(0), -1)  # (N, 128)
        x = self.flatten(x)  # (N, 128)
        z = z.view(z.size(0), -1)  # (N, z_dim)
        z = self.inference_z(z)  # (N, 128)
        joint = torch.cat((x, z), 1)  # (N, 256)
        output = self.inference_joint(joint)  # (N, 1)
        return output, feature