Convolutional VAE not training

Hello @ptrblck,

First of all, thanks for your reply! I checked whether any gradients were None, infinite, or NaN. Turns out that's not the case, yet the model still doesn't train. However, when I wrap the encoder and decoder as submodules inside a single nn.Module, it magically (for me) starts training as expected and produces really nice samples!! But I don't get it: shouldn't the model train even if I keep the encoder and decoder as separate modules instead of shoving them into one nn.Module class? I may be wrong there, as I have just started using torch.
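
For reference, this is roughly the pattern I was using before (a minimal sketch with stand-in layers, not my actual conv stacks): two separate modules, with both parameter sets chained into one optimizer. If either set were missing from the optimizer that part would never update, but as far as I can tell I had both registered, so I still don't see why only the wrapped version trains.

import itertools

import torch
import torch.nn as nn

# stand-in modules just to show the wiring; my real encoder/decoder are the
# conv stacks in the VAE class further down
encoder = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 8))
decoder = nn.Sequential(nn.Linear(8, 28 * 28), nn.Sigmoid())

# with separate modules the optimizer has to see BOTH parameter sets;
# passing only encoder.parameters() would silently freeze the decoder
optimizer = torch.optim.Adam(
    itertools.chain(encoder.parameters(), decoder.parameters()), lr=1e-3
)

x = torch.rand(4, 1, 28, 28)                 # fake batch of 28x28 images
recon = decoder(encoder(x)).view_as(x)
loss = nn.functional.binary_cross_entropy(recon, x, reduction='sum')

optimizer.zero_grad()
loss.backward()
optimizer.step()  # both modules get updated as long as both are in the optimizer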

My modified code:

from collections import OrderedDict

import torch
import torch.nn as nn

# LocLogvar, Reshape and latent_dims are custom helpers / the hyperparameter
# defined elsewhere in my script: LocLogvar maps the flattened features to
# (mu, logvar), Reshape undoes nn.Flatten back to (C, H, W).

class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()

        self.encoder = nn.Sequential(OrderedDict([
        ('e_conv_layer_1', nn.Conv2d(1, 16, 5, 1)),                # 16 x 24 x 24
        ('e_relu_layer_1', nn.LeakyReLU(inplace=True)),
        ('e_batch_norm_1', nn.BatchNorm2d(16)),
        ('e_conv_layer_2', nn.Conv2d(16, 32, 5, 1)),               # 32 x 20 x 20
        ('e_relu_layer_2', nn.LeakyReLU(inplace=True)),
        ('e_batch_norm_2', nn.BatchNorm2d(32)),
        ('e_conv_layer_3', nn.Conv2d(32, 32, 11, 1)),              # 32 x 10 x 10
        ('e_relu_layer_3', nn.LeakyReLU(inplace=True)),
        ('e_batch_norm_3', nn.BatchNorm2d(32)),
        ('e_conv_layer_4', nn.Conv2d(32, 64, 5, 1)),               # 64 x 6 x 6
        ('e_relu_layer_4', nn.LeakyReLU(inplace=True)),
        ('e_batch_norm_4', nn.BatchNorm2d(64)),
        ('e_dropout_layer_1', nn.Dropout2d(p=0.75)),
        ('e_conv_layer_5', nn.Conv2d(64, 128, 5, 1)),              # 128 x 2 x 2
        ('e_relu_layer_5', nn.LeakyReLU(inplace=True)),
        ('e_batch_norm_5', nn.BatchNorm2d(128)),
        ('e_dropout_layer_2', nn.Dropout2d(p=0.85)),               # renamed: a duplicate OrderedDict key silently drops one of the two dropout layers
        ('e_flatten_layer', nn.Flatten()),
        ('e_out_layer', LocLogvar(128*2*2, latent_dims))
        ]))

        self.decoder = nn.Sequential(OrderedDict([
        ('inv_linear_layer_1', nn.Linear(latent_dims, 128*2*2)),   # 128 * 2 * 2
        ('inv_relu_layer_5', nn.LeakyReLU(inplace=True)),
        ('inv_flatten_layer', Reshape(128, 2, 2)),                 # 128 x 2 x 2
        ('inv_conv_layer_5', nn.ConvTranspose2d(128, 64, 5, 1)),   # 64 x 6 x 6
        ('inv_batch_norm_5', nn.BatchNorm2d(64)),
        ('inv_relu_layer_4', nn.LeakyReLU(inplace=True)),
        ('inv_conv_layer_4', nn.ConvTranspose2d(64, 32, 5, 1)),    # 32 x 10 x 10
        ('inv_batch_norm_4', nn.BatchNorm2d(32)),
        ('inv_relu_layer_3', nn.LeakyReLU(inplace=True)),
        ('inv_conv_layer_3', nn.ConvTranspose2d(32, 32, 11, 1)),   # 32 x 20 x 20
        ('inv_batch_norm_3', nn.BatchNorm2d(32)),
        ('inv_relu_layer_2', nn.LeakyReLU(inplace=True)),
        ('inv_conv_layer_2', nn.ConvTranspose2d(32, 16, 5, 1)),    # 16 x 24 x 24
        ('inv_batch_norm_2', nn.BatchNorm2d(16)),
        ('inv_relu_layer_1', nn.LeakyReLU(inplace=True)),
        ('inv_conv_layer_1', nn.ConvTranspose2d(16, 1, 5, 1)),     # 1 x 28 x 28
        ('inv_batch_norm_1', nn.BatchNorm2d(1)),
        ('inv_out_layer', nn.Sigmoid())
        ]))

    def encode(self, x):
        return self.encoder(x)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5*logvar)
        eps = torch.randn_like(std)
        return mu + eps*std

    def decode(self, z):
        return self.decoder(z)

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar
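
For completeness, the three outputs of forward are meant to feed the usual VAE objective (BCE reconstruction plus a KL divergence term, as in the standard PyTorch VAE example). A sketch of that loss, with vae_loss / model / images as placeholder names:

import torch
import torch.nn.functional as F

def vae_loss(recon_x, x, mu, logvar):
    # reconstruction term: pixel-wise BCE on the sigmoid output of the decoder
    bce = F.binary_cross_entropy(recon_x, x, reduction='sum')
    # KL divergence between q(z|x) = N(mu, sigma^2) and the unit Gaussian prior
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return bce + kld

# inside the training loop:
# recon, mu, logvar = model(images)
# loss = vae_loss(recon, images, mu, logvar)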