Decoded output dimension not the same as the input in AutoEncoder

jitesh · March 16, 2020, 4:47am

I am building a Custom Autoencoder to train on a dataset. My model is as follows

class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder,self).__init__()

        self.encoder = nn.Sequential(
        nn.Conv2d(in_channels = 3, out_channels = 32, kernel_size=3,stride=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size=3,stride=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels = 64, out_channels = 128, kernel_size=3,stride=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=128,out_channels=256,kernel_size=5,stride=2),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=256,out_channels=512,kernel_size=5,stride=2),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=512,out_channels=1024,kernel_size=5,stride=2),
        nn.ReLU(inplace=True)
        )

        self.decoder = nn.Sequential(
        nn.ConvTranspose2d(in_channels=1024,out_channels=512,kernel_size=5,stride=2),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(in_channels=512,out_channels=256,kernel_size=5,stride=2),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(in_channels=256,out_channels=128,kernel_size=5,stride=2),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(in_channels=128,out_channels=64,kernel_size=3,stride=1),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(in_channels=64,out_channels=32,kernel_size=3,stride=1),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(in_channels=32,out_channels=3,kernel_size=3,stride=1),
        nn.ReLU(inplace=True)
        )

    
    def forward(self,x):
        x = self.encoder(x)
        print(x.shape)
        x = self.decoder(x)
        return x



def unit_test():
    num_minibatch = 16
    img = torch.randn(num_minibatch, 3, 512, 640).cuda(0)
    model = AutoEncoder().cuda()
    model = nn.DataParallel(model)
    output = model(img)
    print(output.shape)

if __name__ == '__main__':
    unit_test()

As you can see, my input dimension is (3, 512, 640) but my output after passing it through the decoder is (3, 507, 635). Am I missing something while adding the Conv2Dtranspose layers ?

Any help would be appreciated. Thanks

ptrblck · March 16, 2020, 6:55am

Based on your input shape and layer configs, you would need to set output_padding=1 in the first and third nn.ConvTranspose2d layer to get the same output shape for your input.

jitesh · March 16, 2020, 5:24pm

Thanks a lot, it worked.