Couldn't create Autoencoder model

Here is the code; I am getting an error when I combine the encoder and decoder into a single autoencoder.

import torch; torch.manual_seed(0)
import torch.nn as nn
import cv2
import torch.nn.functional as F
import torch.utils
import torch.distributions
import torchvision
import numpy as np
from torchvision import datasets, transforms
from torchsummary import summary
import matplotlib.pyplot as plt; plt.rcParams['figure.dpi'] = 200

device = 'cuda' if torch.cuda.is_available() else 'cpu'

import torch.nn as nn
import torch.nn.functional as F

class Encoder(nn.Module):
    def __init__(self, latent_dims):
        # This part of the code contains all the definitions
        # of the layers that we are going to use in the
        # model
        super(Encoder, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1) 
        self.batch_norm1 = nn.BatchNorm2d(32) 
        self.pool1 = nn.MaxPool2d(2, 2)
        self.flatten = nn.Flatten(start_dim=1)
        self.linear1 = nn.Linear(32768, 512)  # 32 channels * 32 * 32 after pooling a 64x64 input
        self.batch_norm2 = nn.BatchNorm1d(512)
        self.linear2 = nn.Linear(512,latent_dims)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.batch_norm1(x)
        x = self.pool1(x)
        x = torch.flatten(x, 1)
        x = self.batch_norm2(F.relu(self.linear1(x)))
        x = F.softmax(self.linear2(x), dim=1)
        return x

latent_dims = 2
encoder = Encoder(latent_dims).to(device) # GPU
summary(encoder,input_size=(1,64,64))

This runs correctly and prints the following summary:

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1           [-1, 32, 64, 64]             320
       BatchNorm2d-2           [-1, 32, 64, 64]              64
         MaxPool2d-3           [-1, 32, 32, 32]               0
            Linear-4                  [-1, 512]      16,777,728
       BatchNorm1d-5                  [-1, 512]           1,024
            Linear-6                    [-1, 2]           1,026
================================================================
Total params: 16,780,162
Trainable params: 16,780,162
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.02
Forward/backward pass size (MB): 2.26
Params size (MB): 64.01
Estimated Total Size (MB): 66.28
----------------------------------------------------------------
class Decoder(nn.Module):
    def __init__(self, latent_dims):
        super(Decoder, self).__init__()
        self.linear1 = nn.Linear(latent_dims, 512)
        self.batch_norm1 = nn.BatchNorm1d(1) 
        self.linear2 = nn.Linear(512, 2048)
        self.batch_norm2 = nn.BatchNorm1d(1) 
        self.unflatten = nn.Unflatten(1, (1, 64, 64))

    def forward(self, z):
        z = F.relu(self.linear1(z))
        z = self.batch_norm1(z)
        print(z.size())
        z = F.relu(self.linear2(z))
        z = self.batch_norm2(z)
        print(z.size())
        z = z.view(-1, 1, 64, 64)
        print(z.size())
        return torch.sigmoid(z)#z.reshape((-1, 1, 64, 64))

latent_dims = 2
decoder = Decoder(latent_dims).to(device) # GPU
summary(decoder,input_size=(1,2))

Here is the decoder, which also runs correctly on its own:

torch.Size([2, 1, 512])
torch.Size([2, 1, 2048])
torch.Size([1, 1, 64, 64])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Linear-1               [-1, 1, 512]           1,536
       BatchNorm1d-2               [-1, 1, 512]               2
            Linear-3              [-1, 1, 2048]       1,050,624
       BatchNorm1d-4              [-1, 1, 2048]               2
================================================================
Total params: 1,052,164
Trainable params: 1,052,164
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.04
Params size (MB): 4.01
Estimated Total Size (MB): 4.05
----------------------------------------------------------------

But the combined autoencoder fails with the following error:

class Autoencoder(nn.Module):
    def __init__(self, latent_dims):
        super(Autoencoder, self).__init__()
        self.encoder = Encoder(latent_dims)
        self.decoder = Decoder(latent_dims)

    def forward(self, x):
        z = self.encoder(x)
        return self.decoder(z)

latent_dims = 2
autoencoder = Autoencoder(latent_dims).to(device) # GPU
summary(autoencoder,input_size=(1,64,64))


/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
   2437 
   2438     return torch.batch_norm(
-> 2439         input, weight, bias, running_mean, running_var, training, momentum, eps, torch.backends.cudnn.enabled
   2440     )
   2441 

RuntimeError: running_mean should contain 512 elements not 1


self.batch_norm1 expects an input with a single channel, while you are passing an activation of shape [batch_size, 512] to it, which will fail.
Change num_features to 512 in batch_norm1 and to 2048 in batch_norm2 and it should work.
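
i.e., a minimal sketch of the change inside Decoder.__init__ (only the two BatchNorm1d definitions differ):

        # num_features must match the feature dimension produced by the preceding Linear layer
        self.batch_norm1 = nn.BatchNorm1d(512)   # follows linear1 (out_features=512)
        self.batch_norm2 = nn.BatchNorm1d(2048)  # follows linear2 (out_features=2048)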

Thanks a lot, it works when I run the full autoencoder. But could you please explain why the Decoder now fails when I run summary on it separately? What might be the reason?

class Decoder(nn.Module):
    def __init__(self, latent_dims):
        super(Decoder, self).__init__()
        self.linear1 = nn.Linear(latent_dims, 512)
        self.batch_norm1 = nn.BatchNorm1d(512) 
        self.linear2 = nn.Linear(512, 2048)
        self.batch_norm2 = nn.BatchNorm1d(2048) 
        self.unflatten = nn.Unflatten(1, (1, 64, 64))

    def forward(self, z):
        z = F.relu(self.linear1(z))
        z = self.batch_norm1(z)
        print(z.size())
        z = F.relu(self.linear2(z))
        z = self.batch_norm2(z)
        print(z.size())
        z = z.view(-1, 1, 64, 64)
        print(z.size())
        return torch.sigmoid(z)#z.reshape((-1, 1, 64, 64))

latent_dims = 2
decoder = Decoder(latent_dims).to(device) # GPU
summary(decoder,input_size=(1,2))

---------------------------------------------------------------------------

RuntimeError                              Traceback (most recent call last)

<ipython-input-8-702d63af76af> in <module>
      1 latent_dims = 2
      2 decoder = Decoder(latent_dims).to(device) # GPU
----> 3 summary(decoder,input_size=(1,2))

5 frames

/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
   2437 
   2438     return torch.batch_norm(
-> 2439         input, weight, bias, running_mean, running_var, training, momentum, eps, torch.backends.cudnn.enabled
   2440     )
   2441 

RuntimeError: running_mean should contain 1 elements not 512


Also, while loading the saved model, the following errors were encountered:

Traceback (most recent call last):
  File "/home/jimut/RnD_Segmentation/Ellipses/run_ae.py", line 146, in <module>
    autoenc.load_state_dict(torch.load('AE_rot.pth'))
  File "/home/jimut/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1604, in load_state_dict
    raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
RuntimeError: Error(s) in loading state_dict for AutoencoderHastyn:
	Missing key(s) in state_dict: "decoder.batch_norm1.weight", "decoder.batch_norm1.bias", "decoder.batch_norm1.running_mean", "decoder.batch_norm1.running_var", "decoder.batch_norm2.weight", "decoder.batch_norm2.bias", "decoder.batch_norm2.running_mean", "decoder.batch_norm2.running_var". 
	Unexpected key(s) in state_dict: "encoder.conv2.weight", "encoder.conv2.bias", "encoder.conv3.weight", "encoder.conv3.bias", "encoder.batch_norm3.weight", "encoder.batch_norm3.bias", "encoder.batch_norm3.running_mean", "encoder.batch_norm3.running_var", "encoder.batch_norm3.num_batches_tracked", "decoder.conv_t_2d_1.weight", "decoder.conv_t_2d_1.bias", "decoder.conv_t_2d_2.weight", "decoder.conv_t_2d_2.bias", "decoder.conv_t_2d_3.weight", "decoder.conv_t_2d_3.bias". 
	size mismatch for encoder.conv1.weight: copying a param with shape torch.Size([16, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 1, 3, 3]).
	size mismatch for encoder.conv1.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for encoder.batch_norm1.weight: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for encoder.batch_norm1.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for encoder.batch_norm1.running_mean: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for encoder.batch_norm1.running_var: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for encoder.linear1.weight: copying a param with shape torch.Size([128, 4096]) from checkpoint, the shape in current model is torch.Size([512, 32768]).
	size mismatch for encoder.linear1.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for encoder.batch_norm2.weight: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for encoder.batch_norm2.bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for encoder.batch_norm2.running_mean: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for encoder.batch_norm2.running_var: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for encoder.linear2.weight: copying a param with shape torch.Size([2, 128]) from checkpoint, the shape in current model is torch.Size([2, 512]).
	size mismatch for decoder.linear1.weight: copying a param with shape torch.Size([128, 2]) from checkpoint, the shape in current model is torch.Size([512, 2]).
	size mismatch for decoder.linear1.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.linear2.weight: copying a param with shape torch.Size([4096, 128]) from checkpoint, the shape in current model is torch.Size([2048, 512]).
	size mismatch for decoder.linear2.bias: copying a param with shape torch.Size([4096]) from checkpoint, the shape in current model is torch.Size([2048]).

Your code works fine with an input shape of [batch_size, 2]. The reason summary fails is that torchsummary prepends a batch dimension of 2 to your input_size, so with input_size=(1, 2) the decoder receives a [2, 1, 2] tensor (that is also why your earlier printout showed torch.Size([2, 1, 512])). On a 3D input, BatchNorm1d treats dimension 1 as the channel dimension, which here has size 1, so the layers with num_features=512 and 2048 now complain. Calling the decoder directly with a 2D [batch_size, 2] tensor works:

class Decoder(nn.Module):
    def __init__(self, latent_dims):
        super(Decoder, self).__init__()
        self.linear1 = nn.Linear(latent_dims, 512)
        self.batch_norm1 = nn.BatchNorm1d(512) 
        self.linear2 = nn.Linear(512, 2048)
        self.batch_norm2 = nn.BatchNorm1d(2048) 
        self.unflatten = nn.Unflatten(1, (1, 64, 64))

    def forward(self, z):
        z = F.relu(self.linear1(z))
        z = self.batch_norm1(z)
        print(z.size())
        z = F.relu(self.linear2(z))
        z = self.batch_norm2(z)
        print(z.size())
        z = z.view(-1, 1, 64, 64)
        print(z.size())
        return torch.sigmoid(z)#z.reshape((-1, 1, 64, 64))

latent_dims = 2
decoder = Decoder(latent_dims)

x = torch.randn(2, 2)
out = decoder(x)
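
If you want to keep using torchsummary, a minimal sketch (relying on torchsummary prepending the batch dimension itself, as your earlier printouts show) is to pass only the feature dimension as input_size, so the decoder sees a 2D [2, 2] tensor:

# torchsummary builds an input of shape (batch, *input_size),
# so input_size=(2,) gives the decoder a [2, 2] tensor
summary(decoder, input_size=(2,))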

It seems you've changed the model architecture after saving the state_dict, which is why loading it now raises the shape mismatches; a checkpoint can only be loaded into a model whose layer names and parameter shapes match the ones it was saved from.
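
To confirm exactly which layers changed, a quick sketch (using the `autoenc` model and checkpoint file name from your traceback) that compares the checkpoint against the current model could look like this:

# compare the saved state_dict against the current model's state_dict
checkpoint = torch.load('AE_rot.pth', map_location='cpu')
model_state = autoenc.state_dict()

for key, value in checkpoint.items():
    if key not in model_state:
        print('unexpected key:', key)
    elif value.shape != model_state[key].shape:
        print('shape mismatch for', key, tuple(value.shape), '->', tuple(model_state[key].shape))

for key in model_state:
    if key not in checkpoint:
        print('missing key:', key)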