Here is the code, but I am having an error in combining the model.
import torch; torch.manual_seed(0)
import torch.nn as nn
import cv2
import torch.nn.functional as F
import torch.utils
import torch.distributions
import torchvision
import numpy as np
from torchvision import datasets, transforms
from torchsummary import summary
import matplotlib.pyplot as plt; plt.rcParams['figure.dpi'] = 200
device = 'cuda' if torch.cuda.is_available() else 'cpu'
import torch.nn as nn
import torch.nn.functional as F
class Encoder(nn.Module):
def __init__(self, latent_dims):
# This part of code contains all the definations
# of the stuffs that we are going to use in the
# model
super(Encoder, self).__init__()
self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.batch_norm1 = nn.BatchNorm2d(32)
self.pool1 = nn.MaxPool2d(2, 2)
self.flatten = nn.Flatten(start_dim=1)
self.linear1 = nn.Linear(32768, 512)
self.batch_norm2 = nn.BatchNorm1d(512)
self.linear2 = nn.Linear(512,latent_dims)
def forward(self, x):
x = F.relu(self.conv1(x))
x = self.batch_norm1(x)
x = self.pool1(x)
x = torch.flatten(x, 1)
x = self.batch_norm2(F.relu(self.linear1(x)))
x = F.softmax(self.linear2(x))
return x
latent_dims = 2
encoder = Encoder(latent_dims).to(device) # GPU
summary(encoder,input_size=(1,64,64))
This compiles correctly
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 32, 64, 64] 320
BatchNorm2d-2 [-1, 32, 64, 64] 64
MaxPool2d-3 [-1, 32, 32, 32] 0
Linear-4 [-1, 512] 16,777,728
BatchNorm1d-5 [-1, 512] 1,024
Linear-6 [-1, 2] 1,026
================================================================
Total params: 16,780,162
Trainable params: 16,780,162
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.02
Forward/backward pass size (MB): 2.26
Params size (MB): 64.01
Estimated Total Size (MB): 66.28
----------------------------------------------------------------
class Decoder(nn.Module):
def __init__(self, latent_dims):
super(Decoder, self).__init__()
self.linear1 = nn.Linear(latent_dims, 512)
self.batch_norm1 = nn.BatchNorm1d(1)
self.linear2 = nn.Linear(512, 2048)
self.batch_norm2 = nn.BatchNorm1d(1)
self.unflatten = nn.Unflatten(1, (1, 64, 64))
def forward(self, z):
z = F.relu(self.linear1(z))
z = self.batch_norm1(z)
print(z.size())
z = F.relu(self.linear2(z))
z = self.batch_norm2(z)
print(z.size())
z = z.view(-1, 1, 64, 64)
print(z.size())
return torch.sigmoid(z)#z.reshape((-1, 1, 64, 64))
latent_dims = 2
decoder = Decoder(latent_dims).to(device) # GPU
summary(decoder,input_size=(1,2))
Here is the decoder with correct compilation
torch.Size([2, 1, 512])
torch.Size([2, 1, 2048])
torch.Size([1, 1, 64, 64])
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Linear-1 [-1, 1, 512] 1,536
BatchNorm1d-2 [-1, 1, 512] 2
Linear-3 [-1, 1, 2048] 1,050,624
BatchNorm1d-4 [-1, 1, 2048] 2
================================================================
Total params: 1,052,164
Trainable params: 1,052,164
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.04
Params size (MB): 4.01
Estimated Total Size (MB): 4.05
----------------------------------------------------------------
But the combined Auto-encoder doesn’t compile right:
class Autoencoder(nn.Module):
def __init__(self, latent_dims):
super(Autoencoder, self).__init__()
self.encoder = Encoder(latent_dims)
self.decoder = Decoder(latent_dims)
def forward(self, x):
z = self.encoder(x)
return self.decoder(z)
latent_dims = 2
autoencoder = Autoencoder(latent_dims).to(device) # GPU
summary(autoencoder,input_size=(1,64,64))
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
2437
2438 return torch.batch_norm(
-> 2439 input, weight, bias, running_mean, running_var, training, momentum, eps, torch.backends.cudnn.enabled
2440 )
2441
RuntimeError: running_mean should contain 512 elements not 1