Just learning the ropes with PyTorch; kind of a newbie here.
I'm attempting to build a convolutional autoencoder on MNIST.
Here is my desired network architecture:
import torch
import torch.nn as nn
import torch.nn.functional as F

class tCNN_Autoencoder(nn.Module):
    def __init__(self):
        super(tCNN_Autoencoder, self).__init__()
        # Encoder: three 3x3 convs (padding=1 keeps h/w), each followed by 2x2 max pooling
        self.en_conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.en_conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.en_conv3 = nn.Conv2d(32, 4, 3, padding=1)
        # Decoder: three stride-2 transposed convs, each doubling h/w
        self.de_tconv1 = nn.ConvTranspose2d(4, 32, 2, stride=2)
        self.de_tconv2 = nn.ConvTranspose2d(32, 16, 2, stride=2)
        self.de_tconv3 = nn.ConvTranspose2d(16, 1, 2, stride=2)
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        x = F.relu(self.en_conv1(x))
        x = self.pool(x)
        x = F.relu(self.en_conv2(x))
        x = self.pool(x)
        x = F.relu(self.en_conv3(x))
        x = self.pool(x)  # compressed representation
        x = F.relu(self.de_tconv1(x))
        x = F.relu(self.de_tconv2(x))
        x = torch.sigmoid(self.de_tconv3(x))  # sigmoid to scale the grayscale pixel values to [0, 1]
        return x
And here are the tensor sizes I get when I run my training loop:
torch.Size([28, 1, 28, 28]) before conv1
torch.Size([28, 16, 28, 28]) after conv1
torch.Size([28, 16, 14, 14]) after pooling at conv1
torch.Size([28, 32, 14, 14]) after conv2
torch.Size([28, 32, 7, 7]) after pooling at conv2
torch.Size([28, 4, 7, 7]) after conv3
torch.Size([28, 4, 3, 3]) after final pooling (compressed representation)
torch.Size([28, 32, 6, 6]) after tconv1
torch.Size([28, 16, 12, 12]) after tconv2
torch.Size([28, 1, 24, 24]) after sigmoid
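For reference, I can reproduce that final shape outside the training loop with a dummy batch (assuming the imports and the class definition above; the data here is fake, just for checking shapes):

model = tCNN_Autoencoder()
dummy = torch.randn(28, 1, 28, 28)  # a batch of 28 fake MNIST-sized images
out = model(dummy)
print(out.size())  # torch.Size([28, 1, 24, 24]) -- 24x24 instead of the original 28x28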
I can see that I am losing 4 pixels in each of the h and w dimensions (24x24 out versus 28x28 in), but I don't completely understand how to scale back up to the original image size of 28x28 using the tconv layers.
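As far as I can tell, the missing pixels come from that final pooling step: 7 is odd, so MaxPool2d(2,2) floors it down to 3, and doubling 3 three times with the stride-2 tconv layers only gets back to 24. Here is a minimal sketch of the size arithmetic as I understand it, using the standard Conv2d / MaxPool2d / ConvTranspose2d output-size formulas (the helper function names are just mine for illustration):

def conv_out(size, kernel, stride=1, padding=0):
    # Conv2d / MaxPool2d output size: floor((size + 2*padding - kernel) / stride) + 1
    return (size + 2 * padding - kernel) // stride + 1

def tconv_out(size, kernel, stride=1, padding=0, output_padding=0):
    # ConvTranspose2d output size: (size - 1)*stride - 2*padding + kernel + output_padding
    return (size - 1) * stride - 2 * padding + kernel + output_padding

s = 28
s = conv_out(s, 3, padding=1)  # en_conv1:  28
s = conv_out(s, 2, stride=2)   # pool:      14
s = conv_out(s, 3, padding=1)  # en_conv2:  14
s = conv_out(s, 2, stride=2)   # pool:      7
s = conv_out(s, 3, padding=1)  # en_conv3:  7
s = conv_out(s, 2, stride=2)   # pool:      3  (7 is odd, so it gets floored)
s = tconv_out(s, 2, stride=2)  # de_tconv1: 6
s = tconv_out(s, 2, stride=2)  # de_tconv2: 12
s = tconv_out(s, 2, stride=2)  # de_tconv3: 24, not 28
print(s)                       # 24

So the last pool seems to be where the mismatch starts, but I still don't see how to set up the tconv layers so the decoder lands back on 28x28.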