Access lower dimensional encoded data of autoencoder

Here is an autoencoder I’m working on, from this tutorial: https://debuggercafe.com/implementing-deep-autoencoder-in-pytorch/

I’m just learning about autoencoders, and I’ve modified the source to encode a small custom dataset consisting of:
[0,1,0,1,0,1,0,1,0],[0,1,1,0,0,1,0,1,0],[0,1,1,0,0,1,0,1,0],[0,1,1,0,0,1,0,1,0]

It seems to work OK, but I’m unsure how to access the lower-dimensional embedding values of dimension 2 (set by the out_features parameter).

Do I need to add a method to the Autoencoder class to return the embedding? Accessing the lower-dimensional embedding seems like a standard task, so perhaps there is a ‘PyTorch’ way of accomplishing this. Any suggestions on my code are greatly appreciated.

# import packages
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import numpy as np
from torch.utils.data import DataLoader

# utility functions
def get_device():
    if torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'
    return device

device = get_device()

features = torch.tensor(np.array([
    [0,1,0,1,0,1,0,1,0],
    [0,1,1,0,0,1,0,1,0],
    [0,1,1,0,0,1,0,1,0],
    [0,1,1,0,0,1,0,1,0],
])).float()
tic_tac_toe_data_loader = DataLoader(features, batch_size=1, shuffle=True)

class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()

        # encoder
        self.enc1 = nn.Linear(in_features=9, out_features=2)

        # decoder 
        self.dec1 = nn.Linear(in_features=2, out_features=9)

    def forward(self, x):
        
        x = F.relu(self.enc1(x))
        x = F.relu(self.dec1(x))
        
        return x

net = Autoencoder()
net.to(device)

NUM_EPOCHS = 50
LEARNING_RATE = 1e-3

criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)

# image transformations (leftover from the MNIST tutorial; not used for this dataset)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

def train(net, trainloader, NUM_EPOCHS):
    train_loss = []
    for epoch in range(NUM_EPOCHS):
        running_loss = 0.0
        for data in trainloader:
            img = data
            img = img.to(device)
            img = img.view(img.size(0), -1)
#             print('img.shape' , img.shape)
            optimizer.zero_grad()
            outputs = net(img)
            loss = criterion(outputs, img)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        loss = running_loss / len(trainloader)
        train_loss.append(loss)

    return train_loss

# train the network
train_loss = train(net, tic_tac_toe_data_loader, NUM_EPOCHS)
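
With this first version, one way to inspect the 2-D embedding is to apply just the trained encoder layer, mirroring the encoder step of forward(). A minimal sketch, reusing the net, features, and device defined above:

# inspect the learned 2-D codes without running the decoder
net.eval()                  # switch to inference mode
with torch.no_grad():       # no gradients needed for inspection
    embeddings = F.relu(net.enc1(features.to(device)))
print(embeddings)           # shape (4, 2): one 2-D code per sample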

Update:

I have updated the code as follows:

# import packages
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import numpy as np
from torch.utils.data import DataLoader

# utility functions
def get_device():
    if torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'
    return device

device = get_device()

features = torch.tensor(np.array([
    [0,1,0,1,0,1,0,1,0],
    [0,1,1,0,0,1,0,1,0],
    [0,1,1,0,0,1,0,1,0],
    [0,1,1,0,0,1,0,1,0],
])).float()
tic_tac_toe_data_loader = DataLoader(features, batch_size=1, shuffle=True)

class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        self.fc1 = nn.Linear(in_features=9, out_features=2)

    def forward(self, x):
        return torch.sigmoid(self.fc1(x))  # F.sigmoid is deprecated

class Decoder(nn.Module):
    def __init__(self):
        super(Decoder, self).__init__()
        self.fc1 = nn.Linear(in_features=2, out_features=9)
        
    def forward(self, x):
        return torch.sigmoid(self.fc1(x))  # F.sigmoid is deprecated

    
class Autoencoder(nn.Module):
    
    def __init__(self):
        super(Autoencoder, self).__init__()
        self.fc1 = Encoder()
        self.fc2 = Decoder()

    def forward(self, x):
        return self.fc2(self.fc1(x))

net = Autoencoder()
net.to(device)

NUM_EPOCHS = 50
LEARNING_RATE = 1e-3

criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)

# image transformations (leftover from the MNIST tutorial; not used for this dataset)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

outputs = None  # note: never updated; the assignment inside train() creates a local variable

def train(net, trainloader, NUM_EPOCHS):
    train_loss = []
    for epoch in range(NUM_EPOCHS):
        running_loss = 0.0
        for data in trainloader:
            img = data
            img = img.to(device)
            img = img.view(img.size(0), -1)
#             print('img.shape' , img.shape)
            optimizer.zero_grad()
            
            outputs = net(img)
            loss = criterion(outputs, img)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        loss = running_loss / len(trainloader)
        train_loss.append(loss)

    return train_loss

# train the network
train_loss = train(net, tic_tac_toe_data_loader, NUM_EPOCHS)

I can access the lower-dimensional embedding using

print(Encoder().forward( torch.tensor(np.array([0,1,0,1,0,1,0,1,0])).float()))

But is this using the trained weight values for the embedding? If I call Encoder multiple times with the same values:


print(Encoder().forward( torch.tensor(np.array([0,1,0,1,0,1,0,1,0])).float()))
print(Encoder().forward( torch.tensor(np.array([0,1,0,1,0,1,0,1,0])).float()))

different results are returned:


tensor([0.5083, 0.5020], grad_fn=<SigmoidBackward>)
tensor([0.4929, 0.6940], grad_fn=<SigmoidBackward>)

Why is this the case? Is an extra training step being invoked as a result of calling Encoder?

I think you are creating two different objects of the Encoder class, each of which initializes its weights with different random values. Try this:

enc = Encoder()
print(enc.forward(...))
print(enc.forward(...))
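
Each call to Encoder() constructs a brand-new module whose weights are freshly randomly initialized, so neither call above uses the weights learned during training. To read the embedding produced by the trained weights, use the encoder submodule of the trained net; a minimal sketch along these lines, reusing the net trained above (net.fc1 is its Encoder):

net.eval()                       # inference mode
with torch.no_grad():            # no gradient tracking needed for inspection
    sample = torch.tensor(np.array([0,1,0,1,0,1,0,1,0])).float().to(device)
    embedding = net.fc1(sample)  # trained Encoder: same output on every call
print(embedding)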