I want to make a Deep Autoencoder using Resnet34.
Based on these sites (https://www.kaggle.com/code/khoongweihao/resnet-34-pytorch-starter-kit/notebook ,
https://medium.com/pytorch/implementing-an-autoencoder-in-pytorch-19baa22647d1), I try to make autoencoder code for my understanding.
Unfortunately, my code has a bug about the nn.Module Class, but I don not know how I fix if.
Whoever is familiar with this class’s management, please provide the optimal solution.
Thanks.
Here is my trial code.
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
"""Set our seed and other configurations for reproducibility."""
seed = 42
torch.manual_seed(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
"""We set the batch size, the number of training epochs, and the learning rate."""
batch_size = 512
epochs = 20
learning_rate = 1e-3
"""## Dataset
We load our MNIST dataset using the `torchvision` package.
"""
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
train_dataset = torchvision.datasets.MNIST(
root="~/torch_datasets", train=True, transform=transform, download=True
)
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=batch_size, shuffle=True
)
print('train_loader')
"""## Autoencoder
An autoencoder is a type of neural network that finds the function mapping the features x to itself. This objective is known as reconstruction, and an autoencoder accomplishes this through the following process: (1) an encoder learns the data representation in lower-dimension space, i.e. extracting the most salient features of the data, and (2) a decoder learns to reconstruct the original data based on the learned representation by the encoder.
We define our autoencoder class with fully connected layers for both its encoder and decoder components.
"""
#ResNet###############################################
class ResidualBlock(nn.Module):
def __init__(self,in_features,out_features,stride=1,kernel_size=3,padding=1,bias=False):
super().__init__()
self.cnn1 = nn.Sequential(
nn.Conv2d(in_features,out_features,kernel_size,stride,padding,bias=False),
nn.BatchNorm2d(out_features),
nn.ReLU(True)
)
self.cnn2 = nn.Sequential(
nn.Conv2d(in_features,out_features,kernel_size,1,padding,bias=False),
nn.BatchNorm2d(out_features)
)
if stride != 1 or in_features != out_features:
self.shortcut = nn.Sequential(
nn.Conv2d(in_features,out_features,kernel_size=1,stride=stride,bias=False),
nn.BatchNorm2d(out_features)
)
else:
self.shortcut = nn.Sequential()
def forward(self,x):
residual = x
x = self.cnn1(x)
x = self.cnn2(x)
x += self.shortcut(residual)
x = nn.ReLU(True)(x)
return x
class EncoderResNet34(nn.Module):
def __init__(self,x):
super().__init__()
self.block1 = nn.Sequential(
nn.Conv2d(1,64,kernel_size=2,stride=2,padding=3,bias=False),
nn.BatchNorm2d(64),
nn.ReLU(True)
)
self.block2 = nn.Sequential(
nn.MaxPool2d(1,1),
ResidualBlock(64,64),
ResidualBlock(64,64,2),
nn.ReLU(True)
)
self.block3 = nn.Sequential(
ResidualBlock(64,128),
ResidualBlock(128,128,2)
)
self.block4 = nn.Sequential(
ResidualBlock(128,256),
ResidualBlock(256,256,2)
)
self.block5 = nn.Sequential(
ResidualBlock(256,512),
ResidualBlock(512,512,2)
)
self.avgpool = nn.AvgPool2d(2)
def forward(self,x):
print(x)
exit()
x = self.block1(x)
x = self.block2(x)
x = self.block3(x)
x = self.block4(x)
x = self.block5(x)
x = self.avgpool(x)
x = x.view(x.size(0),-1)
return x
class AE(nn.Module):
def __init__(self, **kwargs):
super().__init__()
self.encoder_hidden_layer = nn.Linear(
in_features=kwargs["input_shape"], out_features=128
)
self.encoder_output_layer = nn.Linear(
in_features=128, out_features=128
)
self.decoder_hidden_layer = nn.Linear(
in_features=128, out_features=128
)
self.decoder_output_layer = nn.Linear(
in_features=128, out_features=kwargs["input_shape"]
)
self.encoder = EncoderResNet34
self.decoder = EncoderResNet34 # Temporary
def forward(self, features):
activation = self.encoder_hidden_layer(features)
encoded = self.encoder(activation)
code = self.encoder_output_layer(encoded)
activation = self.decoder_hidden_layer(code)
decoded = self.decoder(activation)
reconstructed = self.decoder_output_layer(activation)
return reconstructed
"""Before using our defined autoencoder class, we have the following things to do:
1. We configure which device we want to run on.
2. We instantiate an `AE` object.
3. We define our optimizer.
4. We define our reconstruction loss.
"""
# use gpu if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('gpu aveilable')
# create a model from `AE` autoencoder class
# load it to the specified device, either gpu or cpu
model = AE(input_shape=784).to(device)
# create an optimizer object
# Adam optimizer with learning rate 1e-3
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# mean-squared error loss
criterion = nn.MSELoss()
"""We train our autoencoder for our specified number of epochs."""
for epoch in range(epochs):
loss = 0
for batch_features, tmp in train_loader:
# reshape mini-batch data to [N, 784] matrix
# load it to the active device
batch_features = batch_features.view(-1, 784).to('cuda')
# reset the gradients back to zero
# PyTorch accumulates gradients on subsequent backward passes
optimizer.zero_grad()
# compute reconstructions
outputs = model(batch_features)
# compute training reconstruction loss
train_loss = criterion(outputs, batch_features)
# compute accumulated gradients
train_loss.backward()
# perform parameter update based on current gradients
optimizer.step()
# add the mini-batch training loss to epoch loss
loss += train_loss.item()
# compute the epoch training loss
loss = loss / len(train_loader)
# display the epoch training loss
print("epoch : {}/{}, recon loss = {:.8f}".format(epoch + 1, epochs, loss))
"""Let's extract some test examples to reconstruct using our trained autoencoder."""
test_dataset = torchvision.datasets.MNIST(
root="~/torch_datasets", train=False, transform=transform, download=True
)
test_loader = torch.utils.data.DataLoader(
test_dataset, batch_size=10, shuffle=False
)
test_examples = None
with torch.no_grad():
for batch_features in test_loader:
batch_features = batch_features[0]
test_examples = batch_features.view(-1, 784).to('cuda')
reconstruction = model(test_examples)
break
"""## Visualize Results
Let's try to reconstruct some test images using our trained autoencoder.
"""
with torch.no_grad():
number = 10
plt.figure(figsize=(20, 4))
for index in range(number):
# display original
ax = plt.subplot(2, number, index + 1)
plt.imshow(test_examples[index].cpu().numpy().reshape(28, 28))
plt.gray()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
# display reconstruction
ax = plt.subplot(2, number, index + 1 + number)
plt.imshow(reconstruction[index].cpu().numpy().reshape(28, 28))
plt.gray()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
# plt.show()
plt.savefig('result.pdf')
And the error message is
File “autoencoder_pytorch.py”, line 164, in forward code = self.encoder_output_layer(encoded)
TypeError: linear(): argument ‘input’ (position 1) must be Tensor, not EncoderResNet34
I think the forward function in EncoderResNet34 class does not work, but I can not find out the solution…