I need to create a deep autoencoder for image denoising as an exercise, using MNIST as the dataset. I'm using PyTorch to build the model, and for the moment this is my class for the single-layer autoencoder:
class Autoencoder(nn.Module):
    """Single-hidden-layer autoencoder made of two linear maps.

    The encoder is ``Linear(input_dim, enc_dim)`` followed by ReLU.  The
    decoder is ``Linear(enc_dim, input_dim)`` followed by a sigmoid when
    ``last_layer`` is truthy, and by ReLU otherwise.

    Args:
        input_dim: Size of the vectors fed to the encoder and produced by
            the decoder.
        enc_dim: Size of the encoded (latent) vectors.
        last_layer: Selects the decoder activation (sigmoid if truthy,
            ReLU if falsy).
    """

    def __init__(self, input_dim, enc_dim, last_layer=True):
        super().__init__()
        self.last_layer = last_layer
        self.input_dim = input_dim
        self.enc_dim = enc_dim
        # Projection into the latent space (input_dim -> enc_dim).
        self.encoder = nn.Linear(input_dim, enc_dim)
        # Projection back to the original space (enc_dim -> input_dim).
        self.decoder = nn.Linear(enc_dim, input_dim)
        self.enc_activation = nn.ReLU()
        self.dec_act = nn.Sigmoid() if self.last_layer else nn.ReLU()

    def encode(self, x):
        """Return the ReLU-activated latent code of ``x``."""
        return self.enc_activation(self.encoder(x))

    def decode(self, x):
        """Map a latent code ``x`` back to the input space."""
        return self.dec_act(self.decoder(x))

    def forward(self, x):
        """Encode ``x`` and immediately decode it again."""
        return self.decode(self.encode(x))
Next, I build the deep autoencoder by stacking single-layer autoencoders in this way:
class DeepAutoEncoder(nn.Module):
    """Stack of single-layer ``Autoencoder`` modules.

    The first autoencoder maps ``input_dim -> hidden_layers[0]`` and is
    built with the default ``last_layer=True`` (sigmoid decoder).  Every
    following autoencoder maps the previous encoding size to the next
    entry of ``hidden_layers`` and is built with ``last_layer=False``
    (ReLU decoder).

    Args:
        input_dim: Size of the vectors fed to the first encoder.
        hidden_layers: Non-empty sequence of encoding sizes, outermost
            first.  The sequence is NOT modified.

    Raises:
        IndexError: If ``hidden_layers`` is empty.
    """

    def __init__(self, input_dim, hidden_layers):
        super().__init__()
        # Work on a private copy.  Popping from the caller's list (as the
        # previous implementation did) silently dropped its first element
        # and made it impossible to pass a tuple.
        layer_dims = list(hidden_layers)

        # First layer: its decoder reconstructs the image itself.
        self.autoencoders = [Autoencoder(input_dim, layer_dims[0])]
        print(f"Adding autoencoder with dim {input_dim} {layer_dims[0]}")
        input_dim = layer_dims[0]

        # Inner layers: each consumes the previous layer's encoding.
        for elem in layer_dims[1:]:
            print(f"Adding autoencoder with dim {input_dim} {elem}")
            self.autoencoders.append(Autoencoder(input_dim, elem, False))
            input_dim = elem

        # ModuleList registers the sub-modules (and their parameters) with
        # this module; the plain list above is kept for compatibility.
        self.model = nn.ModuleList(self.autoencoders)

    def encode(self, x):
        """Pass ``x`` through every encoder, outermost first."""
        enc = x
        for ae in self.model:
            enc = ae.encode(enc)
        return enc

    def decode(self, x):
        """Pass ``x`` through every decoder, innermost first."""
        dec = x
        # Decoders are applied in the reverse order of the encoders.
        for ae in self.model[::-1]:
            dec = ae.decode(dec)
        return dec

    def forward(self, x):
        """Encode ``x`` through the whole stack, then decode it back."""
        return self.decode(self.encode(x))
Then I train the model layer by layer with the following code; the encoded representation of the input is passed on to the next autoencoder:
# Greedy layer-wise training: every autoencoder in the stack is fitted on
# the output of the encoder that precedes it.
data_in = X_train
hidden_layers = [512, 256, 128]
deep_ae = DeepAutoEncoder(input_dim=X_train.shape[1], hidden_layers=hidden_layers)
for ae in deep_ae.model:
    fit(epochs=2, model=ae, X_train=data_in)
    # Build the latent representation of the data with the layer that was
    # just trained; it becomes the training set (and the data-loader
    # content inside ``fit``) for the next layer.  No gradients are needed.
    with torch.no_grad():
        data_in = ae.encode(data_in)
These are the fit and evaluate functions. X_train is just the dataset without the labels:
def evaluate_MSE(model, eval_dataset):
    """Return the mean-squared reconstruction error of ``model``.

    The whole ``eval_dataset`` is pushed through ``model`` in a single
    forward pass and compared with itself using ``F.mse_loss``.

    The model is switched to evaluation mode for the measurement, so that
    layers such as dropout or batch-norm behave deterministically.  Its
    previous train/eval mode is restored afterwards, even if the forward
    pass raises.

    Args:
        model: An ``nn.Module`` mapping a batch to a reconstruction of the
            same shape.
        eval_dataset: Tensor used both as model input and as target.

    Returns:
        The loss as a Python float.
    """
    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for a pure measurement.
        with torch.no_grad():
            eval_output = model(eval_dataset)
            loss = F.mse_loss(eval_output, eval_dataset)
    finally:
        # Hand the model back in the mode the caller left it in.
        model.train(was_training)
    return loss.item()
def fit(epochs, model, X_train):
    """Train ``model`` as a denoising autoencoder on ``X_train``.

    Every mini-batch (size 20, shuffled) is corrupted with
    ``add_noise(batch, 0, noise_std)`` and the model is optimised with Adam
    (lr 0.001) to reproduce the uncorrupted batch under an MSE loss.
    After each epoch the value of ``evaluate_MSE(model, X_train)`` is
    printed and recorded.

    ``add_noise`` and ``noise_std`` are module-level names defined outside
    this function.

    Args:
        epochs: Number of passes over ``X_train``.
        model: The module to train.
        X_train: Training data, without labels.

    Returns:
        List with one evaluated loss per epoch.
    """
    print(X_train.shape)

    # Data pipeline, objective and optimiser.
    loader = torch.utils.data.DataLoader(X_train, batch_size=20, shuffle=True)
    loss_fn = nn.MSELoss()
    adam = torch.optim.Adam(model.parameters(), lr=1e-3)

    history = []
    for epoch in range(1, epochs + 1):
        model.train()
        for clean in loader:
            # Corrupt the input; the clean batch stays the target.
            corrupted = add_noise(clean, 0, noise_std)
            reconstruction = model(corrupted)
            batch_loss = loss_fn(reconstruction, clean)

            # Clear stale gradients, back-propagate, update the weights.
            adam.zero_grad()
            batch_loss.backward()
            adam.step()

        train_loss = evaluate_MSE(model, X_train)
        print(f'Epoch: {epoch} \tTraining Loss: {train_loss}')
        history.append(train_loss)
    return history
This is what I obtain when I run the training. As you can see, the loss is diverging. How can I solve this?
torch.Size([60000, 784])
Epoch: 1 Training Loss: 0.027089910581707954
Epoch: 2 Training Loss: 0.010246861726045609
torch.Size([60000, 512])
Epoch: 1 Training Loss: 2.82458233833313
Epoch: 2 Training Loss: 3.0086123943328857
torch.Size([60000, 256])
Epoch: 1 Training Loss: 176.9993896484375
Epoch: 2 Training Loss: 207.57615661621094