Hi everyone!
I’m trying to deploy an autoencoder with KL regularization applied only ad the bottleneck layer (the output layer of the encoder)
The structure is the following:
-
Encoder:
input layer: 100neurons
hidden layer 1: 40neurons
hidden layer 2: 20 neurons - Encoder Output layer == Decoder Input Layer: 4 neurons
-
Decoder:
hidden layer 1: 20 neurons
hidden layer 2: 40neurons - Decoder Output layer == Encoder Input Layer: 100neurons
Here it is the code:
# define the autoencoder model
class SparseAutoencoder(nn.Module):
def __init__(self):
super(SparseAutoencoder, self).__init__()
# encoder
self.enc1 = nn.Linear(in_features=100, out_features=40)
self.enc2 = nn.Linear(in_features=40, out_features=20)
self.enc3 = nn.Linear(in_features=20, out_features=4)
# decoder
self.dec1 = nn.Linear(in_features=4, out_features=20)
self.dec2 = nn.Linear(in_features=20, out_features=40)
self.dec3 = nn.Linear(in_features=40, out_features=100)
def forward(self, x):
# encoding
x = F.relu(self.enc1(x))
x = F.relu(self.enc2(x))
x = F.relu(self.enc3(x))
y = x # encoded data
# decoding
x = F.relu(self.dec1(x))
x = F.relu(self.dec2(x))
x = F.relu(self.dec3(x))
return y,x
# the loss function
criterion = nn.MSELoss()
def kl_divergence(rho, rho_hat):
rho_hat = torch.mean(torch.sigmoid(rho_hat), 1)
rho = torch.tensor([rho] * len(rho_hat)).to(device)
return torch.sum(rho * torch.log(rho/rho_hat) + (1 - rho) * torch.log((1 - rho)/(1 - rho_hat)))
def sparse_loss(rho, x):
values = x
loss = 0
for i in range(len(model_children)):
values = model_children[i](values)
if i==3:
loss += kl_divergence(rho, values)
return loss
# define the training function
def fit(model, dataloader, epoch ):
print('Training')
model.train()
# running_loss variable will help us calculate the batch-wise loss.
running_loss = 0.0
# we will use the counter to calculate the per epoch loss.
counter = 0
# we iterate through the data.
for i, data in tqdm(enumerate(dataloader), total=int(len(dataloader.dataset)/dataloader.batch_size)):
counter += 1
data = data.view(-1, 200)
data = data.to(device)
# we update the gradients to zero
optimizer.zero_grad()
# and we compute the outputs
_,outputs = model(data)
# we get the mse_loss
mse_loss = criterion(outputs, data)
if ADD_SPARSITY == 'yes':
# Then we call the sparse_loss function
sparsity = sparse_loss(RHO, data)
# and calculate the final sparsity constraint (we add the sparsity penalty)
loss = mse_loss + BETA * sparsity
else:
loss = mse_loss
# We backpropagate the gradients
loss.backward()
# We update the model parameters
optimizer.step()
# We calculate the batch loss
running_loss += loss.item()
# We calculate the epoch_loss
epoch_loss = running_loss / counter
print(f"Train Loss: {epoch_loss:.3f}")
return epoch_loss
My problem is that only output neurons fires, regardless the input. The activated neuron is always the same one. The fact that 1 out of 4 fires is good, since I’m applying KL divergence, but the activated one can - and must - change with the input.
Maybe there is a bug in the code? Am I doing something wrong?
Thank you for you time and help in advanced.