Hello, I have been trying to find an out-of-the-box autoencoder that lets me choose the size of my latent vector.
I’ve been working on a problem where I want to encode some 128x128 grayscale images into a latent vector of 256 data points using a convolutional autoencoder.
I’ve been trying to adapt some existing examples, but without success.
class Encoder(nn.Module):
    """Convolutional VAE encoder.

    Five strided convolutions followed by global average pooling and a small
    fully connected head that produces ``mu``, ``log_var`` and a sampled
    latent tensor.

    The module-level settings ``image_channels``, ``init_channels``,
    ``kernel_size`` and ``latent_dim`` are read when the encoder is
    constructed.
    """

    def __init__(self):
        super().__init__()
        ### Convolutional section
        self.enc1 = nn.Conv2d(
            in_channels=image_channels, out_channels=init_channels,
            kernel_size=kernel_size, stride=2, padding=1
        )
        self.enc2 = nn.Conv2d(
            in_channels=init_channels, out_channels=init_channels*2,
            kernel_size=kernel_size, stride=2, padding=1
        )
        self.enc3 = nn.Conv2d(
            in_channels=init_channels*2, out_channels=init_channels*4,
            kernel_size=kernel_size, stride=2, padding=1
        )
        # Output width follows the doubling pattern so that it always matches
        # enc5's input width (a hard-coded 64 only worked for init_channels=8).
        self.enc4 = nn.Conv2d(
            in_channels=init_channels*4, out_channels=init_channels*8,
            kernel_size=kernel_size, stride=2, padding=1
        )
        self.enc5 = nn.Conv2d(
            in_channels=init_channels*8, out_channels=128,
            kernel_size=kernel_size, stride=2, padding=0
        )
        # fully connected layers for learning representations
        self.fc1 = nn.Linear(128, 128)
        self.fc_mu = nn.Linear(128, latent_dim)
        self.fc_log_var = nn.Linear(128, latent_dim)
        # Must emit exactly `latent_dim` features: forward() reshapes the
        # result to (batch, latent_dim, 1, 1). Emitting 128 features and
        # viewing them as `latent_dim` channels mixed samples across the batch
        # whenever latent_dim != 128.
        self.fc2 = nn.Linear(latent_dim, latent_dim)

    def reparameterize(self, mu, log_var):
        """Draw a sample ``mu + eps * std`` with ``eps`` standard normal.

        :param mu: mean from the encoder's latent space
        :param log_var: log variance from the encoder's latent space
        :return: tensor with the same shape as ``mu``
        """
        std = torch.exp(0.5*log_var)  # standard deviation
        eps = torch.randn_like(std)  # `randn_like` as we need the same size
        sample = mu + (eps * std)  # sampling
        return sample

    def forward(self, x):
        """Encode a batch of images.

        :param x: image batch; assumed to be
            (batch, image_channels, height, width) -- TODO confirm with caller
        :return: tuple ``(mu, log_var, z)`` where ``mu`` and ``log_var`` are
            (batch, latent_dim) and ``z`` is (batch, latent_dim, 1, 1)
        """
        # encoding
        x = F.relu(self.enc1(x))
        x = F.relu(self.enc2(x))
        x = F.relu(self.enc3(x))
        x = F.relu(self.enc4(x))
        x = F.relu(self.enc5(x))
        batch = x.shape[0]
        # Global average pooling collapses whatever spatial extent is left,
        # leaving one 128-feature vector per sample.
        x = F.adaptive_avg_pool2d(x, 1).reshape(batch, -1)
        hidden = self.fc1(x)
        # get `mu` and `log_var`
        mu = self.fc_mu(hidden)
        log_var = self.fc_log_var(hidden)
        # get the latent vector through reparameterization
        z = self.reparameterize(mu, log_var)
        z = self.fc2(z)
        # Pin the batch dimension explicitly so a feature-count mismatch
        # raises instead of silently changing the batch size.
        z = z.view(batch, -1, 1, 1)
        return mu, log_var, z
# Instantiate the encoder and place it on the target device in one step;
# nn.Module.to() returns the module itself.
encoder = Encoder().to(device)
class Decoder(nn.Module):
    """Upsample-and-convolve decoder.

    Seven stages, each doubling the spatial size and then applying a 3x3
    convolution, so the spatial extent grows by a factor of 128 overall while
    the channel count shrinks from ``latent_dim`` to 1.

    The module-level setting ``latent_dim`` is read when the decoder is
    constructed.
    """

    def __init__(self):
        super().__init__()
        self.dec1 = nn.Upsample(scale_factor=2)
        self.dec2 = nn.Upsample(scale_factor=2)
        self.dec3 = nn.Upsample(scale_factor=2)
        self.dec4 = nn.Upsample(scale_factor=2)
        self.dec5 = nn.Upsample(scale_factor=2)
        self.dec6 = nn.Upsample(scale_factor=2)
        self.dec7 = nn.Upsample(scale_factor=2)
        self.conv1 = nn.Conv2d(latent_dim, 128, 3, padding=1)
        self.conv2 = nn.Conv2d(128, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 32, 3, padding=1)
        self.conv4 = nn.Conv2d(32, 16, 3, padding=1)
        self.conv5 = nn.Conv2d(16, 8, 3, padding=1)
        self.conv6 = nn.Conv2d(8, 4, 3, padding=1)
        self.conv7 = nn.Conv2d(4, 1, 3, padding=1)

    def forward(self, x):
        """Decode a latent tensor into an image.

        :param x: latent tensor with ``latent_dim`` channels; presumably
            (batch, latent_dim, 1, 1) as produced by the encoder -- TODO confirm
        :return: tuple ``(reconstruction, x)`` where ``reconstruction`` is the
            sigmoid of the last convolution's output and ``x`` is that
            pre-sigmoid output
        """
        x = self.dec1(x)
        x = F.relu(self.conv1(x))
        x = self.dec2(x)
        x = F.relu(self.conv2(x))
        x = self.dec3(x)
        x = F.relu(self.conv3(x))
        x = self.dec4(x)
        x = F.relu(self.conv4(x))
        x = self.dec5(x)
        x = F.relu(self.conv5(x))
        x = self.dec6(x)
        x = F.relu(self.conv6(x))
        x = self.dec7(x)
        # No ReLU on the output layer: sigmoid(relu(.)) can only produce
        # values in [0.5, 1), which makes dark pixels unreachable and keeps
        # the BCE loss from converging.
        x = self.conv7(x)
        reconstruction = torch.sigmoid(x)
        return reconstruction, x
# Instantiate the decoder and place it on the target device;
# nn.Module.to() returns the module itself.
decoder = Decoder().to(device)
# One parameter group per network, both driven by a single Adam optimizer.
params_to_optimize = [{'params': net.parameters()} for net in (encoder, decoder)]
optim = torch.optim.Adam(params_to_optimize, weight_decay=1e-05, lr=lr)
# Binary cross-entropy summed (not averaged) over every element.
criterion = nn.BCELoss(reduction='sum')
I customized a standard autoencoder implementation into this, but I can’t even get it to converge; I am actually getting some random results.
I would like to know if you can point me to some examples where the size of the latent vector is customized, or tell me why my solution is failing so badly.
Kind regards