I’m trying haphazardly to implement a VAE on a two dimensional image. I’d like to have the latent space also be 2d (because I read somewhere it was a good idea.) This is my model:
class VAE(nn.Module):
def __init__(self):
super().__init__()
# input shape = [128,128,1]
self.el1 = nn.Conv2d(in_channels=n_input_channels,out_channels=n_latent_channels*2,kernel_size=4, padding=2, stride=4)
# [32, 32, 64]
self.el2 = nn.ReLU(inplace=True)
# [32, 32, 64]
self.el3 = nn.MaxPool2d(kernel_size=2, stride=2)
# [16, 16, 64]
self.dl1 = nn.ConvTranspose2d(in_channels=n_latent_channels, out_channels=n_latent_channels, kernel_size=[2,2], stride=[2,2])
# [32, 32, 64]
self.dl2 = nn.ConvTranspose2d(in_channels=n_latent_channels, out_channels=n_input_channels, kernel_size=[4,4], stride=[4,4])
# [128, 128, 1]
def reparameterise(self, mu, logvar):
if self.training:
std = logvar.mul(0.5).exp_()
eps = std.data.new(std.size()).normal_()
return eps.mul(std).add_(mu)
else:
return mu
def encode(self, x):
x = self.el1(x)
x = self.el2(x)
x = self.el3(x)
latent = x
mu = latent[:, :n_latent_channels, :, :]
logvar = latent[:, n_latent_channels:, :, :]
return mu, logvar
def decode(self, z):
z = self.dl1(z)
z = self.dl2(z)
return z
def forward(self, x):
mu, logvar = self.encode(x)
z = self.reparameterise(mu, logvar)
x_hat = self.decode(z)
return x_hat, mu, logvar
def sample(self, n_samples):
z = torch.randn((n_samples, n_latent_channels, 16, 16)).to(device)
return self.decode(z)
And this is my loss function:
def loss_function(x_hat, x, mu, logvar):
# reconstruction loss (pushing the points apart)
BCE = nn.functional.binary_cross_entropy(
x_hat, x, reduction='sum'
)
# KL divergence loss (the relative entropy between two distributions a multivariate gaussian and a normal)
# (enforce a radius of 1 in each direction + pushing the means towards zero)
KLD = 0.5 * torch.sum(logvar.exp()  logvar  1 + mu.pow(2))
return BCE + KLD # we can use a beta parameter here (BCE + beta * KLD)

Is this architecture sane?

When I run the loss function I get
“/aten/src/ATen/native/cuda/Loss.cu:115: operator(): block: [31,0,0], thread: [104,0,0] Assertion input_val >= zero && input_val <= one
failed”
which I assume means I haven’t normalised something during the encoder?
Any help would be greatly appreciated.
Thanks,
Ian