Why am I getting some tensors in my decoder portion of my VAE model being greater than 1. For example one recreation of an image from the decoder has a max of tensor(1.0000002384).

I found this out after having trouble using matplotlib and getting warnings saying that my tensors were not in range of [0,1]. I don’t understand because I am using sigmoid output function which I thought would limit it to [0,1].

Here is my model:

```
class Sampling(nn.Module):
def __init__(self, device):
super().__init__()
self.device = device
def forward(self, z_mean, z_log_var, training=False):
batch = z_mean.size(0)
dim = z_mean.size(1)
# during training, we want to sample from z_log_var, thus get epsilon
if training:
# epsilon is a random sample from standard normal distribution
# do this for 2x2 dimensions
epsilon = torch.normal(mean=0.0, std=1.0, size=(batch, dim)).to(self.device)
# during testing, we don't want variance to play a role
else:
# make epsilon 0 so that we don't have variance and just the mean
epsilon = torch.zeros(batch, dim).to(self.device)
# sigma of z_mean is exp(log(sigma^2)/2)
# thus z_mean + sigma * (normal distribution sample) = z_sample
return z_mean + torch.exp(0.5 * z_log_var) * epsilon
class Encoder(nn.Module):
def __init__(self, device):
self.device = device
super().__init__()
# first convolution
self.conv1 = nn.Sequential(
nn.Conv2d(in_channels=3, out_channels=128,
kernel_size=3, stride=2,
padding=1),
# batchnorm is done per channel
nn.BatchNorm2d(128),
nn.LeakyReLU()
)
# second convolution
self.conv2 = nn.Sequential(
# reducing the y*x dimension for each feature map by half
nn.Conv2d(in_channels=128, out_channels=128,
kernel_size=3, stride=2,
padding=1),
nn.BatchNorm2d(128),
nn.LeakyReLU()
)
# third convolution
self.conv3 = nn.Sequential(
# reducing the y*x dimension for each feature map by half
nn.Conv2d(in_channels=128, out_channels=128,
kernel_size=3, stride=2,
padding=1),
nn.BatchNorm2d(128),
nn.LeakyReLU()
)
# fourth convolution
self.conv4 = nn.Sequential(
# reducing the y*x dimension for each feature map by half
nn.Conv2d(in_channels=128, out_channels=128,
kernel_size=3, stride=2,
padding=1),
nn.BatchNorm2d(128),
nn.LeakyReLU()
)
# fifth convolution
self.conv5 = nn.Sequential(
# reducing the y*x dimension for each feature map by half
nn.Conv2d(in_channels=128, out_channels=128,
kernel_size=3, stride=2,
padding=1),
nn.BatchNorm2d(128),
nn.LeakyReLU()
)
# take flattened layer and map to 200 nodes to represent 200-dimensional latent space
self.fc1 = nn.Linear(512, 200)
self.fc2 = nn.Linear(512, 200)
# sample from distribution to get a z for decoder to use
self.sample = Sampling(self.device)
def forward(self, input_data, training=False):
x = self.conv1(input_data)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.conv5(x)
flatten = nn.Flatten()(x)
z_mean = self.fc1(flatten)
z_log_var = self.fc2(flatten)
z = self.sample(z_mean, z_log_var, training)
return z, z_mean, z_log_var
class Decoder(nn.Module):
def __init__(self):
super().__init__()
# take 2 nodes and fully connect to 2048 nodes
self.fc1 = nn.Sequential(
nn.Linear(200, 512),
nn.BatchNorm1d(512),
nn.LeakyReLU()
)
# apply a reshape from a vector of 2048 to (128,4,4)
# first transposed convolution
self.tp1 = nn.Sequential(
# take 128 channels and convert to 128 channels
# here we get (128, 8, 8,), thus expanding channels my double
nn.ConvTranspose2d(
in_channels=128, out_channels=128,
kernel_size=3, stride=2,
padding=1, output_padding=1
),
nn.BatchNorm2d(128),
nn.LeakyReLU()
)
# second transposed convolution
self.tp2 = nn.Sequential(
# take 128 channels and convert to 64 channels
# here we get (64, 16, 16), thus we reduce channels by half and double channels width and length
nn.ConvTranspose2d(
in_channels=128, out_channels=128,
kernel_size=3, stride=2,
padding=1, output_padding=1
),
nn.BatchNorm2d(128),
nn.LeakyReLU()
)
# third transposed convolution
self.tp3 = nn.Sequential(
# take 64 channels and convert to 32 channels
# here we get (32, 32, 32), thus we reduce channels by half and double channels width and length
nn.ConvTranspose2d(
in_channels=128, out_channels=128,
kernel_size=3, stride=2,
padding=1, output_padding=1
),
nn.BatchNorm2d(128),
nn.LeakyReLU()
)
# fourth transposed convolution
self.tp4 = nn.Sequential(
# take 64 channels and convert to 32 channels
# here we get (32, 32, 32), thus we reduce channels by half and double channels width and length
nn.ConvTranspose2d(
in_channels=128, out_channels=128,
kernel_size=3, stride=2,
padding=1, output_padding=1
),
nn.BatchNorm2d(128),
nn.LeakyReLU()
)
self.tp5 = nn.Sequential(
# take 64 channels and convert to 32 channels
# here we get (32, 32, 32), thus we reduce channels by half and double channels width and length
nn.ConvTranspose2d(
in_channels=128, out_channels=3,
kernel_size=3, stride=2,
padding=1, output_padding=1
),
nn.Sigmoid()
)
def forward(self, input_data):
x = self.fc1(input_data)
x = torch.reshape(x, (-1, 128, 2, 2))
x = self.tp1(x)
x = self.tp2(x)
x = self.tp3(x)
x = self.tp4(x)
x = self.tp5(x)
return x
class VAE(nn.Module):
# initialize autoencoder with encoder and decoder objects
def __init__(self, encoder, decoder):
super().__init__()
self.encoder = encoder
self.decoder = decoder
def forward(self, input_data, training=False):
# first pass image to be decoded to latent space
z, z_mean, z_log_var = self.encoder(input_data, training)
# decode image from latent space back to pixel space
reconstruction = self.decoder(z)
# return not only reconstruction, but also z_mean and z_log_var
# all 3 will be used in the loss function
return z_mean, z_log_var, reconstruction
```