Hi everyone,
I am training a VAE model which will take a list of numpy arrays and train a VAE model based on those arrays. My VAE model is inspired by the Transformer model as the input arrays is coming from a Image transformer. But after defining the model during training the loss is going to nan after the first epoch.
My encoder model looks something like this
class Encoder(nn.Module):
def init(self, embedding_dim:int=768,
mlp_size:int=512,
m_size:int=256,
dropout:int=0.1,
latent_dims:int=128):
super(Encoder, self).init()
self.layer_norm = nn.LayerNorm(normalized_shape=embedding_dim)
self.fc1 = nn.Linear(embedding_dim, 512)
self.fc2 = nn.Linear(512, 256)
self.fc3 = nn.Linear(256, latent_dims)
self.fc4 = nn.Linear(256, latent_dims)
self.N = torch.distributions.Normal(0, 1)
self.N.loc = self.N.loc
self.N.scale = self.N.scale
self.kl = 0def forward(self, x): x = self.layer_norm(x) x = F.relu(self.fc1(x)) x = F.relu(self.fc2(x)) mu = self.fc3(x) sigma = torch.exp(self.fc4(x)) z = mu + sigma*self.N.sample(mu.shape) self.kl = (sigma**2 + mu**2 - torch.log(sigma) - 1/2).sum() return z
My decoder model is
class Decoder(nn.Module):
def init(self, embedding_dim:int=768,
mlp_size:int=512,
m_size:int=256,
dropout:int=0.1,
latent_dims:int=128):
super(Decoder, self).init()
self.fc1 = nn.Linear(latent_dims, 256)
self.fc2 = nn.Linear(256, 512)
self.fc3 = nn.Linear(512, embedding_dim)def forward(self, z): x = F.leaky_relu(self.fc1(z)) x = F.leaky_relu(self.fc2(x)) x = self.fc3(x) return x
My entire VAE model
class VariationalAutoencoder(nn.Module):
def init(self, embedding_dim:int=768,
mlp_size:int=512,
m_size:int=256,
dropout:int=0.1,
latent_dims:int=128):
super(VariationalAutoencoder, self).init()
self.encoder = Encoder(embedding_dim=768,
mlp_size=512,
m_size=256,
dropout=0.1,
latent_dims=128)
self.decoder = Decoder(embedding_dim=768, latent_dims=128)def forward(self, x): z = self.encoder(x) y = self.decoder(z) return z, y
I tried initializing the model with random weights and i found its working fine. But when i am training the loss goes to nan
vae = VariationalAutoencoder(latent_dims=128)
optimizer = torch.optim.Adam(vae.parameters(),
lr=10-6,
weight_decay=0.1)
num_epochs = 10
for epoch in range(num_epochs):
for i, x in enumerate(train_dataloader_custom):
z, y = vae(x)
loss = torch.nn.MSELoss()
reconst_loss = loss(y, x)
optimizer.zero_grad()
reconst_loss.backward()
optimizer.step()
print(reconst_loss)
Thanks for any kind of help