I am working with a VAE and, for reasons I can't pin down, during training both the output of the VAE and the output of the encoder come out as NaN. I am reading a CSV file whose rows are my data. Here's my code:
My imports and data loader:

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

latent_dims = 2  # I set this earlier in my script; 2 is just an example value

class data_gen(torch.utils.data.Dataset):
    def __init__(self, files):
        self.data = files

    def __getitem__(self, i):
        tmp = self.data[i]
        tmp = np.reshape(tmp, (1025,))        # flatten each row to a 1025-dim vector
        tmp = torch.from_numpy(tmp).float()
        return tmp

    def __len__(self):
        return len(self.data)
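For completeness, this is roughly how I wrap it in a DataLoader (the batch size is just an example; norm_rows comes from the normalisation step shown below):

dataset = data_gen(norm_rows)  # norm_rows: my normalised CSV rows (see below)
loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)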
Here’s how I am normalising it:
def norm(data):
    # scale each value to [-1, 1] using the row's min and max
    lo, hi = np.amin(data), np.amax(data)
    norm_data = []
    for j in data:
        temp = float(2.0 * (j - lo) / (hi - lo) - 1.0)
        norm_data.append(temp)
    return norm_data
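To rule out the data itself, I also ran a quick sanity check after normalising (a sketch; 'data.csv' stands in for my actual file):

raw_rows = np.genfromtxt('data.csv', delimiter=',')  # placeholder for my actual CSV loading
norm_rows = np.asarray([norm(row) for row in raw_rows], dtype=np.float32)
print("any NaN after normalising:", np.isnan(norm_rows).any())
print("any inf after normalising:", np.isinf(norm_rows).any())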
VAE:
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        self.lin1 = nn.Linear(in_features=1025, out_features=500)
        self.lin2 = nn.Linear(in_features=500, out_features=100)
        self.lin3 = nn.Linear(in_features=100, out_features=10)
        self.fc_mu = nn.Linear(in_features=10, out_features=latent_dims)
        self.fc_logvar = nn.Linear(in_features=10, out_features=latent_dims)

    def forward(self, x):
        x = F.relu(self.lin1(x))
        x = F.relu(self.lin2(x))
        x = torch.tanh(self.lin3(x))
        x_mu = self.fc_mu(x)
        x_logvar = self.fc_logvar(x)
        return x_mu, x_logvar
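For reference, my wrapper module samples the latent vector from x_mu and x_logvar with the standard reparameterisation trick, essentially like this (a sketch of what I do, not the exact code):

def reparameterize(mu, logvar):
    std = torch.exp(0.5 * logvar)  # logvar -> standard deviation
    eps = torch.randn_like(std)    # noise drawn from N(0, 1)
    return mu + eps * std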
class Decoder(nn.Module):
    def __init__(self):
        super(Decoder, self).__init__()
        self.fc = nn.Linear(in_features=latent_dims, out_features=10)
        self.lin3 = nn.Linear(in_features=10, out_features=100)
        self.lin2 = nn.Linear(in_features=100, out_features=500)
        self.lin1 = nn.Linear(in_features=500, out_features=1025)

    def forward(self, x):
        x = self.fc(x)
        x = F.relu(self.lin3(x))
        x = torch.tanh(self.lin2(x))
        x = torch.tanh(self.lin1(x))  # tanh on the output layer, since the data is normalised to [-1, 1]
        return x
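And for context, my training loss has roughly this shape (a sketch, assuming MSE reconstruction to match the tanh output; the exact reduction and weighting in my code may differ):

def vae_loss(recon_x, x, mu, logvar):
    recon = F.mse_loss(recon_x, x, reduction='sum')               # reconstruction term
    kl = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())  # KL(q(z|x) || N(0, I))
    return recon + kl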
I am using tanh because I have some negative values. The weird thing is that it sometimes gives proper values. I have normalised my input data, my libraries are up to date, and I have tried almost all of the solutions suggested elsewhere. Even if I just run inference, I still get NaN, so I don't know if the learning rate is the issue. What should I do?