Hi everyone,
So, I am trying to implement an autoencoder for text based on LSTMs; it's meant to be the foundation for something more sophisticated.
However, it always learns to output the same 4 characters, which rarely change during training, and for the rest of the string it outputs the same character at every index.
I use a one-hot encoding for the characters.
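In case the encoding matters: util.example_to_tensor is roughly equivalent to the sketch below (simplified; the alphabet here is just a placeholder, the real helper lives in my util module):

    import torch

    # Placeholder alphabet; in my code the size comes from util.get_letters_num()
    all_letters = "abcdefghijklmnopqrstuvwxyz "

    def example_to_tensor(text):
        # One tensor per string: (batch=1, sequence length, alphabet size)
        tensor = torch.zeros(1, len(text), len(all_letters))
        for idx, ch in enumerate(text):
            tensor[0][idx][all_letters.index(ch)] = 1.0
        return tensor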
Training:
...
criterion = nn.BCEWithLogitsLoss()
losses = []
optimizer = optim.Adam(self.parameters(), lr)
examples = util.get_all_targets()
random.shuffle(examples)
for i, item in enumerate(examples):
    # Input and target are the same string, since this is an autoencoder
    target_tensor = util.example_to_tensor(item)
    input_tensor = util.example_to_tensor(item)
    if is_cuda:
        target_tensor = target_tensor.cuda()
        input_tensor = input_tensor.cuda()
    input_var = Variable(input_tensor)
    target_var = Variable(target_tensor)
    output = self(input_var, len(item))
    loss = 0
    optimizer.zero_grad()
    # Accumulate the loss character by character
    for j, char_output in enumerate(output[0]):
        loss += criterion(char_output, target_var[0][j])
    loss.backward()
    optimizer.step()
The net:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, num_layers, input_size=None):
        super(DecoderRNN, self).__init__()
        # Default: input_size equals hidden_size (a default argument can't reference another parameter directly)
        if input_size is None:
            input_size = hidden_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.linear = nn.Linear(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, output_size, num_layers, batch_first=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, encoded_input):
        decoded_output, hidden = self.lstm(encoded_input)
        decoded_output = self.sigmoid(decoded_output)
        return decoded_output


class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size=None, dropout=0):
        super(EncoderRNN, self).__init__()
        # Default: output_size equals hidden_size
        if output_size is None:
            output_size = hidden_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(util.get_letters_num(), hidden_size, num_layers, dropout=dropout, batch_first=True)
        self.relu = nn.ReLU()

    def forward(self, x):
        x, _ = self.lstm(x)  # Don't care about the hidden states
        return self.relu(x)


class LSTMAutoEncoder(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(LSTMAutoEncoder, self).__init__()
        self.encoder = EncoderRNN(input_size, hidden_size, num_layers)
        self.decoder = DecoderRNN(hidden_size, input_size, num_layers)
        if is_cuda:
            self.encoder = self.encoder.cuda()
            self.decoder = self.decoder.cuda()

    def forward(self, input, length):
        encoded = self.encoder(input)
        decoded_output = self.decoder(encoded)  # A Softmax instead of the Sigmoid doesn't change a thing here, same problem
        return decoded_output
The data is formatted as (Batch, CharIndex, OneHotForThatChar), with Batch always being 1.
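For a concrete picture of the shapes, a forward pass looks roughly like this (the hidden size and layer count here are just placeholder values, not my actual ones):

    from torch.autograd import Variable

    # Placeholder hyperparameters, only to illustrate the tensor shapes
    model = LSTMAutoEncoder(input_size=util.get_letters_num(), hidden_size=64, num_layers=1)

    word = "hello"
    input_tensor = util.example_to_tensor(word)        # shape (1, 5, util.get_letters_num())
    output = model(Variable(input_tensor), len(word))
    print(output.size())                               # (1, 5, util.get_letters_num()) again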
I understand that I must have some error in my thinking, but I just can't figure it out.
Thanks for your help!
PS: The net is taken and modified from LSTM autoencoder architecture.
Edit: I tried more things, and it seems the first two letters become correct after a few epochs; the rest is still always the same letter.