Avoid getting caught in loops with an LSTM

Hello!

I recently implemented a name-generating RNN from scratch, which was doing OK but far from perfect. So I thought I would try my luck with LSTM cells to see if they make a difference. They do indeed, and the output looks much better for the first 7 or 8 characters. But then the network gets caught in a loop and outputs things like “laulaulaulau” or “rourourourou” (it is supposed to generate French names).

Is this a common problem? If so, do you know a way to fix it? I’m also concerned by the fact that the network never produces EOS tokens…
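For reference, my encoding helpers are essentially the ones from the PyTorch char-RNN generation tutorial, except that I reserve two extra indices: SOS at n_letters - 2 and EOS at n_letters - 1. Roughly (the exact alphabet is a detail):

import random
import string
import torch

all_letters = string.ascii_letters + " '-"  # plus the accented characters French names need
n_letters = len(all_letters) + 2            # extra slots: SOS = n_letters - 2, EOS = n_letters - 1

def randomChoice(l):
    # draw a random training name
    return l[random.randint(0, len(l) - 1)]

def inputTensor(line):
    # one-hot encode the characters, shape (seq_len, 1, n_letters)
    tensor = torch.zeros(len(line), 1, n_letters)
    for i, letter in enumerate(line):
        tensor[i][0][all_letters.find(letter)] = 1
    return tensor

def targetTensor(line):
    # indices of the next characters, terminated by EOS
    indexes = [all_letters.find(line[i]) for i in range(1, len(line))]
    indexes.append(n_letters - 1)  # EOS
    return torch.LongTensor(indexes)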

Here is the model:

import torch
import torch.nn as nn

class pytorchLSTM(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(pytorchLSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.output_layer = nn.Linear(hidden_size, input_size)
        self.tanh = nn.Tanh()
        self.softmax = nn.LogSoftmax(dim=2)

    def forward(self, input, hidden=None):
        # hidden=None lets nn.LSTM initialize (h_0, c_0) to zeros
        out, hidden = self.lstm(input, hidden)
        out = self.tanh(out)
        out = self.output_layer(out)
        out = self.softmax(out)
        return out, hidden
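For shape reference, I use it like this (hidden size 128 is just what I happened to pick):

lstm = pytorchLSTM(n_letters, 128)
x = inputTensor("Laura")   # (5, 1, n_letters): sequence first, batch of 1
out, hidden = lstm(x)      # out: (5, 1, n_letters), log-probabilities over the next character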

The training loop:

import time

def train_lstm(model):
    start = time.time()
    criterion = nn.NLLLoss()
    optimizer = torch.optim.Adam(model.parameters())
    n_iters = 20000
    print_every = 1000
    plot_every = 500
    all_losses = []
    total_loss = 0
    for iter in range(1, n_iters + 1):
        # pick a random training name and encode it
        line = randomChoice(category_line)
        input_line_tensor = inputTensor(line)
        target_line_tensor = targetTensor(line).unsqueeze(-1)
        optimizer.zero_grad()
        loss = 0
        output, hidden = model(input_line_tensor)
        # sum the loss over every character of the sequence
        for i in range(input_line_tensor.size(0)):
            loss += criterion(output[i], target_line_tensor[i])
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        if iter % print_every == 0:
            print('%ds (%d %d%%) %.4f' % (time.time() - start, iter, iter / n_iters * 100, loss.item()))
        if iter % plot_every == 0:
            all_losses.append(total_loss / plot_every)
            total_loss = 0
    return all_losses
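(Side note: I believe the per-character loop is equivalent to a single call with sum reduction, in case that is cleaner:)

# drop-in replacement for the per-character loop inside train_lstm
criterion_sum = nn.NLLLoss(reduction='sum')
# output: (seq_len, 1, n_letters), target_line_tensor: (seq_len, 1)
loss = criterion_sum(output.view(-1, n_letters), target_line_tensor.view(-1))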

The sampling function:

def sample():
    max_length = 20
    # start from a one-hot SOS token (index n_letters - 2)
    input = torch.zeros(1, 1, n_letters)
    input[0][0][n_letters - 2] = 1
    output_name = ""
    hidden = (torch.zeros(2, 1, lstm.hidden_size), torch.zeros(2, 1, lstm.hidden_size))

    for i in range(max_length):
        output, hidden = lstm(input)
        output = output[-1]  # keep the last timestep
        # sample the next letter from the predicted distribution
        l = torch.multinomial(torch.exp(output[0]), num_samples=1).item()
        if l == n_letters - 1:
            # EOS token: stop generating
            break
        else:
            letter = all_letters[l]
            output_name += letter
        input = inputTensor(letter)
    return output_name
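To get samples I just call it a few times:

for _ in range(3):
    print(sample())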

Typical output:

Laurayeerauerararauo
Leayealouododauodouo
Meeaueeulauodalauouo

Do you know how I can improve this?