Generating sequences with RNN

Hello,

I tried modifying a bit the RNN example from the pytorch tutorial to generate names from a given language, not using argmax to select the letter after the last layer’s output but torch.multinomial.
The results are terrible and I am wondering why. I have either made a very dumb mistake somewhere without realiszing or the architecture is fondamentaly wrong.

Can you give me tips on how I could improve this ?

Here are some example of architectures that I tried :

class RNN_debut(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        
        self.h1 = nn.Linear(input_size + hidden_size, hidden_size)
        self.h2 = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim = 1)
        
    def forward(self, input, hidden):
        input_combined = torch.cat((input,hidden),1)
        hidden = self.h1(input_combined)
        output = self.h2(hidden)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden
    
    def initHidden(self):
        return torch.zeros(1, self.hidden_size)
    
class RNN_longer(nn.Module):
    def __init__(self, input_size, hidden1_size, hidden2_size, output_size):
        super().__init__()
        self.hidden1_size = hidden1_size
        
        self.h1 = nn.Linear(input_size + hidden1_size, hidden1_size)
        self.h2 = nn.Linear(hidden1_size, hidden2_size)
        self.h3 = nn.Linear(hidden2_size, output_size)
        
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim = 1)
        
    def forward(self, input, hidden):
        input_combined = torch.cat((input,hidden),1)
        hidden = self.h1(input_combined)
        hidden2 = self.h2(hidden)
        output = self.h3(hidden2)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden
    
    def initHidden(self):
        return torch.zeros(1, self.hidden1_size)

class RNN_middle(nn.Module):
    def __init__(self, input_size, hidden1_size, hidden2_size, output_size):
        super().__init__()
        self.hidden1_size = hidden1_size
        
        self.h1 = nn.Linear(input_size, hidden1_size)
        self.h2 = nn.Linear(hidden1_size + hidden2_size, hidden2_size)
        self.h3 = nn.Linear(hidden2_size, output_size)
        
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim = 1)
        
    def forward(self, input, hidden):
        out1 = self.h1(input)
        input_combined = torch.cat((out1,hidden),1)
        hidden = self.h2(input_combined)
        output = self.h3(hidden)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden
    
    def initHidden(self):
        return torch.zeros(1, self.hidden2_size)

These models are then trained, like in the tutorials following :

criterion = nn.NLLLoss()

learning_rate = 0.0005

def train(model,input_line_tensor, target_line_tensor):
    target_line_tensor.unsqueeze_(-1)
    hidden = model.initHidden()

    model.zero_grad()

    loss = 0

    for i in range(input_line_tensor.size(0)):
        output, hidden = model(input_line_tensor[i], hidden)
        l = criterion(output, target_line_tensor[i])
        loss += l

    loss.backward()

    for p in model.parameters():
        p.data.add_(-learning_rate, p.grad.data)

    return loss.item() / input_line_tensor.size(0)

And then I sample from the network with this function :

def sample(start_letter = None):
    input = torch.zeros(1, 1, n_letters)
    input[0][0][n_letters - 2] = 1 #Start of sentence
    output_name = ""

    hidden = rnn.initHidden()
    
    for i in range(max_length):
        output, hidden = rnn(input[0], hidden)
        picked = torch.multinomial(torch.exp(output[0]),num_samples = 1).item()
        if picked == n_letters - 1:
            break
        else:
            letter = all_letters[picked]
            output_name += letter
        input = inputTensor(letter)
    return output_name

Do you know how I could improve this(these ?) simple model ?