RNN HTML sampling

Greetings, everyone!
I’ve been stuck for a couple of days trying to get an RNN to learn a basic HTML template.
I tried different approaches, and I even overfit on the following data:

<!DOCTYPE html>
<html>
<head>
<title>Page Title</title>
</head>
<body>

<h1>This is a Heading</h1>
<p>This is a paragraph.</p>

</body>
</html> 

The model reaches 100% accuracy on both the training and validation sets using the Adam optimizer and CrossEntropyLoss.
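For reference, the training setup is essentially the following (a minimal sketch: trainLoader, num_epochs, the learning rate, and ignore_index are illustrative assumptions, not the exact notebook code):

criterion = torch.nn.CrossEntropyLoss(ignore_index=0)   # id 0 is the padding token (assumed)
optimizer = torch.optim.Adam(modelOne.parameters(), lr=1e-3)

for epoch in range(num_epochs):
    for paddedSeqs, targets in trainLoader:             # targets = inputs shifted left by one
        hidden = modelOne.init_hidden(paddedSeqs)       # batch size 1, as init_hidden hard-codes
        predictions, _ = modelOne(paddedSeqs.to(device), hidden)
        # predictions: (batch, vocabulary_size, seq_len); targets: (batch, seq_len)
        loss = criterion(predictions, targets.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()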

The problem is that when I try to sample from the network, the results are completely random, and I can’t figure out what the problem is:

..<a<a<a<a<aa<ttp11111b11111b11111111b11b1bbbb<btttn111

My sampling function is the following:

import numpy as np
import torch
from torch.autograd import Variable

def sample_sentence():
    words = list()
    count = 0
    modelOne.eval()
    with torch.no_grad():
        # Set up the initial input: the [START] token.
        previousWord = torch.LongTensor(1, 1).fill_(trainData.vocabulary['letter2id']['[START]'])
        # Hidden state shape: (num_layers * num_directions, batch, hidden_size) = (6, 1, 100).
        hidden = Variable(torch.zeros(6, 1, 100).to(device))

        while True:
            # Predict the next token from the previous token and hidden state.
            inputWord = Variable(previousWord.to(device))
            predictions, hidden = modelOne(inputWord, hidden)

            # Turn the logits into a normalized probability distribution.
            pred = torch.nn.functional.softmax(predictions.squeeze(), dim=0).data.cpu().numpy().astype('float64')
            pred = pred / np.sum(pred)

            # Sample the next token id from the distribution.
            nextWordId = np.random.multinomial(1, pred, 1).argmax()

            # Skip the padding token (id 0) and resample.
            if nextWordId == 0:
                continue

            words.append(trainData.vocabulary['id2letter'][nextWordId])
            # Feed the sampled token back in as the next input.
            previousWord.fill_(nextWordId)

            # Keep adding tokens until the [END] token is generated.
            if nextWordId == trainData.vocabulary['letter2id']['[END]']:
                break
            if count > 20000:
                break
            count += 1

        words.insert(0, '[START]')
        return words
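For what it’s worth, I call it like this to turn the sampled characters back into text (just a usage sketch):

sampled = sample_sentence()
print(''.join(sampled[1:]))   # drop the leading [START] marker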

And here is my network architecture:

import torch
import torch.nn as nn
from torch.autograd import Variable

class ModelOne(Model):
    def __init__(self,
                 vocabulary_size,
                 hidden_size,
                 num_layers,
                 rnn_dropout,
                 embedding_size,
                 dropout,
                 num_directions):
        super(ModelOne, self).__init__()

        self.vocabulary_size = vocabulary_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn_dropout = rnn_dropout
        self.dropout = dropout
        self.num_directions = num_directions
        self.embedding_size = embedding_size

        self.embeddings = nn.Embedding(self.vocabulary_size, self.embedding_size)
        self.rnn = nn.GRU(self.embedding_size,
                          self.hidden_size,
                          num_layers=self.num_layers,
                          bidirectional=(self.num_directions == 2),
                          dropout=self.rnn_dropout,
                          batch_first=True)
        self.linear = nn.Linear(self.hidden_size * self.num_directions, self.vocabulary_size)

    def forward(self, paddedSeqs, hidden):
        batchSize = paddedSeqs.size(0)
        batchSequenceLength = paddedSeqs.size(1)

        # Actual (unpadded) length of each sequence; id 0 is the padding token.
        lengths = paddedSeqs.ne(0).sum(dim=1)

        embeddingVectors = self.embeddings(paddedSeqs)

        # Pack the padded batch so the GRU skips the padding positions.
        x = torch.nn.utils.rnn.pack_padded_sequence(embeddingVectors, lengths, batch_first=True)

        self.rnn.flatten_parameters()
        x, hid = self.rnn(x, hidden)

        output, _ = torch.nn.utils.rnn.pad_packed_sequence(x, batch_first=True, padding_value=0, total_length=batchSequenceLength)

        # Project the GRU outputs to per-token logits over the vocabulary.
        predictions = self.linear(output)

        # Reshape the logits to (batchSize, vocabulary_size, batchSequenceLength) for CrossEntropyLoss.
        return predictions.view(batchSize, self.vocabulary_size, batchSequenceLength), hid

    def init_hidden(self, paddedSeqs):
        hidden = Variable(torch.zeros(self.num_layers * self.num_directions,
                                      1,
                                      self.hidden_size).to(device))
        return hidden
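I instantiate the model roughly like this (the exact hyperparameters are assumptions here, except that the (6, 1, 100) hidden state in the sampler implies hidden_size = 100 and num_layers * num_directions = 6, e.g. 3 bidirectional layers):

modelOne = ModelOne(vocabulary_size=len(trainData.vocabulary['letter2id']),
                    hidden_size=100,
                    num_layers=3,
                    rnn_dropout=0.0,
                    embedding_size=64,   # assumed value
                    dropout=0.0,
                    num_directions=2).to(device)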

If you have any idea what needs to be changed, please let me know.
I added all the code to a GitHub repository here: https://github.com/OverclockRo/HTMLGeneration/blob/SamplingTestTemplate/Untitled.ipynb