LSTM for text generation

Hello everyone. I am trying to train an LSTM network for text generation and it seems like it won’t train very well. The loss decreases very slowly, or not at all, and the accuracy doesn’t seem to improve. What am I doing wrong? Sorry if it looks like a lot of code — it’s actually just about 100 lines; it looks longer because of the docstrings.

class LSTM(nn.Module):
    """Character-level LSTM network for text generation.

    Args:
        params (dict): holds the program hyperparameters
    """

    def __init__(self, params):
        super(LSTM, self).__init__()

        self.hidden_dim = params['h_dim']
        self.n_layers = params['n_layers']
        self.batch = params['batch']
        self.seq = params['seq']
        alphabet_size = output_size = params['alphabet_size']

        # Project one-hot character vectors into the hidden dimension.
        self.i2h = nn.Linear(alphabet_size, self.hidden_dim)

        # BUG FIX: nn.LSTM's `dropout` argument is a float probability, not a
        # bool. `dropout=True` is treated as a probability of 1.0, which zeroes
        # out every inter-layer activation and makes the network untrainable.
        # Default to 0.0 (no dropout) unless 'dropout' is given in params.
        self.lstm = nn.LSTM(self.hidden_dim, self.hidden_dim, self.n_layers,
                            batch_first=True,
                            dropout=params.get('dropout', 0.0))

        self.h2O = nn.Linear(self.hidden_dim, output_size)
        self.hidden = self.init_hidden(params['type'])

    def init_hidden(self, type):
        """Initialize the LSTM hidden and cell state.

        Args:
            type: the tensor type, e.g. torch.FloatTensor,
                torch.cuda.FloatTensor

        Returns:
            h_0, c_0 (Variable, Variable): zero Tensors of size (L, B, H) where
                L: number of LSTM layers
                B: batch size
                H: hidden dimension of the LSTM
        """
        h_0 = Variable(
            torch.zeros(self.n_layers, self.batch, self.hidden_dim).type(type))

        c_0 = Variable(
            torch.zeros(self.n_layers, self.batch, self.hidden_dim).type(type))

        return h_0, c_0

    def forward(self, sequence):
        """Computes the neural net forward pass.

        Args:
            sequence (Variable): one-hot Tensor of size (B, SL-1, AS) where
                B: batch size
                SL: sequence length
                AS: alphabet size

        Returns:
            out (Variable): Tensor of logits of size (B*(SL-1), AS)
        """
        out = self.i2h(sequence)
        lstm_out, self.hidden = self.lstm(
            out.view(self.batch, self.seq - 1, -1), self.hidden)
        # Flatten batch and time dims so every time step is scored at once.
        out = self.h2O(lstm_out.contiguous().view(-1, self.hidden_dim))
        return out

def sequence_to_one_hot(sequence, char_to_ix, params):
    """Turns a sequence of chars into a one-hot Tensor.

    Args:
        sequence (str): sequence of chars
        char_to_ix (dict): mapping from chars to integers (indexes)
        params (dict): holds the program hyperparameters

    Returns:
        tensor (Tensor): one-hot tensor of size (B, SL, AS) where
            B: batch size
            SL: sequence length
            AS: alphabet size
    """
    # One char per (batch, step) slot: the flat string must fill the batch.
    expected_chars = params['batch'] * params['seq']
    assert len(sequence) == expected_chars, 'Sequence must be a batch'

    tensor = torch.zeros(
        len(sequence), params['alphabet_size']).type(params['type'])

    # Set a single 1 per row, at the column given by the char's index.
    for i, c in enumerate(sequence):
        tensor[i][char_to_ix[c]] = 1

    return tensor.view(params['batch'], params['seq'], params['alphabet_size'])

def train(dataloaders, char_to_ix, model, optimizer, criterion, params):
    """Trains the neural net with early stopping on the validation loss.

    Args:
        dataloaders (dict): holds PyTorch Dataloaders for training and validation
        char_to_ix (dict): mapping from chars to integers (indexes)
        model (LSTM): the model to be trained
        optimizer (Optimizer): PyTorch optimizer
        criterion: Loss function
        params (dict): holds the program hyperparameters

    Returns:
        model (LSTM): the trained model, with the best (lowest val loss)
            weights loaded back in
    """
    assert len(dataloaders['train']) != 0, 'Not enough data for training'
    assert len(dataloaders['val']) != 0, 'Not enough data for validation'

    since = time.time()

    best_loss = float('inf')
    epoch = 1
    bad_epochs = 0

    dataset_size = {x: len(dataloaders[x]) * dataloaders[x].batch_size
                    for x in ['train', 'val']}

    while True:

        print('Epoch {}'.format(epoch))
        print('=' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:

            # BUG FIX: the original called model.train(True) and then
            # immediately model.train(False), so the model was always in
            # eval mode. Enable training mode only for the 'train' phase.
            model.train(phase == 'train')

            running_loss = 0
            running_corrects = 0

            # Iterate over the data
            for batch in dataloaders[phase]:

                # Reset the hidden state so gradients do not flow (and state
                # does not leak) across independent batches.
                model.hidden = model.init_hidden(params['type'])

                inputs = Variable(sequence_to_one_hot(batch, char_to_ix, params))

                # Predict chars 1..SL-1 from chars 0..SL-2.
                out = model(inputs[:, :-1, :])
                _, preds = out.max(1)

                # Get the targets (indexes where the one-hot vector is 1)
                _, target = inputs[:, 1:, :].topk(1)
                target = target.view(-1)

                loss = criterion(out, target)

                # BUG FIX: the backward pass and optimizer step were missing
                # entirely, so the weights were never updated — this is why
                # the loss was not decreasing.
                if phase == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                # .item() extracts the Python number (torch >= 0.4 equivalent
                # of the old loss.data[0]).
                running_loss += loss.item()
                # BUG FIX: compare preds against the flattened target,
                # otherwise broadcasting inflates the count.
                running_corrects += torch.sum(preds == target).item()

            # Compute mean epoch loss and accuracy
            epoch_loss = running_loss / len(dataloaders[phase])
            epoch_acc = running_corrects / dataset_size[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            if phase == 'val':

                # Save best weights
                if epoch_loss < best_loss:
                    bad_epochs = 0
                    best_loss = epoch_loss
                    torch.save(model.state_dict(), 'rnn.pkl')
                else:
                    bad_epochs += 1

        # Hara-kiri: stop after 10 epochs without validation improvement
        if bad_epochs == 10:
            break

        epoch += 1

    time_elapsed = time.time() - since

    print('\nTraining completed in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    print('Best Loss: {:.4f}\n\n'.format(best_loss))

    # Load best wts
    model.load_state_dict(torch.load('rnn.pkl'))

    return model