Lstm losss does not decrease after 3rd epoch

import torch
import torch.nn as nn
import torchvision
import torch.optim as opt
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from TmyDataSet import LMDBDataSet
import lmdb
from tqdm import tqdm
from time import sleep

support_num = 21
start_num = 40
train_len = 5
pre_len = 4

class RNN(nn.Module):
def init(self, train_len, n_hidden, pre_len=4): # hidden size
super(RNN, self).init()
self.n_hidden = n_hidden
self.lstm1 = nn.LSTM(5, self.n_hidden, 1)
self.linear2 = nn.Linear(self.n_hidden, pre_len)
self.pre_len = pre_len
self.train_len = train_len

def forward(self, x_train):
    h, c = self.lstm1(x_train)
    out = h.view(-1, self.n_hidden)
    output = self.linear2(out)
    return output

class LMDBDataSet(Dataset):
def init(self, sequence_length, start_num, support_num, pre_step, max_pressure=40, min_time=34992, max_time=39052):
self.db_path = ‘/home/aimen/PycharmProjects/HydraulicSupport_pressure/’
self.startSupportNum = start_num
self.supportNum = support_num
self.train_length = sequence_length
self.pre_step = pre_step
self.max_pressure = max_pressure
self.min_time = min_time
self.max_time = max_time
self.length = int(self.max_time) - int(self.min_time) - self.pre_step - self.train_length - 1

def __getitem__(self, index):
    data = []
    label = []

    if (index + self.pre_step + self.train_length) < self.length:
        for id in range(self.startSupportNum, self.startSupportNum + self.supportNum):  ## append the data for support id
            env = lmdb.open(self.db_path + "support_id_lmdb"+str(id))
            txn = env.begin(write=False)
            temp = []
            for sample_time in range(index + int(self.min_time),
                                     index + int(self.min_time) + int(self.train_length)): ## append the data for time
                data_value = txn.get(str(sample_time).encode())
                temp.append(np.float32(float(data_value) / self.max_pressure))
            data.append(temp)

            temp = []
            for label_time in range(index + int(self.min_time) + int(self.train_length),
                                    index + int(self.min_time) + int(self.train_length) + int(self.pre_step)):
                label_value = txn.get(str(label_time).encode())
                temp.append(np.float32(float(label_value) / self.max_pressure))
            label.append(temp)

    data = np.array(data)
    label = np.array(label)
    return data, label

def __len__(self):
    return 4000

dataset = LMDBDataSet(sequence_length=train_len, start_num=start_num, support_num=support_num, pre_step=pre_len)

dataloader_train = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=1)
dataloader_test = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=1)

model = RNN(train_len=5, n_hidden=16)
optimizer = opt.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.999)
LossFunc = nn.MSELoss()

state = {‘model’: model.state_dict(), ‘optimizer’: optimizer.state_dict()}

model.train()
epoch_num = 100
for epoch in range(epoch_num):
loss = 0

train_bar = tqdm(dataloader_train)
for i, data in enumerate(train_bar):
    x_train, y_train = data
    sleep(0.001)
    y_pre = model(x_train)

    y_pre = y_pre.reshape(-1, y_pre.shape[0])
    Y_train = y_train.reshape(y_train.shape[-1], -1)

    loss = LossFunc(Y_train, y_pre)

    y_pre = y_pre.detach().numpy()
    Y_train = Y_train.detach().numpy()

    optimizer.zero_grad()
    loss.backward()

    optimizer.step()
    scheduler.step()
    loss += loss
    train_bar.desc = "train epoch[{}/{}] " \
                         "loss:{:.3f}".format(epoch + 1, epoch_num, loss)

print("------", loss / len(dataloader_train))

Double check the shapes of the output activation in:

self.lstm1 = nn.LSTM(5, self.n_hidden, 1)
...

def forward(self, x_train):
    h, c = self.lstm1(x_train)
    out = h.view(-1, self.n_hidden)

as nn.LSTM uses batch_size=False by default. The output will should thus have the shape [seq_len, batch_size, output_size]. Flattening the h tensor might thus be wrong.