LSTM time series forecasting values of next n rows returns same value n times

I am trying to create an LSTM model that predicts a specific value (the first column of the dataset, index 0) for the next 10 rows. Each input sequence contains 10 rows of the time series, with 19 features per row.

# Slide over the rows; each iteration builds one (input window, target) pair.
for i in range(sequence_length, len(data) - 10):
        # Input window: rows [i - sequence_length, i), columns [2, 2 + input_size).
        sequences.append(data.iloc[i-sequence_length:i, 2:2+input_size].values)
        # Target: column 0 of rows [i + 1, i + 11), i.e. 10 values.
        # Row i itself is in neither the input window nor the target.
        labels.append(data.iloc[i + 1: i + 11, 0])

Sample data:






The loss decreases greatly (I managed to get it down to about 3.1e-8), yet the 10 predicted values for any one sequence are always nearly the same number.

For example the labels for a sequence could be [1.084,1.0845,1.084,1.08395,1.0839,1.0838,1.0839,1.084,1.0845,1.084]

And the preds that I get back

At the moment I use batch size of 32, so I get roughly the following:


I do not understand why the predictions do not follow the momentum of the value that they are meant to predict. Obviously, getting the same value for each of the next n rows is not useful, even if the loss is decreasing…

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, TensorDataset

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

class CustomLSTM(nn.Module):
    """Bidirectional LSTM regressor with a ReLU -> BatchNorm -> Dropout -> Linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per LSTM direction (the head sees twice
            this many features because the LSTM is bidirectional).
        output_size: Number of values produced per input sequence.
        dropout: Dropout probability applied before the final linear layer.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, dropout, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True)
        self.relu = nn.ReLU()  # ReLU activation layer
        self.batch_norm = nn.BatchNorm1d(hidden_size * 2)  # Batch normalization layer
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size * 2, output_size)

    def forward(self, x):
        """Map a batch of sequences to ``output_size`` values per sequence.

        Args:
            x: Tensor laid out as (batch, time, features), since the LSTM is
                built with ``batch_first=True``.

        Returns:
            Tensor of shape (batch, output_size).
        """
        # Initial states follow the input's dtype and device, so the module
        # works for float32 as well as float64 inputs.
        state_shape = (self.num_layers * 2, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        c0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)

        # NOTE(review): dim=1 is the default of F.normalize and, with
        # batch_first inputs, is the time axis. Each feature is therefore
        # scaled to unit L2 norm across the window, which removes its absolute
        # level; for nearly flat series every window looks almost identical.
        # Confirm this is intended rather than per-feature dataset scaling.
        x = torch.nn.functional.normalize(x, dim=1)
        out, _ = self.lstm(x, (h0, c0))
        out = self.relu(out[:, -1, :])  # Keep only the last time step, then apply ReLU
        out = self.batch_norm(out)  # Apply batch normalization
        out = self.dropout(out)  # Apply dropout
        out = self.fc(out)

        return out

# Number of feature columns fed to the model at each time step.
input_size = 19

# Mean squared error is the regression objective.
criterion = nn.MSELoss()

data = pd.read_csv('chapter6/a_without_normalization.csv')

# Row-order split: the first 90% of rows train, the remaining rows test.
train_size = int(len(data) * 0.9)
test_size = len(data) - train_size
train_dataset = data.iloc[:train_size]
test_dataset = data.iloc[train_size:]

def create_sequences(data, sequence_length, horizon=10, num_features=None):
    """Build sliding-window inputs and multi-step targets from a DataFrame.

    For every index ``i`` in ``range(sequence_length, len(data) - horizon)``:
    the input is rows ``[i - sequence_length, i)`` of the feature columns
    ``[2, 2 + num_features)``, and the target is column 0 of rows
    ``[i + 1, i + 1 + horizon)``.

    NOTE(review): row ``i`` itself is in neither the input nor the target, so
    the first target is two rows after the last input row. Confirm this gap is
    intended; the offset is kept unchanged here.

    Args:
        data: pandas DataFrame; column 0 is the target and the features start
            at column 2.
        sequence_length: Number of rows in each input window.
        horizon: Number of future target values per sample (default 10).
        num_features: Number of feature columns to take. Defaults to the
            module-level ``input_size``.

    Returns:
        Tuple ``(sequences, labels)`` of NumPy arrays with shapes
        ``(n, sequence_length, num_features)`` and ``(n, horizon)``. When the
        frame is too short for a single window, ``n`` is 0 and the arrays
        still have those shapes.
    """
    if num_features is None:
        num_features = input_size

    # Extract the arrays once instead of going through .iloc on every step.
    features = data.iloc[:, 2:2 + num_features].to_numpy()
    targets = data.iloc[:, 0].to_numpy()

    window_ends = range(sequence_length, len(data) - horizon)
    sequences = [features[i - sequence_length:i] for i in window_ends]
    labels = [targets[i + 1:i + 1 + horizon] for i in window_ends]

    if not sequences:
        # Keep the trailing dimensions so downstream code sees stable shapes.
        return (
            np.empty((0, sequence_length, features.shape[1]), dtype=features.dtype),
            np.empty((0, horizon), dtype=targets.dtype),
        )
    return np.array(sequences), np.array(labels)

# Number of consecutive rows in each input window.
sequence_length = 10
train_sequences, train_labels = create_sequences(train_dataset, sequence_length)
test_sequences, test_labels = create_sequences(test_dataset, sequence_length)

# Convert to PyTorch tensors (from_numpy keeps the NumPy dtype).
# The model below is cast to double, so the arrays are presumably float64 — TODO confirm.
train_sequences = torch.from_numpy(train_sequences)
train_labels = torch.from_numpy(train_labels)
test_sequences = torch.from_numpy(test_sequences)
test_labels = torch.from_numpy(test_labels)

# Create a TensorDataset from sequences and labels
# (rebinds train_dataset / test_dataset, which held the DataFrame splits).
train_dataset = TensorDataset(train_sequences, train_labels)
test_dataset = TensorDataset(test_sequences, test_labels)

batch_size = 32
dropout = 0.2
hidden_size = 64
weight_decay = 0.001
lstm_layers = 2
lr = 0.001

output_size = 10  # Number of output features
num_epochs = 101
model_eval_every = 2
print_loss_every = 1
save_model_every = 2500

# Create a DataLoader with the current batch size
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

train_dataloader_len = len(train_dataloader)

# Instantiate the model
model = CustomLSTM(input_size, hidden_size, output_size, dropout, lstm_layers).double().to(device)

# Define the optimizer. weight_decay is passed through so the configured
# value is actually applied; previously it was defined but never used.
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# Define the scheduler: multiply the learning rate by 0.6 every 30 scheduler steps
scheduler = StepLR(optimizer, step_size=30, gamma=0.6)

print(f'Training with weight_decay {weight_decay}')

for epoch in range(num_epochs):
    total_loss = 0
    for batch in train_dataloader:
        # Unpack the batch
        batch_sequences, batch_labels = batch[0].to(device), batch[1].to(device)

        # Clear gradients accumulated by the previous step
        optimizer.zero_grad()

        # Pass the batch through the model. No squeeze(): it would drop the
        # batch dimension whenever a batch holds a single sample, and the
        # result would no longer line up with batch_labels.
        output = model(batch_sequences)

        # Compute the loss
        loss = criterion(output, batch_labels)
        total_loss += loss.item()

        # Backpropagate the loss and update the weights
        loss.backward()
        optimizer.step()

    # Update the learning rate (once per epoch)
    scheduler.step()

    # Report the mean batch loss for this epoch
    if epoch % print_loss_every == 0:
        print(f'Epoch {epoch}: mean train loss {total_loss / max(train_dataloader_len, 1):.3e}')

Could this be because the differences between the values are so small? Is there a way to accurately predict such data, or do I need to normalize the data in some special way?