LSTM time series forecast of the next n rows returns the same value n times

I am trying to create an LSTM model that predicts a specific value (the first column of the dataset, index 0) for each of the next 10 rows. Each input sequence contains 10 rows of the time series with 19 features per row:

for i in range(sequence_length, len(data) - 10):
    sequences.append(data.iloc[i-sequence_length:i, 2:2+input_size].values)
    labels.append(data.iloc[i + 1: i + 11, 0])
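
For clarity, with sequence_length = 10 and input_size = 19, each (sequence, label) pair produced by this loop has shapes (10, 19) and (10,). A minimal sketch with dummy data (the dummy frame only mimics the 21-column layout of the CSV, the numbers are random):

import numpy as np
import pandas as pd

# dummy frame with the same 21-column layout (c1..c21) as the real CSV
dummy = pd.DataFrame(np.random.rand(40, 21), columns=[f'c{i}' for i in range(1, 22)])

sequence_length, input_size = 10, 19
i = sequence_length
seq = dummy.iloc[i - sequence_length:i, 2:2 + input_size].values  # columns c3..c21 -> shape (10, 19)
lab = dummy.iloc[i + 1:i + 11, 0].values                          # column c1 (index 0) -> shape (10,)
print(seq.shape, lab.shape)  # (10, 19) (10,)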

Sample data:

c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21
1.084,1.08405,1.0841,1.08405,1.0841,1.084,11240,6.249999999985434e-05,-1.0164458235761842e-05,-5.1788748878102555e-05,1.0840285714285716,1.0840928571428572,1.0840280952380952,1.08405,-0.000937629492890638,0.8237791754445127,-0.009223815892633767,49.223395431868134,-3.13680151375703,0.010743580701520136,1000.2306464528247
1.084,1.08405,1.08405,1.08405,1.0841,1.08405,14158,-2.4999999999941735e-05,-9.32997172098382e-06,-6.046625792230974e-05,1.0840285714285716,1.0840857142857143,1.0840309523809522,1.084046103896104,-0.0008606520795521739,3.185291329162407,-0.009223815892633767,49.223395431868134,-2.9477598235694686,0.009208783458445832,1000.2306464528247
1.0839,1.08395,1.08405,1.08395,1.08405,1.08385,19095,-0.00015749999999981057,-1.6547055257998267e-05,-7.543797446324434e-05,1.0840142857142856,1.0840690476190478,1.0840204761904761,1.0840337662337662,-0.0015264100999568611,8.156945531675506,-0.009224666758912318,41.76004501048701,-4.958497925954123,-0.26489247132130206,1000.2306464528247
1.08395,1.084,1.08395,1.084,1.084,1.08385,12756,-0.0001474999999999671,-1.8024291017937344e-05,-9.06405060916429e-05,1.0840035714285714,1.0840547619047618,1.0840185714285715,1.084027489177489,-0.0016626858514864735,7.660743847017261,0.009225943352706798,46.15600965239905,-5.393125751532237,-0.13593640398949522,1000.2767846809004

The loss decreases nicely (I managed to get it down to 3.1…e-8), yet the predictions within a single sequence are always the same number.

For example, the labels for a sequence could be [1.084,1.0845,1.084,1.08395,1.0839,1.0838,1.0839,1.084,1.0845,1.084]

and the predictions that I get back are
[1.08395,1.08395,1.08395,1.08395,1.08395,1.08395,1.08395,1.08395,1.08395,1.08395]

At the moment I use a batch size of 32, so I get roughly the following:

[
 [1.08395,1.08395,..]
 [1.0841,1.0841,..]
..
]

I do not understand why the predictions do not follow the momentum of the value they are meant to predict. Obviously, getting the same value for each of the next n rows is not useful, even if the loss is decreasing…
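
To make the flatness measurable, I compare the spread within each predicted sequence to the spread of its labels; the per-sequence standard deviation of the predictions is essentially zero. A quick check (sketch, using model, test_dataloader and device from the code below):

model.eval()
with torch.no_grad():
    # take one test batch and compare the within-sequence spread of predictions vs. labels
    batch_sequences, batch_labels = next(iter(test_dataloader))
    preds = model(batch_sequences.to(device))
    print('pred std per sequence :', preds.std(dim=1)[:5])
    print('label std per sequence:', batch_labels.std(dim=1)[:5])

The full code: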

import torch
import pandas as pd
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import StepLR
import matplotlib.pyplot as plt

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class CustomLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout, num_layers):
        super(CustomLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True)
        self.relu = nn.ReLU()  # ReLU activation layer
        self.bn = nn.BatchNorm1d(hidden_size * 2)  # Batch normalization layer
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size * 2, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).double().to(x.device)
        c0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).double().to(x.device)

        x = torch.nn.functional.normalize(x)  # F.normalize defaults to L2 normalization along dim=1 (here: the time dimension)
        out, _ = self.lstm(x, (h0, c0))
        out = self.relu(out[:, -1, :])  # Apply ReLU activation
        out = self.bn(out)  # Apply batch normalization
        out = self.dropout(out)  # Apply dropout
        out = self.fc(out)

        return out

input_size = 19  # Number of input features

# Loss calculation for regression model
criterion = nn.MSELoss()

data = pd.read_csv('chapter6/a_without_normalization.csv')

# Split the dataset into train and test sets
train_size = int(0.9 * len(data)) 
test_size = len(data) - train_size
train_dataset, test_dataset = data[:train_size], data[train_size:]

def create_sequences(data, sequence_length):
    sequences = []
    labels = []
    for i in range(sequence_length, len(data) - 10):
        # input: rows i-sequence_length .. i-1, feature columns c3..c21
        sequences.append(data.iloc[i-sequence_length:i, 2:2+input_size].values)
        # label: rows i+1 .. i+10 of column 0 (c1)
        labels.append(data.iloc[i + 1: i + 11, 0])
    return np.array(sequences), np.array(labels)

sequence_length = 10
train_sequences, train_labels = create_sequences(train_dataset, sequence_length)
test_sequences, test_labels = create_sequences(test_dataset, sequence_length)

# Convert to PyTorch tensors
train_sequences = torch.from_numpy(train_sequences)
train_labels = torch.from_numpy(train_labels)
test_sequences = torch.from_numpy(test_sequences)
test_labels = torch.from_numpy(test_labels)

# Create a TensorDataset from sequences and labels
train_dataset = TensorDataset(train_sequences, train_labels)
test_dataset = TensorDataset(test_sequences, test_labels)

batch_size = 32
dropout = 0.2
hidden_size = 64
weight_decay = 0.001
lstm_layers = 2
lr = 0.001

output_size = 10  # Number of output features
num_epochs = 101
model_eval_every = 2
print_loss_every = 1
save_model_every = 2500

# Create a DataLoader with the current batch size
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

train_dataloader_len = len(train_dataloader)

# Instantiate the model
model = CustomLSTM(input_size, hidden_size, output_size, dropout, lstm_layers).double().to(device)

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# Define the scheduler
scheduler = StepLR(optimizer, step_size=30, gamma=0.6)

print(f'Training with weight_decay {weight_decay}')

for epoch in range(num_epochs):
    total_loss = 0
    for batch in train_dataloader:
        # Unpack the batch
        batch_sequences, batch_labels = batch[0].to(device), batch[1].to(device)

        # Pass the batch through the model
        output = model(batch_sequences).squeeze()

        # Compute the loss
        loss = criterion(output, batch_labels)
        total_loss += loss.item()

        # Backpropagate the loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Update the learning rate
    scheduler.step()

Could this be because the differences between the values are so small? Is there a way to accurately predict such data, or do I need to normalize it in some special way?
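
By "normalize in some special way" I mean something like per-feature standardization fit on the training split only, and mapping the predictions back to the original scale afterwards; I have not tried this yet. Rough sketch (train_df / test_df are placeholder names for copies of the DataFrame splits created above, and I use sklearn.preprocessing.StandardScaler as an example):

from sklearn.preprocessing import StandardScaler

feature_cols = data.columns[2:2 + input_size]  # c3..c21
target_col = data.columns[0]                   # c1

# fit the scalers on the training split only to avoid leakage into the test set
x_scaler = StandardScaler().fit(train_df[feature_cols])
y_scaler = StandardScaler().fit(train_df[[target_col]])

for df in (train_df, test_df):
    df[feature_cols] = x_scaler.transform(df[feature_cols])
    df[[target_col]] = y_scaler.transform(df[[target_col]])

# after prediction, map back to the original price scale, e.g.
# preds_original = y_scaler.inverse_transform(preds.cpu().numpy().reshape(-1, 1)).reshape(preds.shape)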