I am trying to create an LSTM model that predicts a specific value (the first column of the dataset, index 0) for the next 10 rows. Each input sequence contains 10 rows of the time series, with 19 features per row. The sequences and labels are built like this:
# Slide over the rows: each sample pairs a window of past feature rows with
# the target values of the rows that follow it.
for i in range(sequence_length, len(data) - 10):
    # Input window: rows [i - sequence_length, i), feature columns 2 .. 2 + input_size - 1.
    sequences.append(data.iloc[i-sequence_length:i, 2:2+input_size].values)
    # Labels: column 0 of rows i + 1 .. i + 10. Row i itself ends up in neither
    # the input window nor the labels.
    labels.append(data.iloc[i + 1: i + 11, 0])
Sample data:
c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21
1.084,1.08405,1.0841,1.08405,1.0841,1.084,11240,6.249999999985434e-05,-1.0164458235761842e-05,-5.1788748878102555e-05,1.0840285714285716,1.0840928571428572,1.0840280952380952,1.08405,-0.000937629492890638,0.8237791754445127,-0.009223815892633767,49.223395431868134,-3.13680151375703,0.010743580701520136,1000.2306464528247
1.084,1.08405,1.08405,1.08405,1.0841,1.08405,14158,-2.4999999999941735e-05,-9.32997172098382e-06,-6.046625792230974e-05,1.0840285714285716,1.0840857142857143,1.0840309523809522,1.084046103896104,-0.0008606520795521739,3.185291329162407,-0.009223815892633767,49.223395431868134,-2.9477598235694686,0.009208783458445832,1000.2306464528247
1.0839,1.08395,1.08405,1.08395,1.08405,1.08385,19095,-0.00015749999999981057,-1.6547055257998267e-05,-7.543797446324434e-05,1.0840142857142856,1.0840690476190478,1.0840204761904761,1.0840337662337662,-0.0015264100999568611,8.156945531675506,-0.009224666758912318,41.76004501048701,-4.958497925954123,-0.26489247132130206,1000.2306464528247
1.08395,1.084,1.08395,1.084,1.084,1.08385,12756,-0.0001474999999999671,-1.8024291017937344e-05,-9.06405060916429e-05,1.0840035714285714,1.0840547619047618,1.0840185714285715,1.084027489177489,-0.0016626858514864735,7.660743847017261,0.009225943352706798,46.15600965239905,-5.393125751532237,-0.13593640398949522,1000.2767846809004
The loss decreases substantially (I managed to get it down to 3.1…e-8), yet all of the predictions for a given sequence are always the same number.
For example the labels for a sequence could be [1.084,1.0845,1.084,1.08395,1.0839,1.0838,1.0839,1.084,1.0845,1.084]
And the predictions that I get back are:
[1.08395,1.08395,1.08395,1.08395,1.08395,1.08395,1.08395,1.08395,1.08395,1.08395]
At the moment I use batch size of 32, so I get roughly the following:
[
[1.08395,1.08395,..]
[1.0841,1.0841,..]
..
]
I do not understand why the predictions do not follow the movement of the value that they are meant to predict. Obviously, getting the same value for each of the next n rows is not useful, even if the loss is decreasing…
import torch
import pandas as pd
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import StepLR
import matplotlib.pyplot as plt
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
class CustomLSTM(nn.Module):
    """Bidirectional LSTM regressor mapping a feature window to ``output_size`` values.

    Pipeline: L2 normalisation -> stacked bidirectional LSTM -> last time
    step -> ReLU -> BatchNorm1d -> Dropout -> Linear.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per LSTM direction.
        output_size: Number of values produced per input sequence.
        dropout: Drop probability of the dropout layer in front of the
            final linear layer (it is not applied between LSTM layers).
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, dropout, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        self.relu = nn.ReLU()
        # The two directions are concatenated, hence 2 * hidden_size features.
        self.bn = nn.BatchNorm1d(hidden_size * 2)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size * 2, output_size)

    def forward(self, x):
        """Return predictions of shape ``(batch, output_size)``.

        Args:
            x: Input of shape ``(batch, seq_len, input_size)``; the LSTM is
                built with ``batch_first=True``. Any floating dtype/device
                works as long as it matches the module's parameters.
        """
        # NOTE(review): dim=1 (the default of F.normalize, now spelled out) is
        # the *time* axis for batch-first input, so each feature is scaled to
        # unit L2 norm across the window. A feature that is almost constant
        # within a window therefore collapses to ~1/sqrt(seq_len) and its
        # level is lost. Consider standardising the features with statistics
        # computed on the training set instead - confirm the intent.
        x = torch.nn.functional.normalize(x, dim=1)

        # No explicit (h0, c0): nn.LSTM starts from zero states created with
        # the input's dtype and device, so the module is no longer tied to
        # float64 and no CPU-to-device copy happens on every call.
        out, _ = self.lstm(x)

        # NOTE(review): at the last time step the backward direction has only
        # processed a single element of the sequence.
        out = self.relu(out[:, -1, :])
        out = self.bn(out)
        out = self.dropout(out)
        return self.fc(out)
# Number of feature columns fed to the model at every time step
input_size = 19

# Regression objective: mean squared error
criterion = nn.MSELoss()

data = pd.read_csv('chapter6/a_without_normalization.csv')

# Chronological split: the first 90% of the rows train the model and the
# remaining rows are held out for testing.
train_size = int(0.9 * len(data))
test_size = len(data) - train_size
train_dataset = data[:train_size]
test_dataset = data[train_size:]
def create_sequences(data, sequence_length, horizon=10, gap=1, num_features=None):
    """Build sliding-window inputs and multi-step labels from a DataFrame.

    For every anchor row ``i`` the input is the ``sequence_length`` rows that
    end just before ``i`` (feature columns starting at column 2) and the
    label is column 0 of ``horizon`` consecutive rows starting at ``i + gap``.

    Args:
        data: DataFrame whose column 0 is the target and whose feature
            columns start at column 2.
        sequence_length: Number of past rows in each input window.
        horizon: Number of future target values per sample.
        gap: Offset between the anchor row and the first label row. The
            default of 1 keeps the historical behaviour, in which row ``i``
            is neither part of the input nor of the labels; pass ``gap=0``
            to predict the rows that immediately follow the input window.
        num_features: Number of feature columns to take. Defaults to the
            module-level ``input_size``.

    Returns:
        Tuple ``(sequences, labels)`` of numpy arrays shaped
        ``(n, sequence_length, num_features)`` and ``(n, horizon)``. ``n`` is
        0, with the same ranks, when ``data`` is too short for one sample.
    """
    if num_features is None:
        num_features = input_size

    # Convert once up front instead of slicing the DataFrame for every window.
    features = data.iloc[:, 2:2 + num_features].to_numpy()
    targets = data.iloc[:, 0].to_numpy()

    # Last valid anchor satisfies i + gap + horizon <= len(data).
    anchors = range(sequence_length, len(data) - gap - horizon + 1)
    if not anchors:
        empty_sequences = np.empty((0, sequence_length, features.shape[1]), dtype=features.dtype)
        empty_labels = np.empty((0, horizon), dtype=targets.dtype)
        return empty_sequences, empty_labels

    sequences = np.stack([features[i - sequence_length:i] for i in anchors])
    labels = np.stack([targets[i + gap:i + gap + horizon] for i in anchors])
    return sequences, labels
sequence_length = 10

# Window each split and turn the resulting numpy arrays into tensors.
train_sequences, train_labels = (
    torch.from_numpy(array)
    for array in create_sequences(train_dataset, sequence_length)
)
test_sequences, test_labels = (
    torch.from_numpy(array)
    for array in create_sequences(test_dataset, sequence_length)
)

# From here on the *_dataset names refer to TensorDatasets of
# (sequence, label) pairs rather than to the raw DataFrames.
train_dataset = TensorDataset(train_sequences, train_labels)
test_dataset = TensorDataset(test_sequences, test_labels)
# Hyperparameters
batch_size = 32
dropout = 0.2
hidden_size = 64
weight_decay = 0.001
lstm_layers = 2
lr = 0.001
output_size = 10  # Number of output features
num_epochs = 101
model_eval_every = 2
print_loss_every = 1
save_model_every = 2500

# Create the DataLoaders with the current batch size.
# NOTE(review): the training loader iterates in file order (no shuffle), so
# every batch holds consecutive, heavily overlapping windows - confirm that
# this is intended.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)
train_dataloader_len = len(train_dataloader)

# Instantiate the model in float64
model = CustomLSTM(input_size, hidden_size, output_size, dropout, lstm_layers).double().to(device)

# Define the optimizer. weight_decay is now actually handed to Adam; before,
# it was only printed below and no regularisation was applied.
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# Define the scheduler: multiply the learning rate by 0.6 every 30 scheduler steps
scheduler = StepLR(optimizer, step_size=30, gamma=0.6)
print(f'Training with weight_decay {weight_decay}')
for epoch in range(num_epochs):
    # Make sure Dropout/BatchNorm are in training mode at the start of every
    # epoch (a no-op if the model is already in training mode).
    model.train()
    total_loss = 0
    for batch in train_dataloader:
        # Unpack the batch and move it to the training device
        batch_sequences, batch_labels = batch[0].to(device), batch[1].to(device)

        # The model already returns (batch, output_size), which lines up with
        # the 2-D label batch. The former .squeeze() did nothing for regular
        # batches and dropped the batch dimension for a single-sample batch,
        # so it is removed.
        output = model(batch_sequences)

        # Compute the loss and accumulate it for the epoch
        loss = criterion(output, batch_labels)
        total_loss += loss.item()

        # Backpropagate the loss and update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Update the learning rate once per epoch (StepLR counts scheduler steps).
    scheduler.step()
Could this be because the differences between the values are so small? Is there a way to accurately predict such data, or do I need to normalize the data in some special way?