Predicting the output of a NARMA10 system with a TDNN

The training loss and the predicted training output look fine, but the validation loss is wrong. If I repeat the process with the whole training set and the test set, the prediction collapses to a flat line even though the loss is low. How can I resolve this problem?
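
For reference, I believe the data was generated with the standard tenth-order NARMA benchmark (an assumption on my part, since I only have the CSV):

y(t+1) = 0.3 y(t) + 0.05 y(t) [ y(t) + y(t-1) + ... + y(t-9) ] + 1.5 u(t-9) u(t) + 0.1

so every target depends on the last 10 inputs and outputs, which is why I window the input with tau = 10.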

Here’s my code:

from matplotlib import pyplot as plt
import numpy as np
import time
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.utils.data as data

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # currently unused: nothing is moved to the GPU below
# NARMA10.csv holds two rows: the input sequence u(t) and the target sequence y(t)
narma = np.loadtxt("NARMA10.csv", delimiter=",")
input_data = narma[0]
target_data = narma[1]
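
# I left my windowing helper out of the paste; this is a minimal sketch of
# what it does (my convention: window i holds the tau consecutive inputs
# u(i) .. u(i+tau-1), so window i is paired with target y(i+tau) below)
def windowing(series, tau):
    return np.array([series[i:i + tau] for i in range(len(series) - tau + 1)])
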
# define the neural network
class TD_Neural_Net(nn.Module):
    def __init__(self, inputsize, hiddensize, outputsize):
        super().__init__()
        self.fc1 = nn.Linear(inputsize, hiddensize)
        self.fc2 = nn.Linear(hiddensize, outputsize)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        out = self.fc2(x)
        return out

class CustomDataset(data.Dataset):
    def __init__(self, inputs, outputs):
        self.inputs = inputs
        self.outputs = outputs

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        x = torch.tensor(self.inputs[idx], dtype=torch.float32)
        y = torch.tensor(self.outputs[idx], dtype=torch.float32)
        return x, y

# define the loss function (MSE)
loss_fn = nn.MSELoss()

def train_val(network, train_loader, val_loader):
    epochs = 20
    train_loss_matrix = []
    val_loss_matrix = []
    start_time = time.time()
    optimizer = torch.optim.Adam(network.parameters(), lr=0.001)

    train_len = len(train_loader.dataset)
    val_len = len(val_loader.dataset)
    for epoch in range(1, epochs + 1):
        train_loss, val_loss = 0.0, 0.0
        epoch_train_preds = []
        # training
        network.train()
        for x_batch, y_batch in train_loader:
            optimizer.zero_grad()
            y_pred = network(x_batch)
            y_batch = y_batch.view(-1, 1)  # match y_pred's (batch, 1) shape
            loss = loss_fn(y_pred, y_batch)
            loss.backward()
            optimizer.step()

            train_loss = train_loss + loss.item()  # accumulate the loss over the training batches
            if epoch == epochs:  # keep the predictions of the final epoch
                epoch_train_preds.append(y_pred.squeeze().detach().numpy())

        train_loss = train_loss / train_len  # per-sample average, since batch_size is 1
        train_loss_matrix.append(train_loss)
        
        # validation
        epoch_val_preds = []
        network.eval()
        with torch.no_grad():  # no gradients needed during evaluation
            for x_batch, y_batch in val_loader:
                y_pred = network(x_batch)
                y_batch = y_batch.view(-1, 1)
                loss = loss_fn(y_pred, y_batch)
                val_loss = val_loss + loss.item()  # accumulate the loss over the validation batches
                if epoch == epochs:
                    epoch_val_preds.append(y_pred.squeeze().numpy())

        val_loss = val_loss / val_len
        val_loss_matrix.append(val_loss)
        print('epoch {}, train loss = {}, val loss = {}'.format(epoch, train_loss, val_loss))
    
       
    end_time = time.time()
    total_time = end_time - start_time
    print("Total computational time:", total_time, "seconds")

    return train_loss_matrix, epoch_train_preds, val_loss_matrix, epoch_val_preds


# target splitting
whole_target_training = target_data[:5000]
target_test = target_data[5000:]
# skip the first 10 targets: they are null, since the system needs 10 past steps
target_train = whole_target_training[10:4000]
target_val = whole_target_training[4000:]

# hyperparameters and network setting
tau = 10
hidden_size = 32
my_network = TD_Neural_Net(inputsize=tau, hiddensize=hidden_size, outputsize=1)

# windowing and input splitting
input_windowed = windowing(input_data, tau)
whole_input_training = input_windowed[:5000]
# keep the same window -> target alignment in every split: in the training
# pairs below, window i goes with target i + 10
input_train = whole_input_training[:3990]                  # windows 0..3989    -> targets 10..3999
input_val = whole_input_training[3990:4990]                # windows 3990..4989 -> targets 4000..4999
input_test = input_windowed[4990:4990 + len(target_test)]  # windows 4990..     -> targets 5000..
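
# quick sanity check on the pairing: the splits must have matching lengths
# (these hold under the windowing sketch above; adjust if your convention differs)
assert len(input_train) == len(target_train)
assert len(input_val) == len(target_val)
assert len(input_test) == len(target_test)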


# build the datasets from the training/validation pairs
train_dataset = CustomDataset(input_train, target_train)
val_dataset = CustomDataset(input_val, target_val)


train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

# call the function for the training and the validation sets
train_loss, y_pred_train, val_loss, y_pred_val = train_val(my_network, train_loader, val_loader)

plt.figure()
plt.plot(val_loss, label='validation loss')
plt.plot(train_loss, label='training loss')
plt.xlabel('epochs')
plt.legend()
plt.title('training and validation losses')

plt.figure()
plt.plot(y_pred_train[:100], label='predicted training')
plt.plot(target_train[:100], label='target training')
plt.legend()
plt.title('predicted vs target training')

plt.figure()
plt.plot(y_pred_val[:100], label='predicted validation')
plt.plot(target_val[:100], label='target validation')
plt.legend()
plt.title('predicted vs target validation')

# build the datasets for the whole-training/test run; align the pairs the
# same way as above (window i -> target i + 10, skipping the 10 null targets)
training_dataset = CustomDataset(input_windowed[:4990], target_data[10:5000])
test_dataset = CustomDataset(input_test, target_test)

training_loader = DataLoader(training_dataset, batch_size=1, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# re-initialise the network so the second run starts from scratch instead of
# continuing from the weights already fitted on the train/val split
my_network = TD_Neural_Net(inputsize=tau, hiddensize=hidden_size, outputsize=1)

# call the function for the training and test sets
training_loss, y_pred_training, test_loss, y_pred_test = train_val(my_network, training_loader, test_loader)

plt.figure()
plt.plot(training_loss, label='training loss')
plt.plot(test_loss, label='test loss')
plt.xlabel('epochs')
plt.legend()
plt.title('training and test losses')

plt.figure()
plt.plot(y_pred_training[:100], label='whole predicted training')
plt.plot(target_data[10:110], label='whole target training')  # first 100 targets, aligned with the windows
plt.legend()
plt.title('predicted vs target training (whole)')

plt.figure()
plt.plot(y_pred_test[:100], label='predicted test')
plt.plot(target_test[:100], label='target test')
plt.legend()
plt.title('predicted vs target test')
plt.show()