The training loss and the predicted training output look good, but the validation loss is wrong. If I repeat the process with the whole training set and the test set, the prediction is just a line even though the loss is low. How can I resolve this problem?
Here’s my code:
from matplotlib import pyplot as plt
import os
import numpy as np
import random
import torch
import torch.nn as nn
import time
from torch.utils.data import DataLoader
import torch.utils.data as data
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
narma = np.loadtxt("NARMA10.csv", delimiter = ",")
input_data = narma[0]
target_data = narma[1]
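# NARMA10.csv is assumed here to contain two rows: row 0 = input sequence, row 1 = target sequence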
# define the neural network
class TD_Neural_Net(nn.Module):
    def __init__(self, inputsize, hiddensize, outputsize):
        super().__init__()
        self.fc1 = nn.Linear(inputsize, hiddensize)
        self.fc2 = nn.Linear(hiddensize, outputsize)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        #x = self.fc1(x)
        out = self.fc2(x)
        return out
class CustomDataset(data.Dataset):
    def __init__(self, inputs, outputs):
        self.inputs = inputs
        self.outputs = outputs

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        x = torch.tensor(self.inputs[idx], dtype=torch.float32)
        y = torch.tensor(self.outputs[idx], dtype=torch.float32)
        return x, y
# define loss as function of MSE
loss_fn = nn.MSELoss()
def train_val(network, train_loader, val_loader):
    epochs = 20
    epoch = 1
    train_loss_matrix = []
    val_loss_matrix = []
    start_time = time.time()
    y_pred_training = []
    y_pred_val = []
    #optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)
    optimizer = torch.optim.Adam(network.parameters(), lr = 0.001)
    train_len = len(train_loader.dataset)
    val_len = len(val_loader.dataset)
    while epoch < epochs:
        train_loss, val_loss = 0.0, 0.0
        epoch_train_preds = []
        # training
        network.train()
        for x_batch, y_batch in train_loader:
            #x, y = input_training[i], output_training[i] # get input/output pairs
            #x, y = torch.tensor(x).to(torch.float32), torch.tensor(y).to(torch.float32) # convert them to torch tensors
            #print(x_batch.shape)
            #print(y_batch.shape)
            optimizer.zero_grad()
            y_pred = network(x_batch)
            #print(y_pred.shape)
            #print(y)
            # apply transformation to target tensor y
            #y_batch = y_batch.view(-1, 1)
            #print(y_batch.shape)
            loss = loss_fn(y_pred, y_batch)
            #print(y_pred, y, loss)
            loss.backward()
            optimizer.step()
            train_loss = train_loss + loss.item() # accumulate the loss over the training pairs
            #print(train_loss)
            if epoch == epochs - 1:
                epoch_train_preds.append(y_pred.squeeze().detach().numpy())
        train_loss = train_loss / train_len
        #print(train_loss)
        train_loss_matrix.append(train_loss)
        # validation
        epoch_val_preds = []
        network.eval() # switch to evaluation mode
        with torch.no_grad(): # no gradients are needed during validation
            for x_batch, y_batch in val_loader:
                #x, y = input_val[j], output_val[j] # get input/output pairs
                #x, y = torch.tensor(x).to(torch.float32), torch.tensor(y).to(torch.float32) # convert them to torch tensors
                #x, y = x.to(device), y.to(device) # send data to the device
                y_pred = network(x_batch)
                # apply transformation to target tensor y
                y_batch = y_batch.view(-1, 1)
                loss = loss_fn(y_pred, y_batch)
                val_loss = val_loss + loss.item() # accumulate the loss over the validation pairs
                if epoch == epochs - 1:
                    epoch_val_preds.append(y_pred.squeeze().detach().numpy())
        val_loss = val_loss / val_len
        val_loss_matrix.append(val_loss)
        print('epoch {}, train loss = {}, val loss = {}'.format(epoch, train_loss, val_loss))
        #if epoch == epochs - 1:
        #    y_pred_training = np.concatenate(epoch_train_preds).ravel()
        #    y_pred_val = np.concatenate(epoch_val_preds).ravel()
        epoch = epoch + 1
    end_time = time.time()
    total_time = end_time - start_time
    print("Total computational time:", total_time, "seconds")
    return train_loss_matrix, epoch_train_preds, val_loss_matrix, epoch_val_preds
# target splitting
whole_target_training = target_data[:5000]
target_test = target_data[5000:]
target_train = whole_target_training[10:4000] # skip the first 10 target values since they are null
print(target_train)
target_val = whole_target_training[4000:]
print(len(target_train))
# hyperparameters and network setting
tau = 10
hidden_size = 32
my_network = TD_Neural_Net(inputsize = tau , hiddensize = hidden_size, outputsize=1)
# windowing and input splitting
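# NOTE: the windowing() helper used just below is not shown in this post.
# A minimal sketch of what it is assumed to do (slide a window of length tau
# over the input sequence, one window per time step) could look like this:
def windowing(sequence, tau):
    # window i holds samples [i, i + tau); any incomplete trailing window is dropped
    return np.array([sequence[i:i + tau] for i in range(len(sequence) - tau + 1)])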
input_windowed = windowing(input_data, tau)
whole_input_training = input_windowed[:5000]
input_test = input_windowed[5000:]
input_train = whole_input_training[:3990] # stop at 3990 windows since the first 10 target values are skipped
input_val = whole_input_training[4000:]
print(len(input_train))
# inputs and outputs are input and output training/validation data
train_dataset = CustomDataset(input_train, target_train)
val_dataset = CustomDataset(input_val, target_val)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle = False)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle = False)
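# Optional sanity check (not in the original post): peek at one batch from the
# loader to confirm the input/target shapes the network will actually receive.
x_check, y_check = next(iter(train_loader))
print(x_check.shape, y_check.shape)  # expected with batch_size=1: (1, tau) inputs and (1,) targets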
# call the function for the training and the validation sets
train_loss, y_pred_train, val_loss, y_pred_val = train_val(my_network, train_loader, val_loader)
plt.figure()
plt.plot(val_loss, label = 'validation loss')
plt.plot(train_loss, label = 'training loss')
plt.xlabel('epochs')
plt.legend()
plt.title('training and validation losses')
plt.figure()
plt.plot(y_pred_train[:100], label='predicted training')
plt.plot(target_train[:100], label = 'target training')
plt.legend()
plt.title('predicted vs target training')
plt.figure()
plt.plot(y_pred_val[:100], label='predicted validation')
plt.plot(target_val[:100], label = 'target validation')
plt.legend()
plt.title('predicted vs target validation')
# inputs and outputs are input and output training/test data
training_dataset = CustomDataset(whole_input_training, whole_target_training)
test_dataset = CustomDataset(input_test, target_test)
training_loader = DataLoader(training_dataset, batch_size=1, shuffle = False)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle = False)
# call the function for the training and test sets
training_loss, y_pred_training, test_loss, y_pred_test = train_val(my_network, training_loader, test_loader)
plt.figure()
plt.plot(training_loss, label = 'training loss')
plt.plot(test_loss, label = 'test loss')
plt.xlabel('epochs')
plt.legend()
plt.title('training and test losses')
plt.figure()
plt.plot(y_pred_training[:100], label='whole predicted training')
plt.plot(whole_target_training[:100], label = 'whole target training')
plt.legend()
plt.title('predicted vs target training (whole)')
plt.figure()
plt.plot(y_pred_test[:100], label='predicted test')
plt.plot(target_test[:100], label = 'target test')
plt.legend()
plt.title('predicted vs target test')
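# If this is run as a plain script rather than in a notebook, the figures only
# appear once matplotlib is told to render them:
plt.show()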