Why am I getting different results with pytorch?

I’m new to Pytorch and I’m following this tutorial on regression with pytorch but I’m getting different results. I’m stuck on the first example of the tutorial. what I want to do is a non linear regression, the data looks like a vertex curve and I’m trying to build a model to fit the data, the wierd thing is if I use the trainset to train my model everything works fine but when I split the data into traindata and testdata then I get a wierd behaviour when I plot the results at the end. I don’t why is this happening. here is my code:

import keras
from keras import backend as K
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import r2_score
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
!pip install torch_lr_finder
from torch_lr_finder import LRFinder

%matplotlib inline

# create random dataset for regression

# dataset = make_regression(n_samples=1000, n_features=1, noise=3, random_state=0)
# X, y = dataset
X = np.linspace(-1, 1, 100).reshape(-1, 1)
y = np.power(X, 2) + 0.2* np.random.rand(*X.shape)
X.shape, y.shape

# implementation using pytorch

# transform data to Tensors and split testset and validation set
y = y.reshape(-1, 1)
print(X.shape, y.shape)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.1)
X_train_torch = torch.from_numpy(X_train.astype(np.float32))
y_train_torch = torch.from_numpy(y_train.astype(np.float32))
X_test_torch = torch.from_numpy(X_test.astype(np.float32))
y_test_torch = torch.from_numpy(y_test.astype(np.float32))

# normalize data
X_train_torch = (X_train_torch - torch.mean(X_train_torch)) / torch.std(X_train_torch)
X_test_torch = (X_test_torch - torch.mean(X_test_torch)) / torch.std(X_test_torch)

print(X_train_torch.shape, y_train_torch.shape)

X = torch.from_numpy(X.astype(np.float32))
y = torch.from_numpy(y.astype(np.float32))

# create a TensorDataset from the actual dataset
coarse_trainset = TensorDataset(X_train_torch, y_train_torch)
coarse_testset = TensorDataset(X_test_torch, y_test_torch)
trainset = TensorDataset(X, y)  # if I use this trainset and pass the loader to the fit function everything works fine

# create the loader of the dataset
coarse_train_loader = DataLoader(coarse_trainset, batch_size=100, shuffle=True) 
coarse_test_loader = DataLoader(coarse_testset, batch_size=100, shuffle=False)
loader = DataLoader(trainset, batch_size=len(y), shuffle=True)

# definition of the Coarse Model
class CoarseModel(nn.Module):
    """main neural network to learn input/position relationship"""
    def __init__(self, n_features, n_hidden, n_out):
        self.fc1 = nn.Linear(n_features, n_hidden)
        self.fc2 = nn.Linear(n_hidden, n_out)
    def forward(self, x):
        out = F.relu(self.fc1(x))
        out = self.fc2(out)
        return out

coarse_model = CoarseModel(X.shape[1], 10, 1)

# training function => will perform a single training step given a batch
def training_func(model, criterion, optimizer):
    def train_step(sample, target):
        out = model(sample)
        loss = criterion(out, target)
        return loss.item()
    return train_step

# fit function will train the given Model over defined epochs 
def fit(epochs, model, lr, train_loader, test_loader):
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=lr)
    lr_finder = LRFinder(model, optimizer, criterion)
    lr_finder.range_test(train_loader, end_lr=10, num_iter=500, step_mode="exp")
    lr_finder.range_test(train_loader, val_loader=test_loader, end_lr=10, num_iter=500, step_mode="exp")
    train_losses, test_losses, accuracy_list = ([] for _ in range(3))
    train = training_func(model=model, criterion=criterion, optimizer=optimizer)
    for e in range(epochs):
        for x, y in train_loader:
            loss_val = train(x, y)
            with torch.no_grad():
                for x_test, y_test in test_loader:
                    pred = model(x_test)
                    loss = criterion(pred, y_test)
                    accuracy = r2_score(y_test.numpy(), pred.numpy())
        print(f"Epoch: {e} => average training loss= {sum(train_losses)/ len(train_losses)} average test losses= {sum(test_losses)/ len(test_losses)} and accuracy={sum(accuracy_list)/ len(accuracy_list)}")
    return train_losses, test_losses, accuracy_list

# plot the results of the training and testing loss values over the iterations
def plot_results(train_losses, test_losses, accs):
    plt.plot(train_losses, color="b")
    plt.title("train losses over time")
    plt.plot(test_losses, color= "b")
    plt.title("test losses over time")
    plt.plot(accs, color= "b")
    plt.title("accuracy over time")

# train the Coarse Model
train_losses, test_losses, accs = fit(epochs=200, 
                                      test_loader=coarse_test_loader)   # here it is interesting if I pass the loader to the argument train_loader and test_loader then everything works as expected but leaving it like this result to a wierd plot at the end

plot_results(train_losses, test_losses, accs)
print("final train loss value = ", train_losses[-1])
print("final test loss value = ", test_losses[-1])
print("final accuracy of the model => ", accs[-1])

with torch.no_grad():
    criterion = nn.MSELoss()
    coarse_preds = coarse_model(X_train_torch)
    coarse_loss = criterion(coarse_preds, y_train_torch)

# if you plot this you ll get what I mean but if I dont split the data and make a regression on the whole data X and y then everything works fine. only when I split the data and let the model learn then I get a wierd graph like this here. I dont know why !!
plt.scatter(X_train_torch.detach().numpy(), y_train_torch.detach().numpy(), c="r", marker="o")
plt.plot(X_train_torch.detach().numpy(), coarse_preds.detach().numpy(), color="g")

I hope someone can help me with this. pytorch is confusing me since it is mandatory to use that loader structure to loop through the data if I want to use mini batches, but I can’t see what I’m doing wrong here

I assume you are concerned about the chaotic lines in the last plot.
Note that train_test_split shuffles the data, so you could sort the tensor before plotting:

X_train_torch_sorted, idx = torch.sort(X_train_torch, 0)
plt.plot(X_train_torch_sorted.detach().numpy(), coarse_preds[idx[:, 0]].detach().numpy(), color="g")
1 Like

thanks @ptrblck, you are right I totally forgot about that!