Bug in code for universal approximator

Dear all, I am trying to approximate a mathematical function (a quadratic) with a feed-forward NN.
The training loss and validation loss both go down as desired;
however, the NN does not seem to learn at all!
The problem is that when I plot `test_x` against the prediction from the trained NN (`y_pred`),
I get something very strange.
I would be grateful for any help.

Here is the complete code. Thank you again for all the help.

```python
import matplotlib.pyplot as plt
#from IPython.display import display, Markdown, Latex

import numpy as np
import math, random
import pandas as pd
from pandas import DataFrame

from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.nn.init as init
import torch.utils.data
import torch.optim 
from torch.optim import lr_scheduler

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import r2_score

from scipy.signal import gausspulse

HP = {
    #'NUM_HIDDEN_NODES'    : 10 ,
    'NUM_EXAMPLES'        : 100000 ,
    'TRAIN_SPLIT'         : .8 ,
    'MINI_BATCH_SIZE'     : 1000 ,
    'NUM_EPOCHS'          : 600 ,
    'LEARNING_RATE'       : 1e-5 ,
    'LEARNING_RATE_DECAY' : 500 ,
    'WEIGHT_DECAY'        : 5e-4 ,
    'NUM_MOMENTUM'        : 0.9 ,
    'NUM_PATIENCE'        : 50 ,
    'SEED'                : 2018
}
def to_np(x):
    return x.data.cpu().numpy()

def to_var(x, non_blocking=False):
    # `async` is a reserved keyword in Python 3.7+, so use `non_blocking`
    if torch.cuda.is_available():
        x = x.cuda(non_blocking=non_blocking)
    return Variable(x)

# weight initialisation routine
def glorot_weight_zero_bias(model):
    """
    Initialize parameters of all modules
    by initializing weights with glorot  uniform/xavier initialization,
    and setting biases to zero.
    Weights from batch norm layers are set to 1.

    Parameters
    ----------
    model: Module
    """
    for module in model.modules():
        if hasattr(module, 'weight') and module.weight is not None:
            if not ('BatchNorm' in module.__class__.__name__):
                init.xavier_uniform_(module.weight, gain=1)
            else:
                init.constant_(module.weight, 1)
        if hasattr(module, 'bias'):
            if module.bias is not None:
                init.constant_(module.bias, 0)

np.random.seed(HP['SEED'])
torch.manual_seed(HP['SEED'])

# create the input data
gg = torch.linspace(-1, 1, HP['NUM_EXAMPLES'])

# unsqueeze turns it into a column vector
x_data = np.float32(torch.unsqueeze(gg, dim=1).numpy())

# function to learn
function_to_learn = lambda x: x**2 + 0.001*np.random.randn(*x.shape)

#output
y_data = function_to_learn(x_data)

# randomly shuffle the data

# create an index over the data
idx = np.arange(HP['NUM_EXAMPLES'])

# shuffle the index
np.random.shuffle(idx)

# create the train, validate and test indices from the shuffled set
# using np.split
train_idx,validate_idx, test_idx = np.split(idx,[int(0.6*len(idx)), int(0.8*len(idx))])#train, validate, test

# reshape x_data and y_data into column vectors
x_data=np.float32(x_data).reshape(HP['NUM_EXAMPLES'],1)
y_data=np.float32(y_data).reshape(HP['NUM_EXAMPLES'],1)

# create the training and test data batches

# first, normalisation
scaler = MinMaxScaler(feature_range=(-1, 1))

# fit the scaler on the training targets
# (note: the result of this call is discarded, and each fit_transform below refits the scaler)
scaler.fit_transform(y_data[train_idx])

# normalise y and x
y_data = scaler.fit_transform(y_data)
x_data = scaler.fit_transform(x_data)

####### create the training data (x and y)
# first identify the x-train and y-train sets
train_x, train_y = x_data[train_idx], y_data[train_idx]

# create a training set comprising both x and y
train_set=torch.utils.data.TensorDataset(torch.FloatTensor(train_x),torch.FloatTensor(train_y))
train_loader=torch.utils.data.DataLoader(train_set,batch_size=HP['MINI_BATCH_SIZE'],
                                         shuffle=True,pin_memory=True,num_workers=0)

###### create the validation set
valid_x, valid_y=x_data[validate_idx],y_data[validate_idx]
valid_set=torch.utils.data.TensorDataset(torch.FloatTensor(valid_x),torch.FloatTensor(valid_y))
valid_loader=torch.utils.data.DataLoader(valid_set,batch_size=HP['MINI_BATCH_SIZE'],shuffle=True,
                                        pin_memory=True, num_workers=0)


###### create the test set
test_x, test_y = x_data[test_idx], y_data[test_idx]
test_set=torch.utils.data.TensorDataset(torch.FloatTensor(test_x),torch.FloatTensor(test_y))
test_loader=torch.utils.data.DataLoader(test_set,batch_size=HP['MINI_BATCH_SIZE'],shuffle=True,
                                        pin_memory=True, num_workers=0)



# sanity-check the shapes (displayed when run in a notebook)
train_x.shape, valid_x.shape, test_x.shape, train_y.shape, valid_y.shape, test_y.shape

#build neural network
class Network(nn.Module):
    def __init__(self,D_in,D_out):
        super().__init__()
        
        # Inputs to hidden layer linear transformation
        self.lin1 = nn.Linear(D_in, 144)
        self.lin2=nn.Linear(144,72)
        self.lin3=nn.Linear(72,18)
        # Output layer,
        self.output = nn.Linear(18, D_out)
        
        
    def forward(self, x):
        # data flows through the network here; the ordering of the
        # layers in forward() defines the architecture
        # Pass the input tensor through each of our operations
        
        x = self.lin1(x)
        x = torch.tanh(x)

        x = self.lin2(x)
        x = torch.tanh(x)

        x = self.lin3(x)
        x = torch.tanh(x)

        # tanh on the output keeps predictions in (-1, 1),
        # which matches the (-1, 1)-scaled targets
        x = self.output(x)
        y = torch.tanh(x)

        return y
    

model = Network(train_x.shape[1], train_y.shape[1])

# weight initialisation routine
glorot_weight_zero_bias(model)

criterion = torch.nn.MSELoss(reduction='sum')  # size_average=False is deprecated; 'sum' is the equivalent
optimizer = torch.optim.SGD(model.parameters(), 
                            lr=HP['LEARNING_RATE']) 
                            #momentum=HP['NUM_MOMENTUM'], 
                            #weight_decay=HP['WEIGHT_DECAY'], 
                            #)
#train
%matplotlib notebook

glorot_weight_zero_bias(model)
model.train()

train_losses = []
valid_losses = []
valid_score = []
epochs=[]

#epoch_iter = tqdm(range(1, HP['NUM_EPOCHS'] + 1))  # optional progress bar
epoch_iter = range(1, HP['NUM_EPOCHS'] + 1)

for epoch in epoch_iter:
    
    epochs.append(epoch)
    
    # training: iterate over all training batches
    batch_idx, tloss_avg, vloss_avg = 0, 0, 0
    for batch_idx, (data, target) in enumerate(train_loader):
        y_pred = model(to_var(data)) # predict y based on x
        loss = criterion(y_pred, to_var(target)) # compute loss

        optimizer.zero_grad() # clear gradients
        loss.backward() # compute gradients
        optimizer.step() # apply gradients

        tloss_avg += loss.item()

    tloss_avg /= batch_idx + 1
    train_losses.append(tloss_avg)

    print(" Epoch : %d , Train loss: %.4f " % (epoch, tloss_avg))
    
    # validation: evaluation only, so no gradient updates here
    batch_idx, score = 0, 0
    for batch_idx, (data, target) in enumerate(valid_loader):
        with torch.no_grad():
            y_pred = model(to_var(data))              # predict y based on x
            loss = criterion(y_pred, to_var(target))  # compute loss

        vloss_avg += loss.item()
        #score += r2_score(to_np(target), to_np(y_pred))

    vloss_avg /= batch_idx + 1
    valid_losses.append(vloss_avg)

    print(" Epoch : %d , Train loss: %.4f, Validation loss: %.4f " % (epoch, tloss_avg, vloss_avg))
    
        
# visualise the validation and training losses
%matplotlib inline
plt.figure(figsize=(7,6))
plt.plot(epochs, valid_losses, label='Validation Losses', color='r')
plt.plot(epochs, train_losses, label='Training Losses', color='b')
plt.title("Training and Validation Loss")
plt.xlabel("Epochs");
plt.ylabel("Losses");
plt.grid(True)
plt.legend(loc='upper right');



#see the trained outputs
%matplotlib inline
net = Network(train_x.shape[1], train_y.shape[1])
net.eval()

# prepare some input from the training data (not test data, to be sure the NN has learnt at least on the training set)
index=2000
inp=train_x[0:index]
input=torch.from_numpy(inp)
output=net(to_var(input))
out=to_np(output)

# plot the prediction against the training target
plt.figure()
plt.scatter(inp.flatten(), out.flatten())
plt.scatter(inp.flatten(), train_y[0:index].flatten())


```

The final output is so strange that I cannot interpret it. Please help.

Could you post the code directly by wrapping it in three backticks (```), please?
That would make debugging easier and the code snippets more readable. :slight_smile:


Hi, thank you for the reply. Yes, I have done that.
Regards,

Thanks for the code.
In your test phase you are recreating a new model and thus losing all of the training progress:

```python
# see the trained outputs
net = Network(train_x.shape[1], train_y.shape[1])
net.eval()
```

Reusing your trained model instance yields a prediction which approximates the target curve.
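For reference, here is a minimal sketch of that fix. It reuses the trained `model` instance and the `to_var`/`to_np` helpers already defined above, rather than constructing a fresh `Network`:

```python
# reuse the *trained* model instead of a freshly initialised Network
model.eval()

index = 2000
inp = train_x[0:index]
out = to_np(model(to_var(torch.from_numpy(inp))))  # forward pass with the trained weights

plt.figure()
plt.scatter(inp.flatten(), out.flatten(), label='prediction')
plt.scatter(inp.flatten(), train_y[0:index].flatten(), label='target')
plt.legend()
```

The only change from the snippet above is that no new `Network` is instantiated, so the weights learned during training are the ones used for the prediction.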


Gosh! Thanks a ton. :slight_smile: