Can a NN learn a simple interpolation?

Yoni_Keren · August 1, 2018, 6:25pm

Hi guys!

I’ve tried to make a 2-layer NN learn a simple linear interpolation of a discrete function. I’ve tried lots of different learning rates as well as different activation functions, and it seems like nothing is being learned!

I’ve literally spent the last 6 hours trying to debug the following code, but it seems like there’s no bug! So I’m wondering, is there an explanation for this?

from torch.utils.data import Dataset
import os
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import random

# x positions of the three knots of the piecewise-linear target function.
LOW_X = 255
MID_X = 40000
HIGH_X = 200000

# Target 3-vectors attached to the knots above (each sums to 1).
LOW_Y = torch.tensor([0.0, 0.0, 1.0])
MID_Y = torch.tensor([0.2, 0.5, 0.3])
HIGH_Y = torch.tensor([1.0, 0.0, 0.0])

# Mini-batch size used by the DataLoader.
BATCH_SIZE = 4

def x_to_tensor(x):
    """Return the piecewise-linear target vector for the scalar ``x``.

    The target is interpolated linearly between the knots
    (LOW_X, LOW_Y), (MID_X, MID_Y) and (HIGH_X, HIGH_Y).  Values of ``x``
    outside [LOW_X, HIGH_X] are clamped to the nearest end knot, so the
    result never leaves the range spanned by the knot vectors.

    Args:
        x: scalar position to evaluate the target at.

    Returns:
        A new tensor; the module-level knot tensors are never returned
        directly, so callers may modify the result in place.
    """
    # Clamp on both sides.  Previously only the upper side was clamped and
    # x < LOW_X was extrapolated along the first segment.
    if x <= LOW_X:
        return LOW_Y.clone()
    if x >= HIGH_X:
        return HIGH_Y.clone()
    if x <= MID_X:
        return LOW_Y + (x - LOW_X) * (MID_Y - LOW_Y) / (MID_X - LOW_X)
    return MID_Y + (x - MID_X) * (HIGH_Y - MID_Y) / (HIGH_X - MID_X)


class XYDataset(Dataset):
    """Map-style dataset that draws a fresh random (x, y) pair per access.

    The index only determines whether the access is in range; the sample
    itself is drawn with ``random.randint(LOW_X, HIGH_X)`` and is therefore
    different on every call, even for the same index.
    """

    # Nominal number of samples reported to the DataLoader (one "epoch").
    LENGTH = 10000

    def __len__(self):
        """Return the nominal dataset size."""
        return self.LENGTH

    def __getitem__(self, idx):
        """Return ``(x, y)`` where ``x`` is a random int and ``y`` its target.

        Raises:
            IndexError: if ``idx`` is outside ``[-LENGTH, LENGTH)``.  Without
                this, plain iteration over the dataset (which relies on
                IndexError to stop) would never terminate.
        """
        if not -self.LENGTH <= idx < self.LENGTH:
            raise IndexError('XYDataset index out of range: {}'.format(idx))
        x = random.randint(LOW_X, HIGH_X)
        y = x_to_tensor(x)
        return x, y




class Interpolate(nn.Module):
    """One-hidden-layer network mapping a scalar input to ``planes`` outputs.

    The pipeline is Linear(1 -> hidden_size), Tanh, Linear(hidden_size ->
    planes), then a softmax over dimension 1, so every output row is
    non-negative and sums to 1.
    """

    def __init__(self, planes, hidden_size=10):
        """Build the layers.

        Args:
            planes: number of output components.
            hidden_size: width of the hidden layer.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.x_to_hidden = nn.Linear(in_features=1, out_features=hidden_size)
        self.hidden_to_out = nn.Linear(in_features=hidden_size, out_features=planes)
        # I have tried Sigmoid and Relu activations as well
        self.activation = nn.Tanh()
        self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x):
        """Return the softmax-normalised output for ``x``.

        ``x`` must have a trailing dimension of size 1 (the callers in this
        script pass tensors reshaped with ``view(-1, 1)``).
        """
        hidden = self.activation(self.x_to_hidden(x))
        logits = self.hidden_to_out(hidden)
        return self.softmax(logits)


# Module-level training fixtures: the random-sample dataset, a shuffling
# loader over it (4 worker processes), and the mean-squared-error criterion.
dataset = XYDataset()

trainloader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)

criterion = nn.MSELoss()


def train_net(net, epochs=10, lr=5.137871216190041e-05,
              l2_regularization=2.181622809797563e-12,
              loader=None, loss_fn=None, log_every=200):
    """Train ``net`` with Adam and periodically print the mean batch loss.

    NOTE: inputs are fed to the network exactly as the loader yields them
    (only cast to float and reshaped); no rescaling is applied here.  The
    thread this script comes from reports that normalising the inputs to
    [0, 1] is what made training work.

    Args:
        net: module to optimise in place.
        epochs: number of passes over ``loader``.
        lr: Adam learning rate.
        l2_regularization: forwarded to Adam as ``weight_decay``.
        loader: sized iterable of ``(inputs, targets)`` batches.  Defaults
            to the module-level ``trainloader``.
        loss_fn: callable ``(outputs, targets) -> scalar loss``.  Defaults
            to the module-level ``criterion``.
        log_every: number of batches between two progress lines.

    Returns:
        None.  Progress is reported on stdout.
    """
    if loader is None:
        loader = trainloader
    if loss_fn is None:
        loss_fn = criterion

    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=l2_regularization)
    net.train(True)
    batches_per_epoch = len(loader)
    running_loss = 0.0
    for epoch in range(epochs):
        for i, (inputs, targets) in enumerate(loader):
            # One scalar feature per sample -> column vector of floats.
            inputs = inputs.float().view(-1, 1)
            targets = targets.float()
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = loss_fn(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # The step counter runs across epochs, so the logging cadence
            # does not reset at epoch boundaries.
            if (batches_per_epoch * epoch + i) % log_every == log_every - 1:
                # Each accumulated value is one batch's loss, and the default
                # criterion (nn.MSELoss) already averages within the batch,
                # so divide by the number of batches only.  Dividing by
                # log_every * BATCH_SIZE as well under-reported the loss.
                print('[%d,%5d] loss: %.6f ' % (epoch + 1, i + 1, running_loss / log_every))
                running_loss = 0.0



# Learning-rate sweep: train a fresh network for one epoch at each power of
# ten from 1e-11 to 1e2 and show what it predicts at the lowest knot.
for i in range(-11, 3):
    lr = 10.0 ** i
    net = Interpolate(3)
    train_net(net, lr=lr, epochs=1)
    # Report the actual learning rate; the message used to print only the
    # exponent while labelling it "learning rate".
    print('for learning rate {} net output on low x is {}'.format(lr, net(torch.Tensor([LOW_X]).view(-1, 1))))

Yoni_Keren · August 1, 2018, 9:41pm

Huh, just normalizing the inputs is the right answer.

A moderator may feel free to delete this post

InnovArul · August 1, 2018, 11:05pm

Just curious: how/what did you normalize?

Yoni_Keren · August 2, 2018, 6:56am

The inputs were integers from 255 to 200000. I’ve normalized the inputs to be in the range [0,1].