Can a NN learn a simple interpolation?

Hi guys!

I’ve tried to make a 2 layers NN learn a simple linear interpolation for a discrete function, I’ve tried lots of different learning rates as well as different activation functions, and it seems like nothing is being learned!

I’ve literally spent the last 6 hours trying to debug the following code, but it seems like there’s no bug! :stuck_out_tongue: So I’m wondering,is there an explanation to this?

from torch.utils.data import Dataset
import os
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import random

LOW_X=255

MID_X=40000

HIGH_X=200000


LOW_Y=torch.Tensor([0,0,1])
MID_Y=torch.Tensor([0.2,0.5,0.3])
HIGH_Y=torch.Tensor([1,0,0])

BATCH_SIZE=4

def x_to_tensor(x):
    if x<=MID_X:
        return LOW_Y+(x-LOW_X)*(MID_Y-LOW_Y)/(MID_X-LOW_X)
    if x<=HIGH_X:
        return MID_Y+(x-MID_X)*(HIGH_Y-MID_Y)/(HIGH_X-MID_X)
    return HIGH_Y


class XYDataset(Dataset):
    LENGTH=10000
    def __len__(self):
        return self.LENGTH
                

    def __getitem__(self, idx):
        x=random.randint(LOW_X,HIGH_X)
        y=x_to_tensor(x)
        return x,y




class Interpolate(nn.Module):

    def __init__(self, planes,hidden_size=10):
        super(Interpolate, self).__init__()
        self.hidden_size=hidden_size
        self.x_to_hidden = nn.Linear(1, hidden_size)
        self.hidden_to_out = nn.Linear(hidden_size,planes)
        self.activation = nn.Tanh() #I have tried Sigmoid and Relu activations as well
        self.softmax=torch.nn.Softmax(dim=1)

    def forward(self, x):
        out = self.x_to_hidden(x)
        out = self.activation(out)
        out = self.hidden_to_out(out)
        out = self.softmax(out)
        return out


dataset=XYDataset()

trainloader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=4)

criterion= nn.MSELoss()


def train_net(net,epochs=10,lr=5.137871216190041e-05,l2_regularization=2.181622809797563e-12):
        optimizer= optim.Adam(net.parameters(),lr=lr,weight_decay=l2_regularization)
        net.train(True)
        running_loss=0.0
        for epoch in range(epochs):
            
            for i,data in enumerate(trainloader):
                inputs,targets=data
                inputs,targets=torch.FloatTensor(inputs.float()).view(-1,1),torch.FloatTensor(targets.float())
                optimizer.zero_grad()
                outputs=net(inputs)
                loss=criterion(outputs,targets)
                loss.backward()
                optimizer.step()
                running_loss+=loss.item()              
                if (len(trainloader)*epoch+i)%200==199:
                    running_loss=running_loss/(200*BATCH_SIZE)
                    print('[%d,%5d] loss: %.6f ' % (epoch+1,i+1,running_loss))
                    running_loss=0.0



for i in range(-11,3):
    net=Interpolate(3)
    train_net(net,lr=10**i,epochs=1)
    print('for learning rate {} net output on low x is {}'.format(i,net(torch.Tensor([255]).view(-1,1))))

Huh Just normalizing the inputs is the right answer :stuck_out_tongue:

A moderator may feel free to delete this post

1 Like

just curious. how/what did you normalize?

The inputs were integer numbers from 255 to 200000. I’ve normalized the input to be in the range [0,1]

1 Like