Hi guys!
I’ve tried to make a 2 layers NN learn a simple linear interpolation for a discrete function, I’ve tried lots of different learning rates as well as different activation functions, and it seems like nothing is being learned!
I’ve literally spent the last 6 hours trying to debug the following code, but it seems like there’s no bug! So I’m wondering,is there an explanation to this?
from torch.utils.data import Dataset
import os
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import random
LOW_X=255
MID_X=40000
HIGH_X=200000
LOW_Y=torch.Tensor([0,0,1])
MID_Y=torch.Tensor([0.2,0.5,0.3])
HIGH_Y=torch.Tensor([1,0,0])
BATCH_SIZE=4
def x_to_tensor(x):
if x<=MID_X:
return LOW_Y+(x-LOW_X)*(MID_Y-LOW_Y)/(MID_X-LOW_X)
if x<=HIGH_X:
return MID_Y+(x-MID_X)*(HIGH_Y-MID_Y)/(HIGH_X-MID_X)
return HIGH_Y
class XYDataset(Dataset):
LENGTH=10000
def __len__(self):
return self.LENGTH
def __getitem__(self, idx):
x=random.randint(LOW_X,HIGH_X)
y=x_to_tensor(x)
return x,y
class Interpolate(nn.Module):
def __init__(self, planes,hidden_size=10):
super(Interpolate, self).__init__()
self.hidden_size=hidden_size
self.x_to_hidden = nn.Linear(1, hidden_size)
self.hidden_to_out = nn.Linear(hidden_size,planes)
self.activation = nn.Tanh() #I have tried Sigmoid and Relu activations as well
self.softmax=torch.nn.Softmax(dim=1)
def forward(self, x):
out = self.x_to_hidden(x)
out = self.activation(out)
out = self.hidden_to_out(out)
out = self.softmax(out)
return out
dataset=XYDataset()
trainloader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE,
shuffle=True, num_workers=4)
criterion= nn.MSELoss()
def train_net(net,epochs=10,lr=5.137871216190041e-05,l2_regularization=2.181622809797563e-12):
optimizer= optim.Adam(net.parameters(),lr=lr,weight_decay=l2_regularization)
net.train(True)
running_loss=0.0
for epoch in range(epochs):
for i,data in enumerate(trainloader):
inputs,targets=data
inputs,targets=torch.FloatTensor(inputs.float()).view(-1,1),torch.FloatTensor(targets.float())
optimizer.zero_grad()
outputs=net(inputs)
loss=criterion(outputs,targets)
loss.backward()
optimizer.step()
running_loss+=loss.item()
if (len(trainloader)*epoch+i)%200==199:
running_loss=running_loss/(200*BATCH_SIZE)
print('[%d,%5d] loss: %.6f ' % (epoch+1,i+1,running_loss))
running_loss=0.0
for i in range(-11,3):
net=Interpolate(3)
train_net(net,lr=10**i,epochs=1)
print('for learning rate {} net output on low x is {}'.format(i,net(torch.Tensor([255]).view(-1,1))))