I am trying to solve a simple toy problem about number sequences so I can learn about LSTMs. My data looks like this: X = [20, 21, 22, 23] → Y = [24]. This is my code:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from tqdm import trange
# Seed both random number generators used by this script so runs are repeatable.
randomseed = 42
torch.manual_seed(randomseed)
np.random.seed(randomseed)
# Data
def get_data(SIZE=1000):
    """Build a toy dataset of consecutive-integer windows.

    Draws ``SIZE - 1`` starting values with ``np.random.randint(SIZE)``. Each
    sample is the start value followed by its next three successors, and the
    target is the integer that comes right after the window.

    Args:
        SIZE: Upper bound (exclusive) for the start values; the number of
            generated samples is ``SIZE - 1``.

    Returns:
        Tuple ``(dataX, dataY)`` of NumPy arrays: one four-element window per
        row of ``dataX`` and the matching next value in ``dataY``.
    """
    step = 1
    window = 4
    windows, targets = [], []
    for _ in range(SIZE - 1):
        # One RNG draw per sample, in sample order.
        start = np.random.randint(SIZE)
        windows.append([start + step * offset for offset in range(window)])
        targets.append(start + step * window)
    return np.array(windows), np.array(targets)
class Net(nn.Module):
    """Small regressor: one LSTM layer followed by a two-layer ReLU head.

    Args:
        num_in: Number of features per sample (used as the LSTM input size).
        num_out: Number of values predicted per sample.
    """

    def __init__(self, num_in, num_out):
        super().__init__()
        self.inp_dim = num_in
        self.hidden_dim = 30
        self.n_layers = 1
        self.out_dim = num_out
        self.lstm = nn.LSTM(
            self.inp_dim,
            self.hidden_dim,
            self.n_layers,
            batch_first=True,
        )
        self.fc1 = nn.Linear(self.hidden_dim, self.hidden_dim)
        self.fc2 = nn.Linear(self.hidden_dim, self.out_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a ``(batch, num_in)`` tensor to ``(batch, num_out)`` predictions."""
        # Each sample is presented as a length-1 sequence whose single step
        # carries all ``num_in`` features.
        x = x.reshape(x.size(0), 1, x.size(1))
        # No explicit (h_0, c_0): nn.LSTM then starts from zeros that match the
        # input's device and dtype, so the model keeps working after
        # ``.to(device)`` or ``.double()``.
        out, _ = self.lstm(x)
        # Decode the hidden state of the last time step
        out = self.relu(out[:, -1, :])
        out = self.fc1(out)
        out = self.relu(out)
        return self.fc2(out)
def train(model, X_train, Y_train, epochs=1000):
    """Fit ``model`` on the whole training set with Adam and an MSE loss.

    Every epoch is one full-batch gradient step; the current loss is shown in
    the tqdm progress-bar description.

    Args:
        model: Module mapping the float tensor built from ``X_train`` to one
            prediction per sample.
        X_train: Array-like of inputs, one sample per row.
        Y_train: Array-like of scalar targets, one per sample.
        epochs: Number of optimisation steps to run.
    """
    model.train()
    loss_function = nn.MSELoss()
    optim = torch.optim.Adam(model.parameters(), lr=0.001)
    # The data does not change between epochs, so convert it to tensors once
    # instead of on every iteration.
    X = torch.tensor(X_train).float()
    Y = torch.tensor(Y_train).float()
    for _ in (t := trange(epochs)):
        # Reset accumulated gradients
        optim.zero_grad()
        # Forward; flatten predictions so they line up with the 1-D targets
        out = model(X).reshape(-1)
        # Loss
        loss = loss_function(out, Y)
        loss.backward()
        # Optim
        optim.step()
        t.set_description("Loss: %.2f" % loss.item())
def evall(model, X_test, Y_test):
    """Return the fraction of samples whose rounded prediction equals the target.

    Args:
        model: Module mapping the float tensor built from ``X_test`` to a
            ``(batch, 1)`` tensor of predictions.
        X_test: Array-like of inputs, one sample per row.
        Y_test: Array of integer targets, one per sample.

    Returns:
        Mean of the element-wise equality between rounded predictions and
        ``Y_test``.
    """
    X = torch.tensor(X_test).float()
    # Evaluation never backpropagates, so skip building the autograd graph.
    with torch.no_grad():
        Y_pred = model(X)
    # Round to the nearest integer and drop the trailing singleton dimension.
    Y_pred = torch.round(Y_pred).long().squeeze(dim=1).numpy()
    return np.array(Y_pred == Y_test).mean()
def test(model, x):
    """Run ``model`` on ``x`` and return the first prediction as a scalar.

    Args:
        model: Callable mapping a float tensor to a 2-D tensor of predictions.
        x: Array-like batch of inputs, one sample per row.

    Returns:
        Element ``[0, 0]`` of the model output, as a NumPy scalar.
    """
    batch = torch.tensor(x).float()
    prediction = model(batch)
    # Detach from the autograd graph before converting to NumPy.
    as_array = prediction.detach().numpy()
    return as_array[0, 0]
if __name__ == '__main__':
    dataX, dataY = get_data(100)
    # Train
    model = Net(num_in=4, num_out=1)
    # Use one 80% split index for both slices so no row ends up in both the
    # training set and the test set.
    split = int(len(dataX) * 0.80)
    X_train, Y_train = dataX[:split], dataY[:split]
    X_test, Y_test = dataX[split:], dataY[split:]
    train(model, X_train, Y_train, epochs=10000)
    # Eval
    print("--Eval--")
    res = evall(model, X_test, Y_test)
    print(res)
    print("--------")
I want to know whether this is the best model I can build for this problem using an LSTM. With a data size of just 100, I need about 10,000 epochs to reach a score of 0.9–1.0; if I increase the data size to 1,000, it is really hard to get a good score even with more than 100,000 epochs. I wonder if I am doing something wrong.