Using an LSTM to solve a number sequence problem

I am trying to solve a simple toy problem about number sequences so I can learn about LSTMs. My data looks like this: X = [20, 21, 22, 23] → Y = 24. This is my code:

import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from tqdm import trange

randomseed = 42
np.random.seed(randomseed)
torch.manual_seed(randomseed)

# Data
def get_data(SIZE=1000):
  dataX = []
  dataY = []
  inc = 1
  for i in range(SIZE):  # generate SIZE examples of 4 consecutive integers and their successor
    k = np.random.randint(SIZE)
    aux = [k]
    for j in range(3):
      k += inc
      aux.append(k)
    dataX.append(aux)
    dataY.append(k + inc)

  dataX, dataY = np.array(dataX), np.array(dataY)
  return dataX, dataY

class Net(nn.Module):
  def __init__(self, num_in, num_out):
    super().__init__()
    self.inp_dim = num_in
    self.hidden_dim = 30
    self.n_layers = 1
    self.out_dim = num_out

    self.lstm = nn.LSTM(self.inp_dim,
                        self.hidden_dim,
                        self.n_layers,
                        batch_first=True)
    self.fc1 = nn.Linear(self.hidden_dim, self.hidden_dim)
    self.fc2 = nn.Linear(self.hidden_dim, self.out_dim)
    self.relu = nn.ReLU()

  def forward(self, x):
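    # reshape to (batch, seq_len=1, features=num_in): the whole 4-number window is fed as one time step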
    x = x.reshape(x.size(0), 1, x.size(1))
    h_t = torch.zeros(self.n_layers, x.size(0), self.hidden_dim)
    c_t = torch.zeros(self.n_layers, x.size(0), self.hidden_dim)

    out, _ = self.lstm(x, (h_t, c_t))
    # Decode the hidden state of the last time step
    out = self.relu(out[:, -1, :])
    out = self.fc1(out)
    out = self.relu(out)
    out = self.fc2(out)

    return out

    

def train(model, X_train, Y_train, epochs=1000):
  model.train()
  loss_function = nn.MSELoss()
  optim = torch.optim.Adam(model.parameters(), lr=0.001)
  
  # Build the tensors once; they do not change between epochs
  X = torch.tensor(X_train).float()
  Y = torch.tensor(Y_train).float()

  for i in (t := trange(epochs)):

    # Reset model
    optim.zero_grad()
  
    # Forward
    out = model(X)
    out = out.reshape(-1)

    # Loss
    loss = loss_function(out, Y)

    loss.backward()

    # Optim
    optim.step()

    t.set_description("Loss: %.2f" % loss.item())

def evall(model, X_test, Y_test):
  model.eval()
  with torch.no_grad():
    X = torch.tensor(X_test).float()
    Y_pred = model(X)
  Y_pred = torch.round(Y_pred).long().squeeze(dim=1).numpy()
  return (Y_pred == Y_test).mean()

def test(model, x):
  # x is expected to be a batch, e.g. [[20, 21, 22, 23]]
  x = torch.tensor(x).float()
  return model(x).detach().numpy()[0, 0]

if __name__ == '__main__':
  dataX, dataY = get_data(100)

  # Train
  model = Net(num_in=4, num_out=1)
  split = int(len(dataX) * 0.80)
  X_train, Y_train = dataX[:split], dataY[:split]
  X_test, Y_test = dataX[split:], dataY[split:]

  train(model, X_train, Y_train, epochs=10000)

  # Eval
  print("--Eval--")
  res = evall(model, X_test, Y_test)
  print(res)
  print("--------")

I want to know if this is the best I can do for this problem with an LSTM. With a data size of just 100 I need about 10,000 epochs to reach a 0.9-1.0 score, and if I increase the data size to 1000 it is really hard to get a good score even with 100,000+ epochs. Am I doing something wrong?
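
For reference, here is a minimal sketch of the kind of change I was wondering about: scaling the inputs and targets into a small range before training and scaling the predictions back afterwards, in case the raw magnitudes are part of the problem. The SCALE constant and the scale/unscale helpers are just my own names for illustration; I have not verified that this is the right fix.

import numpy as np
import torch

SCALE = 1000.0  # assumed upper bound of the generated numbers

def scale(a):
  # map raw values into roughly [0, 1]
  return np.asarray(a, dtype=np.float32) / SCALE

def unscale(a):
  # map a scaled prediction back to the original range
  return a * SCALE

# Usage sketch (reuses dataX, dataY, model and train from the code above):
# train(model, scale(dataX), scale(dataY), epochs=1000)
# x = torch.tensor(scale([[20, 21, 22, 23]]))
# print(unscale(model(x).item()))  # hopefully close to 24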