# Transformer cannot predict next number in linear sequence

I am experimenting with the transformer to see whether it can predict the next number in a linear sequence. For example, given the training data (1, 2, 3, 4 … 100), I expect 101 as the output. However, the loss does not decrease as I train the model. Does anyone know if I am doing something fundamentally wrong here?
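Concretely, one training example looks like this (a minimal sketch of the data described above; `src` and `target` are names used here just for illustration):

```python
import torch

src = torch.arange(1.0, 101.0)   # the sequence 1, 2, ..., 100
target = torch.tensor([101.0])   # the next number I expect the model to produce
```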

My transformer has an embedding dimension of 1, a single attention head, and 10 encoder layers.
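In `nn.Transformer` terms, I believe that corresponds to roughly this constructor call (a sketch; everything not mentioned above, such as decoder depth, feed-forward width, and dropout, is left at PyTorch's defaults):

```python
from torch import nn

# d_model=1 (embed dim), nhead=1 (attention heads), num_encoder_layers=10;
# all remaining arguments are assumed to stay at their defaults
trans = nn.Transformer(d_model=1, nhead=1, num_encoder_layers=10)
```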

```python
import torch
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime

import torch.optim as optim
import random

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
now = datetime.now()
logdir = "runs/" + now.strftime("%Y%m%d-%H%M%S") + "/"
writer = SummaryWriter(logdir)

class MyModel(nn.Module):
    def __init__(self, seq_len):
        super().__init__()
        # 1 embed dim, 1 head, 10 encoder layers as described above
        # (this line was missing from the snippet as posted; all other
        # arguments are assumed to keep their PyTorch defaults)
        self.trans = nn.Transformer(d_model=1, nhead=1, num_encoder_layers=10)
        self.lin = nn.Linear(seq_len, 1)
        # fixed random sequence used as the decoder input
        self.tgt = torch.rand(seq_len, 1).to(device)

    def forward(self, x):
        y = self.trans(x, self.tgt)   # decoder output: (seq_len, 1)
        y = torch.transpose(y, 0, 1)  # -> (1, seq_len)
        out = self.lin(y)             # collapse the sequence to a single value
        return out

model = MyModel(100)
model.to(device)
model.eval()

criterion = nn.CrossEntropyLoss()  # defined but never actually used below
# the optimizer definition was also missing from the snippet as posted;
# plain SGD with lr=0.01 is an assumption
optimizer = optim.SGD(model.parameters(), lr=0.01)

running_loss = 0.0

for epoch in range(4000):  # loop over the dataset multiple times
    idx = torch.randint(100, (1,)).item()  # random starting offset
    # consecutive numbers starting from idx; note torch.range is deprecated
    # and includes the endpoint, so this yields 101 values
    training = torch.range(idx, 100 + idx, 1).to(device)
    training = torch.unsqueeze(training, 1)
    labels = torch.tensor([101 + idx]).to(device)  # label is the number after the training sequence

    outputs = model(training)
    outputs = torch.transpose(outputs, 0, 1)
    loss = torch.norm(outputs - labels)
    loss.backward()
    optimizer.step()

    # print statistics
    running_loss += loss.item()
    if epoch % 50 == 0:
        print(f'[{epoch + 1}] loss: {running_loss / 50:.3f}')
```
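For completeness, this is how I would read a prediction out of the model once training works (a minimal sketch; `test_seq` is just an illustrative name, not part of the loop above):

```python
with torch.no_grad():
    test_seq = torch.arange(1.0, 101.0).unsqueeze(1).to(device)  # the sequence 1..100
    pred = model(test_seq)
    print(pred.item())  # hoping to see something close to 101
```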