Dear Community,
I am currently debugging a transformer implementation with the base torch transformer on the sequential mnist task. In the task the 28 by 28
mnist image is flattened to 784
and the model receives a single pixel, so the input size = 1
of the entire image at each time step t, so from t=0
to T=784.
As such the goal is to examine the context length of a transformer. My code can be copy pasted and run as below to replicate the error message for IndexError: index out of range in self
as shown below with the entire error message.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms as T
from torch.utils.data import DataLoader
import torch.optim as optim
import numpy as np
device = "cuda" if torch.cuda.is_available() else "cpu"
batch_size = 64
weight_decay = 0.0
epochs = 50
nworkers = 2
lr = 0.001
input_size = 1
embedding_dim = 256
output_size = 10
data_dir = 'data/'
class TransformerModel(nn.Module):
def __init__(self, input_size, embedding_dim, output_size):
super(TransformerModel, self).__init__()
self.embedding_dim = embedding_dim
self.embedding = nn.Embedding(input_size, embedding_dim)
self.transformer = nn.Transformer(
d_model=embedding_dim, nhead=8, num_encoder_layers=6
)
self.fc = nn.Linear(embedding_dim, output_size)
def forward(self, xs):
embeddings = self.embedding(xs)
out = self.transformer(embeddings) # Pass the embeddings through the Transformer
out = self.fc(out.mean(dim=0)) # Average embeddings over the sequence dimension
return out
train_dataset = datasets.MNIST(root=data_dir,
train=True,
transform=T.Compose([T.ToTensor(),
T.Normalize((0.5,), (0.5,))]),
download=True)
train_loader = DataLoader(dataset=train_dataset,
batch_size=batch_size,
shuffle=True,
drop_last=True)
model = TransformerModel(input_size=input_size, embedding_dim=embedding_dim, output_size=output_size).to(device)
print(model)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)
loss_f = nn.CrossEntropyLoss()
for epoch in range(epochs):
model.train() # Set the model to training mode
loss_lst = []
correct = 0
total = 0
for batch_idx, (x, y) in enumerate(train_loader):
x, y = x.to(device), y.to(device)
x = x.view(-1, batch_size, input_size) # Reshape input to (sequence_length, batch_size, input_size)'
x = x.permute(1, 0, 2) # Permute dimensions for Transformer: (batch_size, sequence_length, input_size)
x = x.to(torch.long)
out = model(x)
loss_val = loss_f(out, y)
loss_lst.append(float(loss_val.item()))
optimizer.zero_grad()
loss_val.backward()
optimizer.step()
classprobs = F.softmax(out, dim=1)
preds = classprobs.argmax(dim=1)
total += y.size(0)
correct += (preds == y).sum().item()
loss_val = round(sum(loss_lst) / len(loss_lst), 4)
accuracy = round(correct / total, 4)
print(f"Epoch:{epoch + 1} Train[Loss:{loss_val} Accuracy:{accuracy}]")
scheduler.step()
I am debugging on cpu, with the following error:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-3-4e6d43608921> in <cell line: 60>()
73 x = x.to(torch.long)
74 print(x.shape)
---> 75 out = model(x)
76 loss_val = loss_f(out, y)
77 loss_lst.append(float(loss_val.item()))
4 frames
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
<ipython-input-3-4e6d43608921> in forward(self, xs)
33
34 def forward(self, xs):
---> 35 embeddings = self.embedding(xs)
36 out = self.transformer(embeddings) # Pass the embeddings through the Transformer
37 out = self.fc(out.mean(dim=0)) # Average embeddings over the sequence dimension
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/sparse.py in forward(self, input)
160
161 def forward(self, input: Tensor) -> Tensor:
--> 162 return F.embedding(
163 input, self.weight, self.padding_idx, self.max_norm,
164 self.norm_type, self.scale_grad_by_freq, self.sparse)
/usr/local/lib/python3.10/dist-packages/torch/nn/functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
2208 # remove once script supports set_grad_enabled
2209 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 2210 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
2211
2212
IndexError: index out of range in self