Pytorch Embedding Error Index out of Range in self

RemGalleu · June 21, 2021, 8:22am

As written in the title above it is the Pytroch Error: “IndexError: index out of range in self”. This error occurs as soon as a dataset of more than 500 rows is used. Also when I reload the model and try to run a second data set, I get this error. I have tried everything possible to manually set the embedding size, so everything I have found on the net has not worked. Attached the model, optimizer and runtime, would be very grateful for your help.

class Model(nn.Module):

    def __init__(self, embedding_size, num_numerical_cols, output_size, layers, p=0.4):
        super().__init__()
        self.all_embeddings = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in embedding_size])
        self.embedding_dropout = nn.Dropout(p)
        self.batch_norm_num = nn.BatchNorm1d(num_numerical_cols)

        all_layers = []
        num_categorical_cols = sum((nf for ni, nf in embedding_size))
        input_size = num_categorical_cols + num_numerical_cols

        for i in layers:
            all_layers.append(nn.Linear(input_size, i))
            all_layers.append(nn.ReLU(inplace=True))
            all_layers.append(nn.BatchNorm1d(i))
            all_layers.append(nn.Dropout(p))
            input_size = i

        all_layers.append(nn.Linear(layers[-1], output_size))

        self.layers = nn.Sequential(*all_layers)

    def forward(self, x_categorical, x_numerical):
        embeddings = []
        for i,e in enumerate(self.all_embeddings):
            embeddings.append(e(x_categorical[:,i]))
        x = torch.cat(embeddings, 1)
        x = self.embedding_dropout(x)

        x_numerical = self.batch_norm_num(x_numerical)
        x = torch.cat([x, x_numerical], 1)
        x = self.layers(x)
        return x

model = Model(categorical_embedding_sizes, numerical_data.shape[1], 5, [400,100,50], p=0.4)

loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

epochs = 1000
aggregated_losses = []

for i in range(epochs):
    i += 1
    y_pred = model(categorical_train_data, numerical_train_data)
    single_loss = loss_function(y_pred, train_outputs)
    aggregated_losses.append(single_loss)

    print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

    optimizer.zero_grad()
    single_loss.backward()
    optimizer.step()

print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')

Here is the error as described:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-157-202810d3193a> in <module>
      4 for i in range(epochs):
      5     i += 1
----> 6     y_pred = model(categorical_train_data, numerical_train_data)
      7     single_loss = loss_function(y_pred, train_outputs)
      8     aggregated_losses.append(single_loss)

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    548             result = self._slow_forward(*input, **kwargs)
    549         else:
--> 550             result = self.forward(*input, **kwargs)
    551         for hook in self._forward_hooks.values():
    552             hook_result = hook(self, input, result)

<ipython-input-117-fd6404aba4b5> in forward(self, x_categorical, x_numerical)
     25         embeddings = []
     26         for i,e in enumerate(self.all_embeddings):
---> 27             embeddings.append(e(x_categorical[:,i]))
     28         x = torch.cat(embeddings, 1)
     29         x = self.embedding_dropout(x)

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    548             result = self._slow_forward(*input, **kwargs)
    549         else:
--> 550             result = self.forward(*input, **kwargs)
    551         for hook in self._forward_hooks.values():
    552             hook_result = hook(self, input, result)

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\sparse.py in forward(self, input)
    112         return F.embedding(
    113             input, self.weight, self.padding_idx, self.max_norm,
--> 114             self.norm_type, self.scale_grad_by_freq, self.sparse)
    115 
    116     def extra_repr(self):

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
   1722         # remove once script supports set_grad_enabled
   1723         _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 1724     return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
   1725 
   1726 

IndexError: index out of range in self

vainaijr · June 23, 2021, 11:07am

such an error would occur if you do something like this,

embedding_size = [[10, 10], [20, 20]]
a = nn.ModuleList([nn.Embedding(b, c) for b, c in embedding_size])
for i, j in enumerate(a):
  print(i, j(torch.tensor([15]))) # accessing index 15 which is out of range, as our first embedding table has 10 elements, indexed from 0-9

resulting in

IndexError: index out of range in self

could you check if you are passing an index that exceeds the number of elements in the embedding table.