Out of bounds error for tensor

Parkz · April 28, 2021, 6:32pm

I’m new to data science and need help troubleshooting this error. This is the code:

alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789-._'
def sentence_to_id(data):
    #creates a dict that maps to every char of alphabet an unique int based on position
    char_to_int = dict((c,i) for i,c in enumerate(alphabet))
    encoded_data = []
    #Replaces every char in data with the mapped int
    try:
        encoded_data.append([char_to_int[char] for char in str(data)])
    except:
        print(data)
    #print(encoded_data) #Prints the int encoded array
    #print('encoded data')
    #This part now replaces the int by an one-hot array with size  alphabet
    one_hot = []
    for value in encoded_data:
        for i in value:
            #At first, the whole array is initialized with 0
            letter = [0 for _ in range(len(alphabet))]
            #Only at the number of int, 1 is written
            letter[i] = 1
            one_hot.append(letter)
        x = torch.tensor(one_hot)
        #print(x.shape)
        #print(x.view)
        #x = x.view(-1,12,39)
        #print('tensor created')
        return x

class NLP_model(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_classes):
        super(NLP_model, self).__init__()
        self.char_embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers = 5, bidirectional= True)
        self.fc = nn.Linear(hidden_dim*2,num_classes)
        
    def forward(self, x):
        x = self.char_embedding(x)
        output, hidden = self.lstm(x)
        hidden = torch.cat((hidden[0][-2,:,:], hidden[0][-1,:,:]), dim=1)
        x = self.fc(hidden[0])
    
        return x
model = NLP_model(len(alphabet), 10, 20, 57) #Find the number of fismaids for last variable
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)

for epoch in range(100): #Look into DataLoader for batch processing
    y = list()
    z = list()
    for sentence, label in zip(ls_X_train, ls_y_train): #training_data should be an array of hostnames and labels
        model.zero_grad()
        output = model(sentence_to_id(sentence)) #sentence is the hostname, label is the fismaid
        #print(label)
        #print(output.shape)
        temp_label = label
        label = torch.zeros(57)
        label[temp_label] = 1.0
        #label = torch.tensor(label).unsqueeze(0)
        #label = torch.tensor([label]).unsqueeze(1)
        #label = torch.tensor(label).unsqueeze(1)
        #print(label.shape)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        y.append(loss.item())
    y_true = []
    y_pred = []
    model.eval()
    for sentence, label in zip(ls_X_test, ls_y_test):
        temp_label = label
        label = torch.zeros(35)
        label[temp_label] = 1.0
        output = model(sentence_to_id(sentence))
        loss = criterion(output, label)
        z.append(loss.item())
        pred = output.detach().numpy
        pred = np.argmax(pred)
        y_pred.append(pred)
        y_true.append(temp_label)
    print(f'epoch {epoch} training loss: {np.array(y).mean()}')
    print(f'testing loss : {np.array(z).mean()}')
    print(f'recall: {recall_score(y_true, y_pred, average="weighted")}')
    print(f'precision: {precision_score(y_true, y_pred, average="weighted")}')
    print(f' f1: {f1_score(y_true, y_pred, average="weighted")}')
    print(f'accuracy: {accuracy_score(y_true, y_pred)}')

Here is the error I get:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-43-7b604e485eb5> in <module>
     13         temp_label = label
     14         label = torch.zeros(57)
---> 15         label[temp_label] = 1.0
     16         #label = torch.tensor(label).unsqueeze(0)
     17         #label = torch.tensor([label]).unsqueeze(1)

IndexError: index 58 is out of bounds for dimension 0 with size 57```

ptrblck · April 29, 2021, 3:40am

temp_label seems to have the index value 58, while you are initializing label with 57 zeros.
This would allow you to index label with values in [0, 56] and 58 will raise the out of bounds error.