I’m new to data science and need help troubleshooting this error. This is the code:
alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789-._'
def sentence_to_id(data):
#creates a dict that maps to every char of alphabet an unique int based on position
char_to_int = dict((c,i) for i,c in enumerate(alphabet))
encoded_data = []
#Replaces every char in data with the mapped int
try:
encoded_data.append([char_to_int[char] for char in str(data)])
except:
print(data)
#print(encoded_data) #Prints the int encoded array
#print('encoded data')
#This part now replaces the int by an one-hot array with size alphabet
one_hot = []
for value in encoded_data:
for i in value:
#At first, the whole array is initialized with 0
letter = [0 for _ in range(len(alphabet))]
#Only at the number of int, 1 is written
letter[i] = 1
one_hot.append(letter)
x = torch.tensor(one_hot)
#print(x.shape)
#print(x.view)
#x = x.view(-1,12,39)
#print('tensor created')
return x
class NLP_model(nn.Module):
def __init__(self, vocab_size, embedding_dim, hidden_dim, num_classes):
super(NLP_model, self).__init__()
self.char_embedding = nn.Embedding(vocab_size, embedding_dim)
self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers = 5, bidirectional= True)
self.fc = nn.Linear(hidden_dim*2,num_classes)
def forward(self, x):
x = self.char_embedding(x)
output, hidden = self.lstm(x)
hidden = torch.cat((hidden[0][-2,:,:], hidden[0][-1,:,:]), dim=1)
x = self.fc(hidden[0])
return x
model = NLP_model(len(alphabet), 10, 20, 57) #Find the number of fismaids for last variable
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)
for epoch in range(100): #Look into DataLoader for batch processing
y = list()
z = list()
for sentence, label in zip(ls_X_train, ls_y_train): #training_data should be an array of hostnames and labels
model.zero_grad()
output = model(sentence_to_id(sentence)) #sentence is the hostname, label is the fismaid
#print(label)
#print(output.shape)
temp_label = label
label = torch.zeros(57)
label[temp_label] = 1.0
#label = torch.tensor(label).unsqueeze(0)
#label = torch.tensor([label]).unsqueeze(1)
#label = torch.tensor(label).unsqueeze(1)
#print(label.shape)
loss = criterion(output, label)
loss.backward()
optimizer.step()
y.append(loss.item())
y_true = []
y_pred = []
model.eval()
for sentence, label in zip(ls_X_test, ls_y_test):
temp_label = label
label = torch.zeros(35)
label[temp_label] = 1.0
output = model(sentence_to_id(sentence))
loss = criterion(output, label)
z.append(loss.item())
pred = output.detach().numpy
pred = np.argmax(pred)
y_pred.append(pred)
y_true.append(temp_label)
print(f'epoch {epoch} training loss: {np.array(y).mean()}')
print(f'testing loss : {np.array(z).mean()}')
print(f'recall: {recall_score(y_true, y_pred, average="weighted")}')
print(f'precision: {precision_score(y_true, y_pred, average="weighted")}')
print(f' f1: {f1_score(y_true, y_pred, average="weighted")}')
print(f'accuracy: {accuracy_score(y_true, y_pred)}')
Here is the error I get:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-43-7b604e485eb5> in <module>
13 temp_label = label
14 label = torch.zeros(57)
---> 15 label[temp_label] = 1.0
16 #label = torch.tensor(label).unsqueeze(0)
17 #label = torch.tensor([label]).unsqueeze(1)
IndexError: index 58 is out of bounds for dimension 0 with size 57```