I’m not sure how to resolve this error. I am trying to build a classifier for 36 different labels that are numbered from 0 to 35. Here is some of the code:
#one-hot encode vocabulary
alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789-._'
def sentence_to_id(data):
#creates a dict that maps to every char of alphabet an unique int based on position
char_to_int = dict((c,i) for i,c in enumerate(alphabet))
encoded_data = []
#Replaces every char in data with the mapped int
encoded_data.append([char_to_int[char] for char in data])
#print(encoded_data) #Prints the int encoded array
#print('encoded data')
#This part now replaces the int by an one-hot array with size alphabet
one_hot = []
for value in encoded_data:
for i in value:
#At first, the whole array is initialized with 0
letter = [0 for _ in range(len(alphabet))]
#Only at the number of int, 1 is written
letter[i] = 1
one_hot.append(letter)
x = torch.tensor(one_hot)
print(x.shape)
print(x.view)
#x = x.view(-1,12,39)
#print('tensor created')
return x
#basic model, need to modify to situation
class NLP_model(nn.Module):
def __init__(self, vocab_size, embedding_dim, hidden_dim, num_classes):
super(NLP_model, self).__init__()
self.char_embedding = nn.Embedding(vocab_size, embedding_dim)
self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers = 1, bidirectional= True)
self.fc = nn.Linear(hidden_dim*2,num_classes)
def forward(self, x):
x = self.char_embedding(x)
output, hidden = self.lstm(x)
hidden = torch.cat((hidden[0][-2,:,:], hidden[0][-1,:,:]), dim=1)
x = self.fc(hidden[0])
return x
model = NLP_model(len(alphabet), 8, 16, 35) #Find the number of fismaids for last variable
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)
for epoch in range(100): #Look into DataLoader for batch processing
y = list()
z = list()
for sentence, label in zip(ls_X_train, ls_y_train): #training_data should be an array of hostnames and labels
model.zero_grad()
output = model(sentence_to_id(sentence)) #sentence is the hostname, label is the fismaid
print(output.shape)
label = torch.tensor(label).unsqueeze(1)
loss = criterion(output, label)
loss.backward()
optimizer.step()
y.append(loss.item())
model.eval()
for sentence, label in zip(ls_X_test, ls_y_test):
output = model(sentence_to_id(sentence))
loss = criterion(output, label)
z.append(loss.item())
print(f'epoch {epoch} training loss: {np.array(y).mean()}')
print(f'testing loss : {np.array(z).mean()}')
The error I get is the following:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-31-28c5b337be00> in <module>
11
12 print(output.shape)
---> 13 label = torch.tensor(label).unsqueeze(1)
14 loss = criterion(output, label)
15 loss.backward()
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
How would I go about troubleshooting this error?