I have the following model architecture, which is essentially a 5-layer bidirectional LSTM that takes in strings of length 62 and outputs classification predictions for them. Because of how the data works, the first 3-5 characters are more important for the classification than the remainder of each string. How do I get the model to place more weight on those leading characters (at least the first three)?
#basic model, need to modify to situation
class NLP_model(nn.Module):
    """Character-level classifier: embedding -> 5-layer bidirectional LSTM -> linear.

    The sequence is summarised by concatenating the final hidden states of the
    top LSTM layer's forward and backward directions; a linear layer maps that
    summary to one logit per class.

    Note: the backward direction reads the sequence from its end towards its
    start, so its final hidden state is the one produced right after reading
    the first characters of the input.

    Args:
        vocab_size: Number of distinct character ids the embedding accepts.
        embedding_dim: Size of each character embedding vector.
        hidden_dim: Hidden size of the LSTM, per direction.
        num_classes: Number of output logits.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_classes):
        super().__init__()
        self.char_embedding = nn.Embedding(vocab_size, embedding_dim)
        # batch_first is left at its default (False), so the LSTM expects
        # input laid out as (seq_len, batch, embedding_dim).
        self.lstm = nn.LSTM(
            embedding_dim, hidden_dim, num_layers=5, bidirectional=True
        )
        # Forward and backward states are concatenated, hence hidden_dim * 2.
        self.fc = nn.Linear(hidden_dim * 2, num_classes)
        # Idea noted in the original draft, not wired in:
        # self.att = nn.MultiheadAttention(embed_dim, num_heads, ...)

    def forward(self, x):
        """Return class logits for a tensor of character ids.

        Args:
            x: Integer tensor of character ids, shaped ``(seq_len, batch)``
                or, for a single unbatched sequence, ``(seq_len,)`` (the
                latter needs a PyTorch version whose ``nn.LSTM`` accepts
                unbatched input).

        Returns:
            A ``(num_classes,)`` tensor when there is a single sequence
            (unbatched input or a batch of one), otherwise a
            ``(batch, num_classes)`` tensor with one row per sequence.
        """
        embedded = self.char_embedding(x)
        _, (h_n, _) = self.lstm(embedded)
        # h_n stacks (layer, direction) pairs along dim 0; the last two
        # entries belong to the top layer: [-2] forward, [-1] backward.
        summary = torch.cat((h_n[-2], h_n[-1]), dim=-1)
        logits = self.fc(summary)
        # Keep the historical 1-D output for a batch of one so existing
        # callers that pair it with a 1-D target keep working; larger
        # batches now get a row per sample instead of only the first one.
        if logits.dim() == 2 and logits.size(0) == 1:
            return logits.squeeze(0)
        return logits
# Train and evaluate the OS classifier (original note: "modify for os").
# Each epoch makes one pass over the training pairs, one sample at a time,
# then scores the test pairs and prints losses and weighted metrics.

# Last argument is the number of classes; per the original note this should be
# the number of fismaids.
model_os = NLP_model(len(alphabet), 12, 24, lenos)
# NOTE(review): BCEWithLogitsLoss against a one-hot target scores every class
# as an independent yes/no decision, while predictions below pick a single
# class with argmax. If each hostname has exactly one label,
# nn.CrossEntropyLoss may be the better fit -- confirm before switching.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model_os.parameters(), lr=0.001)

for epoch in range(10):  # TODO: look into DataLoader for batch processing
    # Names y / z are kept from the original in case code outside this
    # section reads them.
    y = []  # per-sample training losses for this epoch
    z = []  # per-sample test losses for this epoch

    # eval() is called further down every epoch, so switch back to training
    # mode explicitly; otherwise epochs after the first would train in eval mode.
    model_os.train()
    # sentence is the hostname, label is the fismaid (used as a class index).
    for sentence, label in zip(ls_X_train_os, ls_y_train_os):
        model_os.zero_grad()
        output = model_os(sentence_to_id(sentence))
        # One-hot target, because BCEWithLogitsLoss needs a float tensor
        # shaped like the logits.
        target = torch.zeros(lenos)
        target[label] = 1.0
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        y.append(loss.item())

    y_true = []
    y_pred = []
    model_os.eval()
    # No gradients are needed for scoring; skipping them avoids building an
    # autograd graph for every test sample.
    with torch.no_grad():
        for sentence, label in zip(ls_X_test_os, ls_y_test_os):
            target = torch.zeros(lenos)
            target[label] = 1.0
            output = model_os(sentence_to_id(sentence))
            loss = criterion(output, target)
            z.append(loss.item())
            # The original passed the bound method `output.detach().numpy`
            # (no call parentheses) to np.argmax, so the predicted class was
            # never read from the logits. Take the argmax of the logits.
            y_pred.append(int(output.argmax()))
            y_true.append(label)

    print(f'epoch {epoch} training loss: {np.array(y).mean()}')
    print(f'testing loss : {np.array(z).mean()}')
    print(f'recall: {recall_score(y_true, y_pred, average="weighted")}')
    print(f'precision: {precision_score(y_true, y_pred, average="weighted")}')
    print(f' f1: {f1_score(y_true, y_pred, average="weighted")}')
    print(f'accuracy: {accuracy_score(y_true, y_pred)}')