Hi there, I am a newbie student studying natural language processing.
I am writing a named entity recognition model for personal study.
import torch
from torch import nn

class LSTM(nn.Module):
    def __init__(self, word_vocab, embed_dim=50, hidden_size=50, num_class=18) -> None:
        super().__init__()
        self.word_vocab = word_vocab
        self.word_embedding = nn.Embedding(len(word_vocab), embedding_dim=embed_dim)
        # batch_first=True so inputs/outputs are [batch, seq_len, features],
        # matching the [batch_size, max_seq_length] batches from the data loader
        self.lstm = nn.LSTM(embed_dim, hidden_size, num_layers=2, dropout=0.5,
                            bidirectional=True, batch_first=True)
        # a bidirectional LSTM emits hidden_size * 2 features per token,
        # so the classifier must take hidden_size * 2 (not embed_dim * 2)
        self.fc = nn.Linear(hidden_size * 2, num_class)

    def forward(self, x_train):
        word_embed = self.word_embedding(x_train)                  # [batch, seq_len, embed_dim]
        output_word, (hidden, state_word) = self.lstm(word_embed)  # [batch, seq_len, hidden_size * 2]
        outputs = self.fc(output_word)                             # [batch, seq_len, num_class]
        return outputs                                             # already float32, no cast needed
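To sanity-check the shapes, I run a forward pass on dummy data like the following (the tiny vocabulary here is made up just for illustration; index 0 is reserved for padding):

# made-up vocabulary, only for this smoke test
dummy_vocab = {'<pad>': 0, 'hello': 1, 'world': 2}
model = LSTM(dummy_vocab, embed_dim=50, hidden_size=50, num_class=18)
x = torch.randint(1, len(dummy_vocab), (32, 99))  # [batch_size, max_seq_length]
print(model(x).shape)                             # torch.Size([32, 99, 18])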
Above is the simple LSTM network, which is all I have written so far.
Below is the training code.
from model.loader import get_data_loader
from model.lstm import LSTM
from torch import nn
from torch.optim import Adam

if __name__ == '__main__':
    train_data_loader, test_data_loader, word2idx = get_data_loader(path='./rsc')
    lstm_model = LSTM(word2idx)
    parameters = filter(lambda p: p.requires_grad, lstm_model.parameters())
    criterion = nn.CrossEntropyLoss(ignore_index=0)  # index 0 is the padding label
    optimizer = Adam(parameters, lr=1e-4)
    for step, (x_train, y_train) in enumerate(train_data_loader):
        output = lstm_model(x_train)       # [batch, seq_len, num_class]
        loss = criterion(output, y_train)  # <-- this is the part I am unsure about
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
The shapes of the output and the labels are torch.Size([32, 99, 18]) and torch.Size([32, 99]) respectively, i.e. the first is [batch_size, max_seq_length, num_tag] and the second is [batch_size, max_seq_length].
My questions are as follows:
- How do I use the cross entropy loss function with these output and label shapes?
- There are padding tokens in the training data, so I have to exclude them when calculating the loss. Can this be handled with nn.CrossEntropyLoss(ignore_index=0)? My current guess is sketched after this list.
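From my reading of the nn.CrossEntropyLoss docs, the class dimension has to come right after the batch dimension, so I am considering one of the following two calls (the names are from my training code above; I am not sure which, if either, is correct):

# option 1: treat it as the K-dimensional case; the loss then expects
# logits as [batch, num_class, seq_len] against targets [batch, seq_len]
loss = criterion(output.permute(0, 2, 1), y_train)

# option 2: flatten batch and sequence into one dimension, reducing it to
# the plain 2-D case of [N, num_class] logits against [N] targets
loss = criterion(output.reshape(-1, output.shape[-1]), y_train.reshape(-1))

Either way, my understanding is that ignore_index=0 would then skip the padded positions, as long as the padding label is 0.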
Thank you for reading this question.