Hello gang,
I’m looking to process one sequence as one input and to classify that sequence. However, currently, the network looks at it as a sequence of inputs and I’m just unable to determine the correct way to make it see it as one input. Any help is appreciated.
Thanks for your time.
# Prepare sequence for input into model
def prepare_sequence_words(seq, to_ix):
idxs_for_words = [to_ix[word] for word in seq]
return torch.tensor(idxs_for_words, dtype=torch.long)
# Define model
class LSTMLanguageClassifier(nn.Module):
def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
super(LSTMLanguageClassifier, self).__init__()
self.hidden_dim = hidden_dim
self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
# LSTM takes word embeddings as inputs and outputs hidden states with dimensionality hidden_dim
self.lstm = nn.LSTM(embedding_dim, hidden_dim)
# Linear layer that maps from hidden state space to language class space
self.linear = nn.Linear(hidden_dim, tagset_size)
self.hidden = self.init_hidden()
def init_hidden(self):
# Before we've done anything, we dont have any hidden state.
# Refer to the Pytorch documentation to see exactly
# why they have this dimensionality.
# The axes semantics are (num_layers, minibatch_size, hidden_dim)
return (torch.zeros(1, 1, self.hidden_dim),
torch.zeros(1, 1, self.hidden_dim))
def forward(self, sentence):
embeds = self.word_embeddings(sentence)
lstm_out, self.hidden = self.lstm(
embeds.view(len(sentence[0]), 1, -1),
self.hidden
)
tag_space = self.linear(lstm_out.view(len(sentence[0]), -1))
tag_scores = F.log_softmax(tag_space, dim=1)
return tag_scores
training_data = create_training_data("wili-2018/x_train.txt", "wili-2018/y_train.txt")
word_to_idx, target_to_idx = create_word_and_target_dictionaries(training_data)
EMBEDD_DIM = 32
HIDD_DIM = 32
model = LSTMLanguageClassifier(EMBEDD_DIM, HIDD_DIM, len(word_to_idx), len(target_to_idx))
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
with torch.no_grad():
inputs = prepare_sequence_words(training_data[0][0], word_to_idx)
tag_scores = model(inputs.view(1, -1, 1))