Hi guys, I am trying to develop text classification with RNN. The model runs fine, however the loss after a couple of steps starts stagnating.
Below is the code of my model
class ClassifierModel(nn.Module):
def __init__(self, hidden_size, num_layers, vocab_size, embedding_dim,
label_size):
super(ClassifierModel, self).__init__()
self.embedding = nn.Embedding(vocab_size, embedding_dim)
self.lstm = nn.LSTM(
input_size=embedding_dim,
hidden_size=hidden_size,
num_layers=num_layers,
dropout=0.5,
bidirectional=False)
self.dense = nn.Linear(
in_features=hidden_size, out_features=label_size)
def forward(self, entity_ids, seq_len):
embedding = self.embedding(entity_ids)
input_size = embedding.size()
out, _ = self.lstm(
embedding.view(input_size[1], input_size[0], input_size[2]))
last_output = torch.index_select(out, 0, seq_len)
logits = self.dense(last_output)
logits = F.relu(logits[0, :, :])
return logits
And this is the code I use for training
classifier_model = ClassifierModel(128, 2, len(vocabs), 200, 2)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(classifier_model.parameters())
for data_bucket, category_bucket, seq_len_bucket in train_data:
for keys in data_bucket.keys():
entity_ids = Variable(data_bucket[keys], requires_grad=False)
category_ids = Variable(category_bucket[keys], requires_grad=False)
seq_len = Variable(seq_len_bucket[keys], requires_grad=False)
logits = classifier_model(entity_ids, seq_len)
loss = loss_fn(logits, category_ids)
print("Loss:", loss)
pred, pred_idx = torch.max(logits, 1)
correct_predictions = (pred_idx.data == category_bucket[keys]).sum()
acc = correct_predictions / category_bucket[keys].size()[0]
print("Accuracy:", acc)
loss.backward()
optimizer.step()
This is the output
Accuracy: 0.5263157894736842
Loss: Variable containing:
0.7016
[torch.FloatTensor of size 1]
Accuracy: 0.41379310344827586
Loss: Variable containing:
0.6972
[torch.FloatTensor of size 1]
Accuracy: 0.4166666666666667
Loss: Variable containing:
0.6948
[torch.FloatTensor of size 1]
Accuracy: 0.4488888888888889
Loss: Variable containing:
0.6918
[torch.FloatTensor of size 1]
Accuracy: 0.4492753623188406
Loss: Variable containing:
0.6931
[torch.FloatTensor of size 1]
Accuracy: 0.5121951219512195
Loss: Variable containing:
0.6931
[torch.FloatTensor of size 1]
Accuracy: 0.51010101010101
Loss: Variable containing:
0.6931
[torch.FloatTensor of size 1]
Accuracy: 0.525
Loss: Variable containing:
0.6931
[torch.FloatTensor of size 1]
As you can see from the output above, after several steps the loss stuck at 0.6931
Am I doing something wrong with my code? Thanks