Hi, I am working on sentiment analysis. I converted my text to word2vec embeddings using genism. Now, I am passing these encoded vectors to lstm as inputs and using lstm to classify sentiment of the text. But, the isssue that I am facing is that for training my loss decreases throughout and accuracy increases to 95% almost. But for my validation loss decreases for few epochs but then it start increasing might be because of over-fitting. The validation accuracy is stuck and fluctuates between 49 and 51. I am not sure what the problem is. This is my first time working on NLP. Kindly, help me out.
Sentiments 0 and 1
Activation = Sigmoid
Learning rate = 5e-3
loss = BCEloss
optimizer = SGD
Code:
class LSTMSentimentNet(nn.Module):
def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, word_embeddings, drop_prob=0.64):
super(LSTMSentimentNet, self).__init__()
self.output_size = output_size
self.n_layers = n_layers
self.hidden_dim = hidden_dim
self.embedding = nn.Embedding(vocab_size, embedding_dim)
self.embedding.weight = nn.Parameter(word_embeddings, requires_grad=False)
self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=drop_prob, batch_first=True, bidirectional=True)
self.dropout = nn.Dropout(drop_prob)
self.fc = nn.Linear(hidden_dim, output_size)
self.sigmoid = nn.Sigmoid()
def forward (self, input_words):
input_words = input_words.long()
embedded_words = self.embedding(input_words)
lstm_out, h = self.lstm(embedded_words)
lstm_out = self.dropout(lstm_out)
lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
fc_out = self.fc(lstm_out)
sigmoid_out = self.sigmoid(fc_out)
sigmoid_out = sigmoid_out.view(batch_size, -1)
sigmoid_last = sigmoid_out[:, -1]
return sigmoid_last, h
def init_hidden(self, batch_size):
weight = next(self.parameters()).data
if (is_cuda):
hidden = (weight.new(self.n_layers*2, batch_size, self.hidden_dim).zero_().cuda(),
weight.new(self.n_layers*2, batch_size, self.hidden_dim).zero_().cuda())
else:
hidden = (weight.new(self.n_layers*2, batch_size, self.hidden_dim).zero_(),
weight.new(self.n_layers*2, batch_size, self.hidden_dim).zero_())
return hidden
Training
for i in range(epochs):
h = model.init_hidden(batch_size)
num_correct = 0
val_correct = 0
model.train()
print('Epoch: {}/{}...'.format(i+1, epochs))
for inputs, labels in train_loader:
h = tuple([e.data for e in h])
inputs, labels = inputs.to(device), labels.to(device)
model.zero_grad()
output, h = model(inputs)
loss = criterion(output.unsqueeze(1), labels.float())
loss.backward()
nn.utils.clip_grad_norm_(model.parameters(), clip)
optimizer.step()
pred = torch.round(output.squeeze()) # Rounds the output to 0/1
correct_tensor = pred.eq(labels.float().view_as(pred))
correct = np.squeeze(correct_tensor.cpu().numpy())
num_correct += np.sum(correct)
train_acc = num_correct/len(train_loader.dataset)
print("\tTrain Loss: {:.6f}".format(loss.item())+", Train accuracy: {:.3f}%".format(train_acc*100))
with torch.no_grad():
model.eval()
val_h = model.init_hidden(batch_size)
val_losses = []
for inp, target in val_loader:
val_h = tuple([each.data for each in val_h])
inp, target = inp.to(device), target.to(device)
out, val_h = model(inp)
val_loss = criterion(out.unsqueeze(1), target.float())
val_losses.append(val_loss.item())
val_pred = torch.round(out.squeeze()) # Rounds the output to 0/1
val_correct_tensor = pred.eq(target.float().view_as(val_pred))
corrects = np.squeeze(val_correct_tensor.cpu().numpy())
val_correct += np.sum(corrects)
val_acc = val_correct/len(val_loader.dataset)
print("\tValidation Loss: {:.6f}".format(val_loss)+", Validation accuracy: {:.3f}%".format(val_acc*100))