Hi,
I am new to PyTorch and want to implement sentiment classification with an LSTM.
I chose a simple model with cross-entropy loss and the Adam optimiser. However, the test accuracy is really poor compared with an SVM on the same task, while the training accuracy is 93%.
I tried various regularization techniques (dropout, weight normalization, L2), but nothing seems to address the overfitting problem.
Is there a problem with my model? Or the way I evaluate the model? Or loss prediction? I am really at a loss
class LSTM(nn.Module):
    """Single-layer LSTM classifier that scores one sentence per call.

    Pipeline: token ids -> embeddings -> LSTM -> output of the last time
    step -> Linear(hidden_dim, 100) -> ReLU -> Linear(100, label_size).

    The recurrent state lives on ``self.hidden`` and is overwritten by every
    pass through the network. Callers that feed unrelated sentences should
    reset it first with ``model.hidden = model.init_hidden()``.
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size, label_size):
        """Build the layers.

        Args:
            embedding_dim: size of each token embedding (LSTM input size).
            hidden_dim: size of the LSTM hidden and cell state.
            vocab_size: number of rows in the embedding table.
            label_size: number of output classes.
        """
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.label_size = label_size
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim)
        self.fullyconnected = nn.Linear(hidden_dim, 100)
        self.relu = nn.ReLU()
        self.last = nn.Linear(100, label_size)
        self.hidden = self.init_hidden()

    def logits(self, sentence, aspect_term=None):
        """Return the unnormalised class scores for one sentence.

        Args:
            sentence: sequence of token ids for a single sentence; its length
                is used as the number of time steps (assumed to be a 1-D
                integer tensor -- confirm against the data pipeline).
            aspect_term: accepted for signature compatibility; currently
                unused by the model.

        Returns:
            Tensor of shape ``(1, label_size)`` with raw scores.

        Side effects:
            Replaces ``self.hidden`` with the LSTM state after this sentence.
        """
        # nn.LSTM (batch_first=False) wants (seq_len, batch, features);
        # the batch dimension is fixed to 1 here.
        embedded = self.embeddings(sentence).view(len(sentence), 1, -1)
        lstm_out, self.hidden = self.lstm(embedded, self.hidden)
        final_step = lstm_out[-1]  # output at the last time step
        features = self.relu(self.fullyconnected(final_step))
        return self.last(features)

    def forward(self, sentence, aspect_term=None):
        """Return class probabilities of shape ``(1, label_size)``.

        ``aspect_term`` is optional because the network does not use it, so
        both ``model(sentence)`` and ``model(sentence, aspect)`` are valid.

        NOTE(review): these are softmax probabilities. If the training loss
        is ``nn.CrossEntropyLoss`` (which applies log-softmax itself), feed it
        the output of :meth:`logits` instead; passing probabilities squashes
        the scores twice and weakens the gradient signal -- confirm which
        loss object is actually used.
        """
        # dim=1 is the class axis of the (1, label_size) score matrix.
        return F.softmax(self.logits(sentence, aspect_term), dim=1)

    def init_hidden(self):
        """Return a fresh zero ``(hidden_state, cell_state)`` pair.

        Each tensor has shape ``(1, 1, hidden_dim)``: one layer, batch of one.
        """
        return (my_variable(torch.zeros(1, 1, self.hidden_dim)),
                my_variable(torch.zeros(1, 1, self.hidden_dim)))
# Loss update: illustrative call showing predicted class probabilities and
# the index of the true class being passed to the loss function.
# NOTE(review): as written this passes a plain list and an `actual_label`
# keyword; if loss_function is nn.CrossEntropyLoss it expects a tensor of raw
# scores (logits) plus a tensor of class indices -- confirm the real call.
loss = loss_function([0.2,0.2,0.6], actual_label = 2)
#Model evaluation#
def getpred(model, loss_function, x_pred, y_pred):
    """Evaluate ``model`` on a labelled set; print and return loss and accuracy.

    Args:
        model: network exposing ``eval()``, ``init_hidden()``, a ``hidden``
            attribute, and callable as ``model(sentence, aspect)``.
        loss_function: callable taking ``(model_output, true_label)`` and
            returning a single-element loss tensor.
        x_pred: sized sequence of examples; ``example[0]`` holds the sentence
            token ids and ``example[1]`` the aspect-term token ids.
        y_pred: labels indexed in step with ``x_pred``; each one must be
            convertible with ``int()``.

    Returns:
        ``(total_loss, accuracy)`` where ``total_loss`` is the sum of the
        per-example losses and ``accuracy`` is a percentage in ``[0, 100]``
        (``0.0`` when ``x_pred`` is empty).
    """
    model.eval()  # put the model in evaluation mode
    total_loss = 0.0
    correct = 0

    # Evaluation needs no gradients, so skip building the autograd graph.
    with torch.no_grad():
        for i, x_test in enumerate(x_pred):
            sentence = my_variable(LongTensor([int(n) for n in x_test[0]]))
            aspect = my_variable(LongTensor([int(n) for n in x_test[1]]))
            # Coerce once so the loss target and the accuracy comparison use
            # the same integer label (raw labels may not be ints).
            target = int(y_pred[i])

            # Start every example from a clean recurrent state.
            model.hidden = model.init_hidden()
            probs = model(sentence, aspect)

            true_label = my_variable(LongTensor([target]), requires_grad=False)
            loss = loss_function(probs, true_label)
            total_loss += loss.item()

            # Index of the highest-scoring class along the class axis.
            _, idx = torch.max(probs, 1)
            if idx.item() == target:
                correct += 1

    n_examples = len(x_pred)
    accuracy = (float(correct) / n_examples) * 100 if n_examples else 0.0

    print('Loss -- {}'.format(total_loss))
    print('Accuracy -- {}'.format(accuracy))
    return total_loss, accuracy