LSTM for sentiment classification: model overfits and regularization does not help

Hi,

I am new to PyTorch and want to implement a sentiment classification model with an LSTM.
I chose a simple model with cross-entropy loss and the Adam optimizer, but the test accuracy is really poor compared with an SVM on the same task. Training accuracy is 93%.

I tried various regularization techniques (dropout, weight norm, L2 weight decay). Nothing seems to address the overfitting problem.
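Concretely, the variants looked roughly like this (the dropout rate, weight_decay, and learning rate below are just examples of settings I tried; the first two fragments go in __init__ / forward of the model shown further down):

import torch.optim as optim

# dropout between the LSTM output and the classifier (in __init__ / forward)
self.dropout = nn.Dropout(0.3)
fc = self.relu(self.fullyconnected(self.dropout(embedding_vec)))

# L2 regularization via weight decay on the optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

# weight norm on the LSTM's input-to-hidden and hidden-to-hidden weights
self.lstm = nn.utils.weight_norm(self.lstm, name='weight_ih_l0')
self.lstm = nn.utils.weight_norm(self.lstm, name='weight_hh_l0')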

Is there a problem with my model? Or with the way I evaluate it? Or with the loss computation? I am really at a loss.

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import LongTensor

# my_variable() and USE_CUDA come from my setup code (not shown):
# my_variable wraps a tensor in a Variable and moves it to the GPU when USE_CUDA is set.

class LSTM(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, label_size):
        super(LSTM, self).__init__()

        self.hidden_dim = hidden_dim
        self.label_size = label_size
        update_dim = hidden_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim)
        #self.lstm = nn.utils.weight_norm(self.lstm, name='weight_hh_l0')
        #self.lstm = nn.utils.weight_norm(self.lstm, name='weight_ih_l0')
        #self.lstm.flatten_parameters()
        self.fullyconnected = nn.Linear(update_dim, 100)
        self.relu = nn.ReLU()
        #self.dropout = nn.Dropout(0.3)
        self.last = nn.Linear(100, label_size)
        self.hidden = self.init_hidden()

    def forward(self, sentence, aspect_term=None):  # aspect_term is currently unused
        # reshape the embedded sentence to (seq_len, batch=1, embedding_dim)
        sentence = self.embeddings(sentence).view(len(sentence), 1, -1)
        lstm_out, self.hidden = self.lstm(sentence, self.hidden)  # updating hidden and cell states
        embedding_vec = lstm_out[-1]  # output of the last time step
        fc = self.relu(self.fullyconnected(embedding_vec))
        y = self.last(fc)
        probs = F.softmax(y, dim=1)
        return probs

    def init_hidden(self):
        # zeros of shape (num_layers, batch, hidden_dim) for the hidden and cell states
        return (my_variable(torch.zeros(1, 1, self.hidden_dim)),
                my_variable(torch.zeros(1, 1, self.hidden_dim)))
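I create the model like this (the dimension values and the vocab variable word_to_ix are placeholders for mine; label_size is 3 because I have three sentiment classes):

EMBEDDING_DIM, HIDDEN_DIM = 100, 128  # placeholder sizes
model = LSTM(EMBEDDING_DIM, HIDDEN_DIM, vocab_size=len(word_to_ix), label_size=3)
if USE_CUDA:
    model = model.cuda()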

# loss update (loss_function is nn.CrossEntropyLoss());
# e.g. the model outputs probs = [[0.2, 0.2, 0.6]] and the actual label is class 2:
loss = loss_function(probs, my_variable(LongTensor([2])))
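For reference, my full training step looks roughly like this (a minimal sketch; NUM_EPOCHS and training_data stand in for my actual loop):

import torch.optim as optim

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

model.train()
for epoch in range(NUM_EPOCHS):
    # sentence: LongTensor of word indices, label: LongTensor([class]); batch size 1
    for sentence, label in training_data:
        model.zero_grad()
        model.hidden = model.init_hidden()  # fresh LSTM state per sentence
        probs = model(sentence)
        loss = loss_function(probs, label)
        loss.backward()
        optimizer.step()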

# Model evaluation

def getpred(model, loss_function, x_pred, y_pred):
    counter = 0
    total_loss = 0.0
    XTEST = x_pred
    YTEST = y_pred
    model.eval()
    for i, x_test in enumerate(XTEST):
        sentence = my_variable(LongTensor([int(n) for n in x_test[0]]))
        aspect = my_variable(LongTensor([int(n) for n in x_test[1]]))

        model.hidden = model.init_hidden()  # reset the LSTM state for every sentence
        probs = model(sentence)

        # Compute loss
        true_label = my_variable(LongTensor([int(YTEST[i])]), requires_grad=False)
        loss = loss_function(probs, true_label)
        total_loss += float(loss.data[0])

        # Prediction = class with the max probability
        max_value, idx = torch.max(probs, 1)
        if USE_CUDA:
            Y_pred = idx.data.cpu().numpy()
        else:
            Y_pred = idx.data.numpy()
        Y_target = YTEST[i]

        if Y_pred == Y_target:
            counter += 1

    print('Loss -- {}'.format(total_loss))
    print('Accuracy -- {}'.format((counter / len(XTEST)) * 100))
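And I call it on the held-out split like this (x_test_data / y_test_data are placeholders for my test set):

getpred(model, loss_function, x_test_data, y_test_data)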