Multiclass sentiment analysis

Hello,
I am new to PyTorch and I am working on sentiment analysis.
I wrote code to classify reviews into 4 classes (positive, negative, neutral, conflict),
but the code reports a good accuracy while returning incorrect predictions. Please help me find the problem.
Thanks.
model:

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

class SentimentLSTM(nn.Module):
    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, drop_prob=0.1):
        """
        Initialize the model by setting up the layers.
        """
        super(SentimentLSTM, self).__init__()

        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        # embedding and LSTM layers
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers,
                            dropout=drop_prob, batch_first=True)

        # dropout layer
        self.dropout = nn.Dropout(drop_prob)

        # linear and softmax layers
        self.fc = nn.Linear(hidden_dim, output_size)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x, hidden):
        """
        Perform a forward pass of our model on some input and hidden state.
        """
        batch_size = x.size(0)
        # embeddings and lstm_out
        x = x.long()
        embeds = self.embedding(x)
        lstm_out, hidden = self.lstm(embeds, hidden)
        # stack up lstm outputs
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)

        # dropout and fully-connected layer
        out = self.dropout(lstm_out)
        out = self.fc(out)
        # softmax function
        softmax_out = self.softmax(out)
        # reshape to be batch_size first
        softmax_out = softmax_out.view(batch_size, -1, self.output_size)
        softmax_out = softmax_out[:, -1]  # take the output at the last time step
        # return last softmax output and hidden state
        return softmax_out, hidden

    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x hidden_dim,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())

        return hidden

parameters:

vocab_size = len(vocab_to_int)+1
output_size = 4
embedding_dim = 40
hidden_dim = 25
n_layers = 5
lr = 0.001

net = SentimentLSTM(vocab_size, output_size, embedding_dim, hidden_dim, n_layers)

#criterion = nn.CrossEntropyLoss()
criterion = nn.NLLLoss()
optimizer = optim.SGD(net.parameters(), lr=lr)

epochs = 4
counter = 0
print_every = 100
clip = 5  # gradient clipping value

training loop:

net.train()

# train for some number of epochs
for e in range(epochs):   
    # initialize hidden state
    h = net.init_hidden(batch_size)

    # batch loop
    for inputs, labels in train_loader:
        counter += 1
        
        # Creating new variables for the hidden state, otherwise
        # we'd backprop through the entire training history
        h = tuple([each.data for each in h])

        # zero accumulated gradients
        net.zero_grad()

        # get the output from the model
        output, h = net(inputs, h)
        

        # calculate the loss and perform backprop
        loss = criterion(output.squeeze(), labels.long())
        
        loss.backward()
        print(loss)
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        nn.utils.clip_grad_norm_(net.parameters(), clip)
        optimizer.step()

        # loss stats
        if counter % print_every == 0:
            # Get validation loss
            val_h = net.init_hidden(batch_size)
            val_losses = []
            net.eval()
            for inputs, labels in valid_loader:

                # Creating new variables for the hidden state, otherwise
                # we'd backprop through the entire training history
                val_h = tuple([each.data for each in val_h])

                inputs, labels = inputs.cpu(), labels.cpu()

                output, val_h = net(inputs, val_h)
                val_loss = criterion(output.squeeze(), labels.long())
                val_losses.append(val_loss.item())

            net.train()
            
           
            print("Epoch: {}/{}...".format(e+1, epochs),
                  "Step: {}...".format(counter),
                  "Loss: {:.6f}...".format(loss.item()),
                  "Val Loss: {:.6f}".format(np.mean(val_losses)))




testing:

test_losses = [] # track loss
num_correct = 0
# init hidden state
h = net.init_hidden(batch_size)
net.eval()
# iterate over test data
for inputs, labels in test_loader:

    # Creating new variables for the hidden state, otherwise
    # we'd backprop through the entire training history
    h = tuple([each.data for each in h])

    inputs, labels = inputs.cpu(), labels.cpu()
    
    # get predicted outputs
    #inputs = inputs.type(torch.LongTensor)
    output, h = net(inputs, h)
    
    # calculate loss
    test_loss = criterion(output.squeeze(), labels.long())
    test_losses.append(test_loss.item())
    
    # convert output probabilities to predicted class (0-3)
    pred = torch.argmax(output, dim=1, keepdim=True)
    # compare predictions to true label
    correct_tensor = pred.eq(labels.long().view_as(pred))
    correct = np.squeeze(correct_tensor.numpy()) 
    num_correct += np.sum(correct)



## -- Statistics -- ##
# avg test loss
print("Test loss: {:.3f}".format(np.mean(test_losses)))

# accuracy over all test data
test_acc = num_correct/len(test_loader.dataset)
print("Test accuracy: {:.3f}%".format(test_acc*100))


from string import punctuation

def tokenize_review(test_review):
    test_review = test_review.lower()  # lowercase
    # get rid of punctuation
    test_text = ''.join([c for c in test_review if c not in punctuation])

    # splitting by spaces
    test_words = test_text.split()

    # tokens
    test_ints = []
    test_ints.append([vocab_to_int[word] for word in test_words])

    return test_ints

def predict(net, test_review, sequence_length=200):

    net.eval()
    # tokenize review
    test_ints = tokenize_review(test_review)
    # pad tokenized sequence
    seq_length = sequence_length
    features = pad_features(test_ints, seq_length)

    # convert to tensor to pass into the model
    feature_tensor = torch.from_numpy(np.asarray(features))
    batch_size = feature_tensor.size(0)

    # initialize hidden state
    h = net.init_hidden(batch_size)

    #feature_tensor = feature_tensor.cpu()

    # get the output from the model
    output, h = net(feature_tensor, h)
    pred = torch.argmax(output.squeeze())

    # print custom response
    if pred.item() == 1:
        print("Positive review detected!")
    elif pred.item() == 0:
        print("Negative review detected.")
    elif pred.item() == 2:
        print("Conflict review detected.")
    else:
        print("Neutral review detected.")


test_review = "Very good quality and well made"
seq_length = 400
predict(net, test_review, seq_length)


nn.NLLLoss expects log probabilities as the model output, so you should replace the nn.Softmax layer with nn.LogSoftmax.
Could you also explain what “a good precision and not correct results” means? Do you think the accuracy calculation is wrong, or what kind of values are you seeing?
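
For reference, here is a minimal sketch of the expected pairing, using dummy tensors rather than your actual model:

    import torch
    import torch.nn as nn

    # nn.NLLLoss expects log probabilities, so pair it with nn.LogSoftmax
    log_softmax = nn.LogSoftmax(dim=1)
    criterion = nn.NLLLoss()

    logits = torch.randn(8, 4)          # e.g. a batch of 8 samples, 4 classes
    labels = torch.randint(0, 4, (8,))  # class indices 0-3
    loss = criterion(log_softmax(logits), labels)

    # equivalent alternative: pass the raw logits to nn.CrossEntropyLoss,
    # which applies LogSoftmax + NLLLoss internally
    loss_alt = nn.CrossEntropyLoss()(logits, labels)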

I have already tried LogSoftmax, but it still gives incorrect results.
For your second question:
what I mean by “a good precision and an incorrect result” is that during training the code reports a good accuracy, but when I test the model it returns incorrect predictions, so
I can’t tell whether the problem is in the model or in the data processing (first part).

What do these incorrect results look like?

This sounds as if your model is overfitting, so you might need to add more regularization to your training (e.g. via dropout layers or weight decay).
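
For example, a sketch of two common knobs, reusing the names from your code; the values here are illustrative placeholders, not tuned settings:

    # stronger dropout between the LSTM layers
    net = SentimentLSTM(vocab_size, output_size, embedding_dim, hidden_dim,
                        n_layers, drop_prob=0.5)

    # weight decay (L2 penalty) on the optimizer is another common option
    optimizer = optim.SGD(net.parameters(), lr=lr, weight_decay=1e-4)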

Hello,
in fact the code returns incorrect results even for data extracted from the training corpus.

After training comes the test phase: to test a sentence we call the tokenize_review function and then predict, and the code returns a class after applying the argmax.
The problem is that, for example, when I give it a negative sentence the code returns a positive result.

This description sounds as if your model is just not learning to generalize properly.
If the model is overfitting (low training loss, high validation loss), try adding regularization, changing the model architecture, adding more data augmentation, etc.
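
One quick way to confirm that diagnosis is to compare accuracy on the training and validation sets; a large gap suggests overfitting. A minimal sketch reusing the loaders from your code (it assumes, like your training loop, that every batch has the same batch_size):

    def evaluate_accuracy(net, loader):
        # compute classification accuracy over a DataLoader
        net.eval()
        correct, total = 0, 0
        h = net.init_hidden(batch_size)
        with torch.no_grad():
            for inputs, labels in loader:
                h = tuple(each.data for each in h)  # detach the hidden state
                output, h = net(inputs, h)
                pred = torch.argmax(output, dim=1)
                correct += (pred == labels.long()).sum().item()
                total += labels.size(0)
        return correct / total

    print("train acc:", evaluate_accuracy(net, train_loader))
    print("val acc:  ", evaluate_accuracy(net, valid_loader))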

Thanks for your reply, I will try the solution you suggested :blush: