I’m new to pytorch, i am doing sentiment analysis,i want to classify reviews into four classes,therefore my code doesn’t return the correct result, so if you can help me to find where is the problem .
Thanks.
model LSTM :
class SentimentLSTM(nn.Module):
def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, drop_prob=0.25):
"""
Initialize the model by setting up the layers.
"""
super(SentimentLSTM, self).__init__()
self.output_size = output_size
self.n_layers = n_layers
self.hidden_dim = hidden_dim
# embedding and LSTM layers
self.embedding = nn.Embedding(vocab_size, embedding_dim)
self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers,
dropout=drop_prob, batch_first=True)
# dropout layer
self.dropout = nn.Dropout(0.25)
# linear and sigmoid layers
self.fc = nn.Linear(hidden_dim, output_size)
self.softmax = nn.Softmax(dim=1)
def forward(self, x, hidden):
"""
Perform a forward pass of our model on some input and hidden state.
"""
batch_size = x.size(0)
# embeddings and lstm_out
x = x.long()
embeds = self.embedding(x)
lstm_out, hidden = self.lstm(embeds, hidden)
print("lstm ouuuuuuuut",lstm_out)
# stack up lstm outputs
lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
# dropout and fully-connected layer
out = self.dropout(lstm_out)
out = self.fc(out)
# softmax function
softmax_out = self.softmax(out)
# reshape to be batch_size first
softmax_out = softmax_out.view(batch_size, -1, output_size)
softmax_out = softmax_out[:, -1] # get last batch of labels
# return last sigmoid output and hidden state
return softmax_out, hidden
def init_hidden(self, batch_size):
''' Initializes hidden state '''
# Create two new tensors with sizes n_layers x batch_size x hidden_dim,
# initialized to zero, for hidden state and cell state of LSTM
weight = next(self.parameters()).data
hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())
return hidden
train data :
parameters :
vocab_size = len(vocab_to_int)+1
output_size = 4
embedding_dim = 40
hidden_dim = 25
n_layers = 5
lr=0.001#Taux d'apprentissage
#criterion = nn.CrossEntropyLoss()
criterion = nn.NLLLoss()
optimizer = optim.SGD(net.parameters(), lr=lr)
# training params
epochs = 4
counter = 0
print_every = 100
clip=5
net.train()
# train for some number of epochs
for e in range(epochs):
# initialize hidden state
h = net.init_hidden(batch_size)#initialiser les couches cachées
# batch loop
for inputs, labels in train_loader:
counter += 1
# Creating new variables for the hidden state, otherwise
# we'd backprop through the entire training history
h = tuple([each.data for each in h])
# zero accumulated gradients
net.zero_grad()
# get the output from the model
output, h = net(inputs, h)
# calculate the loss and perform backprop
loss = criterion(output.squeeze(), labels.long())
loss.backward()
print(loss)
# `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
nn.utils.clip_grad_norm_(net.parameters(), clip)
optimizer.step()
# loss stats
if counter % print_every == 0:
# Get validation loss
val_h = net.init_hidden(batch_size)
val_losses = []
net.eval()
for inputs, labels in valid_loader:
# Creating new variables for the hidden state, otherwise
# we'd backprop through the entire training history
val_h = tuple([each.data for each in val_h])
inputs, labels = inputs.cpu(), labels.cpu()
output, val_h = net(inputs, val_h)
val_loss = criterion(output.squeeze(), labels.long())
val_losses.append(val_loss.item())
net.train()
print("Epoch: {}/{}...".format(e+1, epochs),
"Step: {}...".format(counter),
"Loss: {:.6f}...".format(loss.item()),
"Val Loss: {:.6f}".format(np.mean(val_losses)))
# tester les données
test_losses = [] # track loss
num_correct = 0
# init hidden state
h = net.init_hidden(batch_size)
net.eval()
# iterate over test data
for inputs, labels in test_loader:
# Creating new variables for the hidden state, otherwise
# we'd backprop through the entire training history
h = tuple([each.data for each in h])
# print("la valeur de h: \n",h)
inputs, labels = inputs.cpu(), labels.cpu()
# get predicted outputs
output, h = net(inputs, h)
# calculate loss
test_loss = criterion(output.squeeze(), labels.long())
test_losses.append(test_loss.item())
#print(test_loss.item())
# convert output probabilities to predicted class (0 or 1)
pred = torch.argmax(output,dim=1, keepdim=True)
# compare predictions to true label
correct_tensor = pred.eq(labels.long().view_as(pred))
correct = np.squeeze(correct_tensor.numpy())
num_correct += np.sum(correct)
# -- Statistiques! - ##
# avg test loss
print("Test perte: {:.3f}%".format(np.mean(test_losses)))
# précision sur toutes les données de test
test_acc = num_correct/len(test_loader.dataset)
print("Test de précision: {:.3f}%".format(test_acc*100))
def tokenize_review(test_review):
test_review = test_review.lower() # lowercase
# get rid of punctuation
test_text = ''.join([c for c in test_review if c not in punctuation])
# splitting by spaces
test_words = test_text.split()
# tokens
test_ints = []
test_ints.append([vocab_to_int[word] for word in test_words])
return test_ints
def predict(net, test_review, sequence_length=200):
net.eval()
# tokenize review
test_ints = tokenize_review(test_review)
# pad tokenized sequence
seq_length=sequence_length
features = pad_features(test_ints, seq_length)
# convert to tensor to pass into your model
feature_tensor = torch.from_numpy(np.asarray(features))
batch_size = feature_tensor.size(0)
# initialize hidden state
h = net.init_hidden(batch_size)
# get the output from the model
#feature_tensor = feature_tensor.type(torch.LongTensor)
output, h = net(feature_tensor, h)
# convert output probabilities to predicted class (0 or 1)
#pred = torch.argmax(output.squeeze(),dim=1)
pred = torch.argmax(output.squeeze())
# print custom response
if(pred.item()==1):
print("Positive review detected!")
elif(pred.item()==0):
print("Negative review detected.")
elif(pred.item()==2):
print("conflet review detected.")
else:
print("neutre review detected.")
test_review_neg="Very good quality and well made"
seq_length=400
predict(net, test_review_neg, seq_length)