RNN model accuracy not improving

Hello, I am using a character-level RNN to classify names. I am basically using the same example given in the official PyTorch documentation, with minor modifications.
However, the overall accuracy is not improving (it jumps between 10% and 30%), and I can't figure out the reason.
Here is the module class:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class Model(nn.Module):

    def __init__(self, input_size, hidden_size, output_size):
        super(Model, self).__init__()

        self.in_size = input_size
        self.h_size = hidden_size
        self.o_size = output_size

        # input (concatenated with the hidden state) to the next hidden state
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        # input (concatenated with the hidden state) to the output scores
        self.i2o = nn.Linear(input_size + hidden_size, output_size)

    def forward(self, inp, hidden):

        combined = torch.cat((inp, hidden), 1)
        hidden_unit = self.i2h(combined)
        output_unit = F.log_softmax(self.i2o(combined), dim=1)

        return output_unit, hidden_unit

    def initialize_hidden_layer(self):

        return Variable(torch.zeros(1, self.h_size))
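
For reference, one forward step can be shape-checked like this (the sizes are just placeholders, not my actual values):

n_letters, n_hidden, n_categories = 57, 128, 18  # placeholder sizes

model = Model(n_letters, n_hidden, n_categories)
hidden = model.initialize_hidden_layer()
inp = Variable(torch.zeros(1, n_letters))  # a single one-hot character
output, hidden = model(inp, hidden)
print(output.size(), hidden.size())  # torch.Size([1, 18]) torch.Size([1, 128])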

This is what my training function looks like:

def train_model(model, epoch, features, labels):
    count = 0
    accuracy = 0
    total_loss = 0
    for (feature, target) in zip(features, labels):

        # fresh hidden state for each name
        hidden = model.initialize_hidden_layer()
        class_index = classes_names.index(target)
        class_tensor = Variable(torch.LongTensor([class_index]))
        word_tensor = Variable(word_to_tensor(feature))

        # feed the name one character at a time; only the final
        # output is used for the loss
        for i in range(word_tensor.size()[0]):
            predicted_output, hidden = model(word_tensor[i], hidden)

        loss = criterion(predicted_output, class_tensor)
        predicted_label = find_label(predicted_output)
        total_loss += loss.data.item()

        if predicted_label[0] == target:
            accuracy += 1

        optimizer.zero_grad()

        # backpropagation
        loss.backward()

        # update params
        optimizer.step()
        count += 1

    # epoch summary
    print("count ", count)
    print("accurate predictions ", accuracy)
    print("total accuracy ", (accuracy / len(features) * 100))
    print("total loss ", total_loss)


def get_all_examples():

    labels = []
    features = []
    # classes_dict maps a class name to its list of example names
    for k, v in classes_dict.items():
        for name in v:
            labels.append(k)
            features.append(name)

    return features, labels
import random

features, labels = get_all_examples()

# shuffle features and labels together so the pairs stay aligned
shuffle_list = list(zip(features, labels))
random.shuffle(shuffle_list)
features, labels = zip(*shuffle_list)

model.train()
for epoch in range(10):
    train_model(model, epoch, features, labels)

The other auxiliary functions are the same as those in the documentation.
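
In case it is useful, here is roughly what they look like (a sketch following the tutorial; all_letters and the exact shapes are the tutorial's defaults, and classes_names is the list of class names used above):

import string
import torch

all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)

def word_to_tensor(word):
    # one-hot encode a name as a (len(word), 1, n_letters) tensor,
    # like lineToTensor in the tutorial
    tensor = torch.zeros(len(word), 1, n_letters)
    for i, letter in enumerate(word):
        tensor[i][0][all_letters.find(letter)] = 1
    return tensor

def find_label(output):
    # return the class with the highest log-probability,
    # like categoryFromOutput in the tutorial
    top_v, top_i = output.data.topk(1)
    class_index = top_i[0][0].item()
    return classes_names[class_index], class_index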

  1. What criterion are you using?
  2. See if using an LSTM instead of a vanilla RNN helps; a sketch follows below.
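
For example, a minimal sketch of an LSTM version (the class and variable names here are illustrative, and it assumes the same (seq_len, 1, n_letters) one-hot tensors as your code):

import torch
import torch.nn as nn
import torch.nn.functional as F

class LSTMModel(nn.Module):

    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.h2o = nn.Linear(hidden_size, output_size)

    def forward(self, word_tensor):
        # nn.LSTM consumes the whole (seq_len, 1, input_size) sequence
        # at once, so the manual per-character loop goes away
        _, (h_n, _) = self.lstm(word_tensor)
        return F.log_softmax(self.h2o(h_n[0]), dim=1)

With this, the inner character loop in train_model collapses to a single predicted_output = model(word_tensor) call.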

Thanks for your response.
I tried using SGD and Adam, but there is no noticeable improvement. However, I am going to try an LSTM and will post here if I face any problems 🙂

Variable is deprecated: Automatic differentiation package - torch.autograd — PyTorch 2.1 documentation
I think neither class_tensor nor word_tensor needs requires_grad to be set to True.
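
Something like this sketch, reusing the names from the code above, would be the modern equivalent:

# plain tensors replace Variable; requires_grad defaults to False,
# which is fine here since gradients flow through the model's
# parameters rather than through these inputs
class_tensor = torch.LongTensor([class_index])
word_tensor = word_to_tensor(feature)
hidden = torch.zeros(1, hidden_size)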