Training loss is not changing at all while training model

I’m trying to solve a VQA classification problem. my training loss is not changing at all while training the model.

I put in comment the CNN model and try to run it with the text only, but still, no change in the loss function.

I pass through those models:

class question_lstm(nn.Module):

    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout, output_dim, que_size):
        super(question_lstm, self).__init__()
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.tanh = nn.Tanh()
        self.lstm = nn.LSTM(emb_dim, hid_dim, n_layers, dropout = dropout)
        self.dropout = nn.Dropout(dropout)
        #self.fc1=nn.Linear(n_layers*hid_dim,que_size)
        self.fc1=nn.Linear(n_layers*output_dim,que_size)

    def forward(self, question):
      emb_question=self.embedding(question) #(batchsize, input_dim, emb_dim=256)
      emb_question=self.dropout(emb_question)
      emb_question=self.tanh(emb_question)
      emb_question = emb_question.transpose(0, 1)  #(input_dim, batchsize, emb_dim)
      output, (hidden, cell) = self.lstm(emb_question) 
      qu_feature = torch.cat((hidden, cell), dim=2)               
      qu_feature = qu_feature.transpose(0, 1) #(batchsize=100, num_layer=2, hid_dim=2048)
      question_output =self.fc1(qu_feature)    
      return question_output
class vqamodel(nn.Module):

  def __init__(self, output_dim,input_dim, emb_dim, hid_dim, n_layers, dropout, answer_len, que_size,):

    super(vqamodel,self).__init__()
    #self.image=img_CNN(img_size,image_feature)
    self.question=question_lstm(input_dim, emb_dim, hid_dim, n_layers, dropout,output_dim,que_size)
    self.tanh=nn.Tanh()
    self.relu=nn.ReLU()
    self.dropout=nn.Dropout(dropout)
    self.fc1=nn.Linear(que_size,output_dim) 
    self.fc2=nn.Linear(output_dim,answer_len)

  def forward(self, image, question):
    question_emb=self.question(question)
    combine =question_emb   #*img_emb
    out_feature=self.fc1(combine) #(batchsize=100, output_dim=2048)
    out_feature=self.relu(out_feature) 
    out_feature=self.dropout(out_feature)
    out_feature=self.fc2(out_feature) #(batchsize=100, answer_len=1000)
    return (out_feature)

I’m using cross entropy loss and Adam:

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vqa_model.parameters(),lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

any idea what can cause this constant loss value?