Why the result of my model are all same?

The design of my model are shown follow:
CharLSTM:
class CharLSTM(nn.Module):

def __init__(self, config):
    super(CharLSTM, self).__init__()
    self.config = config
    self.char_embedding = nn.Embedding(self.config.char_num, self.config.char_dim)
    self.char_lstm = nn.LSTM(self.config.char_dim, self.config.char_hidden_dim, batch_first=True)

def forward(self, x):
    x = x.unsqueeze(0)  # (1, char_num)
    x = Variable(x)
    if self.config.transfer_if_gpu:
        x = x.cuda()
    x = self.char_embedding(x)  # (char_num, char_dim)
    out, (h, _) = self.char_lstm(x)  # (1, 1, char_hidden)
    return h

WordLSTM
class DLSA(nn.Module):

def __init__(self, config):
    super(DLSA, self).__init__()
    self.config = config
    self.char_lstm = CharLSTM(self.config)
    self.word_lstm = nn.LSTM(self.config.char_hidden_dim, self.config.transfer_word_hidden_dim, batch_first=True)
    self.linear = nn.Linear(self.config.transfer_word_hidden_dim, self.config.transfer_label_num)

def forward(self, sent, label):
    word_vectors = torch.FloatTensor()
    for word in sent:
        char_list = torch.LongTensor(word)  # (char_num,)
        char_hidden = self.char_lstm(char_list)  # (1, char_num, char_hidden)
        char_hidden = char_hidden.squeeze(0)  # (1, char_hidden)
        word_vectors = torch.cat((word_vectors, char_hidden), 0)  # (word_num, char_hidden)
    input = Variable(word_vectors)
    if self.config.transfer_if_gpu:
        input = input.cuda()
    input = input.unsqueeze(0)
    _, (h, _) = self.word_lstm(input)  # (1, 1, word_hidden)
    h = h.view(-1)  # (word_hidden, )
    output = self.linear(h)  # (5,)
    scores = F.softmax(output)  # (5,)
    cls_loss = -1 * torch.log(scores[label])  # 0-dim, scale
    return cls_loss

train
model = DLSA(config)
if config.transfer_if_gpu:
model.cuda()

parameters = filter(lambda p: p.requires_grad, model.parameters())
optimizer = create_optimizer(parameters, config.transfer_optimizer, config.transfer_learning_rate, config.transfer_weight_decay)

for e in range(config.transfer_epochs):
print "Epoch: ", e + 1
model.train()
if e % config.transfer_adjust_every == 0:
adjust_learning_rate(optimizer, e, config.transfer_learning_rate, config.transfer_adjust_every)

print len(train_batch)
for sentence_list in train_batch:
    optimizer.zero_grad()
    if len(sentence_list) == 0:
        continue
    for sent, label in sentence_list:
        cls_loss = model(sent, label)
        cls_loss.backward()
    # torch.nn.utils.clip_grad_norm_(model.parameters(), config.transfer_clip_norm, norm_type=2)
    optimizer.step()

f1 = evaluate_dev(trial_batch, model)
print "Trial f1: ", f1
if f1 > best_f1:
    best_f1 = f1
    best_model = copy.deepcopy(model)

print “Finish with best dev f1 {0}”.format(best_f1)
evaluate_test(test_batch, best_model)

I got the resullt shown follow


I don’t know where is my mistake, Can you help me

optimizer.step() should be inside loop as below:

for sent, label in sentence_list:
        cls_loss = model(sent, label)
        cls_loss.backward()
        optimizer.step()

The parameters of the char_lstm and char_embedding keep unchange during the training. Why the autograd mechanism can’t affect these parameters? But, the parameters of the self.linear have change during the training

It can’t resolve my problem when i put the optimizer.step() inside the loop