Why the result of my model are all same?

ShunxinXiao · October 17, 2018, 12:30pm

The design of my model are shown follow:
CharLSTM:
class CharLSTM(nn.Module):

def __init__(self, config):
    super(CharLSTM, self).__init__()
    self.config = config
    self.char_embedding = nn.Embedding(self.config.char_num, self.config.char_dim)
    self.char_lstm = nn.LSTM(self.config.char_dim, self.config.char_hidden_dim, batch_first=True)

def forward(self, x):
    x = x.unsqueeze(0)  # (1, char_num)
    x = Variable(x)
    if self.config.transfer_if_gpu:
        x = x.cuda()
    x = self.char_embedding(x)  # (char_num, char_dim)
    out, (h, _) = self.char_lstm(x)  # (1, 1, char_hidden)
    return h

WordLSTM
class DLSA(nn.Module):

def __init__(self, config):
    super(DLSA, self).__init__()
    self.config = config
    self.char_lstm = CharLSTM(self.config)
    self.word_lstm = nn.LSTM(self.config.char_hidden_dim, self.config.transfer_word_hidden_dim, batch_first=True)
    self.linear = nn.Linear(self.config.transfer_word_hidden_dim, self.config.transfer_label_num)

def forward(self, sent, label):
    word_vectors = torch.FloatTensor()
    for word in sent:
        char_list = torch.LongTensor(word)  # (char_num,)
        char_hidden = self.char_lstm(char_list)  # (1, char_num, char_hidden)
        char_hidden = char_hidden.squeeze(0)  # (1, char_hidden)
        word_vectors = torch.cat((word_vectors, char_hidden), 0)  # (word_num, char_hidden)
    input = Variable(word_vectors)
    if self.config.transfer_if_gpu:
        input = input.cuda()
    input = input.unsqueeze(0)
    _, (h, _) = self.word_lstm(input)  # (1, 1, word_hidden)
    h = h.view(-1)  # (word_hidden, )
    output = self.linear(h)  # (5,)
    scores = F.softmax(output)  # (5,)
    cls_loss = -1 * torch.log(scores[label])  # 0-dim, scale
    return cls_loss

train
model = DLSA(config)
if config.transfer_if_gpu:
model.cuda()

parameters = filter(lambda p: p.requires_grad, model.parameters())
optimizer = create_optimizer(parameters, config.transfer_optimizer, config.transfer_learning_rate, config.transfer_weight_decay)

for e in range(config.transfer_epochs):
print "Epoch: ", e + 1
model.train()
if e % config.transfer_adjust_every == 0:
adjust_learning_rate(optimizer, e, config.transfer_learning_rate, config.transfer_adjust_every)

print len(train_batch)
for sentence_list in train_batch:
    optimizer.zero_grad()
    if len(sentence_list) == 0:
        continue
    for sent, label in sentence_list:
        cls_loss = model(sent, label)
        cls_loss.backward()
    # torch.nn.utils.clip_grad_norm_(model.parameters(), config.transfer_clip_norm, norm_type=2)
    optimizer.step()

f1 = evaluate_dev(trial_batch, model)
print "Trial f1: ", f1
if f1 > best_f1:
    best_f1 = f1
    best_model = copy.deepcopy(model)

print “Finish with best dev f1 {0}”.format(best_f1)
evaluate_test(test_batch, best_model)

I got the resullt shown follow

I don’t know where is my mistake, Can you help me

alwynmathew · October 17, 2018, 1:25pm

optimizer.step() should be inside loop as below:

for sent, label in sentence_list:
        cls_loss = model(sent, label)
        cls_loss.backward()
        optimizer.step()

ShunxinXiao · October 17, 2018, 2:18pm

The parameters of the char_lstm and char_embedding keep unchange during the training. Why the autograd mechanism can’t affect these parameters? But, the parameters of the self.linear have change during the training

ShunxinXiao · October 17, 2018, 2:26pm

It can’t resolve my problem when i put the optimizer.step() inside the loop