How can I restore the original sequences flattened by pack_padded_sequence?

Hi all,
I am a new PyTorch user, and I want to finish a seq2seq model with PyTorch.

def forward(self, input, len_list, encoder_lstm1_h_c=None, decoder_lstm1_h_c=None):
    """Run a padded, variable-length batch through the encoder and decoder.

    The batch is reordered by descending length, packed, passed through
    ``self.encoder_lstm_1`` and then ``self.decoder_lstm_1``, unpacked,
    put back into the caller's original order, and finally projected by
    ``self.fc_1``.

    Args:
        input: Padded batch tensor. Rows are reordered along dim 0, so the
            batch dimension is assumed to be dim 0 (i.e. ``BATCH_FIRST`` is
            true) -- TODO confirm.
        len_list: 1-D tensor holding the true length of every sequence, in
            the same order as the rows of ``input``.
        encoder_lstm1_h_c: Optional initial state forwarded to
            ``self.encoder_lstm_1``.
        decoder_lstm1_h_c: Optional initial state forwarded to
            ``self.decoder_lstm_1``.

    Returns:
        The output of ``self.fc_1`` applied to the decoder outputs flattened
        to rows of width ``self.hidden_size_decoder_1 * 2``. Note that
        ``pad_packed_sequence`` pads only up to the longest sequence in
        *this batch*, not to the padded width of ``input``.
    """
    # Sort by descending length before packing; idx_unsort is the inverse
    # permutation used to restore the caller's order afterwards.
    _, idx_sort = torch.sort(len_list, dim=0, descending=True)
    _, idx_unsort = torch.sort(idx_sort, dim=0)

    # Follow the device of the data instead of hard-coding .cuda(), so the
    # model also works on CPU and on non-default GPUs.
    input = input.index_select(0, idx_sort.to(input.device))
    # pack_padded_sequence expects lengths as plain ints (or a CPU tensor).
    sorted_lens = len_list[idx_sort].tolist()

    input = nn_utils.rnn.pack_padded_sequence(input, sorted_lens, batch_first=BATCH_FIRST)

    out_put_encoder_lstm1, encoder_lstm1_h_c = self.encoder_lstm_1(input, encoder_lstm1_h_c)
    out_put_decoder_lstm1, decoder_lstm1_h_c = self.decoder_lstm_1(out_put_encoder_lstm1, decoder_lstm1_h_c)

    # pad_packed_sequence returns (padded_tensor, lengths); only the tensor is needed.
    out_put_padding, _ = nn_utils.rnn.pad_packed_sequence(out_put_decoder_lstm1, batch_first=BATCH_FIRST)
    out_put_resorted = out_put_padding.index_select(0, idx_unsort.to(out_put_padding.device))
    out_put = self.fc_1(out_put_resorted.view(-1, self.hidden_size_decoder_1 * 2))

    return out_put

my problems are:
(1) Before the forward function, I pad all of my sentences with 0 up to the length of the longest sentence in the corpus. The forward function, however, returns output padded only to the longest sentence in the current batch. When I use nn.CrossEntropyLoss, can I set ignore_index=0 so that I don’t have to mask again? If not, how should I mask?
(2) In my test function, predictions and labels_batch_list are padded with 0. I tried ‘if one_word_tag[0] != 0:’ as a mask, but the results contain some mistakes. Then I tried to use pack_padded_sequence to restore the original sequences, but the result is a single sequence flattened by this function. How can I restore it?
(In my batch there are 64 variable-length sentences, and they are mixed together by pack_padded_sequence.)

# Evaluate the model batch by batch and collect, per sentence,
# [gold tags, predicted tags, seg] into testresult.
for step, (batch_x, batch_lables, len_list) in enumerate(test_loader):
    # The model output is padded only to the longest sentence in this batch,
    # so that is the width used to un-flatten the predictions. view() wants
    # a plain int, not a 0-dim tensor.
    predictions_max_len = int(len_list.max())
    labels_max_len = MAX_SENT_LEN
    x_batch = Variable(batch_x).cuda()
    labels_batch = batch_lables.cuda()
    # Call the module rather than .forward() so registered hooks run.
    predictions = my_model(x_batch, len_list)
    predictions = torch.argmax(predictions, 1).view(-1, predictions_max_len, 1)

    labels_batch_list = labels_batch.tolist()
    predictions_list = predictions.tolist()
    sent_lens = len_list.tolist()
    assert len(labels_batch_list) == len(predictions_list)

    for gold_row, pred_row, sent_len in zip(labels_batch_list, predictions_list, sent_lens):
        # Gold labels are padded with 0, so index 0 marks padding.
        t_tag = [tag_index2word_dic[word[0]] for word in gold_row if word[0] != 0]
        # The model returns rows in the original batch order, so row i has
        # true length len_list[i]. Keep only those positions: a non-zero tag
        # predicted at a padded position must not leak into the result.
        # NOTE(review): a 0 predicted inside the real sentence is still
        # dropped, as before -- confirm whether a placeholder tag is wanted.
        p_tag = [tag_index2word_dic[word[0]] for word in pred_row[:sent_len] if word[0] != 0]

        testresult.append([t_tag, p_tag, seg])