GRU Autoencoder is not working

I am building a multi-decoder autoencoder using GRUs. The model consists of one encoder and two decoders, all of which use a GRU.

The source code of the model is:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

class EncoderRNN(nn.Module):
    def __init__(self, input_size, emb_size, hidden_size, n_layers, bidirectional):
        super(EncoderRNN, self).__init__()
        
        self.input_size = input_size
        self.emb_size = emb_size
        self.hidden_size = int(hidden_size / (2 if bidirectional else 1))
        self.n_layers = n_layers
        self.bidirectional = bidirectional
        
        self.embedding = nn.Embedding(self.input_size, self.emb_size, padding_idx=0)
        self.rnn = nn.GRU(self.emb_size, self.hidden_size, bidirectional=self.bidirectional, num_layers=self.n_layers)
        
    def forward(self, input_seqs, input_lens):
        embedded = self.embedding(input_seqs)
        # input_lens must be sorted in decreasing order for pack_padded_sequence
        packed = pack_padded_sequence(embedded, input_lens)
        outputs, hidden = self.rnn(packed)
        outputs, output_lengths = pad_packed_sequence(outputs)
        
        if self.bidirectional:
            # concatenate the forward and backward hidden states of each layer
            hidden = torch.cat([hidden[0:hidden.size(0):2], hidden[1:hidden.size(0):2]], 2)
            
        return outputs, hidden

class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, emb_size, output_size, n_layers, dropout_p):
        super(DecoderRNN, self).__init__()
        
        self.hidden_size = hidden_size
        self.emb_size = emb_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout_p = dropout_p
        
        self.embedding = nn.Embedding(self.output_size, self.emb_size, padding_idx=0)
        self.rnn = nn.GRU(self.emb_size, self.hidden_size, num_layers=self.n_layers, dropout=dropout_p)
    
    def forward(self, input_seqs, hidden):
        embedded = self.embedding(input_seqs)
        output, hidden = self.rnn(embedded, hidden)
        return output, hidden

class Net(nn.Module):
    def __init__(self, model_type, encoder, decoder0, decoder1):
        super(Net, self).__init__()
        
        self.type = model_type
        self.encoder = encoder
        self.decoder0 = decoder0
        self.decoder1 = decoder1
        self.generator = None
        
        self.choose_decoder = lambda dec_idx: decoder0 if dec_idx == 0 else decoder1
        
    def forward(self, indices, lengths, dec_idx, only_enc=False):
        # Encode
        # skip the first token (e.g. SOS) and shorten the lengths accordingly
        enc_output, enc_hidden = self.encoder(indices[1:], lengths - 1)
        if only_enc:
            return enc_hidden.squeeze(dim=0)
        
        # Decode
        assert dec_idx == 0 or dec_idx == 1
        
        decoder_outputs, dec_state = self.choose_decoder(dec_idx)(indices, enc_hidden)
        decoded = self.generator(decoder_outputs)
        
        return decoded

The generator of the Net class is initialized in the model builder function. It is normally a linear layer mapping the decoder hidden size to the vocabulary size.
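
For reference, a minimal sketch of what that builder roughly looks like (the hyperparameter values and the 'ae' model type string are placeholders for illustration, not my exact code):

def build_model(vocab_size, emb_size=256, hidden_size=512, n_layers=1,
                bidirectional=True, dropout_p=0.1):
    encoder = EncoderRNN(vocab_size, emb_size, hidden_size, n_layers, bidirectional)
    decoder0 = DecoderRNN(hidden_size, emb_size, vocab_size, n_layers, dropout_p)
    decoder1 = DecoderRNN(hidden_size, emb_size, vocab_size, n_layers, dropout_p)

    model = Net('ae', encoder, decoder0, decoder1)
    # the generator projects decoder hidden states to vocabulary logits
    model.generator = nn.Linear(hidden_size, vocab_size)
    return model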

The training function for my model is:

def train_ae(dec_idx, batch, temp=1):
    model.train()
    optimizer_ae.zero_grad()
    
    source, target, lengths = batch
    source = source.to(device)
    target = target.to(device)
    lengths = lengths.to(device)
    
    output = model(source, lengths, dec_idx)    
    output = output.view(-1, vocab_size)
    recon_loss = F.cross_entropy(output, target, ignore_index=0)
        
    recon_loss.backward()
    
    torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
    optimizer_ae.step()
    
    return recon_loss.item()

But the loss per epoch decreases very slowly (I don't think the model is training properly), and the generated outputs are poor.

The losses look something like this:
ep1 : 15184.654
ep2 : 14797.090
ep3 : 14740.160
ep4 : 14703.133

I have trained another recurrent autoencoder before, and its loss decreased as expected under the same conditions.

I solved this problem myself.

The target tensor was wrong.
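
For anyone hitting the same symptom: the key point is that the flattened target must line up position by position with the flattened decoder logits. A minimal sketch of the usual teacher-forcing setup (the shapes and names below are assumptions for illustration, not my exact code):

import torch.nn.functional as F

# indices: (seq_len, batch) LongTensor with an SOS token at position 0 and padding index 0
# logits:  (seq_len - 1, batch, vocab_size), produced by decoding indices[:-1]
def reconstruction_loss(logits, indices, vocab_size):
    target = indices[1:].reshape(-1)         # shift by one step and flatten to (N,)
    logits = logits.view(-1, vocab_size)     # flatten to (N, vocab_size)
    assert logits.size(0) == target.size(0)  # each row of logits must match one target token
    return F.cross_entropy(logits, target, ignore_index=0)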