I created a multi-decoder autoencoder using GRUs. The model consists of one encoder and two decoders, all of which use GRUs.
The source code of the model is:
class EncoderRNN(nn.Module):
    """GRU sequence encoder over embedded token indices.

    When ``bidirectional`` is True, the per-direction hidden size is halved so
    that the concatenated forward/backward hidden state matches ``hidden_size``.
    """

    def __init__(self, input_size, emb_size, hidden_size, n_layers, bidirectional):
        super(EncoderRNN, self).__init__()
        self.input_size = input_size
        self.emb_size = emb_size
        # Halve the per-direction width so fwd+bwd concat equals hidden_size.
        self.hidden_size = hidden_size // 2 if bidirectional else hidden_size
        self.n_layers = n_layers
        self.bidirectional = bidirectional
        # Index 0 is reserved for padding; its embedding stays zero.
        self.embedding = nn.Embedding(self.input_size, self.emb_size, padding_idx=0)
        self.rnn = nn.GRU(self.emb_size, self.hidden_size,
                          bidirectional=self.bidirectional, num_layers=self.n_layers)

    def forward(self, input_seqs, input_lens):
        """Encode ``input_seqs`` (seq_len, batch) with true lengths ``input_lens``.

        Returns (padded outputs, final hidden state); for a bidirectional RNN
        the two directions of each layer are concatenated on the feature dim.
        """
        emb = self.embedding(input_seqs)
        outputs, hidden = self.rnn(pack_padded_sequence(emb, input_lens))
        outputs, _ = pad_packed_sequence(outputs)
        if self.bidirectional:
            # hidden is (n_layers * 2, batch, hidden_size): even rows are the
            # forward direction, odd rows the backward one -- merge per layer.
            total = hidden.size(0)
            fwd = hidden[0:total:2]
            bwd = hidden[1:total:2]
            hidden = torch.cat([fwd, bwd], 2)
        return outputs, hidden
class DecoderRNN(nn.Module):
    """GRU decoder that maps embedded token indices to hidden-state sequences.

    Note: this module does not project to the vocabulary itself; the caller
    applies an external generator (linear) layer to its outputs.
    """

    def __init__(self, hidden_size, emb_size, output_size, n_layers, dropout_p):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.emb_size = emb_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout_p = dropout_p
        # Index 0 is the padding token; its embedding stays zero.
        self.embedding = nn.Embedding(self.output_size, self.emb_size, padding_idx=0)
        self.rnn = nn.GRU(self.emb_size, self.hidden_size,
                          num_layers=self.n_layers, dropout=dropout_p)

    def forward(self, input_seqs, hidden):
        """Run the GRU over ``input_seqs`` starting from ``hidden``.

        Returns (per-step outputs, final hidden state).
        """
        out, new_hidden = self.rnn(self.embedding(input_seqs), hidden)
        return out, new_hidden
class Net(nn.Module):
    """Multi-decoder autoencoder: one shared encoder and two decoders.

    ``generator`` (decoder hidden -> vocab logits) is assigned later by the
    model builder; ``forward`` assumes it has been set before decoding.
    """

    def __init__(self, model_type, encoder, decoder0, decoder1):
        super(Net, self).__init__()
        self.type = model_type
        self.encoder = encoder
        self.decoder0 = decoder0
        self.decoder1 = decoder1
        # Set externally by the model builder (normally a Linear layer from
        # the decoder hidden size to the vocabulary size).
        self.generator = None
        self.choose_decoder = lambda dec_idx: decoder0 if dec_idx == 0 else decoder1

    def forward(self, indices, lengths, dec_idx, only_enc=False):
        """Encode ``indices`` and decode with decoder ``dec_idx`` (0 or 1).

        Returns vocab logits, or the squeezed encoder hidden state when
        ``only_enc`` is True.
        """
        # Encode; the first token (presumably SOS) is skipped, hence lengths-1.
        enc_output, enc_hidden = self.encoder(indices[1:], lengths - 1)
        if only_enc:
            return enc_hidden.squeeze(dim=0)
        # Decode with the selected decoder.
        assert dec_idx == 0 or dec_idx == 1
        decoder_outputs, dec_state = self.choose_decoder(dec_idx)(indices, enc_hidden)
        # BUG FIX: was `self.generator(dec_outputs)` -- `dec_outputs` is
        # undefined (NameError); the decoder's output is `decoder_outputs`.
        #
        # NOTE(review): the decoder consumes the FULL `indices`, including the
        # final token. For teacher forcing the usual setup is to feed
        # indices[:-1] and compute the loss against indices[1:]; if the loss
        # target is not shifted accordingly, the model is asked to copy its
        # input rather than predict the next token, which would explain the
        # slowly decreasing loss -- verify the target alignment in train_ae.
        decoded = self.generator(decoder_outputs)
        return decoded
The generator of the Net class is initialized in the model-builder function; it is normally a linear layer from the decoder's hidden size to the vocabulary size.
The training function for my model is:
def train_ae(dec_idx, batch, temp=1):
    """Run one autoencoder training step on `batch` using decoder `dec_idx`.

    Relies on module-level globals: `model`, `optimizer_ae`, `device`,
    `vocab_size`, `grad_clip`, and `F` (torch.nn.functional).
    Returns the scalar reconstruction loss for this batch.
    `temp` is currently unused in this function.
    """
    model.train()
    optimizer_ae.zero_grad()
    source, target, lengths = batch
    source = source.to(device)
    target = target.to(device)
    lengths = lengths.to(device)
    # Forward pass through encoder + selected decoder + generator.
    # Presumably returns (seq_len, batch, vocab_size) logits -- TODO confirm.
    output = model(source, lengths, dec_idx)
    output = output.view(-1, vocab_size)
    # NOTE(review): F.cross_entropy expects `target` to be 1-D of length
    # seq_len * batch here; confirm the dataloader flattens it (otherwise a
    # target.view(-1) is needed). Also verify that `target` is `source`
    # shifted by one token -- the model decodes the full `source`, so an
    # unshifted target turns training into input-copying, which would match
    # the slowly decreasing loss reported below.
    # ignore_index=0 matches the embeddings' padding_idx=0.
    recon_loss = F.cross_entropy(output, target, ignore_index=0)
    recon_loss.backward()
    # Clip gradients to stabilize RNN training.
    torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
    optimizer_ae.step()
    return recon_loss.item()
But the loss per epoch decreases very slowly (I don't think it is training properly), and the model produces poor results at generation time.
The losses look something like:
ep1 : 15184.654
ep2 : 14797.090
ep3 : 14740.160
ep4 : 14703.133
I trained another recurrent autoencoder model, and its loss decreased well under the same conditions.