PyTorch version: 1.1
I’m optimizing the Decoder part of model.py from https://github.com/nvidia/tacotron2 using the TorchScript JIT. Here is my modified code:

# imports as in the original model.py
import torch
from torch.autograd import Variable
from torch.nn import functional as F
from utils import get_mask_from_lengths


@torch.jit.script
class DecoderOptions:
    @torch.jit.script_method
    def __init__(self, decoder, memory, mask):
        B = memory.size(0)
        MAX_TIME = memory.size(1)
        self.attention_hidden = Variable(memory.data.new(
            B, decoder.attention_rnn_dim).zero_())
        self.attention_cell = Variable(memory.data.new(
            B, decoder.attention_rnn_dim).zero_())
        self.decoder_hidden = Variable(memory.data.new(
            B, decoder.decoder_rnn_dim).zero_())
        self.decoder_cell = Variable(memory.data.new(
            B, decoder.decoder_rnn_dim).zero_())
        self.attention_weights = Variable(memory.data.new(
            B, MAX_TIME).zero_())
        self.attention_weights_cum = Variable(memory.data.new(
            B, MAX_TIME).zero_())
        self.attention_context = Variable(memory.data.new(
            B, decoder.encoder_embedding_dim).zero_())
        self.memory = memory
        self.processed_memory = decoder.attention_layer.memory_layer(memory)
        self.mask = mask

class Decoder(torch.jit.ScriptModule):
    def __init__(self, hparams):
        # same as the NVIDIA version
        ...

    def get_go_frame(self, memory):
        # same as the NVIDIA version
        ...

    def parse_decoder_inputs(self, decoder_inputs):
        # same as the NVIDIA version
        ...

    def parse_decoder_outputs(self, mel_outputs, gate_outputs, alignments):
        # same as the NVIDIA version
        ...

    @torch.jit.script_method
    def decode(self, decoder_input, options: DecoderOptions):
        # type: (Tensor, DecoderOptions) -> Tuple[Tensor, Tensor, Tensor, DecoderOptions]
        """ Decoder step using stored states, attention and memory
        PARAMS
        ------
        decoder_input: previous mel output
        RETURNS
        -------
        mel_output:
        gate_output: gate output energies
        attention_weights:
        """
        cell_input = torch.cat((decoder_input, options.attention_context), -1)

        options.attention_hidden, options.attention_cell = self.attention_rnn(
            cell_input, (options.attention_hidden, options.attention_cell))
        options.attention_hidden = F.dropout(
            options.attention_hidden, self.p_attention_dropout, self.training)

        attention_weights_cat = torch.cat(
            (options.attention_weights.unsqueeze(1),
             options.attention_weights_cum.unsqueeze(1)), dim=1)
        options.attention_context, options.attention_weights = self.attention_layer(
            options.attention_hidden, options.memory, options.processed_memory,
            attention_weights_cat, options.mask)
        options.attention_weights_cum += options.attention_weights

        decoder_input = torch.cat(
            (options.attention_hidden, options.attention_context), -1)
        options.decoder_hidden, options.decoder_cell = self.decoder_rnn(
            decoder_input, (options.decoder_hidden, options.decoder_cell))
        options.decoder_hidden = F.dropout(
            options.decoder_hidden, self.p_decoder_dropout, self.training)

        decoder_hidden_attention_context = torch.cat(
            (options.decoder_hidden, options.attention_context), dim=1)
        decoder_output = self.linear_projection(
            decoder_hidden_attention_context)
        gate_prediction = self.gate_layer(decoder_hidden_attention_context)
        return decoder_output, gate_prediction, options.attention_weights, options

    def forward(self, memory, decoder_inputs, memory_lengths):
        """ Decoder forward pass for training
        PARAMS
        ------
        memory: Encoder outputs
        decoder_inputs: Decoder inputs for teacher forcing. i.e. mel-specs
        memory_lengths: Encoder output lengths for attention masking.
        RETURNS
        -------
        mel_outputs: mel outputs from the decoder
        gate_outputs: gate outputs from the decoder
        alignments: sequence of attention weights from the decoder
        """
        decoder_input = self.get_go_frame(memory).unsqueeze(0)
        decoder_inputs = self.parse_decoder_inputs(decoder_inputs)
        decoder_inputs = torch.cat((decoder_input, decoder_inputs), dim=0)
        decoder_inputs = self.prenet(decoder_inputs)
        options = DecoderOptions(self, memory, ~get_mask_from_lengths(memory_lengths))

        mel_outputs, gate_outputs, alignments = [], [], []
        while len(mel_outputs) < decoder_inputs.size(0) - 1:
            decoder_input = decoder_inputs[len(mel_outputs)]
            mel_output, gate_output, attention_weights, options = self.decode(
                decoder_input, options)
            mel_outputs += [mel_output.squeeze(1)]
            gate_outputs += [gate_output.squeeze()]
            alignments += [attention_weights]

        mel_outputs, gate_outputs, alignments = self.parse_decoder_outputs(
            mel_outputs, gate_outputs, alignments)
        return mel_outputs, gate_outputs, alignments

    @torch.jit.script_method
    def inference(self, memory, options: DecoderOptions):
        """ Decoder inference
        PARAMS
        ------
        memory: Encoder outputs
        RETURNS
        -------
        mel_outputs: mel outputs from the decoder
        gate_outputs: gate outputs from the decoder
        alignments: sequence of attention weights from the decoder
        """
        decoder_input = self.get_go_frame(memory)

        mel_outputs, gate_outputs, alignments = [], [], []
        run_more = True
        while run_more:
            decoder_input = self.prenet(decoder_input)
            mel_output, gate_output, alignment, options = self.decode(decoder_input, options)

            mel_outputs += [mel_output.squeeze(1)]
            gate_outputs += [gate_output]
            alignments += [alignment]

            if torch.sigmoid(gate_output.data) > self.gate_threshold:
                run_more = False
            elif len(mel_outputs) == self.max_decoder_steps:
                print("Warning! Reached max decoder steps")
                run_more = False

            decoder_input = mel_output

        mel_outputs, gate_outputs, alignments = self.parse_decoder_outputs(
            mel_outputs, gate_outputs, alignments)
        return mel_outputs, gate_outputs, alignments

I call the decoder from Tacotron2.inference with this line:

mel_outputs, gate_outputs, alignments = self.decoder.inference(
    encoder_outputs, DecoderOptions(self.decoder, encoder_outputs))

But the compiler says:
RuntimeError:
Tried to access to nonexistent attribute attention_context. Did you forget to initialize it in __init__()?:
        ------
        decoder_input: previous mel output
        RETURNS
        -------
        mel_output:
        gate_output: gate output energies
        attention_weights:
        """
        cell_input = torch.cat((decoder_input, options.attention_context), -1)
                                               ~~~~~~~~~~~~~~~~~ <--- HERE
        options.attention_hidden, options.attention_cell = self.attention_rnn(
            cell_input, (options.attention_hidden, options.attention_cell))
        options.attention_hidden = F.dropout(
            options.attention_hidden, self.p_attention_dropout, self.training)
        attention_weights_cat = torch.cat(
            (options.attention_weights.unsqueeze(1),
             options.attention_weights_cum.unsqueeze(1)), dim=1)
        options.attention_context, options.attention_weights = self.attention_layer(
I do initialize attention_context in DecoderOptions.__init__(), so why does this error occur?
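
In case the overall shape matters more than the Tacotron2 specifics, here is a stripped-down sketch of the pattern I believe I am relying on (SimpleOptions and step are made-up names for illustration only, not part of the Tacotron2 code): a TorchScript class that assigns tensor attributes in __init__, and a scripted function that reads one of them.

import torch

@torch.jit.script
class SimpleOptions(object):
    def __init__(self, memory):
        # type: (Tensor) -> None
        # tensor attributes assigned in __init__, analogous to attention_context above
        self.attention_context = torch.zeros_like(memory)
        self.memory = memory

@torch.jit.script
def step(decoder_input, options):
    # type: (Tensor, SimpleOptions) -> Tensor
    # read the attribute assigned in __init__, the same access pattern as decode()
    return torch.cat([decoder_input, options.attention_context], -1)
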
Thanks!