Large errors in initial time steps while using Seq2Seq for time series prediction

Hi,

I am trying to use a Seq2Seq model for time series prediction (predicting some target time series from other time series). While I managed to get good results overall, for some reason my model performs poorly in the initial time steps. In the figure below, you can see the predictions vs. the targets for 5 of the output time series:

I am not sure, but I suspect that I might be doing something wrong regarding hidden state initialization. Here’s the piece of code I am using for Seq2Seq.

class Encoder(nn.Module):
    """Bidirectional multi-layer GRU that summarises an input sequence.

    Only the final hidden state of the GRU is exposed; the per-step
    outputs are discarded.
    """

    def __init__(self, config):
        """Build the recurrent layer from ``config``.

        Reads ``input_size``, ``gru_hidden_size``, ``output_size``,
        ``n_layers`` and ``seq_len`` from ``config`` and keeps a reference
        to ``config`` itself.
        """
        super().__init__()

        self.config = config
        self.input_size = config.input_size
        self.hidden_size = config.gru_hidden_size
        self.output_size = config.output_size
        self.n_layers = config.n_layers
        self.seq_len = config.seq_len

        # NOTE: the attribute is called ``lstm`` although the layer is a GRU;
        # the name is kept because it is part of the module's state-dict keys.
        self.lstm = nn.GRU(
            self.input_size,
            config.gru_hidden_size,
            config.n_layers,
            bidirectional=True,
        )

    def forward(self, inputs):
        """Return the GRU's final hidden state for ``inputs``.

        ``inputs`` must be 3-D; its first two axes are swapped before the
        GRU is applied (the GRU is built with the default
        ``batch_first=False``, so ``inputs`` is presumably batch-first --
        confirm against the caller).
        """
        time_major = inputs.permute(1, 0, 2)
        _, final_hidden = self.lstm(time_major)
        return final_hidden


class Decoder(nn.Module):
    """Single-step GRU decoder followed by a linear projection.

    Each call consumes one step of input plus a hidden state and returns
    the projected output for that step together with the updated hidden
    state.
    """

    def __init__(self, config):
        """Build the GRU and the output projection from ``config``.

        Reads ``input_size``, ``gru_hidden_size``, ``output_size``,
        ``n_layers`` and ``seq_len`` from ``config`` and keeps a reference
        to ``config`` itself.
        """
        super().__init__()

        self.config = config
        self.input_size = config.input_size
        self.hidden_size = config.gru_hidden_size
        self.output_size = config.output_size
        self.n_layers = config.n_layers
        self.seq_len = config.seq_len

        bidirectional = True
        # A bidirectional GRU emits both directions concatenated, so the
        # projection layer sees twice the hidden size.
        num_directions = 2 if bidirectional else 1

        # NOTE: the attribute is called ``lstm`` although the layer is a GRU;
        # the name is kept because it is part of the module's state-dict keys.
        self.lstm = nn.GRU(
            self.output_size,
            config.gru_hidden_size,
            config.n_layers,
            bidirectional=bidirectional,
        )
        self.fc = nn.Linear(
            num_directions * config.gru_hidden_size,
            config.output_size,
        )

    def forward(self, input, hidden):
        """Run one decoding step.

        A leading axis of size 1 is added to ``input`` before it is fed to
        the GRU together with ``hidden``. Returns ``(output, hidden)``:
        the linear projection of the GRU output (still carrying the leading
        axis) and the new hidden state.
        """
        step = input.unsqueeze(0)
        gru_out, next_hidden = self.lstm(step, hidden)
        projected = self.fc(gru_out)
        return projected, next_hidden


class Seq2Seq(nn.Module):
    """Encoder-decoder model that predicts a target sequence autoregressively.

    The encoder's final hidden state seeds the decoder, which is then
    unrolled one step at a time, feeding each prediction back in as the
    next input.
    """

    model_name = "Seq2Seq"

    def __init__(self, config):
        """Create the encoder and decoder from the same ``config``."""
        super().__init__()

        self.encoder = Encoder(config)
        self.decoder = Decoder(config)

    def forward(self, inputs, targets):
        """Return predictions with the same shape as ``targets``.

        ``targets`` is indexed as ``[:, t, :]``, i.e. axis 1 is the step
        axis. Step 0 of the result is copied verbatim from ``targets``;
        steps ``1 .. seq_len - 1`` are produced by the decoder, each from the
        previous step's prediction (no teacher forcing).

        The result is allocated on the device and with the dtype of
        ``targets`` so it can be compared with ``targets`` directly.
        """
        seq_len = targets.shape[1]

        hidden = self.encoder(inputs)

        # ``torch.zeros(targets.shape)`` would always allocate on the CPU with
        # the default dtype, leaving the predictions on a different device /
        # dtype than ``targets`` when training on a GPU or in float64.
        preds = torch.zeros_like(targets)

        # The first target step is used both as the first "prediction" and as
        # the decoder's initial input.
        step_input = targets[:, 0, :]
        preds[:, 0, :] = step_input

        for t in range(1, seq_len):
            output, hidden = self.decoder(step_input, hidden)
            # Drop the leading length-1 axis added by the decoder; the result
            # is both this step's prediction and the next step's input.
            step_input = output.squeeze(0)
            preds[:, t, :] = step_input

        return preds

Does anyone know what I am missing?