Transformer for time series forecasting

Can anyone guide me on how to fix this Transformer model? When I run it, it gives very poor results, and I cannot figure out my mistake. I have used the same data with an LSTM and it gave good performance. The data is univariate time-series data; I applied first-order differencing to make it stationary.

support_num = 1
start_num = 40
train_len = 15
pre_len = 8

dim_val = 512
n_heads = 8
n_decoder_layers = 6
n_encoder_layers = 6
dec_seq_len = pre_len # length of input given to decoder
enc_seq_len = train_len # length of input given to encoder
output_sequence_length = pre_len # target sequence length.

in_features_encoder_linear_layer = 2048
in_features_decoder_linear_layer = 2048
max_seq_len = train_len

class TimeSeriesTransformer(nn.Module):
def init(self,
input_size = support_num,
enc_seq_len=train_len,
dec_seq_len=pre_len,
batch_first=batch_first,
dim_val: int = 512,
n_encoder_layers: int = 6,
n_decoder_layers: int = 6,
n_heads: int = 8,
dropout_encoder: float = 0.2,
dropout_decoder: float = 0.2,
dropout_pos_enc: float = 0.1,
dim_feedforward_encoder: int = 2048,
dim_feedforward_decoder: int = 2048,
num_predicted_features: int = pre_len
):

    super().__init__()

    self.dec_seq_len = dec_seq_len
    self.enc_seq_len = enc_seq_len
    self.input_size = support_num

    self.encoder_input_layer = nn.Linear(
        in_features=support_num,
        out_features=dim_val
    )

    self.decoder_input_layer = nn.Linear(
        in_features=support_num,
        out_features=dim_val
    )

    self.linear_mapping = nn.Linear(
        in_features=dim_val,
        out_features=support_num
    )
    # Create positional encoder of encoder
    self.positional_encoding_layer = PositionalEncoder(
        d_model=dim_val,
        dropout=dropout_pos_enc
    )
    # Create positional encoder of decoder
    self.positional_decoding_layer = PositionalEncoder(
        d_model=dim_val,
        dropout=dropout_pos_enc
    )
    encoder_layer = nn.TransformerEncoderLayer(
        d_model=dim_val,
        nhead=n_heads,
        dim_feedforward=dim_feedforward_encoder,
        dropout=dropout_encoder,
        batch_first=batch_first
    )
    self.encoder = nn.TransformerEncoder(
        encoder_layer=encoder_layer,
        num_layers=n_encoder_layers,
        norm=None
    )
    decoder_layer = nn.TransformerDecoderLayer(
        d_model=dim_val,
        nhead=n_heads,
        dim_feedforward=dim_feedforward_decoder,
        dropout=dropout_decoder,
        batch_first=batch_first
    )
    self.decoder = nn.TransformerDecoder(
        decoder_layer=decoder_layer,
        num_layers=n_decoder_layers,
        norm=None
    )

def forward(self, src, tgt,src_mask,tgt_mask):

    src = self.encoder_input_layer(src)
    src = self.positional_encoding_layer(src)

    src = self.encoder(src=src)

    decoder_output = self.decoder_input_layer(tgt)

    decoder_output = self.positional_decoding_layer(decoder_output)

    # print(src.shape,decoder_output.shape)
    # print(src_mask.shape,tgt_mask.shape)


    decoder_output = self.decoder(
        tgt=decoder_output,
        memory=src,
        tgt_mask=tgt_mask,
        memory_mask=src_mask
    )

    decoder_output = self.linear_mapping(decoder_output)

    return decoder_output

Here is the dataset class I used with it. I applied differencing to make the data stationary.

class LMDBDataSet(Dataset):
def init(self, sequence_length, start_num, support_num, pre_step, max_pressure=46, min_time=34992, max_time=39052):

    self.db_path = '/home/aimen/PycharmProjects/HydraulicSupport_pressure/'
    self.startSupportNum = start_num
    self.supportNum = support_num
    self.train_length = sequence_length
    self.pre_step = pre_step
    self.max_pressure = max_pressure
    self.min_time = min_time
    self.max_time = max_time
    self.length = int(self.max_time) - int(self.min_time) - self.pre_step - self.train_length - 1

def __getitem__(self, index):
    data=[]
    for id in range(self.startSupportNum,
                    self.startSupportNum + self.supportNum):  ## append the data for support id
        env = lmdb.open(self.db_path + "support_id_lmdb" + str(id))
        txn = env.begin(write=False)
        temp = []
        for sample_time in range(self.min_time, self.max_time):
            data_value = txn.get(str(sample_time).encode())
            # print(sample_time, data_value)
            temp.append(np.float32(float(data_value)))
        temp = np.array(temp)
        temp = difference(temp, 1)

    data.append(temp)
    data=np.array(data)
  
    data = standarizeData(data)

    input_encoder = data[:, index : index + self.train_length]
    input_decoder = data[:, index + self.train_length-1 : index + self.train_length+self.pre_step-1]
    out_tgt = data[:,index + self.train_length : index + self.train_length+self.pre_step]

    return input_encoder, input_decoder, out_tgt

def __len__(self):
    return 500 #4000

Please, someone take a look — I have not been able to find the mistake here.