Can anyone guide me on how to fix this transformer model? When I run it, it gives very poor results, and I cannot figure out my mistake. I used the same data with an LSTM and it performed well. The data is univariate time-series data; I applied differencing to make it stationary.
# --- Model / data hyperparameters -------------------------------------------
support_num = 1          # number of hydraulic supports read (univariate series -> 1 feature)
start_num = 40           # id of the first support to read from the LMDB store
train_len = 15           # encoder input window length (time steps)
pre_len = 8              # prediction horizon (decoder output length)
dim_val = 512            # transformer model dimension (d_model)
n_heads = 8              # attention heads per layer
n_decoder_layers = 6
n_encoder_layers = 6
batch_first = True       # FIX: was referenced by the model constructor but never defined
dec_seq_len = pre_len    # length of input given to decoder
enc_seq_len = train_len  # length of input given to encoder
output_sequence_length = pre_len  # target sequence length.
in_features_encoder_linear_layer = 2048  # encoder feed-forward width
in_features_decoder_linear_layer = 2048  # decoder feed-forward width
max_seq_len = train_len
class TimeSeriesTransformer(nn.Module):
    """Encoder-decoder Transformer for time-series forecasting.

    With ``batch_first=True``, ``src``/``tgt`` are expected as
    ``(batch, seq, input_size)``; otherwise ``(seq, batch, input_size)``.

    Bug fixes versus the original:
      * ``init`` renamed to ``__init__`` -- without the dunder name the
        constructor was never invoked, so ``super().__init__()`` never ran
        and none of the layers were ever created. This is the primary
        reason the model produced garbage.
      * ``batch_first`` now has a concrete default instead of referencing
        an undefined module-level global.
      * layers are sized from the ``input_size`` argument instead of
        silently reading the global ``support_num``.
    """

    def __init__(self,
                 input_size: int = support_num,
                 enc_seq_len: int = train_len,
                 dec_seq_len: int = pre_len,
                 batch_first: bool = True,
                 dim_val: int = 512,
                 n_encoder_layers: int = 6,
                 n_decoder_layers: int = 6,
                 n_heads: int = 8,
                 dropout_encoder: float = 0.2,
                 dropout_decoder: float = 0.2,
                 dropout_pos_enc: float = 0.1,
                 dim_feedforward_encoder: int = 2048,
                 dim_feedforward_decoder: int = 2048,
                 num_predicted_features: int = pre_len):
        # NOTE(review): num_predicted_features is kept for interface
        # compatibility but, exactly as in the original code, the output
        # feature dimension follows input_size (univariate -> 1). Its
        # default (pre_len) looks like a horizon, not a feature count --
        # confirm intended semantics with the caller.
        super().__init__()
        self.dec_seq_len = dec_seq_len
        self.enc_seq_len = enc_seq_len
        self.input_size = input_size

        # Project raw features into the model dimension.
        self.encoder_input_layer = nn.Linear(
            in_features=input_size,
            out_features=dim_val,
        )
        self.decoder_input_layer = nn.Linear(
            in_features=input_size,
            out_features=dim_val,
        )
        # Map decoder states back to the feature space.
        self.linear_mapping = nn.Linear(
            in_features=dim_val,
            out_features=input_size,
        )

        # Separate positional encoders for the encoder and decoder streams.
        self.positional_encoding_layer = PositionalEncoder(
            d_model=dim_val,
            dropout=dropout_pos_enc,
        )
        self.positional_decoding_layer = PositionalEncoder(
            d_model=dim_val,
            dropout=dropout_pos_enc,
        )

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=dim_val,
            nhead=n_heads,
            dim_feedforward=dim_feedforward_encoder,
            dropout=dropout_encoder,
            batch_first=batch_first,
        )
        self.encoder = nn.TransformerEncoder(
            encoder_layer=encoder_layer,
            num_layers=n_encoder_layers,
            norm=None,
        )

        decoder_layer = nn.TransformerDecoderLayer(
            d_model=dim_val,
            nhead=n_heads,
            dim_feedforward=dim_feedforward_decoder,
            dropout=dropout_decoder,
            batch_first=batch_first,
        )
        self.decoder = nn.TransformerDecoder(
            decoder_layer=decoder_layer,
            num_layers=n_decoder_layers,
            norm=None,
        )

    def forward(self, src, tgt, src_mask, tgt_mask):
        """Run one encoder-decoder pass.

        Args:
            src: encoder input window.
            tgt: decoder input (last observed step + shifted targets).
            src_mask: applied as the decoder's *memory* mask (cross
                attention over encoder output); expected shape
                (dec_seq_len, enc_seq_len).
            tgt_mask: causal mask for decoder self-attention, expected
                shape (dec_seq_len, dec_seq_len). Without a proper causal
                mask the decoder can peek at future target steps during
                training, which inflates training metrics and wrecks
                inference -- verify the caller builds it with
                nn.Transformer.generate_square_subsequent_mask.

        Returns:
            Predictions with the same feature dimension as the input.
        """
        src = self.encoder_input_layer(src)
        src = self.positional_encoding_layer(src)
        memory = self.encoder(src=src)

        out = self.decoder_input_layer(tgt)
        out = self.positional_decoding_layer(out)
        out = self.decoder(
            tgt=out,
            memory=memory,
            tgt_mask=tgt_mask,
            memory_mask=src_mask,
        )
        return self.linear_mapping(out)
Here is the dataset class I used for it. I applied differencing to make the data stationary.
class LMDBDataSet(Dataset):
    """Sliding-window dataset over differenced hydraulic-support pressure
    series stored in one LMDB database per support.

    Bug fixes versus the original:
      * ``init`` renamed to ``__init__`` -- it was never invoked, so every
        ``self.*`` attribute read in ``__getitem__`` raised AttributeError.
      * the LMDB stores were reopened and the FULL series re-read,
        re-differenced and re-standardized on every single ``__getitem__``
        call; the processed array is now loaded once and cached.
      * LMDB environments are now closed after reading (resource leak).
      * ``__len__`` returns the computed number of valid windows instead of
        a hard-coded 500 (the original already computed ``self.length``
        and then ignored it).
    """

    def __init__(self, sequence_length, start_num, support_num, pre_step,
                 max_pressure=46, min_time=34992, max_time=39052):
        super().__init__()
        self.db_path = '/home/aimen/PycharmProjects/HydraulicSupport_pressure/'
        self.startSupportNum = start_num
        self.supportNum = support_num
        self.train_length = sequence_length
        self.pre_step = pre_step
        self.max_pressure = max_pressure
        self.min_time = min_time
        self.max_time = max_time
        # Number of window start positions that fit in the (differenced) series.
        self.length = int(self.max_time) - int(self.min_time) - self.pre_step - self.train_length - 1
        # Lazily-built cache of the whole processed array, shape
        # (supportNum, series_len - 1) after first-order differencing.
        self._data = None

    def _load_data(self):
        """Read every support's series once, then difference and standardize."""
        data = []
        for sid in range(self.startSupportNum, self.startSupportNum + self.supportNum):
            env = lmdb.open(self.db_path + "support_id_lmdb" + str(sid))
            try:
                txn = env.begin(write=False)
                series = [np.float32(float(txn.get(str(t).encode())))
                          for t in range(self.min_time, self.max_time)]
            finally:
                env.close()
            # First-order differencing to make the series stationary.
            data.append(difference(np.array(series), 1))
        return standarizeData(np.array(data))

    def __getitem__(self, index):
        if self._data is None:
            self._data = self._load_data()
        data = self._data
        input_encoder = data[:, index: index + self.train_length]
        # Decoder input is shifted one step back so it starts at the last
        # observed encoder step (teacher forcing).
        input_decoder = data[:, index + self.train_length - 1:
                                index + self.train_length + self.pre_step - 1]
        out_tgt = data[:, index + self.train_length:
                          index + self.train_length + self.pre_step]
        return input_encoder, input_decoder, out_tgt

    def __len__(self):
        # Fix: was hard-coded to 500 (with a commented-out 4000); use the
        # computed number of valid windows so no window indexes past the end.
        return self.length