Hello, I am training a TemporalFusionTransformer
from the pytorch-forecasting library. My code is as follows:
max_prediction_length = len(test)
max_encoder_length = 4*max_prediction_length
# training_cutoff = df_19344_tmp["time_idx"].max() - max_prediction_length
training = TimeSeriesDataSet(
train.loc[:, train.columns != 'date'],
time_idx='time_idx',
target='occupancy',
group_ids=['property_id'],
min_encoder_length=1,
max_encoder_length=max_encoder_length,
min_prediction_length=1,
max_prediction_length=max_prediction_length,
static_categoricals=['property_id'],
static_reals=[],
time_varying_known_categoricals=[],
time_varying_known_reals=['time_idx', '7-bookings', '14-bookings', 'sin_day', 'cos_day', 'sin_month', 'cos_month', 'sin_year', 'cos_year'],
time_varying_unknown_categoricals=[],
time_varying_unknown_reals=['occupancy'],
target_normalizer=GroupNormalizer(
groups=['property_id'], transformation="softplus"
),
# lags=['7-bookings', '14-bookings'],
add_relative_time_idx=True,
add_target_scales=True,
add_encoder_length=True,
allow_missing_timesteps=True
)
validation = TimeSeriesDataSet.from_dataset(training, train, predict=True, stop_randomization=True)
batch_size = 32 # set this between 32 to 128
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size*2, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)
test_time_series_data_set = TimeSeriesDataSet.from_dataset(training, test.loc[:, test.columns != 'date'] , predict=True, stop_randomization=True)
test_dataloader = test_time_series_data_set.to_dataloader(train=False, batch_size=batch_size, num_workers=0)
trainer = pl.Trainer(
max_epochs=100,
# accelerator='gpu',
# devices=1,
enable_model_summary=True,
auto_lr_find=False,
# clipping gradients is a hyperparameter and important to prevent divergance
# of the gradient for recurrent neural networks
gradient_clip_val=0.1,
check_val_every_n_epoch=1,
# logger=
# device=device
)
tft = TemporalFusionTransformer.from_dataset(
training,
# not meaningful for finding the learning rate but otherwise very important
learning_rate=0.0001,
hidden_size=8, # most important hyperparameter apart from learning rate
# number of attention heads. Set to up to 4 for large datasets
attention_head_size=1,
dropout=0.1, # between 0.1 and 0.3 are good values
hidden_continuous_size=8, # set to <= hidden_size
output_size=7, # 7 quantiles by default
loss=QuantileLoss(),
)
My code breaks when I am trying to create the test_time_series_data_set
.
The entire error is
/opt/conda/lib/python3.7/site-packages/pytorch_forecasting/data/timeseries.py:1244: UserWarning: Min encoder length and/or min_prediction_idx and/or min prediction length and/or lags are too large for 1 series/groups which therefore are not present in the dataset index. This means no predictions can be made for those series. First 10 removed groups: [{'__group_id__property_id': '19344'}]
UserWarning,
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
/tmp/ipykernel_20/99987403.py in <module>
36 val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)
37
---> 38 test_time_series_data_set = TimeSeriesDataSet.from_dataset(training, test.loc[:, test.columns != 'date'] , predict=True, stop_randomization=True)
39 test_dataloader = test_time_series_data_set.to_dataloader(train=False, batch_size=batch_size, num_workers=0)
40
/opt/conda/lib/python3.7/site-packages/pytorch_forecasting/data/timeseries.py in from_dataset(cls, dataset, data, stop_randomization, predict, **update_kwargs)
1111 """
1112 return cls.from_parameters(
-> 1113 dataset.get_parameters(), data, stop_randomization=stop_randomization, predict=predict, **update_kwargs
1114 )
1115
/opt/conda/lib/python3.7/site-packages/pytorch_forecasting/data/timeseries.py in from_parameters(cls, parameters, data, stop_randomization, predict, **update_kwargs)
1156 parameters.update(update_kwargs)
1157
-> 1158 new = cls(data, **parameters)
1159 return new
1160
/opt/conda/lib/python3.7/site-packages/pytorch_forecasting/data/timeseries.py in __init__(self, data, time_idx, target, group_ids, weight, max_encoder_length, min_encoder_length, min_prediction_idx, min_prediction_length, max_prediction_length, static_categoricals, static_reals, time_varying_known_categoricals, time_varying_known_reals, time_varying_unknown_categoricals, time_varying_unknown_reals, variable_groups, constant_fill_strategy, allow_missing_timesteps, lags, add_relative_time_idx, add_target_scales, add_encoder_length, target_normalizer, categorical_encoders, scalers, randomize_length, predict_mode)
437
438 # create index
--> 439 self.index = self._construct_index(data, predict_mode=predict_mode)
440
441 # convert to torch tensor for high performance data loading later
/opt/conda/lib/python3.7/site-packages/pytorch_forecasting/data/timeseries.py in _construct_index(self, data, predict_mode)
1246 assert (
1247 len(df_index) > 0
-> 1248 ), "filters should not remove entries all entries - check encoder/decoder lengths and lags"
1249
1250 return df_index
AssertionError: filters should not remove entries all entries - check encoder/decoder lengths and lags
The shapes of my train
and test
dataframes are:
train.shape = (1041, 12)
test.shape = (55, 12)
What is more interesting, is that this was code that was working. I am 100% I did not change anything and it stopped working on it’s won. I’ve seen answers like this and this where it is said that your min_encoder_length
and min_prediction_length
should be small so that any time series in the dataset can be longer than the sum of the previous attributes. Can anybody please explain this more in depth?