When I run the same code on the Stallion dataset provided with pytorch_forecasting, it works fine. After adapting the code to my own dataset, I am able to create the TimeSeriesDataSet, but when I try to find the learning rate I get the error: "Element 0 of tensors does not require grad and does not have grad_fn".

```
# --- Build the training TimeSeriesDataSet ----------------------------------
# Forecast 6 future steps from up to 24 historical steps per series.
max_prediction_length = 6
max_encoder_length = 24
# Reserve the last `max_prediction_length` time steps for validation.
training_cutoff = data_check1["time_idx"].max() - max_prediction_length
training = TimeSeriesDataSet(
# Train only on rows up to the cutoff (callable indexing filters the frame).
data_check1[lambda x: x.time_idx <= training_cutoff],
time_idx="time_idx",
target="Sales",
group_ids=["sku"],
min_encoder_length=max_encoder_length // 2, # keep encoder length long (as it is in the validation set)
max_encoder_length=max_encoder_length,
min_prediction_length=1,
max_prediction_length=max_prediction_length,
# NOTE(review): "sku" is listed both as a group id and as a static
# categorical — confirm this is intentional (it mirrors the stallion example).
static_categoricals=["Material", "sku",'Color','CATEGORY_LEVEL_3'],
static_reals=["PRICE"],
time_varying_known_categoricals=["month"],
#variable_groups={"special_days": special_days}, # group of categorical variables can be treated as one variable
time_varying_known_reals=["time_idx"],
time_varying_unknown_categoricals=[],
time_varying_unknown_reals=[
"Sales",
"log_Sales",
"avg_sales_by_sku"
],
# NOTE(review): the normalizer assumes a numeric float target — an
# integer/object-dtype "Sales" column is a common source of downstream
# autograd errors; verify data_check1["Sales"].dtype before training.
target_normalizer=GroupNormalizer(
groups=["sku"], transformation="softplus"
), # use softplus and normalize by group
# add_nan=True gives the encoder a dedicated class for unseen/NaN SKUs.
categorical_encoders={
'sku':pytorch_forecasting.data.encoders.NaNLabelEncoder(add_nan=True)
},
add_relative_time_idx=True,
add_target_scales=True,
add_encoder_length=True,
# Permit gaps in time_idx within a series instead of raising on them.
allow_missing_timesteps=True
)
# Create the validation set (predict=True), i.e. predict the last
# max_prediction_length points in time for each series.
validation = TimeSeriesDataSet.from_dataset(training, data_check1, predict=True, stop_randomization=True)
# Create dataloaders for the model.
batch_size = 128 # set this between 32 to 128
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
# Validation batches can be larger: no gradients are kept during evaluation.
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)
# Configure the trainer and the network.
# NOTE(review): "Element 0 of tensors does not require grad" is raised at
# loss.backward(); it cannot be fixed by setting requires_grad=True manually —
# the loss is somehow detached from the computation graph (see note at lr_find).
pl.seed_everything(42)
trainer = pl.Trainer(
gpus=0,
# clipping gradients is a hyperparameter and important to prevent divergence
# of the gradient for recurrent neural networks
gradient_clip_val=0.1,
)
tft = TemporalFusionTransformer.from_dataset(
training,
# not meaningful for finding the learning rate but otherwise very important
learning_rate=0.03,
hidden_size=16, # most important hyperparameter apart from learning rate
# number of attention heads. Set to up to 4 for large datasets
attention_head_size=1,
dropout=0.1, # between 0.1 and 0.3 are good values
hidden_continuous_size=8, # set to <= hidden_size
output_size=7, # 7 quantiles by default
loss=QuantileLoss(),
# reduce learning rate if no improvement in validation loss after x epochs
reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")
# Run the learning-rate finder. lr_find executes real training steps, so the
# "does not require grad" error is raised from the training step, not from
# the tuner itself.
# NOTE(review): this error usually means the loss was computed from tensors
# detached from the autograd graph — e.g. a non-float or NaN-containing
# target/feature column. Check data_check1 dtypes (all reals float, target
# float) before suspecting the model configuration; verify against the
# stallion example data where the same code works.
res = trainer.tuner.lr_find(
tft,
train_dataloaders=train_dataloader,
val_dataloaders=val_dataloader,
max_lr=10.0,
min_lr=1e-6,
)
print(f"suggested learning rate: {res.suggestion()}")
fig = res.plot(show=True, suggest=True)
fig.show()
```