TemporalFusionTransformer passing whole module to torch.empty()

torch version 2.0.1+cu117
pytorch_forecasting version 1.0.0
pytorch_lightning version 2.0.7
[Using the CPU for testing]

I am trying to set up a TFT model but I get the following error traceback:

Traceback (most recent call last):
File "ShortTrain2.py", line 105, in <module>
model = TFT(
File "/home/jl/.local/lib/python3.8/site-packages/pytorch_forecasting/models/temporal_fusion_transformer/__init__.py", line 248, in __init__
self.static_context_variable_selection = GatedResidualNetwork(
File "/home/jl/.local/lib/python3.8/site-packages/pytorch_forecasting/models/temporal_fusion_transformer/sub_modules.py", line 205, in __init__
self.fc1 = nn.Linear(self.input_size, self.hidden_size)
File "/home/jl/.local/lib/python3.8/site-packages/torch/nn/modules/linear.py", line 97, in __init__
self.weight = Parameter(torch.empty((out_features, in_features), **factory_kwargs))
TypeError: empty() received an invalid combination of arguments - got (tuple, dtype=NoneType, device=NoneType), but expected one of:

  • (tuple of ints size, *, tuple of names names, torch.memory_format memory_format, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
  • (tuple of ints size, *, torch.memory_format memory_format, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)

I’ve traced this back to the line in the TFT init.py in the static encoder section where the input_size, hidden_size and output_size are all set to self.hparams.hidden_size but instead of a size tuple or whatever being passed, the object passed is the whole TimeSeriesDataSet module with a length parameter.

I’m sure this is an input issue with a misunderstanding but I would welcome some direction. Here is the code:

import os
import sys
import logging
import math
import warnings
import pandas as pd
import numpy as np
import torch
import pytorch_lightning as pl
import pytorch_forecasting as pf
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping,LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_forecasting import Baseline,TimeSeriesDataSet
from pytorch_forecasting.models import TemporalFusionTransformer as TFT
from pytorch_forecasting.data import MultiNormalizer,TorchNormalizer
print("python version",sys.version)
print("pandas version",pd.__version__)
print("numpy version",np.__version__)
print("torch version",torch.__version__)
print("pytorch_forecasting version",pf.__version__)
print("pytorch_lightning version",pl.__version__)
pd.options.display.max_rows = 100
os.environ["CUDA_VISIBLE_DEVICES"] = ""
logging.getLogger("pytorch_lightning").setLevel(logging.WARNING)
Train=pd.read_csv("ShortTrain2.csv")
Train[Train.select_dtypes(include=['float64']).columns] = Train[Train.select_dtypes(include=['float64']).columns].astype('float32')
aminx=[]
for i in Train.minx:
    aminx.append("A"+str(math.floor(i/12)))
aminx=pd.DataFrame(aminx)
aminx.columns=['aminx']
Train=pd.concat((Train,aminx),axis=1)
print(1,Train.dtypes)
predictors=1
if len(sys.argv) > 1 :
    predictors = 2
xtarget,xtarget_normalizer,xtime_varying_known_reals,xtime_varying_unknown_reals=None,None,None,None
xtime_varying_known_categoricals=[‘aminx’]
if predictors == 1:
    print("Using a single predictor v1_R")
    # single predictor
    xtarget=['v1_R']
    xtarget_normalizer=MultiNormalizer([TorchNormalizer()])
    xtime_varying_known_reals=['v1_Act']
    xtime_varying_unknown_reals=['v1_C','v1_D','v1_V','v1_S','v1_H','v1_L','v1_R_lag10','v1_R_lag20']

else:
    print("Using 2 predictors v1_R and v2_R")
    # 2 predictors
    xtarget=['v1_R','v2_R']
    xtarget_normalizer=MultiNormalizer([TorchNormalizer(),TorchNormalizer()])
    xtime_varying_known_reals=['v1_Act','v2_Act']
    xtime_varying_unknown_reals=['v1_C','v1_D','v1_V','v1_S','v1_H','v1_L','v1_R_lag10','v1_R_lag20','v2_C','v2_D','v2_V','v2_S','v2_H','v2_L','v2_R_lag10','v2_R_lag20']

print(xtarget)
print(xtarget_normalizer)
print(xtime_varying_known_reals)
print(xtime_varying_unknown_reals)
print(2,Train)
training = TimeSeriesDataSet(
    data=Train,
    add_encoder_length=True,
    add_relative_time_idx=True,
    add_target_scales=True,
    allow_missing_timesteps=True,
    group_ids=['Group'],
    max_encoder_length=10,
    max_prediction_length=20,
    min_encoder_length=5,
    min_prediction_length=1,
    # time_idx='unique',
    time_idx='time_idx',
    static_categoricals=[], # Categorical features that do not change over time - list
    static_reals=[], # Continuous features that do not change over time - list
    time_varying_known_categoricals=xtime_varying_known_categoricals, # Known in the future - list
    time_varying_unknown_categoricals=[],
    target=xtarget,
    target_normalizer=xtarget_normalizer,
    time_varying_known_reals=xtime_varying_known_reals,
    time_varying_unknown_reals=xtime_varying_unknown_reals
)
validation = TimeSeriesDataSet.from_dataset(training, Train, predict=False, stop_randomization=True)
print("Passed validation")
train_dataloader = training.to_dataloader(train=True, batch_size=16, num_workers=32,shuffle=False)
print("Created dataloaders - show first batch")
warnings.filterwarnings("ignore")
print("Start train_dataloader comparison with baseline")
device = torch.device("cpu")
print("actuals calculated")
Baseline().to(device)
baseline_predictions = Baseline().predict(train_dataloader)
baseline_predictions = torch.cat([b.clone().detach() for b in baseline_predictions])
print("baseline_predictions calculated")
val_loss=1e6
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
lr_logger = LearningRateMonitor() # log the learning rate
logger = TensorBoardLogger("lightning_logs") # logging results to a tensorboard
trainer = Trainer(
    callbacks=[lr_logger, early_stop_callback],
    enable_model_summary=True,
    gradient_clip_val=0.1,
    limit_train_batches=10, # comment in for training, running validation every 30 batches
    logger=logger
)
model = TFT(
    training,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    learning_rate=0.03, # 1
    log_interval=10,
    lstm_layers=1, # could be interesting
    output_size=1, # number of predictors
    reduce_on_plateau_patience=4,
)
print(f"\nNumber of parameters in network: {model.size()/1e3:.1f}k\n")

I can reproduce the issue by explicitly passing None to dtype and device:

a = tuple((2, 3))
torch.empty(a, dtype=None, device=None)
# TypeError: empty() received an invalid combination of arguments - got (tuple, device=NoneType, dtype=NoneType), but expected one of:
#  * (tuple of ints size, *, tuple of names names, torch.memory_format memory_format, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
#  * (tuple of ints size, *, torch.memory_format memory_format, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)

Your code is unfortunately not properly formatted and not executable so I’m unsure if this is the same root cause causing your code to fail.

Thanks @ptrblck for looking.

I’d been staring at this for some time now wondering whether it was one or more of the arguments, were they correctly specified etc, was there a bug that I’d tripped over, should I move the problem to tensorflow … I even posted it as an issue on github as I hadn’t found this list!

But the solution was there all the time staring me in the face!

The model instantiation should be:

model = TFT.from_dataset( ... )

I forgot the ‘.from_dataset’. I’m new to this game so bear with me. :blush: