PyTorch empty() errors out

I am writing a Variational Recurrent Autoencoder-based model for stock-return prediction, but just before instantiating the model I have stumbled upon a roadblock. The error thrown is:

empty(): argument 'size' failed to unpack the object at pos 2 with error "type must be tuple of ints,but got tuple"

The Encoder model constructor is:

def convert_to_dotdict(d):
    if isinstance(d, dict):
        return DotDict({k: convert_to_dotdict(v) for k, v in d.items()})
    elif isinstance(d, list):
        return [convert_to_dotdict(i) for i in d]
    else:
        return d


class Encoder(nn.Module):

    def __init__(self, params: DotDict):
        super(Encoder, self).__init__()

        self.params = params

        self.batch_size = self.params.batch_size
        self.num_stocks = int(self.params.num_stocks)
        self.num_factors = self.params.num_factors
        self.num_layers = self.params.num_layers
        self.hidden_size = self.params.hidden_size
        self.num_lags = self.params.num_lags
        self.dropout = self.params.dropout

        self.rnn = nn.LSTM(
            input_size=self.num_stocks,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            dropout=self.dropout,
            batch_first=True
        )
        print("Hi") #sanity check
        self.relu = nn.ReLU()
        self.mu = nn.Linear(self.hidden_size, self.num_factors)
        self.mu_activation = nn.ReLU()
        self.log_sigma = nn.Linear(self.hidden_size, self.num_factors)
        self.log_sigma_activation = nn.ReLU()

The Decoder model constructor is very similar, except for a fully connected layer or projection matrix here and there.

The RVAE module is:

class RVAE(nn.Module):
    def __init__(self, encoder_params: dict, decoder_params: dict):
        super(RVAE, self).__init__()
        self.encoder = Encoder(encoder_params)
        self.decoder = Decoder(decoder_params)

        self.weight_matrix = nn.Parameter(torch.randn(hyperparams.FACTOR_NETWORK.NUM_FACTORS,
                                                      hyperparams.DATA.NUM_STOCKS),
                                          requires_grad=True)
        self.mu = None
        self.logvar = None

    def sample(self, mu: torch.Tensor, logvar: torch.Tensor):
        # Reparameterization trick: z = mu + eps * sigma with eps ~ N(0, I)
        assert len(mu) == len(logvar)

        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        mu, logvar = self.encoder(x)
        self.mu = mu
        self.logvar = logvar
        z = self.sample(mu, logvar)
        ft = self.decoder(z)
        return torch.matmul(self.weight_matrix, ft)  # torch, not numpy, so autograd still works

When I run model = RVAE(encoder_params, decoder_params), I get the error above. Note that DotDict is just a utility class that makes a dictionary dot-accessible.

I looked up the docs for torch.empty() and tried various combinations of inputs to see what breaks it. Interestingly, torch.empty(2, 2), torch.empty((2, 2)) and torch.empty([2, 2]) all work, but torch.empty((2, (2, 2))) gives the exact same error, which is reasonable given the contents of the message.
After that I tried to find the source code of torch.empty(), but couldn't locate anything reliable, and what I did find had no mention of this error being raised.
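For concreteness, here are those probes, safe to run on their own:

import torch

torch.empty(2, 2)         # works: sizes as varargs of ints
torch.empty((2, 2))       # works: a tuple of ints
torch.empty([2, 2])       # works: a list of ints
torch.empty((2, (2, 2)))  # TypeError: 'size' fails to unpack at pos 2, since (2, 2) is not an int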

But my problem is that I have very explicitly converted self.num_stocks with int(), yet the error still occurs. Your help will be of much use to me. Thanks!
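For reference, a quick type check along these lines (a debugging sketch; it uses the encoder_params dict shown further down) would confirm what actually reaches nn.LSTM:

# Hypothetical sanity check: every size argument passed to nn.LSTM
# must be a plain Python int, not a list/tuple left over from the JSON.
for name in ("num_stocks", "hidden_size", "num_layers"):
    value = encoder_params[name]
    print(name, type(value).__name__, value)
    assert isinstance(value, int), f"{name} is {type(value).__name__}, not int"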

NOTE: hyperparams is an external JSON file which I read and convert into a DotDict.

Could you post a minimal and executable code snippet reproducing the issue, please?

Sure! Here is the complete code, written out in a single file:

import torch
import torch.nn as nn
import torch.nn.functional as F


class DotDict(dict):
    """dot.notation access to dictionary attributes
    (note: missing keys return None via dict.get instead of raising)"""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


def convert_to_dotdict(d):
    if isinstance(d, dict):
        return DotDict({k: convert_to_dotdict(v) for k, v in d.items()})
    elif isinstance(d, list):
        return [convert_to_dotdict(i) for i in d]
    else:
        return d


class Encoder(nn.Module):

    def __init__(self, params: DotDict):
        super(Encoder, self).__init__()

        self.params = params

        self.batch_size = self.params.batch_size
        self.num_stocks = int(self.params.num_stocks)
        self.num_factors = self.params.num_factors
        self.num_layers = self.params.num_layers
        self.hidden_size = self.params.hidden_size
        self.num_lags = self.params.num_lags
        self.dropout = self.params.dropout

        self.rnn = nn.LSTM(
            input_size=self.num_stocks,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            dropout=self.dropout,
            batch_first=True
        )
        print("Hi")
        self.relu = nn.ReLU()
        self.mu = nn.Linear(self.hidden_size, self.num_factors)
        self.mu_activation = nn.ReLU()
        self.log_sigma = nn.Linear(self.hidden_size, self.num_factors)
        self.log_sigma_activation = nn.ReLU()

    def forward(self, x):
        out, (hn, cn) = self.rnn(x)
        out = self.relu(out)

        mu = self.mu(out)
        mu = self.mu_activation(mu)

        log_sigma = self.log_sigma(out)
        log_sigma = self.log_sigma_activation(log_sigma)

        return mu, log_sigma



class Decoder(nn.Module):

    def __init__(self, params: DotDict):
        super(Decoder, self).__init__()

        self.params = params

        self.batch_size = self.params.batch_size
        self.num_stocks = self.params.num_stocks
        self.num_factors = self.params.num_factors
        self.num_layers = self.params.num_layers
        self.hidden_size = self.params.hidden_size
        self.num_lags = self.params.num_lags
        self.dropout = self.params.dropout

        self.tanh_h0 = nn.Tanh()
        self.rnn = nn.LSTM(
            input_size=self.num_stocks,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            dropout=self.dropout,
            batch_first=True
        )
        self.relu = nn.ReLU()
        self.fc = nn.Linear(self.hidden_size, self.num_factors)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, z):
        z = self.tanh_h0(z)

        out, (hn, cn) = self.rnn(x, (z, z))
        out = self.relu(out)

        out = self.fc(out)
        out = self.sigmoid(out)

        return out


class RVAE(nn.Module):
    def __init__(self, encoder_params: dict, decoder_params: dict):
        super(RVAE, self).__init__()
        self.encoder = Encoder(encoder_params)
        self.decoder = Decoder(decoder_params)

        self.weight_matrix = nn.Parameter(torch.randn(hyperparams.FACTOR_NETWORK.NUM_FACTORS,
                                                      hyperparams.DATA.NUM_STOCKS),
                                          requires_grad=True)
        self.mu = None
        self.logvar = None

    def sample(self, mu: torch.Tensor, logvar: torch.Tensor):
        # Reparameterization trick: z = mu + eps * sigma with eps ~ N(0, I)
        assert len(mu) == len(logvar)

        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        mu, logvar = self.encoder(x)
        self.mu = mu
        self.logvar = logvar
        z = self.sample(mu, logvar)
        ft = self.decoder(z)
        return torch.matmul(self.weight_matrix, ft)  # torch, not numpy, so autograd still works


hyperparams = convert_to_dotdict({
	"DATA" : {
		"WINDOW_SIZE" : 10,
		"TRAIN_SIZE" : 0.8,
		"TEST_SIZE" : 0.2,
		"NUM_STOCKS" : 25
	},

	"TRAINING" : {
		"BATCH_SIZE" : 64,
		"LEARNING_RATE" : 0.01,
		"MOMENTUM" : 0.95,
		"LR_DECAY" : 0.95,
		"LAMBDA" : 0.1,
		"NUM_EPOCHS" : 10000
	},

	"FACTOR_NETWORK" : {
		"ENCODER":{
			"TYPE" : "LSTM",
			"PARAMS" : {
				"NUM_LAYERS" : 1,
				"INPUT_SIZE" : [10, 25],
				"HIDDEN_SIZE" : 25,
				"BIAS" : True,
				"DROPOUT_PROB" : 0.0,
				"NUM_LAGS" : 10
			}
		},

		"DECODER":{
			"TYPE" : "LSTM",
			"PARAMS" : {
				"NUM_LAYERS" : 1,
				"INPUT_SIZE" : [10, 25],
				"HIDDEN_SIZE" : 25,
				"BIAS" : True,
				"DROPOUT_PROB" : 0.0,
				"NUM_LAGS" : 10
			}
		},

		"NUM_FACTORS" : 6
	}
})


encoder_params = convert_to_dotdict({
        "batch_size": hyperparams.TRAINING.BATCH_SIZE,
        "num_stocks": hyperparams.DATA.NUM_STOCKS,
        "num_lags": hyperparams.DATA.WINDOW_SIZE,
        "num_factors": hyperparams.FACTOR_NETWORK.NUM_FACTORS,
        "hidden_size": hyperparams.FACTOR_NETWORK.ENCODER.PARAMS.HIDDEN_SIZE,
        "dropout": hyperparams.FACTOR_NETWORK.ENCODER.PARAMS.DROPOUT_PROB,
        "num_layers": hyperparams.FACTOR_NETWORK.ENCODER.PARAMS.NUM_LAYERS
})

decoder_params = convert_to_dotdict({
        "batch_size": hyperparams.TRAINING.BATCH_SIZE,
        "num_stocks": hyperparams.DATA.NUM_STOCKS,
        "num_lags": hyperparams.DATA.WINDOW_SIZE,
        "num_factors": hyperparams.FACTOR_NETWORK.NUM_FACTORS,
        "hidden_size": hyperparams.FACTOR_NETWORK.DECODER.PARAMS.HIDDEN_SIZE,
        "dropout": hyperparams.FACTOR_NETWORK.DECODER.PARAMS.DROPOUT_PROB,
        "num_layers": hyperparams.FACTOR_NETWORK.DECODER.PARAMS.NUM_LAYERS
})

model = RVAE(encoder_params, decoder_params)
model.to(device='cuda' if torch.cuda.is_available() else 'cpu')
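
For reference, a dummy pass through the encoder alone can be smoke-tested like this (a sketch; the (batch, num_lags, num_stocks) input shape is my assumption from batch_first=True):

# Hypothetical smoke test of the encoder on random data.
x = torch.randn(64, 10, 25)  # (batch_size, num_lags, num_stocks)
mu, log_sigma = model.encoder(x)
print(mu.shape, log_sigma.shape)  # both torch.Size([64, 10, 6]) with the params above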

When I write everything like this in a single file, it works. However, for modularity I had moved Encoder, Decoder and hyperparams into three different files, and that is when the error appears. Here are the files if you need them:
utils.py
Encoder
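
Roughly, the split looks like this (a sketch; only utils.py is named exactly above, the other module names are placeholders):

# utils.py     -- DotDict, convert_to_dotdict
# encoder.py   -- from utils import DotDict; class Encoder(nn.Module)
# decoder.py   -- from utils import DotDict; class Decoder(nn.Module)
# main script  -- builds hyperparams / param dicts, then model = RVAE(...)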