Hi,
I have a *.csv file with time-series data that I want to load in a custom dataset and then use dataloader to get batches of data for an LSTM model.
I’m struggling to get the batches together with the sequence size.
This is the code that I have so far. I’m not even sure if I suppose to do it this way:
class CMAPSSDataset(Dataset):
def __init__(self, csv_file, sep=' ', sequence_length=40):
self.df_cmapss = pd.read_csv(csv_file, sep=sep)
self.df_data = self.df_cmapss.iloc[:, 3:27]
self.targets = self.df_cmapss['RUL']
self.sequence_length = sequence_length
def __len__(self):
return len(self.df_data)
def __getitem__(self, idx):
if (idx+self.sequence_length) > len(self.df_data):
indexes = list(range(idx, len(self.df_data)))
else:
indexes = list(range(idx, idx + self.sequence_length))
data = self.df_data.iloc[indexes, :].values
target = self.targets.iloc[indexes].values
return torch.tensor(data), torch.tensor(target)
cmapss_dataset = {x: CMAPSSDataset(csv_file='data/CMAPSSData/'+x+'_FD001.csv', sep=' ')
for x in ['train', 'test']}
batch_size = 32
dataloaders = {x: DataLoader(cmapss_dataset[x], batch_size=batch_size,
num_workers=0, pin_memory=True)
for x in ['train', 'test']}
I get the following error when I iterate through the dataloader:
RuntimeError: stack expects each tensor to be equal size, but got [40, 24] at entry 0 and [39, 24] at entry 16
Any help would be appreciated.