Hello, I need help building a time series classification model with an LSTM.
I followed the exact same steps from a tutorial on building a time series classification model with PyTorch, using the same library versions: torch 1.8.1 and pytorch-lightning 1.2.6. These are the classes I used:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl
from pytorch_lightning.metrics.functional import accuracy  # pl.metrics in 1.2.x; torchmetrics.functional in newer versions

class DHPDataset(Dataset):
    def __init__(self, sequences):
        self.sequences = sequences

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        sequence, label = self.sequences[idx]
        return dict(
            sequence=torch.Tensor(sequence.to_numpy()),
            label=torch.Tensor(label).long()
        )
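For context, each element of train_sequences / test_sequences is a (features DataFrame, encoded label) pair. Roughly, I build them the way the tutorial does; this is only a sketch, and df, the 'target' column and the 'series_id' grouping column are placeholders for my actual names:

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

le = LabelEncoder()
df['label'] = le.fit_transform(df['target'])       # 'target' is a placeholder column name

sequences = []
for _, group in df.groupby('series_id'):           # 'series_id' is a placeholder grouping column
    sequence_features = group[featurecolumns]      # per-sequence feature DataFrame
    label = group['label'].iloc[0]                 # one encoded label per sequence
    sequences.append((sequence_features, label))

train_sequences, test_sequences = train_test_split(sequences, test_size=0.2)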
class DHPDataModule(pl.LightningDataModule):
    def __init__(self, train_sequences, test_sequences, batch_size):
        super().__init__()
        self.train_sequences = train_sequences
        self.test_sequences = test_sequences
        self.batch_size = batch_size

    def setup(self, stage=None):
        self.train_dataset = DHPDataset(self.train_sequences)
        self.test_dataset = DHPDataset(self.test_sequences)

    def train_dataloader(self):
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=0,
            pin_memory=True
        )

    def val_dataloader(self):
        return DataLoader(
            self.test_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=0,
            pin_memory=True
        )

    def test_dataloader(self):
        return DataLoader(
            self.test_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=0,
            pin_memory=True
        )
N_EPOCHS = 250
BATCH_SIZE = 64
data_module = DHPDataModule(train_sequences, test_sequences, BATCH_SIZE)
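For reference, this is how I sanity-check the data module by pulling a single batch manually (just a debugging sketch; this is the point where the default collate stacks the per-item tensors):

data_module.setup()
batch = next(iter(data_module.train_dataloader()))
print(batch['sequence'].shape, batch['label'].shape)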
class DHPModel(nn.Module):
    def __init__(self, n_features, n_classes, n_hidden=256, n_layers=3):
        super().__init__()
        self.n_hidden = n_hidden
        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=n_hidden,
            num_layers=n_layers,
            batch_first=True,
            dropout=0.75
        )
        self.classifier = nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        self.lstm.flatten_parameters()
        _, (hidden, _) = self.lstm(x)
        out = hidden[-1]
        return self.classifier(out)
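As a quick shape check, the model itself runs fine on a random dummy batch (batch size 4 and sequence length 120 are arbitrary values here):

dummy = torch.randn(4, 120, len(featurecolumns))  # (batch, seq_len, n_features)
check_model = DHPModel(n_features=len(featurecolumns), n_classes=len(le.classes_))
print(check_model(dummy).shape)                   # expect torch.Size([4, n_classes])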
class DHPPredictor(pl.LightningModule):
    def __init__(self, n_features: int, n_classes: int):
        super().__init__()
        self.model = DHPModel(n_features, n_classes)
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x, labels=None):
        output = self.model(x)
        loss = 0
        if labels is not None:
            loss = self.criterion(output, torch.argmax(labels.squeeze(), dim=1))
        return loss, output

    def training_step(self, batch, batch_idx):
        sequences = batch['sequence']
        labels = batch['label']
        loss, outputs = self(sequences, labels)
        predictions = torch.argmax(outputs, dim=1)
        step_accuracy = accuracy(predictions, torch.argmax(labels.squeeze(), dim=1))
        self.log('train_loss', loss, prog_bar=True, logger=True)
        self.log('train_accuracy', step_accuracy, prog_bar=True, logger=True)
        return {'loss': loss, 'accuracy': step_accuracy}

    def validation_step(self, batch, batch_idx):
        sequences = batch['sequence']
        labels = batch['label']
        loss, outputs = self(sequences, labels)
        predictions = torch.argmax(outputs, dim=1)
        step_accuracy = accuracy(predictions, torch.argmax(labels.squeeze(), dim=1))
        self.log('val_loss', loss, prog_bar=True, logger=True)
        self.log('val_accuracy', step_accuracy, prog_bar=True, logger=True)
        return {'loss': loss, 'accuracy': step_accuracy}

    def test_step(self, batch, batch_idx):
        sequences = batch['sequence']
        labels = batch['label']
        loss, outputs = self(sequences, labels)
        predictions = torch.argmax(outputs, dim=1)
        step_accuracy = accuracy(predictions, torch.argmax(labels.squeeze(), dim=1))
        self.log('test_loss', loss, prog_bar=True, logger=True)
        self.log('test_accuracy', step_accuracy, prog_bar=True, logger=True)
        return {'loss': loss, 'accuracy': step_accuracy}

    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=0.0001)
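For reference, nn.CrossEntropyLoss expects raw logits of shape (batch, n_classes) and integer class indices of shape (batch,); the argmax over labels.squeeze() above only makes sense if the labels arrive one-hot encoded, which is what I understood from the tutorial. A minimal standalone example of the loss call:

criterion = nn.CrossEntropyLoss()
logits = torch.randn(4, 3)             # (batch, n_classes) raw scores
targets = torch.tensor([0, 2, 1, 1])   # (batch,) integer class indices
print(criterion(logits, targets))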
model = DHPPredictor(
    n_features=len(featurecolumns),
    n_classes=len(le.classes_)
)

trainer = pl.Trainer(
    logger=logger,
    checkpoint_callback=checkpoint_callback,
    max_epochs=N_EPOCHS,
    gpus=0,
    progress_bar_refresh_rate=30
)

trainer.fit(model, data_module)
However, when I fit the trainer with the model and the data_module, it raises a runtime error:
RuntimeError: stack expects each tensor to be equal size, but got [3] at entry 0 and [0] at entry 1
The train_sequences and test_sequences passed to DHPDataModule are shaped like this: DischargePresure, VibrationX, … etc. are supposed to be the feature columns, and the number after the [… rows x … columns] summary is supposed to be the target.
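In case it helps narrow this down, the error says the default collate is trying to stack tensors of unequal sizes, so this is a small check I can run on individual dataset items to see exactly what the DataLoader has to stack (just a sketch):

ds = DHPDataset(train_sequences)
for i in range(min(5, len(ds))):
    item = ds[i]
    print(i, item['sequence'].shape, item['label'].shape)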