Hi,
I’m currently building an lstm for the UCI human activity dataset:
https://archive.ics.uci.edu/ml/datasets/Human+Activity+Recognition+Using+Smartphones
I’m using Pytorch lightning but that isn’t what my question is about.
I have the input data for my LSTM in the form (batch_size, sequence_length, num_features), which in this case is (16, 128, 9).
I thought, given that the batch is the first dimension of my input, I had to set batch_first=True; however, when I do this the loss never decreases. I’m also achieving a relatively low accuracy (68% compared to 89% of other networks) after training for 20 epochs.
Here is my code:
class LSTM(pl.LightningModule):
def __init__(self, input_size=9, hidden_size=100, seq_len=128, num_layers=1, output_size=6):
super().__init__()
self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
self.fc = nn.Linear(hidden_size, output_size)
# self.hidden_cell = (torch.zeros(num_layers, batch_size, hidden_size),
# torch.zeros(num_layers, batch_size, hidden_size))
self.hidden_size = hidden_size
self.num_layers = num_layers
self.seq_len = seq_len
self.relu = nn.ReLU()
self.dropout = nn.Dropout()
self.soft = nn.Softmax()
def forward(self, x):
lstm_out, self.hidden = self.lstm(x)
lstm_out = self.dropout(lstm_out[:,-1,:])
predictions = self.soft(self.fc(self.relu(lstm_out)))
return predictions
def training_step(self, batch, batch_idx):
X, y = batch
logits = self.forward(X)
criterion = nn.CrossEntropyLoss()
loss = criterion(logits, y.view(-1))
return loss
def test_step(self, batch, batch_idx):
X, y = batch
logits = self.forward(X)
total = 0
correct = 0
for i in range(len(logits)):
pred = logits[i].argmax(dim=0, keepdim=True)
# print(pred)
if (pred[0] == y[i]):
correct += 1
total += 1
metrics = {'correct': correct, 'total': total}
return metrics
def test_epoch_end(self, outputs):
correct = sum([x['correct'] for x in outputs])
total = sum([x['total'] for x in outputs])
print(100*correct/total)
return {'overall_accuracy': 100*correct/total}
def configure_optimizers(self):
return torch.optim.Adam(self.parameters(), lr=1e-3)
The LSTM is initialised as:
LSTM(input_size=9, hidden_size=100, seq_len=128, num_layers=2)
Any guidance would be really appreciated, thanks