Hi. I am trying to create my first LSTM model for timeseries forecasting.
There are some things that I don’t really understand.
Is it true that a single LSTM cell is created and the input is applied to it sequentially, sample by sample?
And does num_layers control how many LSTM cells are created?
Why are we using the hidden state as the output instead of the actual output?
Am I correctly extracting the hidden state of the last layer? (hn[self.num_layers-1])
Any advice on my code? Am I doing something wrong?
class ShallowRegressionLSTM(nn.Module):
    """Stacked LSTM followed by a linear head that emits one value per sequence.

    The input sequence is run through ``num_layers`` stacked LSTM layers and
    the final hidden state of the top layer is projected to a single scalar.
    For a unidirectional LSTM that final hidden state is the same tensor as
    the last time step of the LSTM's ``output``, so either could be used.

    Args:
        num_features: Number of input features per time step.
        hidden_units: Size of the hidden state of every LSTM layer.
        num_layers: Number of stacked LSTM layers.
        device: Kept for backward compatibility and stored on the instance.
            It is no longer used in ``forward``; move the model with
            ``.to(device)`` as with any other ``nn.Module``.
    """

    def __init__(self, num_features, hidden_units, num_layers, device=None):
        super().__init__()
        self.num_features = num_features
        self.hidden_units = hidden_units
        self.num_layers = num_layers
        self.device = device
        self.lstm = nn.LSTM(
            input_size=num_features,
            hidden_size=hidden_units,
            batch_first=True,  # inputs are (batch, seq_len, num_features)
            num_layers=num_layers,
        )
        self.linear = nn.Linear(in_features=hidden_units, out_features=1)

    def forward(self, x):
        """Predict one value for each sequence in ``x``.

        Args:
            x: Tensor of shape ``(batch, seq_len, num_features)``.

        Returns:
            1-D tensor with one prediction per sequence in the batch.
        """
        # When no initial state is passed, nn.LSTM starts from zero hidden and
        # cell states on the input's device and dtype. Building them by hand
        # from a stored device breaks once the module is moved or cast, and
        # marking constant zeros as requiring grad only adds autograd work.
        _, (hn, _) = self.lstm(x)
        # hn is (num_layers, batch, hidden_units); index -1 is the top layer.
        return self.linear(hn[-1]).flatten()