Hello everyone,
I’m developing a classifier based on LSTM and I defined the model in this way:
class LSTMClassifier(nn.Module):
    """LSTM-based sequence classifier.

    Feeds the input sequence through a (possibly stacked and/or
    bidirectional) LSTM, then classifies each sequence from the final
    hidden state of the *last* LSTM layer. Note ``hn`` is ordered from
    the first layer to the last, so the last layer's state is ``hn[-1]``
    (or ``hn[-2]``/``hn[-1]`` for forward/backward when bidirectional).
    """

    def __init__(self, input_size, seq_len, n_classes, hidden_size, device,
                 dim_feedforward=1024, num_layers=1, dropout=0,
                 bidirectional=False, batch_first=True):
        super(LSTMClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.seq_len = seq_len
        self.input_size = input_size
        self.num_layers = num_layers
        self.device = device
        # One final hidden state per layer AND per direction.
        self.num_directions = 2 if bidirectional else 1
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, dropout=dropout,
                            bidirectional=bidirectional,
                            batch_first=batch_first)
        # Bidirectional output concatenates forward and backward states,
        # so the classifier input width must include the direction factor.
        # (The original used plain hidden_size, which breaks
        # bidirectional=True.)
        self.classifier = nn.Linear(hidden_size * self.num_directions,
                                    n_classes)

    def init_hidden_state(self, x):
        """Return zero-initialized (h_0, c_0) for a batch shaped like x.

        The required shape is (num_layers * num_directions, batch,
        hidden_size); the original omitted num_directions, which made
        bidirectional=True fail.
        """
        # NOTE(review): x.size(0) is the batch only when batch_first=True
        # (the default here) — confirm if batch_first=False is ever used.
        batch_size = x.size(0)
        shape = (self.num_layers * self.num_directions,
                 batch_size, self.hidden_size)
        return (
            torch.zeros(*shape, device=self.device),
            torch.zeros(*shape, device=self.device),
        )

    def forward(self, x):
        """Classify each sequence in the batch; returns (batch, n_classes).

        Bug fixed: the original ``hn.view(-1, hidden_size)`` folded the
        final hidden state of EVERY layer into the batch dimension, so
        with num_layers=2 a single sample produced 2 output rows. We now
        keep only the last layer's state.
        """
        hidden = self.init_hidden_state(x)
        out, (hn, cn) = self.lstm(x, hidden)
        # hn: (num_layers * num_directions, batch, hidden_size),
        # ordered first layer -> last layer.
        if self.num_directions == 2:
            # Last layer: hn[-2] is the forward direction, hn[-1] the
            # backward direction; concatenate along the feature axis.
            last = torch.cat((hn[-2], hn[-1]), dim=1)
        else:
            last = hn[-1]
        return self.classifier(last)
I believe this is correct. If I create a model with hidden_size=64 and num_layers=1, the size of hn (before the view) is [1, 1, 64] when I feed in a single sample.
If I change num_layers=2, the size of hn (before view) is [2,1,64] because it concatenates the hidden state of the two stacked layers.
My question is: what is the ordering of the hidden states in hn? Is the first element in hn the hidden state of the first LSTM layer or of the last one?
Is it correct to use the last hidden state of the last LSTM layer to classify the sequences?
Thanks for your help.