Hi all,
Please find current implementation of LSTM classifier I am using below:
class LSTMClassifier(nn.Module):
def __init__(self, embedding_dim, hidden_dim, vocab_size, label_size):
super(LSTMClassifier, self).__init__()
self.hidden_dim = hidden_dim
self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
self.lstm = nn.LSTM(embedding_dim, hidden_dim)
self.hidden2label = nn.Linear(hidden_dim, label_size)
self.hidden = self.init_hidden()
def init_hidden(self):
# the first is the hidden h
# the second is the cell c
return (autograd.Variable(torch.zeros(1, 1, self.hidden_dim)),
autograd.Variable(torch.zeros(1, 1, self.hidden_dim)))
def forward(self, sentence):
embeds = self.word_embeddings(sentence)
x = embeds.view(len(sentence), 1, -1)
lstm_out, self.hidden = self.lstm(x, self.hidden)
y = self.hidden2label(lstm_out[-1])
log_probs = F.log_softmax(y)
return log_probs
During training the network, I am detaching the hidden state from it’s history on last batch.
def train(..., ...):
...
...
for batch in batches:
model.hidden = model.init_hidden()
...
...
However, I want to learn the params in the initial hidden state. What’s the proper way to make the initial hidden state learnable during training?