I was reading some LSTM examples and found something that confused me.
import torch as t
import torch.nn as nn

class LSTM(nn.Module):
    def __init__(self, input_feature_dim, hidden_feature_dim, hidden_layer_num,
                 batch_size, output_dim=10, use_cuda=False, dropout=0):
        super(LSTM, self).__init__()
        self.input_feature_dim = input_feature_dim
        self.hidden_feature_dim = hidden_feature_dim
        self.hidden_layer_num = hidden_layer_num
        self.batch_size = batch_size
        self.use_cuda = use_cuda
        self.lstm = nn.LSTM(input_feature_dim, hidden_feature_dim,
                            hidden_layer_num, dropout=dropout).double()
        self.linear1 = nn.Linear(hidden_layer_num * hidden_feature_dim, output_dim).double()

    def init_hidden(self):
        return (t.zeros(self.hidden_layer_num, self.batch_size, self.hidden_feature_dim),
                t.zeros(self.hidden_layer_num, self.batch_size, self.hidden_feature_dim))

    def forward_v1(self, input):
        batchSize = input.size(1)
        h0 = t.zeros(self.hidden_layer_num, batchSize, self.hidden_feature_dim).double()
        c0 = t.zeros(self.hidden_layer_num, batchSize, self.hidden_feature_dim).double()
        _, hn = self.lstm(input, (h0, c0))
        # hn[0] is h_n with shape (hidden_layer_num, batch, hidden_feature_dim);
        # reshape it to (batch, hidden_layer_num * hidden_feature_dim)
        hn = hn[0].view(batchSize, self.hidden_layer_num * self.hidden_feature_dim)
        output = self.linear1(hn)
        return output

    def forward_v2(self, input):
        batchSize = input.size(1)
        h0 = t.zeros(self.hidden_layer_num, batchSize, self.hidden_feature_dim).double()
        c0 = t.zeros(self.hidden_layer_num, batchSize, self.hidden_feature_dim).double()
        lstm_output, hn = self.lstm(input, (h0, c0))
        # lstm_output[-1] is the last layer's output at the final time step,
        # shape (batch, hidden_feature_dim)
        output = self.linear1(lstm_output[-1].view(batchSize, -1))
        return output
What confuses me is that I saw the two versions of forward above, forward_v1 and forward_v2. They pass different outputs of the LSTM into the next fully connected layer: forward_v1 uses the final hidden state h_n, while forward_v2 uses the LSTM output at the last time step. I don't know which one is appropriate. Is there any difference?
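To make the question concrete, here is a minimal standalone sketch I put together (the dimensions are made up just for illustration) that prints the shapes of the two tensors the forwards use:

import torch as t
import torch.nn as nn

# Toy LSTM: input_size=4, hidden_size=8, two stacked layers,
# default (seq_len, batch, feature) layout
lstm = nn.LSTM(input_size=4, hidden_size=8, num_layers=2)
x = t.randn(5, 3, 4)  # seq_len=5, batch=3, input features=4

output, (hn, cn) = lstm(x)
print(output.shape)  # torch.Size([5, 3, 8]): last layer's output at every time step
print(hn.shape)      # torch.Size([2, 3, 8]): final hidden state of every layer

# At the final time step, the last layer's output matches hn[-1]:
print(t.allclose(output[-1], hn[-1]))  # True

If I read the shapes right, output[-1] and hn[-1] are the same tensor values, so the versions seem to differ only in whether the hidden states of the earlier layers are also fed to the linear layer. Is that the whole story?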