Hello guys, I’m trying to port my LSTM model from Keras to PyTorch, but the results in PyTorch are really bad at the moment. The network is really simple, as shown below.
# Keras reference model: a single 10-unit LSTM feeding a small tanh MLP that
# ends in one linear output unit, trained with MSE loss and the Adam optimizer.
# NOTE(review): the original snippet carried the remark "output shape: (1, 1)"
# after the LSTM layer; presumably it refers to the final prediction - confirm.
model = Sequential(
    [
        # Sequence length and feature count are taken from `shape`.
        LSTM(10, input_length=shape[1], input_dim=shape[2]),
        # Four identical 10-unit tanh hidden layers.
        Dense(10, activation="tanh"),
        Dense(10, activation="tanh"),
        Dense(10, activation="tanh"),
        Dense(10, activation="tanh"),
        # Linear regression head.
        Dense(1, activation="linear"),
    ]
)
model.compile(loss="mse", optimizer="adam")
model.summary()
I then migrated it to PyTorch as follows:
class LSTM(nn.Module):
    """LSTM encoder followed by a five-layer MLP head, all in float64.

    The network runs a (possibly stacked / bidirectional) ``nn.LSTM`` over a
    batch-first input, takes the final hidden state of the last LSTM layer and
    pushes it through four 10-unit ``tanh`` layers and one linear output layer.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state (per direction).
        num_layers: Number of stacked LSTM layers.
        output_dim: Size of the final linear output.
        bilstm: If true, use a bidirectional LSTM; the MLP head then receives
            the concatenated forward and backward final hidden states.
        keras_init: If true, re-initialise the parameters following Keras's
            default initialisers (Glorot-uniform input kernels, orthogonal
            recurrent kernels, zero biases with a unit forget-gate bias, and
            Glorot-uniform / zero-bias dense layers). If false (default), the
            stock PyTorch initialisation is kept.
    """

    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        num_layers: int,
        output_dim: int,
        bilstm: bool = False,
        *,
        keras_init: bool = False,
    ) -> None:
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.isBi = bilstm
        self.num_directions = 2 if bilstm else 1

        self.lstm = nn.LSTM(
            input_dim,
            hidden_dim,
            num_layers,
            batch_first=True,
            bidirectional=bilstm,
        )

        # A bidirectional LSTM emits hidden_dim features per direction, so the
        # first dense layer must accept hidden_dim * num_directions inputs.
        lstm_features = hidden_dim * self.num_directions
        self.fc1 = nn.Sequential(nn.Linear(lstm_features, 10), nn.Tanh())
        self.final_layer1 = nn.Sequential(nn.Linear(10, 10), nn.Tanh())
        self.final_layer2 = nn.Sequential(nn.Linear(10, 10), nn.Tanh())
        self.final_layer3 = nn.Sequential(nn.Linear(10, 10), nn.Tanh())
        self.final_layer4 = nn.Sequential(nn.Linear(10, output_dim))

        # Cast every parameter to float64 in one place.
        self.double()

        if keras_init:
            self._apply_keras_init()

    def _apply_keras_init(self) -> None:
        """Re-initialise all parameters in the style of Keras's defaults."""
        hidden = self.hidden_dim
        for name, param in self.lstm.named_parameters():
            if name.startswith("weight_ih"):
                nn.init.xavier_uniform_(param)
            elif name.startswith("weight_hh"):
                nn.init.orthogonal_(param)
            elif name.startswith("bias"):
                nn.init.zeros_(param)
                if name.startswith("bias_ih"):
                    # PyTorch orders the gates (input, forget, cell, output)
                    # and adds bias_ih + bias_hh, so a unit forget bias is set
                    # on exactly one of the two bias vectors.
                    param.data[hidden:2 * hidden].fill_(1.0)

        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
                nn.init.zeros_(module.bias)

    def forward(self, x):
        """Return the prediction for a batch of sequences.

        Args:
            x: float64 tensor of shape ``(batch, seq_len, input_dim)``.

        Returns:
            Tensor of shape ``(batch, output_dim)``.

        Raises:
            ValueError: If ``x`` is not a 3-D batch-first tensor.
        """
        if x.dim() != 3:
            raise ValueError(
                "expected input of shape (batch, seq_len, input_dim), "
                f"got a {x.dim()}-D tensor"
            )

        _, (hn, _) = self.lstm(x)

        # hn is (num_layers * num_directions, batch, hidden_dim); its trailing
        # num_directions entries are the last layer's final states. For a
        # unidirectional LSTM this equals out[:, -1, :]; for a bidirectional
        # one it pairs the forward state after the last step with the backward
        # state after the first step, i.e. both have seen the whole sequence.
        final_states = hn[-self.num_directions:]
        out = final_states.transpose(0, 1).reshape(x.size(0), -1)

        out = self.fc1(out)
        out = self.final_layer1(out)
        out = self.final_layer2(out)
        out = self.final_layer3(out)
        out = self.final_layer4(out)
        return out
The result is really bad. I was wondering whether the initialization methods or activation functions used in Keras are different from the ones I used in PyTorch (Keras seems to use hard_sigmoid where PyTorch uses sigmoid?).
I would really appreciate it if somebody could help me with this problem!