Hi,
I am someone who has recently migrated to PyTorch from Keras. I previously implemented a sequence classification model and got nearly 90% accuracy, which is pretty decent. However, when I implemented the same(?) model in PyTorch, the accuracy dropped down to 68%. I used exactly same layers and hyperparameters for both the models. It would be great if someone can point out where I am going wrong.
The following is the Keras model followed by the PyTorch model.
Keras
vocab_size = 20000
len_of_vec_embedding = 50
max_sequence_length = 40
hidden_dim = 100
output_dim = 101
model = Sequential()
model.add(Embedding(vocab_size, len_of_vec_embedding, input_length=max_sequence_length))
model.add(LSTM(hidden_dim, dropout=0.2, recurrent_dropout=0.2, input_shape=(max_sequence_length, len_of_vec_embedding)))
model.add(Dense(output_dim, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit_generator(generate_training('training_LSTM@500.csv'),steps_per_epoch=30300, epochs=10,workers=1, validation_data=generate_validation('validation_LSTM@500.csv'), validation_steps=10100)
PyTorch
HIDDEN_DIM = 100
OUTPUT_DIM = 101
EMBEDDING_DIM = 50
VOCAB_SIZE = len(vocabulary)
class ClassifierLSTM(nn.Module):
def __init__(self):
super(ClassifierLSTM, self).__init__()
self.word_embedding = nn.Embedding(VOCAB_SIZE, EMBEDDING_DIM)
self.lstm = nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, dropout=0.2)
self.hidden2output = nn.Linear(HIDDEN_DIM, OUTPUT_DIM)
self.hidden = self.init_hidden()
def init_hidden(self): # Initializes h_0 and c_0
return (autograd.Variable(torch.zeros(1, 1, HIDDEN_DIM)), autograd.Variable(torch.zeros(1, 1, HIDDEN_DIM)))
def forward(self, description):
embedding = self.word_embedding(description)
lstm_out, self.hidden = self.lstm(embedding.view(len(description), 1, -1), self.hidden)
output = self.hidden2output(lstm_out.view(len(description), -1)[-1]) # last LSTM out is fed to
return output.view(1,-1)
model = ClassifierLSTM()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)