I implemented the same simple IMDB sentiment classifier in both PyTorch and Keras, and the two give quite different test accuracies:
Test Accuracy:
- PyTorch v1.9.1: ~66%
- Keras v2.4.1: ~77%
I would have expected the two to reach roughly the same accuracy.
The model is just an Embedding layer followed by a Flatten and a linear projection to a single sigmoid output score; both implementations are given below.
Am I doing something wrong, or is there another reason for such a large gap?
Here is a Colab notebook.
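Since both models are tiny, the parameter counts are easy to compare by hand: 10,000 × 16 = 160,000 embedding weights plus 16 × 20 + 1 = 321 weights in the output layer, i.e. 160,321 trainable parameters in either framework. A quick check against the models defined below (using the variable names from my code) confirms they match:
print(sum(p.numel() for p in model.parameters() if p.requires_grad))  # PyTorch: 160321
print(model.count_params())                                           # Keras:   160321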
PyTorch Model
import sys
import numpy as np
import torch
from tensorflow.keras import preprocessing
from tensorflow.keras.datasets import imdb
from torch import nn, optim
from torch.utils.data import DataLoader
from tqdm import tqdm
batch_size = 512
learning_rate = 0.001
maxlen = 20
vocab_size = 10000
embedding_dim = 16
output_dim = 1
n_epochs = 10
padding_idx = 0
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=vocab_size)
# keep only the last `maxlen` tokens of each review (pad_sequences truncates
# from the front by default) and zero-pad the shorter ones
x_train = preprocessing.sequence.pad_sequences(train_data, maxlen=maxlen)
x_test = preprocessing.sequence.pad_sequences(test_data, maxlen=maxlen)
y_train = np.asarray(train_labels).astype('float32')
y_test = np.asarray(test_labels).astype('float32')
train_data = list(zip(x_train, y_train))
test_data = list(zip(x_test, y_test))
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)
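# (zipping the numpy arrays into (sequence, label) tuples lets DataLoader's
# default collate_fn batch them straight into tensors: integer token ids for
# the embedding lookup, float32 labels for BCELoss)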
class CBOWFlattenNet(nn.Module):
    def __init__(self, vocab_size, embedding_dim, output_dim, padding_idx, maxlen):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        self.flatten = nn.Flatten(start_dim=-2, end_dim=-1)
        self.fc = nn.Linear(embedding_dim * maxlen, output_dim)

    def forward(self, batch):
        embedded = self.embedding(batch)    # (batch, maxlen, embedding_dim)
        flattened = self.flatten(embedded)  # (batch, maxlen * embedding_dim)
        score = self.fc(flattened)          # (batch, 1)
        score = score.squeeze(dim=1)        # (batch,)
        return torch.sigmoid(score)         # F.sigmoid is deprecated in favor of torch.sigmoid
model = CBOWFlattenNet(vocab_size, embedding_dim, output_dim, padding_idx, maxlen)
loss_function = nn.BCELoss()
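# Note: in PyTorch, nn.BCEWithLogitsLoss on the raw scores would be the more
# numerically stable option; I kept BCELoss plus an explicit sigmoid to mirror
# the sigmoid output activation of the Keras model.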
optimizer = optim.RMSprop(
    model.parameters(),
    lr=learning_rate,
    momentum=0.0,
    alpha=0.9,  # alpha corresponds to Keras's rho
    eps=1e-7,
    centered=False,
    weight_decay=0.0,
)
def get_accuracy_bce(prediction, label):
    # fraction of examples where the thresholded sigmoid output matches the label
    batch_size = prediction.shape[0]
    predicted_classes = prediction > 0.5
    correct_predictions = (predicted_classes == label).sum()
    accuracy = correct_predictions / batch_size
    return accuracy
for epoch in range(n_epochs):
    epoch_train_losses = []
    epoch_train_accs = []
    epoch_val_losses = []
    epoch_val_accs = []

    model.train()
    for sentences, labels in tqdm(train_dataloader, desc='training...', file=sys.stdout):
        model.zero_grad()
        score = model(sentences)
        loss = loss_function(score, labels)
        loss.backward()
        optimizer.step()
        acc = get_accuracy_bce(score, labels)
        epoch_train_losses.append(loss.item())
        epoch_train_accs.append(acc.item())

    model.eval()
    with torch.no_grad():
        for sentences, labels in tqdm(test_dataloader, desc='evaluating...', file=sys.stdout):
            score = model(sentences)
            loss = loss_function(score, labels)
            acc = get_accuracy_bce(score, labels)
            epoch_val_losses.append(loss.item())
            epoch_val_accs.append(acc.item())

    epoch_train_loss = np.mean(epoch_train_losses)
    epoch_train_acc = np.mean(epoch_train_accs)
    epoch_val_loss = np.mean(epoch_val_losses)
    epoch_val_acc = np.mean(epoch_val_accs)
    print(f'epoch: {epoch+1}/{n_epochs} train_loss: {epoch_train_loss:.4f} train_acc: {epoch_train_acc:.4f} val_loss: {epoch_val_loss:.4f} val_acc: {epoch_val_acc:.4f}')
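One default I am not sure matters: the two frameworks initialize the embedding differently (PyTorch's nn.Embedding samples from N(0, 1), while Keras's Embedding defaults to uniform(-0.05, 0.05)). A minimal sketch of how the PyTorch embedding could be re-initialized to match, in case someone wants to rule this out (I have not verified that it closes the gap):
nn.init.uniform_(model.embedding.weight, a=-0.05, b=0.05)
with torch.no_grad():
    model.embedding.weight[padding_idx].fill_(0.0)  # re-zero the padding row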
Keras Model
import numpy as np
import tensorflow as tf
from tensorflow.keras import models, layers, preprocessing
from tensorflow.keras.datasets import imdb
batch_size = 512
learning_rate = 0.001
maxlen = 20
vocab_size = 10000
embedding_dim = 16
output_dim = 1
n_epochs = 10
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=vocab_size)
x_train = preprocessing.sequence.pad_sequences(train_data, maxlen=maxlen)
x_test = preprocessing.sequence.pad_sequences(test_data, maxlen=maxlen)
y_train = np.asarray(train_labels).astype('float32')
y_test = np.asarray(test_labels).astype('float32')
model = models.Sequential()
model.add(layers.Embedding(vocab_size, embedding_dim, input_length=maxlen))
model.add(layers.Flatten())
model.add(layers.Dense(output_dim, activation='sigmoid'))
opt = tf.keras.optimizers.RMSprop(
    learning_rate=learning_rate,
    rho=0.9,
    momentum=0.0,
    epsilon=1e-7,
    centered=False,
)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['acc'])
history = model.fit(x_train, y_train, epochs=n_epochs, batch_size=batch_size, shuffle=True)
results = model.evaluate(x_test, y_test)
print(results)
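The accuracies above are each from a single run, so some run-to-run variance is expected. To make the comparison stricter, the seeds could be pinned in both scripts, e.g.:
import numpy as np
import tensorflow as tf
import torch

np.random.seed(0)
tf.random.set_seed(0)
torch.manual_seed(0)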