Tried to switch from tensorflow(keras) to pytorch. Testing basic binary classification model with embedding layer for IMDB movie review data. in tensorflow, everything is working fine, easy to build the model and converge very fast. Tried to replicate the same model in pytorch, struggled to make it work.
i searched online, did not see anyone used sigmoid for binary classificaiton, some used log_softmax, not sure why.
Is there anything done wrong? please help, thank you very much!
import torch
import torchtext
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
TEXT = torchtext.data.Field(tokenize = 'spacy', fix_length=40)
LABEL = torchtext.data.LabelField(dtype = torch.float)
train_dataset, test_dataset = torchtext.datasets.IMDB.splits(TEXT, LABEL)
MAX_VOCAB_SIZE = 10000
TEXT.build_vocab(train_dataset, max_size = MAX_VOCAB_SIZE, vectors = "glove.6B.100d")
LABEL.build_vocab(train_dataset)
BATCH_SIZE = 32
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
train_loader, test_loader = torchtext.data.BucketIterator.splits(
(train_dataset, test_dataset),
batch_size = BATCH_SIZE,
device = device)
class RNN(nn.Module):
def __init__(self):
super(RNN, self).__init__()
self.embedding = nn.Embedding(len(TEXT.vocab), 100, padding_idx=TEXT.vocab.stoi[TEXT.pad_token])
self.lstm = nn.LSTM(100, 100, batch_first = True)
self.fc1 = nn.Linear(100, 1)
def forward(self, x):
x = self.embedding(x)
x, hidden = self.lstm(x)
x = x[-1,:,:]
x = torch.sigmoid(self.fc1(x))
return x
net = RNN()
net.embedding.weight.data.copy_(TEXT.vocab.vectors)
for param in net.embedding.parameters():
param.requires_grad = False
net = net.to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(net.parameters())
for epoch in range(2):
print("epoch: {}".format(epoch+1))
running_loss = 0.0
for i, data in enumerate(train_loader, 0):
inputs, labels = data
optimizer.zero_grad()
outputs = net(inputs)
loss = criterion(outputs.squeeze(1), labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
if i % 200 == 199:
print('[%d, %5d] loss: %.3f' %
(epoch + 1, i + 1, running_loss / 200))
running_loss = 0.0
print('Finished Training')
correct = 0
total = 0
with torch.no_grad():
for data in test_loader:
images, labels = data
outputs = net(images)
predicted = torch.round(outputs).reshape(-1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print('Accuracy of the test data: {}'.format(100 * correct / total))