Hey guys
After getting to know pytorch with some of its tutorials (especially Classifying Names with an RNN), I now want to build a similar model, but with a bidirectional LSTM.
I tried to fuse these two tutorials together:
http://pytorch.org/tutorials/intermediate/char_rnn_classification_tutorial.html#sphx-glr-intermediate-char-rnn-classification-tutorial-py
http://pytorch.org/docs/0.2.0/_modules/torchvision/datasets/mnist.html
And that’s what I came up with so far:
from __future__ import unicode_literals, print_function, division
from io import open
import glob
import unicodedata
import string
import torch
import torch.nn as nn
from torch.autograd import Variable
import random
import time
import math
import matplotlib.pyplot as plt
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 18
batch_size = 100
num_epochs = 2
learning_rate = 0.003
# ---------------- Loading and preprocessing ----------------
def findFiles(path): return glob.glob(path)
# ------------------------------------
# Data looks like this:
#
# names
# -> Arabic.txt
# -> Khoury
# Nahas
# Daher
# ...
# -> Chinese.txt
# ...
# ------------------------------------
all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)
def unicodeToAscii(s):
return ''.join(
c for c in unicodedata.normalize('NFD', s)
if unicodedata.category(c) != 'Mn'
and c in all_letters
)
category_lines = {}
all_categories = []
def readLines(filename):
lines = open(filename, encoding='utf-8').read().strip().split('\n')
return [unicodeToAscii(line) for line in lines]
for filename in findFiles('names/*.txt'):
category = filename.split('/')[-1].split('.')[0]
all_categories.append(category)
lines = readLines(filename)
category_lines[category] = lines
n_categories = len(all_categories)
def letterToIndex(letter):
return all_letters.find(letter)
def lineToTensor(line):
tensor = torch.zeros(len(line), 1, n_letters)
for li, letter in enumerate(line):
tensor[li][0][letterToIndex(letter)] = 1
return tensor
class BiRNN(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, num_classes):
super(BiRNN, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
batch_first=True, bidirectional=True)
self.fc = nn.Linear(hidden_size * 2, num_classes)
def forward(self, x):
# Set initial states
h0 = Variable(torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size)) # 2 for bidirection
c0 = Variable(torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size))
# Forward propagate RNN
out, _ = self.lstm(x, (h0, c0))
# Decode hidden state of last time step
out = self.fc(out[:, -1, :])
return out
rnn = BiRNN(input_size, hidden_size, num_layers, num_classes)
def categoryFromOutput(output):
top_n, top_i = output.data.topk(1) # Tensor out of Variable with .data
category_i = top_i[0][0]
return all_categories[category_i], category_i
# ---------------------- Random Inputs ----------------------
def randomChoice(l):
return l[random.randint(0, len(l) - 1)]
def randomTrainingExample():
category = randomChoice(all_categories)
line = randomChoice(category_lines[category])
category_tensor = Variable(torch.LongTensor([all_categories.index(category)]))
line_tensor = Variable(lineToTensor(line))
return category, line, category_tensor, line_tensor
# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
# Train the Model
def train(category_tensor, line_tensor):
optimizer.zero_grad()
for i in range(line_tensor.size()[0]):
output = rnn(line_tensor)
loss = criterion(output, category_tensor)
loss.backward()
optimizer.step()
return output, loss.data[0]
n_iters = 100000
print_every = 5000
plot_every = 1000
# Keep track of losses for plotting
current_loss = 0
all_losses = []
def timeSince(since):
now = time.time()
s = now - since
m = math.floor(s / 60)
s -= m * 60
return '%dm %ds' % (m, s)
start = time.time()
for iter in range(1, n_iters + 1):
category, line, category_tensor, line_tensor = randomTrainingExample()
output, loss = train(category_tensor, line_tensor)
current_loss += loss
# Print iter number, loss, name and guess
if iter % print_every == 0:
guess, guess_i = categoryFromOutput(output)
correct = '✓' if guess == category else '✗ (%s)' % category
print('%d %d%% (%s) %.4f %s / %s %s' % (iter, iter / n_iters * 100, timeSince(start), loss, line, guess, correct))
# Add current loss avg to list of losses
if iter % plot_every == 0:
all_losses.append(current_loss / plot_every)
current_loss = 0
plt.figure()
plt.plot(all_losses)
I’m still learning so much about deep learning and I sometimes have big issues with it… But I try my best so please don’t let your eyes bleed when examining this code
My error is this here:
Traceback (most recent call last):
File "/home/erika/PycharmProjects/git_lstm_class_ex/biRNN-test.py", line 158, in <module>
output, loss = train(category_tensor, line_tensor)
File "/home/erika/PycharmProjects/git_lstm_class_ex/biRNN-test.py", line 131, in train
output = rnn(line_tensor)
File "/home/erika/.local/lib/python3.5/site-packages/torch/nn/modules/module.py", line 357, in __call__
result = self.forward(*input, **kwargs)
File "/home/erika/PycharmProjects/git_lstm_class_ex/biRNN-test.py", line 92, in forward
out, _ = self.lstm(x, (h0, c0))
File "/home/erika/.local/lib/python3.5/site-packages/torch/nn/modules/module.py", line 357, in __call__
result = self.forward(*input, **kwargs)
File "/home/erika/.local/lib/python3.5/site-packages/torch/nn/modules/rnn.py", line 190, in forward
self.check_forward_args(input, hx, batch_sizes)
File "/home/erika/.local/lib/python3.5/site-packages/torch/nn/modules/rnn.py", line 141, in check_forward_args
fn.input_size, input.size(-1)))
NameError: name 'fn' is not defined
It looks like that my line_tensor should be like this: < 7 x 28 x 28 >
But I’m not sure how to get to the point where dimension 2 and 3 have the same value, because every word I try to feed in the RNN has another length.
Thank you very much, if you read until here. Maybe you would be so kind to help me?
Greetings!