Trying to build a bidirectional LSTM for Name Classification

Hey guys :slight_smile:

After getting to know pytorch with some of its tutorials (especially Classifying Names with an RNN), I now want to build a similar model, but with a bidirectional LSTM.
I tried to fuse these two tutorials together:
http://pytorch.org/tutorials/intermediate/char_rnn_classification_tutorial.html#sphx-glr-intermediate-char-rnn-classification-tutorial-py

http://pytorch.org/docs/0.2.0/_modules/torchvision/datasets/mnist.html

And that’s what I came up with so far:

from __future__ import unicode_literals, print_function, division
from io import open
import glob
import unicodedata
import string
import torch
import torch.nn as nn
from torch.autograd import Variable
import random
import time
import math
import matplotlib.pyplot as plt


sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 18
batch_size = 100
num_epochs = 2
learning_rate = 0.003



# ---------------- Loading and preprocessing ----------------

def findFiles(path): return glob.glob(path)

# ------------------------------------
# Data looks like this:
#
# names
# -> Arabic.txt
#   -> Khoury
#      Nahas
#      Daher
#      ...
# -> Chinese.txt
#      ...
# ------------------------------------

all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)

def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )

category_lines = {}
all_categories = []

def readLines(filename):
    lines = open(filename, encoding='utf-8').read().strip().split('\n')
    return [unicodeToAscii(line) for line in lines]

for filename in findFiles('names/*.txt'):
    category = filename.split('/')[-1].split('.')[0]
    all_categories.append(category)
    lines = readLines(filename)
    category_lines[category] = lines

n_categories = len(all_categories)

def letterToIndex(letter):
    return all_letters.find(letter)

def lineToTensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for li, letter in enumerate(line):
        tensor[li][0][letterToIndex(letter)] = 1
    return tensor

class BiRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        # Set initial states
        h0 = Variable(torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size))  # 2 for bidirection
        c0 = Variable(torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size))

        # Forward propagate RNN
        out, _ = self.lstm(x, (h0, c0))

        # Decode hidden state of last time step
        out = self.fc(out[:, -1, :])

        return out

rnn = BiRNN(input_size, hidden_size, num_layers, num_classes)


def categoryFromOutput(output):
    top_n, top_i = output.data.topk(1) # Tensor out of Variable with .data
    category_i = top_i[0][0]
    return all_categories[category_i], category_i


# ---------------------- Random Inputs ----------------------

def randomChoice(l):
    return l[random.randint(0, len(l) - 1)]

def randomTrainingExample():
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    category_tensor = Variable(torch.LongTensor([all_categories.index(category)]))
    line_tensor = Variable(lineToTensor(line))
    return category, line, category_tensor, line_tensor


# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)


# Train the Model
def train(category_tensor, line_tensor):

    optimizer.zero_grad()
    for i in range(line_tensor.size()[0]):
        output = rnn(line_tensor)

    loss = criterion(output, category_tensor)
    loss.backward()
    optimizer.step()
    return output, loss.data[0]


n_iters = 100000
print_every = 5000
plot_every = 1000

# Keep track of losses for plotting
current_loss = 0
all_losses = []

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

start = time.time()

for iter in range(1, n_iters + 1):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output, loss = train(category_tensor, line_tensor)
    current_loss += loss

# Print iter number, loss, name and guess
    if iter % print_every == 0:
        guess, guess_i = categoryFromOutput(output)
        correct = '✓' if guess == category else '✗ (%s)' % category
        print('%d %d%% (%s) %.4f %s / %s %s' % (iter, iter / n_iters * 100, timeSince(start), loss, line, guess, correct))

    # Add current loss avg to list of losses
    if iter % plot_every == 0:
        all_losses.append(current_loss / plot_every)
        current_loss = 0


plt.figure()
plt.plot(all_losses)

I’m still learning so much about deep learning and I sometimes have big issues with it… But I try my best so please don’t let your eyes bleed when examining this code :smiley:
My error is this here:

Traceback (most recent call last):
  File "/home/erika/PycharmProjects/git_lstm_class_ex/biRNN-test.py", line 158, in <module>
    output, loss = train(category_tensor, line_tensor)
  File "/home/erika/PycharmProjects/git_lstm_class_ex/biRNN-test.py", line 131, in train
    output = rnn(line_tensor)
  File "/home/erika/.local/lib/python3.5/site-packages/torch/nn/modules/module.py", line 357, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/erika/PycharmProjects/git_lstm_class_ex/biRNN-test.py", line 92, in forward
    out, _ = self.lstm(x, (h0, c0))
  File "/home/erika/.local/lib/python3.5/site-packages/torch/nn/modules/module.py", line 357, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/erika/.local/lib/python3.5/site-packages/torch/nn/modules/rnn.py", line 190, in forward
    self.check_forward_args(input, hx, batch_sizes)
  File "/home/erika/.local/lib/python3.5/site-packages/torch/nn/modules/rnn.py", line 141, in check_forward_args
    fn.input_size, input.size(-1)))
NameError: name 'fn' is not defined

It looks like that my line_tensor should be like this: < 7 x 28 x 28 >
But I’m not sure how to get to the point where dimension 2 and 3 have the same value, because every word I try to feed in the RNN has another length.

Thank you very much, if you read until here. Maybe you would be so kind to help me?

Greetings!