Hello guys!
I’m currently trying to code a bidirectional LSTM for name classification. I got an error which basically tells me, that in
input = Variable(input.view(len(input), sequence_length, input_size))
sequence_length and input_size must be the same length. But that is not possible, because every name of the data I use has a different length. Is there anything I can do differently? Or is it okay, that I also change len(input), so that it could work?
I’m pretty new to working with pytorch and building deep learning models, so yeah, I have no idea what I’m doing
Here is some code (after preprocessing data):
def letterToIndex(letter):
return all_letters.find(letter)
def lineToTensor(line):
tensor = torch.zeros(len(line), 1, n_letters)
for li, letter in enumerate(line):
tensor[li][0][letterToIndex(letter)] = 1
return tensor
class BiRNN(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, num_classes):
super(BiRNN, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
batch_first=True, bidirectional=True)
self.fc = nn.Linear(hidden_size * 2, num_classes)
def forward(self, x):
# Set initial states
h0 = Variable(torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size)) # 2 for bidirection
c0 = Variable(torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size))
# Forward propagate RNN
out, _ = self.lstm(x, (h0, c0))
# Decode hidden state of last time step
out = self.fc(out[:, -1, :])
return out
rnn = BiRNN(input_size, hidden_size, num_layers, num_classes)
def categoryFromOutput(output):
top_n, top_i = output.data.topk(1) # Tensor out of Variable with .data
category_i = top_i[0][0]
return all_categories[category_i], category_i
# ---------------------- Random Inputs ----------------------
def randomChoice(l):
return l[random.randint(0, len(l) - 1)]
def randomTrainingExample():
category = randomChoice(all_categories)
line = randomChoice(category_lines[category])
category_tensor = Variable(torch.LongTensor([all_categories.index(category)]))
print(len(line))
line_tensor = Variable(lineToTensor(line))
# Here is the problem: when I just take the line_tensor as it is,
# then
# line_tensor = line_tensor.view() # insert something helpful
# here :D
return category, line, category_tensor, line_tensor
# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
# Train model, gets one label and one name
def train(category_tensor, line_tensor):
optimizer.zero_grad()
for i in range(line_tensor.size()[0]):
output = rnn(line_tensor)
loss = criterion(output, category_tensor)
loss.backward()
optimizer.step()
return output, loss.data[0]
n_iters = 100000
print_every = 5000
plot_every = 1000
# Keep track of losses for plotting
current_loss = 0
all_losses = []
def timeSince(since):
now = time.time()
s = now - since
m = math.floor(s / 60)
s -= m * 60
return '%dm %ds' % (m, s)
start = time.time()
for iter in range(1, n_iters + 1):
category, line, category_tensor, line_tensor = randomTrainingExample()
#print(line_tensor)
output, loss = train(category_tensor, line_tensor)
current_loss += loss
Somehow, I get another error now (ran the code a few hours before and I think I didn’t change anything…)
Here is the error:
Traceback (most recent call last):
File "/home/erika/PycharmProjects/git_lstm_class_ex/biRNN-test.py", line 159, in <module>
output, loss = train(category_tensor, line_tensor)
File "/home/erika/PycharmProjects/git_lstm_class_ex/biRNN-test.py", line 134, in train
loss = criterion(output, category_tensor)
File "/home/erika/.local/lib/python3.5/site-packages/torch/nn/modules/module.py", line 357, in __call__
result = self.forward(*input, **kwargs)
File "/home/erika/.local/lib/python3.5/site-packages/torch/nn/modules/loss.py", line 679, in forward
self.ignore_index, self.reduce)
File "/home/erika/.local/lib/python3.5/site-packages/torch/nn/functional.py", line 1161, in cross_entropy
return nll_loss(log_softmax(input, 1), target, weight, size_average, ignore_index, reduce)
File "/home/erika/.local/lib/python3.5/site-packages/torch/nn/functional.py", line 1052, in nll_loss
return torch._C._nn.nll_loss(input, target, weight, size_average, ignore_index, reduce)
RuntimeError: Assertion `THIndexTensor_(size)(target, 0) == batch_size' failed.
This is interesting, because I don’t use NLLLoss(). But maybe it has something to do with the CrossEntropyLoss()?
Thank you, if you managed to reach this line. And thank you maybe helping me (.-.)