I am trying sequence tagging on CoNLL 2003. I followed the sequence tagging tutorial in the PyTorch documentation and arrived at the code below. I am stuck at the loss function.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import math
import numpy as np
# Variable initialization
x, y = [], []
data = []

# Hyperparameters
input_dim = 200
hidden_dim = 128
num_layers = 1
batch_size = 5
learning_rate = 0.01
num_epochs = 5
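# conll_2003.txt is assumed to be in the standard CoNLL-2003 layout:
# one "word POS chunk NER" line per token (e.g. "EU NNP B-NP B-ORG"),
# a blank line between sentences, and -DOCSTART- lines between documents.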
def prepare_data():
    global data
    data = open('conll_2003.txt').read()
    seq_lengths = []
    x_text = ""
    y_label = ""
    for line in data.split("\n"):
        if len(line) > 0:
            tokens = line.split(" ")
            # Skip document separators; otherwise collect the word (column 0)
            # and the NER tag (column 3)
            if tokens[0] != "-DOCSTART-":
                x_text = x_text + tokens[0] + " "
                y_label = y_label + tokens[3] + " "
        else:
            # A blank line marks the end of a sentence; keep sentences
            # with 1..199 tokens
            words = x_text.strip().split()
            if 0 < len(words) < 200:
                x.append(np.array(words))
                y.append(np.array(y_label.strip().split()))
                seq_lengths.append(len(words))
            x_text = ""
            y_label = ""
    return x, y
x, y = prepare_data()
x = np.array(x, dtype=object)   # ragged sentence arrays -> object array
y = np.array(y, dtype=object)
num_batches = math.ceil(len(x) / batch_size)   # count sentences, not characters
word_to_ix_obj = {'<PAD>': 0}
tag_to_ix_obj = {'<PAD>': 0}
def word_to_ix():
    for item in x:
        for word in item:
            if word not in word_to_ix_obj:
                word_to_ix_obj[str(word)] = len(word_to_ix_obj)

def tag_to_ix():
    for item in y:
        for tag in item:
            if tag not in tag_to_ix_obj:
                tag_to_ix_obj[tag] = len(tag_to_ix_obj)

word_to_ix()
tag_to_ix()
def convert_words_to_ix(inputs, ix_map):
    return [prepare_sequence(text, ix_map) for text in inputs]

def prepare_sequence(seq, to_ix):
    return [to_ix[w] for w in seq]
def prepare_batch(index):
    batch_x, batch_y = batch_iterator(index)
    batch_x = convert_words_to_ix(batch_x, word_to_ix_obj)
    batch_y = convert_words_to_ix(batch_y, tag_to_ix_obj)
    # Pad every word sequence in the batch up to the longest one
    x_lengths = [len(sentence) for sentence in batch_x]
    longest_sentence = max(x_lengths)
    pad_token = word_to_ix_obj['<PAD>']
    padded_x = np.full((batch_size, longest_sentence), pad_token, dtype=int)
    for i, x_length in enumerate(x_lengths):
        padded_x[i, :x_length] = batch_x[i][:x_length]
    # Same padding for the tag sequences
    y_lengths = [len(sentence) for sentence in batch_y]
    pad_token = tag_to_ix_obj['<PAD>']
    padded_y = np.full((batch_size, longest_sentence), pad_token, dtype=int)
    for i, y_length in enumerate(y_lengths):
        padded_y[i, :y_length] = batch_y[i][:y_length]
    return padded_x, padded_y
def batch_iterator(index):
    # index is 0-based, since the training loop uses range(num_batches)
    start_index = index * batch_size
    end_index = start_index + batch_size
    return x[start_index:end_index], y[start_index:end_index]
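# For reference: prepare_batch() hands back two (batch_size, longest_sentence)
# int arrays -- (5, 31) here, since the longest sentence in this batch has 31 tokens.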
class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, batch_size, vocab_size, num_layers=1):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.output_dim = output_dim
        self.word_embeddings = nn.Embedding(vocab_size, input_dim)
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers, batch_first=True)
        self.linear = nn.Linear(self.hidden_dim, self.output_dim)

    def forward(self, x):
        input_tensor = torch.LongTensor(x)
        h0 = torch.zeros(self.num_layers, self.batch_size, self.hidden_dim)
        c0 = torch.zeros(self.num_layers, self.batch_size, self.hidden_dim)
        embeds = self.word_embeddings(input_tensor)
        lstm_out, _ = self.lstm(embeds, (h0, c0))   # (batch, seq_len, hidden_dim)
        tag_space = self.linear(lstm_out)           # (batch, seq_len, tagset_size)
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores
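# Shape check for one batch (batch_size 5, longest sentence 31, 10 tags incl. <PAD>):
# embeds:     (5, 31, 200)
# lstm_out:   (5, 31, 128)
# tag_scores: (5, 31, 10)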
model = LSTM(input_dim, hidden_dim, len(tag_to_ix_obj), batch_size, len(word_to_ix_obj))
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for i in range(num_batches):
        model.zero_grad()
        input_x, input_y = prepare_batch(i)
        tag_scores = model(input_x)
        print("tag_scores shape:", tag_scores.shape)   # torch.Size([5, 31, 10])
        input_y = torch.from_numpy(input_y)
        print("input_y shape:", input_y.shape)         # torch.Size([5, 31])
        input_y = input_y.float()
        print("type of input_y:", input_y.type())      # torch.FloatTensor
        loss = loss_function(tag_scores, input_y)      # <-- fails here
        loss.backward()
        optimizer.step()
batch_size is 5 and the maximum sequence length in this batch is 31, so tag_scores comes out as (5, 31, 10) and input_y as (5, 31). When I run the above, the loss call fails with:
~/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
1798 if target.size()[1:] != input.size()[2:]:
1799 raise ValueError('Expected target size {}, got {}'.format(
-> 1800 out_size, target.size()))
1801 input = input.contiguous().view(n, c, 1, -1)
1802 target = target.contiguous().view(n, 1, -1)
ValueError: Expected target size (5, 10), got torch.Size([5, 31])
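If I read the nn.NLLLoss docs correctly, for a 3-D input of shape (N, C, d) it expects an (N, d) target, so my (5, 31, 10) scores are apparently being interpreted as 31 classes. Is the fix to take the log-softmax over the tag dimension (dim=2 instead of dim=1 inside forward) and flatten both tensors before the loss, along these lines? (Using ignore_index for my <PAD> tag is my own guess.)

# inside forward():
tag_scores = F.log_softmax(tag_space, dim=2)           # normalize over the tagset, not the sequence

# at the loss call:
loss_function = nn.NLLLoss(ignore_index=tag_to_ix_obj['<PAD>'])   # skip padded positions
loss = loss_function(tag_scores.view(-1, tag_scores.shape[-1]),   # (5*31, 10)
                     input_y.view(-1).long())                     # (5*31,) LongTensor target

Or is there a more idiomatic way to compute NLLLoss over padded sequences?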