Hello, I am using a character-level RNN to classify names. I am essentially following the example given in the official PyTorch documentation, with minor modifications.
However, the overall accuracy is not improving (it jumps between 10 and 30%), and I can't figure out why.
Here is module class:
class Model(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(Model, self).__init__()
self.in_size = input_size
self.h_size = hidden_size
self.o_size = output_size
# input to hidden layer
self.i2h = nn.Linear(input_size+hidden_size, hidden_size)
self.i2o = nn.Linear(input_size+hidden_size, output_size)
def forward(self, inp, hidden):
combined = torch.cat((inp, hidden), 1)
hidden_unit = self.i2h(combined)
output_unit = F.log_softmax(self.i2o(combined), dim=1)
return output_unit, hidden_unit
def initialize_hidden_layer(self):
return Variable(torch.zeros(1, self.h_size))```
This is what my training function looks like:
def train_model(model, epoch, features, labels):
count = 0
accuracy = 0
total_loss = 0
for (feature, target) in zip(features, labels):
hidden = model.initialize_hidden_layer()
class_index = [i for i in range(len(classes_names)) if target==classes_names[i]][0]
class_tensor = Variable(torch.LongTensor([class_index]))
word_tensor = Variable(word_to_tensor(feature))
for i in range(word_tensor.size()[0]):
predicted_output, hidden = model(word_tensor[i], hidden)
loss = criterion(predicted_output, class_tensor)
predicted_label = find_label(predicted_output)
total_loss += loss.data.item()
if predicted_label[0] == target:
accuracy += 1
optimizer.zero_grad()
#backpropagation
loss.backward()
#update params
optimizer.step()
count += 1
print ("count ", count)
print ("accurate predictions ", accuracy)
print ("total accuracy ", (accuracy/len(features)*100))
print ("total loss ", total_loss)
def get_all_examples():
labels = []
features = []
for k, v in classes_dict.items():
for name in v:
labels.append(k)
features.append(name)
return features, labels
features, labels = get_all_examples()
shuffle_list = list(zip(features, labels))
random.shuffle(shuffle_list)
features, labels = zip(*shuffle_list)
model.train()
for epoch in range(10):
train_model(model, epoch, features, labels)
The other auxiliary functions are the same as those in the documentation.