Hello,
I tried modifying a bit the RNN example from the pytorch tutorial to generate names from a given language, not using argmax to select the letter after the last layer’s output but torch.multinomial.
The results are terrible and I am wondering why. I have either made a very dumb mistake somewhere without realiszing or the architecture is fondamentaly wrong.
Can you give me tips on how I could improve this ?
Here are some example of architectures that I tried :
class RNN_debut(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super().__init__()
self.hidden_size = hidden_size
self.h1 = nn.Linear(input_size + hidden_size, hidden_size)
self.h2 = nn.Linear(hidden_size, output_size)
self.dropout = nn.Dropout(0.1)
self.softmax = nn.LogSoftmax(dim = 1)
def forward(self, input, hidden):
input_combined = torch.cat((input,hidden),1)
hidden = self.h1(input_combined)
output = self.h2(hidden)
output = self.dropout(output)
output = self.softmax(output)
return output, hidden
def initHidden(self):
return torch.zeros(1, self.hidden_size)
class RNN_longer(nn.Module):
def __init__(self, input_size, hidden1_size, hidden2_size, output_size):
super().__init__()
self.hidden1_size = hidden1_size
self.h1 = nn.Linear(input_size + hidden1_size, hidden1_size)
self.h2 = nn.Linear(hidden1_size, hidden2_size)
self.h3 = nn.Linear(hidden2_size, output_size)
self.dropout = nn.Dropout(0.1)
self.softmax = nn.LogSoftmax(dim = 1)
def forward(self, input, hidden):
input_combined = torch.cat((input,hidden),1)
hidden = self.h1(input_combined)
hidden2 = self.h2(hidden)
output = self.h3(hidden2)
output = self.dropout(output)
output = self.softmax(output)
return output, hidden
def initHidden(self):
return torch.zeros(1, self.hidden1_size)
class RNN_middle(nn.Module):
def __init__(self, input_size, hidden1_size, hidden2_size, output_size):
super().__init__()
self.hidden1_size = hidden1_size
self.h1 = nn.Linear(input_size, hidden1_size)
self.h2 = nn.Linear(hidden1_size + hidden2_size, hidden2_size)
self.h3 = nn.Linear(hidden2_size, output_size)
self.dropout = nn.Dropout(0.1)
self.softmax = nn.LogSoftmax(dim = 1)
def forward(self, input, hidden):
out1 = self.h1(input)
input_combined = torch.cat((out1,hidden),1)
hidden = self.h2(input_combined)
output = self.h3(hidden)
output = self.dropout(output)
output = self.softmax(output)
return output, hidden
def initHidden(self):
return torch.zeros(1, self.hidden2_size)
These models are then trained, like in the tutorials following :
criterion = nn.NLLLoss()
learning_rate = 0.0005
def train(model,input_line_tensor, target_line_tensor):
target_line_tensor.unsqueeze_(-1)
hidden = model.initHidden()
model.zero_grad()
loss = 0
for i in range(input_line_tensor.size(0)):
output, hidden = model(input_line_tensor[i], hidden)
l = criterion(output, target_line_tensor[i])
loss += l
loss.backward()
for p in model.parameters():
p.data.add_(-learning_rate, p.grad.data)
return loss.item() / input_line_tensor.size(0)
And then I sample from the network with this function :
def sample(start_letter = None):
input = torch.zeros(1, 1, n_letters)
input[0][0][n_letters - 2] = 1 #Start of sentence
output_name = ""
hidden = rnn.initHidden()
for i in range(max_length):
output, hidden = rnn(input[0], hidden)
picked = torch.multinomial(torch.exp(output[0]),num_samples = 1).item()
if picked == n_letters - 1:
break
else:
letter = all_letters[picked]
output_name += letter
input = inputTensor(letter)
return output_name
Do you know how I could improve this(these ?) simple model ?