I’m working on a very simple RNN model, and my input consists of variable-length sentences. Every example I’ve seen for this issue uses nn.RNN, nn.GRU, or nn.LSTM, which accept a PackedSequence directly; however, I’m defining my own model, so I don’t know how to feed it the packed sequence. Below is the relevant part of my code:
x_packed = torch.nn.utils.rnn.pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)
h = rnn1.initHidden()
output, hidden = rnn1(x_packed, h)
import torch
import torch.nn as nn

n_hidden = 200

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.x2h = nn.Linear(input_size, hidden_size)   # input-to-hidden
        self.h2h = nn.Linear(hidden_size, hidden_size)  # hidden-to-hidden
        self.h2o = nn.Linear(hidden_size, output_size)  # hidden-to-output
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input, hidden_):
        # One time step: combine the current word with the previous hidden state
        hidden1 = self.x2h(input)
        hidden2 = self.h2h(hidden_)
        hidden = hidden1 + hidden2
        output = self.h2o(hidden)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

rnn1 = RNN(n_vocab, n_hidden, n_vocab)
This code throws an error, and I suspect it’s because I’m passing all the words at once. The problem is that I don’t know how to access each word inside the PackedSequence while still exploiting the advantages of the packing.
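From reading the PackedSequence docs, my understanding is that packed.data is laid out time-step-major and that packed.batch_sizes[t] says how many sequences are still active at step t, so I sketched the loop below. It is untested, run_packed is just a helper name I made up, and I’m not sure the unsorting at the end is right:

def run_packed(rnn, packed, hidden_size, device):
    # Sketch only: assumes packed.data is time-major and sorted by decreasing length
    max_batch = int(packed.batch_sizes[0])
    hidden = torch.zeros(max_batch, hidden_size, device=device)
    outputs = []
    offset = 0
    for bs in packed.batch_sizes.tolist():
        step_input = packed.data[offset:offset + bs]        # words of the still-active sequences
        out, h_active = rnn(step_input, hidden[:bs])        # step only the active sequences
        hidden = torch.cat([h_active, hidden[bs:]], dim=0)  # finished sequences keep their last state
        outputs.append(out)
        offset += bs
    # the packed batch is sorted by length; restore the original batch order
    if packed.unsorted_indices is not None:
        hidden = hidden[packed.unsorted_indices]
    return outputs, hidden

Is something like this the intended way, or is there a cleaner approach?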
My train function, which was working before the packing, is as follows:
def train(text_x_tensor1, label1):
    text_x_tensor1, label1 = text_x_tensor1.to(device), label1.to(device)
    rnn1.train()
    hidden_1 = rnn1.initHidden()
    hidden_1 = hidden_1.to(device)
    text_x_tensor1 = text_x_tensor1.permute(1, 0, 2)  # (seq_len, batch, input_size)
    for i in range(len(text_x_tensor1)):  # for each word
        output_1, hidden_1 = rnn1(text_x_tensor1[i], hidden_1)
    loss1 = criterion(output_1, label1)  # loss on the last step's output
    optimizer1.zero_grad()
    loss1.backward()
    torch.nn.utils.clip_grad_norm_(rnn1.parameters(), 1)
    optimizer1.step()
    return output_1, loss1, hidden_1
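For what it’s worth, the only workaround I could come up with is to unpack right away with pad_packed_sequence and mask the finished sequences by hand, as in the sketch below (also untested), but that seems to throw away the whole benefit of packing:

# Workaround sketch: unpack back to a padded tensor and mask manually
x_padded, out_lengths = torch.nn.utils.rnn.pad_packed_sequence(x_packed, batch_first=True)
hidden_1 = rnn1.initHidden().expand(x_padded.size(0), -1).contiguous().to(device)
for t in range(x_padded.size(1)):  # for each time step
    out_t, h_new = rnn1(x_padded[:, t, :].to(device), hidden_1)
    active = (t < out_lengths).unsqueeze(1).to(device)  # sequences still running at step t
    hidden_1 = torch.where(active, h_new, hidden_1)     # freeze finished sequences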
Hoping you can help me,
Marco