Hi, I implemented a many-to-one RNN to predict a score from a given sequence.
The model performs regression on the hidden vector of the last RNN (GRU) layer at the last time step, and the input sequences have varying lengths.
The code runs without errors and I trained the model. After training, I found that the model's output does not change even when the inputs differ.
Here is the code that defines the model.
# Hyper-parameters for the model: embedding width, GRU hidden units per
# direction, and number of stacked GRU layers.
embedding_dim, hidden_size, num_layer = 200, 200, 2
class RNNReg(nn.Module):
    """Many-to-one GRU regressor over variable-length token sequences.

    Token ids are embedded, packed according to their true lengths and run
    through a multi-layer (optionally bidirectional) GRU. The final hidden
    state of the top GRU layer -- both directions concatenated when the GRU
    is bidirectional -- is mapped to one scalar per sequence by a linear
    layer.

    The embedding table is sized from the module-level names ``vocaNum`` and
    ``embedding_dim``.

    Args:
        num_layer: Number of stacked GRU layers.
        hidden_size: Number of hidden units per GRU direction.
        bidirectional: Whether the GRU processes the sequence in both
            directions.
    """

    def __init__(self, num_layer=2, hidden_size=200, bidirectional=True):
        super(RNNReg, self).__init__()
        self.num_layer = num_layer
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1

        # Index 0 is the padding token; each looked-up embedding vector is
        # renormalised to have norm at most 1.
        self.embedding = nn.Embedding(
            vocaNum, embedding_dim, padding_idx=0, max_norm=1
        )
        self.gru = nn.GRU(
            input_size=embedding_dim,
            hidden_size=hidden_size,
            # Must be the constructor argument: forward() slices the hidden
            # state assuming the GRU has exactly ``num_layer`` layers.
            num_layers=num_layer,
            # forward() packs batch-first; keep the GRU's flag consistent.
            # (The flag does not affect packed inputs or the layout of h_n.)
            batch_first=True,
            bidirectional=bidirectional,
            # Inter-layer dropout is a no-op (and only warns) with one layer.
            dropout=0.2 if num_layer > 1 else 0.0,
        )
        self.fc = nn.Linear(
            in_features=self.num_directions * hidden_size, out_features=1
        )

    def forward(self, x, lengths):
        """Predict one score per sequence.

        Args:
            x: Batch-first tensor of token ids, padded with index 0.
            lengths: True length of every sequence in ``x``, in the order
                expected by ``pack_padded_sequence`` (longest first unless
                the caller's PyTorch version sorts for them).

        Returns:
            Tensor of shape ``(batch, 1)`` holding the regression output.
        """
        embedded = self.embedding(x)
        packed = pack_padded_sequence(embedded, lengths, batch_first=True)
        _, hidden = self.gru(packed)

        # h_n is laid out as (num_layer * num_directions, batch, hidden);
        # the trailing ``num_directions`` rows belong to the top layer.
        top = hidden[-self.num_directions:]
        # -> (batch, num_directions, hidden) -> (batch, num_directions * hidden)
        top = top.permute(1, 0, 2).contiguous()
        features = top.view(-1, self.num_directions * self.hidden_size)
        return self.fc(features)
And here is the test code that computes the output for the sequence stored in the variable `test`.
# Run the trained regressor ``reg`` on a single hand-written sentence.
test = u"영화 재미 있다"
# Split the sentence into tokens (presumably morphemes -- confirm against the
# tokenizer bound to ``twitter``).
test = twitter.morphs(test)
# Map tokens to vocabulary indices; out-of-vocabulary tokens become <UNK>.
# The result is a batch containing one sequence.
test = [[voca2index[word] if word in voca2index else voca2index['<UNK>']
         for word in test]]
# Echo the sentence as the model will see it (after <UNK> substitution).
print(" ".join([index2voca[t] for t in test[0]]))
test, lengths = addPad(test)
test = Variable(test).cuda()
# Switch to evaluation mode so the GRU's dropout is disabled; otherwise the
# prediction is perturbed by random dropout masks on every call.
reg.eval()
predict = reg(test, lengths)
print(predict)
How can I solve this problem?