I’m trying to convert the following simple model from Python to C++. The training loop runs, but I suspect I’m not handling the hidden state correctly, because I’m not getting good results. Would someone mind checking my code?
Python:
class Net(nn.Module):
    """Stacked RNN (LSTM or GRU) followed by a per-timestep linear head.

    Args:
        featurelen: number of input features per timestep.
        outputlen:  number of outputs per timestep.
        hwidth:     hidden width of the RNN (defaults to 12 when None).
        nhidden:    number of stacked RNN layers.
        rnntype:    'lstm' or 'gru' — selects the recurrent cell class.
    """

    def __init__(self, featurelen, outputlen, hwidth=None, nhidden=4, rnntype='lstm'):
        super(Net, self).__init__()
        self.hidden_width = 12 if hwidth is None else hwidth
        self.nhidden = nhidden
        # Last hidden state returned by the RNN; stored for inspection only,
        # it is never fed back into the next forward pass.
        self.hidden = None
        self.rnn1 = {'lstm': nn.LSTM, 'gru': nn.GRU}[rnntype](
            featurelen,
            self.hidden_width,
            num_layers=self.nhidden,
            batch_first=True)
        self.dense1 = nn.Linear(self.hidden_width, outputlen)

    def forward(self, x):
        # Note: rnn1 is called WITHOUT an initial hidden state, so every
        # forward pass starts from zeros. The returned hidden state is
        # stored but not reused — this is the behavior the C++ port must match.
        x, self.hidden = self.rnn1(x)
        # The linear layer is applied to every timestep (x is
        # (batch, seq, hidden_width) because batch_first=True).
        return self.dense1(x)
And in C++:
// C++ port of the Python `Net` (GRU variant): stacked GRU + per-timestep
// linear head.
struct myNet : torch::nn::Module {
  myNet(int input_size, int output_size,
        int hidden_width = 12,
        int recursive_layers = 4) {
    recurrent = register_module("recurrent",
        torch::nn::GRU(
            torch::nn::GRUOptions(input_size, hidden_width)
                .num_layers(recursive_layers)
                .batch_first(true)));
    output = register_module("output",
        torch::nn::Linear(hidden_width, output_size));
  }

  torch::Tensor forward(torch::Tensor x) {
    // FIX: the Python model calls `self.rnn1(x)` with NO initial hidden
    // state, so every forward pass starts from zeros. The previous C++ code
    // passed the stored `hidden` back in, which (a) diverges from the Python
    // model and (b) keeps the autograd graph of the previous batch alive
    // through `hidden`, so during training backward() either fails
    // ("trying to backward through the graph a second time") or leaks state
    // across batches. Omitting the second argument uses the default
    // (zero) initial hidden state, matching Python.
    std::tie(x, hidden) = recurrent->forward(x);
    // As in Python, the linear layer is applied to every timestep
    // (x is (batch, seq, hidden_width) because batch_first=true).
    x = output->forward(x);
    return x;
  }

  // Last hidden state, stored for inspection only — never fed back in.
  // If you later want truncated BPTT (carrying state across batches),
  // store `hidden.detach()` and pass it as the second forward() argument.
  torch::Tensor hidden;
  /*
   * See https://pytorch.org/tutorials/advanced/cpp_frontend.html
   * for why nullptr.
   */
  torch::nn::GRU recurrent{nullptr};
  torch::nn::Linear output{nullptr};
};