I am attempting to train an LSTM model to do time series prediction in libtorch. Here is how I set up the model:
#include <torch/torch.h>

struct LSTMPredictorImpl : torch::nn::Module {
    LSTMPredictorImpl(int n_hidden = 51) :
        lstm1(torch::nn::LSTMCell(1, n_hidden)),
        lstm2(torch::nn::LSTMCell(n_hidden, n_hidden)),
        linear1(torch::nn::Linear(n_hidden, 1))
    {
        register_module("lstm1", lstm1);
        register_module("lstm2", lstm2);
        register_module("linear1", linear1);
        this->n_hidden = n_hidden;
    }

    torch::Tensor forward(torch::Tensor x, int future = 0) {
        int n_samples = x.sizes()[0];
        std::vector<torch::Tensor> outputs;

        // Initial hidden/cell state pairs for both LSTM cells.
        std::tuple<torch::Tensor, torch::Tensor> hc_t1(
            torch::zeros({n_samples, n_hidden}),
            torch::zeros({n_samples, n_hidden}));
        std::tuple<torch::Tensor, torch::Tensor> hc_t2(
            torch::zeros({n_samples, n_hidden}),
            torch::zeros({n_samples, n_hidden}));

        torch::Tensor output;

        // Split the input into one {n_samples, 1} slice per time step.
        std::vector<torch::Tensor> separated = x.split(1, 1);
        for (torch::Tensor input_t : separated) {
            hc_t1 = lstm1(input_t, hc_t1);
            hc_t2 = lstm2(std::get<0>(hc_t1), hc_t2);
            output = linear1(std::get<0>(hc_t2));
            outputs.push_back(output);
        }

        // Keep predicting past the end of the input by feeding the
        // previous output back in as the next input.
        for (int i = 0; i < future; i++) {
            hc_t1 = lstm1(output, hc_t1);
            hc_t2 = lstm2(std::get<0>(hc_t1), hc_t2);
            output = linear1(std::get<0>(hc_t2));
            outputs.push_back(output);
        }

        // Concatenate the per-time-step slices back along dim 1.
        torch::TensorList intermediate = torch::TensorList(separated);
        torch::Tensor final_output = torch::cat(intermediate, 1);
        final_output.requires_grad_(true);
        return final_output;
    }

    torch::nn::LSTMCell lstm1, lstm2;
    torch::nn::Linear linear1;
    int n_hidden;
};
TORCH_MODULE(LSTMPredictor);
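
For reference, I exercise the module on its own roughly like this (the shapes here are illustrative, not my real data):

// Quick smoke test with made-up shapes:
LSTMPredictor model;
auto x = torch::randn({4, 100});      // 4 sequences, 100 time steps each
auto y = model->forward(x, 10);       // also predict 10 steps past the input
std::cout << y.sizes() << std::endl;  // print the output shape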
And here is my training loop:
void time_series_train(torch::Tensor data) {
    // Rows 3.. are the training sequences; rows 0..2 are held out for testing.
    // Inputs drop the last time step, targets drop the first (shift by one).
    torch::Tensor train_input = data.index({
        torch::indexing::Slice(3),
        torch::indexing::Slice(0, -1)
    });
    torch::Tensor train_target = data.index({
        torch::indexing::Slice(3),
        torch::indexing::Slice(1)
    });
    torch::Tensor test_input = data.index({
        torch::indexing::Slice(0, 3),
        torch::indexing::Slice(0, -1)
    });
    torch::Tensor test_target = data.index({
        torch::indexing::Slice(0, 3),
        torch::indexing::Slice(1)
    });

    LSTMPredictor predictor;
    torch::optim::LBFGS LSTM_optimizer(
        predictor->parameters(), torch::optim::LBFGSOptions(0.8)
    );

    // LBFGS may re-evaluate this closure several times per step.
    auto cost = [&]() {
        LSTM_optimizer.zero_grad();
        auto out = predictor->forward(train_input);
        auto loss = torch::mse_loss(out, train_target);
        std::cout << "loss: " << loss.item<float>() << std::endl;
        loss.backward();
        return loss;
    };

    int n_training_steps = 10;
    for (int i = 0; i < n_training_steps; i++) {
        printf("step: %d\n", i);
        LSTM_optimizer.step(cost);
    }
}
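
For completeness, here is roughly how I build the data and call the function; the sine-wave generator is a simplified stand-in for my real input, so the exact numbers are placeholders:

int main() {
    // Placeholder data: 100 sine sequences of length 1000 with random phase shifts
    int n_seq = 100, seq_len = 1000;
    auto t = torch::arange(0, seq_len, torch::kFloat32).unsqueeze(0).repeat({n_seq, 1});
    auto phase = torch::randint(-4 * seq_len, 4 * seq_len, {n_seq, 1}).to(torch::kFloat32);
    auto data = torch::sin((t + phase) / 20.0);
    time_series_train(data);
    return 0;
}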
When I run the training loop, the loss never changes:
step: 0
loss: 0.291023
step: 1
loss: 0.291023
step: 2
loss: 0.291023
step: 3
loss: 0.291023
step: 4
loss: 0.291023
step: 5
loss: 0.291023
step: 6
loss: 0.291023
step: 7
loss: 0.291023
step: 8
loss: 0.291023
step: 9
loss: 0.291023
I suspect it has something to do with this line at the end of the forward function:

final_output.requires_grad_(true);

but leaving that line out causes autograd to throw an error during loss.backward().
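
To be concrete, this is the variant of the end of forward() that fails (a minimal sketch; everything above these lines is unchanged):

// Same tail of forward(), minus the requires_grad_ call:
torch::TensorList intermediate = torch::TensorList(separated);
torch::Tensor final_output = torch::cat(intermediate, 1);
return final_output;  // with this version, loss.backward() throws an autograd error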
Please let me know what I’m doing wrong, thank you.