I’ve been attempting to learn libtorch by converting this time sequence prediction model to c++: examples/time_sequence_prediction at main · pytorch/examples (github.com)
Using this page as a reference for C++ syntax: Using the PyTorch C++ Frontend — PyTorch Tutorials 1.11.0+cu102 documentation
So far I believe I have successfully set up the model:
struct LSTMPredictorImpl : torch::nn::Module {
LSTMPredictorImpl(int n_hidden = 51) :
lstm1(torch::nn::LSTMCell(1, n_hidden)),
lstm2(torch::nn::LSTMCell(n_hidden, n_hidden)),
linear1(torch::nn::Linear(n_hidden, 1))
{
register_module("lstm1", lstm1);
register_module("lstm2", lstm2);
register_module("linear1", linear1);
this->n_hidden = n_hidden;
}
torch::Tensor forward(torch::Tensor x, int future = 0) {
int n_samples = x.sizes()[0];
std::vector<torch::Tensor> outputs;
std::tuple<torch::Tensor, torch::Tensor> hc_t1(
torch::zeros({n_samples, n_hidden}),
torch::zeros({n_samples, n_hidden}));
std::tuple<torch::Tensor, torch::Tensor> hc_t2(
torch::zeros({n_samples, n_hidden}),
torch::zeros({n_samples, n_hidden}));
torch::Tensor output;
std::vector<torch::Tensor> separated = x.split(1, 1);
for (torch::Tensor input_t : separated) {
hc_t1 = lstm1(input_t, hc_t1);
hc_t2 = lstm2(std::get<0>(hc_t1), hc_t2);
output = linear1(std::get<0>(hc_t2));
outputs.push_back(output);
}
for (int i = 0; i < future; i++) {
hc_t1 = lstm1(output, hc_t1);
hc_t2 = lstm2(std::get<0>(hc_t1), hc_t2);
output = linear1(std::get<0>(hc_t2));
outputs.push_back(output);
}
torch::TensorList intermediate = torch::TensorList(separated);
torch::Tensor final_output = torch::cat(intermediate, 1);
return final_output;
}
torch::nn::LSTMCell lstm1, lstm2;
torch::nn::Linear linear1;
int n_hidden;
};
TORCH_MODULE(LSTMPredictor);
I have generated sine wave training data with noise in the format of a 2nd degree Tensor, where each row contains data for a single sine wave and there are 100 rows.
Here is my function that I use to try to train on that data:
void time_series_train(torch::Tensor data) {
auto data_sizes = data.sizes();
torch::Tensor train_input = data.index({
torch::indexing::Slice(3),
torch::indexing::Slice(0, -1)
});
torch::Tensor train_target = data.index({
torch::indexing::Slice(3),
torch::indexing::Slice(1)
});
torch::Tensor test_input = data.index({
torch::indexing::Slice(0, 3),
torch::indexing::Slice(0, -1)
});
torch::Tensor test_target = data.index({
torch::indexing::Slice(0, 3),
torch::indexing::Slice(1)
});
LSTMPredictor predictor;
torch::optim::LBFGS LSTM_optimizer(
predictor->parameters(), torch::optim::LBFGSOptions(0.8)
);
int n_training_steps = 10;
for (int i = 0; i < n_training_steps; i++) {
auto closure = [predictor, train_input, train_target, i] () mutable {
predictor->zero_grad();
torch::Tensor output = predictor->forward(train_input);
torch::Tensor loss = torch::mse_loss(output, train_target);
cout << "Current step: " << i << ", loss: " << loss.item<float>() << endl;
loss.backward();
return loss;
};
LSTM_optimizer.step(closure);
}
}
When I run this function, it reaches the cout statement inside my closure lambda function, and after that I get this output:
terminate called after throwing an instance of ‘c10::Error’
what(): element 0 of tensors does not require grad and does not have a grad_fn
Exception raised from run_backward at …/torch/csrc/autograd/autograd.cpp:91 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x3e (0x7fce15506f0e in /(home)/.local/lib/libc10.so)
frame #1: c10::detail::torchCheckFail(char const*, char const*, unsigned int, std::string const&) + 0x5c (0x7fce154e17ce in /(home)/.local/lib/libc10.so)
frame #2: + 0x3efaef0 (0x7fcdffab6ef0 in /(home)/.local/lib/libtorch_cpu.so)
frame #3: torch::autograd::backward(std::vector<at::Tensor, std::allocator< at::Tensor> > const&, std::vector<at::Tensor, std::allocator< at::Tensor> > const&, c10::optional, bool, std::vector<at::Tensor, std::allocator< at::Tensor> > const&) + 0x5c (0x7fcdffab8efc in /(home)/.local/lib/libtorch_cpu.so)
frame #4: + 0x3f54e5e (0x7fcdffb10e5e in (home)/.local/lib/libtorch_cpu.so)
frame #5: at::Tensor::_backward(c10::ArrayRef< at::Tensor>, c10::optional< at::Tensor> const&, c10::optional, bool) const + 0x48 (0x7fcdfd4109e8 in /(home)/.local/lib/libtorch_cpu.so)
frame #6: at::Tensor::backward(at::Tensor const&, c10::optional, bool, c10::optional<c10::ArrayRef< at::Tensor> >) const + 0x11d (0x43e915 in ./PyTorchTest)
frame #7: ./PyTorchTest() [0x43ae88]
frame #8: ./PyTorchTest() [0x43cd2e]
frame #9: torch::optim::LBFGS::step(std::function<at::Tensor ()>) + 0x27a (0x7fce0066f50a in /(home)/.local/lib/libtorch_cpu.so)
frame #10: time_series_train(at::Tensor) + 0x87b (0x43b8c5 in ./PyTorchTest)
frame #11: main + 0x10d (0x43ac9d in ./PyTorchTest)
frame #12: __libc_start_main + 0xf5 (0x7fcdfadb0555 in /lib64/libc.so.6)
frame #13: ./PyTorchTest() [0x43a8c9]
Aborted (core dumped)
From what I can tell, the only important line in all that is " element 0 of tensors does not require grad and does not have a grad_fn". I’ve tried disabling autograd globally by putting this line in various locations:
torch::NoGradGuard no_grad;
But that did not work, probably because I have no clue what I’m doing when it comes to autograd. I suspect maybe it’s something to do with my closure lambda function being setup weird, as I couldn’t find anything definitive online for passing closure functions in libtorch for c++.
Any solutions or general tips on ways I’m doing libtorch incorrectly would be appreciated, Thank you.