I am attempting to train an LSTM model to do time series prediction in libtorch. Here is how I set up the model:
#include <torch/torch.h>

struct LSTMPredictorImpl : torch::nn::Module {
    LSTMPredictorImpl(int n_hidden = 51) :
        lstm1(torch::nn::LSTMCell(1, n_hidden)),
        lstm2(torch::nn::LSTMCell(n_hidden, n_hidden)),
        linear1(torch::nn::Linear(n_hidden, 1))
    {
        register_module("lstm1", lstm1);
        register_module("lstm2", lstm2);
        register_module("linear1", linear1);
        this->n_hidden = n_hidden;
    }

    torch::Tensor forward(torch::Tensor x, int future = 0) {
        int n_samples = x.sizes()[0];
        std::vector<torch::Tensor> outputs;

        // Initial hidden/cell state pairs for both LSTM cells.
        std::tuple<torch::Tensor, torch::Tensor> hc_t1(
            torch::zeros({n_samples, n_hidden}),
            torch::zeros({n_samples, n_hidden}));
        std::tuple<torch::Tensor, torch::Tensor> hc_t2(
            torch::zeros({n_samples, n_hidden}),
            torch::zeros({n_samples, n_hidden}));

        torch::Tensor output;

        // Split the input into one {n_samples, 1} slice per time step.
        std::vector<torch::Tensor> separated = x.split(1, 1);
        for (torch::Tensor input_t : separated) {
            hc_t1 = lstm1(input_t, hc_t1);
            hc_t2 = lstm2(std::get<0>(hc_t1), hc_t2);
            output = linear1(std::get<0>(hc_t2));
            outputs.push_back(output);
        }

        // Keep predicting past the end of the input by feeding the
        // previous output back in as the next input.
        for (int i = 0; i < future; i++) {
            hc_t1 = lstm1(output, hc_t1);
            hc_t2 = lstm2(std::get<0>(hc_t1), hc_t2);
            output = linear1(std::get<0>(hc_t2));
            outputs.push_back(output);
        }

        // Concatenate the per-time-step slices back along dim 1.
        torch::TensorList intermediate = torch::TensorList(separated);
        torch::Tensor final_output = torch::cat(intermediate, 1);
        final_output.requires_grad_(true);
        return final_output;
    }

    torch::nn::LSTMCell lstm1, lstm2;
    torch::nn::Linear linear1;
    int n_hidden;
};
TORCH_MODULE(LSTMPredictor);
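
For reference, I exercise the module on its own roughly like this (the shapes here are illustrative, not my real data):

// Quick smoke test with made-up shapes:
LSTMPredictor model;
auto x = torch::randn({4, 100});      // 4 sequences, 100 time steps each
auto y = model->forward(x, 10);       // also predict 10 steps past the input
std::cout << y.sizes() << std::endl;  // print the output shape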
And here is my training loop:
void time_series_train(torch::Tensor data) {
    // Rows 3.. are the training sequences; rows 0..2 are held out for testing.
    // Inputs drop the last time step, targets drop the first (shift by one).
    torch::Tensor train_input = data.index({
        torch::indexing::Slice(3),
        torch::indexing::Slice(0, -1)
    });
    torch::Tensor train_target = data.index({
        torch::indexing::Slice(3),
        torch::indexing::Slice(1)
    });
    torch::Tensor test_input = data.index({
        torch::indexing::Slice(0, 3),
        torch::indexing::Slice(0, -1)
    });
    torch::Tensor test_target = data.index({
        torch::indexing::Slice(0, 3),
        torch::indexing::Slice(1)
    });

    LSTMPredictor predictor;
    torch::optim::LBFGS LSTM_optimizer(
        predictor->parameters(), torch::optim::LBFGSOptions(0.8)
    );

    // LBFGS may re-evaluate this closure several times per step.
    auto cost = [&]() {
        LSTM_optimizer.zero_grad();
        auto out = predictor->forward(train_input);
        auto loss = torch::mse_loss(out, train_target);
        std::cout << "loss: " << loss.item<float>() << std::endl;
        loss.backward();
        return loss;
    };

    int n_training_steps = 10;
    for (int i = 0; i < n_training_steps; i++) {
        printf("step: %d\n", i);
        LSTM_optimizer.step(cost);
    }
}
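
For completeness, here is roughly how I build the data and call the function; the sine-wave generator is a simplified stand-in for my real input, so the exact numbers are placeholders:

int main() {
    // Placeholder data: 100 sine sequences of length 1000 with random phase shifts
    int n_seq = 100, seq_len = 1000;
    auto t = torch::arange(0, seq_len, torch::kFloat32).unsqueeze(0).repeat({n_seq, 1});
    auto phase = torch::randint(-4 * seq_len, 4 * seq_len, {n_seq, 1}).to(torch::kFloat32);
    auto data = torch::sin((t + phase) / 20.0);
    time_series_train(data);
    return 0;
}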
When I run the training loop, the loss never changes:
step: 0
loss: 0.291023
step: 1
loss: 0.291023
step: 2
loss: 0.291023
step: 3
loss: 0.291023
step: 4
loss: 0.291023
step: 5
loss: 0.291023
step: 6
loss: 0.291023
step: 7
loss: 0.291023
step: 8
loss: 0.291023
step: 9
loss: 0.291023
I suspect it has something to do with this line at the end of the forward function:

final_output.requires_grad_(true);

but leaving that line out causes autograd to throw an error during loss.backward().
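
To be concrete, this is the variant of the end of forward() that fails (a minimal sketch; everything above these lines is unchanged):

// Same tail of forward(), minus the requires_grad_ call:
torch::TensorList intermediate = torch::TensorList(separated);
torch::Tensor final_output = torch::cat(intermediate, 1);
return final_output;  // with this version, loss.backward() throws an autograd error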
Please let me know what I’m doing wrong, thank you.