Hello! I am trying to implement Actor Critic algorithm in C++, and for some reason the weights of my Actor and Critic networks are not being updated.
My Actor.h
#pragma once
#include <torch/torch.h>
class ActorImpl : public torch::nn::Module {
public:
ActorImpl(int input_dims, int hidden_size, int n_actions);
torch::Tensor forward(torch::Tensor state);
torch::Tensor choose_action(torch::Tensor state);
torch::Tensor calculate_loss(double gamma);
private:
torch::nn::Linear input;
torch::nn::Linear pi;
};
TORCH_MODULE(Actor);
My Actor.cpp
#include "Actor.h"
#include <torch/torch.h>
ActorImpl::ActorImpl(int input_dims, int hidden_size, int n_actions)
: input(input_dims, hidden_size), pi(hidden_size, n_actions){
register_module("input", input);
register_module("pi", pi);
}
torch::Tensor ActorImpl::forward(torch::Tensor state) {
auto x = F::relu(input->forward(state));
return pi->forward(x);
}
torch::Tensor ActorImpl::calculate_loss(double gamma){
vector<torch::Tensor> Pi, V;
for(auto i = 0; i < _states.size(); i++){ // _states.size() has the same length as the episode length
auto pi_i = forward(_states[i]);
CriticImpl critic_instance(_input_dims, _hidden_size);
auto v_i = critic_instance.forward(_states[i]);;
Pi.push_back(pi_i);
V.push_back(v_i);
}
double R = 0;
vector<double> returns;
for(auto i = 0; i < _rewards.size(); i++){
R += pow(gamma,i)*_rewards[i];
returns.push_back(R);
}
vector<double> log_probs;
for(auto i = 0; i < _actions.size(); i++){
auto probs = F::softmax(Pi[i], F::SoftmaxFuncOptions(1)); // converts list of logits to list of probabilities
int action = _actions[i];
double log_prob = log(probs[0][action].item<double>());
log_probs.push_back(log_prob);
}
vector<double> total_loss;
for(auto i = 0; i < _states.size(); i++){
double actor_loss = -log_probs[i]*(returns[i] - V[i][0][0].item<double>());
double critic_loss = pow(returns[i] - V[i][0][0].item<double>(), 2);
total_loss.push_back(actor_loss + critic_loss);
}
auto av_total_loss = accumulate(total_loss.begin(), total_loss.end(), 0) / (1.0*total_loss.size());
torch::Tensor return_loss_as_tensor = torch::tensor(av_total_loss, torch::requires_grad());
return return_loss_as_tensor;
}
My main.cpp
// some parameters definitions here
// ...
auto cuda_available = torch::cuda::is_available();
torch::Device device(cuda_available ? torch::kCUDA : torch::kCPU);
cout << (cuda_available ? "CUDA available. Training on GPU." : "Training on CPU.") << '\n';
Actor actor(input_dims, hidden_size, n_actions);
actor->to(device);
torch::optim::Adam optimizer_actor(actor->parameters(), torch::optim::AdamOptions(lr));
cout << "actor parameters" << endl << actor->parameters() << endl;
// in between i do some stuff, pass observation state and compute the loss
// now update the weights and biases
torch::Tensor loss = actor->calculate_loss(gamma);
optimizer_actor.zero_grad(); // Reset gradients
loss.backward(); // backpropagate loss
optimizer_actor.step(); // update network parameters
actor->clear_memory();
cout << "updated actor parameters" << endl << actor->parameters() << endl;
No runntime errors, but the parameters just do not change, no matter what I do with the learning rate. I know that the issue somehow must be with how I define my Actor class, because I have tried doing backdrop on some simple linear regression models, and everything worked.
The complete project is here, in case if you are interested: