How can I copy the parameters of one model to another in LibTorch?

How can I copy the parameters of one model to another in LibTorch? I know how to do it in PyTorch (Python):

net2.load_state_dict(net.state_dict())

I have tried the code below in C++, after quite a bit of work, but it didn’t copy the parameters from one model to the other.

#include <torch/torch.h>
#include <iostream>
using namespace torch::indexing;

torch::Device device(torch::kCUDA);

void loadstatedict(torch::nn::Module& model, torch::nn::Module& target_model) {
    torch::autograd::GradMode::set_enabled(false);  // make parameter copying possible
    auto new_params = target_model.named_parameters(); // parameters to copy from
    auto params = model.named_parameters(true /*recurse*/);
    auto buffers = model.named_buffers(true /*recurse*/);
    for (auto& val : new_params) {
        auto name = val.key();
        auto* t = params.find(name);
        if (t != nullptr) {
            t->copy_(val.value());
        } else {
            t = buffers.find(name);
            if (t != nullptr) {
                t->copy_(val.value());
            }
        }
    }
}

struct Critic_Net : torch::nn::Module {
    torch::Tensor next_state_batch__sampled_action;
    public:
    Critic_Net() {
        lin1 = torch::nn::Linear(3, 3);
        lin2 = torch::nn::Linear(3, 1);
        lin1->to(device);
        lin2->to(device);
    }
    torch::Tensor forward(torch::Tensor next_state_batch__sampled_action) {
        auto h = next_state_batch__sampled_action;
        h = torch::relu(lin1->forward(h));
        h = lin2->forward(h);
        return h;
    }
    torch::nn::Linear lin1{nullptr}, lin2{nullptr};
};

auto net = Critic_Net();
auto net2 = Critic_Net();
auto the_ones = torch::ones({3, 3}).to(device);

int main() {
    std::cout << net.forward(the_ones);
    std::cout << net2.forward(the_ones);
    loadstatedict(net, net2);
    std::cout << net.forward(the_ones);
    std::cout << net2.forward(the_ones);

}

try this (you still have to register the modules; without register_module, named_parameters() won’t see them):

struct Critic_Net : torch::nn::Module {
	torch::Tensor next_state_batch__sampled_action;
	torch::nn::Linear lin1{ nullptr }, lin2{ nullptr };
public:
	Critic_Net() {
		lin1 = register_module("lin1", torch::nn::Linear(3, 3));
		lin2 = register_module("lin2", torch::nn::Linear(3, 1));
	}
	torch::Tensor forward(torch::Tensor next_state_batch__sampled_action) {
		auto h = next_state_batch__sampled_action;
		h = torch::relu(lin1->forward(h));
		h = lin2->forward(h);
		return h;
	}

	std::string SaveMemory()
	{
		std::ostringstream oss;
		torch::serialize::OutputArchive archive;

		this->save(archive);
		archive.save_to(oss);

		return oss.str();
	}

	void LoadMemory(const std::string& mem)
	{
		torch::serialize::InputArchive archive;
		archive.load_from(std::istringstream(mem));
		this->load(archive);
	}
};


int main()
{
	try
	{
		auto net = Critic_Net();
		auto net2 = Critic_Net();

		auto the_ones = torch::ones({ 3, 3 });

		std::cout << net.forward(the_ones) << std::endl << std::endl;
		std::cout << net2.forward(the_ones) << std::endl << std::endl;

		auto state = net.SaveMemory();
		net2.LoadMemory(state);

		std::cout << net.forward(the_ones) << std::endl << std::endl;
		std::cout << net2.forward(the_ones) << std::endl << std::endl;
	}
	catch (std::runtime_error& e)
	{
		std::cout << e.what() << std::endl;
	}
	catch (const c10::Error& e)
	{
		std::cout << e.msg() << std::endl;
	}

	system("PAUSE");
}

Thank you. But I have one error.

I get an error on the archive.load_from(std::istringstream(mem)); line

/home/iii/tor/m_gym/tensor.cpp: In member function ‘void Critic_Net::LoadMemory(const string&)’:
/home/iii/tor/m_gym/tensor.cpp:88:40: error: cannot bind non-const lvalue reference of type ‘std::istream&’ {aka ‘std::basic_istream<char>&’} to an rvalue of type ‘std::basic_istream<char>’
   88 |                 archive.load_from(std::istringstream(mem));

in VS it works; try changing LoadMemory to a simpler version like this →

void LoadMemory(std::string mem)
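
or give the stream a name first, since gcc won’t bind a temporary to the std::istream& overload that load_from expects; untested on my end, but something like:

	void LoadMemory(const std::string& mem)
	{
		torch::serialize::InputArchive archive;
		std::istringstream iss(mem);  // named lvalue, so it binds to std::istream&
		archive.load_from(iss);
		this->load(archive);
	}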

I’m in VS Code too. I get the error I mentioned, or a different one when I change how the mem string is handled:

Unrecognized data format
sh: 1: PAUSE: not found

remove

system("PAUSE");

VS != VS Code. If you’re on Linux, the compiler is different, and the shell doesn’t know the PAUSE command.

Ah, okay.
Though the error is still the same. VS Code doesn’t like this line.

If I remove the std::istringstream I get the other Unrecognized data format error.

Sorry, I know some C++, but I’m out of my depth with what you’ve introduced.

When I hover over the red underline it shows…

load_from

+4 overloads

Loads the `InputArchive` from a serialized representation stored in the
file at `filename`. Storage are remapped using device option. If device
is not specified, the module is loaded to the original device.

no instance of overloaded function "torch::serialize::InputArchive::load_from" matches the argument list C/C++(304)

:confused: no idea, missing header maybe?

#include <sstream>

Nope. I ended up using this. I found it online somewhere and don’t remember the source.

// Parameters of the model in the 2nd position are copied into the model in the 1st
void loadstatedict(torch::nn::Module& model, torch::nn::Module& target_model) {
    torch::autograd::GradMode::set_enabled(false);  // make parameter copying possible
    auto new_params = target_model.named_parameters(); // parameters to copy from
    auto params = model.named_parameters(true /*recurse*/);
    auto buffers = model.named_buffers(true /*recurse*/);
    for (auto& val : new_params) {
        auto name = val.key();
        auto* t = params.find(name);
        if (t != nullptr) {
            t->copy_(val.value());
        } else {
            t = buffers.find(name);
            if (t != nullptr) {
                t->copy_(val.value());
            }
        }
    }
    torch::autograd::GradMode::set_enabled(true);
}
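
Called with the source model in the second position, this mirrors the Python net2.load_state_dict(net.state_dict()) from the top of the thread:

    loadstatedict(net2, net);  // copies net's parameters into net2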

I seem to be able to save the model with this code at the bottom of the main function.

    std::string model_path = "model.pt";
    torch::serialize::OutputArchive output_archive;
    net.save(output_archive);
    output_archive.save_to(model_path);

At least this saves a file called “model.pt” in the directory.
Is there a way to load the model somehow with InputArchive at the top of the main function?

If saving to a file is OK, I think you can just use torch::save and torch::load; examples here: Github Pytorch cpp api serialize test
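
And to answer the InputArchive question directly: the loading counterpart of your OutputArchive snippet should be something along these lines (untested):

	torch::serialize::InputArchive input_archive;
	input_archive.load_from(model_path);
	net.load(input_archive);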

torch::load and torch::save have always given me huge, unspecific errors.

Doesn’t it compile, or is it a runtime error?
I meant something like this (it outputs the same values after loading):

struct Critic_Net : torch::nn::Module {
	torch::Tensor next_state_batch__sampled_action;
public:
	Critic_Net() {
		// Construct and register two Linear submodules.
		lin1 = register_module("lin1", torch::nn::Linear(427, 42));
		lin2 = register_module("lin2", torch::nn::Linear(42, 286));
		lin3 = register_module("lin3", torch::nn::Linear(286, 42));
	}
	torch::Tensor forward(torch::Tensor next_state_batch__sampled_action) {
		auto h = next_state_batch__sampled_action;
		h = torch::relu(lin1->forward(h));
		h = torch::tanh(lin2->forward(h));
		h = lin3->forward(h);
		return h;
	}
	torch::nn::Linear lin1{ nullptr }, lin2{ nullptr }, lin3{ nullptr };
};

int main()
{
	try
	{
		auto test_input = torch::zeros({ 1, 427 });

		auto net = std::make_shared<Critic_Net>();
		auto net2 = std::make_shared<Critic_Net>();

		std::cout << net->forward(test_input);

		torch::save(net, "temp.pt");
		torch::load(net2, "temp.pt");

		std::cout << net2->forward(test_input);
	}
	catch (std::runtime_error& e)
	{
		std::cout << e.what() << std::endl;
	}
	catch (const c10::Error& e)
	{
		std::cout << e.msg() << std::endl;
	}
}

Thank you. The make_shared was holding me back.

int main() {
    auto net = std::make_shared<Critic_Net>();
    torch::load(net, "temp.pt");
    auto net_optimizer = torch::optim::Adam(net->parameters(), 1e-3);

    // `one` (the input) and `target` are tensors defined elsewhere in the script
    for (int e = 0; e < 1000; e++) {
        net_optimizer.zero_grad();
        auto y_hat = net->forward(one);

        auto loss = torch::smooth_l1_loss(y_hat, target);
        loss.backward();
        net_optimizer.step();

        if (e % 50 == 0) {
            std::cout << loss.item() << "\n";
        }
    }
    torch::save(net, "temp.pt");
}

I thought it wasn’t working because I expected it to load with a lower loss. I simply hadn’t thought about how the script starts with a new random tensor.

You’re loading from model.pt and saving to temp.pt, is that right?
Also, the Adam optimizer has some internal state which is not saved, so it may behave differently after a restart.
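
If that matters, I think torch::save and torch::load also accept optimizers, so you could persist the Adam state next to the model, roughly:

	torch::save(net, "temp.pt");
	torch::save(net_optimizer, "optim.pt");  // Adam's internal state (steps, moment estimates)

	// next run, after reconstructing net and net_optimizer:
	torch::load(net, "temp.pt");
	torch::load(net_optimizer, "optim.pt");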

I just fixed that too. I was trying different model names because I was saving with different methods.
It has been a messy journey, but it looks like a ‘no duh’ situation in hindsight.
I saved the tensors too, and load them, and now I can save and load a model that improves every time I run the script, which was the goal. Thank you. It would have taken me another day to figure out. When I saw that your single-tensor example gave the same outputs from both models, I knew where I f’d up.
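
In case it helps anyone, saving and loading the tensors works the same way as the model; something like this (target.pt is just whatever filename you pick):

    torch::save(target, "target.pt");

    // ...and at the start of the next run:
    torch::Tensor target;
    torch::load(target, "target.pt");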
