How do I get a thread-safe (deep) copy of a torch::nn::Sequential object?

I need to get a copy of a shared neural network of type torch::nn::Sequential in multi-threaded code. In each thread I want to change the weights of the network, and I need to make sure the weights of the different threads do not mix with each other. It seems I cannot directly use `auto l_model = model;`, since that only creates a new handle pointing to the same object in memory as the original (torch::nn::Sequential is a module holder wrapping a shared pointer). So whenever I change the weights in one thread, all other threads see that change, which is not desirable. I also declared and instantiated the new model as `auto l_model = torch::nn::Sequential(model);`, which still results in the same issue.

I appreciate any help or comment.

#include <omp.h>
#include "iostream"
#include <ATen/ATen.h>
#include <torch/torch.h>

// Overwrite every parameter of `model` in place: tensors whose name ends in
// "weight" are set to the constant `w`, tensors whose name ends in "bias"
// are zeroed.  Gradient tracking is disabled for the duration of the writes.
void set_weights(torch::nn::Sequential &model, float w){

    // RAII guard: disables autograd for this scope and restores the previous
    // mode on exit.  Unlike the paired GradMode::set_enabled(false)/(true)
    // calls, this is exception-safe and cannot leave autograd disabled.
    torch::NoGradGuard no_grad;

    // True if `name` ends with `suffix`.  Parameter names look like
    // "0.weight" / "3.bias"; matching the suffix (instead of a fixed offset
    // like compare(2, 6, "weight")) stays correct once layer indices reach
    // two digits ("10.weight").
    auto ends_with = [](const std::string &name, const std::string &suffix) {
        return name.size() >= suffix.size() &&
               name.compare(name.size() - suffix.size(),
                            suffix.size(), suffix) == 0;
    };

    for (auto &p : model->named_parameters(/*recurse=*/true)) {
        auto z = p.value(); // Tensor handle; in-place ops mutate the model

        if (ends_with(p.key(), "weight"))
            z.uniform_(w, w);   // uniform on [w, w] == constant fill with w
        else if (ends_with(p.key(), "bias"))
            z.uniform_(0, 0);   // zero the biases
    }

}

// Dump every named parameter of `model` (recursing into submodules) to
// stdout as "name: tensor" pairs.
void print_weights(torch::nn::Sequential &model) {
    for (const auto &item : model->named_parameters(/*recurse=*/true)) {
        std::cout << item.key() << ": " << item.value() << std::endl;
    }
}

int main()
{
    int N=3;
    torch::Tensor x = torch::tensor({1,1,1,1}).toType(torch::kFloat32);
    int thread;

    torch::nn::Sequential model(torch::nn::Linear(4,2),
                                torch::nn::Functional(torch::relu),
                                torch::nn::Linear(2,1));
    torch::Device device_ = torch::Device(torch::kCPU);
    model->to(device_);
    model->to(torch::kFloat32);
    set_weights(model, 1.);
    omp_set_num_threads(2);
#pragma omp parallel for private(thread) shared(N) //, private(x)
    for (int i=0; i<N; i++) {
        thread = omp_get_thread_num();
        auto l_model = model;

        std::cout << "g-model" <<std::endl;
        print_weights(model);
        set_weights(l_model, (float)thread);
        std::cout << "l-model" <<std::endl;
        print_weights(l_model);
        std::cout << "g-model" <<std::endl;
        print_weights(model);
        auto out = l_model->forward(x);
        auto tmp = out.item<float>();
        result[i][0] = (float)thread;
        result[i][1] = *out_f;
    }
    for (int i=0; i <N ; i++){
        std::cout << "i=" << i << " thread=" << result[i][0] << " out=" << result[i][1] << std::endl;
    }

    return 0;
}