I've been trying for a while now, but no matter what I do, the parameters never seem to be copied over.
Pulling the model from the central one:
void ThreadHandler::updateNet(MyNet extern_net)
{
    m_net.lock();
    // MyNet cloned_net = MyNet(options.input_size, options.hidden_size, options.hidden_size2, options.hidden_size3, options.output_size); // should change the parameters to be dynamic
    // auto temp = *std::dynamic_pointer_cast<MyNet>(main_net->clone()); // just a pointer
    auto paramsA = main_net->named_parameters(true);
    auto paramsB = extern_net->named_parameters(true);
    extern_net->to(options.device);
    torch::manual_seed(options.seed);
    bool brek = false;
    torch::autograd::GradMode::set_enabled(false);
    for (auto& paramA : paramsA) {
        for (auto& paramB : paramsB) {
            if (paramA.key() == paramB.key()) {
                if (!paramA.value().grad().defined()) {
                    std::cout << "Error: Main Net " << paramA.key() << " gradient is not defined" << std::endl;
                    brek = true;
                    break;
                }
                if (!paramB.value().grad().defined()) {
                    std::cout << "Error: Extern Net " << paramB.key() << " gradient is not defined" << std::endl;
                    brek = true;
                    break;
                }
                // paramB.value().data().copy_(paramA.value().data().clone());
                // note: this copies the gradients, not the parameter values themselves
                paramB.value().grad().data() = paramA.value().grad().data().clone();
                break;
            }
        }
        if (brek) {
            break;
        }
    }
    torch::autograd::GradMode::set_enabled(true);
    // std::cout << "Seed is : " << options.seed << std::endl;
    m_net.unlock();
}
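For reference, this is the direct parameter copy I would have expected to work (a minimal sketch, assuming MyNet is a torch::nn::ModuleHolder so operator-> reaches the module; the helper name copyParams is just for illustration):

void copyParams(MyNet& src, MyNet& dst)
{
    torch::NoGradGuard no_grad; // suspend autograd for the in-place copies
    auto src_params = src->named_parameters(/*recurse=*/true);
    auto dst_params = dst->named_parameters(/*recurse=*/true);
    for (auto& p : src_params) {
        // OrderedDict::find returns nullptr if dst has no parameter with that key
        if (auto* dst_tensor = dst_params.find(p.key())) {
            dst_tensor->copy_(p.value());
        }
    }
}

Buffers (e.g. the running statistics of batch norm) would presumably need the same treatment via named_buffers.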
Just a primitive attempt at averaging across multiple models:
void ThreadHandler::average(MyNet extern_net)
{
    std::cout << "Averaging" << std::endl;
    // MyNet mainnet = MyNet(options.input_size, options.hidden_size, options.hidden_size2, options.hidden_size3, options.output_size);
    // cloneNet(mainnet);
    m_net.lock();
    auto paramsA = extern_net->named_parameters(true);
    auto paramsB = main_net->named_parameters(true);
    bool brek = false;
    torch::autograd::GradMode::set_enabled(false);
    for (auto& paramA : paramsA) {
        for (auto& paramB : paramsB) {
            if (paramA.key() == paramB.key()) {
                if (!paramA.value().grad().defined()) {
                    std::cout << "Error: Extern Net " << paramA.key() << " gradient is not defined" << std::endl;
                    brek = true;
                    break;
                }
                if (!paramB.value().grad().defined()) {
                    std::cout << "Error: Main Net " << paramB.key() << " gradient is not defined" << std::endl;
                    brek = true;
                    break;
                }
                // paramB.value().data().copy_((paramA.value().data() + paramB.value().data()) / 2.0);
                // note: this averages the gradients, not the parameter values
                paramB.value().grad().data() = (paramA.value().grad().data() + paramB.value().grad().data()) / 2.0;
                break;
            }
        }
        if (brek) {
            break;
        }
    }
    // main_net = mainnet;
    torch::autograd::GradMode::set_enabled(true);
    m_net.unlock();
}
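The same idea applied to the parameter values instead of the gradients would look roughly like this (again just a sketch; averageParams is a hypothetical name, and both nets are assumed to share the same architecture):

void averageParams(MyNet& extern_net, MyNet& main)
{
    torch::NoGradGuard no_grad;
    auto paramsA = extern_net->named_parameters(true);
    auto paramsB = main->named_parameters(true);
    for (auto& paramA : paramsA) {
        // in-place: paramB = 0.5 * paramB + 0.5 * paramA
        if (auto* paramB = paramsB.find(paramA.key())) {
            paramB->mul_(0.5).add_(paramA.value(), 0.5);
        }
    }
}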
Maybe my train method helps:
void ThreadHandler::train(MyNet extern_net, Options options)
{
    // for debugging only
    std::vector<int64_t> expected_size = { 29, 1, 4 };
    extern_net->train();
    auto train_set = ClimateDataset(data).map(torch::data::transforms::Stack<>());
    torch::manual_seed(options.seed);
    extern_net->to(options.device);
    torch::optim::Adam optimizer(extern_net->parameters(), torch::optim::AdamOptions(options.learning_rate));
    auto train_loader = torch::data::make_data_loader<torch::data::samplers::DistributedRandomSampler>(
        std::move(train_set),
        torch::data::DataLoaderOptions().batch_size(options.train_batch_size));
    for (int i = 0; i < options.iterations; i++) {
        for (auto& batch : *train_loader) {
            optimizer.zero_grad();
            auto inputs = batch.data.to(options.device);
            auto targets = batch.target.to(options.device);
            std::vector<int64_t> input_size = inputs.sizes().vec();
            std::vector<int64_t> target_size = targets.sizes().vec();
            // for debugging and conditional breakpoints only
            int64_t ist = input_size[2];
            int64_t tes = target_size[2];
            TORCH_CHECK(input_size == expected_size);
            torch::Tensor prediction = extern_net->forward(inputs);
            auto loss = torch::l1_loss(prediction, targets);
            std::cout << "Loss: " << loss.item<float>() << std::endl;
            loss.backward();
            optimizer.step();
        }
    }
    if (options.notwhole) {
        average(extern_net);
    }
    else {
        averageWhole(extern_net);
    }
    stop();
    // monitor();
}
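To check whether a copy actually took effect, I can compare the two nets parameter by parameter (a sketch; sameParams is a hypothetical helper, torch::allclose tolerances are left at their defaults, and tensors are moved to the CPU so the comparison works across devices):

bool sameParams(MyNet& a, MyNet& b)
{
    auto pa = a->named_parameters(true);
    auto pb = b->named_parameters(true);
    for (auto& p : pa) {
        auto* q = pb.find(p.key());
        if (q == nullptr || !torch::allclose(p.value().cpu(), q->cpu())) {
            return false; // missing key or diverging values
        }
    }
    return true;
}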