I know you usually use processes in PyTorch, but I wanted to see whether the same can be done with multithreading; at the very least it reduces overhead, which is nice when you work in a limited environment. So I jumped in …
I got it to start running, but soon afterwards it hits a bug: somewhere deep in the system a debug Abort() is called. I know something is wrong with my batch input (batch.data).
/// Runs one pass of L1-loss training over the ClimateDataset built from `data`,
/// then hands the network to average() and calls stop().
///
/// @param extern_net  Network to train. MyNet is declared with TORCH_MODULE, so
///                    this by-value parameter is a handle to the caller's module
///                    rather than a deep copy.
///                    NOTE(review): if several threads are given the same handle,
///                    the to()/backward()/step() calls below operate on shared
///                    parameters without synchronisation — confirm each thread
///                    gets its own clone.
/// @param seed        Forwarded to torch::manual_seed before the data loader is
///                    built. NOTE(review): that seeds process-wide generators,
///                    so concurrent callers may overwrite each other's seed.
/// @param options     Supplies `device`, `learning_rate` and `train_batch_size`.
///
/// Nothing in this function catches exceptions. LibTorch reports shape, dtype
/// and device errors by throwing, so a failure inside the loop propagates to
/// the caller (and, on a thread entry point, ends in std::terminate/abort).
void ThreadHandler::train(MyNet extern_net, int seed, Options options)
{
    auto train_set = ClimateDataset(data).map(torch::data::transforms::Stack<>());
    torch::manual_seed(seed);

    // The parameters live on options.device from here on, so every tensor fed
    // to the network must be moved to the same device.
    extern_net->to(options.device);
    torch::optim::Adam optimizer(extern_net->parameters(),
                                 torch::optim::AdamOptions(options.learning_rate));

    auto train_loader = torch::data::make_data_loader<torch::data::samplers::RandomSampler>(
        std::move(train_set),
        torch::data::DataLoaderOptions().batch_size(options.train_batch_size));

    for (auto& batch : *train_loader) {
        optimizer.zero_grad();

        // Inputs and targets must both be on the model's device; leaving the
        // inputs where the loader produced them makes forward() mix devices.
        auto inputs = batch.data.to(options.device);
        auto targets = batch.target.to(options.device);

        torch::Tensor prediction = extern_net->forward(inputs);
        auto loss = torch::l1_loss(prediction, targets);
        loss.backward();
        optimizer.step();
    }

    average(extern_net);
    stop();
}
If .to(options.device) is called on the batch data, that call itself triggers the abort; if it is not called, the abort ends up here instead:
#pragma once
#include
#include <torch/torch.h>
/// Fully connected network: five Linear layers with ReLU after each of the
/// first four and no activation on the output layer.
struct MyNetImpl : torch::nn::Module {
    /// Builds and registers the five Linear sub-modules.
    ///
    /// @param input_size    Feature count expected by the first layer.
    /// @param hidden_size   Output width of `i2h`.
    /// @param hidden_size2  Output width of `h2h`.
    /// @param hidden_size3  Output width of `h2h2`, and both widths of `h2h3`.
    /// @param output_size   Output width of the final layer `h2o`.
    MyNetImpl(int64_t input_size, int64_t hidden_size, int64_t hidden_size2, int64_t hidden_size3, int64_t output_size) :
        // Registration names must be ordinary ASCII-quoted string literals;
        // typographic quotes are not valid C++ and will not compile.
        i2h(register_module("i2h", torch::nn::Linear(input_size, hidden_size))),
        h2h(register_module("h2h", torch::nn::Linear(hidden_size, hidden_size2))),
        h2h2(register_module("h2h2", torch::nn::Linear(hidden_size2, hidden_size3))),
        h2h3(register_module("h2h3", torch::nn::Linear(hidden_size3, hidden_size3))),
        h2o(register_module("h2o", torch::nn::Linear(hidden_size3, output_size))) {}

    /// Applies the layers in order: i2h, h2h, h2h2, h2h3 (each followed by
    /// ReLU), then h2o.
    ///
    /// @param x1  Input tensor; its last dimension must match `input_size`,
    ///            as required by torch::nn::Linear.
    /// @return    Output of `h2o`, with no activation applied.
    torch::Tensor forward(torch::Tensor x1) {
        torch::Tensor xm = torch::relu(i2h->forward(x1));
        torch::Tensor xm2 = torch::relu(h2h->forward(xm));
        torch::Tensor xm3 = torch::relu(h2h2->forward(xm2));
        torch::Tensor xo = torch::relu(h2h3->forward(xm3));
        torch::Tensor x = h2o->forward(xo);
        return x;
    }

    // Declared in the same order as the constructor's initializer list.
    torch::nn::Linear i2h, h2h, h2h2, h2h3, h2o;
};
// Declares the MyNet holder type that wraps MyNetImpl.
TORCH_MODULE(MyNet);
Basically, it fails on the first line of forward. I’m sure it has something to do with the multithreading. I’m a bit at a loss here, and yes, the dimensions seem fine, so I’m not sure what I should check.
Stacktrace:
ucrtbased.dll!00007ff8d4cc2e65() | Unknown | |
---|---|---|
ucrtbased.dll!00007ff8d4cc3003() | Unknown | |
ucrtbased.dll!00007ff8d4cdab0d() | Unknown | |
ucrtbased.dll!00007ff8d4cd9a90() | Unknown | |
vcruntime140_1d.dll!00007ff9a757223a() | Unknown | |
vcruntime140_1d.dll!00007ff9a7572ec5() | Unknown | |
vcruntime140_1d.dll!00007ff9a7572f57() | Unknown | |
vcruntime140_1d.dll!00007ff9a7576dbb() | Unknown | |
ntdll.dll!00007ff9c36523af() | Unknown | |
ntdll.dll!00007ff9c36014b4() | Unknown | |
ntdll.dll!00007ff9c3650ebe() | Unknown | |
KernelBase.dll!00007ff9c101cf19() | Unknown | |
vcruntime140d.dll!00007ff937f2b760() | Unknown | |
c10.dll!00007ff8d599809d() | Unknown | |
torch_cpu.dll!00007ff85fef1cdc() | Unknown | |
torch_cuda.dll!00007ff896eef11b() | Unknown | |
torch_cuda.dll!00007ff896a9a3de() | Unknown | |
torch_cuda.dll!00007ff896c1b26c() | Unknown | |
torch_cuda.dll!00007ff896c9f722() | Unknown | |
torch_cpu.dll!00007ff861098d41() | Unknown | |
torch_cpu.dll!00007ff861015ec5() | Unknown | |
torch_cpu.dll!00007ff861183d1c() | Unknown | |
torch_cpu.dll!00007ff86122ada9() | Unknown | |
torch_cpu.dll!00007ff86186b7a5() | Unknown | |
torch_cpu.dll!00007ff864d37d5a() | Unknown | |
torch_cpu.dll!00007ff864bb7184() | Unknown | |
torch_cpu.dll!00007ff864bb64fa() | Unknown | |
torch_cpu.dll!00007ff864c3a570() | Unknown | |
torch_cpu.dll!00007ff864c5fc8f() | Unknown | |
torch_cpu.dll!00007ff861098d41() | Unknown | |
torch_cpu.dll!00007ff861015ec5() | Unknown | |
torch_cpu.dll!00007ff861015c3d() | Unknown | |
torch_cpu.dll!00007ff8612070ec() | Unknown | |
torch_cpu.dll!00007ff86186b6d6() | Unknown | |
torch_cpu.dll!00007ff85f9c7100() | Unknown | |
torch_cpu.dll!00007ff8603661f1() | Unknown | |
torch_cpu.dll!00007ff860353225() | Unknown | |
torch_cpu.dll!00007ff862546800() | Unknown | |
torch_cpu.dll!00007ff86257e63c() | Unknown | |
torch_cpu.dll!00007ff8625d9842() | Unknown | |
torch_cpu.dll!00007ff861098d41() | Unknown | |
torch_cpu.dll!00007ff861015ec5() | Unknown | |
torch_cpu.dll!00007ff861015c3d() | Unknown | |
torch_cpu.dll!00007ff8612070ec() | Unknown | |
torch_cpu.dll!00007ff861a83e96() | Unknown | |
torch_cpu.dll!00007ff85f9c6790() | Unknown | |
torch_cpu.dll!00007ff8674aadc6() | Unknown | |
torch_cpu.dll!00007ff8674f3583() | Unknown | |
> | DisTest.exe!MyNetImpl::forward(at::Tensor x1) Line 16 | C++ |
DisTest.exe!ThreadHandler::train(MyNet extern_net, int seed, Options options) Line 65 | C++ | |
DisTest.exe!ThreadHandler::start() Line 15 | C++ | |
[External Code] |