Thanks for your reply, here is a simplified version of the executable:
struct Net : torch::nn::Module {
  Net() :
    conv1a(torch::nn::Conv2dOptions(  1,  64, 3).stride(1).padding(1)),
    conv1b(torch::nn::Conv2dOptions( 64,  64, 3).stride(1).padding(1)),
    conv2a(torch::nn::Conv2dOptions( 64,  64, 3).stride(1).padding(1)),
    conv2b(torch::nn::Conv2dOptions( 64,  64, 3).stride(1).padding(1)),
    conv3a(torch::nn::Conv2dOptions( 64, 128, 3).stride(1).padding(1)),
    conv3b(torch::nn::Conv2dOptions(128, 128, 3).stride(1).padding(1)),
    conv4a(torch::nn::Conv2dOptions(128, 256, 3).stride(1).padding(1)),
    conv4b(torch::nn::Conv2dOptions(256, 256, 3).stride(1).padding(1)),
    convPa(torch::nn::Conv2dOptions(256, 256, 3).stride(1).padding(1)),
    convPb(torch::nn::Conv2dOptions(256,  65, 1).stride(1).padding(0)),
    convDa(torch::nn::Conv2dOptions(256, 256, 3).stride(1).padding(1)),
    convDb(torch::nn::Conv2dOptions(256, 256, 1).stride(1).padding(0)) {
    register_module("conv1a", conv1a);
    register_module("conv1b", conv1b);
    register_module("conv2a", conv2a);
    register_module("conv2b", conv2b);
    register_module("conv3a", conv3a);
    register_module("conv3b", conv3b);
    register_module("conv4a", conv4a);
    register_module("conv4b", conv4b);
    register_module("convPa", convPa);
    register_module("convPb", convPb);
    register_module("convDa", convDa);
    register_module("convDb", convDb);
  }
  std::vector<torch::Tensor> forward(torch::Tensor x) {
    // Shared encoder
    x = torch::relu(conv1a->forward(x));
    x = torch::relu(conv1b->forward(x));
    x = torch::max_pool2d(x, 2, 2);
    x = torch::relu(conv2a->forward(x));
    x = torch::relu(conv2b->forward(x));
    x = torch::max_pool2d(x, 2, 2);
    x = torch::relu(conv3a->forward(x));
    x = torch::relu(conv3b->forward(x));
    x = torch::max_pool2d(x, 2, 2);
    x = torch::relu(conv4a->forward(x));
    x = torch::relu(conv4b->forward(x));

    // Detector head
    auto cPa  = torch::relu(convPa->forward(x));
    auto semi = convPb->forward(cPa);                    // [B, 65, H/8, W/8]

    // Descriptor head
    auto cDa  = torch::relu(convDa->forward(x));
    auto desc = convDb->forward(cDa);                    // [B, 256, H/8, W/8]

    // L2-normalize the descriptors along the channel dimension
    auto dn = torch::norm(desc, 2, 1);
    desc = desc.div(torch::unsqueeze(dn, 1));

    // Drop the "dustbin" channel and rearrange the 64 cell bins into a full-resolution heatmap
    semi = torch::softmax(semi, 1);
    semi = semi.slice(1, 0, 64);
    semi = semi.permute({0, 2, 3, 1});                   // [B, H/8, W/8, 64]
    int Hc = semi.size(1);
    int Wc = semi.size(2);
    semi = semi.contiguous().view({-1, Hc, Wc, 8, 8});
    semi = semi.permute({0, 1, 3, 2, 4});
    semi = semi.contiguous().view({-1, Hc * 8, Wc * 8}); // [B, H, W]

    return {semi, desc};
  }
  torch::nn::Conv2d conv1a;
  torch::nn::Conv2d conv1b;
  torch::nn::Conv2d conv2a;
  torch::nn::Conv2d conv2b;
  torch::nn::Conv2d conv3a;
  torch::nn::Conv2d conv3b;
  torch::nn::Conv2d conv4a;
  torch::nn::Conv2d conv4b;
  torch::nn::Conv2d convPa;
  torch::nn::Conv2d convPb;
  torch::nn::Conv2d convDa;
  torch::nn::Conv2d convDb;
};
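Just for reference, with a dummy input the two outputs come out with these shapes (a minimal sketch; shapes_demo and the 480x640 resolution are only illustrative, the size just has to be divisible by 8):

// Quick shape check for the Net above (assumes <torch/torch.h> and <iostream> are included).
void shapes_demo() {
  auto net = std::make_shared<Net>();
  net->eval();
  torch::NoGradGuard no_grad;
  auto x = torch::rand({1, 1, 480, 640});      // [B, 1, H, W]
  auto out = net->forward(x);
  std::cout << out[0].sizes() << std::endl;    // [1, 480, 640]    dense keypoint heatmap
  std::cout << out[1].sizes() << std::endl;    // [1, 256, 60, 80] coarse descriptors
}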
torch::Tensor predict(const cv::Mat& img, std::shared_ptr<Net> model, bool cuda) {
  if (model == nullptr)
    throw std::runtime_error("Predictor::detect|ERROR, model not set correctly!");
  torch::NoGradGuard no_guard;
  // img is a single-channel CV_32F image, wrapped as a [1, 1, H, W] float32 tensor and copied
  torch::Tensor img_tensor =
      torch::from_blob(img.clone().data, {1, 1, img.rows, img.cols}, torch::kFloat32).clone();
  bool use_cuda = cuda && torch::cuda::is_available();
  torch::DeviceType device_type;
  if (use_cuda)
    device_type = torch::kCUDA;
  else
    device_type = torch::kCPU;
  torch::Device device(device_type);
  model->to(device);
  img_tensor = img_tensor.set_requires_grad(false);
  auto out = model->forward(img_tensor.to(device));
  torch::Tensor prob = out[0].squeeze(0); // [H, W]
  // torch::Tensor desc = out[1];         // [1, 256, H/8, W/8]
  return prob;
}
int main() {
  std::shared_ptr<Net> model(new Net);
  torch::load(model, "path/to/weights");
  while (...) { // loop condition omitted for brevity
    // generate img and check that it is not corrupted
    torch::Tensor probs = predict(cv_img, model, false);
    // do stuff on probs
    // std::this_thread::sleep_for(std::chrono::milliseconds(sleep_for));
  }
}
The problem is that probs, the return value of the predict function, is not always populated correctly. I have already checked that the input cv::Mat is valid before it is passed to predict. If I add the sleep that is commented out in the code above, the behaviour occurs less frequently.
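To give an idea of what I mean by "not populated correctly", this is a minimal sketch of the kind of sanity check I mean on probs (check_probs is just a placeholder name, not my real post-processing):

// Illustrative only: checks the detector output against the input image size.
// Assumes <cassert> and <iostream> are included.
void check_probs(const torch::Tensor& probs, const cv::Mat& img) {
  std::cout << "sizes: "   << probs.sizes()
            << " finite: " << torch::isfinite(probs).all().item<bool>()
            << " max: "    << probs.max().item<float>() << std::endl;
  assert(probs.dim() == 2 && probs.size(0) == img.rows && probs.size(1) == img.cols);
}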
Thank you for your help.