Question on how to interpret the output of FasterRCNN in C++ with the GenericDict type

I’m trying to interpret the output of FasterRCNN in C++ and I’m fighting with the GenericDict type.

My code is as follows:

#include <opencv4/opencv2/opencv.hpp>
#include <opencv4/opencv2/shape.hpp>
#include <opencv4/opencv2/imgcodecs.hpp>
#include <opencv4/opencv2/highgui.hpp>
#include <opencv4/opencv2/imgproc.hpp>
#include <opencv4/opencv2/core/utility.hpp>
#include <opencv4/opencv2/core/mat.hpp>

#include <c10/cuda/CUDAStream.h>
#include <torch/csrc/autograd/grad_mode.h>

#include <torch/csrc/api/include/torch/torch.h>
#include <torch/script.h>
#include <torchvision/vision.h>
#include <torchvision/nms.h>

#include <iostream>
#include <memory>
#include <string>

int main(int argc, const char* argv[])
{
    if (argc != 3)
    {
        printf("usage: %s <path-to-exported-script-module> <image_to_test>\n",argv[0]);
        return -1;
    }

    std::string module_filename = argv[1];
    std::string image_file = argv[2];

    try
    {
        cv::Mat input_img = cv::imread(image_file, cv::IMREAD_GRAYSCALE);

        torch::autograd::AutoGradMode guard(false);
        // Deserialize the ScriptModule from a file using torch::jit::load().
        torch::jit::script::Module module = torch::jit::load(module_filename);

        assert(module.buffers().size() > 0);

        module.eval();

        // Assume that the entire model is on the same device.
        // We just put input to this device.
        auto device = (*std::begin(module.buffers())).device();

        const int height = input_img.rows;
        const int width  = input_img.cols;
        const int channels = 1;

        auto input = torch::from_blob(input_img.data, {height, width, channels}, torch::kUInt8);
        // HWC to CHW
        // input = input.to(device, torch::kFloat).permute({2, 0, 1}).contiguous();
        input = input.to(device, torch::kFloat).permute({2, 0, 1}).contiguous();

        // run the network
        std::vector<at::Tensor> inputs;
        inputs.push_back(input);
        auto output = module.forward({inputs});
        if (device.is_cuda())
            c10::cuda::getCurrentCUDAStream().synchronize();

        std::cout << "output: " << output << std::endl;

        auto outputs = output.toTuple()->elements();

        std::cout << "outputs: " << outputs << std::endl;

        for( auto& elem : outputs )
        {
            std::cout << "elem: " << elem << std::endl;
            if( elem.isGenericDict() )
            {
                std::cout << "elem is generic dict: " << elem << std::endl;
                c10::Dict<c10::IValue, c10::IValue> dict = elem.toGenericDict();

                auto elem_vector_0 = dict.at(c10::IValue("scores")).toIntVector();
                auto elem_vector_1 = dict.at(c10::IValue("boxes")).toIntVector();
                auto elem_vector_2 = dict.at(c10::IValue("labels")).toIntVector();

                for( auto& ee0 : elem_vector_0 )
                {
                    std::cout << "elem_vector_0" << ee0 << std::endl;
                }
                for( auto& ee0 : elem_vector_1 )
                {
                    std::cout << "elem_vector_1" << ee0 << std::endl;
                }
                for( auto& ee0 : elem_vector_2 )
                {
                    std::cout << "elem_vector_2" << ee0 << std::endl;
                }
            }
        }

        cv::namedWindow("Display Image", cv::WINDOW_AUTOSIZE );
        cv::imshow("Display Image", input_img);
        cv::waitKey(0);
    }
    catch(const c10::Error& e)
    {
        std::cerr << e.what() << std::endl;
        return -1;
    }
    catch(const cv::Exception& e)
    {
        std::cerr << e.what() << std::endl;
        return -1;
    }
    catch(const std::exception& e)
    {
        std::cerr << e.what() << std::endl;
        return -1;
    }
    catch(...)
    {
        std::cerr << "Unknown error" << std::endl;
        return -1;
    }

    std::cout << "ok\n";
    return 0;
}

and the output is:

(base) fstrati@fstrati-desktop:~/libtorch_shared_cuda_10.1/load_and_run_model/Release$ ./load_and_run_model ./torch_script_v0.2.pt test_img.png 
[W faster_rcnn.py:95] Warning: RCNN always returns a (Losses, Detections) tuple in scripting (function )
output: ({}, [{boxes: [ CPUFloatType{0,4} ], labels: [ CPULongType{0} ], scores: [ CPUFloatType{0} ]}])
outputs: {} [{boxes: [ CPUFloatType{0,4} ], labels: [ CPULongType{0} ], scores: [ CPUFloatType{0} ]}]
elem: {}
elem is generic dict: {}
Argument passed to at() was not in the map.

I’m struggling to find a way to extract the boxes, labels and scores from the dictionary GenericDict.

This map is strange: I cannot iterate over it, and I cannot access its entries with it->first / it->second the way I would with a std::map.

Any ideas ?

Thanks in advance

1 Like

Hi, I am having the same problem. Have you found a solution ?

Hi! I’ve got the same problem. Did you overcome that obstacle? Could you advise something?

you can try following code:


    // `out` is the (Losses, Detections) tuple returned by the scripted RCNN:
    // out[1] is a List holding one detection dict per input image, so
    // get(0) picks the dict for the first (here: only) image.
    auto val_out = out[1].toList().get(0).toGenericDict();
    auto boxes = val_out.at("boxes");
    auto scores = val_out.at("scores");


    // The dict values are Tensors ("boxes": Nx4 float, "scores": N float per
    // the thread's printed output), hence toTensor() rather than toIntVector().
    // NOTE(review): Tensor::data() is deprecated in recent libtorch releases
    // in favor of using the tensor directly — confirm against your version.
    at::Tensor bbox = boxes.toTensor().data();
    at::Tensor bscore = scores.toTensor().data();
1 Like

Can you share the code you used to convert the model trained in Python to a TorchScript model? I used my own code to convert it, but the TorchScript model always raises an issue in the forward function.

Yes it works!!! :+1:

I am very glad that it benefits you.
Can you share the code you used to convert the model trained in Python to TorchScript?

Hello
Where to send code?

Thanks for your reply!

You can send the code to qvb751201402@163.com, or you can post the code here!