Error when using object detection model from torchvision in C++: "forward() Expected a value of type 'List[Tensor]' for argument 'images' but instead found type 'Tensor'"

I took the official torchvision C++ example project and changed it so that it uses an object detection model, ssdlite320_mobilenet_v3_large, instead of the image recognition model resnet18. This causes the following error when running the built executable:

⋊> /w/o/v/e/c/h/build on main ⨯ ./hello-world                                                                                                                                       14:12:27
terminate called after throwing an instance of 'c10::Error'
  what():  forward() Expected a value of type 'List[Tensor]' for argument 'images' but instead found type 'Tensor'.
Position: 1
Declaration: forward(__torch__.torchvision.models.detection.ssd.SSD self, Tensor[] images, Dict(str, Tensor)[]? targets=None) -> ((Dict(str, Tensor), Dict(str, Tensor)[]))
Exception raised from checkArg at ../aten/src/ATen/core/function_schema_inl.h:339 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >) + 0x6b (0x7f0cb87da05b in /work/Downloads/libtorch/lib/libc10.so)
frame #1: c10::detail::torchCheckFail(char const*, char const*, unsigned int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) + 0xbf (0x7f0cb87d4f6f in /work/Downloads/libtorch/lib/libc10.so)
frame #2: void c10::FunctionSchema::checkArg<c10::Type>(c10::IValue const&, c10::Argument const&, c10::optional<unsigned long>) const + 0x151 (0x7f0cb9de0361 in /work/Downloads/libtorch/lib/libtorch_cpu.so)
frame #3: void c10::FunctionSchema::checkAndNormalizeInputs<c10::Type>(std::vector<c10::IValue, std::allocator<c10::IValue> >&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, c10::IValue, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, c10::IValue> > > const&) const + 0x217 (0x7f0cb9de1ba7 in /work/Downloads/libtorch/lib/libtorch_cpu.so)
frame #4: torch::jit::Method::operator()(std::vector<c10::IValue, std::allocator<c10::IValue> >, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, c10::IValue, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, c10::IValue> > > const&) const + 0x173 (0x7f0cbcde5b53 in /work/Downloads/libtorch/lib/libtorch_cpu.so)
frame #5: <unknown function> + 0x151da (0x56495747d1da in ./hello-world)
frame #6: <unknown function> + 0x11c90 (0x564957479c90 in ./hello-world)
frame #7: <unknown function> + 0x29d90 (0x7f0cb830dd90 in /lib/x86_64-linux-gnu/libc.so.6)
frame #8: __libc_start_main + 0x80 (0x7f0cb830de40 in /lib/x86_64-linux-gnu/libc.so.6)
frame #9: <unknown function> + 0x11765 (0x564957479765 in ./hello-world)

fish: Job 1, './hello-world' terminated by signal SIGABRT (Abort)

The modified code looks as follows:

trace_model.py

import os.path as osp

import torch
import torchvision

# Build the detection model and put it in inference mode before exporting it.
model = torchvision.models.detection.ssdlite320_mobilenet_v3_large()
model.eval()

# torch.jit.script compiles the module to TorchScript (the model is scripted,
# not traced). The exported forward() is declared as
# forward(self, Tensor[] images, ...), i.e. callers must pass a list of image
# tensors rather than a single tensor.
scripted_model = torch.jit.script(model)
scripted_model.save("ssdlite320_mobilenet_v3_large.pt")

main.cpp

#include <torch/script.h>
#include <torchvision/vision.h>

int main()
{
    torch::jit::script::Module model = torch::jit::load("ssdlite320_mobilenet_v3_large.pt");
    auto inputs = std::vector<torch::jit::IValue> {torch::rand({1, 3, 10, 10})};
    auto out = model.forward(inputs);
    std::cout << out << "\n";
}

Do you have any idea what’s going on here?

Did you check which inputs work in Python before exporting the model? Based on the error message it seems List[Tensor] is expected.

Not yet, but I think I just simplified the example code a little too much. Turns out I have to do this:

// NOTE(review): this still hands forward() a single Tensor as `images`, while the schema
// quoted in the first error message declares `Tensor[] images` — confirm whether the
// tensor has to be wrapped in a list of tensors here.
std::vector<torch::jit::IValue> inputs;
inputs.push_back(torch::rand({1, 3, 10, 10}));

not that:

// NOTE(review): presumably builds the same one-element vector (holding one Tensor) as the
// push_back form — verify that the two variants really behave differently.
auto inputs = std::vector<torch::jit::IValue> {torch::rand({1, 3, 10, 10})};

In the example code I now get another error when running the binary:

Other error: 
Unknown builtin op: torchvision::nms.
Could not find any similar ops to torchvision::nms. This op may not exist or may not be currently supported in TorchScript.
:
  File "code/__torch__/torchvision/ops/boxes.py", line 128
  _55 = __torch__.torchvision.extension._assert_has_ops
  _56 = _55()
  _57 = ops.torchvision.nms(boxes, scores, iou_threshold)
        ~~~~~~~~~~~~~~~~~~~ <--- HERE
  return _57
'nms' is being compiled since it was called from '_batched_nms_vanilla'
  File "/home/thomas.fritz/.local/lib/python3.10/site-packages/torchvision/ops/boxes.py", line 109
    for class_id in torch.unique(idxs):
        curr_indices = torch.where(idxs == class_id)[0]
        curr_keep_indices = nms(boxes[curr_indices], scores[curr_indices], iou_threshold)
                            ~~~ <--- HERE
        keep_mask[curr_indices[curr_keep_indices]] = True
    keep_indices = torch.where(keep_mask)[0]
Serialized   File "code/__torch__/torchvision/ops/boxes.py", line 85
    _32 = torch.index(boxes, _31)
    _33 = annotate(List[Optional[Tensor]], [curr_indices])
    curr_keep_indices = __torch__.torchvision.ops.boxes.nms(_32, torch.index(scores, _33), iou_threshold, )
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
    _34 = annotate(List[Optional[Tensor]], [curr_keep_indices])
    _35 = torch.index(curr_indices, _34)
'_batched_nms_vanilla' is being compiled since it was called from 'batched_nms'
Serialized   File "code/__torch__/torchvision/ops/boxes.py", line 30
    idxs: Tensor,
    iou_threshold: float) -> Tensor:
  _6 = __torch__.torchvision.ops.boxes._batched_nms_vanilla
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
  _7 = __torch__.torchvision.ops.boxes._batched_nms_coordinate_trick
  _8 = torch.numel(boxes)
'batched_nms' is being compiled since it was called from 'SSD.postprocess_detections'
Serialized   File "code/__torch__/torchvision/models/detection/ssd.py", line 230
    _81 = __torch__.torchvision.ops.boxes.clip_boxes_to_image
    _82 = __torch__.torchvision.models.detection._utils._topk_min
    _83 = __torch__.torchvision.ops.boxes.batched_nms
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
    bbox_regression = head_outputs["bbox_regression"]
    pred_scores = _80(head_outputs["cls_logits"], -1, 3, None, )
'SSD.postprocess_detections' is being compiled since it was called from 'SSD.forward'
  File "/home/thomas.fritz/.local/lib/python3.10/site-packages/torchvision/models/detection/ssd.py", line 404
                losses = self.compute_loss(targets, head_outputs, anchors, matched_idxs)
        else:
            detections = self.postprocess_detections(head_outputs, anchors, images.image_sizes)
                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
            detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)
    
Serialized   File "code/__torch__/torchvision/models/detection/ssd.py", line 137
    else:
      image_sizes = images0.image_sizes
      detections1 = (self).postprocess_detections(head_outputs, anchors, image_sizes, )
                                                                         ~~~~~~~~~~~ <--- HERE
      transform0 = self.transform
      image_sizes0 = images0.image_sizes