Expected Tensor but got GenericDict

Description:
I would like to run inference with my model in LibTorch, but I get the error “Expected Tensor but got GenericDict”.


Environment
LibTorch: 1.13.1 (I think), downloaded from PyTorch via this link:
https://download.pytorch.org/libtorch/cu116/libtorch-shared-with-deps-1.13.1%2Bcu116.zip
PyTorch: 1.13.1


Python code for exporting my model for use in C++:

def add_targets(encodings, targets):
  # 'le' is a fitted label encoder (e.g. a sklearn LabelEncoder) defined elsewhere
  encodings.update({'label': le.transform(targets)})

test_encodings = tokenizer(test_questions, truncation=True, padding=True)
add_targets(test_encodings, test_targets)

class Dataset(torch.utils.data.Dataset):
  def __init__(self, encodings):
    self.encodings = encodings
  def __getitem__(self, idx):
    return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  def __len__(self):
    return len(self.encodings.input_ids)

test_dataset = Dataset(test_encodings)

Load model

from transformers import BertConfig, BertForSequenceClassification
config = BertConfig.from_pretrained(config_path)  
model = BertForSequenceClassification.from_pretrained(model_path, config=config)

Export

test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False)

sample = next(iter(test_dataloader))
sample_input_id = sample['input_ids'].squeeze(1).to("cpu")
sample_mask = sample['attention_mask'].to("cpu")

traced_script_module = torch.jit.trace(model.cpu(), [sample_input_id, sample_mask], strict=False)  # strict=False allows the traced module to return a dict
traced_script_module.save("traced_text_classification_model.pt")

The traced model runs inference correctly in Python.


C++ code for inference

#include <iostream>
#include <map>
#include <fstream>
#include <string>
#include <sstream>
#include <cstring>
#include <vector>
#include <istream>

#include <torch/torch.h>
#include <torch/script.h>

std::pair<torch::Tensor, torch::Tensor> preprocess(std::string text, std::map<std::string, int> token2id, int max_length, bool log = false){
    std::string pad_token = "[PAD]", start_token = "[CLS]", end_token = "[SEP]";
    int pad_token_id = token2id[pad_token], start_token_id = token2id[start_token], end_token_id = token2id[end_token]; //special token decode

    std::vector<int> input_ids(max_length, pad_token_id), masks(max_length, 0);
    // initialize every element of input_ids to the pad token and every attention-mask element to 0
    input_ids[0] = start_token_id; masks[0] = 1; // start token -> [CLS]

    std::string word;
    std::istringstream ss(text);
    
    int input_id = 1;
    while(getline(ss, word, ' ')) {
        int word_id = token2id[word];
        masks[input_id] = 1;
        input_ids[input_id++] = word_id;
        
        if (log)
            std::cout << word << " : " << word_id << '\n';
    }
    

    masks[input_id] = 1;
    input_ids[input_id] = end_token_id;

    if (log){
        for (auto i : input_ids)
            std::cout << i << ' ';
        std::cout << '\n';
    
        for (auto i : masks)
            std::cout << i << ' ';
        std::cout << '\n';
    }

    auto input_ids_tensor = torch::tensor(input_ids).to(torch::kInt64).unsqueeze(0); // [1, max_length], int64 to match the trace inputs
    auto masks_tensor = torch::tensor(masks).to(torch::kInt64).unsqueeze(0); // [1, max_length]; the Python trace used a 2-D mask

    return std::make_pair(input_ids_tensor, masks_tensor);
}
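
A quick sanity check (a debugging sketch, not part of my original code; text and token2id are assumed to be in scope): the shapes and dtypes produced here should match what torch.jit.trace saw on the Python side, i.e. [1, seq_len] int64 tensors.

// Sanity check: these should match the Python trace inputs ([1, seq_len] int64).
torch::Tensor ids, mask;
std::tie(ids, mask) = preprocess(text, token2id, 128);
std::cout << ids.sizes() << ' ' << ids.dtype() << '\n';   // expect [1, 128], Long
std::cout << mask.sizes() << ' ' << mask.dtype() << '\n'; // expect [1, 128], Long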

struct Model{
    int max_length = 128;
    std::map<std::string, int> token2id;
    std::map<int, std::string> id2token;
    torch::jit::script::Module bert;

    void init_vocab(std::string vocab_path = "model/new_vocab.txt"){
        std::tie(token2id, id2token) = get_vocab(vocab_path);
    }

    void init_bert(std::string bert_path = "/user_data/web/model/traced_text_classification_model.pt"){
        bert = load_model(bert_path);
    }

    std::pair<std::map<std::string, int>, std::map<int, std::string>> get_vocab(std::string vocab_path){
        std::map<std::string, int> token2id;
        std::map<int, std::string> id2token;

        std::fstream newfile;
        newfile.open(vocab_path, std::ios::in);

        std::string line;
        while(getline(newfile, line)){
            // each line is expected to hold "token id";
            // parse with a stringstream instead of strtok on line.c_str()
            std::istringstream ls(line);
            std::string token;
            int token_id;
            ls >> token >> token_id;

            token2id[token] = token_id;
            id2token[token_id] = token;
        }
        newfile.close();

        return std::make_pair(token2id, id2token);
    }

    torch::jit::script::Module load_model(std::string  model_path){
        torch::jit::script::Module module;
        try {
            module = torch::jit::load(model_path);
        }
        catch (const c10::Error& e) {
            std::cerr << "error loading the model\n";
        }
        return module;
    }
};
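
One thing that helps narrow this kind of mismatch down is printing the traced module's forward schema, which includes the declared return type (a small helper sketch; print_forward_schema is my own name, not a LibTorch function):

void print_forward_schema(torch::jit::script::Module& module){
    // the schema's return type shows whether forward() yields a Tensor, Tuple, or Dict
    auto method = module.get_method("forward");
    std::cout << method.function().getSchema() << std::endl;
}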

int main(int argc, char** argv){
    std::cout << "start" << std::endl;
    
    // set seed
    int seed = 42;
    torch::manual_seed(seed);
    torch::cuda::manual_seed(seed);

    auto model = Model();
    model.init_vocab();
    model.init_bert();

    auto token2id = model.token2id;

    std::string text = "今 天 天 氣 真 好"; // "The weather is really nice today" (pre-tokenized, space-separated)
    std::cout << text << std::endl;

    torch::Tensor input_ids, masks;
    std::tie(input_ids, masks) = preprocess(text, token2id, model.max_length);
    std::cout << input_ids << std::endl;
    std::cout << masks << std::endl;

    std::vector<torch::jit::IValue> inputs;
    inputs.push_back(input_ids);
    inputs.push_back(masks);

    auto outputs = model.bert.forward(inputs).toTensor();
    std::string pred = std::to_string(int(outputs.argmax().item<int>()));
    std::cout << "Prediction: " << pred << std::endl;
    return 0;
}

The issue is this line:
auto outputs = model.bert.forward(inputs).toTensor();

Error
terminate called after throwing an instance of ‘c10::Error’
what(): Expected Tensor but got GenericDict
Exception raised from reportToTensorTypeError at …/aten/src/ATen/core/ivalue.cpp:942 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x57 (0x7f8639757dd7 in /user_data/libtorch/lib/libc10.so)
frame #1: c10::detail::torchCheckFail(char const*, char const*, unsigned int, std::string const&) + 0x64 (0x7f8639721c1c in /user_data/libtorch/lib/libc10.so)
frame #2: c10::IValue::reportToTensorTypeError() const + 0x58 (0x7f8620c70fb8 in /user_data/libtorch/lib/libtorch_cpu.so)
frame #3: c10::IValue::toTensor() && + 0x4b (0x5566fba76df7 in ./web)
frame #4: main + 0x4c9 (0x5566fba721fd in ./web)
frame #5: __libc_start_main + 0xf3 (0x7f861ed46083 in /lib/x86_64-linux-gnu/libc.so.6)
frame #6: _start + 0x2e (0x5566fba70d6e in ./web)
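
To confirm what the module actually returns, the IValue can be inspected before any conversion (a sketch of what I would put just before the failing toTensor() call):

auto out = model.bert.forward(inputs);
// check the runtime type instead of assuming a Tensor
if (out.isGenericDict())
    std::cout << "forward() returned a dict" << std::endl;
else if (out.isTuple())
    std::cout << "forward() returned a tuple" << std::endl;
else if (out.isTensor())
    std::cout << "forward() returned a tensor" << std::endl;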


Similar issues:

They solve the issue by changing the forward() function of the model. I am confused about where that change belongs: in Python before exporting, or on the C++ side.
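
If I understand those threads correctly, the C++ side of the fix is to unpack the returned dict instead of calling toTensor() on it directly. A minimal sketch, assuming the dict contains a "logits" entry (which is what BertForSequenceClassification returns when traced with strict=False):

// read the "logits" entry out of the GenericDict instead of
// converting the whole IValue to a Tensor
auto output_dict = model.bert.forward(inputs).toGenericDict();
torch::Tensor logits = output_dict.at("logits").toTensor();
std::cout << "Prediction: " << logits.argmax(1).item<int>() << std::endl;

Alternatively, re-exporting in Python with config.return_dict = False (or torchscript=True in from_pretrained) makes the model return a tuple, which on the C++ side would be unpacked with output.toTuple()->elements()[0].toTensor().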

Does anyone have a suggestion for how I can fix my code?


Thanks for your help!