Bug in C++ TorchScript tutorial

Hi! I tried following the tutorial on loading a TorchScript model in C++, and adapting the code to run on my GPU. My BERT-based model takes in three tensors and outputs two — when I change the code to the following:

#include <torch/script.h> // One-stop header.

#include <iostream>
#include <memory>

int main(int argc, const char* argv[]) {
  if (argc != 2) {
    std::cerr << "usage: example-app <path-to-exported-script-module>\n";
    return -1;
  }

  torch::jit::script::Module module;
  try {
    // Deserialize the ScriptModule from a file using torch::jit::load().
    module = torch::jit::load(argv[1]);
  } catch (const c10::Error& e) {
    std::cerr << "error loading the model\n";
    return -1;
  }

  // Create a vector of inputs, moving each tensor to the GPU.
  std::vector<torch::jit::IValue> inputs;
  auto input1 = torch::ones({1, 128}).to(torch::kLong);
  input1 = input1.to(at::kCUDA);
  inputs.push_back(input1);

  auto input2 = torch::ones({1, 128}).to(torch::kLong);
  input2 = input2.to(at::kCUDA);
  inputs.push_back(input2);

  auto input3 = torch::ones({1, 128}).to(torch::kLong);
  input3 = input3.to(at::kCUDA);
  inputs.push_back(input3);

  // Execute the model and unpack its output tuple.
  auto output = module.forward(inputs).toTuple();
  torch::Tensor out1 = output->elements()[0].toTensor();
  std::cout << out1 << '\n';

  std::cout << "ok\n";
  return 0;
}

I get the following error:

terminate called after throwing an instance of 'std::runtime_error'
  what():  The following operation failed in the TorchScript interpreter.
Traceback of TorchScript, serialized code (most recent call last):
  File "code/__torch__.py", line 10, in forward
    input: Tensor) -> Tensor:
    _0 = getattr(self.encoder, "1")
    _1 = (getattr(self.encoder, "0")).forward(input_ids, attention_mask, input, )
          ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
    _2, _3, _4, _5, = _1
    _6 = (_0).forward(attention_mask, _2, _3, _4, _5, )
  File "code/__torch__/sentence_transformers/models/BERT.py", line 9, in forward
    attention_mask: Tensor,
    input: Tensor) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
    _0 = (self.bert).forward(input_ids, attention_mask, input, )
          ~~~~~~~~~~~~~~~~~~ <--- HERE
    _1, _2, _3, _4, _5, = _0
    return (_2, _3, _4, _5)
  File "code/__torch__/transformers/modeling_bert.py", line 19, in forward
    _5 = torch.to(extended_attention_mask, 6, False, False, None)
    attention_mask0 = torch.mul(torch.rsub(_5, 1., 1), CONSTANTS.c0)
    _6 = (_1).forward((_2).forward(input_ids, input, ), attention_mask0, )
                       ~~~~~~~~~~~ <--- HERE
    _7 = (_0).forward(_6, )
    return (_6, _6, _6, _6, _6)
  File "code/__torch__/transformers/modeling_bert.py", line 44, in forward
    position_ids = torch.arange(annotate(number, seq_length), dtype=4, layout=0, device=torch.device("cuda:0"), pin_memory=False)
    input0 = torch.expand(torch.unsqueeze(position_ids, 0), [_14, _15], implicit=False)
    _16 = (_12).forward(input_ids, )
           ~~~~~~~~~~~~ <--- HERE
    _17 = (_11).forward(input0, )
    _18 = (_10).forward(input, )
  File "code/__torch__/torch/nn/modules/sparse.py", line 7, in forward
  def forward(self: __torch__.torch.nn.modules.sparse.Embedding,
    input_ids: Tensor) -> Tensor:
    inputs_embeds = torch.embedding(self.weight, input_ids, 0, False, False)
                    ~~~~~~~~~~~~~~~ <--- HERE
    return inputs_embeds

Traceback of TorchScript, original code (most recent call last):
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/torch/nn/functional.py(1724): embedding
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/torch/nn/modules/sparse.py(114): forward
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/torch/nn/modules/module.py(534): _slow_forward
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/torch/nn/modules/module.py(548): __call__
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/transformers/modeling_bert.py(174): forward
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/torch/nn/modules/module.py(534): _slow_forward
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/torch/nn/modules/module.py(548): __call__
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/transformers/modeling_bert.py(727): forward
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/torch/nn/modules/module.py(534): _slow_forward
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/torch/nn/modules/module.py(548): __call__
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/sentence_transformers/models/BERT.py(33): forward
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/torch/nn/modules/module.py(534): _slow_forward
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/torch/nn/modules/module.py(548): __call__
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/torch/nn/modules/container.py(100): forward
<stdin>(10): forward
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/torch/nn/modules/module.py(534): _slow_forward
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/torch/nn/modules/module.py(548): __call__
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/torch/jit/__init__.py(1027): trace_module
/home/john/.pyenv/versions/3.7.7/lib/python3.7/site-packages/torch/jit/__init__.py(875): trace
<stdin>(1): <module>
RuntimeError: Expected object of device type cuda but got device type cpu for argument #3 'index' in call to _th_index_select

Aborted (core dumped)

Why would this be? All of my input tensors should have been moved to the GPU, yet the error tells me that one of them is on the CPU. Any advice would be greatly appreciated.

Just wanted to bump this – would really appreciate any pointers!

What does BERT.py look like? It's possible that you are not registering and encapsulating all of the attributes (e.g. your embedding layers) in the module, so when you move the model to the GPU, not all of the attributes move over with it.
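To see why unregistered attributes get left behind, here is a minimal, torch-free sketch of the registration mechanism `nn.Module` uses (hypothetical `Layer`/`Module` classes that only mimic the real behavior): direct attribute assignment registers a submodule, but anything hidden inside a plain Python list is invisible to `.cuda()`.

```python
class Layer:
    """Stand-in for a layer that tracks which device it lives on."""
    def __init__(self):
        self.device = "cpu"


class Module:
    """Toy mimic of nn.Module's attribute-registration behavior."""
    def __init__(self):
        # Bypass our own __setattr__ while bootstrapping the registry.
        object.__setattr__(self, "_modules", {})

    def __setattr__(self, name, value):
        # Like torch.nn.Module: only a direct Layer/Module assignment
        # is registered; containers such as lists are opaque.
        if isinstance(value, (Layer, Module)):
            self._modules[name] = value
        object.__setattr__(self, name, value)

    def cuda(self):
        # Moves only *registered* submodules, exactly like the real thing.
        for m in self._modules.values():
            if isinstance(m, Layer):
                m.device = "cuda"
            else:
                m.cuda()
        return self


class Model(Module):
    def __init__(self, n):
        super().__init__()
        self.head = Layer()                       # registered
        self.embs = [Layer() for _ in range(n)]   # NOT registered


model = Model(3).cuda()
print(model.head.device)     # "cuda"
print(model.embs[0].device)  # "cpu" -> this is the tensor the error is about
```

The real `nn.Module.__setattr__` does the same bookkeeping (into `_modules`/`_parameters`), which is why layers kept in a bare list never receive the `.cuda()` call.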

> PyTorch requires you to do self.module_name = module for things to work correctly. It's okay to keep them in a list. Just do something like setattr(self, 'emb_{}'.format(i), emb) for each step in that loop.

Because I was managing my embedding layers in a plain Python list, whereas PyTorch requires all layers to be registered as attributes on the model object, they were not automatically moved over to GPU memory when model.cuda() was called. Tricky!
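For anyone hitting the same thing: in current PyTorch the idiomatic fix is `nn.ModuleList`, which registers every layer it holds; the `setattr` loop quoted above is an equivalent alternative. A minimal sketch (the model and dimensions here are made up for illustration, not the actual sentence_transformers code):

```python
import torch.nn as nn


class Model(nn.Module):
    def __init__(self, embedding_dims):
        super().__init__()
        # nn.ModuleList registers each layer, so .cuda()/.to() and
        # .parameters() all see them -- unlike a plain Python list.
        self.embs = nn.ModuleList(
            nn.Embedding(100, d) for d in embedding_dims
        )
        # Equivalent manual registration, as suggested above:
        # for i, emb in enumerate(layers):
        #     setattr(self, 'emb_{}'.format(i), emb)


model = Model([8, 16])
# Both embedding weights now appear in model.parameters(), so
# model.cuda() would move them along with the rest of the model.
print(sum(p.numel() for p in model.parameters()))  # 100*8 + 100*16 = 2400
```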