Hi everyone,
This is part of my code. My problem is that copying the result tensor back to the CPU takes ~90 ms. Where is the problem?
at::Tensor image_tensor = torch::from_blob(
input_preproc.data, { 1, _img_h, _img_w, _img_d }, at::kFloat);
image_tensor = image_tensor.to(*_device);
image_tensor = image_tensor.permute({ 0, 3, 1, 2 });
image_tensor[0][2] =
image_tensor[0][0].div(255).sub(_img_means[0]).div(_img_stds[0]);
image_tensor[0][1] =
image_tensor[0][1].div(255).sub(_img_means[1]).div(_img_stds[1]);
image_tensor[0][0] =
image_tensor[0][2].div(255).sub(_img_means[2]).div(_img_stds[2]);
std::vectortorch::jit::IValue inputs;
inputs.push_back(image_tensor);
torch::Tensor logits_tensor = _module->forward(inputs).toTensor();
torch::Tensor argmax_tensor = logits_tensor.argmax(1, false);
argmax_tensor = argmax_tensor.toType(at::kInt);
// it takes about ~90ms
auto begin = std::chrono::high_resolution_clock::now();
argmax_tensor = argmax_tensor.to(torch::kCPU);
auto end = std::chrono::high_resolution_clock::now();
OS: Win 10
GPU: GTX 1080Ti
CUDA: 10.1