I am trying to compare a Python script that uses a pretrained classification model from the torchvision library against an equivalent implementation written with the PyTorch C++ API (libtorch), in order to make sure the model will work in my application.
I have tried the following script in Python:
import torch
from torchvision import transforms
from PIL import Image
# Classify a single image with a TorchScript model and print the top-1
# probability together with its class index.
_IMAGE_FILENAME = "../images/goldfish.jpg"
_MODEL_JIT_FILENAME = "../jit_models_bin/traced_mnasnet0_5.pt"

model = torch.jit.load(_MODEL_JIT_FILENAME)

# Preprocessing: resize to 224x224, convert to a float CHW tensor in [0, 1],
# then normalize each channel with the given mean / std.
tfm = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# The context manager closes the underlying file handle; convert("RGB")
# guarantees exactly three channels so that grayscale / RGBA / palette images
# do not break the 3-channel Normalize step.
with Image.open(_IMAGE_FILENAME) as image_file:
    image = tfm(image_file.convert("RGB"))

# Add the batch dimension: (C, H, W) -> (1, C, H, W).
image = image.unsqueeze(dim=0)

# Inference only: no autograd graph is needed, so the printed tensors carry
# no grad_fn and no memory is spent on gradient bookkeeping.
with torch.no_grad():
    output = model(image)
    output = torch.softmax(output, 1)
    prob_value, index = torch.max(output, 1)

print("Probability Value: ")
print(prob_value)
print("ImageNet Index: ")
print(index)
And I used the following code in C++:
#include <iostream>
#include <vector>
#include <string>
#include <torch/torch.h>
#include <torch/script.h>
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
int main() {
const cv::String _IMAGE_FILENAME = "../images/goldfish.jpg";
const std::string _MODEL_JIT_FILENAME= "../jit_models_bin/traced_mnasnet0_5.pt";
cv::Mat img = cv::imread( _IMAGE_FILENAME, cv::IMREAD_UNCHANGED );
cv::Size rsz = { 224, 224 };
cv::resize( img, img, rsz, 0, 0, cv::INTER_LINEAR );
img.convertTo( img, CV_32FC3, 1/255.0 );
at::Tensor tensorImage = torch::from_blob(img.data, { 1, img.rows, img.cols, 3 }, at::kFloat);
tensorImage = tensorImage.permute({0, 3, 1, 2});
// Normalize data
tensorImage[0][0] = tensorImage[0][0].sub(0.485).div(0.229);
tensorImage[0][1] = tensorImage[0][1].sub(0.456).div(0.224);
tensorImage[0][2] = tensorImage[0][2].sub(0.406).div(0.225);
std::vector<torch::jit::IValue> input;
input.push_back(tensorImage);
torch::jit::script::Module model = torch::jit::load( _MODEL_JIT_FILENAME );
at::Tensor output = torch::softmax(model.forward(input).toTensor(), 1);
std::tuple<at::Tensor, at::Tensor> result = torch::max(output, 1);
std::cout << "Probability Value: " << std::endl;
std::cout << std::get<0>(result) << std::endl;
std::cout << "ImageNet Index" << std::endl;
std::cout << std::get<1>(result) << std::endl;
return 0;
}
The results I get in the Python script:
Probability: 0.99, Index: 1, Label: GoldFish
The results I get in C++:
Probability: 0.4839, Index: 584
I used PyTorch 1.3 with Python 3.7, and the latest libtorch release corresponding to that version.
Any ideas why the two results differ?