Different output from LibTorch C++ than PyTorch

I’m using the same traced model in both C++ and Python, but I’m getting different outputs.

Python code

import cv2
import torch
from torchvision import transforms as trans


# device for pytorch
device = torch.device('cuda:0')

torch.set_default_tensor_type('torch.cuda.FloatTensor')

model = torch.jit.load("traced_facelearner_model_new.pt")
model.eval()

# read the example image used for tracing
image = cv2.imread("videos/example.jpg")

test_transform = trans.Compose([
    trans.ToTensor(),
    trans.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

resized_image = cv2.resize(image, (112, 112))

tens = test_transform(resized_image).to(device).unsqueeze(0)
output = model(tens)
print(output)
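For reference, trans.ToTensor() converts the uint8 HWC image to a float CHW tensor and scales the pixel values into [0, 1] before Normalize is applied.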

C++ code

#include <iostream>
#include <opencv2/opencv.hpp>
#include <torch/script.h>


int main()
{
	try
	{
		torch::jit::script::Module model = torch::jit::load("traced_facelearner_model_new.pt");
		model.to(torch::kCUDA);
		model.eval();

		cv::Mat visibleFrame = cv::imread("example.jpg");

		cv::resize(visibleFrame, visibleFrame, cv::Size(112, 112));
		// wrap the OpenCV buffer as a 1xHxWx3 uint8 tensor (no copy is made)
		at::Tensor tensor_image = torch::from_blob(visibleFrame.data, { 1, visibleFrame.rows, visibleFrame.cols, 3 }, at::kByte);
		tensor_image = tensor_image.permute({ 0, 3, 1, 2 }); // NHWC -> NCHW
		tensor_image = tensor_image.to(at::kFloat);

		// normalize each channel
		tensor_image[0][0] = tensor_image[0][0].sub(0.5).div(0.5);
		tensor_image[0][1] = tensor_image[0][1].sub(0.5).div(0.5);
		tensor_image[0][2] = tensor_image[0][2].sub(0.5).div(0.5);

		tensor_image = tensor_image.to(torch::kCUDA);
		std::vector<torch::jit::IValue> input;
		input.emplace_back(tensor_image);
		// Execute the model and turn its output into a tensor.
		auto output = model.forward(input).toTensor();
		output = output.to(torch::kCPU);
		std::cout << "Embds: " << output << std::endl;

		std::cout << "Done!\n";
	}
	catch (const std::exception& e)
	{
		std::cout << "exception: " << e.what() << std::endl;
	}
}

Python output:
tensor([[-1.6270e+00, -7.8417e-02, -3.4403e-01, -1.5171e+00, -1.3259e+00,
-1.1877e+00, -2.0234e-01, -1.0677e+00, 8.8365e-01, 7.2514e-01,
2.3642e+00, -1.4473e+00, -1.6696e+00, -1.2191e+00, 6.7770e-01,
-3.0230e-01, -1.5904e+00, 1.7433e+00, -7.8862e-01, 3.9448e-01,
-1.7189e+00, 1.1014e+00, -2.2981e+00, -5.1542e-01, -1.1593e-01,
6.5024e-01, -6.8557e-01, -7.0064e-01, -1.0784e+00, -7.7883e-01,
1.3773e+00, -1.5619e+00, -2.0540e-01, 1.2147e+00, 7.3867e-01,
1.1110e+00, 1.0524e-01, -1.1249e+00, -5.0620e-01, -5.2198e-01,
1.3556e+00, -1.5315e+00, 1.0446e-01, 9.1795e-01, 2.7186e+00,
-6.9594e-01, 7.4122e-01, 1.4757e+00, 1.2925e-01, -2.6900e-01,
1.5588e+00, -1.0609e+00, -2.0121e-01, -6.8162e-01, 1.1572e-01,
-1.7430e-01, -1.4399e+00, 1.4873e+00, 1.1772e+00, 8.0879e-01,
-1.3121e-01, -2.0003e+00, -7.4500e-02, -4.1007e-01, -1.2315e+00,
-1.1150e+00, -2.1979e+00, -1.2252e+00, -1.5357e+00, 2.3477e+00,
-1.9694e+00, 1.8873e+00, 3.2776e-01, -7.6457e-01, -1.7912e+00,
5.7192e-01, -2.5461e-01, -6.7235e-01, -3.1392e+00, -8.8816e-01,
-6.2070e-01, -7.2750e-01, 2.4999e-01, 1.1434e+00, 1.0114e+00,
3.4786e-01, 9.9722e-01, -4.8731e-01, -5.6572e-01, 1.2642e+00,
-4.4803e-01, -1.4394e+00, -1.8629e-01, 5.3590e-01, 1.4678e+00,
8.5147e-02, -2.0793e+00, -2.8566e-01, 2.9678e-01, -3.4123e-01,
3.1120e-01, 7.2252e-01, 2.7816e+00, 1.0683e+00, -3.1785e+00,
-6.7824e-01, -1.7665e-02, 5.2761e-01, 1.1141e-01, -1.6249e+00,
-2.0966e+00, 1.2752e+00, -8.8363e-01, -1.9442e+00, 1.5579e+00,
5.6738e-01, -3.4520e-01, 9.1841e-01, 7.5063e-02, -1.6585e+00,
2.5177e-01, -1.3581e+00, 3.4045e-01, 1.2807e+00, -3.7098e-01,
5.8744e-01, 9.2038e-01, -4.1502e-01, -1.4006e+00, 1.3954e+00,
-1.1765e+00, 1.3100e+00, 2.1202e+00, 3.0595e+00, 1.7250e-01,
-5.0746e-01, -1.1361e+00, 1.3437e+00, -8.2815e-02, -1.0477e+00,
8.5581e-01, 2.4402e+00, 1.6616e+00, -1.9156e+00, 4.2771e-01,
1.7761e+00, 1.5104e-01, -2.7037e-01, -6.1427e-02, -1.0483e+00,
-2.2830e-01, 3.9742e-01, -6.7260e-01, 2.4361e+00, -7.6196e-01,
1.0965e+00, 1.4753e+00, 8.5338e-01, 4.5726e-01, -1.8667e-01,
-1.1761e+00, -8.8821e-02, 1.3202e-01, 1.5002e+00, -4.9365e-01,
-1.0977e+00, -2.9104e-02, -3.5381e-01, -2.2095e-01, 9.3996e-01,
-1.0770e+00, 9.3767e-01, 2.2430e+00, -7.1536e-01, -7.0468e-01,
-2.1124e+00, -2.7435e+00, 1.7995e+00, 4.1688e-01, 4.2249e-01,
1.1487e-01, -1.1160e-01, 2.0495e+00, -1.6678e+00, -2.2310e+00,
3.1619e-01, -1.0459e-01, -5.3289e-01, -3.8420e-01, -1.3272e+00,
-4.5785e-01, -1.3917e+00, 1.3051e-01, -1.6694e+00, 2.3753e+00,
7.4885e-01, 2.2261e+00, 3.5489e-01, 2.2460e+00, -7.0667e-01,
-3.1920e-01, 2.7467e-01, -1.4723e-01, 2.2449e-01, 3.0860e-01,
-5.6551e-01, 1.3486e+00, -1.0313e+00, -1.8844e-01, -5.4212e-01,
-8.9150e-01, 2.1663e-01, -2.3341e-02, 5.4041e-01, -2.8048e-01,
-8.5421e-01, -1.3455e+00, -5.4566e-03, 3.3249e-01, 3.2633e-02,
-7.2821e-01, -2.1179e+00, -4.3671e-01, 1.6922e-01, -1.5222e+00,
-8.1076e-01, -4.5145e-01, 1.0031e+00, 3.8981e-01, -7.5108e-01,
1.2772e+00, 1.0216e+00, -8.8832e-02, 7.2678e-01, 2.3863e-01,
-7.2614e-01, -9.3102e-01, 1.0179e-01, -3.1820e-01, 1.7549e+00,
2.4568e-02, -2.4448e-01, 6.6527e-01, 8.9161e-01, 2.4075e-01,
7.7993e-01, -2.9786e-01, 3.7189e-01, -1.8534e+00, 1.2161e+00,
-1.4340e-01, -8.4045e-01, -1.7490e-02, -6.3605e-02, -2.6961e-01,
-6.0356e-02, 1.6479e-02, 8.4313e-02, 1.2867e+00, -1.8166e+00,
-4.4236e-01, 1.9492e+00, 7.5414e-02, -1.1048e+00, 3.2055e-01,
1.6554e+00, 1.6603e+00, 5.2739e-01, -8.8670e-02, -3.8753e-01,
1.1036e+00, -8.2550e-02, 1.5303e+00, 7.2115e-01, 6.3496e-01,
-5.9476e-01, -1.7111e+00, -7.4406e-02, 1.2575e+00, 1.0652e+00,
3.3742e-01, -6.1574e-01, -7.7878e-01, -1.5626e+00, 2.0075e+00,
7.8007e-01, 2.3359e+00, -5.8407e-01, -3.6670e-02, -1.8357e+00,
-8.5492e-01, -7.9237e-02, -3.4835e+00, 1.8853e-01, -6.3243e-01,
-1.4143e-01, -1.5573e+00, 1.3054e+00, 7.2289e-02, -3.3197e-01,
-4.2815e-01, -9.9560e-01, 4.8308e-02, -1.0704e+00, 4.6133e-02,
-2.7710e-01, 6.3607e-01, -1.2849e-01, -5.8321e-01, -6.4198e-01,
6.8877e-01, 4.4855e-01, -9.9281e-01, -1.9603e-01, -1.3646e-01,
-1.5132e+00, -1.8551e+00, 2.9994e+00, 1.9747e+00, -8.8294e-01,
1.0297e+00, 5.4850e-01, 2.2204e+00, -1.9871e-02, 1.6224e+00,
-1.3714e+00, -1.9999e-01, -1.8371e-01, 9.8869e-01, 1.7765e+00,
2.1239e+00, 1.6547e-01, -3.8542e-01, 1.1274e+00, -3.9524e+00,
-1.8184e-01, -9.8598e-01, -1.2485e-01, -7.8307e-01, 1.5246e+00,
-2.3675e-01, 7.5133e-01, -1.8204e+00, 1.1964e+00, 6.9412e-01,
-3.4246e+00, -6.2488e-01, -2.0008e-01, -1.4634e-01, 3.6126e-01,
-6.2960e-01, 1.2811e+00, -2.0820e-01, -2.6770e-01, 1.0875e+00,
-1.8656e+00, -1.7223e+00, -1.6199e+00, -1.6023e+00, 1.1000e-03,
5.5017e-01, 1.9496e+00, 7.6847e-01, -1.2796e+00, 2.4125e+00,
-1.0207e+00, 1.4682e+00, 6.9706e-04, -3.1195e-01, 8.4523e-01,
1.1639e+00, 1.0964e+00, 8.0490e-01, 3.7047e-01, 4.5071e-01,
1.0288e+00, -1.0690e+00, -1.0394e+00, -6.6745e-01, -2.9959e-01,
1.2548e+00, -1.3682e+00, -1.3584e+00, -1.2101e+00, -9.2314e-01,
-1.6717e+00, 1.9204e-01, -5.1889e-01, 6.6319e-01, -3.5625e-02,
3.5143e+00, 7.8116e-01, -8.7697e-01, -3.8530e-01, 2.0860e+00,
-1.5915e+00, -8.9022e-01, -5.0295e-01, -1.2801e+00, 1.8433e-01,
-6.9138e-01, 7.6171e-01, 2.1874e-01, -9.5043e-01, 1.3584e+00,
-1.0811e+00, 3.7449e-01, 1.4505e+00, 1.4932e+00, -1.0532e+00,
-3.7828e-01, 1.7716e+00, 1.8390e-01, -1.4419e+00, 1.0288e+00,
-1.6216e-01, -1.9189e+00, -1.0210e+00, 7.4068e-01, 7.0265e-01,
1.6574e+00, 3.3080e-01, -2.9631e+00, 1.9505e-01, -2.5233e-01,
-2.0795e+00, -1.4711e+00, -1.9923e+00, 3.1158e+00, 2.3007e+00,
-1.4851e+00, -1.3739e+00, -3.8031e-01, 1.3879e+00, 6.2704e-01,
4.0849e-01, 5.2626e-01, -5.3517e-01, 6.4794e-01, 1.3874e+00,
1.1729e+00, -6.2420e-02, 1.6669e-01, 3.7647e-02, -1.8886e+00,
7.9953e-01, 9.9094e-02, 3.3523e-01, 6.6596e-01, -2.0243e+00,
6.9878e-01, 1.0356e+00, 4.0730e-01, -4.5905e-01, 2.0120e+00,
-5.4535e-02, -1.4968e+00, 1.5344e-01, -2.9665e-01, 3.0098e-01,
5.8679e-01, 2.0437e-01, -1.8587e+00, 6.7893e-02, 7.3112e-01,
3.5927e-01, 1.2785e+00, 4.0530e-01, 8.8397e-01, 1.0595e+00,
-6.2867e-01, 9.6102e-01, -1.6319e+00, 3.6489e-01, -4.1222e-01,
1.8157e+00, -2.3874e+00, -2.0938e+00, -5.5133e-01, 1.8377e+00,
-1.0041e+00, 7.4509e-02, 1.0751e+00, 1.6144e+00, -7.9048e-01,
-8.2033e-01, -3.3595e+00, 1.1192e+00, -3.6376e-01, -5.9706e-02,
-1.5762e+00, -7.6090e-01, -5.4732e-01, -2.5771e-01, -5.6112e-02,
-8.0445e-01, -1.9105e+00, 4.5630e-01, 2.2545e+00, -1.7567e+00,
-1.3612e+00, 1.2470e+00, 3.2429e-01, 1.2829e+00, 2.1712e+00,
1.6078e+00, 1.1831e+00, 7.4726e-02, 3.6741e-01, -6.8770e-01,
-7.1650e-01, 1.7661e-01]], device='cuda:0',
grad_fn=<...>)

C++ output:
Embds: Columns 1 to 8 -84.6285 -14.7203 17.7419 47.0915 31.8170 57.6813 3.6089 -38.0543

Columns 9 to 16 3.3444 -95.5730 90.3788 -10.8355 2.8831 -14.3861 0.8706 -60.7844

Columns 17 to 24 30.0367 -43.1165 -5.6550 33.2033 -1.1758 105.3884 -9.8710 17.8346

Columns 25 to 32 17.0933 66.6854 119.4765 79.3748 30.2875 -77.4174 0.3317 -4.0767

Columns 33 to 40 -2.8686 -30.3538 -51.4344 -54.1199 -94.5696 -33.0847 -19.5770 54.3094

Columns 41 to 48 9.1542 1.8090 84.0233 -34.8189 79.6485 109.4215 10.2912 -47.0976

Columns 49 to 56 37.7219 -15.3790 -16.3427 22.2094 -110.2703 -47.8214 -40.3721 49.5144

Columns 57 to 64 7.0735 -69.1642 -87.2891 2.4904 -114.2314 -34.6742 77.0583 47.5493

Columns 65 to 72 -12.7955 -12.1884 -70.9220 61.2372 -23.0823 -14.9402 13.1899 77.5274

Columns 73 to 80 14.8980 3.9681 -12.4636 -2.8313 -26.5012 18.7349 -81.2809 27.7805

Columns 81 to 88 4.6502 -18.6308 -65.8188 -7.8959 -84.8021 18.9902 55.9421 -3.1461

Columns 89 to 96 -68.0309 -121.0718 -39.6810 79.0844 44.7410 5.4263 -55.5766 -46.9981

Columns 97 to 104 107.5576 -64.8779 -38.2952 27.7137 -3.9070 27.3118 -6.6422 -13.3164

Columns 105 to 112 104.2085 0.5082 -78.4771 -19.8312 -38.7756 -52.0113 55.9654 -14.9233

Columns 113 to 120 -9.7707 52.0167 -44.6636 -98.1208 4.3471 72.7285 1.8963 -15.4767

Columns 121 to 128 -15.4205 -42.2256 170.4943 -79.3618 -1.6385 11.5500 59.1987 -65.9982

Columns 129 to 136 -9.0985 33.3904 98.2815 -74.2509 11.8020 -89.1567 34.4861 43.4928

Columns 137 to 144 -56.4307 11.7731 -16.7437 31.0511 -46.6434 -20.9232 26.8300 3.2606

Columns 145 to 152 61.6599 -21.9810 -70.2742 -15.0909 -41.5298 -30.9954 -76.2638 0.6642

Columns 153 to 160 2.6916 47.7454 26.7200 21.0140 -44.8855 -6.4925 -65.3175 -45.4141

Columns 161 to 168 -17.8177 -31.5315 -32.9688 11.2705 -58.3355 -83.6264 -56.9800 -41.5826

Columns 169 to 176 14.9421 -66.3415 -19.4020 -8.9205 34.7736 -1.2142 -22.5419 40.3070

Columns 177 to 184 51.2629 37.0988 -84.1648 112.5778 -51.5290 56.4389 -17.4903 42.5482

Columns 185 to 192 57.6678 -29.1431 63.6813 17.9877 -59.6995 31.1782 -43.9503 42.7553

Columns 193 to 200 29.6934 -19.0927 -74.4936 -90.7978 -75.4938 41.4866 9.0591 52.9187

Columns 201 to 208 -89.2584 -50.5271 -46.8471 -67.3429 -1.2110 21.3874 86.3426 -33.9398

Columns 209 to 216 46.3358 17.8981 -100.1674 -50.8498 -55.5474 -42.1486 2.6009 79.9036

Columns 217 to 224 73.3729 41.6763 -82.8588 -2.8996 17.4613 -166.8535 68.3080 42.2190

Columns 225 to 232 -75.3225 -27.0393 40.7027 133.1041 -10.1574 85.9142 -17.5571 -11.0445

Columns 233 to 240 -46.6592 36.1900 -25.5837 23.5690 111.7863 116.6611 -3.4232 -14.3296

Columns 241 to 248 -10.1717 -26.3160 110.0413 -74.1527 66.8889 54.4394 -8.4007 -80.9817

Columns 249 to 256 -52.5828 0.9547 -78.9718 19.8881 68.5607 4.6896 82.5919 11.0848

Columns 257 to 264 -48.9090 49.7747 -90.9747 -22.6597 82.9919 -31.0079 33.3777 -80.8728

Columns 265 to 272 20.9312 24.9726 58.8175 -57.3928 -36.9511 41.7683 -22.7457 18.0902

Columns 273 to 280 33.3806 12.2698 -48.8019 -64.5811 -22.4971 13.0827 25.2252 -69.3366

Columns 281 to 288 -31.1383 9.3472 -41.4773 -45.0921 -29.0197 20.8469 -18.5003 101.1813

Columns 289 to 296 21.4998 -41.0139 13.0072 14.5900 47.8082 8.7939 -1.6898 -65.2906

Columns 297 to 304 98.5455 -36.5257 -13.4876 31.5104 67.0052 20.0974 80.6973 -59.4268

Columns 305 to 312 -9.8725 109.9801 -11.7113 76.0156 19.4814 -54.8399 -58.3198 -22.0197

Columns 313 to 320 -11.4874 -40.5763 -90.6195 61.3063 2.9030 -38.8599 49.8093 63.7094

Columns 321 to 328 -57.7285 41.2222 35.4600 21.2505 29.7755 40.5168 -36.1677 -35.7411

Columns 329 to 336 55.7660 46.6989 56.3559 -109.1042 -56.7988 -16.9920 32.8174 50.5294

Columns 337 to 344 13.8572 92.8637 59.6933 -0.8193 -69.0457 14.8087 20.9237 29.3850

Columns 345 to 352 -59.0192 -19.3695 -47.4750 1.2323 -18.9492 -63.6595 46.3948 1.5139

Columns 353 to 360 80.1003 -116.6856 18.4157 43.6484 14.6691 -26.1271 -60.0532 10.0214

Columns 361 to 368 -17.5375 11.3292 -6.1891 -2.1459 -24.8204 0.0574 147.1159 56.4644

Columns 369 to 376 20.6844 99.9769 -2.2026 45.3141 -5.9111 22.8332 -26.9914 -54.8931

Columns 377 to 384 13.0211 -22.7115 -55.9605 -102.6626 -41.1080 37.0626 64.1098 -87.8013

Columns 385 to 392 -4.5324 116.3614 -13.5869 29.3998 -29.8993 -19.1788 89.5348 33.3830

Columns 393 to 400 47.5617 -47.8952 -115.5733 18.6636 70.4700 38.7836 52.9221 -26.4590

Columns 401 to 408 57.7344 -46.9924 -107.3308 -104.5425 93.0818 -38.1794 28.5326 63.8123

Columns 409 to 416 -21.0296 -53.7937 46.5247 10.2387 -12.8996 85.9877 53.1290 48.6895

Columns 417 to 424 -66.8464 -2.3867 22.6467 7.4483 21.0441 -94.1917 -42.1939 15.9525

Columns 425 to 432 53.8263 113.8375 61.6334 -104.5839 -20.7676 78.8139 -22.6948 -127.5196

Columns 433 to 440 26.8981 20.7751 38.6938 0.1248 -14.7045 -67.0021 -51.5681 -8.1669

Columns 441 to 448 19.7874 -48.3975 -32.2947 81.1478 48.5060 -85.6838 -17.2948 4.0231

Columns 449 to 456 17.8500 173.0746 -8.2571 20.8623 -7.1263 78.6013 18.4043 6.9401

Columns 457 to 464 -55.3688 28.4737 21.1565 142.7567 -89.0954 -30.7984 62.5072 26.2824

Columns 465 to 472 -40.7608 -53.0610 -23.0218 2.4569 58.6491 -60.6084 15.7515 -54.9259

Columns 473 to 480 -44.9702 -8.3017 -71.4793 -84.7397 -114.3832 -15.3010 54.4510 -32.4508

Columns 481 to 488 75.7713 22.8518 -35.4634 -48.0759 -31.5085 -8.1592 6.5577 -23.7090

Columns 489 to 496 -0.2302 -68.3007 26.5670 -28.0143 -21.5935 -55.7180 -5.6677 56.4317

Columns 497 to 504 61.9337 9.6666 -12.2558 -60.3430 -30.2482 31.4843 71.7933 -8.8972

Columns 505 to 512 36.8830 -31.1061 51.6818 8.2866 1.7214 -2.9263 -37.4330 48.5854
[ CPUFloatType{1,512} ]
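Note that the two outputs are on completely different scales: the Python embedding values mostly lie within about ±4, while the C++ values run into the hundreds.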

Using:
PyTorch 1.6.0
LibTorch 1.6.0
Windows 10
CUDA 10.1

Hi,
instead of using

tensor_image[0][0] = tensor_image[0][0].sub(0.5).div(0.5);
tensor_image[0][1] = tensor_image[0][1].sub(0.5).div(0.5);
tensor_image[0][2] = tensor_image[0][2].sub(0.5).div(0.5);

just try tensor_image = tensor_image.sub(0.5).div(0.5);
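Roughly, the whole preprocessing could then be collapsed into one chain; a minimal sketch using the same operations as your code (the name input is just for illustration, and the clone() is added because from_blob() wraps the Mat's memory without copying):

at::Tensor input = torch::from_blob(visibleFrame.data,
        { 1, visibleFrame.rows, visibleFrame.cols, 3 }, at::kByte)
    .clone()                  // own the data; from_blob() does not copy
    .permute({ 0, 3, 1, 2 })  // NHWC -> NCHW
    .to(at::kFloat)
    .sub(0.5)                 // same per-pixel normalization as above
    .div(0.5)
    .to(torch::kCUDA);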

Also, when comparing the two runs it’s easier to monitor the output tensor’s sum or mean rather than all 512 values.
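For example, on the C++ side something like:

std::cout << "sum:  " << output.sum().item<float>() << "\n";
std::cout << "mean: " << output.mean().item<float>() << "\n";

together with print(output.sum().item(), output.mean().item()) on the Python side makes the two runs much easier to compare at a glance.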

Hi,
I tried that but I’m still facing the same issue.