I’m converting a piece of data-processing code to use the torchvision.transforms interface. However, the following unit test shows a difference between the two implementations:
import numpy as np
import torch
import cv2
import scipy.misc
from PIL import Image
from torchvision import transforms
from torchvision.transforms import Normalize, Resize, ToTensor
# Compare a cv2-based resize pipeline against a torchvision.transforms pipeline
# on the same image and assert that both produce (almost) the same values.
filepath = '2359296.jpg'
# Length that the smaller image dimension is scaled to (see im_scale below).
target_size = 600
# ================== Using cv2 ====================
# Load the image and cast to float32, so the resize below interpolates on
# floating-point pixel values.
im = scipy.misc.imread(filepath).astype(np.float32, copy=False)
# Keep a copy of the loaded pixels for the input comparison at the end.
im1 = im.copy()
# Smallest / largest of the first two shape entries (the spatial dimensions).
im_size_min = np.min(im.shape[0:2])
# NOTE: im_size_max is computed but not used anywhere else in this snippet.
im_size_max = np.max(im.shape[0:2])
# Uniform scale factor that maps the smaller dimension to target_size.
im_scale = float(target_size) / float(im_size_min)
# Resize by the same factor along both axes with bilinear interpolation; the
# output size is derived from fx/fy because dsize is passed as None.
out1 = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR)
out1 /= 255. # Convert range to [0,1]
# ================== Using torchvision ====================
# Here the image stays a PIL image until ToTensor() is applied.
im2 = Image.open(filepath)
# Resize with an int size, then convert to a tensor. Per the torchvision docs,
# an int size matches the smaller edge and ToTensor scales values to [0, 1]
# in channel-first layout -- TODO confirm for the installed version.
composed = transforms.Compose([ Resize(size=target_size),
ToTensor()])
out2 = composed(im2)
# Move the channel axis last so the layout lines up with out1.
out2 = np.transpose(out2.data.numpy(), (1, 2, 0))
# Sanity check that both pipelines start from the same input pixels.
# NOTE(review): im2 is a PIL image here; this presumably relies on numpy
# converting it to an array -- confirm.
np.testing.assert_almost_equal(im1, im2, decimal=4)
# Compare the resized, [0, 1]-scaled outputs of the two pipelines.
# NOTE(review): the PIL-based Resize presumably returns 8-bit pixels, i.e. the
# interpolated values are rounded to integers before ToTensor divides by 255,
# whereas the cv2 path interpolates in float32 and never rounds. That would
# bound the difference by 1/255 ~= 0.0039216 -- verify against the reported
# maximum absolute difference.
np.testing.assert_almost_equal(out1, out2, decimal=4)
The result shows a 95.1% mismatch when comparing to 4 decimal places:
Mismatch: 95.1%
Max absolute difference: 0.0039216
Max relative difference: nan
x: array([[[0.4118, 0.2706, 0.1686],
[0.3912, 0.2539, 0.1569],
[0.35 , 0.2206, 0.1333],...
y: array([[[0.4118, 0.2706, 0.1686],
[0.3922, 0.2549, 0.1569],
[0.349 , 0.2196, 0.1333],...
Are the two outputs expected to match each other?