I encountered a problem. When I checked the result of the GridSample operator, I found that it differs from the output of my Python script under the condition ‘padding=0, mode=0, align_corners=1’.
The source code of torch is:
// Bilinear 2-D grid sampling of a quantized (quint8) CPU tensor.
//
// For every output location (n, h, w) an (x, y) pair is read from `grid`,
// converted to a source index by grid_sampler_compute_source_index, and the
// four surrounding input pixels are blended with bilinear weights. Corners
// that fall outside the input contribute `zero_point` instead of a pixel.
//
// Parameters:
//   input                - indexed here as 4-D (N, C, inp_H, inp_W); its
//                          q_scale()/q_zero_point() are reused for the output.
//   grid                 - read as float; sizes 1 and 2 give out_H and out_W,
//                          the last dimension holds x then y.
//   interpolation_mode_  - must map to GridSamplerInterpolation::Bilinear,
//                          otherwise TORCH_CHECK fails.
//   padding_mode_        - forwarded to grid_sampler_compute_source_index.
//   align_corners        - forwarded to grid_sampler_compute_source_index.
// Returns: a quantized tensor of shape {N, C, out_H, out_W}.
Tensor _grid_sampler_2d_cpu_quantized(
const Tensor& input,
const Tensor& grid,
int64_t interpolation_mode_,
int64_t padding_mode_,
bool align_corners) {
// See NOTE [ grid_sampler Native Functions ].
// Add checks here in case this is called instead of grid_sampler.
check_grid_sampler_common(input, grid);
check_grid_sampler_2d(input, grid);
auto interpolation_mode =
static_cast<GridSamplerInterpolation>(interpolation_mode_);
/* Bilinear interpolation is supported using the fact that we can perform
* linear interpolations on quantized values without rescaling. */
TORCH_CHECK(
interpolation_mode == GridSamplerInterpolation::Bilinear,
"_grid_sampler_2d_cpu_quantized(): only bilinear interpolation supported")
auto padding_mode = static_cast<GridSamplerPadding>(padding_mode_);
// Input extents (batch, channels, height, width) and output spatial extents.
int64_t N = input.size(0);
int64_t C = input.size(1);
int64_t inp_H = input.size(2);
int64_t inp_W = input.size(3);
int64_t out_H = grid.size(1);
int64_t out_W = grid.size(2);
// Kept as uint8_t; used below as the value of out-of-bounds corners.
uint8_t zero_point = input.q_zero_point();
// The output shares the input's scale and zero point, which is why the
// interpolation below can operate directly on the raw quantized values.
auto output = at::_empty_affine_quantized(
{N, C, out_H, out_W},
at::device(c10::kCPU).dtype(c10::kQUInt8),
input.q_scale(),
zero_point);
// Strides are applied as element offsets on the typed pointers below.
int64_t inp_sN = input.stride(0);
int64_t inp_sC = input.stride(1);
int64_t inp_sH = input.stride(2);
int64_t inp_sW = input.stride(3);
int64_t grid_sN = grid.stride(0);
int64_t grid_sH = grid.stride(1);
int64_t grid_sW = grid.stride(2);
int64_t grid_sCoor = grid.stride(3);
int64_t out_sN = output.stride(0);
int64_t out_sC = output.stride(1);
int64_t out_sH = output.stride(2);
int64_t out_sW = output.stride(3);
// quint8 storage is accessed through plain uint8_t pointers.
uint8_t* inp_ptr = (uint8_t*)input.data_ptr<quint8>();
uint8_t* out_ptr = (uint8_t*)output.data_ptr<quint8>();
float* grid_ptr = grid.data_ptr<float>();
// Work is split over the batch range [0, N); each invocation handles
// [start, end). NOTE(review): the third argument (0) is presumably the grain
// size - confirm against at::parallel_for.
at::parallel_for(0, N, 0, [&](int64_t start, int64_t end) {
for (const auto n : c10::irange(start, end)) {
float* grid_ptr_N = grid_ptr + n * grid_sN;
uint8_t* inp_ptr_N = inp_ptr + n * inp_sN;
for (const auto h : c10::irange(out_H)) {
for (const auto w : c10::irange(out_W)) {
// get the corresponding input x, y co-ordinates from grid
// (x is at offset 0 of the last grid dimension, y one grid_sCoor further)
float* grid_ptr_NHW = grid_ptr_N + h * grid_sH + w * grid_sW;
float x = *grid_ptr_NHW;
float y = grid_ptr_NHW[grid_sCoor];
// x is resolved against the input width, y against the input height.
float ix = grid_sampler_compute_source_index(
x, inp_W, padding_mode, align_corners);
float iy = grid_sampler_compute_source_index(
y, inp_H, padding_mode, align_corners);
// get corner pixel values from (x, y)
// for 4d, we use north-east-south-west
// nw is the floor of (ix, iy); the other corners are +1 in x and/or y.
int64_t ix_nw = static_cast<int64_t>(std::floor(ix));
int64_t iy_nw = static_cast<int64_t>(std::floor(iy));
int64_t ix_ne = ix_nw + 1;
int64_t iy_ne = iy_nw;
int64_t ix_sw = ix_nw;
int64_t iy_sw = iy_nw + 1;
int64_t ix_se = ix_nw + 1;
int64_t iy_se = iy_nw + 1;
// get surfaces to each neighbor:
// each corner's weight is the area of the rectangle spanned by (ix, iy)
// and the diagonally opposite corner.
float nw = (ix_se - ix) * (iy_se - iy);
float ne = (ix - ix_sw) * (iy_sw - iy);
float sw = (ix_ne - ix) * (iy - iy_ne);
float se = (ix - ix_nw) * (iy - iy_nw);
// calculate bilinear weighted pixel value and set output pixel
uint8_t* inp_ptr_NC = inp_ptr_N;
uint8_t* out_ptr_NCHW =
out_ptr + n * out_sN + h * out_sH + w * out_sW;
// The same four weights are reused for every channel; both pointers step
// by their channel stride on each iteration.
for (int64_t c = 0; c < C;
++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) {
float res = 0;
// A corner outside the input is replaced by zero_point (weighted the
// same way) rather than being skipped.
res += within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)
? inp_ptr_NC[iy_nw * inp_sH + ix_nw * inp_sW] * nw
: zero_point * nw;
res += within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)
? inp_ptr_NC[iy_ne * inp_sH + ix_ne * inp_sW] * ne
: zero_point * ne;
res += within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)
? inp_ptr_NC[iy_sw * inp_sH + ix_sw * inp_sW] * sw
: zero_point * sw;
res += within_bounds_2d(iy_se, ix_se, inp_H, inp_W)
? inp_ptr_NC[iy_se * inp_sH + ix_se * inp_sW] * se
: zero_point * se;
// The float sum is rounded, then implicitly narrowed to uint8_t on store.
*out_ptr_NCHW = std::round(res);
}
}
}
}
});
return output;
}
I reproduced the code above in Python, checking only one pair of coordinates, (-1, -0.6), whose four neighboring pixel values are [0, 1, 4, 5]. The code is below:
import math
def within_bounds_2d(h, w, H, W):
    """Report whether (h, w) is a valid index into an H x W grid.

    Args:
        h: Row index to test.
        w: Column index to test.
        H: Number of rows.
        W: Number of columns.

    Returns:
        True when 0 <= h < H and 0 <= w < W, otherwise False (an explicit
        bool rather than an implicit None for the out-of-range case).
    """
    return 0 <= h < H and 0 <= w < W
# Reproduce the bilinear sampling arithmetic for a single grid coordinate
# on a 4 x 4 input. Unlike the quantized C++ kernel quoted above, an
# out-of-bounds corner contributes nothing here and the final value is
# printed without rounding.
inp_H, inp_W = 4, 4
x, y = -1, -0.6

# Map the normalized coordinates onto pixel indices.
ix = ((x + 1) / 2) * (inp_W - 1)
iy = ((y + 1) / 2) * (inp_H - 1)
# Alternative mapping left disabled by the author (presumably the
# align_corners=0 variant - confirm):
# ix = ((x + 1) * inp_W - 1) / 2
# iy = ((y + 1) * inp_H - 1) / 2

# North-west corner is the floor of (ix, iy); the rest are offset by one.
ix_nw = math.floor(ix)
print(ix_nw)
iy_nw = math.floor(iy)
print(iy_nw)
ix_ne, iy_ne = ix_nw + 1, iy_nw
ix_sw, iy_sw = ix_nw, iy_nw + 1
ix_se, iy_se = ix_nw + 1, iy_nw + 1
print("x: ")
print(ix_nw, ix_ne, ix_sw, ix_se)
print("y: ")
print(iy_nw, iy_ne, iy_sw, iy_se)

# Bilinear weight of each corner: area of the rectangle between (ix, iy)
# and the diagonally opposite corner.
nw = (ix_se - ix) * (iy_se - iy)
ne = (ix - ix_sw) * (iy_sw - iy)
sw = (ix_ne - ix) * (iy - iy_ne)
se = (ix - ix_nw) * (iy - iy_nw)
print(nw, ne, sw, se)

# (label, row, column, hard-coded pixel value, weight) for each corner,
# visited in the same nw, ne, sw, se order as the C++ kernel.
corners = (
    ("nw", iy_nw, ix_nw, 0, nw),
    ("ne", iy_ne, ix_ne, 1, ne),
    ("sw", iy_sw, ix_sw, 4, sw),
    ("se", iy_se, ix_se, 5, se),
)
res = 0
for label, row, col, pixel, weight in corners:
    if not within_bounds_2d(row, col, inp_H, inp_W):
        continue
    print(label)
    res += pixel * weight
print(res)
The PyTorch result and the script result do not match. Is there a problem with my script? Could someone please check my code? Thanks.