Environment
PyTorch 2.0
Code
import struct
import torch
# --- kernel weights ----------------------------------------------------
# The scale is supplied as a raw IEEE754 bit pattern and round-tripped
# through struct so the script uses the exact float32 value rather than a
# possibly-rounded decimal literal.
weight_scale = 0x3bb9211d # IEEE754 representation, 0.005649699363857508
weight_scale = struct.unpack('f', struct.pack('I', weight_scale))[0]
# NOTE(review): this weight is 4-D (1, 1, 1, 7), but conv1d normally
# takes a 3-D (out_channels, in_channels, kW) weight — confirm whether
# the extra dimension is intended, since quantized conv1d is implemented
# via an internal reshape to conv2d and the shape may affect the result.
weight_int8 = torch.tensor([[[[ 56, 8, -118, -29, 127, 4, -52]]]], dtype=torch.int8)
# quantize_per_tensor expects a float tensor, so first reconstruct the
# real values (int8 codes * scale, zero_point 0), then re-quantize with
# the same affine parameters so weight.int_repr() matches weight_int8.
weight = torch.quantize_per_tensor(weight_int8 * weight_scale, scale=weight_scale, zero_point=0, dtype=torch.qint8)
# --- input data ---------------------------------------------------------
x_scale = 0x3d101824 # IEEE754 representation, 0.035179272294044495
x_zero_point = 129
x_scale = struct.unpack('f', struct.pack('I', x_scale))[0]
# Shape (1, 1, 7): (batch, channels, width) as conv1d expects.
x_uint8 = torch.tensor([[[190, 213, 231, 247, 255, 251, 234]]], dtype=torch.uint8)
# Dequantize by hand — cast to int32 first so subtracting the zero point
# cannot wrap in uint8 — then quantize back with the same scale/zero_point
# so x.int_repr() reproduces x_uint8 exactly.
x = torch.quantize_per_tensor((x_uint8.type(torch.int32) - x_zero_point) * x_scale, scale=x_scale, zero_point=
x_zero_point, dtype=torch.quint8)
# --- bias (float32, one entry per output channel) -----------------------
bias = 0xbbc834f6 # IEEE754 representation, -0.006109829060733318
bias = struct.unpack('f', struct.pack('I', bias))[0]
bias = torch.tensor([bias], dtype=torch.float32)
# --- output quantization parameters -------------------------------------
out_scale = 0x3c64ad2f # IEEE754 representation, 0.013957305811345577
out_scale = struct.unpack('f', struct.pack('I', out_scale))[0]
out_zero_point = 132
# --- reference computation by PyTorch -----------------------------------
result_torch = torch.ao.nn.quantized.functional.conv1d(x, weight, bias, stride=1, padding=0, scale=out_scale,
zero_point=out_zero_point)
# --- manual integer re-implementation of the same convolution -----------
# Integer accumulator: sum_k w[k] * (x[k] - x_zp). With a 7-wide kernel
# over a 7-wide input there is a single output element, so the valid
# convolution reduces to one dot product.
z = weight_int8.squeeze().type(torch.int32) @ (x_uint8.squeeze().type(torch.int32) - x_zero_point)
# Fold the float bias into the integer accumulator by rounding
# bias / (x_scale * weight_scale).
# NOTE(review): the quantized backend may instead add the bias in float
# after dequantizing the accumulator; that ordering/rounding difference is
# a plausible source of the mismatch printed below — confirm against the
# fbgemm/qnnpack kernel implementation.
z = z + torch.round(bias / (x_scale * weight_scale))
# Requantize: rescale the accumulator onto the output grid, shift by the
# output zero point, round to nearest, then clamp to the uint8 range.
unclamped = torch.round(z * (x_scale * weight_scale / out_scale) + out_zero_point)
result_manual = torch.clamp(unclamped, 0, 255).type(torch.uint8)
# Print both results side by side; the report shows 141 (PyTorch) vs 127
# (manual), i.e. the discrepancy under investigation.
print(result_torch.int_repr().squeeze(), result_manual)
What I expect to see
Both the PyTorch computation and the manual calculation give 127
Actual result (PyTorch, manual calculation)
tensor(141, dtype=torch.uint8) tensor([127], dtype=torch.uint8)