Environment
PyTorch 2.0
Code
import struct
import torch
# --- kernel weights ----------------------------------------------------
# The scale is supplied as a raw IEEE754 bit pattern and round-tripped
# through struct so the script uses the exact float32 value rather than a
# possibly-rounded decimal literal.
weight_scale = 0x3bb9211d # IEEE754 representation, 0.005649699363857508
weight_scale = struct.unpack('f', struct.pack('I', weight_scale))[0]
# NOTE(review): this weight is 4-D (1, 1, 1, 7), but conv1d normally
# takes a 3-D (out_channels, in_channels, kW) weight — confirm whether
# the extra dimension is intended, since quantized conv1d is implemented
# via an internal reshape to conv2d and the shape may affect the result.
weight_int8 = torch.tensor([[[[ 56, 8, -118, -29, 127, 4, -52]]]], dtype=torch.int8)
# quantize_per_tensor expects a float tensor, so first reconstruct the
# real values (int8 codes * scale, zero_point 0), then re-quantize with
# the same affine parameters so weight.int_repr() matches weight_int8.
weight = torch.quantize_per_tensor(weight_int8 * weight_scale, scale=weight_scale, zero_point=0, dtype=torch.qint8)
# --- input data ---------------------------------------------------------
x_scale = 0x3d101824 # IEEE754 representation, 0.035179272294044495
x_zero_point = 129
x_scale = struct.unpack('f', struct.pack('I', x_scale))[0]
# Shape (1, 1, 7): (batch, channels, width) as conv1d expects.
x_uint8 = torch.tensor([[[190, 213, 231, 247, 255, 251, 234]]], dtype=torch.uint8)
# Dequantize by hand — cast to int32 first so subtracting the zero point
# cannot wrap in uint8 — then quantize back with the same scale/zero_point
# so x.int_repr() reproduces x_uint8 exactly.
x = torch.quantize_per_tensor((x_uint8.type(torch.int32) - x_zero_point) * x_scale, scale=x_scale, zero_point=
x_zero_point, dtype=torch.quint8)
# --- bias (float32, one entry per output channel) -----------------------
bias = 0xbbc834f6 # IEEE754 representation, -0.006109829060733318
bias = struct.unpack('f', struct.pack('I', bias))[0]
bias = torch.tensor([bias], dtype=torch.float32)
# --- output quantization parameters -------------------------------------
out_scale = 0x3c64ad2f # IEEE754 representation, 0.013957305811345577
out_scale = struct.unpack('f', struct.pack('I', out_scale))[0]
out_zero_point = 132
# --- reference computation by PyTorch -----------------------------------
result_torch = torch.ao.nn.quantized.functional.conv1d(x, weight, bias, stride=1, padding=0, scale=out_scale,
zero_point=out_zero_point)
# --- manual integer re-implementation of the same convolution -----------
# Integer accumulator: sum_k w[k] * (x[k] - x_zp). With a 7-wide kernel
# over a 7-wide input there is a single output element, so the valid
# convolution reduces to one dot product.
z = weight_int8.squeeze().type(torch.int32) @ (x_uint8.squeeze().type(torch.int32) - x_zero_point)
# Fold the float bias into the integer accumulator by rounding
# bias / (x_scale * weight_scale).
# NOTE(review): the quantized backend may instead add the bias in float
# after dequantizing the accumulator; that ordering/rounding difference is
# a plausible source of the mismatch printed below — confirm against the
# fbgemm/qnnpack kernel implementation.
z = z + torch.round(bias / (x_scale * weight_scale))
# Requantize: rescale the accumulator onto the output grid, shift by the
# output zero point, round to nearest, then clamp to the uint8 range.
unclamped = torch.round(z * (x_scale * weight_scale / out_scale) + out_zero_point)
result_manual = torch.clamp(unclamped, 0, 255).type(torch.uint8)
# Print both results side by side; the report shows 141 (PyTorch) vs 127
# (manual), i.e. the discrepancy under investigation.
print(result_torch.int_repr().squeeze(), result_manual)
What I expect to see
Both the PyTorch computation and the manual calculation give 127
Actual result (PyTorch, manual calculation)
tensor(141, dtype=torch.uint8) tensor([127], dtype=torch.uint8)