I’m writing my own implementation of quantized layers, but I don’t get the same answer as the PyTorch quantizedCPU backend.
PyTorch output: tensor([[ 0.1730, 0.7621, -0.3675, -0.2648, -0.4000]])
Manual output: tensor([[ 0.2096, 0.7595, -0.3691, -0.2660, -0.4016]])
The values aren’t far off, but they differ by more than I’d expect from rounding error alone. Here’s how I’m calculating my outputs. Does PyTorch do something substantially different that would account for the discrepancy?
import torch
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # QuantStub/DeQuantStub mark where tensors enter and leave the quantized region.
        self.quant = torch.quantization.QuantStub()
        self.fc_1 = nn.Linear(in_features=10, out_features=5)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        x = self.fc_1(x)
        x = self.dequant(x)
        return x
torch.manual_seed(0)
testInput = torch.rand(5, 10)

model = Net()  # The model's weights are initialized to random values.
quant_net = Net().cpu()
quant_net.load_state_dict(model.state_dict())
quant_net.eval()

# Post-training static quantization: attach a qconfig, insert observers,
# calibrate on a few inputs, then convert to the quantized model.
quant_net.qconfig = torch.quantization.get_default_qconfig('qnnpack')
quant_net_prepared = torch.quantization.prepare(quant_net)
for i in range(5):
    singleInput = torch.unsqueeze(testInput[i], 0)
    quant_net_prepared(singleInput)
quant_model = torch.quantization.convert(quant_net_prepared)

singleInput = torch.unsqueeze(testInput[0], 0)
print(f"PyTorch output: {quant_model(singleInput)}")
# Unpack the values in the quantized model's state dict to run the layer manually.
state_dict = quant_model.state_dict()
quant_scale = state_dict['quant.scale'].item()
quant_zero_point = state_dict['quant.zero_point'].item()
fc_out_scale = state_dict['fc_1.scale'].item()
fc_out_zero_point = state_dict['fc_1.zero_point'].item()
weight_tensor, bias = state_dict['fc_1._packed_params._packed_params']
fc_scale, fc_zero_point = weight_tensor.q_scale(), weight_tensor.q_zero_point()
weight = weight_tensor.dequantize()
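# Cross-check (my assumption: torch.nn.quantized.functional.linear runs the
# same kernel as the converted module). If so, feeding it the unpacked weight
# and fc_1's output scale/zero point should match the PyTorch output exactly.
q_in = torch.quantize_per_tensor(singleInput, quant_scale, quant_zero_point, dtype=torch.quint8)
ref = torch.nn.quantized.functional.linear(q_in, weight_tensor, bias,
                                           scale=fc_out_scale, zero_point=fc_out_zero_point)
print(f"Functional output: {ref.dequantize()}")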
# Quantize the input and the weights and work with their integer representations.
# Keep them in float tensors so the accumulation below can't overflow.
singleInputInt8 = torch.quantize_per_tensor(singleInput, quant_scale, quant_zero_point, dtype=torch.quint8)
singleInputInt8 = torch.int_repr(singleInputInt8).float()
weightInt8 = torch.quantize_per_tensor(weight, fc_scale, fc_zero_point, dtype=torch.qint8)
weightInt8 = torch.int_repr(weightInt8).float()
weightInt8 = weightInt8.transpose(0, 1)  # (out_features, in_features) -> (in_features, out_features)
m, n, k = singleInputInt8.size(0), singleInputInt8.size(1), weightInt8.size(1)
out = torch.zeros(m, k)
for i in range(m):
    for j in range(k):
        acc = 0  # integer accumulator, with zero points subtracted first
        for h in range(n):
            A = singleInputInt8[i, h].item() - quant_zero_point
            B = weightInt8[h, j].item() - fc_zero_point
            acc += A * B
        # Rescale the integer dot product back to fp32 and add the fp32 bias.
        out[i, j] = (acc * quant_scale * fc_scale) + bias[j]
print(f"Manual output: {out}")