QAT Linear inference result is wrong

I found that in some cases the PyTorch result of a QAT (quantization-aware training) Linear layer is wrong. I describe the problem below.

I inserted some print statements into the model's forward code:

print(layers_3_out)
layers_4_out = layers_4(layers_3_out)
print(layers_4._packed_params)
print(layers_4_out)

layers_4 is defined in `__init__`:

self.layers_4 = nn.Linear(in_features=16, out_features=1, bias=True)

As shown above, I print the input, the linear layer's parameters, and the output. The result is as follows:

tensor([[5.0954, 1.2068, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.6091,
0.0000, 1.3409, 2.6818, 4.2908, 2.1454, 2.9500, 1.8772],
[5.0954, 1.3409, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.6818, 4.2908, 2.0113, 2.9500, 1.8772],
[5.0954, 1.3409, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.6818, 4.2908, 2.0113, 2.9500, 1.8772],
[5.0954, 1.3409, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.6818, 4.2908, 2.0113, 2.9500, 1.8772],
[5.0954, 1.3409, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.6818, 4.2908, 2.0113, 2.9500, 1.8772],
[5.0954, 1.3409, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.6818, 4.2908, 2.0113, 2.9500, 1.8772],
[5.2295, 1.4750, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.3409,
0.0000, 1.4750, 3.0840, 4.4249, 2.0113, 2.9500, 1.7432],
[5.0954, 1.3409, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.6818, 4.2908, 2.0113, 2.9500, 1.8772],
[4.9613, 1.3409, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.6818, 4.2908, 2.0113, 2.9500, 1.7432],
[4.9613, 1.3409, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.6818, 4.2908, 2.0113, 2.9500, 1.7432],
[4.9613, 1.3409, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.6818, 4.2908, 2.0113, 2.9500, 1.7432],
[4.6931, 1.2068, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.4136, 3.8886, 1.8772, 2.8159, 1.7432],
[4.6931, 1.2068, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.4136, 3.8886, 1.8772, 2.8159, 1.7432],
[4.6931, 1.2068, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.4136, 3.8886, 1.8772, 2.8159, 1.7432],
[4.6931, 1.2068, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.4136, 3.8886, 1.8772, 2.8159, 1.7432],
[4.4249, 1.0727, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.2068, 2.2795, 3.6204, 1.8772, 2.8159, 1.7432],
[4.4249, 1.0727, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.2068, 2.1454, 3.7545, 1.7432, 2.8159, 1.7432],
[4.5590, 1.2068, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.2068, 2.2795, 3.7545, 1.7432, 2.8159, 1.7432],
[4.4249, 1.0727, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.2068, 2.1454, 3.7545, 1.7432, 2.8159, 1.7432],
[4.4249, 1.0727, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.2068, 2.1454, 3.7545, 1.7432, 2.8159, 1.7432],
[4.4249, 1.0727, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.2068, 2.1454, 3.7545, 1.7432, 2.8159, 1.7432],
[4.6931, 1.2068, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.4136, 4.0227, 1.7432, 2.8159, 1.7432],
[4.4249, 1.3409, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.2068,
0.0000, 1.3409, 2.4136, 3.8886, 1.7432, 2.6818, 1.7432],
[4.5590, 1.3409, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.2068,
0.0000, 1.3409, 2.4136, 3.8886, 1.7432, 2.8159, 1.6091],
[4.4249, 1.0727, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.2068, 2.1454, 3.7545, 1.7432, 2.8159, 1.7432],
[4.6931, 1.2068, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.4136, 4.0227, 1.7432, 2.8159, 1.7432],
[4.6931, 1.2068, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.4136, 3.8886, 1.8772, 2.8159, 1.7432],
[4.9613, 1.3409, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.6818, 4.2908, 2.0113, 2.9500, 1.7432],
[5.0954, 1.3409, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.6818, 4.2908, 2.0113, 2.9500, 1.8772],
[5.0954, 1.3409, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.6818, 4.2908, 2.0113, 2.9500, 1.8772],
[5.3636, 1.4750, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.4750, 3.0840, 4.5590, 2.1454, 2.9500, 1.8772],
[5.0954, 1.3409, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4750,
0.0000, 1.3409, 2.6818, 4.2908, 2.0113, 2.9500, 1.8772]],
size=(32, 16), dtype=torch.quint8,
quantization_scheme=torch.per_tensor_affine, scale=0.13408887386322021,
zero_point=173)
(tensor([[-0.5172, -0.6572, 0.2640, 0.4471, 0.2855, -0.5549, 0.4741, 0.4364,
-0.5118, -0.0162, -0.3448, -0.5172, -0.6896, -0.3448, -0.6465, -0.4902]],
size=(1, 16), dtype=torch.qint8,
quantization_scheme=torch.per_tensor_affine, scale=0.005387257784605026,
zero_point=0), tensor([-0.1708], requires_grad=True))
tensor([[3.7054],
[3.8143],
[3.8143],
[3.8143],
[3.8143],
[3.8143],
[3.5964],
[3.8143],
[3.8143],
[3.8143],
[3.8143],
[3.9233],
[3.9233],
[3.9233],
[3.9233],
[4.0323],
[4.1413],
[4.0323],
[4.1413],
[4.1413],
[4.1413],
[3.9233],
[4.1413],
[4.1413],
[4.1413],
[3.9233],
[3.9233],
[3.8143],
[3.8143],
[3.8143],
[3.5964],
[3.8143]], size=(32, 1), dtype=torch.quint8,
quantization_scheme=torch.per_tensor_affine, scale=0.10898102819919586,
zero_point=186)

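Then I calculated the first output row of this linear layer by hand (the last column holds the bias, and the last entry of the `mul` row is the sum of all products plus the bias):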

input 5.0954 1.2068 0 0 0 0 0 0 1.6091 0 1.3409 2.6818 4.2908 2.1454 2.95 1.8772 bias
weight -0.5172 -0.6572 0.264 0.4471 0.2855 -0.5549 0.4741 0.4364 -0.5118 -0.0162 -0.3448 -0.5172 -0.6896 -0.3448 -0.6465 -0.4902 -0.1708
mul -2.63534088 -0.79310896 0 0 0 0 0 0 -0.82353738 0 -0.46234232 -1.38702696 -2.95893568 -0.73973392 -1.907175 -0.92020344 -12.79820454

I found that the result should be -12.79820454, but the result from PyTorch is 3.7054, which is wrong.
Before inference, I prepared the model for QAT and converted it as follows:

net.train()
net.qconfig = torch.quantization.get_default_qat_qconfig("qnnpack")
net.fuse_modules()
torch.quantization.prepare_qat(net, inplace=True)
net.load_state_dict(new_state_dict, strict=True)
net.eval()
net.apply(torch.quantization.disable_observer)
net = torch.quantization.convert(net)

In this model, the other linear layers are calculated correctly; only this one is wrong. If anyone has run into this problem before, could you help me?

The code is as follows:

import torch
import torch.nn as nn
import torch.optim as optim

class mturn_predictor(nn.Module):
    """Tiny network used to reproduce the issue: QuantStub -> Linear(16, 1) -> DeQuantStub."""

    def __init__(self, *args):
        """Create the three submodules; any positional arguments are accepted and ignored."""
        super().__init__()
        # Submodules are registered in data-flow order: stub, linear head, stub.
        self.QuantStub = torch.quantization.QuantStub()
        self.cls_score_parking_goal_combine_multi_layers_0_new = nn.Linear(16, 1, bias=True)
        self.DeQuantStub = torch.quantization.DeQuantStub()

    def forward(self, x):
        """Pass ``x`` through the quant stub, the linear layer and the dequant stub."""
        stub_out = self.QuantStub(x)
        scores = self.cls_score_parking_goal_combine_multi_layers_0_new(stub_out)
        return self.DeQuantStub(scores)

def generate_input():
    """Return the fixed (1, 16) example input used for the comparison."""
    features = [
        4.4249, 0, 0, 0,
        2.5477, 0, 4.559, 0,
        0, 0, 0.5364, 3.3522,
        2.1454, 0, 0, 0,
    ]
    # Wrapping the row in a list yields a batch dimension of size one.
    return torch.tensor([features])

if __name__ == "__main__":
    # Reproduction script: build the model, run QAT prepare/convert, overwrite
    # the converted layer with the quantization parameters taken from the
    # trained network, and compare the quantized output with a float reference.
    model = mturn_predictor()
    input_data = generate_input()

    print(input_data)

    # The qconfig below is the QNNPACK one (full 8-bit activations). The
    # quantized engine has to match it: weight packing and the linear kernel
    # otherwise use the default engine (FBGEMM on x86), which expects
    # reduced-range activations and can saturate, giving wrong outputs.
    backend = "qnnpack"
    if backend in torch.backends.quantized.supported_engines:
        torch.backends.quantized.engine = backend
    else:
        print(f"Warning: quantized engine '{backend}' is not available in this build; "
              f"running on '{torch.backends.quantized.engine}', results may be inaccurate")

    ## convert into qat model
    model.train()
    model.qconfig = torch.quantization.get_default_qat_qconfig(backend)
    torch.quantization.prepare_qat(model, inplace=True)

    # One forward pass in train mode so the inserted observers see data before
    # convert(); the output itself is not needed.
    model(input_data)

    model.eval()
    model_qat = torch.quantization.convert(model)
    print("torch.quantization.convert Done")

    # Weight, bias and quantization parameters copied from the trained model.
    weight_tensor = torch.tensor([[-0.5172,-0.6572,0.264,0.4471,0.2855,-0.5549,0.4741,0.4364,-0.5118,-0.0162,-0.3448,-0.5172,-0.6896,-0.3448,-0.6465,-0.4902]])
    weight_qat = torch.quantize_per_tensor(weight_tensor, 0.00538725778460502, 0, torch.qint8)
    bias_tensor = torch.tensor([-0.17078383266925812])

    linear_q = model_qat.cls_score_parking_goal_combine_multi_layers_0_new
    # Packing happens here, under the engine selected above.
    linear_q._packed_params.set_weight_bias(weight_qat, bias_tensor)
    linear_q.scale = 0.10898102819919586
    linear_q.zero_point = 186

    # Input quantization parameters of the trained model.
    model_qat.QuantStub.register_buffer('scale', torch.tensor([0.13408887386322021]))
    model_qat.QuantStub.register_buffer('zero_point', torch.tensor([173], dtype=torch.long))

    print(model_qat)

    print("Input:", input_data)
    print("Weight:", linear_q._packed_params)
    output_data = model_qat(input_data)
    print("Output:", output_data)

    # The float reference must include the bias, otherwise it is not
    # comparable with the output of the (biased) quantized linear layer.
    reference = torch.matmul(input_data, torch.transpose(weight_tensor, 0, 1)) + bias_tensor
    print("Matmul: ", reference)

The results of Matmul and Linear are different:
Output: tensor([[7.4107]])
Matmul: tensor([[-2.7979]])

A better test would be to check whether the quantized result agrees with the matmul without hard-coding the qparams. I moved the weight and bias assignment to the top, and it looks fine.

I'm not sure what the exact issue is with your code, but the extremely high zero_point value seems suspect.

import torch
import torch.nn as nn
import torch.optim as optim

class mturn_predictor(nn.Module):
    """Single linear layer (16 -> 1) placed between a QuantStub and a DeQuantStub."""

    def __init__(self, *args):
        """Build the submodules; extra positional arguments are accepted but unused."""
        super().__init__()
        self.QuantStub = torch.quantization.QuantStub()
        # Linear head with 16 inputs, one output and a bias term.
        self.cls_score_parking_goal_combine_multi_layers_0_new = nn.Linear(16, 1, bias=True)
        self.DeQuantStub = torch.quantization.DeQuantStub()

    def forward(self, x):
        """Apply QuantStub, then the linear layer, then DeQuantStub, and return the result."""
        linear = self.cls_score_parking_goal_combine_multi_layers_0_new
        return self.DeQuantStub(linear(self.QuantStub(x)))

def generate_input():
    """Build the single-row, 16-feature example tensor fed to the model."""
    row = (
        4.4249, 0, 0, 0, 2.5477, 0, 4.559, 0,
        0, 0, 0.5364, 3.3522, 2.1454, 0, 0, 0,
    )
    batch = [row]  # one sample -> shape (1, 16)
    return torch.tensor(batch)

if __name__ == "__main__":
    # Comparison script: load known weights into the float model, run it
    # through QAT prepare/convert without hard-coded quantization parameters,
    # and print the float, prepared (fake-quantized) and quantized results.
    model = mturn_predictor()
    input_data = generate_input()

    weight = torch.tensor([[-0.5172,-0.6572,0.264,0.4471,0.2855,-0.5549,0.4741,0.4364,-0.5118,-0.0162,-0.3448,-0.5172,-0.6896,-0.3448,-0.6465,-0.4902]])
    bias = torch.tensor([-0.17078383266925812])

    # Overwrite the randomly initialised parameters in place, outside autograd.
    linear = model.cls_score_parking_goal_combine_multi_layers_0_new
    with torch.no_grad():
        linear.weight.copy_(weight)
        linear.bias.copy_(bias)
    output_data = model(input_data)

    # The qconfig below is the QNNPACK one, so select the matching quantized
    # engine; otherwise convert() packs weights and runs kernels for the
    # default engine (FBGEMM on x86), which expects reduced-range activations.
    backend = "qnnpack"
    if backend in torch.backends.quantized.supported_engines:
        torch.backends.quantized.engine = backend
    else:
        print(f"Warning: quantized engine '{backend}' is not available in this build; "
              f"running on '{torch.backends.quantized.engine}', results may be inaccurate")

    ## convert into qat model
    model.train()
    model.qconfig = torch.quantization.get_default_qat_qconfig(backend)
    torch.quantization.prepare_qat(model, inplace=True)

    # Forward pass in train mode: lets the observers see the input and gives
    # the fake-quantized result printed below.
    output_data_prepared = model(input_data)

    model.eval()
    model_qat = torch.quantization.convert(model)
    print("torch.quantization.convert Done")

    print(model_qat)
    print("matmul: ", bias+torch.matmul(input_data, torch.transpose(weight, 0, 1)))
    print("functional linear: ", torch.nn.functional.linear(input_data, weight, bias))
    print("non_quantized_model result", output_data)
    print("prepared model result", output_data_prepared)
    print("q model result", model_qat(input_data))