Hello,
I am trying to learn about quantization configuration and write my own configs (not just passing get_default_qconfig()). I am using FX graph mode and trying to do post-training static quantization (PTSQ).
This is the code:
import torch
import torch.nn as nn
import torch.ao.quantization.quantize_fx as quantize_fx
import torch.ao.quantization.qconfig as qconfig
import torch.ao.quantization.observer as observer
from torch.ao.quantization import QConfigMapping
from torch.ao.quantization.backend_config import (
    BackendConfig,
    BackendPatternConfig,
    DTypeConfig,
    ObservationType,
)
class Test(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(10, 10)

    def forward(self, x):
        # The same Linear module is applied twice
        x = self.linear(x)
        return self.linear(x)

test = Test().eval()
# Set the QConfig (named my_qconfig to avoid shadowing the qconfig module);
# note: activations are observed as qint8 and weights as quint8 here
my_qconfig = qconfig.QConfig(
    activation=observer.MinMaxObserver.with_args(dtype=torch.qint8),
    weight=observer.MovingAverageMinMaxObserver.with_args(dtype=torch.quint8),
)
# Set the QConfigMapping
qconfig_mapping = QConfigMapping() \
    .set_global(my_qconfig) \
    .set_module_name("linear", my_qconfig)
# Set the backend configuration
weighted_int8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.quint8,
    weight_dtype=torch.qint8,
    bias_dtype=torch.float,
)
linear_config = BackendPatternConfig(torch.nn.Linear) \
    .set_observation_type(ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT) \
    .add_dtype_config(weighted_int8_dtype_config) \
    .set_root_module(torch.nn.Linear) \
    .set_reference_quantized_module(torch.ao.nn.quantized.reference.Linear)

backend_config = BackendConfig("first_config").set_backend_pattern_config(linear_config)
model_prepared = quantize_fx.prepare_fx(
    model=test,
    qconfig_mapping=qconfig_mapping,
    example_inputs=(torch.rand(10, 10),),  # example_inputs must be a tuple
    backend_config=backend_config,
)

# Calibrate by running representative data through the prepared model
with torch.no_grad():
    model_prepared(torch.rand(100, 10, 10))

converted = quantize_fx.convert_fx(model_prepared, backend_config=backend_config)
When I printed the converted model, I got something like this:
def forward(self, x):
    # File: /path/to/file.py:66, code: x = self.linear(x)
    linear = self.linear(x); x = None
    # File: /path/to/file.py:67, code: return self.linear(x)
    linear_1 = self.linear(linear); linear = None
    return linear_1
Or, when I use the default qconfig_mapping instead (qconfig_default = get_default_qconfig_mapping("qnnpack")), I get this output:
class GraphModule(torch.nn.Module):
    def forward(self, x):
        # No stacktrace found for following nodes
        linear_input_scale_0 = self.linear_input_scale_0
        linear_input_zero_point_0 = self.linear_input_zero_point_0
        quantize_per_tensor = torch.quantize_per_tensor(x, linear_input_scale_0, linear_input_zero_point_0, torch.quint8); x = linear_input_scale_0 = linear_input_zero_point_0 = None
        # File: /path/to/file.py:66, code: x = self.linear(x)
        linear = self.linear(quantize_per_tensor); quantize_per_tensor = None
        # File: /path/to/file.py:67, code: return self.linear(x)
        linear_1 = self.linear(linear); linear = None
        # No stacktrace found for following nodes
        dequantize_2 = linear_1.dequantize(); linear_1 = None
        return dequantize_2
As far as I know, the problem is that my qconfig_mapping and my backend_config are not compatible (that's why the quantization is ignored and the linear stays in float). My question is: how do I make those two parameters compatible?
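My current guess is that "compatible" means the observer dtypes in the QConfig must mirror the DTypeConfig fields, i.e. quint8 activations and qint8 weights (the opposite of what I wrote above). So something like the sketch below, where the symmetric qscheme is just an assumption I borrowed from the default weight observer:

# Guessed dtype-aligned QConfig (not sure this is what "compatible" means)
compatible_qconfig = qconfig.QConfig(
    activation=observer.MinMaxObserver.with_args(dtype=torch.quint8),
    weight=observer.MinMaxObserver.with_args(
        dtype=torch.qint8, qscheme=torch.per_tensor_symmetric
    ),
)

Is this the right way to think about it, or is there more to matching a QConfig against a BackendConfig?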
Thank you!