When I specify qint8 as the activation dtype, quantization is not applied as expected.
Here is the script:
class MyModel(nn.Module):
    """Small two-stage convolutional network.

    Each stage is a 3x3 convolution (padding 1) followed by batch
    normalization and an activation: ``ReLU`` in the first stage and
    ``LeakyReLU`` in the second. The submodule attribute names are part of
    the interface, since callers can address them by name.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 64 channels, in-place ReLU.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        # Stage 2: 64 -> 64 channels, LeakyReLU with its default slope.
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.LeakyReLU()

    def forward(self, x):
        """Apply both conv -> batch-norm -> activation stages to ``x``.

        Args:
            x: Input tensor with 3 channels (as required by ``conv1``).

        Returns:
            The output of ``relu2``; it has 64 channels, and the spatial size
            is unchanged because every convolution is 3x3 with padding 1.
        """
        x = self.conv1(x)
        x = self.bn1(x)
        # relu1 is in-place, so ``y`` aliases the batch-norm output.
        y = self.relu1(x)
        x = self.conv2(y)
        x = self.bn2(x)
        x = self.relu2(x)
        return x
# Sample input used both for preparing the model and for the final run.
example_inputs = torch.randn(1, 3, 224, 224)
float_model = MyModel()
float_model.eval()

# Per-tensor symmetric qint8 observer for activations and per-channel affine
# qint8 observer for weights.
# NOTE(review): whether the target backend accepts qint8 activations for the
# modules configured below is not visible here -- verify against the backend's
# supported dtype configurations.
qconfig_sub = torch.ao.quantization.QConfig(
    activation=observer.MovingAverageMinMaxObserver.with_args(
        qscheme=torch.per_tensor_symmetric, dtype=torch.qint8
    ),
    weight=observer.MovingAveragePerChannelMinMaxObserver.with_args(
        qscheme=torch.per_channel_affine, dtype=torch.qint8
    ),
)

# Attach the qconfig to the two named submodules only. The public setter is
# used instead of overwriting the mapping's ``module_name_qconfigs`` attribute.
qconfig_mapping = (
    QConfigMapping()
    .set_module_name("conv1", qconfig_sub)
    .set_module_name("relu2", qconfig_sub)
)
# print(qconfig_mapping.to_dict())

# ``example_inputs`` is passed as a tuple of positional arguments for forward.
prepared_model = prepare_qat_fx(float_model, qconfig_mapping, (example_inputs,))
quantized_model = convert_fx(prepared_model)
quantized_model(example_inputs)
Resulting graph:
# To see more debug info, please use `graph_module.print_readable()`
graph():
%x : [#users=1] = placeholder[target=x]
%conv1 : [#users=1] = call_module[target=conv1](args = (%x,), kwargs = {})
%conv2 : [#users=1] = call_module[target=conv2](args = (%conv1,), kwargs = {})
%relu2 : [#users=1] = call_module[target=relu2](args = (%conv2,), kwargs = {})
return relu2
- I assume we have to specify the activation dtype as uint8, but for Sigmoid and LeakyReLU, does uint8 lead to an accuracy drop?
- If so, how could I extend support for int8 activation quantization?
I would appreciate any feedback and ideas. Thanks!