Dear Mates,
I’m learning about quantization and understand how scales are computed for INT8. When I compare my manual calculation with a MinMaxObserver the results match, but when I compare the MinMaxObserver, the manual computation, and the scale stored in the statically quantized model, they do not match.
PS: I compare against MinMaxObserver since it is the default observer in the default qconfig.
It would be great to understand why, as it will help me in my future work. I have attached the code and sample output for reference.
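For reference, the manual scale I compute follows the asymmetric (affine) formula as I understand it, so please correct me if this assumption is already wrong:

scale = (r_max - r_min) / (q_max - q_min)  # for int8: q_max - q_min = 127 - (-128) = 255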
import torch
import torch.ao.quantization
from torchvision import models
from torch import Tensor
from torch.ao.quantization import MinMaxObserver
def quant_forward_hook(module, input, output):
    # Observe the module's output with a fresh MinMaxObserver and compare its
    # scale against the manual (max - min) / (qmax - qmin) computation.
    output_ = output.detach()
    obs = MinMaxObserver(dtype=torch.qint8, qscheme=torch.per_tensor_affine)
    obs(output_)
    scale_o, _ = obs.calculate_qparams()
    int8_ = torch.iinfo(torch.int8)
    scale_m = (output_.max().item() - output_.min().item()) / (int8_.max - int8_.min)
    print(f"scale [observer]: {scale_o}")
    print(f"scale [manual]: {scale_m}")
def expr_minmax(tensor: Tensor):
    # Compare the observer's min/max and scale with the manual computation.
    minmax = MinMaxObserver(dtype=torch.qint8, qscheme=torch.per_tensor_affine)
    minmax(tensor)
    print(f"min [observer]: {minmax.min_val}")
    print(f"max [observer]: {minmax.max_val}")
    r_max = tensor.max().item()
    r_min = tensor.min().item()
    print(f"min [manual]: {r_min}")
    print(f"max [manual]: {r_max}")
    scale_o, _ = minmax.calculate_qparams()
    int8_ = torch.iinfo(torch.int8)
    scale_m = (r_max - r_min) / (int8_.max - int8_.min)
    print(f"scale [observer]: {scale_o}")
    print(f"scale [manual]: {scale_m}")
class QuantizedVGG16(torch.nn.Module):
    # VGG16 wrapped with quant/dequant stubs for post-training static quantization.
    def __init__(self):
        super().__init__()
        self.quant = torch.ao.quantization.QuantStub()
        self.vgg16 = models.vgg16(pretrained=True)
        self.dequant = torch.ao.quantization.DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        x = self.vgg16(x)
        return self.dequant(x)
def main():
    tensor = torch.rand([1, 3, 224, 224])
    if tensor.numel() == 0:
        return
    expr_minmax(tensor)

    # Hook the first Conv2d of a float VGG16 and print observer vs. manual scale.
    model = models.vgg16(pretrained=True)
    model.eval()
    for module in model.modules():
        if isinstance(module, torch.nn.Conv2d):
            module.register_forward_hook(quant_forward_hook)
            break
    model(tensor)

    # Post-training static quantization with the default qconfig.
    q_model = QuantizedVGG16()
    q_model.eval()
    q_model.qconfig = torch.ao.quantization.default_qconfig
    print(q_model.qconfig)
    torch.ao.quantization.prepare(q_model, inplace=True)
    q_model(tensor)  # calibration pass
    vgg16_int = torch.ao.quantization.convert(q_model, inplace=True)

    # Scale stored on the first quantized Conv2d after convert().
    for module in vgg16_int.modules():
        if isinstance(module, torch.ao.nn.quantized.Conv2d):
            print(f"scale [torch-computed]: {module.scale}")
            break
if __name__ == "__main__":
    main()
Output:
min [observer]: 1.2934207916259766e-05
max [observer]: 0.9999988675117493
min [manual]: 1.2934207916259766e-05
max [manual]: 0.9999988675117493
scale [observer]: tensor([0.0039])
scale [manual]: 0.0039215134639366
/home/vimal/.local/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
warnings.warn(
/home/vimal/.local/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights.
warnings.warn(msg)
scale [observer]: tensor([0.0268])
scale [manual]: 0.02681882147695504
QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, quant_min=0, quant_max=127){}, weight=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_tensor_symmetric){})
scale [torch-computed]: 0.053848814219236374
Kindly help me understand why the scale reported by the quantized Conv2d (0.053849) differs from the observer/manual scale (0.0268).
Thanks
Vimal William