Hi,
I followed tutorials/quantization and tried to PTQ MobileNetV2 from torchvision.
I have quantized with the following script.
However, the accuracy is very poor.
What can I do to improve the accuracy?
- original model
Top1: 64.1% Top5:85.8% - quantized model
Top1: 22.0% Top5:41.9%
tested by ImageNet-1K
model = torchvision.models.quantization.mobilenet_v2(pretrained=True)
def calibrate_model(model, loader, device=torch.device("cpu:0")):
model.to(device)
model.eval()
if len(loader) == 2:
print("data size is ", len(loader[0]))
inputs = loader[0].to(device)
labels = loader[1].to(device)
else:
for inputs, labels in tqdm(loader):
inputs = inputs.to(device)
labels = labels.to(device)
_ = model(inputs)
model.eval()
backend = "fbgemm"
torch.backends.quantized.engine = backend
model.fuse_model()
model.qconfig = torch.quantization.QConfig(
activation=torch.quantization.MinMaxObserver.with_args(reduce_range=True), #default_observer
weight=torch.quantization.PerChannelMinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_channel_symmetric) #default_per_channel_weight_observer
)
torch.quantization.prepare(model, inplace=True)
calibrate_model(model=model, loader=valid_queue)
torch.quantization.convert(model, inplace=True)
Thank you.