# Evaluate the QAT model, convert it to a quantized model, export it to ONNX,
# simplify the ONNX graph, and evaluate the result with ONNX Runtime.
# Relies on names defined outside this section: args, val_dataset, val_sampler,
# device, model, validate, validate_cpu, validate_onnx.
print('args.workers = {}'.format(args.workers))

# Loader used for both PyTorch evaluations (worker count taken from args).
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=1000, shuffle=False,
    num_workers=args.workers, pin_memory=True, sampler=val_sampler)
# Loader used for the ONNX Runtime evaluation; same settings except a fixed 16 workers.
val_loader_onnx = torch.utils.data.DataLoader(
    val_dataset, batch_size=1000, shuffle=False,
    num_workers=16, pin_memory=True, sampler=val_sampler)
criterion = nn.CrossEntropyLoss().to(device)

print('QAT Model In Pytorch (SimQuant):')
validate(val_loader, model, criterion, args)

# Work on a CPU copy of the wrapped module so the original model is left untouched
# by the in-place conversion below.
quantized_eval_model = copy.deepcopy(model.module)
quantized_eval_model.eval()
quantized_eval_model.to(torch.device("cpu"))
torch.ao.quantization.convert(quantized_eval_model, inplace=True)

print('Converted Model in Pytorch (Quantized):')
acc1_quantized = validate_cpu(val_loader, quantized_eval_model, criterion, args)

############################################### Here we convert quantized_model to onnx format ########
# Despite its name, input_size is a dummy input tensor, not a shape; its batch
# dimension (1000) matches the loaders' batch_size and is what gets exported.
input_size = torch.randn(1000, 3, 224, 224)
import onnx
import onnxsim

# One forward pass on the dummy input before exporting; the result is discarded.
quantized_eval_model(input_size)
torch.onnx.export(
    quantized_eval_model,  # model being run
    input_size,  # model input (or a tuple for multiple inputs)
    './quantized_mobilenetv3_qat.onnx',  # where to save the model (can be a file or file-like object)
    export_params=True,  # store the trained parameter weights inside the model file
    opset_version=18,  # the ONNX version to export the model to
    # do_constant_folding=True, # whether to execute constant folding for optimization
    input_names=['input'],  # the model's input names
    output_names=['output'],  # the model's output names
    # dynamic_axes={"input":{0: "batch_size"}, "output":{0: "batch_size"},}
    # example_outputs=traced(input_fp32)
)

model_onnx = onnx.load('./quantized_mobilenetv3_qat.onnx')
onnx.checker.check_model(model_onnx)
# Simplify the exported graph with the input shape pinned to the dummy input's shape.
model_onnx, check = onnxsim.simplify(
    model_onnx,
    dynamic_input_shape=False,
    overwrite_input_shapes={'input': list(input_size.shape)}
)
assert check, 'assert check failed'
onnx.save(model_onnx, 'quantized_mobilenetv3_sim_qat.onnx')
print(" ")
print('Model has been converted to onnx successfully.')
#######################################################################################################

################################## do onnx-runtime inference #########################################
import onnxruntime as ort
import numpy as np

print('onnx runtime gpu with CUDA')
sessionOption = ort.SessionOptions()
# All ONNX Runtime graph optimizations are switched off for this session.
sessionOption.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
sess = ort.InferenceSession("./quantized_mobilenetv3_sim_qat.onnx", sess_options=sessionOption,
                            providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
input_name = sess.get_inputs()[0].name
output_name = sess.get_outputs()[0].name
output_shape = sess.get_outputs()[0].shape

print('Converted Model in Onnx (Quantized):')
validate_onnx(val_loader_onnx, sess, criterion, args)
#######################################################################################################
def validate_onnx(val_loader, ort_session, criterion, args):
    """Evaluate an ONNX Runtime session on ``val_loader`` and return average top-1 accuracy.

    Args:
        val_loader: Iterable yielding ``(images, target)`` batches of torch tensors;
            must support ``len()``.
        ort_session: Object exposing ``get_inputs()``, ``get_outputs()`` and ``run()``
            (an ``onnxruntime.InferenceSession`` at the visible call site). Only its
            first input and first output are used.
        criterion: Loss callable applied to ``(output, target)``.
        args: Namespace; only ``args.print_freq`` is read.

    Returns:
        The ``avg`` attribute of the Acc@1 meter after the whole loader is consumed.
    """
    # The session's input/output names do not change between batches, so look
    # them up once instead of on every iteration.
    input_name = ort_session.get_inputs()[0].name
    output_name = ort_session.get_outputs()[0].name

    def run_validate(loader, base_progress=0):
        with torch.no_grad():
            end = time.time()
            for i, (images, target) in enumerate(loader):
                i = base_progress + i
                target = target.to('cpu')

                # compute output: the session is fed a numpy array and returns a
                # list with one entry per requested output name.
                input_data = images.detach().cpu().numpy()
                res = ort_session.run([output_name], {input_name: input_data})
                output_tensor = torch.from_numpy(res[0])

                loss = criterion(output_tensor, target)

                # measure accuracy and record loss
                acc1, acc5 = accuracy(output_tensor, target, topk=(1, 5))
                batch_size = images.size(0)
                losses.update(loss.item(), batch_size)
                top1.update(acc1[0], batch_size)
                top5.update(acc5[0], batch_size)

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                if i % args.print_freq == 0:
                    progress.display(i + 1)

    batch_time = AverageMeter('Time', ':6.3f', Summary.NONE)
    losses = AverageMeter('Loss', ':.4e', Summary.NONE)
    top1 = AverageMeter('Acc@1', ':6.2f', Summary.AVERAGE)
    top5 = AverageMeter('Acc@5', ':6.2f', Summary.AVERAGE)
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test_ONNX: ')

    run_validate(val_loader)
    progress.display_summary()

    return top1.avg
def validate(val_loader, model, criterion, args):
    """Run ``model`` over ``val_loader`` and return the average top-1 accuracy.

    Batches are moved to CUDA whenever ``torch.cuda.is_available()`` is true.

    Args:
        val_loader: Iterable yielding ``(images, target)`` batches; must support ``len()``.
        model: Callable module; switched to eval mode before the loop.
        criterion: Loss callable applied to ``(output, target)``.
        args: Namespace; only ``args.print_freq`` is read.

    Returns:
        The ``avg`` attribute of the Acc@1 meter after the whole loader is consumed.
    """
    timer = AverageMeter('Time', ':6.3f', Summary.NONE)
    loss_meter = AverageMeter('Loss', ':.4e', Summary.NONE)
    acc1_meter = AverageMeter('Acc@1', ':6.2f', Summary.AVERAGE)
    acc5_meter = AverageMeter('Acc@5', ':6.2f', Summary.AVERAGE)
    progress = ProgressMeter(
        len(val_loader),
        [timer, loss_meter, acc1_meter, acc5_meter],
        prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        tick = time.time()
        for step, (images, target) in enumerate(val_loader):
            if torch.cuda.is_available():
                images = images.cuda()
                target = target.cuda()

            # forward pass and loss
            logits = model(images)
            batch_loss = criterion(logits, target)

            # record loss and top-1 / top-5 accuracy, weighted by batch size
            hit1, hit5 = accuracy(logits, target, topk=(1, 5))
            count = images.size(0)
            loss_meter.update(batch_loss.item(), count)
            acc1_meter.update(hit1[0], count)
            acc5_meter.update(hit5[0], count)

            # record time spent on this batch
            timer.update(time.time() - tick)
            tick = time.time()

            if step % args.print_freq == 0:
                progress.display(step + 1)

    progress.display_summary()
    return acc1_meter.avg
def validate_cpu(val_loader, model, criterion, args):
    """Run ``model`` on the CPU over ``val_loader`` and return the average top-1 accuracy.

    Args:
        val_loader: Iterable yielding ``(images, target)`` batches; must support ``len()``.
        model: Callable module; moved to ``'cpu'`` and switched to eval mode before the loop.
        criterion: Loss callable applied to ``(output, target)``.
        args: Namespace; only ``args.print_freq`` is read.

    Returns:
        The ``avg`` attribute of the Acc@1 meter after the whole loader is consumed.
    """
    timer = AverageMeter('Time', ':6.3f', Summary.NONE)
    loss_meter = AverageMeter('Loss', ':.4e', Summary.NONE)
    acc1_meter = AverageMeter('Acc@1', ':6.2f', Summary.AVERAGE)
    acc5_meter = AverageMeter('Acc@5', ':6.2f', Summary.AVERAGE)
    progress = ProgressMeter(
        len(val_loader),
        [timer, loss_meter, acc1_meter, acc5_meter],
        prefix='Test_quantized: ')

    # switch to evaluate mode on the CPU
    model.to('cpu')
    model.eval()

    with torch.no_grad():
        tick = time.time()
        for step, (images, target) in enumerate(val_loader):
            images = images.to('cpu')
            target = target.to('cpu')

            # forward pass and loss
            logits = model(images)
            batch_loss = criterion(logits, target)

            # record loss and top-1 / top-5 accuracy, weighted by batch size
            hit1, hit5 = accuracy(logits, target, topk=(1, 5))
            count = images.size(0)
            loss_meter.update(batch_loss.item(), count)
            acc1_meter.update(hit1[0], count)
            acc5_meter.update(hit5[0], count)

            # record time spent on this batch
            timer.update(time.time() - tick)
            tick = time.time()

            if step % args.print_freq == 0:
                progress.display(step + 1)

    progress.display_summary()
    return acc1_meter.avg
The log is as follows:
QAT Model In Pytorch (SimQuant):
Test: [ 1/50] Time 19.430 (19.430) Loss 4.4070e-01 (4.4070e-01) Acc@1 87.70 ( 87.70) Acc@5 97.40 ( 97.40)
Test: [11/50] Time 0.282 ( 2.129) Loss 7.0866e-01 (7.4780e-01) Acc@1 78.10 ( 80.42) Acc@5 97.00 ( 94.98)
Test: [21/50] Time 0.278 ( 1.248) Loss 1.1263e+00 (7.6287e-01) Acc@1 71.70 ( 79.76) Acc@5 91.20 ( 95.11)
Test: [31/50] Time 0.282 ( 0.936) Loss 1.0681e+00 (8.8677e-01) Acc@1 74.10 ( 77.07) Acc@5 90.90 ( 93.68)
Test: [41/50] Time 0.279 ( 0.776) Loss 1.1727e+00 (9.6547e-01) Acc@1 71.50 ( 75.39) Acc@5 90.10 ( 92.52)
- Acc@1 74.776 Acc@5 92.252
Converted Model in Pytorch (Quantized):
Test_quantized: [ 1/50] Time 17.260 (17.260) Loss 4.7875e-01 (4.7875e-01) Acc@1 88.20 ( 88.20) Acc@5 97.00 ( 97.00)
Test_quantized: [11/50] Time 2.513 ( 4.091) Loss 7.3136e-01 (7.6658e-01) Acc@1 79.30 ( 80.25) Acc@5 96.70 ( 94.70)
Test_quantized: [21/50] Time 2.554 ( 3.346) Loss 1.2043e+00 (7.8450e-01) Acc@1 68.50 ( 79.26) Acc@5 90.70 ( 94.75)
Test_quantized: [31/50] Time 2.561 ( 3.079) Loss 1.0992e+00 (9.1164e-01) Acc@1 74.10 ( 76.61) Acc@5 90.00 ( 93.15)
Test_quantized: [41/50] Time 2.560 ( 2.950) Loss 1.2187e+00 (9.9316e-01) Acc@1 69.40 ( 74.74) Acc@5 89.70 ( 92.14)
- Acc@1 74.184 Acc@5 91.934
============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============
verbose: False, log level: Level.ERROR
======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================
Model has been converted to onnx successfully.
onnx runtime gpu with CUDA
Converted Model in Onnx (Quantized):
Test_ONNX: [ 1/50] Time 24.736 (24.736) Loss 4.9879e-01 (4.9879e-01) Acc@1 86.10 ( 86.10) Acc@5 97.10 ( 97.10)
Test_ONNX: [11/50] Time 7.599 (10.646) Loss 8.6524e-01 (8.7809e-01) Acc@1 75.10 ( 77.11) Acc@5 95.30 ( 93.49)
Test_ONNX: [21/50] Time 7.718 ( 9.352) Loss 1.2917e+00 (8.8729e-01) Acc@1 64.70 ( 76.34) Acc@5 90.10 ( 93.80)
Test_ONNX: [31/50] Time 8.425 ( 8.953) Loss 1.1920e+00 (1.0216e+00) Acc@1 71.10 ( 73.64) Acc@5 89.40 ( 92.00)
Test_ONNX: [41/50] Time 7.646 ( 8.619) Loss 1.2383e+00 (1.0992e+00) Acc@1 68.30 ( 72.02) Acc@5 89.50 ( 90.94)
- Acc@1 71.432 Acc@5 90.598
How do I export a correct quantized model to ONNX? Acc@1 is 71.432 under ONNX Runtime, which is about 3% lower than the Acc@1 of 74.184 for the quantized model in PyTorch. Why? Can anyone help me figure this out? Thanks in advance.