ONNX export of quantized model

Hi,

I’ve tried to export a simple quantized model via the ONNX exporter and ran into an error that asks me to report a bug to PyTorch.

import torch 
import onnx 
import io

import torch._C as _C
OperatorExportTypes = _C._onnx.OperatorExportTypes

class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.quant = torch.quantization.QuantStub()
        self.cnn = torch.nn.Conv2d(1,1,1)
    def forward(self, x):
        x = self.quant(x)
        return self.cnn(x)

model = Net()
model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
torch.backends.quantized.engine = 'fbgemm'
model = torch.quantization.prepare(model, inplace=False)
torch.quantization.convert(model, inplace=True)
print(model)
inputs = torch.ones((1, 1, 224, 224))  # one input channel to match Conv2d(1, 1, 1)
with torch.no_grad():
    with io.BytesIO() as f:
        torch.onnx.export(
            model,
            inputs,
            f,
            operator_export_type=OperatorExportTypes.ONNX_ATEN_FALLBACK,
#             verbose=True,  # NOTE: uncomment this for debugging
#             export_params=True,
        )
        onnx_model = onnx.load_from_string(f.getvalue())
Net(
  (quant): Quantize(scale=tensor([1.]), zero_point=tensor([0]), dtype=torch.quint8)
  (cnn): QuantizedConv2d(1, 1, kernel_size=(1, 1), stride=(1, 1), scale=1.0, zero_point=0)
)
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-42-9f9e68519c44> in <module>
     27             model,
     28             inputs,
---> 29             f,
     30 #             operator_export_type=OperatorExportTypes.ONNX_ATEN_FALLBACK,
     31 #             verbose=True,  # NOTE: uncomment this for debugging

~/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/onnx/__init__.py in export(model, args, f, export_params, verbose, training, input_names, output_names, aten, export_raw_ir, operator_export_type, opset_version, _retain_param_name, do_constant_folding, example_outputs, strip_doc_string, dynamic_axes, keep_initializers_as_inputs, custom_opsets, enable_onnx_checker, use_external_data_format)
    170                         do_constant_folding, example_outputs,
    171                         strip_doc_string, dynamic_axes, keep_initializers_as_inputs,
--> 172                         custom_opsets, enable_onnx_checker, use_external_data_format)
    173 
    174 

~/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/onnx/utils.py in export(model, args, f, export_params, verbose, training, input_names, output_names, aten, export_raw_ir, operator_export_type, opset_version, _retain_param_name, do_constant_folding, example_outputs, strip_doc_string, dynamic_axes, keep_initializers_as_inputs, custom_opsets, enable_onnx_checker, use_external_data_format)
     90             dynamic_axes=dynamic_axes, keep_initializers_as_inputs=keep_initializers_as_inputs,
     91             custom_opsets=custom_opsets, enable_onnx_checker=enable_onnx_checker,
---> 92             use_external_data_format=use_external_data_format)
     93 
     94 

~/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/onnx/utils.py in _export(model, args, f, export_params, verbose, training, input_names, output_names, operator_export_type, export_type, example_outputs, propagate, opset_version, _retain_param_name, do_constant_folding, strip_doc_string, dynamic_axes, keep_initializers_as_inputs, fixed_batch_size, custom_opsets, add_node_names, enable_onnx_checker, use_external_data_format)
    508                                                             example_outputs, propagate,
    509                                                             _retain_param_name, val_do_constant_folding,
--> 510                                                             fixed_batch_size=fixed_batch_size)
    511 
    512             # TODO: Don't allocate a in-memory string for the protobuf

~/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/onnx/utils.py in _model_to_graph(model, args, verbose, input_names, output_names, operator_export_type, example_outputs, propagate, _retain_param_name, do_constant_folding, _disable_torch_constant_prop, fixed_batch_size)
    348             model.graph, tuple(in_vars), False, propagate)
    349     else:
--> 350         graph, torch_out = _trace_and_get_graph_from_model(model, args)
    351         state_dict = _unique_state_dict(model)
    352         params = list(state_dict.values())

~/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/onnx/utils.py in _trace_and_get_graph_from_model(model, args)
    305 
    306     trace_graph, torch_out, inputs_states = \
--> 307         torch.jit._get_trace_graph(model, args, _force_outplace=False, _return_inputs_states=True)
    308     warn_on_static_input_change(inputs_states)
    309 

~/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/jit/__init__.py in _get_trace_graph(f, args, kwargs, _force_outplace, return_inputs, _return_inputs_states)
    275     if not isinstance(args, tuple):
    276         args = (args,)
--> 277     outs = ONNXTracedModule(f, _force_outplace, return_inputs, _return_inputs_states)(*args, **kwargs)
    278     return outs
    279 

~/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    556             result = self._slow_forward(*input, **kwargs)
    557         else:
--> 558             result = self.forward(*input, **kwargs)
    559         for hook in self._forward_hooks.values():
    560             hook_result = hook(self, input, result)

~/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/jit/__init__.py in forward(self, *args)
    358             in_vars + module_state,
    359             _create_interpreter_name_lookup_fn(),
--> 360             self._force_outplace,
    361         )
    362 

~/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/jit/__init__.py in wrapper(*args)
    342             trace_inputs = _unflatten(args[:len(in_vars)], in_desc)
    343 
--> 344             ret_inputs.append(tuple(x.clone(memory_format=torch.preserve_format) for x in args))
    345             if self._return_inputs_states:
    346                 inputs_states.append(_unflatten(args[:len(in_vars)], in_desc))

~/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/jit/__init__.py in <genexpr>(.0)
    342             trace_inputs = _unflatten(args[:len(in_vars)], in_desc)
    343 
--> 344             ret_inputs.append(tuple(x.clone(memory_format=torch.preserve_format) for x in args))
    345             if self._return_inputs_states:
    346                 inputs_states.append(_unflatten(args[:len(in_vars)], in_desc))

RuntimeError: self.qscheme() == at::kPerTensorAffine INTERNAL ASSERT FAILED at /opt/conda/conda-bld/pytorch_1586761698468/work/aten/src/ATen/native/quantized/QTensor.cpp:190, please report a bug to PyTorch. clone for quantized Tensor only works for PerTensorAffine scheme right now

What am I doing incorrectly?

Is it possible to convert a quantized model to ONNX and then to Caffe2?

Not sure if it helps, but here https://pytorch.org/docs/stable/tensor_attributes.html I found this: “Quantized and complex types are not yet supported.”

It is about torch.Tensor instantiation and which types are supported as the dtype parameter.
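
For reference, a small sketch of what that documentation note means (quantized dtypes can’t be passed to the regular tensor factory functions; quantized tensors are created through the quantization API instead):

import torch

# Passing a quantized dtype to a regular factory function is rejected:
# torch.ones(2, dtype=torch.quint8)  # raises a RuntimeError

# Quantized tensors are created via the quantization API instead:
q = torch.quantize_per_tensor(torch.ones(2), scale=1.0, zero_point=0, dtype=torch.quint8)
print(q.dtype)  # torch.quint8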

Converting a quantized model to ONNX isn’t supported yet.

@supriyar: Can you take a look at this issue?


I forgot to mention that I used PyTorch version 1.6.0 from a nightly build via conda.

Hi @zetyquickly, it is currently only possible to convert a quantized model to Caffe2 using ONNX. The ONNX file generated in the process is specific to Caffe2.
If this is something you are still interested in, then you need to run a traced model through the ONNX export flow. You can use the following code for reference:

import io

import numpy as np
import onnx
import torch

class ConvModel(torch.nn.Module):
    def __init__(self):
        super(ConvModel, self).__init__()
        self.qconfig = torch.quantization.default_qconfig
        self.fc1 = torch.quantization.QuantWrapper(
            torch.nn.Conv2d(3, 5, 2, bias=True).to(dtype=torch.float))

    def forward(self, x):
        x = self.fc1(x)
        return x

torch.backends.quantized.engine = "qnnpack"
qconfig = torch.quantization.default_qconfig
model = ConvModel()
model.qconfig = qconfig
model = torch.quantization.prepare(model)
model = torch.quantization.convert(model)

x_numpy = np.random.rand(1, 3, 6, 6).astype(np.float32)
x = torch.from_numpy(x_numpy).to(dtype=torch.float)
outputs = model(x)
input_names = ["x"]

# Trace the quantized model first; the traced module is what goes through the ONNX exporter.
traced = torch.jit.trace(model, x)
buf = io.BytesIO()
torch.jit.save(traced, buf)
buf.seek(0)

model = torch.jit.load(buf)
f = io.BytesIO()
torch.onnx.export(model, x, f, input_names=input_names, example_outputs=outputs,
                  operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK)
f.seek(0)

onnx_model = onnx.load(f)
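
The resulting onnx_model is specific to Caffe2 and is meant to be run with Caffe2’s ONNX backend. A minimal sketch of that last step (assuming Caffe2 is available in your PyTorch build; the variable names come from the snippet above):

import caffe2.python.onnx.backend as c2_backend

# Build a Caffe2 representation of the exported model and run it on the same input.
rep = c2_backend.prepare(onnx_model, device="CPU")
caffe2_outputs = rep.run([x_numpy])
print(caffe2_outputs[0].shape)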

@supriyar thank you very much for your answer.

You’re right, I am interested in conversion to Caffe2.
There are some points in the example that confuse me. Could you please clarify them for us?

  1. torch.jit.trace and torch.onnx.export. I thought they were mutually exclusive: one is for TorchScript and the other for ONNX conversion. While an ONNX model needs a backend to be executed, TorchScript is standalone. Why do we need the TorchScript conversion here before the ONNX export? Previously I saw opinions like this one: https://github.com/pytorch/pytorch/issues/27569#issuecomment-539738922
  2. In general terms, how are the PyTorch JIT, TorchScript, and ONNX connected? Why do we still need to convert anything from PyTorch to Caffe2 once a TorchScript model has been created?

The flow is slightly different for quantized ops (so the regular PyTorch -> ONNX conversion flow doesn’t directly apply).
We reuse some of the existing functionality for converting traced ops from PyTorch to ONNX for quantized models, hence it is necessary to trace the model first. Similarly, it is also necessary to set operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK during the conversion flow for quantized ops.
TorchScript models are not directly runnable on the Caffe2 backend, so we need to convert them to the expected backend format using ONNX.
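
In short, the two essential steps look like this (a condensed sketch of the reference code above, assuming model, x, and outputs are defined as there; the output file name is illustrative):

# 1. Trace the converted (quantized) model.
traced = torch.jit.trace(model, x)

# 2. Export the traced module with the ATen fallback so quantized ops survive the conversion.
torch.onnx.export(traced, x, "quantized_model.onnx",
                  example_outputs=outputs,
                  operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK)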

Thanks for your reply @supriyar ,

  1. Does it mean that we can convert scripted parts of a network (using torch.jit.script) to ONNX?
  2. What if our network contains operators that aren’t available in TorchScript but are available in Caffe2 (e.g. RoIAlign)?
  3. Optionally, is it possible to use quantized layers with the TorchScript backend on mobile (I mean without an additional conversion to Caffe2 using ONNX)?
  1. Does it mean that we can convert scripted parts of a network (using torch.jit.script) to ONNX?

I haven’t tried torch.jit.script for the quantized PyTorch -> ONNX -> Caffe2 path, but torch.jit.trace should work.

  2. What if our network contains operators that aren’t available in TorchScript but are available in Caffe2 (e.g. RoIAlign)?

At this point it is limited to operators present in both the quantized PyTorch and the quantized Caffe2 frameworks.

  3. Optionally, is it possible to use quantized layers with the TorchScript backend on mobile (I mean without an additional conversion to Caffe2 using ONNX)?

You can run a quantized PyTorch network directly on mobile using PyTorch Mobile, which is highly recommended over converting to Caffe2. Check out https://pytorch.org/mobile/home/.
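
A minimal sketch of that route (assuming the quantized model and example input x from the reference code earlier in the thread; the optimize_for_mobile pass and the file name are just illustrative):

from torch.utils.mobile_optimizer import optimize_for_mobile

# Trace the quantized model and (optionally) run the mobile optimization passes on it.
traced = torch.jit.trace(model, x)
optimized = optimize_for_mobile(traced)

# Save the TorchScript module; the PyTorch Mobile runtime on Android/iOS loads this file
# directly, with no ONNX or Caffe2 step in between.
optimized.save("quantized_mobile.pt")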


@supriyar
Does the current dev or stable version now support converting a quantized model to ONNX?

General export of quantized models to ONNX isn’t currently supported. We only support conversion to ONNX for the Caffe2 backend. This thread has additional context on what we currently support - ONNX export of quantized model

Is generic ONNX export support for quantized models (e.g. for import with ONNX Runtime) on the roadmap?
