ONNX export of quantized model

Hi @zetyquickly, it is currently only possible to convert quantized model to Caffe2 using ONNX. The onnx file generated in the process is specific to Caffe2.
If this is something you are still interested in, then you need to run a traced model through the onnx export flow. You can use the following code for reference

        class ConvModel(torch.nn.Module):
            def __init__(self):
                super(ConvModel, self).__init__()
                self.qconfig = torch.quantization.default_qconfig
                self.fc1 = torch.quantization.QuantWrapper(torch.nn.Conv2d(3, 5, 2, bias=True).to(dtype=torch.float))

            def forward(self, x):
                x = self.fc1(x)
                return x
        torch.backends.quantized.engine = "qnnpack"
        qconfig = torch.quantization.default_qconfig
        model = ConvModel()
        model.qconfig = qconfig
        model = torch.quantization.prepare(model)
        model = torch.quantization.convert(model)

        x_numpy = np.random.rand(1, 3, 6, 6).astype(np.float32)
        x = torch.from_numpy(x_numpy).to(dtype=torch.float)
        outputs = model(x)
        input_names = ["x"]
        outputs = model(x)

        traced = torch.jit.trace(model, x)
        buf = io.BytesIO()
        torch.jit.save(traced, buf)
        buf.seek(0)

        model = torch.jit.load(buf)
        f = io.BytesIO()
        torch.onnx.export(model, x, f, input_names=input_names, example_outputs=outputs,
                          operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK)
        f.seek(0)

        onnx_model = onnx.load(f)
1 Like