Converting to ONNX after quantize_dynamic or optimize_for_mobile: both crash. Is there any workaround?

I need to make a saved model much smaller than it is currently (will be running on an embedded device with very limited memory), preferably down to 1/3 or 1/4 of the size.

Also, because memory is so limited, I have to convert to ONNX so I can run inference without PyTorch (PyTorch won’t fit). Of course, I can train on a desktop without such limitations.

I’m doing something for the company I work for so I can’t share the exact network, but here is full code to produce a simplified version for illustrative purposes so I can ask my question:

# MNIST.py

# Net Layout:
# batchSize x 1 x 28 x 28
#     conv1 Conv2d(1, 6, 5)
# batchSize x 6 x 24 x 24
#     relu(x)
#     max_pool2d(x, kernel_size=2)
# batchSize x 6 x 12 x 12
#     conv2 Conv2d(6, 16, 5)
# batchSize x 16 x 8 x 8
#     relu(x)
#     max_pool2d(x, kernel_size=2)
# batchSize x 16 x 4 x 4
#     view(-1, 16 * 4 * 4)    Note: 16 * 4 * 4 = 256
# batchSize x 256
#     fc1 Linear(256, 120)
#     relu(x)
# batchSize x 120
#     fc2 Linear(120, 84)
#     relu(x)
# batchSize x 84
#     fc3 Linear(84, 10)
# batchSize x 10

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import torchvision

import cv2
import numpy as np
import random
from termcolor import colored

# input image size in pixels; MNIST digits are square, so both values are the same
IMAGE_WIDTH = 28
IMAGE_HEIGHT = 28

# preprocessing applied to every dataset image:
#   Resize     takes its size as (height, width), so the height must come first
#   ToTensor   converts the PIL image to a float tensor
#   Normalize  applies (x - 0.5) / 0.5 to the single channel
TRANSFORM = torchvision.transforms.Compose([torchvision.transforms.Resize((IMAGE_HEIGHT, IMAGE_WIDTH)),
                                            torchvision.transforms.ToTensor(),
                                            torchvision.transforms.Normalize([0.5], [0.5])
                                            ])
BATCH_SIZE = 64
NUM_EPOCHS = 5

class MnistNet(nn.Module):
    """Small convolutional classifier: two conv + pool stages followed by three linear layers.

    forward() flattens the conv features with view(-1, 256), i.e. 16 channels x 4 x 4,
    which is what a 1 x 28 x 28 input produces after the two conv/pool stages.
    The final layer returns 10 raw scores per sample (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        # feature extractor: 5x5 convolutions, 1 -> 6 -> 16 channels
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # classifier head: 256 -> 120 -> 84 -> 10
        self.fc1 = nn.Linear(in_features=256, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)
    # end function

    def forward(self, x):
        # first conv stage: convolve, rectify, halve the spatial size
        features = self.conv1(x)
        features = F.relu(features)
        features = F.max_pool2d(features, kernel_size=2)

        # second conv stage, same pattern
        features = self.conv2(features)
        features = F.relu(features)
        features = F.max_pool2d(features, kernel_size=2)

        # flatten to one row of 256 values per sample
        flat = features.view(-1, 256)

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        scores = self.fc3(hidden)
        return scores
    # end function

# end class

def _select_device():
    """Return the CUDA device if one is available, otherwise print a warning and return the CPU device."""
    if torch.cuda.is_available():
        return torch.device('cuda')
    # end if
    print(colored('WARNING: CUDA does not seem to be available, using CPU', 'yellow'))
    return torch.device('cpu')
# end function

def _train_one_epoch(net, dataLoader, lossFunction, optimizer, device):
    """Run one optimisation pass over dataLoader and return (epochLoss, epochAccuracy).

    Both values are averaged per sample rather than per batch, so a shorter final
    batch is not over-weighted. epochAccuracy is a fraction in [0, 1].

    Assumes lossFunction returns the mean loss over the batch (the default
    reduction of nn.CrossEntropyLoss, which is what main() passes in).

    Raises ValueError if dataLoader yields no samples.
    """
    totalLoss = 0.0
    totalCorrect = 0
    totalSamples = 0

    # for each batch . . .
    for inputImages, gndTrths in dataLoader:
        inputImages = inputImages.to(device)
        gndTrths = gndTrths.to(device)
        batchSize = gndTrths.size(0)

        # clear gradients from the previous step
        optimizer.zero_grad()

        # forward pass, loss, backward pass, parameter update
        outputs = net(inputImages)
        loss = lossFunction(outputs, gndTrths)
        loss.backward()
        optimizer.step()

        # loss is a batch mean, so scale it back up to a per-batch sum
        totalLoss += loss.item() * batchSize

        # get the highest scoring classification for each prediction
        predictions = outputs.detach().argmax(dim=1)

        # number of gndTrths and predictions should always be the same, log an error if this is not the case
        if batchSize != predictions.size(0):
            print(colored('ERROR: gndTrths.size(0) != predictions.size(0)', 'red'))
        # end if

        # count correct predictions on the device, with a single host sync per batch
        totalCorrect += (predictions == gndTrths).sum().item()
        totalSamples += batchSize
    # end for

    if totalSamples == 0:
        raise ValueError('dataLoader produced no samples')
    # end if

    return totalLoss / totalSamples, totalCorrect / totalSamples
# end function

def main():
    """Train MnistNet on a random 20% subset of MNIST, then save the result.

    Writes the trained weights to 'MNIST.pt' (state_dict) and the traced graph
    to 'MNIST.onnx'. Downloads the dataset into 'built_in_dataset' if needed.
    """
    trainDataset = torchvision.datasets.MNIST('built_in_dataset', train=True, download=True, transform=TRANSFORM)

    # choose a 20% subset of the train idxs to save time
    numTrainSamples = round(len(trainDataset) * 0.2)
    trainDatasetIdxs = random.sample(range(len(trainDataset)), k=numTrainSamples)
    trainDataset = torch.utils.data.Subset(trainDataset, trainDatasetIdxs)

    # # randomly pick out an image to show, if desired
    # randTrainIdx = random.randint(0, len(trainDataset) - 1)
    # ptTrainImage, trainLabelIdx = trainDataset[randTrainIdx]
    # pilTrainImage = torchvision.transforms.ToPILImage()(ptTrainImage)
    # openCvTrainImage = np.array(pilTrainImage)
    # # would convert RGB to BGR here if image was color
    # print('random training image trainLabelIdx = ' + str(trainLabelIdx))
    # cv2.imshow('image', openCvTrainImage)
    # cv2.waitKey()

    trainDataLoader = DataLoader(trainDataset, batch_size=BATCH_SIZE, shuffle=True)

    # get device (cuda or cpu)
    device = _select_device()

    # declare net (placed on the device before the optimizer is built), loss function, and optimizer
    mnistNet = MnistNet()
    mnistNet.to(device)
    lossFunction = nn.CrossEntropyLoss()
    optimizer = optim.Adam(mnistNet.parameters())

    # set network to train mode
    mnistNet.train()

    print('beginning training . . .')

    # for each epoch . . .
    for epoch in range(1, NUM_EPOCHS + 1):
        epochLoss, epochAccuracy = _train_one_epoch(mnistNet, trainDataLoader, lossFunction, optimizer, device)

        print(f'epoch {epoch}, epochLoss = {epochLoss:.4f}, epochAccuracy = {epochAccuracy * 100:.4f}%')
    # end for

    # save the model as a PyTorch graph
    torch.save(mnistNet.state_dict(), 'MNIST.pt')

    # save the model as an ONNX graph; the dummy input only fixes the traced shapes,
    # laid out as batch x channels x height x width
    dummyInput = torch.randn(BATCH_SIZE, 1, IMAGE_HEIGHT, IMAGE_WIDTH).to(device)
    torch.onnx.export(mnistNet, dummyInput, 'MNIST.onnx')

This works great, and MNIST.onnx can be used for inference as expected.

Now for the quantize_dynamic attempt. If I change the end (after the big training for loop) to:

    # save the float (unquantized) weights first
    torch.save(mnistNet.state_dict(), 'MNIST.pt')

    # quantize
    # the model is moved to the CPU first; only nn.Linear is listed, so the two
    # Conv2d layers are not in the set of module types to quantize
    mnistNet.to('cpu')
    mnistNet = torch.quantization.quantize_dynamic(mnistNet, {torch.nn.Linear}, dtype=torch.qint8)

    torch.save(mnistNet.state_dict(), 'MNISTquant.pt')

    # save the model as an ONNX graph
    dummyInput = torch.randn(BATCH_SIZE, 1, IMAGE_WIDTH, IMAGE_HEIGHT).to('cpu')
    dummyOutput = mnistNet(dummyInput)
    # this export is the call that raises the AttributeError
    # ('torch.dtype' object has no attribute 'detach') in the traceback below
    torch.onnx.export(mnistNet, dummyInput, 'MNIST.onnx', verbose=True,
                      operator_export_type=torch.onnx.OperatorExportTypes.ONNX, example_outputs=dummyOutput)

I get:

$ python3 MNIST4.py 
beginning training . . .
epoch 1, epochLoss = 0.7125, epochAccuracy = 77.9422%
epoch 2, epochLoss = 0.1899, epochAccuracy = 94.0409%
epoch 3, epochLoss = 0.1199, epochAccuracy = 96.1686%
epoch 4, epochLoss = 0.0898, epochAccuracy = 97.1410%
epoch 5, epochLoss = 0.0682, epochAccuracy = 97.8391%
Traceback (most recent call last):
  File "MNIST4.py", line 243, in <module>
    main()
  File "MNIST4.py", line 186, in main
    torch.onnx.export(mnistNet, dummyInput, 'MNIST.onnx', verbose=True,
  File "/usr/local/lib/python3.8/dist-packages/torch/onnx/__init__.py", line 271, in export
    return utils.export(model, args, f, export_params, verbose, training,
  File "/usr/local/lib/python3.8/dist-packages/torch/onnx/utils.py", line 88, in export
    _export(model, args, f, export_params, verbose, training, input_names, output_names,
  File "/usr/local/lib/python3.8/dist-packages/torch/onnx/utils.py", line 691, in _export
    _model_to_graph(model, args, verbose, input_names,
  File "/usr/local/lib/python3.8/dist-packages/torch/onnx/utils.py", line 454, in _model_to_graph
    graph, params, torch_out, module = _create_jit_graph(model, args,
  File "/usr/local/lib/python3.8/dist-packages/torch/onnx/utils.py", line 417, in _create_jit_graph
    graph, torch_out = _trace_and_get_graph_from_model(model, args)
  File "/usr/local/lib/python3.8/dist-packages/torch/onnx/utils.py", line 374, in _trace_and_get_graph_from_model
    orig_state_dict_keys = _unique_state_dict(model).keys()
  File "/usr/local/lib/python3.8/dist-packages/torch/jit/_trace.py", line 69, in _unique_state_dict
    filtered_dict[k] = v.detach()
AttributeError: 'torch.dtype' object has no attribute 'detach'

Line 186 is the torch.onnx.export line.

As an alternative, I figured I’d try optimize_for_mobile, which requires using torch.jit.trace to convert to a ScriptModule. Here is my attempt at that (as before, changing only the end after the big training for loop):

    # save the model as a PyTorch graph
    torch.save(mnistNet.state_dict(), 'MNIST.pt')

    # switch to eval mode and move to the CPU before tracing
    mnistNet.eval()
    mnistNet = mnistNet.to('cpu')

    # trace the model with a dummy batch to obtain a ScriptModule
    dummyInput = torch.randn(BATCH_SIZE, 1, IMAGE_WIDTH, IMAGE_HEIGHT).to('cpu')
    mnistNet = torch.jit.trace(mnistNet, dummyInput)

    # from here on mnistNet is the module returned by optimize_for_mobile
    mnistNet =  torch.utils.mobile_optimizer.optimize_for_mobile(mnistNet)

    dummyOutput = mnistNet(dummyInput)

    # this export is the call that fails: it reads model.training, which the
    # RecursiveScriptModule does not have (see the traceback below)
    torch.onnx.export(mnistNet, dummyInput, 'MNIST.onnx', verbose=True,
                      operator_export_type=torch.onnx.OperatorExportTypes.ONNX, example_outputs=dummyOutput)

Which produces:

$ python3 MNIST5.py 
beginning training . . .
epoch 1, epochLoss = 0.6987, epochAccuracy = 78.6154%
epoch 2, epochLoss = 0.1879, epochAccuracy = 94.2154%
epoch 3, epochLoss = 0.1295, epochAccuracy = 95.8610%
epoch 4, epochLoss = 0.0984, epochAccuracy = 96.8418%
epoch 5, epochLoss = 0.0807, epochAccuracy = 97.5233%
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py:889: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at  /pytorch/c10/core/TensorImpl.h:930.)
  result = self.forward(*input, **kwargs)
Traceback (most recent call last):
  File "MNIST5.py", line 244, in <module>
    main()
  File "MNIST5.py", line 189, in main
    torch.onnx.export(mnistNet, dummyInput, 'MNIST.onnx', verbose=True,
  File "/usr/local/lib/python3.8/dist-packages/torch/onnx/__init__.py", line 271, in export
    return utils.export(model, args, f, export_params, verbose, training,
  File "/usr/local/lib/python3.8/dist-packages/torch/onnx/utils.py", line 88, in export
    _export(model, args, f, export_params, verbose, training, input_names, output_names,
  File "/usr/local/lib/python3.8/dist-packages/torch/onnx/utils.py", line 676, in _export
    with select_model_mode_for_export(model, training):
  File "/usr/lib/python3.8/contextlib.py", line 113, in __enter__
    return next(self.gen)
  File "/usr/local/lib/python3.8/dist-packages/torch/onnx/utils.py", line 38, in select_model_mode_for_export
    is_originally_training = model.training
  File "/usr/local/lib/python3.8/dist-packages/torch/jit/_script.py", line 561, in __getattr__
    return super(RecursiveScriptModule, self).__getattr__(attr)
  File "/usr/local/lib/python3.8/dist-packages/torch/jit/_script.py", line 291, in __getattr__
    return super(ScriptModule, self).__getattr__(attr)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 947, in __getattr__
    raise AttributeError("'{}' object has no attribute '{}'".format(
AttributeError: 'RecursiveScriptModule' object has no attribute 'training'

I did see these posts:

However, after reading these, it’s not clear to me whether the torch.jit.trace → ScriptModule → ONNX path is supported.

— Edit 1 —

Based on these:

I tried this:

    # save the model as a PyTorch graph
    torch.save(mnistNet.state_dict(), 'MNIST.pt')

    # NOTE(review): unlike the two earlier attempts, the model is never moved to 'cpu' here;
    # if training ran on CUDA this presumably explains the "same device" RuntimeError
    # raised by convert() below -- confirm
    model = mnistNet
    dummyInput = torch.randn(BATCH_SIZE, 1, IMAGE_WIDTH, IMAGE_HEIGHT).to('cpu')
    sample_inputs = dummyInput
    # NOTE(review): ten names '0'..'9' look like class labels rather than input names;
    # MnistNet.forward() takes a single input -- confirm what was intended
    input_names = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

    torch.backends.quantized.engine = "qnnpack"

    sample_inputs = sample_inputs.numpy()
    # iterating the array walks its first (batch) axis, so this builds BATCH_SIZE
    # separate tensors rather than one batched input
    pt_inputs = tuple(torch.from_numpy(x) for x in sample_inputs)
    model.qconfig = torch.quantization.get_default_qconfig('qnnpack')
    # NOTE(review): nothing is run through the prepared model between prepare() and
    # convert(), which matches the "must run observer" warning in the output below
    q_model = torch.quantization.prepare(model, inplace=False)
    q_model = torch.quantization.convert(q_model, inplace=False)

    # the run below stops at convert() above, so the remaining lines are never reached
    traced_model = torch.jit.trace(q_model, pt_inputs)
    # round-trip the traced model through an in-memory buffer
    # NOTE(review): `io` is not among the imports shown in MNIST.py -- needs `import io`
    buf = io.BytesIO()
    torch.jit.save(traced_model, buf)
    buf.seek(0)
    q_model = torch.jit.load(buf)

    q_model.eval()
    output = q_model(*pt_inputs)

    # export to an in-memory buffer instead of a file
    f = io.BytesIO()
    torch.onnx.export(q_model, pt_inputs, f, input_names=input_names, example_outputs=output,
                      operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK)
    f.seek(0)

Which produces:

$ python3 MNIST6.py 
beginning training . . .
epoch 1, epochLoss = 0.8135, epochAccuracy = 75.1496%
epoch 2, epochLoss = 0.2094, epochAccuracy = 93.6835%
epoch 3, epochLoss = 0.1284, epochAccuracy = 96.0439%
epoch 4, epochLoss = 0.0989, epochAccuracy = 97.0412%
epoch 5, epochLoss = 0.0763, epochAccuracy = 97.7560%
/usr/local/lib/python3.8/dist-packages/torch/quantization/observer.py:955: UserWarning: must run observer before calling calculate_qparams.                                    Returning default scale and zero point 
  warnings.warn(
Traceback (most recent call last):
  File "MNIST6.py", line 260, in <module>
    main()
  File "MNIST6.py", line 192, in main
    q_model = torch.quantization.convert(q_model, inplace=False)
  File "/usr/local/lib/python3.8/dist-packages/torch/quantization/quantize.py", line 471, in convert
    _convert(
  File "/usr/local/lib/python3.8/dist-packages/torch/quantization/quantize.py", line 509, in _convert
    reassign[name] = swap_module(mod, mapping, custom_module_class_mapping)
  File "/usr/local/lib/python3.8/dist-packages/torch/quantization/quantize.py", line 534, in swap_module
    new_mod = mapping[type(mod)].from_float(mod)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/quantized/modules/conv.py", line 418, in from_float
    return _ConvNd.from_float(cls, mod)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/quantized/modules/conv.py", line 220, in from_float
    return cls.get_qconv(mod, activation_post_process, weight_post_process)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/quantized/modules/conv.py", line 187, in get_qconv
    qweight = _quantize_weight(mod.weight.float(), weight_post_process)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/quantized/modules/utils.py", line 9, in _quantize_weight
    qweight = torch.quantize_per_tensor(
RuntimeError: quantize_tensor_per_tensor_affine expects a quantized and float tensors to be on the same device.

Any suggestions? It’s not really clear to me if converting to onnx is supported after using quantize_dynamic or optimize_for_mobile first. Is there a way around these errors? Or is there an alternative way to make a PyTorch saved model much smaller?

We currently do not fully support converting a quantized PyTorch model to ONNX format. The current support is limited to running a subset of operators in Caffe2 via ONNX (converted from PyTorch).

You could try checking out quantization in the ONNX framework, maybe they have ways to convert a quantized PT model to ONNX format.