I need to make a saved model much smaller than it currently is (it will run on an embedded device with very limited memory), preferably down to 1/3 or 1/4 of its current size.
Also, because of the limited memory, I have to convert the model to ONNX so I can run inference without PyTorch (PyTorch won’t fit). Of course, I can train on a desktop without such limitations.
I’m doing this for the company I work for, so I can’t share the exact network, but here is the full code for a simplified version, for illustrative purposes, so I can ask my question:
# MNIST.py
# Net Layout:
# batchSize x 1 x 28 x 28
# conv1 Conv2d(1, 6, 5)
# batchSize x 6 x 24 x 24
# relu(x)
# max_pool2d(x, kernel_size=2)
# batchSize x 6 x 12 x 12
# conv2 Conv2d(6, 16, 5)
# batchSize x 16 x 8 x 8
# relu(x)
# max_pool2d(x, kernel_size=2)
# batchSize x 16 x 4 x 4
# view(-1, 16 * 4 * 4) Note: 16 * 4 * 4 = 256
# batchSize x 256
# fc1 Linear(256, 120)
# relu(x)
# batchSize x 120
# fc2 Linear(120, 84)
# relu(x)
# batchSize x 84
# fc3 Linear(84, 10)
# batchSize x 10
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
import cv2
import numpy as np
import random
from termcolor import colored
# Input image geometry expected by the network (see the net layout above:
# batchSize x 1 x 28 x 28).
IMAGE_WIDTH = 28
IMAGE_HEIGHT = 28

# Preprocessing applied to every dataset image: resize, convert to a tensor,
# then normalize the single channel with mean 0.5 and std 0.5.
# torchvision's Resize takes its size as (height, width), so the height goes
# first; the two only happen to be interchangeable while both are 28.
TRANSFORM = torchvision.transforms.Compose([
    torchvision.transforms.Resize((IMAGE_HEIGHT, IMAGE_WIDTH)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.5], [0.5]),
])

# Training hyper-parameters.
BATCH_SIZE = 64
NUM_EPOCHS = 5
class MnistNet(nn.Module):
    """Small CNN mapping batchSize x 1 x 28 x 28 images to 10 class scores.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed
    three fully connected layers.  forward() returns the raw output of the
    last linear layer (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # 16 channels x 4 x 4 positions remain after the second pooling step
        # for a 28 x 28 input, i.e. 256 features per image
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
    # end function

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the network on a batch of images.

        Args:
            x: tensor of shape batchSize x 1 x 28 x 28.

        Returns:
            Tensor of shape batchSize x 10.

        Raises:
            RuntimeError: if the input's spatial size does not reduce to
                16 x 4 x 4 features per image (raised by fc1).
        """
        x = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2)
        x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=2)
        # Flatten everything except the batch dimension.  Unlike
        # view(-1, 256), this never folds a feature-size mismatch into the
        # batch dimension; a wrongly sized input fails loudly in fc1 instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    # end function
# end class
def main():
    """Train MnistNet on a random 20% subset of MNIST, then save and export it.

    Side effects: downloads MNIST into 'built_in_dataset' if it is not there
    yet, prints per-epoch loss/accuracy, writes the trained weights to
    'MNIST.pt' (a state_dict) and the ONNX graph to 'MNIST.onnx'.
    """
    trainDataset = torchvision.datasets.MNIST('built_in_dataset', train=True, download=True, transform=TRANSFORM)

    # choose a 20% subset of the train idxs to save time
    trainDatasetIdxs = random.sample(range(len(trainDataset)), k=round(len(trainDataset) * 0.2))
    trainDataset = torch.utils.data.Subset(trainDataset, trainDatasetIdxs)

    # # randomly pick out an image to show, if desired
    # randTrainIdx = random.randint(0, len(trainDataset) - 1)
    # ptTrainImage, trainLabelIdx = trainDataset[randTrainIdx]
    # pilTrainImage = torchvision.transforms.ToPILImage()(ptTrainImage)
    # openCvTrainImage = np.array(pilTrainImage)
    # # would convert RGB to BGR here if image was color
    # print('random training image trainLabelIdx = ' + str(trainLabelIdx))
    # cv2.imshow('image', openCvTrainImage)
    # cv2.waitKey()

    trainDataLoader = DataLoader(trainDataset, batch_size=BATCH_SIZE, shuffle=True)

    # get device (cuda or cpu)
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        print(colored('WARNING: CUDA does not seem to be available, using CPU', 'yellow'))
        device = torch.device('cpu')
    # end if

    # declare net, loss function, and optimizer; the net is moved to the
    # device before the optimizer is built from its parameters
    mnistNet = MnistNet()
    mnistNet.to(device)
    lossFunction = nn.CrossEntropyLoss()
    optimizer = optim.Adam(mnistNet.parameters())

    # set network to train mode
    mnistNet.train()

    print('beginning training . . .')

    # for each epoch . . .
    for epoch in range(1, NUM_EPOCHS + 1):
        # running totals for the epoch; weighting by batch size keeps the
        # (usually smaller) final batch from being over-represented
        epochLossSum = 0.0
        epochCorrect = 0
        epochSamples = 0

        # for each batch . . .
        for inputImages, gndTrths in trainDataLoader:
            # input images and ground truths are Tensors
            inputImages = inputImages.to(device)
            gndTrths = gndTrths.to(device)

            # clear gradients from the previous step
            optimizer.zero_grad()
            # get net output
            outputs = mnistNet(inputImages)
            # calculate loss
            loss = lossFunction(outputs, gndTrths)
            # call backward() to compute gradients
            loss.backward()
            # update parameters using gradients
            optimizer.step()

            # get the highest scoring classification for each prediction
            predictions = outputs.detach().argmax(dim=1)

            # number of gndTrths and predictions should always be the same, log an error if this is not the case
            numInBatch = gndTrths.size(0)
            if numInBatch != predictions.size(0):
                print(colored('ERROR: gndTrths.size(0) != predictions.size(0)', 'red'))
            # end if

            # CrossEntropyLoss averages over the batch by default, so scale
            # back up to a per-sample sum before accumulating
            epochLossSum += loss.item() * numInBatch
            # count correct predictions in one vectorized comparison
            epochCorrect += (predictions == gndTrths).sum().item()
            epochSamples += numInBatch
        # end for

        # calculate epoch loss and accuracy over all samples seen this epoch
        epochLoss = epochLossSum / epochSamples
        epochAccuracy = epochCorrect / epochSamples
        print(f'epoch {epoch}, epochLoss = {epochLoss:.4f}, epochAccuracy = {epochAccuracy * 100:.4f}%')
    # end for

    # save the model as a PyTorch graph
    torch.save(mnistNet.state_dict(), 'MNIST.pt')

    # save the model as an ONNX graph; switch to inference mode first so the
    # exported graph does not depend on train-mode behaviour
    mnistNet.eval()
    # dummy input is N x C x H x W
    # NOTE(review): without dynamic_axes the exported input shape is fixed to
    # dummyInput's shape (batch = BATCH_SIZE) -- confirm that is what the
    # embedded runtime expects
    dummyInput = torch.randn(BATCH_SIZE, 1, IMAGE_HEIGHT, IMAGE_WIDTH, device=device)
    torch.onnx.export(mnistNet, dummyInput, 'MNIST.onnx')
This works great, and MNIST.onnx can be used for inference as expected.
Now for the quantize_dynamic attempt. If I change the end (after the big training loop) to:
# Variant 1: dynamically quantize the trained model, then try to export it to ONNX.
# save the float weights first
torch.save(mnistNet.state_dict(), 'MNIST.pt')
# quantize: move the model to the CPU, then dynamically quantize only the
# nn.Linear layers to qint8 (Conv2d is not in the set passed below)
mnistNet.to('cpu')
mnistNet = torch.quantization.quantize_dynamic(mnistNet, {torch.nn.Linear}, dtype=torch.qint8)
# save the quantized model's state_dict separately
torch.save(mnistNet.state_dict(), 'MNISTquant.pt')
# save the model as an ONNX graph
dummyInput = torch.randn(BATCH_SIZE, 1, IMAGE_WIDTH, IMAGE_HEIGHT).to('cpu')
# run the quantized model once to obtain outputs for example_outputs
dummyOutput = mnistNet(dummyInput)
# NOTE(review): this is the call that raises the AttributeError in the
# traceback below ('torch.dtype' object has no attribute 'detach')
torch.onnx.export(mnistNet, dummyInput, 'MNIST.onnx', verbose=True,
operator_export_type=torch.onnx.OperatorExportTypes.ONNX, example_outputs=dummyOutput)
I get:
$ python3 MNIST4.py
beginning training . . .
epoch 1, epochLoss = 0.7125, epochAccuracy = 77.9422%
epoch 2, epochLoss = 0.1899, epochAccuracy = 94.0409%
epoch 3, epochLoss = 0.1199, epochAccuracy = 96.1686%
epoch 4, epochLoss = 0.0898, epochAccuracy = 97.1410%
epoch 5, epochLoss = 0.0682, epochAccuracy = 97.8391%
Traceback (most recent call last):
File "MNIST4.py", line 243, in <module>
main()
File "MNIST4.py", line 186, in main
torch.onnx.export(mnistNet, dummyInput, 'MNIST.onnx', verbose=True,
File "/usr/local/lib/python3.8/dist-packages/torch/onnx/__init__.py", line 271, in export
return utils.export(model, args, f, export_params, verbose, training,
File "/usr/local/lib/python3.8/dist-packages/torch/onnx/utils.py", line 88, in export
_export(model, args, f, export_params, verbose, training, input_names, output_names,
File "/usr/local/lib/python3.8/dist-packages/torch/onnx/utils.py", line 691, in _export
_model_to_graph(model, args, verbose, input_names,
File "/usr/local/lib/python3.8/dist-packages/torch/onnx/utils.py", line 454, in _model_to_graph
graph, params, torch_out, module = _create_jit_graph(model, args,
File "/usr/local/lib/python3.8/dist-packages/torch/onnx/utils.py", line 417, in _create_jit_graph
graph, torch_out = _trace_and_get_graph_from_model(model, args)
File "/usr/local/lib/python3.8/dist-packages/torch/onnx/utils.py", line 374, in _trace_and_get_graph_from_model
orig_state_dict_keys = _unique_state_dict(model).keys()
File "/usr/local/lib/python3.8/dist-packages/torch/jit/_trace.py", line 69, in _unique_state_dict
filtered_dict[k] = v.detach()
AttributeError: 'torch.dtype' object has no attribute 'detach'
Line 186 is the torch.onnx.export line.
As an alternative, I figured I’d try optimize_for_mobile, which requires using torch.jit.trace to convert to a ScriptModule. Here is my attempt at that (as before, changing only the end after the big training loop):
# Variant 2: trace the model, run optimize_for_mobile on it, then try to export to ONNX.
# save the model as a PyTorch graph
torch.save(mnistNet.state_dict(), 'MNIST.pt')
# switch to eval mode and move to the CPU before tracing
mnistNet.eval()
mnistNet = mnistNet.to('cpu')
dummyInput = torch.randn(BATCH_SIZE, 1, IMAGE_WIDTH, IMAGE_HEIGHT).to('cpu')
# trace with the dummy input; mnistNet is rebound to the traced module
mnistNet = torch.jit.trace(mnistNet, dummyInput)
# mnistNet is rebound again to the mobile-optimized module
mnistNet = torch.utils.mobile_optimizer.optimize_for_mobile(mnistNet)
# run once to obtain outputs for example_outputs
dummyOutput = mnistNet(dummyInput)
# NOTE(review): per the traceback below, export fails here when it reads
# `model.training` on the RecursiveScriptModule
torch.onnx.export(mnistNet, dummyInput, 'MNIST.onnx', verbose=True,
operator_export_type=torch.onnx.OperatorExportTypes.ONNX, example_outputs=dummyOutput)
Which produces:
$ python3 MNIST5.py
beginning training . . .
epoch 1, epochLoss = 0.6987, epochAccuracy = 78.6154%
epoch 2, epochLoss = 0.1879, epochAccuracy = 94.2154%
epoch 3, epochLoss = 0.1295, epochAccuracy = 95.8610%
epoch 4, epochLoss = 0.0984, epochAccuracy = 96.8418%
epoch 5, epochLoss = 0.0807, epochAccuracy = 97.5233%
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py:889: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at /pytorch/c10/core/TensorImpl.h:930.)
result = self.forward(*input, **kwargs)
Traceback (most recent call last):
File "MNIST5.py", line 244, in <module>
main()
File "MNIST5.py", line 189, in main
torch.onnx.export(mnistNet, dummyInput, 'MNIST.onnx', verbose=True,
File "/usr/local/lib/python3.8/dist-packages/torch/onnx/__init__.py", line 271, in export
return utils.export(model, args, f, export_params, verbose, training,
File "/usr/local/lib/python3.8/dist-packages/torch/onnx/utils.py", line 88, in export
_export(model, args, f, export_params, verbose, training, input_names, output_names,
File "/usr/local/lib/python3.8/dist-packages/torch/onnx/utils.py", line 676, in _export
with select_model_mode_for_export(model, training):
File "/usr/lib/python3.8/contextlib.py", line 113, in __enter__
return next(self.gen)
File "/usr/local/lib/python3.8/dist-packages/torch/onnx/utils.py", line 38, in select_model_mode_for_export
is_originally_training = model.training
File "/usr/local/lib/python3.8/dist-packages/torch/jit/_script.py", line 561, in __getattr__
return super(RecursiveScriptModule, self).__getattr__(attr)
File "/usr/local/lib/python3.8/dist-packages/torch/jit/_script.py", line 291, in __getattr__
return super(ScriptModule, self).__getattr__(attr)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 947, in __getattr__
raise AttributeError("'{}' object has no attribute '{}'".format(
AttributeError: 'RecursiveScriptModule' object has no attribute 'training'
I did see these posts:
However, after reading these, it’s not clear to me whether torch.jit.trace → ScriptModule → ONNX is supported.
— Edit 1 —
Based on these:
I tried this:
# Variant 3 (Edit 1): static quantization (prepare/convert), trace, round-trip
# through torch.jit.save/load, then try to export to ONNX.
# NOTE(review): `io` is used below but is not among the imports shown at the
# top of the script -- presumably imported in the full file; confirm.
# save the model as a PyTorch graph
torch.save(mnistNet.state_dict(), 'MNIST.pt')
# NOTE(review): unlike the other two variants, the model is not moved to 'cpu'
# here while dummyInput is on the CPU; the "same device" RuntimeError below
# may come from the model still being on CUDA -- confirm.
model = mnistNet
dummyInput = torch.randn(BATCH_SIZE, 1, IMAGE_WIDTH, IMAGE_HEIGHT).to('cpu')
sample_inputs = dummyInput
# NOTE(review): ten names that look like class labels; MnistNet.forward takes
# a single input tensor -- confirm what input_names is meant to describe.
input_names = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
torch.backends.quantized.engine = "qnnpack"
sample_inputs = sample_inputs.numpy()
# iterating the array walks its first (batch) axis, so this builds a tuple of
# BATCH_SIZE tensors, one per image
# NOTE(review): MnistNet.forward accepts one tensor -- confirm this is intended.
pt_inputs = tuple(torch.from_numpy(x) for x in sample_inputs)
model.qconfig = torch.quantization.get_default_qconfig('qnnpack')
# NOTE(review): no data is run through the prepared model between prepare()
# and convert(); the "must run observer" warning in the output below reflects this.
q_model = torch.quantization.prepare(model, inplace=False)
# this convert() call is where the traceback below is raised
q_model = torch.quantization.convert(q_model, inplace=False)
traced_model = torch.jit.trace(q_model, pt_inputs)
# round-trip the traced model through an in-memory buffer
buf = io.BytesIO()
torch.jit.save(traced_model, buf)
buf.seek(0)
q_model = torch.jit.load(buf)
q_model.eval()
output = q_model(*pt_inputs)
# export to an in-memory buffer rather than a file on disk
f = io.BytesIO()
torch.onnx.export(q_model, pt_inputs, f, input_names=input_names, example_outputs=output,
operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK)
f.seek(0)
Which produces:
$ python3 MNIST6.py
beginning training . . .
epoch 1, epochLoss = 0.8135, epochAccuracy = 75.1496%
epoch 2, epochLoss = 0.2094, epochAccuracy = 93.6835%
epoch 3, epochLoss = 0.1284, epochAccuracy = 96.0439%
epoch 4, epochLoss = 0.0989, epochAccuracy = 97.0412%
epoch 5, epochLoss = 0.0763, epochAccuracy = 97.7560%
/usr/local/lib/python3.8/dist-packages/torch/quantization/observer.py:955: UserWarning: must run observer before calling calculate_qparams. Returning default scale and zero point
warnings.warn(
Traceback (most recent call last):
File "MNIST6.py", line 260, in <module>
main()
File "MNIST6.py", line 192, in main
q_model = torch.quantization.convert(q_model, inplace=False)
File "/usr/local/lib/python3.8/dist-packages/torch/quantization/quantize.py", line 471, in convert
_convert(
File "/usr/local/lib/python3.8/dist-packages/torch/quantization/quantize.py", line 509, in _convert
reassign[name] = swap_module(mod, mapping, custom_module_class_mapping)
File "/usr/local/lib/python3.8/dist-packages/torch/quantization/quantize.py", line 534, in swap_module
new_mod = mapping[type(mod)].from_float(mod)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/quantized/modules/conv.py", line 418, in from_float
return _ConvNd.from_float(cls, mod)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/quantized/modules/conv.py", line 220, in from_float
return cls.get_qconv(mod, activation_post_process, weight_post_process)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/quantized/modules/conv.py", line 187, in get_qconv
qweight = _quantize_weight(mod.weight.float(), weight_post_process)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/quantized/modules/utils.py", line 9, in _quantize_weight
qweight = torch.quantize_per_tensor(
RuntimeError: quantize_tensor_per_tensor_affine expects a quantized and float tensors to be on the same device.
Any suggestions? It’s not really clear to me whether converting to ONNX is supported after using quantize_dynamic or optimize_for_mobile first. Is there a way around these errors? Or is there an alternative way to make a saved PyTorch model much smaller?