Hi there, I tried to export a small pretrained (fashion MNIST) model to ONNX for test cases and evaluated the results. The outputs were completely differnt and I already tried different solutions which did not help to solve the problem.
I spent weeks on finding a solution to this error - may you please help me?
Here are the labels and a formatted output of the PyTorch model and the ONNX model which was run with ONNX Runtime
Labels: ['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', ' Bag', 'Ankle boot']
PyTorch output: [14.139054 -1.463651 3.0200443 5.6304793 -3.21046 -6.91372 10.828293 -12.109244 -0.4888268 -7.0172133]
ONNX Runtime Output: [5.685347 4.6847196 -4.7160497 -11.215544 -14.1343975 28.269392 1.3876779 -3.9011323 1.518218 0.49619618]
Below you can find the unformatted output and the used files.
Unformatted output
Label: T-shirt Confidence: 0.9645941 Time taken: 0.07566659999999992
PyTorch output: [ 14.139054 -1.463651 3.0200443 5.6304793 -3.21046 -6.91372
10.828293 -12.109244 -0.4888268 -7.0172133]
ONNX Runtime output: [ 5.685347 4.6847196 -4.7160497 -11.215544 -14.1343975
28.269392 1.3876779 -3.9011323 1.518218 0.49619618]
========================================
Final top prediction is: Sandal with confidence of: 1.0
========================================
========================================
Inference time: 0.99 ms
========================================
Export routine
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
RAND_INPUT = torch.randn(BATCH_SIZE, 3, 112, 112, device=DEVICE)
torch.onnx.export(MODEL, # model being run
RAND_INPUT, # model input (or a tuple for multiple inputs)
str(MODEL.__class__.__name__)+".onnx", # where to save the model (can be a file or file-like object)
export_params=True, # store the trained parameter weights inside the model file
#verbose=True, # output graph to console
opset_version=11, # the ONNX version to export the model to
do_constant_folding=True, # whether to execute constant folding for optimization
enable_onnx_checker=True,
input_names = ['input'], # the model's input names
output_names = ['output'], # the model's output names
dynamic_axes={'input' : {0 : 'batch_size'}, # variable lenght axes
'output' : {0 : 'batch_size'}})
Neural Network Model (mnist_model.py)
import torch
import torch.nn as nn
from torch.nn import init
import torch.nn.functional as F
import numpy as np
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
self.conv2_drop = nn.Dropout2d()
self.fc1 = nn.Linear(320, 50)
self.fc2 = nn.Linear(50, 10)
def forward(self, x):
x = F.relu(F.max_pool2d(self.conv1(x), 2))
x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
x = x.view(-1, 320)
x = F.relu(self.fc1(x))
x = F.dropout(x, training=self.training)
x = self.fc2(x)
return x
Testing routine (test.py)
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
def bulkPredictImages(model: nn.Module, classes: list, dataLoader: DataLoader):
resultArray = []
resultConfidenceArray = []
totalTime = 0.0
for images, _ in dataLoader:
if torch.cuda.is_available():
images = images.cuda()
for image in images:
image = image.unsqueeze_(0)
result, confidence, timeTaken = predictImage(model, classes, image)
resultArray.append(result)
resultConfidenceArray.append(confidence)
totalTime += timeTaken
return resultArray, resultConfidenceArray, totalTime
def predictImage(model: nn.Module, classes: list, inputImage: Image, transform = None):
timeStart = time.perf_counter() #in seconds
model.eval()
with torch.no_grad():
if transform != None:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
inputImage = transform(inputImage).float()
inputImage = inputImage.unsqueeze_(0)
inputImage = inputImage.to(device)
output = model(inputImage)
idx = output.data.cpu().numpy().argmax()
label = classes[idx]
top_p, top_classes = F.softmax(output, dim=1).topk(1, dim = 1)
confidence = top_p.data.cpu().numpy().max()
timeStop = time.perf_counter() #in seconds
return label, confidence, timeStop - timeStart
def main():
pass
if __name__ == '__main__':
main()
Converting and evaluation (PyTorchToOnnxConverter.py) (please have mercy for my coding style)
import io
import os
import sys
import numpy as np
import time
from torch import nn
import torch.onnx
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torchvision import transforms
from collections import OrderedDict
import onnx
import onnxruntime
from onnx import optimizer as optim
from PIL import Image
import test
import mnist_model
MODEL = None
BATCH_SIZE = 1
MODEL_PATH = ""
TEST_IMAGE_PATH = ""
LABELS = []
IMG = None
MONOCHROME = False
INPUT_LAYOUT = ()
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
RAND_INPUT = None
TRANSFORM = None
def setMNIST():
global MODEL
global MODEL_PATH
global TEST_IMAGE_PATH
global LABELS
global IMG
global MONOCHROME
global INPUT_LAYOUT
global RAND_INPUT
global TRANSFORM
MODEL = mnist_model.Net()
if torch.cuda.is_available():
MODEL = MODEL.cuda()
MODEL_PATH = "D:/Projekte/Masterthesis/DeepNerualNetworks/CustomFashionMNIST/best_model.pth"
TEST_IMAGE_PATH = "D:/Projekte/Masterthesis/DeepNerualNetworks/PyTorchToOnnxConverter/tshirt.png"
LABELS = ['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', ' Bag', 'Ankle boot']
IMG = Image.open(TEST_IMAGE_PATH).convert('L')
INPUT_LAYOUT = (1, 1, 28, 28)
MONOCHROME = True
RAND_INPUT = torch.randn(BATCH_SIZE, 1, 28, 28, device=DEVICE)
TRANSFORM = transforms.Compose([
transforms.Resize(size=28),
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])
def preprocess(input_data):
# convert the input data into the float32 input
img_data = input_data.astype('float32')
#normalize
mean_vec = np.array([0.485, 0.456, 0.406])
stddev_vec = np.array([0.229, 0.224, 0.225])
norm_img_data = np.zeros(img_data.shape).astype('float32')
for i in range(img_data.shape[0]):
norm_img_data[i,:,:] = (img_data[i,:,:]/255 - mean_vec[i]) / stddev_vec[i]
#add batch channel
norm_img_data = norm_img_data[np.newaxis, :, :, :]
#norm_img_data = img_data[np.newaxis, :, :, :]
norm_img_data = np.resize(norm_img_data, INPUT_LAYOUT).astype('float32')
return norm_img_data
def softmax2(arr):
sum = 0
max = np.max(arr)
print(max)
for i in arr:
print(i)
sum += np.exp(i - max) # subtract max in numr and denr to avoid overflow
res = []
for i in arr:
res.append(np.exp(i - max) / sum)
return res
def softmax3(inp):
top_p, top_classes = F.softmax(inp, dim=1).topk(1, dim = 1)
confidence = top_p.data.cpu().numpy().max()
return top_p, confidence
def softmax(x):
sum = 0
print("ONNX Runtime output: " + str(x[0][0]))
x = x.reshape(-1)
e_x = np.exp(x - np.max(x))
return e_x / e_x.sum(axis=0)
def postprocess(result):
smRes = softmax(np.array(result)).tolist()
return smRes
def onnxImageProcess(img, monochrome: bool = False):
image_array = np.asarray(img)
if monochrome:
image_array = image_array[:, :, np.newaxis]
image_array = image_array.transpose(2, 0, 1)
return preprocess(image_array)
def runOnnxTest(ort_session, input_data):
#image = Image.open(TEST_IMAGE_PATH)
input_name = ort_session.get_inputs()[0].name
start = time.time()
raw_result = ort_session.run(None, {input_name: input_data})
end = time.time()
res = postprocess(raw_result)
#res = softmax2(raw_result)
#softmax3(raw_result)
inference_time = np.round((end - start) * 1000, 2)
idx = np.argmax(res)
print('========================================')
print('Final top prediction is: ' + LABELS[idx] + " with confidence of: " + str(res[idx]))
print('========================================')
print('========================================')
print('Inference time: ' + str(inference_time) + " ms")
print('========================================')
def to_numpy(tensor):
return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
def torchImageProcess(img, transform):
inputImage = transform(img).float()
inputImage = inputImage.unsqueeze_(0)
return inputImage.to(DEVICE)
def compareOnnxModelWithTorch(session, onnxInput, torchOutput):
ort_inputs = {session.get_inputs()[0].name: onnxInput}
ort_outs = session.run(None, ort_inputs)
# compare ONNX Runtime and PyTorch results
np.testing.assert_allclose(to_numpy(torchOutput), ort_outs[0], rtol=1e-03, atol=1e-05)
def add_value_info_for_constants(model : onnx.ModelProto):
"""
Currently onnx.shape_inference doesn't use the shape of initializers, so add
that info explicitly as ValueInfoProtos.
Mutates the model.
Args:
model: The ModelProto to update.
"""
# All (top-level) constants will have ValueInfos before IRv4 as they are all inputs
if model.ir_version < 4:
return
def add_const_value_infos_to_graph(graph : onnx.GraphProto):
inputs = {i.name for i in graph.input}
existing_info = {vi.name: vi for vi in graph.value_info}
for init in graph.initializer:
# Check it really is a constant, not an input
if init.name in inputs:
continue
# The details we want to add
elem_type = init.data_type
shape = init.dims
# Get existing or create new value info for this constant
vi = existing_info.get(init.name)
if vi is None:
vi = graph.value_info.add()
vi.name = init.name
# Even though it would be weird, we will not overwrite info even if it doesn't match
tt = vi.type.tensor_type
if tt.elem_type == onnx.TensorProto.UNDEFINED:
tt.elem_type = elem_type
if not tt.HasField("shape"):
# Ensure we set an empty list if the const is scalar (zero dims)
tt.shape.dim.extend([])
for dim in shape:
tt.shape.dim.add().dim_value = dim
# Handle subgraphs
for node in graph.node:
for attr in node.attribute:
# Ref attrs refer to other attrs, so we don't need to do anything
if attr.ref_attr_name != "":
continue
if attr.type == onnx.AttributeProto.GRAPH:
add_const_value_infos_to_graph(attr.g)
if attr.type == onnx.AttributeProto.GRAPHS:
for g in attr.graphs:
add_const_value_infos_to_graph(g)
return add_const_value_infos_to_graph(model.graph)
def remove_initializer_from_input(model):
if model.ir_version < 4:
print(
'Model with ir_version below 4 requires to include initilizer in graph input'
)
return
inputs = model.graph.input
name_to_input = {}
for input in inputs:
name_to_input[input.name] = input
for initializer in model.graph.initializer:
if initializer.name in name_to_input:
inputs.remove(name_to_input[initializer.name])
return model
def main():
setMNIST()
state = 0
so = onnxruntime.SessionOptions()
so.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
state = torch.load(MODEL_PATH, map_location=DEVICE)
new_state_dict = OrderedDict()
for k, v in state['state_dict'].items():
head = k[:7]
if head == 'module.':
name = k[7:] # remove `module.`
else:
name = k
new_state_dict[name] = v
MODEL.load_state_dict(new_state_dict)
MODEL.eval()
MODEL.is_training = False
#torch.save(MODEL, "512.pt")
result, confidence, timeTaken = test.predictImage(MODEL, LABELS, IMG, TRANSFORM)
sys.stdout.write("Label: " + str(result) + " Confidence: " + str(confidence) + " Time taken: " + str(timeTaken) + "\n")
#x = torch.randn(BATCH_SIZE, 3, 112, 112, requires_grad=True)
# Export the model
torch.onnx.export(MODEL, # model being run
RAND_INPUT, # model input (or a tuple for multiple inputs)
str(MODEL.__class__.__name__)+".onnx", # where to save the model (can be a file or file-like object)
export_params=True, # store the trained parameter weights inside the model file
#verbose=True, # output graph to console
opset_version=11, # the ONNX version to export the model to
do_constant_folding=True, # whether to execute constant folding for optimization
enable_onnx_checker=True,
input_names = ['input'], # the model's input names
output_names = ['output'], # the model's output names
dynamic_axes={'input' : {0 : 'batch_size'}, # variable lenght axes
'output' : {0 : 'batch_size'}})
onnx_model = onnx.load(str(MODEL.__class__.__name__)+".onnx")
onnx_model = remove_initializer_from_input(onnx_model)
#passes = ["extract_constant_to_initializer", "eliminate_unused_initializer"]
#add_value_info_for_constants(onnx_model)
#for init in onnx_model.graph.initializer:
# for value_info in onnx_model.graph.value_info:
# if init.name == value_info.name:
# onnx_model.graph.input.append(value_info)
#onnx_model = optim.optimize(onnx_model, passes)
onnx.save(onnx_model, str(MODEL.__class__.__name__)+".onnx")
# Check that the IR is well formed
onnx.checker.check_model(onnx_model)
# Print a human readable representation of the graph
graph = onnx.helper.printable_graph(onnx_model.graph)
with open("graph_output.txt", mode="w") as fout:
fout.write(graph)
ort_session = onnxruntime.InferenceSession(str(MODEL.__class__.__name__)+".onnx", so)
compareOnnxModelWithTorch(ort_session, to_numpy(RAND_INPUT), MODEL(RAND_INPUT))
#compareOnnxModelWithTorch(ort_session, onnxImageProcess(IMG, MONOCHROME), MODEL(torchImageProcess(IMG, TRANSFORM)))
res = to_numpy(MODEL(torchImageProcess(IMG, TRANSFORM)))
print("PyTorch output: " + str(res[0]))
runOnnxTest(ort_session, onnxImageProcess(IMG, MONOCHROME))
if __name__ == "__main__":
main()
Thank you for your time and help