PyTorch to ONNX export - ONNX Runtime inference output (Python) differs from PyTorch

Hi there, I tried to export a small pretrained (Fashion-MNIST) model to ONNX for test cases and evaluated the results. The outputs were completely different, and I have already tried several solutions that did not fix the problem.
I have spent weeks looking for a solution to this error. Could you please help me?

Here are the labels and the formatted outputs of the PyTorch model and of the ONNX model run with ONNX Runtime.

Labels: ['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', ' Bag', 'Ankle boot']

PyTorch output: [14.139054 -1.463651 3.0200443 5.6304793 -3.21046 -6.91372 10.828293 -12.109244 -0.4888268 -7.0172133]
ONNX Runtime Output: [5.685347 4.6847196 -4.7160497 -11.215544 -14.1343975 28.269392 1.3876779 -3.9011323 1.518218 0.49619618]

Below you can find the unformatted output and the files I used.

Unformatted output
Label: T-shirt Confidence: 0.9645941 Time taken: 0.07566659999999992
PyTorch output: [ 14.139054   -1.463651    3.0200443   5.6304793  -3.21046    -6.91372
  10.828293  -12.109244   -0.4888268  -7.0172133]
ONNX Runtime output: [  5.685347     4.6847196   -4.7160497  -11.215544   -14.1343975
  28.269392     1.3876779   -3.9011323    1.518218     0.49619618]
========================================
Final top prediction is: Sandal with confidence of: 1.0
========================================
========================================
Inference time: 0.99 ms
========================================
Export routine
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
RAND_INPUT = torch.randn(BATCH_SIZE, 1, 28, 28, device=DEVICE)  # dummy input matching the model's 1x28x28 MNIST input

torch.onnx.export(MODEL,                                            # model being run
                      RAND_INPUT,                                   # model input (or a tuple for multiple inputs)
                      str(MODEL.__class__.__name__)+".onnx",        # where to save the model (can be a file or file-like object)
                      export_params=True,                           # store the trained parameter weights inside the model file
                      #verbose=True,                                 # output graph to console
                      opset_version=11,                             # the ONNX version to export the model to
                      do_constant_folding=True,                     # whether to execute constant folding for optimization
                      enable_onnx_checker=True,
                      input_names = ['input'],                      # the model's input names
                      output_names = ['output'],                    # the model's output names
                      dynamic_axes={'input' : {0 : 'batch_size'},   # variable length axes
                                    'output' : {0 : 'batch_size'}})
Neural Network Model (mnist_model.py)
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)  # flatten: 20 channels * 4 * 4 spatial = 320 features
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return x
Testing routine (test.py)
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image

def bulkPredictImages(model: nn.Module, classes: list, dataLoader: DataLoader):
    resultArray = []
    resultConfidenceArray = []
    totalTime = 0.0
    for images, _ in dataLoader:
        if torch.cuda.is_available():
            images = images.cuda()
        for image in images:
            image = image.unsqueeze_(0)
            result, confidence, timeTaken = predictImage(model, classes, image)
            resultArray.append(result)
            resultConfidenceArray.append(confidence)
            totalTime += timeTaken

    return resultArray, resultConfidenceArray, totalTime

def predictImage(model: nn.Module, classes: list, inputImage: Image.Image, transform = None):
    timeStart = time.perf_counter() #in seconds
    model.eval()
    with torch.no_grad():
        if transform is not None:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            inputImage = transform(inputImage).float()
            inputImage = inputImage.unsqueeze_(0)
            inputImage = inputImage.to(device)
        output = model(inputImage)
        idx = output.data.cpu().numpy().argmax()
        label = classes[idx]
        top_p, top_classes = F.softmax(output, dim=1).topk(1, dim = 1)
        confidence = top_p.data.cpu().numpy().max()

        timeStop = time.perf_counter() #in seconds
        return label, confidence, timeStop - timeStart

def main():
    pass

if __name__ == '__main__':
    main()
Converting and evaluation (PyTorchToOnnxConverter.py) (please have mercy on my coding style)

import sys
import time

import numpy as np

import torch
import torch.onnx
import torch.nn.functional as F
from torchvision import transforms
from collections import OrderedDict

import onnx
import onnxruntime
from onnx import optimizer as optim

from PIL import Image

import test
import mnist_model

MODEL = None

BATCH_SIZE = 1
MODEL_PATH = ""
TEST_IMAGE_PATH = ""
LABELS = []

IMG = None

MONOCHROME = False
INPUT_LAYOUT = ()

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

RAND_INPUT = None

TRANSFORM = None

def setMNIST():
    global MODEL
    global MODEL_PATH
    global TEST_IMAGE_PATH
    global LABELS
    global IMG
    global MONOCHROME
    global INPUT_LAYOUT
    global RAND_INPUT
    global TRANSFORM

    MODEL = mnist_model.Net()
    if torch.cuda.is_available():
        MODEL = MODEL.cuda()
    MODEL_PATH = "D:/Projekte/Masterthesis/DeepNerualNetworks/CustomFashionMNIST/best_model.pth"
    TEST_IMAGE_PATH = "D:/Projekte/Masterthesis/DeepNerualNetworks/PyTorchToOnnxConverter/tshirt.png"
    LABELS = ['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', ' Bag', 'Ankle boot']
    IMG = Image.open(TEST_IMAGE_PATH).convert('L')
    INPUT_LAYOUT = (1, 1, 28, 28)
    MONOCHROME = True
    RAND_INPUT = torch.randn(BATCH_SIZE, 1, 28, 28, device=DEVICE)
    TRANSFORM = transforms.Compose([
        transforms.Resize(size=28),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

def preprocess(input_data):
    # convert the input data into the float32 input
    img_data = input_data.astype('float32')

    #normalize
    mean_vec = np.array([0.485, 0.456, 0.406])
    stddev_vec = np.array([0.229, 0.224, 0.225])
    norm_img_data = np.zeros(img_data.shape).astype('float32')
    for i in range(img_data.shape[0]):
        norm_img_data[i,:,:] = (img_data[i,:,:]/255 - mean_vec[i]) / stddev_vec[i]
        
    #add batch channel
    norm_img_data = norm_img_data[np.newaxis, :, :, :]
    #norm_img_data = img_data[np.newaxis, :, :, :]
    norm_img_data = np.resize(norm_img_data, INPUT_LAYOUT).astype('float32')
    return norm_img_data

def softmax2(arr):
    total = 0
    max_val = np.max(arr)
    print(max_val)
    for i in arr:
        print(i)
        total += np.exp(i - max_val)    # subtract the max in numerator and denominator to avoid overflow

    res = []
    for i in arr:
        res.append(np.exp(i - max_val) / total)

    return res

def softmax3(inp):
    top_p, top_classes = F.softmax(inp, dim=1).topk(1, dim = 1)
    confidence = top_p.data.cpu().numpy().max()
    return top_p, confidence

def softmax(x):
    print("ONNX Runtime output: " + str(x[0][0]))
    x = x.reshape(-1)
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)

def postprocess(result):
    smRes = softmax(np.array(result)).tolist()
    return smRes

def onnxImageProcess(img, monochrome: bool = False):
    image_array = np.asarray(img)
    if monochrome:
        image_array = image_array[:, :, np.newaxis]
    image_array = image_array.transpose(2, 0, 1)
    
    return preprocess(image_array)

def runOnnxTest(ort_session, input_data):
    #image = Image.open(TEST_IMAGE_PATH)

    input_name = ort_session.get_inputs()[0].name 

    start = time.time()
    raw_result = ort_session.run(None, {input_name: input_data})
    end = time.time()
    res = postprocess(raw_result)
    #res = softmax2(raw_result)
    #softmax3(raw_result)

    inference_time = np.round((end - start) * 1000, 2)
    idx = np.argmax(res)

    print('========================================')
    print('Final top prediction is: ' + LABELS[idx] + " with confidence of: " + str(res[idx]))
    print('========================================')

    print('========================================')
    print('Inference time: ' + str(inference_time) + " ms")
    print('========================================')

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

def torchImageProcess(img, transform):
    inputImage = transform(img).float()
    inputImage = inputImage.unsqueeze_(0)
    return inputImage.to(DEVICE)

def compareOnnxModelWithTorch(session, onnxInput, torchOutput):
    ort_inputs = {session.get_inputs()[0].name: onnxInput}
    ort_outs = session.run(None, ort_inputs)

    # compare ONNX Runtime and PyTorch results
    np.testing.assert_allclose(to_numpy(torchOutput), ort_outs[0], rtol=1e-03, atol=1e-05)

def add_value_info_for_constants(model : onnx.ModelProto):
    """
    Currently onnx.shape_inference doesn't use the shape of initializers, so add
    that info explicitly as ValueInfoProtos.
    Mutates the model.
    Args:
        model: The ModelProto to update.
    """
    # All (top-level) constants will have ValueInfos before IRv4 as they are all inputs
    if model.ir_version < 4:
        return

    def add_const_value_infos_to_graph(graph : onnx.GraphProto):
        inputs = {i.name for i in graph.input}
        existing_info = {vi.name: vi for vi in graph.value_info}
        for init in graph.initializer:
            # Check it really is a constant, not an input
            if init.name in inputs:
                continue

            # The details we want to add
            elem_type = init.data_type
            shape = init.dims

            # Get existing or create new value info for this constant
            vi = existing_info.get(init.name)
            if vi is None:
                vi = graph.value_info.add()
                vi.name = init.name

            # Even though it would be weird, we will not overwrite info even if it doesn't match
            tt = vi.type.tensor_type
            if tt.elem_type == onnx.TensorProto.UNDEFINED:
                tt.elem_type = elem_type
            if not tt.HasField("shape"):
                # Ensure we set an empty list if the const is scalar (zero dims)
                tt.shape.dim.extend([])
                for dim in shape:
                    tt.shape.dim.add().dim_value = dim

        # Handle subgraphs
        for node in graph.node:
            for attr in node.attribute:
                # Ref attrs refer to other attrs, so we don't need to do anything
                if attr.ref_attr_name != "":
                    continue

                if attr.type == onnx.AttributeProto.GRAPH:
                    add_const_value_infos_to_graph(attr.g)
                if attr.type == onnx.AttributeProto.GRAPHS:
                    for g in attr.graphs:
                        add_const_value_infos_to_graph(g)


    return add_const_value_infos_to_graph(model.graph)

def remove_initializer_from_input(model):
    if model.ir_version < 4:
        print(
            'A model with ir_version below 4 requires initializers to be included in the graph input'
        )
        return

    inputs = model.graph.input
    name_to_input = {}
    for input in inputs:
        name_to_input[input.name] = input

    for initializer in model.graph.initializer:
        if initializer.name in name_to_input:
            inputs.remove(name_to_input[initializer.name])

    return model

def main():
    setMNIST()
    state = 0
    so = onnxruntime.SessionOptions()
    so.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL

    state = torch.load(MODEL_PATH, map_location=DEVICE)

    new_state_dict = OrderedDict()
    for k, v in state['state_dict'].items():
        head = k[:7]
        if head == 'module.':
            name = k[7:]  # remove `module.`
        else:
            name = k
        new_state_dict[name] = v

    MODEL.load_state_dict(new_state_dict)

    MODEL.eval()
    MODEL.is_training = False

    #torch.save(MODEL, "512.pt")

    result, confidence, timeTaken = test.predictImage(MODEL, LABELS, IMG, TRANSFORM)

    sys.stdout.write("Label: " + str(result) + " Confidence: " + str(confidence) + " Time taken: " + str(timeTaken) + "\n")

    #x = torch.randn(BATCH_SIZE, 3, 112, 112, requires_grad=True)

    # Export the model
    torch.onnx.export(MODEL,                                        # model being run
                      RAND_INPUT,                                   # model input (or a tuple for multiple inputs)
                      str(MODEL.__class__.__name__)+".onnx",        # where to save the model (can be a file or file-like object)
                      export_params=True,                           # store the trained parameter weights inside the model file
                      #verbose=True,                                 # output graph to console
                      opset_version=11,                             # the ONNX version to export the model to
                      do_constant_folding=True,                     # whether to execute constant folding for optimization
                      enable_onnx_checker=True,
                      input_names = ['input'],                      # the model's input names
                      output_names = ['output'],                    # the model's output names
                      dynamic_axes={'input' : {0 : 'batch_size'},   # variable length axes
                                    'output' : {0 : 'batch_size'}})

    onnx_model = onnx.load(str(MODEL.__class__.__name__)+".onnx")

    onnx_model = remove_initializer_from_input(onnx_model)

    #passes = ["extract_constant_to_initializer", "eliminate_unused_initializer"]
    #add_value_info_for_constants(onnx_model)
    #for init in onnx_model.graph.initializer:
    #    for value_info in onnx_model.graph.value_info:
    #        if init.name == value_info.name:
    #            onnx_model.graph.input.append(value_info)
    #onnx_model = optim.optimize(onnx_model, passes)

    onnx.save(onnx_model, str(MODEL.__class__.__name__)+".onnx")
    # Check that the IR is well formed
    onnx.checker.check_model(onnx_model)

    # Print a human readable representation of the graph
    graph = onnx.helper.printable_graph(onnx_model.graph)

    with open("graph_output.txt", mode="w") as fout:
        fout.write(graph)

    ort_session = onnxruntime.InferenceSession(str(MODEL.__class__.__name__)+".onnx", so)
    compareOnnxModelWithTorch(ort_session, to_numpy(RAND_INPUT), MODEL(RAND_INPUT))
    #compareOnnxModelWithTorch(ort_session, onnxImageProcess(IMG, MONOCHROME), MODEL(torchImageProcess(IMG, TRANSFORM)))

    res = to_numpy(MODEL(torchImageProcess(IMG, TRANSFORM)))
    print("PyTorch output: " + str(res[0]))
    runOnnxTest(ort_session, onnxImageProcess(IMG, MONOCHROME))

if __name__ == "__main__":
    main()

Thank you for your time and help

Could you test the PyTorch and ONNX models with a constant input, e.g. torch.ones, and check whether the results still differ?
If not, I would guess the preprocessing of the input data differs, which would also change the model outputs.
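
For example, a minimal sketch of such a constant-input check, assuming the Net model and the exported Net.onnx from the scripts above (the checkpoint file name and the 'state_dict' key are taken from main(); I assume the keys load directly, i.e. no 'module.' prefix to strip):

import numpy as np
import torch
import onnxruntime

import mnist_model

# A constant input takes any preprocessing differences out of the comparison.
const_input = torch.ones(1, 1, 28, 28)

# Load the same trained weights that were exported to ONNX.
model = mnist_model.Net()
state = torch.load("best_model.pth", map_location="cpu")
model.load_state_dict(state["state_dict"])
model.eval()

with torch.no_grad():
    torch_out = model(const_input).numpy()

session = onnxruntime.InferenceSession("Net.onnx")
input_name = session.get_inputs()[0].name
onnx_out = session.run(None, {input_name: const_input.numpy()})[0]

# If this passes, the exported graph itself is fine and any remaining
# mismatch must come from the input preprocessing.
np.testing.assert_allclose(torch_out, onnx_out, rtol=1e-3, atol=1e-5)
print("Constant-input outputs match.")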


Totally right, and thank you!

The preprocessing of the image was the problem: I did not apply the same preprocessing steps for the ONNX inference.

  • The mistake was that I resized the images after scaling from [0, 255] to [0.0, 1.0]; a corrected preprocessing sketch is shown below.
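
For reference, a minimal sketch of a NumPy preprocessing routine that mirrors the torchvision transform, i.e. resize the PIL image first, then scale to [0.0, 1.0] and normalize with the same MNIST mean/std (the function name is mine, and a square input image is assumed, since transforms.Resize(28) only fixes the shorter side):

import numpy as np
from PIL import Image

def onnx_preprocess(img: Image.Image) -> np.ndarray:
    # Resize the PIL image first, like transforms.Resize(size=28)
    img = img.convert("L").resize((28, 28))
    # Scale [0, 255] -> [0.0, 1.0], like transforms.ToTensor()
    arr = np.asarray(img).astype("float32") / 255.0
    # Normalize with the same MNIST stats used on the PyTorch side
    arr = (arr - 0.1307) / 0.3081
    # Add channel and batch dimensions: (28, 28) -> (1, 1, 28, 28)
    return arr[np.newaxis, np.newaxis, :, :].astype(np.float32)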

Now the outputs are correct:

PyTorch output:
[ 5.6148922e-01 -1.7514462e+00  8.5258484e-04 -1.0577362e+00
 -1.6647207e+00  1.5879002e+00 -1.5137303e-01  3.6405781e-01
  1.8391986e+00  3.9515528e-01]
ONNX Runtime output:
[ 5.6148922e-01 -1.7514462e+00  8.5237622e-04 -1.0577362e+00
 -1.6647205e+00  1.5878999e+00 -1.5137285e-01  3.6405745e-01
  1.8391986e+00  3.9515522e-01]

Problem solved, thank you! 🙂