Hi, I want to try mixed precision at inference time with ONNX models. It seems that there is a problem with the BatchNorm layers:
import torch
from torch import nn
import onnx
import onnxruntime as ort
class TestModel(nn.Module):
    """Minimal conv (+ optional batch-norm) model used to reproduce the issue.

    The forward pass runs inside ``torch.cuda.amp.autocast(True)``, so the
    traced/exported graph reflects autocast's dtype decisions.

    Args:
        do_bn: When true, apply ``BatchNorm2d`` after the convolution.
    """

    def __init__(self, do_bn=False):
        super().__init__()
        self.conv = nn.Conv2d(3, 1, 3)
        self.bn = nn.BatchNorm2d(1)
        self.do_bn = do_bn

    def forward(self, x):
        """Apply the convolution and, if enabled, batch-norm under autocast.

        Args:
            x: Input tensor fed to ``self.conv`` (3 input channels).

        Returns:
            The convolution output, batch-normalised when ``self.do_bn`` is set.
        """
        # Batch-norm is kept inside the autocast region: the reported export
        # failure (float16 vs. float inputs on BatchNormalization) only makes
        # sense if BN is traced while autocast is active.
        with torch.cuda.amp.autocast(True):
            x = self.conv(x)
            if self.do_bn:
                x = self.bn(x)
        return x
def test(do_bn):
    """Export ``TestModel`` to ONNX and run it once with onnxruntime on CUDA.

    The model is exported to ``test.onnx`` in the current directory, validated
    with the ONNX checker, loaded into an ``InferenceSession`` using the
    ``CUDAExecutionProvider`` and executed on one random batch.

    Args:
        do_bn: Forwarded to ``TestModel``; enables the batch-norm layer.

    Returns:
        None. The inference output is discarded; the function only checks
        that export, validation, loading and inference complete.
    """
    onnx_path = 'test.onnx'

    model = TestModel(do_bn=do_bn).cuda()
    sample = torch.randn(2, 3, 50, 50).cuda()

    # The model is already on the GPU, so no second ``.cuda()`` is needed.
    torch.onnx.export(model, (sample,), onnx_path, input_names=['input'])
    onnx.checker.check_model(onnx_path)

    session = ort.InferenceSession(onnx_path, providers=['CUDAExecutionProvider'])
    # Feed float32 explicitly: the exported graph's input is the float32
    # tensor that was traced above.
    feed = {'input': sample.cpu().numpy().astype('float32')}
    session.run(None, feed)
test(False)  # works
# Fails when onnxruntime loads the exported model:
# [ONNXRuntimeError] : 1 : FAIL : Load model from test.onnx failed:Type Error: Type parameter (T) of Optype (BatchNormalization) bound to different types (tensor(float16) and tensor(float) in node (BatchNormalization_2).
test(True)
I’m using the following versions:
torch==1.11.0+cu113
onnxruntime-gpu==1.10.0
Is this a bug, or is there a better way to do this?
Thanks in advance