How to export a layer_norm layer to ONNX?

I’m trying to convert my model to ONNX format for deployment with TensorRT. Here is a minimal sample that illustrates my problem with layer_norm:

import torch
from torch import nn

class ExportModel(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        # n, c, h, w = x.shape
        # y = nn.functional.layer_norm(x, [c, h, w])        # not working
        # y = nn.functional.layer_norm(x, x.size()[1:])     # not working
        y = nn.functional.layer_norm(x, [16, 32, 128])      # constant shape: exports fine

        return y

def main():
    model = ExportModel()

    dummy_input = torch.randn(64, 16, 32, 128)
    input_names = [ "input" ]
    output_names = [ "output" ]

    with torch.no_grad():
        torch.onnx.export(
            model, dummy_input, "sample.onnx", verbose=True,
            input_names=input_names, output_names=output_names
        )
    return

if __name__ == '__main__':
    main()

The export only works when the normalized_shape argument of layer_norm is a list of constant numbers. If I pass a shape derived from the input (the commented-out lines above), the following error occurs.


Traceback (most recent call last):
  File "sample.py", line 31, in <module>
    main()
  File "sample.py", line 26, in main
    verbose=True, input_names=input_names, output_names=output_names
  File "/opt/conda/lib/python3.6/site-packages/torch/onnx/__init__.py", line 148, in export
    strip_doc_string, dynamic_axes, keep_initializers_as_inputs)
  File "/opt/conda/lib/python3.6/site-packages/torch/onnx/utils.py", line 66, in export
    dynamic_axes=dynamic_axes, keep_initializers_as_inputs=keep_initializers_as_inputs)
  File "/opt/conda/lib/python3.6/site-packages/torch/onnx/utils.py", line 409, in _export
    fixed_batch_size=fixed_batch_size)
  File "/opt/conda/lib/python3.6/site-packages/torch/onnx/utils.py", line 289, in _model_to_graph
    fixed_batch_size=fixed_batch_size)
  File "/opt/conda/lib/python3.6/site-packages/torch/onnx/utils.py", line 132, in _optimize_graph
    graph = torch._C._jit_pass_onnx(graph, operator_export_type)
  File "/opt/conda/lib/python3.6/site-packages/torch/onnx/__init__.py", line 179, in _run_symbolic_function
    return utils._run_symbolic_function(*args, **kwargs)
  File "/opt/conda/lib/python3.6/site-packages/torch/onnx/utils.py", line 647, in _run_symbolic_function
    return op_fn(g, *inputs, **attrs)
  File "/opt/conda/lib/python3.6/site-packages/torch/onnx/symbolic_helper.py", line 128, in wrapper
    args = [_parse_arg(arg, arg_desc) for arg, arg_desc in zip(args, arg_descriptors)]
  File "/opt/conda/lib/python3.6/site-packages/torch/onnx/symbolic_helper.py", line 128, in <listcomp>
    args = [_parse_arg(arg, arg_desc) for arg, arg_desc in zip(args, arg_descriptors)]
  File "/opt/conda/lib/python3.6/site-packages/torch/onnx/symbolic_helper.py", line 81, in _parse_arg
    "', since it's not constant, please try to make "
RuntimeError: Failed to export an ONNX attribute 'onnx::Gather', since it's not constant, please try to make things (e.g., kernel size) static if possible
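
The export does go through if I bake the shapes in, for example by constructing nn.LayerNorm with literal sizes. The values and the class name below are only for illustration (they are just the sizes of my dummy input); in the real model every block would need its own literals:

import torch
from torch import nn

class ConstantShapeModel(nn.Module):
    def __init__(self):
        super().__init__()
        # normalized_shape hardcoded as literal numbers;
        # equivalent to the working F.layer_norm call above (no weight/bias)
        self.norm = nn.LayerNorm([16, 32, 128], elementwise_affine=False)

    def forward(self, x):
        return self.norm(x)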

I have a few code blocks in my model that use the layer_norm op, and it would turn into rather ugly code if I hardcoded all of the shape parameters as constants like this. Is there any “best practice” for handling dynamic shapes in this kind of use case?
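
The only other idea I have is to write the normalization out by hand so that no shape list is passed at all. A rough sketch (no affine weight/bias, eps hardcoded); since the reduction dims end up as plain Python ints at trace time, I think this avoids the non-constant attribute problem, but I’m not sure it is the right approach for TensorRT:

import torch

def layer_norm_dynamic(x, eps=1e-5):
    # normalize over every dim except the batch dim, without weight/bias
    dims = tuple(range(1, x.dim()))   # plain Python ints at trace time
    mean = x.mean(dim=dims, keepdim=True)
    var = ((x - mean) ** 2).mean(dim=dims, keepdim=True)
    return (x - mean) / torch.sqrt(var + eps)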

Thanks in advance.