Error while converting pytorch model which uses point cloud input to ONNX

The following is my code:

import torch
import torch.onnx
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import torch.onnx as torch_onnx
from torch import FloatTensor
import numpy as np
from typing import Tuple, Callable, Optional

# Internal Modules

#from util_funcs import UFloatTensor, ULongTensor
#from util_layers import Conv, SepConv, Dense, EndChannels
from pointnet import TransformationNet, BasePointNet, ClassificationPointNet

# A model class instance (class not shown)

model = ClassificationPointNet(10, 0.3, 3)

torch.save(model.state_dict(), '/home/vijay/Documents/Vijay/Lidar/pytorch_pointnet/Lidarmnist/PointMNISTDataset/processed/training.pth')
#torch.save(model, '/home/vijay/Documents/Vijay/Lidar/pytorch_pointnet/Lidarmnist/PointMNISTDataset/processed/training.pth')

# Load the weights from a file (.pth usually)

state_dict = torch.load('/home/vijay/Documents/Vijay/Lidar/pytorch_pointnet/Lidarmnist/PointMNISTDataset/processed/training.pth')

# Load the weights now into a model net architecture defined by our class

model.load_state_dict(state_dict)

# Create the right input shape (e.g. for an image)

dummy_input = torch.randn(64, 1, 3)
torch.onnx.export(model, dummy_input, "onnx_model_name.onnx")

I get the following error while running the code:

File "Conversion_ONNX.py", line 32, in <module>
    torch.onnx.export(model, dummy_input, "onnx_model_name.onnx")
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/onnx/__init__.py", line 27, in export
    return utils.export(*args, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/onnx/utils.py", line 111, in export
    _retain_param_name=_retain_param_name)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/onnx/utils.py", line 313, in _export
    _retain_param_name)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/onnx/utils.py", line 237, in _model_to_graph
    graph, torch_out = _trace_and_get_graph_from_model(model, args, training)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/onnx/utils.py", line 204, in _trace_and_get_graph_from_model
    trace, torch_out = torch.jit.get_trace_graph(model, args, _force_outplace=True)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/jit/__init__.py", line 219, in get_trace_graph
    return LegacyTracedModule(f, _force_outplace, return_inputs)(*args, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 491, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/jit/__init__.py", line 276, in forward
    out = self.inner(*trace_inputs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self._slow_forward(*input, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 479, in _slow_forward
    result = self.forward(*input, **kwargs)
  File "/home/vijay/Documents/Vijay/Lidar/pytorch_pointnet/model/pointnet.py", line 113, in forward
    x, feature_transform = self.base_pointnet(x)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self._slow_forward(*input, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 479, in _slow_forward
    result = self.forward(*input, **kwargs)
  File "/home/vijay/Documents/Vijay/Lidar/pytorch_pointnet/model/pointnet.py", line 70, in forward
    input_transform = self.input_transform(x)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self._slow_forward(*input, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 479, in _slow_forward
    result = self.forward(*input, **kwargs)
  File "/home/vijay/Documents/Vijay/Lidar/pytorch_pointnet/model/pointnet.py", line 33, in forward
    x = nn.MaxPool1d(num_points)(x)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self._slow_forward(*input, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 479, in _slow_forward
    result = self.forward(*input, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/pooling.py", line 77, in forward
    self.return_indices)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/_jit_internal.py", line 133, in fn
    return if_false(*args, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py", line 461, in _max_pool1d
    input, kernel_size, stride, padding, dilation, ceil_mode)[0]
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py", line 453, in max_pool1d_with_indices
    input, kernel_size, stride, padding, dilation, ceil_mode)
TypeError: max_pool1d_with_indices(): argument 'kernel_size' (position 2) must be tuple of ints, not Tensor

Please assist me in solving this issue.

Best Regards,
Vijay

Hi Vijay, what’s your forward method’s signature?

Hello Kamil,
Please find the complete code given below:

import torch
import torch.nn as nn
import torch.nn.functional as F

class TransformationNet(nn.Module):

    def __init__(self, input_dim, output_dim):
        super(TransformationNet, self).__init__()
        self.output_dim = output_dim

        self.conv_1 = nn.Conv1d(input_dim, 64, 1)
        self.conv_2 = nn.Conv1d(64, 128, 1)
        self.conv_3 = nn.Conv1d(128, 1024, 1)

        self.bn_1 = nn.BatchNorm1d(64)
        self.bn_2 = nn.BatchNorm1d(128)
        self.bn_3 = nn.BatchNorm1d(1024)
        self.bn_4 = nn.BatchNorm1d(512)
        self.bn_5 = nn.BatchNorm1d(256)

        self.fc_1 = nn.Linear(1024, 512)
        self.fc_2 = nn.Linear(512, 256)
        self.fc_3 = nn.Linear(256, self.output_dim * self.output_dim)

    def forward(self, x):
        num_points = x.shape[1]
        x = x.transpose(2, 1)
        x = F.relu(self.bn_1(self.conv_1(x)))
        x = F.relu(self.bn_2(self.conv_2(x)))
        x = F.relu(self.bn_3(self.conv_3(x)))

        x = nn.MaxPool1d(num_points)(x)
        x = x.view(-1, 1024)

        x = F.relu(self.bn_4(self.fc_1(x)))
        x = F.relu(self.bn_5(self.fc_2(x)))
        x = self.fc_3(x)

        identity_matrix = torch.eye(self.output_dim)
        if torch.cuda.is_available():
            identity_matrix = identity_matrix.cuda()
        x = x.view(-1, self.output_dim, self.output_dim) + identity_matrix
        return x


class BasePointNet(nn.Module):

    def __init__(self, point_dimension, return_local_features=False):
        super(BasePointNet, self).__init__()
        self.return_local_features = return_local_features
        self.input_transform = TransformationNet(input_dim=point_dimension, output_dim=point_dimension)
        self.feature_transform = TransformationNet(input_dim=64, output_dim=64)

        self.conv_1 = nn.Conv1d(point_dimension, 64, 1)
        self.conv_2 = nn.Conv1d(64, 64, 1)
        self.conv_3 = nn.Conv1d(64, 64, 1)
        self.conv_4 = nn.Conv1d(64, 128, 1)
        self.conv_5 = nn.Conv1d(128, 1024, 1)

        self.bn_1 = nn.BatchNorm1d(64)
        self.bn_2 = nn.BatchNorm1d(64)
        self.bn_3 = nn.BatchNorm1d(64)
        self.bn_4 = nn.BatchNorm1d(128)
        self.bn_5 = nn.BatchNorm1d(1024)

    def forward(self, x):
        num_points = x.shape[1]

        input_transform = self.input_transform(x)

        x = torch.bmm(x, input_transform)
        x = x.transpose(2, 1)
        x = F.relu(self.bn_1(self.conv_1(x)))
        x = F.relu(self.bn_2(self.conv_2(x)))
        x = x.transpose(2, 1)

        feature_transform = self.feature_transform(x)

        x = torch.bmm(x, feature_transform)
        local_point_features = x

        x = x.transpose(2, 1)
        x = F.relu(self.bn_3(self.conv_3(x)))
        x = F.relu(self.bn_4(self.conv_4(x)))
        x = F.relu(self.bn_5(self.conv_5(x)))
        x = nn.MaxPool1d(num_points)(x)
        x = x.view(-1, 1024)

        if self.return_local_features:
            x = x.view(-1, 1024, 1).repeat(1, 1, num_points)
            return torch.cat([x.transpose(2, 1), local_point_features], 2), feature_transform
        else:
            return x, feature_transform


class ClassificationPointNet(nn.Module):

    def __init__(self, num_classes, dropout=0.3, point_dimension=3):
        super(ClassificationPointNet, self).__init__()
        self.base_pointnet = BasePointNet(return_local_features=False, point_dimension=point_dimension)

        self.fc_1 = nn.Linear(1024, 512)
        self.fc_2 = nn.Linear(512, 256)
        self.fc_3 = nn.Linear(256, num_classes)

        self.bn_1 = nn.BatchNorm1d(512)
        self.bn_2 = nn.BatchNorm1d(256)

        self.dropout_1 = nn.Dropout(dropout)

    def forward(self, x):
        x, feature_transform = self.base_pointnet(x)

        x = F.relu(self.bn_1(self.fc_1(x)))
        x = F.relu(self.bn_2(self.fc_2(x)))
        x = self.dropout_1(x)

        return F.log_softmax(self.fc_3(x), dim=1), feature_transform

Best Regards,
Vijay

It looks related to the max pools being instantiated in forward. I would suggest instantiating them in __init__ and then using them in forward, to see if that addresses the issue with the ONNX export.
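Roughly what I mean, as a minimal sketch (the GlobalPool name and the fixed point count are just placeholders for illustration):

import torch
import torch.nn as nn

class GlobalPool(nn.Module):
    def __init__(self, num_points):
        super(GlobalPool, self).__init__()
        # the kernel size is now a plain Python int fixed at construction
        # time, not a Tensor coming from x.shape during tracing
        self.max_pool = nn.MaxPool1d(num_points)

    def forward(self, x):        # x: (batch, channels, num_points)
        return self.max_pool(x)  # -> (batch, channels, 1)

pool = GlobalPool(2)
print(pool(torch.randn(64, 1024, 2)).shape)  # torch.Size([64, 1024, 1])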

Hello Kamil,
The max pools depend on num_points, which in turn comes from x.shape[1].

I wrote something like this in the __init__:

self.maxpool = nn.MaxPool1d(1)

and used it in forward like the following:
self.maxpool = nn.MaxPool1d(num_points)
x = self.maxpool(x)

Would this work? Is it in line with your suggestion?

Best Regards,
Vijay

Hello Kamil,
I got the same error with the above code (instantiation in __init__). Please tell me if I am doing anything wrong.

Best Regards,
Vijay

Yes, the export should work then. But it will be for a kernel with a fixed size. If you want it to be dynamic, you'll have to consider another approach for downsampling and make sure it is supported for ONNX export.
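One possibility, as an untested sketch: reduce over the points axis with torch.max instead of a pooling layer, since the reduction dimension stays fixed even when the number of points does not:

import torch
import torch.nn as nn

class GlobalMax(nn.Module):
    # global max over the points axis; there is no kernel size that
    # depends on the input shape, so nothing data-dependent is baked
    # into the traced graph
    def forward(self, x):              # x: (batch, channels, num_points)
        return torch.max(x, dim=2)[0]  # -> (batch, channels)

pool = GlobalMax()
print(pool(torch.randn(64, 1024, 500)).shape)  # torch.Size([64, 1024])

With this, the x = x.view(-1, 1024) after the pool also becomes redundant, since the reduction already drops the points dimension.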

It worked for me. Did you do it for both classes?

Thank you for pointing it out. I did it for one class and forgot to do it for the other.

Best Regards,
Vijay

Hello Kamil,
I am getting the same error after making the change in both classes. Am I doing anything wrong? Can you please share your code change?

Best Regards,
Vijay

Here it is:

import torch
import torch.nn as nn
import torch.nn.functional as F


class TransformationNet(nn.Module):

    def __init__(self, input_dim, output_dim):
        super(TransformationNet, self).__init__()
        self.output_dim = output_dim

        self.conv_1 = nn.Conv1d(input_dim, 64, 1)
        self.conv_2 = nn.Conv1d(64, 128, 1)
        self.conv_3 = nn.Conv1d(128, 1024, 1)

        self.bn_1 = nn.BatchNorm1d(64)
        self.bn_2 = nn.BatchNorm1d(128)
        self.bn_3 = nn.BatchNorm1d(1024)
        self.bn_4 = nn.BatchNorm1d(512)
        self.bn_5 = nn.BatchNorm1d(256)

        self.fc_1 = nn.Linear(1024, 512)
        self.fc_2 = nn.Linear(512, 256)
        self.fc_3 = nn.Linear(256, self.output_dim * self.output_dim)

        self.max_pool = nn.MaxPool1d(2)

    def forward(self, x):
        x = x.transpose(2, 1)
        x = F.relu(self.bn_1(self.conv_1(x)))
        x = F.relu(self.bn_2(self.conv_2(x)))
        x = F.relu(self.bn_3(self.conv_3(x)))
        x = self.max_pool(x)

        x = x.view(-1, 1024)

        x = F.relu(self.bn_4(self.fc_1(x)))
        x = F.relu(self.bn_5(self.fc_2(x)))
        x = self.fc_3(x)

        identity_matrix = torch.eye(self.output_dim)
        if torch.cuda.is_available():
            identity_matrix = identity_matrix.cuda()
        x = x.view(-1, self.output_dim, self.output_dim) + identity_matrix
        return x


class BasePointNet(nn.Module):
    def __init__(self, point_dimension, return_local_features=False):
        super(BasePointNet, self).__init__()
        self.return_local_features = return_local_features
        self.input_transform = TransformationNet(input_dim=point_dimension, output_dim=point_dimension)
        self.feature_transform = TransformationNet(input_dim=64, output_dim=64)

        self.conv_1 = nn.Conv1d(point_dimension, 64, 1)
        self.conv_2 = nn.Conv1d(64, 64, 1)
        self.conv_3 = nn.Conv1d(64, 64, 1)
        self.conv_4 = nn.Conv1d(64, 128, 1)
        self.conv_5 = nn.Conv1d(128, 1024, 1)

        self.bn_1 = nn.BatchNorm1d(64)
        self.bn_2 = nn.BatchNorm1d(64)
        self.bn_3 = nn.BatchNorm1d(64)
        self.bn_4 = nn.BatchNorm1d(128)
        self.bn_5 = nn.BatchNorm1d(1024)

        self.max_pool = nn.MaxPool1d(2)

    def forward(self, x):
        num_points = x.shape[1]

        input_transform = self.input_transform(x)

        x = torch.bmm(x, input_transform)
        x = x.transpose(2, 1)
        x = F.relu(self.bn_1(self.conv_1(x)))
        x = F.relu(self.bn_2(self.conv_2(x)))
        x = x.transpose(2, 1)

        feature_transform = self.feature_transform(x)

        x = torch.bmm(x, feature_transform)
        local_point_features = x

        x = x.transpose(2, 1)
        x = F.relu(self.bn_3(self.conv_3(x)))
        x = F.relu(self.bn_4(self.conv_4(x)))
        x = F.relu(self.bn_5(self.conv_5(x)))
        x = self.max_pool(x)

        x = x.view(-1, 1024)

        if self.return_local_features:
            x = x.view(-1, 1024, 1).repeat(1, 1, num_points)
            return torch.cat([x.transpose(2, 1), local_point_features], 2), feature_transform
        else:
            return x, feature_transform


class ClassificationPointNet(nn.Module):

    def __init__(self, num_classes, dropout=0.3, point_dimension=3):
        super(ClassificationPointNet, self).__init__()
        self.base_pointnet = BasePointNet(return_local_features=False, point_dimension=point_dimension)

        self.fc_1 = nn.Linear(1024, 512)
        self.fc_2 = nn.Linear(512, 256)
        self.fc_3 = nn.Linear(256, num_classes)

        self.bn_1 = nn.BatchNorm1d(512)
        self.bn_2 = nn.BatchNorm1d(256)

        self.dropout_1 = nn.Dropout(dropout)

    def forward(self, x):
        x, feature_transform = self.base_pointnet(x)

        x = F.relu(self.bn_1(self.fc_1(x)))
        x = F.relu(self.bn_2(self.fc_2(x)))
        x = self.dropout_1(x)

        return F.log_softmax(self.fc_3(x), dim=1), feature_transform


def test():
    model = ClassificationPointNet(10, 0.3, 3)
    dummy_input = torch.randn(64, 2, 3)
    x = model(dummy_input)

    torch.save(model.state_dict(), 'file.pth')
    state_dict = torch.load('file.pth')
    model.load_state_dict(state_dict)

    dummy_input = torch.randn(64, 2, 3)
    torch.onnx.export(model, dummy_input, 'file.onnx')

if __name__ == '__main__':
    test()

Hello Kamil,
Thank you for the code. The error went away, but now I get a runtime error:

RuntimeError: expected type torch.FloatTensor but got torch.cuda.FloatTensor

Please assist me in solving this.

Best Regards,
Vijay

Make sure your model and tensors are on the device you want to use (with .to(device)). I was on device='cpu'.
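One guess, based on your code above: the torch.cuda.is_available() branch in TransformationNet.forward moves identity_matrix to the GPU even when the model and input stay on the CPU, which would produce exactly this mismatch. A sketch of keeping everything on one explicit device (reusing the ClassificationPointNet defined earlier in this thread):

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# move both the model parameters and the dummy input to the same device
model = ClassificationPointNet(10, 0.3, 3).to(device)
dummy_input = torch.randn(64, 2, 3, device=device)

# and inside TransformationNet.forward, follow the input's device
# instead of hard-coding .cuda():
#   identity_matrix = torch.eye(self.output_dim, device=x.device)

torch.onnx.export(model, dummy_input, 'file.onnx')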