Error while converting pytorch model which uses point cloud input to ONNX

The following is my code:

import torch
import torch.onnx
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import torch.onnx as torch_onnx
from torch import FloatTensor
import numpy as np
from typing import Tuple, Callable, Optional

# Internal Modules

#from util_funcs import UFloatTensor, ULongTensor
#from util_layers import Conv, SepConv, Dense, EndChannels
from pointnet import TransformationNet, BasePointNet, ClassificationPointNet

# A model class instance (class not shown)

model = ClassificationPointNet(10, 0.3, 3)

torch.save(model.state_dict(), '/home/vijay/Documents/Vijay/Lidar/pytorch_pointnet/Lidarmnist/PointMNISTDataset/processed/training.pth')
#torch.save(model, '/home/vijay/Documents/Vijay/Lidar/pytorch_pointnet/Lidarmnist/PointMNISTDataset/processed/training.pth')

# Load the weights from a file (.pth usually)

state_dict = torch.load('/home/vijay/Documents/Vijay/Lidar/pytorch_pointnet/Lidarmnist/PointMNISTDataset/processed/training.pth')

# Load the weights now into a model net architecture defined by our class

model.load_state_dict(state_dict)

# Create the right input shape (e.g. for an image)

dummy_input = torch.randn(64, 1, 3)
torch.onnx.export(model, dummy_input, "onnx_model_name.onnx")

I get the following error while running the code:

File "Conversion_ONNX.py", line 32, in <module>
    torch.onnx.export(model, dummy_input, "onnx_model_name.onnx")
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/onnx/__init__.py", line 27, in export
    return utils.export(*args, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/onnx/utils.py", line 111, in export
    _retain_param_name=_retain_param_name)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/onnx/utils.py", line 313, in _export
    _retain_param_name)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/onnx/utils.py", line 237, in _model_to_graph
    graph, torch_out = _trace_and_get_graph_from_model(model, args, training)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/onnx/utils.py", line 204, in _trace_and_get_graph_from_model
    trace, torch_out = torch.jit.get_trace_graph(model, args, _force_outplace=True)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/jit/__init__.py", line 219, in get_trace_graph
    return LegacyTracedModule(f, _force_outplace, return_inputs)(*args, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 491, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/jit/__init__.py", line 276, in forward
    out = self.inner(*trace_inputs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self._slow_forward(*input, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 479, in _slow_forward
    result = self.forward(*input, **kwargs)
  File "/home/vijay/Documents/Vijay/Lidar/pytorch_pointnet/model/pointnet.py", line 113, in forward
    x, feature_transform = self.base_pointnet(x)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self._slow_forward(*input, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 479, in _slow_forward
    result = self.forward(*input, **kwargs)
  File "/home/vijay/Documents/Vijay/Lidar/pytorch_pointnet/model/pointnet.py", line 70, in forward
    input_transform = self.input_transform(x)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self._slow_forward(*input, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 479, in _slow_forward
    result = self.forward(*input, **kwargs)
  File "/home/vijay/Documents/Vijay/Lidar/pytorch_pointnet/model/pointnet.py", line 33, in forward
    x = nn.MaxPool1d(num_points)(x)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self._slow_forward(*input, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 479, in _slow_forward
    result = self.forward(*input, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/pooling.py", line 77, in forward
    self.return_indices)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/_jit_internal.py", line 133, in fn
    return if_false(*args, **kwargs)
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py", line 461, in _max_pool1d
    input, kernel_size, stride, padding, dilation, ceil_mode)[0]
  File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py", line 453, in max_pool1d_with_indices
    input, kernel_size, stride, padding, dilation, ceil_mode)
TypeError: max_pool1d_with_indices(): argument 'kernel_size' (position 2) must be tuple of ints, not Tensor

Please assist me in solving this issue.

Best Regards,
Vijay

Hi Vijay, what’s your forward method’s signature?

Hello Kamil,
Please find the complete code given below:

import torch
import torch.nn as nn
import torch.nn.functional as F

class TransformationNet(nn.Module):

    def __init__(self, input_dim, output_dim):
        super(TransformationNet, self).__init__()
        self.output_dim = output_dim

        self.conv_1 = nn.Conv1d(input_dim, 64, 1)
        self.conv_2 = nn.Conv1d(64, 128, 1)
        self.conv_3 = nn.Conv1d(128, 1024, 1)

        self.bn_1 = nn.BatchNorm1d(64)
        self.bn_2 = nn.BatchNorm1d(128)
        self.bn_3 = nn.BatchNorm1d(1024)
        self.bn_4 = nn.BatchNorm1d(512)
        self.bn_5 = nn.BatchNorm1d(256)

        self.fc_1 = nn.Linear(1024, 512)
        self.fc_2 = nn.Linear(512, 256)
        self.fc_3 = nn.Linear(256, self.output_dim * self.output_dim)

    def forward(self, x):
        num_points = x.shape[1]
        x = x.transpose(2, 1)
        x = F.relu(self.bn_1(self.conv_1(x)))
        x = F.relu(self.bn_2(self.conv_2(x)))
        x = F.relu(self.bn_3(self.conv_3(x)))

        x = nn.MaxPool1d(num_points)(x)
        x = x.view(-1, 1024)

        x = F.relu(self.bn_4(self.fc_1(x)))
        x = F.relu(self.bn_5(self.fc_2(x)))
        x = self.fc_3(x)

        identity_matrix = torch.eye(self.output_dim)
        if torch.cuda.is_available():
            identity_matrix = identity_matrix.cuda()
        x = x.view(-1, self.output_dim, self.output_dim) + identity_matrix
        return x


class BasePointNet(nn.Module):

    def __init__(self, point_dimension, return_local_features=False):
        super(BasePointNet, self).__init__()
        self.return_local_features = return_local_features
        self.input_transform = TransformationNet(input_dim=point_dimension, output_dim=point_dimension)
        self.feature_transform = TransformationNet(input_dim=64, output_dim=64)

        self.conv_1 = nn.Conv1d(point_dimension, 64, 1)
        self.conv_2 = nn.Conv1d(64, 64, 1)
        self.conv_3 = nn.Conv1d(64, 64, 1)
        self.conv_4 = nn.Conv1d(64, 128, 1)
        self.conv_5 = nn.Conv1d(128, 1024, 1)

        self.bn_1 = nn.BatchNorm1d(64)
        self.bn_2 = nn.BatchNorm1d(64)
        self.bn_3 = nn.BatchNorm1d(64)
        self.bn_4 = nn.BatchNorm1d(128)
        self.bn_5 = nn.BatchNorm1d(1024)

    def forward(self, x):
        num_points = x.shape[1]

        input_transform = self.input_transform(x)

        x = torch.bmm(x, input_transform)
        x = x.transpose(2, 1)
        x = F.relu(self.bn_1(self.conv_1(x)))
        x = F.relu(self.bn_2(self.conv_2(x)))
        x = x.transpose(2, 1)

        feature_transform = self.feature_transform(x)

        x = torch.bmm(x, feature_transform)
        local_point_features = x

        x = x.transpose(2, 1)
        x = F.relu(self.bn_3(self.conv_3(x)))
        x = F.relu(self.bn_4(self.conv_4(x)))
        x = F.relu(self.bn_5(self.conv_5(x)))
        x = nn.MaxPool1d(num_points)(x)
        x = x.view(-1, 1024)

        if self.return_local_features:
            x = x.view(-1, 1024, 1).repeat(1, 1, num_points)
            return torch.cat([x.transpose(2, 1), local_point_features], 2), feature_transform
        else:
            return x, feature_transform


class ClassificationPointNet(nn.Module):

    def __init__(self, num_classes, dropout=0.3, point_dimension=3):
        super(ClassificationPointNet, self).__init__()
        self.base_pointnet = BasePointNet(return_local_features=False, point_dimension=point_dimension)

        self.fc_1 = nn.Linear(1024, 512)
        self.fc_2 = nn.Linear(512, 256)
        self.fc_3 = nn.Linear(256, num_classes)

        self.bn_1 = nn.BatchNorm1d(512)
        self.bn_2 = nn.BatchNorm1d(256)

        self.dropout_1 = nn.Dropout(dropout)

    def forward(self, x):
        x, feature_transform = self.base_pointnet(x)

        x = F.relu(self.bn_1(self.fc_1(x)))
        x = F.relu(self.bn_2(self.fc_2(x)))
        x = self.dropout_1(x)

        return F.log_softmax(self.fc_3(x), dim=1), feature_transform

Best Regards,
Vijay

It looks related to the max pools being instantiated in forward. I would suggest instantiating them in __init__ and then using them in forward, to see if that addresses the issue with the ONNX export.
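Roughly what I mean, as a minimal sketch (the GlobalPool name and the fixed point count are just placeholders for illustration):

import torch
import torch.nn as nn

class GlobalPool(nn.Module):
    def __init__(self, num_points):
        super(GlobalPool, self).__init__()
        # the kernel size is now a plain Python int fixed at construction
        # time, not a Tensor coming from x.shape during tracing
        self.max_pool = nn.MaxPool1d(num_points)

    def forward(self, x):        # x: (batch, channels, num_points)
        return self.max_pool(x)  # -> (batch, channels, 1)

pool = GlobalPool(2)
print(pool(torch.randn(64, 1024, 2)).shape)  # torch.Size([64, 1024, 1])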

Hello Kamil,
The max pools depend on num_points, which in turn comes from x.shape[1].

I wrote something like this in the __init__:

self.maxpool = nn.MaxPool1d(1)

and used it in forward like the following:
self.maxpool = nn.MaxPool1d(num_points)
x = self.maxpool(x)

Would this work? Is it in line with your suggestion?

Best Regards,
Vijay

Hello Kamil,
I got the same error with the above code (instantiation in __init__). Please tell me if I am doing anything wrong.

Best Regards,
Vijay

Yes, the export should work then. But it will be for a kernel with a fixed size. If you want it to be dynamic, you'll have to consider another approach for downsampling and make sure it is supported for ONNX export.
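One possibility, as an untested sketch: reduce over the points axis with torch.max instead of a pooling layer, since the reduction dimension stays fixed even when the number of points does not:

import torch
import torch.nn as nn

class GlobalMax(nn.Module):
    # global max over the points axis; there is no kernel size that
    # depends on the input shape, so nothing data-dependent is baked
    # into the traced graph
    def forward(self, x):              # x: (batch, channels, num_points)
        return torch.max(x, dim=2)[0]  # -> (batch, channels)

pool = GlobalMax()
print(pool(torch.randn(64, 1024, 500)).shape)  # torch.Size([64, 1024])

With this, the x = x.view(-1, 1024) after the pool also becomes redundant, since the reduction already drops the points dimension.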

It worked for me. Did you do it for both classes?

Thank you for pointing it out. I did it for one class and forgot to do it for the other.

Best Regards,
Vijay

Hello Kamil,
I am getting the same error after making the change in both classes. Am I doing anything wrong? Can you please share your code change?

Best Regards,
Vijay

Here it is:

import torch
import torch.nn as nn
import torch.nn.functional as F


class TransformationNet(nn.Module):

    def __init__(self, input_dim, output_dim):
        super(TransformationNet, self).__init__()
        self.output_dim = output_dim

        self.conv_1 = nn.Conv1d(input_dim, 64, 1)
        self.conv_2 = nn.Conv1d(64, 128, 1)
        self.conv_3 = nn.Conv1d(128, 1024, 1)

        self.bn_1 = nn.BatchNorm1d(64)
        self.bn_2 = nn.BatchNorm1d(128)
        self.bn_3 = nn.BatchNorm1d(1024)
        self.bn_4 = nn.BatchNorm1d(512)
        self.bn_5 = nn.BatchNorm1d(256)

        self.fc_1 = nn.Linear(1024, 512)
        self.fc_2 = nn.Linear(512, 256)
        self.fc_3 = nn.Linear(256, self.output_dim * self.output_dim)

        self.max_pool = nn.MaxPool1d(2)

    def forward(self, x):
        x = x.transpose(2, 1)
        x = F.relu(self.bn_1(self.conv_1(x)))
        x = F.relu(self.bn_2(self.conv_2(x)))
        x = F.relu(self.bn_3(self.conv_3(x)))
        x = self.max_pool(x)

        x = x.view(-1, 1024)

        x = F.relu(self.bn_4(self.fc_1(x)))
        x = F.relu(self.bn_5(self.fc_2(x)))
        x = self.fc_3(x)

        identity_matrix = torch.eye(self.output_dim)
        if torch.cuda.is_available():
            identity_matrix = identity_matrix.cuda()
        x = x.view(-1, self.output_dim, self.output_dim) + identity_matrix
        return x


class BasePointNet(nn.Module):
    def __init__(self, point_dimension, return_local_features=False):
        super(BasePointNet, self).__init__()
        self.return_local_features = return_local_features
        self.input_transform = TransformationNet(input_dim=point_dimension, output_dim=point_dimension)
        self.feature_transform = TransformationNet(input_dim=64, output_dim=64)

        self.conv_1 = nn.Conv1d(point_dimension, 64, 1)
        self.conv_2 = nn.Conv1d(64, 64, 1)
        self.conv_3 = nn.Conv1d(64, 64, 1)
        self.conv_4 = nn.Conv1d(64, 128, 1)
        self.conv_5 = nn.Conv1d(128, 1024, 1)

        self.bn_1 = nn.BatchNorm1d(64)
        self.bn_2 = nn.BatchNorm1d(64)
        self.bn_3 = nn.BatchNorm1d(64)
        self.bn_4 = nn.BatchNorm1d(128)
        self.bn_5 = nn.BatchNorm1d(1024)

        self.max_pool = nn.MaxPool1d(2)

    def forward(self, x):
        num_points = x.shape[1]

        input_transform = self.input_transform(x)

        x = torch.bmm(x, input_transform)
        x = x.transpose(2, 1)
        x = F.relu(self.bn_1(self.conv_1(x)))
        x = F.relu(self.bn_2(self.conv_2(x)))
        x = x.transpose(2, 1)

        feature_transform = self.feature_transform(x)

        x = torch.bmm(x, feature_transform)
        local_point_features = x

        x = x.transpose(2, 1)
        x = F.relu(self.bn_3(self.conv_3(x)))
        x = F.relu(self.bn_4(self.conv_4(x)))
        x = F.relu(self.bn_5(self.conv_5(x)))
        x = self.max_pool(x)

        x = x.view(-1, 1024)

        if self.return_local_features:
            x = x.view(-1, 1024, 1).repeat(1, 1, num_points)
            return torch.cat([x.transpose(2, 1), local_point_features], 2), feature_transform
        else:
            return x, feature_transform


class ClassificationPointNet(nn.Module):

    def __init__(self, num_classes, dropout=0.3, point_dimension=3):
        super(ClassificationPointNet, self).__init__()
        self.base_pointnet = BasePointNet(return_local_features=False, point_dimension=point_dimension)

        self.fc_1 = nn.Linear(1024, 512)
        self.fc_2 = nn.Linear(512, 256)
        self.fc_3 = nn.Linear(256, num_classes)

        self.bn_1 = nn.BatchNorm1d(512)
        self.bn_2 = nn.BatchNorm1d(256)

        self.dropout_1 = nn.Dropout(dropout)

    def forward(self, x):
        x, feature_transform = self.base_pointnet(x)

        x = F.relu(self.bn_1(self.fc_1(x)))
        x = F.relu(self.bn_2(self.fc_2(x)))
        x = self.dropout_1(x)

        return F.log_softmax(self.fc_3(x), dim=1), feature_transform


def test():
    model = ClassificationPointNet(10, 0.3, 3)
    dummy_input = torch.randn(64, 2, 3)
    x = model(dummy_input)

    torch.save(model.state_dict(), 'file.pth')
    state_dict = torch.load('file.pth')
    model.load_state_dict(state_dict)

    dummy_input = torch.randn(64, 2, 3)
    torch.onnx.export(model, dummy_input, 'file.onnx')

if __name__ == '__main__':
    test()

Hello Kamil,
Thank you for the code. The error went away, but now I get a runtime error:

RuntimeError: expected type torch.FloatTensor but got torch.cuda.FloatTensor

Please assist me in solving this.

Best Regards,
Vijay

Make sure your model and tensors are on the device you want to use (with .to(device)). I was on device='cpu'.
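One guess, based on your code above: the torch.cuda.is_available() branch in TransformationNet.forward moves identity_matrix to the GPU even when the model and input stay on the CPU, which would produce exactly this mismatch. A sketch of keeping everything on one explicit device (reusing the ClassificationPointNet defined earlier in this thread):

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# move both the model parameters and the dummy input to the same device
model = ClassificationPointNet(10, 0.3, 3).to(device)
dummy_input = torch.randn(64, 2, 3, device=device)

# and inside TransformationNet.forward, follow the input's device
# instead of hard-coding .cuda():
#   identity_matrix = torch.eye(self.output_dim, device=x.device)

torch.onnx.export(model, dummy_input, 'file.onnx')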