Error returning c++ tensor to pytorch

alfie-nsugh · October 26, 2023, 1:08pm

I have been trying to get a libtorch tensor to return so that I can use it in pytorch but it keeps failing.

#include <iostream>
//#include <cuda_runtime.h>
//#include <torch/torch.h>
//#include <ATen/ATen.h>
#include <pybind11/pybind11.h>
#include <torch/extension.h>

struct MyData {
    float x, y, z, w;
};

struct MyMat4{
	MyData rows[4];
};

namespace py = pybind11;

void create_mat(MyMat4& mat);
torch::Tensor create_tensor(){
    //MyMat4 mat;
    //create_mat(mat);

    ////move data to device
    //MyMat4 *d_mat;
    //cudaMalloc((void **)&d_mat, sizeof(MyMat4));
    //cudaMemcpy(d_mat, &mat, sizeof(MyMat4), cudaMemcpyHostToDevice);

    //torch::TensorOptions options_cuda = torch::TensorOptions().dtype(torch::kFloat31).device(torch::kCUDA, 0);
    torch::TensorOptions options_cpu = torch::TensorOptions().dtype(torch::kFloat32);

    ////torch::Tensor tensor_cuda = torch::from_blob(d_mat, {4, 4}, options_cuda);
    //torch::Tensor tensor_cpu = torch::from_blob(mat.rows, {4, 4},options_cpu);

    ////std::cout << "tensor_cuda: " << std::endl << tensor_cuda << std::endl;

    std::cout << "libtorch version: " << TORCH_VERSION << std::endl;
    //return tensor_cpu;
    return torch::zeros({4,4}, options_cpu);
    //std::cout << "tensor_zerps: " << std::endl << torch::zeros({4,4}) << std::endl;
    //return 0;
}


void create_mat(MyMat4& mat){

    mat.rows[0].x = 1.0f;
    mat.rows[0].y = 2.0f;
    mat.rows[0].z = 3.0f;
    mat.rows[0].w = 4.0f;

    mat.rows[1].x = 5.0f;
    mat.rows[1].y = 6.0f;
    mat.rows[1].z = 7.0f;
    mat.rows[1].w = 8.0f;

    mat.rows[2].x = 9.0f;
    mat.rows[2].y = 10.0f;
    mat.rows[2].z = 11.0f;
    mat.rows[2].w = 12.0f;

    mat.rows[3].x = 13.0f;
    mat.rows[3].y = 14.0f;
    mat.rows[3].z = 15.0f;
    mat.rows[3].w = 16.0f;
}


PYBIND11_MODULE(tensor_export, m) {
    m.doc() = "pybind11 demo plugin"; // optional module docstring

    m.def("create_tensor", &create_tensor, "A function that creates a tensor from a struct");

    //py::class_<MyMat4>(m, "PyMyMat4")
        //.def(py::init<>())
        //.def_readwrite("rows", &MyMat4::rows);
}

When I modify the function to return an integer and return zero and just print the torch version it works. But if I return a tensor it fails.
Giving me:

In [1]: import torch

In [2]: import tensor_export
---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-2-9aca188a181e> in <module>
----> 1 import tensor_export

ImportError: /home/avi/demos/cuda_torch_pybind_demo/build/tensor_export.cpython-310-x86_64-linux-gnu.so: undefined symbol: _ZN8pybind116detail11type_casterIN2at6TensorEvE4castERKS3_NS_19return_value_policyENS_6handleE

I tried the pytorch version using pip and libtorch version that is on the website.
I also tried compiling v2.1.0 from source using python setup.py install, and python setup.py build
And I also tried compiling v2.0.1 from source using python setup.py install, and python setup.py build
This is my cmake:

cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
project(tensor_export LANGUAGES CXX CUDA)
enable_language(CUDA)
add_subdirectory(pybind11)

#set(CMAKE_PREFIX_PATH "/home/avi/libtorch")
set(CMAKE_PREFIX_PATH "/home/avi/pytorch/torch")
find_package(Torch REQUIRED)
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS} -fPIC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")

find_package(Python3 COMPONENTS Development)
include_directories(${Python3_INCLUDE_DIRS})

pybind11_add_module(tensor_export MODULE libtorchPytorch.cpp)
# Link the PyTorch libraries and include directories
target_link_libraries(tensor_export PRIVATE ${TORCH_LIBRARIES})
target_include_directories(tensor_export PRIVATE ${TORCH_INCLUDE_DIRS})
#set_property(TARGET tensor_export PROPERTY CXX_STANDARD 17)


# The following code block is suggested to be used on Windows.
# According to https://github.com/pytorch/pytorch/issues/25457,
# the DLLs need to be copied to avoid memory errors.
if (MSVC)
  file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll")
  add_custom_command(TARGET tensor_export
                     POST_BUILD
                     COMMAND ${CMAKE_COMMAND} -E copy_if_different
                     ${TORCH_DLLS}
                     $<TARGET_FILE_DIR:tensor_export>)
endif (MSVC)

I also compiled magma (magma-2.7.2) myself instead of using the conda environment. And I am using cudatoolkit 11.8 on WSL2.

ptrblck · October 26, 2023, 6:48pm

I don’t know what pybind11_add_module does but the unresolved symbol is:

pybind11::detail::type_caster<at::Tensor, void>::cast(at::Tensor const&, pybind11::return_value_policy, pybind11::handle)

Maybe you would need to add pybind11 to target_link_libraries.

alfie-nsugh · October 27, 2023, 9:12am

Thanks for the advice, I tried it but unfortunately it didn’t work. However, when using the setup.py:

from setuptools import setup, Extension
from torch.utils import cpp_extension
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

setup(name='tensor_export',
      ext_modules=[CUDAExtension('tensor_export', ['libtorchPytorch.cpp'])],
      cmdclass={'build_ext': cpp_extension.BuildExtension},
      )

It did work, so I guess problem solved. Just out of curiosity I tried looking at the compiler options in the make file as well as in the ninja file:

running build
running build_ext
building 'tensor_export' extension
Emitting ninja build file /home/avi/demos/cuda_torch_pybind_demo/build/temp.linux-x86_64-3.10/build.ninja...
Compiling objects...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
[1/1] c++ -MMD -MF /home/avi/demos/cuda_torch_pybind_demo/build/temp.linux-x86_64-3.10/libtorchPytorch.o.d -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/usr/local/lib/python3.10/dist-packages/torch/include -I/usr/local/lib/python3.10/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.10/dist-packages/torch/include/TH -I/usr/local/lib/python3.10/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.10 -c -c /home/avi/demos/cuda_torch_pybind_demo/libtorchPytorch.cpp -o /home/avi/demos/cuda_torch_pybind_demo/build/temp.linux-x86_64-3.10/libtorchPytorch.o -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1016"' -DTORCH_EXTENSION_NAME=tensor_export -D_GLIBCXX_USE_CXX11_ABI=1 -std=c++17
x86_64-linux-gnu-g++ -shared -Wl,-O1 -Wl,-Bsymbolic-functions -Wl,-Bsymbolic-functions -g -fwrapv -O2 -Wl,-Bsymbolic-functions -g -fwrapv -O2 -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 /home/avi/demos/cuda_torch_pybind_demo/build/temp.linux-x86_64-3.10/libtorchPytorch.o -L/usr/local/lib/python3.10/dist-packages/torch/lib -L/usr/local/cuda/lib64 -lc10 -ltorch -ltorch_cpu -ltorch_python -lcudart -lc10_cuda -ltorch_cuda -o build/lib.linux-x86_64-3.10/tensor_export.cpython-310-x86_64-linux-gnu.so

/home/avi/.local/lib/python3.10/site-packages/cmake/data/bin/cmake -S/home/avi/demos/cuda_torch_pybind_demo -B/home/avi/demos/cuda_torch_pybind_demo/build --check-build-system CMakeFiles/Makefile.cmake 0
/home/avi/.local/lib/python3.10/site-packages/cmake/data/bin/cmake -E cmake_progress_start /home/avi/demos/cuda_torch_pybind_demo/build/CMakeFiles /home/avi/demos/cuda_torch_pybind_demo/build//CMakeFiles/progress.marks
make  -f CMakeFiles/Makefile2 all
make[1]: Entering directory '/home/avi/demos/cuda_torch_pybind_demo/build'
make  -f CMakeFiles/tensor_export.dir/build.make CMakeFiles/tensor_export.dir/depend
make[2]: Entering directory '/home/avi/demos/cuda_torch_pybind_demo/build'
cd /home/avi/demos/cuda_torch_pybind_demo/build && /home/avi/.local/lib/python3.10/site-packages/cmake/data/bin/cmake -E cmake_depends "Unix Makefiles" /home/avi/demos/cuda_torch_pybind_demo /home/avi/demos/cuda_torch_pybind_demo /home/avi/demos/cuda_torch_pybind_demo/build /home/avi/demos/cuda_torch_pybind_demo/build /home/avi/demos/cuda_torch_pybind_demo/build/CMakeFiles/tensor_export.dir/DependInfo.cmake "--color="
make[2]: Leaving directory '/home/avi/demos/cuda_torch_pybind_demo/build'
make  -f CMakeFiles/tensor_export.dir/build.make CMakeFiles/tensor_export.dir/build
make[2]: Entering directory '/home/avi/demos/cuda_torch_pybind_demo/build'
[ 50%] Building CXX object CMakeFiles/tensor_export.dir/libtorchPytorch.cpp.o
/usr/bin/c++ -DUSE_C10D_GLOO -DUSE_C10D_MPI -DUSE_C10D_NCCL -DUSE_DISTRIBUTED -DUSE_RPC -DUSE_TENSORPIPE -Dtensor_export_EXPORTS -isystem /usr/include/python3.10 -isystem /usr/local/lib/python3.10/dist-packages/torch/include -isystem /usr/local/lib/python3.10/dist-packages/torch/include/torch/csrc/api/include -isystem /home/avi/demos/cuda_torch_pybind_demo/pybind11/include -isystem /usr/local/cuda/include -D_GLIBCXX_USE_CXX11_ABI=1 -fPIC -fvisibility=hidden -flto -fno-fat-lto-objects -D_GLIBCXX_USE_CXX11_ABI=1 -MD -MT CMakeFiles/tensor_export.dir/libtorchPytorch.cpp.o -MF CMakeFiles/tensor_export.dir/libtorchPytorch.cpp.o.d -o CMakeFiles/tensor_export.dir/libtorchPytorch.cpp.o -c /home/avi/demos/cuda_torch_pybind_demo/libtorchPytorch.cpp
[100%] Linking CXX shared module tensor_export.cpython-310-x86_64-linux-gnu.so
/home/avi/.local/lib/python3.10/site-packages/cmake/data/bin/cmake -E cmake_link_script CMakeFiles/tensor_export.dir/link.txt --verbose=1
/usr/bin/c++ -fPIC  -D_GLIBCXX_USE_CXX11_ABI=1 -flto -shared  -o tensor_export.cpython-310-x86_64-linux-gnu.so CMakeFiles/tensor_export.dir/libtorchPytorch.cpp.o  -Wl,-rpath,/usr/local/lib/python3.10/dist-packages/torch/lib:/usr/lib/wsl/lib:/usr/local/cuda/lib64 /usr/local/lib/python3.10/dist-packages/torch/lib/libtorch.so /usr/local/lib/python3.10/dist-packages/torch/lib/libc10.so /usr/lib/wsl/lib/libcuda.so /usr/local/cuda/lib64/libnvrtc.so /usr/local/cuda/lib64/libnvToolsExt.so /usr/local/cuda/lib64/libcudart.so /usr/local/lib/python3.10/dist-packages/torch/lib/libc10_cuda.so -Wl,--no-as-needed,"/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_cpu.so" -Wl,--as-needed -Wl,--no-as-needed,"/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_cuda.so" -Wl,--as-needed /usr/local/lib/python3.10/dist-packages/torch/lib/libc10_cuda.so /usr/local/lib/python3.10/dist-packages/torch/lib/libc10.so /usr/local/cuda/lib64/libcufft.so /usr/local/cuda/lib64/libcurand.so /usr/local/cuda/lib64/libcublas.so /usr/local/cuda/lib64/libcublasLt.so -Wl,--no-as-needed,"/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch.so" -Wl,--as-needed /usr/local/cuda/lib64/libnvToolsExt.so /usr/local/cuda/lib64/libcudart.so 
/usr/bin/strip /home/avi/demos/cuda_torch_pybind_demo/build/tensor_export.cpython-310-x86_64-linux-gnu.so
make[2]: Leaving directory '/home/avi/demos/cuda_torch_pybind_demo/build'
[100%] Built target tensor_export
make[1]: Leaving directory '/home/avi/demos/cuda_torch_pybind_demo/build'
/home/avi/.local/lib/python3.10/site-packages/cmake/data/bin/cmake -E cmake_progress_start /home/avi/demos/cuda_torch_pybind_demo/build/CMakeFiles 0

But couldn’t really figure out why cmake doesn’t work but the setup.py does, but at least it works now.

alfie-nsugh · November 1, 2023, 3:20pm

The user woywoy123 at github linked this project that uses cmake:

I just tested it, and got it to work.

1317257555 · September 17, 2024, 2:53pm

Such problem is already solved in https://github.com/pytorch/pytorch/issues/108041#issuecomment-1753895665