HI, Dear guys,
I’m build a cuda extention for my pytorch script. However, the following error occurs.
File "<frozen importlib._bootstrap>", line 684, in _load File "<frozen importlib._bootstrap>", line 658, in _load_unlocked File "<frozen importlib._bootstrap>", line 571, in module_from_spec File "<frozen importlib._bootstrap_external>", line 922, in create_module File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed ImportError: /tmp/torch_extensions/tools/tools.so: undefined symbol: _ZNK2at6Tensor4dataIcEEPT_v
I wonder if anyone could give me some light.
The code is very simple.
from torch.utils.cpp_extension import load
tools = load(name='tools', sources=['test.cpp', 'test.cu'])
print(tools.dispatch, type(tools.dispatch), str(tools.dispatch))
in file test.cpp:
#include <torch/extension.h>
#include <vector>
#define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
// declarations
int cuda_dispatch(torch::Tensor input, torch::Tensor codec);
std::vector<torch::Tensor> dispatch(torch::Tensor input, torch::Tensor codec) {
CHECK_INPUT(input);
CHECK_INPUT(codec);
cuda_dispatch(input, codec);
return { codec };
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("dispatch", &dispatch, "dispatch input according quantization levels (CUDA only)");
}
in file test.cu:
#include <torch/extension.h>
#include <cuda.h>
#include <cuda_runtime.h>
template <typename Dtype>
__global__ void kernel_dispatch(
const Dtype * __restrict__ input,
char * __restrict__ codec) {
}
int cuda_dispatch(torch::Tensor input, torch::Tensor codec) {
const int gsize = 10240;
const int lsize = 128;
AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.type(), "kernel_dispatch",
( [&] {
kernel_dispatch <scalar_t> <<< gsize, lsize>>> (
input.data<scalar_t>(),
codec.data<char>());
}
)
);
return 0;
}
if the codec
tensor is changed to a float tensor and replace codec.data <char>() to codec.data<scalar_t>()) in the test.cu, the error was gone.
Thus I suspect that there’s some limitation I unaware for the cuda template around AT_DISPATCH_FLOATING_TYPES_AND_HALF.