I’m trying to add support for a quantized version of torch.roll. I followed this guide, and I’m getting the following linker error during the build:
#26 229.7 [ 91%] Building CXX object test_cpp_c10d/CMakeFiles/ProcessGroupGlooTest.dir/ProcessGroupGlooTest.cpp.o
#26 229.9 [ 91%] Linking CXX executable ../bin/ProcessGroupGlooTest
#26 229.9 ../lib/libtorch_cpu.so: undefined reference to `at::native::quantized_roll(at::Tensor const&, c10::ArrayRef<long>, c10::ArrayRef<long>)'
#26 229.9 collect2: error: ld returned 1 exit status
#26 229.9 make[2]: *** [bin/HashStoreTest] Error 1
#26 229.9 test_cpp_c10d/CMakeFiles/HashStoreTest.dir/build.make:101: recipe for target 'bin/HashStoreTest' failed
#26 229.9 CMakeFiles/Makefile2:9060: recipe for target 'test_cpp_c10d/CMakeFiles/HashStoreTest.dir/all' failed
#26 229.9 make[1]: *** [test_cpp_c10d/CMakeFiles/HashStoreTest.dir/all] Error 2
#26 229.9 make[1]: *** Waiting for unfinished jobs....
#26 230.0 ../lib/libtorch_cpu.so: undefined reference to `at::native::quantized_roll(at::Tensor const&, c10::ArrayRef<long>, c10::ArrayRef<long>)'
#26 230.0 collect2: error: ld returned 1 exit status
#26 230.0 test_cpp_c10d/CMakeFiles/FileStoreTest.dir/build.make:101: recipe for target 'bin/FileStoreTest' failed
#26 230.0 make[2]: *** [bin/FileStoreTest] Error 1
#26 230.0 CMakeFiles/Makefile2:9000: recipe for target 'test_cpp_c10d/CMakeFiles/FileStoreTest.dir/all' failed
#26 230.0 make[1]: *** [test_cpp_c10d/CMakeFiles/FileStoreTest.dir/all] Error 2
#26 230.0 ../lib/libtorch_cpu.so: undefined reference to `at::native::quantized_roll(at::Tensor const&, c10::ArrayRef<long>, c10::ArrayRef<long>)'
#26 230.0 collect2: error: ld returned 1 exit status
#26 230.0 test_cpp_c10d/CMakeFiles/TCPStoreTest.dir/build.make:101: recipe for target 'bin/TCPStoreTest' failed
#26 230.0 make[2]: *** [bin/TCPStoreTest] Error 1
#26 230.0 CMakeFiles/Makefile2:9030: recipe for target 'test_cpp_c10d/CMakeFiles/TCPStoreTest.dir/all' failed
#26 230.0 make[1]: *** [test_cpp_c10d/CMakeFiles/TCPStoreTest.dir/all] Error 2
#26 230.2 ../lib/libtorch_cpu.so: undefined reference to `at::native::quantized_roll(at::Tensor const&, c10::ArrayRef<long>, c10::ArrayRef<long>)'
#26 230.2 collect2: error: ld returned 1 exit status
#26 230.2 test_cpp_c10d/CMakeFiles/ProcessGroupGlooTest.dir/build.make:101: recipe for target 'bin/ProcessGroupGlooTest' failed
#26 230.2 make[2]: *** [bin/ProcessGroupGlooTest] Error 1
#26 230.2 CMakeFiles/Makefile2:9090: recipe for target 'test_cpp_c10d/CMakeFiles/ProcessGroupGlooTest.dir/all' failed
#26 230.2 make[1]: *** [test_cpp_c10d/CMakeFiles/ProcessGroupGlooTest.dir/all] Error 2
#26 230.2 Makefile:145: recipe for target 'all' failed
#26 230.2 make: *** [all] Error 2
#26 230.2 Building wheel torch-1.13.0a0+git4bbf81b
#26 230.2 -- Building version 1.13.0a0+git4bbf81b
#26 230.2 cmake -DBUILD_PYTHON=True -DBUILD_TEST=True -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/opt/pytorch/torch -DCMAKE_PREFIX_PATH=/opt/conda/lib/python3.8/site-packages;/opt/conda/bin/../ -DNUMPY_INCLUDE_DIR=/opt/conda/lib/python3.8/site-packages/numpy/core/include -DPYTHON_EXECUTABLE=/opt/conda/bin/python -DPYTHON_INCLUDE_DIR=/opt/conda/include/python3.8 -DPYTHON_LIBRARY=/opt/conda/lib/libpython3.8.so.1.0 -DTORCH_BUILD_VERSION=1.13.0a0+git4bbf81b -DUSE_NUMPY=True /opt/pytorch
#26 230.2 cmake --build . --target install --config Release -- -j 4
#26 ERROR: executor failed running [/bin/sh -c TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX 8.0" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" python setup.py develop]: exit code: 1
------
> [build 4/4] RUN --mount=type=cache,target=/opt/ccache TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX 8.0" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" python setup.py develop:
------
executor failed running [/bin/sh -c TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX 8.0" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" python setup.py develop]: exit code: 1
I’ve placed my qroll.cpp
file in this path: pytorch/aten/src/ATen/native/quantized/cpu/qroll.cpp
.
This is in native_functions.yaml
:
- func: quantized_roll(Tensor self, int[1] shifts, int[1] dims=[]) -> Tensor
variants: function, method
dispatch:
QuantizedCPU: quantized_roll
autogen: quantized_roll.out
And the file pytorch/aten/src/ATen/native/quantized/library.cpp
contains this line:
m.def(TORCH_SELECTIVE_SCHEMA("quantized::roll(Tensor qx, int[] shifts, int[] dims) -> Tensor"));
Finally, for completeness, here are the full contents of qroll.cpp:
#include <ATen/ATen.h>
#include <torch/library.h>
#include <ATen/native/TensorTransformations.h> // for roll_common
#include <ATen/NativeFunctions.h> // Need that for the `native_functions.yaml`
// #include <ATen/core/Type.h>
// #include <ATen/native/TensorIterator.h>
// #include <ATen/native/cpu/Loops.h>
// #include <ATen/Parallel.h>
// #include <ATen/native/quantized/cpu/RuyUtils.h>
// #include <ruy/ruy.h>
namespace at {
namespace native {
namespace {
// Validates that `qa` is a tensor this kernel can handle: QInt8/QUInt8
// dtype and per-tensor (affine or symmetric) quantization only.
inline void check_inputs(const Tensor& qa) {
  TORCH_CHECK(
      qa.scalar_type() == c10::kQInt8 || qa.scalar_type() == c10::kQUInt8,
      "quantized_roll operands should use QInt8 or QUInt8 data types.");
  TORCH_CHECK(
      qa.qscheme() == kPerTensorAffine || qa.qscheme() == kPerTensorSymmetric,
      "Only per-tensor quantization is supported in quantized_roll.");
}
} // namespace

// Quantized implementation of torch.roll for QuantizedCPU tensors.
//
// NOTE(fix): this function must be defined directly in at::native, NOT inside
// the anonymous namespace above. An anonymous namespace gives the symbol
// internal linkage, but the code generated from native_functions.yaml
// (`dispatch: QuantizedCPU: quantized_roll`) references
// `at::native::quantized_roll` with external linkage — which is exactly the
// "undefined reference to at::native::quantized_roll(...)" linker error.
//
// @param self   quantized input tensor (per-tensor QInt8/QUInt8).
// @param shifts number of places elements are shifted, one entry per dim.
// @param dims   axes to roll along; may be empty (flattened roll).
// @return a new tensor with elements rolled; quantization params preserved.
Tensor quantized_roll(const Tensor& self, IntArrayRef shifts, IntArrayRef dims) {
  check_inputs(self);
  // The fast path below only handles a single (shift, dim) pair; everything
  // else (multi-dim roll, flattened roll) is delegated to the shared helper.
  if (dims.size() != 1 || shifts.size() != 1) {
    return roll_common(self, shifts, dims);
  }
  // Empty tensor: nothing to roll, and avoids a division by zero below.
  if (self.numel() == 0) {
    return self.clone(at::MemoryFormat::Preserve);
  }
  const int64_t dim = dims[0];
  const int64_t size = self.size(dim);
  int64_t start = (size - shifts[0]) % size;
  // C++ `%` can yield a negative remainder for negative operands (unlike
  // Python); normalize into [0, size).
  if (start < 0) {
    start = start + size;
  }
  // roll == rotate: concatenate the two halves in swapped order.
  auto t0 = self.narrow(dim, start, size - start);
  auto t1 = self.narrow(dim, 0, start);
  return at::cat({t0, t1}, dim);
}

// Registers the QuantizedCPU kernel for the `quantized::roll` schema declared
// in library.cpp. Registration via static initializer works regardless of
// linkage, but it lives here next to the kernel it registers.
TORCH_LIBRARY_IMPL(quantized, QuantizedCPU, m) {
  m.impl(TORCH_SELECTIVE_NAME("quantized::roll"), TORCH_FN(quantized_roll));
}

} // namespace native
} // namespace at