Good news: I found a user-side workaround that does not require tweaking any of the CMake modules that ship with a PyTorch distribution. It assumes that two CMake variables, PYTORCH_VERSION and PYTORCH_CUDA_VERSION, are available, with their values taken from the output of the following Python script:
from torch import version

if __name__ == "__main__":
    print(version.__version__.split("+")[0])  # for PYTORCH_VERSION
    print(version.cuda)  # for PYTORCH_CUDA_VERSION
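For reference, one way to populate the two variables is to run that script via execute_process() early in your CMakeLists.txt. This is a minimal sketch, not part of the workaround itself; the script location cmake/pytorch_info.py and the _pytorch_info helper variable are my own naming, and it assumes the interpreter CMake finds is the one with your PyTorch installation:

find_package(Python3 REQUIRED COMPONENTS Interpreter)
execute_process(
    COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pytorch_info.py
    OUTPUT_VARIABLE _pytorch_info
    OUTPUT_STRIP_TRAILING_WHITESPACE
)
# the script prints the PyTorch version on the first line and the
# CUDA version on the second; split the output into a CMake list
string(REPLACE "\n" ";" _pytorch_info "${_pytorch_info}")
list(GET _pytorch_info 0 PYTORCH_VERSION)
list(GET _pytorch_info 1 PYTORCH_CUDA_VERSION)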
Right before your find_package(Torch CONFIG) call, you can do the following:
# assumes find_package(CUDAToolkit) was already done
if(
    PYTORCH_VERSION VERSION_GREATER_EQUAL 2.5.0 AND
    PYTORCH_CUDA_VERSION VERSION_GREATER_EQUAL 12
)
    message(STATUS "PyTorch NVTX headers workaround: Yes")
    # only do this if nvToolsExt is not defined and CUDA::nvtx3 exists
    if(NOT TARGET CUDA::nvToolsExt AND TARGET CUDA::nvtx3)
        add_library(CUDA::nvToolsExt INTERFACE IMPORTED)
        # ensure that PyTorch is told to use NVTX3 headers
        target_compile_definitions(
            CUDA::nvToolsExt INTERFACE
            TORCH_CUDA_USE_NVTX3
        )
        target_link_libraries(CUDA::nvToolsExt INTERFACE CUDA::nvtx3)
    endif()
else()
    message(STATUS "PyTorch NVTX headers workaround: No")
endif()

# find Torch C++ as usual
find_package(Torch CONFIG)
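With the stand-in target defined, downstream usage should be unchanged, e.g. (my_app and main.cpp are just placeholders):

add_executable(my_app main.cpp)
target_link_libraries(my_app PRIVATE ${TORCH_LIBRARIES})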
This has pacified CMake nicely for me, so hopefully it will be of help to others too.