Compiling a project that links against libtorch takes forever

I am working with libtorch in a C++ project. When I link libtorch against my CMake target and compile, the build takes 2-3 minutes. Why is this so slow?

I’m fetching libtorch like this:

##############################
# Libtorch
##############################

set(BUILD_TEST OFF)
set(CMAKE_CXX_STANDARD 17)
set(TORCH_USE_CUDA_DSA 1)
find_package(Torch ${libtorch_VERSION} QUIET CONFIG)
if (NOT Torch_FOUND)
    message(STATUS "libtorch ${libtorch_VERSION} - not found")
    message(STATUS "Fetching libtorch")
    include(FetchContent)
    FetchContent_Declare(
            libtorch
            # URL https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcu121.zip
            URL https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-static-with-deps-2.1.0%2Bcu121.zip
            SOURCE_DIR libtorch)
    FetchContent_GetProperties(libtorch)
    if (NOT libtorch_POPULATED)
        unset(FETCHCONTENT_QUIET CACHE)
        FetchContent_Populate(libtorch)
        list(APPEND CMAKE_PREFIX_PATH ${CMAKE_BINARY_DIR}/libtorch)
    endif ()
    find_package(Torch ${libtorch_VERSION} CONFIG REQUIRED)
else ()
    message(STATUS "libtorch ${libtorch_VERSION} - found")
endif ()

and link it into my project like this:


##############################################
# Setup Tests
##############################################

enable_testing()

add_executable(
        tests
        test_core.cu
)
target_link_libraries(
        tests
        GTest::gtest_main
        ${CUDA_LIBRARIES}
        ${TORCH_LIBRARIES}
        simulator
        viewer
)
target_include_directories(tests PUBLIC
        ${CUDA_INCLUDE_DIRS}
        ${TORCH_INCLUDE_DIRS}
)

Yet building the tests target takes around 3 minutes. Why is it so slow?

I ran into this issue when doing similar work. It seems most of the time is spent parsing very large header files: torch/torch.h pulls in many headers, each of which in turn includes many more. My solution was to put all torch includes in a single .h file in my project and precompile that header so it can be reused between builds.

/**
 * torch_includes.h
 * Place all torch headers here and include this header instead.
 * This header is pre-compiled and reused between builds.
 * Allows for linking against libtorch while maintaining fast build times.
 */

#ifndef CUDA_FOR_ML_TORCH_INCLUDES_H
#define CUDA_FOR_ML_TORCH_INCLUDES_H

#include "torch/torch.h"
// more torch includes here

#endif // CUDA_FOR_ML_TORCH_INCLUDES_H

Then I set this header to be precompiled in my CMake config with:

target_precompile_headers(CUDA_for_ML PUBLIC torch_includes.h)

The first compile is still slow, but after that my builds were quite quick. The process for using precompiled headers may vary depending on your compiler; I believe MSVC handles it slightly differently.
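
Applied to the test setup from your question, a minimal sketch might look like this (assuming the target is called tests as in your snippet and torch_includes.h sits next to the CMakeLists.txt):

# Precompile the torch wrapper header for the tests target so torch headers
# are parsed once and reused on subsequent builds.
target_precompile_headers(tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/torch_includes.h)

# If other targets such as simulator or viewer also include torch_includes.h
# and use compatible compile options, the same precompiled header can be
# reused instead of being rebuilt per target:
# target_precompile_headers(simulator REUSE_FROM tests)

One caveat: as far as I know, precompiled headers only apply to C/C++ sources, not to code compiled by nvcc, so to see the benefit the torch-heavy includes should live in .cpp translation units rather than in test_core.cu itself.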