The compilation of Torch from source failed with the error: “ninja: build stopped: interrupted by user.”

Jiawen_Niu · April 29, 2025, 8:25am

I also tried compiling with 8 threads, but the same error occurs. Below are my build script and the error log produced after first attempting to compile with 48 threads and then continuing with 1 thread.

My machine has ample memory and CPU resources. During the compilation process, I monitored the CPU utilization and memory usage using htop, both of which were very low, with 300GB of memory still available.

# ==== 🧱 Set environment variables ====
# Environment for the PyTorch source build that follows. The commented-out
# exports are optional toggles the author left disabled.
# export BUILD_TEST=ON
# export USE_CUDA=1
# export USE_DISTRIBUTED=1
# export USE_CUDNN=1
# export USE_NCCL=1
export CUDA_HOME=/usr/local/cuda

# Locate the conda root: use CONDA_PREFIX when set, otherwise derive it from
# the conda executable found on PATH (<root>/bin/conda -> <root>).
# The previous one-liner put single quotes inside a double-quoted expansion,
# which kept the quote characters as literal text in the resulting path.
conda_root="${CONDA_PREFIX:-}"
if [ -z "$conda_root" ]; then
  conda_bin="$(command -v conda 2>/dev/null || true)"
  case "$conda_bin" in
    # Only trust an absolute path; `command -v` prints a bare name for
    # shell functions and aliases.
    /*) conda_root="$(dirname "$(dirname "$conda_bin")")" ;;
  esac
fi
if [ -n "$conda_root" ]; then
  # Append the previous value only when it is non-empty, so no trailing ':'
  # (an empty path element) is produced.
  export CMAKE_PREFIX_PATH="${conda_root}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}}"
fi
unset conda_root conda_bin
# export USE_MPI=0
# export USE_GLOO=0
# export USE_KINETO=0
export USE_STATIC_MKL=1
# Same guard as above: an empty element in LD_LIBRARY_PATH is interpreted by
# the dynamic loader as the current working directory.
export LD_LIBRARY_PATH="${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
# export USE_NINJA=OFF



# Run a single-job PyTorch development build, append all build output
# (stdout and stderr) to the log file, then print the build's exit status.

# LOG_FILE is not assigned anywhere in this script as shown; fall back to a
# local file so the redirection below cannot fail on an empty file name.
: "${LOG_FILE:=build.log}"

# (disabled) progress banner, kept as written by the author:
# echo "🔄 开始构建完整的 PyTorch..." | tee -a "$LOG_FILE"

# NOTE(review): the accompanying log reports "Build type : Release" and the
# generated cmake command line contains none of the -D options below, so they
# do not appear to reach CMake when passed this way. Presumably these should
# be supplied as environment variables instead (e.g. DEBUG=1, USE_CUDA=1) --
# verify against the PyTorch build documentation before relying on them.
MAX_JOBS=1 python setup.py develop \
    build_ext \
    --cmake \
    -DTORCH_USE_CUDA_DSA=ON \
    -DDEBUG_CUDA=ON \
    -DCMAKE_BUILD_TYPE=Debug \
    -DUSE_CUDA=ON \
    -j 1 >> "$LOG_FILE" 2>&1
build_status=$?

# Report the build's exit code on stdout (0 means success).
echo "$build_status"

-- 
-- ******** Summary ********
-- General:
--   CMake version         : 3.24.2
--   CMake command         : /opt/cmake/bin/cmake
--   System                : Linux
--   C++ compiler          : /usr/bin/c++
--   C++ compiler id       : GNU
--   C++ compiler version  : 9.4.0
--   Using ccache if found : ON
--   Found ccache          : CCACHE_PROGRAM-NOTFOUND
--   CXX flags             :  -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow
--   Shared LD flags       :  -Wl,--no-as-needed  -rdynamic
--   Static LD flags       : 
--   Module LD flags       : 
--   Build type            : Release
--   Compile definitions   : ONNX_ML=1;ONNXIFI_ENABLE_EXT=1;ONNX_NAMESPACE=onnx_torch;HAVE_MMAP=1;_FILE_OFFSET_BITS=64;HAVE_SHM_OPEN=1;HAVE_SHM_UNLINK=1;HAVE_MALLOC_USABLE_SIZE=1;USE_EXTERNAL_MZCRC;MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS;FLASHATTENTION_DISABLE_ALIBI
--   CMAKE_PREFIX_PATH     : /root/anaconda3/lib/python3.11/site-packages;/root/anaconda3:;/usr/local/cuda;/usr/local/cuda
--   CMAKE_INSTALL_PREFIX  : /home/gehao/njw1123/pytorch-2.4.1/torch
--   USE_GOLD_LINKER       : OFF
-- 
--   TORCH_VERSION         : 2.4.0
--   BUILD_STATIC_RUNTIME_BENCHMARK: OFF
--   BUILD_BINARY          : OFF
--   BUILD_CUSTOM_PROTOBUF : ON
--     Link local protobuf : ON
--   BUILD_DOCS            : OFF
--   BUILD_PYTHON          : True
--     Python version      : 3.11.4
--     Python executable   : /root/anaconda3/bin/python
--     Python library      : 
--     Python includes     : /root/anaconda3/include/python3.11
--     Python site-package : /root/anaconda3/lib/python3.11/site-packages
--   BUILD_SHARED_LIBS     : ON
--   CAFFE2_USE_MSVC_STATIC_RUNTIME     : OFF
--   BUILD_TEST            : True
--   BUILD_JNI             : OFF
--   BUILD_MOBILE_AUTOGRAD : OFF
--   BUILD_LITE_INTERPRETER: OFF
--   INTERN_BUILD_MOBILE   : 
--   TRACING_BASED         : OFF
--   USE_BLAS              : 1
--     BLAS                : open
--     BLAS_HAS_SBGEMM     : 
--   USE_LAPACK            : 1
--     LAPACK              : open
--   USE_ASAN              : OFF
--   USE_TSAN              : OFF
--   USE_CPP_CODE_COVERAGE : OFF
--   USE_CUDA              : ON
--     Split CUDA          : 
--     CUDA static link    : OFF
--     USE_CUDNN           : ON
--     USE_CUSPARSELT      : OFF
--     CUDA version        : 11.8
--     USE_FLASH_ATTENTION : ON
--     USE_MEM_EFF_ATTENTION : ON
--     cuDNN version       : 8.9.2
--     CUDA root directory : /usr/local/cuda
--     CUDA library        : /usr/lib/x86_64-linux-gnu/libcuda.so
--     cudart library      : /usr/local/cuda/lib64/libcudart.so
--     cublas library      : /usr/local/cuda/lib64/libcublas.so
--     cufft library       : /usr/local/cuda/lib64/libcufft.so
--     curand library      : /usr/local/cuda/lib64/libcurand.so
--     cusparse library    : /usr/local/cuda/lib64/libcusparse.so
--     cuDNN library       : /usr/local/cuda/lib64/libcudnn.so
--     nvrtc               : /usr/local/cuda/lib64/libnvrtc.so
--     CUDA include path   : /usr/local/cuda/include
--     NVCC executable     : /usr/local/cuda/bin/nvcc
--     CUDA compiler       : /usr/local/cuda/bin/nvcc
--     CUDA flags          :  -DLIBCUDACXX_ENABLE_SIMPLIFIED_COMPLEX_OPERATIONS -D_GLIBCXX_USE_CXX11_ABI=1 -Xfatbin -compress-all -DONNX_NAMESPACE=onnx_torch -gencode arch=compute_89,code=sm_89 -Xcudafe --diag_suppress=cc_clobber_ignored,--diag_suppress=field_without_dll_interface,--diag_suppress=base_class_has_different_dll_interface,--diag_suppress=dll_interface_conflict_none_assumed,--diag_suppress=dll_interface_conflict_dllexport_assumed,--diag_suppress=bad_friend_decl --expt-relaxed-constexpr --expt-extended-lambda  -Wno-deprecated-gpu-targets --expt-extended-lambda -DCUB_WRAPPED_NAMESPACE=at_cuda_detail -DCUDA_HAS_FP16=1 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__
--     CUDA host compiler  : 
--     CUDA --device-c     : OFF
--     USE_TENSORRT        : 
--   USE_XPU               : OFF
--   USE_ROCM              : OFF
--   BUILD_NVFUSER         : 
--   USE_EIGEN_FOR_BLAS    : ON
--   USE_FBGEMM            : ON
--     USE_FAKELOWP          : OFF
--   USE_KINETO            : ON
--   USE_GFLAGS            : OFF
--   USE_GLOG              : OFF
--   USE_LITE_PROTO        : OFF
--   USE_PYTORCH_METAL     : OFF
--   USE_PYTORCH_METAL_EXPORT     : OFF
--   USE_MPS               : OFF
--   USE_MKL               : OFF
--   USE_MKLDNN            : ON
--   USE_MKLDNN_ACL        : OFF
--   USE_MKLDNN_CBLAS      : OFF
--   USE_UCC               : OFF
--   USE_ITT               : ON
--   USE_NCCL              : ON
--     USE_SYSTEM_NCCL     : OFF
--   USE_NNPACK            : ON
--   USE_NUMPY             : ON
--   USE_OBSERVERS         : ON
--   USE_OPENCL            : OFF
--   USE_OPENMP            : ON
--   USE_MIMALLOC          : OFF
--   USE_VULKAN            : OFF
--   USE_PROF              : OFF
--   USE_PYTORCH_QNNPACK   : ON
--   USE_XNNPACK           : ON
--   USE_DISTRIBUTED       : ON
--     USE_MPI               : ON
--     USE_GLOO              : ON
--     USE_GLOO_WITH_OPENSSL : OFF
--     USE_TENSORPIPE        : ON
--   Public Dependencies  : 
--   Private Dependencies : Threads::Threads;pthreadpool;cpuinfo;pytorch_qnnpack;nnpack;XNNPACK;fbgemm;ittnotify;fp16;caffe2::openmp;tensorpipe;gloo;foxi_loader;rt;fmt::fmt-header-only;kineto;gcc_s;gcc;dl
--   Public CUDA Deps.    : 
--   Private CUDA Deps.   : caffe2::curand;caffe2::cufft;caffe2::cublas;torch::cudnn;__caffe2_nccl;tensorpipe_cuda;gloo_cuda;/usr/local/cuda/lib64/libcudart.so;CUDA::cusparse;CUDA::cufft;ATEN_CUDA_FILES_GEN_LIB
--   USE_COREML_DELEGATE     : OFF
--   BUILD_LAZY_TS_BACKEND   : ON
--   USE_ROCM_KERNEL_ASSERT : OFF
-- Performing Test HAS_WMISSING_PROTOTYPES
-- Performing Test HAS_WMISSING_PROTOTYPES - Failed
-- Performing Test HAS_WERROR_MISSING_PROTOTYPES
-- Performing Test HAS_WERROR_MISSING_PROTOTYPES - Failed
-- Configuring done
-- Generating done
-- Build files have been written to: /home/gehao/njw1123/pytorch-2.4.1/build
[1/2399] Performing build step for 'nccl_external'
make -C src build BUILDDIR=/home/gehao/njw1123/pytorch-2.4.1/build/nccl
make[1]: Entering directory '/home/gehao/njw1123/pytorch-2.4.1/third_party/nccl/nccl/src'
NVCC_GENCODE is -gencode=arch=compute_89,code=sm_89
make[2]: Entering directory '/home/gehao/njw1123/pytorch-2.4.1/third_party/nccl/nccl/src/device'
NVCC_GENCODE is -gencode=arch=compute_89,code=sm_89
make[2]: Leaving directory '/home/gehao/njw1123/pytorch-2.4.1/third_party/nccl/nccl/src/device'
make[1]: Leaving directory '/home/gehao/njw1123/pytorch-2.4.1/third_party/nccl/nccl/src'
[2/2399] No install step for 'nccl_external'
[3/2399] Completed 'nccl_external'
[4/2399] Building CXX object third_party/fbgemm/CMakeFiles/fbgemm_avx2.dir/src/FbgemmI8Depthwise3DAvx2.cc.o
[5/2399] Building CXX object third_party/fbgemm/CMakeFiles/fbgemm_avx2.dir/src/FbgemmI8DepthwiseAvx2.cc.o
[6/2399] Linking CXX static library lib/libfbgemm.a
[7/2399] Building CXX object third_party/ideep/mkl-dnn/src/cpu/CMakeFiles/dnnl_cpu.dir/cpu_rnn_list.cpp.o
[8/2399] Building CXX object third_party/ideep/mkl-dnn/src/cpu/CMakeFiles/dnnl_cpu.dir/rnn/ref_rnn.cpp.o
[9/2399] Building CXX object third_party/ideep/mkl-dnn/src/cpu/x64/CMakeFiles/dnnl_cpu_x64.dir/jit_brgemm_conv.cpp.o
[10/2399] Linking CXX static library lib/libdnnl.a
[11/2399] Building CXX object c10/CMakeFiles/c10.dir/util/flags_use_no_gflags.cpp.o
[12/2399] Building CXX object c10/CMakeFiles/c10.dir/util/int128.cpp.o
ninja: build stopped: interrupted by user.
Building wheel torch-2.4.0a0+gitee1b680
-- Building version 2.4.0a0+gitee1b680
cmake -GNinja -DBUILD_PYTHON=True -DBUILD_TEST=True -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/home/gehao/njw1123/pytorch-2.4.1/torch -DCMAKE_PREFIX_PATH=/root/anaconda3/lib/python3.11/site-packages;/root/anaconda3: -DPython_EXECUTABLE=/root/anaconda3/bin/python -DTORCH_BUILD_VERSION=2.4.0a0+gitee1b680 -DUSE_NUMPY=True -DUSE_STATIC_MKL=1 /home/gehao/njw1123/pytorch-2.4.1
cmake --build . --target install --config Release -- -j 1