I have also tried compiling with 8 threads, but the same error occurs. Below are the build script I used and its log output, including the error logs produced after first attempting to compile with 48 threads and then continuing with 1 thread.
My machine has ample memory and CPU resources. During the compilation process, I monitored the CPU utilization and memory usage using htop, both of which were very low, with 300GB of memory still available.
# ==== 🧱 Set environment variables ====
# Configures and runs a single-job, from-source PyTorch build, appending all
# build output to $LOG_FILE and printing the build's exit status at the end.
#
# The build picks its configuration up from the environment: the exported
# USE_STATIC_MKL=1 below shows up as -DUSE_STATIC_MKL=1 on the generated cmake
# command line. The commented-out exports are optional toggles kept for
# reference.
# export BUILD_TEST=ON
# export USE_CUDA=1
# export USE_DISTRIBUTED=1
# export USE_CUDNN=1
# export USE_NCCL=1
export CUDA_HOME=/usr/local/cuda

# Put the conda root in front of CMAKE_PREFIX_PATH. Prefer $CONDA_PREFIX and
# fall back to the parent of the directory holding the `conda` executable.
# Only an absolute path is accepted from `command -v`, because it prints a
# bare name when `conda` is a shell function or alias.
conda_root="${CONDA_PREFIX:-}"
if [ -z "$conda_root" ]; then
  conda_bin="$(command -v conda 2>/dev/null || true)"
  case "$conda_bin" in
    /*) conda_root="$(dirname "$(dirname "$conda_bin")")" ;;
  esac
fi
if [ -n "$conda_root" ]; then
  # ${VAR:+:...} adds the separator only when there is something to append,
  # so the list never ends in an empty entry.
  export CMAKE_PREFIX_PATH="${conda_root}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}}"
else
  export CMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH:-}"
fi

# export USE_MPI=0
# export USE_GLOO=0
# export USE_KINETO=0
export USE_STATIC_MKL=1

# An empty LD_LIBRARY_PATH entry makes the dynamic loader search the current
# directory, so the ':' is added only when a previous value exists.
export LD_LIBRARY_PATH="${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
# export USE_NINJA=OFF

# Keep a LOG_FILE set earlier in the script or by the caller; otherwise fall
# back to a file in the current directory so the redirection below cannot
# fail on an empty file name.
: "${LOG_FILE:=pytorch_build.log}"

# Disabled progress message:
# echo "🔄 Starting full PyTorch build..." | tee -a "$LOG_FILE"

# Build options are passed as environment variables rather than as
# `build_ext -D...` arguments: with the -D form the configure summary still
# reported "Build type : Release" and the generated cmake command line
# carried none of the requested definitions.
#   MAX_JOBS=1   one compile job (reaches the backend as `-- -j 1`)
#   DEBUG=1      request a Debug build instead of Release
#   DEBUG_CUDA=1 debug information for CUDA kernels
#   USE_CUDA=1   build with CUDA support
# NOTE(review): TORCH_USE_CUDA_DSA=1 is assumed to be honoured when set in the
# environment -- confirm in build/CMakeCache.txt after configuring.
# --cmake forces CMake to be re-run so the changed options take effect.
MAX_JOBS=1 DEBUG=1 DEBUG_CUDA=1 USE_CUDA=1 TORCH_USE_CUDA_DSA=1 \
  python setup.py develop build_ext --cmake -j 1 >> "$LOG_FILE" 2>&1
build_status=$?

# Report the build's exit status on stdout.
echo "$build_status"
--
-- ******** Summary ********
-- General:
-- CMake version : 3.24.2
-- CMake command : /opt/cmake/bin/cmake
-- System : Linux
-- C++ compiler : /usr/bin/c++
-- C++ compiler id : GNU
-- C++ compiler version : 9.4.0
-- Using ccache if found : ON
-- Found ccache : CCACHE_PROGRAM-NOTFOUND
-- CXX flags : -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow
-- Shared LD flags : -Wl,--no-as-needed -rdynamic
-- Static LD flags :
-- Module LD flags :
-- Build type : Release
-- Compile definitions : ONNX_ML=1;ONNXIFI_ENABLE_EXT=1;ONNX_NAMESPACE=onnx_torch;HAVE_MMAP=1;_FILE_OFFSET_BITS=64;HAVE_SHM_OPEN=1;HAVE_SHM_UNLINK=1;HAVE_MALLOC_USABLE_SIZE=1;USE_EXTERNAL_MZCRC;MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS;FLASHATTENTION_DISABLE_ALIBI
-- CMAKE_PREFIX_PATH : /root/anaconda3/lib/python3.11/site-packages;/root/anaconda3:;/usr/local/cuda;/usr/local/cuda
-- CMAKE_INSTALL_PREFIX : /home/gehao/njw1123/pytorch-2.4.1/torch
-- USE_GOLD_LINKER : OFF
--
-- TORCH_VERSION : 2.4.0
-- BUILD_STATIC_RUNTIME_BENCHMARK: OFF
-- BUILD_BINARY : OFF
-- BUILD_CUSTOM_PROTOBUF : ON
-- Link local protobuf : ON
-- BUILD_DOCS : OFF
-- BUILD_PYTHON : True
-- Python version : 3.11.4
-- Python executable : /root/anaconda3/bin/python
-- Python library :
-- Python includes : /root/anaconda3/include/python3.11
-- Python site-package : /root/anaconda3/lib/python3.11/site-packages
-- BUILD_SHARED_LIBS : ON
-- CAFFE2_USE_MSVC_STATIC_RUNTIME : OFF
-- BUILD_TEST : True
-- BUILD_JNI : OFF
-- BUILD_MOBILE_AUTOGRAD : OFF
-- BUILD_LITE_INTERPRETER: OFF
-- INTERN_BUILD_MOBILE :
-- TRACING_BASED : OFF
-- USE_BLAS : 1
-- BLAS : open
-- BLAS_HAS_SBGEMM :
-- USE_LAPACK : 1
-- LAPACK : open
-- USE_ASAN : OFF
-- USE_TSAN : OFF
-- USE_CPP_CODE_COVERAGE : OFF
-- USE_CUDA : ON
-- Split CUDA :
-- CUDA static link : OFF
-- USE_CUDNN : ON
-- USE_CUSPARSELT : OFF
-- CUDA version : 11.8
-- USE_FLASH_ATTENTION : ON
-- USE_MEM_EFF_ATTENTION : ON
-- cuDNN version : 8.9.2
-- CUDA root directory : /usr/local/cuda
-- CUDA library : /usr/lib/x86_64-linux-gnu/libcuda.so
-- cudart library : /usr/local/cuda/lib64/libcudart.so
-- cublas library : /usr/local/cuda/lib64/libcublas.so
-- cufft library : /usr/local/cuda/lib64/libcufft.so
-- curand library : /usr/local/cuda/lib64/libcurand.so
-- cusparse library : /usr/local/cuda/lib64/libcusparse.so
-- cuDNN library : /usr/local/cuda/lib64/libcudnn.so
-- nvrtc : /usr/local/cuda/lib64/libnvrtc.so
-- CUDA include path : /usr/local/cuda/include
-- NVCC executable : /usr/local/cuda/bin/nvcc
-- CUDA compiler : /usr/local/cuda/bin/nvcc
-- CUDA flags : -DLIBCUDACXX_ENABLE_SIMPLIFIED_COMPLEX_OPERATIONS -D_GLIBCXX_USE_CXX11_ABI=1 -Xfatbin -compress-all -DONNX_NAMESPACE=onnx_torch -gencode arch=compute_89,code=sm_89 -Xcudafe --diag_suppress=cc_clobber_ignored,--diag_suppress=field_without_dll_interface,--diag_suppress=base_class_has_different_dll_interface,--diag_suppress=dll_interface_conflict_none_assumed,--diag_suppress=dll_interface_conflict_dllexport_assumed,--diag_suppress=bad_friend_decl --expt-relaxed-constexpr --expt-extended-lambda -Wno-deprecated-gpu-targets --expt-extended-lambda -DCUB_WRAPPED_NAMESPACE=at_cuda_detail -DCUDA_HAS_FP16=1 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__
-- CUDA host compiler :
-- CUDA --device-c : OFF
-- USE_TENSORRT :
-- USE_XPU : OFF
-- USE_ROCM : OFF
-- BUILD_NVFUSER :
-- USE_EIGEN_FOR_BLAS : ON
-- USE_FBGEMM : ON
-- USE_FAKELOWP : OFF
-- USE_KINETO : ON
-- USE_GFLAGS : OFF
-- USE_GLOG : OFF
-- USE_LITE_PROTO : OFF
-- USE_PYTORCH_METAL : OFF
-- USE_PYTORCH_METAL_EXPORT : OFF
-- USE_MPS : OFF
-- USE_MKL : OFF
-- USE_MKLDNN : ON
-- USE_MKLDNN_ACL : OFF
-- USE_MKLDNN_CBLAS : OFF
-- USE_UCC : OFF
-- USE_ITT : ON
-- USE_NCCL : ON
-- USE_SYSTEM_NCCL : OFF
-- USE_NNPACK : ON
-- USE_NUMPY : ON
-- USE_OBSERVERS : ON
-- USE_OPENCL : OFF
-- USE_OPENMP : ON
-- USE_MIMALLOC : OFF
-- USE_VULKAN : OFF
-- USE_PROF : OFF
-- USE_PYTORCH_QNNPACK : ON
-- USE_XNNPACK : ON
-- USE_DISTRIBUTED : ON
-- USE_MPI : ON
-- USE_GLOO : ON
-- USE_GLOO_WITH_OPENSSL : OFF
-- USE_TENSORPIPE : ON
-- Public Dependencies :
-- Private Dependencies : Threads::Threads;pthreadpool;cpuinfo;pytorch_qnnpack;nnpack;XNNPACK;fbgemm;ittnotify;fp16;caffe2::openmp;tensorpipe;gloo;foxi_loader;rt;fmt::fmt-header-only;kineto;gcc_s;gcc;dl
-- Public CUDA Deps. :
-- Private CUDA Deps. : caffe2::curand;caffe2::cufft;caffe2::cublas;torch::cudnn;__caffe2_nccl;tensorpipe_cuda;gloo_cuda;/usr/local/cuda/lib64/libcudart.so;CUDA::cusparse;CUDA::cufft;ATEN_CUDA_FILES_GEN_LIB
-- USE_COREML_DELEGATE : OFF
-- BUILD_LAZY_TS_BACKEND : ON
-- USE_ROCM_KERNEL_ASSERT : OFF
-- Performing Test HAS_WMISSING_PROTOTYPES
-- Performing Test HAS_WMISSING_PROTOTYPES - Failed
-- Performing Test HAS_WERROR_MISSING_PROTOTYPES
-- Performing Test HAS_WERROR_MISSING_PROTOTYPES - Failed
-- Configuring done
-- Generating done
-- Build files have been written to: /home/gehao/njw1123/pytorch-2.4.1/build
[1/2399] Performing build step for 'nccl_external'
make -C src build BUILDDIR=/home/gehao/njw1123/pytorch-2.4.1/build/nccl
make[1]: Entering directory '/home/gehao/njw1123/pytorch-2.4.1/third_party/nccl/nccl/src'
NVCC_GENCODE is -gencode=arch=compute_89,code=sm_89
make[2]: Entering directory '/home/gehao/njw1123/pytorch-2.4.1/third_party/nccl/nccl/src/device'
NVCC_GENCODE is -gencode=arch=compute_89,code=sm_89
make[2]: Leaving directory '/home/gehao/njw1123/pytorch-2.4.1/third_party/nccl/nccl/src/device'
make[1]: Leaving directory '/home/gehao/njw1123/pytorch-2.4.1/third_party/nccl/nccl/src'
[2/2399] No install step for 'nccl_external'
[3/2399] Completed 'nccl_external'
[4/2399] Building CXX object third_party/fbgemm/CMakeFiles/fbgemm_avx2.dir/src/FbgemmI8Depthwise3DAvx2.cc.o
[5/2399] Building CXX object third_party/fbgemm/CMakeFiles/fbgemm_avx2.dir/src/FbgemmI8DepthwiseAvx2.cc.o
[6/2399] Linking CXX static library lib/libfbgemm.a
[7/2399] Building CXX object third_party/ideep/mkl-dnn/src/cpu/CMakeFiles/dnnl_cpu.dir/cpu_rnn_list.cpp.o
[8/2399] Building CXX object third_party/ideep/mkl-dnn/src/cpu/CMakeFiles/dnnl_cpu.dir/rnn/ref_rnn.cpp.o
[9/2399] Building CXX object third_party/ideep/mkl-dnn/src/cpu/x64/CMakeFiles/dnnl_cpu_x64.dir/jit_brgemm_conv.cpp.o
[10/2399] Linking CXX static library lib/libdnnl.a
[11/2399] Building CXX object c10/CMakeFiles/c10.dir/util/flags_use_no_gflags.cpp.o
[12/2399] Building CXX object c10/CMakeFiles/c10.dir/util/int128.cpp.o
ninja: build stopped: interrupted by user.
Building wheel torch-2.4.0a0+gitee1b680
-- Building version 2.4.0a0+gitee1b680
cmake -GNinja -DBUILD_PYTHON=True -DBUILD_TEST=True -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/home/gehao/njw1123/pytorch-2.4.1/torch -DCMAKE_PREFIX_PATH=/root/anaconda3/lib/python3.11/site-packages;/root/anaconda3: -DPython_EXECUTABLE=/root/anaconda3/bin/python -DTORCH_BUILD_VERSION=2.4.0a0+gitee1b680 -DUSE_NUMPY=True -DUSE_STATIC_MKL=1 /home/gehao/njw1123/pytorch-2.4.1
cmake --build . --target install --config Release -- -j 1