When I tried to build PyTorch from source using a Dockerfile, the build failed with a strange error that appears to come from the linker (ld).
Error logs
#0 3395.3 [5555/5977] Linking CXX executable bin/cuda_apply_test
#0 3395.3 FAILED: bin/cuda_apply_test
#0 3395.3 : && /usr/bin/c++ -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow -DHAVE_AVX_CPU_DEFINITION -DHAVE_AVX2_CPU_DEFINITION -O3 -DNDEBUG -DNDEBUG -rdynamic caffe2/CMakeFiles/cuda_apply_test.dir/__/aten/src/ATen/test/cuda_apply_test.cpp.o -o bin/cuda_apply_test -Wl,-rpath,/usr/local/cuda/lib64:/opt/conda/envs/pytorch/lib:/pytorch/build/lib: /usr/local/cuda/lib64/libcudart.so lib/libgtest_main.a -Wl,--no-as-needed,"/pytorch/build/lib/libtorch.so" -Wl,--as-needed -Wl,--no-as-needed,"/pytorch/build/lib/libtorch_cpu.so" -Wl,--as-needed lib/libprotobuf.a /opt/conda/envs/pytorch/lib/libmkl_intel_lp64.so /opt/conda/envs/pytorch/lib/libmkl_gnu_thread.so /opt/conda/envs/pytorch/lib/libmkl_core.so -fopenmp -lpthread -lm /usr/lib/x86_64-linux-gnu/libdl.so lib/libdnnl.a -Wl,--no-as-needed,"/pytorch/build/lib/libtorch_cuda.so" -Wl,--as-needed lib/libc10_cuda.so lib/libc10.so /usr/local/cuda/lib64/libnvToolsExt.so /usr/local/cuda/lib64/libcufft_static_nocallback.a /usr/local/cuda/lib64/libcurand_static.a /usr/local/cuda/lib64/libculibos.a /usr/local/cuda/lib64/libcublas_static.a /usr/local/cuda/lib64/libcublasLt_static.a /usr/local/cuda/lib64/libcudart_static.a -lrt -ldl lib/libgtest.a -pthread && :
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasHgemmBatched'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasCgetriBatched'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasCgemm_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasCgetrfBatched'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasDdot_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasDgetrfBatched'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasGetPointerMode_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasDgemmStridedBatched'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasDscal_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasDgetriBatched'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasZgemm_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasGemmEx'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasDestroy_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasCgemv_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasCdotu_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasSaxpy_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasGemmBatchedEx'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasGemmStridedBatchedEx'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasDgemm_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasSgeam'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasHgemm'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasCreate_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasDgeam'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasSetStream_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasSetMathMode'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasSetPointerMode_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasSgemmStridedBatched'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasDotEx'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasZgemv_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasScalEx'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasZgetrfBatched'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasCdotc_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasZdotc_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasZgetriBatched'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasSgemv_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasSgetrfBatched'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasSdot_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasSgetriBatched'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasSgemmEx'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasSgemm_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasDgemv_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasHgemmStridedBatched'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasCgemmStridedBatched'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasSscal_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasSgemmBatched'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasAxpyEx'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasZdotu_v2'
#0 3395.3 /pytorch/build/lib/libtorch_cuda.so: undefined reference to `cublasZgemmStridedBatched'
#0 3395.3 collect2: error: ld returned 1 exit status
#0 3395.3 [5556/5977] Linking CXX executable bin/cuda_stream_test
#0 3395.3 FAILED: bin/cuda_stream_test
Minified repro
This is my dockerfile:
FROM nvcr.io/nvidia/cuda:10.2-cudnn8-devel-ubuntu18.04
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
ENV PATH /opt/conda/bin:$PATH
ENV PYTORCH_BUILD_VERSION=1.8.1+cu102
ENV PYTORCH_BUILD_NUMBER=1
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    git \
    curl \
    ca-certificates \
    libjpeg-dev \
    libpng-dev \
    && rm -rf /var/lib/apt/lists/*
RUN curl -o ~/miniconda.sh -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
    chmod +x ~/miniconda.sh && \
    ~/miniconda.sh -b -p /opt/conda && \
    rm ~/miniconda.sh && \
    /opt/conda/bin/conda clean -ya
RUN conda create -n pytorch python=3.9.20 && conda clean -ya
ENV CONDA_DEFAULT_ENV=pytorch
ENV PATH /opt/conda/envs/pytorch/bin:$PATH
RUN echo "source activate pytorch" > ~/.bashrc
ENV BASH_ENV ~/.bashrc
ENV ENV ~/.bashrc
RUN conda install -y numpy=1.23 ninja pyyaml mkl mkl-include setuptools cmake cffi typing_extensions future six requests dataclasses
COPY pytorch-v1.8.1.tar.gz /pytorch-v1.8.1.tar.gz
WORKDIR /
RUN tar xvzf pytorch-v1.8.1.tar.gz
WORKDIR /pytorch
RUN git submodule sync && git submodule update --init --recursive
ENV TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5"
ENV USE_CUDA=1
ENV USE_CUDNN=1
ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../"
ENV USE_STATIC_CUDA=ON
RUN ln -s /usr/lib/x86_64-linux-gnu/libcublasLt_static.a /usr/local/cuda/lib64/libcublasLt_static.a && ln -s /usr/lib/x86_64-linux-gnu/libcublasLt_static.a /usr/local/cuda-10.2/lib64/libcublas_static.a
RUN python setup.py bdist_wheel
Versions
This is the host environment:
OS: CentOS Linux release 7.7.1908 (Core) (x86_64)
GCC version: (Anaconda gcc) 11.2.0
Clang version: Could not collect
CMake version: Could not collect
Libc version: glibc-2.17
Python version: 3.9.20 (main, Oct 3 2024, 07:27:41) [GCC 11.2.0] (64-bit runtime)
Python platform: Linux-3.10.0-1062.12.1.el7.x86_64-x86_64-with-glibc2.17
Is CUDA available: N/A
CUDA runtime version: 10.2.89
CUDA_MODULE_LOADING set to: N/A
GPU models and configuration:
GPU 0: Tesla V100S-PCIE-32GB
GPU 1: Tesla V100S-PCIE-32GB