Caffe2 error: it requires cuDNN 7 or above, but I have cuDNN 8.0.5 loaded

Hi All,

I am trying to use PyTorch in my application, which uses Kokkos as well as PyTorch. I am using CMake to build the application, and I am hitting the following error:

cmake -Dref_data=14 -DKokkos_ROOT=/global/homes/n/namehta4/kokkos/install_cuda/lib64/cmake/Kokkos -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=/global/homes/n/namehta4/kokkos/install_cuda/bin/nvcc_wrapper ../
-- The C compiler identification is GNU 7.4.1
-- The CXX compiler identification is GNU 7.4.1
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: /usr/bin/gcc - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /global/homes/n/namehta4/kokkos/install_cuda/bin/nvcc_wrapper - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Enabled Kokkos devices: CUDA;SERIAL
CMake Warning at /global/homes/n/namehta4/kokkos/install_cuda/lib64/cmake/Kokkos/KokkosConfigCommon.cmake:29 (MESSAGE):
  The installed Kokkos configuration does not support CXX extensions.
  Forcing -DCMAKE_CXX_EXTENSIONS=Off
Call Stack (most recent call first):
  /global/homes/n/namehta4/kokkos/install_cuda/lib64/cmake/Kokkos/KokkosConfig.cmake:57 (INCLUDE)
  CMakeLists.txt:6 (find_package)


-- Looking for pthread.h
-- Looking for pthread.h - found
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed
-- Looking for pthread_create in pthreads
-- Looking for pthread_create in pthreads - not found
-- Looking for pthread_create in pthread
-- Looking for pthread_create in pthread - found
-- Found Threads: TRUE  
-- Found CUDA: /usr/common/software/sles15_cgpu/cuda/11.0.3 (found version "11.0") 
-- Caffe2: CUDA detected: 11.0
-- Caffe2: CUDA nvcc is: /usr/common/software/sles15_cgpu/cuda/11.0.3/bin/nvcc
-- Caffe2: CUDA toolkit directory: /usr/common/software/sles15_cgpu/cuda/11.0.3
-- Caffe2: Header version is: 11.0
-- Found CUDNN: /usr/common/software/sles15_cgpu/cudnn/8.0.5/cuda/11.0.3/lib64/libcudnn.so  
-- Found cuDNN: v?  (include: /usr/common/software/sles15_cgpu/cudnn/8.0.5/cuda/11.0.3/include, library: /usr/common/software/sles15_cgpu/cudnn/8.0.5/cuda/11.0.3/lib64/libcudnn.so)
CMake Error at /global/homes/n/namehta4/libtorch/share/cmake/Caffe2/public/cuda.cmake:170 (message):
  PyTorch requires cuDNN 7 and above.
Call Stack (most recent call first):
  /global/homes/n/namehta4/libtorch/share/cmake/Caffe2/Caffe2Config.cmake:88 (include)
  /global/homes/n/namehta4/libtorch/share/cmake/Torch/TorchConfig.cmake:40 (find_package)
  CMakeLists.txt:7 (find_package)


-- Configuring incomplete, errors occurred!

I have the following modules loaded:

Currently Loaded Modulefiles:
  1) esslurm             2) cgpu/1.0            3) cmake/3.14.4        4) cuda/11.0.3         5) cudnn/8.0.5         6) pytorch/1.7.0-gpu

Is there a mistake on my end? I have CUDA 11 as well as cuDNN 8.0.5 loaded, and both are recognized by CMake, but apparently not by Caffe2.
Thank you!

Edit: This is my CMakeLists.txt:

  1 cmake_minimum_required (VERSION 3.12 FATAL_ERROR)
  2 project(TestSNAP
  3   LANGUAGES C CXX
  4   )
  5 
  6 find_package(Kokkos REQUIRED)
  7 find_package(Torch REQUIRED)
  8 find_package(Python REQUIRED COMPONENTS Interpreter Development)
  9 
 10 
 11 # don't allow in-source builds
 12 if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
 13   message(STATUS "Warning! Building from the source directory is not allow")
 14   message(STATUS "Remove 'CMakeCache.txt' and 'CMakeFiles' and build from a separate directory")
 15   message(ERROR "In-source build")
 16 endif()
 17 
 18 SET(MyTarget test_snap)
 19 
 20 message(STATUS "CMAKE_SOURCE_DIR = ${CMAKE_SOURCE_DIR}")
 21 FILE(GLOB sources
 22   ${CMAKE_SOURCE_DIR}/src/*.cpp
 23   ${CMAKE_SOURCE_DIR}/src/*.h
 24   )
 25 
 26 add_executable(${MyTarget} ${sources})
 27 
 28 target_compile_features(${MyTarget} PUBLIC cxx_std_17)
 29 set_target_properties(${MyTarget} PROPERTIES
 30         CXX_EXTENSIONS OFF
 31         CXX_STANDARD_REQUIRED ON
 32 )
 33 ADD_COMPILE_DEFINITIONS(REFDATA_TWOJ=${ref_data})
 34 
 35 target_include_directories(${MyTarget} PRIVATE -DCUSTOM_SYS_PATH="${TestSNAP_SOURCE_DIR}/include")
 36 target_include_directories(${MyTarget} PRIVATE $<BUILD_INTERFACE:${TestSNAP_SOURCE_DIR}/include>)
 37 target_include_directories(${MyTarget} SYSTEM PRIVATE ${PYTHON_INCLUDE_DIRS})
 38 target_link_libraries(${MyTarget} PRIVATE Kokkos::kokkos)
 39 target_link_libraries(${MyTarget} PRIVATE "${TORCH_LIBRARIES}")
 40 target_link_libraries(${MyTarget} PRIVATE Python::Python)

Which PyTorch branch are you using? Are you trying to build from the current master or an older version?

Hi Ptrblck,

I am not sure. I am using the one installed on the Cori system at NERSC. I can ask them and check, but I believe it is the latest package. Thank you!

Edit: It is the stable PyTorch v1.7.0 release, installed via conda.

Based on the error and output it seems you are trying to build a library/package from source, so I don’t understand the “pytorch v1.7.0 installed via conda” statement.
If you’ve installed the PyTorch binaries via conda, you wouldn’t have to rebuild anything.

My apologies. I am also using the LibTorch libraries, and CMake was looking for them in the wrong directory.
I am sorry for the inconvenience. Thank you!

For the record, this specific "v?" issue with cuDNN 8 was fixed by pytorch/pytorch Pull Request #39277, "Initial support for building on Ampere GPU, CUDA 11, cuDNN 8" by zasdfgbnm (GitHub).