Hi, I tried to export a model from PyTorch to libtorch, and I found that the libtorch model's forward time is the same as in PyTorch (my model is ResNet-50). Is there any guide to help me improve the speed of libtorch, for example GCC flags, function-level considerations, and so on? Also, what factors affect the running time of net->forward?
I know that Python is not necessarily slower than C++; I just want to make sure I get the highest possible performance out of libtorch.
This is the CMakeLists.txt I use:
# Minimum supported CMake is still 3.0; the "...3.18" upper bound opts in to
# newer policy behaviour and keeps the file accepted by CMake releases that
# refuse a policy version below 3.5. CMake older than 3.12 ignores the
# "...<max>" suffix.
cmake_minimum_required(VERSION 3.0...3.18)
project(pvanet_release)

# Directory that contains libtorch's TorchConfig.cmake.
# Stored as a cache entry so the path below is only a default: override it
# with `cmake -DTorch_DIR=/path/to/libtorch/share/cmake/Torch ..` instead of
# editing this file.
set(Torch_DIR
    "/data/home/ryankang/Workspace/pvanet/libtorch_example/pvanet_libtorch/libtorch/share/cmake/Torch/"
    CACHE PATH "Directory containing TorchConfig.cmake")

# Both packages are mandatory. If OpenCV is installed in a non-standard
# location, pass -DOpenCV_DIR=<dir containing OpenCVConfig.cmake>.
find_package(Torch REQUIRED)
find_package(OpenCV REQUIRED)
# Build-wide settings: request C++11 and echo the full compiler/linker
# command lines during the build.
set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_CXX_STANDARD 11)

# Header search paths for every target declared in this directory.
include_directories(
  ${TORCH_INCLUDE_DIRS}
  ${OpenCV_INCLUDE_DIRS}
)

# Configure-time report of what find_package() located.
message(STATUS "Pytorch status:")
message(STATUS " libraries: ${TORCH_LIBRARIES}")

message(STATUS "OpenCV library status:")
message(STATUS " version: ${OpenCV_VERSION}")
message(STATUS " libraries: ${OpenCV_LIBS}")
message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}")
# Compile options applied to every target created after this point in the
# directory.
add_compile_options(
  -fopenmp                # enable OpenMP pragmas
  -pthread                # POSIX threads support
  -msse3                  # allow SSE3 instructions
  -Wall                   # common warnings
  -Wextra                 # additional warnings
  -Wno-unused-parameter   # ...but do not warn about unused parameters
)
# Flags for nvcc (CUDA_NVCC_FLAGS* are FindCUDA variables).
# Start from the language standard, then forward each host-compiler warning
# switch through nvcc's --compiler-options. The trailing ";" in each string
# is kept so the resulting list value matches the previous hand-written
# list(APPEND ...) calls exactly.
set(CUDA_NVCC_FLAGS "-std=c++11;")
foreach(pvanet_host_warning
        -Wall
        -Wextra
        -Wno-unused-parameter
        -Wno-unknown-pragmas)
  list(APPEND CUDA_NVCC_FLAGS "--compiler-options;${pvanet_host_warning};")
endforeach()
unset(pvanet_host_warning)

# Per-configuration nvcc flags: optimised release, fully debuggable debug.
# NOTE(review): these presumably only take effect when CMAKE_BUILD_TYPE
# selects the matching configuration -- confirm against the FindCUDA docs.
set(CUDA_NVCC_FLAGS_RELEASE "-O3;-DNDEBUG;")
set(CUDA_NVCC_FLAGS_DEBUG "-g;-G;-O0;")

# Ask FindCUDA to hand the host C/C++ flags to nvcc's host compiler as well.
set(CUDA_PROPAGATE_HOST_FLAGS ON)
# Host C++ compiler flags, appended to whatever the user or toolchain already
# put in CMAKE_CXX_FLAGS.
#
# Each flag is listed exactly once. The previous two set() calls repeated
# -Wall, -g and -pthread, passed both -std=c++0x and -std=c++11 (GCC treats
# the former as an old alias of the latter), and carried a stray "-d"
# (the bare prefix of GCC's -d<letters> dump options, which does not name any
# dump), so those duplicates and the stray flag are dropped here.
set(pvanet_cxx_flags
  # optimisation / code generation
  -O3
  -DNDEBUG
  -g
  -funroll-loops
  -ftree-vectorize
  -fwrapv
  -fno-common
  -fPIC
  -pthread
  # language standard
  -std=c++11
  # project feature switch
  -D__INTRIN__ENABLED__=1
  # diagnostics
  -Wall
  -Wpedantic
  -Wsign-compare
  -Wunreachable-code
  -Wno-unused-result
)

# libstdc++ ABI selection must match the libtorch binaries being linked.
# find_package(Torch) publishes the right value in TORCH_CXX_FLAGS; only fall
# back to the previously hard-coded pre-C++11 ABI when that variable is empty.
if(NOT "${TORCH_CXX_FLAGS}" STREQUAL "")
  list(APPEND pvanet_cxx_flags ${TORCH_CXX_FLAGS})
else()
  list(APPEND pvanet_cxx_flags -D_GLIBCXX_USE_CXX11_ABI=0)
endif()

# CMAKE_CXX_FLAGS is a space-separated string, not a ;-list.
string(REPLACE ";" " " pvanet_cxx_flags "${pvanet_cxx_flags}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${pvanet_cxx_flags}")
unset(pvanet_cxx_flags)
# Every project source except the demo's main file, in build order.
# Headers are listed next to their implementation file.
set(SOURCE_FILES
  # --- host-side C++ in the project root ---
  anchors.cpp          anchors.h
  boxutils.cpp         boxutils.h
  config.cpp           config.h
  image_utils.cpp      image_utils.h
  nms.cpp              nms.h
  proposal_layer.cpp   proposal_layer.h
  dataset_classes.cpp  dataset_classes.h
  debug.h              debug.cpp
  roi_pooling.h        roi_pooling.cpp

  # --- nms/ subdirectory: C++ plus CUDA sources ---
  nms/nms.h            nms/nms.cpp
  nms/nms_cuda.h       nms/nms_cuda.cu

  # --- roipool/ subdirectory: CUDA sources ---
  roipool/roipool_cuda.h
  roipool/roipool_cuda.cu
)
# Static library built from every project source; cuda_add_library (FindCUDA)
# is used because SOURCE_FILES contains .cu files.
cuda_add_library("${CMAKE_PROJECT_NAME}_lib" STATIC ${SOURCE_FILES})

# Link the library against the packages located by find_package().
# Previously only ${REQUIRED_LIBS} was linked here, and that variable is not
# set anywhere in this file as shown, so the library declared no dependencies
# at all. It is kept in the list so a value supplied from outside (e.g.
# -DREQUIRED_LIBS=...) is still honoured.
# The keyword-less signature is deliberate: cuda_add_library links the CUDA
# runtime with the plain signature, and CMake rejects mixing plain and
# PUBLIC/PRIVATE signatures on the same target.
target_link_libraries("${CMAKE_PROJECT_NAME}_lib"
  ${REQUIRED_LIBS}
  ${TORCH_LIBRARIES}
  ${OpenCV_LIBS}
)

# The demo executable only needs its own main file: everything in
# SOURCE_FILES already lives in the static library linked below, so listing
# those sources again just compiled them a second time.
add_executable(pvanet_release pvanet_demo.cpp)
target_link_libraries(pvanet_release
  PRIVATE
    "${CMAKE_PROJECT_NAME}_lib"
    ${TORCH_LIBRARIES}
    ${OpenCV_LIBS}
)
Thank you!