I tried a simple C++ program to test libtorch:
// Creates a 3x3 identity tensor, requesting a CUDA device and float dtype,
// and prints it to standard output.
int main() {
    // Name the tensor options so the creation call stays short.
    const auto options = torch::device(at::kCUDA).dtype(at::kFloat);
    torch::Tensor out = torch::eye({ 3, 3 }, options);

    std::cout << out << std::endl;
    return 0;
}
After some tinkering, it worked as expected:
1 0 0
0 1 0
0 0 1
[ CUDAFloatType{3,3} ]
However, after compiling it together with OpenACC code, it doesn’t work as expected anymore:
// Edge length of the square test matrices. A typed constant replaces the
// former object-like macro so the name obeys normal C++ scoping rules.
constexpr int SIZE = 10000;

// Operand matrices (a, b) and result matrix (c) for the element-wise addition.
// Each one holds SIZE * SIZE floats, so they are kept at namespace scope
// (static storage) rather than as locals, where they would not fit on a
// typical stack.
float a[SIZE][SIZE];
float b[SIZE][SIZE];
float c[SIZE][SIZE];
// Initialises a and b on the host, computes c = a + b element-wise inside an
// OpenACC kernels region, then creates a 3x3 identity tensor (requesting a
// CUDA device and float dtype) and prints it.
int main() {
    // Host-side initialisation of both operands and the result matrix.
    for (int i = 0; i < SIZE; ++i) {
        for (int j = 0; j < SIZE; ++j) {
            a[i][j] = static_cast<float>(i) + j;
            b[i][j] = static_cast<float>(i) - j;
            c[i][j] = 0.0f;
        }
    }

    // a and b are only read in the region, so they are copied in. c is
    // overwritten in full and never read there, so copyout is sufficient:
    // it skips the host-to-device upload that copy(c) would perform while
    // still copying the result back to the host when the region ends.
#pragma acc kernels copyin(a, b) copyout(c)
    for (int i = 0; i < SIZE; ++i) {
        for (int j = 0; j < SIZE; ++j) {
            c[i][j] = a[i][j] + b[i][j];
        }
    }

    torch::Tensor out = torch::eye( { 3, 3 }, torch::device(at::kCUDA).dtype(at::kFloat) );
    std::cout << out << std::endl;
    return 0;
}
The tensor is now created on the CPU instead of the GPU:
1 0 0
0 1 0
0 0 1
[ CPUFloatType{3,3} ]
My CMakeLists.txt is as minimal as I could make it:
# Minimal build script: one executable, `main`, built from main.cpp and
# combining libtorch with OpenACC.
cmake_minimum_required(VERSION 3.10)

# Only the C++ toolchain is enabled for this project.
set(LANGUAGES CXX)
project(main LANGUAGES ${LANGUAGES})

# Directory-wide preprocessor definition.
add_definitions(-DENABLE_SSE)

# Both packages are mandatory; configuration fails if either is missing.
find_package(Torch REQUIRED)
find_package(OpenACC REQUIRED)

# Append the OpenACC and Torch flag strings to the global C++ flags.
string(APPEND CMAKE_CXX_FLAGS " ${OpenACC_CXX_FLAGS} ${TORCH_CXX_FLAGS} ")

add_executable(main main.cpp)
set_target_properties(main PROPERTIES CXX_STANDARD 14)

# Link the Torch libraries, the Torch CUDA libraries, and the OpenACC options,
# in the same order as before.
target_link_libraries(main
  "${TORCH_LIBRARIES}"
  "${TORCH_CUDA_LIBRARIES}"
  ${OpenACC_CXX_OPTIONS}
)
I’m trying to expose memory from torch to OpenACC via the CUDA array interface; that’s why I’m compiling both frameworks in the same program.