Hi. I ran into a weird issue involving pthread_setaffinity_np and tensor allocation.
Hardware
Intel(R) Xeon(R) CPU Gold 5218 x 2
Nvidia Titan RTX
Software
Ubuntu 20.04
PyTorch v1.9.0
CUDA 11.3
cuDNN 8.2.1
Below is a snippet of my program.
#include <torch/script.h>
#include <torch/cuda.h>
#include <chrono>
#include <iostream>
#include <thread>
#include <pthread.h>
#include <cuda_runtime_api.h>
/// Benchmark loop: repeatedly builds a {1, 3, 224, 224} tensor of ones with
/// pinned_memory(true) and prints how long each construction took.
///
/// The loop has no exit condition, so this function never returns; it runs
/// until the process is terminated.
void create()
{
    while (true) {
        // now() is a user-defined helper (defined elsewhere) built on the
        // <chrono> library and std::chrono::steady_clock.
        const long start_time = now();
        // Only the tensor construction sits between the two timestamps.
        auto tensor = torch::ones({1, 3, 224, 224}, torch::TensorOptions().pinned_memory(true));
        const long end_time = now();
        // std::endl flushes, so every measurement is visible as soon as it is produced.
        std::cout << (end_time - start_time) << std::endl;
    }
}
int main(int argc, char* argv[])
{
std::thread crt(create);
// cpu_set_t cpuset;
// CPU_ZERO(&cpuset);
// CPU_SET(0, &cpuset);
//
// int s = pthread_setaffinity_np(crt.native_handle(), sizeof(cpu_set_t), &cpuset);
crt.join();
return 0;
}
When I run this program, creating the tensor takes under 1 ms (about 0.01 ms).
But if I uncomment those lines so that the affinity code runs, the time increases dramatically (60 ms ~ 100 ms or more).
I also tried the command below instead; with it, the timing is normal.
taskset -c 0 ./program
Can you give me some advice on this issue?