Why can't libtorch run inference after cudaDeviceReset?

Hi, I use libtorch to deploy my project. Sometimes I want to reload the model to detect objects, so I call cudaDeviceReset to release resources. My code looks like this:

// some preprocessing ...

// region: load model and detect
// load model
m_device = torch::Device(torch::kCUDA);
m_model = std::make_unique<torch::jit::Module>(torch::jit::load(modelFile));

// read the image with OpenCV (as grayscale, then expand to 3 channels)
cv::Mat mat = cv::imread("xxxx.bmp", cv::IMREAD_GRAYSCALE);
cv::cvtColor(mat, mat, cv::COLOR_GRAY2RGB);
// some other processing ...

torch::Tensor inputs = torch::from_blob(mat.data,
    { 1, imgRow, imgCol, 3 }, torch::kByte);

inputs = inputs.permute({ 0, 3, 1, 2 });  // NHWC -> NCHW
inputs = inputs.toType(torch::kFloat32);
inputs = inputs.div(255);
// move the tensor to the device (GPU)
inputs = inputs.to(m_device);

if (nullptr == m_model)
    return -1;
// this works well
auto out = m_model->forward({ inputs }).toTuple();


// region: release model
if (nullptr != m_model)
    m_model = nullptr;

c10::cuda::CUDACachingAllocator::CUDAAllocator* pCudaAllocator =
    c10::cuda::CUDACachingAllocator::get();

if (nullptr != pCudaAllocator)
    pCudaAllocator->emptyCache();

cudaDeviceSynchronize();
cudaDeviceReset();

// after releasing the model, the `load model and detect` code above is called again

Well, the above code shows the main procedure. After running it about three times, m_model->forward({ inputs }).toTuple(); throws an exception. If I do not call cudaDeviceReset(); cudaDeviceSynchronize();, everything works fine.

I want to know why calling cudaDeviceReset multiple times causes this inference problem. Please help me resolve it, thank you very much.

Why do you want to reset the GPU, and which error are you seeing? Are you also resetting libtorch and initializing it again afterwards?
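For context: cudaDeviceReset destroys the device's primary CUDA context, but libtorch keeps process-wide CUDA state (caching-allocator bookkeeping, streams, cuBLAS/cuDNN handles) that still refers to the destroyed context, so the next allocation or kernel launch can fail. If the goal is just to free GPU memory between model reloads, something like the following sketch (release the module, then empty libtorch's caching allocator, with no device reset) is usually enough — the function name and signature here are illustrative, not a libtorch API:

```cpp
#include <torch/script.h>
#include <c10/cuda/CUDACachingAllocator.h>
#include <cuda_runtime_api.h>
#include <memory>

// Sketch: free a TorchScript model's GPU memory without destroying the
// CUDA context that libtorch's runtime state still depends on.
void releaseModel(std::unique_ptr<torch::jit::Module>& model) {
    model.reset();            // drop the module and its parameters

    cudaDeviceSynchronize();  // let in-flight kernels finish first

    // Return the cached blocks held by libtorch's allocator to the driver.
    c10::cuda::CUDACachingAllocator::emptyCache();

    // Deliberately no cudaDeviceReset(): the primary context stays valid,
    // so a later torch::jit::load(...) + forward(...) keeps working.
}
```

With this, loading and running the model again in a loop should keep working; cudaDeviceReset is only safe once every libtorch object and all of its CUDA state are gone, which is hard to guarantee inside a running process.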