Hi glaringlee,
Thanks a lot for your answer!
I was working on your suggestions, but I couldn't figure out how to free all of the GPU memory my program allocated once it has finished running.
I tried several approaches. I began by trying to release the nn::Conv2d memory, then I tried with a single Tensor; only part of the memory was released. I also called c10::cuda::CUDACachingAllocator::emptyCache(), but nothing changed.
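For a single Tensor, the pattern I understood from the docs is to drop every reference first and only call emptyCache() afterwards. Here is a minimal sketch of what I mean (the helper name is just for illustration), so please correct me if this is not the intended way:

#include <torch/torch.h>
#include <c10/cuda/CUDACachingAllocator.h>

void releaseTensorExample(torch::Device device)
{
    {
        // allocate directly on the GPU
        auto t = torch::rand({ 10000, 10000, 3 }, device);
        // ... use t ...
    } // t is destroyed here; its block goes back to the caching allocator
    // return the cached blocks to the driver so other processes can reuse them
    c10::cuda::CUDACachingAllocator::emptyCache();
}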
I commented and uncommented several parts of the code below while trying to find the best approach.
I also tried to obtain all the Tensors of the Module and call cudaFree on each Tensor's data, but that didn't work either. Perhaps my approach was not the best.
If you or someone else could share some simple code that works for a module like nn::Conv2d and/or a Tensor, that would be great!
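In case it clarifies what I am after, this is the kind of pattern I was hoping would work for a whole module (just a sketch; I am assuming that destroying the module holder is enough to release the parameter storages, which is exactly what I am unsure about):

#include <torch/torch.h>
#include <c10/cuda/CUDACachingAllocator.h>

void releaseModuleExample(torch::Device device)
{
    {
        auto conv = torch::nn::Conv2d(
            torch::nn::Conv2dOptions(1000, 1000, 3).stride(2).padding(1).bias(false));
        conv->to(device); // the weight now lives on the GPU
        // ... run the forward pass ...
    } // conv is destroyed here; its parameter storages go back to the caching allocator
    c10::cuda::CUDACachingAllocator::emptyCache(); // hand the cached memory back to CUDA
}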
Here is my testing code:
int gpu_id = 0;
auto device = torch::Device(torch::kCUDA, gpu_id);
///// TRYING TO RELEASE A SIMPLE TENSOR ////
///// GPU MEMORY : 0.7 GB
///// DEDICATED GPU MEMORY : 0.6 GB
int rows = 10000;
int columns = 10000;
int channels = 3;
// note: from_blob does not take ownership of this host buffer
float* tensorDataPtr = new float[rows * columns * channels];
auto tensorCreated = torch::from_blob(tensorDataPtr, { rows, columns, channels }, c10::TensorOptions().dtype(torch::kFloat32))/*.to(torch::kCUDA)*/;
tensorCreated = tensorCreated.to(device);
///// GPU MEMORY : 2.3 GB
///// DEDICATED GPU MEMORY : 2.2 GB
cudaFree(tensorCreated.data_ptr()); // attempt: free the tensor's device memory directly
///// GPU MEMORY : 1.2 GB
///// DEDICATED GPU MEMORY : 1.1 GB
c10::cuda::CUDACachingAllocator::resetAccumulatedStats(gpu_id);
c10::cuda::CUDACachingAllocator::resetPeakStats(gpu_id);
c10::cuda::CUDACachingAllocator::emptyCache();
///////////////////////////////////////////
///// GPU MEMORY : 1.2 GB
///// DEDICATED GPU MEMORY : 1.1 GB
int c_in = 1000;
int c_out = 1000;
auto conv2d = nn::Conv2d(nn::Conv2dOptions(c_in, c_out, torch::ExpandingArray<2>(3))
                             .stride(torch::ExpandingArray<2>(2))
                             .padding(torch::ExpandingArray<2>(1))
                             .bias(false));
conv2d->to(device);
//cudaFree(conv2d->weight.storage().data());
//cudaFree(conv2d->bias.storage().data());
// attempt: free every parameter storage directly with cudaFree
std::vector<at::Tensor> tensors = conv2d->parameters(true);
for (size_t i = 0; i < tensors.size(); i++)
{
    auto tensor = tensors[i];
    auto storage = tensor.storage();
    auto data = storage.data();
    cudaFree(data);
}
// same attempt for any registered buffers
tensors = conv2d->buffers(true);
for (size_t i = 0; i < tensors.size(); i++)
{
    auto tensor = tensors[i];
    auto storage = tensor.storage();
    auto data = storage.data();
    cudaFree(data);
}
//auto namedBuffers = conv2d->named_buffers(true);
//
//for (size_t i = 0; i < namedBuffers.size(); i++)
//{
//    auto tensor = namedBuffers[i].value();
//    auto storage = tensor.storage();
//    auto data = storage.data();
//    cudaFree(data);
//}
//auto named_parameters = conv2d->named_parameters(true);
//
//for (size_t i = 0; i < named_parameters.size(); i++)
//{
//    auto tensor = named_parameters[i].value();
//    auto storage = tensor.storage();
//    auto data = storage.data();
//    cudaFree(data);
//}