Hi, I recently noticed a potential memory leak in libtorch_cpu.
I am using libtorch_cpu 1.8.1 on Linux. Here is the C++ code that I used for testing.
#include <iostream>
#include <torch/torch.h>
#include <torch/script.h>

bool f()
{
    torch::NoGradGuard no_grad;  // disable autograd for inference
    std::cout << "--------------------------------------------------------------------" << std::endl;
    try {
        std::cout << "Engine test" << std::endl;
        //torch::jit::script::Module model = torch::jit::load("/home/ion3/Downloads/retinaface-mnet-complete.pt");
        torch::jit::script::Module model = torch::jit::load("/home/ion3/Downloads/arcface-r101.pt");
        model.eval();
        model.to(torch::kCPU);
        // Run the model repeatedly on fresh random input to watch memory usage.
        for (int i = 0; i < 100; ++i) {
            std::cout << i << std::endl;
            //torch::Tensor input_tensor = torch::rand({1, 3, 480, 640}, torch::kFloat32).to(torch::Device(torch::kCPU));
            torch::Tensor input_tensor = torch::rand({1, 3, 112, 112}, torch::kFloat32).to(torch::Device(torch::kCPU));
            //auto outputs = model.forward({input_tensor}).toTuple();
            auto outputs = model.forward({input_tensor});
        }
    }
    catch (const std::exception& e) {
        std::cout << e.what() << std::endl;
        std::cout << "--------------------------------------------------------------------\n";
        return false;
    }
    std::cout << "--------------------------------------------------------------------\n";
    return true;
}

int main(int, char**) {
    return f() ? 0 : 1;  // exit code 0 on success, 1 on failure
}
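To see whether memory actually grows across forward() calls (as opposed to one-time allocations at model load), one option is to print the process resident set size inside the loop. Here is a minimal, Linux-only sketch; the rss_kb helper below is just an illustration I am adding for this purpose, not part of libtorch or the original test.

#include <fstream>
#include <unistd.h>

// Return the current resident set size in kilobytes, or 0 on failure.
// Reads the second field (resident pages) of /proc/self/statm.
long rss_kb() {
    std::ifstream statm("/proc/self/statm");
    long size_pages = 0, resident_pages = 0;
    if (!(statm >> size_pages >> resident_pages))
        return 0;
    return resident_pages * (sysconf(_SC_PAGESIZE) / 1024);
}

Printing rss_kb() every few iterations makes it easier to tell steady per-iteration growth (a real leak) apart from a flat line after warm-up (caching or one-time allocations).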
I am not sure whether this is a libtorch bug or not, because valgrind shows:
==39105== LEAK SUMMARY:
==39105== definitely lost: 0 bytes in 0 blocks
==39105== indirectly lost: 0 bytes in 0 blocks
==39105== possibly lost: 70,976 bytes in 5 blocks
==39105== still reachable: 776,523 bytes in 12,086 blocks
==39105== suppressed: 0 bytes in 0 blocks
==39105==
==39105== ERROR SUMMARY: 3 errors from 3 contexts (suppressed: 0 from 0)
Any opinions regarding this problem?
PS:
Here is the link to the full valgrind report. I have tested with two different models.
https://drive.google.com/file/d/1qHvIgSFTMzPLZH9Zxbyi4PpIAS57mDWw/view?usp=sharing