I am trying to build torchlib c++ shared library using cmake for using it later in python. My c++ class is as follows:
#include <torch/torch.h>
#include <torch/script.h>
// This header is what defines the custom class registration
// behavior specifically. script.h already includes this, but
// we include it here so you know it exists in case you want
// to look at the API or implementation.
#include <torch/custom_class.h>
#include <string>
#include <vector>
#include <ATen/ATen.h>
#include <cuda_runtime_api.h>
#include <cuda.h>
#include <c10/cuda/CUDACachingAllocator.h>
template <class T>
struct MyStackClass : torch::CustomClassHolder {
std::vector<T> stack_;
torch::jit::script::Module module;
MyStackClass(std::string init) {
try {
module = torch::jit::load(init, torch::kCUDA);
module.eval();
}
catch (const c10::Error& e) {
std::cerr << "error loading the face model\n";
}
}
/* function definition */
at::Tensor get_face_feat(torch::Tensor& input_tensor) {
std::vector<torch::jit::IValue> inputs;
inputs.push_back(input_tensor); //.to(at::kCUDA));
// Execute the model and turn its output into a tensor.
at::Tensor output = module.forward(inputs).toTensor();
output = output.to(at::kCPU);
c10::cuda::CUDACachingAllocator::emptyCache();
return output;
}
};
TORCH_LIBRARY(my_classes, m) {
m.class_<MyStackClass<std::string>>("MyStackClass")
.def(torch::init<std::string>())
.def("get_face_feat", &MyStackClass<std::string>::get_face_feat)
;
}
From cmake I can build the library
cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
project(custom_class)
find_package(Torch REQUIRED )
find_package( CUDA REQUIRED)
# Define our library target
add_library(custom_class SHARED class.cpp)
set(CMAKE_CXX_STANDARD 14)
# Link against LibTorch
target_link_libraries(custom_class "${TORCH_LIBRARIES}")
In python I can use the library as follows:
import torch
print(torch.__version__)
import cv2
from torchvision import transforms
import sys
# `torch.classes.load_library()` allows you to pass the path to your .so file
# to load it in and make the custom C++ classes available to both Python and
# TorchScript
torch.classes.load_library("build/libcustom_class.so")
# You can query the loaded libraries like this:
print(torch.classes.loaded_libraries)
s = torch.classes.my_classes.MyStackClass("<path_to_checkpoint.pth>")
im = cv2.imread('<path_to_image.jpg>')
transform = transforms.Compose([ ... ])
normface = cv2.resize(im, (112,112))
normface = transform(normface)
normface = torch.unsqueeze(normface, 0)
feat = s.get_face_feat(normface.to('cuda'))
Now the problem is that GPU memory is not being released after the last line feat = s.get_face_feat(normface.to(‘cuda’)) and gpu memory keeps on increasing on each call. Any guidance is appreciated.