Memory problem when training with libtorch (C++)

Hi, I want to run a training phase over a folder that contains photos of a person’s face. I also want to process those photos with the model I already have so that I can produce a JSON file. My goal is to do this phase in C++ using libtorch so that I can later use its result in a C# application to identify the user’s face. Unfortunately, the code I wrote consumes a lot of memory. Can anyone help me modify this code, or suggest a better approach?

#pragma region Library

#define _HAS_STD_BYTE 0

#include <stdio.h>
#include "facedetectcnn.h"
#include <exception>
#include <math.h>
#include <conio.h>

#include <memory>


#include <nlohmann/json.hpp>

#include <string>

#include <cstdlib> 
#include <ctime> 

#include <math.h>

#include <stdexcept>
#include <fstream>

#include <windows.h> // WinApi header 

#include"safe_queue .h"

#pragma endregion

#pragma region Variables
// File-scope aliases, constants and mutable global state shared by the functions in this file.
using json = nlohmann::json;
//using namespace std;
// NOTE(review): file-scope using-directive; the code below relies on it for the
// unqualified names Mat, imread and flip.
using namespace cv;
using torch::jit::Module;
// NOTE(review): truncated approximation of pi (3.14159265...); not referenced in the
// visible part of this file.
const float PI = 3.1415;
// Absolute, machine-specific path of the face-bank JSON file; not referenced in the
// visible part of this file.
std::string FaceBankPath = "D:/Project/libfacedetection/example/facebank.json";
// Passed to torch::jit::load() in main().
// NOTE(review): the value ends with '/', so it looks like a directory rather than a
// model file - confirm that the model file name is not missing.
std::string FaceModulePath = "D:/Project/libfacedetection/example/";
// JSON document object; not referenced in the visible part of this file.
json FaceJSon;
// TorchScript module: assigned in main() and invoked through forward() in ProcessFrame().
torch::jit::Module FaceModule;

//define the buffer size. Do not change the size!
// 0x20000 == 131072; the macro is not referenced in the visible part of this file.
#define DETECT_BUFFER_SIZE 0x20000

#pragma endregion

#pragma region Calc
	/// Scales a vector to unit Euclidean (L2) length.
	///
	/// @param u  Input components; left unmodified.
	/// @return   A new vector holding u[i] / ||u||_2 for every index; empty when u is empty.
	///           When the norm is zero (every component is zero) the floating-point
	///           division produces NaN components; no exception is thrown.
	std::vector<double> l2_norm(std::vector<double> const& u) {
		// Sum of squares; range-for avoids the signed/unsigned index comparison.
		double accum = 0.;
		for (const double component : u) {
			accum += component * component;
		}
		const double norm = sqrt(accum);

		std::vector<double> retVec;
		retVec.reserve(u.size()); // one allocation instead of repeated growth
		for (const double component : u) {
			retVec.push_back(component / norm);
		}
		return retVec;
	}

/// Tensor overload: divides `input` by its 2-norm.
///
/// The norm is obtained from torch::norm(input, 2) with no `dim` argument, i.e. a
/// single value for the whole tensor.
/// NOTE(review): for a batch with more than one row this normalises all rows jointly,
/// not each row on its own - confirm that callers only pass one embedding at a time.
///
/// @param input  Tensor to normalise; not modified (torch::div returns a new tensor).
/// @return       input / ||input||_2.
at::Tensor l2_norm(at::Tensor input) {
	const at::Tensor norm = torch::norm(input, 2);
	return torch::div(input, norm);
}

/// Euclidean distance between two vectors, measured over the elements of `vec1`.
///
/// @param vec1  First vector; its length decides how many components are compared.
/// @param vec2  Second vector; must hold at least vec1.size() elements. Any extra
///              trailing elements are ignored, as in the original implementation.
/// @return      sqrt(sum_i (vec1[i] - vec2[i])^2); 0 when vec1 is empty.
/// @throws std::invalid_argument when vec2 is shorter than vec1 (the original read
///         past the end of vec2 in that case).
double distance(const std::vector<double>& vec1, const std::vector<double>& vec2) {
	if (vec2.size() < vec1.size()) {
		throw std::invalid_argument("distance: vec2 has fewer elements than vec1");
	}

	double total = 0;
	for (std::size_t i = 0; i < vec1.size(); ++i) {
		// Squaring removes the sign, so no abs() is needed; x * x also avoids pow().
		const double diff = vec1[i] - vec2[i];
		total += diff * diff;
	}
	return sqrt(total);
}

// Calculates the distance between two eyes (per the original comment); the code itself
// is a plain 2-D Euclidean distance between two points.
class distanceCalculate {
public:
	/// Euclidean distance between the points (A, B) and (C, D).
	///
	/// @param A  x coordinate of the first point.
	/// @param B  y coordinate of the first point.
	/// @param C  x coordinate of the second point.
	/// @param D  y coordinate of the second point.
	/// @return   sqrt((A - C)^2 + (B - D)^2).
	double myMethod(double A, double B, double C, double D) const;
};

double distanceCalculate::myMethod(double A, double B, double C, double D) const {
	const double dx = A - C;
	const double dy = B - D;
	return sqrt(dx * dx + dy * dy);
}

// File-scope instance through which myMethod() can be called.
distanceCalculate myObj;
#pragma endregion

/// Converts an 8-bit, 3-channel image into the 1x3xHxW float tensor passed to FaceModule.
///
/// NOTE(review): the pixel-buffer argument of from_blob and the byte->float conversion
/// were missing from the original text. They are reconstructed here as `Mat::data` and
/// "add a batch axis, convert to float, scale by 1/255"; the 1/255 scaling is inferred
/// from the 0.5 shift that follows it - confirm against the preprocessing the model
/// was trained with.
static torch::Tensor ToModelInput(const Mat& image)
{
	// from_blob only wraps the existing buffer (no copy) and assumes densely packed
	// rows, so use a private packed copy when the Mat is not continuous.
	const Mat packed = image.isContinuous() ? image : image.clone();
	const torch::Tensor bytes = torch::from_blob(packed.data, { packed.rows, packed.cols, 3 }, torch::kByte);

	// The dtype conversion allocates storage owned by the tensor, so the result does
	// not depend on `packed` staying alive and the in-place ops never touch the Mat.
	return bytes.unsqueeze(0).to(torch::kFloat).div_(255).sub_(0.5).permute({ 0,3,1,2 });
}

/// Runs FaceModule on an image and on its horizontal mirror and returns the
/// L2-normalised sum of the two outputs.
///
/// @param image_input  8-bit, 3-channel image (e.g. the result of imread).
/// @return             l2_norm(output(image) + output(mirrored image)). When anything
///                     fails, the message is written to std::cout and an undefined
///                     tensor is returned - check `.defined()` before using the result.
///                     (The original fell off the end of the function on that path.)
torch::Tensor ProcessFrame(Mat image_input)//int width, int height, unsigned char* data)
{
	try
	{
		if (image_input.empty() || image_input.type() != CV_8UC3)
			throw std::invalid_argument("ProcessFrame: expected a non-empty 8-bit 3-channel image");

		// Inference only: do not let autograd record the forward passes.
		torch::NoGradGuard no_grad;

		// The input is only read, so the full-image clone() of the original is not needed.
		Mat flipimage;
		flip(image_input, flipimage, 1);

		const torch::Tensor img_tensor = ToModelInput(image_input);
		const torch::Tensor img_tensor_flip = ToModelInput(flipimage);

		const at::Tensor output_org = FaceModule.forward({ img_tensor }).toTensor();
		const at::Tensor output_flip = FaceModule.forward({ img_tensor_flip }).toTensor();

		return l2_norm(output_org + output_flip);
	}
	catch (const std::exception& e)
	{
		std::cout << e.what();
		return torch::Tensor();
	}
}


/// Lists the regular files directly inside `path` (no recursion).
///
/// @param path  Directory to scan.
/// @return      Full path of every regular file, as strings, in the unspecified order
///              in which the directory iterator yields them.
/// @throws std::filesystem::filesystem_error when `path` cannot be opened as a directory.
std::vector<std::string> get_filenames(const std::filesystem::path& path)
{
	namespace stdfs = std::filesystem;

	std::vector<std::string> filenames;

	for (const stdfs::directory_entry& entry : stdfs::directory_iterator{ path })
	{
		if (entry.is_regular_file()) // comment out if all names (names of directories etc.) are required
		{
			filenames.push_back(entry.path().string());
		}
	}

	return filenames;
}

/// Lists the sub-directories directly inside `path` (no recursion).
///
/// @param path  Directory to scan.
/// @return      Path of every entry that is a directory, in the unspecified order in
///              which the directory iterator yields them.
/// @throws std::filesystem::filesystem_error when `path` cannot be opened as a directory.
std::vector<std::filesystem::path> get_folderPath(const std::filesystem::path& path)
{
	namespace stdfs = std::filesystem;

	std::vector<std::filesystem::path> foldersPath;

	for (const stdfs::directory_entry& entry : stdfs::directory_iterator{ path })
	{
		if (entry.is_directory()) // comment out if all names (names of files etc.) are required
		{
			foldersPath.push_back(entry.path());
		}
	}

	return foldersPath;
}

int main()
	FaceModule = torch::jit::load(FaceModulePath);
	std::cout << " loadding face module " << "\n";
	std::vector<at::Tensor> embeddings;
	for (const auto& name : get_folderPath("D:/Facebank"))
		torch::AutoGradMode enable_grad(false);
		std::cout << "name :" << name.filename() << ": " << '\n';

		std::vector<torch::Tensor> embs;
		int a = 0;
		for (const auto& i : get_filenames(name))

			//std::cout <<'\t' << i << std::endl;

			Mat image = imread(i);
			cv::namedWindow("Display window");// Create a window for display.
			cv::imshow("Display window", image);

			std::cout << '\t' << a++ << std::endl;

		torch::TensorList tensor_list = torch::TensorList(embs);
		torch::Tensor embedding;
		//torch::TensorList tensor_list{ embs };
		embedding = torch::cat(tensor_list).mean();

	torch::Tensor embeded;
	//torch::TensorList tensor_list{ embeddings };
	torch::TensorList tensor_list = torch::TensorList(embeddings);
	embeded = torch::cat(tensor_list);
	std::cout << "rezvan's last vector is :  " << embeded.values().toString() << '\n';
	//for (const auto& name : embeddings) std::cout << name.toString() <<std::endl;
	return 0;