Memory problem when training with libtorch (C++)

Hi, I want to run a training phase over a folder that contains photos of a person’s face. I want to process those photos with the model I already have so that I can produce a JSON file. My goal is to do this phase in C++ using libtorch so that I can later use the result of this training phase in a C# application to identify the user’s face. Unfortunately, the code I wrote consumes a lot of memory. Can anyone help me modify this code, or suggest a better way to do it?

#pragma region Library


#define _HAS_STD_BYTE 0

#include<torch/torch.h>
#include<torch/script.h>
#include<iostream>
#include <stdio.h>
#include<opencv2/opencv.hpp>
#include "facedetectcnn.h"
#include <exception>
#include <math.h>
#include <conio.h>
#include<vector>

#include <memory>

#include<opencv2/core/core.hpp>
#include<opencv2/highgui.hpp>

#include <nlohmann/json.hpp>

#include <string>


#include <cstdlib> 
#include <ctime> 

#include <math.h>

#include <stdexcept>
#include<map>
#include <fstream>

#include <windows.h> // WinApi header 

#include<comdef.h>
#include<atlsafe.h>
#include"safe_queue .h"
#include<filesystem>

#pragma endregion

#pragma region Variables
// Short alias for the nlohmann JSON value type.
using json = nlohmann::json;
//using namespace std;
// Pulls OpenCV names (Mat, imread, flip, ...) into the global namespace.
using namespace cv;
using torch::jit::Module;
// Truncated approximation of pi; not referenced by the code shown in this file.
const float PI = 3.1415;
// Location of the face-bank JSON file; not read or written by the code shown here.
std::string FaceBankPath = "D:/Project/libfacedetection/example/facebank.json";
// Location of the TorchScript model that main() loads with torch::jit::load.
std::string FaceModulePath = "D:/Project/libfacedetection/example/converted.pt";
// JSON document for the face bank; not used by the code shown here.
json FaceJSon;
// Model instance shared by main() (which loads it) and ProcessFrame() (which runs it).
torch::jit::Module FaceModule;





//define the buffer size. Do not change the size!
// NOTE(review): not referenced in the code shown; presumably consumed by the
// face detector declared in facedetectcnn.h - confirm before removing.
#define DETECT_BUFFER_SIZE 0x20000

#pragma endregion

#pragma region Calc
	/*
	std::vector<double>l2_norm(std::vector<double> const& u) {
		double accum = 0.;
		for (int i = 0; i < u.size(); ++i) {
			accum += u[i] * u[i];
		}
		auto norm = sqrt(accum);

		std::vector<double> retVec;
		for (int i = 0; i < u.size(); i++) {
			double temp = u[i] / norm;
			retVec.push_back(temp);
		}

		return retVec;
	}
	*/

// Divides `input` by its 2-norm, where the norm is taken with
// torch::norm(input, 2) (no dimension argument is given), and returns the
// scaled tensor. `input` itself is not modified.
at::Tensor l2_norm(at::Tensor input) {
	const at::Tensor magnitude = torch::norm(input, 2);
	return torch::div(input, magnitude);
}


// Euclidean (L2) distance between two vectors.
//
// The sum runs over the elements of `vec1`; `vec2` must therefore provide at
// least vec1.size() elements. Trailing extra elements of `vec2` are ignored,
// which matches the previous behaviour for that case.
//
// @param vec1  first vector; its length decides how many components are compared
// @param vec2  second vector; must not be shorter than `vec1`
// @return sqrt(sum_i (vec1[i] - vec2[i])^2), or 0 when `vec1` is empty
// @throws std::invalid_argument when `vec2` is shorter than `vec1`
//         (this used to read past the end of `vec2`)
double distance(const std::vector<double>& vec1, const std::vector<double>& vec2) {
	if (vec2.size() < vec1.size()) {
		throw std::invalid_argument("distance: vec2 must have at least as many elements as vec1");
	}

	double total = 0;
	for (std::size_t i = 0; i < vec1.size(); ++i) {
		// Squaring makes the sign irrelevant, so no abs() is needed.
		const double diff = vec1[i] - vec2[i];
		total += diff * diff;
	}

	return sqrt(total);
}

// Helper used to calculate the distance between two eyes, each given as a
// 2-D point.
class distanceCalculate {

public:
	// Returns the straight-line distance between point (A, B) and point (C, D).
	double myMethod(double A, double B, double C, double D);
};

double distanceCalculate::myMethod(double A, double B, double C, double D) {
	const double deltaFirst = A - C;
	const double deltaSecond = B - D;

	const double squaredSum = pow(deltaFirst, 2) + pow(deltaSecond, 2);
	return sqrt(squaredSum);
}

// File-level instance of the helper.
distanceCalculate myObj;
#pragma endregion






// Runs the face model on one image and returns its embedding.
//
// The image and its horizontally flipped copy are each converted to a float
// tensor (value / 255 - 0.5, laid out as 1 x 3 x rows x cols), passed through
// FaceModule, and the sum of the two outputs is normalised with l2_norm().
//
// @param image_input  8-bit, 3-channel image (CV_8UC3); must not be empty
// @return the normalised embedding, or an undefined tensor (check with
//         .defined()) when the image is unusable or the model throws
torch::Tensor ProcessFrame(Mat image_input)//int width, int height, unsigned char* data)
{
	try
	{
		// from_blob below hard-codes 3 byte-sized channels, so reject anything else
		// instead of reinterpreting the pixel buffer.
		if (image_input.empty() || image_input.type() != CV_8UC3)
		{
			std::cout << "ProcessFrame: expected a non-empty 8-bit, 3-channel image" << '\n';
			return torch::Tensor();
		}

		// Inference only: make sure no autograd graph is recorded for the forward passes.
		torch::NoGradGuard no_grad;

		// from_blob needs densely packed rows; copy only when the Mat is a strided view.
		const Mat source = image_input.isContinuous() ? image_input : image_input.clone();
		Mat flipimage;
		flip(source, flipimage, 1);

		// torch::from_blob does NOT take ownership of the pixel buffer, so the Mat
		// must stay alive until .to(at::kFloat) has copied the data into a tensor
		// that owns its storage. (Previously the Mats were released before that
		// copy, so the conversion read freed memory.)
		const auto to_model_input = [](const Mat& pixels)
		{
			torch::Tensor bytes = torch::from_blob(pixels.data, { pixels.rows, pixels.cols, 3 }, torch::kByte);
			torch::Tensor input = bytes.to(at::kFloat).div(255).sub_(0.5).unsqueeze(0);
			return input.permute({ 0,3,1,2 });
		};

		const torch::Tensor img_tensor = to_model_input(source);
		const torch::Tensor img_tensor_flip = to_model_input(flipimage);

		const at::Tensor output_org = FaceModule.forward({ img_tensor }).toTensor();
		const at::Tensor output_flip = FaceModule.forward({ img_tensor_flip }).toTensor();

		return l2_norm(output_org + output_flip);
	}
	catch (const std::exception& e)
	{
		std::cout << e.what() << '\n';
	}

	// Every path must return a value: report failure with an undefined tensor.
	return torch::Tensor();
}


// Lists the regular files found directly inside `path` (no recursion).
// Each result is the entry's path, as reported by the directory iterator,
// converted to std::string. Directories and other non-regular entries are
// left out.
std::vector<std::string> get_filenames(std::filesystem::path path)
{
	namespace stdfs = std::filesystem;

	std::vector<std::string> filenames;

	for (const stdfs::directory_entry& entry : stdfs::directory_iterator{ path })
	{
		// Drop this guard if all names (names of directories etc.) are required.
		if (!stdfs::is_regular_file(entry.path()))
		{
			continue;
		}

		filenames.push_back(entry.path().string());
	}

	return filenames;
}

// Lists the sub-directories found directly inside `path` (no recursion).
// Each result is the entry's path as reported by the directory iterator;
// entries that are not directories are left out.
std::vector<std::filesystem::path> get_folderPath(std::filesystem::path path)
{
	namespace stdfs = std::filesystem;

	std::vector<std::filesystem::path> foldersPath;

	for (const stdfs::directory_entry& entry : stdfs::directory_iterator{ path })
	{
		// Drop this check if all names (files as well as directories) are required.
		if (stdfs::is_directory(entry.path()))
		{
			foldersPath.push_back(entry.path());
		}
	}

	return foldersPath;
}

int main()
{
	FaceModule = torch::jit::load(FaceModulePath);
	
	std::cout << " loadding face module " << "\n";
	std::vector<at::Tensor> embeddings;
	for (const auto& name : get_folderPath("D:/Facebank"))
	{
		torch::AutoGradMode enable_grad(false);
		std::cout << "name :" << name.filename() << ": " << '\n';

		std::vector<torch::Tensor> embs;
		embs.clear();
		int a = 0;
		for (const auto& i : get_filenames(name))
		{

			//std::cout <<'\t' << i << std::endl;

			Mat image = imread(i);
			/*
			cv::namedWindow("Display window");// Create a window for display.
			cv::imshow("Display window", image);
			cv::waitKey(0);
			*/

			embs.push_back(ProcessFrame(image));
			
			image.release();
			std::cout << '\t' << a++ << std::endl;
		}

		torch::TensorList tensor_list = torch::TensorList(embs);
		torch::Tensor embedding;
		//torch::TensorList tensor_list{ embs };
		embedding = torch::cat(tensor_list).mean();
		embeddings.push_back(embedding);
	}


	torch::Tensor embeded;
	//torch::TensorList tensor_list{ embeddings };
	torch::TensorList tensor_list = torch::TensorList(embeddings);
	embeded = torch::cat(tensor_list);
	std::cout << "rezvan's last vector is :  " << embeded.values().toString() << '\n';
	//for (const auto& name : embeddings) std::cout << name.toString() <<std::endl;
	return 0;
}