Good day!
I'm just starting out with PyTorch and I can't figure out how to work with the output of the converted model.
Task: find key points on faces using C++ and OpenCV. To detect the faces themselves, I use the dnn module of the OpenCV library with a Caffe model.
std::vector<cv::Point> MainClass::face_detector(cv::Mat src, cv::dnn::Net &net)
{
    std::vector<cv::Point> res;
    if (src.empty() || net.empty())
    {
        std::cerr << "No image or the model is empty!" << std::endl;
        return res;
    }
    // (104, 177, 123) are the BGR mean values the Caffe face detector was trained with
    cv::Mat blob = cv::dnn::blobFromImage(src, 1.0, cv::Size(300, 300), cv::Scalar(104.0, 177.0, 123.0), false, false);
    net.setInput(blob, "data");
    cv::Mat detection = net.forward("detection_out");
    // The output is a 4D blob [1, 1, N, 7]; wrap the N x 7 detections in a 2D Mat
    cv::Mat detection_matrix(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
    for (int i = 0; i < detection_matrix.rows; i++)
    {
        double confidence = detection_matrix.at<float>(i, 2);
        if (confidence < 0.7) continue;
        // Columns 3..6 hold the box corners, normalized to [0, 1]
        int left = static_cast<int>(detection_matrix.at<float>(i, 3) * src.cols);
        int top = static_cast<int>(detection_matrix.at<float>(i, 4) * src.rows);
        res.push_back(cv::Point(left, top));
        int right = static_cast<int>(detection_matrix.at<float>(i, 5) * src.cols);
        int bottom = static_cast<int>(detection_matrix.at<float>(i, 6) * src.rows);
        res.push_back(cv::Point(right, bottom));
    }
    return res;
}
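To be clear about the return format: the vector stores two consecutive points per face (top-left, then bottom-right), so a debug loop over it looks roughly like this (sketch only, not in my real code):

// Sketch: draw each detected face box; assumes <opencv2/imgproc.hpp> is included.
// face_detector returns two points per face: top-left, then bottom-right.
std::vector<cv::Point> boxes = face_detector(img, net);
for (size_t i = 0; i + 1 < boxes.size(); i += 2)
    cv::rectangle(img, boxes[i], boxes[i + 1], cv::Scalar(0, 255, 0), 2);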
bool MainClass::prepare(std::string path2img, cv::Mat &img, cv::dnn::Net &net, std::vector<cv::Point> &face_areas)
{
    img = cv::imread(path2img);
    if (img.empty())
    {
        std::cerr << "Could not open the image!" << std::endl;
        return false;
    }
    // fillCaffe is my helper that loads the prototxt/caffemodel into net
    if (!fillCaffe(net, "../models/caffe_faces/faces_deploy_caffe.prototxt", "../models/caffe_faces/faces_caffe.caffemodel")) return false;
    face_areas.clear();
    clock_t start = clock();
    face_areas = face_detector(img, net);
    if (face_areas.empty())
    {
        std::cerr << "No faces in the photo" << std::endl;
        return false;
    }
    std::cout << "Preprocessing time: " << (clock() - start) / (double)CLOCKS_PER_SEC << " s" << std::endl;
    return true;
}
Then I use the MobileNetV2 (56x56) model from https://github.com/cunjian/pytorch_face_landmark to find the key points on each detected face. For use from C++, I converted it to TorchScript in advance and got a 5.2 MB *.pt file:
void MainClass::on_pushButton_3_clicked()
{
    cv::Mat img;
    cv::dnn::Net net;
    std::vector<cv::Point> face_areas;
    if (!prepare("photo.jpg", img, net, face_areas)) return;
    torch::jit::script::Module model = torch::jit::load("../models/MobileFaceNet.pt");
    cv::Point *area_ptr = face_areas.data();
    int size = face_areas.size();
    torch::NoGradGuard no_grad;
    // face_areas holds two points per face, so step by 2
    for (int i = 0; i < size; i += 2, area_ptr += 2)
    {
        clock_t start_image = clock();
        cv::Mat cropped_face = getCroppedFace_mobileNet(img, area_ptr[0], area_ptr[1], 112);
        if (cropped_face.empty()) continue;
        cv::imwrite("cropped" + std::to_string(i) + ".png", cropped_face);
        clock_t start_net = clock();
        // From: https://discuss.pytorch.org/t/how-to-convert-an-opencv-image-into-libtorch-tensor/90818/2
        at::Tensor tensor = ToTensor(cropped_face).cpu();
        tensor = tensor.clamp_max(c10::Scalar(50));     // caps pixel values at 50
        tensor = tensor.toType(c10::kFloat).div(255.0); // scale to [0, 1]
        tensor = tensor.permute({ 2, 0, 1 });           // HWC -> CHW
        tensor = tensor.unsqueeze_(0);                  // add batch dimension -> NCHW
        std::vector<torch::jit::IValue> input;
        input.push_back(tensor);
        auto output = model.forward(input).toTuple();   // https://github.com/pytorch/pytorch/issues/22440
        at::Tensor out_tensor = output->elements()[0].toTensor();
    }
}
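For completeness, ToTensor is a small helper along the lines of the linked forum post; mine looks roughly like this (assuming a contiguous 3-channel 8-bit BGR Mat):

// Sketch of the ToTensor helper, adapted from the discuss.pytorch.org link above.
// Assumes a contiguous CV_8UC3 (BGR) cv::Mat.
// Note: from_blob does not copy the data, so the Mat must outlive the tensor
// (or the result should be .clone()d).
at::Tensor ToTensor(const cv::Mat &img)
{
    return torch::from_blob(img.data, { img.rows, img.cols, 3 }, at::kByte);
}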
As a result, I get a tensor where tensor.size(2) == 3. How do I get 68 points out of this tensor? Am I doing everything right?
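My (possibly wrong) understanding is that a 68-point landmark head should return 136 numbers per face (normalized x, y pairs), which I would then unpack roughly like this:

// Hypothetical post-processing, assuming out_tensor is a flat [1, 136]
// tensor of landmark coordinates normalized to [0, 1]:
at::Tensor pts = out_tensor.reshape({ 68, 2 });
auto acc = pts.accessor<float, 2>();
std::vector<cv::Point> landmarks;
for (int j = 0; j < 68; j++)
    landmarks.push_back(cv::Point(
        static_cast<int>(acc[j][0] * cropped_face.cols),
        static_cast<int>(acc[j][1] * cropped_face.rows)));

Is that the right idea, or does the tuple element I am reading hold something else?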