├── README.md
├── cpp
    ├── face68landmarks.cpp
    ├── face68landmarks.h
    ├── faceenhancer.cpp
    ├── faceenhancer.h
    ├── facerecognizer.cpp
    ├── facerecognizer.h
    ├── faceswap.cpp
    ├── faceswap.h
    ├── images
    │   ├── 1.jpg
    │   ├── 14.jpg
    │   ├── 2.jpg
    │   ├── 5.jpg
    │   ├── source.jpg
    │   └── target.jpg
    ├── main.cpp
    ├── model_matrix.bin
    ├── utils.cpp
    ├── utils.h
    ├── yolov8face.cpp
    └── yolov8face.h
└── python
    ├── face_68landmarks.py
    ├── face_enhancer.py
    ├── face_recognizer.py
    ├── face_swap.py
    ├── images
        ├── 1.jpg
        ├── 14.jpg
        ├── 2.jpg
        ├── 5.jpg
        ├── source.jpg
        └── target.jpg
    ├── main.py
    ├── model_matrix.npy
    ├── utils.py
    └── yolov8face.py


/README.md:
--------------------------------------------------------------------------------
1 | 本套程序是对最近火热的换脸程序 https://github.com/facefusion/facefusion 
2 | 的剥离。在官方程序里，定义了太多的.py文件和函数模块，模块之间的嵌套太复杂，
3 | 因此我重新编写了Python程序，我的程序里只有7个.py文件，换脸程序里一共包含5个模块，
4 | 除去main.py和utils.py文件，在我的程序里每个模块对应一个.py文件，接着我编写了C++程序。
5 | 
6 | onnx文件在百度云盘，链接：https://pan.baidu.com/s/12Fw2lqkhxWD5Xbk5A2Q2YQ 
7 | 提取码：sz78
8 | 


--------------------------------------------------------------------------------
/cpp/face68landmarks.cpp:
--------------------------------------------------------------------------------
  1 | #include "face68landmarks.h"
  2 | 
  3 | using namespace cv;
  4 | using namespace std;
  5 | using namespace Ort;
  6 | 
  7 | Face68Landmarks::Face68Landmarks(string model_path)
  8 | {
  9 |     /// OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);   ///如果使用cuda加速，需要取消注释
 10 | 
 11 |     sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
 12 |     /// std::wstring widestr = std::wstring(model_path.begin(), model_path.end());  ////windows写法
 13 |     /// ort_session = new Session(env, widestr.c_str(), sessionOptions); ////windows写法
 14 |     ort_session = new Session(env, model_path.c_str(), sessionOptions); ////linux写法
 15 | 
 16 |     size_t numInputNodes = ort_session->GetInputCount();
 17 |     size_t numOutputNodes = ort_session->GetOutputCount();
 18 |     AllocatorWithDefaultOptions allocator;
 19 |     for (int i = 0; i < numInputNodes; i++)
 20 |     {
 21 |         input_names.push_back(ort_session->GetInputName(i, allocator)); /// 低版本onnxruntime的接口函数
 22 |         ////AllocatedStringPtr input_name_Ptr = ort_session->GetInputNameAllocated(i, allocator);  /// 高版本onnxruntime的接口函数
 23 |         ////input_names.push_back(input_name_Ptr.get()); /// 高版本onnxruntime的接口函数
 24 |         Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i);
 25 |         auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
 26 |         auto input_dims = input_tensor_info.GetShape();
 27 |         input_node_dims.push_back(input_dims);
 28 |     }
 29 |     for (int i = 0; i < numOutputNodes; i++)
 30 |     {
 31 |         output_names.push_back(ort_session->GetOutputName(i, allocator)); /// 低版本onnxruntime的接口函数
 32 |         ////AllocatedStringPtr output_name_Ptr= ort_session->GetInputNameAllocated(i, allocator);
 33 |         ////output_names.push_back(output_name_Ptr.get()); /// 高版本onnxruntime的接口函数
 34 |         Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i);
 35 |         auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
 36 |         auto output_dims = output_tensor_info.GetShape();
 37 |         output_node_dims.push_back(output_dims);
 38 |     }
 39 | 
 40 |     this->input_height = input_node_dims[0][2];
 41 |     this->input_width = input_node_dims[0][3];
 42 | }
 43 | 
 44 | void Face68Landmarks::preprocess(Mat srcimg, const Bbox bounding_box)
 45 | {
 46 |     float sub_max = max(bounding_box.xmax - bounding_box.xmin, bounding_box.ymax - bounding_box.ymin);
 47 |     const float scale = 195.f / sub_max;
 48 |     const float translation[2] = {(256.f - (bounding_box.xmax + bounding_box.xmin) * scale) * 0.5f, (256.f - (bounding_box.ymax + bounding_box.ymin) * scale) * 0.5f};
 49 |     ////python程序里的warp_face_by_translation函数////
 50 |     Mat affine_matrix = (Mat_<float>(2, 3) << scale, 0.f, translation[0], 0.f, scale, translation[1]);
 51 |     Mat crop_img;
 52 |     warpAffine(srcimg, crop_img, affine_matrix, Size(256, 256));
 53 |     ////python程序里的warp_face_by_translation函数////
 54 |     cv::invertAffineTransform(affine_matrix, this->inv_affine_matrix);
 55 | 
 56 |     vector<cv::Mat> bgrChannels(3);
 57 |     split(crop_img, bgrChannels);
 58 |     for (int c = 0; c < 3; c++)
 59 |     {
 60 |         bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / 255.0);
 61 |     }
 62 | 
 63 |     const int image_area = this->input_height * this->input_width;
 64 |     this->input_image.resize(3 * image_area);
 65 |     size_t single_chn_size = image_area * sizeof(float);
 66 |     memcpy(this->input_image.data(), (float *)bgrChannels[0].data, single_chn_size);
 67 |     memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
 68 |     memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[2].data, single_chn_size);
 69 | }
 70 | 
 71 | vector<Point2f> Face68Landmarks::detect(Mat srcimg, const Bbox bounding_box, vector<Point2f> &face_landmark_5of68)
 72 | {
 73 |     this->preprocess(srcimg, bounding_box);
 74 | 
 75 |     std::vector<int64_t> input_img_shape = {1, 3, this->input_height, this->input_width};
 76 |     Value input_tensor_ = Value::CreateTensor<float>(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size());
 77 | 
 78 |     Ort::RunOptions runOptions;
 79 |     vector<Value> ort_outputs = this->ort_session->Run(runOptions, this->input_names.data(), &input_tensor_, 1, this->output_names.data(), output_names.size());
 80 | 
 81 |     float *pdata = ort_outputs[0].GetTensorMutableData<float>(); /// 形状是(1, 68, 3), 每一行的长度是3，表示一个关键点坐标x,y和置信度
 82 |     const int num_points = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape()[1];
 83 |     vector<Point2f> face_landmark_68(num_points);
 84 |     for (int i = 0; i < num_points; i++)
 85 |     {
 86 |         float x = pdata[i * 3] / 64.0 * 256.0;
 87 |         float y = pdata[i * 3 + 1] / 64.0 * 256.0;
 88 |         face_landmark_68[i] = Point2f(x, y);
 89 |     }
 90 |     vector<Point2f> face68landmarks;
 91 |     cv::transform(face_landmark_68, face68landmarks, this->inv_affine_matrix);
 92 | 
 93 |     ////python程序里的convert_face_landmark_68_to_5函数////
 94 |     face_landmark_5of68.resize(5);
 95 |     float x = 0, y = 0;
 96 |     for (int i = 36; i < 42; i++) /// left_eye
 97 |     {
 98 |         x += face68landmarks[i].x;
 99 |         y += face68landmarks[i].y;
100 |     }
101 |     x /= 6;
102 |     y /= 6;
103 |     face_landmark_5of68[0] = Point2f(x, y); /// left_eye
104 | 
105 |     x = 0, y = 0;
106 |     for (int i = 42; i < 48; i++) /// right_eye
107 |     {
108 |         x += face68landmarks[i].x;
109 |         y += face68landmarks[i].y;
110 |     }
111 |     x /= 6;
112 |     y /= 6;
113 |     face_landmark_5of68[1] = Point2f(x, y); /// right_eye
114 | 
115 |     face_landmark_5of68[2] = face68landmarks[30]; /// nose
116 |     face_landmark_5of68[3] = face68landmarks[48]; /// left_mouth_end
117 |     face_landmark_5of68[4] = face68landmarks[54]; /// right_mouth_end
118 |     ////python程序里的convert_face_landmark_68_to_5函数////
119 |     return face68landmarks;
120 | }


--------------------------------------------------------------------------------
/cpp/face68landmarks.h:
--------------------------------------------------------------------------------
 1 | # ifndef DETECT_FACE68LANDMARKS
 2 | # define DETECT_FACE68LANDMARKS
 3 | #include <fstream>
 4 | #include <sstream>
 5 | #include <opencv2/imgproc.hpp>
 6 | #include <opencv2/highgui.hpp>
 7 | //#include <cuda_provider_factory.h>  ///如果使用cuda加速，需要取消注释
 8 | #include <onnxruntime_cxx_api.h>
 9 | #include"utils.h"
10 | 
11 | 
12 | class Face68Landmarks
13 | {
14 | public:
15 | 	Face68Landmarks(std::string modelpath);
16 | 	std::vector<cv::Point2f> detect(cv::Mat srcimg, const Bbox bounding_box, std::vector<cv::Point2f> &face_landmark_5of68);
17 | private:
18 | 	void preprocess(cv::Mat img, const Bbox bounding_box);
19 | 	std::vector<float> input_image;
20 | 	int input_height;
21 | 	int input_width;
22 |     cv::Mat inv_affine_matrix;
23 | 
24 | 	Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "68FaceLandMarks Detect");
25 | 	Ort::Session *ort_session = nullptr;
26 | 	Ort::SessionOptions sessionOptions = Ort::SessionOptions();
27 | 	std::vector<char*> input_names;
28 | 	std::vector<char*> output_names;
29 | 	std::vector<std::vector<int64_t>> input_node_dims; // >=1 outputs
30 | 	std::vector<std::vector<int64_t>> output_node_dims; // >=1 outputs
31 | 	Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
32 | };
33 | #endif


--------------------------------------------------------------------------------
/cpp/faceenhancer.cpp:
--------------------------------------------------------------------------------
  1 | #include"faceenhancer.h"
  2 | 
  3 | using namespace cv;
  4 | using namespace std;
  5 | using namespace Ort;
  6 | 
  7 | FaceEnhance::FaceEnhance(string model_path)
  8 | {
  9 |     /// OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);   ///如果使用cuda加速，需要取消注释
 10 | 
 11 |     sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
 12 |     /// std::wstring widestr = std::wstring(model_path.begin(), model_path.end());  ////windows写法
 13 |     /// ort_session = new Session(env, widestr.c_str(), sessionOptions); ////windows写法
 14 |     ort_session = new Session(env, model_path.c_str(), sessionOptions); ////linux写法
 15 | 
 16 |     size_t numInputNodes = ort_session->GetInputCount();
 17 |     size_t numOutputNodes = ort_session->GetOutputCount();
 18 |     AllocatorWithDefaultOptions allocator;
 19 |     for (int i = 0; i < numInputNodes; i++)
 20 |     {
 21 |         input_names.push_back(ort_session->GetInputName(i, allocator)); /// 低版本onnxruntime的接口函数
 22 |         ////AllocatedStringPtr input_name_Ptr = ort_session->GetInputNameAllocated(i, allocator);  /// 高版本onnxruntime的接口函数
 23 |         ////input_names.push_back(input_name_Ptr.get()); /// 高版本onnxruntime的接口函数
 24 |         Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i);
 25 |         auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
 26 |         auto input_dims = input_tensor_info.GetShape();
 27 |         input_node_dims.push_back(input_dims);
 28 |     }
 29 |     for (int i = 0; i < numOutputNodes; i++)
 30 |     {
 31 |         output_names.push_back(ort_session->GetOutputName(i, allocator)); /// 低版本onnxruntime的接口函数
 32 |         ////AllocatedStringPtr output_name_Ptr= ort_session->GetInputNameAllocated(i, allocator);
 33 |         ////output_names.push_back(output_name_Ptr.get()); /// 高版本onnxruntime的接口函数
 34 |         Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i);
 35 |         auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
 36 |         auto output_dims = output_tensor_info.GetShape();
 37 |         output_node_dims.push_back(output_dims);
 38 |     }
 39 | 
 40 |     this->input_height = input_node_dims[0][2];
 41 |     this->input_width = input_node_dims[0][3];
 42 | 
 43 |     ////在这里就直接定义了，没有像python程序里的那样normed_template = TEMPLATES.get(template) * crop_size
 44 |     this->normed_template.emplace_back(Point2f(192.98138112, 239.94707968));
 45 |     this->normed_template.emplace_back(Point2f(318.90276864, 240.19360256));
 46 |     this->normed_template.emplace_back(Point2f(256.63415808, 314.01934848));
 47 |     this->normed_template.emplace_back(Point2f(201.26116864, 371.410432));
 48 |     this->normed_template.emplace_back(Point2f(313.0890496,  371.1511808));
 49 | }
 50 | 
 51 | void FaceEnhance::preprocess(Mat srcimg, const vector<Point2f> face_landmark_5, Mat& affine_matrix, Mat& box_mask)
 52 | {
 53 |     Mat crop_img;
 54 |     affine_matrix = warp_face_by_face_landmark_5(srcimg, crop_img, face_landmark_5, this->normed_template, Size(512, 512));
 55 |     const int crop_size[2] = {crop_img.cols, crop_img.rows};
 56 |     box_mask = create_static_box_mask(crop_size, this->FACE_MASK_BLUR, this->FACE_MASK_PADDING);
 57 | 
 58 |     vector<cv::Mat> bgrChannels(3);
 59 |     split(crop_img, bgrChannels);
 60 |     for (int c = 0; c < 3; c++)
 61 |     {
 62 |         bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / (255.0*0.5), -1.0);
 63 |     }
 64 | 
 65 |     const int image_area = this->input_height * this->input_width;
 66 |     this->input_image.resize(3 * image_area);
 67 |     size_t single_chn_size = image_area * sizeof(float);
 68 |     memcpy(this->input_image.data(), (float *)bgrChannels[2].data, single_chn_size);    ///rgb顺序
 69 |     memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
 70 |     memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[0].data, single_chn_size);
 71 | }
 72 | 
 73 | Mat FaceEnhance::process(Mat target_img, const vector<Point2f> target_landmark_5)
 74 | {
 75 |     Mat affine_matrix;
 76 |     Mat box_mask;
 77 |     this->preprocess(target_img, target_landmark_5, affine_matrix, box_mask);
 78 | 
 79 |     std::vector<int64_t> input_img_shape = {1, 3, this->input_height, this->input_width};
 80 |     Value input_tensor_ = Value::CreateTensor<float>(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size());
 81 | 
 82 |     Ort::RunOptions runOptions;
 83 |     vector<Value> ort_outputs = this->ort_session->Run(runOptions, this->input_names.data(), &input_tensor_, 1, this->output_names.data(), output_names.size());
 84 | 
 85 |     float* pdata = ort_outputs[0].GetTensorMutableData<float>();
 86 |     std::vector<int64_t> outs_shape = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape();
 87 | 	const int out_h = outs_shape[2];
 88 | 	const int out_w = outs_shape[3];
 89 | 	const int channel_step = out_h * out_w;
 90 | 	Mat rmat(out_h, out_w, CV_32FC1, pdata);
 91 | 	Mat gmat(out_h, out_w, CV_32FC1, pdata + channel_step);
 92 | 	Mat bmat(out_h, out_w, CV_32FC1, pdata + 2 * channel_step);
 93 |     rmat.setTo(-1, rmat < -1);
 94 | 	rmat.setTo(1, rmat > 1);
 95 |     rmat = (rmat+1)*0.5;
 96 | 	gmat.setTo(-1, gmat < -1);
 97 | 	gmat.setTo(1, gmat > 1);
 98 |     gmat = (gmat+1)*0.5;
 99 | 	bmat.setTo(-1, bmat < -1);
100 | 	bmat.setTo(1, bmat > 1);
101 |     bmat = (bmat+1)*0.5;
102 | 
103 |     rmat *= 255.f;
104 | 	gmat *= 255.f;
105 | 	bmat *= 255.f;
106 |     rmat.setTo(0, rmat < 0);
107 | 	rmat.setTo(255, rmat > 255);
108 | 	gmat.setTo(0, gmat < 0);
109 | 	gmat.setTo(255, gmat > 255);
110 | 	bmat.setTo(0, bmat < 0);
111 | 	bmat.setTo(255, bmat > 255);
112 | 
113 | 	vector<Mat> channel_mats(3);
114 | 	channel_mats[0] = bmat;
115 | 	channel_mats[1] = gmat;
116 | 	channel_mats[2] = rmat;
117 |     Mat result;
118 | 	merge(channel_mats, result);
119 | 	result.convertTo(result, CV_8UC3);
120 | 
121 |     box_mask.setTo(0, box_mask < 0);
122 | 	box_mask.setTo(1, box_mask > 1);
123 |     Mat paste_frame = paste_back(target_img, result, box_mask, affine_matrix);
124 |     Mat dstimg = blend_frame(target_img, paste_frame);
125 |     return dstimg;
126 | }


--------------------------------------------------------------------------------
/cpp/faceenhancer.h:
--------------------------------------------------------------------------------
 1 | # ifndef FACEENHANCE
 2 | # define FACEENHANCE
 3 | #include <fstream>
 4 | #include <sstream>
 5 | #include "opencv2/opencv.hpp"
 6 | //#include <cuda_provider_factory.h>  ///如果使用cuda加速，需要取消注释
 7 | #include <onnxruntime_cxx_api.h>
 8 | #include"utils.h"
 9 | 
10 | 
11 | class FaceEnhance
12 | {
13 | public:
14 | 	FaceEnhance(std::string modelpath);
15 | 	cv::Mat process(cv::Mat target_img, const std::vector<cv::Point2f> target_landmark_5);
16 | private:
17 | 	void preprocess(cv::Mat target_img, const std::vector<cv::Point2f> face_landmark_5, cv::Mat& affine_matrix, cv::Mat& box_mask);
18 | 	std::vector<float> input_image;
19 | 	int input_height;
20 | 	int input_width;
21 | 	std::vector<cv::Point2f> normed_template;
22 |     const float FACE_MASK_BLUR = 0.3;
23 | 	const int FACE_MASK_PADDING[4] = {0, 0, 0, 0};
24 | 
25 | 	Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "Face Enhance");
26 | 	Ort::Session *ort_session = nullptr;
27 | 	Ort::SessionOptions sessionOptions = Ort::SessionOptions();
28 | 	std::vector<char*> input_names;
29 | 	std::vector<char*> output_names;
30 | 	std::vector<std::vector<int64_t>> input_node_dims; // >=1 outputs
31 | 	std::vector<std::vector<int64_t>> output_node_dims; // >=1 outputs
32 | 	Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
33 | };
34 | #endif


--------------------------------------------------------------------------------
/cpp/facerecognizer.cpp:
--------------------------------------------------------------------------------
 1 | #include"facerecognizer.h"
 2 | 
 3 | using namespace cv;
 4 | using namespace std;
 5 | using namespace Ort;
 6 | 
 7 | FaceEmbdding::FaceEmbdding(string model_path)
 8 | {
 9 |     /// OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);   ///如果使用cuda加速，需要取消注释
10 | 
11 |     sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
12 |     /// std::wstring widestr = std::wstring(model_path.begin(), model_path.end());  ////windows写法
13 |     /// ort_session = new Session(env, widestr.c_str(), sessionOptions); ////windows写法
14 |     ort_session = new Session(env, model_path.c_str(), sessionOptions); ////linux写法
15 | 
16 |     size_t numInputNodes = ort_session->GetInputCount();
17 |     size_t numOutputNodes = ort_session->GetOutputCount();
18 |     AllocatorWithDefaultOptions allocator;
19 |     for (int i = 0; i < numInputNodes; i++)
20 |     {
21 |         input_names.push_back(ort_session->GetInputName(i, allocator)); /// 低版本onnxruntime的接口函数
22 |         ////AllocatedStringPtr input_name_Ptr = ort_session->GetInputNameAllocated(i, allocator);  /// 高版本onnxruntime的接口函数
23 |         ////input_names.push_back(input_name_Ptr.get()); /// 高版本onnxruntime的接口函数
24 |         Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i);
25 |         auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
26 |         auto input_dims = input_tensor_info.GetShape();
27 |         input_node_dims.push_back(input_dims);
28 |     }
29 |     for (int i = 0; i < numOutputNodes; i++)
30 |     {
31 |         output_names.push_back(ort_session->GetOutputName(i, allocator)); /// 低版本onnxruntime的接口函数
32 |         ////AllocatedStringPtr output_name_Ptr= ort_session->GetInputNameAllocated(i, allocator);
33 |         ////output_names.push_back(output_name_Ptr.get()); /// 高版本onnxruntime的接口函数
34 |         Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i);
35 |         auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
36 |         auto output_dims = output_tensor_info.GetShape();
37 |         output_node_dims.push_back(output_dims);
38 |     }
39 | 
40 |     this->input_height = input_node_dims[0][2];
41 |     this->input_width = input_node_dims[0][3];
42 |     ////在这里就直接定义了，没有像python程序里的那样normed_template = TEMPLATES.get(template) * crop_size
43 |     this->normed_template.emplace_back(Point2f(38.29459984, 51.69630032));
44 |     this->normed_template.emplace_back(Point2f(73.53180016, 51.50140016));
45 |     this->normed_template.emplace_back(Point2f(56.0252,     71.73660032));
46 |     this->normed_template.emplace_back(Point2f(41.54929968, 92.36549952));
47 |     this->normed_template.emplace_back(Point2f(70.72989952, 92.20409968));
48 | }
49 | 
50 | void FaceEmbdding::preprocess(Mat srcimg, const vector<Point2f> face_landmark_5)
51 | {
52 |     Mat crop_img;
53 |     warp_face_by_face_landmark_5(srcimg, crop_img, face_landmark_5, this->normed_template, Size(112, 112));
54 |     /*vector<uchar> inliers(face_landmark_5.size(), 0);
55 |     Mat affine_matrix = cv::estimateAffinePartial2D(face_landmark_5, this->normed_template, cv::noArray(), cv::RANSAC, 100.0);
56 |     Mat crop_img;
57 |     Size crop_size(112, 112);
58 |     warpAffine(srcimg, crop_img, affine_matrix, crop_size, cv::INTER_AREA, cv::BORDER_REPLICATE);*/
59 | 
60 |     vector<cv::Mat> bgrChannels(3);
61 |     split(crop_img, bgrChannels);
62 |     for (int c = 0; c < 3; c++)
63 |     {
64 |         bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / 127.5, -1.0);
65 |     }
66 | 
67 |     const int image_area = this->input_height * this->input_width;
68 |     this->input_image.resize(3 * image_area);
69 |     size_t single_chn_size = image_area * sizeof(float);
70 |     memcpy(this->input_image.data(), (float *)bgrChannels[2].data, single_chn_size);
71 |     memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
72 |     memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[0].data, single_chn_size);
73 | }
74 | 
75 | vector<float> FaceEmbdding::detect(Mat srcimg, const vector<Point2f> face_landmark_5)
76 | {
77 |     this->preprocess(srcimg, face_landmark_5);
78 | 
79 |     std::vector<int64_t> input_img_shape = {1, 3, this->input_height, this->input_width};
80 |     Value input_tensor_ = Value::CreateTensor<float>(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size());
81 | 
82 |     Ort::RunOptions runOptions;
83 |     vector<Value> ort_outputs = this->ort_session->Run(runOptions, this->input_names.data(), &input_tensor_, 1, this->output_names.data(), output_names.size());
84 | 
85 |     float *pdata = ort_outputs[0].GetTensorMutableData<float>(); /// 形状是(1, 512)
86 |     const int len_feature = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape()[1];
87 |     vector<float> embedding(len_feature);
88 |     memcpy(embedding.data(), pdata, len_feature*sizeof(float));
89 |     return embedding;
90 | }


--------------------------------------------------------------------------------
/cpp/facerecognizer.h:
--------------------------------------------------------------------------------
 1 | # ifndef FACERECOGNIZER
 2 | # define FACERECOGNIZER
 3 | #include <fstream>
 4 | #include <sstream>
 5 | //#include <cuda_provider_factory.h>  ///如果使用cuda加速，需要取消注释
 6 | #include <onnxruntime_cxx_api.h>
 7 | #include"utils.h"
 8 | 
 9 | 
10 | class FaceEmbdding
11 | {
12 | public:
13 | 	FaceEmbdding(std::string modelpath);
14 | 	std::vector<float> detect(cv::Mat srcimg, const std::vector<cv::Point2f> face_landmark_5);
15 | private:
16 | 	void preprocess(cv::Mat img, const std::vector<cv::Point2f> face_landmark_5);
17 | 	std::vector<float> input_image;
18 | 	int input_height;
19 | 	int input_width;
20 |     std::vector<cv::Point2f> normed_template;
21 | 
22 | 	Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "Face Feature Extract");
23 | 	Ort::Session *ort_session = nullptr;
24 | 	Ort::SessionOptions sessionOptions = Ort::SessionOptions();
25 | 	std::vector<char*> input_names;
26 | 	std::vector<char*> output_names;
27 | 	std::vector<std::vector<int64_t>> input_node_dims; // >=1 outputs
28 | 	std::vector<std::vector<int64_t>> output_node_dims; // >=1 outputs
29 | 	Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
30 | };
31 | #endif


--------------------------------------------------------------------------------
/cpp/faceswap.cpp:
--------------------------------------------------------------------------------
  1 | #include"faceswap.h"
  2 | 
  3 | using namespace cv;
  4 | using namespace std;
  5 | using namespace Ort;
  6 | 
  7 | SwapFace::SwapFace(string model_path)
  8 | {
  9 |     /// OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);   ///如果使用cuda加速，需要取消注释
 10 | 
 11 |     sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
 12 |     /// std::wstring widestr = std::wstring(model_path.begin(), model_path.end());  ////windows写法
 13 |     /// ort_session = new Session(env, widestr.c_str(), sessionOptions); ////windows写法
 14 |     ort_session = new Session(env, model_path.c_str(), sessionOptions); ////linux写法
 15 |     
 16 |     size_t numInputNodes = ort_session->GetInputCount();
 17 |     size_t numOutputNodes = ort_session->GetOutputCount();
 18 |     AllocatorWithDefaultOptions allocator;
 19 |     for (int i = 0; i < numInputNodes; i++)
 20 |     {
 21 |         input_names.push_back(ort_session->GetInputName(i, allocator)); /// 低版本onnxruntime的接口函数
 22 |         ////AllocatedStringPtr input_name_Ptr = ort_session->GetInputNameAllocated(i, allocator);  /// 高版本onnxruntime的接口函数
 23 |         ////input_names.push_back(input_name_Ptr.get()); /// 高版本onnxruntime的接口函数
 24 |         Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i);
 25 |         auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
 26 |         auto input_dims = input_tensor_info.GetShape();
 27 |         input_node_dims.push_back(input_dims);
 28 |     }
 29 |     
 30 |     for (int i = 0; i < numOutputNodes; i++)
 31 |     {
 32 |         output_names.push_back(ort_session->GetOutputName(i, allocator)); /// 低版本onnxruntime的接口函数
 33 |         ////AllocatedStringPtr output_name_Ptr= ort_session->GetInputNameAllocated(i, allocator);
 34 |         ////output_names.push_back(output_name_Ptr.get()); /// 高版本onnxruntime的接口函数
 35 |         Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i);
 36 |         auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
 37 |         auto output_dims = output_tensor_info.GetShape();
 38 |         output_node_dims.push_back(output_dims);
 39 |     }
 40 | 
 41 |     this->input_height = input_node_dims[0][2];
 42 |     this->input_width = input_node_dims[0][3];
 43 |     
 44 |     const int length = this->len_feature*this->len_feature;
 45 |     this->model_matrix = new float[length];
 46 |     cout<<"start read model_matrix.bin"<<endl;
 47 |     FILE* fp = fopen("model_matrix.bin", "rb");
 48 |     fread(this->model_matrix, sizeof(float), length, fp);//导入数据
 49 |     fclose(fp);//关闭文件
 50 |     cout<<"read model_matrix.bin finish"<<endl;
 51 | 
 52 |     ////在这里就直接定义了，没有像python程序里的那样normed_template = TEMPLATES.get(template) * crop_size
 53 |     this->normed_template.emplace_back(Point2f(46.29459968, 51.69629952));
 54 |     this->normed_template.emplace_back(Point2f(81.53180032, 51.50140032));
 55 |     this->normed_template.emplace_back(Point2f(64.02519936, 71.73660032));
 56 |     this->normed_template.emplace_back(Point2f(49.54930048, 92.36550016));
 57 |     this->normed_template.emplace_back(Point2f(78.72989952, 92.20409984));
 58 | }
 59 | 
 60 | SwapFace::~SwapFace()
 61 | {
 62 | 	delete[] this->model_matrix;
 63 | 	this->model_matrix = nullptr;
 64 |     this->normed_template.clear();
 65 | }
 66 | 
 67 | void SwapFace::preprocess(Mat srcimg, const vector<Point2f> face_landmark_5, const vector<float> source_face_embedding, Mat& affine_matrix, Mat& box_mask)
 68 | {
 69 |     Mat crop_img;
 70 |     affine_matrix = warp_face_by_face_landmark_5(srcimg, crop_img, face_landmark_5, this->normed_template, Size(128, 128));
 71 |     const int crop_size[2] = {crop_img.cols, crop_img.rows};
 72 |     box_mask = create_static_box_mask(crop_size, this->FACE_MASK_BLUR, this->FACE_MASK_PADDING);
 73 | 
 74 |     vector<cv::Mat> bgrChannels(3);
 75 |     split(crop_img, bgrChannels);
 76 |     for (int c = 0; c < 3; c++)
 77 |     {
 78 |         bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / (255.0*this->INSWAPPER_128_MODEL_STD[c]), -this->INSWAPPER_128_MODEL_MEAN[c]/this->INSWAPPER_128_MODEL_STD[c]);
 79 |     }
 80 | 
 81 |     const int image_area = this->input_height * this->input_width;
 82 |     this->input_image.resize(3 * image_area);
 83 |     size_t single_chn_size = image_area * sizeof(float);
 84 |     memcpy(this->input_image.data(), (float *)bgrChannels[2].data, single_chn_size);    ///rgb顺序
 85 |     memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
 86 |     memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[0].data, single_chn_size);
 87 | 
 88 |     float linalg_norm = 0;
 89 |     for(int i=0;i<this->len_feature;i++)
 90 |     {
 91 |         linalg_norm += powf(source_face_embedding[i], 2);
 92 |     }
 93 |     linalg_norm = sqrt(linalg_norm);
 94 |     this->input_embedding.resize(this->len_feature);
 95 |     for(int i=0;i<this->len_feature;i++)
 96 |     {
 97 |         float sum=0;
 98 |         for(int j=0;j<this->len_feature;j++)
 99 |         {
100 |             sum += (source_face_embedding[j]*this->model_matrix[j*this->len_feature+i]);
101 |         }
102 |         this->input_embedding[i] = sum/linalg_norm;
103 |     }
104 | }
105 | 
106 | Mat SwapFace::process(Mat target_img, const vector<float> source_face_embedding, const vector<Point2f> target_landmark_5)
107 | {
108 |     Mat affine_matrix;
109 |     Mat box_mask;
110 |     this->preprocess(target_img, target_landmark_5, source_face_embedding, affine_matrix, box_mask);
111 | 
112 |     std::vector<Ort::Value> inputs_tensor;
113 |     std::vector<int64_t> input_img_shape = {1, 3, this->input_height, this->input_width};
114 |     inputs_tensor.emplace_back(Value::CreateTensor<float>(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size()));
115 |     std::vector<int64_t> input_embedding_shape = {1, this->len_feature};
116 |     inputs_tensor.emplace_back(Value::CreateTensor<float>(memory_info_handler, this->input_embedding.data(), this->input_embedding.size(), input_embedding_shape.data(), input_embedding_shape.size()));
117 |     
118 | 
119 |     Ort::RunOptions runOptions;
120 |     vector<Value> ort_outputs = this->ort_session->Run(runOptions, this->input_names.data(), inputs_tensor.data(), inputs_tensor.size(), this->output_names.data(), output_names.size());
121 | 
122 |     float* pdata = ort_outputs[0].GetTensorMutableData<float>();
123 |     std::vector<int64_t> outs_shape = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape();
124 | 	const int out_h = outs_shape[2];
125 | 	const int out_w = outs_shape[3];
126 | 	const int channel_step = out_h * out_w;
127 | 	Mat rmat(out_h, out_w, CV_32FC1, pdata);
128 | 	Mat gmat(out_h, out_w, CV_32FC1, pdata + channel_step);
129 | 	Mat bmat(out_h, out_w, CV_32FC1, pdata + 2 * channel_step);
130 | 	rmat *= 255.f;
131 | 	gmat *= 255.f;
132 | 	bmat *= 255.f;
133 |     rmat.setTo(0, rmat < 0);
134 | 	rmat.setTo(255, rmat > 255);
135 | 	gmat.setTo(0, gmat < 0);
136 | 	gmat.setTo(255, gmat > 255);
137 | 	bmat.setTo(0, bmat < 0);
138 | 	bmat.setTo(255, bmat > 255);
139 | 
140 | 	vector<Mat> channel_mats(3);
141 | 	channel_mats[0] = bmat;
142 | 	channel_mats[1] = gmat;
143 | 	channel_mats[2] = rmat;
144 |     Mat result;
145 | 	merge(channel_mats, result);
146 | 
147 |     box_mask.setTo(0, box_mask < 0);
148 | 	box_mask.setTo(1, box_mask > 1);
149 |     Mat dstimg = paste_back(target_img, result, box_mask, affine_matrix);
150 |     return dstimg;
151 | }


--------------------------------------------------------------------------------
/cpp/faceswap.h:
--------------------------------------------------------------------------------
 1 | # ifndef FACESWAP
 2 | # define FACESWAP
 3 | #include <fstream>
 4 | #include <sstream>
 5 | #include "opencv2/opencv.hpp"
 6 | //#include <cuda_provider_factory.h>  ///如果使用cuda加速，需要取消注释
 7 | #include <onnxruntime_cxx_api.h>
 8 | #include"utils.h"
 9 | 
10 | 
11 | class SwapFace
12 | {
13 | public:
14 | 	SwapFace(std::string modelpath);
15 | 	cv::Mat process(cv::Mat target_img, const std::vector<float> source_face_embedding, const std::vector<cv::Point2f> target_landmark_5);
16 | 	~SwapFace();  // 析构函数, 释放内存
17 | private:
18 | 	void preprocess(cv::Mat target_img, const std::vector<cv::Point2f> face_landmark_5, const std::vector<float> source_face_embedding, cv::Mat& affine_matrix, cv::Mat& box_mask);
19 | 	std::vector<float> input_image;
20 | 	std::vector<float> input_embedding;
21 | 	int input_height;
22 | 	int input_width;
23 | 	const int len_feature = 512;
24 |     float* model_matrix;
25 | 	std::vector<cv::Point2f> normed_template;
26 | 	const float FACE_MASK_BLUR = 0.3;
27 | 	const int FACE_MASK_PADDING[4] = {0, 0, 0, 0};
28 | 	const float INSWAPPER_128_MODEL_MEAN[3] = {0.0, 0.0, 0.0};
29 | 	const float INSWAPPER_128_MODEL_STD[3] = {1.0, 1.0, 1.0};
30 | 
31 | 	Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "Face Swap");
32 | 	Ort::Session *ort_session = nullptr;
33 | 	Ort::SessionOptions sessionOptions = Ort::SessionOptions();
34 | 	std::vector<char*> input_names;
35 | 	std::vector<char*> output_names;
36 | 	std::vector<std::vector<int64_t>> input_node_dims; // >=1 outputs
37 | 	std::vector<std::vector<int64_t>> output_node_dims; // >=1 outputs
38 | 	Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
39 | };
40 | #endif


--------------------------------------------------------------------------------
/cpp/images/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/cpp/images/1.jpg


--------------------------------------------------------------------------------
/cpp/images/14.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/cpp/images/14.jpg


--------------------------------------------------------------------------------
/cpp/images/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/cpp/images/2.jpg


--------------------------------------------------------------------------------
/cpp/images/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/cpp/images/5.jpg


--------------------------------------------------------------------------------
/cpp/images/source.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/cpp/images/source.jpg


--------------------------------------------------------------------------------
/cpp/images/target.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/cpp/images/target.jpg


--------------------------------------------------------------------------------
/cpp/main.cpp:
--------------------------------------------------------------------------------
 1 | #include "yolov8face.h"
 2 | #include "face68landmarks.h"
 3 | #include "facerecognizer.h"
 4 | #include "faceswap.h"
 5 | #include "faceenhancer.h"
 6 | 
 7 | using namespace cv;
 8 | using namespace std;
 9 | 
10 | int main()
11 | {
12 | 	string source_path = "/project/faceswap-cpp/images/5.jpg";
13 | 	string target_path = "/project/faceswap-cpp/images/target.jpg";
14 |     
15 | 	////图片路径和onnx文件的路径，要确保写正确，才能使程序正常运行的
16 | 	Yolov8Face detect_face_net("/project/faceswap-cpp/weights/yoloface_8n.onnx");
17 | 	Face68Landmarks detect_68landmarks_net("/project/faceswap-cpp/weights/2dfan4.onnx");
18 | 	FaceEmbdding face_embedding_net("/project/faceswap-cpp/weights/arcface_w600k_r50.onnx");
19 | 	SwapFace swap_face_net("/project/faceswap-cpp/weights/inswapper_128.onnx");
20 | 	FaceEnhance enhance_face_net("/project/faceswap-cpp/weights/gfpgan_1.4.onnx");
21 | 
22 | 	Mat source_img = imread(source_path);
23 | 	Mat target_img = imread(target_path);
24 | 
25 |     vector<Bbox> boxes;
26 | 	detect_face_net.detect(source_img, boxes);
27 | 	int position = 0; ////一张图片里可能有多个人脸，这里只考虑1个人脸的情况
28 | 	vector<Point2f> face_landmark_5of68;
29 | 	vector<Point2f> face68landmarks = detect_68landmarks_net.detect(source_img, boxes[position], face_landmark_5of68);
30 | 	vector<float> source_face_embedding = face_embedding_net.detect(source_img, face_landmark_5of68);
31 | 
32 | 	detect_face_net.detect(target_img, boxes);
33 | 	position = 0; ////一张图片里可能有多个人脸，这里只考虑1个人脸的情况
34 | 	vector<Point2f> target_landmark_5;
35 | 	detect_68landmarks_net.detect(target_img, boxes[position], target_landmark_5);
36 | 
37 | 	Mat swapimg = swap_face_net.process(target_img, source_face_embedding, target_landmark_5);
38 | 	Mat resultimg = enhance_face_net.process(swapimg, target_landmark_5);
39 | 	
40 | 	imwrite("resultimg.jpg", resultimg);
41 | 
42 | 	/*static const string kWinName = "Deep learning face swap use onnxruntime";
43 | 	namedWindow(kWinName, WINDOW_NORMAL);
44 | 	imshow(kWinName, resultimg);
45 | 	waitKey(0);
46 | 	destroyAllWindows();*/
47 | }


--------------------------------------------------------------------------------
/cpp/model_matrix.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/cpp/model_matrix.bin


--------------------------------------------------------------------------------
/cpp/utils.cpp:
--------------------------------------------------------------------------------
  1 | #include "utils.h"
  2 | 
  3 | using namespace std;
  4 | using namespace cv;
  5 | 
  6 | float GetIoU(const Bbox box1, const Bbox box2)
  7 | {
  8 |     float x1 = max(box1.xmin, box2.xmin);
  9 |     float y1 = max(box1.ymin, box2.ymin);
 10 |     float x2 = min(box1.xmax, box2.xmax);
 11 |     float y2 = min(box1.ymax, box2.ymax);
 12 |     float w = max(0.f, x2 - x1);
 13 |     float h = max(0.f, y2 - y1);
 14 |     float over_area = w * h;
 15 |     if (over_area == 0)
 16 |         return 0.0;
 17 |     float union_area = (box1.xmax - box1.xmin) * (box1.ymax - box1.ymin) + (box2.xmax - box2.xmin) * (box2.ymax - box2.ymin) - over_area;
 18 |     return over_area / union_area;
 19 | }
 20 | 
 21 | vector<int> nms(vector<Bbox> boxes, vector<float> confidences, const float nms_thresh)
 22 | {
 23 |     sort(confidences.begin(), confidences.end(), [&confidences](size_t index_1, size_t index_2)
 24 |          { return confidences[index_1] > confidences[index_2]; });
 25 |     const int num_box = confidences.size();
 26 |     vector<bool> isSuppressed(num_box, false);
 27 |     for (int i = 0; i < num_box; ++i)
 28 |     {
 29 |         if (isSuppressed[i])
 30 |         {
 31 |             continue;
 32 |         }
 33 |         for (int j = i + 1; j < num_box; ++j)
 34 |         {
 35 |             if (isSuppressed[j])
 36 |             {
 37 |                 continue;
 38 |             }
 39 | 
 40 |             float ovr = GetIoU(boxes[i], boxes[j]);
 41 |             if (ovr > nms_thresh)
 42 |             {
 43 |                 isSuppressed[j] = true;
 44 |             }
 45 |         }
 46 |     }
 47 | 
 48 |     vector<int> keep_inds;
 49 |     for (int i = 0; i < isSuppressed.size(); i++)
 50 |     {
 51 |         if (!isSuppressed[i])
 52 |         {
 53 |             keep_inds.emplace_back(i);
 54 |         }
 55 |     }
 56 |     return keep_inds;
 57 | }
 58 | 
 59 | Mat warp_face_by_face_landmark_5(const Mat temp_vision_frame, Mat &crop_img, const vector<Point2f> face_landmark_5, const vector<Point2f> normed_template, const Size crop_size)
 60 | {
 61 |     vector<uchar> inliers(face_landmark_5.size(), 0);
 62 |     Mat affine_matrix = cv::estimateAffinePartial2D(face_landmark_5, normed_template, cv::noArray(), cv::RANSAC, 100.0);
 63 |     warpAffine(temp_vision_frame, crop_img, affine_matrix, crop_size, cv::INTER_AREA, cv::BORDER_REPLICATE);
 64 |     return affine_matrix;
 65 | }
 66 | 
 67 | Mat create_static_box_mask(const int *crop_size, const float face_mask_blur, const int *face_mask_padding)
 68 | {
 69 |     const float blur_amount = int(crop_size[0] * 0.5 * face_mask_blur);
 70 |     const int blur_area = max(int(blur_amount / 2), 1);
 71 |     Mat box_mask = Mat::ones(crop_size[0], crop_size[1], CV_32FC1);
 72 | 
 73 |     int sub = max(blur_area, int(crop_size[1] * face_mask_padding[0] / 100));
 74 |     // Mat roi = box_mask(cv::Rect(0,0,sub,crop_size[1]));
 75 |     box_mask(cv::Rect(0, 0, crop_size[1], sub)).setTo(0);
 76 | 
 77 |     sub = crop_size[0] - max(blur_area, int(crop_size[1] * face_mask_padding[2] / 100));
 78 |     box_mask(cv::Rect(0, sub, crop_size[1], crop_size[0] - sub)).setTo(0);
 79 | 
 80 |     sub = max(blur_area, int(crop_size[0] * face_mask_padding[3] / 100));
 81 |     box_mask(cv::Rect(0, 0, sub, crop_size[0])).setTo(0);
 82 | 
 83 |     sub = crop_size[1] - max(blur_area, int(crop_size[0] * face_mask_padding[1] / 100));
 84 |     box_mask(cv::Rect(sub, 0, crop_size[1] - sub, crop_size[0])).setTo(0);
 85 | 
 86 |     if (blur_amount > 0)
 87 |     {
 88 |         GaussianBlur(box_mask, box_mask, Size(0, 0), blur_amount * 0.25);
 89 |     }
 90 |     return box_mask;
 91 | }
 92 | 
 93 | Mat paste_back(Mat temp_vision_frame, Mat crop_vision_frame, Mat crop_mask, Mat affine_matrix)
 94 | {
 95 |     Mat inverse_matrix;
 96 |     cv::invertAffineTransform(affine_matrix, inverse_matrix);
 97 |     Mat inverse_mask;
 98 |     Size temp_size(temp_vision_frame.cols, temp_vision_frame.rows);
 99 |     warpAffine(crop_mask, inverse_mask, inverse_matrix, temp_size);
100 |     inverse_mask.setTo(0, inverse_mask < 0);
101 |     inverse_mask.setTo(1, inverse_mask > 1);
102 |     Mat inverse_vision_frame;
103 |     warpAffine(crop_vision_frame, inverse_vision_frame, inverse_matrix, temp_size, cv::INTER_LINEAR, cv::BORDER_REPLICATE);
104 | 
105 |     vector<Mat> inverse_vision_frame_bgrs(3);
106 |     split(inverse_vision_frame, inverse_vision_frame_bgrs);
107 |     vector<Mat> temp_vision_frame_bgrs(3);
108 |     split(temp_vision_frame, temp_vision_frame_bgrs);
109 |     for (int c = 0; c < 3; c++)
110 |     {
111 |         inverse_vision_frame_bgrs[c].convertTo(inverse_vision_frame_bgrs[c], CV_32FC1);   ////注意数据类型转换，不然在下面的矩阵点乘运算时会报错的
112 |         temp_vision_frame_bgrs[c].convertTo(temp_vision_frame_bgrs[c], CV_32FC1);         ////注意数据类型转换，不然在下面的矩阵点乘运算时会报错的
113 |     }
114 |     vector<Mat> channel_mats(3);
115 |     
116 |     channel_mats[0] = inverse_mask.mul(inverse_vision_frame_bgrs[0]) + temp_vision_frame_bgrs[0].mul(1 - inverse_mask);
117 |     channel_mats[1] = inverse_mask.mul(inverse_vision_frame_bgrs[1]) + temp_vision_frame_bgrs[1].mul(1 - inverse_mask);
118 |     channel_mats[2] = inverse_mask.mul(inverse_vision_frame_bgrs[2]) + temp_vision_frame_bgrs[2].mul(1 - inverse_mask);
119 |     
120 |     cv::Mat paste_vision_frame;
121 |     merge(channel_mats, paste_vision_frame);
122 |     paste_vision_frame.convertTo(paste_vision_frame, CV_8UC3);
123 |     return paste_vision_frame;
124 | }
125 | 
126 | Mat blend_frame(Mat temp_vision_frame, Mat paste_vision_frame, const int FACE_ENHANCER_BLEND)
127 | {
128 |     const float face_enhancer_blend = 1 - ((float)FACE_ENHANCER_BLEND / 100.f);
129 |     Mat dstimg;
130 |     cv::addWeighted(temp_vision_frame, face_enhancer_blend, paste_vision_frame, 1 - face_enhancer_blend, 0, dstimg);
131 |     return dstimg;
132 | }


--------------------------------------------------------------------------------
/cpp/utils.h:
--------------------------------------------------------------------------------
 1 | # ifndef UTILS
 2 | # define UTILS
 3 | #include <iostream>
 4 | #include <algorithm>
 5 | #include <vector>
 6 | #include "opencv2/opencv.hpp"
 7 | 
 8 | typedef struct
 9 | {
10 |     float xmin;
11 |     float ymin;
12 |     float xmax;
13 |     float ymax;
14 | } Bbox;
15 | 
16 | float GetIoU(const Bbox box1, const Bbox box2);
17 | std::vector<int> nms(std::vector<Bbox> boxes, std::vector<float> confidences, const float nms_thresh);
18 | cv::Mat warp_face_by_face_landmark_5(const cv::Mat temp_vision_frame, cv::Mat &crop_img, const std::vector<cv::Point2f> face_landmark_5, const std::vector<cv::Point2f> normed_template, const cv::Size crop_size);
19 | cv::Mat create_static_box_mask(const int *crop_size, const float face_mask_blur, const int *face_mask_padding);
20 | cv::Mat paste_back(cv::Mat temp_vision_frame, cv::Mat crop_vision_frame, cv::Mat crop_mask, cv::Mat affine_matrix);
21 | cv::Mat blend_frame(cv::Mat temp_vision_frame, cv::Mat paste_vision_frame, const int FACE_ENHANCER_BLEND=80);
22 | #endif


--------------------------------------------------------------------------------
/cpp/yolov8face.cpp:
--------------------------------------------------------------------------------
  1 | #include "yolov8face.h"
  2 | 
  3 | using namespace cv;
  4 | using namespace std;
  5 | using namespace Ort;
  6 | 
  7 | Yolov8Face::Yolov8Face(string model_path, const float conf_thres, const float iou_thresh)
  8 | {
  9 |     /// OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);   ///如果使用cuda加速，需要取消注释
 10 | 
 11 |     sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
 12 |     /// std::wstring widestr = std::wstring(model_path.begin(), model_path.end());  ////windows写法
 13 |     /// ort_session = new Session(env, widestr.c_str(), sessionOptions); ////windows写法
 14 |     ort_session = new Session(env, model_path.c_str(), sessionOptions); ////linux写法
 15 | 
 16 |     size_t numInputNodes = ort_session->GetInputCount();
 17 |     size_t numOutputNodes = ort_session->GetOutputCount();
 18 |     AllocatorWithDefaultOptions allocator;
 19 |     for (int i = 0; i < numInputNodes; i++)
 20 |     {
 21 |         input_names.push_back(ort_session->GetInputName(i, allocator));      ///低版本onnxruntime的接口函数
 22 |         ////AllocatedStringPtr input_name_Ptr = ort_session->GetInputNameAllocated(i, allocator);  /// 高版本onnxruntime的接口函数
 23 |         ////input_names.push_back(input_name_Ptr.get()); /// 高版本onnxruntime的接口函数
 24 |         Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i);
 25 |         auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
 26 |         auto input_dims = input_tensor_info.GetShape();
 27 |         input_node_dims.push_back(input_dims);
 28 |     }
 29 |     for (int i = 0; i < numOutputNodes; i++)
 30 |     {
 31 |         output_names.push_back(ort_session->GetOutputName(i, allocator));  ///低版本onnxruntime的接口函数
 32 |         ////AllocatedStringPtr output_name_Ptr= ort_session->GetInputNameAllocated(i, allocator);
 33 |         ////output_names.push_back(output_name_Ptr.get()); /// 高版本onnxruntime的接口函数
 34 |         Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i);
 35 |         auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
 36 |         auto output_dims = output_tensor_info.GetShape();
 37 |         output_node_dims.push_back(output_dims);
 38 |     }
 39 | 
 40 |     this->input_height = input_node_dims[0][2];
 41 |     this->input_width = input_node_dims[0][3];
 42 |     this->conf_threshold = conf_thres;
 43 |     this->iou_threshold = iou_thresh;
 44 | }
 45 | 
 46 | void Yolov8Face::preprocess(Mat srcimg)
 47 | {
 48 |     const int height = srcimg.rows;
 49 |     const int width = srcimg.cols;
 50 |     Mat temp_image = srcimg.clone();
 51 |     if (height > this->input_height || width > this->input_width)
 52 |     {
 53 |         const float scale = std::min((float)this->input_height / height, (float)this->input_width / width);
 54 |         Size new_size = Size(int(width * scale), int(height * scale));
 55 |         resize(srcimg, temp_image, new_size);
 56 |     }
 57 |     this->ratio_height = (float)height / temp_image.rows;
 58 |     this->ratio_width = (float)width / temp_image.cols;
 59 |     Mat input_img;
 60 |     copyMakeBorder(temp_image, input_img, 0, this->input_height - temp_image.rows, 0, this->input_width - temp_image.cols, BORDER_CONSTANT, 0);
 61 | 
 62 |     vector<cv::Mat> bgrChannels(3);
 63 |     split(input_img, bgrChannels);
 64 |     for (int c = 0; c < 3; c++)
 65 |     {
 66 |         bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / 128.0, -127.5 / 128.0);
 67 |     }
 68 | 
 69 |     const int image_area = this->input_height * this->input_width;
 70 |     this->input_image.resize(3 * image_area);
 71 |     size_t single_chn_size = image_area * sizeof(float);
 72 |     memcpy(this->input_image.data(), (float *)bgrChannels[0].data, single_chn_size);
 73 |     memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
 74 |     memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[2].data, single_chn_size);
 75 | }
 76 | 
 77 | ////只返回检测框,因为在下游的模块里,置信度和5个关键点这两个信息在后续的模块里没有用到
 78 | void Yolov8Face::detect(Mat srcimg, std::vector<Bbox> &boxes)
 79 | {
 80 |     this->preprocess(srcimg);
 81 | 
 82 |     std::vector<int64_t> input_img_shape = {1, 3, this->input_height, this->input_width};
 83 |     Value input_tensor_ = Value::CreateTensor<float>(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size());
 84 | 
 85 |     Ort::RunOptions runOptions;
 86 |     vector<Value> ort_outputs = this->ort_session->Run(runOptions, this->input_names.data(), &input_tensor_, 1, this->output_names.data(), output_names.size());
 87 | 
 88 |     float *pdata = ort_outputs[0].GetTensorMutableData<float>(); /// 形状是(1, 20, 8400),不考虑第0维batchsize，每一列的长度20,前4个元素是检测框坐标(cx,cy,w,h)，第4个元素是置信度，剩下的15个元素是5个关键点坐标x,y和置信度
 89 |     const int num_box = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape()[2];
 90 |     vector<Bbox> bounding_box_raw;
 91 |     vector<float> score_raw;
 92 |     for (int i = 0; i < num_box; i++)
 93 |     {
 94 |         const float score = pdata[4 * num_box + i];
 95 |         if (score > this->conf_threshold)
 96 |         {
 97 |             float xmin = (pdata[i] - 0.5 * pdata[2 * num_box + i]) * this->ratio_width;            ///(cx,cy,w,h)转到(x,y,w,h)并还原到原图
 98 |             float ymin = (pdata[num_box + i] - 0.5 * pdata[3 * num_box + i]) * this->ratio_height; ///(cx,cy,w,h)转到(x,y,w,h)并还原到原图
 99 |             float xmax = (pdata[i] + 0.5 * pdata[2 * num_box + i]) * this->ratio_width;            ///(cx,cy,w,h)转到(x,y,w,h)并还原到原图
100 |             float ymax = (pdata[num_box + i] + 0.5 * pdata[3 * num_box + i]) * this->ratio_height; ///(cx,cy,w,h)转到(x,y,w,h)并还原到原图
101 |             ////坐标的越界检查保护，可以添加一下
102 |             bounding_box_raw.emplace_back(Bbox{xmin, ymin, xmax, ymax});
103 |             score_raw.emplace_back(score);
104 |             /// 剩下的5个关键点坐标的计算,暂时不写,因为在下游的模块里没有用到5个关键点坐标信息
105 |         }
106 |     }
107 |     vector<int> keep_inds = nms(bounding_box_raw, score_raw, this->iou_threshold);
108 |     const int keep_num = keep_inds.size();
109 |     boxes.clear();
110 |     boxes.resize(keep_num);
111 |     for (int i = 0; i < keep_num; i++)
112 |     {
113 |         const int ind = keep_inds[i];
114 |         boxes[i] = bounding_box_raw[ind];
115 |     }
116 | }


--------------------------------------------------------------------------------
/cpp/yolov8face.h:
--------------------------------------------------------------------------------
 1 | # ifndef YOLOV8FACE
 2 | # define YOLOV8FACE
 3 | #include <fstream>
 4 | #include <sstream>
 5 | #include <opencv2/imgproc.hpp>
 6 | #include <opencv2/highgui.hpp>
 7 | //#include <cuda_provider_factory.h>  ///如果使用cuda加速，需要取消注释
 8 | #include <onnxruntime_cxx_api.h>
 9 | #include"utils.h"
10 | 
11 | 
12 | class Yolov8Face
13 | {
14 | public:
15 | 	Yolov8Face(std::string modelpath, const float conf_thres=0.5, const float iou_thresh=0.4);
16 | 	void detect(cv::Mat srcimg, std::vector<Bbox> &boxes);   ////只返回检测框,置信度和5个关键点这两个信息在后续的模块里没有用到
17 | private:
18 | 	void preprocess(cv::Mat img);
19 | 	std::vector<float> input_image;
20 | 	int input_height;
21 | 	int input_width;
22 | 	float ratio_height;
23 | 	float ratio_width;
24 | 	float conf_threshold;
25 | 	float iou_threshold;
26 | 
27 | 	Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "Face Detect");
28 | 	Ort::Session *ort_session = nullptr;
29 | 	Ort::SessionOptions sessionOptions = Ort::SessionOptions();
30 | 	std::vector<char*> input_names;
31 | 	std::vector<char*> output_names;
32 | 	std::vector<std::vector<int64_t>> input_node_dims; // >=1 outputs
33 | 	std::vector<std::vector<int64_t>> output_node_dims; // >=1 outputs
34 | 	Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
35 | };
36 | #endif


--------------------------------------------------------------------------------
/python/face_68landmarks.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | import onnxruntime
 4 | from utils import warp_face_by_translation, convert_face_landmark_68_to_5
 5 | 
 6 | class face_68_landmarks:
 7 |     def __init__(self, modelpath):
 8 |         # Initialize model
 9 |         session_option = onnxruntime.SessionOptions()
10 |         session_option.log_severity_level = 3
11 |         # self.session = onnxruntime.InferenceSession(modelpath, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
12 |         self.session = onnxruntime.InferenceSession(modelpath, sess_options=session_option)  ###opencv-dnn读取onnx失败
13 |         model_inputs = self.session.get_inputs()
14 |         self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]
15 |         self.input_shape = model_inputs[0].shape
16 |         self.input_height = int(self.input_shape[2])
17 |         self.input_width = int(self.input_shape[3])
18 | 
19 | 
20 |     def preprocess(self, srcimg, bounding_box):
21 |         '''
22 |         bounding_box里的数据格式是[xmin. ymin, xmax, ymax]
23 |         '''
24 |         scale = 195 / np.subtract(bounding_box[2:], bounding_box[:2]).max()
25 |         translation = (256 - np.add(bounding_box[2:], bounding_box[:2]) * scale) * 0.5
26 |         crop_img, affine_matrix = warp_face_by_translation(srcimg, translation, scale, (256, 256))
27 | 
28 |         # crop_img = cv2.cvtColor(crop_img, cv2.COLOR_RGB2Lab)  ###可有可无
29 |         # if np.mean(crop_img[:, :, 0]) < 30:
30 |         #     crop_img[:, :, 0] = cv2.createCLAHE(clipLimit = 2).apply(crop_img[:, :, 0])
31 |         # crop_img = cv2.cvtColor(crop_img, cv2.COLOR_Lab2RGB)   ###可有可无
32 |         
33 |         crop_img = crop_img.transpose(2, 0, 1).astype(np.float32) / 255.0
34 |         crop_img = crop_img[np.newaxis, :, :, :]
35 |         return crop_img, affine_matrix
36 | 
37 |     def detect(self, srcimg, bounding_box):
38 |         '''
39 |         如果直接crop+resize,最后返回的人脸关键点有偏差
40 |         '''
41 |         input_tensor, affine_matrix = self.preprocess(srcimg, bounding_box)
42 | 
43 |         # Perform inference on the image
44 |         face_landmark_68 = self.session.run(None, {self.input_names[0]: input_tensor})[0]
45 |         face_landmark_68 = face_landmark_68[:, :, :2][0] / 64
46 |         face_landmark_68 = face_landmark_68.reshape(1, -1, 2) * 256
47 |         face_landmark_68 = cv2.transform(face_landmark_68, cv2.invertAffineTransform(affine_matrix))
48 |         face_landmark_68 = face_landmark_68.reshape(-1, 2)
49 |         face_landmark_5of68 = convert_face_landmark_68_to_5(face_landmark_68)
50 |         return face_landmark_68, face_landmark_5of68
51 | 
52 | if __name__ == '__main__':
53 |     imgpath = '5.jpg'
54 |     srcimg = cv2.imread('5.jpg')
55 |     bounding_box = np.array([487, 236, 784, 624])
56 |     
57 |     # Initialize face_68landmarks detector
58 |     mynet = face_68_landmarks("weights/2dfan4.onnx")
59 | 
60 |     face_landmark_68, face_landmark_5of68 = mynet.detect(srcimg, bounding_box)
61 |     # print(face_landmark_5of68)
62 |     # Draw detections
63 |     for i in range(face_landmark_68.shape[0]):
64 |         cv2.circle(srcimg, (int(face_landmark_68[i,0]), int(face_landmark_68[i,1])), 3, (0, 255, 0), thickness=-1)
65 |     cv2.imwrite('detect_face_68lanmarks.jpg', srcimg)
66 |     winName = 'Deep learning face_68landmarks detection in ONNXRuntime'
67 |     cv2.namedWindow(winName, 0)
68 |     cv2.imshow(winName, srcimg)
69 |     cv2.waitKey(0)
70 |     cv2.destroyAllWindows()
71 | 


--------------------------------------------------------------------------------
/python/face_enhancer.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import onnxruntime
 3 | from utils import warp_face_by_face_landmark_5, create_static_box_mask, paste_back, blend_frame
 4 | 
 5 | FACE_MASK_BLUR = 0.3
 6 | FACE_MASK_PADDING = (0, 0, 0, 0)
 7 | 
 8 | class enhance_face:
 9 |     def __init__(self, modelpath):
10 |         # Initialize model
11 |         session_option = onnxruntime.SessionOptions()
12 |         session_option.log_severity_level = 3
13 |         # self.session = onnxruntime.InferenceSession(modelpath, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
14 |         self.session = onnxruntime.InferenceSession(modelpath, sess_options=session_option)  ###opencv-dnn读取onnx失败
15 |         model_inputs = self.session.get_inputs()
16 |         self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]
17 |         self.input_shape = model_inputs[0].shape
18 |         self.input_height = int(self.input_shape[2])
19 |         self.input_width = int(self.input_shape[3])
20 | 
21 |     def process(self, target_img, target_landmark_5):
22 |         ###preprocess
23 |         crop_img, affine_matrix = warp_face_by_face_landmark_5(target_img, target_landmark_5, 'ffhq_512', (512, 512))
24 |         box_mask = create_static_box_mask((crop_img.shape[1],crop_img.shape[0]), FACE_MASK_BLUR, FACE_MASK_PADDING)
25 |         crop_mask_list = [box_mask]
26 |     
27 |         crop_img = crop_img[:, :, ::-1].astype(np.float32) / 255.0
28 |         crop_img = (crop_img - 0.5) / 0.5
29 |         crop_img = np.expand_dims(crop_img.transpose(2, 0, 1), axis = 0).astype(np.float32)
30 | 
31 |         ###Perform inference on the image
32 |         result = self.session.run(None, {'input':crop_img})[0][0]
33 |         ###normalize_crop_frame
34 |         result = np.clip(result, -1, 1)
35 |         result = (result + 1) / 2
36 |         result = result.transpose(1, 2, 0)
37 |         result = (result * 255.0).round()
38 |         result = result.astype(np.uint8)[:, :, ::-1]
39 | 
40 |         crop_mask = np.minimum.reduce(crop_mask_list).clip(0, 1)
41 |         paste_frame = paste_back(target_img, result, crop_mask, affine_matrix)
42 |         dstimg = blend_frame(target_img, paste_frame)
43 |         return dstimg


--------------------------------------------------------------------------------
/python/face_recognizer.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | import onnxruntime
 4 | from utils import warp_face_by_face_landmark_5
 5 | 
 6 | class face_recognize:
 7 |     def __init__(self, modelpath):
 8 |         # Initialize model
 9 |         session_option = onnxruntime.SessionOptions()
10 |         session_option.log_severity_level = 3
11 |         self.session = onnxruntime.InferenceSession(modelpath, providers=['CPUExecutionProvider'])
12 |         # self.session = onnxruntime.InferenceSession(modelpath, sess_options=session_option)  ###opencv-dnn读取onnx失败
13 |         model_inputs = self.session.get_inputs()
14 |         self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]
15 |         self.input_shape = model_inputs[0].shape
16 |         self.input_height = int(self.input_shape[2])
17 |         self.input_width = int(self.input_shape[3])
18 | 
19 |     def preprocess(self, srcimg, face_landmark_5):
20 |         crop_img, _ = warp_face_by_face_landmark_5(srcimg, face_landmark_5, 'arcface_112_v2', (112, 112))
21 |         crop_img = crop_img / 127.5 - 1
22 |         crop_img = crop_img[:, :, ::-1].transpose(2, 0, 1).astype(np.float32)
23 |         crop_img = np.expand_dims(crop_img, axis = 0)
24 |         return crop_img
25 | 
26 |     def detect(self, srcimg, face_landmark_5):
27 |         input_tensor = self.preprocess(srcimg, face_landmark_5)
28 | 
29 |         # Perform inference on the image
30 |         embedding = self.session.run(None, {self.input_names[0]: input_tensor})[0]
31 |         embedding = embedding.ravel()  ###拉平
32 |         normed_embedding = embedding / np.linalg.norm(embedding)
33 |         return embedding, normed_embedding
34 |     
35 | if __name__ == '__main__':
36 |     imgpath = '5.jpg'
37 |     srcimg = cv2.imread('5.jpg')
38 |     face_landmark_5 = np.array([[568.2485,  398.9512 ],
39 |                             [701.7346,  399.64795],
40 |                             [634.2213,  482.92694],
41 |                             [583.5656,  543.10187],
42 |                             [684.52405, 543.125  ]])
43 |     
44 |     mynet = face_recognize('weights/arcface_w600k_r50.onnx')
45 |     embedding, normed_embedding = mynet.detect(srcimg, face_landmark_5)
46 |     print(embedding.shape, normed_embedding.shape)


--------------------------------------------------------------------------------
/python/face_swap.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import onnxruntime
 3 | from utils import warp_face_by_face_landmark_5, create_static_box_mask, paste_back
 4 | 
 5 | FACE_MASK_BLUR = 0.3
 6 | FACE_MASK_PADDING = (0, 0, 0, 0)
 7 | INSWAPPER_128_MODEL_MEAN = [0.0, 0.0, 0.0]
 8 | INSWAPPER_128_MODEL_STD = [1.0, 1.0, 1.0]
 9 | 
10 | class swap_face:
11 |     def __init__(self, modelpath):
12 |         # Initialize model
13 |         session_option = onnxruntime.SessionOptions()
14 |         session_option.log_severity_level = 3
15 |         # self.session = onnxruntime.InferenceSession(modelpath, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
16 |         self.session = onnxruntime.InferenceSession(modelpath, sess_options=session_option)  ###opencv-dnn读取onnx失败
17 |         model_inputs = self.session.get_inputs()
18 |         self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]
19 |         self.input_shape = model_inputs[0].shape
20 |         self.input_height = int(self.input_shape[2])
21 |         self.input_width = int(self.input_shape[3])
22 |         self.model_matrix = np.load('model_matrix.npy')
23 | 
24 |     def process(self, target_img, source_face_embedding, target_landmark_5):
25 |         ###preprocess
26 |         crop_img, affine_matrix = warp_face_by_face_landmark_5(target_img, target_landmark_5, 'arcface_128_v2', (128, 128))
27 |         crop_mask_list = []
28 | 
29 |         box_mask = create_static_box_mask((crop_img.shape[1],crop_img.shape[0]), FACE_MASK_BLUR, FACE_MASK_PADDING)
30 |         crop_mask_list.append(box_mask)
31 | 
32 |         crop_img = crop_img[:, :, ::-1].astype(np.float32) / 255.0
33 |         crop_img = (crop_img - INSWAPPER_128_MODEL_MEAN) / INSWAPPER_128_MODEL_STD
34 |         crop_img = np.expand_dims(crop_img.transpose(2, 0, 1), axis = 0).astype(np.float32)
35 | 
36 |         source_embedding = source_face_embedding.reshape((1, -1))
37 |         source_embedding = np.dot(source_embedding, self.model_matrix) / np.linalg.norm(source_embedding)
38 | 
39 |         ###Perform inference on the image
40 |         result = self.session.run(None, {'target':crop_img, 'source':source_embedding})[0][0]
41 |         ###normalize_crop_frame
42 |         result = result.transpose(1, 2, 0)
43 |         result = (result * 255.0).round()
44 |         result = result[:, :, ::-1]
45 | 
46 |         crop_mask = np.minimum.reduce(crop_mask_list).clip(0, 1)   ###print(np.array_equal(np.minimum.reduce(crop_mask_list), crop_mask_list[0])) 打印是True，说明np.minimum.reduce(crop_mask_list)等于crop_mask_list[0]，也就是box_mask，因此做np.minimum.reduce(crop_mask_list)完全是多此一举
47 |         dstimg = paste_back(target_img, result, crop_mask, affine_matrix)
48 |         return dstimg


--------------------------------------------------------------------------------
/python/images/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/images/1.jpg


--------------------------------------------------------------------------------
/python/images/14.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/images/14.jpg


--------------------------------------------------------------------------------
/python/images/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/images/2.jpg


--------------------------------------------------------------------------------
/python/images/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/images/5.jpg


--------------------------------------------------------------------------------
/python/images/source.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/images/source.jpg


--------------------------------------------------------------------------------
/python/images/target.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/images/target.jpg


--------------------------------------------------------------------------------
/python/main.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import matplotlib.pyplot as plt  ###如无则pip安装
 3 | from yolov8face import YOLOface_8n
 4 | from face_68landmarks import face_68_landmarks
 5 | from face_recognizer import face_recognize
 6 | from face_swap import swap_face
 7 | from face_enhancer import enhance_face
 8 | 
 9 | if __name__ == '__main__':
10 |     source_path = 'images/1.jpg'
11 |     target_path = 'images/5.jpg'
12 |     source_img = cv2.imread(source_path)
13 |     target_img = cv2.imread(target_path)
14 |     
15 |     detect_face_net = YOLOface_8n("weights/yoloface_8n.onnx")
16 |     detect_68landmarks_net = face_68_landmarks("weights/2dfan4.onnx")
17 |     face_embedding_net = face_recognize('weights/arcface_w600k_r50.onnx')
18 |     swap_face_net = swap_face('weights/inswapper_128.onnx')
19 |     enhance_face_net = enhance_face('weights/gfpgan_1.4.onnx')
20 | 
21 |     boxes, _, _ = detect_face_net.detect(source_img)
22 |     position = 0  ###一张图片里可能有多个人脸，这里只考虑1个人脸的情况
23 |     bounding_box = boxes[position]
24 |     _, face_landmark_5of68 = detect_68landmarks_net.detect(source_img, bounding_box)
25 |     source_face_embedding, _ = face_embedding_net.detect(source_img, face_landmark_5of68)
26 | 
27 |     boxes, _, _ = detect_face_net.detect(target_img)
28 |     position = 0  ###一张图片里可能有多个人脸，这里只考虑1个人脸的情况
29 |     bounding_box = boxes[position]
30 |     _, target_landmark_5 = detect_68landmarks_net.detect(target_img, bounding_box)
31 | 
32 |     swapimg = swap_face_net.process(target_img, source_face_embedding, target_landmark_5)
33 |     resultimg = enhance_face_net.process(swapimg, target_landmark_5)
34 |     
35 |     plt.subplot(1, 2, 1)
36 |     plt.imshow(source_img[:,:,::-1])  ###plt库显示图像是RGB顺序
37 |     plt.axis('off')
38 |     plt.subplot(1, 2, 2)
39 |     plt.imshow(target_img[:,:,::-1])
40 |     plt.axis('off')
41 |     # plt.show()
42 |     plt.savefig('source_target.jpg', dpi=600, bbox_inches='tight') ###保存高清图
43 | 
44 |     cv2.imwrite('result.jpg', resultimg)
45 |     
46 |     # cv2.namedWindow('resultimg', 0)
47 |     # cv2.imshow('resultimg', resultimg)
48 |     # cv2.waitKey(0)
49 |     # cv2.destroyAllWindows()
50 | 
51 | 
52 | 
53 |     
54 | 


--------------------------------------------------------------------------------
/python/model_matrix.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/model_matrix.npy


--------------------------------------------------------------------------------
/python/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np 
 2 | import cv2
 3 | 
 4 | def warp_face_by_translation(temp_img, translation, scale, crop_size):
 5 |     affine_matrix = np.array([[ scale, 0, translation[0] ], [ 0, scale, translation[1] ]])
 6 |     crop_img = cv2.warpAffine(temp_img, affine_matrix, crop_size)
 7 |     return crop_img, affine_matrix
 8 | 
 9 | def convert_face_landmark_68_to_5(landmark_68):
10 |     left_eye = np.mean(landmark_68[36:42], axis = 0)
11 |     right_eye = np.mean(landmark_68[42:48], axis = 0)
12 |     nose = landmark_68[30]
13 |     left_mouth_end = landmark_68[48]
14 |     right_mouth_end = landmark_68[54]
15 |     face_landmark_5 = np.array([left_eye, right_eye, nose, left_mouth_end, right_mouth_end])
16 |     return face_landmark_5
17 | 
18 | TEMPLATES = {'arcface_112_v2': np.array([[ 0.34191607, 0.46157411 ],
19 |                                          [ 0.65653393, 0.45983393 ],
20 |                                          [ 0.50022500, 0.64050536 ],
21 |                                          [ 0.37097589, 0.82469196 ],
22 |                                          [ 0.63151696, 0.82325089 ]]),
23 |              'arcface_128_v2': np.array([[ 0.36167656, 0.40387734 ],
24 |                                          [ 0.63696719, 0.40235469 ],
25 |                                          [ 0.50019687, 0.56044219 ],
26 |                                          [ 0.38710391, 0.72160547 ],
27 |                                          [ 0.61507734, 0.72034453 ]]),
28 |              'ffhq_512': np.array([[ 0.37691676, 0.46864664 ],
29 |                                    [ 0.62285697, 0.46912813 ],
30 |                                    [ 0.50123859, 0.61331904 ],
31 |                                    [ 0.39308822, 0.72541100 ],
32 |                                    [ 0.61150205, 0.72490465 ]])}
33 | 
34 | def warp_face_by_face_landmark_5(temp_vision_frame, face_landmark_5, template, crop_size):
35 |     normed_template = TEMPLATES.get(template) * crop_size
36 |     # print(normed_template)  ###打印出来，写到c++程序的std::vector<cv::Point2f> normed_template里
37 |     affine_matrix = cv2.estimateAffinePartial2D(face_landmark_5, normed_template, method = cv2.RANSAC, ransacReprojThreshold = 100)[0]
38 |     crop_img = cv2.warpAffine(temp_vision_frame, affine_matrix, crop_size, borderMode = cv2.BORDER_REPLICATE, flags = cv2.INTER_AREA)
39 |     return crop_img, affine_matrix
40 | 
41 | def create_static_box_mask(crop_size, face_mask_blur, face_mask_padding):
42 |     blur_amount = int(crop_size[0] * 0.5 * face_mask_blur)
43 |     blur_area = max(blur_amount // 2, 1)
44 |     box_mask = np.ones(crop_size, np.float32)
45 |     box_mask[:max(blur_area, int(crop_size[1] * face_mask_padding[0] / 100)), :] = 0
46 |     box_mask[-max(blur_area, int(crop_size[1] * face_mask_padding[2] / 100)):, :] = 0
47 |     box_mask[:, :max(blur_area, int(crop_size[0] * face_mask_padding[3] / 100))] = 0
48 |     box_mask[:, -max(blur_area, int(crop_size[0] * face_mask_padding[1] / 100)):] = 0
49 |     if blur_amount > 0:
50 |         box_mask = cv2.GaussianBlur(box_mask, (0, 0), blur_amount * 0.25)
51 |     return box_mask
52 | 
53 | def paste_back(temp_vision_frame, crop_vision_frame, crop_mask, affine_matrix):
54 |     inverse_matrix = cv2.invertAffineTransform(affine_matrix)
55 |     temp_size = temp_vision_frame.shape[:2][::-1]
56 |     inverse_mask = cv2.warpAffine(crop_mask, inverse_matrix, temp_size).clip(0, 1)
57 |     inverse_vision_frame = cv2.warpAffine(crop_vision_frame, inverse_matrix, temp_size, borderMode = cv2.BORDER_REPLICATE)
58 |     paste_vision_frame = temp_vision_frame.copy()
59 |     paste_vision_frame[:, :, 0] = inverse_mask * inverse_vision_frame[:, :, 0] + (1 - inverse_mask) * temp_vision_frame[:, :, 0]
60 |     paste_vision_frame[:, :, 1] = inverse_mask * inverse_vision_frame[:, :, 1] + (1 - inverse_mask) * temp_vision_frame[:, :, 1]
61 |     paste_vision_frame[:, :, 2] = inverse_mask * inverse_vision_frame[:, :, 2] + (1 - inverse_mask) * temp_vision_frame[:, :, 2]
62 |     return paste_vision_frame
63 | 
64 | def blend_frame(temp_vision_frame, paste_vision_frame, FACE_ENHANCER_BLEND=80):
65 |     face_enhancer_blend = 1 - (FACE_ENHANCER_BLEND / 100)
66 |     temp_vision_frame = cv2.addWeighted(temp_vision_frame, face_enhancer_blend, paste_vision_frame, 1 - face_enhancer_blend, 0)
67 |     return temp_vision_frame


--------------------------------------------------------------------------------
/python/yolov8face.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import numpy as np
  3 | import onnxruntime
  4 | import argparse
  5 | 
  6 | 
  7 | class YOLOface_8n:
  8 |     def __init__(self, modelpath, conf_thres=0.5, iou_thresh=0.4):
  9 |         self.conf_threshold = conf_thres
 10 |         self.iou_threshold = iou_thresh
 11 |         # Initialize model
 12 |         session_option = onnxruntime.SessionOptions()
 13 |         session_option.log_severity_level = 3
 14 |         # self.session = onnxruntime.InferenceSession(modelpath, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
 15 |         self.session = onnxruntime.InferenceSession(modelpath, sess_options=session_option)  ###opencv-dnn读取onnx失败
 16 |         model_inputs = self.session.get_inputs()
 17 |         self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]
 18 |         self.input_shape = model_inputs[0].shape
 19 |         self.input_height = int(self.input_shape[2])
 20 |         self.input_width = int(self.input_shape[3])
 21 | 
 22 |     def preprocess(self, srcimg):
 23 |         height, width = srcimg.shape[:2]
 24 |         temp_image = srcimg.copy()
 25 |         if height > self.input_height or width > self.input_width:
 26 |             scale = min(self.input_height / height, self.input_width / width)
 27 |             new_width = int(width * scale)
 28 |             new_height = int(height * scale)
 29 |             temp_image = cv2.resize(srcimg, (new_width, new_height))
 30 |         self.ratio_height = height / temp_image.shape[0]
 31 |         self.ratio_width = width / temp_image.shape[1]
 32 |         input_img = cv2.copyMakeBorder(temp_image, 0, self.input_height - temp_image.shape[0], 0, self.input_width - temp_image.shape[1], cv2.BORDER_CONSTANT,
 33 |                                          value=0)
 34 |         # Scale input pixel values to 0 to 1
 35 |         input_img = (input_img.astype(np.float32) - 127.5) / 128.0
 36 |         input_img = input_img.transpose(2, 0, 1)
 37 |         input_img = input_img[np.newaxis, :, :, :]
 38 |         return input_img
 39 | 
 40 |     def detect(self, srcimg):
 41 |         input_tensor = self.preprocess(srcimg)
 42 | 
 43 |         # Perform inference on the image
 44 |         outputs = self.session.run(None, {self.input_names[0]: input_tensor})[0]
 45 |         boxes, kpts, scores = self.postprocess(outputs)
 46 |         return boxes, kpts, scores
 47 | 
 48 |     def postprocess(self, outputs):
 49 |         bounding_box_list, face_landmark5_list, score_list= [], [], []
 50 |         
 51 |         outputs = np.squeeze(outputs, axis=0).T
 52 |         bounding_box_raw, score_raw, face_landmark_5_raw = np.split(outputs, [ 4, 5 ], axis = 1)
 53 |         keep_indices = np.where(score_raw > self.conf_threshold)[0]
 54 |         if keep_indices.any():
 55 |             bounding_box_raw, face_landmark_5_raw, score_raw = bounding_box_raw[keep_indices], face_landmark_5_raw[keep_indices], score_raw[keep_indices]
 56 |             bboxes_wh = bounding_box_raw.copy()
 57 |             bboxes_wh[:, :2] = bounding_box_raw[:, :2] - 0.5 * bounding_box_raw[:, 2:]  ####(cx,cy,w,h)转到(x,y,w,h)
 58 |             bboxes_wh *= np.array([[self.ratio_width, self.ratio_height, self.ratio_width, self.ratio_height]])  ###合理使用广播法则
 59 |             face_landmark_5_raw *= np.tile(np.array([self.ratio_width, self.ratio_height, 1]), 5).reshape((1, 15))  ###合理使用广播法则,每个点的信息是(x,y,conf), 第3个元素点的置信度，可以不要，那也就需要要乘以1
 60 |             score_raw = score_raw.flatten()
 61 | 
 62 |             indices = cv2.dnn.NMSBoxes(bboxes_wh.tolist(), score_raw.tolist(), self.conf_threshold, self.iou_threshold)
 63 |             if isinstance(indices, np.ndarray):
 64 |                 indices = indices.flatten()
 65 |             if len(indices) > 0:
 66 |                 # bounding_box_list = list(bboxes_wh[indices])
 67 |                 bounding_box_list = list(map(lambda x:np.array([x[0], x[1], x[0]+x[2], x[1]+x[3]], dtype=np.float64), bboxes_wh[indices])) ###xywh转到xminyminxmaxymax
 68 |                 score_list = list(score_raw[indices])
 69 |                 face_landmark5_list = list(face_landmark_5_raw[indices])
 70 | 
 71 |         return bounding_box_list, face_landmark5_list, score_list
 72 | 
 73 |     def draw_detections(self, image, boxes,  kpts, scores):
 74 |         for box, kp, score in zip(boxes, kpts, scores):
 75 |             xmin, ymin, xmax, ymax = box.astype(int)
 76 |             
 77 |             # Draw rectangle
 78 |             cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 0, 255), thickness=2)
 79 |             label = "face:"+str(round(score,2))
 80 |             cv2.putText(image, label, (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
 81 |             for i in range(5):
 82 |                 cv2.circle(image, (int(kp[i * 3]), int(kp[i * 3 + 1])), 3, (0, 255, 0), thickness=-1)
 83 |         return image
 84 | 
 85 | 
 86 | if __name__ == '__main__':
 87 |     parser = argparse.ArgumentParser()
 88 |     parser.add_argument('--imgpath', type=str, default='5.jpg', help="image path")
 89 |     parser.add_argument('--confThreshold', default=0.5, type=float, help='class confidence')
 90 |     args = parser.parse_args()
 91 | 
 92 |     # Initialize YOLOface_8n object detector
 93 |     mynet = YOLOface_8n("weights/yoloface_8n.onnx", conf_thres=args.confThreshold)
 94 |     srcimg = cv2.imread(args.imgpath)
 95 | 
 96 |     # Detect Objects
 97 |     boxes, kpts, scores = mynet.detect(srcimg)
 98 | 
 99 |     # Draw detections
100 |     dstimg = mynet.draw_detections(srcimg, boxes, kpts, scores)
101 |     winName = 'Deep learning yolov8face detection in ONNXRuntime'
102 |     cv2.namedWindow(winName, 0)
103 |     cv2.imshow(winName, dstimg)
104 |     cv2.waitKey(0)
105 |     cv2.destroyAllWindows()
106 | 


--------------------------------------------------------------------------------