├── README.md
├── cpp
│   ├── face68landmarks.cpp
│   ├── face68landmarks.h
│   ├── faceenhancer.cpp
│   ├── faceenhancer.h
│   ├── facerecognizer.cpp
│   ├── facerecognizer.h
│   ├── faceswap.cpp
│   ├── faceswap.h
│   ├── images
│   │   ├── 1.jpg
│   │   ├── 14.jpg
│   │   ├── 2.jpg
│   │   ├── 5.jpg
│   │   ├── source.jpg
│   │   └── target.jpg
│   ├── main.cpp
│   ├── model_matrix.bin
│   ├── utils.cpp
│   ├── utils.h
│   ├── yolov8face.cpp
│   └── yolov8face.h
└── python
    ├── face_68landmarks.py
    ├── face_enhancer.py
    ├── face_recognizer.py
    ├── face_swap.py
    ├── images
    │   ├── 1.jpg
    │   ├── 14.jpg
    │   ├── 2.jpg
    │   ├── 5.jpg
    │   ├── source.jpg
    │   └── target.jpg
    ├── main.py
    ├── model_matrix.npy
    ├── utils.py
    └── yolov8face.py

/README.md:
--------------------------------------------------------------------------------
1 | This project is a standalone extraction of the recently popular face-swapping program
2 | https://github.com/facefusion/facefusion. The official codebase defines too many .py files and function modules, and the nesting between modules is overly complex,
3 | so I rewrote the Python program: mine contains only 7 .py files. The face-swapping pipeline consists of 5 modules in total;
4 | apart from main.py and utils.py, each module corresponds to one .py file in my program. After that I wrote the C++ program.
5 | 
6 | The onnx files are on Baidu Netdisk, link: https://pan.baidu.com/s/12Fw2lqkhxWD5Xbk5A2Q2YQ
7 | Extraction code: sz78
8 | 
--------------------------------------------------------------------------------
/cpp/face68landmarks.cpp:
--------------------------------------------------------------------------------
1 | #include "face68landmarks.h"
2 | 
3 | using namespace cv;
4 | using namespace std;
5 | using namespace Ort;
6 | 
7 | Face68Landmarks::Face68Landmarks(string model_path)
8 | {
9 |     /// OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0); ///uncomment to enable CUDA acceleration
10 | 
11 |     sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
12 |     /// std::wstring widestr = std::wstring(model_path.begin(), model_path.end()); ////Windows version
13 |     /// ort_session = new Session(env, widestr.c_str(), sessionOptions); ////Windows version
14 |     ort_session = new Session(env, model_path.c_str(), sessionOptions); ////Linux version
15 | 
16 |     size_t numInputNodes = ort_session->GetInputCount();
17 |     size_t numOutputNodes = ort_session->GetOutputCount();
18 |     AllocatorWithDefaultOptions allocator;
19 |     for (int i = 0; i < numInputNodes; i++)
20 |     {
21 |         input_names.push_back(ort_session->GetInputName(i, allocator)); /// API of older onnxruntime versions
22 |         ////AllocatedStringPtr input_name_Ptr = ort_session->GetInputNameAllocated(i, allocator); /// API of newer onnxruntime versions
23 |         ////input_names.push_back(input_name_Ptr.get()); /// API of newer onnxruntime versions
24 |         Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i);
25 |         auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
26 |         auto input_dims = input_tensor_info.GetShape();
27 |         input_node_dims.push_back(input_dims);
28 |     }
29 |     for (int i = 0; i < numOutputNodes; i++)
30 |     {
31 |         output_names.push_back(ort_session->GetOutputName(i, allocator)); /// API of older onnxruntime versions
32 |         ////AllocatedStringPtr output_name_Ptr = ort_session->GetOutputNameAllocated(i, allocator);
33 |         ////output_names.push_back(output_name_Ptr.get()); /// API of newer onnxruntime versions
34 |         Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i);
35 |         auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
36 |         auto output_dims = output_tensor_info.GetShape();
37 |         output_node_dims.push_back(output_dims);
38 |     }
39 | 
40 |     this->input_height = input_node_dims[0][2];
41 |     this->input_width = input_node_dims[0][3];
42 | }
43 | 
44 | void Face68Landmarks::preprocess(Mat srcimg, const Bbox bounding_box)
45 | {
46 |     float sub_max = max(bounding_box.xmax - bounding_box.xmin, bounding_box.ymax - bounding_box.ymin);
47 |     const float scale = 195.f / sub_max;
48 |     const float translation[2] = 
{(256.f - (bounding_box.xmax + bounding_box.xmin) * scale) * 0.5f, (256.f - (bounding_box.ymax + bounding_box.ymin) * scale) * 0.5f};
49 |     ////equivalent of the warp_face_by_translation function in the Python program////
50 |     Mat affine_matrix = (Mat_<float>(2, 3) << scale, 0.f, translation[0], 0.f, scale, translation[1]);
51 |     Mat crop_img;
52 |     warpAffine(srcimg, crop_img, affine_matrix, Size(256, 256));
53 |     ////equivalent of the warp_face_by_translation function in the Python program////
54 |     cv::invertAffineTransform(affine_matrix, this->inv_affine_matrix);
55 | 
56 |     vector<Mat> bgrChannels(3);
57 |     split(crop_img, bgrChannels);
58 |     for (int c = 0; c < 3; c++)
59 |     {
60 |         bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / 255.0);
61 |     }
62 | 
63 |     const int image_area = this->input_height * this->input_width;
64 |     this->input_image.resize(3 * image_area);
65 |     size_t single_chn_size = image_area * sizeof(float);
66 |     memcpy(this->input_image.data(), (float *)bgrChannels[0].data, single_chn_size);
67 |     memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
68 |     memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[2].data, single_chn_size);
69 | }
70 | 
71 | vector<Point2f> Face68Landmarks::detect(Mat srcimg, const Bbox bounding_box, vector<Point2f> &face_landmark_5of68)
72 | {
73 |     this->preprocess(srcimg, bounding_box);
74 | 
75 |     std::vector<int64_t> input_img_shape = {1, 3, this->input_height, this->input_width};
76 |     Value input_tensor_ = Value::CreateTensor<float>(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size());
77 | 
78 |     Ort::RunOptions runOptions;
79 |     vector<Value> ort_outputs = this->ort_session->Run(runOptions, this->input_names.data(), &input_tensor_, 1, this->output_names.data(), output_names.size());
80 | 
81 |     float *pdata = ort_outputs[0].GetTensorMutableData<float>(); /// shape is (1, 68, 3); each row of length 3 holds one keypoint's x, y and confidence
82 |     const int num_points = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape()[1];
83 |     vector<Point2f> face_landmark_68(num_points);
84 |     for (int i = 0; i < num_points; i++)
85 |     {
86 |         float x = pdata[i * 3] / 64.0 * 256.0;
87 |         float y = pdata[i * 3 + 1] / 64.0 * 256.0;
88 |         face_landmark_68[i] = Point2f(x, y);
89 |     }
90 |     vector<Point2f> face68landmarks;
91 |     cv::transform(face_landmark_68, face68landmarks, this->inv_affine_matrix);
92 | 
93 |     ////equivalent of the convert_face_landmark_68_to_5 function in the Python program////
94 |     face_landmark_5of68.resize(5);
95 |     float x = 0, y = 0;
96 |     for (int i = 36; i < 42; i++) /// left_eye
97 |     {
98 |         x += face68landmarks[i].x;
99 |         y += face68landmarks[i].y;
100 |     }
101 |     x /= 6;
102 |     y /= 6;
103 |     face_landmark_5of68[0] = Point2f(x, y); /// left_eye
104 | 
105 |     x = 0, y = 0;
106 |     for (int i = 42; i < 48; i++) /// right_eye
107 |     {
108 |         x += face68landmarks[i].x;
109 |         y += face68landmarks[i].y;
110 |     }
111 |     x /= 6;
112 |     y /= 6;
113 |     face_landmark_5of68[1] = Point2f(x, y); /// right_eye
114 | 
115 |     face_landmark_5of68[2] = face68landmarks[30]; /// nose
116 |     face_landmark_5of68[3] = face68landmarks[48]; /// left_mouth_end
117 |     face_landmark_5of68[4] = face68landmarks[54]; /// right_mouth_end
118 |     ////equivalent of the convert_face_landmark_68_to_5 function in the Python program////
119 |     return face68landmarks;
120 | }
--------------------------------------------------------------------------------
/cpp/face68landmarks.h:
--------------------------------------------------------------------------------
1 | # ifndef DETECT_FACE68LANDMARKS
2 | # define DETECT_FACE68LANDMARKS
3 | #include <fstream>
4 | #include <sstream>
5 | #include <opencv2/imgproc.hpp>
6 | #include <opencv2/highgui.hpp>
7 | //#include <cuda_provider_factory.h> ///uncomment to enable CUDA acceleration
8 | #include <onnxruntime_cxx_api.h>
9 | #include "utils.h"
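// A minimal usage sketch for this class (paths as in main.cpp; the Bbox comes from Yolov8Face::detect):
//   Face68Landmarks landmarker("weights/2dfan4.onnx");
//   std::vector<cv::Point2f> lm5;
//   std::vector<cv::Point2f> lm68 = landmarker.detect(srcimg, box, lm5);
// detect() returns the 68 keypoints mapped back into the original image and fills
// lm5 with the 5-point reduction (eye centers, nose tip, mouth corners).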
10 | 11 | 12 | class Face68Landmarks 13 | { 14 | public: 15 | Face68Landmarks(std::string modelpath); 16 | std::vector detect(cv::Mat srcimg, const Bbox bounding_box, std::vector &face_landmark_5of68); 17 | private: 18 | void preprocess(cv::Mat img, const Bbox bounding_box); 19 | std::vector input_image; 20 | int input_height; 21 | int input_width; 22 | cv::Mat inv_affine_matrix; 23 | 24 | Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "68FaceLandMarks Detect"); 25 | Ort::Session *ort_session = nullptr; 26 | Ort::SessionOptions sessionOptions = Ort::SessionOptions(); 27 | std::vector input_names; 28 | std::vector output_names; 29 | std::vector> input_node_dims; // >=1 outputs 30 | std::vector> output_node_dims; // >=1 outputs 31 | Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); 32 | }; 33 | #endif -------------------------------------------------------------------------------- /cpp/faceenhancer.cpp: -------------------------------------------------------------------------------- 1 | #include"faceenhancer.h" 2 | 3 | using namespace cv; 4 | using namespace std; 5 | using namespace Ort; 6 | 7 | FaceEnhance::FaceEnhance(string model_path) 8 | { 9 | /// OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0); ///如果使用cuda加速,需要取消注释 10 | 11 | sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC); 12 | /// std::wstring widestr = std::wstring(model_path.begin(), model_path.end()); ////windows写法 13 | /// ort_session = new Session(env, widestr.c_str(), sessionOptions); ////windows写法 14 | ort_session = new Session(env, model_path.c_str(), sessionOptions); ////linux写法 15 | 16 | size_t numInputNodes = ort_session->GetInputCount(); 17 | size_t numOutputNodes = ort_session->GetOutputCount(); 18 | AllocatorWithDefaultOptions allocator; 19 | for (int i = 0; i < numInputNodes; i++) 20 | { 21 | input_names.push_back(ort_session->GetInputName(i, allocator)); /// 低版本onnxruntime的接口函数 22 | ////AllocatedStringPtr input_name_Ptr = ort_session->GetInputNameAllocated(i, allocator); /// 高版本onnxruntime的接口函数 23 | ////input_names.push_back(input_name_Ptr.get()); /// 高版本onnxruntime的接口函数 24 | Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i); 25 | auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo(); 26 | auto input_dims = input_tensor_info.GetShape(); 27 | input_node_dims.push_back(input_dims); 28 | } 29 | for (int i = 0; i < numOutputNodes; i++) 30 | { 31 | output_names.push_back(ort_session->GetOutputName(i, allocator)); /// 低版本onnxruntime的接口函数 32 | ////AllocatedStringPtr output_name_Ptr= ort_session->GetInputNameAllocated(i, allocator); 33 | ////output_names.push_back(output_name_Ptr.get()); /// 高版本onnxruntime的接口函数 34 | Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i); 35 | auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo(); 36 | auto output_dims = output_tensor_info.GetShape(); 37 | output_node_dims.push_back(output_dims); 38 | } 39 | 40 | this->input_height = input_node_dims[0][2]; 41 | this->input_width = input_node_dims[0][3]; 42 | 43 | ////在这里就直接定义了,没有像python程序里的那样normed_template = TEMPLATES.get(template) * crop_size 44 | this->normed_template.emplace_back(Point2f(192.98138112, 239.94707968)); 45 | this->normed_template.emplace_back(Point2f(318.90276864, 240.19360256)); 46 | this->normed_template.emplace_back(Point2f(256.63415808, 314.01934848)); 47 | this->normed_template.emplace_back(Point2f(201.26116864, 371.410432)); 48 | 
this->normed_template.emplace_back(Point2f(313.0890496, 371.1511808)); 49 | } 50 | 51 | void FaceEnhance::preprocess(Mat srcimg, const vector face_landmark_5, Mat& affine_matrix, Mat& box_mask) 52 | { 53 | Mat crop_img; 54 | affine_matrix = warp_face_by_face_landmark_5(srcimg, crop_img, face_landmark_5, this->normed_template, Size(512, 512)); 55 | const int crop_size[2] = {crop_img.cols, crop_img.rows}; 56 | box_mask = create_static_box_mask(crop_size, this->FACE_MASK_BLUR, this->FACE_MASK_PADDING); 57 | 58 | vector bgrChannels(3); 59 | split(crop_img, bgrChannels); 60 | for (int c = 0; c < 3; c++) 61 | { 62 | bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / (255.0*0.5), -1.0); 63 | } 64 | 65 | const int image_area = this->input_height * this->input_width; 66 | this->input_image.resize(3 * image_area); 67 | size_t single_chn_size = image_area * sizeof(float); 68 | memcpy(this->input_image.data(), (float *)bgrChannels[2].data, single_chn_size); ///rgb顺序 69 | memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size); 70 | memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[0].data, single_chn_size); 71 | } 72 | 73 | Mat FaceEnhance::process(Mat target_img, const vector target_landmark_5) 74 | { 75 | Mat affine_matrix; 76 | Mat box_mask; 77 | this->preprocess(target_img, target_landmark_5, affine_matrix, box_mask); 78 | 79 | std::vector input_img_shape = {1, 3, this->input_height, this->input_width}; 80 | Value input_tensor_ = Value::CreateTensor(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size()); 81 | 82 | Ort::RunOptions runOptions; 83 | vector ort_outputs = this->ort_session->Run(runOptions, this->input_names.data(), &input_tensor_, 1, this->output_names.data(), output_names.size()); 84 | 85 | float* pdata = ort_outputs[0].GetTensorMutableData(); 86 | std::vector outs_shape = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape(); 87 | const int out_h = outs_shape[2]; 88 | const int out_w = outs_shape[3]; 89 | const int channel_step = out_h * out_w; 90 | Mat rmat(out_h, out_w, CV_32FC1, pdata); 91 | Mat gmat(out_h, out_w, CV_32FC1, pdata + channel_step); 92 | Mat bmat(out_h, out_w, CV_32FC1, pdata + 2 * channel_step); 93 | rmat.setTo(-1, rmat < -1); 94 | rmat.setTo(1, rmat > 1); 95 | rmat = (rmat+1)*0.5; 96 | gmat.setTo(-1, gmat < -1); 97 | gmat.setTo(1, gmat > 1); 98 | gmat = (gmat+1)*0.5; 99 | bmat.setTo(-1, bmat < -1); 100 | bmat.setTo(1, bmat > 1); 101 | bmat = (bmat+1)*0.5; 102 | 103 | rmat *= 255.f; 104 | gmat *= 255.f; 105 | bmat *= 255.f; 106 | rmat.setTo(0, rmat < 0); 107 | rmat.setTo(255, rmat > 255); 108 | gmat.setTo(0, gmat < 0); 109 | gmat.setTo(255, gmat > 255); 110 | bmat.setTo(0, bmat < 0); 111 | bmat.setTo(255, bmat > 255); 112 | 113 | vector channel_mats(3); 114 | channel_mats[0] = bmat; 115 | channel_mats[1] = gmat; 116 | channel_mats[2] = rmat; 117 | Mat result; 118 | merge(channel_mats, result); 119 | result.convertTo(result, CV_8UC3); 120 | 121 | box_mask.setTo(0, box_mask < 0); 122 | box_mask.setTo(1, box_mask > 1); 123 | Mat paste_frame = paste_back(target_img, result, box_mask, affine_matrix); 124 | Mat dstimg = blend_frame(target_img, paste_frame); 125 | return dstimg; 126 | } -------------------------------------------------------------------------------- /cpp/faceenhancer.h: -------------------------------------------------------------------------------- 1 | # ifndef FACEENHANCE 2 | # define FACEENHANCE 3 | #include 4 | #include 5 | 
#include "opencv2/opencv.hpp" 6 | //#include ///如果使用cuda加速,需要取消注释 7 | #include 8 | #include"utils.h" 9 | 10 | 11 | class FaceEnhance 12 | { 13 | public: 14 | FaceEnhance(std::string modelpath); 15 | cv::Mat process(cv::Mat target_img, const std::vector target_landmark_5); 16 | private: 17 | void preprocess(cv::Mat target_img, const std::vector face_landmark_5, cv::Mat& affine_matrix, cv::Mat& box_mask); 18 | std::vector input_image; 19 | int input_height; 20 | int input_width; 21 | std::vector normed_template; 22 | const float FACE_MASK_BLUR = 0.3; 23 | const int FACE_MASK_PADDING[4] = {0, 0, 0, 0}; 24 | 25 | Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "Face Enhance"); 26 | Ort::Session *ort_session = nullptr; 27 | Ort::SessionOptions sessionOptions = Ort::SessionOptions(); 28 | std::vector input_names; 29 | std::vector output_names; 30 | std::vector> input_node_dims; // >=1 outputs 31 | std::vector> output_node_dims; // >=1 outputs 32 | Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); 33 | }; 34 | #endif -------------------------------------------------------------------------------- /cpp/facerecognizer.cpp: -------------------------------------------------------------------------------- 1 | #include"facerecognizer.h" 2 | 3 | using namespace cv; 4 | using namespace std; 5 | using namespace Ort; 6 | 7 | FaceEmbdding::FaceEmbdding(string model_path) 8 | { 9 | /// OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0); ///如果使用cuda加速,需要取消注释 10 | 11 | sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC); 12 | /// std::wstring widestr = std::wstring(model_path.begin(), model_path.end()); ////windows写法 13 | /// ort_session = new Session(env, widestr.c_str(), sessionOptions); ////windows写法 14 | ort_session = new Session(env, model_path.c_str(), sessionOptions); ////linux写法 15 | 16 | size_t numInputNodes = ort_session->GetInputCount(); 17 | size_t numOutputNodes = ort_session->GetOutputCount(); 18 | AllocatorWithDefaultOptions allocator; 19 | for (int i = 0; i < numInputNodes; i++) 20 | { 21 | input_names.push_back(ort_session->GetInputName(i, allocator)); /// 低版本onnxruntime的接口函数 22 | ////AllocatedStringPtr input_name_Ptr = ort_session->GetInputNameAllocated(i, allocator); /// 高版本onnxruntime的接口函数 23 | ////input_names.push_back(input_name_Ptr.get()); /// 高版本onnxruntime的接口函数 24 | Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i); 25 | auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo(); 26 | auto input_dims = input_tensor_info.GetShape(); 27 | input_node_dims.push_back(input_dims); 28 | } 29 | for (int i = 0; i < numOutputNodes; i++) 30 | { 31 | output_names.push_back(ort_session->GetOutputName(i, allocator)); /// 低版本onnxruntime的接口函数 32 | ////AllocatedStringPtr output_name_Ptr= ort_session->GetInputNameAllocated(i, allocator); 33 | ////output_names.push_back(output_name_Ptr.get()); /// 高版本onnxruntime的接口函数 34 | Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i); 35 | auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo(); 36 | auto output_dims = output_tensor_info.GetShape(); 37 | output_node_dims.push_back(output_dims); 38 | } 39 | 40 | this->input_height = input_node_dims[0][2]; 41 | this->input_width = input_node_dims[0][3]; 42 | ////在这里就直接定义了,没有像python程序里的那样normed_template = TEMPLATES.get(template) * crop_size 43 | this->normed_template.emplace_back(Point2f(38.29459984, 51.69630032)); 44 | 
this->normed_template.emplace_back(Point2f(73.53180016, 51.50140016)); 45 | this->normed_template.emplace_back(Point2f(56.0252, 71.73660032)); 46 | this->normed_template.emplace_back(Point2f(41.54929968, 92.36549952)); 47 | this->normed_template.emplace_back(Point2f(70.72989952, 92.20409968)); 48 | } 49 | 50 | void FaceEmbdding::preprocess(Mat srcimg, const vector face_landmark_5) 51 | { 52 | Mat crop_img; 53 | warp_face_by_face_landmark_5(srcimg, crop_img, face_landmark_5, this->normed_template, Size(112, 112)); 54 | /*vector inliers(face_landmark_5.size(), 0); 55 | Mat affine_matrix = cv::estimateAffinePartial2D(face_landmark_5, this->normed_template, cv::noArray(), cv::RANSAC, 100.0); 56 | Mat crop_img; 57 | Size crop_size(112, 112); 58 | warpAffine(srcimg, crop_img, affine_matrix, crop_size, cv::INTER_AREA, cv::BORDER_REPLICATE);*/ 59 | 60 | vector bgrChannels(3); 61 | split(crop_img, bgrChannels); 62 | for (int c = 0; c < 3; c++) 63 | { 64 | bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / 127.5, -1.0); 65 | } 66 | 67 | const int image_area = this->input_height * this->input_width; 68 | this->input_image.resize(3 * image_area); 69 | size_t single_chn_size = image_area * sizeof(float); 70 | memcpy(this->input_image.data(), (float *)bgrChannels[2].data, single_chn_size); 71 | memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size); 72 | memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[0].data, single_chn_size); 73 | } 74 | 75 | vector FaceEmbdding::detect(Mat srcimg, const vector face_landmark_5) 76 | { 77 | this->preprocess(srcimg, face_landmark_5); 78 | 79 | std::vector input_img_shape = {1, 3, this->input_height, this->input_width}; 80 | Value input_tensor_ = Value::CreateTensor(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size()); 81 | 82 | Ort::RunOptions runOptions; 83 | vector ort_outputs = this->ort_session->Run(runOptions, this->input_names.data(), &input_tensor_, 1, this->output_names.data(), output_names.size()); 84 | 85 | float *pdata = ort_outputs[0].GetTensorMutableData(); /// 形状是(1, 512) 86 | const int len_feature = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape()[1]; 87 | vector embedding(len_feature); 88 | memcpy(embedding.data(), pdata, len_feature*sizeof(float)); 89 | return embedding; 90 | } -------------------------------------------------------------------------------- /cpp/facerecognizer.h: -------------------------------------------------------------------------------- 1 | # ifndef FACERECOGNIZER 2 | # define FACERECOGNIZER 3 | #include 4 | #include 5 | //#include ///如果使用cuda加速,需要取消注释 6 | #include 7 | #include"utils.h" 8 | 9 | 10 | class FaceEmbdding 11 | { 12 | public: 13 | FaceEmbdding(std::string modelpath); 14 | std::vector detect(cv::Mat srcimg, const std::vector face_landmark_5); 15 | private: 16 | void preprocess(cv::Mat img, const std::vector face_landmark_5); 17 | std::vector input_image; 18 | int input_height; 19 | int input_width; 20 | std::vector normed_template; 21 | 22 | Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "Face Feature Extract"); 23 | Ort::Session *ort_session = nullptr; 24 | Ort::SessionOptions sessionOptions = Ort::SessionOptions(); 25 | std::vector input_names; 26 | std::vector output_names; 27 | std::vector> input_node_dims; // >=1 outputs 28 | std::vector> output_node_dims; // >=1 outputs 29 | Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); 30 
| }; 31 | #endif -------------------------------------------------------------------------------- /cpp/faceswap.cpp: -------------------------------------------------------------------------------- 1 | #include"faceswap.h" 2 | 3 | using namespace cv; 4 | using namespace std; 5 | using namespace Ort; 6 | 7 | SwapFace::SwapFace(string model_path) 8 | { 9 | /// OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0); ///如果使用cuda加速,需要取消注释 10 | 11 | sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC); 12 | /// std::wstring widestr = std::wstring(model_path.begin(), model_path.end()); ////windows写法 13 | /// ort_session = new Session(env, widestr.c_str(), sessionOptions); ////windows写法 14 | ort_session = new Session(env, model_path.c_str(), sessionOptions); ////linux写法 15 | 16 | size_t numInputNodes = ort_session->GetInputCount(); 17 | size_t numOutputNodes = ort_session->GetOutputCount(); 18 | AllocatorWithDefaultOptions allocator; 19 | for (int i = 0; i < numInputNodes; i++) 20 | { 21 | input_names.push_back(ort_session->GetInputName(i, allocator)); /// 低版本onnxruntime的接口函数 22 | ////AllocatedStringPtr input_name_Ptr = ort_session->GetInputNameAllocated(i, allocator); /// 高版本onnxruntime的接口函数 23 | ////input_names.push_back(input_name_Ptr.get()); /// 高版本onnxruntime的接口函数 24 | Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i); 25 | auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo(); 26 | auto input_dims = input_tensor_info.GetShape(); 27 | input_node_dims.push_back(input_dims); 28 | } 29 | 30 | for (int i = 0; i < numOutputNodes; i++) 31 | { 32 | output_names.push_back(ort_session->GetOutputName(i, allocator)); /// 低版本onnxruntime的接口函数 33 | ////AllocatedStringPtr output_name_Ptr= ort_session->GetInputNameAllocated(i, allocator); 34 | ////output_names.push_back(output_name_Ptr.get()); /// 高版本onnxruntime的接口函数 35 | Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i); 36 | auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo(); 37 | auto output_dims = output_tensor_info.GetShape(); 38 | output_node_dims.push_back(output_dims); 39 | } 40 | 41 | this->input_height = input_node_dims[0][2]; 42 | this->input_width = input_node_dims[0][3]; 43 | 44 | const int length = this->len_feature*this->len_feature; 45 | this->model_matrix = new float[length]; 46 | cout<<"start read model_matrix.bin"<model_matrix, sizeof(float), length, fp);//导入数据 49 | fclose(fp);//关闭文件 50 | cout<<"read model_matrix.bin finish"<normed_template.emplace_back(Point2f(46.29459968, 51.69629952)); 54 | this->normed_template.emplace_back(Point2f(81.53180032, 51.50140032)); 55 | this->normed_template.emplace_back(Point2f(64.02519936, 71.73660032)); 56 | this->normed_template.emplace_back(Point2f(49.54930048, 92.36550016)); 57 | this->normed_template.emplace_back(Point2f(78.72989952, 92.20409984)); 58 | } 59 | 60 | SwapFace::~SwapFace() 61 | { 62 | delete[] this->model_matrix; 63 | this->model_matrix = nullptr; 64 | this->normed_template.clear(); 65 | } 66 | 67 | void SwapFace::preprocess(Mat srcimg, const vector face_landmark_5, const vector source_face_embedding, Mat& affine_matrix, Mat& box_mask) 68 | { 69 | Mat crop_img; 70 | affine_matrix = warp_face_by_face_landmark_5(srcimg, crop_img, face_landmark_5, this->normed_template, Size(128, 128)); 71 | const int crop_size[2] = {crop_img.cols, crop_img.rows}; 72 | box_mask = create_static_box_mask(crop_size, this->FACE_MASK_BLUR, this->FACE_MASK_PADDING); 73 | 74 | vector bgrChannels(3); 75 | 
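// A sketch of what the rest of preprocess() does, mirroring face_swap.py: the BGR crop is
// normalized into the model's CHW float input, and the 512-d ArcFace embedding is projected
// through model_matrix and divided by the embedding's L2 norm, i.e.
// input_embedding = (source_face_embedding . model_matrix) / ||source_face_embedding||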
split(crop_img, bgrChannels); 76 | for (int c = 0; c < 3; c++) 77 | { 78 | bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / (255.0*this->INSWAPPER_128_MODEL_STD[c]), -this->INSWAPPER_128_MODEL_MEAN[c]/this->INSWAPPER_128_MODEL_STD[c]); 79 | } 80 | 81 | const int image_area = this->input_height * this->input_width; 82 | this->input_image.resize(3 * image_area); 83 | size_t single_chn_size = image_area * sizeof(float); 84 | memcpy(this->input_image.data(), (float *)bgrChannels[2].data, single_chn_size); ///rgb顺序 85 | memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size); 86 | memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[0].data, single_chn_size); 87 | 88 | float linalg_norm = 0; 89 | for(int i=0;ilen_feature;i++) 90 | { 91 | linalg_norm += powf(source_face_embedding[i], 2); 92 | } 93 | linalg_norm = sqrt(linalg_norm); 94 | this->input_embedding.resize(this->len_feature); 95 | for(int i=0;ilen_feature;i++) 96 | { 97 | float sum=0; 98 | for(int j=0;jlen_feature;j++) 99 | { 100 | sum += (source_face_embedding[j]*this->model_matrix[j*this->len_feature+i]); 101 | } 102 | this->input_embedding[i] = sum/linalg_norm; 103 | } 104 | } 105 | 106 | Mat SwapFace::process(Mat target_img, const vector source_face_embedding, const vector target_landmark_5) 107 | { 108 | Mat affine_matrix; 109 | Mat box_mask; 110 | this->preprocess(target_img, target_landmark_5, source_face_embedding, affine_matrix, box_mask); 111 | 112 | std::vector inputs_tensor; 113 | std::vector input_img_shape = {1, 3, this->input_height, this->input_width}; 114 | inputs_tensor.emplace_back(Value::CreateTensor(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size())); 115 | std::vector input_embedding_shape = {1, this->len_feature}; 116 | inputs_tensor.emplace_back(Value::CreateTensor(memory_info_handler, this->input_embedding.data(), this->input_embedding.size(), input_embedding_shape.data(), input_embedding_shape.size())); 117 | 118 | 119 | Ort::RunOptions runOptions; 120 | vector ort_outputs = this->ort_session->Run(runOptions, this->input_names.data(), inputs_tensor.data(), inputs_tensor.size(), this->output_names.data(), output_names.size()); 121 | 122 | float* pdata = ort_outputs[0].GetTensorMutableData(); 123 | std::vector outs_shape = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape(); 124 | const int out_h = outs_shape[2]; 125 | const int out_w = outs_shape[3]; 126 | const int channel_step = out_h * out_w; 127 | Mat rmat(out_h, out_w, CV_32FC1, pdata); 128 | Mat gmat(out_h, out_w, CV_32FC1, pdata + channel_step); 129 | Mat bmat(out_h, out_w, CV_32FC1, pdata + 2 * channel_step); 130 | rmat *= 255.f; 131 | gmat *= 255.f; 132 | bmat *= 255.f; 133 | rmat.setTo(0, rmat < 0); 134 | rmat.setTo(255, rmat > 255); 135 | gmat.setTo(0, gmat < 0); 136 | gmat.setTo(255, gmat > 255); 137 | bmat.setTo(0, bmat < 0); 138 | bmat.setTo(255, bmat > 255); 139 | 140 | vector channel_mats(3); 141 | channel_mats[0] = bmat; 142 | channel_mats[1] = gmat; 143 | channel_mats[2] = rmat; 144 | Mat result; 145 | merge(channel_mats, result); 146 | 147 | box_mask.setTo(0, box_mask < 0); 148 | box_mask.setTo(1, box_mask > 1); 149 | Mat dstimg = paste_back(target_img, result, box_mask, affine_matrix); 150 | return dstimg; 151 | } -------------------------------------------------------------------------------- /cpp/faceswap.h: -------------------------------------------------------------------------------- 1 | # ifndef FACESWAP 
2 | # define FACESWAP 3 | #include 4 | #include 5 | #include "opencv2/opencv.hpp" 6 | //#include ///如果使用cuda加速,需要取消注释 7 | #include 8 | #include"utils.h" 9 | 10 | 11 | class SwapFace 12 | { 13 | public: 14 | SwapFace(std::string modelpath); 15 | cv::Mat process(cv::Mat target_img, const std::vector source_face_embedding, const std::vector target_landmark_5); 16 | ~SwapFace(); // 析构函数, 释放内存 17 | private: 18 | void preprocess(cv::Mat target_img, const std::vector face_landmark_5, const std::vector source_face_embedding, cv::Mat& affine_matrix, cv::Mat& box_mask); 19 | std::vector input_image; 20 | std::vector input_embedding; 21 | int input_height; 22 | int input_width; 23 | const int len_feature = 512; 24 | float* model_matrix; 25 | std::vector normed_template; 26 | const float FACE_MASK_BLUR = 0.3; 27 | const int FACE_MASK_PADDING[4] = {0, 0, 0, 0}; 28 | const float INSWAPPER_128_MODEL_MEAN[3] = {0.0, 0.0, 0.0}; 29 | const float INSWAPPER_128_MODEL_STD[3] = {1.0, 1.0, 1.0}; 30 | 31 | Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "Face Swap"); 32 | Ort::Session *ort_session = nullptr; 33 | Ort::SessionOptions sessionOptions = Ort::SessionOptions(); 34 | std::vector input_names; 35 | std::vector output_names; 36 | std::vector> input_node_dims; // >=1 outputs 37 | std::vector> output_node_dims; // >=1 outputs 38 | Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); 39 | }; 40 | #endif -------------------------------------------------------------------------------- /cpp/images/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/cpp/images/1.jpg -------------------------------------------------------------------------------- /cpp/images/14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/cpp/images/14.jpg -------------------------------------------------------------------------------- /cpp/images/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/cpp/images/2.jpg -------------------------------------------------------------------------------- /cpp/images/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/cpp/images/5.jpg -------------------------------------------------------------------------------- /cpp/images/source.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/cpp/images/source.jpg -------------------------------------------------------------------------------- /cpp/images/target.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/cpp/images/target.jpg -------------------------------------------------------------------------------- /cpp/main.cpp: -------------------------------------------------------------------------------- 1 | #include "yolov8face.h" 2 | #include "face68landmarks.h" 3 | #include "facerecognizer.h" 4 | #include 
"faceswap.h" 5 | #include "faceenhancer.h" 6 | 7 | using namespace cv; 8 | using namespace std; 9 | 10 | int main() 11 | { 12 | string source_path = "/project/faceswap-cpp/images/5.jpg"; 13 | string target_path = "/project/faceswap-cpp/images/target.jpg"; 14 | 15 | ////图片路径和onnx文件的路径,要确保写正确,才能使程序正常运行的 16 | Yolov8Face detect_face_net("/project/faceswap-cpp/weights/yoloface_8n.onnx"); 17 | Face68Landmarks detect_68landmarks_net("/project/faceswap-cpp/weights/2dfan4.onnx"); 18 | FaceEmbdding face_embedding_net("/project/faceswap-cpp/weights/arcface_w600k_r50.onnx"); 19 | SwapFace swap_face_net("/project/faceswap-cpp/weights/inswapper_128.onnx"); 20 | FaceEnhance enhance_face_net("/project/faceswap-cpp/weights/gfpgan_1.4.onnx"); 21 | 22 | Mat source_img = imread(source_path); 23 | Mat target_img = imread(target_path); 24 | 25 | vector boxes; 26 | detect_face_net.detect(source_img, boxes); 27 | int position = 0; ////一张图片里可能有多个人脸,这里只考虑1个人脸的情况 28 | vector face_landmark_5of68; 29 | vector face68landmarks = detect_68landmarks_net.detect(source_img, boxes[position], face_landmark_5of68); 30 | vector source_face_embedding = face_embedding_net.detect(source_img, face_landmark_5of68); 31 | 32 | detect_face_net.detect(target_img, boxes); 33 | position = 0; ////一张图片里可能有多个人脸,这里只考虑1个人脸的情况 34 | vector target_landmark_5; 35 | detect_68landmarks_net.detect(target_img, boxes[position], target_landmark_5); 36 | 37 | Mat swapimg = swap_face_net.process(target_img, source_face_embedding, target_landmark_5); 38 | Mat resultimg = enhance_face_net.process(swapimg, target_landmark_5); 39 | 40 | imwrite("resultimg.jpg", resultimg); 41 | 42 | /*static const string kWinName = "Deep learning face swap use onnxruntime"; 43 | namedWindow(kWinName, WINDOW_NORMAL); 44 | imshow(kWinName, resultimg); 45 | waitKey(0); 46 | destroyAllWindows();*/ 47 | } -------------------------------------------------------------------------------- /cpp/model_matrix.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/cpp/model_matrix.bin -------------------------------------------------------------------------------- /cpp/utils.cpp: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | 3 | using namespace std; 4 | using namespace cv; 5 | 6 | float GetIoU(const Bbox box1, const Bbox box2) 7 | { 8 | float x1 = max(box1.xmin, box2.xmin); 9 | float y1 = max(box1.ymin, box2.ymin); 10 | float x2 = min(box1.xmax, box2.xmax); 11 | float y2 = min(box1.ymax, box2.ymax); 12 | float w = max(0.f, x2 - x1); 13 | float h = max(0.f, y2 - y1); 14 | float over_area = w * h; 15 | if (over_area == 0) 16 | return 0.0; 17 | float union_area = (box1.xmax - box1.xmin) * (box1.ymax - box1.ymin) + (box2.xmax - box2.xmin) * (box2.ymax - box2.ymin) - over_area; 18 | return over_area / union_area; 19 | } 20 | 21 | vector nms(vector boxes, vector confidences, const float nms_thresh) 22 | { 23 | sort(confidences.begin(), confidences.end(), [&confidences](size_t index_1, size_t index_2) 24 | { return confidences[index_1] > confidences[index_2]; }); 25 | const int num_box = confidences.size(); 26 | vector isSuppressed(num_box, false); 27 | for (int i = 0; i < num_box; ++i) 28 | { 29 | if (isSuppressed[i]) 30 | { 31 | continue; 32 | } 33 | for (int j = i + 1; j < num_box; ++j) 34 | { 35 | if (isSuppressed[j]) 36 | { 37 | continue; 38 | } 39 | 40 | float ovr = GetIoU(boxes[i], boxes[j]); 41 | 
if (ovr > nms_thresh)
42 |             {
43 |                 isSuppressed[order[j]] = true;
44 |             }
45 |         }
46 |     }
47 | 
48 |     vector<int> keep_inds;
49 |     for (int i = 0; i < isSuppressed.size(); i++)
50 |     {
51 |         if (!isSuppressed[i])
52 |         {
53 |             keep_inds.emplace_back(i);
54 |         }
55 |     }
56 |     return keep_inds;
57 | }
58 | 
59 | Mat warp_face_by_face_landmark_5(const Mat temp_vision_frame, Mat &crop_img, const vector<Point2f> face_landmark_5, const vector<Point2f> normed_template, const Size crop_size)
60 | {
61 |     vector<uchar> inliers(face_landmark_5.size(), 0);
62 |     Mat affine_matrix = cv::estimateAffinePartial2D(face_landmark_5, normed_template, cv::noArray(), cv::RANSAC, 100.0);
63 |     warpAffine(temp_vision_frame, crop_img, affine_matrix, crop_size, cv::INTER_AREA, cv::BORDER_REPLICATE);
64 |     return affine_matrix;
65 | }
66 | 
67 | Mat create_static_box_mask(const int *crop_size, const float face_mask_blur, const int *face_mask_padding)
68 | {
69 |     const float blur_amount = int(crop_size[0] * 0.5 * face_mask_blur);
70 |     const int blur_area = max(int(blur_amount / 2), 1);
71 |     Mat box_mask = Mat::ones(crop_size[0], crop_size[1], CV_32FC1);
72 | 
73 |     int sub = max(blur_area, int(crop_size[1] * face_mask_padding[0] / 100));
74 |     // Mat roi = box_mask(cv::Rect(0,0,sub,crop_size[1]));
75 |     box_mask(cv::Rect(0, 0, crop_size[1], sub)).setTo(0);
76 | 
77 |     sub = crop_size[0] - max(blur_area, int(crop_size[1] * face_mask_padding[2] / 100));
78 |     box_mask(cv::Rect(0, sub, crop_size[1], crop_size[0] - sub)).setTo(0);
79 | 
80 |     sub = max(blur_area, int(crop_size[0] * face_mask_padding[3] / 100));
81 |     box_mask(cv::Rect(0, 0, sub, crop_size[0])).setTo(0);
82 | 
83 |     sub = crop_size[1] - max(blur_area, int(crop_size[0] * face_mask_padding[1] / 100));
84 |     box_mask(cv::Rect(sub, 0, crop_size[1] - sub, crop_size[0])).setTo(0);
85 | 
86 |     if (blur_amount > 0)
87 |     {
88 |         GaussianBlur(box_mask, box_mask, Size(0, 0), blur_amount * 0.25);
89 |     }
90 |     return box_mask;
91 | }
92 | 
93 | Mat paste_back(Mat temp_vision_frame, Mat crop_vision_frame, Mat crop_mask, Mat affine_matrix)
94 | {
95 |     Mat inverse_matrix;
96 |     cv::invertAffineTransform(affine_matrix, inverse_matrix);
97 |     Mat inverse_mask;
98 |     Size temp_size(temp_vision_frame.cols, temp_vision_frame.rows);
99 |     warpAffine(crop_mask, inverse_mask, inverse_matrix, temp_size);
100 |     inverse_mask.setTo(0, inverse_mask < 0);
101 |     inverse_mask.setTo(1, inverse_mask > 1);
102 |     Mat inverse_vision_frame;
103 |     warpAffine(crop_vision_frame, inverse_vision_frame, inverse_matrix, temp_size, cv::INTER_LINEAR, cv::BORDER_REPLICATE);
104 | 
105 |     vector<Mat> inverse_vision_frame_bgrs(3);
106 |     split(inverse_vision_frame, inverse_vision_frame_bgrs);
107 |     vector<Mat> temp_vision_frame_bgrs(3);
108 |     split(temp_vision_frame, temp_vision_frame_bgrs);
109 |     for (int c = 0; c < 3; c++)
110 |     {
111 |         inverse_vision_frame_bgrs[c].convertTo(inverse_vision_frame_bgrs[c], CV_32FC1); ////mind the data-type conversion, otherwise the element-wise multiplication below throws an error
112 |         temp_vision_frame_bgrs[c].convertTo(temp_vision_frame_bgrs[c], CV_32FC1); ////mind the data-type conversion, otherwise the element-wise multiplication below throws an error
113 |     }
114 |     vector<Mat> channel_mats(3);
115 | 
116 |     channel_mats[0] = inverse_mask.mul(inverse_vision_frame_bgrs[0]) + temp_vision_frame_bgrs[0].mul(1 - inverse_mask);
117 |     channel_mats[1] = inverse_mask.mul(inverse_vision_frame_bgrs[1]) + temp_vision_frame_bgrs[1].mul(1 - inverse_mask);
118 |     channel_mats[2] = inverse_mask.mul(inverse_vision_frame_bgrs[2]) + temp_vision_frame_bgrs[2].mul(1 - inverse_mask);
119 | 
120 |     cv::Mat paste_vision_frame;
121 |     merge(channel_mats, paste_vision_frame);
122 | 
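// channel_mats now holds inverse_mask*warped + (1-inverse_mask)*original for each BGR
// channel; the merged float frame is converted back to 8-bit below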
paste_vision_frame.convertTo(paste_vision_frame, CV_8UC3); 123 | return paste_vision_frame; 124 | } 125 | 126 | Mat blend_frame(Mat temp_vision_frame, Mat paste_vision_frame, const int FACE_ENHANCER_BLEND) 127 | { 128 | const float face_enhancer_blend = 1 - ((float)FACE_ENHANCER_BLEND / 100.f); 129 | Mat dstimg; 130 | cv::addWeighted(temp_vision_frame, face_enhancer_blend, paste_vision_frame, 1 - face_enhancer_blend, 0, dstimg); 131 | return dstimg; 132 | } -------------------------------------------------------------------------------- /cpp/utils.h: -------------------------------------------------------------------------------- 1 | # ifndef UTILS 2 | # define UTILS 3 | #include 4 | #include 5 | #include 6 | #include "opencv2/opencv.hpp" 7 | 8 | typedef struct 9 | { 10 | float xmin; 11 | float ymin; 12 | float xmax; 13 | float ymax; 14 | } Bbox; 15 | 16 | float GetIoU(const Bbox box1, const Bbox box2); 17 | std::vector nms(std::vector boxes, std::vector confidences, const float nms_thresh); 18 | cv::Mat warp_face_by_face_landmark_5(const cv::Mat temp_vision_frame, cv::Mat &crop_img, const std::vector face_landmark_5, const std::vector normed_template, const cv::Size crop_size); 19 | cv::Mat create_static_box_mask(const int *crop_size, const float face_mask_blur, const int *face_mask_padding); 20 | cv::Mat paste_back(cv::Mat temp_vision_frame, cv::Mat crop_vision_frame, cv::Mat crop_mask, cv::Mat affine_matrix); 21 | cv::Mat blend_frame(cv::Mat temp_vision_frame, cv::Mat paste_vision_frame, const int FACE_ENHANCER_BLEND=80); 22 | #endif -------------------------------------------------------------------------------- /cpp/yolov8face.cpp: -------------------------------------------------------------------------------- 1 | #include "yolov8face.h" 2 | 3 | using namespace cv; 4 | using namespace std; 5 | using namespace Ort; 6 | 7 | Yolov8Face::Yolov8Face(string model_path, const float conf_thres, const float iou_thresh) 8 | { 9 | /// OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0); ///如果使用cuda加速,需要取消注释 10 | 11 | sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC); 12 | /// std::wstring widestr = std::wstring(model_path.begin(), model_path.end()); ////windows写法 13 | /// ort_session = new Session(env, widestr.c_str(), sessionOptions); ////windows写法 14 | ort_session = new Session(env, model_path.c_str(), sessionOptions); ////linux写法 15 | 16 | size_t numInputNodes = ort_session->GetInputCount(); 17 | size_t numOutputNodes = ort_session->GetOutputCount(); 18 | AllocatorWithDefaultOptions allocator; 19 | for (int i = 0; i < numInputNodes; i++) 20 | { 21 | input_names.push_back(ort_session->GetInputName(i, allocator)); ///低版本onnxruntime的接口函数 22 | ////AllocatedStringPtr input_name_Ptr = ort_session->GetInputNameAllocated(i, allocator); /// 高版本onnxruntime的接口函数 23 | ////input_names.push_back(input_name_Ptr.get()); /// 高版本onnxruntime的接口函数 24 | Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i); 25 | auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo(); 26 | auto input_dims = input_tensor_info.GetShape(); 27 | input_node_dims.push_back(input_dims); 28 | } 29 | for (int i = 0; i < numOutputNodes; i++) 30 | { 31 | output_names.push_back(ort_session->GetOutputName(i, allocator)); ///低版本onnxruntime的接口函数 32 | ////AllocatedStringPtr output_name_Ptr= ort_session->GetInputNameAllocated(i, allocator); 33 | ////output_names.push_back(output_name_Ptr.get()); /// 高版本onnxruntime的接口函数 34 | Ort::TypeInfo output_type_info = 
ort_session->GetOutputTypeInfo(i);
35 |         auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
36 |         auto output_dims = output_tensor_info.GetShape();
37 |         output_node_dims.push_back(output_dims);
38 |     }
39 | 
40 |     this->input_height = input_node_dims[0][2];
41 |     this->input_width = input_node_dims[0][3];
42 |     this->conf_threshold = conf_thres;
43 |     this->iou_threshold = iou_thresh;
44 | }
45 | 
46 | void Yolov8Face::preprocess(Mat srcimg)
47 | {
48 |     const int height = srcimg.rows;
49 |     const int width = srcimg.cols;
50 |     Mat temp_image = srcimg.clone();
51 |     if (height > this->input_height || width > this->input_width)
52 |     {
53 |         const float scale = std::min((float)this->input_height / height, (float)this->input_width / width);
54 |         Size new_size = Size(int(width * scale), int(height * scale));
55 |         resize(srcimg, temp_image, new_size);
56 |     }
57 |     this->ratio_height = (float)height / temp_image.rows;
58 |     this->ratio_width = (float)width / temp_image.cols;
59 |     Mat input_img;
60 |     copyMakeBorder(temp_image, input_img, 0, this->input_height - temp_image.rows, 0, this->input_width - temp_image.cols, BORDER_CONSTANT, 0);
61 | 
62 |     vector<Mat> bgrChannels(3);
63 |     split(input_img, bgrChannels);
64 |     for (int c = 0; c < 3; c++)
65 |     {
66 |         bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / 128.0, -127.5 / 128.0);
67 |     }
68 | 
69 |     const int image_area = this->input_height * this->input_width;
70 |     this->input_image.resize(3 * image_area);
71 |     size_t single_chn_size = image_area * sizeof(float);
72 |     memcpy(this->input_image.data(), (float *)bgrChannels[0].data, single_chn_size);
73 |     memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
74 |     memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[2].data, single_chn_size);
75 | }
76 | 
77 | ////only the detection boxes are returned, because the confidence and the 5 keypoints are not used by the downstream modules
78 | void Yolov8Face::detect(Mat srcimg, std::vector<Bbox> &boxes)
79 | {
80 |     this->preprocess(srcimg);
81 | 
82 |     std::vector<int64_t> input_img_shape = {1, 3, this->input_height, this->input_width};
83 |     Value input_tensor_ = Value::CreateTensor<float>(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size());
84 | 
85 |     Ort::RunOptions runOptions;
86 |     vector<Value> ort_outputs = this->ort_session->Run(runOptions, this->input_names.data(), &input_tensor_, 1, this->output_names.data(), output_names.size());
87 | 
88 |     float *pdata = ort_outputs[0].GetTensorMutableData<float>(); /// shape is (1, 20, 8400); ignoring batch dim 0, each column has length 20: the first 4 elements are the box (cx,cy,w,h), element 4 is the confidence, and the remaining 15 are the 5 keypoints' x, y and confidence
89 |     const int num_box = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape()[2];
90 |     vector<Bbox> bounding_box_raw;
91 |     vector<float> score_raw;
92 |     for (int i = 0; i < num_box; i++)
93 |     {
94 |         const float score = pdata[4 * num_box + i];
95 |         if (score > this->conf_threshold)
96 |         {
97 |             float xmin = (pdata[i] - 0.5 * pdata[2 * num_box + i]) * this->ratio_width;    ///convert (cx,cy,w,h) to corner coordinates and map back to the original image
98 |             float ymin = (pdata[num_box + i] - 0.5 * pdata[3 * num_box + i]) * this->ratio_height;    ///convert (cx,cy,w,h) to corner coordinates and map back to the original image
99 |             float xmax = (pdata[i] + 0.5 * pdata[2 * num_box + i]) * this->ratio_width;    ///convert (cx,cy,w,h) to corner coordinates and map back to the original image
100 |             float ymax = (pdata[num_box + i] + 0.5 * pdata[3 * num_box + i]) * this->ratio_height;    ///convert (cx,cy,w,h) to corner coordinates and map back to the original image
101 |             ////clipping the coordinates to the image bounds could be added here
102 |             bounding_box_raw.emplace_back(Bbox{xmin, ymin, xmax, ymax});
103 |             score_raw.emplace_back(score);
104 |             /// computing the 5 keypoint coordinates is omitted for now, since the downstream modules do not use them
105 |         }
106 |     }
107
| vector keep_inds = nms(bounding_box_raw, score_raw, this->iou_threshold); 108 | const int keep_num = keep_inds.size(); 109 | boxes.clear(); 110 | boxes.resize(keep_num); 111 | for (int i = 0; i < keep_num; i++) 112 | { 113 | const int ind = keep_inds[i]; 114 | boxes[i] = bounding_box_raw[ind]; 115 | } 116 | } -------------------------------------------------------------------------------- /cpp/yolov8face.h: -------------------------------------------------------------------------------- 1 | # ifndef YOLOV8FACE 2 | # define YOLOV8FACE 3 | #include 4 | #include 5 | #include 6 | #include 7 | //#include ///如果使用cuda加速,需要取消注释 8 | #include 9 | #include"utils.h" 10 | 11 | 12 | class Yolov8Face 13 | { 14 | public: 15 | Yolov8Face(std::string modelpath, const float conf_thres=0.5, const float iou_thresh=0.4); 16 | void detect(cv::Mat srcimg, std::vector &boxes); ////只返回检测框,置信度和5个关键点这两个信息在后续的模块里没有用到 17 | private: 18 | void preprocess(cv::Mat img); 19 | std::vector input_image; 20 | int input_height; 21 | int input_width; 22 | float ratio_height; 23 | float ratio_width; 24 | float conf_threshold; 25 | float iou_threshold; 26 | 27 | Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "Face Detect"); 28 | Ort::Session *ort_session = nullptr; 29 | Ort::SessionOptions sessionOptions = Ort::SessionOptions(); 30 | std::vector input_names; 31 | std::vector output_names; 32 | std::vector> input_node_dims; // >=1 outputs 33 | std::vector> output_node_dims; // >=1 outputs 34 | Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); 35 | }; 36 | #endif -------------------------------------------------------------------------------- /python/face_68landmarks.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import onnxruntime 4 | from utils import warp_face_by_translation, convert_face_landmark_68_to_5 5 | 6 | class face_68_landmarks: 7 | def __init__(self, modelpath): 8 | # Initialize model 9 | session_option = onnxruntime.SessionOptions() 10 | session_option.log_severity_level = 3 11 | # self.session = onnxruntime.InferenceSession(modelpath, providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) 12 | self.session = onnxruntime.InferenceSession(modelpath, sess_options=session_option) ###opencv-dnn读取onnx失败 13 | model_inputs = self.session.get_inputs() 14 | self.input_names = [model_inputs[i].name for i in range(len(model_inputs))] 15 | self.input_shape = model_inputs[0].shape 16 | self.input_height = int(self.input_shape[2]) 17 | self.input_width = int(self.input_shape[3]) 18 | 19 | 20 | def preprocess(self, srcimg, bounding_box): 21 | ''' 22 | bounding_box里的数据格式是[xmin. 
ymin, xmax, ymax] 23 | ''' 24 | scale = 195 / np.subtract(bounding_box[2:], bounding_box[:2]).max() 25 | translation = (256 - np.add(bounding_box[2:], bounding_box[:2]) * scale) * 0.5 26 | crop_img, affine_matrix = warp_face_by_translation(srcimg, translation, scale, (256, 256)) 27 | 28 | # crop_img = cv2.cvtColor(crop_img, cv2.COLOR_RGB2Lab) ###可有可无 29 | # if np.mean(crop_img[:, :, 0]) < 30: 30 | # crop_img[:, :, 0] = cv2.createCLAHE(clipLimit = 2).apply(crop_img[:, :, 0]) 31 | # crop_img = cv2.cvtColor(crop_img, cv2.COLOR_Lab2RGB) ###可有可无 32 | 33 | crop_img = crop_img.transpose(2, 0, 1).astype(np.float32) / 255.0 34 | crop_img = crop_img[np.newaxis, :, :, :] 35 | return crop_img, affine_matrix 36 | 37 | def detect(self, srcimg, bounding_box): 38 | ''' 39 | 如果直接crop+resize,最后返回的人脸关键点有偏差 40 | ''' 41 | input_tensor, affine_matrix = self.preprocess(srcimg, bounding_box) 42 | 43 | # Perform inference on the image 44 | face_landmark_68 = self.session.run(None, {self.input_names[0]: input_tensor})[0] 45 | face_landmark_68 = face_landmark_68[:, :, :2][0] / 64 46 | face_landmark_68 = face_landmark_68.reshape(1, -1, 2) * 256 47 | face_landmark_68 = cv2.transform(face_landmark_68, cv2.invertAffineTransform(affine_matrix)) 48 | face_landmark_68 = face_landmark_68.reshape(-1, 2) 49 | face_landmark_5of68 = convert_face_landmark_68_to_5(face_landmark_68) 50 | return face_landmark_68, face_landmark_5of68 51 | 52 | if __name__ == '__main__': 53 | imgpath = '5.jpg' 54 | srcimg = cv2.imread('5.jpg') 55 | bounding_box = np.array([487, 236, 784, 624]) 56 | 57 | # Initialize face_68landmarks detector 58 | mynet = face_68_landmarks("weights/2dfan4.onnx") 59 | 60 | face_landmark_68, face_landmark_5of68 = mynet.detect(srcimg, bounding_box) 61 | # print(face_landmark_5of68) 62 | # Draw detections 63 | for i in range(face_landmark_68.shape[0]): 64 | cv2.circle(srcimg, (int(face_landmark_68[i,0]), int(face_landmark_68[i,1])), 3, (0, 255, 0), thickness=-1) 65 | cv2.imwrite('detect_face_68lanmarks.jpg', srcimg) 66 | winName = 'Deep learning face_68landmarks detection in ONNXRuntime' 67 | cv2.namedWindow(winName, 0) 68 | cv2.imshow(winName, srcimg) 69 | cv2.waitKey(0) 70 | cv2.destroyAllWindows() 71 | -------------------------------------------------------------------------------- /python/face_enhancer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import onnxruntime 3 | from utils import warp_face_by_face_landmark_5, create_static_box_mask, paste_back, blend_frame 4 | 5 | FACE_MASK_BLUR = 0.3 6 | FACE_MASK_PADDING = (0, 0, 0, 0) 7 | 8 | class enhance_face: 9 | def __init__(self, modelpath): 10 | # Initialize model 11 | session_option = onnxruntime.SessionOptions() 12 | session_option.log_severity_level = 3 13 | # self.session = onnxruntime.InferenceSession(modelpath, providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) 14 | self.session = onnxruntime.InferenceSession(modelpath, sess_options=session_option) ###opencv-dnn读取onnx失败 15 | model_inputs = self.session.get_inputs() 16 | self.input_names = [model_inputs[i].name for i in range(len(model_inputs))] 17 | self.input_shape = model_inputs[0].shape 18 | self.input_height = int(self.input_shape[2]) 19 | self.input_width = int(self.input_shape[3]) 20 | 21 | def process(self, target_img, target_landmark_5): 22 | ###preprocess 23 | crop_img, affine_matrix = warp_face_by_face_landmark_5(target_img, target_landmark_5, 'ffhq_512', (512, 512)) 24 | box_mask = 
create_static_box_mask((crop_img.shape[1],crop_img.shape[0]), FACE_MASK_BLUR, FACE_MASK_PADDING) 25 | crop_mask_list = [box_mask] 26 | 27 | crop_img = crop_img[:, :, ::-1].astype(np.float32) / 255.0 28 | crop_img = (crop_img - 0.5) / 0.5 29 | crop_img = np.expand_dims(crop_img.transpose(2, 0, 1), axis = 0).astype(np.float32) 30 | 31 | ###Perform inference on the image 32 | result = self.session.run(None, {'input':crop_img})[0][0] 33 | ###normalize_crop_frame 34 | result = np.clip(result, -1, 1) 35 | result = (result + 1) / 2 36 | result = result.transpose(1, 2, 0) 37 | result = (result * 255.0).round() 38 | result = result.astype(np.uint8)[:, :, ::-1] 39 | 40 | crop_mask = np.minimum.reduce(crop_mask_list).clip(0, 1) 41 | paste_frame = paste_back(target_img, result, crop_mask, affine_matrix) 42 | dstimg = blend_frame(target_img, paste_frame) 43 | return dstimg -------------------------------------------------------------------------------- /python/face_recognizer.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import onnxruntime 4 | from utils import warp_face_by_face_landmark_5 5 | 6 | class face_recognize: 7 | def __init__(self, modelpath): 8 | # Initialize model 9 | session_option = onnxruntime.SessionOptions() 10 | session_option.log_severity_level = 3 11 | self.session = onnxruntime.InferenceSession(modelpath, providers=['CPUExecutionProvider']) 12 | # self.session = onnxruntime.InferenceSession(modelpath, sess_options=session_option) ###opencv-dnn读取onnx失败 13 | model_inputs = self.session.get_inputs() 14 | self.input_names = [model_inputs[i].name for i in range(len(model_inputs))] 15 | self.input_shape = model_inputs[0].shape 16 | self.input_height = int(self.input_shape[2]) 17 | self.input_width = int(self.input_shape[3]) 18 | 19 | def preprocess(self, srcimg, face_landmark_5): 20 | crop_img, _ = warp_face_by_face_landmark_5(srcimg, face_landmark_5, 'arcface_112_v2', (112, 112)) 21 | crop_img = crop_img / 127.5 - 1 22 | crop_img = crop_img[:, :, ::-1].transpose(2, 0, 1).astype(np.float32) 23 | crop_img = np.expand_dims(crop_img, axis = 0) 24 | return crop_img 25 | 26 | def detect(self, srcimg, face_landmark_5): 27 | input_tensor = self.preprocess(srcimg, face_landmark_5) 28 | 29 | # Perform inference on the image 30 | embedding = self.session.run(None, {self.input_names[0]: input_tensor})[0] 31 | embedding = embedding.ravel() ###拉平 32 | normed_embedding = embedding / np.linalg.norm(embedding) 33 | return embedding, normed_embedding 34 | 35 | if __name__ == '__main__': 36 | imgpath = '5.jpg' 37 | srcimg = cv2.imread('5.jpg') 38 | face_landmark_5 = np.array([[568.2485, 398.9512 ], 39 | [701.7346, 399.64795], 40 | [634.2213, 482.92694], 41 | [583.5656, 543.10187], 42 | [684.52405, 543.125 ]]) 43 | 44 | mynet = face_recognize('weights/arcface_w600k_r50.onnx') 45 | embedding, normed_embedding = mynet.detect(srcimg, face_landmark_5) 46 | print(embedding.shape, normed_embedding.shape) -------------------------------------------------------------------------------- /python/face_swap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import onnxruntime 3 | from utils import warp_face_by_face_landmark_5, create_static_box_mask, paste_back 4 | 5 | FACE_MASK_BLUR = 0.3 6 | FACE_MASK_PADDING = (0, 0, 0, 0) 7 | INSWAPPER_128_MODEL_MEAN = [0.0, 0.0, 0.0] 8 | INSWAPPER_128_MODEL_STD = [1.0, 1.0, 1.0] 9 | 10 | class swap_face: 11 | def __init__(self, modelpath): 
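        # loads the inswapper ONNX model plus model_matrix.npy, the 512x512 matrix
        # that projects the ArcFace source embedding into inswapper's latent space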
12 |         # Initialize model
13 |         session_option = onnxruntime.SessionOptions()
14 |         session_option.log_severity_level = 3
15 |         # self.session = onnxruntime.InferenceSession(modelpath, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
16 |         self.session = onnxruntime.InferenceSession(modelpath, sess_options=session_option) ###opencv-dnn fails to load this onnx file
17 |         model_inputs = self.session.get_inputs()
18 |         self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]
19 |         self.input_shape = model_inputs[0].shape
20 |         self.input_height = int(self.input_shape[2])
21 |         self.input_width = int(self.input_shape[3])
22 |         self.model_matrix = np.load('model_matrix.npy')
23 | 
24 |     def process(self, target_img, source_face_embedding, target_landmark_5):
25 |         ###preprocess
26 |         crop_img, affine_matrix = warp_face_by_face_landmark_5(target_img, target_landmark_5, 'arcface_128_v2', (128, 128))
27 |         crop_mask_list = []
28 | 
29 |         box_mask = create_static_box_mask((crop_img.shape[1],crop_img.shape[0]), FACE_MASK_BLUR, FACE_MASK_PADDING)
30 |         crop_mask_list.append(box_mask)
31 | 
32 |         crop_img = crop_img[:, :, ::-1].astype(np.float32) / 255.0
33 |         crop_img = (crop_img - INSWAPPER_128_MODEL_MEAN) / INSWAPPER_128_MODEL_STD
34 |         crop_img = np.expand_dims(crop_img.transpose(2, 0, 1), axis = 0).astype(np.float32)
35 | 
36 |         source_embedding = source_face_embedding.reshape((1, -1))
37 |         source_embedding = np.dot(source_embedding, self.model_matrix) / np.linalg.norm(source_embedding)
38 | 
39 |         ###Perform inference on the image
40 |         result = self.session.run(None, {'target':crop_img, 'source':source_embedding})[0][0]
41 |         ###normalize_crop_frame
42 |         result = result.transpose(1, 2, 0)
43 |         result = (result * 255.0).round()
44 |         result = result[:, :, ::-1]
45 | 
46 |         crop_mask = np.minimum.reduce(crop_mask_list).clip(0, 1) ###print(np.array_equal(np.minimum.reduce(crop_mask_list), crop_mask_list[0])) prints True, so np.minimum.reduce(crop_mask_list) equals crop_mask_list[0], i.e. box_mask; the reduce call is therefore redundant here
47 |         dstimg = paste_back(target_img, result, crop_mask, affine_matrix)
48 |         return dstimg
--------------------------------------------------------------------------------
/python/images/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/images/1.jpg
--------------------------------------------------------------------------------
/python/images/14.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/images/14.jpg
--------------------------------------------------------------------------------
/python/images/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/images/2.jpg
--------------------------------------------------------------------------------
/python/images/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/images/5.jpg
--------------------------------------------------------------------------------
/python/images/source.jpg:
--------------------------------------------------------------------------------
/python/images/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/images/1.jpg
--------------------------------------------------------------------------------
/python/images/14.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/images/14.jpg
--------------------------------------------------------------------------------
/python/images/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/images/2.jpg
--------------------------------------------------------------------------------
/python/images/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/images/5.jpg
--------------------------------------------------------------------------------
/python/images/source.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/images/source.jpg
--------------------------------------------------------------------------------
/python/images/target.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/images/target.jpg
--------------------------------------------------------------------------------
/python/main.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import matplotlib.pyplot as plt ### pip install matplotlib if it is missing
3 | from yolov8face import YOLOface_8n
4 | from face_68landmarks import face_68_landmarks
5 | from face_recognizer import face_recognize
6 | from face_swap import swap_face
7 | from face_enhancer import enhance_face
8 | 
9 | if __name__ == '__main__':
10 |     source_path = 'images/1.jpg'
11 |     target_path = 'images/5.jpg'
12 |     source_img = cv2.imread(source_path)
13 |     target_img = cv2.imread(target_path)
14 | 
15 |     detect_face_net = YOLOface_8n("weights/yoloface_8n.onnx")
16 |     detect_68landmarks_net = face_68_landmarks("weights/2dfan4.onnx")
17 |     face_embedding_net = face_recognize('weights/arcface_w600k_r50.onnx')
18 |     swap_face_net = swap_face('weights/inswapper_128.onnx')
19 |     enhance_face_net = enhance_face('weights/gfpgan_1.4.onnx')
20 | 
21 |     boxes, _, _ = detect_face_net.detect(source_img)
22 |     position = 0 ### an image may contain several faces; only the first one is handled here
23 |     bounding_box = boxes[position]
24 |     _, face_landmark_5of68 = detect_68landmarks_net.detect(source_img, bounding_box)
25 |     source_face_embedding, _ = face_embedding_net.detect(source_img, face_landmark_5of68)
26 | 
27 |     boxes, _, _ = detect_face_net.detect(target_img)
28 |     position = 0 ### an image may contain several faces; only the first one is handled here
29 |     bounding_box = boxes[position]
30 |     _, target_landmark_5 = detect_68landmarks_net.detect(target_img, bounding_box)
31 | 
32 |     swapimg = swap_face_net.process(target_img, source_face_embedding, target_landmark_5)
33 |     resultimg = enhance_face_net.process(swapimg, target_landmark_5)
34 | 
35 |     plt.subplot(1, 2, 1)
36 |     plt.imshow(source_img[:,:,::-1]) ### matplotlib displays images in RGB order
37 |     plt.axis('off')
38 |     plt.subplot(1, 2, 2)
39 |     plt.imshow(target_img[:,:,::-1])
40 |     plt.axis('off')
41 |     # plt.show()
42 |     plt.savefig('source_target.jpg', dpi=600, bbox_inches='tight') ### save a high-resolution figure
43 | 
44 |     cv2.imwrite('result.jpg', resultimg)
45 | 
46 |     # cv2.namedWindow('resultimg', 0)
47 |     # cv2.imshow('resultimg', resultimg)
48 |     # cv2.waitKey(0)
49 |     # cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/python/model_matrix.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/facefusion-onnxrun/4e6fe2e771fb9caaa28fafcb873742a020515878/python/model_matrix.npy
--------------------------------------------------------------------------------
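main.py deliberately processes only the first detected face (position = 0). If every face in the target image should be swapped, the target-side block could be replaced by a loop along these lines (an untested sketch reusing the objects defined earlier in main.py; landmarks are taken from the original target_img while the swap is applied cumulatively to resultimg):

boxes, _, _ = detect_face_net.detect(target_img)
resultimg = target_img
for bounding_box in boxes:
    _, target_landmark_5 = detect_68landmarks_net.detect(target_img, bounding_box)
    resultimg = swap_face_net.process(resultimg, source_face_embedding, target_landmark_5)
    resultimg = enhance_face_net.process(resultimg, target_landmark_5)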
/python/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | 
4 | def warp_face_by_translation(temp_img, translation, scale, crop_size):
5 |     affine_matrix = np.array([[ scale, 0, translation[0] ], [ 0, scale, translation[1] ]])
6 |     crop_img = cv2.warpAffine(temp_img, affine_matrix, crop_size)
7 |     return crop_img, affine_matrix
8 | 
9 | def convert_face_landmark_68_to_5(landmark_68):
10 |     left_eye = np.mean(landmark_68[36:42], axis = 0)
11 |     right_eye = np.mean(landmark_68[42:48], axis = 0)
12 |     nose = landmark_68[30]
13 |     left_mouth_end = landmark_68[48]
14 |     right_mouth_end = landmark_68[54]
15 |     face_landmark_5 = np.array([left_eye, right_eye, nose, left_mouth_end, right_mouth_end])
16 |     return face_landmark_5
17 | 
18 | TEMPLATES = {'arcface_112_v2': np.array([[ 0.34191607, 0.46157411 ],
19 |                                          [ 0.65653393, 0.45983393 ],
20 |                                          [ 0.50022500, 0.64050536 ],
21 |                                          [ 0.37097589, 0.82469196 ],
22 |                                          [ 0.63151696, 0.82325089 ]]),
23 |              'arcface_128_v2': np.array([[ 0.36167656, 0.40387734 ],
24 |                                          [ 0.63696719, 0.40235469 ],
25 |                                          [ 0.50019687, 0.56044219 ],
26 |                                          [ 0.38710391, 0.72160547 ],
27 |                                          [ 0.61507734, 0.72034453 ]]),
28 |              'ffhq_512': np.array([[ 0.37691676, 0.46864664 ],
29 |                                    [ 0.62285697, 0.46912813 ],
30 |                                    [ 0.50123859, 0.61331904 ],
31 |                                    [ 0.39308822, 0.72541100 ],
32 |                                    [ 0.61150205, 0.72490465 ]])}
33 | 
34 | def warp_face_by_face_landmark_5(temp_vision_frame, face_landmark_5, template, crop_size):
35 |     normed_template = TEMPLATES.get(template) * crop_size
36 |     # print(normed_template) ### print these values and copy them into the C++ program's std::vector normed_template
37 |     affine_matrix = cv2.estimateAffinePartial2D(face_landmark_5, normed_template, method = cv2.RANSAC, ransacReprojThreshold = 100)[0]
38 |     crop_img = cv2.warpAffine(temp_vision_frame, affine_matrix, crop_size, borderMode = cv2.BORDER_REPLICATE, flags = cv2.INTER_AREA)
39 |     return crop_img, affine_matrix
40 | 
41 | def create_static_box_mask(crop_size, face_mask_blur, face_mask_padding):
42 |     blur_amount = int(crop_size[0] * 0.5 * face_mask_blur)
43 |     blur_area = max(blur_amount // 2, 1)
44 |     box_mask = np.ones(crop_size, np.float32)
45 |     box_mask[:max(blur_area, int(crop_size[1] * face_mask_padding[0] / 100)), :] = 0
46 |     box_mask[-max(blur_area, int(crop_size[1] * face_mask_padding[2] / 100)):, :] = 0
47 |     box_mask[:, :max(blur_area, int(crop_size[0] * face_mask_padding[3] / 100))] = 0
48 |     box_mask[:, -max(blur_area, int(crop_size[0] * face_mask_padding[1] / 100)):] = 0
49 |     if blur_amount > 0:
50 |         box_mask = cv2.GaussianBlur(box_mask, (0, 0), blur_amount * 0.25)
51 |     return box_mask
52 | 
53 | def paste_back(temp_vision_frame, crop_vision_frame, crop_mask, affine_matrix):
54 |     inverse_matrix = cv2.invertAffineTransform(affine_matrix)
55 |     temp_size = temp_vision_frame.shape[:2][::-1]
56 |     inverse_mask = cv2.warpAffine(crop_mask, inverse_matrix, temp_size).clip(0, 1)
57 |     inverse_vision_frame = cv2.warpAffine(crop_vision_frame, inverse_matrix, temp_size, borderMode = cv2.BORDER_REPLICATE)
58 |     paste_vision_frame = temp_vision_frame.copy()
59 |     paste_vision_frame[:, :, 0] = inverse_mask * inverse_vision_frame[:, :, 0] + (1 - inverse_mask) * temp_vision_frame[:, :, 0]
60 |     paste_vision_frame[:, :, 1] = inverse_mask * inverse_vision_frame[:, :, 1] + (1 - inverse_mask) * temp_vision_frame[:, :, 1]
61 |     paste_vision_frame[:, :, 2] = inverse_mask * inverse_vision_frame[:, :, 2] + (1 - inverse_mask) * temp_vision_frame[:, :, 2]
62 |     return paste_vision_frame
63 | 
64 | def blend_frame(temp_vision_frame, paste_vision_frame, FACE_ENHANCER_BLEND=80):
65 |     face_enhancer_blend = 1 - (FACE_ENHANCER_BLEND / 100)
66 |     temp_vision_frame = cv2.addWeighted(temp_vision_frame, face_enhancer_blend, paste_vision_frame, 1 - face_enhancer_blend, 0)
67 |     return temp_vision_frame
--------------------------------------------------------------------------------
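To make create_static_box_mask concrete: for the 128x128 inswapper crop with FACE_MASK_BLUR = 0.3 and zero padding, blur_amount = int(128 * 0.5 * 0.3) = 19, so a border of max(19 // 2, 1) = 9 pixels is zeroed and the mask is then softened with a Gaussian of sigma 19 * 0.25 = 4.75. A quick check (illustrative, not part of the repo):

import numpy as np
from utils import create_static_box_mask

mask = create_static_box_mask((128, 128), 0.3, (0, 0, 0, 0))
print(mask.shape, mask.dtype)  # (128, 128) float32; values ramp from ~0 at the border to 1.0 in the centre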
/python/yolov8face.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import onnxruntime
4 | import argparse
5 | 
6 | 
7 | class YOLOface_8n:
8 |     def __init__(self, modelpath, conf_thres=0.5, iou_thresh=0.4):
9 |         self.conf_threshold = conf_thres
10 |         self.iou_threshold = iou_thresh
11 |         # Initialize model
12 |         session_option = onnxruntime.SessionOptions()
13 |         session_option.log_severity_level = 3
14 |         # self.session = onnxruntime.InferenceSession(modelpath, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
15 |         self.session = onnxruntime.InferenceSession(modelpath, sess_options=session_option) ### opencv-dnn fails to load this onnx model
16 |         model_inputs = self.session.get_inputs()
17 |         self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]
18 |         self.input_shape = model_inputs[0].shape
19 |         self.input_height = int(self.input_shape[2])
20 |         self.input_width = int(self.input_shape[3])
21 | 
22 |     def preprocess(self, srcimg):
23 |         height, width = srcimg.shape[:2]
24 |         temp_image = srcimg.copy()
25 |         if height > self.input_height or width > self.input_width:
26 |             scale = min(self.input_height / height, self.input_width / width)
27 |             new_width = int(width * scale)
28 |             new_height = int(height * scale)
29 |             temp_image = cv2.resize(srcimg, (new_width, new_height))
30 |         self.ratio_height = height / temp_image.shape[0]
31 |         self.ratio_width = width / temp_image.shape[1]
32 |         input_img = cv2.copyMakeBorder(temp_image, 0, self.input_height - temp_image.shape[0], 0, self.input_width - temp_image.shape[1], cv2.BORDER_CONSTANT,
33 |                                        value=0)
34 |         # Scale input pixel values to 0 to 1
35 |         input_img = (input_img.astype(np.float32) - 127.5) / 128.0
36 |         input_img = input_img.transpose(2, 0, 1)
37 |         input_img = input_img[np.newaxis, :, :, :]
38 |         return input_img
39 | 
40 |     def detect(self, srcimg):
41 |         input_tensor = self.preprocess(srcimg)
42 | 
43 |         # Perform inference on the image
44 |         outputs = self.session.run(None, {self.input_names[0]: input_tensor})[0]
45 |         boxes, kpts, scores = self.postprocess(outputs)
46 |         return boxes, kpts, scores
47 | 
48 |     def postprocess(self, outputs):
49 |         bounding_box_list, face_landmark5_list, score_list = [], [], []
50 | 
51 |         outputs = np.squeeze(outputs, axis=0).T
52 |         bounding_box_raw, score_raw, face_landmark_5_raw = np.split(outputs, [ 4, 5 ], axis = 1)
53 |         keep_indices = np.where(score_raw > self.conf_threshold)[0]
54 |         if keep_indices.size > 0: ### .size instead of .any(): .any() is wrongly False when the only kept index is 0
55 |             bounding_box_raw, face_landmark_5_raw, score_raw = bounding_box_raw[keep_indices], face_landmark_5_raw[keep_indices], score_raw[keep_indices]
56 |             bboxes_wh = bounding_box_raw.copy()
57 |             bboxes_wh[:, :2] = bounding_box_raw[:, :2] - 0.5 * bounding_box_raw[:, 2:] #### convert (cx,cy,w,h) to (x,y,w,h)
58 |             bboxes_wh *= np.array([[self.ratio_width, self.ratio_height, self.ratio_width, self.ratio_height]]) ### broadcasting maps the boxes back to original-image coordinates
59 |             face_landmark_5_raw *= np.tile(np.array([self.ratio_width, self.ratio_height, 1]), 5).reshape((1, 15)) ### broadcasting again; each point is (x, y, conf) and the per-point confidence is unused, so it is simply multiplied by 1
60 |             score_raw = score_raw.flatten()
61 | 
62 |             indices = cv2.dnn.NMSBoxes(bboxes_wh.tolist(), score_raw.tolist(), self.conf_threshold, self.iou_threshold)
63 |             if isinstance(indices, np.ndarray):
64 |                 indices = indices.flatten()
65 |             if len(indices) > 0:
66 |                 # bounding_box_list = list(bboxes_wh[indices])
67 |                 bounding_box_list = list(map(lambda x:np.array([x[0], x[1], x[0]+x[2], x[1]+x[3]], dtype=np.float64), bboxes_wh[indices])) ### convert (x,y,w,h) to (xmin,ymin,xmax,ymax)
68 |                 score_list = list(score_raw[indices])
69 |                 face_landmark5_list = list(face_landmark_5_raw[indices])
70 | 
71 |         return bounding_box_list, face_landmark5_list, score_list
72 | 
73 |     def draw_detections(self, image, boxes, kpts, scores):
74 |         for box, kp, score in zip(boxes, kpts, scores):
75 |             xmin, ymin, xmax, ymax = box.astype(int)
76 | 
77 |             # Draw rectangle
78 |             cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 0, 255), thickness=2)
79 |             label = "face:" + str(round(score, 2))
80 |             cv2.putText(image, label, (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
81 |             for i in range(5):
82 |                 cv2.circle(image, (int(kp[i * 3]), int(kp[i * 3 + 1])), 3, (0, 255, 0), thickness=-1)
83 |         return image
84 | 
85 | 
86 | if __name__ == '__main__':
87 |     parser = argparse.ArgumentParser()
88 |     parser.add_argument('--imgpath', type=str, default='5.jpg', help="image path")
89 |     parser.add_argument('--confThreshold', default=0.5, type=float, help='class confidence')
90 |     args = parser.parse_args()
91 | 
92 |     # Initialize YOLOface_8n object detector
93 |     mynet = YOLOface_8n("weights/yoloface_8n.onnx", conf_thres=args.confThreshold)
94 |     srcimg = cv2.imread(args.imgpath)
95 | 
96 |     # Detect Objects
97 |     boxes, kpts, scores = mynet.detect(srcimg)
98 | 
99 |     # Draw detections
100 |     dstimg = mynet.draw_detections(srcimg, boxes, kpts, scores)
101 |     winName = 'Deep learning yolov8face detection in ONNXRuntime'
102 |     cv2.namedWindow(winName, 0)
103 |     cv2.imshow(winName, dstimg)
104 |     cv2.waitKey(0)
105 |     cv2.destroyAllWindows()
--------------------------------------------------------------------------------
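A worked example of the letterbox mapping in YOLOface_8n (illustrative numbers, not from the repo): a 1280x720 image with a 640x640 model input is scaled by min(640/720, 640/1280) = 0.5 to 640x360 and padded with black at the bottom; detections are then mapped back with ratio_width = 1280/640 = 2.0 and ratio_height = 720/360 = 2.0, which is exactly what postprocess multiplies the raw boxes and landmarks by:

height, width = 720, 1280                               # original image
input_h = input_w = 640                                 # model input size
scale = min(input_h / height, input_w / width)          # 0.5
new_w, new_h = int(width * scale), int(height * scale)  # 640, 360
ratio_w, ratio_h = width / new_w, height / new_h        # 2.0, 2.0
x, y, w, h = 100.0, 50.0, 200.0, 150.0                  # a box in model-input coordinates
print(x * ratio_w, y * ratio_h, w * ratio_w, h * ratio_h)  # 200.0 100.0 400.0 300.0 in image coordinates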