├── det1.npy
├── det2.npy
├── det3.npy
├── cpp
│   ├── standalone
│   │   ├── test.jpg
│   │   ├── models
│   │   │   └── mtcnn_frozen_model.pb
│   │   ├── tensorflow_mtcnn.hpp
│   │   ├── utils.hpp
│   │   ├── Makefile
│   │   ├── comm_lib.hpp
│   │   ├── README.md
│   │   ├── utils.cpp
│   │   ├── mtcnn.hpp
│   │   ├── camera.cpp
│   │   ├── test.cpp
│   │   ├── comm_lib.cpp
│   │   └── tensorflow_mtcnn.cpp
│   └── tf_embedded
│       ├── test.jpg
│       ├── models
│       │   └── mtcnn_frozen_model.pb
│       ├── utils.hpp
│       ├── comm_lib.hpp
│       ├── README.md
│       ├── BUILD
│       ├── utils.cpp
│       ├── mtcnn.hpp
│       ├── comm_lib.cpp
│       └── main.cc
├── LICENSE
├── README.md
├── facedetect_mtcnn.py
└── detect_face.py

/det1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberfire/tensorflow-mtcnn/HEAD/det1.npy
--------------------------------------------------------------------------------
/det2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberfire/tensorflow-mtcnn/HEAD/det2.npy
--------------------------------------------------------------------------------
/det3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberfire/tensorflow-mtcnn/HEAD/det3.npy
--------------------------------------------------------------------------------
/cpp/standalone/test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberfire/tensorflow-mtcnn/HEAD/cpp/standalone/test.jpg
--------------------------------------------------------------------------------
/cpp/tf_embedded/test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberfire/tensorflow-mtcnn/HEAD/cpp/tf_embedded/test.jpg
--------------------------------------------------------------------------------
/cpp/standalone/models/mtcnn_frozen_model.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberfire/tensorflow-mtcnn/HEAD/cpp/standalone/models/mtcnn_frozen_model.pb
--------------------------------------------------------------------------------
/cpp/tf_embedded/models/mtcnn_frozen_model.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberfire/tensorflow-mtcnn/HEAD/cpp/tf_embedded/models/mtcnn_frozen_model.pb
--------------------------------------------------------------------------------
/cpp/standalone/tensorflow_mtcnn.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __TENSORFLOW_MTCNN_HPP__
2 | #define __TENSORFLOW_MTCNN_HPP__
3 |
4 | #include "tensorflow/c/c_api.h"
5 | #include 
6 | #include "mtcnn.hpp"
7 |
8 | void mtcnn_detect(TF_Session* sess, TF_Graph * graph, cv::Mat& img, std::vector<face_box>& face_list);
9 |
10 | TF_Session * load_graph(const char * frozen_fname, TF_Graph** p_graph);
11 |
12 |
13 | #endif
14 |
--------------------------------------------------------------------------------
/cpp/standalone/utils.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __MTCNN_UTILS_HPP__
2 | #define __MTCNN_UTILS_HPP__
3 |
4 | /* get current time: in us */
5 | unsigned long get_cur_time(void);
6 |
7 | /*
8 | For debugging purposes: save an image or a float vector to a file.
9 | The image should be a cv::Mat.
10 | To avoid an OpenCV header file dependency, void * is used instead of cv::Mat *.
11 | */
12 |
13 |
14 | void save_img(const char * name, void * p_img);
15 |
16 | void save_float(const char * name, const float * data, int size);
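/*
   Usage sketch (illustrative only; "frame", "scores" and "count" are
   hypothetical caller-side variables, not part of this header):

       cv::Mat frame = cv::imread("test.jpg");
       save_img("frame.dat", &frame);            // utils.cpp casts the void * back to cv::Mat *
       save_float("scores.txt", scores, count);  // dumps the float vector for offline inspection
*/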
17 |
18 |
19 | #endif
20 |
21 |
--------------------------------------------------------------------------------
/cpp/tf_embedded/utils.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __MTCNN_UTILS_HPP__
2 | #define __MTCNN_UTILS_HPP__
3 |
4 | /* get current time: in us */
5 | unsigned long get_cur_time(void);
6 |
7 | /*
8 | For debugging purposes: save an image or a float vector to a file.
9 | The image should be a cv::Mat.
10 | To avoid an OpenCV header file dependency, void * is used instead of cv::Mat *.
11 | */
12 |
13 |
14 | void save_img(const char * name, void * p_img);
15 |
16 | void save_float(const char * name, const float * data, int size);
17 |
18 |
19 | #endif
20 |
21 |
--------------------------------------------------------------------------------
/cpp/standalone/Makefile:
--------------------------------------------------------------------------------
1 | TF_ROOT=../tf_dist
2 |
3 | BIN_SRCS=test.cpp camera.cpp
4 | COMM_SRCS+=tensorflow_mtcnn.cpp comm_lib.cpp utils.cpp
5 |
6 |
7 |
8 | #
9 | # opencv settings
10 | #
11 |
12 | OPENCV_LDFLAGS=$(shell pkg-config --libs opencv)
13 | OPENCV_CFLAGS=$(shell pkg-config --cflags opencv)
14 |
15 |
16 | CXXFLAGS += $(OPENCV_CFLAGS)
17 | LIBS += $(OPENCV_LDFLAGS)
18 |
19 | #
20 | # tensorflow settings
21 | #
22 |
23 |
24 | TF_INCS += -I$(TF_ROOT)/include
25 | CXXFLAGS += $(TF_INCS)
26 | LIBS+=-Wl,-rpath,$(TF_ROOT)/lib -L$(TF_ROOT)/lib -ltensorflow
27 |
28 |
29 | CXX := g++
30 | CXXFLAGS+= -Wall -ggdb -std=c++11
31 |
32 |
33 | COMM_OBJS=$(COMM_SRCS:.cpp=.o)
34 | BIN_OBJS=$(BIN_SRCS:.cpp=.o)
35 | BIN_EXES=$(BIN_SRCS:.cpp=)
36 |
37 |
38 | default : $(BIN_EXES)
39 |
40 | $(BIN_EXES) : $(COMM_OBJS)
41 |
42 | $(BIN_EXES):%:%.o
43 |
44 |
45 | %:%.o
46 | 	$(CXX) $< -o $@ $(LDFLAGS) $(COMM_OBJS) $(LIBS)
47 |
48 | %.o : %.cpp
49 | 	$(CXX) $(CXXFLAGS) -c $< -o $@
50 |
51 | clean:
52 | 	rm -f $(BIN_EXES) *.o
53 |
54 | .PHONY : all clean
55 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 cyberfire
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/cpp/standalone/comm_lib.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __COMMON_LIB_HPP__
2 | #define __COMMON_LIB_HPP__
3 |
4 | #define NMS_UNION 1
5 | #define NMS_MIN 2
6 |
7 |
8 |
9 | struct scale_window
10 | {
11 | 	int h;
12 | 	int w;
13 | 	float scale;
14 | };
15 |
16 | int numpy_round(float f);
17 |
18 | void nms_boxes(std::vector<face_box>& input, float threshold, int type, std::vector<face_box>& output);
19 |
20 | void regress_boxes(std::vector<face_box>& rects);
21 |
22 | void square_boxes(std::vector<face_box>& rects);
23 |
24 | void padding(int img_h, int img_w, std::vector<face_box>& rects);
25 |
26 | void process_boxes(std::vector<face_box>& input, int img_h, int img_w, std::vector<face_box>& rects);
27 |
28 | void generate_bounding_box(const float * confidence_data, int confidence_size,
29 | 		const float * reg_data, float scale, float threshold,
30 | 		int feature_h, int feature_w, std::vector<face_box>& output, bool transposed);
31 |
32 |
33 | void set_input_buffer(std::vector<cv::Mat>& input_channels,
34 | 		float* input_data, const int height, const int width);
35 |
36 |
37 | void cal_pyramid_list(int height, int width, int min_size, float factor, std::vector<scale_window>& list);
38 |
39 | void cal_landmark(std::vector<face_box>& box_list);
40 |
41 | void set_box_bound(std::vector<face_box>& box_list, int img_h, int img_w);
42 |
43 | #endif
44 |
--------------------------------------------------------------------------------
/cpp/tf_embedded/comm_lib.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __COMMON_LIB_HPP__
2 | #define __COMMON_LIB_HPP__
3 |
4 | #define NMS_UNION 1
5 | #define NMS_MIN 2
6 |
7 |
8 |
9 | struct scale_window
10 | {
11 | 	int h;
12 | 	int w;
13 | 	float scale;
14 | };
15 |
16 | int numpy_round(float f);
17 |
18 | void nms_boxes(std::vector<face_box>& input, float threshold, int type, std::vector<face_box>& output);
19 |
20 | void regress_boxes(std::vector<face_box>& rects);
21 |
22 | void square_boxes(std::vector<face_box>& rects);
23 |
24 | void padding(int img_h, int img_w, std::vector<face_box>& rects);
25 |
26 | void process_boxes(std::vector<face_box>& input, int img_h, int img_w, std::vector<face_box>& rects);
27 |
28 | void generate_bounding_box(const float * confidence_data, int confidence_size,
29 | 		const float * reg_data, float scale, float threshold,
30 | 		int feature_h, int feature_w, std::vector<face_box>& output, bool transposed);
31 |
32 |
33 | void set_input_buffer(std::vector<cv::Mat>& input_channels,
34 | 		float* input_data, const int height, const int width);
35 |
36 |
37 | void cal_pyramid_list(int height, int width, int min_size, float factor, std::vector<scale_window>& list);
38 |
39 | void cal_landmark(std::vector<face_box>& box_list);
40 |
41 | void set_box_bound(std::vector<face_box>& box_list, int img_h, int img_w);
42 |
43 | #endif
44 |
--------------------------------------------------------------------------------
/cpp/tf_embedded/README.md:
--------------------------------------------------------------------------------
1 | # MTCNN TensorFlow C++ Implementation
2 |
3 | This is a project that implements MTCNN, an excellent face detection algorithm, on top of TensorFlow.
4 |
5 | This version is an example that lives inside the TensorFlow code repository and uses bazel to build.
6 |
7 | The standalone example outside the repository is in cpp/standalone.
8 |
9 |
10 | Here is the trick used to reuse a pre-allocated buffer as a tensor:
11 |
12 | In order to create a tensor over a pre-allocated buffer, a friend class of Tensor has to be defined.
13 |
14 | Since TensorCApi is defined for C API usage and is only used inside libtensorflow.so, we can safely re-define this class in our own file.
15 |
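As a concrete illustration, here is the pattern condensed from main.cc in this directory (see my_buffer and TensorCApi there); it is a sketch, not a standalone compilation unit:

```cpp
// Condensed from main.cc: wrap caller-owned memory in a Tensor without copying.

// Minimal TensorBuffer over a pre-allocated float array; OwnsMemory() returns
// false, so the Tensor will not try to free our buffer.
class my_buffer : public tensorflow::TensorBuffer {
public:
    float* data_;
    size_t len_;

    void* data() const override { return data_; }
    size_t size() const override { return len_; }
    bool OwnsMemory() const override { return false; }
    TensorBuffer* root_buffer() override { return this; }
    void FillAllocationDescription(
            tensorflow::AllocationDescription* proto) const override {
        // main.cc fills in the requested bytes and allocator name here.
    }
};

// Tensor declares tensorflow::TensorCApi as a friend, and the real definition
// lives only inside libtensorflow.so, so we may define the class ourselves to
// reach the private Tensor(dtype, shape, buffer) constructor.
class tensorflow::TensorCApi {
public:
    static tensorflow::Tensor create_tensor(tensorflow::DataType dtype,
                                            const tensorflow::TensorShape& shape,
                                            tensorflow::TensorBuffer* buf) {
        return tensorflow::Tensor(dtype, shape, buf);
    }
};
```

run_PNet in main.cc then points `data_` at the resized image and builds the input tensor with `TensorCApi::create_tensor(DT_FLOAT, TensorShape(dims), &tensor_buf)`, so no per-frame copy into a freshly allocated Tensor is needed.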
16 |
17 | # Build & Run
18 |
19 | 1. copy the mtcnn directory to tensorflow/examples
20 |
21 | 2. cd tensorflow/examples/mtcnn
22 |
23 | 3. bazel build //tensorflow/examples/mtcnn
24 |
25 | 4. run:
26 |
27 | bazel-bin/tensorflow/examples/mtcnn/mtcnn --image=photo_fname --graph=./tensorflow/examples/mtcnn/models/mtcnn_frozen_model.pb
28 |
29 |
30 |
31 |
32 | # Credit
33 |
34 | ### MTCNN algorithm
35 |
36 | https://github.com/kpzhang93/MTCNN_face_detection_alignment
37 |
38 | ### MTCNN C++ on Caffe
39 |
40 | https://github.com/wowo200/MTCNN
41 |
42 | ### MTCNN python on Tensorflow
43 |
44 | FaceNet uses MTCNN to align faces.
45 |
46 | https://github.com/davidsandberg/facenet
47 | From this directory:
48 | facenet/src/align
49 |
50 |
51 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # tensorflow-mtcnn
2 |
3 | MTCNN is one of the best face detection algorithms.
4 | This repository provides inference-only MTCNN face detection on TensorFlow, based on davidsandberg's facenet project, in both a Python version and a C++ version.
5 |
6 | ## C++
7 |
8 | There are two C++ versions.
9 |
10 | One is built inside the TensorFlow code repository, so it needs to be copied to the directory tensorflow/examples.
11 | Please check cpp/tf_embedded/README.md for details.
12 |
13 | The other is standalone: it only needs libtensorflow.so and c_api.h to build and run.
14 | Please check cpp/standalone/README.md for more details.
15 |
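For a quick feel for the standalone API, here is a minimal sketch condensed from cpp/standalone/test.cpp (the real program also parses command-line options and can save the cropped faces):

```cpp
// Condensed from cpp/standalone/test.cpp: load the frozen graph once, then
// detect faces on a cv::Mat and draw the resulting boxes.
#include <vector>
#include <opencv2/opencv.hpp>
#include "tensorflow/c/c_api.h"
#include "tensorflow_mtcnn.hpp"

int main(void)
{
    TF_Graph* graph;
    TF_Session* sess = load_graph("./models/mtcnn_frozen_model.pb", &graph);
    if (sess == nullptr)
        return 1;

    cv::Mat frame = cv::imread("./test.jpg");

    std::vector<face_box> faces;
    mtcnn_detect(sess, graph, frame, faces);

    for (const face_box& box : faces)
        cv::rectangle(frame,
                      cv::Point(box.x0, box.y0),
                      cv::Point(box.x1, box.y1),
                      cv::Scalar(0, 255, 0), 1);

    cv::imwrite("./new.jpg", frame);
    return 0;
}
```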
16 | ## Python Run
17 | 1. install tensorflow first, please refer to https://www.tensorflow.org/install
18 | 2. install python packages: opencv, numpy
19 | 3. python ./facedetect_mtcnn.py --input input.jpg --output new.jpg
20 |
21 | ## Build tensorflow on arm64 board
22 |
23 | Please check out the guide [how to build tensorflow on firefly](https://cyberfire.github.io/tensorflow/rk3399/howto%20build%20tensorflow%20on%20firefly.md)
24 |
25 | ## Credit
26 |
27 | ### MTCNN algorithm
28 |
29 | https://github.com/kpzhang93/MTCNN_face_detection_alignment
30 |
31 | ### MTCNN C++ on Caffe
32 |
33 | https://github.com/wowo200/MTCNN
34 |
35 | ### MTCNN python on Tensorflow
36 |
37 | FaceNet uses MTCNN to align faces.
38 |
39 | https://github.com/davidsandberg/facenet
40 | From this directory:
41 | facenet/src/align
42 |
43 |
44 |
--------------------------------------------------------------------------------
/cpp/standalone/README.md:
--------------------------------------------------------------------------------
1 | # MTCNN TensorFlow C++ Implementation
2 |
3 | This is a project that implements MTCNN, an excellent face detection algorithm, on top of TensorFlow.
4 |
5 | This version is an example that lives outside the TensorFlow code repository and calls TensorFlow through libtensorflow.so.
6 |
7 | It is also an example of how to use the TensorFlow C API.
8 |
9 | # Build & Run
10 |
11 | 1. build the TensorFlow library distribution with the following command in the tensorflow directory
12 |
13 | bazel build --config=opt //tensorflow/tools/lib_package:libtensorflow
14 |
15 | the tarball, bazel-bin/tensorflow/tools/lib_package/libtensorflow.tar.gz, includes libtensorflow.so and the C header files
16 |
17 | 2. edit the Makefile, set TF_ROOT to the correct path on your machine
18 |
19 | 3. make
20 | two demos will be created: "test" checks a single photo, while "camera" does live face detection.
21 |
22 |
23 | 4. run:
24 | arguments for test:
25 |
26 | test [ -i input_image ] [ -o output_image ] [ -m model_fname ] [ -s ]
27 |
28 | -s saves each detected face into a .jpg file
29 |
30 |
31 |
32 |
33 |
34 | # Credit
35 |
36 | ### MTCNN algorithm
37 |
38 | https://github.com/kpzhang93/MTCNN_face_detection_alignment
39 |
40 | ### MTCNN C++ on Caffe
41 |
42 | https://github.com/wowo200/MTCNN
43 |
44 | ### MTCNN python on Tensorflow
45 |
46 | FaceNet uses MTCNN to align faces.
47 |
48 | https://github.com/davidsandberg/facenet
49 | From this directory:
50 | facenet/src/align
51 |
52 |
53 |
--------------------------------------------------------------------------------
/cpp/tf_embedded/BUILD:
--------------------------------------------------------------------------------
1 | # Description:
2 | #   TensorFlow C++ MTCNN face detection example.
3 | 4 | package(default_visibility = ["//tensorflow:internal"]) 5 | 6 | licenses(["notice"]) # Apache 2.0 7 | 8 | exports_files(["LICENSE"]) 9 | 10 | cc_binary( 11 | name = "mtcnn", 12 | srcs = [ 13 | "main.cc", 14 | "comm_lib.cpp", 15 | "comm_lib.hpp", 16 | "mtcnn.hpp", 17 | "utils.cpp", 18 | "utils.hpp" 19 | ], 20 | 21 | copts = [ 22 | "-I/usr/local/include/opencv", 23 | " -I/usr/local/include", 24 | "-Wno-sign-compare", 25 | "-Wall", 26 | "-g", 27 | "-O0", 28 | ], 29 | 30 | linkopts = 31 | ["-lm", 32 | "-L/usr/local/lib -lopencv_shape -lopencv_stitching -lopencv_objdetect -lopencv_superres -lopencv_videostab -lopencv_calib3d -lopencv_features2d -lopencv_highgui -lopencv_videoio -lopencv_imgcodecs -lopencv_video -lopencv_photo -lopencv_ml -lopencv_imgproc -lopencv_flann -lopencv_core", 33 | "-g", 34 | ], 35 | deps = 36 | [ 37 | "//tensorflow/cc:cc_ops", 38 | "//tensorflow/core:framework_internal", 39 | "//tensorflow/core:tensorflow", 40 | ], 41 | 42 | ) 43 | 44 | filegroup( 45 | name = "all_files", 46 | srcs = glob( 47 | ["**/*"], 48 | exclude = [ 49 | "**/METADATA", 50 | "**/OWNERS", 51 | "bin/**", 52 | "gen/**", 53 | ], 54 | ), 55 | visibility = ["//tensorflow:__subpackages__"], 56 | ) 57 | -------------------------------------------------------------------------------- /cpp/standalone/utils.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | 11 | 12 | unsigned long get_cur_time(void) 13 | { 14 | struct timeval tv; 15 | unsigned long ts; 16 | 17 | gettimeofday(&tv,NULL); 18 | 19 | ts=tv.tv_sec*1000000+tv.tv_usec; 20 | 21 | return ts; 22 | } 23 | 24 | void save_float(const char * name, const float * data, int size) 25 | { 26 | char fname[128]; 27 | 28 | sprintf(fname,"%s",name); 29 | 30 | std::cout<<"save data to "<(i); 73 | 74 | for(int j=0;j 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | 11 | 12 | unsigned long get_cur_time(void) 13 | { 14 | struct timeval tv; 15 | unsigned long ts; 16 | 17 | gettimeofday(&tv,NULL); 18 | 19 | ts=tv.tv_sec*1000000+tv.tv_usec; 20 | 21 | return ts; 22 | } 23 | 24 | void save_float(const char * name, const float * data, int size) 25 | { 26 | char fname[128]; 27 | 28 | sprintf(fname,"%s",name); 29 | 30 | std::cout<<"save data to "<(i); 73 | 74 | for(int j=0;j 5 | #include 6 | 7 | #include 8 | 9 | 10 | struct face_landmark 11 | { 12 | float x[5]; 13 | float y[5]; 14 | }; 15 | 16 | struct face_box 17 | { 18 | float x0; 19 | float y0; 20 | float x1; 21 | float y1; 22 | 23 | /* confidence score */ 24 | float score; 25 | 26 | /*regression scale */ 27 | 28 | float regress[4]; 29 | 30 | /* padding stuff*/ 31 | float px0; 32 | float py0; 33 | float px1; 34 | float py1; 35 | 36 | face_landmark landmark; 37 | }; 38 | 39 | 40 | 41 | class mtcnn { 42 | public: 43 | mtcnn(void){ 44 | min_size_=40; 45 | pnet_threshold_=0.6; 46 | rnet_threshold_=0.7; 47 | onet_threshold_=0.9; 48 | factor_=0.709; 49 | 50 | } 51 | 52 | void set_threshold(float p, float r, float o) 53 | { 54 | pnet_threshold_=p; 55 | rnet_threshold_=r; 56 | onet_threshold_=o; 57 | } 58 | 59 | void set_factor_min_size(float factor, float min_size) 60 | { 61 | factor_=factor; 62 | min_size_=min_size; 63 | } 64 | 65 | 66 | virtual int load_model(const std::string& model_dir)=0; 67 | virtual void detect(cv::Mat& img, std::vector& face_list)=0; 68 | virtual ~mtcnn(void){}; 69 | 70 | protected: 71 | 72 | int min_size_; 73 | float 
pnet_threshold_; 74 | float rnet_threshold_; 75 | float onet_threshold_; 76 | float factor_; 77 | }; 78 | 79 | /* factory part */ 80 | 81 | class mtcnn_factory 82 | { 83 | public: 84 | 85 | typedef mtcnn * (*creator)(void); 86 | 87 | static void register_creator(const std::string& name, creator& create_func); 88 | static mtcnn * create_detector(const std::string& name); 89 | static std::vector list(void); 90 | 91 | private: 92 | mtcnn_factory(){}; 93 | 94 | 95 | }; 96 | 97 | class only_for_auto_register 98 | { 99 | public: 100 | only_for_auto_register(std::string name, mtcnn_factory::creator func) 101 | { 102 | mtcnn_factory::register_creator(name,func); 103 | } 104 | 105 | }; 106 | 107 | #define REGISTER_MTCNN_CREATOR(name,func) \ 108 | static only_for_auto_register dummy_mtcnn_creator_## name (#name, func) 109 | 110 | #endif 111 | -------------------------------------------------------------------------------- /cpp/tf_embedded/mtcnn.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __MTCNN_HPP__ 2 | #define __MTCNN_HPP__ 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | 10 | struct face_landmark 11 | { 12 | float x[5]; 13 | float y[5]; 14 | }; 15 | 16 | struct face_box 17 | { 18 | float x0; 19 | float y0; 20 | float x1; 21 | float y1; 22 | 23 | /* confidence score */ 24 | float score; 25 | 26 | /*regression scale */ 27 | 28 | float regress[4]; 29 | 30 | /* padding stuff*/ 31 | float px0; 32 | float py0; 33 | float px1; 34 | float py1; 35 | 36 | face_landmark landmark; 37 | }; 38 | 39 | 40 | 41 | class mtcnn { 42 | public: 43 | mtcnn(void){ 44 | min_size_=40; 45 | pnet_threshold_=0.6; 46 | rnet_threshold_=0.7; 47 | onet_threshold_=0.9; 48 | factor_=0.709; 49 | 50 | } 51 | 52 | void set_threshold(float p, float r, float o) 53 | { 54 | pnet_threshold_=p; 55 | rnet_threshold_=r; 56 | onet_threshold_=o; 57 | } 58 | 59 | void set_factor_min_size(float factor, float min_size) 60 | { 61 | factor_=factor; 62 | min_size_=min_size; 63 | } 64 | 65 | 66 | virtual int load_model(const std::string& model_dir)=0; 67 | virtual void detect(cv::Mat& img, std::vector& face_list)=0; 68 | virtual ~mtcnn(void){}; 69 | 70 | protected: 71 | 72 | int min_size_; 73 | float pnet_threshold_; 74 | float rnet_threshold_; 75 | float onet_threshold_; 76 | float factor_; 77 | }; 78 | 79 | /* factory part */ 80 | 81 | class mtcnn_factory 82 | { 83 | public: 84 | 85 | typedef mtcnn * (*creator)(void); 86 | 87 | static void register_creator(const std::string& name, creator& create_func); 88 | static mtcnn * create_detector(const std::string& name); 89 | static std::vector list(void); 90 | 91 | private: 92 | mtcnn_factory(){}; 93 | 94 | 95 | }; 96 | 97 | class only_for_auto_register 98 | { 99 | public: 100 | only_for_auto_register(std::string name, mtcnn_factory::creator func) 101 | { 102 | mtcnn_factory::register_creator(name,func); 103 | } 104 | 105 | }; 106 | 107 | #define REGISTER_MTCNN_CREATOR(name,func) \ 108 | static only_for_auto_register dummy_mtcnn_creator_## name (#name, func) 109 | 110 | #endif 111 | -------------------------------------------------------------------------------- /facedetect_mtcnn.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2016 David Sandberg 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, 
including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # 24 | # Borrowed from davidsandberg's facenet project: https://github.com/davidsandberg/facenet 25 | # From this directory: 26 | # facenet/src/align 27 | # 28 | # Just keep the MTCNN related stuff and removed other codes 29 | # python package required: 30 | # tensorflow, opencv,numpy 31 | 32 | 33 | from __future__ import absolute_import 34 | from __future__ import division 35 | from __future__ import print_function 36 | 37 | import sys 38 | import os 39 | import argparse 40 | import tensorflow as tf 41 | import numpy as np 42 | import detect_face 43 | import cv2 44 | 45 | def main(args): 46 | 47 | sess = tf.Session() 48 | pnet, rnet, onet = detect_face.create_mtcnn(sess, None) 49 | 50 | minsize = 40 # minimum size of face 51 | threshold = [ 0.6, 0.7, 0.9 ] # three steps's threshold 52 | factor = 0.709 # scale factor 53 | 54 | 55 | filename =args.input 56 | output_filename =args.output 57 | 58 | 59 | draw = cv2.imread(filename) 60 | 61 | img=cv2.cvtColor(draw,cv2.COLOR_BGR2RGB) 62 | 63 | bounding_boxes, points = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) 64 | 65 | nrof_faces = bounding_boxes.shape[0] 66 | 67 | 68 | for b in bounding_boxes: 69 | cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (0, 255, 0)) 70 | print(b) 71 | 72 | 73 | 74 | for p in points.T: 75 | for i in range(5): 76 | cv2.circle(draw, (p[i], p[i + 5]), 1, (0, 0, 255), 2) 77 | 78 | cv2.imwrite(output_filename,draw) 79 | 80 | print('Total %d face(s) detected, saved in %s' % (nrof_faces,output_filename)) 81 | 82 | 83 | def parse_arguments(argv): 84 | parser = argparse.ArgumentParser() 85 | parser.add_argument('--input', type=str, help='image to be detected for faces.',default='./test.jpg') 86 | parser.add_argument('--output', type=str, help='new image with boxed faces',default='new.jpg') 87 | return parser.parse_args(argv) 88 | 89 | if __name__ == '__main__': 90 | main(parse_arguments(sys.argv[1:])) 91 | -------------------------------------------------------------------------------- /cpp/standalone/camera.cpp: -------------------------------------------------------------------------------- 1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | // A minimal but useful C++ example showing how to load an Imagenet-style object 17 | // recognition TensorFlow model, prepare input images for it, run them through 18 | // the graph, and interpret the results. 19 | // 20 | // It's designed to have as few dependencies and be as clear as possible, so 21 | // it's more verbose than it could be in production code. In particular, using 22 | // auto for the types of a lot of the returned values from TensorFlow calls can 23 | // remove a lot of boilerplate, but I find the explicit types useful in sample 24 | // code to make it simple to look up the classes involved. 25 | // 26 | // To use it, compile and then run in a working directory with the 27 | // learning/brain/tutorials/label_image/data/ folder below it, and you should 28 | // see the top five labels for the example Lena image output. You can then 29 | // customize it to use your own models or images by changing the file names at 30 | // the top of the main() function. 31 | // 32 | // The googlenet_graph.pb file included by default is created from Inception. 33 | // 34 | // Note that, for GIF inputs, to reuse existing code, only single-frame ones 35 | // are supported. 36 | 37 | #include 38 | #include 39 | #include 40 | 41 | 42 | #include "tensorflow/c/c_api.h" 43 | #include "tensorflow_mtcnn.hpp" 44 | #include "mtcnn.hpp" 45 | #include "comm_lib.hpp" 46 | #include "utils.hpp" 47 | 48 | using std::string; 49 | 50 | int main(int argc, char* argv[]) 51 | { 52 | string model_fname ="./models/mtcnn_frozen_model.pb"; 53 | 54 | cv::VideoCapture camera; 55 | 56 | camera.open(0); 57 | 58 | if(!camera.isOpened()) 59 | { 60 | std::cerr<<"failed to open camera"< face_info; 84 | 85 | unsigned long start_time=get_cur_time(); 86 | 87 | mtcnn_detect(sess,graph,frame,face_info); 88 | 89 | unsigned long end_time=get_cur_time(); 90 | 91 | 92 | 93 | for(unsigned int i=0;i 38 | #include 39 | #include 40 | 41 | 42 | #include "tensorflow/c/c_api.h" 43 | #include "tensorflow_mtcnn.hpp" 44 | #include "mtcnn.hpp" 45 | #include "comm_lib.hpp" 46 | #include "utils.hpp" 47 | 48 | #include 49 | 50 | using std::string; 51 | 52 | int main(int argc, char* argv[]) 53 | { 54 | string image = "./test.jpg"; 55 | string model_fname = "./models/mtcnn_frozen_model.pb"; 56 | string output_fname="./new.jpg"; 57 | int save_chop=0; 58 | int res; 59 | 60 | 61 | while((res=getopt(argc,argv,"i:o:m:s"))!=-1) 62 | { 63 | switch(res) 64 | { 65 | case 'i': 66 | image=optarg; 67 | break; 68 | case 'o': 69 | output_fname=optarg; 70 | break; 71 | case 's': 72 | save_chop=1; 73 | break; 74 | case 'm': 75 | model_fname=optarg; 76 | break; 77 | default: 78 | break; 79 | } 80 | } 81 | 82 | TF_Session * sess; 83 | TF_Graph * graph; 84 | 85 | 86 | sess=load_graph(model_fname.c_str(),&graph); 87 | 88 | if(sess==nullptr) 89 | return 1; 90 | 91 | //Load image 92 | 93 | cv::Mat frame = cv::imread(image); 94 | 95 | if(!frame.data) 96 | { 97 | std::cerr<<"failed to read image file: "< face_info; 103 | 104 | unsigned long 
start_time=get_cur_time(); 105 | 106 | mtcnn_detect(sess,graph,frame,face_info); 107 | 108 | unsigned long end_time=get_cur_time(); 109 | 110 | 111 | 112 | for(unsigned int i=0;i& input, float threshold, int type, std::vector&output) 6 | { 7 | 8 | std::sort(input.begin(),input.end(), 9 | [](const face_box& a, const face_box&b) { 10 | return a.score > b.score; 11 | }); 12 | 13 | int box_num=input.size(); 14 | 15 | std::vector merged(box_num,0); 16 | 17 | for(int i=0;ithreshold) 68 | merged[j]=1; 69 | } 70 | 71 | 72 | } 73 | 74 | 75 | } 76 | 77 | void regress_boxes(std::vector& rects) 78 | { 79 | for(unsigned int i=0;i& rects) 95 | { 96 | 97 | for(unsigned int i=0;i& rects) 112 | { 113 | for(unsigned int i=0; i& input, int img_h, int img_w, std::vector& rects) 124 | { 125 | 126 | nms_boxes(input,0.7,NMS_UNION,rects); 127 | 128 | regress_boxes(rects); 129 | 130 | square_boxes(rects); 131 | 132 | padding(img_h,img_w,rects); 133 | 134 | } 135 | 136 | 137 | void generate_bounding_box(const float * confidence_data, int confidence_size, 138 | const float * reg_data, float scale, float threshold, 139 | int feature_h, int feature_w, std::vector& output, bool transposed) 140 | { 141 | 142 | int stride = 2; 143 | int cellSize = 12; 144 | 145 | int img_h= feature_h; 146 | int img_w = feature_w; 147 | 148 | int count = confidence_size/ 2; 149 | confidence_data += count; 150 | 151 | for (int i = 0; i= threshold){ 153 | int y = i / img_w; 154 | int x = i - img_w * y; 155 | 156 | float top_x = (int)((x*stride + 1) / scale); 157 | float top_y = (int)((y*stride + 1) / scale); 158 | float bottom_x = (int)((x*stride + cellSize) / scale); 159 | float bottom_y = (int)((y*stride + cellSize) / scale); 160 | 161 | 162 | face_box box; 163 | 164 | 165 | box.x0 = top_x; 166 | box.y0 = top_y; 167 | box.x1 = bottom_x; 168 | box.y1 = bottom_y; 169 | 170 | box.score = *(confidence_data + i); 171 | 172 | int c_offset=y*img_w+x; 173 | int c_size=img_w*img_h; 174 | 175 | if(transposed) 176 | { 177 | 178 | box.regress[1]=reg_data[c_offset]; 179 | box.regress[0]=reg_data[c_offset+c_size]; 180 | box.regress[3]=reg_data[c_offset+2*c_size]; 181 | box.regress[2]= reg_data[c_offset+3*c_size]; 182 | } 183 | else { 184 | 185 | box.regress[0]=reg_data[c_offset]; 186 | box.regress[1]=reg_data[c_offset+c_size]; 187 | box.regress[2]=reg_data[c_offset+2*c_size]; 188 | box.regress[3]= reg_data[c_offset+3*c_size]; 189 | } 190 | 191 | output.push_back(box); 192 | } 193 | } 194 | 195 | } 196 | 197 | 198 | void set_input_buffer(std::vector& input_channels, 199 | float* input_data, const int height, const int width) 200 | { 201 | for (int i = 0; i < 3; ++i) { 202 | cv::Mat channel(height, width, CV_32FC1, input_data); 203 | input_channels.push_back(channel); 204 | input_data += width * height; 205 | } 206 | } 207 | 208 | 209 | void cal_pyramid_list(int height, int width, int min_size, float factor,std::vector& list) 210 | { 211 | int min_side = std::min(height, width); 212 | double m = 12.0 / min_size; 213 | 214 | min_side=min_side*m; 215 | double cur_scale=1.0; 216 | 217 | double scale; 218 | 219 | 220 | while (min_side >= 12) 221 | { 222 | scale=m*cur_scale; 223 | cur_scale=cur_scale *factor; 224 | min_side *= factor; 225 | 226 | int hs = std::ceil(height*scale); 227 | int ws = std::ceil(width*scale); 228 | 229 | scale_window win; 230 | win.h=hs; 231 | win.w=ws; 232 | win.scale=scale; 233 | list.push_back(win); 234 | } 235 | 236 | } 237 | 238 | void cal_landmark(std::vector& box_list) 239 | { 240 | for(unsigned int i=0;i& box_list, int img_h, 
int img_w) 257 | { 258 | for(unsigned int i=0; i& input, float threshold, int type, std::vector&output) 6 | { 7 | 8 | std::sort(input.begin(),input.end(), 9 | [](const face_box& a, const face_box&b) { 10 | return a.score > b.score; 11 | }); 12 | 13 | int box_num=input.size(); 14 | 15 | std::vector merged(box_num,0); 16 | 17 | for(int i=0;ithreshold) 68 | merged[j]=1; 69 | } 70 | 71 | 72 | } 73 | 74 | 75 | } 76 | 77 | void regress_boxes(std::vector& rects) 78 | { 79 | for(unsigned int i=0;i& rects) 95 | { 96 | 97 | for(unsigned int i=0;i& rects) 112 | { 113 | for(unsigned int i=0; i& input, int img_h, int img_w, std::vector& rects) 124 | { 125 | 126 | nms_boxes(input,0.7,NMS_UNION,rects); 127 | 128 | regress_boxes(rects); 129 | 130 | square_boxes(rects); 131 | 132 | padding(img_h,img_w,rects); 133 | 134 | } 135 | 136 | 137 | void generate_bounding_box(const float * confidence_data, int confidence_size, 138 | const float * reg_data, float scale, float threshold, 139 | int feature_h, int feature_w, std::vector& output, bool transposed) 140 | { 141 | 142 | int stride = 2; 143 | int cellSize = 12; 144 | 145 | int img_h= feature_h; 146 | int img_w = feature_w; 147 | 148 | int count = confidence_size/ 2; 149 | confidence_data += count; 150 | 151 | for (int i = 0; i= threshold){ 153 | int y = i / img_w; 154 | int x = i - img_w * y; 155 | 156 | float top_x = (int)((x*stride + 1) / scale); 157 | float top_y = (int)((y*stride + 1) / scale); 158 | float bottom_x = (int)((x*stride + cellSize) / scale); 159 | float bottom_y = (int)((y*stride + cellSize) / scale); 160 | 161 | 162 | face_box box; 163 | 164 | 165 | box.x0 = top_x; 166 | box.y0 = top_y; 167 | box.x1 = bottom_x; 168 | box.y1 = bottom_y; 169 | 170 | box.score = *(confidence_data + i); 171 | 172 | int c_offset=y*img_w+x; 173 | int c_size=img_w*img_h; 174 | 175 | if(transposed) 176 | { 177 | 178 | box.regress[1]=reg_data[c_offset]; 179 | box.regress[0]=reg_data[c_offset+c_size]; 180 | box.regress[3]=reg_data[c_offset+2*c_size]; 181 | box.regress[2]= reg_data[c_offset+3*c_size]; 182 | } 183 | else { 184 | 185 | box.regress[0]=reg_data[c_offset]; 186 | box.regress[1]=reg_data[c_offset+c_size]; 187 | box.regress[2]=reg_data[c_offset+2*c_size]; 188 | box.regress[3]= reg_data[c_offset+3*c_size]; 189 | } 190 | 191 | output.push_back(box); 192 | } 193 | } 194 | 195 | } 196 | 197 | 198 | void set_input_buffer(std::vector& input_channels, 199 | float* input_data, const int height, const int width) 200 | { 201 | for (int i = 0; i < 3; ++i) { 202 | cv::Mat channel(height, width, CV_32FC1, input_data); 203 | input_channels.push_back(channel); 204 | input_data += width * height; 205 | } 206 | } 207 | 208 | 209 | void cal_pyramid_list(int height, int width, int min_size, float factor,std::vector& list) 210 | { 211 | int min_side = std::min(height, width); 212 | double m = 12.0 / min_size; 213 | 214 | min_side=min_side*m; 215 | double cur_scale=1.0; 216 | 217 | double scale; 218 | 219 | 220 | while (min_side >= 12) 221 | { 222 | scale=m*cur_scale; 223 | cur_scale=cur_scale *factor; 224 | min_side *= factor; 225 | 226 | int hs = std::ceil(height*scale); 227 | int ws = std::ceil(width*scale); 228 | 229 | scale_window win; 230 | win.h=hs; 231 | win.w=ws; 232 | win.scale=scale; 233 | list.push_back(win); 234 | } 235 | 236 | } 237 | 238 | void cal_landmark(std::vector& box_list) 239 | { 240 | for(unsigned int i=0;i& box_list, int img_h, int img_w) 257 | { 258 | for(unsigned int i=0; i 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 
"tensorflow/c/c_api.h" 8 | 9 | #include "mtcnn.hpp" 10 | #include "comm_lib.hpp" 11 | #include "utils.hpp" 12 | #include "tensorflow_mtcnn.hpp" 13 | 14 | static int load_file(const std::string & fname, std::vector& buf) 15 | { 16 | std::ifstream fs(fname, std::ios::binary | std::ios::in); 17 | 18 | if(!fs.good()) 19 | { 20 | std::cerr< model_buf; 46 | 47 | load_file(frozen_fname,model_buf); 48 | 49 | TF_Buffer graph_def = {model_buf.data(), model_buf.size(), nullptr}; 50 | 51 | TF_ImportGraphDefOptions* import_opts = TF_NewImportGraphDefOptions(); 52 | TF_ImportGraphDefOptionsSetPrefix(import_opts, ""); 53 | TF_GraphImportGraphDef(graph, &graph_def, import_opts, s); 54 | 55 | if(TF_GetCode(s) != TF_OK) 56 | { 57 | printf("load graph failed!\n Error: %s\n",TF_Message(s)); 58 | 59 | return nullptr; 60 | } 61 | 62 | TF_SessionOptions* sess_opts = TF_NewSessionOptions(); 63 | TF_Session* session = TF_NewSession(graph, sess_opts, s); 64 | assert(TF_GetCode(s) == TF_OK); 65 | 66 | 67 | TF_DeleteStatus(s); 68 | 69 | 70 | *p_graph=graph; 71 | 72 | return session; 73 | } 74 | 75 | void generate_bounding_box_tf(const float * confidence_data, int confidence_size, 76 | const float * reg_data, float scale, float threshold, 77 | int feature_h, int feature_w, std::vector& output, bool transposed) 78 | { 79 | 80 | int stride = 2; 81 | int cellSize = 12; 82 | 83 | int img_h= feature_h; 84 | int img_w = feature_w; 85 | 86 | 87 | for(int y=0;y= threshold) 95 | { 96 | 97 | float top_x = (int)((x*stride + 1) / scale); 98 | float top_y = (int)((y*stride + 1) / scale); 99 | float bottom_x = (int)((x*stride + cellSize) / scale); 100 | float bottom_y = (int)((y*stride + cellSize) / scale); 101 | 102 | face_box box; 103 | 104 | box.x0 = top_x; 105 | box.y0 = top_y; 106 | box.x1 = bottom_x; 107 | box.y1 = bottom_y; 108 | 109 | box.score=score; 110 | 111 | int c_offset=(img_w*4)*y+4*x; 112 | 113 | if(transposed) 114 | { 115 | 116 | box.regress[1]=reg_data[c_offset]; 117 | box.regress[0]=reg_data[c_offset+1]; 118 | box.regress[3]=reg_data[c_offset+2]; 119 | box.regress[2]= reg_data[c_offset+3]; 120 | } 121 | else { 122 | 123 | box.regress[0]=reg_data[c_offset]; 124 | box.regress[1]=reg_data[c_offset+1]; 125 | box.regress[2]=reg_data[c_offset+2]; 126 | box.regress[3]= reg_data[c_offset+3]; 127 | } 128 | 129 | output.push_back(box); 130 | } 131 | 132 | } 133 | } 134 | 135 | /* To make tensor release happy...*/ 136 | static void dummy_deallocator(void* data, size_t len, void* arg) 137 | { 138 | } 139 | 140 | void run_PNet(TF_Session * sess, TF_Graph * graph, cv::Mat& img, scale_window& win, std::vector& box_list) 141 | { 142 | cv::Mat resized; 143 | int scale_h=win.h; 144 | int scale_w=win.w; 145 | float scale=win.scale; 146 | float pnet_threshold=0.6; 147 | 148 | cv::resize(img, resized, cv::Size(scale_w, scale_h),0,0); 149 | 150 | /* tensorflow related*/ 151 | 152 | TF_Status * s= TF_NewStatus(); 153 | 154 | std::vector input_names; 155 | std::vector input_values; 156 | 157 | TF_Operation* input_name=TF_GraphOperationByName(graph, "pnet/input"); 158 | 159 | input_names.push_back({input_name, 0}); 160 | 161 | const int64_t dim[4] = {1,scale_h,scale_w,3}; 162 | 163 | TF_Tensor* input_tensor = TF_NewTensor(TF_FLOAT,dim,4,resized.ptr(),sizeof(float)*scale_w*scale_h*3,dummy_deallocator,nullptr); 164 | 165 | input_values.push_back(input_tensor); 166 | 167 | 168 | 169 | std::vector output_names; 170 | 171 | TF_Operation* output_name = TF_GraphOperationByName(graph,"pnet/conv4-2/BiasAdd"); 172 | 
output_names.push_back({output_name,0}); 173 | 174 | output_name = TF_GraphOperationByName(graph,"pnet/prob1"); 175 | output_names.push_back({output_name,0}); 176 | 177 | std::vector output_values(output_names.size(), nullptr); 178 | 179 | 180 | TF_SessionRun(sess,nullptr,input_names.data(),input_values.data(),input_names.size(), 181 | output_names.data(),output_values.data(),output_names.size(), 182 | nullptr,0,nullptr,s); 183 | 184 | 185 | assert(TF_GetCode(s) == TF_OK); 186 | 187 | /*retrieval the forward results*/ 188 | 189 | const float * conf_data=(const float *)TF_TensorData(output_values[1]); 190 | const float * reg_data=(const float *)TF_TensorData(output_values[0]); 191 | 192 | 193 | int feature_h=TF_Dim(output_values[0],1); 194 | int feature_w=TF_Dim(output_values[0],2); 195 | 196 | int conf_size=feature_h*feature_w*2; 197 | 198 | std::vector candidate_boxes; 199 | 200 | generate_bounding_box_tf(conf_data,conf_size,reg_data, 201 | scale,pnet_threshold,feature_h,feature_w,candidate_boxes,true); 202 | 203 | 204 | nms_boxes(candidate_boxes, 0.5, NMS_UNION,box_list); 205 | 206 | TF_DeleteStatus(s); 207 | TF_DeleteTensor(output_values[0]); 208 | TF_DeleteTensor(output_values[1]); 209 | TF_DeleteTensor(input_tensor); 210 | 211 | } 212 | 213 | 214 | 215 | void copy_one_patch(const cv::Mat& img,face_box&input_box,float * data_to, int height, int width) 216 | { 217 | cv::Mat resized(height,width,CV_32FC3,data_to); 218 | 219 | 220 | cv::Mat chop_img = img(cv::Range(input_box.py0,input_box.py1), 221 | cv::Range(input_box.px0, input_box.px1)); 222 | 223 | int pad_top = std::abs(input_box.py0 - input_box.y0); 224 | int pad_left = std::abs(input_box.px0 - input_box.x0); 225 | int pad_bottom = std::abs(input_box.py1 - input_box.y1); 226 | int pad_right = std::abs(input_box.px1-input_box.x1); 227 | 228 | cv::copyMakeBorder(chop_img, chop_img, pad_top, pad_bottom,pad_left, pad_right, cv::BORDER_CONSTANT, cv::Scalar(0)); 229 | 230 | cv::resize(chop_img,resized, cv::Size(width, height), 0, 0); 231 | } 232 | 233 | 234 | void run_RNet(TF_Session * sess, TF_Graph * graph, cv::Mat& img, std::vector& pnet_boxes, std::vector& output_boxes) 235 | { 236 | int batch=pnet_boxes.size(); 237 | int channel = 3; 238 | int height = 24; 239 | int width = 24; 240 | 241 | float rnet_threshold=0.7; 242 | 243 | /* prepare input image data */ 244 | 245 | int input_size=batch*height*width*channel; 246 | 247 | std::vector input_buffer(input_size); 248 | 249 | float * input_data=input_buffer.data(); 250 | 251 | for(int i=0;i input_names; 266 | std::vector input_values; 267 | 268 | TF_Operation* input_name=TF_GraphOperationByName(graph, "rnet/input"); 269 | 270 | input_names.push_back({input_name, 0}); 271 | 272 | 273 | const int64_t dim[4] = {batch,height,width,channel}; 274 | 275 | 276 | TF_Tensor* input_tensor = TF_NewTensor(TF_FLOAT,dim,4,input_buffer.data(),sizeof(float)*input_size, 277 | dummy_deallocator,nullptr); 278 | 279 | input_values.push_back(input_tensor); 280 | 281 | 282 | std::vector output_names; 283 | 284 | TF_Operation* output_name = TF_GraphOperationByName(graph,"rnet/conv5-2/conv5-2"); 285 | output_names.push_back({output_name,0}); 286 | 287 | output_name = TF_GraphOperationByName(graph,"rnet/prob1"); 288 | output_names.push_back({output_name,0}); 289 | 290 | std::vector output_values(output_names.size(), nullptr); 291 | 292 | 293 | TF_SessionRun(sess,nullptr,input_names.data(),input_values.data(),input_names.size(), 294 | output_names.data(),output_values.data(),output_names.size(), 295 | 
nullptr,0,nullptr,s); 296 | 297 | 298 | assert(TF_GetCode(s) == TF_OK); 299 | 300 | /*retrieval the forward results*/ 301 | 302 | const float * conf_data=(const float *)TF_TensorData(output_values[1]); 303 | const float * reg_data=(const float *)TF_TensorData(output_values[0]); 304 | 305 | 306 | for(int i=0;irnet_threshold) 310 | { 311 | face_box output_box; 312 | 313 | face_box& input_box=pnet_boxes[i]; 314 | 315 | output_box.x0=input_box.x0; 316 | output_box.y0=input_box.y0; 317 | output_box.x1=input_box.x1; 318 | output_box.y1=input_box.y1; 319 | 320 | output_box.score = *(conf_data+1); 321 | 322 | /*Note: regress's value is swaped here!!!*/ 323 | 324 | output_box.regress[0]=reg_data[1]; 325 | output_box.regress[1]=reg_data[0]; 326 | output_box.regress[2]=reg_data[3]; 327 | output_box.regress[3]=reg_data[2]; 328 | 329 | output_boxes.push_back(output_box); 330 | 331 | 332 | } 333 | 334 | conf_data+=2; 335 | reg_data+=4; 336 | 337 | } 338 | 339 | TF_DeleteStatus(s); 340 | TF_DeleteTensor(output_values[0]); 341 | TF_DeleteTensor(output_values[1]); 342 | TF_DeleteTensor(input_tensor); 343 | } 344 | 345 | void run_ONet(TF_Session * sess, TF_Graph * graph, cv::Mat& img, std::vector& rnet_boxes, std::vector& output_boxes) 346 | { 347 | int batch=rnet_boxes.size(); 348 | int channel = 3; 349 | int height = 48; 350 | int width = 48; 351 | 352 | float onet_threshold=0.9; 353 | 354 | /* prepare input image data */ 355 | 356 | int input_size=batch*height*width*channel; 357 | 358 | std::vector input_buffer(input_size); 359 | 360 | float * input_data=input_buffer.data(); 361 | 362 | for(int i=0;i input_names; 377 | std::vector input_values; 378 | 379 | TF_Operation* input_name=TF_GraphOperationByName(graph, "onet/input"); 380 | 381 | input_names.push_back({input_name, 0}); 382 | 383 | const int64_t dim[4] = {batch,height,width,channel}; 384 | 385 | TF_Tensor* input_tensor = TF_NewTensor(TF_FLOAT,dim,4,input_buffer.data(),sizeof(float)*input_size, 386 | dummy_deallocator,nullptr); 387 | 388 | input_values.push_back(input_tensor); 389 | 390 | 391 | std::vector output_names; 392 | 393 | TF_Operation* output_name = TF_GraphOperationByName(graph,"onet/conv6-2/conv6-2"); 394 | output_names.push_back({output_name,0}); 395 | 396 | output_name = TF_GraphOperationByName(graph,"onet/conv6-3/conv6-3"); 397 | output_names.push_back({output_name,0}); 398 | 399 | output_name = TF_GraphOperationByName(graph,"onet/prob1"); 400 | output_names.push_back({output_name,0}); 401 | 402 | std::vector output_values(output_names.size(), nullptr); 403 | 404 | 405 | TF_SessionRun(sess,nullptr,input_names.data(),input_values.data(),input_names.size(), 406 | output_names.data(),output_values.data(),output_names.size(), 407 | nullptr,0,nullptr,s); 408 | 409 | 410 | assert(TF_GetCode(s) == TF_OK); 411 | 412 | /*retrieval the forward results*/ 413 | 414 | const float * conf_data=(const float *)TF_TensorData(output_values[2]); 415 | const float * reg_data=(const float *)TF_TensorData(output_values[0]); 416 | const float * points_data=(const float *)TF_TensorData(output_values[1]); 417 | 418 | for(int i=0;ionet_threshold) 422 | { 423 | face_box output_box; 424 | 425 | face_box& input_box=rnet_boxes[i]; 426 | 427 | output_box.x0=input_box.x0; 428 | output_box.y0=input_box.y0; 429 | output_box.x1=input_box.x1; 430 | output_box.y1=input_box.y1; 431 | 432 | output_box.score = conf_data[1]; 433 | 434 | output_box.regress[0]=reg_data[1]; 435 | output_box.regress[1]=reg_data[0]; 436 | output_box.regress[2]=reg_data[3]; 437 | 
output_box.regress[3]=reg_data[2]; 438 | 439 | /*Note: switched x,y points value too..*/ 440 | for (int j = 0; j<5; j++){ 441 | output_box.landmark.x[j] = *(points_data + j+5); 442 | output_box.landmark.y[j] = *(points_data + j); 443 | } 444 | 445 | output_boxes.push_back(output_box); 446 | 447 | 448 | } 449 | 450 | conf_data+=2; 451 | reg_data+=4; 452 | points_data+=10; 453 | } 454 | 455 | TF_DeleteStatus(s); 456 | TF_DeleteTensor(output_values[0]); 457 | TF_DeleteTensor(output_values[1]); 458 | TF_DeleteTensor(output_values[2]); 459 | TF_DeleteTensor(input_tensor); 460 | 461 | } 462 | 463 | 464 | void mtcnn_detect(TF_Session * sess, TF_Graph * graph, cv::Mat& img, std::vector& face_list) 465 | { 466 | cv::Mat working_img; 467 | 468 | float alpha=0.0078125; 469 | float mean=127.5; 470 | 471 | 472 | 473 | img.convertTo(working_img, CV_32FC3); 474 | 475 | working_img=(working_img-mean)*alpha; 476 | 477 | working_img=working_img.t(); 478 | 479 | cv::cvtColor(working_img,working_img, cv::COLOR_BGR2RGB); 480 | 481 | int img_h=working_img.rows; 482 | int img_w=working_img.cols; 483 | 484 | 485 | int min_size=40; 486 | float factor=0.709; 487 | 488 | 489 | std::vector win_list; 490 | 491 | std::vector total_pnet_boxes; 492 | std::vector total_rnet_boxes; 493 | std::vector total_onet_boxes; 494 | 495 | 496 | cal_pyramid_list(img_h,img_w,min_size,factor,win_list); 497 | 498 | for(unsigned int i=0;iboxes; 501 | 502 | run_PNet(sess,graph,working_img,win_list[i],boxes); 503 | 504 | total_pnet_boxes.insert(total_pnet_boxes.end(),boxes.begin(),boxes.end()); 505 | } 506 | 507 | 508 | std::vector pnet_boxes; 509 | process_boxes(total_pnet_boxes,img_h,img_w,pnet_boxes); 510 | 511 | 512 | // RNet 513 | std::vector rnet_boxes; 514 | 515 | run_RNet(sess, graph,working_img, pnet_boxes,total_rnet_boxes); 516 | 517 | process_boxes(total_rnet_boxes,img_h,img_w,rnet_boxes); 518 | 519 | 520 | //ONet 521 | run_ONet(sess,graph,working_img, rnet_boxes,total_onet_boxes); 522 | 523 | //calculate the landmark 524 | 525 | for(unsigned int i=0;i 38 | #include 39 | #include 40 | 41 | #include "tensorflow/cc/ops/const_op.h" 42 | #include "tensorflow/cc/ops/image_ops.h" 43 | #include "tensorflow/cc/ops/standard_ops.h" 44 | #include "tensorflow/core/framework/graph.pb.h" 45 | #include "tensorflow/core/framework/tensor.h" 46 | #include "tensorflow/core/graph/default_device.h" 47 | #include "tensorflow/core/graph/graph_def_builder.h" 48 | #include "tensorflow/core/lib/core/errors.h" 49 | #include "tensorflow/core/lib/core/stringpiece.h" 50 | #include "tensorflow/core/lib/core/threadpool.h" 51 | #include "tensorflow/core/lib/io/path.h" 52 | #include "tensorflow/core/lib/strings/stringprintf.h" 53 | #include "tensorflow/core/platform/env.h" 54 | #include "tensorflow/core/platform/init_main.h" 55 | #include "tensorflow/core/platform/logging.h" 56 | #include "tensorflow/core/platform/types.h" 57 | #include "tensorflow/core/public/session.h" 58 | #include "tensorflow/core/util/command_line_flags.h" 59 | 60 | #include "mtcnn.hpp" 61 | #include "comm_lib.hpp" 62 | #include "utils.hpp" 63 | 64 | // These are all common classes it's handy to reference with no namespace. 
65 | using tensorflow::Flag; 66 | using tensorflow::Tensor; 67 | using tensorflow::Status; 68 | using tensorflow::string; 69 | using tensorflow::int32; 70 | using tensorflow::TensorBuffer; 71 | using tensorflow::DT_FLOAT; 72 | using tensorflow::TensorShape; 73 | using tensorflow::AllocationDescription; 74 | 75 | 76 | // Reads a model graph definition from disk, and creates a session object you 77 | // can use to run it. 78 | Status LoadGraph(const string& graph_file_name, 79 | std::unique_ptr* session) { 80 | tensorflow::GraphDef graph_def; 81 | Status load_graph_status = 82 | ReadBinaryProto(tensorflow::Env::Default(), graph_file_name, &graph_def); 83 | if (!load_graph_status.ok()) { 84 | return tensorflow::errors::NotFound("Failed to load compute graph at '", 85 | graph_file_name, "'"); 86 | } 87 | session->reset(tensorflow::NewSession(tensorflow::SessionOptions())); 88 | Status session_create_status = (*session)->Create(graph_def); 89 | if (!session_create_status.ok()) { 90 | return session_create_status; 91 | } 92 | return Status::OK(); 93 | } 94 | 95 | 96 | /* copied from TF_ManagedBuffer in c_api.cc */ 97 | class my_buffer: public TensorBuffer{ 98 | 99 | public: 100 | float * data_; 101 | size_t len_; 102 | 103 | ~my_buffer() override { 104 | //nothing to do 105 | } 106 | 107 | void* data() const override { return data_; } 108 | size_t size() const override { return len_; } 109 | bool OwnsMemory() const override { return false; } 110 | 111 | TensorBuffer* root_buffer() override { return this; } 112 | 113 | void FillAllocationDescription(AllocationDescription* proto) const override { 114 | tensorflow::int64 rb = size(); 115 | proto->set_requested_bytes(rb); 116 | proto->set_allocator_name(tensorflow::cpu_allocator()->Name()); 117 | } 118 | 119 | }; 120 | 121 | 122 | class tensorflow::TensorCApi { 123 | public: 124 | static Tensor create_tensor(DataType dtype, const TensorShape& shape, TensorBuffer* buf) 125 | { 126 | return Tensor(dtype,shape,buf); 127 | } 128 | 129 | }; 130 | 131 | 132 | 133 | 134 | void generate_bounding_box_tf(const float * confidence_data, int confidence_size, 135 | const float * reg_data, float scale, float threshold, 136 | int feature_h, int feature_w, std::vector& output, bool transposed) 137 | { 138 | 139 | int stride = 2; 140 | int cellSize = 12; 141 | 142 | int img_h= feature_h; 143 | int img_w = feature_w; 144 | 145 | 146 | for(int y=0;y= threshold) 154 | { 155 | 156 | float top_x = (int)((x*stride + 1) / scale); 157 | float top_y = (int)((y*stride + 1) / scale); 158 | float bottom_x = (int)((x*stride + cellSize) / scale); 159 | float bottom_y = (int)((y*stride + cellSize) / scale); 160 | 161 | face_box box; 162 | 163 | box.x0 = top_x; 164 | box.y0 = top_y; 165 | box.x1 = bottom_x; 166 | box.y1 = bottom_y; 167 | 168 | box.score=score; 169 | 170 | int c_offset=(img_w*4)*y+4*x; 171 | 172 | if(transposed) 173 | { 174 | 175 | box.regress[1]=reg_data[c_offset]; 176 | box.regress[0]=reg_data[c_offset+1]; 177 | box.regress[3]=reg_data[c_offset+2]; 178 | box.regress[2]= reg_data[c_offset+3]; 179 | } 180 | else { 181 | 182 | box.regress[0]=reg_data[c_offset]; 183 | box.regress[1]=reg_data[c_offset+1]; 184 | box.regress[2]=reg_data[c_offset+2]; 185 | box.regress[3]= reg_data[c_offset+3]; 186 | } 187 | 188 | output.push_back(box); 189 | } 190 | 191 | } 192 | } 193 | 194 | 195 | 196 | void run_PNet(std::unique_ptr& sess, cv::Mat& img, scale_window& win, std::vector& box_list) 197 | { 198 | cv::Mat resized; 199 | int scale_h=win.h; 200 | int scale_w=win.w; 201 | float 
scale=win.scale; 202 | float pnet_threshold=0.6; 203 | 204 | 205 | cv::resize(img, resized, cv::Size(scale_w, scale_h),0,0); 206 | 207 | /* tensorflow related*/ 208 | 209 | const int64_t dim[4] = {1,scale_h,scale_w,3}; 210 | 211 | my_buffer tensor_buf; 212 | 213 | 214 | tensor_buf.data_=(float *)resized.ptr(); 215 | tensor_buf.len_=scale_h*scale_w*3; 216 | 217 | std::vector tensor_dim; 218 | 219 | for(int i=0;i<4;i++) 220 | tensor_dim.push_back(dim[i]); 221 | 222 | 223 | Tensor input_tensor=tensorflow::TensorCApi::create_tensor(DT_FLOAT,TensorShape(tensor_dim), &tensor_buf); 224 | 225 | 226 | std::vector output_tensor; 227 | 228 | 229 | std::vector > input_tname; 230 | 231 | std::pair input0("pnet/input:0",input_tensor); 232 | input_tname.push_back(input0); 233 | 234 | std::vector output_tname; 235 | 236 | output_tname.push_back("pnet/conv4-2/BiasAdd:0"); 237 | output_tname.push_back("pnet/prob1:0"); 238 | 239 | std::vector output_node; 240 | 241 | 242 | 243 | Status run_status = sess->Run(input_tname,output_tname,output_node,&output_tensor); 244 | 245 | if(!run_status.ok()) 246 | { 247 | std::cerr<<"run PNet error"< candidate_boxes; 261 | 262 | const tensorflow::StringPiece conf_piece=output_tensor[1].tensor_data(); 263 | const tensorflow::StringPiece reg_piece=output_tensor[0].tensor_data(); 264 | 265 | 266 | const float * conf_data=(const float *)conf_piece.data(); 267 | int conf_size=feature_h*feature_w*2; 268 | const float * reg_data=(const float *)reg_piece.data(); 269 | 270 | generate_bounding_box_tf(conf_data,conf_size,reg_data, 271 | scale,pnet_threshold,feature_h,feature_w,candidate_boxes,true); 272 | 273 | 274 | nms_boxes(candidate_boxes, 0.5, NMS_UNION,box_list); 275 | 276 | } 277 | 278 | 279 | 280 | void copy_one_patch(const cv::Mat& img,face_box&input_box,float * data_to, int height, int width) 281 | { 282 | cv::Mat resized(height,width,CV_32FC3,data_to); 283 | 284 | 285 | cv::Mat chop_img = img(cv::Range(input_box.py0,input_box.py1), 286 | cv::Range(input_box.px0, input_box.px1)); 287 | 288 | int pad_top = std::abs(input_box.py0 - input_box.y0); 289 | int pad_left = std::abs(input_box.px0 - input_box.x0); 290 | int pad_bottom = std::abs(input_box.py1 - input_box.y1); 291 | int pad_right = std::abs(input_box.px1-input_box.x1); 292 | 293 | cv::copyMakeBorder(chop_img, chop_img, pad_top, pad_bottom,pad_left, pad_right, cv::BORDER_CONSTANT, cv::Scalar(0)); 294 | 295 | cv::resize(chop_img,resized, cv::Size(width, height), 0, 0); 296 | } 297 | 298 | 299 | void run_RNet(std::unique_ptr& sess, cv::Mat& img, std::vector& pnet_boxes, std::vector& output_boxes) 300 | { 301 | int batch=pnet_boxes.size(); 302 | int channel = 3; 303 | int height = 24; 304 | int width = 24; 305 | 306 | float rnet_threshold=0.7; 307 | 308 | /* prepare input image data */ 309 | 310 | int input_size=batch*height*width*channel; 311 | 312 | std::vector input_buffer(input_size); 313 | 314 | float * input_data=input_buffer.data(); 315 | 316 | for(int i=0;i tensor_dim; 337 | 338 | for(int i=0;i<4;i++) 339 | tensor_dim.push_back(dim[i]); 340 | 341 | Tensor input_tensor=tensorflow::TensorCApi::create_tensor(DT_FLOAT,TensorShape(tensor_dim), &tensor_buf); 342 | 343 | std::vector output_tensor; 344 | 345 | 346 | std::vector > input_tname; 347 | 348 | std::pair input0("rnet/input:0",input_tensor); 349 | input_tname.push_back(input0); 350 | 351 | std::vector output_tname; 352 | 353 | output_tname.push_back("rnet/conv5-2/conv5-2:0"); 354 | output_tname.push_back("rnet/prob1:0"); 355 | 356 | std::vector output_node; 357 | 
358 | Status run_status = sess->Run(input_tname,output_tname,output_node,&output_tensor); 359 | 360 | if(!run_status.ok()) 361 | { 362 | std::cerr<<"run PNet error"<rnet_threshold) 378 | { 379 | face_box output_box; 380 | 381 | face_box& input_box=pnet_boxes[i]; 382 | 383 | output_box.x0=input_box.x0; 384 | output_box.y0=input_box.y0; 385 | output_box.x1=input_box.x1; 386 | output_box.y1=input_box.y1; 387 | 388 | output_box.score = *(conf_data+1); 389 | 390 | /*Note: regress's value is swaped here!!!*/ 391 | 392 | output_box.regress[0]=reg_data[1]; 393 | output_box.regress[1]=reg_data[0]; 394 | output_box.regress[2]=reg_data[3]; 395 | output_box.regress[3]=reg_data[2]; 396 | 397 | output_boxes.push_back(output_box); 398 | 399 | 400 | } 401 | 402 | conf_data+=2; 403 | reg_data+=4; 404 | 405 | } 406 | 407 | } 408 | 409 | void run_ONet(std::unique_ptr& sess, cv::Mat& img, std::vector& rnet_boxes, std::vector& output_boxes) 410 | { 411 | int batch=rnet_boxes.size(); 412 | int channel = 3; 413 | int height = 48; 414 | int width = 48; 415 | 416 | float onet_threshold=0.9; 417 | 418 | /* prepare input image data */ 419 | 420 | int input_size=batch*height*width*channel; 421 | 422 | std::vector input_buffer(input_size); 423 | 424 | float * input_data=input_buffer.data(); 425 | 426 | for(int i=0;i tensor_dim; 447 | 448 | for(int i=0;i<4;i++) 449 | tensor_dim.push_back(dim[i]); 450 | 451 | Tensor input_tensor=tensorflow::TensorCApi::create_tensor(DT_FLOAT,TensorShape(tensor_dim), &tensor_buf); 452 | 453 | std::vector output_tensor; 454 | 455 | 456 | std::vector > input_tname; 457 | 458 | std::pair input0("onet/input:0",input_tensor); 459 | input_tname.push_back(input0); 460 | 461 | std::vector output_tname; 462 | 463 | output_tname.push_back("onet/conv6-2/conv6-2:0"); 464 | output_tname.push_back("onet/conv6-3/conv6-3:0"); 465 | output_tname.push_back("onet/prob1:0"); 466 | 467 | std::vector output_node; 468 | 469 | Status run_status = sess->Run(input_tname,output_tname,output_node,&output_tensor); 470 | 471 | if(!run_status.ok()) 472 | { 473 | std::cerr<<"run PNet error"<onet_threshold) 491 | { 492 | face_box output_box; 493 | 494 | face_box& input_box=rnet_boxes[i]; 495 | 496 | output_box.x0=input_box.x0; 497 | output_box.y0=input_box.y0; 498 | output_box.x1=input_box.x1; 499 | output_box.y1=input_box.y1; 500 | 501 | output_box.score = conf_data[1]; 502 | 503 | output_box.regress[0]=reg_data[1]; 504 | output_box.regress[1]=reg_data[0]; 505 | output_box.regress[2]=reg_data[3]; 506 | output_box.regress[3]=reg_data[2]; 507 | 508 | /*Note: switched x,y points value too..*/ 509 | for (int j = 0; j<5; j++){ 510 | output_box.landmark.x[j] = *(points_data + j+5); 511 | output_box.landmark.y[j] = *(points_data + j); 512 | } 513 | 514 | output_boxes.push_back(output_box); 515 | 516 | 517 | } 518 | 519 | conf_data+=2; 520 | reg_data+=4; 521 | points_data+=10; 522 | } 523 | 524 | } 525 | 526 | void mtcnn_detect(std::unique_ptr& sess, cv::Mat& img, std::vector& face_list) 527 | { 528 | cv::Mat working_img; 529 | 530 | float alpha=0.0078125; 531 | float mean=127.5; 532 | 533 | 534 | 535 | img.convertTo(working_img, CV_32FC3); 536 | 537 | working_img=(working_img-mean)*alpha; 538 | 539 | working_img=working_img.t(); 540 | 541 | cv::cvtColor(working_img,working_img, cv::COLOR_BGR2RGB); 542 | 543 | int img_h=working_img.rows; 544 | int img_w=working_img.cols; 545 | 546 | 547 | int min_size=40; 548 | float factor=0.709; 549 | 550 | 551 | std::vector win_list; 552 | 553 | std::vector total_pnet_boxes; 554 | 
    std::vector<face_box> total_rnet_boxes;
    std::vector<face_box> total_onet_boxes;

    cal_pyramid_list(img_h,img_w,min_size,factor,win_list);

    for(unsigned int i=0;i<win_list.size();i++)
    {
        std::vector<face_box> boxes;

        run_PNet(sess,working_img,win_list[i],boxes);

        total_pnet_boxes.insert(total_pnet_boxes.end(),boxes.begin(),boxes.end());
    }

    std::vector<face_box> pnet_boxes;
    process_boxes(total_pnet_boxes,img_h,img_w,pnet_boxes);

    // RNet
    std::vector<face_box> rnet_boxes;

    run_RNet(sess,working_img, pnet_boxes,total_rnet_boxes);

    process_boxes(total_rnet_boxes,img_h,img_w,rnet_boxes);

    //ONet
    run_ONet(sess,working_img, rnet_boxes,total_onet_boxes);

    //calculate the landmark coordinates in image space

    for(unsigned int i=0;i<total_onet_boxes.size();i++)
    {
        face_box& box=total_onet_boxes[i];

        float h=box.y1-box.y0+1;
        float w=box.x1-box.x0+1;

        for(int j=0;j<5;j++)
        {
            box.landmark.x[j]=box.x0+w*box.landmark.x[j]-1;
            box.landmark.y[j]=box.y0+h*box.landmark.y[j]-1;
        }
    }

    //get the final result
    regress_boxes(total_onet_boxes);
    nms_boxes(total_onet_boxes, 0.7, NMS_MIN, face_list);

    //switch x and y back, since working_img was transposed
    for(unsigned int i=0;i<face_list.size();i++)
    {
        face_box& box=face_list[i];

        std::swap(box.x0,box.y0);
        std::swap(box.x1,box.y1);

        for(int j=0;j<5;j++)
            std::swap(box.landmark.x[j],box.landmark.y[j]);
    }
}

int main(int argc, char* argv[])
{
    /* defaults reconstructed; the original bound the "output" flag to the
       graph string by mistake, so it gets its own variable here */
    string image = "test.jpg";
    string graph = "models/mtcnn_frozen_model.pb";
    string output_fname = "detected.jpg";
    string root_dir = "";

    std::vector<Flag> flag_list = {
        Flag("image", &image, "image to be processed"),
        Flag("graph", &graph, "graph to be executed"),
        Flag("output", &output_fname, "image with face boxed"),
    };

    string usage = tensorflow::Flags::Usage(argv[0], flag_list);
    const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
    if (!parse_result) {
        LOG(ERROR) << usage;
        return -1;
    }

    // We need to call this to set up global state for TensorFlow.
    tensorflow::port::InitMain(argv[0], &argc, &argv);
    if (argc > 1) {
        LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage;
        return -1;
    }

    // First we load and initialize the model.
    std::unique_ptr<tensorflow::Session> session;
    string graph_path = tensorflow::io::JoinPath(root_dir, graph);
    Status load_graph_status = LoadGraph(graph_path, &session);
    if (!load_graph_status.ok()) {
        LOG(ERROR) << load_graph_status;
        return -1;
    }

    // Load image
    cv::Mat frame = cv::imread(image);

    if(!frame.data)
    {
        std::cerr<<"failed to read image file: "<<image<<std::endl;
        return 1;
    }

    std::vector<face_box> face_info;

    unsigned long start_time=get_cur_time();

    mtcnn_detect(session,frame,face_info);

    unsigned long end_time=get_cur_time();

    int save_chop=0;

    for(unsigned int i=0;i<face_info.size();i++)
    {
        face_box& box=face_info[i];

        printf("face %u: x0,y0 %2.5f %2.5f  x1,y1 %2.5f %2.5f  conf: %2.5f\n",i,
                box.x0,box.y0,box.x1,box.y1,box.score);

        if(save_chop)
        {
            cv::Mat chop_img=frame(cv::Range(box.y0,box.y1), cv::Range(box.x0,box.x1));

            char fname[64];
            snprintf(fname,sizeof(fname),"chop_%u.jpg",i);

            cv::imwrite(fname,chop_img);
        }

        /* draw box and landmarks */
        cv::rectangle(frame, cv::Point(box.x0,box.y0), cv::Point(box.x1,box.y1), cv::Scalar(0,255,0), 1);

        for(int j=0;j<5;j++)
            cv::circle(frame, cv::Point(box.landmark.x[j],box.landmark.y[j]), 1, cv::Scalar(0,0,255), 2);
    }

    std::cout<<"total detect time: "<<(end_time-start_time)<<" us"<<std::endl;

    cv::imwrite(output_fname,frame);

    return 0;
}

--------------------------------------------------------------------------------
/detect_face.py:
--------------------------------------------------------------------------------

import numpy as np
import cv2


def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
    # img: input image (RGB, HxWx3)
    # minsize: minimum face size to detect
    # pnet, rnet, onet: the three stage networks
    # threshold: [th1, th2, th3], the per-stage score thresholds
    # factor: scale factor for the image pyramid
    factor_count=0
    total_boxes=np.empty((0,9))
    points=np.empty(0)
    h=img.shape[0]
    w=img.shape[1]
    minl=np.amin([h, w])
    m=12.0/minsize
    minl=minl*m
    # create scale pyramid
    scales=[]
    while minl>=12:
        scales += [m*np.power(factor, factor_count)]
        minl = minl*factor
        factor_count += 1

    # first stage
    for j in range(len(scales)):
        scale=scales[j]
        hs=int(np.ceil(h*scale))
        ws=int(np.ceil(w*scale))
        im_data = imresample(img, (hs, ws))
        im_data = (im_data-127.5)*0.0078125
        img_x = np.expand_dims(im_data, 0)
        img_y = np.transpose(img_x, (0,2,1,3))
        out = pnet(img_y)
        out0 = np.transpose(out[0], (0,2,1,3))
        out1 = np.transpose(out[1], (0,2,1,3))

        boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0])

        # inter-scale nms
        pick = nms(boxes.copy(), 0.5, 'Union')
        if boxes.size>0 and pick.size>0:
            boxes = boxes[pick,:]
            total_boxes = np.append(total_boxes, boxes, axis=0)

    numbox = total_boxes.shape[0]
    if numbox>0:
        pick = nms(total_boxes.copy(), 0.7, 'Union')
        total_boxes = total_boxes[pick,:]
        regw = total_boxes[:,2]-total_boxes[:,0]
        regh = total_boxes[:,3]-total_boxes[:,1]
        qq1 = total_boxes[:,0]+total_boxes[:,5]*regw
        qq2 = total_boxes[:,1]+total_boxes[:,6]*regh
        qq3 = total_boxes[:,2]+total_boxes[:,7]*regw
        qq4 = total_boxes[:,3]+total_boxes[:,8]*regh
        total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]]))
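        # columns 5:9 of total_boxes hold the PNet regression offsets;
        # qq1..qq4 are the regression-corrected corners, and rerec() below
        # squares the boxes up before the 24x24 crops are taken for RNet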
        total_boxes = rerec(total_boxes.copy())
        total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)

    numbox = total_boxes.shape[0]
    if numbox>0:
        # second stage
        tempimg = np.zeros((24,24,3,numbox))
        for k in range(0,numbox):
            tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
            tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
            if (tmp.shape[0]>0 and tmp.shape[1]>0) or (tmp.shape[0]==0 and tmp.shape[1]==0):
                tempimg[:,:,:,k] = imresample(tmp, (24, 24))
            else:
                return np.empty((0,5)), np.empty((10,0))  # invalid crop, no usable result
        tempimg = (tempimg-127.5)*0.0078125
        tempimg1 = np.transpose(tempimg, (3,1,0,2))
        out = rnet(tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        score = out1[1,:]
        ipass = np.where(score>threshold[1])
        total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
        mv = out0[:,ipass[0]]
        if total_boxes.shape[0]>0:
            pick = nms(total_boxes, 0.7, 'Union')
            total_boxes = total_boxes[pick,:]
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick]))
            total_boxes = rerec(total_boxes.copy())

    numbox = total_boxes.shape[0]
    if numbox>0:
        # third stage
        total_boxes = np.fix(total_boxes).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
        tempimg = np.zeros((48,48,3,numbox))
        for k in range(0,numbox):
            tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
            tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
            if (tmp.shape[0]>0 and tmp.shape[1]>0) or (tmp.shape[0]==0 and tmp.shape[1]==0):
                tempimg[:,:,:,k] = imresample(tmp, (48, 48))
            else:
                return np.empty((0,5)), np.empty((10,0))  # invalid crop, no usable result
        tempimg = (tempimg-127.5)*0.0078125
        tempimg1 = np.transpose(tempimg, (3,1,0,2))
        out = onet(tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        out2 = np.transpose(out[2])
        score = out2[1,:]
        points = out1
        ipass = np.where(score>threshold[2])
        points = points[:,ipass[0]]
        total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
        mv = out0[:,ipass[0]]

        w = total_boxes[:,2]-total_boxes[:,0]+1
        h = total_boxes[:,3]-total_boxes[:,1]+1
        points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1
        points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1
        if total_boxes.shape[0]>0:
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
            pick = nms(total_boxes.copy(), 0.7, 'Min')
            total_boxes = total_boxes[pick,:]
            points = points[:,pick]

    return total_boxes, points


def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor):
    # images: list of input images
    # detection_window_size_ratio: minimum face size as a fraction of the smaller image side
    # pnet, rnet, onet: the three stage networks
    # threshold: [th1, th2, th3], the per-stage score thresholds, each in [0, 1]
    # factor: scale factor for the image pyramid

    all_scales = [None] * len(images)
    images_with_boxes = [None] * len(images)

    for i in range(len(images)):
        images_with_boxes[i] = {'total_boxes': np.empty((0, 9))}

    # create scale pyramid
    for index, img in enumerate(images):
        all_scales[index] = []
        h = img.shape[0]
        w = img.shape[1]
        minsize = int(detection_window_size_ratio * np.minimum(w, h))
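        # e.g. detection_window_size_ratio=0.1 on a 720x1280 frame gives
        # minsize = int(0.1*720) = 72 and m = 12/72 ~ 0.167, so the pyramid
        # runs 0.167, 0.167*0.709, ... until the scaled short side drops below 12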
        factor_count = 0
        minl = np.amin([h, w])
        if minsize <= 12:
            minsize = 12

        m = 12.0 / minsize
        minl = minl * m
        while minl >= 12:
            all_scales[index].append(m * np.power(factor, factor_count))
            minl = minl * factor
            factor_count += 1

    # # # # # # # # # # # # #
    # first stage - fast proposal network (pnet) to obtain face candidates
    # # # # # # # # # # # # #

    images_obj_per_resolution = {}

    # TODO: round resolutions to a multiple of 8 to increase the probability
    # that pyramid images share a resolution across input images

    for index, scales in enumerate(all_scales):
        h = images[index].shape[0]
        w = images[index].shape[1]

        for scale in scales:
            hs = int(np.ceil(h * scale))
            ws = int(np.ceil(w * scale))

            if (ws, hs) not in images_obj_per_resolution:
                images_obj_per_resolution[(ws, hs)] = []

            im_data = imresample(images[index], (hs, ws))
            im_data = (im_data - 127.5) * 0.0078125
            img_y = np.transpose(im_data, (1, 0, 2))  # caffe uses a different dimension ordering
            images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index})

    for resolution in images_obj_per_resolution:
        images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]]
        outs = pnet(images_per_resolution)

        for index in range(len(outs[0])):
            scale = images_obj_per_resolution[resolution][index]['scale']
            image_index = images_obj_per_resolution[resolution][index]['index']
            out0 = np.transpose(outs[0][index], (1, 0, 2))
            out1 = np.transpose(outs[1][index], (1, 0, 2))

            boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0])

            # inter-scale nms
            pick = nms(boxes.copy(), 0.5, 'Union')
            if boxes.size > 0 and pick.size > 0:
                boxes = boxes[pick, :]
                images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'],
                                                                          boxes,
                                                                          axis=0)

    for index, image_obj in enumerate(images_with_boxes):
        numbox = image_obj['total_boxes'].shape[0]
        if numbox > 0:
            h = images[index].shape[0]
            w = images[index].shape[1]
            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0]
            regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1]
            qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw
            qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh
            qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw
            qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh
            image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]]))
            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
            image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32)
            dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)

            numbox = image_obj['total_boxes'].shape[0]
            tempimg = np.zeros((24, 24, 3, numbox))
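            # pad() returns matching source/destination ranges: y:ey, x:ex index
            # the original image and dy:edy, dx:edx the zero-filled 'tmp' patch
            # (1-based, MATLAB-style), so out-of-frame boxes get zero padding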
            if numbox > 0:
                for k in range(0, numbox):
                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
                    tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
                    if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
                        tempimg[:, :, :, k] = imresample(tmp, (24, 24))
                    else:
                        return [None] * len(images)  # invalid crop, no usable result

                tempimg = (tempimg - 127.5) * 0.0078125
                image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2))

    # # # # # # # # # # # # #
    # second stage - refinement of face candidates with rnet
    # # # # # # # # # # # # #

    bulk_rnet_input = np.empty((0, 24, 24, 3))
    for index, image_obj in enumerate(images_with_boxes):
        if 'rnet_input' in image_obj:
            bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0)

    out = rnet(bulk_rnet_input)
    out0 = np.transpose(out[0])
    out1 = np.transpose(out[1])
    score = out1[1, :]

    i = 0
    for index, image_obj in enumerate(images_with_boxes):
        if 'rnet_input' not in image_obj:
            continue

        rnet_input_count = image_obj['rnet_input'].shape[0]
        score_per_image = score[i:i + rnet_input_count]
        out0_per_image = out0[:, i:i + rnet_input_count]

        ipass = np.where(score_per_image > threshold[1])
        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
                                              np.expand_dims(score_per_image[ipass].copy(), 1)])

        mv = out0_per_image[:, ipass[0]]

        if image_obj['total_boxes'].shape[0] > 0:
            h = images[index].shape[0]
            w = images[index].shape[1]
            pick = nms(image_obj['total_boxes'], 0.7, 'Union')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick]))
            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())

            numbox = image_obj['total_boxes'].shape[0]

            if numbox > 0:
                tempimg = np.zeros((48, 48, 3, numbox))
                image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32)
                dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)

                for k in range(0, numbox):
                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
                    tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
                    if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
                        tempimg[:, :, :, k] = imresample(tmp, (48, 48))
                    else:
                        return [None] * len(images)  # invalid crop, no usable result
                tempimg = (tempimg - 127.5) * 0.0078125
                image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2))

        i += rnet_input_count

    # # # # # # # # # # # # #
    # third stage - further refinement and facial landmarks positions with onet
    # # # # # # # # # # # # #

    bulk_onet_input = np.empty((0, 48, 48, 3))
    for index, image_obj in enumerate(images_with_boxes):
        if 'onet_input' in image_obj:
            bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0)

    out = onet(bulk_onet_input)

    out0 = np.transpose(out[0])
    out1 = np.transpose(out[1])
    out2 = np.transpose(out[2])
    score = out2[1, :]
    points = out1

    i = 0
    ret = []
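    # every image's 48x48 crops were concatenated into one ONet batch above;
    # walk the per-image crop counts to slice each image's scores and
    # regressions back out of the shared outputs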
    for index, image_obj in enumerate(images_with_boxes):
        if 'onet_input' not in image_obj:
            ret.append(None)
            continue

        onet_input_count = image_obj['onet_input'].shape[0]

        out0_per_image = out0[:, i:i + onet_input_count]
        score_per_image = score[i:i + onet_input_count]
        points_per_image = points[:, i:i + onet_input_count]

        ipass = np.where(score_per_image > threshold[2])
        points_per_image = points_per_image[:, ipass[0]]

        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
                                              np.expand_dims(score_per_image[ipass].copy(), 1)])
        mv = out0_per_image[:, ipass[0]]

        w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1
        h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1
        points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile(
            image_obj['total_boxes'][:, 0], (5, 1)) - 1
        points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile(
            image_obj['total_boxes'][:, 1], (5, 1)) - 1

        if image_obj['total_boxes'].shape[0] > 0:
            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv))
            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            points_per_image = points_per_image[:, pick]

            ret.append((image_obj['total_boxes'], points_per_image))
        else:
            ret.append(None)

        i += onet_input_count

    return ret


# function [boundingbox] = bbreg(boundingbox,reg)
def bbreg(boundingbox,reg):
    # calibrate bounding boxes
    if reg.shape[1]==1:
        reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))

    w = boundingbox[:,2]-boundingbox[:,0]+1
    h = boundingbox[:,3]-boundingbox[:,1]+1
    b1 = boundingbox[:,0]+reg[:,0]*w
    b2 = boundingbox[:,1]+reg[:,1]*h
    b3 = boundingbox[:,2]+reg[:,2]*w
    b4 = boundingbox[:,3]+reg[:,3]*h
    boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4]))
    return boundingbox

def generateBoundingBox(imap, reg, scale, t):
    # use heatmap to generate bounding boxes
    stride=2
    cellsize=12

    imap = np.transpose(imap)
    dx1 = np.transpose(reg[:,:,0])
    dy1 = np.transpose(reg[:,:,1])
    dx2 = np.transpose(reg[:,:,2])
    dy2 = np.transpose(reg[:,:,3])
    y, x = np.where(imap >= t)
    if y.shape[0]==1:
        dx1 = np.flipud(dx1)
        dy1 = np.flipud(dy1)
        dx2 = np.flipud(dx2)
        dy2 = np.flipud(dy2)
    score = imap[(y,x)]
    reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ]))
    if reg.size==0:
        reg = np.empty((0,4))  # was (0,3); the regression output always has four columns
    bb = np.transpose(np.vstack([y,x]))
    q1 = np.fix((stride*bb+1)/scale)
    q2 = np.fix((stride*bb+cellsize-1+1)/scale)
    boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg])
    return boundingbox, reg

# function pick = nms(boxes,threshold,type)
def nms(boxes, threshold, method):
    if boxes.size==0:
        return np.empty(0, dtype=np.int16)  # was np.empty((0,3)); pick is a 1-D index array
    x1 = boxes[:,0]
    y1 = boxes[:,1]
    x2 = boxes[:,2]
    y2 = boxes[:,3]
    s = boxes[:,4]
    area = (x2-x1+1) * (y2-y1+1)
    I = np.argsort(s)
    pick = np.zeros_like(s, dtype=np.int16)
    counter = 0
    while I.size>0:
        i = I[-1]
        pick[counter] = i
        counter += 1
        idx = I[0:-1]
        xx1 = np.maximum(x1[i], x1[idx])
        yy1 = np.maximum(y1[i], y1[idx])
        xx2 = np.minimum(x2[i], x2[idx])
        yy2 = np.minimum(y2[i], y2[idx])
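        # overlap of the highest-scoring box i with every remaining box:
        # 'Min' divides the intersection by the smaller box area, any other
        # method uses intersection over union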
        w = np.maximum(0.0, xx2-xx1+1)
        h = np.maximum(0.0, yy2-yy1+1)
        inter = w * h
        if method == 'Min':  # was "is", which compares identity, not string equality
            o = inter / np.minimum(area[i], area[idx])
        else:
            o = inter / (area[i] + area[idx] - inter)
        I = I[np.where(o<=threshold)]
    pick = pick[0:counter]
    return pick

# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
def pad(total_boxes, w, h):
    # compute the padding coordinates (pad the bounding boxes to square)
    tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32)
    tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32)
    numbox = total_boxes.shape[0]

    dx = np.ones((numbox), dtype=np.int32)
    dy = np.ones((numbox), dtype=np.int32)
    edx = tmpw.copy().astype(np.int32)
    edy = tmph.copy().astype(np.int32)

    x = total_boxes[:,0].copy().astype(np.int32)
    y = total_boxes[:,1].copy().astype(np.int32)
    ex = total_boxes[:,2].copy().astype(np.int32)
    ey = total_boxes[:,3].copy().astype(np.int32)

    tmp = np.where(ex>w)
    edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1)
    ex[tmp] = w

    tmp = np.where(ey>h)
    edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1)
    ey[tmp] = h

    tmp = np.where(x<1)
    dx.flat[tmp] = np.expand_dims(2-x[tmp],1)
    x[tmp] = 1

    tmp = np.where(y<1)
    dy.flat[tmp] = np.expand_dims(2-y[tmp],1)
    y[tmp] = 1

    return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph

# function [bboxA] = rerec(bboxA)
def rerec(bboxA):
    # convert bboxA to square
    h = bboxA[:,3]-bboxA[:,1]
    w = bboxA[:,2]-bboxA[:,0]
    l = np.maximum(w, h)
    bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5
    bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5
    bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1)))
    return bboxA

def imresample(img, sz):
    im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #@UndefinedVariable
    return im_data

# This method is kept for debugging purposes
#    h=img.shape[0]
#    w=img.shape[1]
#    hs, ws = sz
#    dx = float(w) / ws
#    dy = float(h) / hs
#    im_data = np.zeros((hs,ws,3))
#    for a1 in range(0,hs):
#        for a2 in range(0,ws):
#            for a3 in range(0,3):
#                im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
#    return im_data

--------------------------------------------------------------------------------
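A minimal usage sketch for detect_face(), assuming the facenet-style create_mtcnn() helper defined near the top of detect_face.py (not included in this capture); the file name and thresholds below are illustrative, mirroring the C++ defaults above, not fixed by the repository:

# usage sketch; assumes detect_face.create_mtcnn() exists as in the facenet
# version of this file, and uses the TF1-style Session API of this repo's era
import cv2
import tensorflow as tf

import detect_face

with tf.Session() as sess:
    # loads det1.npy/det2.npy/det3.npy and builds the three stage callables
    pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    img = cv2.cvtColor(cv2.imread('test.jpg'), cv2.COLOR_BGR2RGB)
    boxes, points = detect_face.detect_face(img, 40, pnet, rnet, onet,
                                            [0.6, 0.7, 0.9], 0.709)

    # each row of boxes is (x0, y0, x1, y1, score); points holds 5 landmarks
    for x0, y0, x1, y1, score in boxes:
        print('face (%d,%d)-(%d,%d) conf %.3f' % (x0, y0, x1, y1, score))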