├── det1.npy
├── det2.npy
├── det3.npy
├── cpp
│   ├── standalone
│   │   ├── test.jpg
│   │   ├── models
│   │   │   └── mtcnn_frozen_model.pb
│   │   ├── tensorflow_mtcnn.hpp
│   │   ├── utils.hpp
│   │   ├── Makefile
│   │   ├── comm_lib.hpp
│   │   ├── README.md
│   │   ├── utils.cpp
│   │   ├── mtcnn.hpp
│   │   ├── camera.cpp
│   │   ├── test.cpp
│   │   ├── comm_lib.cpp
│   │   └── tensorflow_mtcnn.cpp
│   └── tf_embedded
│       ├── test.jpg
│       ├── models
│       │   └── mtcnn_frozen_model.pb
│       ├── utils.hpp
│       ├── comm_lib.hpp
│       ├── README.md
│       ├── BUILD
│       ├── utils.cpp
│       ├── mtcnn.hpp
│       ├── comm_lib.cpp
│       └── main.cc
├── LICENSE
├── README.md
├── facedetect_mtcnn.py
└── detect_face.py

/det1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberfire/tensorflow-mtcnn/HEAD/det1.npy
--------------------------------------------------------------------------------
/det2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberfire/tensorflow-mtcnn/HEAD/det2.npy
--------------------------------------------------------------------------------
/det3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberfire/tensorflow-mtcnn/HEAD/det3.npy
--------------------------------------------------------------------------------
/cpp/standalone/test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberfire/tensorflow-mtcnn/HEAD/cpp/standalone/test.jpg
--------------------------------------------------------------------------------
/cpp/tf_embedded/test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberfire/tensorflow-mtcnn/HEAD/cpp/tf_embedded/test.jpg
--------------------------------------------------------------------------------
/cpp/standalone/models/mtcnn_frozen_model.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberfire/tensorflow-mtcnn/HEAD/cpp/standalone/models/mtcnn_frozen_model.pb
--------------------------------------------------------------------------------
/cpp/tf_embedded/models/mtcnn_frozen_model.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberfire/tensorflow-mtcnn/HEAD/cpp/tf_embedded/models/mtcnn_frozen_model.pb
--------------------------------------------------------------------------------
/cpp/standalone/tensorflow_mtcnn.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __TENSORFLOW_MTCNN_HPP__
2 | #define __TENSORFLOW_MTCNN_HPP__
3 |
4 | #include "tensorflow/c/c_api.h"
5 | #include 
6 | #include "mtcnn.hpp"
7 |
8 | void mtcnn_detect(TF_Session* sess, TF_Graph * graph, cv::Mat& img, std::vector<face_box>& face_list);
9 |
10 | TF_Session * load_graph(const char * frozen_fname, TF_Graph** p_graph);
11 |
12 |
13 | #endif
14 |
--------------------------------------------------------------------------------
/cpp/standalone/utils.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __MTCNN_UTILS_HPP__
2 | #define __MTCNN_UTILS_HPP__
3 |
4 | /* get current time: in us */
5 | unsigned long get_cur_time(void);
6 |
7 | /*
8 | For debugging purposes: save an image or a float vector to a file.
9 | The image should be a cv::Mat.
10 | To avoid an OpenCV header file dependency, void * is used instead of cv::Mat *.
11 | */
12 |
13 |
14 | void save_img(const char * name, void * p_img);
15 |
16 | void save_float(const char * name, const float * data, int size);
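/*
   Usage sketch (illustrative only; "frame", "scores" and "count" are
   hypothetical caller-side variables, not part of this header):

       cv::Mat frame = cv::imread("test.jpg");
       save_img("frame.dat", &frame);            // utils.cpp casts the void * back to cv::Mat *
       save_float("scores.txt", scores, count);  // dumps the float vector for offline inspection
*/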
17 |
18 |
19 | #endif
20 |
21 |
--------------------------------------------------------------------------------
/cpp/tf_embedded/utils.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __MTCNN_UTILS_HPP__
2 | #define __MTCNN_UTILS_HPP__
3 |
4 | /* get current time: in us */
5 | unsigned long get_cur_time(void);
6 |
7 | /*
8 | For debugging purposes: save an image or a float vector to a file.
9 | The image should be a cv::Mat.
10 | To avoid an OpenCV header file dependency, void * is used instead of cv::Mat *.
11 | */
12 |
13 |
14 | void save_img(const char * name, void * p_img);
15 |
16 | void save_float(const char * name, const float * data, int size);
17 |
18 |
19 | #endif
20 |
21 |
--------------------------------------------------------------------------------
/cpp/standalone/Makefile:
--------------------------------------------------------------------------------
1 | TF_ROOT=../tf_dist
2 |
3 | BIN_SRCS=test.cpp camera.cpp
4 | COMM_SRCS+=tensorflow_mtcnn.cpp comm_lib.cpp utils.cpp
5 |
6 |
7 |
8 | #
9 | # opencv settings
10 | #
11 |
12 | OPENCV_LDFLAGS=$(shell pkg-config --libs opencv)
13 | OPENCV_CFLAGS=$(shell pkg-config --cflags opencv)
14 |
15 |
16 | CXXFLAGS += $(OPENCV_CFLAGS)
17 | LIBS += $(OPENCV_LDFLAGS)
18 |
19 | #
20 | # tensorflow settings
21 | #
22 |
23 |
24 | TF_INCS += -I$(TF_ROOT)/include
25 | CXXFLAGS += $(TF_INCS)
26 | LIBS+=-Wl,-rpath,$(TF_ROOT)/lib -L$(TF_ROOT)/lib -ltensorflow
27 |
28 |
29 | CXX := g++
30 | CXXFLAGS+= -Wall -ggdb -std=c++11
31 |
32 |
33 | COMM_OBJS=$(COMM_SRCS:.cpp=.o)
34 | BIN_OBJS=$(BIN_SRCS:.cpp=.o)
35 | BIN_EXES=$(BIN_SRCS:.cpp=)
36 |
37 |
38 | default : $(BIN_EXES)
39 |
40 | $(BIN_EXES) : $(COMM_OBJS)
41 |
42 | $(BIN_EXES):%:%.o
43 |
44 |
45 | %:%.o
46 | 	$(CXX) $< -o $@ $(LDFLAGS) $(COMM_OBJS) $(LIBS)
47 |
48 | %.o : %.cpp
49 | 	$(CXX) $(CXXFLAGS) -c $< -o $@
50 |
51 | clean:
52 | 	rm -f $(BIN_EXES) *.o
53 |
54 | .PHONY : all clean
55 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 cyberfire
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/cpp/standalone/comm_lib.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __COMMON_LIB_HPP__
2 | #define __COMMON_LIB_HPP__
3 |
4 | #define NMS_UNION 1
5 | #define NMS_MIN 2
6 |
7 |
8 |
9 | struct scale_window
10 | {
11 | 	int h;
12 | 	int w;
13 | 	float scale;
14 | };
15 |
16 | int numpy_round(float f);
17 |
18 | void nms_boxes(std::vector<face_box>& input, float threshold, int type, std::vector<face_box>& output);
19 |
20 | void regress_boxes(std::vector<face_box>& rects);
21 |
22 | void square_boxes(std::vector<face_box>& rects);
23 |
24 | void padding(int img_h, int img_w, std::vector<face_box>& rects);
25 |
26 | void process_boxes(std::vector<face_box>& input, int img_h, int img_w, std::vector<face_box>& rects);
27 |
28 | void generate_bounding_box(const float * confidence_data, int confidence_size,
29 | 		const float * reg_data, float scale, float threshold,
30 | 		int feature_h, int feature_w, std::vector<face_box>& output, bool transposed);
31 |
32 |
33 | void set_input_buffer(std::vector<cv::Mat>& input_channels,
34 | 		float* input_data, const int height, const int width);
35 |
36 |
37 | void cal_pyramid_list(int height, int width, int min_size, float factor, std::vector<scale_window>& list);
38 |
39 | void cal_landmark(std::vector<face_box>& box_list);
40 |
41 | void set_box_bound(std::vector<face_box>& box_list, int img_h, int img_w);
42 |
43 | #endif
44 |
--------------------------------------------------------------------------------
/cpp/tf_embedded/comm_lib.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __COMMON_LIB_HPP__
2 | #define __COMMON_LIB_HPP__
3 |
4 | #define NMS_UNION 1
5 | #define NMS_MIN 2
6 |
7 |
8 |
9 | struct scale_window
10 | {
11 | 	int h;
12 | 	int w;
13 | 	float scale;
14 | };
15 |
16 | int numpy_round(float f);
17 |
18 | void nms_boxes(std::vector<face_box>& input, float threshold, int type, std::vector<face_box>& output);
19 |
20 | void regress_boxes(std::vector<face_box>& rects);
21 |
22 | void square_boxes(std::vector<face_box>& rects);
23 |
24 | void padding(int img_h, int img_w, std::vector<face_box>& rects);
25 |
26 | void process_boxes(std::vector<face_box>& input, int img_h, int img_w, std::vector<face_box>& rects);
27 |
28 | void generate_bounding_box(const float * confidence_data, int confidence_size,
29 | 		const float * reg_data, float scale, float threshold,
30 | 		int feature_h, int feature_w, std::vector<face_box>& output, bool transposed);
31 |
32 |
33 | void set_input_buffer(std::vector<cv::Mat>& input_channels,
34 | 		float* input_data, const int height, const int width);
35 |
36 |
37 | void cal_pyramid_list(int height, int width, int min_size, float factor, std::vector<scale_window>& list);
38 |
39 | void cal_landmark(std::vector<face_box>& box_list);
40 |
41 | void set_box_bound(std::vector<face_box>& box_list, int img_h, int img_w);
42 |
43 | #endif
44 |
--------------------------------------------------------------------------------
/cpp/tf_embedded/README.md:
--------------------------------------------------------------------------------
1 | # MTCNN TensorFlow C++ Implementation
2 |
3 | This is a project that implements MTCNN, an excellent face detection algorithm, on top of TensorFlow.
4 |
5 | This version is an example that lives inside the TensorFlow code repository and uses bazel to build.
6 |
7 | The standalone example outside the repository is in cpp/standalone.
8 |
9 |
10 | Here is the trick used to reuse a pre-allocated buffer as a tensor:
11 |
12 | In order to create a tensor over a pre-allocated buffer, a friend class of Tensor has to be defined.
13 |
14 | Since TensorCApi is defined for C API usage and is only used inside libtensorflow.so, we can safely re-define this class in our own file.
15 |
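As a concrete illustration, here is the pattern condensed from main.cc in this directory (see my_buffer and TensorCApi there); it is a sketch, not a standalone compilation unit:

```cpp
// Condensed from main.cc: wrap caller-owned memory in a Tensor without copying.

// Minimal TensorBuffer over a pre-allocated float array; OwnsMemory() returns
// false, so the Tensor will not try to free our buffer.
class my_buffer : public tensorflow::TensorBuffer {
public:
    float* data_;
    size_t len_;

    void* data() const override { return data_; }
    size_t size() const override { return len_; }
    bool OwnsMemory() const override { return false; }
    TensorBuffer* root_buffer() override { return this; }
    void FillAllocationDescription(
            tensorflow::AllocationDescription* proto) const override {
        // main.cc fills in the requested bytes and allocator name here.
    }
};

// Tensor declares tensorflow::TensorCApi as a friend, and the real definition
// lives only inside libtensorflow.so, so we may define the class ourselves to
// reach the private Tensor(dtype, shape, buffer) constructor.
class tensorflow::TensorCApi {
public:
    static tensorflow::Tensor create_tensor(tensorflow::DataType dtype,
                                            const tensorflow::TensorShape& shape,
                                            tensorflow::TensorBuffer* buf) {
        return tensorflow::Tensor(dtype, shape, buf);
    }
};
```

run_PNet in main.cc then points `data_` at the resized image and builds the input tensor with `TensorCApi::create_tensor(DT_FLOAT, TensorShape(dims), &tensor_buf)`, so no per-frame copy into a freshly allocated Tensor is needed.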
16 |
17 | # Build & Run
18 |
19 | 1. copy the mtcnn directory to tensorflow/examples
20 |
21 | 2. cd tensorflow/examples/mtcnn
22 |
23 | 3. bazel build //tensorflow/examples/mtcnn
24 |
25 | 4. run:
26 |
27 | bazel-bin/tensorflow/examples/mtcnn/mtcnn --image=photo_fname --graph=./tensorflow/examples/mtcnn/models/mtcnn_frozen_model.pb
28 |
29 |
30 |
31 |
32 | # Credit
33 |
34 | ### MTCNN algorithm
35 |
36 | https://github.com/kpzhang93/MTCNN_face_detection_alignment
37 |
38 | ### MTCNN C++ on Caffe
39 |
40 | https://github.com/wowo200/MTCNN
41 |
42 | ### MTCNN python on Tensorflow
43 |
44 | FaceNet uses MTCNN to align faces.
45 |
46 | https://github.com/davidsandberg/facenet
47 | From this directory:
48 | facenet/src/align
49 |
50 |
51 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # tensorflow-mtcnn
2 |
3 | MTCNN is one of the best face detection algorithms.
4 | This repository provides inference-only MTCNN face detection on TensorFlow, based on davidsandberg's facenet project, in both a Python version and a C++ version.
5 |
6 | ## C++
7 |
8 | There are two C++ versions.
9 |
10 | One is built inside the TensorFlow code repository, so it needs to be copied to the directory tensorflow/examples.
11 | Please check cpp/tf_embedded/README.md for details.
12 |
13 | The other is standalone: it only needs libtensorflow.so and c_api.h to build and run.
14 | Please check cpp/standalone/README.md for more details.
15 |
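For a quick feel for the standalone API, here is a minimal sketch condensed from cpp/standalone/test.cpp (the real program also parses command-line options and can save the cropped faces):

```cpp
// Condensed from cpp/standalone/test.cpp: load the frozen graph once, then
// detect faces on a cv::Mat and draw the resulting boxes.
#include <vector>
#include <opencv2/opencv.hpp>
#include "tensorflow/c/c_api.h"
#include "tensorflow_mtcnn.hpp"

int main(void)
{
    TF_Graph* graph;
    TF_Session* sess = load_graph("./models/mtcnn_frozen_model.pb", &graph);
    if (sess == nullptr)
        return 1;

    cv::Mat frame = cv::imread("./test.jpg");

    std::vector<face_box> faces;
    mtcnn_detect(sess, graph, frame, faces);

    for (const face_box& box : faces)
        cv::rectangle(frame,
                      cv::Point(box.x0, box.y0),
                      cv::Point(box.x1, box.y1),
                      cv::Scalar(0, 255, 0), 1);

    cv::imwrite("./new.jpg", frame);
    return 0;
}
```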
16 | ## Python Run
17 | 1. install tensorflow first, please refer to https://www.tensorflow.org/install
18 | 2. install python packages: opencv, numpy
19 | 3. python ./facedetect_mtcnn.py --input input.jpg --output new.jpg
20 |
21 | ## Build tensorflow on arm64 board
22 |
23 | Please check out the guide [how to build tensorflow on firefly](https://cyberfire.github.io/tensorflow/rk3399/howto%20build%20tensorflow%20on%20firefly.md)
24 |
25 | ## Credit
26 |
27 | ### MTCNN algorithm
28 |
29 | https://github.com/kpzhang93/MTCNN_face_detection_alignment
30 |
31 | ### MTCNN C++ on Caffe
32 |
33 | https://github.com/wowo200/MTCNN
34 |
35 | ### MTCNN python on Tensorflow
36 |
37 | FaceNet uses MTCNN to align faces.
38 |
39 | https://github.com/davidsandberg/facenet
40 | From this directory:
41 | facenet/src/align
42 |
43 |
44 |
--------------------------------------------------------------------------------
/cpp/standalone/README.md:
--------------------------------------------------------------------------------
1 | # MTCNN TensorFlow C++ Implementation
2 |
3 | This is a project that implements MTCNN, an excellent face detection algorithm, on top of TensorFlow.
4 |
5 | This version is an example that lives outside the TensorFlow code repository and calls TensorFlow through libtensorflow.so.
6 |
7 | It is also an example of how to use the TensorFlow C API.
8 |
9 | # Build & Run
10 |
11 | 1. build the TensorFlow library distribution with the following command in the tensorflow directory
12 |
13 | bazel build --config=opt //tensorflow/tools/lib_package:libtensorflow
14 |
15 | the tarball, bazel-bin/tensorflow/tools/lib_package/libtensorflow.tar.gz, includes libtensorflow.so and the C header files
16 |
17 | 2. edit the Makefile, set TF_ROOT to the correct path on your machine
18 |
19 | 3. make
20 | two demos will be created: "test" checks a single photo, while "camera" does live face detection.
21 |
22 |
23 | 4. run:
24 | arguments for test:
25 |
26 | test [ -i input_image ] [ -o output_image ] [ -m model_fname ] [ -s ]
27 |
28 | -s saves each detected face into a .jpg file
29 |
30 |
31 |
32 |
33 |
34 | # Credit
35 |
36 | ### MTCNN algorithm
37 |
38 | https://github.com/kpzhang93/MTCNN_face_detection_alignment
39 |
40 | ### MTCNN C++ on Caffe
41 |
42 | https://github.com/wowo200/MTCNN
43 |
44 | ### MTCNN python on Tensorflow
45 |
46 | FaceNet uses MTCNN to align faces.
47 |
48 | https://github.com/davidsandberg/facenet
49 | From this directory:
50 | facenet/src/align
51 |
52 |
53 |
--------------------------------------------------------------------------------
/cpp/tf_embedded/BUILD:
--------------------------------------------------------------------------------
1 | # Description:
2 | #   TensorFlow C++ MTCNN face detection example.
3 | 4 | package(default_visibility = ["//tensorflow:internal"]) 5 | 6 | licenses(["notice"]) # Apache 2.0 7 | 8 | exports_files(["LICENSE"]) 9 | 10 | cc_binary( 11 | name = "mtcnn", 12 | srcs = [ 13 | "main.cc", 14 | "comm_lib.cpp", 15 | "comm_lib.hpp", 16 | "mtcnn.hpp", 17 | "utils.cpp", 18 | "utils.hpp" 19 | ], 20 | 21 | copts = [ 22 | "-I/usr/local/include/opencv", 23 | " -I/usr/local/include", 24 | "-Wno-sign-compare", 25 | "-Wall", 26 | "-g", 27 | "-O0", 28 | ], 29 | 30 | linkopts = 31 | ["-lm", 32 | "-L/usr/local/lib -lopencv_shape -lopencv_stitching -lopencv_objdetect -lopencv_superres -lopencv_videostab -lopencv_calib3d -lopencv_features2d -lopencv_highgui -lopencv_videoio -lopencv_imgcodecs -lopencv_video -lopencv_photo -lopencv_ml -lopencv_imgproc -lopencv_flann -lopencv_core", 33 | "-g", 34 | ], 35 | deps = 36 | [ 37 | "//tensorflow/cc:cc_ops", 38 | "//tensorflow/core:framework_internal", 39 | "//tensorflow/core:tensorflow", 40 | ], 41 | 42 | ) 43 | 44 | filegroup( 45 | name = "all_files", 46 | srcs = glob( 47 | ["**/*"], 48 | exclude = [ 49 | "**/METADATA", 50 | "**/OWNERS", 51 | "bin/**", 52 | "gen/**", 53 | ], 54 | ), 55 | visibility = ["//tensorflow:__subpackages__"], 56 | ) 57 | -------------------------------------------------------------------------------- /cpp/standalone/utils.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | 11 | 12 | unsigned long get_cur_time(void) 13 | { 14 | struct timeval tv; 15 | unsigned long ts; 16 | 17 | gettimeofday(&tv,NULL); 18 | 19 | ts=tv.tv_sec*1000000+tv.tv_usec; 20 | 21 | return ts; 22 | } 23 | 24 | void save_float(const char * name, const float * data, int size) 25 | { 26 | char fname[128]; 27 | 28 | sprintf(fname,"%s",name); 29 | 30 | std::cout<<"save data to "<(i); 73 | 74 | for(int j=0;j 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | 11 | 12 | unsigned long get_cur_time(void) 13 | { 14 | struct timeval tv; 15 | unsigned long ts; 16 | 17 | gettimeofday(&tv,NULL); 18 | 19 | ts=tv.tv_sec*1000000+tv.tv_usec; 20 | 21 | return ts; 22 | } 23 | 24 | void save_float(const char * name, const float * data, int size) 25 | { 26 | char fname[128]; 27 | 28 | sprintf(fname,"%s",name); 29 | 30 | std::cout<<"save data to "<(i); 73 | 74 | for(int j=0;j 5 | #include 6 | 7 | #include 8 | 9 | 10 | struct face_landmark 11 | { 12 | float x[5]; 13 | float y[5]; 14 | }; 15 | 16 | struct face_box 17 | { 18 | float x0; 19 | float y0; 20 | float x1; 21 | float y1; 22 | 23 | /* confidence score */ 24 | float score; 25 | 26 | /*regression scale */ 27 | 28 | float regress[4]; 29 | 30 | /* padding stuff*/ 31 | float px0; 32 | float py0; 33 | float px1; 34 | float py1; 35 | 36 | face_landmark landmark; 37 | }; 38 | 39 | 40 | 41 | class mtcnn { 42 | public: 43 | mtcnn(void){ 44 | min_size_=40; 45 | pnet_threshold_=0.6; 46 | rnet_threshold_=0.7; 47 | onet_threshold_=0.9; 48 | factor_=0.709; 49 | 50 | } 51 | 52 | void set_threshold(float p, float r, float o) 53 | { 54 | pnet_threshold_=p; 55 | rnet_threshold_=r; 56 | onet_threshold_=o; 57 | } 58 | 59 | void set_factor_min_size(float factor, float min_size) 60 | { 61 | factor_=factor; 62 | min_size_=min_size; 63 | } 64 | 65 | 66 | virtual int load_model(const std::string& model_dir)=0; 67 | virtual void detect(cv::Mat& img, std::vector& face_list)=0; 68 | virtual ~mtcnn(void){}; 69 | 70 | protected: 71 | 72 | int min_size_; 73 | float 
pnet_threshold_; 74 | float rnet_threshold_; 75 | float onet_threshold_; 76 | float factor_; 77 | }; 78 | 79 | /* factory part */ 80 | 81 | class mtcnn_factory 82 | { 83 | public: 84 | 85 | typedef mtcnn * (*creator)(void); 86 | 87 | static void register_creator(const std::string& name, creator& create_func); 88 | static mtcnn * create_detector(const std::string& name); 89 | static std::vector list(void); 90 | 91 | private: 92 | mtcnn_factory(){}; 93 | 94 | 95 | }; 96 | 97 | class only_for_auto_register 98 | { 99 | public: 100 | only_for_auto_register(std::string name, mtcnn_factory::creator func) 101 | { 102 | mtcnn_factory::register_creator(name,func); 103 | } 104 | 105 | }; 106 | 107 | #define REGISTER_MTCNN_CREATOR(name,func) \ 108 | static only_for_auto_register dummy_mtcnn_creator_## name (#name, func) 109 | 110 | #endif 111 | -------------------------------------------------------------------------------- /cpp/tf_embedded/mtcnn.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __MTCNN_HPP__ 2 | #define __MTCNN_HPP__ 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | 10 | struct face_landmark 11 | { 12 | float x[5]; 13 | float y[5]; 14 | }; 15 | 16 | struct face_box 17 | { 18 | float x0; 19 | float y0; 20 | float x1; 21 | float y1; 22 | 23 | /* confidence score */ 24 | float score; 25 | 26 | /*regression scale */ 27 | 28 | float regress[4]; 29 | 30 | /* padding stuff*/ 31 | float px0; 32 | float py0; 33 | float px1; 34 | float py1; 35 | 36 | face_landmark landmark; 37 | }; 38 | 39 | 40 | 41 | class mtcnn { 42 | public: 43 | mtcnn(void){ 44 | min_size_=40; 45 | pnet_threshold_=0.6; 46 | rnet_threshold_=0.7; 47 | onet_threshold_=0.9; 48 | factor_=0.709; 49 | 50 | } 51 | 52 | void set_threshold(float p, float r, float o) 53 | { 54 | pnet_threshold_=p; 55 | rnet_threshold_=r; 56 | onet_threshold_=o; 57 | } 58 | 59 | void set_factor_min_size(float factor, float min_size) 60 | { 61 | factor_=factor; 62 | min_size_=min_size; 63 | } 64 | 65 | 66 | virtual int load_model(const std::string& model_dir)=0; 67 | virtual void detect(cv::Mat& img, std::vector& face_list)=0; 68 | virtual ~mtcnn(void){}; 69 | 70 | protected: 71 | 72 | int min_size_; 73 | float pnet_threshold_; 74 | float rnet_threshold_; 75 | float onet_threshold_; 76 | float factor_; 77 | }; 78 | 79 | /* factory part */ 80 | 81 | class mtcnn_factory 82 | { 83 | public: 84 | 85 | typedef mtcnn * (*creator)(void); 86 | 87 | static void register_creator(const std::string& name, creator& create_func); 88 | static mtcnn * create_detector(const std::string& name); 89 | static std::vector list(void); 90 | 91 | private: 92 | mtcnn_factory(){}; 93 | 94 | 95 | }; 96 | 97 | class only_for_auto_register 98 | { 99 | public: 100 | only_for_auto_register(std::string name, mtcnn_factory::creator func) 101 | { 102 | mtcnn_factory::register_creator(name,func); 103 | } 104 | 105 | }; 106 | 107 | #define REGISTER_MTCNN_CREATOR(name,func) \ 108 | static only_for_auto_register dummy_mtcnn_creator_## name (#name, func) 109 | 110 | #endif 111 | -------------------------------------------------------------------------------- /facedetect_mtcnn.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2016 David Sandberg 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, 
including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # 24 | # Borrowed from davidsandberg's facenet project: https://github.com/davidsandberg/facenet 25 | # From this directory: 26 | # facenet/src/align 27 | # 28 | # Just keep the MTCNN related stuff and removed other codes 29 | # python package required: 30 | # tensorflow, opencv,numpy 31 | 32 | 33 | from __future__ import absolute_import 34 | from __future__ import division 35 | from __future__ import print_function 36 | 37 | import sys 38 | import os 39 | import argparse 40 | import tensorflow as tf 41 | import numpy as np 42 | import detect_face 43 | import cv2 44 | 45 | def main(args): 46 | 47 | sess = tf.Session() 48 | pnet, rnet, onet = detect_face.create_mtcnn(sess, None) 49 | 50 | minsize = 40 # minimum size of face 51 | threshold = [ 0.6, 0.7, 0.9 ] # three steps's threshold 52 | factor = 0.709 # scale factor 53 | 54 | 55 | filename =args.input 56 | output_filename =args.output 57 | 58 | 59 | draw = cv2.imread(filename) 60 | 61 | img=cv2.cvtColor(draw,cv2.COLOR_BGR2RGB) 62 | 63 | bounding_boxes, points = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) 64 | 65 | nrof_faces = bounding_boxes.shape[0] 66 | 67 | 68 | for b in bounding_boxes: 69 | cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (0, 255, 0)) 70 | print(b) 71 | 72 | 73 | 74 | for p in points.T: 75 | for i in range(5): 76 | cv2.circle(draw, (p[i], p[i + 5]), 1, (0, 0, 255), 2) 77 | 78 | cv2.imwrite(output_filename,draw) 79 | 80 | print('Total %d face(s) detected, saved in %s' % (nrof_faces,output_filename)) 81 | 82 | 83 | def parse_arguments(argv): 84 | parser = argparse.ArgumentParser() 85 | parser.add_argument('--input', type=str, help='image to be detected for faces.',default='./test.jpg') 86 | parser.add_argument('--output', type=str, help='new image with boxed faces',default='new.jpg') 87 | return parser.parse_args(argv) 88 | 89 | if __name__ == '__main__': 90 | main(parse_arguments(sys.argv[1:])) 91 | -------------------------------------------------------------------------------- /cpp/standalone/camera.cpp: -------------------------------------------------------------------------------- 1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | // A minimal but useful C++ example showing how to load an Imagenet-style object 17 | // recognition TensorFlow model, prepare input images for it, run them through 18 | // the graph, and interpret the results. 19 | // 20 | // It's designed to have as few dependencies and be as clear as possible, so 21 | // it's more verbose than it could be in production code. In particular, using 22 | // auto for the types of a lot of the returned values from TensorFlow calls can 23 | // remove a lot of boilerplate, but I find the explicit types useful in sample 24 | // code to make it simple to look up the classes involved. 25 | // 26 | // To use it, compile and then run in a working directory with the 27 | // learning/brain/tutorials/label_image/data/ folder below it, and you should 28 | // see the top five labels for the example Lena image output. You can then 29 | // customize it to use your own models or images by changing the file names at 30 | // the top of the main() function. 31 | // 32 | // The googlenet_graph.pb file included by default is created from Inception. 33 | // 34 | // Note that, for GIF inputs, to reuse existing code, only single-frame ones 35 | // are supported. 36 | 37 | #include 38 | #include 39 | #include 40 | 41 | 42 | #include "tensorflow/c/c_api.h" 43 | #include "tensorflow_mtcnn.hpp" 44 | #include "mtcnn.hpp" 45 | #include "comm_lib.hpp" 46 | #include "utils.hpp" 47 | 48 | using std::string; 49 | 50 | int main(int argc, char* argv[]) 51 | { 52 | string model_fname ="./models/mtcnn_frozen_model.pb"; 53 | 54 | cv::VideoCapture camera; 55 | 56 | camera.open(0); 57 | 58 | if(!camera.isOpened()) 59 | { 60 | std::cerr<<"failed to open camera"< face_info; 84 | 85 | unsigned long start_time=get_cur_time(); 86 | 87 | mtcnn_detect(sess,graph,frame,face_info); 88 | 89 | unsigned long end_time=get_cur_time(); 90 | 91 | 92 | 93 | for(unsigned int i=0;i 38 | #include 39 | #include 40 | 41 | 42 | #include "tensorflow/c/c_api.h" 43 | #include "tensorflow_mtcnn.hpp" 44 | #include "mtcnn.hpp" 45 | #include "comm_lib.hpp" 46 | #include "utils.hpp" 47 | 48 | #include 49 | 50 | using std::string; 51 | 52 | int main(int argc, char* argv[]) 53 | { 54 | string image = "./test.jpg"; 55 | string model_fname = "./models/mtcnn_frozen_model.pb"; 56 | string output_fname="./new.jpg"; 57 | int save_chop=0; 58 | int res; 59 | 60 | 61 | while((res=getopt(argc,argv,"i:o:m:s"))!=-1) 62 | { 63 | switch(res) 64 | { 65 | case 'i': 66 | image=optarg; 67 | break; 68 | case 'o': 69 | output_fname=optarg; 70 | break; 71 | case 's': 72 | save_chop=1; 73 | break; 74 | case 'm': 75 | model_fname=optarg; 76 | break; 77 | default: 78 | break; 79 | } 80 | } 81 | 82 | TF_Session * sess; 83 | TF_Graph * graph; 84 | 85 | 86 | sess=load_graph(model_fname.c_str(),&graph); 87 | 88 | if(sess==nullptr) 89 | return 1; 90 | 91 | //Load image 92 | 93 | cv::Mat frame = cv::imread(image); 94 | 95 | if(!frame.data) 96 | { 97 | std::cerr<<"failed to read image file: "< face_info; 103 | 104 | unsigned long 
start_time=get_cur_time(); 105 | 106 | mtcnn_detect(sess,graph,frame,face_info); 107 | 108 | unsigned long end_time=get_cur_time(); 109 | 110 | 111 | 112 | for(unsigned int i=0;i& input, float threshold, int type, std::vector&output) 6 | { 7 | 8 | std::sort(input.begin(),input.end(), 9 | [](const face_box& a, const face_box&b) { 10 | return a.score > b.score; 11 | }); 12 | 13 | int box_num=input.size(); 14 | 15 | std::vector merged(box_num,0); 16 | 17 | for(int i=0;ithreshold) 68 | merged[j]=1; 69 | } 70 | 71 | 72 | } 73 | 74 | 75 | } 76 | 77 | void regress_boxes(std::vector& rects) 78 | { 79 | for(unsigned int i=0;i& rects) 95 | { 96 | 97 | for(unsigned int i=0;i& rects) 112 | { 113 | for(unsigned int i=0; i& input, int img_h, int img_w, std::vector& rects) 124 | { 125 | 126 | nms_boxes(input,0.7,NMS_UNION,rects); 127 | 128 | regress_boxes(rects); 129 | 130 | square_boxes(rects); 131 | 132 | padding(img_h,img_w,rects); 133 | 134 | } 135 | 136 | 137 | void generate_bounding_box(const float * confidence_data, int confidence_size, 138 | const float * reg_data, float scale, float threshold, 139 | int feature_h, int feature_w, std::vector& output, bool transposed) 140 | { 141 | 142 | int stride = 2; 143 | int cellSize = 12; 144 | 145 | int img_h= feature_h; 146 | int img_w = feature_w; 147 | 148 | int count = confidence_size/ 2; 149 | confidence_data += count; 150 | 151 | for (int i = 0; i= threshold){ 153 | int y = i / img_w; 154 | int x = i - img_w * y; 155 | 156 | float top_x = (int)((x*stride + 1) / scale); 157 | float top_y = (int)((y*stride + 1) / scale); 158 | float bottom_x = (int)((x*stride + cellSize) / scale); 159 | float bottom_y = (int)((y*stride + cellSize) / scale); 160 | 161 | 162 | face_box box; 163 | 164 | 165 | box.x0 = top_x; 166 | box.y0 = top_y; 167 | box.x1 = bottom_x; 168 | box.y1 = bottom_y; 169 | 170 | box.score = *(confidence_data + i); 171 | 172 | int c_offset=y*img_w+x; 173 | int c_size=img_w*img_h; 174 | 175 | if(transposed) 176 | { 177 | 178 | box.regress[1]=reg_data[c_offset]; 179 | box.regress[0]=reg_data[c_offset+c_size]; 180 | box.regress[3]=reg_data[c_offset+2*c_size]; 181 | box.regress[2]= reg_data[c_offset+3*c_size]; 182 | } 183 | else { 184 | 185 | box.regress[0]=reg_data[c_offset]; 186 | box.regress[1]=reg_data[c_offset+c_size]; 187 | box.regress[2]=reg_data[c_offset+2*c_size]; 188 | box.regress[3]= reg_data[c_offset+3*c_size]; 189 | } 190 | 191 | output.push_back(box); 192 | } 193 | } 194 | 195 | } 196 | 197 | 198 | void set_input_buffer(std::vector& input_channels, 199 | float* input_data, const int height, const int width) 200 | { 201 | for (int i = 0; i < 3; ++i) { 202 | cv::Mat channel(height, width, CV_32FC1, input_data); 203 | input_channels.push_back(channel); 204 | input_data += width * height; 205 | } 206 | } 207 | 208 | 209 | void cal_pyramid_list(int height, int width, int min_size, float factor,std::vector& list) 210 | { 211 | int min_side = std::min(height, width); 212 | double m = 12.0 / min_size; 213 | 214 | min_side=min_side*m; 215 | double cur_scale=1.0; 216 | 217 | double scale; 218 | 219 | 220 | while (min_side >= 12) 221 | { 222 | scale=m*cur_scale; 223 | cur_scale=cur_scale *factor; 224 | min_side *= factor; 225 | 226 | int hs = std::ceil(height*scale); 227 | int ws = std::ceil(width*scale); 228 | 229 | scale_window win; 230 | win.h=hs; 231 | win.w=ws; 232 | win.scale=scale; 233 | list.push_back(win); 234 | } 235 | 236 | } 237 | 238 | void cal_landmark(std::vector& box_list) 239 | { 240 | for(unsigned int i=0;i& box_list, int img_h, 
int img_w) 257 | { 258 | for(unsigned int i=0; i& input, float threshold, int type, std::vector&output) 6 | { 7 | 8 | std::sort(input.begin(),input.end(), 9 | [](const face_box& a, const face_box&b) { 10 | return a.score > b.score; 11 | }); 12 | 13 | int box_num=input.size(); 14 | 15 | std::vector merged(box_num,0); 16 | 17 | for(int i=0;ithreshold) 68 | merged[j]=1; 69 | } 70 | 71 | 72 | } 73 | 74 | 75 | } 76 | 77 | void regress_boxes(std::vector& rects) 78 | { 79 | for(unsigned int i=0;i& rects) 95 | { 96 | 97 | for(unsigned int i=0;i& rects) 112 | { 113 | for(unsigned int i=0; i& input, int img_h, int img_w, std::vector& rects) 124 | { 125 | 126 | nms_boxes(input,0.7,NMS_UNION,rects); 127 | 128 | regress_boxes(rects); 129 | 130 | square_boxes(rects); 131 | 132 | padding(img_h,img_w,rects); 133 | 134 | } 135 | 136 | 137 | void generate_bounding_box(const float * confidence_data, int confidence_size, 138 | const float * reg_data, float scale, float threshold, 139 | int feature_h, int feature_w, std::vector& output, bool transposed) 140 | { 141 | 142 | int stride = 2; 143 | int cellSize = 12; 144 | 145 | int img_h= feature_h; 146 | int img_w = feature_w; 147 | 148 | int count = confidence_size/ 2; 149 | confidence_data += count; 150 | 151 | for (int i = 0; i= threshold){ 153 | int y = i / img_w; 154 | int x = i - img_w * y; 155 | 156 | float top_x = (int)((x*stride + 1) / scale); 157 | float top_y = (int)((y*stride + 1) / scale); 158 | float bottom_x = (int)((x*stride + cellSize) / scale); 159 | float bottom_y = (int)((y*stride + cellSize) / scale); 160 | 161 | 162 | face_box box; 163 | 164 | 165 | box.x0 = top_x; 166 | box.y0 = top_y; 167 | box.x1 = bottom_x; 168 | box.y1 = bottom_y; 169 | 170 | box.score = *(confidence_data + i); 171 | 172 | int c_offset=y*img_w+x; 173 | int c_size=img_w*img_h; 174 | 175 | if(transposed) 176 | { 177 | 178 | box.regress[1]=reg_data[c_offset]; 179 | box.regress[0]=reg_data[c_offset+c_size]; 180 | box.regress[3]=reg_data[c_offset+2*c_size]; 181 | box.regress[2]= reg_data[c_offset+3*c_size]; 182 | } 183 | else { 184 | 185 | box.regress[0]=reg_data[c_offset]; 186 | box.regress[1]=reg_data[c_offset+c_size]; 187 | box.regress[2]=reg_data[c_offset+2*c_size]; 188 | box.regress[3]= reg_data[c_offset+3*c_size]; 189 | } 190 | 191 | output.push_back(box); 192 | } 193 | } 194 | 195 | } 196 | 197 | 198 | void set_input_buffer(std::vector& input_channels, 199 | float* input_data, const int height, const int width) 200 | { 201 | for (int i = 0; i < 3; ++i) { 202 | cv::Mat channel(height, width, CV_32FC1, input_data); 203 | input_channels.push_back(channel); 204 | input_data += width * height; 205 | } 206 | } 207 | 208 | 209 | void cal_pyramid_list(int height, int width, int min_size, float factor,std::vector& list) 210 | { 211 | int min_side = std::min(height, width); 212 | double m = 12.0 / min_size; 213 | 214 | min_side=min_side*m; 215 | double cur_scale=1.0; 216 | 217 | double scale; 218 | 219 | 220 | while (min_side >= 12) 221 | { 222 | scale=m*cur_scale; 223 | cur_scale=cur_scale *factor; 224 | min_side *= factor; 225 | 226 | int hs = std::ceil(height*scale); 227 | int ws = std::ceil(width*scale); 228 | 229 | scale_window win; 230 | win.h=hs; 231 | win.w=ws; 232 | win.scale=scale; 233 | list.push_back(win); 234 | } 235 | 236 | } 237 | 238 | void cal_landmark(std::vector& box_list) 239 | { 240 | for(unsigned int i=0;i& box_list, int img_h, int img_w) 257 | { 258 | for(unsigned int i=0; i 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 
"tensorflow/c/c_api.h" 8 | 9 | #include "mtcnn.hpp" 10 | #include "comm_lib.hpp" 11 | #include "utils.hpp" 12 | #include "tensorflow_mtcnn.hpp" 13 | 14 | static int load_file(const std::string & fname, std::vector& buf) 15 | { 16 | std::ifstream fs(fname, std::ios::binary | std::ios::in); 17 | 18 | if(!fs.good()) 19 | { 20 | std::cerr< model_buf; 46 | 47 | load_file(frozen_fname,model_buf); 48 | 49 | TF_Buffer graph_def = {model_buf.data(), model_buf.size(), nullptr}; 50 | 51 | TF_ImportGraphDefOptions* import_opts = TF_NewImportGraphDefOptions(); 52 | TF_ImportGraphDefOptionsSetPrefix(import_opts, ""); 53 | TF_GraphImportGraphDef(graph, &graph_def, import_opts, s); 54 | 55 | if(TF_GetCode(s) != TF_OK) 56 | { 57 | printf("load graph failed!\n Error: %s\n",TF_Message(s)); 58 | 59 | return nullptr; 60 | } 61 | 62 | TF_SessionOptions* sess_opts = TF_NewSessionOptions(); 63 | TF_Session* session = TF_NewSession(graph, sess_opts, s); 64 | assert(TF_GetCode(s) == TF_OK); 65 | 66 | 67 | TF_DeleteStatus(s); 68 | 69 | 70 | *p_graph=graph; 71 | 72 | return session; 73 | } 74 | 75 | void generate_bounding_box_tf(const float * confidence_data, int confidence_size, 76 | const float * reg_data, float scale, float threshold, 77 | int feature_h, int feature_w, std::vector& output, bool transposed) 78 | { 79 | 80 | int stride = 2; 81 | int cellSize = 12; 82 | 83 | int img_h= feature_h; 84 | int img_w = feature_w; 85 | 86 | 87 | for(int y=0;y= threshold) 95 | { 96 | 97 | float top_x = (int)((x*stride + 1) / scale); 98 | float top_y = (int)((y*stride + 1) / scale); 99 | float bottom_x = (int)((x*stride + cellSize) / scale); 100 | float bottom_y = (int)((y*stride + cellSize) / scale); 101 | 102 | face_box box; 103 | 104 | box.x0 = top_x; 105 | box.y0 = top_y; 106 | box.x1 = bottom_x; 107 | box.y1 = bottom_y; 108 | 109 | box.score=score; 110 | 111 | int c_offset=(img_w*4)*y+4*x; 112 | 113 | if(transposed) 114 | { 115 | 116 | box.regress[1]=reg_data[c_offset]; 117 | box.regress[0]=reg_data[c_offset+1]; 118 | box.regress[3]=reg_data[c_offset+2]; 119 | box.regress[2]= reg_data[c_offset+3]; 120 | } 121 | else { 122 | 123 | box.regress[0]=reg_data[c_offset]; 124 | box.regress[1]=reg_data[c_offset+1]; 125 | box.regress[2]=reg_data[c_offset+2]; 126 | box.regress[3]= reg_data[c_offset+3]; 127 | } 128 | 129 | output.push_back(box); 130 | } 131 | 132 | } 133 | } 134 | 135 | /* To make tensor release happy...*/ 136 | static void dummy_deallocator(void* data, size_t len, void* arg) 137 | { 138 | } 139 | 140 | void run_PNet(TF_Session * sess, TF_Graph * graph, cv::Mat& img, scale_window& win, std::vector& box_list) 141 | { 142 | cv::Mat resized; 143 | int scale_h=win.h; 144 | int scale_w=win.w; 145 | float scale=win.scale; 146 | float pnet_threshold=0.6; 147 | 148 | cv::resize(img, resized, cv::Size(scale_w, scale_h),0,0); 149 | 150 | /* tensorflow related*/ 151 | 152 | TF_Status * s= TF_NewStatus(); 153 | 154 | std::vector input_names; 155 | std::vector input_values; 156 | 157 | TF_Operation* input_name=TF_GraphOperationByName(graph, "pnet/input"); 158 | 159 | input_names.push_back({input_name, 0}); 160 | 161 | const int64_t dim[4] = {1,scale_h,scale_w,3}; 162 | 163 | TF_Tensor* input_tensor = TF_NewTensor(TF_FLOAT,dim,4,resized.ptr(),sizeof(float)*scale_w*scale_h*3,dummy_deallocator,nullptr); 164 | 165 | input_values.push_back(input_tensor); 166 | 167 | 168 | 169 | std::vector output_names; 170 | 171 | TF_Operation* output_name = TF_GraphOperationByName(graph,"pnet/conv4-2/BiasAdd"); 172 | 
output_names.push_back({output_name,0}); 173 | 174 | output_name = TF_GraphOperationByName(graph,"pnet/prob1"); 175 | output_names.push_back({output_name,0}); 176 | 177 | std::vector output_values(output_names.size(), nullptr); 178 | 179 | 180 | TF_SessionRun(sess,nullptr,input_names.data(),input_values.data(),input_names.size(), 181 | output_names.data(),output_values.data(),output_names.size(), 182 | nullptr,0,nullptr,s); 183 | 184 | 185 | assert(TF_GetCode(s) == TF_OK); 186 | 187 | /*retrieval the forward results*/ 188 | 189 | const float * conf_data=(const float *)TF_TensorData(output_values[1]); 190 | const float * reg_data=(const float *)TF_TensorData(output_values[0]); 191 | 192 | 193 | int feature_h=TF_Dim(output_values[0],1); 194 | int feature_w=TF_Dim(output_values[0],2); 195 | 196 | int conf_size=feature_h*feature_w*2; 197 | 198 | std::vector candidate_boxes; 199 | 200 | generate_bounding_box_tf(conf_data,conf_size,reg_data, 201 | scale,pnet_threshold,feature_h,feature_w,candidate_boxes,true); 202 | 203 | 204 | nms_boxes(candidate_boxes, 0.5, NMS_UNION,box_list); 205 | 206 | TF_DeleteStatus(s); 207 | TF_DeleteTensor(output_values[0]); 208 | TF_DeleteTensor(output_values[1]); 209 | TF_DeleteTensor(input_tensor); 210 | 211 | } 212 | 213 | 214 | 215 | void copy_one_patch(const cv::Mat& img,face_box&input_box,float * data_to, int height, int width) 216 | { 217 | cv::Mat resized(height,width,CV_32FC3,data_to); 218 | 219 | 220 | cv::Mat chop_img = img(cv::Range(input_box.py0,input_box.py1), 221 | cv::Range(input_box.px0, input_box.px1)); 222 | 223 | int pad_top = std::abs(input_box.py0 - input_box.y0); 224 | int pad_left = std::abs(input_box.px0 - input_box.x0); 225 | int pad_bottom = std::abs(input_box.py1 - input_box.y1); 226 | int pad_right = std::abs(input_box.px1-input_box.x1); 227 | 228 | cv::copyMakeBorder(chop_img, chop_img, pad_top, pad_bottom,pad_left, pad_right, cv::BORDER_CONSTANT, cv::Scalar(0)); 229 | 230 | cv::resize(chop_img,resized, cv::Size(width, height), 0, 0); 231 | } 232 | 233 | 234 | void run_RNet(TF_Session * sess, TF_Graph * graph, cv::Mat& img, std::vector& pnet_boxes, std::vector& output_boxes) 235 | { 236 | int batch=pnet_boxes.size(); 237 | int channel = 3; 238 | int height = 24; 239 | int width = 24; 240 | 241 | float rnet_threshold=0.7; 242 | 243 | /* prepare input image data */ 244 | 245 | int input_size=batch*height*width*channel; 246 | 247 | std::vector input_buffer(input_size); 248 | 249 | float * input_data=input_buffer.data(); 250 | 251 | for(int i=0;i input_names; 266 | std::vector input_values; 267 | 268 | TF_Operation* input_name=TF_GraphOperationByName(graph, "rnet/input"); 269 | 270 | input_names.push_back({input_name, 0}); 271 | 272 | 273 | const int64_t dim[4] = {batch,height,width,channel}; 274 | 275 | 276 | TF_Tensor* input_tensor = TF_NewTensor(TF_FLOAT,dim,4,input_buffer.data(),sizeof(float)*input_size, 277 | dummy_deallocator,nullptr); 278 | 279 | input_values.push_back(input_tensor); 280 | 281 | 282 | std::vector output_names; 283 | 284 | TF_Operation* output_name = TF_GraphOperationByName(graph,"rnet/conv5-2/conv5-2"); 285 | output_names.push_back({output_name,0}); 286 | 287 | output_name = TF_GraphOperationByName(graph,"rnet/prob1"); 288 | output_names.push_back({output_name,0}); 289 | 290 | std::vector output_values(output_names.size(), nullptr); 291 | 292 | 293 | TF_SessionRun(sess,nullptr,input_names.data(),input_values.data(),input_names.size(), 294 | output_names.data(),output_values.data(),output_names.size(), 295 | 
nullptr,0,nullptr,s); 296 | 297 | 298 | assert(TF_GetCode(s) == TF_OK); 299 | 300 | /*retrieval the forward results*/ 301 | 302 | const float * conf_data=(const float *)TF_TensorData(output_values[1]); 303 | const float * reg_data=(const float *)TF_TensorData(output_values[0]); 304 | 305 | 306 | for(int i=0;irnet_threshold) 310 | { 311 | face_box output_box; 312 | 313 | face_box& input_box=pnet_boxes[i]; 314 | 315 | output_box.x0=input_box.x0; 316 | output_box.y0=input_box.y0; 317 | output_box.x1=input_box.x1; 318 | output_box.y1=input_box.y1; 319 | 320 | output_box.score = *(conf_data+1); 321 | 322 | /*Note: regress's value is swaped here!!!*/ 323 | 324 | output_box.regress[0]=reg_data[1]; 325 | output_box.regress[1]=reg_data[0]; 326 | output_box.regress[2]=reg_data[3]; 327 | output_box.regress[3]=reg_data[2]; 328 | 329 | output_boxes.push_back(output_box); 330 | 331 | 332 | } 333 | 334 | conf_data+=2; 335 | reg_data+=4; 336 | 337 | } 338 | 339 | TF_DeleteStatus(s); 340 | TF_DeleteTensor(output_values[0]); 341 | TF_DeleteTensor(output_values[1]); 342 | TF_DeleteTensor(input_tensor); 343 | } 344 | 345 | void run_ONet(TF_Session * sess, TF_Graph * graph, cv::Mat& img, std::vector& rnet_boxes, std::vector& output_boxes) 346 | { 347 | int batch=rnet_boxes.size(); 348 | int channel = 3; 349 | int height = 48; 350 | int width = 48; 351 | 352 | float onet_threshold=0.9; 353 | 354 | /* prepare input image data */ 355 | 356 | int input_size=batch*height*width*channel; 357 | 358 | std::vector input_buffer(input_size); 359 | 360 | float * input_data=input_buffer.data(); 361 | 362 | for(int i=0;i input_names; 377 | std::vector input_values; 378 | 379 | TF_Operation* input_name=TF_GraphOperationByName(graph, "onet/input"); 380 | 381 | input_names.push_back({input_name, 0}); 382 | 383 | const int64_t dim[4] = {batch,height,width,channel}; 384 | 385 | TF_Tensor* input_tensor = TF_NewTensor(TF_FLOAT,dim,4,input_buffer.data(),sizeof(float)*input_size, 386 | dummy_deallocator,nullptr); 387 | 388 | input_values.push_back(input_tensor); 389 | 390 | 391 | std::vector output_names; 392 | 393 | TF_Operation* output_name = TF_GraphOperationByName(graph,"onet/conv6-2/conv6-2"); 394 | output_names.push_back({output_name,0}); 395 | 396 | output_name = TF_GraphOperationByName(graph,"onet/conv6-3/conv6-3"); 397 | output_names.push_back({output_name,0}); 398 | 399 | output_name = TF_GraphOperationByName(graph,"onet/prob1"); 400 | output_names.push_back({output_name,0}); 401 | 402 | std::vector output_values(output_names.size(), nullptr); 403 | 404 | 405 | TF_SessionRun(sess,nullptr,input_names.data(),input_values.data(),input_names.size(), 406 | output_names.data(),output_values.data(),output_names.size(), 407 | nullptr,0,nullptr,s); 408 | 409 | 410 | assert(TF_GetCode(s) == TF_OK); 411 | 412 | /*retrieval the forward results*/ 413 | 414 | const float * conf_data=(const float *)TF_TensorData(output_values[2]); 415 | const float * reg_data=(const float *)TF_TensorData(output_values[0]); 416 | const float * points_data=(const float *)TF_TensorData(output_values[1]); 417 | 418 | for(int i=0;ionet_threshold) 422 | { 423 | face_box output_box; 424 | 425 | face_box& input_box=rnet_boxes[i]; 426 | 427 | output_box.x0=input_box.x0; 428 | output_box.y0=input_box.y0; 429 | output_box.x1=input_box.x1; 430 | output_box.y1=input_box.y1; 431 | 432 | output_box.score = conf_data[1]; 433 | 434 | output_box.regress[0]=reg_data[1]; 435 | output_box.regress[1]=reg_data[0]; 436 | output_box.regress[2]=reg_data[3]; 437 | 
output_box.regress[3]=reg_data[2]; 438 | 439 | /*Note: switched x,y points value too..*/ 440 | for (int j = 0; j<5; j++){ 441 | output_box.landmark.x[j] = *(points_data + j+5); 442 | output_box.landmark.y[j] = *(points_data + j); 443 | } 444 | 445 | output_boxes.push_back(output_box); 446 | 447 | 448 | } 449 | 450 | conf_data+=2; 451 | reg_data+=4; 452 | points_data+=10; 453 | } 454 | 455 | TF_DeleteStatus(s); 456 | TF_DeleteTensor(output_values[0]); 457 | TF_DeleteTensor(output_values[1]); 458 | TF_DeleteTensor(output_values[2]); 459 | TF_DeleteTensor(input_tensor); 460 | 461 | } 462 | 463 | 464 | void mtcnn_detect(TF_Session * sess, TF_Graph * graph, cv::Mat& img, std::vector& face_list) 465 | { 466 | cv::Mat working_img; 467 | 468 | float alpha=0.0078125; 469 | float mean=127.5; 470 | 471 | 472 | 473 | img.convertTo(working_img, CV_32FC3); 474 | 475 | working_img=(working_img-mean)*alpha; 476 | 477 | working_img=working_img.t(); 478 | 479 | cv::cvtColor(working_img,working_img, cv::COLOR_BGR2RGB); 480 | 481 | int img_h=working_img.rows; 482 | int img_w=working_img.cols; 483 | 484 | 485 | int min_size=40; 486 | float factor=0.709; 487 | 488 | 489 | std::vector win_list; 490 | 491 | std::vector total_pnet_boxes; 492 | std::vector total_rnet_boxes; 493 | std::vector total_onet_boxes; 494 | 495 | 496 | cal_pyramid_list(img_h,img_w,min_size,factor,win_list); 497 | 498 | for(unsigned int i=0;iboxes; 501 | 502 | run_PNet(sess,graph,working_img,win_list[i],boxes); 503 | 504 | total_pnet_boxes.insert(total_pnet_boxes.end(),boxes.begin(),boxes.end()); 505 | } 506 | 507 | 508 | std::vector pnet_boxes; 509 | process_boxes(total_pnet_boxes,img_h,img_w,pnet_boxes); 510 | 511 | 512 | // RNet 513 | std::vector rnet_boxes; 514 | 515 | run_RNet(sess, graph,working_img, pnet_boxes,total_rnet_boxes); 516 | 517 | process_boxes(total_rnet_boxes,img_h,img_w,rnet_boxes); 518 | 519 | 520 | //ONet 521 | run_ONet(sess,graph,working_img, rnet_boxes,total_onet_boxes); 522 | 523 | //calculate the landmark 524 | 525 | for(unsigned int i=0;i 38 | #include 39 | #include 40 | 41 | #include "tensorflow/cc/ops/const_op.h" 42 | #include "tensorflow/cc/ops/image_ops.h" 43 | #include "tensorflow/cc/ops/standard_ops.h" 44 | #include "tensorflow/core/framework/graph.pb.h" 45 | #include "tensorflow/core/framework/tensor.h" 46 | #include "tensorflow/core/graph/default_device.h" 47 | #include "tensorflow/core/graph/graph_def_builder.h" 48 | #include "tensorflow/core/lib/core/errors.h" 49 | #include "tensorflow/core/lib/core/stringpiece.h" 50 | #include "tensorflow/core/lib/core/threadpool.h" 51 | #include "tensorflow/core/lib/io/path.h" 52 | #include "tensorflow/core/lib/strings/stringprintf.h" 53 | #include "tensorflow/core/platform/env.h" 54 | #include "tensorflow/core/platform/init_main.h" 55 | #include "tensorflow/core/platform/logging.h" 56 | #include "tensorflow/core/platform/types.h" 57 | #include "tensorflow/core/public/session.h" 58 | #include "tensorflow/core/util/command_line_flags.h" 59 | 60 | #include "mtcnn.hpp" 61 | #include "comm_lib.hpp" 62 | #include "utils.hpp" 63 | 64 | // These are all common classes it's handy to reference with no namespace. 
65 | using tensorflow::Flag; 66 | using tensorflow::Tensor; 67 | using tensorflow::Status; 68 | using tensorflow::string; 69 | using tensorflow::int32; 70 | using tensorflow::TensorBuffer; 71 | using tensorflow::DT_FLOAT; 72 | using tensorflow::TensorShape; 73 | using tensorflow::AllocationDescription; 74 | 75 | 76 | // Reads a model graph definition from disk, and creates a session object you 77 | // can use to run it. 78 | Status LoadGraph(const string& graph_file_name, 79 | std::unique_ptr* session) { 80 | tensorflow::GraphDef graph_def; 81 | Status load_graph_status = 82 | ReadBinaryProto(tensorflow::Env::Default(), graph_file_name, &graph_def); 83 | if (!load_graph_status.ok()) { 84 | return tensorflow::errors::NotFound("Failed to load compute graph at '", 85 | graph_file_name, "'"); 86 | } 87 | session->reset(tensorflow::NewSession(tensorflow::SessionOptions())); 88 | Status session_create_status = (*session)->Create(graph_def); 89 | if (!session_create_status.ok()) { 90 | return session_create_status; 91 | } 92 | return Status::OK(); 93 | } 94 | 95 | 96 | /* copied from TF_ManagedBuffer in c_api.cc */ 97 | class my_buffer: public TensorBuffer{ 98 | 99 | public: 100 | float * data_; 101 | size_t len_; 102 | 103 | ~my_buffer() override { 104 | //nothing to do 105 | } 106 | 107 | void* data() const override { return data_; } 108 | size_t size() const override { return len_; } 109 | bool OwnsMemory() const override { return false; } 110 | 111 | TensorBuffer* root_buffer() override { return this; } 112 | 113 | void FillAllocationDescription(AllocationDescription* proto) const override { 114 | tensorflow::int64 rb = size(); 115 | proto->set_requested_bytes(rb); 116 | proto->set_allocator_name(tensorflow::cpu_allocator()->Name()); 117 | } 118 | 119 | }; 120 | 121 | 122 | class tensorflow::TensorCApi { 123 | public: 124 | static Tensor create_tensor(DataType dtype, const TensorShape& shape, TensorBuffer* buf) 125 | { 126 | return Tensor(dtype,shape,buf); 127 | } 128 | 129 | }; 130 | 131 | 132 | 133 | 134 | void generate_bounding_box_tf(const float * confidence_data, int confidence_size, 135 | const float * reg_data, float scale, float threshold, 136 | int feature_h, int feature_w, std::vector& output, bool transposed) 137 | { 138 | 139 | int stride = 2; 140 | int cellSize = 12; 141 | 142 | int img_h= feature_h; 143 | int img_w = feature_w; 144 | 145 | 146 | for(int y=0;y= threshold) 154 | { 155 | 156 | float top_x = (int)((x*stride + 1) / scale); 157 | float top_y = (int)((y*stride + 1) / scale); 158 | float bottom_x = (int)((x*stride + cellSize) / scale); 159 | float bottom_y = (int)((y*stride + cellSize) / scale); 160 | 161 | face_box box; 162 | 163 | box.x0 = top_x; 164 | box.y0 = top_y; 165 | box.x1 = bottom_x; 166 | box.y1 = bottom_y; 167 | 168 | box.score=score; 169 | 170 | int c_offset=(img_w*4)*y+4*x; 171 | 172 | if(transposed) 173 | { 174 | 175 | box.regress[1]=reg_data[c_offset]; 176 | box.regress[0]=reg_data[c_offset+1]; 177 | box.regress[3]=reg_data[c_offset+2]; 178 | box.regress[2]= reg_data[c_offset+3]; 179 | } 180 | else { 181 | 182 | box.regress[0]=reg_data[c_offset]; 183 | box.regress[1]=reg_data[c_offset+1]; 184 | box.regress[2]=reg_data[c_offset+2]; 185 | box.regress[3]= reg_data[c_offset+3]; 186 | } 187 | 188 | output.push_back(box); 189 | } 190 | 191 | } 192 | } 193 | 194 | 195 | 196 | void run_PNet(std::unique_ptr& sess, cv::Mat& img, scale_window& win, std::vector& box_list) 197 | { 198 | cv::Mat resized; 199 | int scale_h=win.h; 200 | int scale_w=win.w; 201 | float 
scale=win.scale; 202 | float pnet_threshold=0.6; 203 | 204 | 205 | cv::resize(img, resized, cv::Size(scale_w, scale_h),0,0); 206 | 207 | /* tensorflow related*/ 208 | 209 | const int64_t dim[4] = {1,scale_h,scale_w,3}; 210 | 211 | my_buffer tensor_buf; 212 | 213 | 214 | tensor_buf.data_=(float *)resized.ptr(); 215 | tensor_buf.len_=scale_h*scale_w*3; 216 | 217 | std::vector tensor_dim; 218 | 219 | for(int i=0;i<4;i++) 220 | tensor_dim.push_back(dim[i]); 221 | 222 | 223 | Tensor input_tensor=tensorflow::TensorCApi::create_tensor(DT_FLOAT,TensorShape(tensor_dim), &tensor_buf); 224 | 225 | 226 | std::vector output_tensor; 227 | 228 | 229 | std::vector > input_tname; 230 | 231 | std::pair input0("pnet/input:0",input_tensor); 232 | input_tname.push_back(input0); 233 | 234 | std::vector output_tname; 235 | 236 | output_tname.push_back("pnet/conv4-2/BiasAdd:0"); 237 | output_tname.push_back("pnet/prob1:0"); 238 | 239 | std::vector output_node; 240 | 241 | 242 | 243 | Status run_status = sess->Run(input_tname,output_tname,output_node,&output_tensor); 244 | 245 | if(!run_status.ok()) 246 | { 247 | std::cerr<<"run PNet error"< candidate_boxes; 261 | 262 | const tensorflow::StringPiece conf_piece=output_tensor[1].tensor_data(); 263 | const tensorflow::StringPiece reg_piece=output_tensor[0].tensor_data(); 264 | 265 | 266 | const float * conf_data=(const float *)conf_piece.data(); 267 | int conf_size=feature_h*feature_w*2; 268 | const float * reg_data=(const float *)reg_piece.data(); 269 | 270 | generate_bounding_box_tf(conf_data,conf_size,reg_data, 271 | scale,pnet_threshold,feature_h,feature_w,candidate_boxes,true); 272 | 273 | 274 | nms_boxes(candidate_boxes, 0.5, NMS_UNION,box_list); 275 | 276 | } 277 | 278 | 279 | 280 | void copy_one_patch(const cv::Mat& img,face_box&input_box,float * data_to, int height, int width) 281 | { 282 | cv::Mat resized(height,width,CV_32FC3,data_to); 283 | 284 | 285 | cv::Mat chop_img = img(cv::Range(input_box.py0,input_box.py1), 286 | cv::Range(input_box.px0, input_box.px1)); 287 | 288 | int pad_top = std::abs(input_box.py0 - input_box.y0); 289 | int pad_left = std::abs(input_box.px0 - input_box.x0); 290 | int pad_bottom = std::abs(input_box.py1 - input_box.y1); 291 | int pad_right = std::abs(input_box.px1-input_box.x1); 292 | 293 | cv::copyMakeBorder(chop_img, chop_img, pad_top, pad_bottom,pad_left, pad_right, cv::BORDER_CONSTANT, cv::Scalar(0)); 294 | 295 | cv::resize(chop_img,resized, cv::Size(width, height), 0, 0); 296 | } 297 | 298 | 299 | void run_RNet(std::unique_ptr& sess, cv::Mat& img, std::vector& pnet_boxes, std::vector& output_boxes) 300 | { 301 | int batch=pnet_boxes.size(); 302 | int channel = 3; 303 | int height = 24; 304 | int width = 24; 305 | 306 | float rnet_threshold=0.7; 307 | 308 | /* prepare input image data */ 309 | 310 | int input_size=batch*height*width*channel; 311 | 312 | std::vector input_buffer(input_size); 313 | 314 | float * input_data=input_buffer.data(); 315 | 316 | for(int i=0;i tensor_dim; 337 | 338 | for(int i=0;i<4;i++) 339 | tensor_dim.push_back(dim[i]); 340 | 341 | Tensor input_tensor=tensorflow::TensorCApi::create_tensor(DT_FLOAT,TensorShape(tensor_dim), &tensor_buf); 342 | 343 | std::vector output_tensor; 344 | 345 | 346 | std::vector > input_tname; 347 | 348 | std::pair input0("rnet/input:0",input_tensor); 349 | input_tname.push_back(input0); 350 | 351 | std::vector output_tname; 352 | 353 | output_tname.push_back("rnet/conv5-2/conv5-2:0"); 354 | output_tname.push_back("rnet/prob1:0"); 355 | 356 | std::vector output_node; 357 | 
358 | Status run_status = sess->Run(input_tname,output_tname,output_node,&output_tensor); 359 | 360 | if(!run_status.ok()) 361 | { 362 | std::cerr<<"run PNet error"<rnet_threshold) 378 | { 379 | face_box output_box; 380 | 381 | face_box& input_box=pnet_boxes[i]; 382 | 383 | output_box.x0=input_box.x0; 384 | output_box.y0=input_box.y0; 385 | output_box.x1=input_box.x1; 386 | output_box.y1=input_box.y1; 387 | 388 | output_box.score = *(conf_data+1); 389 | 390 | /*Note: regress's value is swaped here!!!*/ 391 | 392 | output_box.regress[0]=reg_data[1]; 393 | output_box.regress[1]=reg_data[0]; 394 | output_box.regress[2]=reg_data[3]; 395 | output_box.regress[3]=reg_data[2]; 396 | 397 | output_boxes.push_back(output_box); 398 | 399 | 400 | } 401 | 402 | conf_data+=2; 403 | reg_data+=4; 404 | 405 | } 406 | 407 | } 408 | 409 | void run_ONet(std::unique_ptr& sess, cv::Mat& img, std::vector& rnet_boxes, std::vector& output_boxes) 410 | { 411 | int batch=rnet_boxes.size(); 412 | int channel = 3; 413 | int height = 48; 414 | int width = 48; 415 | 416 | float onet_threshold=0.9; 417 | 418 | /* prepare input image data */ 419 | 420 | int input_size=batch*height*width*channel; 421 | 422 | std::vector input_buffer(input_size); 423 | 424 | float * input_data=input_buffer.data(); 425 | 426 | for(int i=0;i tensor_dim; 447 | 448 | for(int i=0;i<4;i++) 449 | tensor_dim.push_back(dim[i]); 450 | 451 | Tensor input_tensor=tensorflow::TensorCApi::create_tensor(DT_FLOAT,TensorShape(tensor_dim), &tensor_buf); 452 | 453 | std::vector output_tensor; 454 | 455 | 456 | std::vector > input_tname; 457 | 458 | std::pair input0("onet/input:0",input_tensor); 459 | input_tname.push_back(input0); 460 | 461 | std::vector output_tname; 462 | 463 | output_tname.push_back("onet/conv6-2/conv6-2:0"); 464 | output_tname.push_back("onet/conv6-3/conv6-3:0"); 465 | output_tname.push_back("onet/prob1:0"); 466 | 467 | std::vector output_node; 468 | 469 | Status run_status = sess->Run(input_tname,output_tname,output_node,&output_tensor); 470 | 471 | if(!run_status.ok()) 472 | { 473 | std::cerr<<"run PNet error"<onet_threshold) 491 | { 492 | face_box output_box; 493 | 494 | face_box& input_box=rnet_boxes[i]; 495 | 496 | output_box.x0=input_box.x0; 497 | output_box.y0=input_box.y0; 498 | output_box.x1=input_box.x1; 499 | output_box.y1=input_box.y1; 500 | 501 | output_box.score = conf_data[1]; 502 | 503 | output_box.regress[0]=reg_data[1]; 504 | output_box.regress[1]=reg_data[0]; 505 | output_box.regress[2]=reg_data[3]; 506 | output_box.regress[3]=reg_data[2]; 507 | 508 | /*Note: switched x,y points value too..*/ 509 | for (int j = 0; j<5; j++){ 510 | output_box.landmark.x[j] = *(points_data + j+5); 511 | output_box.landmark.y[j] = *(points_data + j); 512 | } 513 | 514 | output_boxes.push_back(output_box); 515 | 516 | 517 | } 518 | 519 | conf_data+=2; 520 | reg_data+=4; 521 | points_data+=10; 522 | } 523 | 524 | } 525 | 526 | void mtcnn_detect(std::unique_ptr& sess, cv::Mat& img, std::vector& face_list) 527 | { 528 | cv::Mat working_img; 529 | 530 | float alpha=0.0078125; 531 | float mean=127.5; 532 | 533 | 534 | 535 | img.convertTo(working_img, CV_32FC3); 536 | 537 | working_img=(working_img-mean)*alpha; 538 | 539 | working_img=working_img.t(); 540 | 541 | cv::cvtColor(working_img,working_img, cv::COLOR_BGR2RGB); 542 | 543 | int img_h=working_img.rows; 544 | int img_w=working_img.cols; 545 | 546 | 547 | int min_size=40; 548 | float factor=0.709; 549 | 550 | 551 | std::vector win_list; 552 | 553 | std::vector total_pnet_boxes; 554 | 
    std::vector<face_box> total_rnet_boxes;
    std::vector<face_box> total_onet_boxes;

    cal_pyramid_list(img_h,img_w,min_size,factor,win_list);

    for(unsigned int i=0;i<win_list.size();i++)
    {
        std::vector<face_box> boxes;

        run_PNet(sess,working_img,win_list[i],boxes);

        total_pnet_boxes.insert(total_pnet_boxes.end(),boxes.begin(),boxes.end());
    }

    std::vector<face_box> pnet_boxes;
    process_boxes(total_pnet_boxes,img_h,img_w,pnet_boxes);

    // RNet
    std::vector<face_box> rnet_boxes;

    run_RNet(sess,working_img, pnet_boxes,total_rnet_boxes);

    process_boxes(total_rnet_boxes,img_h,img_w,rnet_boxes);

    //ONet
    run_ONet(sess,working_img, rnet_boxes,total_onet_boxes);

    //calculate the landmark coordinates in image space

    for(unsigned int i=0;i<total_onet_boxes.size();i++)
    {
        face_box& box=total_onet_boxes[i];

        float h=box.y1-box.y0+1;
        float w=box.x1-box.x0+1;

        for(int j=0;j<5;j++)
        {
            box.landmark.x[j]=box.x0+w*box.landmark.x[j]-1;
            box.landmark.y[j]=box.y0+h*box.landmark.y[j]-1;
        }
    }

    //get the final result
    regress_boxes(total_onet_boxes);
    nms_boxes(total_onet_boxes, 0.7, NMS_MIN, face_list);

    //switch x and y back, since working_img was transposed
    for(unsigned int i=0;i<face_list.size();i++)
    {
        face_box& box=face_list[i];

        std::swap(box.x0,box.y0);
        std::swap(box.x1,box.y1);

        for(int j=0;j<5;j++)
            std::swap(box.landmark.x[j],box.landmark.y[j]);
    }
}

int main(int argc, char* argv[])
{
    /* defaults reconstructed; the original bound the "output" flag to the
       graph string by mistake, so it gets its own variable here */
    string image = "test.jpg";
    string graph = "models/mtcnn_frozen_model.pb";
    string output_fname = "detected.jpg";
    string root_dir = "";

    std::vector<Flag> flag_list = {
        Flag("image", &image, "image to be processed"),
        Flag("graph", &graph, "graph to be executed"),
        Flag("output", &output_fname, "image with face boxed"),
    };

    string usage = tensorflow::Flags::Usage(argv[0], flag_list);
    const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
    if (!parse_result) {
        LOG(ERROR) << usage;
        return -1;
    }

    // We need to call this to set up global state for TensorFlow.
    tensorflow::port::InitMain(argv[0], &argc, &argv);
    if (argc > 1) {
        LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage;
        return -1;
    }

    // First we load and initialize the model.
    std::unique_ptr<tensorflow::Session> session;
    string graph_path = tensorflow::io::JoinPath(root_dir, graph);
    Status load_graph_status = LoadGraph(graph_path, &session);
    if (!load_graph_status.ok()) {
        LOG(ERROR) << load_graph_status;
        return -1;
    }

    // Load image
    cv::Mat frame = cv::imread(image);

    if(!frame.data)
    {
        std::cerr<<"failed to read image file: "<<image<<std::endl;
        return 1;
    }

    std::vector<face_box> face_info;

    unsigned long start_time=get_cur_time();

    mtcnn_detect(session,frame,face_info);

    unsigned long end_time=get_cur_time();

    int save_chop=0;

    for(unsigned int i=0;i<face_info.size();i++)
    {
        face_box& box=face_info[i];

        printf("face %u: x0,y0 %2.5f %2.5f  x1,y1 %2.5f %2.5f  conf: %2.5f\n",i,
                box.x0,box.y0,box.x1,box.y1,box.score);

        if(save_chop)
        {
            cv::Mat chop_img=frame(cv::Range(box.y0,box.y1), cv::Range(box.x0,box.x1));

            char fname[64];
            snprintf(fname,sizeof(fname),"chop_%u.jpg",i);

            cv::imwrite(fname,chop_img);
        }

        /* draw box and landmarks */
        cv::rectangle(frame, cv::Point(box.x0,box.y0), cv::Point(box.x1,box.y1), cv::Scalar(0,255,0), 1);

        for(int j=0;j<5;j++)
            cv::circle(frame, cv::Point(box.landmark.x[j],box.landmark.y[j]), 1, cv::Scalar(0,0,255), 2);
    }

    std::cout<<"total detect time: "<<(end_time-start_time)<<" us"<<std::endl;

    cv::imwrite(output_fname,frame);

    return 0;
}

--------------------------------------------------------------------------------
/detect_face.py:
--------------------------------------------------------------------------------

import numpy as np
import cv2


def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
    # img: input image (RGB, HxWx3)
    # minsize: minimum face size to detect
    # pnet, rnet, onet: the three stage networks
    # threshold: [th1, th2, th3], the per-stage score thresholds
    # factor: scale factor for the image pyramid
    factor_count=0
    total_boxes=np.empty((0,9))
    points=np.empty(0)
    h=img.shape[0]
    w=img.shape[1]
    minl=np.amin([h, w])
    m=12.0/minsize
    minl=minl*m
    # create scale pyramid
    scales=[]
    while minl>=12:
        scales += [m*np.power(factor, factor_count)]
        minl = minl*factor
        factor_count += 1

    # first stage
    for j in range(len(scales)):
        scale=scales[j]
        hs=int(np.ceil(h*scale))
        ws=int(np.ceil(w*scale))
        im_data = imresample(img, (hs, ws))
        im_data = (im_data-127.5)*0.0078125
        img_x = np.expand_dims(im_data, 0)
        img_y = np.transpose(img_x, (0,2,1,3))
        out = pnet(img_y)
        out0 = np.transpose(out[0], (0,2,1,3))
        out1 = np.transpose(out[1], (0,2,1,3))

        boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0])

        # inter-scale nms
        pick = nms(boxes.copy(), 0.5, 'Union')
        if boxes.size>0 and pick.size>0:
            boxes = boxes[pick,:]
            total_boxes = np.append(total_boxes, boxes, axis=0)

    numbox = total_boxes.shape[0]
    if numbox>0:
        pick = nms(total_boxes.copy(), 0.7, 'Union')
        total_boxes = total_boxes[pick,:]
        regw = total_boxes[:,2]-total_boxes[:,0]
        regh = total_boxes[:,3]-total_boxes[:,1]
        qq1 = total_boxes[:,0]+total_boxes[:,5]*regw
        qq2 = total_boxes[:,1]+total_boxes[:,6]*regh
        qq3 = total_boxes[:,2]+total_boxes[:,7]*regw
        qq4 = total_boxes[:,3]+total_boxes[:,8]*regh
        total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]]))
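        # columns 5:9 of total_boxes hold the PNet regression offsets;
        # qq1..qq4 are the regression-corrected corners, and rerec() below
        # squares the boxes up before the 24x24 crops are taken for RNet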
        total_boxes = rerec(total_boxes.copy())
        total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)

    numbox = total_boxes.shape[0]
    if numbox>0:
        # second stage
        tempimg = np.zeros((24,24,3,numbox))
        for k in range(0,numbox):
            tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
            tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
            if (tmp.shape[0]>0 and tmp.shape[1]>0) or (tmp.shape[0]==0 and tmp.shape[1]==0):
                tempimg[:,:,:,k] = imresample(tmp, (24, 24))
            else:
                return np.empty((0,5)), np.empty((10,0))  # invalid crop, no usable result
        tempimg = (tempimg-127.5)*0.0078125
        tempimg1 = np.transpose(tempimg, (3,1,0,2))
        out = rnet(tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        score = out1[1,:]
        ipass = np.where(score>threshold[1])
        total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
        mv = out0[:,ipass[0]]
        if total_boxes.shape[0]>0:
            pick = nms(total_boxes, 0.7, 'Union')
            total_boxes = total_boxes[pick,:]
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick]))
            total_boxes = rerec(total_boxes.copy())

    numbox = total_boxes.shape[0]
    if numbox>0:
        # third stage
        total_boxes = np.fix(total_boxes).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
        tempimg = np.zeros((48,48,3,numbox))
        for k in range(0,numbox):
            tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
            tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
            if (tmp.shape[0]>0 and tmp.shape[1]>0) or (tmp.shape[0]==0 and tmp.shape[1]==0):
                tempimg[:,:,:,k] = imresample(tmp, (48, 48))
            else:
                return np.empty((0,5)), np.empty((10,0))  # invalid crop, no usable result
        tempimg = (tempimg-127.5)*0.0078125
        tempimg1 = np.transpose(tempimg, (3,1,0,2))
        out = onet(tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        out2 = np.transpose(out[2])
        score = out2[1,:]
        points = out1
        ipass = np.where(score>threshold[2])
        points = points[:,ipass[0]]
        total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
        mv = out0[:,ipass[0]]

        w = total_boxes[:,2]-total_boxes[:,0]+1
        h = total_boxes[:,3]-total_boxes[:,1]+1
        points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1
        points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1
        if total_boxes.shape[0]>0:
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
            pick = nms(total_boxes.copy(), 0.7, 'Min')
            total_boxes = total_boxes[pick,:]
            points = points[:,pick]

    return total_boxes, points


def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor):
    # images: list of input images
    # detection_window_size_ratio: minimum face size as a fraction of the smaller image side
    # pnet, rnet, onet: the three stage networks
    # threshold: [th1, th2, th3], the per-stage score thresholds, each in [0, 1]
    # factor: scale factor for the image pyramid

    all_scales = [None] * len(images)
    images_with_boxes = [None] * len(images)

    for i in range(len(images)):
        images_with_boxes[i] = {'total_boxes': np.empty((0, 9))}

    # create scale pyramid
    for index, img in enumerate(images):
        all_scales[index] = []
        h = img.shape[0]
        w = img.shape[1]
        minsize = int(detection_window_size_ratio * np.minimum(w, h))
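        # e.g. detection_window_size_ratio=0.1 on a 720x1280 frame gives
        # minsize = int(0.1*720) = 72 and m = 12/72 ~ 0.167, so the pyramid
        # runs 0.167, 0.167*0.709, ... until the scaled short side drops below 12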
        factor_count = 0
        minl = np.amin([h, w])
        if minsize <= 12:
            minsize = 12

        m = 12.0 / minsize
        minl = minl * m
        while minl >= 12:
            all_scales[index].append(m * np.power(factor, factor_count))
            minl = minl * factor
            factor_count += 1

    # # # # # # # # # # # # #
    # first stage - fast proposal network (pnet) to obtain face candidates
    # # # # # # # # # # # # #

    images_obj_per_resolution = {}

    # TODO: round resolutions to a multiple of 8 to increase the probability
    # that pyramid images share a resolution across input images

    for index, scales in enumerate(all_scales):
        h = images[index].shape[0]
        w = images[index].shape[1]

        for scale in scales:
            hs = int(np.ceil(h * scale))
            ws = int(np.ceil(w * scale))

            if (ws, hs) not in images_obj_per_resolution:
                images_obj_per_resolution[(ws, hs)] = []

            im_data = imresample(images[index], (hs, ws))
            im_data = (im_data - 127.5) * 0.0078125
            img_y = np.transpose(im_data, (1, 0, 2))  # caffe uses a different dimension ordering
            images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index})

    for resolution in images_obj_per_resolution:
        images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]]
        outs = pnet(images_per_resolution)

        for index in range(len(outs[0])):
            scale = images_obj_per_resolution[resolution][index]['scale']
            image_index = images_obj_per_resolution[resolution][index]['index']
            out0 = np.transpose(outs[0][index], (1, 0, 2))
            out1 = np.transpose(outs[1][index], (1, 0, 2))

            boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0])

            # inter-scale nms
            pick = nms(boxes.copy(), 0.5, 'Union')
            if boxes.size > 0 and pick.size > 0:
                boxes = boxes[pick, :]
                images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'],
                                                                          boxes,
                                                                          axis=0)

    for index, image_obj in enumerate(images_with_boxes):
        numbox = image_obj['total_boxes'].shape[0]
        if numbox > 0:
            h = images[index].shape[0]
            w = images[index].shape[1]
            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0]
            regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1]
            qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw
            qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh
            qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw
            qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh
            image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]]))
            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
            image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32)
            dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)

            numbox = image_obj['total_boxes'].shape[0]
            tempimg = np.zeros((24, 24, 3, numbox))
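            # pad() returns matching source/destination ranges: y:ey, x:ex index
            # the original image and dy:edy, dx:edx the zero-filled 'tmp' patch
            # (1-based, MATLAB-style), so out-of-frame boxes get zero padding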
            if numbox > 0:
                for k in range(0, numbox):
                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
                    tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
                    if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
                        tempimg[:, :, :, k] = imresample(tmp, (24, 24))
                    else:
                        return [None] * len(images)  # invalid crop, no usable result

                tempimg = (tempimg - 127.5) * 0.0078125
                image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2))

    # # # # # # # # # # # # #
    # second stage - refinement of face candidates with rnet
    # # # # # # # # # # # # #

    bulk_rnet_input = np.empty((0, 24, 24, 3))
    for index, image_obj in enumerate(images_with_boxes):
        if 'rnet_input' in image_obj:
            bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0)

    out = rnet(bulk_rnet_input)
    out0 = np.transpose(out[0])
    out1 = np.transpose(out[1])
    score = out1[1, :]

    i = 0
    for index, image_obj in enumerate(images_with_boxes):
        if 'rnet_input' not in image_obj:
            continue

        rnet_input_count = image_obj['rnet_input'].shape[0]
        score_per_image = score[i:i + rnet_input_count]
        out0_per_image = out0[:, i:i + rnet_input_count]

        ipass = np.where(score_per_image > threshold[1])
        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
                                              np.expand_dims(score_per_image[ipass].copy(), 1)])

        mv = out0_per_image[:, ipass[0]]

        if image_obj['total_boxes'].shape[0] > 0:
            h = images[index].shape[0]
            w = images[index].shape[1]
            pick = nms(image_obj['total_boxes'], 0.7, 'Union')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick]))
            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())

            numbox = image_obj['total_boxes'].shape[0]

            if numbox > 0:
                tempimg = np.zeros((48, 48, 3, numbox))
                image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32)
                dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)

                for k in range(0, numbox):
                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
                    tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
                    if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
                        tempimg[:, :, :, k] = imresample(tmp, (48, 48))
                    else:
                        return [None] * len(images)  # invalid crop, no usable result
                tempimg = (tempimg - 127.5) * 0.0078125
                image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2))

        i += rnet_input_count

    # # # # # # # # # # # # #
    # third stage - further refinement and facial landmarks positions with onet
    # # # # # # # # # # # # #

    bulk_onet_input = np.empty((0, 48, 48, 3))
    for index, image_obj in enumerate(images_with_boxes):
        if 'onet_input' in image_obj:
            bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0)

    out = onet(bulk_onet_input)

    out0 = np.transpose(out[0])
    out1 = np.transpose(out[1])
    out2 = np.transpose(out[2])
    score = out2[1, :]
    points = out1

    i = 0
    ret = []
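    # every image's 48x48 crops were concatenated into one ONet batch above;
    # walk the per-image crop counts to slice each image's scores and
    # regressions back out of the shared outputs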
    for index, image_obj in enumerate(images_with_boxes):
        if 'onet_input' not in image_obj:
            ret.append(None)
            continue

        onet_input_count = image_obj['onet_input'].shape[0]

        out0_per_image = out0[:, i:i + onet_input_count]
        score_per_image = score[i:i + onet_input_count]
        points_per_image = points[:, i:i + onet_input_count]

        ipass = np.where(score_per_image > threshold[2])
        points_per_image = points_per_image[:, ipass[0]]

        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
                                              np.expand_dims(score_per_image[ipass].copy(), 1)])
        mv = out0_per_image[:, ipass[0]]

        w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1
        h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1
        points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile(
            image_obj['total_boxes'][:, 0], (5, 1)) - 1
        points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile(
            image_obj['total_boxes'][:, 1], (5, 1)) - 1

        if image_obj['total_boxes'].shape[0] > 0:
            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv))
            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            points_per_image = points_per_image[:, pick]

            ret.append((image_obj['total_boxes'], points_per_image))
        else:
            ret.append(None)

        i += onet_input_count

    return ret


# function [boundingbox] = bbreg(boundingbox,reg)
def bbreg(boundingbox,reg):
    # calibrate bounding boxes
    if reg.shape[1]==1:
        reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))

    w = boundingbox[:,2]-boundingbox[:,0]+1
    h = boundingbox[:,3]-boundingbox[:,1]+1
    b1 = boundingbox[:,0]+reg[:,0]*w
    b2 = boundingbox[:,1]+reg[:,1]*h
    b3 = boundingbox[:,2]+reg[:,2]*w
    b4 = boundingbox[:,3]+reg[:,3]*h
    boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4]))
    return boundingbox

def generateBoundingBox(imap, reg, scale, t):
    # use heatmap to generate bounding boxes
    stride=2
    cellsize=12

    imap = np.transpose(imap)
    dx1 = np.transpose(reg[:,:,0])
    dy1 = np.transpose(reg[:,:,1])
    dx2 = np.transpose(reg[:,:,2])
    dy2 = np.transpose(reg[:,:,3])
    y, x = np.where(imap >= t)
    if y.shape[0]==1:
        dx1 = np.flipud(dx1)
        dy1 = np.flipud(dy1)
        dx2 = np.flipud(dx2)
        dy2 = np.flipud(dy2)
    score = imap[(y,x)]
    reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ]))
    if reg.size==0:
        reg = np.empty((0,4))  # was (0,3); the regression output always has four columns
    bb = np.transpose(np.vstack([y,x]))
    q1 = np.fix((stride*bb+1)/scale)
    q2 = np.fix((stride*bb+cellsize-1+1)/scale)
    boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg])
    return boundingbox, reg

# function pick = nms(boxes,threshold,type)
def nms(boxes, threshold, method):
    if boxes.size==0:
        return np.empty(0, dtype=np.int16)  # was np.empty((0,3)); pick is a 1-D index array
    x1 = boxes[:,0]
    y1 = boxes[:,1]
    x2 = boxes[:,2]
    y2 = boxes[:,3]
    s = boxes[:,4]
    area = (x2-x1+1) * (y2-y1+1)
    I = np.argsort(s)
    pick = np.zeros_like(s, dtype=np.int16)
    counter = 0
    while I.size>0:
        i = I[-1]
        pick[counter] = i
        counter += 1
        idx = I[0:-1]
        xx1 = np.maximum(x1[i], x1[idx])
        yy1 = np.maximum(y1[i], y1[idx])
        xx2 = np.minimum(x2[i], x2[idx])
        yy2 = np.minimum(y2[i], y2[idx])
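        # overlap of the highest-scoring box i with every remaining box:
        # 'Min' divides the intersection by the smaller box area, any other
        # method uses intersection over union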
        w = np.maximum(0.0, xx2-xx1+1)
        h = np.maximum(0.0, yy2-yy1+1)
        inter = w * h
        if method == 'Min':  # was "is", which compares identity, not string equality
            o = inter / np.minimum(area[i], area[idx])
        else:
            o = inter / (area[i] + area[idx] - inter)
        I = I[np.where(o<=threshold)]
    pick = pick[0:counter]
    return pick

# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
def pad(total_boxes, w, h):
    # compute the padding coordinates (pad the bounding boxes to square)
    tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32)
    tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32)
    numbox = total_boxes.shape[0]

    dx = np.ones((numbox), dtype=np.int32)
    dy = np.ones((numbox), dtype=np.int32)
    edx = tmpw.copy().astype(np.int32)
    edy = tmph.copy().astype(np.int32)

    x = total_boxes[:,0].copy().astype(np.int32)
    y = total_boxes[:,1].copy().astype(np.int32)
    ex = total_boxes[:,2].copy().astype(np.int32)
    ey = total_boxes[:,3].copy().astype(np.int32)

    tmp = np.where(ex>w)
    edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1)
    ex[tmp] = w

    tmp = np.where(ey>h)
    edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1)
    ey[tmp] = h

    tmp = np.where(x<1)
    dx.flat[tmp] = np.expand_dims(2-x[tmp],1)
    x[tmp] = 1

    tmp = np.where(y<1)
    dy.flat[tmp] = np.expand_dims(2-y[tmp],1)
    y[tmp] = 1

    return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph

# function [bboxA] = rerec(bboxA)
def rerec(bboxA):
    # convert bboxA to square
    h = bboxA[:,3]-bboxA[:,1]
    w = bboxA[:,2]-bboxA[:,0]
    l = np.maximum(w, h)
    bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5
    bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5
    bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1)))
    return bboxA

def imresample(img, sz):
    im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #@UndefinedVariable
    return im_data

# This method is kept for debugging purposes
#    h=img.shape[0]
#    w=img.shape[1]
#    hs, ws = sz
#    dx = float(w) / ws
#    dy = float(h) / hs
#    im_data = np.zeros((hs,ws,3))
#    for a1 in range(0,hs):
#        for a2 in range(0,ws):
#            for a3 in range(0,3):
#                im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
#    return im_data

--------------------------------------------------------------------------------
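A minimal usage sketch for detect_face(), assuming the facenet-style create_mtcnn() helper defined near the top of detect_face.py (not included in this capture); the file name and thresholds below are illustrative, mirroring the C++ defaults above, not fixed by the repository:

# usage sketch; assumes detect_face.create_mtcnn() exists as in the facenet
# version of this file, and uses the TF1-style Session API of this repo's era
import cv2
import tensorflow as tf

import detect_face

with tf.Session() as sess:
    # loads det1.npy/det2.npy/det3.npy and builds the three stage callables
    pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    img = cv2.cvtColor(cv2.imread('test.jpg'), cv2.COLOR_BGR2RGB)
    boxes, points = detect_face.detect_face(img, 40, pnet, rnet, onet,
                                            [0.6, 0.7, 0.9], 0.709)

    # each row of boxes is (x0, y0, x1, y1, score); points holds 5 landmarks
    for x0, y0, x1, y1, score in boxes:
        print('face (%d,%d)-(%d,%d) conf %.3f' % (x0, y0, x1, y1, score))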