├── models ├── __init__.py ├── net.py ├── net_slim.py └── retinaface.py ├── utils ├── __init__.py ├── nms │ ├── __init__.py │ └── py_cpu_nms.py └── timer.py ├── convertor ├── onnx2caffe │ ├── __init__.py │ ├── __init__.pyc │ ├── _graph.pyc │ ├── _operators.pyc │ ├── _error_utils.pyc │ ├── _transformers.pyc │ ├── _weightloader.pyc │ ├── __pycache__ │ │ └── __init__.cpython-36.pyc │ ├── _error_utils.py │ └── _weightloader.py ├── convert_to_tf.py ├── convert_to_tflite.py ├── froze_graph_from_ckpt.py ├── convertCaffe.py ├── convert_to_onnx.py └── MyCaffe.py ├── .gitignore ├── Detector_cpp ├── Face_Detector_ncnn │ ├── .idea │ │ ├── .name │ │ ├── 1M_lib.iml │ │ ├── vcs.xml │ │ ├── misc.xml │ │ └── modules.xml │ ├── sample.jpg │ ├── model │ │ └── face.bin │ ├── tools │ │ └── onnx2ncnn │ ├── ncnn │ │ ├── lib │ │ │ ├── libncnn.a │ │ │ └── cmake │ │ │ │ └── ncnn │ │ │ │ ├── ncnnConfig.cmake │ │ │ │ ├── ncnn-release.cmake │ │ │ │ └── ncnn.cmake │ │ └── include │ │ │ └── ncnn │ │ │ ├── layer_type.h │ │ │ ├── benchmark.h │ │ │ ├── blob.h │ │ │ ├── layer_type_enum.h │ │ │ ├── cpu.h │ │ │ ├── paramdict.h │ │ │ ├── modelbin.h │ │ │ ├── pipeline.h │ │ │ ├── option.h │ │ │ ├── platform.h │ │ │ ├── layer.h │ │ │ ├── gpu.h │ │ │ ├── opencv.h │ │ │ ├── net.h │ │ │ └── command.h │ ├── CMakeLists.txt │ ├── FaceDetector.h │ ├── main_video.cpp │ └── main.cpp ├── Face_Detector_tf │ ├── sample.jpg │ ├── model │ │ ├── RFB.pb │ │ └── RFB-bak.pb │ ├── CMakeLists.txt │ ├── face_detector.h │ ├── main.cpp │ └── main_folder.cpp └── Face_Detector_tflite │ ├── sample.jpg │ ├── model │ ├── RFB.tflite │ └── RFB_640x480.tflite │ ├── alignment.h │ ├── alignment.cpp │ ├── CMakeLists.txt │ ├── face_detector.h │ ├── main.cpp │ └── main_folder.cpp ├── layers ├── __init__.py ├── modules │ ├── __init__.py │ └── multibox_loss.py └── functions │ └── prior_box.py ├── .gitattributes ├── img ├── 1.jpg └── sample.jpg ├── test ├── test.jpg ├── test_onnx.py ├── test_caffe.py ├── test_tf.py └── test_pytorch.py ├── weights ├── RBF_Final.pth ├── slim_Final.pth └── mobilenet0.25_Final.pth ├── data ├── __init__.py ├── config.py ├── wider_face.py └── data_augment.py ├── widerface_evaluate ├── ground_truth │ ├── wider_easy_val.mat │ ├── wider_face_val.mat │ ├── wider_hard_val.mat │ └── wider_medium_val.mat ├── setup.py ├── README.md └── box_overlaps.pyx ├── LICENSE ├── calculate_paremeter_flop.py ├── Detect_python ├── main.py ├── detect_pytorch.py └── detect_tf.py ├── train.py └── README.md /models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/nms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /convertor/onnx2caffe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | weights/* 2 | converted_models/* 3 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/.idea/.name: 
-------------------------------------------------------------------------------- 1 | facebox -------------------------------------------------------------------------------- /layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | from .modules import * 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.cpp linguist-language=Python 2 | 3 | *.h linguist-language=Python 4 | -------------------------------------------------------------------------------- /img/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/img/1.jpg -------------------------------------------------------------------------------- /layers/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .multibox_loss import MultiBoxLoss 2 | 3 | __all__ = ['MultiBoxLoss'] 4 | -------------------------------------------------------------------------------- /test/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/test/test.jpg -------------------------------------------------------------------------------- /img/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/img/sample.jpg -------------------------------------------------------------------------------- /weights/RBF_Final.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/weights/RBF_Final.pth -------------------------------------------------------------------------------- /weights/slim_Final.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/weights/slim_Final.pth -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | from .wider_face import WiderFaceDetection, detection_collate 2 | from .data_augment import * 3 | from .config import * 4 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/.idea/1M_lib.iml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /convertor/onnx2caffe/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/convertor/onnx2caffe/__init__.pyc -------------------------------------------------------------------------------- /convertor/onnx2caffe/_graph.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/convertor/onnx2caffe/_graph.pyc 
-------------------------------------------------------------------------------- /weights/mobilenet0.25_Final.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/weights/mobilenet0.25_Final.pth -------------------------------------------------------------------------------- /convertor/onnx2caffe/_operators.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/convertor/onnx2caffe/_operators.pyc -------------------------------------------------------------------------------- /convertor/onnx2caffe/_error_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/convertor/onnx2caffe/_error_utils.pyc -------------------------------------------------------------------------------- /convertor/onnx2caffe/_transformers.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/convertor/onnx2caffe/_transformers.pyc -------------------------------------------------------------------------------- /convertor/onnx2caffe/_weightloader.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/convertor/onnx2caffe/_weightloader.pyc -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_tf/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/Detector_cpp/Face_Detector_tf/sample.jpg -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/Detector_cpp/Face_Detector_ncnn/sample.jpg -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_tf/model/RFB.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/Detector_cpp/Face_Detector_tf/model/RFB.pb -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_tflite/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/Detector_cpp/Face_Detector_tflite/sample.jpg -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/model/face.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/Detector_cpp/Face_Detector_ncnn/model/face.bin -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/tools/onnx2ncnn: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/Detector_cpp/Face_Detector_ncnn/tools/onnx2ncnn -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_tf/model/RFB-bak.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/Detector_cpp/Face_Detector_tf/model/RFB-bak.pb -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/ncnn/lib/libncnn.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/Detector_cpp/Face_Detector_ncnn/ncnn/lib/libncnn.a -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_tflite/model/RFB.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/Detector_cpp/Face_Detector_tflite/model/RFB.tflite -------------------------------------------------------------------------------- /widerface_evaluate/ground_truth/wider_easy_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/widerface_evaluate/ground_truth/wider_easy_val.mat -------------------------------------------------------------------------------- /widerface_evaluate/ground_truth/wider_face_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/widerface_evaluate/ground_truth/wider_face_val.mat -------------------------------------------------------------------------------- /widerface_evaluate/ground_truth/wider_hard_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/widerface_evaluate/ground_truth/wider_hard_val.mat -------------------------------------------------------------------------------- /widerface_evaluate/ground_truth/wider_medium_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/widerface_evaluate/ground_truth/wider_medium_val.mat -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_tflite/model/RFB_640x480.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/Detector_cpp/Face_Detector_tflite/model/RFB_640x480.tflite -------------------------------------------------------------------------------- /convertor/onnx2caffe/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangyu1006/Lightweight-Face-Detector-With-Landmarks/HEAD/convertor/onnx2caffe/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- 
/Detector_cpp/Face_Detector_ncnn/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /test/test_onnx.py: -------------------------------------------------------------------------------- 1 | import onnxruntime as nxrun 2 | import numpy as np 3 | 4 | ximg = np.random.rand(1,3,240,320).astype(np.float32) 5 | sess = nxrun.InferenceSession("./converted_models/mobilenet/mobilenet_sim.onnx") 6 | 7 | input_name = sess.get_inputs()[0].name 8 | output_name = sess.get_outputs()[0].name 9 | result = sess.run(None, {input_name: ximg}) 10 | print(result) 11 | -------------------------------------------------------------------------------- /widerface_evaluate/setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | WiderFace evaluation code 3 | author: wondervictor 4 | mail: tianhengcheng@gmail.com 5 | copyright@wondervictor 6 | """ 7 | 8 | from distutils.core import setup, Extension 9 | from Cython.Build import cythonize 10 | import numpy 11 | 12 | package = Extension('bbox', ['box_overlaps.pyx'], include_dirs=[numpy.get_include()]) 13 | setup(ext_modules=cythonize([package])) 14 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/ncnn/lib/cmake/ncnn/ncnnConfig.cmake: -------------------------------------------------------------------------------- 1 | set(NCNN_OPENMP ON) 2 | set(NCNN_VULKAN OFF) 3 | 4 | if(NCNN_OPENMP) 5 | find_package(OpenMP) 6 | endif() 7 | 8 | if(NCNN_VULKAN) 9 | find_package(Vulkan REQUIRED) 10 | 11 | add_library(Vulkan UNKNOWN IMPORTED) 12 | set_target_properties(Vulkan PROPERTIES IMPORTED_LOCATION ${Vulkan_LIBRARY}) 13 | set_target_properties(Vulkan PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${Vulkan_INCLUDE_DIR}) 14 | endif() 15 | 16 | include(${CMAKE_CURRENT_LIST_DIR}/ncnn.cmake) 17 | -------------------------------------------------------------------------------- /widerface_evaluate/README.md: -------------------------------------------------------------------------------- 1 | # WiderFace-Evaluation 2 | Python Evaluation Code for [Wider Face Dataset](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/) 3 | 4 | 5 | ## Usage 6 | 7 | 8 | ##### before evaluating .... 
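(This compiles the Cython `bbox` extension from `box_overlaps.pyx` in place so the evaluation script can import it.)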
9 | 10 | ```` 11 | python3 setup.py build_ext --inplace 12 | ```` 13 | 14 | ##### evaluating 15 | 16 | **GroundTruth:** `wider_face_val.mat`, `wider_easy_val.mat`, `wider_medium_val.mat`, `wider_hard_val.mat` 17 | 18 | ```` 19 | python3 evaluation.py -p <your prediction dir> -g <ground truth dir> 20 | ```` 21 | 22 | ## Bugs & Problems 23 | Please open an issue. 24 | 25 | ## Acknowledgements 26 | 27 | Some code borrowed from Sergey Karayev. 28 | -------------------------------------------------------------------------------- /convertor/convert_to_tf.py: -------------------------------------------------------------------------------- 1 | from onnx_tf.backend import prepare 2 | import onnx 3 | 4 | model_onnx = onnx.load('./converted_models/mobilenet/mobilenet_sim.onnx') 5 | 6 | # prepare model for exporting to tensorflow using tensorflow backend 7 | tf_rep = prepare(model_onnx) 8 | #print(tf_rep.run(dummy_input)) # run sample inference of model 9 | print(tf_rep.inputs) # input nodes to the model 10 | print('------') 11 | print(tf_rep.outputs) # output nodes from the model 12 | print('-----') 13 | print(tf_rep.tensor_dict) # all nodes in the model 14 | 15 | # export tensorflow backend to tensorflow tf file 16 | tf_rep.export_graph('./converted_models/mobilenet/mobilenet.pb') -------------------------------------------------------------------------------- /convertor/convert_to_tflite.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | converter = tf.lite.TFLiteConverter.from_frozen_graph('./converted_models/mobilenet/mobilenetv3.pb', 4 | input_arrays=['input0'], 5 | output_arrays=['Concat_223', 'Concat_198', 'Concat_248'], 6 | input_shapes={"input0":[1,240,320,3]}) 7 | 8 | converter.optimizations = [tf.lite.Optimize.DEFAULT] 9 | converter.allow_custom_ops=True 10 | converter.inference_type = tf.float32 11 | converter.inference_input_type = tf.float32 12 | tf_lite_model = converter.convert() 13 | open('./converted_models/mobilenet/mobilenetv3.tflite', 'wb').write(tf_lite_model) -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_tflite/alignment.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file alignment.h 3 | * 4 | * @brief alignment 5 | * 6 | * @version 1.0 7 | * @date 19/05/2020 10:37:52 8 | * @author Yu Fang (Robotics), yu.fang@iim.ltd 9 | * 10 | * @section Description 11 | * 12 | * Revision: none 13 | * Compiler: g++ 14 | * Company: IIM 15 | * 16 | * @section Description 17 | * 18 | * -->describe more here<-- 19 | * 20 | */ 21 | 22 | #ifndef _ALIGNMENT_H_ 23 | #define _ALIGNMENT_H_ 24 | 25 | #include "face_detector.h" 26 | 27 | class Alignment 28 | { 29 | public: 30 | Alignment(); 31 | ~Alignment(){} 32 | std::vector<cv::Mat> alignFace(cv::Mat const& img, std::vector<bbox> const& boxes); 33 | 34 | private: 35 | cv::Mat alignOneFace(cv::Mat const& img, bbox const& box); 36 | }; 37 | 38 | 39 | #endif //!_ALIGNMENT_H_ 40 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/ncnn/lib/cmake/ncnn/ncnn-release.cmake: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------- 2 | # Generated CMake target import file for configuration "release". 3 | #---------------------------------------------------------------- 4 | 5 | # Commands may need to know the format version.
6 | set(CMAKE_IMPORT_FILE_VERSION 1) 7 | 8 | # Import target "ncnn" for configuration "release" 9 | set_property(TARGET ncnn APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) 10 | set_target_properties(ncnn PROPERTIES 11 | IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" 12 | IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libncnn.a" 13 | ) 14 | 15 | list(APPEND _IMPORT_CHECK_TARGETS ncnn ) 16 | list(APPEND _IMPORT_CHECK_FILES_FOR_ncnn "${_IMPORT_PREFIX}/lib/libncnn.a" ) 17 | 18 | # Commands beyond this point should not need to know the version. 19 | set(CMAKE_IMPORT_FILE_VERSION) 20 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/ncnn/include/ncnn/layer_type.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef NCNN_LAYER_TYPE_H 16 | #define NCNN_LAYER_TYPE_H 17 | 18 | namespace ncnn { 19 | 20 | namespace LayerType { 21 | enum 22 | { 23 | #include "layer_type_enum.h" 24 | CustomBit = (1<<8), 25 | }; 26 | } // namespace LayerType 27 | 28 | } // namespace ncnn 29 | 30 | #endif // NCNN_LAYER_TYPE_H 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 biubug6 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /convertor/froze_graph_from_ckpt.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def freeze_graph(input_checkpoint, output_node_names, file_save): 4 | with tf.Graph().as_default(): 5 | with tf.compat.v1.Session() as sess: 6 | path_meta = input_checkpoint+'.meta' 7 | saver = tf.compat.v1.train.import_meta_graph(path_meta) 8 | saver.restore(sess, input_checkpoint) 9 | 10 | output_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants( 11 | sess, 12 | tf.compat.v1.get_default_graph().as_graph_def(), 13 | output_node_names.split(",") 14 | ) 15 | with tf.io.gfile.GFile(file_save, "wb") as f: 16 | f.write(output_graph_def.SerializeToString()) 17 | for v in output_graph_def.node: 18 | print(v.name) 19 | print("save pb file to " + file_save) 20 | 21 | 22 | freeze_graph("/home/fangyu/git/Face-Detector-1MB-with-landmark/converted_models/mobilenet/mobilenetv3.ckpt", "input0,Concat_198,Concat_223,Concat_248","/home/fangyu/git/Face-Detector-1MB-with-landmark/converted_models/mobilenet/mobilenetv3.pb") -------------------------------------------------------------------------------- /utils/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0.
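    # Typical usage: t = Timer(); t.tic(); ...work...; seconds = t.toc(average=False)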
19 | 20 | def tic(self): 21 | # using time.time instead of time.clock because time.clock 22 | # does not normalize for multithreading 23 | self.start_time = time.time() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.time() - self.start_time 27 | self.total_time += self.diff 28 | self.calls += 1 29 | self.average_time = self.total_time / self.calls 30 | if average: 31 | return self.average_time 32 | else: 33 | return self.diff 34 | 35 | def clear(self): 36 | self.total_time = 0. 37 | self.calls = 0 38 | self.start_time = 0. 39 | self.diff = 0. 40 | self.average_time = 0. 41 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/ncnn/include/ncnn/benchmark.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef NCNN_BENCHMARK_H 16 | #define NCNN_BENCHMARK_H 17 | 18 | #include "platform.h" 19 | #include "mat.h" 20 | #include "layer.h" 21 | 22 | namespace ncnn { 23 | 24 | // get now timestamp in ms 25 | double get_current_time(); 26 | 27 | #if NCNN_BENCHMARK 28 | 29 | void benchmark(const Layer* layer, double start, double end); 30 | void benchmark(const Layer* layer, const Mat& bottom_blob, Mat& top_blob, double start, double end); 31 | 32 | #endif // NCNN_BENCHMARK 33 | 34 | } // namespace ncnn 35 | 36 | #endif // NCNN_BENCHMARK_H 37 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/ncnn/include/ncnn/blob.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License.
14 | 15 | #ifndef NCNN_BLOB_H 16 | #define NCNN_BLOB_H 17 | 18 | #include <string> 19 | #include <vector> 20 | #include "platform.h" 21 | 22 | namespace ncnn { 23 | 24 | class Blob 25 | { 26 | public: 27 | // empty 28 | Blob(); 29 | 30 | public: 31 | #if NCNN_STRING 32 | // blob name 33 | std::string name; 34 | #endif // NCNN_STRING 35 | // layer index which produce this blob as output 36 | int producer; 37 | // layer index which need this blob as input 38 | std::vector<int> consumers; 39 | }; 40 | 41 | } // namespace ncnn 42 | 43 | #endif // NCNN_BLOB_H 44 | -------------------------------------------------------------------------------- /test/test_caffe.py: -------------------------------------------------------------------------------- 1 | import os 2 | import caffe 3 | import cv2 4 | import numpy as np 5 | 6 | caffe.set_mode_cpu() 7 | net = caffe.Net("./converted_models/mobilenet/mobilenet.prototxt", "./converted_models/mobilenet/mobilenet.caffemodel", caffe.TEST) 8 | net.blobs['input0'].reshape(1, 3, 480, 640) 9 | tmp_batch = np.zeros([1, 3, 480, 640], dtype=np.float32) 10 | 11 | img_raw = cv2.imread("./Face_Detector_ncnn/sample.jpg") 12 | img = np.float32(img_raw) 13 | long_side = 640 14 | im_shape = img.shape 15 | im_size_min = np.min(im_shape[0:2]) 16 | im_size_max = np.max(im_shape[0:2]) 17 | resize = float(long_side) / float(im_size_min) 18 | if np.round(resize * im_size_max) > long_side: 19 | resize = float(long_side) / float(im_size_max) 20 | if resize != 1: 21 | img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR) 22 | img -= (104, 117, 123) 23 | img = img.transpose(2,0,1) 24 | tmp_batch[0, :, :, :] = img 25 | 26 | net.blobs['input0'].data[...] = tmp_batch 27 | out = net.forward() # run the network once and reuse its outputs 28 | scores = out['586'][0] 29 | boxes = out['output0'][0] 30 | landmarks = out['585'][0] 31 | 32 | print(scores.shape) 33 | print(boxes.shape) 34 | print(landmarks.shape) 35 | n, _ = scores.shape 36 | with open("caffe_result.txt", 'w') as fd: 37 | for i in range(n): 38 | fd.write(str(scores[i][1]) + " " + str(boxes[i][0]) + " " + str(boxes[i][1]) + " " + str(boxes[i][2]) + " " + str(boxes[i][3]) + "\n") 39 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/ncnn/include/ncnn/layer_type_enum.h: -------------------------------------------------------------------------------- 1 | // Layer Type Enum header 2 | // 3 | // This file is auto-generated by cmake, don't edit it.
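// (These enumerator lines are textually #included into the ncnn::LayerType enum; see layer_type.h above.)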
4 | 5 | AbsVal = 0, 6 | ArgMax = 1, 7 | BatchNorm = 2, 8 | Bias = 3, 9 | BNLL = 4, 10 | Concat = 5, 11 | Convolution = 6, 12 | Crop = 7, 13 | Deconvolution = 8, 14 | Dropout = 9, 15 | Eltwise = 10, 16 | ELU = 11, 17 | Embed = 12, 18 | Exp = 13, 19 | Flatten = 14, 20 | InnerProduct = 15, 21 | Input = 16, 22 | Log = 17, 23 | LRN = 18, 24 | MemoryData = 19, 25 | MVN = 20, 26 | Pooling = 21, 27 | Power = 22, 28 | PReLU = 23, 29 | Proposal = 24, 30 | Reduction = 25, 31 | ReLU = 26, 32 | Reshape = 27, 33 | ROIPooling = 28, 34 | Scale = 29, 35 | Sigmoid = 30, 36 | Slice = 31, 37 | Softmax = 32, 38 | Split = 33, 39 | SPP = 34, 40 | TanH = 35, 41 | Threshold = 36, 42 | Tile = 37, 43 | RNN = 38, 44 | LSTM = 39, 45 | BinaryOp = 40, 46 | UnaryOp = 41, 47 | ConvolutionDepthWise = 42, 48 | Padding = 43, 49 | Squeeze = 44, 50 | ExpandDims = 45, 51 | Normalize = 46, 52 | Permute = 47, 53 | PriorBox = 48, 54 | DetectionOutput = 49, 55 | Interp = 50, 56 | DeconvolutionDepthWise = 51, 57 | ShuffleChannel = 52, 58 | InstanceNorm = 53, 59 | Clip = 54, 60 | Reorg = 55, 61 | YoloDetectionOutput = 56, 62 | Quantize = 57, 63 | Dequantize = 58, 64 | Yolov3DetectionOutput = 59, 65 | PSROIPooling = 60, 66 | ROIAlign = 61, 67 | Packing = 62, 68 | Requantize = 63, 69 | Cast = 64, 70 | HardSigmoid = 65, 71 | SELU = 66, 72 | 73 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_tf/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | project(FaceDetector) 4 | 5 | add_definitions(-std=c++11) 6 | add_definitions("-Wall") 7 | 8 | set(LIST_LIBS "") 9 | 10 | find_package(OpenCV 3.3.1 REQUIRED) 11 | if(OpenCV_FOUND) 12 | INCLUDE_DIRECTORIES(${OpenCV_INCLUDE_DIRS}) 13 | list(APPEND LIST_LIBS ${OpenCV_LIBRARIES}) 14 | else(OpenCV_FOUND) 15 | message(WARNING "OpenCV not found") 16 | endif(OpenCV_FOUND) 17 | 18 | set(Tensorflow_INCLUDE_DIRS $ENV{TENSORFLOW_DIR}/include) 19 | set(Tensorflow_LIBRARIES $ENV{TENSORFLOW_DIR}/lib/libtensorflow.so) 20 | INCLUDE_DIRECTORIES(${Tensorflow_INCLUDE_DIRS}) 21 | list(APPEND LIST_LIBS ${Tensorflow_LIBRARIES}) 22 | 23 | find_package(Boost 1.61.0 REQUIRED COMPONENTS filesystem system) 24 | if (Boost_FOUND) 25 | INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) 26 | list(APPEND LIST_LIBS ${Boost_LIBRARIES}) 27 | else (Boost_FOUND) 28 | message(FATAL_ERROR "Need Boost to be included") 29 | endif(Boost_FOUND) 30 | 31 | set(SRC 32 | main.cpp 33 | face_detector.h 34 | face_detector.cpp 35 | ) 36 | 37 | add_executable(FaceDetector ${SRC}) 38 | target_link_libraries(FaceDetector ${LIST_LIBS}) 39 | 40 | 41 | set(SRC_FOLDER 42 | main_folder.cpp 43 | face_detector.h 44 | face_detector.cpp 45 | ) 46 | add_executable(FaceDetectorFolder ${SRC_FOLDER}) 47 | target_link_libraries(FaceDetectorFolder ${LIST_LIBS}) 48 | -------------------------------------------------------------------------------- /data/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | cfg_mnet = { 3 | 'name': 'mobilenet0.25', 4 | 'min_sizes': [[10, 20], [32, 64], [128, 256]], 5 | 'steps': [8, 16, 32], 6 | 'variance': [0.1, 0.2], 7 | 'clip': False, 8 | 'loc_weight': 2.0, 9 | 'lmk_weight': 3.0, 10 | 'gpu_train': True, 11 | 'batch_size': 32, 12 | 'ngpu': 1, 13 | 'epoch': 250, 14 | 'decay1': 190, 15 | 'decay2': 220, 16 | 'image_size': 300, 17 | 'pretrain': False, 18 | 'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3}, 19 | 'in_channel': 32, 
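# in_channel/out_channel set the feature-pyramid channel widths read by the RetinaFace model builder; note that cfg_slim and cfg_rfb below do not use these keys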
20 | 'out_channel': 64 21 | } 22 | 23 | cfg_slim = { 24 | 'name': 'slim', 25 | 'min_sizes': [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]], 26 | 'steps': [8, 16, 32, 64], 27 | 'variance': [0.1, 0.2], 28 | 'clip': False, 29 | 'loc_weight': 2.0, 30 | 'lmk_weight': 3.0, 31 | 'gpu_train': True, 32 | 'batch_size': 32, 33 | 'ngpu': 1, 34 | 'epoch': 250, 35 | 'decay1': 190, 36 | 'decay2': 220, 37 | 'image_size': 300 38 | } 39 | 40 | cfg_rfb = { 41 | 'name': 'RFB', 42 | 'min_sizes': [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]], 43 | 'steps': [8, 16, 32, 64], 44 | 'variance': [0.1, 0.2], 45 | 'clip': False, 46 | 'loc_weight': 2.0, 47 | 'lmk_weight': 3.0, 48 | 'gpu_train': True, 49 | 'batch_size': 32, 50 | 'ngpu': 1, 51 | 'epoch': 250, 52 | 'decay1': 190, 53 | 'decay2': 220, 54 | 'image_size': 300 55 | } 56 | 57 | 58 | -------------------------------------------------------------------------------- /layers/functions/prior_box.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from itertools import product as product 3 | import numpy as np 4 | from math import ceil 5 | 6 | 7 | class PriorBox(object): 8 | def __init__(self, cfg, image_size=None, phase='train'): 9 | super(PriorBox, self).__init__() 10 | self.min_sizes = cfg['min_sizes'] 11 | self.steps = cfg['steps'] 12 | self.clip = cfg['clip'] 13 | self.image_size = image_size 14 | self.feature_maps = [[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps] 15 | self.name = "s" 16 | 17 | def forward(self): 18 | anchors = [] 19 | for k, f in enumerate(self.feature_maps): 20 | min_sizes = self.min_sizes[k] 21 | for i, j in product(range(f[0]), range(f[1])): 22 | for min_size in min_sizes: 23 | s_kx = min_size / self.image_size[1] 24 | s_ky = min_size / self.image_size[0] 25 | dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]] 26 | dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]] 27 | for cy, cx in product(dense_cy, dense_cx): 28 | anchors += [cx, cy, s_kx, s_ky] 29 | 30 | # back to torch land 31 | output = torch.Tensor(anchors).view(-1, 4) 32 | if self.clip: 33 | output.clamp_(max=1, min=0) 34 | return output 35 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_tflite/alignment.cpp: -------------------------------------------------------------------------------- 1 | // ===================================================================================== 2 | // 3 | // Filename: alignment.cpp 4 | // 5 | // Version: 1.0 6 | // Created: 19/05/2020 10:59:28 7 | // Revision: none 8 | // Compiler: g++ 9 | // 10 | // Author: Yu Fang (Robotics), yu.fang@iim.ltd 11 | // Company: IIM 12 | // 13 | // Description: alignment 14 | // 15 | // ===================================================================================== 16 | #include "alignment.h" 17 | Alignment::Alignment() 18 | { 19 | 20 | } 21 | 22 | cv::Mat Alignment::alignOneFace(cv::Mat const& img, bbox const& box) 23 | { 24 | cv::Mat warp_mat,warp_dst; 25 | double mShrinkSz = 1.f; 26 | 27 | warp_mat = similarity_matrix(srcTri,mRotationCoeffCalib,mTranslate); 28 | mShrinkSz = mShrink; 29 | return warp_dst; // FIXME: warp_dst is never filled in above; the warp step looks incomplete 30 | } 31 | 32 | cv::Mat1f Alignment::calcMatU(std::vector<cv::Point2f> const& dstTri_s) 33 | { 34 | int num_point = dstTri_s.size(); 35 | cv::Mat1f X(num_point * 2, 4); 36 | for (int i = 0, p = 0; i < num_point; i++, p+=2) { 37 | X(p, 0) = dstTri_s[i].x; 38 | X(p, 1) = dstTri_s[i].y; 39 | X(p, 2) = 1.f; 40 | X(p, 3) = 0.f; 41 | 42 | X(p + 1, 0) = 
dstTri_s[i].y; 43 | X(p + 1, 1) = -dstTri_s[i].x; 44 | X(p + 1, 2) = 0.f; 45 | X(p + 1, 3) = 1.f; 46 | } 47 | 48 | cv::Mat1f X_t = X.t(); 49 | cv::Mat1f XX = X_t * X; 50 | return XX.inv() * X_t; 51 | } 52 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_tflite/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | project(FaceDetector) 4 | 5 | add_definitions(-std=c++11) 6 | add_definitions("-Wall") 7 | 8 | set(LIST_LIBS "") 9 | 10 | find_package(OpenCV 3.3.1 REQUIRED) 11 | if(OpenCV_FOUND) 12 | INCLUDE_DIRECTORIES(${OpenCV_INCLUDE_DIRS}) 13 | list(APPEND LIST_LIBS ${OpenCV_LIBRARIES}) 14 | else(OpenCV_FOUND) 15 | message(WARNING "OpenCV not found") 16 | endif(OpenCV_FOUND) 17 | 18 | 19 | set(TfLite_INCLUDE_DIRS $ENV{TFLITE_DIR}/include) 20 | set(TfLite_LIBRARIES $ENV{TFLITE_DIR}/lib/libtensorflowlite.so) 21 | INCLUDE_DIRECTORIES(${TfLite_INCLUDE_DIRS}) 22 | INCLUDE_DIRECTORIES($ENV{TFLITE_DIR}/include/tensorflow/lite/tools/make/downloads/flatbuffers/include) 23 | INCLUDE_DIRECTORIES($ENV{TFLITE_DIR}/include/tensorflow/lite/tools/make/downloads/absl) 24 | list(APPEND LIST_LIBS ${TfLite_LIBRARIES}) 25 | 26 | 27 | find_package(Boost 1.61.0 REQUIRED COMPONENTS filesystem system) 28 | if (Boost_FOUND) 29 | INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) 30 | list(APPEND LIST_LIBS ${Boost_LIBRARIES}) 31 | else (Boost_FOUND) 32 | message(FATAL_ERROR "Need Boost to be included") 33 | endif(Boost_FOUND) 34 | 35 | set(SRC 36 | main.cpp 37 | face_detector.h 38 | face_detector.cpp 39 | ) 40 | 41 | set(SRC_FOLDER 42 | main_folder.cpp 43 | face_detector.h 44 | face_detector.cpp 45 | ) 46 | 47 | 48 | add_executable(FaceDetector ${SRC}) 49 | target_link_libraries(FaceDetector ${LIST_LIBS}) 50 | 51 | add_executable(FaceDetectorFolder ${SRC_FOLDER}) 52 | target_link_libraries(FaceDetectorFolder ${LIST_LIBS}) 53 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | project(FaceDetector) 4 | 5 | add_definitions(-std=c++11) 6 | add_definitions("-Wall") 7 | 8 | set(OpenCV_DIR "/usr/local/opencv/") 9 | 10 | if(True) 11 | find_package(OpenMP) 12 | if(NOT TARGET OpenMP::OpenMP_CXX AND (OpenMP_CXX_FOUND OR OPENMP_FOUND)) 13 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") 14 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") 15 | set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") 16 | endif() 17 | endif() 18 | 19 | include_directories(${OpenCV_DIR}/include 20 | ${CMAKE_CURRENT_SOURCE_DIR}/ncnn/include/ncnn) 21 | 22 | link_directories(${OpenCV_DIR}/lib/ 23 | ${CMAKE_CURRENT_SOURCE_DIR}/ncnn/lib) 24 | set(OpenCV_LIBS opencv_core 25 | opencv_highgui 26 | opencv_imgproc 27 | opencv_imgcodecs 28 | opencv_videoio) 29 | 30 | 31 | find_package(Boost 1.61.0 REQUIRED COMPONENTS filesystem system) 32 | if (Boost_FOUND) 33 | INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) 34 | list(APPEND OpenCV_LIBS ${Boost_LIBRARIES}) 35 | else (Boost_FOUND) 36 | message(FATAL_ERROR "Need Boost to be included") 37 | endif(Boost_FOUND) 38 | 39 | set(SRC 40 | main.cpp 41 | FaceDetector.h 42 | FaceDetector.cpp 43 | ) 44 | 45 | add_executable(FaceDetector ${SRC}) 46 | target_link_libraries(FaceDetector ncnn ${OpenCV_LIBS}) 47 | if(OpenMP_CXX_FOUND) 
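# Note: libncnn.a above is a static library, so OpenMP is also linked into the executable here; otherwise ncnn's OpenMP-parallel kernels would fail to resolve at link time.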
48 | if(NCNN_CMAKE_VERBOSE) 49 | message("Building with OpenMP") 50 | endif() 51 | target_link_libraries(FaceDetector OpenMP::OpenMP_CXX) 52 | endif() 53 | -------------------------------------------------------------------------------- /calculate_paremeter_flop.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import argparse 4 | import torch 5 | import torch.backends.cudnn as cudnn 6 | import numpy as np 7 | from data import cfg_mnet, cfg_slim, cfg_rfb 8 | from layers.functions.prior_box import PriorBox 9 | from utils.nms.py_cpu_nms import py_cpu_nms 10 | import cv2 11 | from thop import profile 12 | from thop import clever_format 13 | from models.retinaface import RetinaFace 14 | from models.net_slim import Slim 15 | from models.net_rfb import RFB 16 | from utils.box_utils import decode, decode_landm 17 | from utils.timer import Timer 18 | 19 | 20 | parser = argparse.ArgumentParser(description='Test') 21 | parser.add_argument('--network', default='mobile0.25', help='Backbone network mobile0.25 or slim or RFB') 22 | parser.add_argument('--long_side', default=320, help='when origin_size is false, long_side is scaled size(320 or 640 for long side)') 23 | 24 | args = parser.parse_args() 25 | 26 | if __name__ == '__main__': 27 | torch.set_grad_enabled(False) 28 | 29 | cfg = None 30 | net = None 31 | if args.network == "mobile0.25": 32 | cfg = cfg_mnet 33 | net = RetinaFace(cfg = cfg, phase = 'test') 34 | elif args.network == "slim": 35 | cfg = cfg_slim 36 | net = Slim(cfg = cfg, phase = 'test') 37 | elif args.network == "RFB": 38 | cfg = cfg_rfb 39 | net = RFB(cfg = cfg, phase = 'test') 40 | else: 41 | print("Unsupported network!") 42 | exit(1) 43 | long_side = int(args.long_side) 44 | short_side = int(args.long_side/4*3) 45 | img = torch.randn(1, 3, long_side, short_side) 46 | flops, params = profile(net, inputs=(img, )) 47 | 48 | flops, params = clever_format([flops, params], "%.3f") 49 | print("param:", params, "flops:", flops) 50 | 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/ncnn/include/ncnn/cpu.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License.
14 | 15 | #ifndef NCNN_CPU_H 16 | #define NCNN_CPU_H 17 | 18 | namespace ncnn { 19 | 20 | // test optional cpu features 21 | // neon = armv7 neon or aarch64 asimd 22 | int cpu_support_arm_neon(); 23 | // vfpv4 = armv7 fp16 + fma 24 | int cpu_support_arm_vfpv4(); 25 | // asimdhp = aarch64 asimd half precision 26 | int cpu_support_arm_asimdhp(); 27 | 28 | // cpu info 29 | int get_cpu_count(); 30 | 31 | // bind all threads on little clusters if powersave enabled 32 | // affects HMP arch cpu like ARM big.LITTLE 33 | // only implemented on android at the moment 34 | // switching powersave is expensive and not thread-safe 35 | // 0 = all cores enabled(default) 36 | // 1 = only little clusters enabled 37 | // 2 = only big clusters enabled 38 | // return 0 if success for setter function 39 | int get_cpu_powersave(); 40 | int set_cpu_powersave(int powersave); 41 | 42 | // misc function wrapper for openmp routines 43 | int get_omp_num_threads(); 44 | void set_omp_num_threads(int num_threads); 45 | 46 | int get_omp_dynamic(); 47 | void set_omp_dynamic(int dynamic); 48 | 49 | } // namespace ncnn 50 | 51 | #endif // NCNN_CPU_H 52 | -------------------------------------------------------------------------------- /widerface_evaluate/box_overlaps.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/ncnn/include/ncnn/paramdict.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License.
You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef NCNN_PARAMDICT_H 16 | #define NCNN_PARAMDICT_H 17 | 18 | #include <stdio.h> 19 | #include "mat.h" 20 | #include "platform.h" 21 | 22 | // at most 20 parameters 23 | #define NCNN_MAX_PARAM_COUNT 20 24 | 25 | namespace ncnn { 26 | 27 | class Net; 28 | class ParamDict 29 | { 30 | public: 31 | // empty 32 | ParamDict(); 33 | 34 | // get int 35 | int get(int id, int def) const; 36 | // get float 37 | float get(int id, float def) const; 38 | // get array 39 | Mat get(int id, const Mat& def) const; 40 | 41 | // set int 42 | void set(int id, int i); 43 | // set float 44 | void set(int id, float f); 45 | // set array 46 | void set(int id, const Mat& v); 47 | 48 | protected: 49 | friend class Net; 50 | 51 | void clear(); 52 | 53 | #if NCNN_STDIO 54 | #if NCNN_STRING 55 | int load_param(FILE* fp); 56 | int load_param_mem(const char*& mem); 57 | #endif // NCNN_STRING 58 | int load_param_bin(FILE* fp); 59 | #endif // NCNN_STDIO 60 | int load_param(const unsigned char*& mem); 61 | 62 | protected: 63 | struct 64 | { 65 | int loaded; 66 | union { int i; float f; }; 67 | Mat v; 68 | } params[NCNN_MAX_PARAM_COUNT]; 69 | }; 70 | 71 | } // namespace ncnn 72 | 73 | #endif // NCNN_PARAMDICT_H 74 | -------------------------------------------------------------------------------- /convertor/onnx2caffe/_error_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from typing import Dict, Text, Any, Callable 6 | from ._graph import Node, Graph 7 | 8 | class ErrorHandling(object): 9 | ''' 10 | To handle errors and addition of custom layers 11 | ''' 12 | 13 | def __init__(self, 14 | add_custom_layers = False, # type: bool 15 | custom_conversion_functions = dict(), # type: Dict[Text, Any] 16 | custom_layer_nodes = [], # type : List[Node] 17 | ): 18 | # type: (...) -> None 19 | self.add_custom_layers = add_custom_layers 20 | self.custom_conversion_functions = custom_conversion_functions 21 | self.custom_layer_nodes = custom_layer_nodes 22 | 23 | 24 | def unsupported_op(self, 25 | node, # type: Node 26 | ): 27 | # type: (...) -> Callable[[Any, Node, Graph, ErrorHandling], None] 28 | ''' 29 | Either raise an error for an unsupported op type or return custom layer add function 30 | ''' 31 | if self.add_custom_layers: 32 | from ._operators import _convert_custom 33 | return _convert_custom 34 | else: 35 | raise TypeError( 36 | "ONNX node of type {} is not supported.\n".format(node.op_type,) 37 | ) 38 | 39 | 40 | def unsupported_op_configuration(self, 41 | node, # type: Node 42 | err_message, # type: Text 43 | ): 44 | raise TypeError( 45 | "Error while converting op of type: {}. Error message: {}\n".format(node.op_type, err_message, ) 46 | ) 47 | 48 | 49 | def missing_initializer(self, 50 | node, # type: Node 51 | err_message, # type: Text 52 | ): 53 | # type: (...) 
-> None 54 | ''' 55 | Missing initializer error 56 | ''' 57 | raise ValueError( 58 | "Missing initializer error in op of type {}, with input name = {}, " 59 | "output name = {}. Error message: {}\n". 60 | format(node.op_type, node.inputs[0], node.outputs[0], err_message) 61 | ) 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/FaceDetector.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by dl on 19-7-19. 3 | // 4 | 5 | #ifndef FACE_DETECTOR_H 6 | #define FACE_DETECTOR_H 7 | 8 | #include <opencv2/opencv.hpp> 9 | #include <string> 10 | #include <stack> 11 | #include "net.h" 12 | #include <chrono> 13 | using namespace std::chrono; 14 | 15 | class Timer 16 | { 17 | public: 18 | std::stack<high_resolution_clock::time_point> tictoc_stack; 19 | 20 | void tic() 21 | { 22 | high_resolution_clock::time_point t1 = high_resolution_clock::now(); 23 | tictoc_stack.push(t1); 24 | } 25 | 26 | double toc(std::string msg = "", bool flag = true) 27 | { 28 | double diff = duration_cast<milliseconds>(high_resolution_clock::now() - tictoc_stack.top()).count(); 29 | if(msg.size() > 0){ 30 | if (flag) 31 | printf("%s time elapsed: %f ms\n", msg.c_str(), diff); 32 | } 33 | 34 | tictoc_stack.pop(); 35 | return diff; 36 | } 37 | void reset() 38 | { 39 | tictoc_stack = std::stack<high_resolution_clock::time_point>(); 40 | } 41 | }; 42 | 43 | struct Point{ 44 | float _x; 45 | float _y; 46 | }; 47 | struct bbox{ 48 | float x1; 49 | float y1; 50 | float x2; 51 | float y2; 52 | float s; 53 | Point point[5]; 54 | }; 55 | 56 | struct box{ 57 | float cx; 58 | float cy; 59 | float sx; 60 | float sy; 61 | }; 62 | 63 | class Detector 64 | { 65 | 66 | public: 67 | Detector(); 68 | 69 | void Init(const std::string &model_param, const std::string &model_bin); 70 | 71 | Detector(const std::string &model_param, const std::string &model_bin, bool retinaface = false); 72 | 73 | inline void Release(); 74 | 75 | void nms(std::vector<bbox> &input_boxes, float NMS_THRESH); 76 | 77 | void Detect(cv::Mat& bgr, std::vector<bbox>& boxes); 78 | 79 | void create_anchor(std::vector<box> &anchor, int w, int h); 80 | 81 | void create_anchor_retinaface(std::vector<box> &anchor, int w, int h); 82 | 83 | inline void SetDefaultParams(); 84 | 85 | static inline bool cmp(bbox a, bbox b); 86 | 87 | ~Detector(); 88 | 89 | public: 90 | float _nms; 91 | float _threshold; 92 | float _mean_val[3]; 93 | bool _retinaface; 94 | 95 | ncnn::Net *Net; 96 | }; 97 | #endif // 98 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/ncnn/include/ncnn/modelbin.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License.
14 | 15 | #ifndef NCNN_MODELBIN_H 16 | #define NCNN_MODELBIN_H 17 | 18 | #include <stdio.h> 19 | #include "mat.h" 20 | #include "platform.h" 21 | 22 | namespace ncnn { 23 | 24 | class Net; 25 | class ModelBin 26 | { 27 | public: 28 | virtual ~ModelBin(); 29 | // element type 30 | // 0 = auto 31 | // 1 = float32 32 | // 2 = float16 33 | // 3 = int8 34 | // load vec 35 | virtual Mat load(int w, int type) const = 0; 36 | // load image 37 | virtual Mat load(int w, int h, int type) const; 38 | // load dim 39 | virtual Mat load(int w, int h, int c, int type) const; 40 | }; 41 | 42 | #if NCNN_STDIO 43 | class ModelBinFromStdio : public ModelBin 44 | { 45 | public: 46 | // construct from file 47 | ModelBinFromStdio(FILE* binfp); 48 | 49 | virtual Mat load(int w, int type) const; 50 | 51 | protected: 52 | FILE* binfp; 53 | }; 54 | #endif // NCNN_STDIO 55 | 56 | class ModelBinFromMemory : public ModelBin 57 | { 58 | public: 59 | // construct from external memory 60 | ModelBinFromMemory(const unsigned char*& mem); 61 | 62 | virtual Mat load(int w, int type) const; 63 | 64 | protected: 65 | const unsigned char*& mem; 66 | }; 67 | 68 | class ModelBinFromMatArray : public ModelBin 69 | { 70 | public: 71 | // construct from weight blob array 72 | ModelBinFromMatArray(const Mat* weights); 73 | 74 | virtual Mat load(int w, int type) const; 75 | 76 | protected: 77 | mutable const Mat* weights; 78 | }; 79 | 80 | } // namespace ncnn 81 | 82 | #endif // NCNN_MODELBIN_H 83 | -------------------------------------------------------------------------------- /Detect_python/main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | 5 | #from detect_tf import Detector 6 | from detect_pytorch import Detector 7 | from face_align import * 8 | 9 | model_path = './converted_models/slim/slimv2.pth' 10 | network = 'slim' 11 | long_side = 320 12 | threshold = 0.8 13 | 14 | 15 | src_dir = "/home/fangyu/fy/face-recognition-benchmarks/IIM/iim_dataset_registration-4827/dataset_112x112/" 16 | dst_dir = "/home/fangyu/fy/face-recognition-benchmarks/IIM/iim_dataset_registration-4827/dataset_slimv2" 17 | 18 | detector = Detector(model_path, long_side, network) 19 | 20 | if not os.path.exists(dst_dir): 21 | os.mkdir(dst_dir) 22 | 23 | for root, dirs, files in os.walk(src_dir): 24 | for name in files: 25 | img_path = os.path.join(root, name) 26 | person_name = img_path.split('/')[-2] 27 | print(person_name) 28 | person_path = os.path.join(dst_dir, person_name) 29 | if not os.path.exists(person_path): 30 | os.mkdir(person_path) 31 | 32 | img = cv2.imread(img_path) 33 | dets = detector.detect(img, threshold) 34 | for b in dets: 35 | bbox = b[0:4] 36 | points = b[5:15].reshape((5,2)) 37 | nimg = alignFace(img, bbox, points) 38 | cv2.imwrite(os.path.join(dst_dir,person_name, name), nimg) 39 | # save img 40 | ''' 41 | img_raw = cv2.imread("./Face_Detector_tflite/sample.jpg") 42 | detector = Detector(model_path, long_side, network) 43 | dets = detector.detect(img_raw, threshold) 44 | for b in dets: 45 | bbox = b[0:4] 46 | points = b[5:15].reshape((5,2)) 47 | #nimg = alignFace(img_raw, bbox, points) 48 | 49 | #cv2.imshow("test", nimg) 50 | #cv2.waitKey(0) 51 | text = "{:.4f}".format(b[4]) 52 | b = list(map(int, b)) 53 | 54 | cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) 55 | cx = b[0] 56 | cy = b[1] + 12 57 | cv2.putText(img_raw, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) 58 | 59 | cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 
255), 4) 60 | cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4) 61 | cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4) 62 | cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4) 63 | cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4) 64 | cv2.imwrite("test.jpg", img_raw) 65 | ''' 66 | -------------------------------------------------------------------------------- /test/test_tf.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.platform import gfile 3 | import numpy as np 4 | import cv2 5 | import math 6 | 7 | GRAPH_PB_PATH = '/home/fangyu/git/Face-Detector-1MB-with-landmark/converted_models/mobilenet/mobilenet.pb' 8 | 9 | 10 | def load_graph(frozen_graph_filename): 11 | # We load the protobuf file from the disk and parse it to retrieve the 12 | # unserialized graph_def 13 | with tf.gfile.GFile(frozen_graph_filename, "rb") as f: 14 | #print(f.readline()) 15 | graph_def = tf.GraphDef() 16 | graph_def.ParseFromString(f.read()) 17 | 18 | # Then, we import the graph_def into a new Graph and returns it 19 | with tf.Graph().as_default() as graph: 20 | # The name var will prefix every op/nodes in your graph 21 | # Since we load everything in a new graph, this is not needed 22 | #tf.import_graph_def(graph_def, name='') 23 | tf.import_graph_def(graph_def,input_map = None,return_elements = None,name = "",op_dict = None,producer_op_list = None) 24 | return graph 25 | 26 | 27 | def softmax(x, y): 28 | sum_ = float(math.exp(x)+math.exp(y)) 29 | return math.exp(y)/sum_ 30 | 31 | graph = load_graph(GRAPH_PB_PATH) 32 | 33 | for v in graph.as_graph_def().node: 34 | print(v.name) 35 | 36 | 37 | #input = np.ones((1,240,320,3)) 38 | img_raw = cv2.imread("./Face_Detector_ncnn/sample.jpg") 39 | img = np.float32(img_raw) 40 | img -= (104,117,123) 41 | long_side = 640 42 | im_shape = img.shape 43 | im_size_min = np.min(im_shape[0:2]) 44 | im_size_max = np.max(im_shape[0:2]) 45 | resize = float(long_side) / float(im_size_min) 46 | if np.round(resize * im_size_max) > long_side: 47 | resize = float(long_side) / float(im_size_max) 48 | if resize != 1: 49 | img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR) 50 | input = [] 51 | input.append(img) 52 | input = np.array(input) 53 | x = graph.get_tensor_by_name('input0:0') 54 | y = graph.get_tensor_by_name('Concat_223:0') 55 | y1 = graph.get_tensor_by_name('Concat_198:0') 56 | y2 = graph.get_tensor_by_name('Concat_248:0') 57 | 58 | with tf.Session(graph=graph) as sess: 59 | conf, loc, landmks = sess.run((y,y1,y2), feed_dict={x:input}) 60 | print(conf.shape) 61 | print(conf) 62 | print(loc.shape) 63 | print(landmks.shape) 64 | _, n, _ = loc.shape 65 | #loc = loc.reshape(1,1,-1,4) 66 | #conf = conf.reshape(1,1,-1,2) 67 | with open('tf_result.py', 'w') as fd: 68 | for j in range(n): 69 | score = softmax(conf[0,j,0], conf[0,j,1]) 70 | fd.write(str(score)+' ' + str(loc[0,j,0]) + ' ' + str(loc[0,j,1]) + ' ' + str(loc[0,j,2]) + ' ' + str(loc[0,j,3]) + '\n') 71 | #fd.write(str(landmks[0,j,0]) + ' ' + str(landmks[0,j,1]) + ' ' + str(landmks[0,j,2]) + ' ' + str(landmks[0,j,3]) + '\n') 72 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/ncnn/include/ncnn/pipeline.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 
2 | // 3 | // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef NCNN_PIPELINE_H 16 | #define NCNN_PIPELINE_H 17 | 18 | #include "platform.h" 19 | #include "mat.h" 20 | #if NCNN_VULKAN 21 | #include <vulkan/vulkan.h> 22 | #include "gpu.h" 23 | #endif // NCNN_VULKAN 24 | 25 | namespace ncnn { 26 | 27 | #if NCNN_VULKAN 28 | class Option; 29 | class Pipeline 30 | { 31 | public: 32 | Pipeline(const VulkanDevice* vkdev); 33 | ~Pipeline(); 34 | 35 | public: 36 | void set_optimal_local_size_xyz(int w = 32, int h = 32, int c = 32); 37 | void set_local_size_xyz(int w, int h, int c); 38 | 39 | int create(const uint32_t* spv_data, size_t spv_data_size, const char* entry_name, 40 | const std::vector<vk_specialization_type>& specializations, int binding_count, int push_constant_count); 41 | int create(VkShaderModule shader_module, const char* entry_name, 42 | const std::vector<vk_specialization_type>& specializations, int binding_count, int push_constant_count); 43 | int create(const char* name, const Option& opt, const std::vector<vk_specialization_type>& specializations, 44 | int binding_count, int push_constant_count); 45 | void destroy(); 46 | 47 | protected: 48 | int create_descriptorset_layout(int binding_count); 49 | int create_pipeline_layout(int push_constant_count); 50 | int create_pipeline(VkShaderModule shader_module, const char* entry_name, const std::vector<vk_specialization_type>& specializations); 51 | int create_descriptor_update_template(int binding_count); 52 | 53 | public: 54 | const VulkanDevice* vkdev; 55 | 56 | // local shader module 57 | VkShaderModule local_shader_module; 58 | 59 | VkDescriptorSetLayout descriptorset_layout; 60 | VkPipelineLayout pipeline_layout; 61 | 62 | // op forward TODO use pipeline cache ? 63 | VkPipeline pipeline; 64 | 65 | VkDescriptorUpdateTemplateKHR descriptor_update_template; 66 | 67 | uint32_t local_size_x; 68 | uint32_t local_size_y; 69 | uint32_t local_size_z; 70 | }; 71 | #endif // NCNN_VULKAN 72 | 73 | } // namespace ncnn 74 | 75 | #endif // NCNN_PIPELINE_H 76 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/ncnn/include/ncnn/option.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License.
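// Usage sketch for the Option struct declared below (illustrative only):
//
//   ncnn::Option opt;        // Option() fills in the defaults
//   opt.lightmode = true;    // recycle intermediate blobs
//   opt.num_threads = 2;     // override the get_cpu_count() default
//   net.opt = opt;           // assumes an ncnn::Net instance named `net`
//
// As the field comments below note, the convolution and int8 switches only
// take effect if set before the network structure and weights are loaded.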
14 | 15 | #ifndef NCNN_OPTION_H 16 | #define NCNN_OPTION_H 17 | 18 | #include "platform.h" 19 | 20 | namespace ncnn { 21 | 22 | #if NCNN_VULKAN 23 | class VkAllocator; 24 | #endif // NCNN_VULKAN 25 | 26 | class Allocator; 27 | class Option 28 | { 29 | public: 30 | // default option 31 | Option(); 32 | 33 | public: 34 | // light mode 35 | // intermediate blob will be recycled when enabled 36 | // enabled by default 37 | bool lightmode; 38 | 39 | // thread count 40 | // default value is the one returned by get_cpu_count() 41 | int num_threads; 42 | 43 | // blob memory allocator 44 | Allocator* blob_allocator; 45 | 46 | // workspace memory allocator 47 | Allocator* workspace_allocator; 48 | 49 | #if NCNN_VULKAN 50 | // blob memory allocator 51 | VkAllocator* blob_vkallocator; 52 | 53 | // workspace memory allocator 54 | VkAllocator* workspace_vkallocator; 55 | 56 | // staging memory allocator 57 | VkAllocator* staging_vkallocator; 58 | #endif // NCNN_VULKAN 59 | 60 | // enable winograd convolution optimization 61 | // improve convolution 3x3 stride1 performance, may consume more memory 62 | // changes should be applied before loading network structure and weight 63 | // enabled by default 64 | bool use_winograd_convolution; 65 | 66 | // enable sgemm convolution optimization 67 | // improve convolution 1x1 stride1 performance, may consume more memory 68 | // changes should be applied before loading network structure and weight 69 | // enabled by default 70 | bool use_sgemm_convolution; 71 | 72 | // enable quantized int8 inference 73 | // use low-precision int8 path for quantized model 74 | // changes should be applied before loading network structure and weight 75 | // enabled by default 76 | bool use_int8_inference; 77 | 78 | // enable vulkan compute 79 | bool use_vulkan_compute; 80 | 81 | // enable options for gpu inference 82 | bool use_fp16_packed; 83 | bool use_fp16_storage; 84 | bool use_fp16_arithmetic; 85 | bool use_int8_storage; 86 | bool use_int8_arithmetic; 87 | 88 | // 89 | bool use_packing_layout; 90 | }; 91 | 92 | } // namespace ncnn 93 | 94 | #endif // NCNN_OPTION_H 95 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_tflite/face_detector.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file face_detector.h 3 | * 4 | * @brief face detector using RFB 5 | * 6 | * @version 1.0 7 | * @date 07/05/2020 22:27:06 8 | * @author Yu Fang (Robotics), yu.fang@iim.ltd 9 | * 10 | * @section Description 11 | * 12 | * Revision: none 13 | * Compiler: g++ 14 | * Company: IIM 15 | * 16 | * @section Description 17 | * 18 | * -->describe more here<-- 19 | * 20 | */ 21 | 22 | #ifndef _FACE_DETECTOR_H_ 23 | #define _FACE_DETECTOR_H_ 24 | 25 | #include <opencv2/opencv.hpp> 26 | #include <iostream> 27 | #include <stack> 28 | #include <chrono> 29 | #include "tensorflow/lite/model.h" 30 | 31 | using namespace std::chrono; 32 | 33 | class Timer 34 | { 35 | public: 36 | std::stack<high_resolution_clock::time_point> tictoc_stack; 37 | 38 | void tic() 39 | { 40 | high_resolution_clock::time_point t1 = high_resolution_clock::now(); 41 | tictoc_stack.push(t1); 42 | } 43 | 44 | double toc(std::string msg = "", bool flag = true) 45 | { 46 | double diff = duration_cast<duration<double, std::milli>>(high_resolution_clock::now() - tictoc_stack.top()).count(); 47 | if (msg.size() > 0) { 48 | if (flag) { 49 | std::cout << msg.c_str() << " time elapsed " << diff << "ms" << std::endl; 50 | } 51 | } 52 | 53 | tictoc_stack.pop(); 54 | return diff; 55 | } 56 | 57 | void reset() 58 | { 59 | tictoc_stack = std::stack<high_resolution_clock::time_point>(); 60 | } 61
| }; 62 | 63 | 64 | struct Point { 65 | float _x; 66 | float _y; 67 | }; 68 | 69 | struct bbox { 70 | float x1; 71 | float y1; 72 | float x2; 73 | float y2; 74 | float s; 75 | Point points[5]; 76 | }; 77 | 78 | struct box { 79 | float cx; 80 | float cy; 81 | float sx; 82 | float sy; 83 | }; 84 | 85 | class Detector 86 | { 87 | public: 88 | Detector(const std::string &model_path); 89 | ~Detector(); 90 | void detect(cv::Mat &img, std::vector<bbox> &boxes); 91 | void setParams(float threshold, int num_of_threads); 92 | private: 93 | TfLiteStatus load_graph_tflite(const std::string &model_path); 94 | static inline bool cmp(bbox a, bbox b); 95 | static inline std::vector<float> softmax(float a, float b); 96 | void createAnchor(std::vector<box> &anchor, int w, int h); 97 | void createAnchorRetinaface(std::vector<box> &anchor, int w, int h); 98 | void nms(std::vector<bbox> &input_boxes, float nms_thresh); 99 | 100 | private: 101 | std::unique_ptr<tflite::FlatBufferModel> model_; 102 | std::unique_ptr<tflite::Interpreter> interpreter_; 103 | 104 | int input_id; 105 | //TfLiteTensor* input_tensor_; 106 | //TfLiteTensor* output_tensor_; 107 | 108 | cv::Size input_geometry_; 109 | cv::Mat mean_; 110 | 111 | float nms_; 112 | float threshold_; 113 | float mean_val_[3]; 114 | int num_of_threads_; 115 | }; 116 | 117 | 118 | #endif //!_FACE_DETECTOR_H_ 119 | 120 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_tf/face_detector.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file face_detector.h 3 | * 4 | * @brief face detector using RFB 5 | * 6 | * @version 1.0 7 | * @date 07/05/2020 22:27:06 8 | * @author Yu Fang (Robotics), yu.fang@iim.ltd 9 | * 10 | * @section Description 11 | * 12 | * Revision: none 13 | * Compiler: g++ 14 | * Company: IIM 15 | * 16 | * @section Description 17 | * 18 | * -->describe more here<-- 19 | * 20 | */ 21 | 22 | #ifndef _FACE_DETECTOR_H_ 23 | #define _FACE_DETECTOR_H_ 24 | 25 | #include <opencv2/opencv.hpp> 26 | #include <iostream> 27 | #include <stack> 28 | #include <chrono> 29 | #include "tensorflow/c/c_api.h" 30 | 31 | using namespace std::chrono; 32 | 33 | class Timer 34 | { 35 | public: 36 | std::stack<high_resolution_clock::time_point> tictoc_stack; 37 | 38 | void tic() 39 | { 40 | high_resolution_clock::time_point t1 = high_resolution_clock::now(); 41 | tictoc_stack.push(t1); 42 | } 43 | 44 | double toc(std::string msg = "", bool flag = true) 45 | { 46 | double diff = duration_cast<duration<double, std::milli>>(high_resolution_clock::now() - tictoc_stack.top()).count(); 47 | if (msg.size() > 0) { 48 | if (flag) { 49 | std::cout << msg.c_str() << " time elapsed " << diff << "ms" << std::endl; 50 | } 51 | } 52 | 53 | tictoc_stack.pop(); 54 | return diff; 55 | } 56 | 57 | void reset() 58 | { 59 | tictoc_stack = std::stack<high_resolution_clock::time_point>(); 60 | } 61 | }; 62 | 63 | 64 | struct Point { 65 | float _x; 66 | float _y; 67 | }; 68 | 69 | struct bbox { 70 | float x1; 71 | float y1; 72 | float x2; 73 | float y2; 74 | float s; 75 | Point points[5]; 76 | }; 77 | 78 | struct box { 79 | float cx; 80 | float cy; 81 | float sx; 82 | float sy; 83 | }; 84 | 85 | class Detector 86 | { 87 | public: 88 | Detector(const std::string &model_path); 89 | ~Detector(); 90 | void detect(cv::Mat &img, std::vector<bbox> &boxes); 91 | void setParams(float threshold, int num_of_threads); 92 | private: 93 | TF_Session * load_graph(const char * frozen_fname, TF_Graph** p_graph); 94 | static void dummy_deallocator(void* data, size_t len, void* arg){} 95 | static inline bool cmp(bbox a, bbox b); 96 | static inline std::vector<float> softmax(float a, float b); 97 | void createAnchor(std::vector<box> &anchor, int w,
int h); 98 | void createAnchorRetinaface(std::vector<box> &anchor, int w, int h); 99 | void nms(std::vector<bbox> &input_boxes, float nms_thresh); 100 | bool loadTFModel(const std::string& fname, std::vector<char>& buf); 101 | std::vector<std::vector<float>> matrixReshape(std::vector<std::vector<float>>& nums, int r, int c); 102 | private: 103 | TF_Session * sess; /* *< tensorflow session */ 104 | TF_Graph * graph; /* *< tensorflow graph */ 105 | 106 | cv::Size input_geometry_; 107 | cv::Mat mean_; 108 | 109 | float nms_; 110 | float threshold_; 111 | float mean_val_[3]; 112 | int num_of_threads_; 113 | }; 114 | 115 | 116 | #endif //!_FACE_DETECTOR_H_ 117 | 118 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_tflite/main.cpp: -------------------------------------------------------------------------------- 1 | // ===================================================================================== 2 | // 3 | // Filename: main.cpp 4 | // 5 | // Version: 1.0 6 | // Created: 08/05/2020 15:48:58 7 | // Revision: none 8 | // Compiler: g++ 9 | // 10 | // Author: Yu Fang (Robotics), yu.fang@iim.ltd 11 | // Company: IIM 12 | // 13 | // Description: main function for face detector 14 | // 15 | // ===================================================================================== 16 | 17 | #include <cstdio> 18 | #include <iostream> 19 | #include <string> 20 | #include <vector> 21 | #include <algorithm> 22 | #include <chrono> 23 | #include <opencv2/opencv.hpp> 24 | 25 | #include "face_detector.h" 26 | 27 | int main(int argc, char** argv) 28 | { 29 | std::string img_path; 30 | if (argc == 1) { 31 | img_path = "../sample.jpg"; 32 | } else if (argc == 2) { 33 | img_path = argv[1]; 34 | } 35 | 36 | std::string model_path = "/home/fangyu/git/Face-Detector-1MB-with-landmark/converted_models/mobilenet/mobilenetv2.tflite"; 37 | const int max_side = 320; 38 | float threshold = 0.9; 39 | int num_of_thread = 1; 40 | 41 | Detector* detector = new Detector(model_path); 42 | detector->setParams(threshold, num_of_thread); 43 | Timer timer; 44 | 45 | for (int i = 0; i < 2; i++) { 46 | cv::Mat img = cv::imread(img_path.c_str()); 47 | if (img.empty()) { 48 | std::cout << "cv imread failed: " << img_path.c_str() << std::endl; 49 | return -1; 50 | } 51 | 52 | // scale 53 | float long_side = std::max(img.cols, img.rows); 54 | float scale = max_side/long_side; 55 | cv::Mat img_scale; 56 | cv::Size size = cv::Size(img.cols*scale, img.rows*scale); 57 | cv::resize(img, img_scale, size); 58 | 59 | std::vector<bbox> boxes; 60 | 61 | timer.tic(); 62 | 63 | detector->detect(img_scale, boxes); 64 | timer.toc("-----total time:"); 65 | 66 | // draw image 67 | for (int j = 0; j < boxes.size(); ++j) { 68 | cv::Rect rect(boxes[j].x1/scale, boxes[j].y1/scale, boxes[j].x2/scale - boxes[j].x1/scale, boxes[j].y2/scale - boxes[j].y1/scale); 69 | cv::rectangle(img, rect, cv::Scalar(0, 0, 255), 1, 8, 0); 70 | char test[80]; 71 | sprintf(test, "%f", boxes[j].s); 72 | 73 | cv::putText(img, test, cv::Point(boxes[j].x1/scale, boxes[j].y1/scale), cv::FONT_HERSHEY_COMPLEX, 0.5, cv::Scalar(0, 255, 255)); 74 | cv::circle(img, cv::Point(boxes[j].points[0]._x / scale, boxes[j].points[0]._y / scale), 1, cv::Scalar(0, 0, 225), 4); 75 | cv::circle(img, cv::Point(boxes[j].points[1]._x / scale, boxes[j].points[1]._y / scale), 1, cv::Scalar(0, 255, 225), 4); 76 | cv::circle(img, cv::Point(boxes[j].points[2]._x / scale, boxes[j].points[2]._y / scale), 1, cv::Scalar(255, 0, 225), 4); 77 | cv::circle(img, cv::Point(boxes[j].points[3]._x / scale, boxes[j].points[3]._y / scale), 1, cv::Scalar(0, 255,
0), 4); 78 | cv::circle(img, cv::Point(boxes[j].points[4]._x / scale, boxes[j].points[4]._y / scale), 1, cv::Scalar(255, 0, 0), 4); 79 | } 80 | 81 | cv::imwrite("test.png", img); 82 | } 83 | 84 | return 0; 85 | } 86 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_tf/main.cpp: -------------------------------------------------------------------------------- 1 | // ===================================================================================== 2 | // 3 | // Filename: main.cpp 4 | // 5 | // Version: 1.0 6 | // Created: 08/05/2020 15:48:58 7 | // Revision: none 8 | // Compiler: g++ 9 | // 10 | // Author: Yu Fang (Robotics), yu.fang@iim.ltd 11 | // Company: IIM 12 | // 13 | // Description: main function for face detector 14 | // 15 | // ===================================================================================== 16 | 17 | #include <cstdio> 18 | #include <iostream> 19 | #include <string> 20 | #include <vector> 21 | #include <algorithm> 22 | #include <chrono> 23 | #include <opencv2/opencv.hpp> 24 | 25 | #include "face_detector.h" 26 | 27 | int main(int argc, char** argv) 28 | { 29 | std::string img_path; 30 | if (argc == 1) { 31 | img_path = "../sample.jpg"; 32 | } else if (argc == 2) { 33 | img_path = argv[1]; 34 | } 35 | 36 | std::string model_path = "/home/fangyu/git/Face-Detector-1MB-with-landmark/converted_models/mobilenet/mobilenetv2.pb"; 37 | const int max_side = 320; 38 | float threshold = 0.8; 39 | int num_of_thread = 1; 40 | 41 | Detector* detector = new Detector(model_path); 42 | detector->setParams(threshold, num_of_thread); 43 | Timer timer; 44 | 45 | for (int i = 0; i < 10; i++) { 46 | cv::Mat img = cv::imread(img_path.c_str()); 47 | if (img.empty()) { 48 | std::cout << "cv imread failed: " << img_path.c_str() << std::endl; 49 | return -1; 50 | } 51 | 52 | // scale 53 | float long_side = std::max(img.cols, img.rows); 54 | float scale = max_side/long_side; 55 | std::cout << "scale: " << scale << std::endl; 56 | 57 | cv::Mat img_scale; 58 | cv::Size size = cv::Size(img.cols*scale, img.rows*scale); 59 | cv::resize(img, img_scale, size); 60 | 61 | std::vector<bbox> boxes; 62 | 63 | timer.tic(); 64 | 65 | detector->detect(img_scale, boxes); 66 | timer.toc("-----total time:"); 67 | 68 | // draw image 69 | for (int j = 0; j < boxes.size(); ++j) { 70 | cv::Rect rect(boxes[j].x1/scale, boxes[j].y1/scale, boxes[j].x2/scale - boxes[j].x1/scale, boxes[j].y2/scale - boxes[j].y1/scale); 71 | cv::rectangle(img, rect, cv::Scalar(0, 0, 255), 1, 8, 0); 72 | char test[80]; 73 | sprintf(test, "%f", boxes[j].s); 74 | 75 | cv::putText(img, test, cv::Point(boxes[j].x1/scale, boxes[j].y1/scale), cv::FONT_HERSHEY_COMPLEX, 0.5, cv::Scalar(0, 255, 255)); 76 | cv::circle(img, cv::Point(boxes[j].points[0]._x / scale, boxes[j].points[0]._y / scale), 1, cv::Scalar(0, 0, 225), 4); 77 | cv::circle(img, cv::Point(boxes[j].points[1]._x / scale, boxes[j].points[1]._y / scale), 1, cv::Scalar(0, 255, 225), 4); 78 | cv::circle(img, cv::Point(boxes[j].points[2]._x / scale, boxes[j].points[2]._y / scale), 1, cv::Scalar(255, 0, 225), 4); 79 | cv::circle(img, cv::Point(boxes[j].points[3]._x / scale, boxes[j].points[3]._y / scale), 1, cv::Scalar(0, 255, 0), 4); 80 | cv::circle(img, cv::Point(boxes[j].points[4]._x / scale, boxes[j].points[4]._y / scale), 1, cv::Scalar(255, 0, 0), 4); 81 | } 82 | 83 | cv::imwrite("test.png", img); 84 | } 85 | 86 | return 0; 87 | } 88 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/main_video.cpp:
-------------------------------------------------------------------------------- 1 | #include <cstdio> 2 | #include <iostream> 3 | #include <string> 4 | #include <vector> 5 | #include <algorithm> 6 | #include <chrono> 7 | #include <boost/filesystem.hpp> 8 | #include <opencv2/opencv.hpp> 9 | 10 | #include "FaceDetector.h" 11 | 12 | using namespace std; 13 | using namespace boost::filesystem; 14 | 15 | 16 | int main(int argc, char** argv) 17 | { 18 | 19 | string imgPath; 20 | if (argc == 1) 21 | { 22 | imgPath = "/home/fangyu/Videos/huimin/record_2020_02_27_09_57_18.avi"; 23 | } 24 | else if (argc == 2) 25 | { 26 | imgPath = argv[1]; 27 | } 28 | string param = "../model/face.param"; 29 | string bin = "../model/face.bin"; 30 | const int max_side = 320; 31 | 32 | // slim or RFB 33 | Detector detector(param, bin, false); 34 | // retinaface 35 | // Detector detector(param, bin, true); 36 | Timer timer; 37 | 38 | cv::VideoCapture cap(imgPath); 39 | if (!cap.isOpened()) { 40 | std::cout << "Error opening video stream " << imgPath << std::endl; 41 | return -1; 42 | } 43 | 44 | while (1) { 45 | cv::Mat img; 46 | cap >> img; 47 | 48 | if (img.empty()) 49 | break; 50 | 51 | // scale 52 | float long_side = std::max(img.cols, img.rows); 53 | float scale = max_side/long_side; 54 | cv::Mat img_scale; 55 | cv::Size size = cv::Size(img.cols*scale, img.rows*scale); 56 | cv::resize(img, img_scale, size); 57 | 58 | if (img.empty()) 59 | { 60 | fprintf(stderr, "cv::imread %s failed\n", imgPath.c_str()); 61 | return -1; 62 | } 63 | std::vector<bbox> boxes; 64 | 65 | timer.tic(); 66 | 67 | detector.Detect(img_scale, boxes); 68 | timer.toc("----total timer:"); 69 | 70 | // draw image 71 | for (int j = 0; j < boxes.size(); ++j) { 72 | cv::Rect rect(boxes[j].x1/scale, boxes[j].y1/scale, boxes[j].x2/scale - boxes[j].x1/scale, boxes[j].y2/scale - boxes[j].y1/scale); 73 | cv::rectangle(img, rect, cv::Scalar(0, 0, 255), 1, 8, 0); 74 | char test[80]; 75 | sprintf(test, "%f", boxes[j].s); 76 | 77 | cv::putText(img, test, cv::Point(boxes[j].x1/scale, boxes[j].y1/scale), cv::FONT_HERSHEY_COMPLEX, 0.5, cv::Scalar(0, 255, 255)); 78 | cv::circle(img, cv::Point(boxes[j].point[0]._x / scale, boxes[j].point[0]._y / scale), 1, cv::Scalar(0, 0, 225), 4); 79 | cv::circle(img, cv::Point(boxes[j].point[1]._x / scale, boxes[j].point[1]._y / scale), 1, cv::Scalar(0, 255, 225), 4); 80 | cv::circle(img, cv::Point(boxes[j].point[2]._x / scale, boxes[j].point[2]._y / scale), 1, cv::Scalar(255, 0, 225), 4); 81 | cv::circle(img, cv::Point(boxes[j].point[3]._x / scale, boxes[j].point[3]._y / scale), 1, cv::Scalar(0, 255, 0), 4); 82 | cv::circle(img, cv::Point(boxes[j].point[4]._x / scale, boxes[j].point[4]._y / scale), 1, cv::Scalar(255, 0, 0), 4); 83 | } 84 | 85 | cv::imshow("test", img); 86 | 87 | 88 | char c = (char)cv::waitKey(25); 89 | if (c == 27) 90 | break; 91 | } 92 | 93 | cap.release(); 94 | cv::destroyAllWindows(); 95 | return 0; 96 | 97 | 98 | } 99 | 100 | -------------------------------------------------------------------------------- /convertor/convertCaffe.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | 5 | import caffe 6 | from caffe.proto import caffe_pb2 7 | import onnx 8 | 9 | caffe.set_mode_cpu() 10 | sys.path.append('../') 11 | from onnx2caffe._transformers import ConvAddFuser, ConstantsToInitializers 12 | from onnx2caffe._graph import Graph 13 | 14 | import onnx2caffe._operators as cvt 15 | import onnx2caffe._weightloader as wlr 16 | from onnx2caffe._error_utils import ErrorHandling 17 | from
onnx import shape_inference 18 | 19 | transformers = [ 20 | ConstantsToInitializers(), 21 | ConvAddFuser(), 22 | ] 23 | 24 | 25 | def convertToCaffe(graph, prototxt_save_path, caffe_model_save_path): 26 | exist_edges = [] 27 | layers = [] 28 | exist_nodes = [] 29 | err = ErrorHandling() 30 | for i in graph.inputs: 31 | edge_name = i[0] 32 | input_layer = cvt.make_input(i) 33 | layers.append(input_layer) 34 | exist_edges.append(i[0]) 35 | graph.channel_dims[edge_name] = graph.shape_dict[edge_name][1] 36 | 37 | for id, node in enumerate(graph.nodes): 38 | node_name = node.name 39 | op_type = node.op_type 40 | inputs = node.inputs 41 | inputs_tensor = node.input_tensors 42 | input_non_exist_flag = False 43 | 44 | for inp in inputs: 45 | if inp not in exist_edges and inp not in inputs_tensor: 46 | input_non_exist_flag = True 47 | break 48 | if input_non_exist_flag: 49 | continue 50 | 51 | if op_type not in cvt._ONNX_NODE_REGISTRY: 52 | err.unsupported_op(node) 53 | continue 54 | converter_fn = cvt._ONNX_NODE_REGISTRY[op_type] 55 | layer = converter_fn(node, graph, err) 56 | if type(layer) == tuple: 57 | for l in layer: 58 | layers.append(l) 59 | else: 60 | layers.append(layer) 61 | outs = node.outputs 62 | for out in outs: 63 | exist_edges.append(out) 64 | 65 | net = caffe_pb2.NetParameter() 66 | for id, layer in enumerate(layers): 67 | layers[id] = layer._to_proto() 68 | net.layer.extend(layers) 69 | 70 | with open(prototxt_save_path, 'w') as f: 71 | print(net, file=f) 72 | 73 | caffe.set_mode_cpu() 74 | deploy = prototxt_save_path 75 | net = caffe.Net(deploy, 76 | caffe.TEST) 77 | 78 | for id, node in enumerate(graph.nodes): 79 | node_name = node.name 80 | op_type = node.op_type 81 | inputs = node.inputs 82 | inputs_tensor = node.input_tensors 83 | input_non_exist_flag = False 84 | if op_type not in wlr._ONNX_NODE_REGISTRY: 85 | err.unsupported_op(node) 86 | continue 87 | converter_fn = wlr._ONNX_NODE_REGISTRY[op_type] 88 | converter_fn(net, node, graph, err) 89 | 90 | net.save(caffe_model_save_path) 91 | return net 92 | 93 | 94 | def getGraph(onnx_path): 95 | model = onnx.load(onnx_path) 96 | model = shape_inference.infer_shapes(model) 97 | model_graph = model.graph 98 | graph = Graph.from_onnx(model_graph) 99 | graph = graph.transformed(transformers) 100 | graph.channel_dims = {} 101 | 102 | return graph 103 | 104 | 105 | if __name__ == "__main__": 106 | onnx_path = "./converted_models/mobilenet/mobilenetv3_sim.onnx" 107 | prototxt_path = "./converted_models/mobilenet/mobilenetv3.prototxt" 108 | caffemodel_path = "./converted_models/mobilenet/mobilenetv3.caffemodel" 109 | graph = getGraph(onnx_path) 110 | convertToCaffe(graph, prototxt_path, caffemodel_path) 111 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/main.cpp: -------------------------------------------------------------------------------- 1 | #include <cstdio> 2 | #include <iostream> 3 | #include <string> 4 | #include <vector> 5 | #include <algorithm> 6 | #include <chrono> 7 | #include <boost/filesystem.hpp> 8 | #include <opencv2/opencv.hpp> 9 | 10 | #include "FaceDetector.h" 11 | 12 | using namespace std; 13 | using namespace boost::filesystem; 14 | 15 | void get_image_names(const string &folder, std::vector<string>& file_names) 16 | { 17 | path directory(folder); 18 | directory_iterator itr(directory), end_itr; 19 | string current_file = itr->path().string(); 20 | 21 | for (; itr != end_itr; ++itr) { 22 | if (is_regular_file(itr->path())) { 23 | string filename = itr->path().filename().string(); 24 | file_names.push_back(filename); 25 | } 26 | } 27 | 28 | } 29 | 30 |
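// How the loop in main() below handles resolution (explanatory sketch, not
// upstream comments): the long image side is shrunk to max_side before
// Detect(), so every returned coordinate lives in the scaled image. Mapping
// a value v back to the original image is just v / scale, since
//
//   scale = max_side / max(img.cols, img.rows)
//
// which is why every x1/y1/x2/y2 and landmark coordinate is divided by scale
// before drawing.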
int main(int argc, char** argv) 31 | { 32 | 33 | string imgPath; 34 | if (argc == 1) 35 | { 36 | imgPath = "../sample.jpg"; 37 | } 38 | else if (argc == 2) 39 | { 40 | imgPath = argv[1]; 41 | } 42 | string param = "../model/face.param"; 43 | string bin = "../model/face.bin"; 44 | const int max_side = 320; 45 | 46 | // slim or RFB 47 | Detector detector(param, bin, false); 48 | // retinaface 49 | // Detector detector(param, bin, true); 50 | Timer timer; 51 | 52 | std::vector<string> file_names; 53 | std::string test_path = "/home/fangyu/Videos/face_with_mask/test/"; 54 | std::string save_path = "/home/fangyu/Videos/face_with_mask/test_results/"; 55 | get_image_names(test_path, file_names); 56 | 57 | for (auto img_name : file_names){ 58 | 59 | std::string imgPath = test_path + img_name; 60 | cv::Mat img = cv::imread(imgPath.c_str()); 61 | 62 | // scale 63 | float long_side = std::max(img.cols, img.rows); 64 | float scale = max_side/long_side; 65 | cv::Mat img_scale; 66 | cv::Size size = cv::Size(img.cols*scale, img.rows*scale); 67 | cv::resize(img, img_scale, size); 68 | 69 | if (img.empty()) 70 | { 71 | fprintf(stderr, "cv::imread %s failed\n", imgPath.c_str()); 72 | return -1; 73 | } 74 | std::vector<bbox> boxes; 75 | 76 | timer.tic(); 77 | 78 | detector.Detect(img_scale, boxes); 79 | timer.toc("----total timer:"); 80 | 81 | // draw image 82 | for (int j = 0; j < boxes.size(); ++j) { 83 | cv::Rect rect(boxes[j].x1/scale, boxes[j].y1/scale, boxes[j].x2/scale - boxes[j].x1/scale, boxes[j].y2/scale - boxes[j].y1/scale); 84 | cv::rectangle(img, rect, cv::Scalar(0, 0, 255), 1, 8, 0); 85 | char test[80]; 86 | sprintf(test, "%f", boxes[j].s); 87 | 88 | cv::putText(img, test, cv::Point(boxes[j].x1/scale, boxes[j].y1/scale), cv::FONT_HERSHEY_COMPLEX, 0.5, cv::Scalar(0, 255, 255)); 89 | cv::circle(img, cv::Point(boxes[j].point[0]._x / scale, boxes[j].point[0]._y / scale), 1, cv::Scalar(0, 0, 225), 4); 90 | cv::circle(img, cv::Point(boxes[j].point[1]._x / scale, boxes[j].point[1]._y / scale), 1, cv::Scalar(0, 255, 225), 4); 91 | cv::circle(img, cv::Point(boxes[j].point[2]._x / scale, boxes[j].point[2]._y / scale), 1, cv::Scalar(255, 0, 225), 4); 92 | cv::circle(img, cv::Point(boxes[j].point[3]._x / scale, boxes[j].point[3]._y / scale), 1, cv::Scalar(0, 255, 0), 4); 93 | cv::circle(img, cv::Point(boxes[j].point[4]._x / scale, boxes[j].point[4]._y / scale), 1, cv::Scalar(255, 0, 0), 4); 94 | } 95 | cv::imwrite(save_path + img_name, img); 96 | } 97 | return 0; 98 | } 99 | 100 | -------------------------------------------------------------------------------- /data/wider_face.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import torch 5 | import torch.utils.data as data 6 | import cv2 7 | import numpy as np 8 | 9 | class WiderFaceDetection(data.Dataset): 10 | def __init__(self, txt_path, preproc=None): 11 | self.preproc = preproc 12 | self.imgs_path = [] 13 | self.words = [] 14 | f = open(txt_path,'r') 15 | lines = f.readlines() 16 | isFirst = True 17 | labels = [] 18 | for line in lines: 19 | line = line.rstrip() 20 | if line.startswith('#'): 21 | if isFirst is True: 22 | isFirst = False 23 | else: 24 | labels_copy = labels.copy() 25 | self.words.append(labels_copy) 26 | labels.clear() 27 | path = line[2:] 28 | path = txt_path.replace('label.txt','images/') + path 29 | self.imgs_path.append(path) 30 | else: 31 | line = line.split(' ') 32 | label = [float(x) for x in line] 33 |
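# Annotation layout sketch (assumed from the indexing in __getitem__ below):
# each non-comment line of label.txt is
#   x y w h  lx0 ly0 f0  lx1 ly1 f1  lx2 ly2 f2  lx3 ly3 f3  lx4 ly4 f4
# i.e. a corner-plus-size box followed by five landmarks, each with a flag,
# which is why the landmark reads below skip every third value (label[6],
# label[9], ...) and a negative first landmark x marks the face as having
# no landmark annotation.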
labels.append(label) 34 | 35 | self.words.append(labels) 36 | 37 | def __len__(self): 38 | return len(self.imgs_path) 39 | 40 | def __getitem__(self, index): 41 | img = cv2.imread(self.imgs_path[index]) 42 | height, width, _ = img.shape 43 | 44 | labels = self.words[index] 45 | annotations = np.zeros((0, 15)) 46 | if len(labels) == 0: 47 | return annotations 48 | for idx, label in enumerate(labels): 49 | annotation = np.zeros((1, 15)) 50 | # bbox 51 | annotation[0, 0] = label[0] # x1 52 | annotation[0, 1] = label[1] # y1 53 | annotation[0, 2] = label[0] + label[2] # x2 54 | annotation[0, 3] = label[1] + label[3] # y2 55 | 56 | # landmarks 57 | annotation[0, 4] = label[4] # l0_x 58 | annotation[0, 5] = label[5] # l0_y 59 | annotation[0, 6] = label[7] # l1_x 60 | annotation[0, 7] = label[8] # l1_y 61 | annotation[0, 8] = label[10] # l2_x 62 | annotation[0, 9] = label[11] # l2_y 63 | annotation[0, 10] = label[13] # l3_x 64 | annotation[0, 11] = label[14] # l3_y 65 | annotation[0, 12] = label[16] # l4_x 66 | annotation[0, 13] = label[17] # l4_y 67 | if (annotation[0, 4]<0): 68 | annotation[0, 14] = -1 69 | else: 70 | annotation[0, 14] = 1 71 | 72 | annotations = np.append(annotations, annotation, axis=0) 73 | target = np.array(annotations) 74 | if self.preproc is not None: 75 | img, target = self.preproc(img, target) 76 | 77 | return torch.from_numpy(img), target 78 | 79 | def detection_collate(batch): 80 | """Custom collate fn for dealing with batches of images that have a different 81 | number of associated object annotations (bounding boxes). 82 | 83 | Arguments: 84 | batch: (tuple) A tuple of tensor images and lists of annotations 85 | 86 | Return: 87 | A tuple containing: 88 | 1) (tensor) batch of images stacked on their 0 dim 89 | 2) (list of tensors) annotations for a given image are stacked on 0 dim 90 | """ 91 | targets = [] 92 | imgs = [] 93 | for _, sample in enumerate(batch): 94 | for _, tup in enumerate(sample): 95 | if torch.is_tensor(tup): 96 | imgs.append(tup) 97 | elif isinstance(tup, type(np.empty(0))): 98 | annos = torch.from_numpy(tup).float() 99 | targets.append(annos) 100 | 101 | return (torch.stack(imgs, 0), targets) 102 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/ncnn/lib/cmake/ncnn/ncnn.cmake: -------------------------------------------------------------------------------- 1 | # Generated by CMake 2 | 3 | if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.5) 4 | message(FATAL_ERROR "CMake >= 2.6.0 required") 5 | endif() 6 | cmake_policy(PUSH) 7 | cmake_policy(VERSION 2.6) 8 | #---------------------------------------------------------------- 9 | # Generated CMake target import file. 10 | #---------------------------------------------------------------- 11 | 12 | # Commands may need to know the format version. 13 | set(CMAKE_IMPORT_FILE_VERSION 1) 14 | 15 | # Protect against multiple inclusion, which would fail when already imported targets are added once more. 
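# (Sketch of the guard below: the script tallies which of the expected targets
# are already defined; if all of them exist it returns early, if only some
# exist it fails hard, and only a clean slate falls through to add_library.)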
16 | set(_targetsDefined) 17 | set(_targetsNotDefined) 18 | set(_expectedTargets) 19 | foreach(_expectedTarget ncnn) 20 | list(APPEND _expectedTargets ${_expectedTarget}) 21 | if(NOT TARGET ${_expectedTarget}) 22 | list(APPEND _targetsNotDefined ${_expectedTarget}) 23 | endif() 24 | if(TARGET ${_expectedTarget}) 25 | list(APPEND _targetsDefined ${_expectedTarget}) 26 | endif() 27 | endforeach() 28 | if("${_targetsDefined}" STREQUAL "${_expectedTargets}") 29 | unset(_targetsDefined) 30 | unset(_targetsNotDefined) 31 | unset(_expectedTargets) 32 | set(CMAKE_IMPORT_FILE_VERSION) 33 | cmake_policy(POP) 34 | return() 35 | endif() 36 | if(NOT "${_targetsDefined}" STREQUAL "") 37 | message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_targetsDefined}\nTargets not yet defined: ${_targetsNotDefined}\n") 38 | endif() 39 | unset(_targetsDefined) 40 | unset(_targetsNotDefined) 41 | unset(_expectedTargets) 42 | 43 | 44 | # Compute the installation prefix relative to this file. 45 | get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) 46 | get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) 47 | get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) 48 | get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) 49 | if(_IMPORT_PREFIX STREQUAL "/") 50 | set(_IMPORT_PREFIX "") 51 | endif() 52 | 53 | # Create imported target ncnn 54 | add_library(ncnn STATIC IMPORTED) 55 | 56 | set_target_properties(ncnn PROPERTIES 57 | INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include/ncnn" 58 | INTERFACE_LINK_LIBRARIES "OpenMP::OpenMP_CXX" 59 | INTERFACE_POSITION_INDEPENDENT_CODE "ON" 60 | ) 61 | 62 | if(CMAKE_VERSION VERSION_LESS 2.8.12) 63 | message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.") 64 | endif() 65 | 66 | # Load information for each installed configuration. 67 | get_filename_component(_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) 68 | file(GLOB CONFIG_FILES "${_DIR}/ncnn-*.cmake") 69 | foreach(f ${CONFIG_FILES}) 70 | include(${f}) 71 | endforeach() 72 | 73 | # Cleanup temporary variables. 74 | set(_IMPORT_PREFIX) 75 | 76 | # Loop over all imported files and verify that they actually exist 77 | foreach(target ${_IMPORT_CHECK_TARGETS} ) 78 | foreach(file ${_IMPORT_CHECK_FILES_FOR_${target}} ) 79 | if(NOT EXISTS "${file}" ) 80 | message(FATAL_ERROR "The imported target \"${target}\" references the file 81 | \"${file}\" 82 | but this file does not exist. Possible reasons include: 83 | * The file was deleted, renamed, or moved to another location. 84 | * An install or uninstall procedure did not complete successfully. 85 | * The installation package was faulty and contained 86 | \"${CMAKE_CURRENT_LIST_FILE}\" 87 | but not all the files it references. 88 | ") 89 | endif() 90 | endforeach() 91 | unset(_IMPORT_CHECK_FILES_FOR_${target}) 92 | endforeach() 93 | unset(_IMPORT_CHECK_TARGETS) 94 | 95 | # This file does not depend on other imported targets which have 96 | # been exported from the same project but in a separate export set. 97 | 98 | # Commands beyond this point should not need to know the version. 99 | set(CMAKE_IMPORT_FILE_VERSION) 100 | cmake_policy(POP) 101 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/ncnn/include/ncnn/platform.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 
2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef NCNN_PLATFORM_H 16 | #define NCNN_PLATFORM_H 17 | 18 | #define NCNN_STDIO 1 19 | #define NCNN_STRING 1 20 | #define NCNN_OPENCV 0 21 | #define NCNN_BENCHMARK 0 22 | #define NCNN_PIXEL 1 23 | #define NCNN_PIXEL_ROTATE 0 24 | #define NCNN_VULKAN 0 25 | #define NCNN_REQUANT 0 26 | #define NCNN_AVX2 0 27 | 28 | #ifdef _WIN32 29 | #define WIN32_LEAN_AND_MEAN 30 | #include <windows.h> 31 | #include <process.h> 32 | #else 33 | #include <pthread.h> 34 | #endif 35 | 36 | namespace ncnn { 37 | 38 | #ifdef _WIN32 39 | class Mutex 40 | { 41 | public: 42 | Mutex() { InitializeSRWLock(&srwlock); } 43 | ~Mutex() {} 44 | void lock() { AcquireSRWLockExclusive(&srwlock); } 45 | void unlock() { ReleaseSRWLockExclusive(&srwlock); } 46 | private: 47 | friend class ConditionVariable; 48 | // NOTE SRWLock is available from windows vista 49 | SRWLOCK srwlock; 50 | }; 51 | #else // _WIN32 52 | class Mutex 53 | { 54 | public: 55 | Mutex() { pthread_mutex_init(&mutex, 0); } 56 | ~Mutex() { pthread_mutex_destroy(&mutex); } 57 | void lock() { pthread_mutex_lock(&mutex); } 58 | void unlock() { pthread_mutex_unlock(&mutex); } 59 | private: 60 | friend class ConditionVariable; 61 | pthread_mutex_t mutex; 62 | }; 63 | #endif // _WIN32 64 | 65 | class MutexLockGuard 66 | { 67 | public: 68 | MutexLockGuard(Mutex& _mutex) : mutex(_mutex) { mutex.lock(); } 69 | ~MutexLockGuard() { mutex.unlock(); } 70 | private: 71 | Mutex& mutex; 72 | }; 73 | 74 | #if _WIN32 75 | class ConditionVariable 76 | { 77 | public: 78 | ConditionVariable() { InitializeConditionVariable(&condvar); } 79 | ~ConditionVariable() {} 80 | void wait(Mutex& mutex) { SleepConditionVariableSRW(&condvar, &mutex.srwlock, INFINITE, 0); } 81 | void broadcast() { WakeAllConditionVariable(&condvar); } 82 | void signal() { WakeConditionVariable(&condvar); } 83 | private: 84 | CONDITION_VARIABLE condvar; 85 | }; 86 | #else // _WIN32 87 | class ConditionVariable 88 | { 89 | public: 90 | ConditionVariable() { pthread_cond_init(&cond, 0); } 91 | ~ConditionVariable() { pthread_cond_destroy(&cond); } 92 | void wait(Mutex& mutex) { pthread_cond_wait(&cond, &mutex.mutex); } 93 | void broadcast() { pthread_cond_broadcast(&cond); } 94 | void signal() { pthread_cond_signal(&cond); } 95 | private: 96 | pthread_cond_t cond; 97 | }; 98 | #endif // _WIN32 99 | 100 | #if _WIN32 101 | static unsigned __stdcall start_wrapper(void* args); 102 | class Thread 103 | { 104 | public: 105 | Thread(void* (*start)(void*), void* args = 0) { _start = start; _args = args; handle = (HANDLE)_beginthreadex(0, 0, start_wrapper, this, 0, 0); } 106 | ~Thread() {} 107 | void join() { WaitForSingleObject(handle, INFINITE); CloseHandle(handle); } 108 | private: 109 | friend static unsigned __stdcall start_wrapper(void* arg); 110 | HANDLE handle; 111 | void* (*_start)(void*); 112 | void* _args; 113 | }; 114 | 115 | static unsigned __stdcall start_wrapper(void*
args) 116 | { 117 | Thread* t = (Thread*)args; 118 | t->_start(t->_args); 119 | return 0; 120 | } 121 | #else // _WIN32 122 | class Thread 123 | { 124 | public: 125 | Thread(void* (*start)(void*), void* args = 0) { pthread_create(&t, 0, start, args); } 126 | ~Thread() {} 127 | void join() { pthread_join(t, 0); } 128 | private: 129 | pthread_t t; 130 | }; 131 | #endif // _WIN32 132 | 133 | } // namespace ncnn 134 | 135 | #endif // NCNN_PLATFORM_H 136 | -------------------------------------------------------------------------------- /convertor/convert_to_onnx.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import argparse 4 | import torch 5 | import torch.backends.cudnn as cudnn 6 | import numpy as np 7 | from data import cfg_mnet, cfg_slim, cfg_rfb 8 | from layers.functions.prior_box import PriorBox 9 | from utils.nms.py_cpu_nms import py_cpu_nms 10 | import cv2 11 | from models.retinaface import RetinaFace 12 | from models.net_slim import Slim 13 | from models.net_rfb import RFB 14 | from utils.box_utils import decode, decode_landm 15 | from utils.timer import Timer 16 | 17 | 18 | parser = argparse.ArgumentParser(description='Test') 19 | parser.add_argument('-m', '--trained_model', default='./weights/RBF_Final.pth', 20 | type=str, help='Trained state_dict file path to open') 21 | parser.add_argument('--network', default='mobile0.25', help='Backbone network mobile0.25 or slim or RFB') 22 | parser.add_argument('--long_side', default=320, help='when origin_size is false, long_side is scaled size(320 or 640 for long side)') 23 | parser.add_argument('--cpu', action="store_true", default=True, help='Use cpu inference') 24 | 25 | args = parser.parse_args() 26 | 27 | 28 | def check_keys(model, pretrained_state_dict): 29 | ckpt_keys = set(pretrained_state_dict.keys()) 30 | model_keys = set(model.state_dict().keys()) 31 | used_pretrained_keys = model_keys & ckpt_keys 32 | unused_pretrained_keys = ckpt_keys - model_keys 33 | missing_keys = model_keys - ckpt_keys 34 | print('Missing keys:{}'.format(len(missing_keys))) 35 | print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys))) 36 | print('Used keys:{}'.format(len(used_pretrained_keys))) 37 | assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint' 38 | return True 39 | 40 | 41 | def remove_prefix(state_dict, prefix): 42 | ''' Old style model is stored with all names of parameters sharing common prefix 'module.' 
''' 43 | print('remove prefix \'{}\''.format(prefix)) 44 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 45 | return {f(key): value for key, value in state_dict.items()} 46 | 47 | 48 | def load_model(model, pretrained_path, load_to_cpu): 49 | print('Loading pretrained model from {}'.format(pretrained_path)) 50 | if load_to_cpu: 51 | pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage) 52 | else: 53 | device = torch.cuda.current_device() 54 | pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device)) 55 | if "state_dict" in pretrained_dict.keys(): 56 | pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.') 57 | else: 58 | pretrained_dict = remove_prefix(pretrained_dict, 'module.') 59 | check_keys(model, pretrained_dict) 60 | model.load_state_dict(pretrained_dict, strict=False) 61 | return model 62 | 63 | 64 | if __name__ == '__main__': 65 | torch.set_grad_enabled(False) 66 | 67 | cfg = None 68 | net = None 69 | if args.network == "mobile0.25": 70 | cfg = cfg_mnet 71 | net = RetinaFace(cfg = cfg, phase = 'test') 72 | elif args.network == "slim": 73 | cfg = cfg_slim 74 | net = Slim(cfg = cfg, phase = 'test') 75 | elif args.network == "RFB": 76 | cfg = cfg_rfb 77 | net = RFB(cfg = cfg, phase = 'test') 78 | else: 79 | print("Don't support network!") 80 | exit(0) 81 | 82 | # load weight 83 | net = load_model(net, args.trained_model, args.cpu) 84 | net.eval() 85 | print('Finished loading model!') 86 | print(net) 87 | device = torch.device("cpu" if args.cpu else "cuda") 88 | net = net.to(device) 89 | 90 | ##################export############### 91 | output_onnx = 'faceDetector.onnx' 92 | print("==> Exporting model to ONNX format at '{}'".format(output_onnx)) 93 | input_names = ["input0"] 94 | output_names = ["output0"] 95 | inputs = torch.randn(1, 3, 320, 320).to(device) 96 | torch_out = torch.onnx._export(net, inputs, output_onnx, export_params=True, verbose=False, 97 | input_names=input_names, output_names=output_names) 98 | ##################end############### 99 | 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /test/test_pytorch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | import cv2 5 | 6 | from models.net_rfb import RFB 7 | from models.retinaface import RetinaFace 8 | from data import cfg_rfb, cfg_mnet, cfg_slim 9 | 10 | 11 | def check_keys(model, pretrained_state_dict): 12 | ckpt_keys = set(pretrained_state_dict.keys()) 13 | model_keys = set(model.state_dict().keys()) 14 | used_pretrained_keys = model_keys & ckpt_keys 15 | unused_pretrained_keys = ckpt_keys - model_keys 16 | missing_keys = model_keys - ckpt_keys 17 | print('Missing keys:{}'.format(len(missing_keys))) 18 | print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys))) 19 | print('Used keys:{}'.format(len(used_pretrained_keys))) 20 | assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint' 21 | return True 22 | 23 | def remove_prefix(state_dict, prefix): 24 | ''' Old style model is stored with all names of parameters sharing common prefix 'module.' 
''' 25 | print('remove prefix \'{}\''.format(prefix)) 26 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 27 | return {f(key): value for key, value in state_dict.items()} 28 | 29 | def load_model(model, pretrained_path, load_to_cpu): 30 | print('Loading model from {}'.format(pretrained_path)) 31 | if load_to_cpu: 32 | pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage) 33 | else: 34 | device = torch.cuda.current_device() 35 | pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc:storage.cuda(device)) 36 | if "state_dict" in pretrained_dict.keys(): 37 | pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.') 38 | else: 39 | pretrained_dict = remove_prefix(pretrained_dict, 'module.') 40 | 41 | check_keys(model, pretrained_dict) 42 | model.load_state_dict(pretrained_dict, strict=False) 43 | return model 44 | 45 | 46 | def load_graph(frozen_graph_filename): 47 | with tf.gfile.GFile(frozen_graph_filename, "rb") as f: 48 | graph_def = tf.GraphDef() 49 | graph_def.ParseFromString(f.read()) 50 | with tf.Graph().as_default() as graph: 51 | tf.import_graph_def(graph_def,input_map = None,return_elements = None,name = "",op_dict = None,producer_op_list = None) 52 | return graph 53 | 54 | if __name__ == '__main__': 55 | torch.set_grad_enabled(False) 56 | 57 | cfg = cfg_mnet 58 | net = RetinaFace(cfg=cfg, phase='test') 59 | 60 | net = load_model(net, "./converted_models/mobilenet/mobilenet0.25_Final.pth", True) 61 | net.eval() 62 | print('Finish loading model!') 63 | #print(net) 64 | #cudnn.benchmark = True 65 | device = torch.device("cpu") 66 | net = net.to(device) 67 | 68 | img_raw = cv2.imread("./Face_Detector_ncnn/sample.jpg") 69 | #img = np.ones((3,240,320), dtype=np.float32) 70 | img = np.float32(img_raw) 71 | long_side = 320 72 | im_shape = img.shape 73 | im_size_min = np.min(im_shape[0:2]) 74 | im_size_max = np.max(im_shape[0:2]) 75 | resize = float(long_side) / float(im_size_min) 76 | if np.round(resize * im_size_max) > long_side: 77 | resize = float(long_side) / float(im_size_max) 78 | 79 | if resize != 1: 80 | img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR) 81 | 82 | img -= (104, 117, 123) 83 | img = img.transpose(2,0,1) 84 | img = torch.from_numpy(img).unsqueeze(0) 85 | img = img.to(device) 86 | loc, conf, landms = net(img) 87 | 88 | _, n, _ = loc.shape 89 | loc_np = loc.data.cpu().numpy() 90 | conf_np = conf.data.cpu().numpy() 91 | landms_np = landms.data.cpu().numpy() 92 | with open("pytorch_result.txt", 'w') as fd: 93 | for j in range(n): 94 | fd.write(str(conf_np[0,j,1]) + ' '+str(loc_np[0, j, 0]) + ' ' + str(loc_np[0, j, 1]) + ' ' + str(loc_np[0, j, 2]) + ' ' + str(loc_np[0, j, 3]) + '\n') 95 | #fd.write(str(landms_np[0,j,0]) + ' ' + str(landms_np[0,j,1]) + ' ' + str(landms[0,j,2]) + ' ' + str(landms_np[0,j,3]) + ' ' + str(landms_np[0,j,4) + ' ' + str(landms_np[0,j,5]) + ' ' + str(landms_np[0,j,6]) + ' ' + str(landms_np[0,j,7]) + ' ' + str(landms_np[0,j,8]) + ' ' + str(landms_np[0,j,9]) + '\n') 96 | #fd.write(str(landms_np[0,j,0]) + ' ' + str(landms_np[0,j,1]) + str(landms_np[0,j,2]) + ' ' + str(landms_np[0,j,3]) + '\n') 97 | print(loc.shape) 98 | print(loc) 99 | print(conf) 100 | print(landms) 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_tflite/main_folder.cpp: -------------------------------------------------------------------------------- 1 | // 
===================================================================================== 2 | // 3 | // Filename: main.cpp 4 | // 5 | // Version: 1.0 6 | // Created: 08/05/2020 15:48:58 7 | // Revision: none 8 | // Compiler: g++ 9 | // 10 | // Author: Yu Fang (Robotics), yu.fang@iim.ltd 11 | // Company: IIM 12 | // 13 | // Description: main function for face detector 14 | // 15 | // ===================================================================================== 16 | 17 | #include <cstdio> 18 | #include <iostream> 19 | #include <string> 20 | #include <vector> 21 | #include <algorithm> 22 | #include <chrono> 23 | #include <boost/filesystem.hpp> 24 | #include <opencv2/opencv.hpp> 25 | 26 | #include "face_detector.h" 27 | 28 | using namespace std; 29 | using namespace boost::filesystem; 30 | 31 | void get_image_names(const string &folder, std::vector<string>& file_names) 32 | { 33 | path directory(folder); 34 | directory_iterator itr(directory), end_itr; 35 | string current_file = itr->path().string(); 36 | 37 | for (; itr != end_itr; ++itr) { 38 | if (is_regular_file(itr->path())) { 39 | string filename = itr->path().filename().string(); 40 | file_names.push_back(filename); 41 | } 42 | } 43 | } 44 | 45 | int main(int argc, char** argv) 46 | { 47 | std::string folder_path; 48 | if (argc == 1) { 49 | folder_path = "../images"; 50 | } else if (argc == 2) { 51 | folder_path = argv[1]; 52 | } 53 | 54 | std::string model_path = "/home/fangyu/git/Face-Detector-1MB-with-landmark/converted_models/mobilenet/mobilenetv2.tflite"; 55 | const int max_side = 320; 56 | float threshold = 0.9; 57 | int num_of_thread = 1; 58 | 59 | Detector* detector = new Detector(model_path); 60 | detector->setParams(threshold, num_of_thread); 61 | Timer timer; 62 | int cnt = 0; 63 | 64 | std::vector<string> file_names; 65 | get_image_names(folder_path, file_names); 66 | 67 | std::string save_path = folder_path+"_results/"; 68 | boost::filesystem::create_directories(save_path.c_str()); 69 | 70 | for (auto img_name : file_names) { 71 | std::string img_path = folder_path + "/" + img_name; 72 | cv::Mat img = cv::imread(img_path.c_str()); 73 | if (img.empty()) { 74 | std::cout << "cv imread failed: " << img_path.c_str() << std::endl; 75 | return -1; 76 | } 77 | 78 | // scale 79 | float long_side = std::max(img.cols, img.rows); 80 | float scale = max_side/long_side; 81 | cv::Mat img_scale; 82 | cv::Size size = cv::Size(img.cols*scale, img.rows*scale); 83 | cv::resize(img, img_scale, size); 84 | 85 | std::vector<bbox> boxes; 86 | 87 | timer.tic(); 88 | 89 | detector->detect(img_scale, boxes); 90 | timer.toc("-----total time:"); 91 | 92 | cnt += boxes.size(); 93 | // draw image 94 | for (int j = 0; j < boxes.size(); ++j) { 95 | cv::Rect rect(boxes[j].x1/scale, boxes[j].y1/scale, boxes[j].x2/scale - boxes[j].x1/scale, boxes[j].y2/scale - boxes[j].y1/scale); 96 | cv::rectangle(img, rect, cv::Scalar(0, 0, 255), 1, 8, 0); 97 | char test[80]; 98 | sprintf(test, "%f", boxes[j].s); 99 | 100 | cv::putText(img, test, cv::Point(boxes[j].x1/scale, boxes[j].y1/scale), cv::FONT_HERSHEY_COMPLEX, 0.5, cv::Scalar(0, 255, 255)); 101 | cv::circle(img, cv::Point(boxes[j].points[0]._x / scale, boxes[j].points[0]._y / scale), 1, cv::Scalar(0, 0, 225), 4); 102 | cv::circle(img, cv::Point(boxes[j].points[1]._x / scale, boxes[j].points[1]._y / scale), 1, cv::Scalar(0, 255, 225), 4); 103 | cv::circle(img, cv::Point(boxes[j].points[2]._x / scale, boxes[j].points[2]._y / scale), 1, cv::Scalar(255, 0, 225), 4); 104 | cv::circle(img, cv::Point(boxes[j].points[3]._x / scale, boxes[j].points[3]._y / scale), 1, cv::Scalar(0, 255, 0), 4); 105 |
cv::circle(img, cv::Point(boxes[j].points[4]._x / scale, boxes[j].points[4]._y / scale), 1, cv::Scalar(255, 0, 0), 4); 106 | } 107 | 108 | cv::imwrite(save_path + img_name, img); 109 | } 110 | 111 | std::cout << "========================" << std::endl; 112 | std::cout << "total faces detected: " << cnt << std::endl; 113 | 114 | return 0; 115 | } 116 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_tf/main_folder.cpp: -------------------------------------------------------------------------------- 1 | // ===================================================================================== 2 | // 3 | // Filename: main.cpp 4 | // 5 | // Version: 1.0 6 | // Created: 08/05/2020 15:48:58 7 | // Revision: none 8 | // Compiler: g++ 9 | // 10 | // Author: Yu Fang (Robotics), yu.fang@iim.ltd 11 | // Company: IIM 12 | // 13 | // Description: main function for face detector 14 | // 15 | // ===================================================================================== 16 | 17 | #include <cstdio> 18 | #include <iostream> 19 | #include <string> 20 | #include <vector> 21 | #include <algorithm> 22 | #include <chrono> 23 | #include <boost/filesystem.hpp> 24 | #include <opencv2/opencv.hpp> 25 | 26 | #include "face_detector.h" 27 | 28 | using namespace std; 29 | using namespace boost::filesystem; 30 | 31 | void get_image_names(const string &folder, std::vector<string>& file_names) 32 | { 33 | path directory(folder); 34 | directory_iterator itr(directory), end_itr; 35 | string current_file = itr->path().string(); 36 | 37 | for (; itr != end_itr; ++itr) { 38 | if (is_regular_file(itr->path())) { 39 | string filename = itr->path().filename().string(); 40 | file_names.push_back(filename); 41 | } 42 | } 43 | } 44 | 45 | int main(int argc, char** argv) 46 | { 47 | std::string folder_path; 48 | if (argc == 1) { 49 | folder_path = "../images"; 50 | } else if (argc == 2) { 51 | folder_path = argv[1]; 52 | } 53 | 54 | //std::string model_path = "../model/RFB.pb"; 55 | std::string model_path = "/home/fangyu/git/Face-Detector-1MB-with-landmark/converted_models/mobilenet/mobilenetv3.pb"; 56 | const int max_side = 640; 57 | float threshold = 0.9; 58 | int num_of_thread = 1; 59 | 60 | Detector* detector = new Detector(model_path); 61 | detector->setParams(threshold, num_of_thread); 62 | Timer timer; 63 | int cnt = 0; 64 | 65 | std::vector<string> file_names; 66 | get_image_names(folder_path, file_names); 67 | 68 | std::string save_path = folder_path+"_results/"; 69 | boost::filesystem::create_directories(save_path.c_str()); 70 | 71 | for (auto img_name : file_names) { 72 | std::string img_path = folder_path + "/" + img_name; 73 | cv::Mat img = cv::imread(img_path.c_str()); 74 | if (img.empty()) { 75 | std::cout << "cv imread failed: " << img_path.c_str() << std::endl; 76 | return -1; 77 | } 78 | 79 | // scale 80 | float long_side = std::max(img.cols, img.rows); 81 | float scale = max_side/long_side; 82 | cv::Mat img_scale; 83 | cv::Size size = cv::Size(img.cols*scale, img.rows*scale); 84 | cv::resize(img, img_scale, size); 85 | 86 | std::vector<bbox> boxes; 87 | 88 | timer.tic(); 89 | 90 | detector->detect(img_scale, boxes); 91 | timer.toc("-----total time:"); 92 | 93 | cnt += boxes.size(); 94 | // draw image 95 | for (int j = 0; j < boxes.size(); ++j) { 96 | cv::Rect rect(boxes[j].x1/scale, boxes[j].y1/scale, boxes[j].x2/scale - boxes[j].x1/scale, boxes[j].y2/scale - boxes[j].y1/scale); 97 | cv::rectangle(img, rect, cv::Scalar(0, 0, 255), 1, 8, 0); 98 | char test[80]; 99 | sprintf(test, "%f", boxes[j].s); 100 | 101 | cv::putText(img, test,
        // draw image
        for (size_t j = 0; j < boxes.size(); ++j) {
            cv::Rect rect(boxes[j].x1/scale, boxes[j].y1/scale, boxes[j].x2/scale - boxes[j].x1/scale, boxes[j].y2/scale - boxes[j].y1/scale);
            cv::rectangle(img, rect, cv::Scalar(0, 0, 255), 1, 8, 0);
            char test[80];
            snprintf(test, sizeof(test), "%f", boxes[j].s);

            cv::putText(img, test, cv::Point(boxes[j].x1/scale, boxes[j].y1/scale), cv::FONT_HERSHEY_COMPLEX, 0.5, cv::Scalar(0, 255, 255));
            cv::circle(img, cv::Point(boxes[j].points[0]._x / scale, boxes[j].points[0]._y / scale), 1, cv::Scalar(0, 0, 225), 4);
            cv::circle(img, cv::Point(boxes[j].points[1]._x / scale, boxes[j].points[1]._y / scale), 1, cv::Scalar(0, 255, 225), 4);
            cv::circle(img, cv::Point(boxes[j].points[2]._x / scale, boxes[j].points[2]._y / scale), 1, cv::Scalar(255, 0, 225), 4);
            cv::circle(img, cv::Point(boxes[j].points[3]._x / scale, boxes[j].points[3]._y / scale), 1, cv::Scalar(0, 255, 0), 4);
            cv::circle(img, cv::Point(boxes[j].points[4]._x / scale, boxes[j].points[4]._y / scale), 1, cv::Scalar(255, 0, 0), 4);
        }

        cv::imwrite(save_path + img_name, img);
    }

    std::cout << "========================" << std::endl;
    std::cout << "total faces detected: " << cnt << std::endl;

    delete detector;
    return 0;
}
--------------------------------------------------------------------------------
/models/net.py:
--------------------------------------------------------------------------------
import time
import torch
import torch.nn as nn
import torchvision.models._utils as _utils
import torchvision.models as models
import torch.nn.functional as F
from torch.autograd import Variable

def conv_bn(inp, oup, stride = 1):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU(inplace=True)
    )

def conv_bn_no_relu(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
    )

def conv_bn1X1(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU(inplace=True)
    )

def conv_dw(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
        nn.BatchNorm2d(inp),
        nn.ReLU(inplace=True),

        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU(inplace=True)
    )

class SSH(nn.Module):
    def __init__(self, in_channel, out_channel):
        super(SSH, self).__init__()
        assert out_channel % 4 == 0
        # leaky slope kept for parity with the reference RetinaFace code;
        # the conv_bn blocks above use plain ReLU, so it is currently unused
        leaky = 0
        if (out_channel <= 64):
            leaky = 0.1
        self.conv3X3 = conv_bn_no_relu(in_channel, out_channel//2, stride=1)

        self.conv5X5_1 = conv_bn(in_channel, out_channel//4, stride=1)
        self.conv5X5_2 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1)

        self.conv7X7_2 = conv_bn(out_channel//4, out_channel//4, stride=1)
        self.conv7x7_3 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1)

    def forward(self, input):
        conv3X3 = self.conv3X3(input)

        conv5X5_1 = self.conv5X5_1(input)
        conv5X5 = self.conv5X5_2(conv5X5_1)

        conv7X7_2 = self.conv7X7_2(conv5X5_1)
        conv7X7 = self.conv7x7_3(conv7X7_2)

        out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1)
        out = F.relu(out)
        return out

class FPN(nn.Module):
    def __init__(self,in_channels_list,out_channels):
        super(FPN,self).__init__()
        leaky = 0  # unused, see the note in SSH above
        if (out_channels <= 64):
            leaky = 0.1
        self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride = 1)
        self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride = 1)
        self.output3 = conv_bn1X1(in_channels_list[2],
out_channels, stride = 1) 77 | 78 | self.merge1 = conv_bn(out_channels, out_channels) 79 | self.merge2 = conv_bn(out_channels, out_channels) 80 | 81 | def forward(self, input): 82 | # names = list(input.keys()) 83 | input = list(input.values()) 84 | 85 | output1 = self.output1(input[0]) 86 | output2 = self.output2(input[1]) 87 | output3 = self.output3(input[2]) 88 | 89 | up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode="nearest") 90 | output2 = output2 + up3 91 | output2 = self.merge2(output2) 92 | 93 | up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode="nearest") 94 | output1 = output1 + up2 95 | output1 = self.merge1(output1) 96 | 97 | out = [output1, output2, output3] 98 | return out 99 | 100 | 101 | 102 | class MobileNetV1(nn.Module): 103 | def __init__(self): 104 | super(MobileNetV1, self).__init__() 105 | self.stage1 = nn.Sequential( 106 | conv_bn(3, 8, 2), # 3 107 | conv_dw(8, 16, 1), # 7 108 | conv_dw(16, 32, 2), # 11 109 | conv_dw(32, 32, 1), # 19 110 | conv_dw(32, 64, 2), # 27 111 | conv_dw(64, 64, 1), # 43 112 | ) 113 | self.stage2 = nn.Sequential( 114 | conv_dw(64, 128, 2), # 43 + 16 = 59 115 | conv_dw(128, 128, 1), # 59 + 32 = 91 116 | conv_dw(128, 128, 1), # 91 + 32 = 123 117 | conv_dw(128, 128, 1), # 123 + 32 = 155 118 | conv_dw(128, 128, 1), # 155 + 32 = 187 119 | conv_dw(128, 128, 1), # 187 + 32 = 219 120 | ) 121 | self.stage3 = nn.Sequential( 122 | conv_dw(128, 256, 2), # 219 +3 2 = 241 123 | conv_dw(256, 256, 1), # 241 + 64 = 301 124 | ) 125 | self.avg = nn.AdaptiveAvgPool2d((1,1)) 126 | self.fc = nn.Linear(256, 1000) 127 | 128 | def forward(self, x): 129 | x = self.stage1(x) 130 | x = self.stage2(x) 131 | x = self.stage3(x) 132 | x = self.avg(x) 133 | # x = self.model(x) 134 | x = x.view(-1, 256) 135 | x = self.fc(x) 136 | return x 137 | 138 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/ncnn/include/ncnn/layer.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
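//
// Illustrative sketch (not part of the original header): a custom layer built on
// the Layer interface below and registered through the DEFINE_LAYER_CREATOR macro
// declared at the bottom of this file. The registration call assumes stock ncnn's
// Net::register_custom_layer:
//
//   class Swish : public ncnn::Layer
//   {
//   public:
//       Swish() { one_blob_only = true; support_inplace = true; }
//       virtual int forward_inplace(ncnn::Mat& m, const ncnn::Option& /*opt*/) const
//       {
//           for (int q = 0; q < m.c; q++)
//           {
//               float* ptr = m.channel(q);
//               for (int i = 0; i < m.w * m.h; i++)
//                   ptr[i] = ptr[i] / (1.f + expf(-ptr[i]));
//           }
//           return 0;
//       }
//   };
//   DEFINE_LAYER_CREATOR(Swish)
//   // net.register_custom_layer("Swish", Swish_layer_creator);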

#ifndef NCNN_LAYER_H
#define NCNN_LAYER_H

#include <stdio.h>
#include <string>
#include <vector>
#include <math.h>
#include "platform.h"
#include "mat.h"
#include "modelbin.h"
#include "option.h"
#include "paramdict.h"

#if NCNN_VULKAN
#include <vulkan/vulkan.h>
#include "command.h"
#include "pipeline.h"
#endif // NCNN_VULKAN

namespace ncnn {

class Layer
{
public:
    // empty
    Layer();
    // virtual destructor
    virtual ~Layer();

    // load layer specific parameter from parsed dict
    // return 0 if success
    virtual int load_param(const ParamDict& pd);

    // load layer specific weight data from model binary
    // return 0 if success
    virtual int load_model(const ModelBin& mb);

    // layer implementation specific setup
    // return 0 if success
    virtual int create_pipeline(const Option& opt = Option());

    // layer implementation specific clean
    // return 0 if success
    virtual int destroy_pipeline(const Option& opt = Option());

public:
    // one input and one output blob
    bool one_blob_only;

    // support inplace inference
    bool support_inplace;

    // support vulkan compute
    bool support_vulkan;

    // accept input blob with packed storage
    bool support_packing;

public:
    // implement inference
    // return 0 if success
    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt = Option()) const;
    virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt = Option()) const;

    // implement inplace inference
    // return 0 if success
    virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt = Option()) const;
    virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt = Option()) const;

#if NCNN_VULKAN
public:
    // upload weight blob from host to device
    virtual int upload_model(VkTransfer& cmd, const Option& opt = Option());

public:
    // implement inference
    // return 0 if success
    virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt = Option()) const;
    virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt = Option()) const;

    // implement inplace inference
    // return 0 if success
    virtual int forward_inplace(std::vector<VkMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt = Option()) const;
    virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt = Option()) const;

public:
    // assigned immediately after creating this layer
    const VulkanDevice* vkdev;
#endif // NCNN_VULKAN

public:
    // layer type index
    int typeindex;
#if NCNN_STRING
    // layer type name
    std::string type;
    // layer name
    std::string name;
#endif // NCNN_STRING
    // blob index which this layer needs as input
    std::vector<int> bottoms;
    // blob index which this layer produces as output
    std::vector<int> tops;
};

// layer factory function
typedef Layer* (*layer_creator_func)();

struct layer_registry_entry
{
#if NCNN_STRING
    // layer type name
    const char* name;
#endif // NCNN_STRING
    // layer factory entry
    layer_creator_func creator;
};

#if NCNN_STRING
134 | // get layer type from type name 135 | int layer_to_index(const char* type); 136 | // create layer from type name 137 | Layer* create_layer(const char* type); 138 | #endif // NCNN_STRING 139 | // create layer from layer type 140 | Layer* create_layer(int index); 141 | 142 | #define DEFINE_LAYER_CREATOR(name) \ 143 | ::ncnn::Layer* name##_layer_creator() { return new name; } 144 | 145 | } // namespace ncnn 146 | 147 | #endif // NCNN_LAYER_H 148 | -------------------------------------------------------------------------------- /models/net_slim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision.models.detection.backbone_utils as backbone_utils 4 | import torchvision.models._utils as _utils 5 | import torch.nn.functional as F 6 | from collections import OrderedDict 7 | 8 | def conv_bn(inp, oup, stride = 1): 9 | return nn.Sequential( 10 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 11 | nn.BatchNorm2d(oup), 12 | nn.ReLU(inplace=True) 13 | ) 14 | 15 | def depth_conv2d(inp, oup, kernel=1, stride=1, pad=0): 16 | return nn.Sequential( 17 | nn.Conv2d(inp, inp, kernel_size = kernel, stride = stride, padding=pad, groups=inp), 18 | nn.ReLU(inplace=True), 19 | nn.Conv2d(inp, oup, kernel_size=1) 20 | ) 21 | 22 | def conv_dw(inp, oup, stride): 23 | return nn.Sequential( 24 | nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), 25 | nn.BatchNorm2d(inp), 26 | nn.ReLU(inplace=True), 27 | 28 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 29 | nn.BatchNorm2d(oup), 30 | nn.ReLU(inplace=True) 31 | ) 32 | 33 | class Slim(nn.Module): 34 | def __init__(self, cfg = None, phase = 'train'): 35 | """ 36 | :param cfg: Network related settings. 37 | :param phase: train or test. 
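        The four detection branches tap conv8, conv11, conv13 and conv14,
        i.e. feature maps at strides 8, 16, 32 and 64 of the input image.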
38 | """ 39 | super(Slim, self).__init__() 40 | self.phase = phase 41 | self.num_classes = 2 42 | 43 | self.conv1 = conv_bn(3, 16, 2) 44 | self.conv2 = conv_dw(16, 32, 1) 45 | self.conv3 = conv_dw(32, 32, 2) 46 | self.conv4 = conv_dw(32, 32, 1) 47 | self.conv5 = conv_dw(32, 64, 2) 48 | self.conv6 = conv_dw(64, 64, 1) 49 | self.conv7 = conv_dw(64, 64, 1) 50 | self.conv8 = conv_dw(64, 64, 1) 51 | 52 | self.conv9 = conv_dw(64, 128, 2) 53 | self.conv10 = conv_dw(128, 128, 1) 54 | self.conv11 = conv_dw(128, 128, 1) 55 | 56 | self.conv12 = conv_dw(128, 256, 2) 57 | self.conv13 = conv_dw(256, 256, 1) 58 | 59 | self.conv14 = nn.Sequential( 60 | nn.Conv2d(in_channels=256, out_channels=64, kernel_size=1), 61 | nn.ReLU(inplace=True), 62 | depth_conv2d(64, 256, kernel=3, stride=2, pad=1), 63 | nn.ReLU(inplace=True) 64 | ) 65 | self.loc, self.conf, self.landm = self.multibox(self.num_classes); 66 | 67 | def multibox(self, num_classes): 68 | loc_layers = [] 69 | conf_layers = [] 70 | landm_layers = [] 71 | loc_layers += [depth_conv2d(64, 3 * 4, kernel=3, pad=1)] 72 | conf_layers += [depth_conv2d(64, 3 * num_classes, kernel=3, pad=1)] 73 | landm_layers += [depth_conv2d(64, 3 * 10, kernel=3, pad=1)] 74 | 75 | loc_layers += [depth_conv2d(128, 2 * 4, kernel=3, pad=1)] 76 | conf_layers += [depth_conv2d(128, 2 * num_classes, kernel=3, pad=1)] 77 | landm_layers += [depth_conv2d(128, 2 * 10, kernel=3, pad=1)] 78 | 79 | loc_layers += [depth_conv2d(256, 2 * 4, kernel=3, pad=1)] 80 | conf_layers += [depth_conv2d(256, 2 * num_classes, kernel=3, pad=1)] 81 | landm_layers += [depth_conv2d(256, 2 * 10, kernel=3, pad=1)] 82 | 83 | loc_layers += [nn.Conv2d(256, 3 * 4, kernel_size=3, padding=1)] 84 | conf_layers += [nn.Conv2d(256, 3 * num_classes, kernel_size=3, padding=1)] 85 | landm_layers += [nn.Conv2d(256, 3 * 10, kernel_size=3, padding=1)] 86 | return nn.Sequential(*loc_layers), nn.Sequential(*conf_layers), nn.Sequential(*landm_layers) 87 | 88 | 89 | def forward(self,inputs): 90 | detections = list() 91 | loc = list() 92 | conf = list() 93 | landm = list() 94 | 95 | x1 = self.conv1(inputs) 96 | x2 = self.conv2(x1) 97 | x3 = self.conv3(x2) 98 | x4 = self.conv4(x3) 99 | x5 = self.conv5(x4) 100 | x6 = self.conv6(x5) 101 | x7 = self.conv7(x6) 102 | x8 = self.conv8(x7) 103 | detections.append(x8) 104 | 105 | x9 = self.conv9(x8) 106 | x10 = self.conv10(x9) 107 | x11 = self.conv11(x10) 108 | detections.append(x11) 109 | 110 | x12 = self.conv12(x11) 111 | x13 = self.conv13(x12) 112 | detections.append(x13) 113 | 114 | x14= self.conv14(x13) 115 | detections.append(x14) 116 | 117 | for (x, l, c, lam) in zip(detections, self.loc, self.conf, self.landm): 118 | loc.append(l(x).permute(0, 2, 3, 1).contiguous()) 119 | conf.append(c(x).permute(0, 2, 3, 1).contiguous()) 120 | landm.append(lam(x).permute(0, 2, 3, 1).contiguous()) 121 | 122 | bbox_regressions = torch.cat([o.view(o.size(0), -1, 4) for o in loc], 1) 123 | classifications = torch.cat([o.view(o.size(0), -1, 2) for o in conf], 1) 124 | ldm_regressions = torch.cat([o.view(o.size(0), -1, 10) for o in landm], 1) 125 | 126 | 127 | 128 | if self.phase == 'train': 129 | output = (bbox_regressions, classifications, ldm_regressions) 130 | else: 131 | output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions) 132 | return output 133 | -------------------------------------------------------------------------------- /convertor/MyCaffe.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict, 
Counter 2 | 3 | from caffe.proto import caffe_pb2 4 | from google import protobuf 5 | import six 6 | 7 | def param_name_dict(): 8 | """Find out the correspondence between layer names and parameter names.""" 9 | 10 | layer = caffe_pb2.LayerParameter() 11 | # get all parameter names (typically underscore case) and corresponding 12 | # type names (typically camel case), which contain the layer names 13 | # (note that not all parameters correspond to layers, but we'll ignore that) 14 | param_names = [f.name for f in layer.DESCRIPTOR.fields if f.name.endswith('_param')] 15 | param_type_names = [type(getattr(layer, s)).__name__ for s in param_names] 16 | # strip the final '_param' or 'Parameter' 17 | param_names = [s[:-len('_param')] for s in param_names] 18 | param_type_names = [s[:-len('Parameter')] for s in param_type_names] 19 | return dict(zip(param_type_names, param_names)) 20 | 21 | def assign_proto(proto, name, val): 22 | """Assign a Python object to a protobuf message, based on the Python 23 | type (in recursive fashion). Lists become repeated fields/messages, dicts 24 | become messages, and other types are assigned directly. For convenience, 25 | repeated fields whose values are not lists are converted to single-element 26 | lists; e.g., `my_repeated_int_field=3` is converted to 27 | `my_repeated_int_field=[3]`.""" 28 | 29 | is_repeated_field = hasattr(getattr(proto, name), 'extend') 30 | if is_repeated_field and not isinstance(val, list): 31 | val = [val] 32 | if isinstance(val, list): 33 | if isinstance(val[0], dict): 34 | for item in val: 35 | proto_item = getattr(proto, name).add() 36 | for k, v in six.iteritems(item): 37 | assign_proto(proto_item, k, v) 38 | else: 39 | getattr(proto, name).extend(val) 40 | elif isinstance(val, dict): 41 | for k, v in six.iteritems(val): 42 | assign_proto(getattr(proto, name), k, v) 43 | else: 44 | setattr(proto, name, val) 45 | 46 | class Function(object): 47 | """A Function specifies a layer, its parameters, and its inputs (which 48 | are Tops from other layers).""" 49 | 50 | def __init__(self, type_name, layer_name, inputs,outputs, **params): 51 | self.type_name = type_name 52 | self.inputs = inputs 53 | self.outputs = outputs 54 | self.params = params 55 | self.layer_name = layer_name 56 | self.ntop = self.params.get('ntop', 1) 57 | # use del to make sure kwargs are not double-processed as layer params 58 | if 'ntop' in self.params: 59 | del self.params['ntop'] 60 | self.in_place = self.params.get('in_place', False) 61 | if 'in_place' in self.params: 62 | del self.params['in_place'] 63 | # self.tops = tuple(Top(self, n) for n in range(self.ntop))l 64 | 65 | def _get_name(self, names, autonames): 66 | if self not in names and self.ntop > 0: 67 | names[self] = self._get_top_name(self.tops[0], names, autonames) 68 | elif self not in names: 69 | autonames[self.type_name] += 1 70 | names[self] = self.type_name + str(autonames[self.type_name]) 71 | return names[self] 72 | 73 | def _get_top_name(self, top, names, autonames): 74 | if top not in names: 75 | autonames[top.fn.type_name] += 1 76 | names[top] = top.fn.type_name + str(autonames[top.fn.type_name]) 77 | return names[top] 78 | 79 | def _to_proto(self): 80 | bottom_names = [] 81 | for inp in self.inputs: 82 | # inp._to_proto(layers, names, autonames) 83 | bottom_names.append(inp) 84 | layer = caffe_pb2.LayerParameter() 85 | layer.type = self.type_name 86 | layer.bottom.extend(bottom_names) 87 | 88 | if self.in_place: 89 | layer.top.extend(layer.bottom) 90 | else: 91 | for top in self.outputs: 92 
                layer.top.append(top)
        layer.name = self.layer_name
        # print(self.type_name + "...")
        for k, v in six.iteritems(self.params):
            # special case to handle generic *params
            # print("generating "+k+"...")

            if k.endswith('param'):
                assign_proto(layer, k, v)
            else:
                try:
                    assign_proto(getattr(layer,
                                         _param_names[self.type_name] + '_param'), k, v)
                except (AttributeError, KeyError):
                    assign_proto(layer, k, v)

        return layer

class Layers(object):
    """A Layers object is a pseudo-module which generates functions that specify
    layers; e.g., Layers().Convolution(layer_name, inputs, outputs, kernel_size=3)
    will produce a Function describing a 3x3 convolution applied to its inputs."""

    def __getattr__(self, name):
        def layer_fn(*args, **kwargs):
            # forward the caller's positional arguments (layer_name, inputs,
            # outputs) and keyword params to Function, instead of passing the
            # args tuple and kwargs dict themselves as single arguments
            fn = Function(name, *args, **kwargs)
            return fn
        return layer_fn




_param_names = param_name_dict()

--------------------------------------------------------------------------------
/models/retinaface.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torchvision.models.detection.backbone_utils as backbone_utils
import torchvision.models._utils as _utils
import torch.nn.functional as F
from collections import OrderedDict

from models.net import MobileNetV1 as MobileNetV1
from models.net import FPN as FPN
from models.net import SSH as SSH



class ClassHead(nn.Module):
    def __init__(self,inchannels=512,num_anchors=3):
        super(ClassHead,self).__init__()
        self.num_anchors = num_anchors
        self.conv1x1 = nn.Conv2d(inchannels,self.num_anchors*2,kernel_size=(1,1),stride=1,padding=0)

    def forward(self,x):
        out = self.conv1x1(x)
        out = out.permute(0,2,3,1).contiguous()

        return out.view(out.shape[0], -1, 2)

class BboxHead(nn.Module):
    def __init__(self,inchannels=512,num_anchors=3):
        super(BboxHead,self).__init__()
        self.conv1x1 = nn.Conv2d(inchannels,num_anchors*4,kernel_size=(1,1),stride=1,padding=0)

    def forward(self,x):
        out = self.conv1x1(x)
        out = out.permute(0,2,3,1).contiguous()

        return out.view(out.shape[0], -1, 4)

class LandmarkHead(nn.Module):
    def __init__(self,inchannels=512,num_anchors=3):
        super(LandmarkHead,self).__init__()
        self.conv1x1 = nn.Conv2d(inchannels,num_anchors*10,kernel_size=(1,1),stride=1,padding=0)

    def forward(self,x):
        out = self.conv1x1(x)
        out = out.permute(0,2,3,1).contiguous()

        return out.view(out.shape[0], -1, 10)

class RetinaFace(nn.Module):
    def __init__(self, cfg = None, phase = 'train'):
        """
        :param cfg: Network related settings.
        :param phase: train or test.
        """
        super(RetinaFace,self).__init__()
        self.phase = phase
        backbone = None
        if cfg['name'] == 'mobilenet0.25':
            backbone = MobileNetV1()
            if cfg['pretrain']:
                checkpoint = torch.load("./weights/mobilenetV1X0.25_pretrain.tar", map_location=torch.device('cpu'))
                from collections import OrderedDict
                new_state_dict = OrderedDict()
                for k, v in checkpoint['state_dict'].items():
                    name = k[7:]  # remove module.
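                    # e.g. a checkpoint key 'module.stage1.0.0.weight'
                    # becomes 'stage1.0.0.weight' (illustrative key name)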
65 | new_state_dict[name] = v 66 | # load params 67 | backbone.load_state_dict(new_state_dict) 68 | elif cfg['name'] == 'Resnet50': 69 | import torchvision.models as models 70 | backbone = models.resnet50(pretrained=cfg['pretrain']) 71 | 72 | self.body = _utils.IntermediateLayerGetter(backbone, cfg['return_layers']) 73 | in_channels_stage2 = cfg['in_channel'] 74 | in_channels_list = [ 75 | in_channels_stage2 * 2, 76 | in_channels_stage2 * 4, 77 | in_channels_stage2 * 8, 78 | ] 79 | out_channels = cfg['out_channel'] 80 | self.fpn = FPN(in_channels_list,out_channels) 81 | self.ssh1 = SSH(out_channels, out_channels) 82 | self.ssh2 = SSH(out_channels, out_channels) 83 | self.ssh3 = SSH(out_channels, out_channels) 84 | 85 | self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg['out_channel']) 86 | self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=cfg['out_channel']) 87 | self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=cfg['out_channel']) 88 | 89 | def _make_class_head(self,fpn_num=3,inchannels=64,anchor_num=2): 90 | classhead = nn.ModuleList() 91 | for i in range(fpn_num): 92 | classhead.append(ClassHead(inchannels,anchor_num)) 93 | return classhead 94 | 95 | def _make_bbox_head(self,fpn_num=3,inchannels=64,anchor_num=2): 96 | bboxhead = nn.ModuleList() 97 | for i in range(fpn_num): 98 | bboxhead.append(BboxHead(inchannels,anchor_num)) 99 | return bboxhead 100 | 101 | def _make_landmark_head(self,fpn_num=3,inchannels=64,anchor_num=2): 102 | landmarkhead = nn.ModuleList() 103 | for i in range(fpn_num): 104 | landmarkhead.append(LandmarkHead(inchannels,anchor_num)) 105 | return landmarkhead 106 | 107 | def forward(self,inputs): 108 | out = self.body(inputs) 109 | 110 | # FPN 111 | fpn = self.fpn(out) 112 | 113 | # SSH 114 | feature1 = self.ssh1(fpn[0]) 115 | feature2 = self.ssh2(fpn[1]) 116 | feature3 = self.ssh3(fpn[2]) 117 | features = [feature1, feature2, feature3] 118 | 119 | bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1) 120 | classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features)],dim=1) 121 | ldm_regressions = torch.cat([self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1) 122 | 123 | if self.phase == 'train': 124 | output = (bbox_regressions, classifications, ldm_regressions) 125 | else: 126 | output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions) 127 | return output -------------------------------------------------------------------------------- /Detect_python/detect_pytorch.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | from itertools import product as product 5 | from math import ceil 6 | 7 | from config import cfg_mnet, cfg_slim, cfg_rfb 8 | 9 | import torch 10 | import torch.backends.cudnn as cudnn 11 | from models.retinaface import RetinaFace 12 | from models.net_slim import Slim 13 | from models.net_rfb import RFB 14 | from utils.box_utils import decode, decode_landm 15 | from layers.functions.prior_box import PriorBox 16 | from utils.nms.py_cpu_nms import py_cpu_nms 17 | 18 | 19 | def check_keys(model, pretrained_state_dict): 20 | ckpt_keys = set(pretrained_state_dict.keys()) 21 | model_keys = set(model.state_dict().keys()) 22 | used_pretrained_keys = model_keys & ckpt_keys 23 | unused_pretrained_keys = ckpt_keys - model_keys 24 | missing_keys = model_keys - ckpt_keys 25 | print('Missing 
keys:{}'.format(len(missing_keys))) 26 | print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys))) 27 | print('Used keys:{}'.format(len(used_pretrained_keys))) 28 | assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint' 29 | return True 30 | 31 | 32 | def remove_prefix(state_dict, prefix): 33 | print('remove prefix \'{}\''.format(prefix)) 34 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 35 | return {f(key): value for key, value in state_dict.items()} 36 | 37 | def load_model(model, pretrained_path, load_to_cpu): 38 | print('Loading pretrained model from {}'.format(pretrained_path)) 39 | if load_to_cpu: 40 | pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage) 41 | else: 42 | device = torch.cuda.current_device() 43 | pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device)) 44 | if "state_dict" in pretrained_dict.keys(): 45 | pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.') 46 | else: 47 | pretrained_dict = remove_prefix(pretrained_dict, 'module.') 48 | check_keys(model, pretrained_dict) 49 | model.load_state_dict(pretrained_dict, strict=False) 50 | return model 51 | 52 | 53 | class Detector(): 54 | def __init__(self, model_path, long_side, network): 55 | torch.set_grad_enabled(False) 56 | 57 | if network == 'mobilenet': 58 | self.cfg = cfg_mnet 59 | net = RetinaFace(cfg = self.cfg, phase = 'test') 60 | elif network == 'slim': 61 | self.cfg = cfg_slim 62 | net = Slim(cfg = self.cfg, phase = 'test') 63 | elif network == 'RFB': 64 | self.cfg = cfg_rfb 65 | net = RFB(cfg = self.cfg, phase = 'test') 66 | else: 67 | print("not supported network!!") 68 | exit(0) 69 | 70 | self.net = load_model(net, model_path, True) 71 | self.net.eval() 72 | print("Finished loading model!") 73 | cudnn.benchmark = True 74 | self.device = torch.device("cpu") 75 | self.net = self.net.to(self.device) 76 | 77 | 78 | self.long_side = long_side 79 | 80 | 81 | def detect(self,img_raw, threshold): 82 | img = np.float32(img_raw) 83 | 84 | target_size = self.long_side 85 | max_size = self.long_side 86 | im_shape = img.shape 87 | im_size_min = np.min(im_shape[0:2]) 88 | im_size_max = np.max(im_shape[0:2]) 89 | resize = float(target_size) / float(im_size_min) 90 | 91 | if np.round(resize * im_size_max) > max_size: 92 | resize = float(max_size) / float(im_size_max) 93 | 94 | if resize != 1: 95 | img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR) 96 | im_height, im_width, _ = img.shape 97 | 98 | scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) 99 | img -= (104, 117, 123) 100 | img = img.transpose(2, 0, 1) 101 | img = torch.from_numpy(img).unsqueeze(0) 102 | img = img.to(self.device) 103 | scale = scale.to(self.device) 104 | 105 | loc, conf, landms = self.net(img) # forward pass 106 | 107 | priorbox = PriorBox(self.cfg, image_size=(im_height, im_width)) 108 | priors = priorbox.forward() 109 | priors = priors.to(self.device) 110 | prior_data = priors.data 111 | 112 | boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance']) 113 | boxes = boxes * scale / resize 114 | boxes = boxes.cpu().numpy() 115 | scores = conf.squeeze(0).data.cpu().numpy()[:, 1] 116 | landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance']) 117 | scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2], 118 | img.shape[3], img.shape[2], img.shape[3], img.shape[2], 119 | 
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(self.device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, 0.4)
        dets = dets[keep, :]
        landms = landms[keep]

        dets = np.concatenate((dets, landms), axis=1)
        return dets


--------------------------------------------------------------------------------
/layers/modules/multibox_loss.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from utils.box_utils import match, log_sum_exp
from data import cfg_mnet
GPU = cfg_mnet['gpu_train']

class MultiBoxLoss(nn.Module):
    """SSD Weighted Loss Function
    Compute Targets:
        1) Produce Confidence Target Indices by matching ground truth boxes
           with (default) 'priorboxes' that have jaccard index > threshold parameter
           (default threshold: 0.5).
        2) Produce localization target by 'encoding' variance into offsets of ground
           truth boxes and their matched 'priorboxes'.
        3) Hard negative mining to filter the excessive number of negative examples
           that come with using a large number of default bounding boxes.
           (default negative:positive ratio 3:1)
    Objective Loss:
        L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
        weighted by α which is set to 1 by cross val.
        Args:
            c: class confidences,
            l: predicted boxes,
            g: ground truth boxes
            N: number of matched default boxes
        See: https://arxiv.org/pdf/1512.02325.pdf for more details.
    """

    def __init__(self, num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining, neg_pos, neg_overlap, encode_target):
        super(MultiBoxLoss, self).__init__()
        self.num_classes = num_classes
        self.threshold = overlap_thresh
        self.background_label = bkg_label
        self.encode_target = encode_target
        self.use_prior_for_matching = prior_for_matching
        self.do_neg_mining = neg_mining
        self.negpos_ratio = neg_pos
        self.neg_overlap = neg_overlap
        self.variance = [0.1, 0.2]

    def forward(self, predictions, priors, targets):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
                and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)

            ground_truth (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
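
        Example (shapes only; mirrors the call in train.py):
            criterion = MultiBoxLoss(2, 0.35, True, 0, True, 7, 0.35, False)
            loss_l, loss_c, loss_landm = criterion((loc, conf, landm), priors, targets)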
55 | """ 56 | 57 | loc_data, conf_data, landm_data = predictions 58 | priors = priors 59 | num = loc_data.size(0) 60 | num_priors = (priors.size(0)) 61 | 62 | # match priors (default boxes) and ground truth boxes 63 | loc_t = torch.Tensor(num, num_priors, 4) 64 | landm_t = torch.Tensor(num, num_priors, 10) 65 | conf_t = torch.LongTensor(num, num_priors) 66 | for idx in range(num): 67 | truths = targets[idx][:, :4].data 68 | labels = targets[idx][:, -1].data 69 | landms = targets[idx][:, 4:14].data 70 | defaults = priors.data 71 | match(self.threshold, truths, defaults, self.variance, labels, landms, loc_t, conf_t, landm_t, idx) 72 | if GPU: 73 | loc_t = loc_t.cuda() 74 | conf_t = conf_t.cuda() 75 | landm_t = landm_t.cuda() 76 | 77 | zeros = torch.tensor(0).cuda() 78 | # landm Loss (Smooth L1) 79 | # Shape: [batch,num_priors,10] 80 | pos1 = conf_t > zeros 81 | num_pos_landm = pos1.long().sum(1, keepdim=True) 82 | N1 = max(num_pos_landm.data.sum().float(), 1) 83 | pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data) 84 | landm_p = landm_data[pos_idx1].view(-1, 10) 85 | landm_t = landm_t[pos_idx1].view(-1, 10) 86 | loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum') 87 | 88 | 89 | pos = conf_t != zeros 90 | conf_t[pos] = 1 91 | 92 | # Localization Loss (Smooth L1) 93 | # Shape: [batch,num_priors,4] 94 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) 95 | loc_p = loc_data[pos_idx].view(-1, 4) 96 | loc_t = loc_t[pos_idx].view(-1, 4) 97 | loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum') 98 | 99 | # Compute max conf across batch for hard negative mining 100 | batch_conf = conf_data.view(-1, self.num_classes) 101 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1)) 102 | 103 | # Hard Negative Mining 104 | loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now 105 | loss_c = loss_c.view(num, -1) 106 | _, loss_idx = loss_c.sort(1, descending=True) 107 | _, idx_rank = loss_idx.sort(1) 108 | num_pos = pos.long().sum(1, keepdim=True) 109 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) 110 | neg = idx_rank < num_neg.expand_as(idx_rank) 111 | 112 | # Confidence Loss Including Positive and Negative Examples 113 | pos_idx = pos.unsqueeze(2).expand_as(conf_data) 114 | neg_idx = neg.unsqueeze(2).expand_as(conf_data) 115 | conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes) 116 | targets_weighted = conf_t[(pos+neg).gt(0)] 117 | loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum') 118 | 119 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 120 | N = max(num_pos.data.sum().float(), 1) 121 | loss_l /= N 122 | loss_c /= N 123 | loss_landm /= N1 124 | 125 | return loss_l, loss_c, loss_landm 126 | -------------------------------------------------------------------------------- /convertor/onnx2caffe/_weightloader.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | # from caffe import params as P 6 | import numpy as np 7 | from ._graph import Node, Graph 8 | 9 | 10 | def _convert_conv(net, node, graph, err): 11 | weight_name = node.inputs[1] 12 | input_name = str(node.inputs[0]) 13 | output_name = str(node.outputs[0]) 14 | node_name = node.name 15 | W = None 16 | if weight_name in node.input_tensors: 17 | W = node.input_tensors[weight_name] 18 | else: 19 | err.missing_initializer(node, 
20 | "Weight tensor: {} not found in the graph initializer".format(weight_name, )) 21 | bias_flag = False 22 | bias = None 23 | if len(node.inputs) > 2: 24 | bias = node.input_tensors[node.inputs[2]] 25 | bias_flag = True 26 | # net.params[node_name][0].data = W 27 | # if bias_flag: 28 | # net.params[node_name][1].data = bias 29 | np.copyto(net.params[node_name][0].data, W, casting='same_kind') 30 | if bias_flag: 31 | np.copyto(net.params[node_name][1].data, bias, casting='same_kind') 32 | 33 | 34 | def _convert_relu(net, node, graph, err): 35 | pass 36 | 37 | 38 | def _convert_sigmoid(net, node, graph, err): 39 | pass 40 | 41 | 42 | def _convert_BatchNorm(net, node, graph, err): 43 | scale = node.input_tensors[node.inputs[1]] 44 | bias = node.input_tensors[node.inputs[2]] 45 | mean = node.input_tensors[node.inputs[3]] 46 | var = node.input_tensors[node.inputs[4]] 47 | node_name = node.name 48 | np.copyto(net.params[node_name + '_bn'][0].data, mean, casting='same_kind') 49 | np.copyto(net.params[node_name + '_bn'][1].data, var, casting='same_kind') 50 | net.params[node_name + '_bn'][2].data[...] = 1.0 51 | np.copyto(net.params[node_name][0].data, scale, casting='same_kind') 52 | np.copyto(net.params[node_name][1].data, bias, casting='same_kind') 53 | # net.params[node_name+'_bn'][1].data = var 54 | # net.params[node_name][0].data = scale 55 | # net.params[node_name][1].data = bias 56 | 57 | 58 | def _convert_Add(net, node, graph, err): 59 | pass 60 | 61 | 62 | def _convert_Mul(net, node, graph, err): 63 | pass 64 | 65 | 66 | def _convert_Reshape(net, node, graph, err): 67 | pass 68 | 69 | 70 | def _convert_Flatten(net, node, graph, err): 71 | pass 72 | 73 | 74 | def _convert_pool(net, node, graph, err): 75 | pass 76 | 77 | 78 | def _convert_dropout(net, node, graph, err): 79 | pass 80 | 81 | 82 | def _convert_Permute(net, node, graph, err): 83 | pass 84 | 85 | 86 | def _convert_Softmax(net, node, graph, err): 87 | pass 88 | 89 | 90 | def _convert_gemm(net, node, graph, err): 91 | node_name = node.name 92 | weight_name = node.inputs[1] 93 | if weight_name in node.input_tensors: 94 | W = node.input_tensors[weight_name] 95 | else: 96 | err.missing_initializer(node, 97 | "Weight tensor: {} not found in the graph initializer".format(weight_name, )) 98 | if node.attrs["broadcast"] != 1 or node.attrs["transB"] != 1: 99 | return err.unsupported_op_configuration(node, "Gemm is supported only for inner_product layer") 100 | b = None 101 | if len(node.inputs) > 2: 102 | b = node.input_tensors[node.inputs[2]] 103 | if len(W.shape) != 2 or (b is not None and len(b.shape) != 1): 104 | return err.unsupported_op_configuration(node, "Gemm is supported only for inner_product layer") 105 | if b is not None: 106 | if W.shape[0] != b.shape[0]: 107 | return err.unsupported_op_configuration(node, "Gemm is supported only for inner_product layer") 108 | net.params[node_name][0].data[...] = W 109 | net.params[node_name][1].data[...] 
= b 110 | 111 | 112 | def _convert_upsample(net, node, graph, err): 113 | mode = node.attrs["mode"] 114 | node_name = node.name 115 | if mode == "nearest": 116 | caffe_params = net.params[node_name][0].data 117 | weights = np.ones(caffe_params.shape).astype("float32") 118 | np.copyto(net.params[node_name][0].data, weights, casting='same_kind') 119 | # net.params[node_name][0].data[] 120 | 121 | 122 | def _convert_concat(net, node, graph, err): 123 | pass 124 | 125 | 126 | def _convert_conv_transpose(net, node, graph, err): 127 | weight_name = node.inputs[1] 128 | input_name = str(node.inputs[0]) 129 | output_name = str(node.outputs[0]) 130 | node_name = node.name 131 | W = None 132 | if weight_name in node.input_tensors: 133 | W = node.input_tensors[weight_name] 134 | else: 135 | err.missing_initializer(node, 136 | "Weight tensor: {} not found in the graph initializer".format(weight_name, )) 137 | bias_flag = False 138 | bias = None 139 | if len(node.inputs) > 2: 140 | bias = node.input_tensors[node.inputs[2]] 141 | bias_flag = True 142 | # net.params[node_name][0].data = W 143 | # if bias_flag: 144 | # net.params[node_name][1].data = bias 145 | np.copyto(net.params[node_name][0].data, W, casting='same_kind') 146 | if bias_flag: 147 | np.copyto(net.params[node_name][1].data, bias, casting='same_kind') 148 | 149 | 150 | _ONNX_NODE_REGISTRY = { 151 | "Conv": _convert_conv, 152 | "Relu": _convert_relu, 153 | "BatchNormalization": _convert_BatchNorm, 154 | "Add": _convert_Add, 155 | "Mul": _convert_Mul, 156 | "Reshape": _convert_Reshape, 157 | "MaxPool": _convert_pool, 158 | "AveragePool": _convert_pool, 159 | "Dropout": _convert_dropout, 160 | "Gemm": _convert_gemm, 161 | "Upsample": _convert_upsample, 162 | "Concat": _convert_concat, 163 | "ConvTranspose": _convert_conv_transpose, 164 | "Sigmoid": _convert_sigmoid, 165 | "Flatten": _convert_Flatten, 166 | "Transpose": _convert_Permute, 167 | "Softmax": _convert_Softmax, 168 | } 169 | -------------------------------------------------------------------------------- /Detector_cpp/Face_Detector_ncnn/ncnn/include/ncnn/gpu.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
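//
// Illustrative sketch (not part of the original header): the usual instance
// lifecycle around the helpers declared below, assuming ncnn was built with
// NCNN_VULKAN:
//
//   ncnn::create_gpu_instance();
//   int count = ncnn::get_gpu_count();
//   const ncnn::GpuInfo& info = ncnn::get_gpu_info();   // default device
//   ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
//   // ... run vulkan-enabled nets on vkdev ...
//   ncnn::destroy_gpu_instance();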
14 | 15 | #ifndef NCNN_GPU_H 16 | #define NCNN_GPU_H 17 | 18 | #include "platform.h" 19 | 20 | #if NCNN_VULKAN 21 | 22 | #include 23 | #include 24 | 25 | namespace ncnn { 26 | 27 | // instance 28 | int create_gpu_instance(); 29 | void destroy_gpu_instance(); 30 | 31 | // instance extension capability 32 | extern int support_VK_KHR_get_physical_device_properties2; 33 | extern int support_VK_EXT_debug_utils; 34 | 35 | // VK_KHR_get_physical_device_properties2 36 | extern PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR; 37 | extern PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR; 38 | extern PFN_vkGetPhysicalDeviceFormatProperties2KHR vkGetPhysicalDeviceFormatProperties2KHR; 39 | extern PFN_vkGetPhysicalDeviceImageFormatProperties2KHR vkGetPhysicalDeviceImageFormatProperties2KHR; 40 | extern PFN_vkGetPhysicalDeviceQueueFamilyProperties2KHR vkGetPhysicalDeviceQueueFamilyProperties2KHR; 41 | extern PFN_vkGetPhysicalDeviceMemoryProperties2KHR vkGetPhysicalDeviceMemoryProperties2KHR; 42 | extern PFN_vkGetPhysicalDeviceSparseImageFormatProperties2KHR vkGetPhysicalDeviceSparseImageFormatProperties2KHR; 43 | 44 | // get info 45 | int get_gpu_count(); 46 | int get_default_gpu_index(); 47 | 48 | class GpuInfo 49 | { 50 | public: 51 | // vulkan physical device 52 | VkPhysicalDevice physical_device; 53 | 54 | // info 55 | uint32_t api_version; 56 | uint32_t driver_version; 57 | uint32_t vendor_id; 58 | uint32_t device_id; 59 | uint8_t pipeline_cache_uuid[VK_UUID_SIZE]; 60 | 61 | // 0 = discrete gpu 62 | // 1 = integrated gpu 63 | // 2 = virtual gpu 64 | // 3 = cpu 65 | int type; 66 | 67 | // hardware capability 68 | uint32_t max_shared_memory_size; 69 | uint32_t max_workgroup_count[3]; 70 | uint32_t max_workgroup_invocations; 71 | uint32_t max_workgroup_size[3]; 72 | size_t memory_map_alignment; 73 | size_t buffer_offset_alignment; 74 | float timestamp_period; 75 | 76 | // runtime 77 | uint32_t compute_queue_family_index; 78 | uint32_t transfer_queue_family_index; 79 | 80 | uint32_t compute_queue_count; 81 | uint32_t transfer_queue_count; 82 | 83 | uint32_t unified_memory_index; 84 | uint32_t device_local_memory_index; 85 | uint32_t host_visible_memory_index; 86 | 87 | // fp16 and int8 feature 88 | bool support_fp16_packed; 89 | bool support_fp16_storage; 90 | bool support_fp16_arithmetic; 91 | bool support_int8_storage; 92 | bool support_int8_arithmetic; 93 | 94 | // extension capability 95 | int support_VK_KHR_8bit_storage; 96 | int support_VK_KHR_16bit_storage; 97 | int support_VK_KHR_bind_memory2; 98 | int support_VK_KHR_dedicated_allocation; 99 | int support_VK_KHR_descriptor_update_template; 100 | int support_VK_KHR_get_memory_requirements2; 101 | int support_VK_KHR_push_descriptor; 102 | int support_VK_KHR_shader_float16_int8; 103 | int support_VK_KHR_shader_float_controls; 104 | int support_VK_KHR_storage_buffer_storage_class; 105 | }; 106 | 107 | const GpuInfo& get_gpu_info(int device_index = get_default_gpu_index()); 108 | 109 | class VkAllocator; 110 | class VulkanDevice 111 | { 112 | public: 113 | VulkanDevice(int device_index = get_default_gpu_index()); 114 | ~VulkanDevice(); 115 | 116 | const GpuInfo& info; 117 | 118 | VkDevice vkdevice() const { return device; } 119 | 120 | VkShaderModule get_shader_module(const char* name) const; 121 | 122 | VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size) const; 123 | 124 | VkQueue acquire_queue(uint32_t queue_family_index) const; 125 | void reclaim_queue(uint32_t 
queue_family_index, VkQueue queue) const; 126 | 127 | // allocator on this device 128 | VkAllocator* acquire_blob_allocator() const; 129 | void reclaim_blob_allocator(VkAllocator* allocator) const; 130 | 131 | VkAllocator* acquire_staging_allocator() const; 132 | void reclaim_staging_allocator(VkAllocator* allocator) const; 133 | 134 | // VK_KHR_descriptor_update_template 135 | PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; 136 | PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; 137 | PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; 138 | 139 | // VK_KHR_get_memory_requirements2 140 | PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR; 141 | PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR; 142 | PFN_vkGetImageSparseMemoryRequirements2KHR vkGetImageSparseMemoryRequirements2KHR; 143 | 144 | // VK_KHR_push_descriptor 145 | PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR; 146 | PFN_vkCmdPushDescriptorSetKHR vkCmdPushDescriptorSetKHR; 147 | 148 | protected: 149 | // shader management 150 | int create_shader_module(); 151 | void destroy_shader_module(); 152 | 153 | // device extension 154 | int init_device_extension(); 155 | 156 | private: 157 | VkDevice device; 158 | std::vector shader_modules; 159 | 160 | // hardware queue 161 | mutable std::vector compute_queues; 162 | mutable std::vector transfer_queues; 163 | mutable Mutex queue_lock; 164 | 165 | // default blob allocator for each queue 166 | mutable std::vector blob_allocators; 167 | mutable Mutex blob_allocator_lock; 168 | 169 | // default staging allocator for each queue 170 | mutable std::vector staging_allocators; 171 | mutable Mutex staging_allocator_lock; 172 | }; 173 | 174 | VulkanDevice* get_gpu_device(int device_index = get_default_gpu_index()); 175 | 176 | } // namespace ncnn 177 | 178 | #endif // NCNN_VULKAN 179 | 180 | #endif // NCNN_GPU_H 181 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | import torch.optim as optim 5 | import torch.backends.cudnn as cudnn 6 | import argparse 7 | import torch.utils.data as data 8 | from data import WiderFaceDetection, detection_collate, preproc, cfg_mnet, cfg_slim, cfg_rfb 9 | from layers.modules import MultiBoxLoss 10 | from layers.functions.prior_box import PriorBox 11 | import time 12 | import datetime 13 | import math 14 | from models.retinaface import RetinaFace 15 | from models.net_slim import Slim 16 | from models.net_rfb import RFB 17 | 18 | parser = argparse.ArgumentParser(description='Training') 19 | parser.add_argument('--training_dataset', default='./data/widerface/train/label.txt', help='Training dataset directory') 20 | parser.add_argument('--network', default='RFB', help='Backbone network mobile0.25 or slim or RFB') 21 | parser.add_argument('--num_workers', default=4, type=int, help='Number of workers used in dataloading') 22 | parser.add_argument('--lr', '--learning-rate', default=1e-3, type=float, help='initial learning rate') 23 | parser.add_argument('--momentum', default=0.9, type=float, help='momentum') 24 | parser.add_argument('--resume_net', default=None, help='resume net for retraining') 25 | parser.add_argument('--resume_epoch', default=0, type=int, help='resume iter for retraining') 26 | 
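# Example invocation (illustrative): python train.py --network RFB --lr 1e-3 --resume_net ./weights/RBF_Final.pth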
parser.add_argument('--weight_decay', default=5e-4, type=float, help='Weight decay for SGD')
parser.add_argument('--gamma', default=0.1, type=float, help='Gamma update for SGD')
parser.add_argument('--save_folder', default='./weights/', help='Location to save checkpoint models')

args = parser.parse_args()

if not os.path.exists(args.save_folder):
    os.mkdir(args.save_folder)
cfg = None
net = None
if args.network == "mobile0.25":
    cfg = cfg_mnet
    net = RetinaFace(cfg=cfg)
elif args.network == "slim":
    cfg = cfg_slim
    net = Slim(cfg=cfg)
elif args.network == "RFB":
    cfg = cfg_rfb
    net = RFB(cfg=cfg)
else:
    print("Unsupported network!")
    exit(1)

print("Printing net...")
print(net)

rgb_mean = (104, 117, 123) # bgr order
num_classes = 2
img_dim = cfg['image_size']
num_gpu = cfg['ngpu']
batch_size = cfg['batch_size']
max_epoch = cfg['epoch']
gpu_train = cfg['gpu_train']

num_workers = args.num_workers
momentum = args.momentum
weight_decay = args.weight_decay
initial_lr = args.lr
gamma = args.gamma
training_dataset = args.training_dataset
save_folder = args.save_folder

if args.resume_net is not None:
    print('Loading resume network...')
    state_dict = torch.load(args.resume_net)
    # create new OrderedDict that does not contain `module.`
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        head = k[:7]
        if head == 'module.':
            name = k[7:] # remove `module.`
        else:
            name = k
        new_state_dict[name] = v
    net.load_state_dict(new_state_dict)

if num_gpu > 1 and gpu_train:
    net = torch.nn.DataParallel(net).cuda()
else:
    net = net.cuda()

cudnn.benchmark = True


optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay)
criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)

priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
with torch.no_grad():
    priors = priorbox.forward()
    priors = priors.cuda()

def train():
    net.train()
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')

    dataset = WiderFaceDetection( training_dataset,preproc(img_dim, rgb_mean))

    epoch_size = math.ceil(len(dataset) / batch_size)
    max_iter = max_epoch * epoch_size

    stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size)
    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0

    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(data.DataLoader(dataset, batch_size, shuffle=True, num_workers=num_workers, collate_fn=detection_collate))
            if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > cfg['decay1']):
                torch.save(net.state_dict(), save_folder + cfg['name']+ '_epoch_' + str(epoch) + '.pth')
            epoch += 1

        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)
        images = images.cuda()
        targets = [anno.cuda()
for anno in targets] 134 | 135 | # forward 136 | out = net(images) 137 | 138 | # backprop 139 | optimizer.zero_grad() 140 | loss_l, loss_c, loss_landm = criterion(out, priors, targets) 141 | loss = cfg['loc_weight'] * loss_l + loss_c + cfg['lmk_weight'] * loss_landm 142 | loss.backward() 143 | optimizer.step() 144 | load_t1 = time.time() 145 | batch_time = load_t1 - load_t0 146 | eta = int(batch_time * (max_iter - iteration)) 147 | print('Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}' 148 | .format(epoch, max_epoch, (iteration % epoch_size) + 1, 149 | epoch_size, iteration + 1, max_iter, loss_l.item(), loss_c.item(), loss_landm.item(), lr, batch_time, str(datetime.timedelta(seconds=eta)))) 150 | 151 | torch.save(net.state_dict(), save_folder + cfg['name'] + '_Final.pth') 152 | 153 | def adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size): 154 | """Sets the learning rate 155 | # Adapted from PyTorch Imagenet example: 156 | # https://github.com/pytorch/examples/blob/master/imagenet/main.py 157 | """ 158 | warmup_epoch = -1 159 | if epoch <= warmup_epoch: 160 | lr = 1e-6 + (initial_lr-1e-6) * iteration / (epoch_size * warmup_epoch) 161 | else: 162 | lr = initial_lr * (gamma ** (step_index)) 163 | for param_group in optimizer.param_groups: 164 | param_group['lr'] = lr 165 | return lr 166 | 167 | if __name__ == '__main__': 168 | train() 169 | -------------------------------------------------------------------------------- /Detect_python/detect_tf.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import cv2 3 | import numpy as np 4 | 5 | from itertools import product as product 6 | from math import ceil 7 | 8 | from config import cfg_mnet, cfg_slim, cfg_rfb 9 | 10 | def load_graph(file_name): 11 | with tf.gfile.GFile(file_name, "rb") as f: 12 | graph_def = tf.GraphDef() 13 | graph_def.ParseFromString(f.read()) 14 | with tf.Graph().as_default() as graph: 15 | tf.import_graph_def(graph_def, name="") 16 | return graph 17 | 18 | class Detector(): 19 | def __init__(self, model_path, long_side, network): 20 | if network == 'mobilenet': 21 | self.cfg = cfg_mnet 22 | elif network == 'slim': 23 | self.cfg = cfg_slim 24 | elif network == 'RFB': 25 | self.cfg = cfg_rfb 26 | else: 27 | print("not supported network!!") 28 | exit(0) 29 | 30 | self.min_sizes = self.cfg['min_sizes'] 31 | self.steps = self.cfg['steps'] 32 | self.variances = self.cfg['variance'] 33 | 34 | self.long_side = long_side 35 | 36 | self.graph = load_graph(model_path) 37 | self.input_tensor = self.graph.get_tensor_by_name('input0:0') 38 | self.conf_tensor = self.graph.get_tensor_by_name('Concat_223:0') 39 | self.loc_tensor = self.graph.get_tensor_by_name('Concat_198:0') 40 | self.landms_tensor = self.graph.get_tensor_by_name('Concat_248:0') 41 | 42 | def detect(self,img_raw, threshold): 43 | img = np.float32(img_raw) 44 | 45 | target_size = self.long_side 46 | max_size = self.long_side 47 | im_shape = img.shape 48 | im_size_min = np.min(im_shape[0:2]) 49 | im_size_max = np.max(im_shape[0:2]) 50 | resize = float(target_size) / float(im_size_min) 51 | 52 | if np.round(resize * im_size_max) > max_size: 53 | resize = float(max_size) / float(im_size_max) 54 | 55 | if resize != 1: 56 | img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR) 57 | im_height, im_width, _ = img.shape 58 | img -= (104, 117, 123) 59 | 60 | 61 | with 
tf.Session(graph=self.graph) as sess:
            conf, loc, landms = sess.run((self.conf_tensor, self.loc_tensor, self.landms_tensor),
                                         feed_dict={self.input_tensor: [img]})


        conf = conf.reshape((-1, 2))
        conf = self.softmax(conf)
        loc = loc.reshape((-1, 4))
        landms = landms.reshape((-1, 10))

        priors = self.createAnchors(image_size=(im_height, im_width))
        boxes = self.decode(loc, priors, self.variances)
        scale = np.array([im_width, im_height, im_width, im_height])
        boxes = boxes * scale / resize

        scores = conf[:, 1]

        landms = self.decode_landm(landms, priors, self.variances)
        scale1 = np.array([im_width, im_height, im_width, im_height, im_width,
                           im_height, im_width, im_height, im_width, im_height])
        landms = landms * scale1 / resize

        # ignore low scores
        inds = np.where(scores > threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = self.py_cpu_nms(dets, 0.4)
        dets = dets[keep, :]
        landms = landms[keep]

        dets = np.concatenate((dets, landms), axis=1)
        return dets


    def createAnchors(self, image_size = None):
        feature_maps = [[ceil(image_size[0]/float(step)), ceil(image_size[1]/float(step))] for step in self.steps]
        anchors = []
        for k, f in enumerate(feature_maps):
            min_sizes = self.min_sizes[k]
            for i, j in product(range(int(f[0])), range(int(f[1]))):
                for min_size in min_sizes:
                    s_kx = float(min_size) / float(image_size[1])
                    s_ky = float(min_size) / float(image_size[0])
                    dense_cx = [x * self.steps[k] / float(image_size[1]) for x in [j + 0.5]]
                    dense_cy = [y * self.steps[k] / float(image_size[0]) for y in [i + 0.5]]
                    for cy, cx in product(dense_cy, dense_cx):
                        anchors += [cx, cy, s_kx, s_ky]

        anchors = np.array(anchors)
        return anchors.reshape(-1, 4)


    def decode(self, loc, priors, variances):
        boxes = np.concatenate((priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
                                priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])), 1)
        boxes[:, :2] -= boxes[:, 2:] / 2
        boxes[:, 2:] += boxes[:, :2]
        return boxes

    def decode_landm(self, pre, priors, variances):
        landms = np.concatenate((priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
                                 priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
                                 priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
                                 priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
                                 priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:]), 1)
        return landms

    def py_cpu_nms(self, dets, thresh):
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        scores = dets[:, 4]

        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])

            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)

            inds = np.where(ovr <= thresh)[0]
            order = order[inds + 1]

        return keep

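    # Worked example (illustrative): for dets = [[0, 0, 10, 10, 0.9],
    # [1, 1, 11, 11, 0.8]] and thresh = 0.4 the two boxes overlap with
    # IoU = 100/142 ≈ 0.70 > 0.4, so py_cpu_nms keeps only index 0.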
/Detector_cpp/Face_Detector_ncnn/ncnn/include/ncnn/opencv.h:
--------------------------------------------------------------------------------
1 | // Tencent is pleased to support the open source community by making ncnn available.
2 | //
3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
4 | //
5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 | // in compliance with the License. You may obtain a copy of the License at
7 | //
8 | // https://opensource.org/licenses/BSD-3-Clause
9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef NCNN_OPENCV_H
16 | #define NCNN_OPENCV_H
17 | 
18 | #include "platform.h"
19 | 
20 | #if NCNN_OPENCV
21 | 
22 | #include <algorithm>
23 | #include <string>
24 | #include "mat.h"
25 | 
26 | // minimal opencv style data structure implementation
27 | namespace cv
28 | {
29 | 
30 | struct Size
31 | {
32 |     Size() : width(0), height(0) {}
33 |     Size(int _w, int _h) : width(_w), height(_h) {}
34 | 
35 |     int width;
36 |     int height;
37 | };
38 | 
39 | template<typename _Tp>
40 | struct Rect_
41 | {
42 |     Rect_() : x(0), y(0), width(0), height(0) {}
43 |     Rect_(_Tp _x, _Tp _y, _Tp _w, _Tp _h) : x(_x), y(_y), width(_w), height(_h) {}
44 | 
45 |     _Tp x;
46 |     _Tp y;
47 |     _Tp width;
48 |     _Tp height;
49 | 
50 |     // area
51 |     _Tp area() const
52 |     {
53 |         return width * height;
54 |     }
55 | };
56 | 
57 | template<typename _Tp> static inline Rect_<_Tp>& operator &= ( Rect_<_Tp>& a, const Rect_<_Tp>& b )
58 | {
59 |     _Tp x1 = std::max(a.x, b.x), y1 = std::max(a.y, b.y);
60 |     a.width = std::min(a.x + a.width, b.x + b.width) - x1;
61 |     a.height = std::min(a.y + a.height, b.y + b.height) - y1;
62 |     a.x = x1; a.y = y1;
63 |     if( a.width <= 0 || a.height <= 0 )
64 |         a = Rect_<_Tp>();
65 |     return a;
66 | }
67 | 
68 | template<typename _Tp> static inline Rect_<_Tp>& operator |= ( Rect_<_Tp>& a, const Rect_<_Tp>& b )
69 | {
70 |     _Tp x1 = std::min(a.x, b.x), y1 = std::min(a.y, b.y);
71 |     a.width = std::max(a.x + a.width, b.x + b.width) - x1;
72 |     a.height = std::max(a.y + a.height, b.y + b.height) - y1;
73 |     a.x = x1; a.y = y1;
74 |     return a;
75 | }
76 | 
77 | template<typename _Tp> static inline Rect_<_Tp> operator & (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
78 | {
79 |     Rect_<_Tp> c = a;
80 |     return c &= b;
81 | }
82 | 
83 | template<typename _Tp> static inline Rect_<_Tp> operator | (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
84 | {
85 |     Rect_<_Tp> c = a;
86 |     return c |= b;
87 | }
88 | 
89 | typedef Rect_<int> Rect;
90 | typedef Rect_<float> Rect2f;
91 | 
92 | template<typename _Tp>
93 | struct Point_
94 | {
95 |     Point_() : x(0), y(0) {}
96 |     Point_(_Tp _x, _Tp _y) : x(_x), y(_y) {}
97 | 
98 |     _Tp x;
99 |     _Tp y;
100 | };
101 | 
102 | typedef Point_<int> Point;
103 | typedef Point_<float> Point2f;
104 | 
105 | #define CV_8UC1 1
106 | #define CV_8UC3 3
107 | #define CV_8UC4 4
108 | #define CV_32FC1 4
109 | 
110 | struct Mat
111 | {
112 |     Mat() : data(0), refcount(0), rows(0), cols(0), c(0) {}
113 | 
114 |     Mat(int _rows, int _cols, int flags) : data(0), refcount(0)
115 |     {
116 |         create(_rows, _cols, flags);
117 |     }
118 | 
119 |     // copy
120 |     Mat(const Mat& m) : data(m.data), refcount(m.refcount)
121 |     {
122 |         if (refcount)
123 |             NCNN_XADD(refcount, 1);
124 | 
125 |         rows = m.rows;
126 |         cols = m.cols;
127 |         c = m.c;
128 |     }
129 | 
130 |     Mat(int _rows, int _cols, int flags, void* _data) : data((unsigned char*)_data), refcount(0)
131 |     {
132 |         rows = _rows;
133 |         cols = _cols;
134 |         c = flags;
135 |     }
136 | 
137 |     ~Mat()
138 |     {
139 |         release();
140 |     }
141 | 
142 |     // assign
143 |     Mat& operator=(const Mat& m)
144 |     {
145 |         if (this == &m)
146 |             return *this;
147 | 
148 |         if (m.refcount)
149 |             NCNN_XADD(m.refcount, 1);
150 | 
151 |         release();
152 | 
153 |         data = m.data;
154 |         refcount = m.refcount;
155 | 
156 |         rows = m.rows;
157 |         cols = m.cols;
158 |         c = m.c;
159 | 
160 |         return *this;
161 |     }
162 | 
163 |     void create(int _rows, int _cols, int flags)
164 |     {
165 |         release();
166 | 
167 |         rows = _rows;
168 |         cols = _cols;
169 |         c = flags;
170 | 
171 |         if (total() > 0)
172 |         {
173 |             // refcount address must be aligned, so we expand totalsize here
174 |             size_t totalsize = (total() + 3) >> 2 << 2;
175 |             data = (unsigned char*)ncnn::fastMalloc(totalsize + (int)sizeof(*refcount));
176 |             refcount = (int*)(((unsigned char*)data) + totalsize);
177 |             *refcount = 1;
178 |         }
179 |     }
180 | 
181 |     void release()
182 |     {
183 |         if (refcount && NCNN_XADD(refcount, -1) == 1)
184 |             ncnn::fastFree(data);
185 | 
186 |         data = 0;
187 | 
188 |         rows = 0;
189 |         cols = 0;
190 |         c = 0;
191 | 
192 |         refcount = 0;
193 |     }
194 | 
195 |     Mat clone() const
196 |     {
197 |         if (empty())
198 |             return Mat();
199 | 
200 |         Mat m(rows, cols, c);
201 | 
202 |         if (total() > 0)
203 |         {
204 |             memcpy(m.data, data, total());
205 |         }
206 | 
207 |         return m;
208 |     }
209 | 
210 |     bool empty() const { return data == 0 || total() == 0; }
211 | 
212 |     int channels() const { return c; }
213 | 
214 |     size_t total() const { return cols * rows * c; }
215 | 
216 |     const unsigned char* ptr(int y) const { return data + y * cols * c; }
217 | 
218 |     unsigned char* ptr(int y) { return data + y * cols * c; }
219 | 
220 |     // roi
221 |     Mat operator()( const Rect& roi ) const
222 |     {
223 |         if (empty())
224 |             return Mat();
225 | 
226 |         Mat m(roi.height, roi.width, c);
227 | 
228 |         int sy = roi.y;
229 |         for (int y = 0; y < roi.height; y++)
230 |         {
231 |             const unsigned char* sptr = ptr(sy) + roi.x * c;
232 |             unsigned char* dptr = m.ptr(y);
233 |             memcpy(dptr, sptr, roi.width * c);
234 |             sy++;
235 |         }
236 | 
237 |         return m;
238 |     }
239 | 
240 |     unsigned char* data;
241 | 
242 |     // pointer to the reference counter;
243 |     // when it points to user-allocated data, the pointer is NULL
244 |     int* refcount;
245 | 
246 |     int rows;
247 |     int cols;
248 | 
249 |     int c;
250 | 
251 | };
252 | 
253 | #define CV_LOAD_IMAGE_GRAYSCALE 1
254 | #define CV_LOAD_IMAGE_COLOR 3
255 | Mat imread(const std::string& path, int flags);
256 | void imwrite(const std::string& path, const Mat& m);
257 | 
258 | #if NCNN_PIXEL
259 | void resize(const Mat& src, Mat& dst, const Size& size, float sw = 0.f, float sh = 0.f, int flags = 0);
260 | #endif // NCNN_PIXEL
261 | 
262 | } // namespace cv
263 | 
264 | #endif // NCNN_OPENCV
265 | 
266 | #endif // NCNN_OPENCV_H
267 | 
--------------------------------------------------------------------------------
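This header ships only the tiny cv:: subset that ncnn's demos need, not real OpenCV. A rough usage sketch, assuming ncnn was built with NCNN_OPENCV enabled and libncnn.a is linked; the image paths are illustrative:

```cpp
#include <cstdio>
#include "ncnn/opencv.h"  // the bundled minimal header above, not real OpenCV

int main()
{
    // Rect intersection via the operators defined above
    cv::Rect a(0, 0, 100, 100);
    cv::Rect b(50, 50, 100, 100);
    cv::Rect c = a & b;  // (50, 50, 50, 50)
    std::printf("inter: %d %d %d %d\n", c.x, c.y, c.width, c.height);

    // Mat with an ROI copy; imread/imwrite are implemented inside libncnn
    cv::Mat img = cv::imread("sample.jpg", CV_LOAD_IMAGE_COLOR);
    if (!img.empty())
    {
        cv::Mat face = img(cv::Rect(10, 10, 64, 64));  // operator() deep-copies the ROI
        cv::imwrite("face.jpg", face);
    }
    return 0;
}
```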
/README.md:
--------------------------------------------------------------------------------
1 | # Lightweight Face Detector with Landmarks
2 | 
3 | ## Contents
4 | * [Introduction](#introduction)
5 |   * [Functions](#functions)
6 |   * [Test_environment](#test_environment)
7 | * [Evaluation](#evaluation)
8 |   * [Widerface](#Widerface)
9 |   * [Parameter_and_flop](#Parameter_and_flop)
10 |   * [Speed](#speed)
11 | * [How_to_use](#how_to_use)
12 |   * [Installation](#installation)
13 |   * [Training](#training)
14 |   * [Evaluation](#evaluation)
15 |   * [Convertor](#convertor)
16 |   * [C++_inference](#c++_inference)
17 | * [References](#references)
18 | 
19 | 
20 | ## Introduction
21 | This project provides a series of lightweight face detectors with landmarks that can be deployed on mobile devices.
22 | - Modify the anchor size of [Retinaface-mobile0.25](https://github.com/biubug6/Pytorch_Retinaface)
23 | - Add landmark estimation to [Face-Detector-1MB](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB)
24 | ### Functions
25 | - Train/test/evaluation/ncnn/tensorflow/tflite/C++ inference of Retinaface-mobile0.25
26 | - Train/test/evaluation/ncnn/tensorflow/tflite/C++ inference of the Face-Detector-1MB slim and RFB versions
27 | - Add 5-point landmark estimation to Face-Detector-1MB
28 | - Support inference with pytorch/ncnn/tensorflow/tflite
29 | 
30 | ### Test_environment
31 | - Ubuntu18.04
32 | - Python3.7
33 | - Pytorch1.2
34 | - CUDA10.0 + CUDNN7.5
35 | 
36 | ## Evaluation
37 | ### Widerface
38 | 
39 | - Evaluation result on wider face val (input image size: **320×240**)
40 | 
41 | 
--------------------------------------------------------------------------------
/Detector_cpp/Face_Detector_ncnn/ncnn/include/ncnn/net.h:
--------------------------------------------------------------------------------
1 | // Tencent is pleased to support the open source community by making ncnn available.
2 | //
3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
4 | //
5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 | // in compliance with the License. You may obtain a copy of the License at
7 | //
8 | // https://opensource.org/licenses/BSD-3-Clause
9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef NCNN_NET_H
16 | #define NCNN_NET_H
17 | 
18 | #include <stdio.h>
19 | #include <vector>
20 | #include "platform.h"
21 | #include "blob.h"
22 | #include "layer.h"
23 | #include "mat.h"
24 | #include "option.h"
25 | 
26 | namespace ncnn {
27 | 
28 | #if NCNN_VULKAN
29 | class VkCompute;
30 | #endif // NCNN_VULKAN
31 | class Extractor;
32 | class Net
33 | {
34 | public:
35 |     // empty init
36 |     Net();
37 |     // clear and destroy
38 |     ~Net();
39 | 
40 | public:
41 |     // option can be changed before loading
42 |     Option opt;
43 | 
44 | #if NCNN_VULKAN
45 |     // set gpu device by index
46 |     void set_vulkan_device(int device_index);
47 | 
48 |     // set gpu device by device handle, no owner transfer
49 |     void set_vulkan_device(const VulkanDevice* vkdev);
50 | 
51 |     const VulkanDevice* vulkan_device() const;
52 | #endif // NCNN_VULKAN
53 | 
54 | #if NCNN_STRING
55 |     // register custom layer by layer type name
56 |     // return 0 if success
57 |     int register_custom_layer(const char* type, layer_creator_func creator);
58 | #endif // NCNN_STRING
59 |     // register custom layer by layer type
60 |     // return 0 if success
61 |     int register_custom_layer(int index, layer_creator_func creator);
62 | 
63 | #if NCNN_STDIO
64 | #if NCNN_STRING
65 |     // load network structure from plain param file
66 |     // return 0 if success
67 |     int load_param(FILE* fp);
68 |     int load_param(const char* protopath);
69 |     int load_param_mem(const char* mem);
70 | #endif // NCNN_STRING
71 |     // load network structure from binary param file
72 |     // return 0 if success
73 |     int load_param_bin(FILE* fp);
74 |     int load_param_bin(const char* protopath);
75 | 
76 |     // load network weight data from model file
77 |     // return 0 if success
78 |     int load_model(FILE* fp);
79 |     int load_model(const char* modelpath);
80 | #endif // NCNN_STDIO
81 | 
82 |     // load network structure from external memory
83 |     // memory pointer must be 32-bit aligned
84 |     // return bytes consumed
85 |     int load_param(const unsigned char* mem);
86 | 
87 |     // reference network weight data from external memory
88 |     // weight data is not copied but referenced
89 |     // so external memory should be retained when used
90 |     // memory pointer must be 32-bit aligned
91 |     // return bytes consumed
92 |     int load_model(const unsigned char* mem);
93 | 
94 |     // unload network structure and weight data
95 |     void clear();
96 | 
97 |     // construct an Extractor from network
98 |     Extractor create_extractor() const;
99 | 
100 | protected:
101 |     // parse the structure of network
102 |     // fuse int8 op dequantize and quantize by requantize
103 |     int fuse_network();
104 | 
105 | #if NCNN_VULKAN
106 | 
107 |     int upload_model();
108 | 
109 |     int create_pipeline();
110 | 
111 |     int destroy_pipeline();
112 | 
113 | #endif // NCNN_VULKAN
114 | 
115 |     friend class Extractor;
116 | #if NCNN_STRING
117 |     int find_blob_index_by_name(const char* name) const;
118 |     int find_layer_index_by_name(const char* name) const;
119 |     int custom_layer_to_index(const char* type);
120 |     Layer* create_custom_layer(const char* type);
121 | #endif // NCNN_STRING
122 |     Layer* create_custom_layer(int index);
123 |     int forward_layer(int layer_index, std::vector<Mat>& blob_mats, Option& opt) const;
124 | 
125 | #if NCNN_VULKAN
126 |     int forward_layer(int layer_index, std::vector<Mat>& blob_mats, std::vector<VkMat>& blob_mats_gpu, VkCompute& cmd, Option& opt) const;
127 | #endif // NCNN_VULKAN
128 | 
129 | protected:
130 |     std::vector<Blob> blobs;
131 |     std::vector<Layer*> layers;
132 | 
133 |     std::vector<layer_registry_entry> custom_layer_registry;
134 | 
135 | #if NCNN_VULKAN
136 |     const VulkanDevice* vkdev;
137 | 
138 |     VkAllocator* weight_vkallocator;
139 |     VkAllocator* weight_staging_vkallocator;
140 | 
141 |     ncnn::Layer* cast_float32_to_float16;
142 |     ncnn::Layer* cast_float16_to_float32;
143 |     ncnn::Layer* packing_pack1;
144 |     ncnn::Layer* packing_pack4;
145 | #endif // NCNN_VULKAN
146 | };
147 | 
148 | class Extractor
149 | {
150 | public:
151 |     // enable light mode
152 |     // intermediate blob will be recycled when enabled
153 |     // enabled by default
154 |     void set_light_mode(bool enable);
155 | 
156 |     // set thread count for this extractor
157 |     // this will overwrite the global setting
158 |     // default count is system dependent
159 |     void set_num_threads(int num_threads);
160 | 
161 |     // set blob memory allocator
162 |     void set_blob_allocator(Allocator* allocator);
163 | 
164 |     // set workspace memory allocator
165 |     void set_workspace_allocator(Allocator* allocator);
166 | 
167 | #if NCNN_VULKAN
168 |     void set_vulkan_compute(bool enable);
169 | 
170 |     void set_blob_vkallocator(VkAllocator* allocator);
171 | 
172 |     void set_workspace_vkallocator(VkAllocator* allocator);
173 | 
174 |     void set_staging_vkallocator(VkAllocator* allocator);
175 | #endif // NCNN_VULKAN
176 | 
177 | #if NCNN_STRING
178 |     // set input by blob name
179 |     // return 0 if success
180 |     int input(const char* blob_name, const Mat& in);
181 | 
182 |     // get result by blob name
183 |     // return 0 if success
184 |     int extract(const char* blob_name, Mat& feat);
185 | #endif // NCNN_STRING
186 | 
187 |     // set input by blob index
188 |     // return 0 if success
189 |     int input(int blob_index, const Mat& in);
190 | 
191 |     // get result by blob index
192 |     // return 0 if success
193 |     int extract(int blob_index, Mat& feat);
194 | 
195 | #if NCNN_VULKAN
196 | #if NCNN_STRING
197 |     // set input by blob name
198 |     // return 0 if success
199 |     int input(const char* blob_name, const VkMat& in);
200 | 
201 |     // get result by blob name
202 |     // return 0 if success
203 |     int extract(const char* blob_name, VkMat& feat, VkCompute& cmd);
204 | #endif // NCNN_STRING
205 | 
206 |     // set input by blob index
207 |     // return 0 if success
208 |     int input(int blob_index, const VkMat& in);
209 | 
210 |     // get result by blob index
211 |     // return 0 if success
212 |     int extract(int blob_index, VkMat& feat, VkCompute& cmd);
213 | #endif // NCNN_VULKAN
214 | 
215 | protected:
216 |     friend Extractor Net::create_extractor() const;
217 |     Extractor(const Net* net, int blob_count);
218 | 
219 | private:
220 |     const Net* net;
221 |     std::vector<Mat> blob_mats;
222 |     Option opt;
223 | 
224 | #if NCNN_VULKAN
225 |     std::vector<VkMat> blob_mats_gpu;
226 | #endif // NCNN_VULKAN
227 | };
228 | 
229 | } // namespace ncnn
230 | 
231 | #endif // NCNN_NET_H
232 | 
--------------------------------------------------------------------------------
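For orientation, a minimal sketch of how this Net/Extractor API is typically driven (the repo's Face_Detector_ncnn/main.cpp presumably does something similar). The param file name and the blob names "input0"/"output0" are placeholders; the real names come from the model converted with tools/onnx2ncnn, and the repo ships the weights as model/face.bin:

```cpp
#include "ncnn/net.h"

int main()
{
    ncnn::Net net;
    net.load_param("face.param");  // assumed name; onnx2ncnn emits a .param/.bin pair
    net.load_model("face.bin");

    ncnn::Mat in(320, 240, 3);     // w, h, c; normally filled via Mat::from_pixels from an image
    in.fill(0.f);

    ncnn::Extractor ex = net.create_extractor();
    ex.set_num_threads(2);
    ex.input("input0", in);        // blob name depends on the converted graph

    ncnn::Mat out;
    ex.extract("output0", out);    // likewise a placeholder blob name
    return 0;
}
```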
/Detector_cpp/Face_Detector_ncnn/ncnn/include/ncnn/command.h:
--------------------------------------------------------------------------------
1 | // Tencent is pleased to support the open source community by making ncnn available.
2 | //
3 | // Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
4 | //
5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 | // in compliance with the License. You may obtain a copy of the License at
7 | //
8 | // https://opensource.org/licenses/BSD-3-Clause
9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef NCNN_COMMAND_H
16 | #define NCNN_COMMAND_H
17 | 
18 | #include "platform.h"
19 | 
20 | #if NCNN_VULKAN
21 | 
22 | #include <vulkan/vulkan.h>
23 | #include <vector>
24 | #include "mat.h"
25 | #include "pipeline.h"
26 | 
27 | namespace ncnn {
28 | 
29 | class Command
30 | {
31 | public:
32 |     Command(const VulkanDevice* vkdev, uint32_t queue_family_index);
33 |     virtual ~Command();
34 | 
35 | protected:
36 |     int create_command_pool();
37 |     int create_command_buffer();
38 | 
39 |     // record issue
40 |     int begin_command_buffer();
41 |     int end_command_buffer();
42 |     int queue_submit_and_wait_fence();
43 | 
44 | protected:
45 |     const VulkanDevice* vkdev;
46 |     uint32_t queue_family_index;
47 | 
48 |     VkCommandPool command_pool;
49 |     VkCommandBuffer command_buffer;
50 | 
51 |     VkFence fence;
52 | };
53 | 
54 | class VkCompute : public Command
55 | {
56 | public:
57 |     VkCompute(const VulkanDevice* vkdev);
58 |     ~VkCompute();
59 | 
60 |     void record_upload(const VkMat& m);
61 | 
62 |     void record_download(const VkMat& m);
63 | 
64 |     void record_clone(const VkMat& src, const VkMat& dst);
65 | 
66 |     void record_copy_region(const VkMat& src, const VkMat& dst, const VkBufferCopy& region);
67 | 
68 |     void record_copy_regions(const VkMat& src, const VkMat& dst, const std::vector<VkBufferCopy>& regions);
69 | 
70 |     void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& bindings, const std::vector<vk_constant_type>& constants, const VkMat& m);
71 | 
72 |     void record_write_timestamp(uint32_t query);
73 | 
74 |     int submit_and_wait();
75 | 
76 |     int reset();
77 | 
78 | #if NCNN_BENCHMARK
79 |     int create_query_pool(uint32_t query_count);
80 | 
81 |     int get_query_pool_results(uint32_t first_query, uint32_t query_count, std::vector<uint64_t>& results);
82 | #endif // NCNN_BENCHMARK
83 | 
84 | protected:
85 |     // record pipeline things
86 |     void record_bind_pipeline(VkPipeline pipeline);
87 |     void record_update_bindings(VkPipelineLayout pipeline_layout, VkDescriptorSetLayout descriptorset_layout, VkDescriptorUpdateTemplateKHR descriptor_update_template, const std::vector<VkMat>& bindings);
88 |     void record_push_constants(VkPipelineLayout pipeline_layout, const std::vector<vk_constant_type>& constants);
89 |     void record_dispatch(const uint32_t* group_count_xyz);
90 | 
91 |     // record barrier things
92 |     void record_transfer_compute_barrier(const VkMat& m);
93 |     void record_compute_transfer_barrier(const VkMat& m);
94 |     void record_compute_compute_barrier(const VkMat& m);
95 |     void record_transfer_transfer_barrier(const VkMat& m);
96 | 
97 |     // record prepare things
98 |     void record_prepare_transfer_barrier(const VkMat& m);
99 |     void record_prepare_compute_barrier(const VkMat& m);
100 | 
101 | #if NCNN_BENCHMARK
102 |     void reset_query_pool();
103 | #endif // NCNN_BENCHMARK
104 | 
105 | protected:
106 |     // recording issue
107 |     void copy_buffer(VkBuffer src, size_t src_offset, VkBuffer dst, size_t dst_offset, size_t size);
108 |     void copy_buffer_regions(VkBuffer src, VkBuffer dst, const std::vector<VkBufferCopy>& regions);
109 |     void bind_pipeline(VkPipeline pipeline);
110 |     void bind_descriptorset(VkPipelineLayout pipeline_layout, VkDescriptorSet descriptorset);
111 |     void update_bindings(VkPipelineLayout pipeline_layout, VkDescriptorUpdateTemplateKHR descriptor_update_template, const std::vector<VkDescriptorBufferInfo>& descriptorBufferInfos);
112 |     void push_constants(VkPipelineLayout pipeline_layout, const std::vector<vk_constant_type>& constants);
113 |     void dispatch(const uint32_t* group_count_xyz);
114 |     void transfer_compute_barrier(VkBuffer buffer, size_t offset, size_t size);
115 |     void compute_transfer_barrier(VkBuffer buffer, size_t offset, size_t size);
116 |     void compute_compute_barrier(VkBuffer buffer, size_t offset, size_t size);
117 |     void transfer_transfer_barrier(VkBuffer buffer, size_t offset, size_t size);
118 | #if NCNN_BENCHMARK
119 |     void write_timestamp(uint32_t query);
120 | #endif // NCNN_BENCHMARK
121 | 
122 | protected:
123 |     // delayed record
124 |     // the good-old path for device without VK_KHR_push_descriptor
125 |     std::vector<VkDescriptorPool> descriptor_pools;
126 |     std::vector<VkDescriptorSet> descriptorsets;
127 |     struct record_type
128 |     {
129 |         // 0=copy
130 |         // 1=copy regions
131 |         // 2=bind pipeline
132 |         // 3=bind descriptorset
133 |         // 4=push constants
134 |         // 5=dispatch
135 |         // 6=transfer-compute barrier
136 |         // 7=compute-transfer barrier
137 |         // 8=compute-compute barrier
138 |         // 9=transfer-transfer barrier
139 |         // 10=write timestamp
140 |         int type;
141 | 
142 |         union
143 |         {
144 |         struct { VkBuffer src; size_t src_offset; VkBuffer dst; size_t dst_offset; size_t size; } copy;
145 |         struct { VkBuffer src; VkBuffer dst; } copy_regions;
146 |         struct { VkPipeline pipeline; } bind_pipeline;
147 |         struct { VkPipelineLayout pipeline_layout; VkDescriptorSet descriptorset; } bind_descriptorset;
148 |         struct { VkPipelineLayout pipeline_layout; } push_constants;
149 |         struct { uint32_t group_count_xyz[3]; } dispatch;
150 |         struct { VkBuffer buffer; size_t offset; size_t size; } transfer_compute_barrier;
151 |         struct { VkBuffer buffer; size_t offset; size_t size; } compute_transfer_barrier;
152 |         struct { VkBuffer buffer; size_t offset; size_t size; } compute_compute_barrier;
153 |         struct { VkBuffer buffer; size_t offset; size_t size; } transfer_transfer_barrier;
154 | #if NCNN_BENCHMARK
155 |         struct { uint32_t query; } write_timestamp;
156 | #endif // NCNN_BENCHMARK
157 |         };
158 | 
159 |         std::vector<VkBufferCopy> regions;
160 |         std::vector<vk_constant_type> constants;
161 |     };
162 |     std::vector<record_type> delayed_records;
163 | 
164 | #if NCNN_BENCHMARK
165 |     uint32_t query_count;
166 |     VkQueryPool query_pool;
167 | #endif // NCNN_BENCHMARK
168 | };
169 | 
170 | class VkTransfer : public Command
171 | {
172 | public:
173 |     VkTransfer(const VulkanDevice* vkdev);
174 |     ~VkTransfer();
175 | 
176 |     void record_upload(const Mat& src, VkMat& dst, const Option& opt);
177 | 
178 |     int submit_and_wait();
179 | 
180 | public:
181 |     VkAllocator* weight_vkallocator;
182 |     VkAllocator* staging_vkallocator;
183 | 
184 | protected:
185 |     // recording issue
186 |     void copy_buffer(VkBuffer src, size_t src_offset, VkBuffer dst, size_t dst_offset, size_t size);
187 |     void copy_buffer_regions(VkBuffer src, VkBuffer dst, const std::vector<VkBufferCopy>& regions);
188 | 
189 | protected:
190 |     size_t buffer_offset_alignment;
191 |     VkBufferMemory* staging_data;
192 | 
193 |     // delayed record
194 |     struct record_type
195 |     {
196 |         size_t size;
197 |         Mat mat;
198 |         VkMat vkmat;
199 |     };
200 |     std::vector<record_type> delayed_records;
201 | };
202 | 
203 | } // namespace ncnn
204 | 
205 | #endif // NCNN_VULKAN
206 | 
207 | #endif // NCNN_COMMAND_H
208 | 
--------------------------------------------------------------------------------
/data/data_augment.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import random
4 | from utils.box_utils import matrix_iof
5 | 
6 | 
7 | def _crop(image, boxes, labels, landm, img_dim):
8 |     height, width, _ = image.shape
9 |     pad_image_flag = True
10 | 
11 |     for _ in range(250):
12 |         if random.uniform(0, 1) <= 0.2:
13 |             scale = 1.0
14 |         else:
15 |             scale = random.uniform(0.3, 1.0)
16 |         # PRE_SCALES = [0.3, 0.45, 0.6, 0.8, 1.0]
17 |         # scale = random.choice(PRE_SCALES)
18 |         short_side = min(width, height)
19 |         w = int(scale * short_side)
20 |         h = w
21 | 
22 |         if width == w:
23 |             l = 0
24 |         else:
25 |             l = random.randrange(width - w)
26 |         if height == h:
27 |             t = 0
28 |         else:
29 |             t = random.randrange(height - h)
30 |         roi = np.array((l, t, l + w, t + h))
31 | 
32 |         value = matrix_iof(boxes, roi[np.newaxis])
33 |         flag = (value >= 1)  # keep only crops that fully contain at least one GT box
34 |         if not flag.any():
35 |             continue
36 | 
37 |         centers = (boxes[:, :2] + boxes[:, 2:]) / 2
38 |         mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1)
39 |         boxes_t = boxes[mask_a].copy()
40 |         labels_t = labels[mask_a].copy()
41 |         landms_t = landm[mask_a].copy()
42 |         landms_t = landms_t.reshape([-1, 5, 2])
43 | 
44 |         if boxes_t.shape[0] == 0:
45 |             continue
46 | 
47 |         image_t = image[roi[1]:roi[3], roi[0]:roi[2]]
48 | 
49 |         boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2])
50 |         boxes_t[:, :2] -= roi[:2]
51 |         boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:])
52 |         boxes_t[:, 2:] -= roi[:2]
53 | 
54 |         # landm
55 |         landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2]
56 |         landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0]))
57 |         landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2])
58 |         landms_t = landms_t.reshape([-1, 10])
59 | 
60 | 
61 |         # make sure that the cropped image contains at least one face larger than 5 pixels at training image scale
62 |         b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim
63 |         b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim
64 |         mask_b = np.minimum(b_w_t, b_h_t) > 5
65 |         boxes_t = boxes_t[mask_b]
66 |         labels_t = labels_t[mask_b]
67 |         landms_t = landms_t[mask_b]
68 | 
69 |         if boxes_t.shape[0] == 0:
70 |             continue
71 | 
72 |         pad_image_flag = False
73 | 
74 |         return image_t, boxes_t, labels_t, landms_t, pad_image_flag
75 |     return image, boxes, labels, landm, pad_image_flag
76 | 
77 | 
78 | def _distort(image):
79 | 
80 |     def _convert(image, alpha=1, beta=0):
81 |         tmp = image.astype(float) * alpha + beta
82 |         tmp[tmp < 0] = 0
83 |         tmp[tmp > 255] = 255
84 |         image[:] = tmp
85 | 
86 |     image = image.copy()
87 | 
88 |     if random.randrange(2):
89 | 
90 |         # brightness distortion
91 |         if random.randrange(2):
92 |             _convert(image, beta=random.uniform(-32, 32))
93 | 
94 |         # contrast distortion
95 |         if random.randrange(2):
96 |             _convert(image, alpha=random.uniform(0.5, 1.5))
97 | 
98 |         image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
99 | 
100 |         # saturation distortion
101 |         if random.randrange(2):
102 |             _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))
103 | 
104 |         # hue distortion
105 |         if random.randrange(2):
106 |             tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
107 |             tmp %= 180
108 |             image[:, :, 0] = tmp
109 | 
110 |         image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
111 | 
112 |     else:
113 | 
114 |         # brightness distortion
115 |         if random.randrange(2):
116 |             _convert(image, beta=random.uniform(-32, 32))
117 | 
118 |         image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
119 | 
120 |         # saturation distortion
121 |         if random.randrange(2):
122 |             _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))
123 | 
124 |         # hue distortion
125 |         if random.randrange(2):
126 |             tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
127 |             tmp %= 180
128 |             image[:, :, 0] = tmp
129 | 
130 |         image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
131 | 
132 |         # contrast distortion
133 |         if random.randrange(2):
134 |             _convert(image, alpha=random.uniform(0.5, 1.5))
135 | 
136 |     return image
137 | 
138 | 
139 | def _expand(image, boxes, fill, p):
140 |     if random.randrange(2):
141 |         return image, boxes
142 | 
143 |     height, width, depth = image.shape
144 | 
145 |     scale = random.uniform(1, p)
146 |     w = int(scale * width)
147 |     h = int(scale * height)
148 | 
149 |     left = random.randint(0, w - width)
150 |     top = random.randint(0, h - height)
151 | 
152 |     boxes_t = boxes.copy()
153 |     boxes_t[:, :2] += (left, top)
154 |     boxes_t[:, 2:] += (left, top)
155 |     expand_image = np.empty(
156 |         (h, w, depth),
157 |         dtype=image.dtype)
158 |     expand_image[:, :] = fill
159 |     expand_image[top:top + height, left:left + width] = image
160 |     image = expand_image
161 | 
162 |     return image, boxes_t
163 | 
164 | 
165 | def _mirror(image, boxes, landms):
166 |     _, width, _ = image.shape
167 |     if random.randrange(2):
168 |         image = image[:, ::-1]
169 |         boxes = boxes.copy()
170 |         boxes[:, 0::2] = width - boxes[:, 2::-2]
171 | 
172 |         # landm
173 |         landms = landms.copy()
174 |         landms = landms.reshape([-1, 5, 2])
175 |         landms[:, :, 0] = width - landms[:, :, 0]  # mirror x coordinates
176 |         tmp = landms[:, 1, :].copy()  # swap left/right eye after the flip
177 |         landms[:, 1, :] = landms[:, 0, :]
178 |         landms[:, 0, :] = tmp
179 |         tmp1 = landms[:, 4, :].copy()  # swap left/right mouth corner
180 |         landms[:, 4, :] = landms[:, 3, :]
181 |         landms[:, 3, :] = tmp1
182 |         landms = landms.reshape([-1, 10])
183 | 
184 |     return image, boxes, landms
185 | 
186 | 
187 | def _pad_to_square(image, rgb_mean, pad_image_flag):
188 |     if not pad_image_flag:
189 |         return image
190 |     height, width, _ = image.shape
191 |     long_side = max(width, height)
192 |     image_t = np.empty((long_side, long_side, 3), dtype=image.dtype)
193 |     image_t[:, :] = rgb_mean
194 |     image_t[0:0 + height, 0:0 + width] = image
195 |     return image_t
196 | 
197 | 
198 | def _resize_subtract_mean(image, insize, rgb_mean):
199 |     interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
200 |     interp_method = interp_methods[random.randrange(5)]
201 |     image = cv2.resize(image, (insize, insize), interpolation=interp_method)
202 |     image = image.astype(np.float32)
203 |     image -= rgb_mean
204 |     return image.transpose(2, 0, 1)
205 | 
206 | 
207 | class preproc(object):
208 | 
209 |     def __init__(self, img_dim, rgb_means):
210 |         self.img_dim = img_dim
211 |         self.rgb_means = rgb_means
212 | 
213 |     def __call__(self, image, targets):
214 |         assert targets.shape[0] > 0, "this image does not have gt"
215 | 
216 |         boxes = targets[:, :4].copy()
217 |         labels = targets[:, -1].copy()
218 |         landm = targets[:, 4:-1].copy()
219 | 
220 |         image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop(image, boxes, labels, landm, self.img_dim)
221 |         image_t = _distort(image_t)
222 |         image_t = _pad_to_square(image_t, self.rgb_means, pad_image_flag)
223 |         image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t)
224 |         height, width, _ = image_t.shape
225 |         image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means)
226 |         boxes_t[:, 0::2] /= width
227 |         boxes_t[:, 1::2] /= height
228 | 
229 |         landm_t[:, 0::2] /= width
230 |         landm_t[:, 1::2] /= height
231 | 
232 |         labels_t = np.expand_dims(labels_t, 1)
233 |         targets_t = np.hstack((boxes_t, landm_t, labels_t))
234 | 
235 |         return image_t, targets_t
236 | 
--------------------------------------------------------------------------------
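A minimal sketch of driving this augmentation pipeline, assuming the repo root is on PYTHONPATH (so `utils.box_utils` resolves); the BGR means match those used in the original Retinaface training code, and each target row is 4 box coords + 10 landmark coords + 1 label:

```python
import numpy as np
from data.data_augment import preproc  # repo root must be on PYTHONPATH

# one dummy 640x640 BGR image with a single annotated face
img = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
targets = np.array([[100, 100, 300, 300,            # x1, y1, x2, y2
                     150, 180, 250, 180, 200, 220,  # eyes, nose
                     160, 260, 240, 260,            # mouth corners
                     1]], dtype=np.float32)         # face label

p = preproc(img_dim=320, rgb_means=(104, 117, 123))
img_t, targets_t = p(img, targets)
print(img_t.shape)      # (3, 320, 320), CHW float32, mean-subtracted
print(targets_t.shape)  # (M, 15), coordinates normalized to [0, 1]
```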