├── .gitignore ├── .gitmodules ├── LICENSE ├── MNN ├── CMakeLists.txt ├── README.md ├── imgs │ ├── 1.jpg │ ├── 2.jpg │ ├── 3.jpg │ └── 4.jpg ├── mnn │ ├── include │ │ ├── AutoTime.hpp │ │ ├── Backend.hpp │ │ ├── ErrorCode.hpp │ │ ├── HalideRuntime.h │ │ ├── ImageProcess.hpp │ │ ├── Interpreter.hpp │ │ ├── MNNDefine.h │ │ ├── MNNForwardType.h │ │ ├── MNNSharedContext.h │ │ ├── Matrix.h │ │ ├── NonCopyable.hpp │ │ ├── Rect.h │ │ ├── Tensor.hpp │ │ └── revertMNNModel.hpp │ └── lib │ │ └── libMNN.so ├── model │ ├── version-RFB │ │ ├── RFB-320-quant-ADMM-32.mnn │ │ ├── RFB-320-quant-KL-5792.mnn │ │ └── RFB-320.mnn │ └── version-slim │ │ ├── slim-320-quant-ADMM-50.mnn │ │ └── slim-320.mnn ├── python │ ├── README.md │ └── ultraface_py_mnn.py ├── result.jpg └── src │ ├── UltraFace.cpp │ ├── UltraFace.hpp │ └── main.cpp ├── README.md ├── README_CN.md ├── caffe ├── MyCaffe.py ├── README.md ├── convertCaffe.py ├── model │ ├── RFB-320 │ │ ├── RFB-320.caffemodel │ │ └── RFB-320.prototxt │ └── Slim-320 │ │ ├── slim-320.caffemodel │ │ └── slim-320.prototxt ├── onnx2caffe │ ├── __init__.py │ ├── _error_utils.py │ ├── _graph.py │ ├── _operators.py │ ├── _transformers.py │ └── _weightloader.py ├── ultra_face_caffe_inference.py └── ultra_face_opencvdnn_inference.py ├── cal_flops.py ├── check_gt_box.py ├── convert_to_onnx.py ├── data ├── retinaface_labels │ ├── test │ │ └── label.txt │ ├── train │ │ └── label.txt │ └── val │ │ └── label.txt └── wider_face_2_voc_add_landmark.py ├── detect_imgs.py ├── detect_imgs_onnx.py ├── imgs ├── 1.jpg ├── 10.jpg ├── 11.jpg ├── 12.jpg ├── 13.jpg ├── 15.jpg ├── 16.jpg ├── 17.jpg ├── 18.jpg ├── 19.jpg ├── 2.jpg ├── 20.jpg ├── 21.jpg ├── 22.jpg ├── 23.jpg ├── 24.jpg ├── 25.jpg ├── 26.jpg ├── 27.jpg ├── 3.jpg ├── 5.jpg ├── 6.jpg ├── 8.jpg └── 9.jpg ├── masked_face ├── README.md ├── detect_imgs.py ├── imgs │ ├── img1.jpeg │ ├── img2.jpeg │ ├── img3.jpg │ ├── img4.jpeg │ ├── img5.jpeg │ ├── img6.webp │ ├── img7.webp │ └── img8.jpeg ├── mafa2voc.py ├── pretrained │ ├── RFB-1280-masked_face-v2.onnx │ ├── RFB-320-masked_face-v2.pth │ ├── RFB-640-masked_face-v2.onnx │ └── RFB-640-masked_face-v2.pth ├── readme_imgs │ ├── img1.jpeg │ ├── img2.jpeg │ ├── img3.jpg │ ├── img4.jpeg │ ├── img5.jpeg │ ├── img6.webp │ ├── img7.webp │ └── img8.jpeg └── voc-model-labels.txt ├── models ├── onnx │ ├── version-RFB-320.onnx │ ├── version-RFB-320_simplified.onnx │ ├── version-RFB-320_without_postprocessing.onnx │ ├── version-RFB-640.onnx │ ├── version-slim-320.onnx │ ├── version-slim-320_simplified.onnx │ └── version-slim-320_without_postprocessing.onnx ├── pretrained │ ├── version-RFB-320.pth │ ├── version-RFB-640.pth │ ├── version-slim-320.pth │ └── version-slim-640.pth ├── readme └── voc-model-labels.txt ├── ncnn ├── .clang-format ├── CMakeLists.txt ├── README.md ├── data │ ├── result.jpg │ ├── test.jpg │ ├── version-RFB │ │ ├── RFB-320.bin │ │ └── RFB-320.param │ └── version-slim │ │ ├── slim_320.bin │ │ └── slim_320.param └── src │ ├── UltraFace.cpp │ ├── UltraFace.hpp │ └── main.cpp ├── opencv_dnn ├── cv_dnn_ultraface.cpp └── cv_dnn_ultraface.h ├── paddle ├── data ├── train-version-RFB.sh ├── train-version-slim.sh ├── train.py └── vision │ ├── __init__.py │ ├── datasets │ ├── __init__.py │ └── voc_dataset.py │ ├── nn │ ├── __init__.py │ ├── mb_tiny.py │ ├── mb_tiny_RFB.py │ └── multibox_loss.py │ ├── ssd │ ├── __init__.py │ ├── config │ │ ├── __init__.py │ │ └── fd_config.py │ ├── data_preprocessing.py │ ├── mb_tiny_RFB_fd.py │ ├── mb_tiny_fd.py │ ├── predictor.py │ └── ssd.py │ ├── transforms 
│ ├── __init__.py │ └── transforms.py │ └── utils │ ├── __init__.py │ ├── box_utils.py │ ├── box_utils_numpy.py │ └── misc.py ├── readme_imgs ├── 1.jpg ├── 2.jpg ├── 26.jpg ├── 27.jpg └── 4.jpg ├── requirements.txt ├── run_video_face_detect.py ├── run_video_face_detect_onnx.py ├── tf ├── README.md ├── backend │ ├── op.py │ └── utils.py ├── convert_tensorflow.py ├── det_image.py ├── export_models │ ├── RFB │ │ ├── saved_model.pb │ │ └── variables │ │ │ ├── variables.data-00000-of-00001 │ │ │ └── variables.index │ └── slim │ │ ├── saved_model.pb │ │ └── variables │ │ ├── variables.data-00000-of-00001 │ │ └── variables.index ├── imgs │ ├── test_input.jpg │ ├── test_output_RFB.jpg │ ├── test_output_origin_RFB.jpg │ ├── test_output_origin_slim.jpg │ └── test_output_slim.jpg ├── mapping_tables │ ├── rfb_320.json │ └── slim_320.json └── model │ ├── rfb_320.py │ └── slim_320.py ├── tflite ├── README.md ├── TFLiteFaceDetector.py ├── inference_test.py ├── model │ ├── tflite_RFB_320_without_postprocessing.py │ └── tflite_slim_320_without_postprocessing.py └── pretrained │ ├── version-RFB-320_without_postprocessing.tflite │ └── version-slim-320_without_postprocessing.tflite ├── train-version-RFB.sh ├── train-version-slim.sh ├── train.py ├── vision ├── __init__.py ├── datasets │ ├── __init__.py │ └── voc_dataset.py ├── nn │ ├── __init__.py │ ├── mb_tiny.py │ ├── mb_tiny_RFB.py │ └── multibox_loss.py ├── ssd │ ├── __init__.py │ ├── config │ │ ├── __init__.py │ │ └── fd_config.py │ ├── data_preprocessing.py │ ├── mb_tiny_RFB_fd.py │ ├── mb_tiny_fd.py │ ├── predictor.py │ └── ssd.py ├── transforms │ ├── __init__.py │ └── transforms.py └── utils │ ├── __init__.py │ ├── box_utils.py │ ├── box_utils_numpy.py │ └── misc.py └── widerface_evaluate ├── README.md ├── box_overlaps.pyx ├── evaluation.py ├── evaluation_on_widerface.py ├── ground_truth ├── wider_easy_val.mat ├── wider_face_val.mat ├── wider_hard_val.mat └── wider_medium_val.mat └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | data/wider_face_add_lm_10_10 3 | 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | detect_imgs_results 9 | detect_imgs_results_onnx 10 | widerface_evaluation 11 | 12 | widerface_evaluate/build 13 | widerface_evaluate/*.so 14 | widerface_evaluate/*.c -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "ncnn/3rdparty/ncnn"] 2 | path = ncnn/3rdparty/ncnn 3 | url = https://github.com/Tencent/ncnn 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 linzai 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MNN/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | project(Ultra-face-mnn) 3 | 4 | set(CMAKE_CXX_STANDARD 11) 5 | 6 | find_package(OpenCV REQUIRED) 7 | include_directories( 8 | mnn/include 9 | src 10 | ) 11 | 12 | link_directories(mnn/lib) 13 | 14 | add_executable(Ultra-face-mnn src/main.cpp src/UltraFace.cpp) 15 | target_link_libraries(Ultra-face-mnn MNN ${OpenCV_LIBS}) -------------------------------------------------------------------------------- /MNN/README.md: -------------------------------------------------------------------------------- 1 | # C++ implementation of [Ultra-Light-Fast-Generic-Face-Detector-1MB](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB) with [MNN](https://github.com/alibaba/MNN) 2 | 3 | ## Build 4 | 5 | ```bash 6 | git clone --recursive --depth=1 https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB 7 | 8 | cd Ultra-Light-Fast-Generic-Face-Detector-1MB/MNN 9 | ``` 10 | 11 | * Replace **libMNN.so** under ./mnn/lib with your compiled libMNN.so, then: 12 | 13 | ```bash 14 | mkdir build && cd build && cmake .. 15 | make -j$(nproc) 16 | ``` 17 | 18 | ## Run 19 | * Use the FP32 model and run in FP16 mode: 20 | ```bash 21 | ./Ultra-face-mnn ../model/version-RFB/RFB-320.mnn ../imgs/1.jpg ../imgs/2.jpg ../imgs/3.jpg ../imgs/4.jpg 22 | ``` 23 | * Use the quantized INT8 model: 24 | ```bash 25 | ./Ultra-face-mnn ../model/version-RFB/RFB-320-quant-KL-5792.mnn ../imgs/1.jpg ../imgs/2.jpg ../imgs/3.jpg ../imgs/4.jpg 26 | ``` 27 | 28 | * We provide both converted MNN FP32 and **quantized INT8** models of version-slim-320 and version-RFB-320 in ./MNN/model. The xxx-quant-KL-xxx.mnn models are quantized with the **KL** method and the xxx-quant-ADMM-xxx.mnn models with the **ADMM** method. 29 | 30 | ## How to convert a pretrained model to MNN 31 | 32 | * Before converting the PyTorch pretrained model to ONNX, edit the code below in ```vision/ssd/ssd.py```: add the `boxes = locations` line and comment out the post-processing calls. Then use **convert_to_onnx.py** in the official repo to finish this step. 33 | 34 | ```python 35 | if self.is_test: 36 | confidences = F.softmax(confidences, dim=2) 37 | boxes = locations # this line should be added. 38 | #boxes = box_utils.convert_locations_to_boxes( 39 | # locations, self.priors, self.config.center_variance, self.config.size_variance 40 | #) 41 | # boxes = box_utils.center_form_to_corner_form(boxes) # these lines should be commented out. A detailed analysis is coming soon. 42 | return confidences, boxes 43 | else: 44 | return confidences, locations 45 | ``` 46 | This generates an ONNX model such as **version-RFB-320_without_postprocessing.onnx** in the onnx directory. (Rename the exported model accordingly.)
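For reference, the export step in **convert_to_onnx.py** boils down to roughly the sketch below; the RFB-320 checkpoint path, the CUDA device and the 320x240 input resolution are the repo defaults, and the output filename here is the renamed one that the later steps expect:

```python
# Sketch of the PyTorch -> ONNX export (mirrors convert_to_onnx.py, RFB-320 defaults assumed).
import torch

from vision.ssd.config.fd_config import define_img_size

define_img_size(320)  # must be called before importing the model factory
from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd

net = create_Mb_Tiny_RFB_fd(2, is_test=True)  # 2 classes: background + face
net.load("models/pretrained/version-RFB-320.pth")
net.eval()
net.to("cuda")

dummy_input = torch.randn(1, 3, 240, 320).to("cuda")  # N x C x H x W
torch.onnx.export(net, dummy_input,
                  "models/onnx/version-RFB-320_without_postprocessing.onnx",
                  input_names=['input'], output_names=['scores', 'boxes'])
```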
47 | * Then we can use this tool to simplify onnx : 48 | https://github.com/daquexian/onnx-simplifier 49 | 50 | ``` 51 | python3 -m onnxsim version-RFB-320_without_postprocessing.onnx version-RFB-320_simplified.onnx 52 | 53 | ``` 54 | 55 | Next, you can convert this onnx model like **version-RFB-320_simplified.onnx** into a MNN model. Here is a website for online conversion : https://convertmodel.com. You can also use the MNN compiled conversion tool **MNNConvert**. 56 | 57 | 58 | 59 | ## PS 60 | * Since MNN mainly accelerates model inference on mobile, so the INT8 quantified model will run slower on **PC** than FP32 model in CPU mode. 61 | * If you want to run faster, try using the version-slim model ,using lower-resolution inputs like 160x120 /128x96 or using quantified models(On the mobile). 62 | 63 | ## Result 64 | ![img1](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/MNN/result.jpg) 65 | -------------------------------------------------------------------------------- /MNN/imgs/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/imgs/1.jpg -------------------------------------------------------------------------------- /MNN/imgs/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/imgs/2.jpg -------------------------------------------------------------------------------- /MNN/imgs/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/imgs/3.jpg -------------------------------------------------------------------------------- /MNN/imgs/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/imgs/4.jpg -------------------------------------------------------------------------------- /MNN/mnn/include/AutoTime.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // AutoTime.hpp 3 | // MNN 4 | // 5 | // Created by MNN on 2018/07/27. 6 | // Copyright © 2018, Alibaba Group Holding Limited 7 | // 8 | 9 | #ifndef AutoTime_hpp 10 | #define AutoTime_hpp 11 | 12 | #include 13 | #include 14 | #include "MNNDefine.h" 15 | 16 | namespace MNN { 17 | 18 | /** time tracing util. prints duration between init and deinit. 
*/ 19 | class MNN_PUBLIC AutoTime { 20 | public: 21 | AutoTime(int line, const char* func); 22 | ~AutoTime(); 23 | AutoTime(const AutoTime&) = delete; 24 | AutoTime(const AutoTime&&) = delete; 25 | AutoTime& operator=(const AutoTime&) = delete; 26 | AutoTime& operator=(const AutoTime&&) = delete; 27 | 28 | private: 29 | int mLine; 30 | char* mName; 31 | uint64_t mCurrentTime; 32 | }; 33 | } // namespace MNN 34 | 35 | #ifdef MNN_OPEN_TIME_TRACE 36 | #define AUTOTIME MNN::AutoTime ___t(__LINE__, __func__) 37 | #else 38 | #define AUTOTIME 39 | #endif 40 | 41 | #endif /* AutoTime_hpp */ 42 | -------------------------------------------------------------------------------- /MNN/mnn/include/ErrorCode.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // ErrorCode.hpp 3 | // MNN 4 | // 5 | // Created by MNN on 2018/09/18. 6 | // Copyright © 2018, Alibaba Group Holding Limited 7 | // 8 | 9 | #ifndef ErrorCode_h 10 | #define ErrorCode_h 11 | 12 | namespace MNN { 13 | enum ErrorCode { 14 | #ifdef NO_ERROR 15 | #undef NO_ERROR 16 | #endif // NO_ERROR 17 | NO_ERROR = 0, 18 | OUT_OF_MEMORY = 1, 19 | NOT_SUPPORT = 2, 20 | COMPUTE_SIZE_ERROR = 3, 21 | NO_EXECUTION = 4, 22 | INVALID_VALUE = 5, 23 | 24 | // User error 25 | INPUT_DATA_ERROR = 10, 26 | CALL_BACK_STOP = 11, 27 | 28 | // Op Resize Error 29 | TENSOR_NOT_SUPPORT = 20, 30 | TENSOR_NEED_DIVIDE = 21, 31 | }; 32 | } // namespace MNN 33 | 34 | #endif /* ErrorCode_h */ 35 | -------------------------------------------------------------------------------- /MNN/mnn/include/ImageProcess.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // ImageProcess.hpp 3 | // MNN 4 | // 5 | // Created by MNN on 2018/09/19. 6 | // Copyright © 2018, Alibaba Group Holding Limited 7 | // 8 | 9 | #ifndef ImageProcess_hpp 10 | #define ImageProcess_hpp 11 | 12 | #include "ErrorCode.hpp" 13 | #include "Matrix.h" 14 | #include "Tensor.hpp" 15 | 16 | namespace MNN { 17 | namespace CV { 18 | enum ImageFormat { 19 | RGBA = 0, 20 | RGB, 21 | BGR, 22 | GRAY, 23 | BGRA, 24 | YUV_NV21 = 11, 25 | }; 26 | 27 | enum Filter { NEAREST = 0, BILINEAR = 1, BICUBIC = 2 }; 28 | 29 | enum Wrap { CLAMP_TO_EDGE = 0, ZERO = 1, REPEAT = 2 }; 30 | 31 | /** 32 | * handle image process for tensor. 33 | * step: 34 | * 1: Do transform compute and get points 35 | * 2: Sample line and do format convert 36 | * 3: Turn RGBA to float tensor, and do sub and normalize 37 | */ 38 | class MNN_PUBLIC ImageProcess { 39 | public: 40 | struct Inside; 41 | struct Config { 42 | /** data filter */ 43 | Filter filterType = NEAREST; 44 | /** format of source data */ 45 | ImageFormat sourceFormat = RGBA; 46 | /** format of destination data */ 47 | ImageFormat destFormat = RGBA; 48 | 49 | // Only valid if the dest type is float 50 | float mean[4] = {0.0f, 0.0f, 0.0f, 0.0f}; 51 | float normal[4] = {1.0f, 1.0f, 1.0f, 1.0f}; 52 | 53 | /** edge wrapper */ 54 | Wrap wrap = CLAMP_TO_EDGE; 55 | }; 56 | 57 | public: 58 | /** 59 | * @brief create image process with given config for given tensor. 60 | * @param config given config. 61 | * @param dstTensor given tensor. 62 | * @return image processor. 63 | */ 64 | static ImageProcess* create(const Config& config, const Tensor* dstTensor = nullptr); 65 | 66 | /** 67 | * @brief create image process with given config for given tensor. 
68 | * @param means given means 69 | * @param meanCount given means count 70 | * @param normals given normals 71 | * @param normalCount given normal count 72 | * @param sourceFormat format of source data 73 | * @param destFormat format of destination data 74 | * @param dstTensor given tensor. 75 | * @return image processor. 76 | */ 77 | static ImageProcess* create(const ImageFormat sourceFormat = RGBA, const ImageFormat destFormat = RGBA, 78 | const float* means = nullptr, const int meanCount = 0, const float* normals = nullptr, 79 | const int normalCount = 0, const Tensor* dstTensor = nullptr); 80 | 81 | ~ImageProcess(); 82 | 83 | /** 84 | * @brief get affine transform matrix. 85 | * @return affine transform matrix. 86 | */ 87 | inline const Matrix& matrix() const { 88 | return mTransform; 89 | } 90 | void setMatrix(const Matrix& matrix); 91 | 92 | /** 93 | * @brief convert source data to given tensor. 94 | * @param source source data. 95 | * @param iw source width. 96 | * @param ih source height. 97 | * @param stride number of elements per row. eg: 100 width RGB contains at least 300 elements. 98 | * @param dest given tensor. 99 | * @return result code. 100 | */ 101 | ErrorCode convert(const uint8_t* source, int iw, int ih, int stride, Tensor* dest); 102 | 103 | /** 104 | * @brief convert source data to given tensor. 105 | * @param source source data. 106 | * @param iw source width. 107 | * @param ih source height. 108 | * @param stride number of elements per row. eg: 100 width RGB contains at least 300 elements. 109 | * @param dest dest data. 110 | * @param ow output width. 111 | * @param oh output height. 112 | * @param outputBpp output bpp, if 0, set as the save and config.destFormat. 113 | * @param outputStride output stride, if 0, set as ow * outputBpp. 114 | * @param type Only support halide_type_of and halide_type_of. 115 | * @return result code. 116 | */ 117 | ErrorCode convert(const uint8_t* source, int iw, int ih, int stride, void* dest, int ow, int oh, int outputBpp = 0, 118 | int outputStride = 0, halide_type_t type = halide_type_of()); 119 | 120 | /** 121 | * @brief create tensor with given data. 122 | * @param w image width. 123 | * @param h image height. 124 | * @param bpp bytes per pixel. 125 | * @param p pixel data pointer. 126 | * @return created tensor. 127 | */ 128 | template 129 | static Tensor* createImageTensor(int w, int h, int bpp, void* p = nullptr) { 130 | return createImageTensor(halide_type_of(), w, h, bpp, p); 131 | } 132 | static Tensor* createImageTensor(halide_type_t type, int w, int h, int bpp, void* p = nullptr); 133 | 134 | private: 135 | ImageProcess(const Config& config); 136 | Matrix mTransform; 137 | Matrix mTransformInvert; 138 | Inside* mInside; 139 | }; 140 | } // namespace CV 141 | } // namespace MNN 142 | 143 | #endif /* ImageProcess_hpp */ 144 | -------------------------------------------------------------------------------- /MNN/mnn/include/MNNDefine.h: -------------------------------------------------------------------------------- 1 | // 2 | // MNNDefine.h 3 | // MNN 4 | // 5 | // Created by MNN on 2018/08/09. 6 | // Copyright © 2018, Alibaba Group Holding Limited 7 | // 8 | 9 | #ifndef MNNDefine_h 10 | #define MNNDefine_h 11 | 12 | #include 13 | #include 14 | 15 | #if defined(__APPLE__) 16 | #include "TargetConditionals.h" 17 | #if TARGET_OS_IPHONE 18 | #define MNN_BUILD_FOR_IOS 19 | #endif 20 | #endif 21 | 22 | #ifdef MNN_USE_LOGCAT 23 | #include 24 | #define MNN_ERROR(format, ...) 
__android_log_print(ANDROID_LOG_ERROR, "MNNJNI", format, ##__VA_ARGS__) 25 | #define MNN_PRINT(format, ...) __android_log_print(ANDROID_LOG_INFO, "MNNJNI", format, ##__VA_ARGS__) 26 | #else 27 | #define MNN_PRINT(format, ...) printf(format, ##__VA_ARGS__) 28 | #define MNN_ERROR(format, ...) printf(format, ##__VA_ARGS__) 29 | #endif 30 | 31 | #ifdef DEBUG 32 | #define MNN_ASSERT(x) \ 33 | { \ 34 | int res = (x); \ 35 | if (!res) { \ 36 | MNN_ERROR("Error for %s, %d\n", __FILE__, __LINE__); \ 37 | assert(res); \ 38 | } \ 39 | } 40 | #else 41 | #define MNN_ASSERT(x) \ 42 | { \ 43 | int res = (x); \ 44 | if (!res) { \ 45 | MNN_ERROR("Error for %s, %d\n", __FILE__, __LINE__); \ 46 | } \ 47 | } 48 | #endif 49 | 50 | #define FUNC_PRINT(x) MNN_PRINT(#x "=%d in %s, %d \n", x, __func__, __LINE__); 51 | #define FUNC_PRINT_ALL(x, type) MNN_PRINT(#x "=" #type " %" #type " in %s, %d \n", x, __func__, __LINE__); 52 | 53 | #define MNN_CHECK(success, log) \ 54 | if(!(success)){ \ 55 | MNN_ERROR("Check failed: %s ==> %s\n", #success, #log); \ 56 | } 57 | 58 | #if defined(_MSC_VER) 59 | #if defined(BUILDING_MNN_DLL) 60 | #define MNN_PUBLIC __declspec(dllexport) 61 | #elif defined(USING_MNN_DLL) 62 | #define MNN_PUBLIC __declspec(dllimport) 63 | #else 64 | #define MNN_PUBLIC 65 | #endif 66 | #else 67 | #define MNN_PUBLIC __attribute__((visibility("default"))) 68 | #endif 69 | 70 | #endif /* MNNDefine_h */ 71 | -------------------------------------------------------------------------------- /MNN/mnn/include/MNNForwardType.h: -------------------------------------------------------------------------------- 1 | // 2 | // MNNForwardType.h 3 | // MNN 4 | // 5 | // Created by MNN on 2019/01/19. 6 | // Copyright © 2018, Alibaba Group Holding Limited 7 | // 8 | 9 | #ifndef MNNForwardType_h 10 | #define MNNForwardType_h 11 | #include 12 | #include 13 | 14 | typedef enum { 15 | MNN_FORWARD_CPU = 0, 16 | 17 | /* 18 | Firtly find the first available backends not equal to CPU 19 | If no other backends, use cpu 20 | */ 21 | MNN_FORWARD_AUTO = 4, 22 | 23 | /*Hand write metal*/ 24 | MNN_FORWARD_METAL = 1, 25 | 26 | /*Use IOS's MPS instead of hand-write metal, Not Support yet*/ 27 | MNN_FORWARD_MPS = 2, 28 | 29 | /*Android / Common Device GPU API*/ 30 | MNN_FORWARD_OPENCL = 3, 31 | MNN_FORWARD_OPENGL = 6, 32 | MNN_FORWARD_VULKAN = 7, 33 | 34 | /*Android 8.1's NNAPI, Not Support yet*/ 35 | MNN_FORWARD_NN = 5, 36 | 37 | /*User can use API from Backend.hpp to add or search Backend*/ 38 | MNN_FORWARD_USER_0 = 8, 39 | MNN_FORWARD_USER_1 = 9, 40 | MNN_FORWARD_USER_2 = 10, 41 | MNN_FORWARD_USER_3 = 11, 42 | 43 | MNN_FORWARD_ALL 44 | } MNNForwardType; 45 | #ifdef __cplusplus 46 | namespace MNN { 47 | struct BackendConfig { 48 | enum MemoryMode { Memory_Normal = 0, Memory_High, Memory_Low }; 49 | 50 | MemoryMode memory = Memory_Normal; 51 | 52 | enum PowerMode { Power_Normal = 0, Power_High, Power_Low }; 53 | 54 | PowerMode power = Power_Normal; 55 | 56 | enum PrecisionMode { Precision_Normal = 0, Precision_High, Precision_Low }; 57 | 58 | PrecisionMode precision = Precision_Normal; 59 | 60 | /** user defined context */ 61 | union { 62 | void* sharedContext = nullptr; 63 | size_t flags; // Valid for CPU Backend 64 | }; 65 | }; 66 | }; // namespace MNN 67 | #endif 68 | #endif /* MNNForwardType_h */ 69 | -------------------------------------------------------------------------------- /MNN/mnn/include/MNNSharedContext.h: -------------------------------------------------------------------------------- 1 | // 2 | // MNNSharedContext.h 3 | 
// MNN 4 | // 5 | // Created by MNN on 2018/10/11. 6 | // Copyright © 2018, Alibaba Group Holding Limited 7 | // 8 | 9 | #ifndef MNNSharedContext_h 10 | #define MNNSharedContext_h 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | #include /*uint32_t*/ 16 | 17 | #ifndef VK_DEFINE_HANDLE 18 | #define VK_DEFINE_HANDLE(object) typedef struct object##_T* object; 19 | VK_DEFINE_HANDLE(VkInstance) 20 | VK_DEFINE_HANDLE(VkPhysicalDevice) 21 | VK_DEFINE_HANDLE(VkDevice) 22 | VK_DEFINE_HANDLE(VkQueue) 23 | #endif 24 | struct MNNVulkanContext { 25 | VkInstance pInstance; 26 | VkPhysicalDevice pPhysicalDevice; 27 | VkDevice pDevice; 28 | VkQueue pQueue; 29 | uint32_t iQueueFamilyIndex; 30 | }; 31 | #ifdef __cplusplus 32 | } 33 | #endif 34 | 35 | #endif /* MNNSharedContext_h */ 36 | -------------------------------------------------------------------------------- /MNN/mnn/include/NonCopyable.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // NonCopyable.hpp 3 | // MNN 4 | // 5 | // Created by MNN on 2018/09/19. 6 | // Copyright © 2018, Alibaba Group Holding Limited 7 | // 8 | 9 | #ifndef NonCopyable_hpp 10 | #define NonCopyable_hpp 11 | 12 | namespace MNN { 13 | /** protocol class. used to delete assignment operator. */ 14 | class NonCopyable { 15 | public: 16 | NonCopyable() = default; 17 | NonCopyable(const NonCopyable&) = delete; 18 | NonCopyable(const NonCopyable&&) = delete; 19 | NonCopyable& operator=(const NonCopyable&) = delete; 20 | NonCopyable& operator=(const NonCopyable&&) = delete; 21 | }; 22 | } // namespace MNN 23 | 24 | #endif /* NonCopyable_hpp */ 25 | -------------------------------------------------------------------------------- /MNN/mnn/include/revertMNNModel.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // revertMNNModel.hpp 3 | // MNN 4 | // 5 | // Created by MNN on 2019/01/31. 
6 | // Copyright © 2018, Alibaba Group Holding Limited 7 | // 8 | 9 | #ifndef REVERTMNNMODEL_HPP 10 | #define REVERTMNNMODEL_HPP 11 | 12 | #include "mnn/MNN_generated.h" 13 | 14 | class Revert { 15 | public: 16 | Revert(const char* originalModelFileName); 17 | ~Revert(); 18 | void* getBuffer() const; 19 | const size_t getBufferSize() const; 20 | void initialize(); 21 | static float getRandValue(); 22 | 23 | private: 24 | Revert(); 25 | std::unique_ptr mMNNNet; 26 | size_t mBufferSize; 27 | std::shared_ptr mBuffer; 28 | void randStart(); 29 | void packMNNNet(); 30 | }; 31 | 32 | #endif // REVERTMNNMODEL_HPP 33 | -------------------------------------------------------------------------------- /MNN/mnn/lib/libMNN.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/mnn/lib/libMNN.so -------------------------------------------------------------------------------- /MNN/model/version-RFB/RFB-320-quant-ADMM-32.mnn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/model/version-RFB/RFB-320-quant-ADMM-32.mnn -------------------------------------------------------------------------------- /MNN/model/version-RFB/RFB-320-quant-KL-5792.mnn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/model/version-RFB/RFB-320-quant-KL-5792.mnn -------------------------------------------------------------------------------- /MNN/model/version-RFB/RFB-320.mnn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/model/version-RFB/RFB-320.mnn -------------------------------------------------------------------------------- /MNN/model/version-slim/slim-320-quant-ADMM-50.mnn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/model/version-slim/slim-320-quant-ADMM-50.mnn -------------------------------------------------------------------------------- /MNN/model/version-slim/slim-320.mnn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/model/version-slim/slim-320.mnn -------------------------------------------------------------------------------- /MNN/python/README.md: -------------------------------------------------------------------------------- 1 | # Python implemententation of [Ultra-Light-Fast-Generic-Face-Detector-1MB](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB) with [MNN](https://github.com/alibaba/MNN) 2 | 3 | ## How to use MNN in Python 4 | 5 | ### Install 6 | #### Install Depencies 7 | ##### graphviz 8 | for macOS: 9 | ```bash 10 | brew install graphviz 11 | ``` 12 | for Linux: 13 | ```bash 14 | apt-get install graphviz 15 | ``` 16 | 17 | #### Python Version Limitation 18 | Python2.7, 3.5, 3.6, 3.7 are supported, but for Windows, 
python2.7 is not supported. 19 | for macOS: 20 | ```bash 21 | pip install -U MNN 22 | ``` 23 | 24 | for Linux: 25 | As PyPi requires all wheels to be tagged with "ManyLinux", and old version pip can't get the "ManyLinux" Tagged wheel, thus you have to upgrade your pip to newer version in order to use "pip install" 26 | ```bash 27 | pip install -U pip 28 | pip install -U MNN 29 | ``` 30 | 31 | ## Run 32 | * Use FP32 model(version-RFB) and run in FP16 mode: 33 | ```bash 34 | python ultraface_py_mnn.py --model_path ../model/version-RFB/RFB-320.mnn 35 | ``` 36 | * Use quantized INT8 model: 37 | ```bash 38 | python ultraface_py_mnn.py --model_path ../model/version-RFB/RFB-320-quant-KL-5792.mnn 39 | ``` 40 | 41 | * We provide both converted MNN FP32 and **quantized INT8** models of version-slim-320 and version-RFB-320 in ./MNN/model . The xxx-quant-KL-xxx.mnn is quantified by the **KL** method and xxx-quant-ADMM-xxx.mnn is quantified by the **ADMM** method. 42 | 43 | 44 | ## PS 45 | * Since MNN mainly accelerates model inference on mobile, so the INT8 quantified model will run slower on **PC** than FP32 model in CPU mode. 46 | * If you want to run faster, try using the version-slim model ,using lower-resolution inputs like 160x120 /128x96 or using quantified models(On the mobile). 47 | 48 | ## Result 49 | ![img1](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/MNN/result.jpg) -------------------------------------------------------------------------------- /MNN/result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/result.jpg -------------------------------------------------------------------------------- /MNN/src/UltraFace.hpp: -------------------------------------------------------------------------------- 1 | // Created by Linzaer on 2019/11/15. 2 | // Copyright © 2019 Linzaer. All rights reserved. 
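// UltraFace wraps an MNN Interpreter/Session for this detector. The constructor
// takes the .mnn model path, the network input size, the thread count and the
// score/IoU thresholds. detect() normalizes the input (mean 127, scale 1/128),
// runs the session, decodes the score/box tensors against the SSD priors built
// from min_boxes and strides, and filters the candidates with NMS (hard or blending).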
3 | 4 | #ifndef UltraFace_hpp 5 | #define UltraFace_hpp 6 | 7 | #pragma once 8 | 9 | #include "Interpreter.hpp" 10 | 11 | #include "MNNDefine.h" 12 | #include "Tensor.hpp" 13 | #include "ImageProcess.hpp" 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #define num_featuremap 4 23 | #define hard_nms 1 24 | #define blending_nms 2 /* mix nms was been proposaled in paper blaze face, aims to minimize the temporal jitter*/ 25 | typedef struct FaceInfo { 26 | float x1; 27 | float y1; 28 | float x2; 29 | float y2; 30 | float score; 31 | 32 | } FaceInfo; 33 | 34 | class UltraFace { 35 | public: 36 | UltraFace(const std::string &mnn_path, 37 | int input_width, int input_length, int num_thread_ = 4, float score_threshold_ = 0.7, float iou_threshold_ = 0.3, 38 | int topk_ = -1); 39 | 40 | ~UltraFace(); 41 | 42 | int detect(cv::Mat &img, std::vector &face_list); 43 | 44 | private: 45 | void generateBBox(std::vector &bbox_collection, MNN::Tensor *scores, MNN::Tensor *boxes); 46 | 47 | void nms(std::vector &input, std::vector &output, int type = blending_nms); 48 | 49 | private: 50 | 51 | std::shared_ptr ultraface_interpreter; 52 | MNN::Session *ultraface_session = nullptr; 53 | MNN::Tensor *input_tensor = nullptr; 54 | 55 | int num_thread; 56 | int image_w; 57 | int image_h; 58 | 59 | int in_w; 60 | int in_h; 61 | int num_anchors; 62 | 63 | float score_threshold; 64 | float iou_threshold; 65 | 66 | 67 | const float mean_vals[3] = {127, 127, 127}; 68 | const float norm_vals[3] = {1.0 / 128, 1.0 / 128, 1.0 / 128}; 69 | 70 | const float center_variance = 0.1; 71 | const float size_variance = 0.2; 72 | const std::vector> min_boxes = { 73 | {10.0f, 16.0f, 24.0f}, 74 | {32.0f, 48.0f}, 75 | {64.0f, 96.0f}, 76 | {128.0f, 192.0f, 256.0f}}; 77 | const std::vector strides = {8.0, 16.0, 32.0, 64.0}; 78 | std::vector> featuremap_size; 79 | std::vector> shrinkage_size; 80 | std::vector w_h_list; 81 | 82 | std::vector> priors = {}; 83 | }; 84 | 85 | #endif /* UltraFace_hpp */ 86 | -------------------------------------------------------------------------------- /MNN/src/main.cpp: -------------------------------------------------------------------------------- 1 | // Created by Linzaer on 2019/11/15. 2 | // Copyright © 2019 Linzaer. All rights reserved. 
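// Demo entry point. Usage: ./Ultra-face-mnn <model.mnn path> <image files...>
// Each image is run through UltraFace::detect (320x240 input, score threshold 0.65),
// the detected boxes are drawn, the elapsed time is printed, and the annotated
// frame is shown and saved as result<i>.jpg.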
3 | 4 | #include "UltraFace.hpp" 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | int main(int argc, char **argv) { 11 | if (argc <= 2) { 12 | fprintf(stderr, "Usage: %s [image files...]\n", argv[0]); 13 | return 1; 14 | } 15 | 16 | string mnn_path = argv[1]; 17 | UltraFace ultraface(mnn_path, 320, 240, 4, 0.65); // config model input 18 | 19 | for (int i = 2; i < argc; i++) { 20 | string image_file = argv[i]; 21 | cout << "Processing " << image_file << endl; 22 | 23 | cv::Mat frame = cv::imread(image_file); 24 | auto start = chrono::steady_clock::now(); 25 | vector face_info; 26 | ultraface.detect(frame, face_info); 27 | 28 | for (auto face : face_info) { 29 | cv::Point pt1(face.x1, face.y1); 30 | cv::Point pt2(face.x2, face.y2); 31 | cv::rectangle(frame, pt1, pt2, cv::Scalar(0, 255, 0), 2); 32 | } 33 | 34 | auto end = chrono::steady_clock::now(); 35 | chrono::duration elapsed = end - start; 36 | cout << "all time: " << elapsed.count() << " s" << endl; 37 | cv::imshow("UltraFace", frame); 38 | cv::waitKey(); 39 | string result_name = "result" + to_string(i) + ".jpg"; 40 | cv::imwrite(result_name, frame); 41 | } 42 | return 0; 43 | } 44 | -------------------------------------------------------------------------------- /caffe/MyCaffe.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict, Counter 2 | 3 | from caffe.proto import caffe_pb2 4 | from google import protobuf 5 | import six 6 | 7 | def param_name_dict(): 8 | """Find out the correspondence between layer names and parameter names.""" 9 | 10 | layer = caffe_pb2.LayerParameter() 11 | # get all parameter names (typically underscore case) and corresponding 12 | # type names (typically camel case), which contain the layer names 13 | # (note that not all parameters correspond to layers, but we'll ignore that) 14 | param_names = [f.name for f in layer.DESCRIPTOR.fields if f.name.endswith('_param')] 15 | param_type_names = [type(getattr(layer, s)).__name__ for s in param_names] 16 | # strip the final '_param' or 'Parameter' 17 | param_names = [s[:-len('_param')] for s in param_names] 18 | param_type_names = [s[:-len('Parameter')] for s in param_type_names] 19 | return dict(zip(param_type_names, param_names)) 20 | 21 | def assign_proto(proto, name, val): 22 | """Assign a Python object to a protobuf message, based on the Python 23 | type (in recursive fashion). Lists become repeated fields/messages, dicts 24 | become messages, and other types are assigned directly. 
For convenience, 25 | repeated fields whose values are not lists are converted to single-element 26 | lists; e.g., `my_repeated_int_field=3` is converted to 27 | `my_repeated_int_field=[3]`.""" 28 | 29 | is_repeated_field = hasattr(getattr(proto, name), 'extend') 30 | if is_repeated_field and not isinstance(val, list): 31 | val = [val] 32 | if isinstance(val, list): 33 | if isinstance(val[0], dict): 34 | for item in val: 35 | proto_item = getattr(proto, name).add() 36 | for k, v in six.iteritems(item): 37 | assign_proto(proto_item, k, v) 38 | else: 39 | getattr(proto, name).extend(val) 40 | elif isinstance(val, dict): 41 | for k, v in six.iteritems(val): 42 | assign_proto(getattr(proto, name), k, v) 43 | else: 44 | setattr(proto, name, val) 45 | 46 | class Function(object): 47 | """A Function specifies a layer, its parameters, and its inputs (which 48 | are Tops from other layers).""" 49 | 50 | def __init__(self, type_name, layer_name, inputs,outputs, **params): 51 | self.type_name = type_name 52 | self.inputs = inputs 53 | self.outputs = outputs 54 | self.params = params 55 | self.layer_name = layer_name 56 | self.ntop = self.params.get('ntop', 1) 57 | # use del to make sure kwargs are not double-processed as layer params 58 | if 'ntop' in self.params: 59 | del self.params['ntop'] 60 | self.in_place = self.params.get('in_place', False) 61 | if 'in_place' in self.params: 62 | del self.params['in_place'] 63 | # self.tops = tuple(Top(self, n) for n in range(self.ntop))l 64 | 65 | def _get_name(self, names, autonames): 66 | if self not in names and self.ntop > 0: 67 | names[self] = self._get_top_name(self.tops[0], names, autonames) 68 | elif self not in names: 69 | autonames[self.type_name] += 1 70 | names[self] = self.type_name + str(autonames[self.type_name]) 71 | return names[self] 72 | 73 | def _get_top_name(self, top, names, autonames): 74 | if top not in names: 75 | autonames[top.fn.type_name] += 1 76 | names[top] = top.fn.type_name + str(autonames[top.fn.type_name]) 77 | return names[top] 78 | 79 | def _to_proto(self): 80 | bottom_names = [] 81 | for inp in self.inputs: 82 | # inp._to_proto(layers, names, autonames) 83 | bottom_names.append(inp) 84 | layer = caffe_pb2.LayerParameter() 85 | layer.type = self.type_name 86 | layer.bottom.extend(bottom_names) 87 | 88 | if self.in_place: 89 | layer.top.extend(layer.bottom) 90 | else: 91 | for top in self.outputs: 92 | layer.top.append(top) 93 | layer.name = self.layer_name 94 | # print(self.type_name + "...") 95 | for k, v in six.iteritems(self.params): 96 | # special case to handle generic *params 97 | # print("generating "+k+"...") 98 | 99 | if k.endswith('param'): 100 | assign_proto(layer, k, v) 101 | else: 102 | try: 103 | assign_proto(getattr(layer, 104 | _param_names[self.type_name] + '_param'), k, v) 105 | except (AttributeError, KeyError): 106 | assign_proto(layer, k, v) 107 | 108 | return layer 109 | 110 | class Layers(object): 111 | """A Layers object is a pseudo-module which generates functions that specify 112 | layers; e.g., Layers().Convolution(bottom, kernel_size=3) will produce a Top 113 | specifying a 3x3 convolution applied to bottom.""" 114 | 115 | def __getattr__(self, name): 116 | def layer_fn(*args, **kwargs): 117 | fn = Function(name, args, kwargs) 118 | return fn 119 | return layer_fn 120 | 121 | 122 | 123 | 124 | _param_names = param_name_dict() 125 | 126 | -------------------------------------------------------------------------------- /caffe/README.md: 
-------------------------------------------------------------------------------- 1 | ## I added several operator(Transpose/Permute/Softmax) conversion support based on [onnx2caffe](https://github.com/MTlab/onnx2caffe). 2 | # Convert pytorch to Caffe by ONNX 3 | This tool converts [pytorch](https://github.com/pytorch/pytorch) model to [Caffe](https://github.com/BVLC/caffe) model by [ONNX](https://github.com/onnx/onnx) 4 | only use for inference 5 | 6 | ### Dependencies 7 | * caffe (with python support) 8 | * pytorch 0.4+ (optional if you only want to convert onnx) 9 | * onnx 10 | 11 | ### Current support operation 12 | * Conv 13 | * ConvTranspose 14 | * BatchNormalization 15 | * MaxPool 16 | * AveragePool 17 | * Relu 18 | * Sigmoid 19 | * Dropout 20 | * Gemm (InnerProduct only) 21 | * Add 22 | * Mul 23 | * Reshape 24 | * Upsample 25 | * Concat 26 | * Flatten 27 | * Transpose/Permute (new) 28 | * Softmax (new) 29 | 30 | ## PS 31 | * You need to use [onnx-simplifier](https://github.com/daquexian/onnx-simplifier) to simplify onnx model and then run convertCaffe.py to convert it into caffe model. 32 | * You need to install [ssd-caffe](https://github.com/weiliu89/caffe/tree/ssd) and pycaffe of ssd-caffe. 33 | -------------------------------------------------------------------------------- /caffe/convertCaffe.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | 5 | import caffe 6 | from caffe.proto import caffe_pb2 7 | import onnx 8 | 9 | caffe.set_mode_cpu() 10 | sys.path.append('../') 11 | from onnx2caffe._transformers import ConvAddFuser, ConstantsToInitializers 12 | from onnx2caffe._graph import Graph 13 | 14 | import onnx2caffe._operators as cvt 15 | import onnx2caffe._weightloader as wlr 16 | from onnx2caffe._error_utils import ErrorHandling 17 | from onnx import shape_inference 18 | 19 | transformers = [ 20 | ConstantsToInitializers(), 21 | ConvAddFuser(), 22 | ] 23 | 24 | 25 | def convertToCaffe(graph, prototxt_save_path, caffe_model_save_path): 26 | exist_edges = [] 27 | layers = [] 28 | exist_nodes = [] 29 | err = ErrorHandling() 30 | for i in graph.inputs: 31 | edge_name = i[0] 32 | input_layer = cvt.make_input(i) 33 | layers.append(input_layer) 34 | exist_edges.append(i[0]) 35 | graph.channel_dims[edge_name] = graph.shape_dict[edge_name][1] 36 | 37 | for id, node in enumerate(graph.nodes): 38 | node_name = node.name 39 | op_type = node.op_type 40 | inputs = node.inputs 41 | inputs_tensor = node.input_tensors 42 | input_non_exist_flag = False 43 | 44 | for inp in inputs: 45 | if inp not in exist_edges and inp not in inputs_tensor: 46 | input_non_exist_flag = True 47 | break 48 | if input_non_exist_flag: 49 | continue 50 | 51 | if op_type not in cvt._ONNX_NODE_REGISTRY: 52 | err.unsupported_op(node) 53 | continue 54 | converter_fn = cvt._ONNX_NODE_REGISTRY[op_type] 55 | layer = converter_fn(node, graph, err) 56 | if type(layer) == tuple: 57 | for l in layer: 58 | layers.append(l) 59 | else: 60 | layers.append(layer) 61 | outs = node.outputs 62 | for out in outs: 63 | exist_edges.append(out) 64 | 65 | net = caffe_pb2.NetParameter() 66 | for id, layer in enumerate(layers): 67 | layers[id] = layer._to_proto() 68 | net.layer.extend(layers) 69 | 70 | with open(prototxt_save_path, 'w') as f: 71 | print(net, file=f) 72 | 73 | caffe.set_mode_cpu() 74 | deploy = prototxt_save_path 75 | net = caffe.Net(deploy, 76 | caffe.TEST) 77 | 78 | for id, node in enumerate(graph.nodes): 79 | node_name = 
node.name 80 | op_type = node.op_type 81 | inputs = node.inputs 82 | inputs_tensor = node.input_tensors 83 | input_non_exist_flag = False 84 | if op_type not in wlr._ONNX_NODE_REGISTRY: 85 | err.unsupported_op(node) 86 | continue 87 | converter_fn = wlr._ONNX_NODE_REGISTRY[op_type] 88 | converter_fn(net, node, graph, err) 89 | 90 | net.save(caffe_model_save_path) 91 | return net 92 | 93 | 94 | def getGraph(onnx_path): 95 | model = onnx.load(onnx_path) 96 | model = shape_inference.infer_shapes(model) 97 | model_graph = model.graph 98 | graph = Graph.from_onnx(model_graph) 99 | graph = graph.transformed(transformers) 100 | graph.channel_dims = {} 101 | 102 | return graph 103 | 104 | 105 | if __name__ == "__main__": 106 | onnx_path = "../models/onnx/version-RFB-320_simplified.onnx" 107 | prototxt_path = "./RFB-320.prototxt" 108 | caffemodel_path = "./RFB-320.caffemodel" 109 | graph = getGraph(onnx_path) 110 | convertToCaffe(graph, prototxt_path, caffemodel_path) 111 | -------------------------------------------------------------------------------- /caffe/model/RFB-320/RFB-320.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/caffe/model/RFB-320/RFB-320.caffemodel -------------------------------------------------------------------------------- /caffe/model/Slim-320/slim-320.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/caffe/model/Slim-320/slim-320.caffemodel -------------------------------------------------------------------------------- /caffe/onnx2caffe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/caffe/onnx2caffe/__init__.py -------------------------------------------------------------------------------- /caffe/onnx2caffe/_error_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from typing import Dict, Text, Any, Callable 6 | from ._graph import Node, Graph 7 | 8 | class ErrorHandling(object): 9 | ''' 10 | To handle errors and addition of custom layers 11 | ''' 12 | 13 | def __init__(self, 14 | add_custom_layers = False, # type: bool 15 | custom_conversion_functions = dict(), # type: Dict[Text, Any] 16 | custom_layer_nodes = [], # type : List[Node] 17 | ): 18 | # type: (...) -> None 19 | self.add_custom_layers = add_custom_layers 20 | self.custom_conversion_functions = custom_conversion_functions 21 | self.custom_layer_nodes = custom_layer_nodes 22 | 23 | 24 | def unsupported_op(self, 25 | node, # type: Node 26 | ): 27 | # type: (...) 
-> Callable[[Any, Node, Graph, ErrorHandling], None] 28 | ''' 29 | Either raise an error for an unsupported op type or return custom layer add function 30 | ''' 31 | if self.add_custom_layers: 32 | from ._operators import _convert_custom 33 | return _convert_custom 34 | else: 35 | raise TypeError( 36 | "ONNX node of type {} is not supported.\n".format(node.op_type,) 37 | ) 38 | 39 | 40 | def unsupported_op_configuration(self, 41 | node, # type: Node 42 | err_message, # type: Text 43 | ): 44 | raise TypeError( 45 | "Error while converting op of type: {}. Error message: {}\n".format(node.op_type, err_message, ) 46 | ) 47 | 48 | 49 | def missing_initializer(self, 50 | node, # type: Node 51 | err_message, # type: Text 52 | ): 53 | # type: (...) -> None 54 | ''' 55 | Missing initializer error 56 | ''' 57 | raise ValueError( 58 | "Missing initializer error in op of type {}, with input name = {}, " 59 | "output name = {}. Error message: {}\n". 60 | format(node.op_type, node.inputs[0], node.outputs[0], err_message) 61 | ) 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /cal_flops.py: -------------------------------------------------------------------------------- 1 | """ 2 | Output model complexity 3 | """ 4 | import time 5 | 6 | import torch 7 | from torchstat import stat 8 | from torchsummary import summary 9 | 10 | from vision.ssd.mb_tiny_fd import create_mb_tiny_fd 11 | from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd 12 | 13 | device = "cpu" # default cpu 14 | width = 320 15 | height = 240 16 | 17 | # fd = create_mb_tiny_fd(2) 18 | fd = create_Mb_Tiny_RFB_fd(2) 19 | 20 | print(fd) 21 | fd.eval() 22 | fd.to(device) 23 | x = torch.randn(1, 3, width, height).to(device) 24 | 25 | summary(fd.to("cuda"), (3, width, height)) 26 | 27 | from ptflops import get_model_complexity_info 28 | 29 | flops, params = get_model_complexity_info(fd.to(device), (3, width, height), print_per_layer_stat=True, as_strings=True) 30 | print("FLOPS:", flops) 31 | print("PARAMS:", params) 32 | 33 | for i in range(5): 34 | time_time = time.time() 35 | features = fd(x) 36 | print("inference time :{} s".format(time.time() - time_time)) 37 | 38 | stat(fd, (3, width, height)) 39 | -------------------------------------------------------------------------------- /check_gt_box.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is used to check the data size distribution in the dataset. 
3 | """ 4 | import xml.etree.ElementTree as ET 5 | from math import sqrt as sqrt 6 | 7 | import cv2 8 | import matplotlib.pyplot as plt 9 | 10 | # sets = [("./data/wider_face_add_lm_10_10", "trainval")] 11 | sets = [("./data/wider_face_add_lm_10_10", "test")] 12 | 13 | classes = ['face'] 14 | 15 | if __name__ == '__main__': 16 | width = [] 17 | height = [] 18 | 19 | for image_set, set in sets: 20 | image_ids = open('{}/ImageSets/Main/{}.txt'.format(image_set, set)).read().strip().split() 21 | for image_id in image_ids: 22 | img_path = '{}/JPEGImages/{}.jpg'.format(image_set, image_id) 23 | label_file = open('{}/Annotations/{}.xml'.format(image_set, image_id)) 24 | tree = ET.parse(label_file) 25 | root = tree.getroot() 26 | size = root.find('size') 27 | img_w = int(size.find('width').text) 28 | img_h = int(size.find('height').text) 29 | img = cv2.imread(img_path) 30 | for obj in root.iter('object'): 31 | difficult = obj.find('difficult').text 32 | cls = obj.find('name').text 33 | if cls not in classes or int(difficult) == 2: 34 | continue 35 | cls_id = classes.index(cls) 36 | 37 | xmlbox = obj.find('bndbox') 38 | xmin = int(xmlbox.find('xmin').text) 39 | ymin = int(xmlbox.find('ymin').text) 40 | xmax = int(xmlbox.find('xmax').text) 41 | ymax = int(xmlbox.find('ymax').text) 42 | w = xmax - xmin 43 | h = ymax - ymin 44 | 45 | # img = cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 255, 0), 8) 46 | w_change = (w / img_w) * 320 47 | h_change = (h / img_h) * 240 48 | s = w_change * h_change 49 | if w_change / h_change > 6: 50 | print("{}/{}/{}/{}".format(xmin, xmax, ymin, ymax)) 51 | width.append(sqrt(s)) 52 | height.append(w_change / h_change) 53 | print(img_path) 54 | # img = cv2.resize(img, (608, 608)) 55 | # cv2.imwrite('{}_{}'.format(image_set.split('/')[-1], set), img) 56 | # cv2.waitKey() 57 | 58 | plt.plot(width, height, 'ro') 59 | plt.show() 60 | -------------------------------------------------------------------------------- /convert_to_onnx.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is used to convert the pytorch model into an onnx format model. 
3 | """ 4 | import sys 5 | 6 | import torch.onnx 7 | 8 | from vision.ssd.config.fd_config import define_img_size 9 | 10 | input_img_size = 320 # define input size ,default optional(128/160/320/480/640/1280) 11 | define_img_size(input_img_size) 12 | from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd 13 | from vision.ssd.mb_tiny_fd import create_mb_tiny_fd 14 | 15 | # net_type = "slim" # inference faster,lower precision 16 | net_type = "RFB" # inference lower,higher precision 17 | 18 | label_path = "models/voc-model-labels.txt" 19 | class_names = [name.strip() for name in open(label_path).readlines()] 20 | num_classes = len(class_names) 21 | 22 | if net_type == 'slim': 23 | model_path = "models/pretrained/version-slim-320.pth" 24 | # model_path = "models/pretrained/version-slim-640.pth" 25 | net = create_mb_tiny_fd(len(class_names), is_test=True) 26 | elif net_type == 'RFB': 27 | model_path = "models/pretrained/version-RFB-320.pth" 28 | # model_path = "models/pretrained/version-RFB-640.pth" 29 | net = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True) 30 | 31 | else: 32 | print("unsupport network type.") 33 | sys.exit(1) 34 | net.load(model_path) 35 | net.eval() 36 | net.to("cuda") 37 | 38 | model_name = model_path.split("/")[-1].split(".")[0] 39 | model_path = f"models/onnx/{model_name}.onnx" 40 | 41 | dummy_input = torch.randn(1, 3, 240, 320).to("cuda") 42 | # dummy_input = torch.randn(1, 3, 480, 640).to("cuda") #if input size is 640*480 43 | torch.onnx.export(net, dummy_input, model_path, verbose=False, input_names=['input'], output_names=['scores', 'boxes']) 44 | -------------------------------------------------------------------------------- /detect_imgs.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is used to batch detect images in a folder. 
3 | """ 4 | import argparse 5 | import os 6 | import sys 7 | 8 | import cv2 9 | 10 | from vision.ssd.config.fd_config import define_img_size 11 | 12 | parser = argparse.ArgumentParser( 13 | description='detect_imgs') 14 | 15 | parser.add_argument('--net_type', default="RFB", type=str, 16 | help='The network architecture ,optional: RFB (higher precision) or slim (faster)') 17 | parser.add_argument('--input_size', default=640, type=int, 18 | help='define network input size,default optional value 128/160/320/480/640/1280') 19 | parser.add_argument('--threshold', default=0.6, type=float, 20 | help='score threshold') 21 | parser.add_argument('--candidate_size', default=1500, type=int, 22 | help='nms candidate size') 23 | parser.add_argument('--path', default="imgs", type=str, 24 | help='imgs dir') 25 | parser.add_argument('--test_device', default="cuda:0", type=str, 26 | help='cuda:0 or cpu') 27 | args = parser.parse_args() 28 | define_img_size(args.input_size) # must put define_img_size() before 'import create_mb_tiny_fd, create_mb_tiny_fd_predictor' 29 | 30 | from vision.ssd.mb_tiny_fd import create_mb_tiny_fd, create_mb_tiny_fd_predictor 31 | from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd, create_Mb_Tiny_RFB_fd_predictor 32 | 33 | result_path = "./detect_imgs_results" 34 | label_path = "./models/voc-model-labels.txt" 35 | test_device = args.test_device 36 | 37 | class_names = [name.strip() for name in open(label_path).readlines()] 38 | if args.net_type == 'slim': 39 | model_path = "models/pretrained/version-slim-320.pth" 40 | # model_path = "models/pretrained/version-slim-640.pth" 41 | net = create_mb_tiny_fd(len(class_names), is_test=True, device=test_device) 42 | predictor = create_mb_tiny_fd_predictor(net, candidate_size=args.candidate_size, device=test_device) 43 | elif args.net_type == 'RFB': 44 | model_path = "models/pretrained/version-RFB-320.pth" 45 | # model_path = "models/pretrained/version-RFB-640.pth" 46 | net = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True, device=test_device) 47 | predictor = create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=args.candidate_size, device=test_device) 48 | else: 49 | print("The net type is wrong!") 50 | sys.exit(1) 51 | net.load(model_path) 52 | 53 | if not os.path.exists(result_path): 54 | os.makedirs(result_path) 55 | listdir = os.listdir(args.path) 56 | sum = 0 57 | for file_path in listdir: 58 | img_path = os.path.join(args.path, file_path) 59 | orig_image = cv2.imread(img_path) 60 | image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) 61 | boxes, labels, probs = predictor.predict(image, args.candidate_size / 2, args.threshold) 62 | sum += boxes.size(0) 63 | for i in range(boxes.size(0)): 64 | box = boxes[i, :] 65 | cv2.rectangle(orig_image, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), 2) 66 | # label = f"""{voc_dataset.class_names[labels[i]]}: {probs[i]:.2f}""" 67 | label = f"{probs[i]:.2f}" 68 | # cv2.putText(orig_image, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) 69 | cv2.putText(orig_image, str(boxes.size(0)), (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) 70 | cv2.imwrite(os.path.join(result_path, file_path), orig_image) 71 | print(f"Found {len(probs)} faces. 
The output image is {result_path}") 72 | print(sum) 73 | -------------------------------------------------------------------------------- /detect_imgs_onnx.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code uses the onnx model to detect faces from live video or cameras. 3 | """ 4 | import os 5 | import time 6 | 7 | import cv2 8 | import numpy as np 9 | import onnx 10 | import vision.utils.box_utils_numpy as box_utils 11 | from caffe2.python.onnx import backend 12 | 13 | # onnx runtime 14 | import onnxruntime as ort 15 | 16 | 17 | def predict(width, height, confidences, boxes, prob_threshold, iou_threshold=0.3, top_k=-1): 18 | boxes = boxes[0] 19 | confidences = confidences[0] 20 | picked_box_probs = [] 21 | picked_labels = [] 22 | for class_index in range(1, confidences.shape[1]): 23 | probs = confidences[:, class_index] 24 | mask = probs > prob_threshold 25 | probs = probs[mask] 26 | if probs.shape[0] == 0: 27 | continue 28 | subset_boxes = boxes[mask, :] 29 | box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1) 30 | box_probs = box_utils.hard_nms(box_probs, 31 | iou_threshold=iou_threshold, 32 | top_k=top_k, 33 | ) 34 | picked_box_probs.append(box_probs) 35 | picked_labels.extend([class_index] * box_probs.shape[0]) 36 | if not picked_box_probs: 37 | return np.array([]), np.array([]), np.array([]) 38 | picked_box_probs = np.concatenate(picked_box_probs) 39 | picked_box_probs[:, 0] *= width 40 | picked_box_probs[:, 1] *= height 41 | picked_box_probs[:, 2] *= width 42 | picked_box_probs[:, 3] *= height 43 | return picked_box_probs[:, :4].astype(np.int32), np.array(picked_labels), picked_box_probs[:, 4] 44 | 45 | 46 | label_path = "models/voc-model-labels.txt" 47 | 48 | onnx_path = "models/onnx/version-RFB-320.onnx" 49 | class_names = [name.strip() for name in open(label_path).readlines()] 50 | 51 | predictor = onnx.load(onnx_path) 52 | onnx.checker.check_model(predictor) 53 | onnx.helper.printable_graph(predictor.graph) 54 | predictor = backend.prepare(predictor, device="CPU") # default CPU 55 | 56 | ort_session = ort.InferenceSession(onnx_path) 57 | input_name = ort_session.get_inputs()[0].name 58 | result_path = "./detect_imgs_results_onnx" 59 | 60 | threshold = 0.7 61 | path = "imgs" 62 | sum = 0 63 | if not os.path.exists(result_path): 64 | os.makedirs(result_path) 65 | listdir = os.listdir(path) 66 | sum = 0 67 | for file_path in listdir: 68 | img_path = os.path.join(path, file_path) 69 | orig_image = cv2.imread(img_path) 70 | image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) 71 | image = cv2.resize(image, (320, 240)) 72 | # image = cv2.resize(image, (640, 480)) 73 | image_mean = np.array([127, 127, 127]) 74 | image = (image - image_mean) / 128 75 | image = np.transpose(image, [2, 0, 1]) 76 | image = np.expand_dims(image, axis=0) 77 | image = image.astype(np.float32) 78 | # confidences, boxes = predictor.run(image) 79 | time_time = time.time() 80 | confidences, boxes = ort_session.run(None, {input_name: image}) 81 | print("cost time:{}".format(time.time() - time_time)) 82 | boxes, labels, probs = predict(orig_image.shape[1], orig_image.shape[0], confidences, boxes, threshold) 83 | for i in range(boxes.shape[0]): 84 | box = boxes[i, :] 85 | label = f"{class_names[labels[i]]}: {probs[i]:.2f}" 86 | 87 | cv2.rectangle(orig_image, (box[0], box[1]), (box[2], box[3]), (255, 255, 0), 4) 88 | 89 | # cv2.putText(orig_image, label, 90 | # (box[0] + 20, box[1] + 40), 91 | # cv2.FONT_HERSHEY_SIMPLEX, 92 | # 1, # font 
scale 93 | # (255, 0, 255), 94 | # 2) # line type 95 | cv2.imwrite(os.path.join(result_path, file_path), orig_image) 96 | sum += boxes.shape[0] 97 | print("sum:{}".format(sum)) 98 | -------------------------------------------------------------------------------- /imgs/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/1.jpg -------------------------------------------------------------------------------- /imgs/10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/10.jpg -------------------------------------------------------------------------------- /imgs/11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/11.jpg -------------------------------------------------------------------------------- /imgs/12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/12.jpg -------------------------------------------------------------------------------- /imgs/13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/13.jpg -------------------------------------------------------------------------------- /imgs/15.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/15.jpg -------------------------------------------------------------------------------- /imgs/16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/16.jpg -------------------------------------------------------------------------------- /imgs/17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/17.jpg -------------------------------------------------------------------------------- /imgs/18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/18.jpg -------------------------------------------------------------------------------- /imgs/19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/19.jpg -------------------------------------------------------------------------------- /imgs/2.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/2.jpg -------------------------------------------------------------------------------- /imgs/20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/20.jpg -------------------------------------------------------------------------------- /imgs/21.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/21.jpg -------------------------------------------------------------------------------- /imgs/22.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/22.jpg -------------------------------------------------------------------------------- /imgs/23.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/23.jpg -------------------------------------------------------------------------------- /imgs/24.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/24.jpg -------------------------------------------------------------------------------- /imgs/25.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/25.jpg -------------------------------------------------------------------------------- /imgs/26.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/26.jpg -------------------------------------------------------------------------------- /imgs/27.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/27.jpg -------------------------------------------------------------------------------- /imgs/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/3.jpg -------------------------------------------------------------------------------- /imgs/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/5.jpg -------------------------------------------------------------------------------- /imgs/6.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/6.jpg -------------------------------------------------------------------------------- /imgs/8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/8.jpg -------------------------------------------------------------------------------- /imgs/9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/9.jpg -------------------------------------------------------------------------------- /masked_face/README.md: -------------------------------------------------------------------------------- 1 | # Masked Face Detection 2 | 3 | ![img1](https://github.com/yanghaojin/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/masked_face/readme_imgs/img4.jpeg) 4 | 5 | # Extending Ultra-L face model for masked facial detection 6 | 7 | The Ultra-L face detection model is very popular in edge and client-based applications. It strikes a surprising balance between model size and accuracy, e.g.: 8 | - The default FP32 *.pth model is **1.04~1.1MB**, and the int8-quantized model used by the inference frameworks is about **300KB**. 9 | - Only **90~109 MFlops** for 320x240 input resolution. 10 | - Inference code is provided for [NCNN](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/tree/master/ncnn), [MNN](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/tree/master/MNN), [INT8](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/tree/master/MNN/model), 11 | [Onnx](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/tree/master/caffe), [OpencvDNN](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/caffe/ultra_face_opencvdnn_inference.py), etc. 12 | 13 | COVID-19 has ravaged the world over the past two years, and wearing a mask has become the norm on many occasions. However, most traditional face datasets such as Wider Face lack face samples with masks, so face detection models trained on such conventional datasets fail in scenes where everyone wears a mask. 14 | [Face-Mask-Detection](https://github.com/chandrikadeb7/Face-Mask-Detection) is the most popular face detection model on GitHub that supports mask detection. 15 | However, it is trained on only 4095 images (2165 masked / 1930 without mask), which is a rather small dataset, 16 | so it produces many false positives in real application scenarios. 17 | 18 | This motivated me to build a larger dataset in order to provide better open-source masked face detection models and help the world get through the pandemic. 19 | The main contribution of this project is to provide balanced facial training data combining the [wider_face_add_lm_10_10](https://drive.google.com/open?id=1OBY-Pk5hkcVBX1dRBOeLI4e4OCvqJRnH) and [MAFA face](https://imsg.ac.cn/research/maskedface.html) datasets. The [MAFA](https://imsg.ac.cn/research/maskedface.html) data was converted to Pascal VOC format and merged into [wider_face_add_lm_10_10](https://drive.google.com/open?id=1OBY-Pk5hkcVBX1dRBOeLI4e4OCvqJRnH).
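For orientation, the merged data is organized in the standard Pascal VOC layout that the dataset loaders in this repository (e.g. `vision/datasets/voc_dataset.py`) expect; a rough sketch (the top-level directory name is only illustrative):

```Shell
WIDER_MAFA_Balanced/
    Annotations/          # one XML file per image with face / masked_face boxes
    ImageSets/Main/
        trainval.txt      # image ids used for training
        test.txt          # image ids used for evaluation
    JPEGImages/           # the images themselves
    labels.txt            # comma-separated class names; the loader prepends BACKGROUND
```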
20 | 21 | ## About the WIDER_MAFA_Balanced dataset 22 | The *Wider_MAFA_Balanced* dataset (**4.8GB**) can be downloaded from [HPI owncloud](https://owncloud.hpi.de/s/L4MUGqrpeENLbSv). 23 | It contains 38225 images in total: 31084 for training and 7141 for testing. 24 | Its composition is shown in the following table: 25 | 26 | Source | Class | Train | Test | Total | 27 | ----|------|-------|------|----- 28 | MAFA face | masked_face | 15542 | 3922 | 19464 | 29 | Wider face | face | 12859 | 3219 | 16078 | 30 | *MAFA human body* | face | 2683 | 0 | 2683 | 31 | 32 | *MAFA human body* denotes the training samples extracted with human-body occlusions. 33 | 34 | I used this script to convert the MAFA annotations to Pascal VOC format: 35 | ```Shell 36 | masked_face/mafa2voc.py 37 | ``` 38 | 39 | ## About the pre-trained models 40 | ```Shell 41 | masked_face/ 42 | pretrained/ 43 | RFB-320-masked_face-v2.pth # trained with 320x240 44 | RFB-640-masked_face-v2.pth # trained with 640x480 45 | RFB-640-masked_face-v2.onnx # suitable for 640x480 46 | RFB-1280-masked_face-v2.onnx # suitable for 1280x960 47 | ``` 48 | 49 | ## Detection Result (input resolution: 1280x960) 50 | 51 | The following visual results were created with this script: 52 | ```Shell 53 | masked_face/detect_imgs.py 54 | ``` 55 | ![img1](https://github.com/yanghaojin/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/masked_face/readme_imgs/img1.jpeg) 56 | ![img2](https://github.com/yanghaojin/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/masked_face/readme_imgs/img2.jpeg) 57 | ![img3](https://github.com/yanghaojin/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/masked_face/readme_imgs/img3.jpg) 58 | ![img5](https://github.com/yanghaojin/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/masked_face/readme_imgs/img5.jpeg) 59 | ![img6](https://github.com/yanghaojin/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/masked_face/readme_imgs/img6.webp) 60 | ![img7](https://github.com/yanghaojin/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/masked_face/readme_imgs/img7.webp) 61 | ![img8](https://github.com/yanghaojin/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/masked_face/readme_imgs/img8.jpeg) 62 | 63 | Author: Haojin Yang 64 | -------------------------------------------------------------------------------- /masked_face/detect_imgs.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is used to batch detect images in a folder.
3 | """ 4 | import os, sys 5 | currentdir = os.path.dirname(os.path.realpath(__file__)) 6 | parentdir = os.path.dirname(currentdir) 7 | sys.path.append(parentdir) 8 | 9 | import argparse 10 | import os 11 | import sys 12 | 13 | import cv2 14 | 15 | from vision.ssd.config.fd_config import define_img_size 16 | 17 | parser = argparse.ArgumentParser( 18 | description='detect_imgs') 19 | 20 | parser.add_argument('--net_type', default="RFB", type=str, 21 | help='The network architecture ,optional: RFB (higher precision) or slim (faster)') 22 | parser.add_argument('--input_size', default=1280, type=int, 23 | help='define network input size,default optional value 128/160/320/480/640/1280') 24 | parser.add_argument('--threshold', default=0.3, type=float, 25 | help='score threshold') 26 | parser.add_argument('--candidate_size', default=1200, type=int, 27 | help='nms candidate size') 28 | parser.add_argument('--path', default="imgs", type=str, 29 | help='imgs dir') 30 | parser.add_argument('--test_device', default="cpu", type=str, 31 | help='cuda:0 or cpu') 32 | args = parser.parse_args() 33 | define_img_size(args.input_size) # must put define_img_size() before 'import create_mb_tiny_fd, create_mb_tiny_fd_predictor' 34 | 35 | from vision.ssd.mb_tiny_fd import create_mb_tiny_fd, create_mb_tiny_fd_predictor 36 | from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd, create_Mb_Tiny_RFB_fd_predictor 37 | 38 | result_path = "detect_imgs_results" 39 | label_path = "./voc-model-labels.txt" 40 | test_device = args.test_device 41 | 42 | class_names = [name.strip() for name in open(label_path).readlines()] 43 | 44 | if args.net_type == 'RFB': 45 | model_path = "pretrained/RFB-640-masked_face-v2.pth" 46 | net = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True, device=test_device) 47 | predictor = create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=args.candidate_size, device=test_device) 48 | else: 49 | print("The net type is wrong!") 50 | sys.exit(1) 51 | net.load(model_path) 52 | 53 | if not os.path.exists(result_path): 54 | os.makedirs(result_path) 55 | listdir = os.listdir(args.path) 56 | sum = 0 57 | for file_path in listdir: 58 | img_path = os.path.join(args.path, file_path) 59 | orig_image = cv2.imread(img_path) 60 | if orig_image is None: continue 61 | image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) 62 | boxes, labels, probs = predictor.predict(image, args.candidate_size / 2, args.threshold) 63 | sum += boxes.size(0) 64 | for i in range(boxes.size(0)): 65 | box = boxes[i, :] 66 | label_index = labels[i].item() 67 | cv2.rectangle(orig_image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 165, 255), 2) 68 | # label = f"""{voc_dataset.class_names[labels[i]]}: {probs[i]:.2f}""" 69 | label = f"{probs[i]:.2f}" 70 | # cv2.putText(orig_image, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) 71 | cv2.putText(orig_image, class_names[label_index], 72 | (int(box[0]), int(box[1]) - 10), 73 | cv2.FONT_HERSHEY_SIMPLEX, 74 | 0.5, # font scale 75 | (100, 0, 255), 76 | 1) # line type 77 | cv2.putText(orig_image, str(boxes.size(0)), (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) 78 | cv2.imwrite(os.path.join(result_path, file_path), orig_image) 79 | print(f"Found {len(probs)} faces. 
The output image is {result_path}") 80 | print(sum) 81 | -------------------------------------------------------------------------------- /masked_face/imgs/img1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/imgs/img1.jpeg -------------------------------------------------------------------------------- /masked_face/imgs/img2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/imgs/img2.jpeg -------------------------------------------------------------------------------- /masked_face/imgs/img3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/imgs/img3.jpg -------------------------------------------------------------------------------- /masked_face/imgs/img4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/imgs/img4.jpeg -------------------------------------------------------------------------------- /masked_face/imgs/img5.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/imgs/img5.jpeg -------------------------------------------------------------------------------- /masked_face/imgs/img6.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/imgs/img6.webp -------------------------------------------------------------------------------- /masked_face/imgs/img7.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/imgs/img7.webp -------------------------------------------------------------------------------- /masked_face/imgs/img8.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/imgs/img8.jpeg -------------------------------------------------------------------------------- /masked_face/pretrained/RFB-1280-masked_face-v2.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/pretrained/RFB-1280-masked_face-v2.onnx -------------------------------------------------------------------------------- /masked_face/pretrained/RFB-320-masked_face-v2.pth: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/pretrained/RFB-320-masked_face-v2.pth -------------------------------------------------------------------------------- /masked_face/pretrained/RFB-640-masked_face-v2.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/pretrained/RFB-640-masked_face-v2.onnx -------------------------------------------------------------------------------- /masked_face/pretrained/RFB-640-masked_face-v2.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/pretrained/RFB-640-masked_face-v2.pth -------------------------------------------------------------------------------- /masked_face/readme_imgs/img1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/readme_imgs/img1.jpeg -------------------------------------------------------------------------------- /masked_face/readme_imgs/img2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/readme_imgs/img2.jpeg -------------------------------------------------------------------------------- /masked_face/readme_imgs/img3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/readme_imgs/img3.jpg -------------------------------------------------------------------------------- /masked_face/readme_imgs/img4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/readme_imgs/img4.jpeg -------------------------------------------------------------------------------- /masked_face/readme_imgs/img5.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/readme_imgs/img5.jpeg -------------------------------------------------------------------------------- /masked_face/readme_imgs/img6.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/readme_imgs/img6.webp -------------------------------------------------------------------------------- /masked_face/readme_imgs/img7.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/readme_imgs/img7.webp -------------------------------------------------------------------------------- 
/masked_face/readme_imgs/img8.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/readme_imgs/img8.jpeg -------------------------------------------------------------------------------- /masked_face/voc-model-labels.txt: -------------------------------------------------------------------------------- 1 | BACKGROUND 2 | face 3 | masked_face -------------------------------------------------------------------------------- /models/onnx/version-RFB-320.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/onnx/version-RFB-320.onnx -------------------------------------------------------------------------------- /models/onnx/version-RFB-320_simplified.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/onnx/version-RFB-320_simplified.onnx -------------------------------------------------------------------------------- /models/onnx/version-RFB-320_without_postprocessing.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/onnx/version-RFB-320_without_postprocessing.onnx -------------------------------------------------------------------------------- /models/onnx/version-RFB-640.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/onnx/version-RFB-640.onnx -------------------------------------------------------------------------------- /models/onnx/version-slim-320.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/onnx/version-slim-320.onnx -------------------------------------------------------------------------------- /models/onnx/version-slim-320_simplified.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/onnx/version-slim-320_simplified.onnx -------------------------------------------------------------------------------- /models/onnx/version-slim-320_without_postprocessing.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/onnx/version-slim-320_without_postprocessing.onnx -------------------------------------------------------------------------------- /models/pretrained/version-RFB-320.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/pretrained/version-RFB-320.pth 
-------------------------------------------------------------------------------- /models/pretrained/version-RFB-640.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/pretrained/version-RFB-640.pth -------------------------------------------------------------------------------- /models/pretrained/version-slim-320.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/pretrained/version-slim-320.pth -------------------------------------------------------------------------------- /models/pretrained/version-slim-640.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/pretrained/version-slim-640.pth -------------------------------------------------------------------------------- /models/readme: -------------------------------------------------------------------------------- 1 | A place to store your models. 2 | 3 | ./pretrained: 4 | version-RFB-320.pth : train with img input size 320x240 5 | version-RFB-640.pth : train with img input size 640x480 6 | version-slim-320.pth : train with img input size 320x240 7 | version-slim-640.pth : train with img input size 640x480 8 | ./onnx 9 | version-RFB-320.onnx :version-RFB / train with 320x240/ with postprocessing 10 | version-RFB-320_simplified.onnx :version-RFB / train with 320x240/ without postprocessing /for ncnn & mnn 11 | version-RFB-320_without_postprocessing.onnx :version-RFB / train with 320x240/ without postprocessing 12 | version-slim-320.onnx :version-slim/ train with 320x240/ with postprocessing 13 | version-slim-320_simplified.onnx :version-slim/ train with 320x240/ without postprocessing /for ncnn & mnn 14 | version-slim-320_without_postprocessing.onnx :version-slim/ train with 320x240/ without postprocessing -------------------------------------------------------------------------------- /models/voc-model-labels.txt: -------------------------------------------------------------------------------- 1 | BACKGROUND 2 | face -------------------------------------------------------------------------------- /ncnn/.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: LLVM 3 | BreakBeforeBraces: Linux 4 | IndentWidth: '4' 5 | 6 | ... 
7 | -------------------------------------------------------------------------------- /ncnn/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | set(CMAKE_CXX_FLAGS "-Wall") 4 | set(CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG") 5 | set(CMAKE_CXX_FLAGS_DEBUG "-g") 6 | set(CMAKE_CXX_STANDARD 11) 7 | 8 | if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) 9 | message(STATUS "No build type selected, default to Release") 10 | set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type (default Release)" FORCE) 11 | endif() 12 | 13 | project(Ultra-Light-Fast-Generic-Face-Detector-1MB) 14 | 15 | add_subdirectory(3rdparty/ncnn ncnn_build) 16 | include_directories(3rdparty/ncnn/src) 17 | 18 | find_package(OpenCV REQUIRED) 19 | 20 | add_executable(main src/main.cpp src/UltraFace.cpp) 21 | target_link_libraries(main ncnn ${OpenCV_LIBS}) 22 | -------------------------------------------------------------------------------- /ncnn/README.md: -------------------------------------------------------------------------------- 1 | # C++ implementation of [Ultra-Light-Fast-Generic-Face-Detector-1MB](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB) with [NCNN](https://github.com/Tencent/ncnn) 2 | 3 | ## Build 4 | 5 | ```bash 6 | git clone --recursive --depth=1 https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB 7 | 8 | cd Ultra-Light-Fast-Generic-Face-Detector-1MB/ncnn 9 | 10 | mkdir build && cd build && cmake .. 11 | make -j$(nproc) 12 | ``` 13 | 14 | ## Run 15 | 16 | ```bash 17 | ./main ../data/version-RFB/RFB-320.bin ../data/version-RFB/RFB-320.param ../data/test.jpg 18 | ``` 19 | * We provide converted NCNN models of version-slim-320 and version-RFB-320 in ./ncnn/data. 20 | 21 | ## How to convert a pretrained model to ncnn 22 | 23 | * The code below (```vision/ssd/ssd.py```) should be modified as shown before converting the PyTorch pretrained model to ONNX. Make these changes, then use **convert_to_onnx.py** in the official repo to finish this step. 24 | 25 | ```python 26 | if self.is_test: 27 | confidences = F.softmax(confidences, dim=2) 28 | boxes = locations # this line should be added. 29 | #boxes = box_utils.convert_locations_to_boxes( 30 | # locations, self.priors, self.config.center_variance, self.config.size_variance 31 | #) 32 | # boxes = box_utils.center_form_to_corner_form(boxes) # these lines should be commented out. Detailed information and analysis coming soon. 33 | return confidences, boxes 34 | else: 35 | return confidences, locations 36 | ``` 37 | Then you can generate an onnx model like **version-RFB-320_without_postprocessing.onnx** in the onnx directory. (You need to rename your model when converting.) 38 | * However, the exported onnx model may contain many redundant operators such as Shape, Gather and Unsqueeze that are not supported in ncnn. 39 | 40 | ``` 41 | Shape not supported yet! 42 | Gather not supported yet! 43 | # axis=0 44 | Unsqueeze not supported yet! 45 | # axes 7 46 | Unsqueeze not supported yet! 47 | # axes 7 48 | ``` 49 | 50 | Fortunately, we can use this tool to eliminate them: 51 | https://github.com/daquexian/onnx-simplifier 52 | 53 | ``` 54 | python3 -m onnxsim version-RFB-320_without_postprocessing.onnx version-RFB-320_simplified.onnx 55 | 56 | ``` 57 | 58 | Next, you can convert the simplified onnx model, e.g. **version-RFB-320_simplified.onnx**, into an ncnn model. Here is a website for online conversion: https://convertmodel.com/?tdsourcetag=s_pctim_aiomsg.
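For a local conversion, the whole step is a two-command sketch along these lines (it assumes the `onnx2ncnn` binary is on your PATH):

```bash
# 1. strip the operators ncnn cannot parse (Shape / Gather / Unsqueeze ...)
python3 -m onnxsim version-RFB-320_without_postprocessing.onnx version-RFB-320_simplified.onnx

# 2. convert the simplified onnx model into ncnn param/bin files
onnx2ncnn version-RFB-320_simplified.onnx RFB-320.param RFB-320.bin
```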
You can also use the NCNN compiled conversion tool **onnx2ncnn**. 59 | 60 | ## PS 61 | * If you want to run faster, try using the version-slim model or using lower-resolution inputs like 160x120 or 128x96. 62 | 63 | ## Result 64 | ![img1](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/ncnn/data/result.jpg) -------------------------------------------------------------------------------- /ncnn/data/result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/ncnn/data/result.jpg -------------------------------------------------------------------------------- /ncnn/data/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/ncnn/data/test.jpg -------------------------------------------------------------------------------- /ncnn/data/version-RFB/RFB-320.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/ncnn/data/version-RFB/RFB-320.bin -------------------------------------------------------------------------------- /ncnn/data/version-slim/slim_320.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/ncnn/data/version-slim/slim_320.bin -------------------------------------------------------------------------------- /ncnn/src/UltraFace.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // UltraFace.hpp 3 | // UltraFaceTest 4 | // 5 | // Created by vealocia on 2019/10/17. 6 | // Copyright © 2019 vealocia. All rights reserved. 
7 | // 8 | 9 | #ifndef UltraFace_hpp 10 | #define UltraFace_hpp 11 | 12 | #pragma once 13 | 14 | #include "gpu.h" 15 | #include "net.h" 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #define num_featuremap 4 22 | #define hard_nms 1 23 | #define blending_nms 2 /* mix nms was been proposaled in paper blaze face, aims to minimize the temporal jitter*/ 24 | 25 | typedef struct FaceInfo { 26 | float x1; 27 | float y1; 28 | float x2; 29 | float y2; 30 | float score; 31 | 32 | float *landmarks; 33 | } FaceInfo; 34 | 35 | class UltraFace { 36 | public: 37 | UltraFace(const std::string &bin_path, const std::string ¶m_path, 38 | int input_width, int input_length, int num_thread_ = 4, float score_threshold_ = 0.7, float iou_threshold_ = 0.3, int topk_ = -1); 39 | 40 | ~UltraFace(); 41 | 42 | int detect(ncnn::Mat &img, std::vector &face_list); 43 | 44 | private: 45 | void generateBBox(std::vector &bbox_collection, ncnn::Mat scores, ncnn::Mat boxes, float score_threshold, int num_anchors); 46 | 47 | void nms(std::vector &input, std::vector &output, int type = blending_nms); 48 | 49 | private: 50 | ncnn::Net ultraface; 51 | 52 | int num_thread; 53 | int image_w; 54 | int image_h; 55 | 56 | int in_w; 57 | int in_h; 58 | int num_anchors; 59 | 60 | int topk; 61 | float score_threshold; 62 | float iou_threshold; 63 | 64 | 65 | const float mean_vals[3] = {127, 127, 127}; 66 | const float norm_vals[3] = {1.0 / 128, 1.0 / 128, 1.0 / 128}; 67 | 68 | const float center_variance = 0.1; 69 | const float size_variance = 0.2; 70 | const std::vector> min_boxes = { 71 | {10.0f, 16.0f, 24.0f}, 72 | {32.0f, 48.0f}, 73 | {64.0f, 96.0f}, 74 | {128.0f, 192.0f, 256.0f}}; 75 | const std::vector strides = {8.0, 16.0, 32.0, 64.0}; 76 | std::vector> featuremap_size; 77 | std::vector> shrinkage_size; 78 | std::vector w_h_list; 79 | 80 | std::vector> priors = {}; 81 | }; 82 | 83 | #endif /* UltraFace_hpp */ 84 | -------------------------------------------------------------------------------- /ncnn/src/main.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // main.cpp 3 | // UltraFaceTest 4 | // 5 | // Created by vealocia on 2019/10/17. 6 | // Copyright © 2019 vealocia. All rights reserved. 
7 | // 8 | 9 | #include "UltraFace.hpp" 10 | #include 11 | #include 12 | 13 | int main(int argc, char **argv) { 14 | if (argc <= 3) { 15 | fprintf(stderr, "Usage: %s [image files...]\n", argv[0]); 16 | return 1; 17 | } 18 | 19 | std::string bin_path = argv[1]; 20 | std::string param_path = argv[2]; 21 | UltraFace ultraface(bin_path, param_path, 320, 240, 1, 0.7); // config model input 22 | 23 | for (int i = 3; i < argc; i++) { 24 | std::string image_file = argv[i]; 25 | std::cout << "Processing " << image_file << std::endl; 26 | 27 | cv::Mat frame = cv::imread(image_file); 28 | ncnn::Mat inmat = ncnn::Mat::from_pixels(frame.data, ncnn::Mat::PIXEL_BGR2RGB, frame.cols, frame.rows); 29 | 30 | std::vector face_info; 31 | ultraface.detect(inmat, face_info); 32 | 33 | for (int i = 0; i < face_info.size(); i++) { 34 | auto face = face_info[i]; 35 | cv::Point pt1(face.x1, face.y1); 36 | cv::Point pt2(face.x2, face.y2); 37 | cv::rectangle(frame, pt1, pt2, cv::Scalar(0, 255, 0), 2); 38 | } 39 | 40 | cv::imshow("UltraFace", frame); 41 | cv::waitKey(); 42 | cv::imwrite("result.jpg", frame); 43 | } 44 | return 0; 45 | } 46 | -------------------------------------------------------------------------------- /opencv_dnn/cv_dnn_ultraface.h: -------------------------------------------------------------------------------- 1 | #ifndef UltraFace_hpp 2 | #define UltraFace_hpp 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "opencv2/opencv.hpp" 11 | 12 | #define num_featuremap 4 13 | #define hard_nms 1 14 | #define blending_nms 2 /* mix nms was been proposaled in paper blaze face, aims to minimize the temporal jitter*/ 15 | 16 | typedef struct FaceInfo { 17 | float x1; 18 | float y1; 19 | float x2; 20 | float y2; 21 | float score; 22 | 23 | float landmarks[10]; 24 | } FaceInfo; 25 | 26 | class UltraFace { 27 | public: 28 | UltraFace(const std::string model_path, 29 | int input_width, int input_length, int num_thread_ = 4, float score_threshold_ = 0.7, float iou_threshold_ = 0.3, int topk_ = -1); 30 | 31 | ~UltraFace(); 32 | 33 | int detect(cv::Mat &img, std::vector &face_list); 34 | 35 | private: 36 | void generateBBox(std::vector &bbox_collection, cv::Mat scores, cv::Mat boxes, float score_threshold, int num_anchors); 37 | 38 | void nms(std::vector &input, std::vector &output, int type = blending_nms); 39 | 40 | private: 41 | cv::dnn::Net ultraface; 42 | 43 | int num_thread; 44 | int image_w; 45 | int image_h; 46 | 47 | int in_w; 48 | int in_h; 49 | int num_anchors; 50 | 51 | int topk; 52 | float score_threshold; 53 | float iou_threshold; 54 | 55 | 56 | const float mean_vals[3] = { 127, 127, 127 }; 57 | const float norm_vals[3] = { 1.0 / 128, 1.0 / 128, 1.0 / 128 }; 58 | 59 | const float center_variance = 0.1; 60 | const float size_variance = 0.2; 61 | const std::vector> min_boxes = { 62 | {10.0f, 16.0f, 24.0f}, 63 | {32.0f, 48.0f}, 64 | {64.0f, 96.0f}, 65 | {128.0f, 192.0f, 256.0f} }; 66 | const std::vector strides = { 8.0, 16.0, 32.0, 64.0 }; 67 | std::vector> featuremap_size; 68 | std::vector> shrinkage_size; 69 | std::vector w_h_list; 70 | 71 | std::vector> priors = {}; 72 | }; 73 | 74 | #endif /* UltraFace_hpp */ -------------------------------------------------------------------------------- /paddle/data: -------------------------------------------------------------------------------- 1 | ../data -------------------------------------------------------------------------------- /paddle/train-version-RFB.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | model_root_path="./models/train-version-RFB" 3 | log_dir="$model_root_path/logs" 4 | log="$log_dir/log" 5 | mkdir -p "$log_dir" 6 | 7 | python3 -u train.py \ 8 | --datasets \ 9 | ./data/wider_face_add_lm_10_10 \ 10 | --validation_dataset \ 11 | ./data/wider_face_add_lm_10_10 \ 12 | --net \ 13 | RFB \ 14 | --num_epochs \ 15 | 200 \ 16 | --milestones \ 17 | "95,150" \ 18 | --lr \ 19 | 1e-2 \ 20 | --batch_size \ 21 | 24 \ 22 | --input_size \ 23 | 320 \ 24 | --checkpoint_folder \ 25 | ${model_root_path} \ 26 | --num_workers \ 27 | 4 \ 28 | --log_dir \ 29 | ${log_dir} \ 30 | --cuda_index \ 31 | 0 \ 32 | 2>&1 | tee "$log" 33 | -------------------------------------------------------------------------------- /paddle/train-version-slim.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | model_root_path="./models/train-version-slim" 3 | log_dir="$model_root_path/logs" 4 | log="$log_dir/log" 5 | mkdir -p "$log_dir" 6 | 7 | python3 -u train.py \ 8 | --datasets \ 9 | ./data/wider_face_add_lm_10_10 \ 10 | --validation_dataset \ 11 | ./data/wider_face_add_lm_10_10 \ 12 | --net \ 13 | slim \ 14 | --num_epochs \ 15 | 200 \ 16 | --milestones \ 17 | "95,150" \ 18 | --lr \ 19 | 1e-2 \ 20 | --batch_size \ 21 | 24 \ 22 | --input_size \ 23 | 320 \ 24 | --checkpoint_folder \ 25 | ${model_root_path} \ 26 | --num_workers \ 27 | 4 \ 28 | --log_dir \ 29 | ${log_dir} \ 30 | --cuda_index \ 31 | 0 \ 32 | 2>&1 | tee "$log" 33 | -------------------------------------------------------------------------------- /paddle/vision/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/paddle/vision/__init__.py -------------------------------------------------------------------------------- /paddle/vision/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/paddle/vision/datasets/__init__.py -------------------------------------------------------------------------------- /paddle/vision/datasets/voc_dataset.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pathlib 4 | import xml.etree.ElementTree as ET 5 | 6 | import cv2 7 | import numpy as np 8 | from paddle.io import Dataset 9 | 10 | class VOCDataset(Dataset): 11 | def __init__(self, root, transform=None, target_transform=None, is_test=False, keep_difficult=False): 12 | """Dataset for VOC data. 13 | Args: 14 | root: the root of the VOC2007 or VOC2012 dataset, the directory contains the following sub-directories: 15 | Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject. 
16 | """ 17 | super(VOCDataset, self).__init__() 18 | self.root = pathlib.Path(root) 19 | self.transform = transform 20 | self.target_transform = target_transform 21 | if is_test: 22 | image_sets_file = self.root / "ImageSets/Main/test.txt" 23 | else: 24 | image_sets_file = self.root / "ImageSets/Main/trainval.txt" 25 | self.ids = VOCDataset._read_image_ids(image_sets_file) 26 | self.keep_difficult = keep_difficult 27 | 28 | # if the labels file exists, read in the class names 29 | label_file_name = self.root / "labels.txt" 30 | 31 | if os.path.isfile(label_file_name): 32 | class_string = "" 33 | with open(label_file_name, 'r') as infile: 34 | for line in infile: 35 | class_string += line.rstrip() 36 | 37 | # classes should be a comma separated list 38 | classes = class_string.split(',') 39 | # prepend BACKGROUND as first class 40 | classes.insert(0, 'BACKGROUND') 41 | classes = [elem.replace(" ", "") for elem in classes] 42 | self.class_names = tuple(classes) 43 | logging.info("VOC Labels read from file: " + str(self.class_names)) 44 | 45 | else: 46 | logging.info("No labels file, using default VOC classes.") 47 | self.class_names = ('BACKGROUND', 48 | 'face') 49 | 50 | self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)} 51 | 52 | def __getitem__(self, index): 53 | image_id = self.ids[index] 54 | boxes, labels, is_difficult = self._get_annotation(image_id) 55 | if not self.keep_difficult: 56 | boxes = boxes[is_difficult == 0] 57 | labels = labels[is_difficult == 0] 58 | image = self._read_image(image_id) 59 | if self.transform: 60 | image, boxes, labels = self.transform(image, boxes, labels) 61 | if self.target_transform: 62 | boxes, labels = self.target_transform(boxes, labels) 63 | return image, boxes, labels 64 | 65 | def get_image(self, index): 66 | image_id = self.ids[index] 67 | image = self._read_image(image_id) 68 | if self.transform: 69 | image, _ = self.transform(image) 70 | return image 71 | 72 | def get_annotation(self, index): 73 | image_id = self.ids[index] 74 | return image_id, self._get_annotation(image_id) 75 | 76 | def __len__(self): 77 | return len(self.ids) 78 | 79 | @staticmethod 80 | def _read_image_ids(image_sets_file): 81 | ids = [] 82 | with open(image_sets_file) as f: 83 | for line in f: 84 | ids.append(line.rstrip()) 85 | return ids 86 | 87 | def _get_annotation(self, image_id): 88 | annotation_file = self.root / f"Annotations/{image_id}.xml" 89 | objects = ET.parse(annotation_file).findall("object") 90 | boxes = [] 91 | labels = [] 92 | is_difficult = [] 93 | for object in objects: 94 | class_name = object.find('name').text.lower().strip() 95 | # we're only concerned with clases in our list 96 | if class_name in self.class_dict: 97 | bbox = object.find('bndbox') 98 | 99 | # VOC dataset format follows Matlab, in which indexes start from 0 100 | x1 = float(bbox.find('xmin').text) - 1 101 | y1 = float(bbox.find('ymin').text) - 1 102 | x2 = float(bbox.find('xmax').text) - 1 103 | y2 = float(bbox.find('ymax').text) - 1 104 | boxes.append([x1, y1, x2, y2]) 105 | 106 | labels.append(self.class_dict[class_name]) 107 | is_difficult_str = object.find('difficult').text 108 | is_difficult.append(int(is_difficult_str) if is_difficult_str else 0) 109 | 110 | return (np.array(boxes, dtype=np.float32), 111 | np.array(labels, dtype=np.int64), 112 | np.array(is_difficult, dtype=np.uint8)) 113 | 114 | def _read_image(self, image_id): 115 | image_file = self.root / f"JPEGImages/{image_id}.jpg" 116 | image = cv2.imread(str(image_file)) 117 | image = 
cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 118 | return image 119 | -------------------------------------------------------------------------------- /paddle/vision/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/paddle/vision/nn/__init__.py -------------------------------------------------------------------------------- /paddle/vision/nn/mb_tiny.py: -------------------------------------------------------------------------------- 1 | import paddle.nn as nn 2 | import paddle.nn.functional as F 3 | 4 | 5 | class Mb_Tiny(nn.Layer): 6 | 7 | def __init__(self, num_classes=2): 8 | super(Mb_Tiny, self).__init__() 9 | self.base_channel = 8 * 2 10 | 11 | def conv_bn(inp, oup, stride): 12 | return nn.Sequential( 13 | nn.Conv2D(inp, oup, 3, stride, 1, bias_attr=None), 14 | nn.BatchNorm2D(oup), 15 | nn.ReLU() 16 | ) 17 | 18 | def conv_dw(inp, oup, stride): 19 | return nn.Sequential( 20 | nn.Conv2D(inp, inp, 3, stride, 1, groups=inp, bias_attr=None), 21 | nn.BatchNorm2D(inp), 22 | nn.ReLU(), 23 | 24 | nn.Conv2D(inp, oup, 1, 1, 0, bias_attr=None), 25 | nn.BatchNorm2D(oup), 26 | nn.ReLU(), 27 | ) 28 | 29 | self.model = nn.Sequential( 30 | conv_bn(3, self.base_channel, 2), # 160*120 31 | conv_dw(self.base_channel, self.base_channel * 2, 1), 32 | conv_dw(self.base_channel * 2, self.base_channel * 2, 2), # 80*60 33 | conv_dw(self.base_channel * 2, self.base_channel * 2, 1), 34 | conv_dw(self.base_channel * 2, self.base_channel * 4, 2), # 40*30 35 | conv_dw(self.base_channel * 4, self.base_channel * 4, 1), 36 | conv_dw(self.base_channel * 4, self.base_channel * 4, 1), 37 | conv_dw(self.base_channel * 4, self.base_channel * 4, 1), 38 | conv_dw(self.base_channel * 4, self.base_channel * 8, 2), # 20*15 39 | conv_dw(self.base_channel * 8, self.base_channel * 8, 1), 40 | conv_dw(self.base_channel * 8, self.base_channel * 8, 1), 41 | conv_dw(self.base_channel * 8, self.base_channel * 16, 2), # 10*8 42 | conv_dw(self.base_channel * 16, self.base_channel * 16, 1) 43 | ) 44 | self.fc = nn.Linear(1024, num_classes) 45 | 46 | def forward(self, x): 47 | x = self.model(x) 48 | x = F.avg_pool2d(x, 7) 49 | x = x.view(-1, 1024) 50 | x = self.fc(x) 51 | return x 52 | -------------------------------------------------------------------------------- /paddle/vision/nn/multibox_loss.py: -------------------------------------------------------------------------------- 1 | import paddle 2 | import paddle.nn as nn 3 | import paddle.nn.functional as F 4 | 5 | from ..utils import box_utils 6 | 7 | 8 | # class MultiboxLoss(object): 9 | class MultiboxLoss(nn.Layer): 10 | def __init__(self, priors, neg_pos_ratio, center_variance, size_variance): 11 | """Implement SSD Multibox Loss. 12 | 13 | Basically, Multibox loss combines classification loss 14 | and Smooth L1 regression loss. 15 | """ 16 | super(MultiboxLoss, self).__init__() 17 | self.neg_pos_ratio = neg_pos_ratio 18 | self.center_variance = center_variance 19 | self.size_variance = size_variance 20 | self.priors = priors 21 | 22 | def forward(self, confidence, predicted_locations, labels, gt_locations): 23 | """Compute classification loss and smooth l1 loss. 24 | 25 | Args: 26 | confidence (batch_size, num_priors, num_classes): class predictions. 27 | locations (batch_size, num_priors, 4): predicted locations. 28 | labels (batch_size, num_priors): real labels of all the priors. 
29 | boxes (batch_size, num_priors, 4): real boxes corresponding all the priors. 30 | """ 31 | num_classes = confidence.shape[2] 32 | with paddle.no_grad(): 33 | # derived from cross_entropy=sum(log(p)) 34 | loss = -F.log_softmax(confidence, 2)[:, :, 0] 35 | mask = box_utils.hard_negative_mining(loss, labels, self.neg_pos_ratio) 36 | 37 | confidence = paddle.concat([confidence[:, :, 0].masked_select(mask).reshape([-1, 1]), 38 | confidence[:, :,1].masked_select(mask).reshape([-1, 1])], axis=1) 39 | classification_loss = F.cross_entropy(confidence.reshape([-1, num_classes]), labels.masked_select(mask), reduction='sum') 40 | pos_mask = labels > 0 41 | predicted_locations = predicted_locations.masked_select(paddle.concat([pos_mask.reshape(pos_mask.shape+[1]), pos_mask.reshape(pos_mask.shape+[1]), pos_mask.reshape(pos_mask.shape+[1]), pos_mask.reshape(pos_mask.shape+[1])], axis=2)).reshape([-1, 4]) 42 | gt_locations = gt_locations.masked_select(paddle.concat([pos_mask.reshape(pos_mask.shape+[1]), pos_mask.reshape(pos_mask.shape+[1]), pos_mask.reshape(pos_mask.shape+[1]), pos_mask.reshape(pos_mask.shape+[1])], axis=2)).reshape([-1, 4]) 43 | smooth_l1_loss = F.smooth_l1_loss(predicted_locations, gt_locations.cast('float32'), reduction='sum') # smooth_l1_loss 44 | # smooth_l1_loss = F.mse_loss(predicted_locations, gt_locations, reduction='sum') #l2 loss 45 | num_pos = gt_locations.shape[0] 46 | return smooth_l1_loss / num_pos, classification_loss / num_pos 47 | -------------------------------------------------------------------------------- /paddle/vision/ssd/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/paddle/vision/ssd/__init__.py -------------------------------------------------------------------------------- /paddle/vision/ssd/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/paddle/vision/ssd/config/__init__.py -------------------------------------------------------------------------------- /paddle/vision/ssd/config/fd_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from vision.utils.box_utils import generate_priors 4 | 5 | image_mean_test = image_mean = np.array([127, 127, 127]) 6 | image_std = 128.0 7 | iou_threshold = 0.3 8 | center_variance = 0.1 9 | size_variance = 0.2 10 | 11 | min_boxes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]] 12 | shrinkage_list = [] 13 | image_size = [320, 240] # default input size 320*240 14 | feature_map_w_h_list = [[40, 20, 10, 5], [30, 15, 8, 4]] # default feature map size 15 | priors = [] 16 | 17 | 18 | def define_img_size(size): 19 | global image_size, feature_map_w_h_list, priors 20 | img_size_dict = {128: [128, 96], 21 | 160: [160, 120], 22 | 320: [320, 240], 23 | 480: [480, 360], 24 | 640: [640, 480], 25 | 1280: [1280, 960]} 26 | image_size = img_size_dict[size] 27 | 28 | feature_map_w_h_list_dict = {128: [[16, 8, 4, 2], [12, 6, 3, 2]], 29 | 160: [[20, 10, 5, 3], [15, 8, 4, 2]], 30 | 320: [[40, 20, 10, 5], [30, 15, 8, 4]], 31 | 480: [[60, 30, 15, 8], [45, 23, 12, 6]], 32 | 640: [[80, 40, 20, 10], [60, 30, 15, 8]], 33 | 1280: [[160, 80, 40, 20], [120, 60, 30, 15]]} 34 | feature_map_w_h_list = feature_map_w_h_list_dict[size] 35 
| 36 | for i in range(0, len(image_size)): 37 | item_list = [] 38 | for k in range(0, len(feature_map_w_h_list[i])): 39 | item_list.append(image_size[i] / feature_map_w_h_list[i][k]) 40 | shrinkage_list.append(item_list) 41 | priors = generate_priors(feature_map_w_h_list, shrinkage_list, image_size, min_boxes) 42 | -------------------------------------------------------------------------------- /paddle/vision/ssd/data_preprocessing.py: -------------------------------------------------------------------------------- 1 | from ..transforms.transforms import * 2 | 3 | 4 | class TrainAugmentation: 5 | def __init__(self, size, mean=0, std=1.0): 6 | """ 7 | Args: 8 | size: the size the of final image. 9 | mean: mean pixel value per channel. 10 | """ 11 | self.mean = mean 12 | self.size = size 13 | self.augment = Compose([ 14 | ConvertFromInts(), 15 | PhotometricDistort(), 16 | RandomSampleCrop_v2(), 17 | RandomMirror(), 18 | ToPercentCoords(), 19 | Resize(self.size), 20 | SubtractMeans(self.mean), 21 | lambda img, boxes=None, labels=None: (img / std, boxes, labels), 22 | ToTensor(), 23 | ]) 24 | 25 | def __call__(self, img, boxes, labels): 26 | """ 27 | 28 | Args: 29 | img: the output of cv.imread in RGB layout. 30 | boxes: boundding boxes in the form of (x1, y1, x2, y2). 31 | labels: labels of boxes. 32 | """ 33 | return self.augment(img, boxes, labels) 34 | 35 | 36 | class TestTransform: 37 | def __init__(self, size, mean=0.0, std=1.0): 38 | self.transform = Compose([ 39 | ToPercentCoords(), 40 | Resize(size), 41 | SubtractMeans(mean), 42 | lambda img, boxes=None, labels=None: (img / std, boxes, labels), 43 | ToTensor(), 44 | ]) 45 | 46 | def __call__(self, image, boxes, labels): 47 | return self.transform(image, boxes, labels) 48 | 49 | 50 | class PredictionTransform: 51 | def __init__(self, size, mean=0.0, std=1.0): 52 | self.transform = Compose([ 53 | Resize(size), 54 | SubtractMeans(mean), 55 | lambda img, boxes=None, labels=None: (img / std, boxes, labels), 56 | ToTensor() 57 | ]) 58 | 59 | def __call__(self, image): 60 | image, _, _ = self.transform(image) 61 | return image 62 | -------------------------------------------------------------------------------- /paddle/vision/ssd/mb_tiny_RFB_fd.py: -------------------------------------------------------------------------------- 1 | from paddle.nn import Conv2D, Sequential, LayerList, ReLU 2 | 3 | from vision.nn.mb_tiny_RFB import Mb_Tiny_RFB 4 | from vision.ssd.config import fd_config as config 5 | from vision.ssd.predictor import Predictor 6 | from vision.ssd.ssd import SSD 7 | 8 | 9 | def SeperableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0): 10 | """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 
11 | """ 12 | return Sequential( 13 | Conv2D(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 14 | groups=in_channels, stride=stride, padding=padding), 15 | ReLU(), 16 | Conv2D(in_channels=in_channels, out_channels=out_channels, kernel_size=1), 17 | ) 18 | 19 | 20 | def create_Mb_Tiny_RFB_fd(num_classes, is_test=False, device="cuda"): 21 | base_net = Mb_Tiny_RFB(2) 22 | base_net_model = base_net.model # disable dropout layer 23 | 24 | source_layer_indexes = [ 25 | 8, 26 | 11, 27 | 13 28 | ] 29 | extras = LayerList([ 30 | Sequential( 31 | Conv2D(in_channels=base_net.base_channel * 16, out_channels=base_net.base_channel * 4, kernel_size=1), 32 | ReLU(), 33 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=base_net.base_channel * 16, kernel_size=3, stride=2, padding=1), 34 | ReLU() 35 | ) 36 | ]) 37 | 38 | regression_headers = LayerList([ 39 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * 4, kernel_size=3, padding=1), 40 | SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * 4, kernel_size=3, padding=1), 41 | SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * 4, kernel_size=3, padding=1), 42 | Conv2D(in_channels=base_net.base_channel * 16, out_channels=3 * 4, kernel_size=3, padding=1) 43 | ]) 44 | 45 | classification_headers = LayerList([ 46 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * num_classes, kernel_size=3, padding=1), 47 | SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * num_classes, kernel_size=3, padding=1), 48 | SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * num_classes, kernel_size=3, padding=1), 49 | Conv2D(in_channels=base_net.base_channel * 16, out_channels=3 * num_classes, kernel_size=3, padding=1) 50 | ]) 51 | 52 | return SSD(num_classes, base_net_model, source_layer_indexes, 53 | extras, classification_headers, regression_headers, is_test=is_test, config=config, device=device) 54 | 55 | 56 | def create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=200, nms_method=None, sigma=0.5, device=None): 57 | predictor = Predictor(net, config.image_size, config.image_mean_test, 58 | config.image_std, 59 | nms_method=nms_method, 60 | iou_threshold=config.iou_threshold, 61 | candidate_size=candidate_size, 62 | sigma=sigma, 63 | device=device) 64 | return predictor 65 | -------------------------------------------------------------------------------- /paddle/vision/ssd/mb_tiny_fd.py: -------------------------------------------------------------------------------- 1 | from paddle.nn import Conv2D, Sequential, LayerList, ReLU 2 | 3 | from vision.nn.mb_tiny import Mb_Tiny 4 | from vision.ssd.config import fd_config as config 5 | from vision.ssd.predictor import Predictor 6 | from vision.ssd.ssd import SSD 7 | 8 | 9 | def SeperableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0): 10 | """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 
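
Usage sketch for the RFB factory functions above (paddle port). It assumes the script is run from the paddle/ directory so that `vision` is importable, that `define_img_size` is called before importing the factories (as the repo's own scripts do), and that a converted paddle checkpoint exists at the illustrative path below.

```python
import cv2
import paddle

from vision.ssd.config.fd_config import define_img_size
define_img_size(320)  # must run before importing the model factories

from vision.ssd.mb_tiny_RFB_fd import (create_Mb_Tiny_RFB_fd,
                                        create_Mb_Tiny_RFB_fd_predictor)

net = create_Mb_Tiny_RFB_fd(2, is_test=True, device="cpu")  # BACKGROUND + face
net.set_state_dict(paddle.load("version-RFB-320.pdparams"))  # illustrative path
net.eval()
predictor = create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=200, device="cpu")

image = cv2.cvtColor(cv2.imread("imgs/1.jpg"), cv2.COLOR_BGR2RGB)
boxes, labels, probs = predictor.predict(image, top_k=50, prob_threshold=0.7)
print(boxes.shape[0], "faces")
```
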
11 | """ 12 | return Sequential( 13 | Conv2D(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 14 | groups=in_channels, stride=stride, padding=padding), 15 | ReLU(), 16 | Conv2D(in_channels=in_channels, out_channels=out_channels, kernel_size=1), 17 | ) 18 | 19 | 20 | def create_mb_tiny_fd(num_classes, is_test=False, device="cuda"): 21 | base_net = Mb_Tiny(2) 22 | base_net_model = base_net.model # disable dropout layer 23 | 24 | source_layer_indexes = [ 25 | 8, 26 | 11, 27 | 13 28 | ] 29 | extras = LayerList([ 30 | Sequential( 31 | Conv2D(in_channels=base_net.base_channel * 16, out_channels=base_net.base_channel * 4, kernel_size=1), 32 | ReLU(), 33 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=base_net.base_channel * 16, kernel_size=3, stride=2, padding=1), 34 | ReLU() 35 | ) 36 | ]) 37 | 38 | regression_headers = LayerList([ 39 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * 4, kernel_size=3, padding=1), 40 | SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * 4, kernel_size=3, padding=1), 41 | SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * 4, kernel_size=3, padding=1), 42 | Conv2D(in_channels=base_net.base_channel * 16, out_channels=3 * 4, kernel_size=3, padding=1) 43 | ]) 44 | 45 | classification_headers = LayerList([ 46 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * num_classes, kernel_size=3, padding=1), 47 | SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * num_classes, kernel_size=3, padding=1), 48 | SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * num_classes, kernel_size=3, padding=1), 49 | Conv2D(in_channels=base_net.base_channel * 16, out_channels=3 * num_classes, kernel_size=3, padding=1) 50 | ]) 51 | 52 | return SSD(num_classes, base_net_model, source_layer_indexes, 53 | extras, classification_headers, regression_headers, is_test=is_test, config=config, device=device) 54 | 55 | 56 | def create_mb_tiny_fd_predictor(net, candidate_size=200, nms_method=None, sigma=0.5, device=None): 57 | predictor = Predictor(net, config.image_size, config.image_mean_test, 58 | config.image_std, 59 | nms_method=nms_method, 60 | iou_threshold=config.iou_threshold, 61 | candidate_size=candidate_size, 62 | sigma=sigma, 63 | device=device) 64 | return predictor 65 | -------------------------------------------------------------------------------- /paddle/vision/ssd/predictor.py: -------------------------------------------------------------------------------- 1 | import paddle 2 | 3 | from ..utils import box_utils 4 | from .data_preprocessing import PredictionTransform 5 | from ..utils.misc import Timer 6 | 7 | 8 | class Predictor: 9 | def __init__(self, net, size, mean=0.0, std=1.0, nms_method=None, 10 | iou_threshold=0.3, filter_threshold=0.01, candidate_size=200, sigma=0.5, device=None): 11 | self.net = net 12 | self.transform = PredictionTransform(size, mean, std) 13 | self.iou_threshold = iou_threshold 14 | self.filter_threshold = filter_threshold 15 | self.candidate_size = candidate_size 16 | self.nms_method = nms_method 17 | 18 | self.sigma = sigma 19 | if device: 20 | self.device = device 21 | else: 22 | self.device = paddle.set_device("cuda" if paddle.is_compiled_with_cuda() else "cpu") 23 | 24 | self.net.to(self.device) 25 | self.net.eval() 26 | 27 | self.timer = Timer() 28 | 29 | def predict(self, image, top_k=-1, prob_threshold=None): 30 | cpu_device = paddle.set_device("cpu") 31 | height, width, _ = 
image.shape 32 | image = self.transform(image) 33 | images = image.unsqueeze(0) 34 | images = images.to(self.device) 35 | with paddle.no_grad(): 36 | for i in range(1): 37 | self.timer.start() 38 | scores, boxes = self.net.forward(images) 39 | print("Inference time: ", self.timer.end()) 40 | boxes = boxes[0] 41 | scores = scores[0] 42 | if not prob_threshold: 43 | prob_threshold = self.filter_threshold 44 | # this version of nms is slower on GPU, so we move data to CPU. 45 | boxes = boxes.to(cpu_device) 46 | scores = scores.to(cpu_device) 47 | picked_box_probs = [] 48 | picked_labels = [] 49 | for class_index in range(1, scores.size(1)): 50 | probs = scores[:, class_index] 51 | mask = probs > prob_threshold 52 | probs = probs[mask] 53 | if probs.size(0) == 0: 54 | continue 55 | subset_boxes = boxes[mask, :] 56 | box_probs = paddle.concat([subset_boxes, probs.reshape(-1, 1)], 1) 57 | box_probs = box_utils.nms(box_probs, self.nms_method, 58 | score_threshold=prob_threshold, 59 | iou_threshold=self.iou_threshold, 60 | sigma=self.sigma, 61 | top_k=top_k, 62 | candidate_size=self.candidate_size) 63 | picked_box_probs.append(box_probs) 64 | picked_labels.extend([class_index] * box_probs.size(0)) 65 | if not picked_box_probs: 66 | return paddle.to_tensor([]), paddle.to_tensor([]), paddle.to_tensor([]) 67 | picked_box_probs = paddle.concat(picked_box_probs) 68 | picked_box_probs[:, 0] *= width 69 | picked_box_probs[:, 1] *= height 70 | picked_box_probs[:, 2] *= width 71 | picked_box_probs[:, 3] *= height 72 | return picked_box_probs[:, :4], paddle.to_tensor(picked_labels), picked_box_probs[:, 4] 73 | -------------------------------------------------------------------------------- /paddle/vision/transforms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/paddle/vision/transforms/__init__.py -------------------------------------------------------------------------------- /paddle/vision/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .misc import * 2 | -------------------------------------------------------------------------------- /paddle/vision/utils/box_utils_numpy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def convert_locations_to_boxes(locations, priors, center_variance, size_variance): 5 | """Convert regressional location results of SSD into boxes in the form of (center_x, center_y, h, w). 6 | 7 | The conversion: 8 | $$predicted\_center * center_variance = \frac {real\_center - prior\_center} {prior\_hw}$$ 9 | $$exp(predicted\_hw * size_variance) = \frac {real\_hw} {prior\_hw}$$ 10 | We do it in the inverse direction here. 11 | Args: 12 | locations (batch_size, num_priors, 4): the regression output of SSD. It will contain the outputs as well. 13 | priors (num_priors, 4) or (batch_size/1, num_priors, 4): prior boxes. 14 | center_variance: a float used to change the scale of center. 15 | size_variance: a float used to change of scale of size. 16 | Returns: 17 | boxes: priors: [[center_x, center_y, h, w]]. All the values 18 | are relative to the image size. 19 | """ 20 | # priors can have one dimension less. 
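
A minimal round-trip check of the encode/decode relationship described in the docstring above, using made-up numbers and plain numpy (it mirrors `convert_boxes_to_locations` followed by `convert_locations_to_boxes` for a single prior):

```python
import numpy as np

center_variance, size_variance = 0.1, 0.2
prior = np.array([0.50, 0.50, 0.20, 0.30])  # center-form prior, relative to image size
box   = np.array([0.55, 0.48, 0.25, 0.27])  # a "real" box in the same form

# encode (what convert_boxes_to_locations does)
loc = np.concatenate([
    (box[:2] - prior[:2]) / prior[2:] / center_variance,
    np.log(box[2:] / prior[2:]) / size_variance,
])

# decode (what convert_locations_to_boxes does)
decoded = np.concatenate([
    loc[:2] * center_variance * prior[2:] + prior[:2],
    np.exp(loc[2:] * size_variance) * prior[2:],
])
assert np.allclose(decoded, box)
```
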
21 | if len(priors.shape) + 1 == len(locations.shape): 22 | priors = np.expand_dims(priors, 0) 23 | return np.concatenate([ 24 | locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2], 25 | np.exp(locations[..., 2:] * size_variance) * priors[..., 2:] 26 | ], axis=len(locations.shape) - 1) 27 | 28 | 29 | def convert_boxes_to_locations(center_form_boxes, center_form_priors, center_variance, size_variance): 30 | # priors can have one dimension less 31 | if len(center_form_priors.shape) + 1 == len(center_form_boxes.shape): 32 | center_form_priors = np.expand_dims(center_form_priors, 0) 33 | return np.concatenate([ 34 | (center_form_boxes[..., :2] - center_form_priors[..., :2]) / center_form_priors[..., 2:] / center_variance, 35 | np.log(center_form_boxes[..., 2:] / center_form_priors[..., 2:]) / size_variance 36 | ], axis=len(center_form_boxes.shape) - 1) 37 | 38 | 39 | def area_of(left_top, right_bottom): 40 | """Compute the areas of rectangles given two corners. 41 | 42 | Args: 43 | left_top (N, 2): left top corner. 44 | right_bottom (N, 2): right bottom corner. 45 | 46 | Returns: 47 | area (N): return the area. 48 | """ 49 | hw = np.clip(right_bottom - left_top, 0.0, None) 50 | return hw[..., 0] * hw[..., 1] 51 | 52 | 53 | def iou_of(boxes0, boxes1, eps=1e-5): 54 | """Return intersection-over-union (Jaccard index) of boxes. 55 | 56 | Args: 57 | boxes0 (N, 4): ground truth boxes. 58 | boxes1 (N or 1, 4): predicted boxes. 59 | eps: a small number to avoid 0 as denominator. 60 | Returns: 61 | iou (N): IoU values. 62 | """ 63 | overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2]) 64 | overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:]) 65 | 66 | overlap_area = area_of(overlap_left_top, overlap_right_bottom) 67 | area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) 68 | area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) 69 | return overlap_area / (area0 + area1 - overlap_area + eps) 70 | 71 | 72 | def center_form_to_corner_form(locations): 73 | return np.concatenate([locations[..., :2] - locations[..., 2:] / 2, 74 | locations[..., :2] + locations[..., 2:] / 2], len(locations.shape) - 1) 75 | 76 | 77 | def corner_form_to_center_form(boxes): 78 | return np.concatenate([ 79 | (boxes[..., :2] + boxes[..., 2:]) / 2, 80 | boxes[..., 2:] - boxes[..., :2] 81 | ], len(boxes.shape) - 1) 82 | 83 | 84 | def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): 85 | """ 86 | 87 | Args: 88 | box_scores (N, 5): boxes in corner-form and probabilities. 89 | iou_threshold: intersection over union threshold. 90 | top_k: keep top_k results. If k <= 0, keep all the results. 91 | candidate_size: only consider the candidates with the highest scores. 
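
For a concrete feel of `iou_of` above, a standalone numeric example using the same arithmetic (no repo imports): two unit squares offset by half a side overlap with IoU = 0.5 / 1.5 = 1/3, which exceeds the repo's default iou_threshold of 0.3, so hard NMS would suppress the lower-scoring of the two.

```python
import numpy as np

a = np.array([0.0, 0.0, 1.0, 1.0])   # corner form: x1, y1, x2, y2
b = np.array([0.5, 0.0, 1.5, 1.0])

inter_lt = np.maximum(a[:2], b[:2])
inter_rb = np.minimum(a[2:], b[2:])
inter = np.prod(np.clip(inter_rb - inter_lt, 0.0, None))
union = 1.0 * 1.0 + 1.0 * 1.0 - inter
print(inter / union)  # 0.333...
```
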
92 | Returns: 93 | picked: a list of indexes of the kept boxes 94 | """ 95 | scores = box_scores[:, -1] 96 | boxes = box_scores[:, :-1] 97 | picked = [] 98 | # _, indexes = scores.sort(descending=True) 99 | indexes = np.argsort(scores) 100 | # indexes = indexes[:candidate_size] 101 | indexes = indexes[-candidate_size:] 102 | while len(indexes) > 0: 103 | # current = indexes[0] 104 | current = indexes[-1] 105 | picked.append(current) 106 | if 0 < top_k == len(picked) or len(indexes) == 1: 107 | break 108 | current_box = boxes[current, :] 109 | # indexes = indexes[1:] 110 | indexes = indexes[:-1] 111 | rest_boxes = boxes[indexes, :] 112 | iou = iou_of( 113 | rest_boxes, 114 | np.expand_dims(current_box, axis=0), 115 | ) 116 | indexes = indexes[iou <= iou_threshold] 117 | 118 | return box_scores[picked, :] 119 | -------------------------------------------------------------------------------- /paddle/vision/utils/misc.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import paddle 4 | 5 | 6 | def str2bool(s): 7 | return s.lower() in ('true', '1') 8 | 9 | 10 | class Timer: 11 | def __init__(self): 12 | self.clock = {} 13 | 14 | def start(self, key="default"): 15 | self.clock[key] = datetime.datetime.now() 16 | 17 | def end(self, key="default"): 18 | if key not in self.clock: 19 | raise Exception(f"{key} is not in the clock.") 20 | interval = datetime.datetime.now() - self.clock[key] 21 | del self.clock[key] 22 | return interval.total_seconds() 23 | 24 | 25 | def save_checkpoint(epoch, net_state_dict, optimizer_state_dict, best_score, checkpoint_path, model_path): 26 | paddle.save({ 27 | 'epoch': epoch, 28 | 'model': net_state_dict, 29 | 'optimizer': optimizer_state_dict, 30 | 'best_score': best_score 31 | }, checkpoint_path) 32 | paddle.save(net_state_dict, model_path) 33 | 34 | 35 | def load_checkpoint(checkpoint_path): 36 | return paddle.load(checkpoint_path) 37 | 38 | 39 | def freeze_net_layers(net): 40 | for param in net.parameters(): 41 | param.stop_gradient = True 42 | 43 | 44 | def store_labels(path, labels): 45 | with open(path, "w") as f: 46 | f.write("\n".join(labels)) 47 | -------------------------------------------------------------------------------- /readme_imgs/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/readme_imgs/1.jpg -------------------------------------------------------------------------------- /readme_imgs/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/readme_imgs/2.jpg -------------------------------------------------------------------------------- /readme_imgs/26.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/readme_imgs/26.jpg -------------------------------------------------------------------------------- /readme_imgs/27.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/readme_imgs/27.jpg 
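
Usage sketch for the `Timer` and checkpoint helpers above. It assumes the script is run from the paddle/ directory so that `vision` is importable; `net`, `optimizer`, `epoch` and `best_score` are placeholders for an existing paddle model, optimizer and training state, and the paths are illustrative.

```python
import paddle
from vision.utils.misc import Timer, save_checkpoint, load_checkpoint

timer = Timer()
timer.start("epoch")
# ... run one training epoch here ...
print(f"epoch took {timer.end('epoch'):.3f}s")

# Placeholders: net / optimizer / epoch / best_score come from your training loop.
# save_checkpoint(epoch, net.state_dict(), optimizer.state_dict(), best_score,
#                 checkpoint_path="checkpoints/last.ckpt",
#                 model_path="checkpoints/last.pdparams")
# state = load_checkpoint("checkpoints/last.ckpt")  # dict with 'epoch', 'model', 'optimizer', 'best_score'
```
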
-------------------------------------------------------------------------------- /readme_imgs/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/readme_imgs/4.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | torch 3 | opencv_python 4 | torchvision 5 | typing 6 | torchstat 7 | torchsummary 8 | ptflops 9 | matplotlib 10 | onnx 11 | onnxruntime -------------------------------------------------------------------------------- /run_video_face_detect.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code uses the pytorch model to detect faces from live video or camera. 3 | """ 4 | import argparse 5 | import sys 6 | import cv2 7 | 8 | from vision.ssd.config.fd_config import define_img_size 9 | 10 | parser = argparse.ArgumentParser( 11 | description='detect_video') 12 | 13 | parser.add_argument('--net_type', default="RFB", type=str, 14 | help='The network architecture ,optional: RFB (higher precision) or slim (faster)') 15 | parser.add_argument('--input_size', default=480, type=int, 16 | help='define network input size,default optional value 128/160/320/480/640/1280') 17 | parser.add_argument('--threshold', default=0.7, type=float, 18 | help='score threshold') 19 | parser.add_argument('--candidate_size', default=1000, type=int, 20 | help='nms candidate size') 21 | parser.add_argument('--path', default="imgs", type=str, 22 | help='imgs dir') 23 | parser.add_argument('--test_device', default="cuda:0", type=str, 24 | help='cuda:0 or cpu') 25 | parser.add_argument('--video_path', default="/home/linzai/Videos/video/16_1.MP4", type=str, 26 | help='path of video') 27 | args = parser.parse_args() 28 | 29 | input_img_size = args.input_size 30 | define_img_size(input_img_size) # must put define_img_size() before 'import create_mb_tiny_fd, create_mb_tiny_fd_predictor' 31 | 32 | from vision.ssd.mb_tiny_fd import create_mb_tiny_fd, create_mb_tiny_fd_predictor 33 | from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd, create_Mb_Tiny_RFB_fd_predictor 34 | from vision.utils.misc import Timer 35 | 36 | label_path = "./models/voc-model-labels.txt" 37 | 38 | net_type = args.net_type 39 | 40 | cap = cv2.VideoCapture(args.video_path) # capture from video 41 | # cap = cv2.VideoCapture(0) # capture from camera 42 | 43 | class_names = [name.strip() for name in open(label_path).readlines()] 44 | num_classes = len(class_names) 45 | test_device = args.test_device 46 | 47 | candidate_size = args.candidate_size 48 | threshold = args.threshold 49 | 50 | if net_type == 'slim': 51 | model_path = "models/pretrained/version-slim-320.pth" 52 | # model_path = "models/pretrained/version-slim-640.pth" 53 | net = create_mb_tiny_fd(len(class_names), is_test=True, device=test_device) 54 | predictor = create_mb_tiny_fd_predictor(net, candidate_size=candidate_size, device=test_device) 55 | elif net_type == 'RFB': 56 | model_path = "models/pretrained/version-RFB-320.pth" 57 | # model_path = "models/pretrained/version-RFB-640.pth" 58 | net = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True, device=test_device) 59 | predictor = create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=candidate_size, device=test_device) 60 | else: 61 | print("The net type is wrong!") 62 | 
sys.exit(1) 63 | net.load(model_path) 64 | 65 | timer = Timer() 66 | sum = 0 67 | while True: 68 | ret, orig_image = cap.read() 69 | if orig_image is None: 70 | print("end") 71 | break 72 | image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) 73 | timer.start() 74 | boxes, labels, probs = predictor.predict(image, candidate_size / 2, threshold) 75 | interval = timer.end() 76 | print('Time: {:.6f}s, Detect Objects: {:d}.'.format(interval, labels.size(0))) 77 | for i in range(boxes.size(0)): 78 | box = boxes[i, :] 79 | label = f" {probs[i]:.2f}" 80 | cv2.rectangle(orig_image, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 4) 81 | 82 | # cv2.putText(orig_image, label, 83 | # (box[0], box[1] - 10), 84 | # cv2.FONT_HERSHEY_SIMPLEX, 85 | # 0.5, # font scale 86 | # (0, 0, 255), 87 | # 2) # line type 88 | orig_image = cv2.resize(orig_image, None, None, fx=0.8, fy=0.8) 89 | sum += boxes.size(0) 90 | cv2.imshow('annotated', orig_image) 91 | if cv2.waitKey(1) & 0xFF == ord('q'): 92 | break 93 | cap.release() 94 | cv2.destroyAllWindows() 95 | print("all face num:{}".format(sum)) 96 | -------------------------------------------------------------------------------- /run_video_face_detect_onnx.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code uses the onnx model to detect faces from live video or cameras. 3 | """ 4 | import time 5 | 6 | import cv2 7 | import numpy as np 8 | import onnx 9 | import vision.utils.box_utils_numpy as box_utils 10 | from caffe2.python.onnx import backend 11 | 12 | # onnx runtime 13 | import onnxruntime as ort 14 | 15 | 16 | def predict(width, height, confidences, boxes, prob_threshold, iou_threshold=0.3, top_k=-1): 17 | boxes = boxes[0] 18 | confidences = confidences[0] 19 | picked_box_probs = [] 20 | picked_labels = [] 21 | for class_index in range(1, confidences.shape[1]): 22 | probs = confidences[:, class_index] 23 | mask = probs > prob_threshold 24 | probs = probs[mask] 25 | if probs.shape[0] == 0: 26 | continue 27 | subset_boxes = boxes[mask, :] 28 | box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1) 29 | box_probs = box_utils.hard_nms(box_probs, 30 | iou_threshold=iou_threshold, 31 | top_k=top_k, 32 | ) 33 | picked_box_probs.append(box_probs) 34 | picked_labels.extend([class_index] * box_probs.shape[0]) 35 | if not picked_box_probs: 36 | return np.array([]), np.array([]), np.array([]) 37 | picked_box_probs = np.concatenate(picked_box_probs) 38 | picked_box_probs[:, 0] *= width 39 | picked_box_probs[:, 1] *= height 40 | picked_box_probs[:, 2] *= width 41 | picked_box_probs[:, 3] *= height 42 | return picked_box_probs[:, :4].astype(np.int32), np.array(picked_labels), picked_box_probs[:, 4] 43 | 44 | 45 | label_path = "models/voc-model-labels.txt" 46 | 47 | onnx_path = "models/onnx/version-RFB-320.onnx" 48 | class_names = [name.strip() for name in open(label_path).readlines()] 49 | 50 | predictor = onnx.load(onnx_path) 51 | onnx.checker.check_model(predictor) 52 | onnx.helper.printable_graph(predictor.graph) 53 | predictor = backend.prepare(predictor, device="CPU") # default CPU 54 | 55 | ort_session = ort.InferenceSession(onnx_path) 56 | input_name = ort_session.get_inputs()[0].name 57 | 58 | cap = cv2.VideoCapture("/home/linzai/Videos/video/16_6.MP4") # capture from camera 59 | 60 | threshold = 0.7 61 | 62 | sum = 0 63 | while True: 64 | ret, orig_image = cap.read() 65 | if orig_image is None: 66 | print("no img") 67 | break 68 | image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) 69 | image = 
cv2.resize(image, (320, 240)) 70 | # image = cv2.resize(image, (640, 480)) 71 | image_mean = np.array([127, 127, 127]) 72 | image = (image - image_mean) / 128 73 | image = np.transpose(image, [2, 0, 1]) 74 | image = np.expand_dims(image, axis=0) 75 | image = image.astype(np.float32) 76 | # confidences, boxes = predictor.run(image) 77 | time_time = time.time() 78 | confidences, boxes = ort_session.run(None, {input_name: image}) 79 | print("cost time:{}".format(time.time() - time_time)) 80 | boxes, labels, probs = predict(orig_image.shape[1], orig_image.shape[0], confidences, boxes, threshold) 81 | for i in range(boxes.shape[0]): 82 | box = boxes[i, :] 83 | label = f"{class_names[labels[i]]}: {probs[i]:.2f}" 84 | 85 | cv2.rectangle(orig_image, (box[0], box[1]), (box[2], box[3]), (255, 255, 0), 4) 86 | 87 | # cv2.putText(orig_image, label, 88 | # (box[0] + 20, box[1] + 40), 89 | # cv2.FONT_HERSHEY_SIMPLEX, 90 | # 1, # font scale 91 | # (255, 0, 255), 92 | # 2) # line type 93 | sum += boxes.shape[0] 94 | orig_image = cv2.resize(orig_image, (0, 0), fx=0.7, fy=0.7) 95 | cv2.imshow('annotated', orig_image) 96 | if cv2.waitKey(1) & 0xFF == ord('q'): 97 | break 98 | cap.release() 99 | cv2.destroyAllWindows() 100 | print("sum:{}".format(sum)) 101 | -------------------------------------------------------------------------------- /tf/README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow implementation of Ultra-Light-Fast-Generic-Face-Detector-1MB with converter 2 | 3 | You can use this script to converter origin model to tensorflow version. 4 | 5 | ## Run 6 | Covert model 7 | ```Python 8 | python3 ./convert_tensorflow.py --net_type 9 | ``` 10 | 11 | Inference on image 12 | ```Python 13 | python3 ./det_image.py --net_type --img_path 14 | ``` 15 | 16 | ## Result 17 | ![img1](https://github.com/jason9075/Ultra-Light-Fast-Generic-Face-Detector_Tensorflow-Model-Converter/blob/master/imgs/test_output_RFB.jpg) 18 | 19 | ## Reference 20 | - [Ultra-Light-Fast-Generic-Face-Detector_Tensorflow-Model-Converter](https://github.com/jason9075/Ultra-Light-Fast-Generic-Face-Detector_Tensorflow-Model-Converter) 21 | -------------------------------------------------------------------------------- /tf/backend/op.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def basic_conv(x, out_ch, kernel_size, stride=(1, 1), padding=0, dilation=1, relu=True, 5 | bn=True, prefix='basic_conv'): 6 | if 0 < padding: 7 | out = tf.keras.layers.ZeroPadding2D(padding=padding, name=f'{prefix}_padding')(x) 8 | else: 9 | out = x 10 | out = tf.keras.layers.Conv2D(out_ch, 11 | kernel_size, 12 | strides=stride, 13 | dilation_rate=dilation, 14 | use_bias=(not bn), 15 | name=f'{prefix}_conv')(out) 16 | if bn: 17 | out = tf.keras.layers.BatchNormalization(epsilon=1e-5, name=f'{prefix}_bn')(out) 18 | if relu: 19 | out = tf.keras.layers.ReLU(name=f'{prefix}_relu')(out) 20 | 21 | return out 22 | 23 | 24 | def basic_rfb(x, in_ch, out_ch, stride=1, scale=0.1, map_reduce=8, vision=1, prefix='basic_rfb'): 25 | inter_ch = in_ch // map_reduce 26 | 27 | branch0 = basic_conv(x, inter_ch, kernel_size=1, stride=1, relu=False, 28 | prefix=f'{prefix}.branch0.0') 29 | branch0 = basic_conv(branch0, 2 * inter_ch, kernel_size=3, stride=stride, padding=1, 30 | prefix=f'{prefix}.branch0.1') 31 | branch0 = basic_conv(branch0, 2 * inter_ch, kernel_size=3, stride=1, dilation=vision + 1, 32 | padding=vision + 1, relu=False, 
prefix=f'{prefix}.branch0.2') 33 | 34 | branch1 = basic_conv(x, inter_ch, kernel_size=1, stride=1, relu=False, 35 | prefix=f'{prefix}.branch1.0') 36 | branch1 = basic_conv(branch1, 2 * inter_ch, kernel_size=3, stride=stride, padding=1, 37 | prefix=f'{prefix}.branch1.1') 38 | branch1 = basic_conv(branch1, 2 * inter_ch, kernel_size=3, stride=1, dilation=vision + 2, 39 | padding=vision + 2, relu=False, prefix=f'{prefix}.branch1.2') 40 | 41 | branch2 = basic_conv(x, inter_ch, kernel_size=1, stride=1, relu=False, 42 | prefix=f'{prefix}.branch2.0') 43 | branch2 = basic_conv(branch2, (inter_ch // 2) * 3, kernel_size=3, stride=1, padding=1, 44 | prefix=f'{prefix}.branch2.1') 45 | branch2 = basic_conv(branch2, 2 * inter_ch, kernel_size=3, stride=stride, padding=1, 46 | prefix=f'{prefix}.branch2.2') 47 | branch2 = basic_conv(branch2, 2 * inter_ch, kernel_size=3, stride=1, dilation=vision + 4, 48 | padding=vision + 4, relu=False, prefix=f'{prefix}.branch2.3') 49 | 50 | out = tf.keras.layers.Concatenate(axis=-1, name=f'{prefix}_cat')([branch0, branch1, branch2]) 51 | out = basic_conv(out, out_ch, kernel_size=1, stride=1, relu=False, prefix=f'{prefix}.convlinear') 52 | shortcut = basic_conv(x, out_ch, kernel_size=1, stride=stride, relu=False, prefix=f'{prefix}.shortcut') 53 | out = tf.multiply(out, scale, name=f'{prefix}_mul') 54 | out = tf.keras.layers.Add(name=f'{prefix}_add')([out, shortcut]) 55 | out = tf.keras.layers.ReLU(name=f'{prefix}_relu')(out) 56 | 57 | return out 58 | 59 | 60 | def separable_conv(x, out_ch, kernel_size, stride, padding, prefix='separable_conv'): 61 | out = tf.keras.layers.ZeroPadding2D(padding=padding, name=f'{prefix}_dconv_padding')(x) 62 | 63 | out = tf.keras.layers.DepthwiseConv2D(kernel_size, 64 | strides=stride, 65 | name=f'{prefix}_dconvbias')(out) 66 | out = tf.keras.layers.ReLU(name=f'{prefix}_relu')(out) 67 | out = tf.keras.layers.Conv2D(out_ch, 1, 68 | name=f'{prefix}_convbias')(out) 69 | 70 | return out 71 | 72 | 73 | def conv_bn(x, out_ch, stride, padding=1, prefix='conv_bn'): 74 | out = tf.keras.layers.ZeroPadding2D(padding=padding, name=f'{prefix}.0_padding')(x) 75 | out = tf.keras.layers.Conv2D(out_ch, 76 | (3, 3), 77 | strides=stride, 78 | use_bias=False, 79 | name=f'{prefix}.0_conv')(out) 80 | out = tf.keras.layers.BatchNormalization(epsilon=1e-5, name=f'{prefix}.1_bn')(out) 81 | out = tf.keras.layers.ReLU(name=f'{prefix}.2_relu')(out) 82 | 83 | return out 84 | 85 | 86 | def conv_dw(x, out_ch, stride, padding=1, prefix='conv_dw'): 87 | out = tf.keras.layers.ZeroPadding2D(padding=padding, name=f'{prefix}.0_padding')(x) 88 | out = tf.keras.layers.DepthwiseConv2D(3, strides=stride, 89 | use_bias=False, 90 | name=f'{prefix}.0_dconv')(out) 91 | out = tf.keras.layers.BatchNormalization(epsilon=1e-5, name=f'{prefix}.1_bn')(out) 92 | out = tf.keras.layers.ReLU(name=f'{prefix}.2_relu')(out) 93 | 94 | out = tf.keras.layers.Conv2D(out_ch, 1, use_bias=False, name=f'{prefix}.3_conv')(out) 95 | out = tf.keras.layers.BatchNormalization(epsilon=1e-5, name=f'{prefix}.4_bn')(out) 96 | out = tf.keras.layers.ReLU(name=f'{prefix}.5_relu')(out) 97 | 98 | return out 99 | -------------------------------------------------------------------------------- /tf/backend/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | import torch 6 | 7 | 8 | def post_processing(reg_list, cls_list, num_classes, image_size, feature_map_wh_list, min_boxes, 9 | center_variance, size_variance, 
10 | conf_threshold=0.6, nms_max_output_size=100, nms_iou_threshold=0.3, top_k=100): 11 | reg_list = [tf.keras.layers.Reshape([-1, 4])(reg) for reg in reg_list] 12 | cls_list = [tf.keras.layers.Reshape([-1, num_classes])(cls) for cls in cls_list] 13 | 14 | reg = tf.keras.layers.Concatenate(axis=1)(reg_list) 15 | cls = tf.keras.layers.Concatenate(axis=1)(cls_list) 16 | 17 | # post process 18 | cls = tf.keras.layers.Softmax(axis=-1)(cls) 19 | loc = decode_regression(reg, image_size, feature_map_wh_list, min_boxes, 20 | center_variance, size_variance) 21 | 22 | result = tf.keras.layers.Concatenate(axis=-1)([cls, loc]) 23 | 24 | # confidence thresholding 25 | mask = conf_threshold < cls[..., 1] 26 | result = tf.boolean_mask(tensor=result, mask=mask) 27 | 28 | # non-maximum suppression 29 | mask = tf.image.non_max_suppression(boxes=result[..., -4:], 30 | scores=result[..., 1], 31 | max_output_size=nms_max_output_size, 32 | iou_threshold=nms_iou_threshold, 33 | name='non_maximum_suppresion') 34 | result = tf.gather(params=result, indices=mask, axis=0) 35 | 36 | # top-k filtering 37 | top_k_value = tf.math.minimum(tf.constant(top_k), tf.shape(result)[0]) 38 | mask = tf.nn.top_k(result[..., 1], k=top_k_value, sorted=True).indices 39 | result = tf.gather(params=result, indices=mask, axis=0) 40 | 41 | return result 42 | 43 | 44 | def decode_regression(reg, image_size, feature_map_w_h_list, min_boxes, 45 | center_variance, size_variance): 46 | priors = [] 47 | for feature_map_w_h, min_box in zip(feature_map_w_h_list, min_boxes): 48 | xy_grid = np.meshgrid(range(feature_map_w_h[0]), range(feature_map_w_h[1])) 49 | xy_grid = np.add(xy_grid, 0.5) 50 | xy_grid[0, :, :] /= feature_map_w_h[0] 51 | xy_grid[1, :, :] /= feature_map_w_h[1] 52 | xy_grid = np.stack(xy_grid, axis=-1) 53 | xy_grid = np.tile(xy_grid, [1, 1, len(min_box)]) 54 | xy_grid = np.reshape(xy_grid, (-1, 2)) 55 | 56 | wh_grid = np.array(min_box) / np.array(image_size)[:, np.newaxis] 57 | wh_grid = np.tile(np.transpose(wh_grid), [np.product(feature_map_w_h), 1]) 58 | 59 | prior = np.concatenate((xy_grid, wh_grid), axis=-1) 60 | priors.append(prior) 61 | 62 | priors = np.concatenate(priors, axis=0) 63 | print(f'priors nums:{priors.shape[0]}') 64 | 65 | priors = tf.constant(priors, dtype=tf.float32, shape=priors.shape, name='priors') 66 | 67 | center_xy = reg[..., :2] * center_variance * priors[..., 2:] + priors[..., :2] 68 | center_wh = tf.exp(reg[..., 2:] * size_variance) * priors[..., 2:] 69 | 70 | # center to corner 71 | start_xy = center_xy - center_wh / 2 72 | end_xy = center_xy + center_wh / 2 73 | 74 | loc = tf.concat([start_xy, end_xy], axis=-1) 75 | loc = tf.clip_by_value(loc, clip_value_min=0.0, clip_value_max=1.0) 76 | 77 | return loc 78 | 79 | 80 | def load_weight(model, torch_path, mapping_table_path): 81 | torch_weights = torch.load(torch_path, map_location=torch.device('cpu')) 82 | 83 | with open(mapping_table_path, 'r') as f: 84 | mapping_table = json.load(f) 85 | mapping_table = {layer['name']: layer['weight'] for layer in mapping_table} 86 | 87 | for layer in model.layers: 88 | if layer.name in mapping_table: 89 | print(f'Set layer: {layer.name}') 90 | layer_type = layer.name.split('_')[-1] 91 | 92 | torch_layer_names = mapping_table[layer.name] 93 | if layer_type == 'conv': 94 | weight = np.array(torch_weights[torch_layer_names[0]]) 95 | weight = np.transpose(weight, [2, 3, 1, 0]) 96 | layer.set_weights([weight]) 97 | elif layer_type == 'dconv': 98 | weight = np.array(torch_weights[torch_layer_names[0]]) 99 | weight = 
np.transpose(weight, [2, 3, 0, 1]) 100 | layer.set_weights([weight]) 101 | elif layer_type == 'bn': 102 | gamma = np.array(torch_weights[torch_layer_names[0]]) 103 | beta = np.array(torch_weights[torch_layer_names[1]]) 104 | running_mean = np.array(torch_weights[torch_layer_names[2]]) 105 | running_var = np.array(torch_weights[torch_layer_names[3]]) 106 | layer.set_weights([gamma, beta, running_mean, running_var]) 107 | elif layer_type == 'convbias': 108 | weight = np.array(torch_weights[torch_layer_names[0]]) 109 | bias = np.array(torch_weights[torch_layer_names[1]]) 110 | weight = np.transpose(weight, [2, 3, 1, 0]) 111 | layer.set_weights([weight, bias]) 112 | elif layer_type == 'dconvbias': 113 | weight = np.array(torch_weights[torch_layer_names[0]]) 114 | bias = np.array(torch_weights[torch_layer_names[1]]) 115 | weight = np.transpose(weight, [2, 3, 0, 1]) 116 | layer.set_weights([weight, bias]) 117 | else: 118 | raise RuntimeError(f'Unknown Layer type \'{layer_type}\'.') 119 | else: 120 | print(f'Ignore layer: {layer.name}') 121 | -------------------------------------------------------------------------------- /tf/convert_tensorflow.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | from tf.backend.utils import load_weight 5 | from tf.model.rfb_320 import create_rfb_net 6 | from tf.model.slim_320 import create_slim_net 7 | 8 | parser = argparse.ArgumentParser( 9 | description='convert model') 10 | 11 | parser.add_argument('--net_type', default="RFB", type=str, 12 | help='The network architecture ,optional: RFB (higher precision) or slim (faster)') 13 | args = parser.parse_args() 14 | 15 | 16 | def main(): 17 | input_shape = (240, 320) # H,W 18 | base_channel = 8 * 2 19 | num_classes = 2 20 | 21 | if args.net_type == 'slim': 22 | torch_path = "../models/pretrained/version-slim-320.pth" 23 | mapping_table = "mapping_tables/slim_320.json" 24 | model = create_slim_net(input_shape, base_channel, num_classes) 25 | elif args.net_type == 'RFB': 26 | torch_path = "../models/pretrained/version-RFB-320.pth" 27 | mapping_table = "mapping_tables/rfb_320.json" 28 | model = create_rfb_net(input_shape, base_channel, num_classes) 29 | else: 30 | print("The net type is wrong!") 31 | sys.exit(1) 32 | 33 | load_weight(model, torch_path, mapping_table) 34 | model.save(f'export_models/{args.net_type}/', include_optimizer=False) 35 | 36 | 37 | if __name__ == '__main__': 38 | main() 39 | -------------------------------------------------------------------------------- /tf/det_image.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | import cv2 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | parser = argparse.ArgumentParser( 9 | description='convert model') 10 | 11 | parser.add_argument('--net_type', default="RFB", type=str, 12 | help='The network architecture ,optional: RFB (higher precision) or slim (faster)') 13 | parser.add_argument('--img_path', default='imgs/test_input.jpg', type=str, 14 | help='Image path for inference') 15 | args = parser.parse_args() 16 | 17 | 18 | def main(): 19 | if args.net_type == 'slim': 20 | model_path = "export_models/slim/" 21 | elif args.net_type == 'RFB': 22 | model_path = "export_models/RFB/" 23 | else: 24 | print("The net type is wrong!") 25 | sys.exit(1) 26 | 27 | model = tf.keras.models.load_model(model_path) 28 | 29 | img = cv2.imread(args.img_path) 30 | h, w, _ = img.shape 31 | img_resize = cv2.resize(img, 
(320, 240)) 32 | img_resize = cv2.cvtColor(img_resize, cv2.COLOR_BGR2RGB) 33 | img_resize = img_resize - 127.0 34 | img_resize = img_resize / 128.0 35 | 36 | results = model.predict(np.expand_dims(img_resize, axis=0)) # result=[background,face,x1,y1,x2,y2] 37 | 38 | for result in results: 39 | start_x = int(result[2] * w) 40 | start_y = int(result[3] * h) 41 | end_x = int(result[4] * w) 42 | end_y = int(result[5] * h) 43 | 44 | cv2.rectangle(img, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2) 45 | 46 | cv2.imwrite(f'imgs/test_output_{args.net_type}.jpg', img) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /tf/export_models/RFB/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/export_models/RFB/saved_model.pb -------------------------------------------------------------------------------- /tf/export_models/RFB/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/export_models/RFB/variables/variables.data-00000-of-00001 -------------------------------------------------------------------------------- /tf/export_models/RFB/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/export_models/RFB/variables/variables.index -------------------------------------------------------------------------------- /tf/export_models/slim/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/export_models/slim/saved_model.pb -------------------------------------------------------------------------------- /tf/export_models/slim/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/export_models/slim/variables/variables.data-00000-of-00001 -------------------------------------------------------------------------------- /tf/export_models/slim/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/export_models/slim/variables/variables.index -------------------------------------------------------------------------------- /tf/imgs/test_input.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/imgs/test_input.jpg -------------------------------------------------------------------------------- /tf/imgs/test_output_RFB.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/imgs/test_output_RFB.jpg -------------------------------------------------------------------------------- /tf/imgs/test_output_origin_RFB.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/imgs/test_output_origin_RFB.jpg -------------------------------------------------------------------------------- /tf/imgs/test_output_origin_slim.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/imgs/test_output_origin_slim.jpg -------------------------------------------------------------------------------- /tf/imgs/test_output_slim.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/imgs/test_output_slim.jpg -------------------------------------------------------------------------------- /tf/model/rfb_320.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from tf.backend.op import conv_bn, conv_dw, basic_rfb, separable_conv 4 | from tf.backend.utils import post_processing 5 | 6 | conf_threshold = 0.6 7 | nms_iou_threshold = 0.3 8 | nms_max_output_size = 200 9 | top_k = 100 10 | center_variance = 0.1 11 | size_variance = 0.2 12 | 13 | image_size = [320, 240] # default input size 320*240 14 | feature_map_wh_list = [[40, 30], [20, 15], [10, 8], [5, 4]] # default feature map size 15 | min_boxes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]] 16 | 17 | 18 | def create_rfb_net(input_shape, base_channel, num_classes): 19 | input_node = tf.keras.layers.Input(shape=(input_shape[0], input_shape[1], 3)) 20 | 21 | net = conv_bn(input_node, base_channel, stride=2, prefix='basenet.0') # 120x160 22 | net = conv_dw(net, base_channel * 2, stride=1, prefix='basenet.1') 23 | net = conv_dw(net, base_channel * 2, stride=2, prefix='basenet.2') # 60x80 24 | net = conv_dw(net, base_channel * 2, stride=1, prefix='basenet.3') 25 | net = conv_dw(net, base_channel * 4, stride=2, prefix='basenet.4') # 30x40 26 | net = conv_dw(net, base_channel * 4, stride=1, prefix='basenet.5') 27 | net = conv_dw(net, base_channel * 4, stride=1, prefix='basenet.6') 28 | header_0 = basic_rfb(net, base_channel * 4, base_channel * 4, stride=1, scale=1.0, prefix='basenet.7') 29 | net = conv_dw(header_0, base_channel * 8, stride=2, prefix='basenet.8') # 15x20 30 | net = conv_dw(net, base_channel * 8, stride=1, prefix='basenet.9') 31 | header_1 = conv_dw(net, base_channel * 8, stride=1, prefix='basenet.10') 32 | net = conv_dw(header_1, base_channel * 16, stride=2, prefix='basenet.11') # 8x10 33 | header_2 = conv_dw(net, base_channel * 16, stride=1, prefix='basenet.12') 34 | 35 | out = tf.keras.layers.Conv2D(base_channel * 4, 1, padding='SAME', name='extras_convbias')(header_2) 36 | out = tf.keras.layers.ReLU(name='extras_relu1')(out) 37 | out = separable_conv(out, base_channel * 16, kernel_size=3, stride=2, padding=1, 38 | prefix='extras_sep') 39 | header_3 = tf.keras.layers.ReLU(name='extras_relu2')(out) 40 | 41 | reg_0 = separable_conv(header_0, 3 * 4, kernel_size=3, 
stride=1, padding=1, 42 | prefix='reg_0_sep') 43 | cls_0 = separable_conv(header_0, 3 * num_classes, kernel_size=3, stride=1, padding=1, 44 | prefix='cls_0_sep') 45 | 46 | reg_1 = separable_conv(header_1, 2 * 4, kernel_size=3, stride=1, padding=1, 47 | prefix='reg_1_sep') 48 | cls_1 = separable_conv(header_1, 2 * num_classes, kernel_size=3, stride=1, padding=1, 49 | prefix='cls_1_sep') 50 | 51 | reg_2 = separable_conv(header_2, 2 * 4, kernel_size=3, stride=1, padding=1, 52 | prefix='reg_2_sep') 53 | cls_2 = separable_conv(header_2, 2 * num_classes, kernel_size=3, stride=1, padding=1, 54 | prefix='cls_2_sep') 55 | 56 | reg_3 = tf.keras.layers.Conv2D(3 * 4, kernel_size=3, padding='SAME', 57 | name='reg_3_convbias')(header_3) 58 | cls_3 = tf.keras.layers.Conv2D(3 * num_classes, kernel_size=3, padding='SAME', 59 | name='cls_3_convbias')(header_3) 60 | 61 | result = post_processing([reg_0, reg_1, reg_2, reg_3], 62 | [cls_0, cls_1, cls_2, cls_3], 63 | num_classes, image_size, feature_map_wh_list, min_boxes, 64 | center_variance, size_variance) 65 | 66 | model = tf.keras.Model(inputs=[input_node], outputs=[result]) 67 | model.summary() 68 | 69 | return model 70 | -------------------------------------------------------------------------------- /tf/model/slim_320.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from tf.backend.op import conv_bn, conv_dw, separable_conv 4 | from tf.backend.utils import post_processing 5 | 6 | conf_threshold = 0.6 7 | nms_iou_threshold = 0.3 8 | nms_max_output_size = 200 9 | top_k = 100 10 | center_variance = 0.1 11 | size_variance = 0.2 12 | 13 | image_size = [320, 240] # default input size 320*240 14 | feature_map_wh_list = [[40, 30], [20, 15], [10, 8], [5, 4]] # default feature map size 15 | min_boxes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]] 16 | 17 | 18 | def create_slim_net(input_shape, base_channel, num_classes): 19 | input_node = tf.keras.layers.Input(shape=(input_shape[0], input_shape[1], 3)) 20 | 21 | net = conv_bn(input_node, base_channel, stride=2, prefix='basenet.0') # 120x160 22 | net = conv_dw(net, base_channel * 2, stride=1, prefix='basenet.1') 23 | net = conv_dw(net, base_channel * 2, stride=2, prefix='basenet.2') # 60x80 24 | net = conv_dw(net, base_channel * 2, stride=1, prefix='basenet.3') 25 | net = conv_dw(net, base_channel * 4, stride=2, prefix='basenet.4') # 30x40 26 | net = conv_dw(net, base_channel * 4, stride=1, prefix='basenet.5') 27 | net = conv_dw(net, base_channel * 4, stride=1, prefix='basenet.6') 28 | header_0 = conv_dw(net, base_channel * 4, stride=1, prefix='basenet.7') 29 | net = conv_dw(header_0, base_channel * 8, stride=2, prefix='basenet.8') # 15x20 30 | net = conv_dw(net, base_channel * 8, stride=1, prefix='basenet.9') 31 | header_1 = conv_dw(net, base_channel * 8, stride=1, prefix='basenet.10') 32 | net = conv_dw(header_1, base_channel * 16, stride=2, prefix='basenet.11') # 8x10 33 | header_2 = conv_dw(net, base_channel * 16, stride=1, prefix='basenet.12') 34 | 35 | out = tf.keras.layers.Conv2D(base_channel * 4, 1, padding='SAME', name='extras_convbias')(header_2) 36 | out = tf.keras.layers.ReLU(name='extras_relu1')(out) 37 | out = separable_conv(out, base_channel * 16, kernel_size=3, stride=2, padding=1, 38 | prefix='extras_sep') 39 | header_3 = tf.keras.layers.ReLU(name='extras_relu2')(out) 40 | 41 | reg_0 = separable_conv(header_0, 3 * 4, kernel_size=3, stride=1, padding=1, 42 | prefix='reg_0_sep') 43 | cls_0 = separable_conv(header_0, 3 * 
num_classes, kernel_size=3, stride=1, padding=1, 44 | prefix='cls_0_sep') 45 | 46 | reg_1 = separable_conv(header_1, 2 * 4, kernel_size=3, stride=1, padding=1, 47 | prefix='reg_1_sep') 48 | cls_1 = separable_conv(header_1, 2 * num_classes, kernel_size=3, stride=1, padding=1, 49 | prefix='cls_1_sep') 50 | 51 | reg_2 = separable_conv(header_2, 2 * 4, kernel_size=3, stride=1, padding=1, 52 | prefix='reg_2_sep') 53 | cls_2 = separable_conv(header_2, 2 * num_classes, kernel_size=3, stride=1, padding=1, 54 | prefix='cls_2_sep') 55 | 56 | reg_3 = tf.keras.layers.Conv2D(3 * 4, kernel_size=3, padding='SAME', 57 | name='reg_3_convbias')(header_3) 58 | cls_3 = tf.keras.layers.Conv2D(3 * num_classes, kernel_size=3, padding='SAME', 59 | name='cls_3_convbias')(header_3) 60 | 61 | result = post_processing([reg_0, reg_1, reg_2, reg_3], 62 | [cls_0, cls_1, cls_2, cls_3], 63 | num_classes, image_size, feature_map_wh_list, min_boxes, 64 | center_variance, size_variance) 65 | 66 | model = tf.keras.Model(inputs=[input_node], outputs=[result]) 67 | model.summary() 68 | 69 | return model 70 | -------------------------------------------------------------------------------- /tflite/README.md: -------------------------------------------------------------------------------- 1 | # TFLite implementation of Ultra-Light-Fast-Generic-Face-Detector-1MB 2 | 3 | TFLite model is suitable for edge computing devices. 4 | Please refer to the official [Android Demo](https://github.com/tensorflow/examples/tree/master/lite/examples/image_classification/android) for learning how to deploy the face detection model on your phones. 5 | 6 | ![Dwq3dS.jpg](https://s3.ax1x.com/2020/11/26/Dwq3dS.jpg) 7 | 8 | ## Run 9 | 10 | Inference on image 11 | 12 | ``` bash 13 | python3 inference_test.py --net_type --img_path 14 | ``` 15 | 16 | Inference on video 17 | 18 | ``` bash 19 | python3 inference_test.py --net_type --video_path 20 | ``` 21 | 22 | ## Import 23 | 24 | ``` python 25 | from TFLiteFaceDetector import UltraLightFaceDetecion 26 | 27 | fd = UltraLightFaceDetecion(model_path, 28 | input_size=(320, 240), conf_threshold=0.6, 29 | center_variance=0.1, size_variance=0.2, 30 | nms_max_output_size=200, nms_iou_threshold=0.3) 31 | ``` 32 | 33 | ## Files Tree 34 | 35 | The pretrained weights are converted form `onnx -> protobuf -> tflite`. 36 | 37 | ``` bash 38 | . 
39 | ├── model # keras defined model architecture 40 | │ ├── tflite_RFB_320_without_postprocessing.py 41 | │ └── tflite_slim_320_without_postprocessing.py 42 | ├── pretrained # pretrained model without post-processing 43 | │ ├── version-RFB-320_without_postprocessing.tflite 44 | │ └── version-slim-320_without_postprocessing.tflite 45 | ├── README.md 46 | ├── inference_test.py # detector test script 47 | └── TFLiteFaceDetector.py # class file of the tflite detector 48 | ``` 49 | 50 | ## Special Thanks 51 | 52 | Part of the code for this work is referenced from the following repositories: 53 | 54 | - [Ultra-Light-Fast-Generic-Face-Detector_Tensorflow-Model-Converter](https://github.com/jason9075/Ultra-Light-Fast-Generic-Face-Detector_Tensorflow-Model-Converter) 55 | -------------------------------------------------------------------------------- /tflite/TFLiteFaceDetector.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import cv2 3 | import tensorflow as tf 4 | import numpy as np 5 | 6 | 7 | class UltraLightFaceDetecion(): 8 | def __init__(self, filepath, input_size=(320, 240), conf_threshold=0.6, 9 | center_variance=0.1, size_variance=0.2, 10 | nms_max_output_size=200, nms_iou_threshold=0.3) -> None: 11 | 12 | self._feature_maps = np.array([[40, 30], [20, 15], [10, 8], [5, 4]]) 13 | self._min_boxes = np.array([[10, 16, 24], [32, 48], 14 | [64, 96], [128, 192, 256]]) 15 | 16 | self._resize = partial(cv2.resize, dsize=input_size) 17 | self._input_size = np.array(input_size)[:, None] 18 | 19 | self._anchors_xy, self._anchors_wh = self._generate_anchors() 20 | self._conf_threshold = conf_threshold 21 | self._center_variance = center_variance 22 | self._size_variance = size_variance 23 | self._nms = partial(tf.image.non_max_suppression, 24 | max_output_size=nms_max_output_size, 25 | iou_threshold=nms_iou_threshold) 26 | 27 | # tflite model init 28 | self._interpreter = tf.lite.Interpreter(model_path=filepath) 29 | self._interpreter.allocate_tensors() 30 | 31 | # model details 32 | input_details = self._interpreter.get_input_details() 33 | output_details = self._interpreter.get_output_details() 34 | 35 | # inference helper 36 | self._set_input_tensor = partial(self._interpreter.set_tensor, 37 | input_details[0]["index"]) 38 | self._get_boxes_tensor = partial(self._interpreter.get_tensor, 39 | output_details[0]["index"]) 40 | self._get_scores_tensor = partial(self._interpreter.get_tensor, 41 | output_details[1]["index"]) 42 | 43 | def _generate_anchors(self): 44 | anchors = [] 45 | for feature_map_w_h, min_box in zip(self._feature_maps, self._min_boxes): 46 | 47 | wh_grid = min_box / self._input_size 48 | wh_grid = np.tile(wh_grid.T, (np.prod(feature_map_w_h), 1)) 49 | 50 | xy_grid = np.meshgrid(range(feature_map_w_h[0]), 51 | range(feature_map_w_h[1])) 52 | xy_grid = np.add(xy_grid, 0.5) 53 | 54 | xy_grid /= feature_map_w_h[..., None, None] 55 | 56 | xy_grid = np.stack(xy_grid, axis=-1) 57 | xy_grid = np.tile(xy_grid, [1, 1, len(min_box)]) 58 | xy_grid = xy_grid.reshape(-1, 2) 59 | 60 | prior = np.concatenate((xy_grid, wh_grid), axis=-1) 61 | anchors.append(prior) 62 | 63 | anchors = np.concatenate(anchors, axis=0) 64 | anchors = np.clip(anchors, 0.0, 1.0) 65 | 66 | return anchors[:, :2], anchors[:, 2:] 67 | 68 | def _pre_processing(self, img): 69 | resized = self._resize(img) 70 | image_rgb = resized[..., ::-1] 71 | image_norm = image_rgb.astype(np.float32) 72 | cv2.normalize(image_norm, image_norm, 73 | alpha=-1, beta=1, 
norm_type=cv2.NORM_MINMAX) 74 | return image_norm[None, ...] 75 | 76 | def inference(self, img): 77 | # BGR image to tensor 78 | input_tensor = self._pre_processing(img) 79 | 80 | # set tensor and invoke 81 | self._set_input_tensor(input_tensor) 82 | self._interpreter.invoke() 83 | 84 | # get results 85 | boxes = self._get_boxes_tensor()[0] 86 | scores = self._get_scores_tensor()[0] 87 | 88 | # decode boxes to corner format 89 | boxes, scores = self._post_processing(boxes, scores) 90 | boxes *= np.tile(img.shape[1::-1], 2) 91 | 92 | return boxes, scores 93 | 94 | def _post_processing(self, boxes, scores): 95 | # bounding box regression 96 | boxes = self._decode_regression(boxes) 97 | scores = scores[:, 1] 98 | 99 | # confidence threshold filter 100 | conf_mask = self._conf_threshold < scores 101 | boxes, scores = boxes[conf_mask], scores[conf_mask] 102 | 103 | # non-maximum suppression 104 | nms_mask = self._nms(boxes=boxes, scores=scores) 105 | boxes = np.take(boxes, nms_mask, axis=0) 106 | 107 | return boxes, scores 108 | 109 | def _decode_regression(self, reg): 110 | # bounding box regression 111 | center_xy = reg[:, :2] * self._center_variance * \ 112 | self._anchors_wh + self._anchors_xy 113 | center_wh = np.exp( 114 | reg[:, 2:] * self._size_variance) * self._anchors_wh / 2 115 | 116 | # center to corner 117 | start_xy = center_xy - center_wh 118 | end_xy = center_xy + center_wh 119 | 120 | boxes = np.concatenate((start_xy, end_xy), axis=-1) 121 | boxes = np.clip(boxes, 0.0, 1.0) 122 | 123 | return boxes 124 | -------------------------------------------------------------------------------- /tflite/inference_test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import time 4 | 5 | from TFLiteFaceDetector import UltraLightFaceDetecion 6 | 7 | 8 | parser = argparse.ArgumentParser(description='TFLite Face Detector') 9 | 10 | parser.add_argument('--net_type', default="RFB", type=str, 11 | help='The network architecture ,optional: RFB (higher precision) or slim (faster)') 12 | parser.add_argument('--img_path', type=str, help='Image path for inference') 13 | parser.add_argument('--video_path', type=str, help='Video path for inference') 14 | 15 | args = parser.parse_args() 16 | 17 | 18 | def image_inference(image_path, model_path, color=(125, 255, 0)): 19 | 20 | fd = UltraLightFaceDetecion(model_path, 21 | conf_threshold=0.6) 22 | 23 | img = cv2.imread(image_path) 24 | 25 | boxes, scores = fd.inference(img) 26 | 27 | for result in boxes.astype(int): 28 | cv2.rectangle(img, (result[0], result[1]), 29 | (result[2], result[3]), color, 2) 30 | 31 | cv2.imshow('res', img) 32 | cv2.waitKey(0) 33 | cv2.destroyAllWindows() 34 | 35 | 36 | def video_inference(video, model_path, color=(125, 255, 0)): 37 | 38 | fd = UltraLightFaceDetecion(model_path, 39 | conf_threshold=0.88) 40 | 41 | cap = cv2.VideoCapture(video) 42 | 43 | while True: 44 | ret, frame = cap.read() 45 | 46 | if not ret: 47 | break 48 | 49 | start_time = time.perf_counter() 50 | boxes, scores = fd.inference(frame) 51 | print(time.perf_counter() - start_time) 52 | 53 | for result in boxes.astype(int): 54 | cv2.rectangle(frame, (result[0], result[1]), 55 | (result[2], result[3]), color, 2) 56 | 57 | cv2.imshow('res', frame) 58 | if cv2.waitKey(1) == ord('q'): 59 | break 60 | 61 | cap.release() 62 | cv2.destroyAllWindows() 63 | 64 | 65 | if __name__ == '__main__': 66 | 67 | filepath = f"pretrained/version-{args.net_type}-320_without_postprocessing.tflite" 68 | 69 | 
if args.img_path: 70 | image_inference(args.img_path, filepath) 71 | elif args.video_path: 72 | video_inference(args.video_path, filepath) 73 | else: 74 | print('--ima_path or --video_path must be filled') 75 | -------------------------------------------------------------------------------- /tflite/model/tflite_slim_320_without_postprocessing.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | import sys 4 | sys.path.append("../tf") 5 | from backend.op import conv_bn, conv_dw, separable_conv 6 | 7 | 8 | def create_slim_net(input_shape, base_channel, num_classes): 9 | input_node = tf.keras.layers.Input( 10 | shape=(input_shape[0], input_shape[1], 3)) 11 | 12 | net = conv_bn(input_node, base_channel, stride=2, 13 | prefix='basenet.0') # 120x160 14 | net = conv_dw(net, base_channel * 2, stride=1, prefix='basenet.1') 15 | net = conv_dw(net, base_channel * 2, stride=2, prefix='basenet.2') # 60x80 16 | net = conv_dw(net, base_channel * 2, stride=1, prefix='basenet.3') 17 | net = conv_dw(net, base_channel * 4, stride=2, prefix='basenet.4') # 30x40 18 | net = conv_dw(net, base_channel * 4, stride=1, prefix='basenet.5') 19 | net = conv_dw(net, base_channel * 4, stride=1, prefix='basenet.6') 20 | header_0 = conv_dw(net, base_channel * 4, stride=1, prefix='basenet.7') 21 | net = conv_dw(header_0, base_channel * 8, stride=2, 22 | prefix='basenet.8') # 15x20 23 | net = conv_dw(net, base_channel * 8, stride=1, prefix='basenet.9') 24 | header_1 = conv_dw(net, base_channel * 8, stride=1, prefix='basenet.10') 25 | net = conv_dw(header_1, base_channel * 16, 26 | stride=2, prefix='basenet.11') # 8x10 27 | header_2 = conv_dw(net, base_channel * 16, stride=1, prefix='basenet.12') 28 | 29 | out = tf.keras.layers.Conv2D( 30 | base_channel * 4, 1, padding='SAME', name='extras_convbias')(header_2) 31 | out = tf.keras.layers.ReLU(name='extras_relu1')(out) 32 | out = separable_conv(out, base_channel * 16, kernel_size=3, stride=2, padding=1, 33 | prefix='extras_sep') 34 | header_3 = tf.keras.layers.ReLU(name='extras_relu2')(out) 35 | 36 | reg_0 = separable_conv(header_0, 3 * 4, kernel_size=3, stride=1, padding=1, 37 | prefix='reg_0_sep') 38 | cls_0 = separable_conv(header_0, 3 * num_classes, kernel_size=3, stride=1, padding=1, 39 | prefix='cls_0_sep') 40 | 41 | reg_1 = separable_conv(header_1, 2 * 4, kernel_size=3, stride=1, padding=1, 42 | prefix='reg_1_sep') 43 | cls_1 = separable_conv(header_1, 2 * num_classes, kernel_size=3, stride=1, padding=1, 44 | prefix='cls_1_sep') 45 | 46 | reg_2 = separable_conv(header_2, 2 * 4, kernel_size=3, stride=1, padding=1, 47 | prefix='reg_2_sep') 48 | cls_2 = separable_conv(header_2, 2 * num_classes, kernel_size=3, stride=1, padding=1, 49 | prefix='cls_2_sep') 50 | 51 | reg_3 = tf.keras.layers.Conv2D(3 * 4, kernel_size=3, padding='SAME', 52 | name='reg_3_convbias')(header_3) 53 | cls_3 = tf.keras.layers.Conv2D(3 * num_classes, kernel_size=3, padding='SAME', 54 | name='cls_3_convbias')(header_3) 55 | 56 | reg_list = [tf.keras.layers.Reshape([-1, 4])(reg) for reg in [reg_0, reg_1, reg_2, reg_3]] 57 | cls_list = [tf.keras.layers.Reshape([-1, num_classes])(cls) for cls in [cls_0, cls_1, cls_2, cls_3]] 58 | 59 | reg = tf.keras.layers.Concatenate(axis=1, name='face_boxes')(reg_list) 60 | cls = tf.keras.layers.Concatenate(axis=1)(cls_list) 61 | 62 | cls = tf.keras.layers.Softmax(axis=-1, name='face_scores')(cls) 63 | 64 | model = tf.keras.Model(inputs=[input_node], outputs=[reg, cls]) 65 | 66 | model.summary() 67 | 68 | 
return model 69 | -------------------------------------------------------------------------------- /tflite/pretrained/version-RFB-320_without_postprocessing.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tflite/pretrained/version-RFB-320_without_postprocessing.tflite -------------------------------------------------------------------------------- /tflite/pretrained/version-slim-320_without_postprocessing.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tflite/pretrained/version-slim-320_without_postprocessing.tflite -------------------------------------------------------------------------------- /train-version-RFB.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | model_root_path="./models/train-version-RFB" 3 | log_dir="$model_root_path/logs" 4 | log="$log_dir/log" 5 | mkdir -p "$log_dir" 6 | 7 | python3 -u train.py \ 8 | --datasets \ 9 | ./data/wider_face_add_lm_10_10 \ 10 | --validation_dataset \ 11 | ./data/wider_face_add_lm_10_10 \ 12 | --net \ 13 | RFB \ 14 | --num_epochs \ 15 | 200 \ 16 | --milestones \ 17 | "95,150" \ 18 | --lr \ 19 | 1e-2 \ 20 | --batch_size \ 21 | 24 \ 22 | --input_size \ 23 | 320 \ 24 | --checkpoint_folder \ 25 | ${model_root_path} \ 26 | --num_workers \ 27 | 0 \ 28 | --log_dir \ 29 | ${log_dir} \ 30 | --cuda_index \ 31 | 0 \ 32 | 2>&1 | tee "$log" 33 | -------------------------------------------------------------------------------- /train-version-slim.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | model_root_path="./models/train-version-slim" 3 | log_dir="$model_root_path/logs" 4 | log="$log_dir/log" 5 | mkdir -p "$log_dir" 6 | 7 | python3 -u train.py \ 8 | --datasets \ 9 | ./data/wider_face_add_lm_10_10 \ 10 | --validation_dataset \ 11 | ./data/wider_face_add_lm_10_10 \ 12 | --net \ 13 | slim \ 14 | --num_epochs \ 15 | 200 \ 16 | --milestones \ 17 | "95,150" \ 18 | --lr \ 19 | 1e-2 \ 20 | --batch_size \ 21 | 24 \ 22 | --input_size \ 23 | 320 \ 24 | --checkpoint_folder \ 25 | ${model_root_path} \ 26 | --num_workers \ 27 | 4 \ 28 | --log_dir \ 29 | ${log_dir} \ 30 | --cuda_index \ 31 | 0 \ 32 | 2>&1 | tee "$log" 33 | -------------------------------------------------------------------------------- /vision/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/vision/__init__.py -------------------------------------------------------------------------------- /vision/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/vision/datasets/__init__.py -------------------------------------------------------------------------------- /vision/datasets/voc_dataset.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pathlib 4 | import xml.etree.ElementTree as ET 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 
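# Minimal usage sketch (illustrative, not part of the original file). The path is a
# placeholder and assumes the VOC-style layout produced by wider_face_2_voc_add_landmark.py:
#
#   dataset = VOCDataset("./data/wider_face_add_lm_10_10", is_test=False)
#   image, boxes, labels = dataset[0]  # RGB image, (N, 4) corner boxes, (N,) class ids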
10 | class VOCDataset: 11 | 12 | def __init__(self, root, transform=None, target_transform=None, is_test=False, keep_difficult=False, label_file=None): 13 | """Dataset for VOC data. 14 | Args: 15 | root: the root of the VOC2007 or VOC2012 dataset, the directory contains the following sub-directories: 16 | Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject. 17 | """ 18 | self.root = pathlib.Path(root) 19 | self.transform = transform 20 | self.target_transform = target_transform 21 | if is_test: 22 | image_sets_file = self.root / "ImageSets/Main/test.txt" 23 | else: 24 | image_sets_file = self.root / "ImageSets/Main/trainval.txt" 25 | self.ids = VOCDataset._read_image_ids(image_sets_file) 26 | self.keep_difficult = keep_difficult 27 | 28 | # if the labels file exists, read in the class names 29 | label_file_name = self.root / "labels.txt" 30 | 31 | if os.path.isfile(label_file_name): 32 | class_string = "" 33 | with open(label_file_name, 'r') as infile: 34 | for line in infile: 35 | class_string += line.rstrip() 36 | 37 | # classes should be a comma separated list 38 | 39 | classes = class_string.split(',') 40 | # prepend BACKGROUND as first class 41 | classes.insert(0, 'BACKGROUND') 42 | classes = [elem.replace(" ", "") for elem in classes] 43 | self.class_names = tuple(classes) 44 | logging.info("VOC Labels read from file: " + str(self.class_names)) 45 | 46 | else: 47 | logging.info("No labels file, using default VOC classes.") 48 | self.class_names = ('BACKGROUND', 49 | 'face') 50 | 51 | self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)} 52 | 53 | def __getitem__(self, index): 54 | image_id = self.ids[index] 55 | boxes, labels, is_difficult = self._get_annotation(image_id) 56 | if not self.keep_difficult: 57 | boxes = boxes[is_difficult == 0] 58 | labels = labels[is_difficult == 0] 59 | image = self._read_image(image_id) 60 | if self.transform: 61 | image, boxes, labels = self.transform(image, boxes, labels) 62 | if self.target_transform: 63 | boxes, labels = self.target_transform(boxes, labels) 64 | return image, boxes, labels 65 | 66 | def get_image(self, index): 67 | image_id = self.ids[index] 68 | image = self._read_image(image_id) 69 | if self.transform: 70 | image, _ = self.transform(image) 71 | return image 72 | 73 | def get_annotation(self, index): 74 | image_id = self.ids[index] 75 | return image_id, self._get_annotation(image_id) 76 | 77 | def __len__(self): 78 | return len(self.ids) 79 | 80 | @staticmethod 81 | def _read_image_ids(image_sets_file): 82 | ids = [] 83 | with open(image_sets_file) as f: 84 | for line in f: 85 | ids.append(line.rstrip()) 86 | return ids 87 | 88 | def _get_annotation(self, image_id): 89 | annotation_file = self.root / f"Annotations/{image_id}.xml" 90 | objects = ET.parse(annotation_file).findall("object") 91 | boxes = [] 92 | labels = [] 93 | is_difficult = [] 94 | for object in objects: 95 | class_name = object.find('name').text.lower().strip() 96 | # we're only concerned with clases in our list 97 | if class_name in self.class_dict: 98 | bbox = object.find('bndbox') 99 | 100 | # VOC dataset format follows Matlab, in which indexes start from 0 101 | x1 = float(bbox.find('xmin').text) - 1 102 | y1 = float(bbox.find('ymin').text) - 1 103 | x2 = float(bbox.find('xmax').text) - 1 104 | y2 = float(bbox.find('ymax').text) - 1 105 | boxes.append([x1, y1, x2, y2]) 106 | 107 | labels.append(self.class_dict[class_name]) 108 | is_difficult_str = object.find('difficult').text 109 | 
is_difficult.append(int(is_difficult_str) if is_difficult_str else 0) 110 | 111 | return (np.array(boxes, dtype=np.float32), 112 | np.array(labels, dtype=np.int64), 113 | np.array(is_difficult, dtype=np.uint8)) 114 | 115 | def _read_image(self, image_id): 116 | image_file = self.root / f"JPEGImages/{image_id}.jpg" 117 | image = cv2.imread(str(image_file)) 118 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 119 | return image 120 | -------------------------------------------------------------------------------- /vision/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/vision/nn/__init__.py -------------------------------------------------------------------------------- /vision/nn/mb_tiny.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class Mb_Tiny(nn.Module): 6 | 7 | def __init__(self, num_classes=2): 8 | super(Mb_Tiny, self).__init__() 9 | self.base_channel = 8 * 2 10 | 11 | def conv_bn(inp, oup, stride): 12 | return nn.Sequential( 13 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 14 | nn.BatchNorm2d(oup), 15 | nn.ReLU(inplace=True) 16 | ) 17 | 18 | def conv_dw(inp, oup, stride): 19 | return nn.Sequential( 20 | nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), 21 | nn.BatchNorm2d(inp), 22 | nn.ReLU(inplace=True), 23 | 24 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 25 | nn.BatchNorm2d(oup), 26 | nn.ReLU(inplace=True), 27 | ) 28 | 29 | self.model = nn.Sequential( 30 | conv_bn(3, self.base_channel, 2), # 160*120 31 | conv_dw(self.base_channel, self.base_channel * 2, 1), 32 | conv_dw(self.base_channel * 2, self.base_channel * 2, 2), # 80*60 33 | conv_dw(self.base_channel * 2, self.base_channel * 2, 1), 34 | conv_dw(self.base_channel * 2, self.base_channel * 4, 2), # 40*30 35 | conv_dw(self.base_channel * 4, self.base_channel * 4, 1), 36 | conv_dw(self.base_channel * 4, self.base_channel * 4, 1), 37 | conv_dw(self.base_channel * 4, self.base_channel * 4, 1), 38 | conv_dw(self.base_channel * 4, self.base_channel * 8, 2), # 20*15 39 | conv_dw(self.base_channel * 8, self.base_channel * 8, 1), 40 | conv_dw(self.base_channel * 8, self.base_channel * 8, 1), 41 | conv_dw(self.base_channel * 8, self.base_channel * 16, 2), # 10*8 42 | conv_dw(self.base_channel * 16, self.base_channel * 16, 1) 43 | ) 44 | self.fc = nn.Linear(1024, num_classes) 45 | 46 | def forward(self, x): 47 | x = self.model(x) 48 | x = F.avg_pool2d(x, 7) 49 | x = x.view(-1, 1024) 50 | x = self.fc(x) 51 | return x 52 | -------------------------------------------------------------------------------- /vision/nn/mb_tiny_RFB.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class BasicConv(nn.Module): 7 | 8 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True): 9 | super(BasicConv, self).__init__() 10 | self.out_channels = out_planes 11 | if bn: 12 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False) 13 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) 14 | self.relu = nn.ReLU(inplace=True) if relu else None 15 | else: 16 | self.conv = 
nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=True) 17 | self.bn = None 18 | self.relu = nn.ReLU(inplace=True) if relu else None 19 | 20 | def forward(self, x): 21 | x = self.conv(x) 22 | if self.bn is not None: 23 | x = self.bn(x) 24 | if self.relu is not None: 25 | x = self.relu(x) 26 | return x 27 | 28 | 29 | class BasicRFB(nn.Module): 30 | 31 | def __init__(self, in_planes, out_planes, stride=1, scale=0.1, map_reduce=8, vision=1, groups=1): 32 | super(BasicRFB, self).__init__() 33 | self.scale = scale 34 | self.out_channels = out_planes 35 | inter_planes = in_planes // map_reduce 36 | 37 | self.branch0 = nn.Sequential( 38 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 39 | BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups), 40 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 1, dilation=vision + 1, relu=False, groups=groups) 41 | ) 42 | self.branch1 = nn.Sequential( 43 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 44 | BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups), 45 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 2, dilation=vision + 2, relu=False, groups=groups) 46 | ) 47 | self.branch2 = nn.Sequential( 48 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 49 | BasicConv(inter_planes, (inter_planes // 2) * 3, kernel_size=3, stride=1, padding=1, groups=groups), 50 | BasicConv((inter_planes // 2) * 3, 2 * inter_planes, kernel_size=3, stride=stride, padding=1, groups=groups), 51 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 4, dilation=vision + 4, relu=False, groups=groups) 52 | ) 53 | 54 | self.ConvLinear = BasicConv(6 * inter_planes, out_planes, kernel_size=1, stride=1, relu=False) 55 | self.shortcut = BasicConv(in_planes, out_planes, kernel_size=1, stride=stride, relu=False) 56 | self.relu = nn.ReLU(inplace=False) 57 | 58 | def forward(self, x): 59 | x0 = self.branch0(x) 60 | x1 = self.branch1(x) 61 | x2 = self.branch2(x) 62 | 63 | out = torch.cat((x0, x1, x2), 1) 64 | out = self.ConvLinear(out) 65 | short = self.shortcut(x) 66 | out = out * self.scale + short 67 | out = self.relu(out) 68 | 69 | return out 70 | 71 | 72 | class Mb_Tiny_RFB(nn.Module): 73 | 74 | def __init__(self, num_classes=2): 75 | super(Mb_Tiny_RFB, self).__init__() 76 | self.base_channel = 8 * 2 77 | 78 | def conv_bn(inp, oup, stride): 79 | return nn.Sequential( 80 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 81 | nn.BatchNorm2d(oup), 82 | nn.ReLU(inplace=True) 83 | ) 84 | 85 | def conv_dw(inp, oup, stride): 86 | return nn.Sequential( 87 | nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), 88 | nn.BatchNorm2d(inp), 89 | nn.ReLU(inplace=True), 90 | 91 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 92 | nn.BatchNorm2d(oup), 93 | nn.ReLU(inplace=True), 94 | ) 95 | 96 | self.model = nn.Sequential( 97 | conv_bn(3, self.base_channel, 2), # 160*120 98 | conv_dw(self.base_channel, self.base_channel * 2, 1), 99 | conv_dw(self.base_channel * 2, self.base_channel * 2, 2), # 80*60 100 | conv_dw(self.base_channel * 2, self.base_channel * 2, 1), 101 | conv_dw(self.base_channel * 2, self.base_channel * 4, 2), # 40*30 102 | conv_dw(self.base_channel * 4, self.base_channel * 
4, 1), 103 | conv_dw(self.base_channel * 4, self.base_channel * 4, 1), 104 | BasicRFB(self.base_channel * 4, self.base_channel * 4, stride=1, scale=1.0), 105 | conv_dw(self.base_channel * 4, self.base_channel * 8, 2), # 20*15 106 | conv_dw(self.base_channel * 8, self.base_channel * 8, 1), 107 | conv_dw(self.base_channel * 8, self.base_channel * 8, 1), 108 | conv_dw(self.base_channel * 8, self.base_channel * 16, 2), # 10*8 109 | conv_dw(self.base_channel * 16, self.base_channel * 16, 1) 110 | ) 111 | self.fc = nn.Linear(1024, num_classes) 112 | 113 | def forward(self, x): 114 | x = self.model(x) 115 | x = F.avg_pool2d(x, 7) 116 | x = x.view(-1, 1024) 117 | x = self.fc(x) 118 | return x 119 | -------------------------------------------------------------------------------- /vision/nn/multibox_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from ..utils import box_utils 6 | 7 | 8 | class MultiboxLoss(nn.Module): 9 | def __init__(self, priors, neg_pos_ratio, 10 | center_variance, size_variance, device): 11 | """Implement SSD Multibox Loss. 12 | 13 | Basically, Multibox loss combines classification loss 14 | and Smooth L1 regression loss. 15 | """ 16 | super(MultiboxLoss, self).__init__() 17 | self.neg_pos_ratio = neg_pos_ratio 18 | self.center_variance = center_variance 19 | self.size_variance = size_variance 20 | self.priors = priors 21 | self.priors.to(device) 22 | 23 | def forward(self, confidence, predicted_locations, labels, gt_locations): 24 | """Compute classification loss and smooth l1 loss. 25 | 26 | Args: 27 | confidence (batch_size, num_priors, num_classes): class predictions. 28 | locations (batch_size, num_priors, 4): predicted locations. 29 | labels (batch_size, num_priors): real labels of all the priors. 30 | boxes (batch_size, num_priors, 4): real boxes corresponding all the priors. 
31 | """ 32 | num_classes = confidence.size(2) 33 | with torch.no_grad(): 34 | # derived from cross_entropy=sum(log(p)) 35 | loss = -F.log_softmax(confidence, dim=2)[:, :, 0] 36 | mask = box_utils.hard_negative_mining(loss, labels, self.neg_pos_ratio) 37 | 38 | confidence = confidence[mask, :] 39 | classification_loss = F.cross_entropy(confidence.reshape(-1, num_classes), labels[mask], reduction='sum') 40 | pos_mask = labels > 0 41 | predicted_locations = predicted_locations[pos_mask, :].reshape(-1, 4) 42 | gt_locations = gt_locations[pos_mask, :].reshape(-1, 4) 43 | smooth_l1_loss = F.smooth_l1_loss(predicted_locations, gt_locations, reduction='sum') # smooth_l1_loss 44 | # smooth_l1_loss = F.mse_loss(predicted_locations, gt_locations, reduction='sum') #l2 loss 45 | num_pos = gt_locations.size(0) 46 | return smooth_l1_loss / num_pos, classification_loss / num_pos 47 | -------------------------------------------------------------------------------- /vision/ssd/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/vision/ssd/__init__.py -------------------------------------------------------------------------------- /vision/ssd/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/vision/ssd/config/__init__.py -------------------------------------------------------------------------------- /vision/ssd/config/fd_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from vision.utils.box_utils import generate_priors 4 | 5 | image_mean_test = image_mean = np.array([127, 127, 127]) 6 | image_std = 128.0 7 | iou_threshold = 0.3 8 | center_variance = 0.1 9 | size_variance = 0.2 10 | 11 | min_boxes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]] 12 | shrinkage_list = [] 13 | image_size = [320, 240] # default input size 320*240 14 | feature_map_w_h_list = [[40, 20, 10, 5], [30, 15, 8, 4]] # default feature map size 15 | priors = [] 16 | 17 | 18 | def define_img_size(size): 19 | global image_size, feature_map_w_h_list, priors 20 | img_size_dict = {128: [128, 96], 21 | 160: [160, 120], 22 | 320: [320, 240], 23 | 480: [480, 360], 24 | 640: [640, 480], 25 | 1280: [1280, 960]} 26 | image_size = img_size_dict[size] 27 | 28 | feature_map_w_h_list_dict = {128: [[16, 8, 4, 2], [12, 6, 3, 2]], 29 | 160: [[20, 10, 5, 3], [15, 8, 4, 2]], 30 | 320: [[40, 20, 10, 5], [30, 15, 8, 4]], 31 | 480: [[60, 30, 15, 8], [45, 23, 12, 6]], 32 | 640: [[80, 40, 20, 10], [60, 30, 15, 8]], 33 | 1280: [[160, 80, 40, 20], [120, 60, 30, 15]]} 34 | feature_map_w_h_list = feature_map_w_h_list_dict[size] 35 | 36 | for i in range(0, len(image_size)): 37 | item_list = [] 38 | for k in range(0, len(feature_map_w_h_list[i])): 39 | item_list.append(image_size[i] / feature_map_w_h_list[i][k]) 40 | shrinkage_list.append(item_list) 41 | priors = generate_priors(feature_map_w_h_list, shrinkage_list, image_size, min_boxes) 42 | -------------------------------------------------------------------------------- /vision/ssd/data_preprocessing.py: -------------------------------------------------------------------------------- 1 | from ..transforms.transforms import * 2 | 3 | 4 | class TrainAugmentation: 5 | def __init__(self, size, mean=0, 
std=1.0): 6 | """ 7 | Args: 8 | size: the size the of final image. 9 | mean: mean pixel value per channel. 10 | """ 11 | self.mean = mean 12 | self.size = size 13 | self.augment = Compose([ 14 | ConvertFromInts(), 15 | PhotometricDistort(), 16 | RandomSampleCrop_v2(), 17 | RandomMirror(), 18 | ToPercentCoords(), 19 | Resize(self.size), 20 | SubtractMeans(self.mean), 21 | lambda img, boxes=None, labels=None: (img / std, boxes, labels), 22 | ToTensor(), 23 | ]) 24 | 25 | def __call__(self, img, boxes, labels): 26 | """ 27 | 28 | Args: 29 | img: the output of cv.imread in RGB layout. 30 | boxes: boundding boxes in the form of (x1, y1, x2, y2). 31 | labels: labels of boxes. 32 | """ 33 | return self.augment(img, boxes, labels) 34 | 35 | 36 | class TestTransform: 37 | def __init__(self, size, mean=0.0, std=1.0): 38 | self.transform = Compose([ 39 | ToPercentCoords(), 40 | Resize(size), 41 | SubtractMeans(mean), 42 | lambda img, boxes=None, labels=None: (img / std, boxes, labels), 43 | ToTensor(), 44 | ]) 45 | 46 | def __call__(self, image, boxes, labels): 47 | return self.transform(image, boxes, labels) 48 | 49 | 50 | class PredictionTransform: 51 | def __init__(self, size, mean=0.0, std=1.0): 52 | self.transform = Compose([ 53 | Resize(size), 54 | SubtractMeans(mean), 55 | lambda img, boxes=None, labels=None: (img / std, boxes, labels), 56 | ToTensor() 57 | ]) 58 | 59 | def __call__(self, image): 60 | image, _, _ = self.transform(image) 61 | return image 62 | -------------------------------------------------------------------------------- /vision/ssd/mb_tiny_RFB_fd.py: -------------------------------------------------------------------------------- 1 | from torch.nn import Conv2d, Sequential, ModuleList, ReLU 2 | 3 | from vision.nn.mb_tiny_RFB import Mb_Tiny_RFB 4 | from vision.ssd.config import fd_config as config 5 | from vision.ssd.predictor import Predictor 6 | from vision.ssd.ssd import SSD 7 | 8 | 9 | def SeperableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0): 10 | """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 
11 | """ 12 | return Sequential( 13 | Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 14 | groups=in_channels, stride=stride, padding=padding), 15 | ReLU(), 16 | Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1), 17 | ) 18 | 19 | 20 | def create_Mb_Tiny_RFB_fd(num_classes, is_test=False, device="cuda"): 21 | base_net = Mb_Tiny_RFB(2) 22 | base_net_model = base_net.model # disable dropout layer 23 | 24 | source_layer_indexes = [ 25 | 8, 26 | 11, 27 | 13 28 | ] 29 | extras = ModuleList([ 30 | Sequential( 31 | Conv2d(in_channels=base_net.base_channel * 16, out_channels=base_net.base_channel * 4, kernel_size=1), 32 | ReLU(), 33 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=base_net.base_channel * 16, kernel_size=3, stride=2, padding=1), 34 | ReLU() 35 | ) 36 | ]) 37 | 38 | regression_headers = ModuleList([ 39 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * 4, kernel_size=3, padding=1), 40 | SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * 4, kernel_size=3, padding=1), 41 | SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * 4, kernel_size=3, padding=1), 42 | Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * 4, kernel_size=3, padding=1) 43 | ]) 44 | 45 | classification_headers = ModuleList([ 46 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * num_classes, kernel_size=3, padding=1), 47 | SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * num_classes, kernel_size=3, padding=1), 48 | SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * num_classes, kernel_size=3, padding=1), 49 | Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * num_classes, kernel_size=3, padding=1) 50 | ]) 51 | 52 | return SSD(num_classes, base_net_model, source_layer_indexes, 53 | extras, classification_headers, regression_headers, is_test=is_test, config=config, device=device) 54 | 55 | 56 | def create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=200, nms_method=None, sigma=0.5, device=None): 57 | predictor = Predictor(net, config.image_size, config.image_mean_test, 58 | config.image_std, 59 | nms_method=nms_method, 60 | iou_threshold=config.iou_threshold, 61 | candidate_size=candidate_size, 62 | sigma=sigma, 63 | device=device) 64 | return predictor 65 | -------------------------------------------------------------------------------- /vision/ssd/mb_tiny_fd.py: -------------------------------------------------------------------------------- 1 | from torch.nn import Conv2d, Sequential, ModuleList, ReLU 2 | 3 | from vision.nn.mb_tiny import Mb_Tiny 4 | from vision.ssd.config import fd_config as config 5 | from vision.ssd.predictor import Predictor 6 | from vision.ssd.ssd import SSD 7 | 8 | 9 | def SeperableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0): 10 | """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 
11 | """ 12 | return Sequential( 13 | Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 14 | groups=in_channels, stride=stride, padding=padding), 15 | ReLU(), 16 | Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1), 17 | ) 18 | 19 | 20 | def create_mb_tiny_fd(num_classes, is_test=False, device="cuda"): 21 | base_net = Mb_Tiny(2) 22 | base_net_model = base_net.model # disable dropout layer 23 | 24 | source_layer_indexes = [ 25 | 8, 26 | 11, 27 | 13 28 | ] 29 | extras = ModuleList([ 30 | Sequential( 31 | Conv2d(in_channels=base_net.base_channel * 16, out_channels=base_net.base_channel * 4, kernel_size=1), 32 | ReLU(), 33 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=base_net.base_channel * 16, kernel_size=3, stride=2, padding=1), 34 | ReLU() 35 | ) 36 | ]) 37 | 38 | regression_headers = ModuleList([ 39 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * 4, kernel_size=3, padding=1), 40 | SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * 4, kernel_size=3, padding=1), 41 | SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * 4, kernel_size=3, padding=1), 42 | Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * 4, kernel_size=3, padding=1) 43 | ]) 44 | 45 | classification_headers = ModuleList([ 46 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * num_classes, kernel_size=3, padding=1), 47 | SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * num_classes, kernel_size=3, padding=1), 48 | SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * num_classes, kernel_size=3, padding=1), 49 | Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * num_classes, kernel_size=3, padding=1) 50 | ]) 51 | 52 | return SSD(num_classes, base_net_model, source_layer_indexes, 53 | extras, classification_headers, regression_headers, is_test=is_test, config=config, device=device) 54 | 55 | 56 | def create_mb_tiny_fd_predictor(net, candidate_size=200, nms_method=None, sigma=0.5, device=None): 57 | predictor = Predictor(net, config.image_size, config.image_mean_test, 58 | config.image_std, 59 | nms_method=nms_method, 60 | iou_threshold=config.iou_threshold, 61 | candidate_size=candidate_size, 62 | sigma=sigma, 63 | device=device) 64 | return predictor 65 | -------------------------------------------------------------------------------- /vision/ssd/predictor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..utils import box_utils 4 | from .data_preprocessing import PredictionTransform 5 | from ..utils.misc import Timer 6 | 7 | 8 | class Predictor: 9 | def __init__(self, net, size, mean=0.0, std=1.0, nms_method=None, 10 | iou_threshold=0.3, filter_threshold=0.01, candidate_size=200, sigma=0.5, device=None): 11 | self.net = net 12 | self.transform = PredictionTransform(size, mean, std) 13 | self.iou_threshold = iou_threshold 14 | self.filter_threshold = filter_threshold 15 | self.candidate_size = candidate_size 16 | self.nms_method = nms_method 17 | 18 | self.sigma = sigma 19 | if device: 20 | self.device = device 21 | else: 22 | self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 23 | 24 | self.net.to(self.device) 25 | self.net.eval() 26 | 27 | self.timer = Timer() 28 | 29 | def predict(self, image, top_k=-1, prob_threshold=None): 30 | cpu_device = torch.device("cpu") 31 | height, width, _ = image.shape 32 | 
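        # the prediction transform resizes the image to the configured input size,
        # subtracts the image mean, scales by std and converts it to a tensor
        # (see data_preprocessing.PredictionTransform)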
image = self.transform(image) 33 | images = image.unsqueeze(0) 34 | images = images.to(self.device) 35 | with torch.no_grad(): 36 | for i in range(1): 37 | self.timer.start() 38 | scores, boxes = self.net.forward(images) 39 | print("Inference time: ", self.timer.end()) 40 | boxes = boxes[0] 41 | scores = scores[0] 42 | if not prob_threshold: 43 | prob_threshold = self.filter_threshold 44 | # this version of nms is slower on GPU, so we move data to CPU. 45 | boxes = boxes.to(cpu_device) 46 | scores = scores.to(cpu_device) 47 | picked_box_probs = [] 48 | picked_labels = [] 49 | for class_index in range(1, scores.size(1)): 50 | probs = scores[:, class_index] 51 | mask = probs > prob_threshold 52 | probs = probs[mask] 53 | if probs.size(0) == 0: 54 | continue 55 | subset_boxes = boxes[mask, :] 56 | box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1) 57 | box_probs = box_utils.nms(box_probs, self.nms_method, 58 | score_threshold=prob_threshold, 59 | iou_threshold=self.iou_threshold, 60 | sigma=self.sigma, 61 | top_k=top_k, 62 | candidate_size=self.candidate_size) 63 | picked_box_probs.append(box_probs) 64 | picked_labels.extend([class_index] * box_probs.size(0)) 65 | if not picked_box_probs: 66 | return torch.tensor([]), torch.tensor([]), torch.tensor([]) 67 | picked_box_probs = torch.cat(picked_box_probs) 68 | picked_box_probs[:, 0] *= width 69 | picked_box_probs[:, 1] *= height 70 | picked_box_probs[:, 2] *= width 71 | picked_box_probs[:, 3] *= height 72 | return picked_box_probs[:, :4], torch.tensor(picked_labels), picked_box_probs[:, 4] 73 | -------------------------------------------------------------------------------- /vision/transforms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/vision/transforms/__init__.py -------------------------------------------------------------------------------- /vision/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .misc import * 2 | -------------------------------------------------------------------------------- /vision/utils/box_utils_numpy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def convert_locations_to_boxes(locations, priors, center_variance, 5 | size_variance): 6 | """Convert regressional location results of SSD into boxes in the form of (center_x, center_y, h, w). 7 | 8 | The conversion: 9 | $$predicted\_center * center_variance = \frac {real\_center - prior\_center} {prior\_hw}$$ 10 | $$exp(predicted\_hw * size_variance) = \frac {real\_hw} {prior\_hw}$$ 11 | We do it in the inverse direction here. 12 | Args: 13 | locations (batch_size, num_priors, 4): the regression output of SSD. It will contain the outputs as well. 14 | priors (num_priors, 4) or (batch_size/1, num_priors, 4): prior boxes. 15 | center_variance: a float used to change the scale of center. 16 | size_variance: a float used to change of scale of size. 17 | Returns: 18 | boxes: priors: [[center_x, center_y, h, w]]. All the values 19 | are relative to the image size. 20 | """ 21 | # priors can have one dimension less. 
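    # e.g. priors of shape (num_priors, 4) are broadcast against locations of shape
    # (batch_size, num_priors, 4) by inserting a leading batch axis below.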
22 | if len(priors.shape) + 1 == len(locations.shape): 23 | priors = np.expand_dims(priors, 0) 24 | return np.concatenate([ 25 | locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2], 26 | np.exp(locations[..., 2:] * size_variance) * priors[..., 2:] 27 | ], axis=len(locations.shape) - 1) 28 | 29 | 30 | def convert_boxes_to_locations(center_form_boxes, center_form_priors, center_variance, size_variance): 31 | # priors can have one dimension less 32 | if len(center_form_priors.shape) + 1 == len(center_form_boxes.shape): 33 | center_form_priors = np.expand_dims(center_form_priors, 0) 34 | return np.concatenate([ 35 | (center_form_boxes[..., :2] - center_form_priors[..., :2]) / center_form_priors[..., 2:] / center_variance, 36 | np.log(center_form_boxes[..., 2:] / center_form_priors[..., 2:]) / size_variance 37 | ], axis=len(center_form_boxes.shape) - 1) 38 | 39 | 40 | def area_of(left_top, right_bottom): 41 | """Compute the areas of rectangles given two corners. 42 | 43 | Args: 44 | left_top (N, 2): left top corner. 45 | right_bottom (N, 2): right bottom corner. 46 | 47 | Returns: 48 | area (N): return the area. 49 | """ 50 | hw = np.clip(right_bottom - left_top, 0.0, None) 51 | return hw[..., 0] * hw[..., 1] 52 | 53 | 54 | def iou_of(boxes0, boxes1, eps=1e-5): 55 | """Return intersection-over-union (Jaccard index) of boxes. 56 | 57 | Args: 58 | boxes0 (N, 4): ground truth boxes. 59 | boxes1 (N or 1, 4): predicted boxes. 60 | eps: a small number to avoid 0 as denominator. 61 | Returns: 62 | iou (N): IoU values. 63 | """ 64 | overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2]) 65 | overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:]) 66 | 67 | overlap_area = area_of(overlap_left_top, overlap_right_bottom) 68 | area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) 69 | area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) 70 | return overlap_area / (area0 + area1 - overlap_area + eps) 71 | 72 | 73 | def center_form_to_corner_form(locations): 74 | return np.concatenate([locations[..., :2] - locations[..., 2:] / 2, 75 | locations[..., :2] + locations[..., 2:] / 2], len(locations.shape) - 1) 76 | 77 | 78 | def corner_form_to_center_form(boxes): 79 | return np.concatenate([ 80 | (boxes[..., :2] + boxes[..., 2:]) / 2, 81 | boxes[..., 2:] - boxes[..., :2] 82 | ], len(boxes.shape) - 1) 83 | 84 | 85 | def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): 86 | """ 87 | 88 | Args: 89 | box_scores (N, 5): boxes in corner-form and probabilities. 90 | iou_threshold: intersection over union threshold. 91 | top_k: keep top_k results. If k <= 0, keep all the results. 92 | candidate_size: only consider the candidates with the highest scores. 
93 | Returns: 94 | picked: a list of indexes of the kept boxes 95 | """ 96 | scores = box_scores[:, -1] 97 | boxes = box_scores[:, :-1] 98 | picked = [] 99 | # _, indexes = scores.sort(descending=True) 100 | indexes = np.argsort(scores) 101 | # indexes = indexes[:candidate_size] 102 | indexes = indexes[-candidate_size:] 103 | while len(indexes) > 0: 104 | # current = indexes[0] 105 | current = indexes[-1] 106 | picked.append(current) 107 | if 0 < top_k == len(picked) or len(indexes) == 1: 108 | break 109 | current_box = boxes[current, :] 110 | # indexes = indexes[1:] 111 | indexes = indexes[:-1] 112 | rest_boxes = boxes[indexes, :] 113 | iou = iou_of( 114 | rest_boxes, 115 | np.expand_dims(current_box, axis=0), 116 | ) 117 | indexes = indexes[iou <= iou_threshold] 118 | 119 | return box_scores[picked, :] 120 | -------------------------------------------------------------------------------- /vision/utils/misc.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import torch 4 | 5 | 6 | def str2bool(s): 7 | return s.lower() in ('true', '1') 8 | 9 | 10 | class Timer: 11 | def __init__(self): 12 | self.clock = {} 13 | 14 | def start(self, key="default"): 15 | self.clock[key] = datetime.datetime.now() 16 | 17 | def end(self, key="default"): 18 | if key not in self.clock: 19 | raise Exception(f"{key} is not in the clock.") 20 | interval = datetime.datetime.now() - self.clock[key] 21 | del self.clock[key] 22 | return interval.total_seconds() 23 | 24 | 25 | def save_checkpoint(epoch, net_state_dict, optimizer_state_dict, best_score, checkpoint_path, model_path): 26 | torch.save({ 27 | 'epoch': epoch, 28 | 'model': net_state_dict, 29 | 'optimizer': optimizer_state_dict, 30 | 'best_score': best_score 31 | }, checkpoint_path) 32 | torch.save(net_state_dict, model_path) 33 | 34 | 35 | def load_checkpoint(checkpoint_path): 36 | return torch.load(checkpoint_path) 37 | 38 | 39 | def freeze_net_layers(net): 40 | for param in net.parameters(): 41 | param.requires_grad = False 42 | 43 | 44 | def store_labels(path, labels): 45 | with open(path, "w") as f: 46 | f.write("\n".join(labels)) 47 | -------------------------------------------------------------------------------- /widerface_evaluate/README.md: -------------------------------------------------------------------------------- 1 | # WiderFace-Evaluation 2 | Python Evaluation Code for [Wider Face Dataset](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/) 3 | 4 | 5 | ## Usage 6 | 7 | ##### run evaluation_on_widerface.py 8 | 9 | ```` 10 | python3 evaluation_on_widerface.py 11 | ```` 12 | ##### before evaluating 13 | 14 | ```` 15 | python3 setup.py build_ext --inplace 16 | ```` 17 | 18 | ##### evaluating 19 | 20 | **GroungTruth:** `wider_face_val.mat`, `wider_easy_val.mat`, `wider_medium_val.mat`,`wider_hard_val.mat` 21 | 22 | ```` 23 | python3 evaluation.py -p -g 24 | ```` 25 | 26 | ## Acknowledgements 27 | 28 | some code borrowed from Sergey Karayev 29 | -------------------------------------------------------------------------------- /widerface_evaluate/box_overlaps.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | 
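# note: the np.float alias used below was deprecated in NumPy 1.20 and removed in 1.24;
# on a modern NumPy this needs to be np.float64 instead.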
DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps -------------------------------------------------------------------------------- /widerface_evaluate/evaluation_on_widerface.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ python3 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2019-10-17 4 | # @Author : vealocia 5 | # @FileName: evaluation_on_widerface.py 6 | 7 | import math 8 | import os 9 | import sys 10 | 11 | import cv2 12 | sys.path.append('../') 13 | from vision.ssd.config.fd_config import define_img_size 14 | 15 | input_img_size = 320 # define input size ,default optional(128/160/320/480/640/1280) 16 | define_img_size(input_img_size) # must put define_img_size() before 'import create_mb_tiny_fd, create_mb_tiny_fd_predictor' 17 | 18 | from vision.ssd.mb_tiny_fd import create_mb_tiny_fd, create_mb_tiny_fd_predictor 19 | from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd, create_Mb_Tiny_RFB_fd_predictor 20 | 21 | label_path = "../models/voc-model-labels.txt" 22 | 23 | # net_type = "slim" # inference faster,lower precision 24 | net_type = "RFB" # inference lower,higher precision 25 | 26 | class_names = [name.strip() for name in open(label_path).readlines()] 27 | num_classes = len(class_names) 28 | test_device = "cuda:0" 29 | # test_device = "cpu" 30 | candidate_size = 800 31 | threshold = 0.1 32 | 33 | val_image_root = "/pic/linzai/1080Ti/home_linzai/PycharmProjects/insightface/RetinaFace/data/retinaface/val" # path to widerface valuation image root 34 | val_result_txt_save_root = "./widerface_evaluation/" # result directory 35 | 36 | if net_type == 'slim': 37 | model_path = "../models/pretrained/version-slim-320.pth" 38 | # model_path = "../models/pretrained/version-slim-640.pth" 39 | net = create_mb_tiny_fd(len(class_names), is_test=True, device=test_device) 40 | predictor = create_mb_tiny_fd_predictor(net, candidate_size=candidate_size, device=test_device) 41 | elif net_type == 'RFB': 42 | model_path = "../models/pretrained/version-RFB-320.pth" 43 | # model_path = "../models/pretrained/version-RFB-640.pth" 44 | net = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True, device=test_device) 45 | predictor = create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=candidate_size, device=test_device) 46 | else: 47 | print("The net type is wrong!") 48 | sys.exit(1) 
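# note: label_path, model_path and the '../' sys.path entry above are relative to the
# widerface_evaluate/ directory, so the script is expected to be run from there.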
49 | net.load(model_path) 50 | 51 | counter = 0 52 | for parent, dir_names, file_names in os.walk(val_image_root): 53 | for file_name in file_names: 54 | if not file_name.lower().endswith('jpg'): 55 | continue 56 | im = cv2.imread(os.path.join(parent, file_name), cv2.IMREAD_COLOR) 57 | im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) 58 | boxes, labels, probs = predictor.predict(im, candidate_size / 2, threshold) 59 | 60 | event_name = parent.split('/')[-1] 61 | if not os.path.exists(os.path.join(val_result_txt_save_root, event_name)): 62 | os.makedirs(os.path.join(val_result_txt_save_root, event_name)) 63 | fout = open(os.path.join(val_result_txt_save_root, event_name, file_name.split('.')[0] + '.txt'), 'w') 64 | fout.write(file_name.split('.')[0] + '\n') 65 | fout.write(str(boxes.size(0)) + '\n') 66 | for i in range(boxes.size(0)): 67 | bbox = boxes[i, :] 68 | fout.write('%d %d %d %d %.03f' % (math.floor(bbox[0]), math.floor(bbox[1]), math.ceil(bbox[2] - bbox[0]), math.ceil(bbox[3] - bbox[1]), probs[i] if probs[i] <= 1 else 1) + '\n') 69 | fout.close() 70 | counter += 1 71 | print('[%d] %s is processed.' % (counter, file_name)) 72 | 73 | # note: with score_threshold = 0.11 and hard_nms, MAP of 320-input model on widerface val set is: 0.785/0.695/0.431 74 | -------------------------------------------------------------------------------- /widerface_evaluate/ground_truth/wider_easy_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/widerface_evaluate/ground_truth/wider_easy_val.mat -------------------------------------------------------------------------------- /widerface_evaluate/ground_truth/wider_face_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/widerface_evaluate/ground_truth/wider_face_val.mat -------------------------------------------------------------------------------- /widerface_evaluate/ground_truth/wider_hard_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/widerface_evaluate/ground_truth/wider_hard_val.mat -------------------------------------------------------------------------------- /widerface_evaluate/ground_truth/wider_medium_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/widerface_evaluate/ground_truth/wider_medium_val.mat -------------------------------------------------------------------------------- /widerface_evaluate/setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | WiderFace evaluation code 3 | author: wondervictor 4 | mail: tianhengcheng@gmail.com 5 | copyright@wondervictor 6 | """ 7 | 8 | from distutils.core import setup, Extension 9 | from Cython.Build import cythonize 10 | import numpy 11 | 12 | package = Extension('bbox', ['box_overlaps.pyx'], include_dirs=[numpy.get_include()]) 13 | setup(ext_modules=cythonize([package])) 14 | --------------------------------------------------------------------------------