├── .gitignore ├── .gitmodules ├── LICENSE ├── MNN ├── CMakeLists.txt ├── README.md ├── imgs │ ├── 1.jpg │ ├── 2.jpg │ ├── 3.jpg │ └── 4.jpg ├── mnn │ ├── include │ │ ├── AutoTime.hpp │ │ ├── Backend.hpp │ │ ├── ErrorCode.hpp │ │ ├── HalideRuntime.h │ │ ├── ImageProcess.hpp │ │ ├── Interpreter.hpp │ │ ├── MNNDefine.h │ │ ├── MNNForwardType.h │ │ ├── MNNSharedContext.h │ │ ├── Matrix.h │ │ ├── NonCopyable.hpp │ │ ├── Rect.h │ │ ├── Tensor.hpp │ │ └── revertMNNModel.hpp │ └── lib │ │ └── libMNN.so ├── model │ ├── version-RFB │ │ ├── RFB-320-quant-ADMM-32.mnn │ │ ├── RFB-320-quant-KL-5792.mnn │ │ └── RFB-320.mnn │ └── version-slim │ │ ├── slim-320-quant-ADMM-50.mnn │ │ └── slim-320.mnn ├── python │ ├── README.md │ └── ultraface_py_mnn.py ├── result.jpg └── src │ ├── UltraFace.cpp │ ├── UltraFace.hpp │ └── main.cpp ├── README.md ├── README_CN.md ├── caffe ├── MyCaffe.py ├── README.md ├── convertCaffe.py ├── model │ ├── RFB-320 │ │ ├── RFB-320.caffemodel │ │ └── RFB-320.prototxt │ └── Slim-320 │ │ ├── slim-320.caffemodel │ │ └── slim-320.prototxt ├── onnx2caffe │ ├── __init__.py │ ├── _error_utils.py │ ├── _graph.py │ ├── _operators.py │ ├── _transformers.py │ └── _weightloader.py ├── ultra_face_caffe_inference.py └── ultra_face_opencvdnn_inference.py ├── cal_flops.py ├── check_gt_box.py ├── convert_to_onnx.py ├── data ├── retinaface_labels │ ├── test │ │ └── label.txt │ ├── train │ │ └── label.txt │ └── val │ │ └── label.txt └── wider_face_2_voc_add_landmark.py ├── detect_imgs.py ├── detect_imgs_onnx.py ├── imgs ├── 1.jpg ├── 10.jpg ├── 11.jpg ├── 12.jpg ├── 13.jpg ├── 15.jpg ├── 16.jpg ├── 17.jpg ├── 18.jpg ├── 19.jpg ├── 2.jpg ├── 20.jpg ├── 21.jpg ├── 22.jpg ├── 23.jpg ├── 24.jpg ├── 25.jpg ├── 26.jpg ├── 27.jpg ├── 3.jpg ├── 5.jpg ├── 6.jpg ├── 8.jpg └── 9.jpg ├── masked_face ├── README.md ├── detect_imgs.py ├── imgs │ ├── img1.jpeg │ ├── img2.jpeg │ ├── img3.jpg │ ├── img4.jpeg │ ├── img5.jpeg │ ├── img6.webp │ ├── img7.webp │ └── img8.jpeg ├── mafa2voc.py ├── pretrained │ ├── RFB-1280-masked_face-v2.onnx │ ├── RFB-320-masked_face-v2.pth │ ├── RFB-640-masked_face-v2.onnx │ └── RFB-640-masked_face-v2.pth ├── readme_imgs │ ├── img1.jpeg │ ├── img2.jpeg │ ├── img3.jpg │ ├── img4.jpeg │ ├── img5.jpeg │ ├── img6.webp │ ├── img7.webp │ └── img8.jpeg └── voc-model-labels.txt ├── models ├── onnx │ ├── version-RFB-320.onnx │ ├── version-RFB-320_simplified.onnx │ ├── version-RFB-320_without_postprocessing.onnx │ ├── version-RFB-640.onnx │ ├── version-slim-320.onnx │ ├── version-slim-320_simplified.onnx │ └── version-slim-320_without_postprocessing.onnx ├── pretrained │ ├── version-RFB-320.pth │ ├── version-RFB-640.pth │ ├── version-slim-320.pth │ └── version-slim-640.pth ├── readme └── voc-model-labels.txt ├── ncnn ├── .clang-format ├── CMakeLists.txt ├── README.md ├── data │ ├── result.jpg │ ├── test.jpg │ ├── version-RFB │ │ ├── RFB-320.bin │ │ └── RFB-320.param │ └── version-slim │ │ ├── slim_320.bin │ │ └── slim_320.param └── src │ ├── UltraFace.cpp │ ├── UltraFace.hpp │ └── main.cpp ├── opencv_dnn ├── cv_dnn_ultraface.cpp └── cv_dnn_ultraface.h ├── paddle ├── data ├── train-version-RFB.sh ├── train-version-slim.sh ├── train.py └── vision │ ├── __init__.py │ ├── datasets │ ├── __init__.py │ └── voc_dataset.py │ ├── nn │ ├── __init__.py │ ├── mb_tiny.py │ ├── mb_tiny_RFB.py │ └── multibox_loss.py │ ├── ssd │ ├── __init__.py │ ├── config │ │ ├── __init__.py │ │ └── fd_config.py │ ├── data_preprocessing.py │ ├── mb_tiny_RFB_fd.py │ ├── mb_tiny_fd.py │ ├── predictor.py │ └── ssd.py │ ├── transforms 
│ ├── __init__.py │ └── transforms.py │ └── utils │ ├── __init__.py │ ├── box_utils.py │ ├── box_utils_numpy.py │ └── misc.py ├── readme_imgs ├── 1.jpg ├── 2.jpg ├── 26.jpg ├── 27.jpg └── 4.jpg ├── requirements.txt ├── run_video_face_detect.py ├── run_video_face_detect_onnx.py ├── tf ├── README.md ├── backend │ ├── op.py │ └── utils.py ├── convert_tensorflow.py ├── det_image.py ├── export_models │ ├── RFB │ │ ├── saved_model.pb │ │ └── variables │ │ │ ├── variables.data-00000-of-00001 │ │ │ └── variables.index │ └── slim │ │ ├── saved_model.pb │ │ └── variables │ │ ├── variables.data-00000-of-00001 │ │ └── variables.index ├── imgs │ ├── test_input.jpg │ ├── test_output_RFB.jpg │ ├── test_output_origin_RFB.jpg │ ├── test_output_origin_slim.jpg │ └── test_output_slim.jpg ├── mapping_tables │ ├── rfb_320.json │ └── slim_320.json └── model │ ├── rfb_320.py │ └── slim_320.py ├── tflite ├── README.md ├── TFLiteFaceDetector.py ├── inference_test.py ├── model │ ├── tflite_RFB_320_without_postprocessing.py │ └── tflite_slim_320_without_postprocessing.py └── pretrained │ ├── version-RFB-320_without_postprocessing.tflite │ └── version-slim-320_without_postprocessing.tflite ├── train-version-RFB.sh ├── train-version-slim.sh ├── train.py ├── vision ├── __init__.py ├── datasets │ ├── __init__.py │ └── voc_dataset.py ├── nn │ ├── __init__.py │ ├── mb_tiny.py │ ├── mb_tiny_RFB.py │ └── multibox_loss.py ├── ssd │ ├── __init__.py │ ├── config │ │ ├── __init__.py │ │ └── fd_config.py │ ├── data_preprocessing.py │ ├── mb_tiny_RFB_fd.py │ ├── mb_tiny_fd.py │ ├── predictor.py │ └── ssd.py ├── transforms │ ├── __init__.py │ └── transforms.py └── utils │ ├── __init__.py │ ├── box_utils.py │ ├── box_utils_numpy.py │ └── misc.py └── widerface_evaluate ├── README.md ├── box_overlaps.pyx ├── evaluation.py ├── evaluation_on_widerface.py ├── ground_truth ├── wider_easy_val.mat ├── wider_face_val.mat ├── wider_hard_val.mat └── wider_medium_val.mat └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | data/wider_face_add_lm_10_10 3 | 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | detect_imgs_results 9 | detect_imgs_results_onnx 10 | widerface_evaluation 11 | 12 | widerface_evaluate/build 13 | widerface_evaluate/*.so 14 | widerface_evaluate/*.c -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "ncnn/3rdparty/ncnn"] 2 | path = ncnn/3rdparty/ncnn 3 | url = https://github.com/Tencent/ncnn 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 linzai 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MNN/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | project(Ultra-face-mnn) 3 | 4 | set(CMAKE_CXX_STANDARD 11) 5 | 6 | find_package(OpenCV REQUIRED) 7 | include_directories( 8 | mnn/include 9 | src 10 | ) 11 | 12 | link_directories(mnn/lib) 13 | 14 | add_executable(Ultra-face-mnn src/main.cpp src/UltraFace.cpp) 15 | target_link_libraries(Ultra-face-mnn MNN ${OpenCV_LIBS}) -------------------------------------------------------------------------------- /MNN/README.md: -------------------------------------------------------------------------------- 1 | # C++ implementation of [Ultra-Light-Fast-Generic-Face-Detector-1MB](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB) with [MNN](https://github.com/alibaba/MNN) 2 | 3 | ## Build 4 | 5 | ```bash 6 | git clone --recursive --depth=1 https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB 7 | 8 | cd Ultra-Light-Fast-Generic-Face-Detector-1MB/MNN 9 | ``` 10 | 11 | * Replace **libMNN.so** under ./mnn/lib with your compiled libMNN.so, then: 12 | 13 | ```bash 14 | mkdir build && cd build && cmake .. 15 | make -j$(nproc) 16 | ``` 17 | 18 | ## Run 19 | * Use the FP32 model and run in FP16 mode: 20 | ```bash 21 | ./Ultra-face-mnn ../model/version-RFB/RFB-320.mnn ../imgs/1.jpg ../imgs/2.jpg ../imgs/3.jpg ../imgs/4.jpg 22 | ``` 23 | * Use the quantized INT8 model: 24 | ```bash 25 | ./Ultra-face-mnn ../model/version-RFB/RFB-320-quant-KL-5792.mnn ../imgs/1.jpg ../imgs/2.jpg ../imgs/3.jpg ../imgs/4.jpg 26 | ``` 27 | 28 | * We provide both converted MNN FP32 and **quantized INT8** models of version-slim-320 and version-RFB-320 in ./MNN/model. The xxx-quant-KL-xxx.mnn models are quantized with the **KL** method and the xxx-quant-ADMM-xxx.mnn models with the **ADMM** method. 29 | 30 | ## How to convert a pretrained model to MNN 31 | 32 | * Before converting the PyTorch pretrained model to ONNX, edit the code below in ```vision/ssd/ssd.py```: add the `boxes = locations` line and comment out the post-processing calls. Then use **convert_to_onnx.py** in the official repo to finish this step. 33 | 34 | ```python 35 | if self.is_test: 36 | confidences = F.softmax(confidences, dim=2) 37 | boxes = locations # this line should be added. 38 | #boxes = box_utils.convert_locations_to_boxes( 39 | # locations, self.priors, self.config.center_variance, self.config.size_variance 40 | #) 41 | # boxes = box_utils.center_form_to_corner_form(boxes) # these lines should be commented out. A detailed analysis is coming soon. 42 | return confidences, boxes 43 | else: 44 | return confidences, locations 45 | ``` 46 | This generates an ONNX model such as **version-RFB-320_without_postprocessing.onnx** in the onnx directory. (Rename the exported model accordingly.)
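For reference, the export step in **convert_to_onnx.py** boils down to roughly the sketch below; the RFB-320 checkpoint path, the CUDA device and the 320x240 input resolution are the repo defaults, and the output filename here is the renamed one that the later steps expect:

```python
# Sketch of the PyTorch -> ONNX export (mirrors convert_to_onnx.py, RFB-320 defaults assumed).
import torch

from vision.ssd.config.fd_config import define_img_size

define_img_size(320)  # must be called before importing the model factory
from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd

net = create_Mb_Tiny_RFB_fd(2, is_test=True)  # 2 classes: background + face
net.load("models/pretrained/version-RFB-320.pth")
net.eval()
net.to("cuda")

dummy_input = torch.randn(1, 3, 240, 320).to("cuda")  # N x C x H x W
torch.onnx.export(net, dummy_input,
                  "models/onnx/version-RFB-320_without_postprocessing.onnx",
                  input_names=['input'], output_names=['scores', 'boxes'])
```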
47 | * Then we can use this tool to simplify onnx : 48 | https://github.com/daquexian/onnx-simplifier 49 | 50 | ``` 51 | python3 -m onnxsim version-RFB-320_without_postprocessing.onnx version-RFB-320_simplified.onnx 52 | 53 | ``` 54 | 55 | Next, you can convert this onnx model like **version-RFB-320_simplified.onnx** into a MNN model. Here is a website for online conversion : https://convertmodel.com. You can also use the MNN compiled conversion tool **MNNConvert**. 56 | 57 | 58 | 59 | ## PS 60 | * Since MNN mainly accelerates model inference on mobile, so the INT8 quantified model will run slower on **PC** than FP32 model in CPU mode. 61 | * If you want to run faster, try using the version-slim model ,using lower-resolution inputs like 160x120 /128x96 or using quantified models(On the mobile). 62 | 63 | ## Result 64 | ![img1](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/MNN/result.jpg) 65 | -------------------------------------------------------------------------------- /MNN/imgs/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/imgs/1.jpg -------------------------------------------------------------------------------- /MNN/imgs/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/imgs/2.jpg -------------------------------------------------------------------------------- /MNN/imgs/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/imgs/3.jpg -------------------------------------------------------------------------------- /MNN/imgs/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/imgs/4.jpg -------------------------------------------------------------------------------- /MNN/mnn/include/AutoTime.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // AutoTime.hpp 3 | // MNN 4 | // 5 | // Created by MNN on 2018/07/27. 6 | // Copyright © 2018, Alibaba Group Holding Limited 7 | // 8 | 9 | #ifndef AutoTime_hpp 10 | #define AutoTime_hpp 11 | 12 | #include 13 | #include 14 | #include "MNNDefine.h" 15 | 16 | namespace MNN { 17 | 18 | /** time tracing util. prints duration between init and deinit. 
*/ 19 | class MNN_PUBLIC AutoTime { 20 | public: 21 | AutoTime(int line, const char* func); 22 | ~AutoTime(); 23 | AutoTime(const AutoTime&) = delete; 24 | AutoTime(const AutoTime&&) = delete; 25 | AutoTime& operator=(const AutoTime&) = delete; 26 | AutoTime& operator=(const AutoTime&&) = delete; 27 | 28 | private: 29 | int mLine; 30 | char* mName; 31 | uint64_t mCurrentTime; 32 | }; 33 | } // namespace MNN 34 | 35 | #ifdef MNN_OPEN_TIME_TRACE 36 | #define AUTOTIME MNN::AutoTime ___t(__LINE__, __func__) 37 | #else 38 | #define AUTOTIME 39 | #endif 40 | 41 | #endif /* AutoTime_hpp */ 42 | -------------------------------------------------------------------------------- /MNN/mnn/include/ErrorCode.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // ErrorCode.hpp 3 | // MNN 4 | // 5 | // Created by MNN on 2018/09/18. 6 | // Copyright © 2018, Alibaba Group Holding Limited 7 | // 8 | 9 | #ifndef ErrorCode_h 10 | #define ErrorCode_h 11 | 12 | namespace MNN { 13 | enum ErrorCode { 14 | #ifdef NO_ERROR 15 | #undef NO_ERROR 16 | #endif // NO_ERROR 17 | NO_ERROR = 0, 18 | OUT_OF_MEMORY = 1, 19 | NOT_SUPPORT = 2, 20 | COMPUTE_SIZE_ERROR = 3, 21 | NO_EXECUTION = 4, 22 | INVALID_VALUE = 5, 23 | 24 | // User error 25 | INPUT_DATA_ERROR = 10, 26 | CALL_BACK_STOP = 11, 27 | 28 | // Op Resize Error 29 | TENSOR_NOT_SUPPORT = 20, 30 | TENSOR_NEED_DIVIDE = 21, 31 | }; 32 | } // namespace MNN 33 | 34 | #endif /* ErrorCode_h */ 35 | -------------------------------------------------------------------------------- /MNN/mnn/include/ImageProcess.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // ImageProcess.hpp 3 | // MNN 4 | // 5 | // Created by MNN on 2018/09/19. 6 | // Copyright © 2018, Alibaba Group Holding Limited 7 | // 8 | 9 | #ifndef ImageProcess_hpp 10 | #define ImageProcess_hpp 11 | 12 | #include "ErrorCode.hpp" 13 | #include "Matrix.h" 14 | #include "Tensor.hpp" 15 | 16 | namespace MNN { 17 | namespace CV { 18 | enum ImageFormat { 19 | RGBA = 0, 20 | RGB, 21 | BGR, 22 | GRAY, 23 | BGRA, 24 | YUV_NV21 = 11, 25 | }; 26 | 27 | enum Filter { NEAREST = 0, BILINEAR = 1, BICUBIC = 2 }; 28 | 29 | enum Wrap { CLAMP_TO_EDGE = 0, ZERO = 1, REPEAT = 2 }; 30 | 31 | /** 32 | * handle image process for tensor. 33 | * step: 34 | * 1: Do transform compute and get points 35 | * 2: Sample line and do format convert 36 | * 3: Turn RGBA to float tensor, and do sub and normalize 37 | */ 38 | class MNN_PUBLIC ImageProcess { 39 | public: 40 | struct Inside; 41 | struct Config { 42 | /** data filter */ 43 | Filter filterType = NEAREST; 44 | /** format of source data */ 45 | ImageFormat sourceFormat = RGBA; 46 | /** format of destination data */ 47 | ImageFormat destFormat = RGBA; 48 | 49 | // Only valid if the dest type is float 50 | float mean[4] = {0.0f, 0.0f, 0.0f, 0.0f}; 51 | float normal[4] = {1.0f, 1.0f, 1.0f, 1.0f}; 52 | 53 | /** edge wrapper */ 54 | Wrap wrap = CLAMP_TO_EDGE; 55 | }; 56 | 57 | public: 58 | /** 59 | * @brief create image process with given config for given tensor. 60 | * @param config given config. 61 | * @param dstTensor given tensor. 62 | * @return image processor. 63 | */ 64 | static ImageProcess* create(const Config& config, const Tensor* dstTensor = nullptr); 65 | 66 | /** 67 | * @brief create image process with given config for given tensor. 
68 | * @param means given means 69 | * @param meanCount given means count 70 | * @param normals given normals 71 | * @param normalCount given normal count 72 | * @param sourceFormat format of source data 73 | * @param destFormat format of destination data 74 | * @param dstTensor given tensor. 75 | * @return image processor. 76 | */ 77 | static ImageProcess* create(const ImageFormat sourceFormat = RGBA, const ImageFormat destFormat = RGBA, 78 | const float* means = nullptr, const int meanCount = 0, const float* normals = nullptr, 79 | const int normalCount = 0, const Tensor* dstTensor = nullptr); 80 | 81 | ~ImageProcess(); 82 | 83 | /** 84 | * @brief get affine transform matrix. 85 | * @return affine transform matrix. 86 | */ 87 | inline const Matrix& matrix() const { 88 | return mTransform; 89 | } 90 | void setMatrix(const Matrix& matrix); 91 | 92 | /** 93 | * @brief convert source data to given tensor. 94 | * @param source source data. 95 | * @param iw source width. 96 | * @param ih source height. 97 | * @param stride number of elements per row. eg: 100 width RGB contains at least 300 elements. 98 | * @param dest given tensor. 99 | * @return result code. 100 | */ 101 | ErrorCode convert(const uint8_t* source, int iw, int ih, int stride, Tensor* dest); 102 | 103 | /** 104 | * @brief convert source data to given tensor. 105 | * @param source source data. 106 | * @param iw source width. 107 | * @param ih source height. 108 | * @param stride number of elements per row. eg: 100 width RGB contains at least 300 elements. 109 | * @param dest dest data. 110 | * @param ow output width. 111 | * @param oh output height. 112 | * @param outputBpp output bpp, if 0, set as the save and config.destFormat. 113 | * @param outputStride output stride, if 0, set as ow * outputBpp. 114 | * @param type Only support halide_type_of and halide_type_of. 115 | * @return result code. 116 | */ 117 | ErrorCode convert(const uint8_t* source, int iw, int ih, int stride, void* dest, int ow, int oh, int outputBpp = 0, 118 | int outputStride = 0, halide_type_t type = halide_type_of()); 119 | 120 | /** 121 | * @brief create tensor with given data. 122 | * @param w image width. 123 | * @param h image height. 124 | * @param bpp bytes per pixel. 125 | * @param p pixel data pointer. 126 | * @return created tensor. 127 | */ 128 | template 129 | static Tensor* createImageTensor(int w, int h, int bpp, void* p = nullptr) { 130 | return createImageTensor(halide_type_of(), w, h, bpp, p); 131 | } 132 | static Tensor* createImageTensor(halide_type_t type, int w, int h, int bpp, void* p = nullptr); 133 | 134 | private: 135 | ImageProcess(const Config& config); 136 | Matrix mTransform; 137 | Matrix mTransformInvert; 138 | Inside* mInside; 139 | }; 140 | } // namespace CV 141 | } // namespace MNN 142 | 143 | #endif /* ImageProcess_hpp */ 144 | -------------------------------------------------------------------------------- /MNN/mnn/include/MNNDefine.h: -------------------------------------------------------------------------------- 1 | // 2 | // MNNDefine.h 3 | // MNN 4 | // 5 | // Created by MNN on 2018/08/09. 6 | // Copyright © 2018, Alibaba Group Holding Limited 7 | // 8 | 9 | #ifndef MNNDefine_h 10 | #define MNNDefine_h 11 | 12 | #include 13 | #include 14 | 15 | #if defined(__APPLE__) 16 | #include "TargetConditionals.h" 17 | #if TARGET_OS_IPHONE 18 | #define MNN_BUILD_FOR_IOS 19 | #endif 20 | #endif 21 | 22 | #ifdef MNN_USE_LOGCAT 23 | #include 24 | #define MNN_ERROR(format, ...) 
__android_log_print(ANDROID_LOG_ERROR, "MNNJNI", format, ##__VA_ARGS__) 25 | #define MNN_PRINT(format, ...) __android_log_print(ANDROID_LOG_INFO, "MNNJNI", format, ##__VA_ARGS__) 26 | #else 27 | #define MNN_PRINT(format, ...) printf(format, ##__VA_ARGS__) 28 | #define MNN_ERROR(format, ...) printf(format, ##__VA_ARGS__) 29 | #endif 30 | 31 | #ifdef DEBUG 32 | #define MNN_ASSERT(x) \ 33 | { \ 34 | int res = (x); \ 35 | if (!res) { \ 36 | MNN_ERROR("Error for %s, %d\n", __FILE__, __LINE__); \ 37 | assert(res); \ 38 | } \ 39 | } 40 | #else 41 | #define MNN_ASSERT(x) \ 42 | { \ 43 | int res = (x); \ 44 | if (!res) { \ 45 | MNN_ERROR("Error for %s, %d\n", __FILE__, __LINE__); \ 46 | } \ 47 | } 48 | #endif 49 | 50 | #define FUNC_PRINT(x) MNN_PRINT(#x "=%d in %s, %d \n", x, __func__, __LINE__); 51 | #define FUNC_PRINT_ALL(x, type) MNN_PRINT(#x "=" #type " %" #type " in %s, %d \n", x, __func__, __LINE__); 52 | 53 | #define MNN_CHECK(success, log) \ 54 | if(!(success)){ \ 55 | MNN_ERROR("Check failed: %s ==> %s\n", #success, #log); \ 56 | } 57 | 58 | #if defined(_MSC_VER) 59 | #if defined(BUILDING_MNN_DLL) 60 | #define MNN_PUBLIC __declspec(dllexport) 61 | #elif defined(USING_MNN_DLL) 62 | #define MNN_PUBLIC __declspec(dllimport) 63 | #else 64 | #define MNN_PUBLIC 65 | #endif 66 | #else 67 | #define MNN_PUBLIC __attribute__((visibility("default"))) 68 | #endif 69 | 70 | #endif /* MNNDefine_h */ 71 | -------------------------------------------------------------------------------- /MNN/mnn/include/MNNForwardType.h: -------------------------------------------------------------------------------- 1 | // 2 | // MNNForwardType.h 3 | // MNN 4 | // 5 | // Created by MNN on 2019/01/19. 6 | // Copyright © 2018, Alibaba Group Holding Limited 7 | // 8 | 9 | #ifndef MNNForwardType_h 10 | #define MNNForwardType_h 11 | #include 12 | #include 13 | 14 | typedef enum { 15 | MNN_FORWARD_CPU = 0, 16 | 17 | /* 18 | Firtly find the first available backends not equal to CPU 19 | If no other backends, use cpu 20 | */ 21 | MNN_FORWARD_AUTO = 4, 22 | 23 | /*Hand write metal*/ 24 | MNN_FORWARD_METAL = 1, 25 | 26 | /*Use IOS's MPS instead of hand-write metal, Not Support yet*/ 27 | MNN_FORWARD_MPS = 2, 28 | 29 | /*Android / Common Device GPU API*/ 30 | MNN_FORWARD_OPENCL = 3, 31 | MNN_FORWARD_OPENGL = 6, 32 | MNN_FORWARD_VULKAN = 7, 33 | 34 | /*Android 8.1's NNAPI, Not Support yet*/ 35 | MNN_FORWARD_NN = 5, 36 | 37 | /*User can use API from Backend.hpp to add or search Backend*/ 38 | MNN_FORWARD_USER_0 = 8, 39 | MNN_FORWARD_USER_1 = 9, 40 | MNN_FORWARD_USER_2 = 10, 41 | MNN_FORWARD_USER_3 = 11, 42 | 43 | MNN_FORWARD_ALL 44 | } MNNForwardType; 45 | #ifdef __cplusplus 46 | namespace MNN { 47 | struct BackendConfig { 48 | enum MemoryMode { Memory_Normal = 0, Memory_High, Memory_Low }; 49 | 50 | MemoryMode memory = Memory_Normal; 51 | 52 | enum PowerMode { Power_Normal = 0, Power_High, Power_Low }; 53 | 54 | PowerMode power = Power_Normal; 55 | 56 | enum PrecisionMode { Precision_Normal = 0, Precision_High, Precision_Low }; 57 | 58 | PrecisionMode precision = Precision_Normal; 59 | 60 | /** user defined context */ 61 | union { 62 | void* sharedContext = nullptr; 63 | size_t flags; // Valid for CPU Backend 64 | }; 65 | }; 66 | }; // namespace MNN 67 | #endif 68 | #endif /* MNNForwardType_h */ 69 | -------------------------------------------------------------------------------- /MNN/mnn/include/MNNSharedContext.h: -------------------------------------------------------------------------------- 1 | // 2 | // MNNSharedContext.h 3 | 
// MNN 4 | // 5 | // Created by MNN on 2018/10/11. 6 | // Copyright © 2018, Alibaba Group Holding Limited 7 | // 8 | 9 | #ifndef MNNSharedContext_h 10 | #define MNNSharedContext_h 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | #include /*uint32_t*/ 16 | 17 | #ifndef VK_DEFINE_HANDLE 18 | #define VK_DEFINE_HANDLE(object) typedef struct object##_T* object; 19 | VK_DEFINE_HANDLE(VkInstance) 20 | VK_DEFINE_HANDLE(VkPhysicalDevice) 21 | VK_DEFINE_HANDLE(VkDevice) 22 | VK_DEFINE_HANDLE(VkQueue) 23 | #endif 24 | struct MNNVulkanContext { 25 | VkInstance pInstance; 26 | VkPhysicalDevice pPhysicalDevice; 27 | VkDevice pDevice; 28 | VkQueue pQueue; 29 | uint32_t iQueueFamilyIndex; 30 | }; 31 | #ifdef __cplusplus 32 | } 33 | #endif 34 | 35 | #endif /* MNNSharedContext_h */ 36 | -------------------------------------------------------------------------------- /MNN/mnn/include/NonCopyable.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // NonCopyable.hpp 3 | // MNN 4 | // 5 | // Created by MNN on 2018/09/19. 6 | // Copyright © 2018, Alibaba Group Holding Limited 7 | // 8 | 9 | #ifndef NonCopyable_hpp 10 | #define NonCopyable_hpp 11 | 12 | namespace MNN { 13 | /** protocol class. used to delete assignment operator. */ 14 | class NonCopyable { 15 | public: 16 | NonCopyable() = default; 17 | NonCopyable(const NonCopyable&) = delete; 18 | NonCopyable(const NonCopyable&&) = delete; 19 | NonCopyable& operator=(const NonCopyable&) = delete; 20 | NonCopyable& operator=(const NonCopyable&&) = delete; 21 | }; 22 | } // namespace MNN 23 | 24 | #endif /* NonCopyable_hpp */ 25 | -------------------------------------------------------------------------------- /MNN/mnn/include/revertMNNModel.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // revertMNNModel.hpp 3 | // MNN 4 | // 5 | // Created by MNN on 2019/01/31. 
6 | // Copyright © 2018, Alibaba Group Holding Limited 7 | // 8 | 9 | #ifndef REVERTMNNMODEL_HPP 10 | #define REVERTMNNMODEL_HPP 11 | 12 | #include "mnn/MNN_generated.h" 13 | 14 | class Revert { 15 | public: 16 | Revert(const char* originalModelFileName); 17 | ~Revert(); 18 | void* getBuffer() const; 19 | const size_t getBufferSize() const; 20 | void initialize(); 21 | static float getRandValue(); 22 | 23 | private: 24 | Revert(); 25 | std::unique_ptr mMNNNet; 26 | size_t mBufferSize; 27 | std::shared_ptr mBuffer; 28 | void randStart(); 29 | void packMNNNet(); 30 | }; 31 | 32 | #endif // REVERTMNNMODEL_HPP 33 | -------------------------------------------------------------------------------- /MNN/mnn/lib/libMNN.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/mnn/lib/libMNN.so -------------------------------------------------------------------------------- /MNN/model/version-RFB/RFB-320-quant-ADMM-32.mnn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/model/version-RFB/RFB-320-quant-ADMM-32.mnn -------------------------------------------------------------------------------- /MNN/model/version-RFB/RFB-320-quant-KL-5792.mnn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/model/version-RFB/RFB-320-quant-KL-5792.mnn -------------------------------------------------------------------------------- /MNN/model/version-RFB/RFB-320.mnn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/model/version-RFB/RFB-320.mnn -------------------------------------------------------------------------------- /MNN/model/version-slim/slim-320-quant-ADMM-50.mnn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/model/version-slim/slim-320-quant-ADMM-50.mnn -------------------------------------------------------------------------------- /MNN/model/version-slim/slim-320.mnn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/model/version-slim/slim-320.mnn -------------------------------------------------------------------------------- /MNN/python/README.md: -------------------------------------------------------------------------------- 1 | # Python implemententation of [Ultra-Light-Fast-Generic-Face-Detector-1MB](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB) with [MNN](https://github.com/alibaba/MNN) 2 | 3 | ## How to use MNN in Python 4 | 5 | ### Install 6 | #### Install Depencies 7 | ##### graphviz 8 | for macOS: 9 | ```bash 10 | brew install graphviz 11 | ``` 12 | for Linux: 13 | ```bash 14 | apt-get install graphviz 15 | ``` 16 | 17 | #### Python Version Limitation 18 | Python2.7, 3.5, 3.6, 3.7 are supported, but for Windows, 
python2.7 is not supported. 19 | for macOS: 20 | ```bash 21 | pip install -U MNN 22 | ``` 23 | 24 | for Linux: 25 | As PyPi requires all wheels to be tagged with "ManyLinux", and old version pip can't get the "ManyLinux" Tagged wheel, thus you have to upgrade your pip to newer version in order to use "pip install" 26 | ```bash 27 | pip install -U pip 28 | pip install -U MNN 29 | ``` 30 | 31 | ## Run 32 | * Use FP32 model(version-RFB) and run in FP16 mode: 33 | ```bash 34 | python ultraface_py_mnn.py --model_path ../model/version-RFB/RFB-320.mnn 35 | ``` 36 | * Use quantized INT8 model: 37 | ```bash 38 | python ultraface_py_mnn.py --model_path ../model/version-RFB/RFB-320-quant-KL-5792.mnn 39 | ``` 40 | 41 | * We provide both converted MNN FP32 and **quantized INT8** models of version-slim-320 and version-RFB-320 in ./MNN/model . The xxx-quant-KL-xxx.mnn is quantified by the **KL** method and xxx-quant-ADMM-xxx.mnn is quantified by the **ADMM** method. 42 | 43 | 44 | ## PS 45 | * Since MNN mainly accelerates model inference on mobile, so the INT8 quantified model will run slower on **PC** than FP32 model in CPU mode. 46 | * If you want to run faster, try using the version-slim model ,using lower-resolution inputs like 160x120 /128x96 or using quantified models(On the mobile). 47 | 48 | ## Result 49 | ![img1](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/MNN/result.jpg) -------------------------------------------------------------------------------- /MNN/result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/MNN/result.jpg -------------------------------------------------------------------------------- /MNN/src/UltraFace.hpp: -------------------------------------------------------------------------------- 1 | // Created by Linzaer on 2019/11/15. 2 | // Copyright © 2019 Linzaer. All rights reserved. 
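// UltraFace wraps an MNN Interpreter/Session for this detector. The constructor
// takes the .mnn model path, the network input size, the thread count and the
// score/IoU thresholds. detect() normalizes the input (mean 127, scale 1/128),
// runs the session, decodes the score/box tensors against the SSD priors built
// from min_boxes and strides, and filters the candidates with NMS (hard or blending).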
3 | 4 | #ifndef UltraFace_hpp 5 | #define UltraFace_hpp 6 | 7 | #pragma once 8 | 9 | #include "Interpreter.hpp" 10 | 11 | #include "MNNDefine.h" 12 | #include "Tensor.hpp" 13 | #include "ImageProcess.hpp" 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #define num_featuremap 4 23 | #define hard_nms 1 24 | #define blending_nms 2 /* mix nms was been proposaled in paper blaze face, aims to minimize the temporal jitter*/ 25 | typedef struct FaceInfo { 26 | float x1; 27 | float y1; 28 | float x2; 29 | float y2; 30 | float score; 31 | 32 | } FaceInfo; 33 | 34 | class UltraFace { 35 | public: 36 | UltraFace(const std::string &mnn_path, 37 | int input_width, int input_length, int num_thread_ = 4, float score_threshold_ = 0.7, float iou_threshold_ = 0.3, 38 | int topk_ = -1); 39 | 40 | ~UltraFace(); 41 | 42 | int detect(cv::Mat &img, std::vector &face_list); 43 | 44 | private: 45 | void generateBBox(std::vector &bbox_collection, MNN::Tensor *scores, MNN::Tensor *boxes); 46 | 47 | void nms(std::vector &input, std::vector &output, int type = blending_nms); 48 | 49 | private: 50 | 51 | std::shared_ptr ultraface_interpreter; 52 | MNN::Session *ultraface_session = nullptr; 53 | MNN::Tensor *input_tensor = nullptr; 54 | 55 | int num_thread; 56 | int image_w; 57 | int image_h; 58 | 59 | int in_w; 60 | int in_h; 61 | int num_anchors; 62 | 63 | float score_threshold; 64 | float iou_threshold; 65 | 66 | 67 | const float mean_vals[3] = {127, 127, 127}; 68 | const float norm_vals[3] = {1.0 / 128, 1.0 / 128, 1.0 / 128}; 69 | 70 | const float center_variance = 0.1; 71 | const float size_variance = 0.2; 72 | const std::vector> min_boxes = { 73 | {10.0f, 16.0f, 24.0f}, 74 | {32.0f, 48.0f}, 75 | {64.0f, 96.0f}, 76 | {128.0f, 192.0f, 256.0f}}; 77 | const std::vector strides = {8.0, 16.0, 32.0, 64.0}; 78 | std::vector> featuremap_size; 79 | std::vector> shrinkage_size; 80 | std::vector w_h_list; 81 | 82 | std::vector> priors = {}; 83 | }; 84 | 85 | #endif /* UltraFace_hpp */ 86 | -------------------------------------------------------------------------------- /MNN/src/main.cpp: -------------------------------------------------------------------------------- 1 | // Created by Linzaer on 2019/11/15. 2 | // Copyright © 2019 Linzaer. All rights reserved. 
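// Demo entry point. Usage: ./Ultra-face-mnn <model.mnn path> <image files...>
// Each image is run through UltraFace::detect (320x240 input, score threshold 0.65),
// the detected boxes are drawn, the elapsed time is printed, and the annotated
// frame is shown and saved as result<i>.jpg.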
3 | 4 | #include "UltraFace.hpp" 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | int main(int argc, char **argv) { 11 | if (argc <= 2) { 12 | fprintf(stderr, "Usage: %s [image files...]\n", argv[0]); 13 | return 1; 14 | } 15 | 16 | string mnn_path = argv[1]; 17 | UltraFace ultraface(mnn_path, 320, 240, 4, 0.65); // config model input 18 | 19 | for (int i = 2; i < argc; i++) { 20 | string image_file = argv[i]; 21 | cout << "Processing " << image_file << endl; 22 | 23 | cv::Mat frame = cv::imread(image_file); 24 | auto start = chrono::steady_clock::now(); 25 | vector face_info; 26 | ultraface.detect(frame, face_info); 27 | 28 | for (auto face : face_info) { 29 | cv::Point pt1(face.x1, face.y1); 30 | cv::Point pt2(face.x2, face.y2); 31 | cv::rectangle(frame, pt1, pt2, cv::Scalar(0, 255, 0), 2); 32 | } 33 | 34 | auto end = chrono::steady_clock::now(); 35 | chrono::duration elapsed = end - start; 36 | cout << "all time: " << elapsed.count() << " s" << endl; 37 | cv::imshow("UltraFace", frame); 38 | cv::waitKey(); 39 | string result_name = "result" + to_string(i) + ".jpg"; 40 | cv::imwrite(result_name, frame); 41 | } 42 | return 0; 43 | } 44 | -------------------------------------------------------------------------------- /caffe/MyCaffe.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict, Counter 2 | 3 | from caffe.proto import caffe_pb2 4 | from google import protobuf 5 | import six 6 | 7 | def param_name_dict(): 8 | """Find out the correspondence between layer names and parameter names.""" 9 | 10 | layer = caffe_pb2.LayerParameter() 11 | # get all parameter names (typically underscore case) and corresponding 12 | # type names (typically camel case), which contain the layer names 13 | # (note that not all parameters correspond to layers, but we'll ignore that) 14 | param_names = [f.name for f in layer.DESCRIPTOR.fields if f.name.endswith('_param')] 15 | param_type_names = [type(getattr(layer, s)).__name__ for s in param_names] 16 | # strip the final '_param' or 'Parameter' 17 | param_names = [s[:-len('_param')] for s in param_names] 18 | param_type_names = [s[:-len('Parameter')] for s in param_type_names] 19 | return dict(zip(param_type_names, param_names)) 20 | 21 | def assign_proto(proto, name, val): 22 | """Assign a Python object to a protobuf message, based on the Python 23 | type (in recursive fashion). Lists become repeated fields/messages, dicts 24 | become messages, and other types are assigned directly. 
For convenience, 25 | repeated fields whose values are not lists are converted to single-element 26 | lists; e.g., `my_repeated_int_field=3` is converted to 27 | `my_repeated_int_field=[3]`.""" 28 | 29 | is_repeated_field = hasattr(getattr(proto, name), 'extend') 30 | if is_repeated_field and not isinstance(val, list): 31 | val = [val] 32 | if isinstance(val, list): 33 | if isinstance(val[0], dict): 34 | for item in val: 35 | proto_item = getattr(proto, name).add() 36 | for k, v in six.iteritems(item): 37 | assign_proto(proto_item, k, v) 38 | else: 39 | getattr(proto, name).extend(val) 40 | elif isinstance(val, dict): 41 | for k, v in six.iteritems(val): 42 | assign_proto(getattr(proto, name), k, v) 43 | else: 44 | setattr(proto, name, val) 45 | 46 | class Function(object): 47 | """A Function specifies a layer, its parameters, and its inputs (which 48 | are Tops from other layers).""" 49 | 50 | def __init__(self, type_name, layer_name, inputs,outputs, **params): 51 | self.type_name = type_name 52 | self.inputs = inputs 53 | self.outputs = outputs 54 | self.params = params 55 | self.layer_name = layer_name 56 | self.ntop = self.params.get('ntop', 1) 57 | # use del to make sure kwargs are not double-processed as layer params 58 | if 'ntop' in self.params: 59 | del self.params['ntop'] 60 | self.in_place = self.params.get('in_place', False) 61 | if 'in_place' in self.params: 62 | del self.params['in_place'] 63 | # self.tops = tuple(Top(self, n) for n in range(self.ntop))l 64 | 65 | def _get_name(self, names, autonames): 66 | if self not in names and self.ntop > 0: 67 | names[self] = self._get_top_name(self.tops[0], names, autonames) 68 | elif self not in names: 69 | autonames[self.type_name] += 1 70 | names[self] = self.type_name + str(autonames[self.type_name]) 71 | return names[self] 72 | 73 | def _get_top_name(self, top, names, autonames): 74 | if top not in names: 75 | autonames[top.fn.type_name] += 1 76 | names[top] = top.fn.type_name + str(autonames[top.fn.type_name]) 77 | return names[top] 78 | 79 | def _to_proto(self): 80 | bottom_names = [] 81 | for inp in self.inputs: 82 | # inp._to_proto(layers, names, autonames) 83 | bottom_names.append(inp) 84 | layer = caffe_pb2.LayerParameter() 85 | layer.type = self.type_name 86 | layer.bottom.extend(bottom_names) 87 | 88 | if self.in_place: 89 | layer.top.extend(layer.bottom) 90 | else: 91 | for top in self.outputs: 92 | layer.top.append(top) 93 | layer.name = self.layer_name 94 | # print(self.type_name + "...") 95 | for k, v in six.iteritems(self.params): 96 | # special case to handle generic *params 97 | # print("generating "+k+"...") 98 | 99 | if k.endswith('param'): 100 | assign_proto(layer, k, v) 101 | else: 102 | try: 103 | assign_proto(getattr(layer, 104 | _param_names[self.type_name] + '_param'), k, v) 105 | except (AttributeError, KeyError): 106 | assign_proto(layer, k, v) 107 | 108 | return layer 109 | 110 | class Layers(object): 111 | """A Layers object is a pseudo-module which generates functions that specify 112 | layers; e.g., Layers().Convolution(bottom, kernel_size=3) will produce a Top 113 | specifying a 3x3 convolution applied to bottom.""" 114 | 115 | def __getattr__(self, name): 116 | def layer_fn(*args, **kwargs): 117 | fn = Function(name, args, kwargs) 118 | return fn 119 | return layer_fn 120 | 121 | 122 | 123 | 124 | _param_names = param_name_dict() 125 | 126 | -------------------------------------------------------------------------------- /caffe/README.md: 
-------------------------------------------------------------------------------- 1 | ## I added several operator(Transpose/Permute/Softmax) conversion support based on [onnx2caffe](https://github.com/MTlab/onnx2caffe). 2 | # Convert pytorch to Caffe by ONNX 3 | This tool converts [pytorch](https://github.com/pytorch/pytorch) model to [Caffe](https://github.com/BVLC/caffe) model by [ONNX](https://github.com/onnx/onnx) 4 | only use for inference 5 | 6 | ### Dependencies 7 | * caffe (with python support) 8 | * pytorch 0.4+ (optional if you only want to convert onnx) 9 | * onnx 10 | 11 | ### Current support operation 12 | * Conv 13 | * ConvTranspose 14 | * BatchNormalization 15 | * MaxPool 16 | * AveragePool 17 | * Relu 18 | * Sigmoid 19 | * Dropout 20 | * Gemm (InnerProduct only) 21 | * Add 22 | * Mul 23 | * Reshape 24 | * Upsample 25 | * Concat 26 | * Flatten 27 | * Transpose/Permute (new) 28 | * Softmax (new) 29 | 30 | ## PS 31 | * You need to use [onnx-simplifier](https://github.com/daquexian/onnx-simplifier) to simplify onnx model and then run convertCaffe.py to convert it into caffe model. 32 | * You need to install [ssd-caffe](https://github.com/weiliu89/caffe/tree/ssd) and pycaffe of ssd-caffe. 33 | -------------------------------------------------------------------------------- /caffe/convertCaffe.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | 5 | import caffe 6 | from caffe.proto import caffe_pb2 7 | import onnx 8 | 9 | caffe.set_mode_cpu() 10 | sys.path.append('../') 11 | from onnx2caffe._transformers import ConvAddFuser, ConstantsToInitializers 12 | from onnx2caffe._graph import Graph 13 | 14 | import onnx2caffe._operators as cvt 15 | import onnx2caffe._weightloader as wlr 16 | from onnx2caffe._error_utils import ErrorHandling 17 | from onnx import shape_inference 18 | 19 | transformers = [ 20 | ConstantsToInitializers(), 21 | ConvAddFuser(), 22 | ] 23 | 24 | 25 | def convertToCaffe(graph, prototxt_save_path, caffe_model_save_path): 26 | exist_edges = [] 27 | layers = [] 28 | exist_nodes = [] 29 | err = ErrorHandling() 30 | for i in graph.inputs: 31 | edge_name = i[0] 32 | input_layer = cvt.make_input(i) 33 | layers.append(input_layer) 34 | exist_edges.append(i[0]) 35 | graph.channel_dims[edge_name] = graph.shape_dict[edge_name][1] 36 | 37 | for id, node in enumerate(graph.nodes): 38 | node_name = node.name 39 | op_type = node.op_type 40 | inputs = node.inputs 41 | inputs_tensor = node.input_tensors 42 | input_non_exist_flag = False 43 | 44 | for inp in inputs: 45 | if inp not in exist_edges and inp not in inputs_tensor: 46 | input_non_exist_flag = True 47 | break 48 | if input_non_exist_flag: 49 | continue 50 | 51 | if op_type not in cvt._ONNX_NODE_REGISTRY: 52 | err.unsupported_op(node) 53 | continue 54 | converter_fn = cvt._ONNX_NODE_REGISTRY[op_type] 55 | layer = converter_fn(node, graph, err) 56 | if type(layer) == tuple: 57 | for l in layer: 58 | layers.append(l) 59 | else: 60 | layers.append(layer) 61 | outs = node.outputs 62 | for out in outs: 63 | exist_edges.append(out) 64 | 65 | net = caffe_pb2.NetParameter() 66 | for id, layer in enumerate(layers): 67 | layers[id] = layer._to_proto() 68 | net.layer.extend(layers) 69 | 70 | with open(prototxt_save_path, 'w') as f: 71 | print(net, file=f) 72 | 73 | caffe.set_mode_cpu() 74 | deploy = prototxt_save_path 75 | net = caffe.Net(deploy, 76 | caffe.TEST) 77 | 78 | for id, node in enumerate(graph.nodes): 79 | node_name = 
node.name 80 | op_type = node.op_type 81 | inputs = node.inputs 82 | inputs_tensor = node.input_tensors 83 | input_non_exist_flag = False 84 | if op_type not in wlr._ONNX_NODE_REGISTRY: 85 | err.unsupported_op(node) 86 | continue 87 | converter_fn = wlr._ONNX_NODE_REGISTRY[op_type] 88 | converter_fn(net, node, graph, err) 89 | 90 | net.save(caffe_model_save_path) 91 | return net 92 | 93 | 94 | def getGraph(onnx_path): 95 | model = onnx.load(onnx_path) 96 | model = shape_inference.infer_shapes(model) 97 | model_graph = model.graph 98 | graph = Graph.from_onnx(model_graph) 99 | graph = graph.transformed(transformers) 100 | graph.channel_dims = {} 101 | 102 | return graph 103 | 104 | 105 | if __name__ == "__main__": 106 | onnx_path = "../models/onnx/version-RFB-320_simplified.onnx" 107 | prototxt_path = "./RFB-320.prototxt" 108 | caffemodel_path = "./RFB-320.caffemodel" 109 | graph = getGraph(onnx_path) 110 | convertToCaffe(graph, prototxt_path, caffemodel_path) 111 | -------------------------------------------------------------------------------- /caffe/model/RFB-320/RFB-320.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/caffe/model/RFB-320/RFB-320.caffemodel -------------------------------------------------------------------------------- /caffe/model/Slim-320/slim-320.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/caffe/model/Slim-320/slim-320.caffemodel -------------------------------------------------------------------------------- /caffe/onnx2caffe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/caffe/onnx2caffe/__init__.py -------------------------------------------------------------------------------- /caffe/onnx2caffe/_error_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from typing import Dict, Text, Any, Callable 6 | from ._graph import Node, Graph 7 | 8 | class ErrorHandling(object): 9 | ''' 10 | To handle errors and addition of custom layers 11 | ''' 12 | 13 | def __init__(self, 14 | add_custom_layers = False, # type: bool 15 | custom_conversion_functions = dict(), # type: Dict[Text, Any] 16 | custom_layer_nodes = [], # type : List[Node] 17 | ): 18 | # type: (...) -> None 19 | self.add_custom_layers = add_custom_layers 20 | self.custom_conversion_functions = custom_conversion_functions 21 | self.custom_layer_nodes = custom_layer_nodes 22 | 23 | 24 | def unsupported_op(self, 25 | node, # type: Node 26 | ): 27 | # type: (...) 
-> Callable[[Any, Node, Graph, ErrorHandling], None] 28 | ''' 29 | Either raise an error for an unsupported op type or return custom layer add function 30 | ''' 31 | if self.add_custom_layers: 32 | from ._operators import _convert_custom 33 | return _convert_custom 34 | else: 35 | raise TypeError( 36 | "ONNX node of type {} is not supported.\n".format(node.op_type,) 37 | ) 38 | 39 | 40 | def unsupported_op_configuration(self, 41 | node, # type: Node 42 | err_message, # type: Text 43 | ): 44 | raise TypeError( 45 | "Error while converting op of type: {}. Error message: {}\n".format(node.op_type, err_message, ) 46 | ) 47 | 48 | 49 | def missing_initializer(self, 50 | node, # type: Node 51 | err_message, # type: Text 52 | ): 53 | # type: (...) -> None 54 | ''' 55 | Missing initializer error 56 | ''' 57 | raise ValueError( 58 | "Missing initializer error in op of type {}, with input name = {}, " 59 | "output name = {}. Error message: {}\n". 60 | format(node.op_type, node.inputs[0], node.outputs[0], err_message) 61 | ) 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /cal_flops.py: -------------------------------------------------------------------------------- 1 | """ 2 | Output model complexity 3 | """ 4 | import time 5 | 6 | import torch 7 | from torchstat import stat 8 | from torchsummary import summary 9 | 10 | from vision.ssd.mb_tiny_fd import create_mb_tiny_fd 11 | from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd 12 | 13 | device = "cpu" # default cpu 14 | width = 320 15 | height = 240 16 | 17 | # fd = create_mb_tiny_fd(2) 18 | fd = create_Mb_Tiny_RFB_fd(2) 19 | 20 | print(fd) 21 | fd.eval() 22 | fd.to(device) 23 | x = torch.randn(1, 3, width, height).to(device) 24 | 25 | summary(fd.to("cuda"), (3, width, height)) 26 | 27 | from ptflops import get_model_complexity_info 28 | 29 | flops, params = get_model_complexity_info(fd.to(device), (3, width, height), print_per_layer_stat=True, as_strings=True) 30 | print("FLOPS:", flops) 31 | print("PARAMS:", params) 32 | 33 | for i in range(5): 34 | time_time = time.time() 35 | features = fd(x) 36 | print("inference time :{} s".format(time.time() - time_time)) 37 | 38 | stat(fd, (3, width, height)) 39 | -------------------------------------------------------------------------------- /check_gt_box.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is used to check the data size distribution in the dataset. 
3 | """ 4 | import xml.etree.ElementTree as ET 5 | from math import sqrt as sqrt 6 | 7 | import cv2 8 | import matplotlib.pyplot as plt 9 | 10 | # sets = [("./data/wider_face_add_lm_10_10", "trainval")] 11 | sets = [("./data/wider_face_add_lm_10_10", "test")] 12 | 13 | classes = ['face'] 14 | 15 | if __name__ == '__main__': 16 | width = [] 17 | height = [] 18 | 19 | for image_set, set in sets: 20 | image_ids = open('{}/ImageSets/Main/{}.txt'.format(image_set, set)).read().strip().split() 21 | for image_id in image_ids: 22 | img_path = '{}/JPEGImages/{}.jpg'.format(image_set, image_id) 23 | label_file = open('{}/Annotations/{}.xml'.format(image_set, image_id)) 24 | tree = ET.parse(label_file) 25 | root = tree.getroot() 26 | size = root.find('size') 27 | img_w = int(size.find('width').text) 28 | img_h = int(size.find('height').text) 29 | img = cv2.imread(img_path) 30 | for obj in root.iter('object'): 31 | difficult = obj.find('difficult').text 32 | cls = obj.find('name').text 33 | if cls not in classes or int(difficult) == 2: 34 | continue 35 | cls_id = classes.index(cls) 36 | 37 | xmlbox = obj.find('bndbox') 38 | xmin = int(xmlbox.find('xmin').text) 39 | ymin = int(xmlbox.find('ymin').text) 40 | xmax = int(xmlbox.find('xmax').text) 41 | ymax = int(xmlbox.find('ymax').text) 42 | w = xmax - xmin 43 | h = ymax - ymin 44 | 45 | # img = cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 255, 0), 8) 46 | w_change = (w / img_w) * 320 47 | h_change = (h / img_h) * 240 48 | s = w_change * h_change 49 | if w_change / h_change > 6: 50 | print("{}/{}/{}/{}".format(xmin, xmax, ymin, ymax)) 51 | width.append(sqrt(s)) 52 | height.append(w_change / h_change) 53 | print(img_path) 54 | # img = cv2.resize(img, (608, 608)) 55 | # cv2.imwrite('{}_{}'.format(image_set.split('/')[-1], set), img) 56 | # cv2.waitKey() 57 | 58 | plt.plot(width, height, 'ro') 59 | plt.show() 60 | -------------------------------------------------------------------------------- /convert_to_onnx.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is used to convert the pytorch model into an onnx format model. 
3 | """ 4 | import sys 5 | 6 | import torch.onnx 7 | 8 | from vision.ssd.config.fd_config import define_img_size 9 | 10 | input_img_size = 320 # define input size ,default optional(128/160/320/480/640/1280) 11 | define_img_size(input_img_size) 12 | from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd 13 | from vision.ssd.mb_tiny_fd import create_mb_tiny_fd 14 | 15 | # net_type = "slim" # inference faster,lower precision 16 | net_type = "RFB" # inference lower,higher precision 17 | 18 | label_path = "models/voc-model-labels.txt" 19 | class_names = [name.strip() for name in open(label_path).readlines()] 20 | num_classes = len(class_names) 21 | 22 | if net_type == 'slim': 23 | model_path = "models/pretrained/version-slim-320.pth" 24 | # model_path = "models/pretrained/version-slim-640.pth" 25 | net = create_mb_tiny_fd(len(class_names), is_test=True) 26 | elif net_type == 'RFB': 27 | model_path = "models/pretrained/version-RFB-320.pth" 28 | # model_path = "models/pretrained/version-RFB-640.pth" 29 | net = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True) 30 | 31 | else: 32 | print("unsupport network type.") 33 | sys.exit(1) 34 | net.load(model_path) 35 | net.eval() 36 | net.to("cuda") 37 | 38 | model_name = model_path.split("/")[-1].split(".")[0] 39 | model_path = f"models/onnx/{model_name}.onnx" 40 | 41 | dummy_input = torch.randn(1, 3, 240, 320).to("cuda") 42 | # dummy_input = torch.randn(1, 3, 480, 640).to("cuda") #if input size is 640*480 43 | torch.onnx.export(net, dummy_input, model_path, verbose=False, input_names=['input'], output_names=['scores', 'boxes']) 44 | -------------------------------------------------------------------------------- /detect_imgs.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is used to batch detect images in a folder. 
3 | """ 4 | import argparse 5 | import os 6 | import sys 7 | 8 | import cv2 9 | 10 | from vision.ssd.config.fd_config import define_img_size 11 | 12 | parser = argparse.ArgumentParser( 13 | description='detect_imgs') 14 | 15 | parser.add_argument('--net_type', default="RFB", type=str, 16 | help='The network architecture ,optional: RFB (higher precision) or slim (faster)') 17 | parser.add_argument('--input_size', default=640, type=int, 18 | help='define network input size,default optional value 128/160/320/480/640/1280') 19 | parser.add_argument('--threshold', default=0.6, type=float, 20 | help='score threshold') 21 | parser.add_argument('--candidate_size', default=1500, type=int, 22 | help='nms candidate size') 23 | parser.add_argument('--path', default="imgs", type=str, 24 | help='imgs dir') 25 | parser.add_argument('--test_device', default="cuda:0", type=str, 26 | help='cuda:0 or cpu') 27 | args = parser.parse_args() 28 | define_img_size(args.input_size) # must put define_img_size() before 'import create_mb_tiny_fd, create_mb_tiny_fd_predictor' 29 | 30 | from vision.ssd.mb_tiny_fd import create_mb_tiny_fd, create_mb_tiny_fd_predictor 31 | from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd, create_Mb_Tiny_RFB_fd_predictor 32 | 33 | result_path = "./detect_imgs_results" 34 | label_path = "./models/voc-model-labels.txt" 35 | test_device = args.test_device 36 | 37 | class_names = [name.strip() for name in open(label_path).readlines()] 38 | if args.net_type == 'slim': 39 | model_path = "models/pretrained/version-slim-320.pth" 40 | # model_path = "models/pretrained/version-slim-640.pth" 41 | net = create_mb_tiny_fd(len(class_names), is_test=True, device=test_device) 42 | predictor = create_mb_tiny_fd_predictor(net, candidate_size=args.candidate_size, device=test_device) 43 | elif args.net_type == 'RFB': 44 | model_path = "models/pretrained/version-RFB-320.pth" 45 | # model_path = "models/pretrained/version-RFB-640.pth" 46 | net = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True, device=test_device) 47 | predictor = create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=args.candidate_size, device=test_device) 48 | else: 49 | print("The net type is wrong!") 50 | sys.exit(1) 51 | net.load(model_path) 52 | 53 | if not os.path.exists(result_path): 54 | os.makedirs(result_path) 55 | listdir = os.listdir(args.path) 56 | sum = 0 57 | for file_path in listdir: 58 | img_path = os.path.join(args.path, file_path) 59 | orig_image = cv2.imread(img_path) 60 | image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) 61 | boxes, labels, probs = predictor.predict(image, args.candidate_size / 2, args.threshold) 62 | sum += boxes.size(0) 63 | for i in range(boxes.size(0)): 64 | box = boxes[i, :] 65 | cv2.rectangle(orig_image, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), 2) 66 | # label = f"""{voc_dataset.class_names[labels[i]]}: {probs[i]:.2f}""" 67 | label = f"{probs[i]:.2f}" 68 | # cv2.putText(orig_image, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) 69 | cv2.putText(orig_image, str(boxes.size(0)), (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) 70 | cv2.imwrite(os.path.join(result_path, file_path), orig_image) 71 | print(f"Found {len(probs)} faces. 
The output image is {result_path}") 72 | print(sum) 73 | -------------------------------------------------------------------------------- /detect_imgs_onnx.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code uses the onnx model to detect faces from live video or cameras. 3 | """ 4 | import os 5 | import time 6 | 7 | import cv2 8 | import numpy as np 9 | import onnx 10 | import vision.utils.box_utils_numpy as box_utils 11 | from caffe2.python.onnx import backend 12 | 13 | # onnx runtime 14 | import onnxruntime as ort 15 | 16 | 17 | def predict(width, height, confidences, boxes, prob_threshold, iou_threshold=0.3, top_k=-1): 18 | boxes = boxes[0] 19 | confidences = confidences[0] 20 | picked_box_probs = [] 21 | picked_labels = [] 22 | for class_index in range(1, confidences.shape[1]): 23 | probs = confidences[:, class_index] 24 | mask = probs > prob_threshold 25 | probs = probs[mask] 26 | if probs.shape[0] == 0: 27 | continue 28 | subset_boxes = boxes[mask, :] 29 | box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1) 30 | box_probs = box_utils.hard_nms(box_probs, 31 | iou_threshold=iou_threshold, 32 | top_k=top_k, 33 | ) 34 | picked_box_probs.append(box_probs) 35 | picked_labels.extend([class_index] * box_probs.shape[0]) 36 | if not picked_box_probs: 37 | return np.array([]), np.array([]), np.array([]) 38 | picked_box_probs = np.concatenate(picked_box_probs) 39 | picked_box_probs[:, 0] *= width 40 | picked_box_probs[:, 1] *= height 41 | picked_box_probs[:, 2] *= width 42 | picked_box_probs[:, 3] *= height 43 | return picked_box_probs[:, :4].astype(np.int32), np.array(picked_labels), picked_box_probs[:, 4] 44 | 45 | 46 | label_path = "models/voc-model-labels.txt" 47 | 48 | onnx_path = "models/onnx/version-RFB-320.onnx" 49 | class_names = [name.strip() for name in open(label_path).readlines()] 50 | 51 | predictor = onnx.load(onnx_path) 52 | onnx.checker.check_model(predictor) 53 | onnx.helper.printable_graph(predictor.graph) 54 | predictor = backend.prepare(predictor, device="CPU") # default CPU 55 | 56 | ort_session = ort.InferenceSession(onnx_path) 57 | input_name = ort_session.get_inputs()[0].name 58 | result_path = "./detect_imgs_results_onnx" 59 | 60 | threshold = 0.7 61 | path = "imgs" 62 | sum = 0 63 | if not os.path.exists(result_path): 64 | os.makedirs(result_path) 65 | listdir = os.listdir(path) 66 | sum = 0 67 | for file_path in listdir: 68 | img_path = os.path.join(path, file_path) 69 | orig_image = cv2.imread(img_path) 70 | image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) 71 | image = cv2.resize(image, (320, 240)) 72 | # image = cv2.resize(image, (640, 480)) 73 | image_mean = np.array([127, 127, 127]) 74 | image = (image - image_mean) / 128 75 | image = np.transpose(image, [2, 0, 1]) 76 | image = np.expand_dims(image, axis=0) 77 | image = image.astype(np.float32) 78 | # confidences, boxes = predictor.run(image) 79 | time_time = time.time() 80 | confidences, boxes = ort_session.run(None, {input_name: image}) 81 | print("cost time:{}".format(time.time() - time_time)) 82 | boxes, labels, probs = predict(orig_image.shape[1], orig_image.shape[0], confidences, boxes, threshold) 83 | for i in range(boxes.shape[0]): 84 | box = boxes[i, :] 85 | label = f"{class_names[labels[i]]}: {probs[i]:.2f}" 86 | 87 | cv2.rectangle(orig_image, (box[0], box[1]), (box[2], box[3]), (255, 255, 0), 4) 88 | 89 | # cv2.putText(orig_image, label, 90 | # (box[0] + 20, box[1] + 40), 91 | # cv2.FONT_HERSHEY_SIMPLEX, 92 | # 1, # font 
scale 93 | # (255, 0, 255), 94 | # 2) # line type 95 | cv2.imwrite(os.path.join(result_path, file_path), orig_image) 96 | sum += boxes.shape[0] 97 | print("sum:{}".format(sum)) 98 | -------------------------------------------------------------------------------- /imgs/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/1.jpg -------------------------------------------------------------------------------- /imgs/10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/10.jpg -------------------------------------------------------------------------------- /imgs/11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/11.jpg -------------------------------------------------------------------------------- /imgs/12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/12.jpg -------------------------------------------------------------------------------- /imgs/13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/13.jpg -------------------------------------------------------------------------------- /imgs/15.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/15.jpg -------------------------------------------------------------------------------- /imgs/16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/16.jpg -------------------------------------------------------------------------------- /imgs/17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/17.jpg -------------------------------------------------------------------------------- /imgs/18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/18.jpg -------------------------------------------------------------------------------- /imgs/19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/19.jpg -------------------------------------------------------------------------------- /imgs/2.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/2.jpg -------------------------------------------------------------------------------- /imgs/20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/20.jpg -------------------------------------------------------------------------------- /imgs/21.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/21.jpg -------------------------------------------------------------------------------- /imgs/22.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/22.jpg -------------------------------------------------------------------------------- /imgs/23.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/23.jpg -------------------------------------------------------------------------------- /imgs/24.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/24.jpg -------------------------------------------------------------------------------- /imgs/25.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/25.jpg -------------------------------------------------------------------------------- /imgs/26.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/26.jpg -------------------------------------------------------------------------------- /imgs/27.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/27.jpg -------------------------------------------------------------------------------- /imgs/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/3.jpg -------------------------------------------------------------------------------- /imgs/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/5.jpg -------------------------------------------------------------------------------- /imgs/6.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/6.jpg -------------------------------------------------------------------------------- /imgs/8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/8.jpg -------------------------------------------------------------------------------- /imgs/9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/imgs/9.jpg -------------------------------------------------------------------------------- /masked_face/README.md: -------------------------------------------------------------------------------- 1 | # Masked Face Detection 2 | 3 | ![img1](https://github.com/yanghaojin/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/masked_face/readme_imgs/img4.jpeg) 4 | 5 | # Extending Ultra-L face model for masked facial detection 6 | 7 | The Ultra-L face detection model is very popular in edge and client-based applications. It strikes a surprising balance between model size and accuracy, e.g.: 8 | - The default FP32 *.pth model is **1.04~1.1MB**, and the int8-quantized model used by the inference frameworks is about **300KB**. 9 | - Only **90~109 MFlops** for 320x240 input resolution. 10 | - Inference code is provided for [NCNN](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/tree/master/ncnn), [MNN](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/tree/master/MNN), [INT8](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/tree/master/MNN/model), 11 | [Onnx](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/tree/master/caffe), [OpencvDNN](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/caffe/ultra_face_opencvdnn_inference.py), etc. 12 | 13 | COVID-19 has ravaged the world over the past two years, and wearing a mask has become the norm on many occasions. However, most traditional face datasets such as Wider Face lack face samples with masks, so face detection models trained on such conventional datasets fail in scenes where everyone wears a mask. 14 | [Face-Mask-Detection](https://github.com/chandrikadeb7/Face-Mask-Detection) is the most popular face detection model on GitHub that supports mask detection. 15 | However, it is trained on only 4095 images (2165 masked / 1930 without mask), which is a rather small dataset, 16 | so it produces many false positives in real application scenarios. 17 | 18 | This motivated me to build a larger dataset in order to provide better open-source masked face detection models and help the world get through the pandemic. 19 | The main contribution of this project is to provide balanced facial training data combining the [wider_face_add_lm_10_10](https://drive.google.com/open?id=1OBY-Pk5hkcVBX1dRBOeLI4e4OCvqJRnH) and [MAFA face](https://imsg.ac.cn/research/maskedface.html) datasets. The [MAFA](https://imsg.ac.cn/research/maskedface.html) data was converted to Pascal VOC format and merged into [wider_face_add_lm_10_10](https://drive.google.com/open?id=1OBY-Pk5hkcVBX1dRBOeLI4e4OCvqJRnH).
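For orientation, the merged data is organized in the standard Pascal VOC layout that the dataset loaders in this repository (e.g. `vision/datasets/voc_dataset.py`) expect; a rough sketch (the top-level directory name is only illustrative):

```Shell
WIDER_MAFA_Balanced/
    Annotations/          # one XML file per image with face / masked_face boxes
    ImageSets/Main/
        trainval.txt      # image ids used for training
        test.txt          # image ids used for evaluation
    JPEGImages/           # the images themselves
    labels.txt            # comma-separated class names; the loader prepends BACKGROUND
```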
20 | 21 | ## About the WIDER_MAFA_Balanced dataset 22 | The *Wider_MAFA_Balanced* dataset (**4.8GB**) can be downloaded from [HPI owncloud](https://owncloud.hpi.de/s/L4MUGqrpeENLbSv). 23 | It contains 38225 images in total: 31084 for training and 7141 for testing. 24 | Its composition is shown in the following table: 25 | 26 | Source | Class | Train | Test | Total | 27 | ----|------|-------|------|----- 28 | MAFA face | masked_face | 15542 | 3922 | 19464 | 29 | Wider face | face | 12859 | 3219 | 16078 | 30 | *MAFA human body* | face | 2683 | 0 | 2683 | 31 | 32 | *MAFA human body* denotes the training samples extracted with human-body occlusions. 33 | 34 | I used this script to convert the MAFA annotations to Pascal VOC format: 35 | ```Shell 36 | masked_face/mafa2voc.py 37 | ``` 38 | 39 | ## About the pre-trained models 40 | ```Shell 41 | masked_face/ 42 | pretrained/ 43 | RFB-320-masked_face-v2.pth # trained with 320x240 44 | RFB-640-masked_face-v2.pth # trained with 640x480 45 | RFB-640-masked_face-v2.onnx # suitable for 640x480 46 | RFB-1280-masked_face-v2.onnx # suitable for 1280x960 47 | ``` 48 | 49 | ## Detection Result (input resolution: 1280x960) 50 | 51 | The following visual results were created with this script: 52 | ```Shell 53 | masked_face/detect_imgs.py 54 | ``` 55 | ![img1](https://github.com/yanghaojin/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/masked_face/readme_imgs/img1.jpeg) 56 | ![img2](https://github.com/yanghaojin/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/masked_face/readme_imgs/img2.jpeg) 57 | ![img3](https://github.com/yanghaojin/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/masked_face/readme_imgs/img3.jpg) 58 | ![img5](https://github.com/yanghaojin/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/masked_face/readme_imgs/img5.jpeg) 59 | ![img6](https://github.com/yanghaojin/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/masked_face/readme_imgs/img6.webp) 60 | ![img7](https://github.com/yanghaojin/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/masked_face/readme_imgs/img7.webp) 61 | ![img8](https://github.com/yanghaojin/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/masked_face/readme_imgs/img8.jpeg) 62 | 63 | Author: Haojin Yang 64 | -------------------------------------------------------------------------------- /masked_face/detect_imgs.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is used to batch detect images in a folder.
3 | """ 4 | import os, sys 5 | currentdir = os.path.dirname(os.path.realpath(__file__)) 6 | parentdir = os.path.dirname(currentdir) 7 | sys.path.append(parentdir) 8 | 9 | import argparse 10 | import os 11 | import sys 12 | 13 | import cv2 14 | 15 | from vision.ssd.config.fd_config import define_img_size 16 | 17 | parser = argparse.ArgumentParser( 18 | description='detect_imgs') 19 | 20 | parser.add_argument('--net_type', default="RFB", type=str, 21 | help='The network architecture ,optional: RFB (higher precision) or slim (faster)') 22 | parser.add_argument('--input_size', default=1280, type=int, 23 | help='define network input size,default optional value 128/160/320/480/640/1280') 24 | parser.add_argument('--threshold', default=0.3, type=float, 25 | help='score threshold') 26 | parser.add_argument('--candidate_size', default=1200, type=int, 27 | help='nms candidate size') 28 | parser.add_argument('--path', default="imgs", type=str, 29 | help='imgs dir') 30 | parser.add_argument('--test_device', default="cpu", type=str, 31 | help='cuda:0 or cpu') 32 | args = parser.parse_args() 33 | define_img_size(args.input_size) # must put define_img_size() before 'import create_mb_tiny_fd, create_mb_tiny_fd_predictor' 34 | 35 | from vision.ssd.mb_tiny_fd import create_mb_tiny_fd, create_mb_tiny_fd_predictor 36 | from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd, create_Mb_Tiny_RFB_fd_predictor 37 | 38 | result_path = "detect_imgs_results" 39 | label_path = "./voc-model-labels.txt" 40 | test_device = args.test_device 41 | 42 | class_names = [name.strip() for name in open(label_path).readlines()] 43 | 44 | if args.net_type == 'RFB': 45 | model_path = "pretrained/RFB-640-masked_face-v2.pth" 46 | net = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True, device=test_device) 47 | predictor = create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=args.candidate_size, device=test_device) 48 | else: 49 | print("The net type is wrong!") 50 | sys.exit(1) 51 | net.load(model_path) 52 | 53 | if not os.path.exists(result_path): 54 | os.makedirs(result_path) 55 | listdir = os.listdir(args.path) 56 | sum = 0 57 | for file_path in listdir: 58 | img_path = os.path.join(args.path, file_path) 59 | orig_image = cv2.imread(img_path) 60 | if orig_image is None: continue 61 | image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) 62 | boxes, labels, probs = predictor.predict(image, args.candidate_size / 2, args.threshold) 63 | sum += boxes.size(0) 64 | for i in range(boxes.size(0)): 65 | box = boxes[i, :] 66 | label_index = labels[i].item() 67 | cv2.rectangle(orig_image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 165, 255), 2) 68 | # label = f"""{voc_dataset.class_names[labels[i]]}: {probs[i]:.2f}""" 69 | label = f"{probs[i]:.2f}" 70 | # cv2.putText(orig_image, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) 71 | cv2.putText(orig_image, class_names[label_index], 72 | (int(box[0]), int(box[1]) - 10), 73 | cv2.FONT_HERSHEY_SIMPLEX, 74 | 0.5, # font scale 75 | (100, 0, 255), 76 | 1) # line type 77 | cv2.putText(orig_image, str(boxes.size(0)), (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) 78 | cv2.imwrite(os.path.join(result_path, file_path), orig_image) 79 | print(f"Found {len(probs)} faces. 
The output image is {result_path}") 80 | print(sum) 81 | -------------------------------------------------------------------------------- /masked_face/imgs/img1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/imgs/img1.jpeg -------------------------------------------------------------------------------- /masked_face/imgs/img2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/imgs/img2.jpeg -------------------------------------------------------------------------------- /masked_face/imgs/img3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/imgs/img3.jpg -------------------------------------------------------------------------------- /masked_face/imgs/img4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/imgs/img4.jpeg -------------------------------------------------------------------------------- /masked_face/imgs/img5.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/imgs/img5.jpeg -------------------------------------------------------------------------------- /masked_face/imgs/img6.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/imgs/img6.webp -------------------------------------------------------------------------------- /masked_face/imgs/img7.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/imgs/img7.webp -------------------------------------------------------------------------------- /masked_face/imgs/img8.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/imgs/img8.jpeg -------------------------------------------------------------------------------- /masked_face/pretrained/RFB-1280-masked_face-v2.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/pretrained/RFB-1280-masked_face-v2.onnx -------------------------------------------------------------------------------- /masked_face/pretrained/RFB-320-masked_face-v2.pth: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/pretrained/RFB-320-masked_face-v2.pth -------------------------------------------------------------------------------- /masked_face/pretrained/RFB-640-masked_face-v2.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/pretrained/RFB-640-masked_face-v2.onnx -------------------------------------------------------------------------------- /masked_face/pretrained/RFB-640-masked_face-v2.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/pretrained/RFB-640-masked_face-v2.pth -------------------------------------------------------------------------------- /masked_face/readme_imgs/img1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/readme_imgs/img1.jpeg -------------------------------------------------------------------------------- /masked_face/readme_imgs/img2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/readme_imgs/img2.jpeg -------------------------------------------------------------------------------- /masked_face/readme_imgs/img3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/readme_imgs/img3.jpg -------------------------------------------------------------------------------- /masked_face/readme_imgs/img4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/readme_imgs/img4.jpeg -------------------------------------------------------------------------------- /masked_face/readme_imgs/img5.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/readme_imgs/img5.jpeg -------------------------------------------------------------------------------- /masked_face/readme_imgs/img6.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/readme_imgs/img6.webp -------------------------------------------------------------------------------- /masked_face/readme_imgs/img7.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/readme_imgs/img7.webp -------------------------------------------------------------------------------- 
/masked_face/readme_imgs/img8.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/masked_face/readme_imgs/img8.jpeg -------------------------------------------------------------------------------- /masked_face/voc-model-labels.txt: -------------------------------------------------------------------------------- 1 | BACKGROUND 2 | face 3 | masked_face -------------------------------------------------------------------------------- /models/onnx/version-RFB-320.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/onnx/version-RFB-320.onnx -------------------------------------------------------------------------------- /models/onnx/version-RFB-320_simplified.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/onnx/version-RFB-320_simplified.onnx -------------------------------------------------------------------------------- /models/onnx/version-RFB-320_without_postprocessing.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/onnx/version-RFB-320_without_postprocessing.onnx -------------------------------------------------------------------------------- /models/onnx/version-RFB-640.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/onnx/version-RFB-640.onnx -------------------------------------------------------------------------------- /models/onnx/version-slim-320.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/onnx/version-slim-320.onnx -------------------------------------------------------------------------------- /models/onnx/version-slim-320_simplified.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/onnx/version-slim-320_simplified.onnx -------------------------------------------------------------------------------- /models/onnx/version-slim-320_without_postprocessing.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/onnx/version-slim-320_without_postprocessing.onnx -------------------------------------------------------------------------------- /models/pretrained/version-RFB-320.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/pretrained/version-RFB-320.pth 
-------------------------------------------------------------------------------- /models/pretrained/version-RFB-640.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/pretrained/version-RFB-640.pth -------------------------------------------------------------------------------- /models/pretrained/version-slim-320.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/pretrained/version-slim-320.pth -------------------------------------------------------------------------------- /models/pretrained/version-slim-640.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/models/pretrained/version-slim-640.pth -------------------------------------------------------------------------------- /models/readme: -------------------------------------------------------------------------------- 1 | A place to store your models. 2 | 3 | ./pretrained: 4 | version-RFB-320.pth : train with img input size 320x240 5 | version-RFB-640.pth : train with img input size 640x480 6 | version-slim-320.pth : train with img input size 320x240 7 | version-slim-640.pth : train with img input size 640x480 8 | ./onnx 9 | version-RFB-320.onnx :version-RFB / train with 320x240/ with postprocessing 10 | version-RFB-320_simplified.onnx :version-RFB / train with 320x240/ without postprocessing /for ncnn & mnn 11 | version-RFB-320_without_postprocessing.onnx :version-RFB / train with 320x240/ without postprocessing 12 | version-slim-320.onnx :version-slim/ train with 320x240/ with postprocessing 13 | version-slim-320_simplified.onnx :version-slim/ train with 320x240/ without postprocessing /for ncnn & mnn 14 | version-slim-320_without_postprocessing.onnx :version-slim/ train with 320x240/ without postprocessing -------------------------------------------------------------------------------- /models/voc-model-labels.txt: -------------------------------------------------------------------------------- 1 | BACKGROUND 2 | face -------------------------------------------------------------------------------- /ncnn/.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: LLVM 3 | BreakBeforeBraces: Linux 4 | IndentWidth: '4' 5 | 6 | ... 
7 | -------------------------------------------------------------------------------- /ncnn/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | set(CMAKE_CXX_FLAGS "-Wall") 4 | set(CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG") 5 | set(CMAKE_CXX_FLAGS_DEBUG "-g") 6 | set(CMAKE_CXX_STANDARD 11) 7 | 8 | if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) 9 | message(STATUS "No build type selected, default to Release") 10 | set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type (default Release)" FORCE) 11 | endif() 12 | 13 | project(Ultra-Light-Fast-Generic-Face-Detector-1MB) 14 | 15 | add_subdirectory(3rdparty/ncnn ncnn_build) 16 | include_directories(3rdparty/ncnn/src) 17 | 18 | find_package(OpenCV REQUIRED) 19 | 20 | add_executable(main src/main.cpp src/UltraFace.cpp) 21 | target_link_libraries(main ncnn ${OpenCV_LIBS}) 22 | -------------------------------------------------------------------------------- /ncnn/README.md: -------------------------------------------------------------------------------- 1 | # C++ implementation of [Ultra-Light-Fast-Generic-Face-Detector-1MB](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB) with [NCNN](https://github.com/Tencent/ncnn) 2 | 3 | ## Build 4 | 5 | ```bash 6 | git clone --recursive --depth=1 https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB 7 | 8 | cd Ultra-Light-Fast-Generic-Face-Detector-1MB/ncnn 9 | 10 | mkdir build && cd build && cmake .. 11 | make -j$(nproc) 12 | ``` 13 | 14 | ## Run 15 | 16 | ```bash 17 | ./main ../data/version-RFB/RFB-320.bin ../data/version-RFB/RFB-320.param ../data/test.jpg 18 | ``` 19 | * We provide converted NCNN models of version-slim-320 and version-RFB-320 in ./ncnn/data. 20 | 21 | ## How to convert a pretrained model to ncnn 22 | 23 | * The code below (```vision/ssd/ssd.py```) should be modified as shown before converting the PyTorch pretrained model to ONNX. Make these changes, then use **convert_to_onnx.py** in the official repo to finish this step. 24 | 25 | ```python 26 | if self.is_test: 27 | confidences = F.softmax(confidences, dim=2) 28 | boxes = locations # this line should be added. 29 | #boxes = box_utils.convert_locations_to_boxes( 30 | # locations, self.priors, self.config.center_variance, self.config.size_variance 31 | #) 32 | # boxes = box_utils.center_form_to_corner_form(boxes) # these lines should be commented out. Detailed information and analysis coming soon. 33 | return confidences, boxes 34 | else: 35 | return confidences, locations 36 | ``` 37 | Then you can generate an onnx model like **version-RFB-320_without_postprocessing.onnx** in the onnx directory. (You need to rename your model when converting.) 38 | * However, the exported onnx model may contain many redundant operators such as Shape, Gather and Unsqueeze that are not supported in ncnn. 39 | 40 | ``` 41 | Shape not supported yet! 42 | Gather not supported yet! 43 | # axis=0 44 | Unsqueeze not supported yet! 45 | # axes 7 46 | Unsqueeze not supported yet! 47 | # axes 7 48 | ``` 49 | 50 | Fortunately, we can use this tool to eliminate them: 51 | https://github.com/daquexian/onnx-simplifier 52 | 53 | ``` 54 | python3 -m onnxsim version-RFB-320_without_postprocessing.onnx version-RFB-320_simplified.onnx 55 | 56 | ``` 57 | 58 | Next, you can convert the simplified onnx model, e.g. **version-RFB-320_simplified.onnx**, into an ncnn model. Here is a website for online conversion: https://convertmodel.com/?tdsourcetag=s_pctim_aiomsg.
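For a local conversion, the whole step is a two-command sketch along these lines (it assumes the `onnx2ncnn` binary is on your PATH):

```bash
# 1. strip the operators ncnn cannot parse (Shape / Gather / Unsqueeze ...)
python3 -m onnxsim version-RFB-320_without_postprocessing.onnx version-RFB-320_simplified.onnx

# 2. convert the simplified onnx model into ncnn param/bin files
onnx2ncnn version-RFB-320_simplified.onnx RFB-320.param RFB-320.bin
```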
You can also use the NCNN compiled conversion tool **onnx2ncnn**. 59 | 60 | ## PS 61 | * If you want to run faster, try using the version-slim model or using lower-resolution inputs like 160x120 or 128x96. 62 | 63 | ## Result 64 | ![img1](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/ncnn/data/result.jpg) -------------------------------------------------------------------------------- /ncnn/data/result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/ncnn/data/result.jpg -------------------------------------------------------------------------------- /ncnn/data/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/ncnn/data/test.jpg -------------------------------------------------------------------------------- /ncnn/data/version-RFB/RFB-320.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/ncnn/data/version-RFB/RFB-320.bin -------------------------------------------------------------------------------- /ncnn/data/version-slim/slim_320.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/ncnn/data/version-slim/slim_320.bin -------------------------------------------------------------------------------- /ncnn/src/UltraFace.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // UltraFace.hpp 3 | // UltraFaceTest 4 | // 5 | // Created by vealocia on 2019/10/17. 6 | // Copyright © 2019 vealocia. All rights reserved. 
7 | // 8 | 9 | #ifndef UltraFace_hpp 10 | #define UltraFace_hpp 11 | 12 | #pragma once 13 | 14 | #include "gpu.h" 15 | #include "net.h" 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #define num_featuremap 4 22 | #define hard_nms 1 23 | #define blending_nms 2 /* mix nms was been proposaled in paper blaze face, aims to minimize the temporal jitter*/ 24 | 25 | typedef struct FaceInfo { 26 | float x1; 27 | float y1; 28 | float x2; 29 | float y2; 30 | float score; 31 | 32 | float *landmarks; 33 | } FaceInfo; 34 | 35 | class UltraFace { 36 | public: 37 | UltraFace(const std::string &bin_path, const std::string ¶m_path, 38 | int input_width, int input_length, int num_thread_ = 4, float score_threshold_ = 0.7, float iou_threshold_ = 0.3, int topk_ = -1); 39 | 40 | ~UltraFace(); 41 | 42 | int detect(ncnn::Mat &img, std::vector &face_list); 43 | 44 | private: 45 | void generateBBox(std::vector &bbox_collection, ncnn::Mat scores, ncnn::Mat boxes, float score_threshold, int num_anchors); 46 | 47 | void nms(std::vector &input, std::vector &output, int type = blending_nms); 48 | 49 | private: 50 | ncnn::Net ultraface; 51 | 52 | int num_thread; 53 | int image_w; 54 | int image_h; 55 | 56 | int in_w; 57 | int in_h; 58 | int num_anchors; 59 | 60 | int topk; 61 | float score_threshold; 62 | float iou_threshold; 63 | 64 | 65 | const float mean_vals[3] = {127, 127, 127}; 66 | const float norm_vals[3] = {1.0 / 128, 1.0 / 128, 1.0 / 128}; 67 | 68 | const float center_variance = 0.1; 69 | const float size_variance = 0.2; 70 | const std::vector> min_boxes = { 71 | {10.0f, 16.0f, 24.0f}, 72 | {32.0f, 48.0f}, 73 | {64.0f, 96.0f}, 74 | {128.0f, 192.0f, 256.0f}}; 75 | const std::vector strides = {8.0, 16.0, 32.0, 64.0}; 76 | std::vector> featuremap_size; 77 | std::vector> shrinkage_size; 78 | std::vector w_h_list; 79 | 80 | std::vector> priors = {}; 81 | }; 82 | 83 | #endif /* UltraFace_hpp */ 84 | -------------------------------------------------------------------------------- /ncnn/src/main.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // main.cpp 3 | // UltraFaceTest 4 | // 5 | // Created by vealocia on 2019/10/17. 6 | // Copyright © 2019 vealocia. All rights reserved. 
7 | // 8 | 9 | #include "UltraFace.hpp" 10 | #include 11 | #include 12 | 13 | int main(int argc, char **argv) { 14 | if (argc <= 3) { 15 | fprintf(stderr, "Usage: %s [image files...]\n", argv[0]); 16 | return 1; 17 | } 18 | 19 | std::string bin_path = argv[1]; 20 | std::string param_path = argv[2]; 21 | UltraFace ultraface(bin_path, param_path, 320, 240, 1, 0.7); // config model input 22 | 23 | for (int i = 3; i < argc; i++) { 24 | std::string image_file = argv[i]; 25 | std::cout << "Processing " << image_file << std::endl; 26 | 27 | cv::Mat frame = cv::imread(image_file); 28 | ncnn::Mat inmat = ncnn::Mat::from_pixels(frame.data, ncnn::Mat::PIXEL_BGR2RGB, frame.cols, frame.rows); 29 | 30 | std::vector face_info; 31 | ultraface.detect(inmat, face_info); 32 | 33 | for (int i = 0; i < face_info.size(); i++) { 34 | auto face = face_info[i]; 35 | cv::Point pt1(face.x1, face.y1); 36 | cv::Point pt2(face.x2, face.y2); 37 | cv::rectangle(frame, pt1, pt2, cv::Scalar(0, 255, 0), 2); 38 | } 39 | 40 | cv::imshow("UltraFace", frame); 41 | cv::waitKey(); 42 | cv::imwrite("result.jpg", frame); 43 | } 44 | return 0; 45 | } 46 | -------------------------------------------------------------------------------- /opencv_dnn/cv_dnn_ultraface.h: -------------------------------------------------------------------------------- 1 | #ifndef UltraFace_hpp 2 | #define UltraFace_hpp 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "opencv2/opencv.hpp" 11 | 12 | #define num_featuremap 4 13 | #define hard_nms 1 14 | #define blending_nms 2 /* mix nms was been proposaled in paper blaze face, aims to minimize the temporal jitter*/ 15 | 16 | typedef struct FaceInfo { 17 | float x1; 18 | float y1; 19 | float x2; 20 | float y2; 21 | float score; 22 | 23 | float landmarks[10]; 24 | } FaceInfo; 25 | 26 | class UltraFace { 27 | public: 28 | UltraFace(const std::string model_path, 29 | int input_width, int input_length, int num_thread_ = 4, float score_threshold_ = 0.7, float iou_threshold_ = 0.3, int topk_ = -1); 30 | 31 | ~UltraFace(); 32 | 33 | int detect(cv::Mat &img, std::vector &face_list); 34 | 35 | private: 36 | void generateBBox(std::vector &bbox_collection, cv::Mat scores, cv::Mat boxes, float score_threshold, int num_anchors); 37 | 38 | void nms(std::vector &input, std::vector &output, int type = blending_nms); 39 | 40 | private: 41 | cv::dnn::Net ultraface; 42 | 43 | int num_thread; 44 | int image_w; 45 | int image_h; 46 | 47 | int in_w; 48 | int in_h; 49 | int num_anchors; 50 | 51 | int topk; 52 | float score_threshold; 53 | float iou_threshold; 54 | 55 | 56 | const float mean_vals[3] = { 127, 127, 127 }; 57 | const float norm_vals[3] = { 1.0 / 128, 1.0 / 128, 1.0 / 128 }; 58 | 59 | const float center_variance = 0.1; 60 | const float size_variance = 0.2; 61 | const std::vector> min_boxes = { 62 | {10.0f, 16.0f, 24.0f}, 63 | {32.0f, 48.0f}, 64 | {64.0f, 96.0f}, 65 | {128.0f, 192.0f, 256.0f} }; 66 | const std::vector strides = { 8.0, 16.0, 32.0, 64.0 }; 67 | std::vector> featuremap_size; 68 | std::vector> shrinkage_size; 69 | std::vector w_h_list; 70 | 71 | std::vector> priors = {}; 72 | }; 73 | 74 | #endif /* UltraFace_hpp */ -------------------------------------------------------------------------------- /paddle/data: -------------------------------------------------------------------------------- 1 | ../data -------------------------------------------------------------------------------- /paddle/train-version-RFB.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | model_root_path="./models/train-version-RFB" 3 | log_dir="$model_root_path/logs" 4 | log="$log_dir/log" 5 | mkdir -p "$log_dir" 6 | 7 | python3 -u train.py \ 8 | --datasets \ 9 | ./data/wider_face_add_lm_10_10 \ 10 | --validation_dataset \ 11 | ./data/wider_face_add_lm_10_10 \ 12 | --net \ 13 | RFB \ 14 | --num_epochs \ 15 | 200 \ 16 | --milestones \ 17 | "95,150" \ 18 | --lr \ 19 | 1e-2 \ 20 | --batch_size \ 21 | 24 \ 22 | --input_size \ 23 | 320 \ 24 | --checkpoint_folder \ 25 | ${model_root_path} \ 26 | --num_workers \ 27 | 4 \ 28 | --log_dir \ 29 | ${log_dir} \ 30 | --cuda_index \ 31 | 0 \ 32 | 2>&1 | tee "$log" 33 | -------------------------------------------------------------------------------- /paddle/train-version-slim.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | model_root_path="./models/train-version-slim" 3 | log_dir="$model_root_path/logs" 4 | log="$log_dir/log" 5 | mkdir -p "$log_dir" 6 | 7 | python3 -u train.py \ 8 | --datasets \ 9 | ./data/wider_face_add_lm_10_10 \ 10 | --validation_dataset \ 11 | ./data/wider_face_add_lm_10_10 \ 12 | --net \ 13 | slim \ 14 | --num_epochs \ 15 | 200 \ 16 | --milestones \ 17 | "95,150" \ 18 | --lr \ 19 | 1e-2 \ 20 | --batch_size \ 21 | 24 \ 22 | --input_size \ 23 | 320 \ 24 | --checkpoint_folder \ 25 | ${model_root_path} \ 26 | --num_workers \ 27 | 4 \ 28 | --log_dir \ 29 | ${log_dir} \ 30 | --cuda_index \ 31 | 0 \ 32 | 2>&1 | tee "$log" 33 | -------------------------------------------------------------------------------- /paddle/vision/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/paddle/vision/__init__.py -------------------------------------------------------------------------------- /paddle/vision/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/paddle/vision/datasets/__init__.py -------------------------------------------------------------------------------- /paddle/vision/datasets/voc_dataset.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pathlib 4 | import xml.etree.ElementTree as ET 5 | 6 | import cv2 7 | import numpy as np 8 | from paddle.io import Dataset 9 | 10 | class VOCDataset(Dataset): 11 | def __init__(self, root, transform=None, target_transform=None, is_test=False, keep_difficult=False): 12 | """Dataset for VOC data. 13 | Args: 14 | root: the root of the VOC2007 or VOC2012 dataset, the directory contains the following sub-directories: 15 | Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject. 
16 | """ 17 | super(VOCDataset, self).__init__() 18 | self.root = pathlib.Path(root) 19 | self.transform = transform 20 | self.target_transform = target_transform 21 | if is_test: 22 | image_sets_file = self.root / "ImageSets/Main/test.txt" 23 | else: 24 | image_sets_file = self.root / "ImageSets/Main/trainval.txt" 25 | self.ids = VOCDataset._read_image_ids(image_sets_file) 26 | self.keep_difficult = keep_difficult 27 | 28 | # if the labels file exists, read in the class names 29 | label_file_name = self.root / "labels.txt" 30 | 31 | if os.path.isfile(label_file_name): 32 | class_string = "" 33 | with open(label_file_name, 'r') as infile: 34 | for line in infile: 35 | class_string += line.rstrip() 36 | 37 | # classes should be a comma separated list 38 | classes = class_string.split(',') 39 | # prepend BACKGROUND as first class 40 | classes.insert(0, 'BACKGROUND') 41 | classes = [elem.replace(" ", "") for elem in classes] 42 | self.class_names = tuple(classes) 43 | logging.info("VOC Labels read from file: " + str(self.class_names)) 44 | 45 | else: 46 | logging.info("No labels file, using default VOC classes.") 47 | self.class_names = ('BACKGROUND', 48 | 'face') 49 | 50 | self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)} 51 | 52 | def __getitem__(self, index): 53 | image_id = self.ids[index] 54 | boxes, labels, is_difficult = self._get_annotation(image_id) 55 | if not self.keep_difficult: 56 | boxes = boxes[is_difficult == 0] 57 | labels = labels[is_difficult == 0] 58 | image = self._read_image(image_id) 59 | if self.transform: 60 | image, boxes, labels = self.transform(image, boxes, labels) 61 | if self.target_transform: 62 | boxes, labels = self.target_transform(boxes, labels) 63 | return image, boxes, labels 64 | 65 | def get_image(self, index): 66 | image_id = self.ids[index] 67 | image = self._read_image(image_id) 68 | if self.transform: 69 | image, _ = self.transform(image) 70 | return image 71 | 72 | def get_annotation(self, index): 73 | image_id = self.ids[index] 74 | return image_id, self._get_annotation(image_id) 75 | 76 | def __len__(self): 77 | return len(self.ids) 78 | 79 | @staticmethod 80 | def _read_image_ids(image_sets_file): 81 | ids = [] 82 | with open(image_sets_file) as f: 83 | for line in f: 84 | ids.append(line.rstrip()) 85 | return ids 86 | 87 | def _get_annotation(self, image_id): 88 | annotation_file = self.root / f"Annotations/{image_id}.xml" 89 | objects = ET.parse(annotation_file).findall("object") 90 | boxes = [] 91 | labels = [] 92 | is_difficult = [] 93 | for object in objects: 94 | class_name = object.find('name').text.lower().strip() 95 | # we're only concerned with clases in our list 96 | if class_name in self.class_dict: 97 | bbox = object.find('bndbox') 98 | 99 | # VOC dataset format follows Matlab, in which indexes start from 0 100 | x1 = float(bbox.find('xmin').text) - 1 101 | y1 = float(bbox.find('ymin').text) - 1 102 | x2 = float(bbox.find('xmax').text) - 1 103 | y2 = float(bbox.find('ymax').text) - 1 104 | boxes.append([x1, y1, x2, y2]) 105 | 106 | labels.append(self.class_dict[class_name]) 107 | is_difficult_str = object.find('difficult').text 108 | is_difficult.append(int(is_difficult_str) if is_difficult_str else 0) 109 | 110 | return (np.array(boxes, dtype=np.float32), 111 | np.array(labels, dtype=np.int64), 112 | np.array(is_difficult, dtype=np.uint8)) 113 | 114 | def _read_image(self, image_id): 115 | image_file = self.root / f"JPEGImages/{image_id}.jpg" 116 | image = cv2.imread(str(image_file)) 117 | image = 
cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 118 | return image 119 | -------------------------------------------------------------------------------- /paddle/vision/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/paddle/vision/nn/__init__.py -------------------------------------------------------------------------------- /paddle/vision/nn/mb_tiny.py: -------------------------------------------------------------------------------- 1 | import paddle.nn as nn 2 | import paddle.nn.functional as F 3 | 4 | 5 | class Mb_Tiny(nn.Layer): 6 | 7 | def __init__(self, num_classes=2): 8 | super(Mb_Tiny, self).__init__() 9 | self.base_channel = 8 * 2 10 | 11 | def conv_bn(inp, oup, stride): 12 | return nn.Sequential( 13 | nn.Conv2D(inp, oup, 3, stride, 1, bias_attr=None), 14 | nn.BatchNorm2D(oup), 15 | nn.ReLU() 16 | ) 17 | 18 | def conv_dw(inp, oup, stride): 19 | return nn.Sequential( 20 | nn.Conv2D(inp, inp, 3, stride, 1, groups=inp, bias_attr=None), 21 | nn.BatchNorm2D(inp), 22 | nn.ReLU(), 23 | 24 | nn.Conv2D(inp, oup, 1, 1, 0, bias_attr=None), 25 | nn.BatchNorm2D(oup), 26 | nn.ReLU(), 27 | ) 28 | 29 | self.model = nn.Sequential( 30 | conv_bn(3, self.base_channel, 2), # 160*120 31 | conv_dw(self.base_channel, self.base_channel * 2, 1), 32 | conv_dw(self.base_channel * 2, self.base_channel * 2, 2), # 80*60 33 | conv_dw(self.base_channel * 2, self.base_channel * 2, 1), 34 | conv_dw(self.base_channel * 2, self.base_channel * 4, 2), # 40*30 35 | conv_dw(self.base_channel * 4, self.base_channel * 4, 1), 36 | conv_dw(self.base_channel * 4, self.base_channel * 4, 1), 37 | conv_dw(self.base_channel * 4, self.base_channel * 4, 1), 38 | conv_dw(self.base_channel * 4, self.base_channel * 8, 2), # 20*15 39 | conv_dw(self.base_channel * 8, self.base_channel * 8, 1), 40 | conv_dw(self.base_channel * 8, self.base_channel * 8, 1), 41 | conv_dw(self.base_channel * 8, self.base_channel * 16, 2), # 10*8 42 | conv_dw(self.base_channel * 16, self.base_channel * 16, 1) 43 | ) 44 | self.fc = nn.Linear(1024, num_classes) 45 | 46 | def forward(self, x): 47 | x = self.model(x) 48 | x = F.avg_pool2d(x, 7) 49 | x = x.view(-1, 1024) 50 | x = self.fc(x) 51 | return x 52 | -------------------------------------------------------------------------------- /paddle/vision/nn/multibox_loss.py: -------------------------------------------------------------------------------- 1 | import paddle 2 | import paddle.nn as nn 3 | import paddle.nn.functional as F 4 | 5 | from ..utils import box_utils 6 | 7 | 8 | # class MultiboxLoss(object): 9 | class MultiboxLoss(nn.Layer): 10 | def __init__(self, priors, neg_pos_ratio, center_variance, size_variance): 11 | """Implement SSD Multibox Loss. 12 | 13 | Basically, Multibox loss combines classification loss 14 | and Smooth L1 regression loss. 15 | """ 16 | super(MultiboxLoss, self).__init__() 17 | self.neg_pos_ratio = neg_pos_ratio 18 | self.center_variance = center_variance 19 | self.size_variance = size_variance 20 | self.priors = priors 21 | 22 | def forward(self, confidence, predicted_locations, labels, gt_locations): 23 | """Compute classification loss and smooth l1 loss. 24 | 25 | Args: 26 | confidence (batch_size, num_priors, num_classes): class predictions. 27 | locations (batch_size, num_priors, 4): predicted locations. 28 | labels (batch_size, num_priors): real labels of all the priors. 
29 | boxes (batch_size, num_priors, 4): real boxes corresponding all the priors. 30 | """ 31 | num_classes = confidence.shape[2] 32 | with paddle.no_grad(): 33 | # derived from cross_entropy=sum(log(p)) 34 | loss = -F.log_softmax(confidence, 2)[:, :, 0] 35 | mask = box_utils.hard_negative_mining(loss, labels, self.neg_pos_ratio) 36 | 37 | confidence = paddle.concat([confidence[:, :, 0].masked_select(mask).reshape([-1, 1]), 38 | confidence[:, :,1].masked_select(mask).reshape([-1, 1])], axis=1) 39 | classification_loss = F.cross_entropy(confidence.reshape([-1, num_classes]), labels.masked_select(mask), reduction='sum') 40 | pos_mask = labels > 0 41 | predicted_locations = predicted_locations.masked_select(paddle.concat([pos_mask.reshape(pos_mask.shape+[1]), pos_mask.reshape(pos_mask.shape+[1]), pos_mask.reshape(pos_mask.shape+[1]), pos_mask.reshape(pos_mask.shape+[1])], axis=2)).reshape([-1, 4]) 42 | gt_locations = gt_locations.masked_select(paddle.concat([pos_mask.reshape(pos_mask.shape+[1]), pos_mask.reshape(pos_mask.shape+[1]), pos_mask.reshape(pos_mask.shape+[1]), pos_mask.reshape(pos_mask.shape+[1])], axis=2)).reshape([-1, 4]) 43 | smooth_l1_loss = F.smooth_l1_loss(predicted_locations, gt_locations.cast('float32'), reduction='sum') # smooth_l1_loss 44 | # smooth_l1_loss = F.mse_loss(predicted_locations, gt_locations, reduction='sum') #l2 loss 45 | num_pos = gt_locations.shape[0] 46 | return smooth_l1_loss / num_pos, classification_loss / num_pos 47 | -------------------------------------------------------------------------------- /paddle/vision/ssd/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/paddle/vision/ssd/__init__.py -------------------------------------------------------------------------------- /paddle/vision/ssd/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/paddle/vision/ssd/config/__init__.py -------------------------------------------------------------------------------- /paddle/vision/ssd/config/fd_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from vision.utils.box_utils import generate_priors 4 | 5 | image_mean_test = image_mean = np.array([127, 127, 127]) 6 | image_std = 128.0 7 | iou_threshold = 0.3 8 | center_variance = 0.1 9 | size_variance = 0.2 10 | 11 | min_boxes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]] 12 | shrinkage_list = [] 13 | image_size = [320, 240] # default input size 320*240 14 | feature_map_w_h_list = [[40, 20, 10, 5], [30, 15, 8, 4]] # default feature map size 15 | priors = [] 16 | 17 | 18 | def define_img_size(size): 19 | global image_size, feature_map_w_h_list, priors 20 | img_size_dict = {128: [128, 96], 21 | 160: [160, 120], 22 | 320: [320, 240], 23 | 480: [480, 360], 24 | 640: [640, 480], 25 | 1280: [1280, 960]} 26 | image_size = img_size_dict[size] 27 | 28 | feature_map_w_h_list_dict = {128: [[16, 8, 4, 2], [12, 6, 3, 2]], 29 | 160: [[20, 10, 5, 3], [15, 8, 4, 2]], 30 | 320: [[40, 20, 10, 5], [30, 15, 8, 4]], 31 | 480: [[60, 30, 15, 8], [45, 23, 12, 6]], 32 | 640: [[80, 40, 20, 10], [60, 30, 15, 8]], 33 | 1280: [[160, 80, 40, 20], [120, 60, 30, 15]]} 34 | feature_map_w_h_list = feature_map_w_h_list_dict[size] 35 
| 36 | for i in range(0, len(image_size)): 37 | item_list = [] 38 | for k in range(0, len(feature_map_w_h_list[i])): 39 | item_list.append(image_size[i] / feature_map_w_h_list[i][k]) 40 | shrinkage_list.append(item_list) 41 | priors = generate_priors(feature_map_w_h_list, shrinkage_list, image_size, min_boxes) 42 | -------------------------------------------------------------------------------- /paddle/vision/ssd/data_preprocessing.py: -------------------------------------------------------------------------------- 1 | from ..transforms.transforms import * 2 | 3 | 4 | class TrainAugmentation: 5 | def __init__(self, size, mean=0, std=1.0): 6 | """ 7 | Args: 8 | size: the size the of final image. 9 | mean: mean pixel value per channel. 10 | """ 11 | self.mean = mean 12 | self.size = size 13 | self.augment = Compose([ 14 | ConvertFromInts(), 15 | PhotometricDistort(), 16 | RandomSampleCrop_v2(), 17 | RandomMirror(), 18 | ToPercentCoords(), 19 | Resize(self.size), 20 | SubtractMeans(self.mean), 21 | lambda img, boxes=None, labels=None: (img / std, boxes, labels), 22 | ToTensor(), 23 | ]) 24 | 25 | def __call__(self, img, boxes, labels): 26 | """ 27 | 28 | Args: 29 | img: the output of cv.imread in RGB layout. 30 | boxes: boundding boxes in the form of (x1, y1, x2, y2). 31 | labels: labels of boxes. 32 | """ 33 | return self.augment(img, boxes, labels) 34 | 35 | 36 | class TestTransform: 37 | def __init__(self, size, mean=0.0, std=1.0): 38 | self.transform = Compose([ 39 | ToPercentCoords(), 40 | Resize(size), 41 | SubtractMeans(mean), 42 | lambda img, boxes=None, labels=None: (img / std, boxes, labels), 43 | ToTensor(), 44 | ]) 45 | 46 | def __call__(self, image, boxes, labels): 47 | return self.transform(image, boxes, labels) 48 | 49 | 50 | class PredictionTransform: 51 | def __init__(self, size, mean=0.0, std=1.0): 52 | self.transform = Compose([ 53 | Resize(size), 54 | SubtractMeans(mean), 55 | lambda img, boxes=None, labels=None: (img / std, boxes, labels), 56 | ToTensor() 57 | ]) 58 | 59 | def __call__(self, image): 60 | image, _, _ = self.transform(image) 61 | return image 62 | -------------------------------------------------------------------------------- /paddle/vision/ssd/mb_tiny_RFB_fd.py: -------------------------------------------------------------------------------- 1 | from paddle.nn import Conv2D, Sequential, LayerList, ReLU 2 | 3 | from vision.nn.mb_tiny_RFB import Mb_Tiny_RFB 4 | from vision.ssd.config import fd_config as config 5 | from vision.ssd.predictor import Predictor 6 | from vision.ssd.ssd import SSD 7 | 8 | 9 | def SeperableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0): 10 | """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 
11 | """ 12 | return Sequential( 13 | Conv2D(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 14 | groups=in_channels, stride=stride, padding=padding), 15 | ReLU(), 16 | Conv2D(in_channels=in_channels, out_channels=out_channels, kernel_size=1), 17 | ) 18 | 19 | 20 | def create_Mb_Tiny_RFB_fd(num_classes, is_test=False, device="cuda"): 21 | base_net = Mb_Tiny_RFB(2) 22 | base_net_model = base_net.model # disable dropout layer 23 | 24 | source_layer_indexes = [ 25 | 8, 26 | 11, 27 | 13 28 | ] 29 | extras = LayerList([ 30 | Sequential( 31 | Conv2D(in_channels=base_net.base_channel * 16, out_channels=base_net.base_channel * 4, kernel_size=1), 32 | ReLU(), 33 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=base_net.base_channel * 16, kernel_size=3, stride=2, padding=1), 34 | ReLU() 35 | ) 36 | ]) 37 | 38 | regression_headers = LayerList([ 39 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * 4, kernel_size=3, padding=1), 40 | SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * 4, kernel_size=3, padding=1), 41 | SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * 4, kernel_size=3, padding=1), 42 | Conv2D(in_channels=base_net.base_channel * 16, out_channels=3 * 4, kernel_size=3, padding=1) 43 | ]) 44 | 45 | classification_headers = LayerList([ 46 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * num_classes, kernel_size=3, padding=1), 47 | SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * num_classes, kernel_size=3, padding=1), 48 | SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * num_classes, kernel_size=3, padding=1), 49 | Conv2D(in_channels=base_net.base_channel * 16, out_channels=3 * num_classes, kernel_size=3, padding=1) 50 | ]) 51 | 52 | return SSD(num_classes, base_net_model, source_layer_indexes, 53 | extras, classification_headers, regression_headers, is_test=is_test, config=config, device=device) 54 | 55 | 56 | def create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=200, nms_method=None, sigma=0.5, device=None): 57 | predictor = Predictor(net, config.image_size, config.image_mean_test, 58 | config.image_std, 59 | nms_method=nms_method, 60 | iou_threshold=config.iou_threshold, 61 | candidate_size=candidate_size, 62 | sigma=sigma, 63 | device=device) 64 | return predictor 65 | -------------------------------------------------------------------------------- /paddle/vision/ssd/mb_tiny_fd.py: -------------------------------------------------------------------------------- 1 | from paddle.nn import Conv2D, Sequential, LayerList, ReLU 2 | 3 | from vision.nn.mb_tiny import Mb_Tiny 4 | from vision.ssd.config import fd_config as config 5 | from vision.ssd.predictor import Predictor 6 | from vision.ssd.ssd import SSD 7 | 8 | 9 | def SeperableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0): 10 | """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 
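
Usage sketch for the RFB factory functions above (paddle port). It assumes the script is run from the paddle/ directory so that `vision` is importable, that `define_img_size` is called before importing the factories (as the repo's own scripts do), and that a converted paddle checkpoint exists at the illustrative path below.

```python
import cv2
import paddle

from vision.ssd.config.fd_config import define_img_size
define_img_size(320)  # must run before importing the model factories

from vision.ssd.mb_tiny_RFB_fd import (create_Mb_Tiny_RFB_fd,
                                        create_Mb_Tiny_RFB_fd_predictor)

net = create_Mb_Tiny_RFB_fd(2, is_test=True, device="cpu")  # BACKGROUND + face
net.set_state_dict(paddle.load("version-RFB-320.pdparams"))  # illustrative path
net.eval()
predictor = create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=200, device="cpu")

image = cv2.cvtColor(cv2.imread("imgs/1.jpg"), cv2.COLOR_BGR2RGB)
boxes, labels, probs = predictor.predict(image, top_k=50, prob_threshold=0.7)
print(boxes.shape[0], "faces")
```
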
11 | """ 12 | return Sequential( 13 | Conv2D(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 14 | groups=in_channels, stride=stride, padding=padding), 15 | ReLU(), 16 | Conv2D(in_channels=in_channels, out_channels=out_channels, kernel_size=1), 17 | ) 18 | 19 | 20 | def create_mb_tiny_fd(num_classes, is_test=False, device="cuda"): 21 | base_net = Mb_Tiny(2) 22 | base_net_model = base_net.model # disable dropout layer 23 | 24 | source_layer_indexes = [ 25 | 8, 26 | 11, 27 | 13 28 | ] 29 | extras = LayerList([ 30 | Sequential( 31 | Conv2D(in_channels=base_net.base_channel * 16, out_channels=base_net.base_channel * 4, kernel_size=1), 32 | ReLU(), 33 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=base_net.base_channel * 16, kernel_size=3, stride=2, padding=1), 34 | ReLU() 35 | ) 36 | ]) 37 | 38 | regression_headers = LayerList([ 39 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * 4, kernel_size=3, padding=1), 40 | SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * 4, kernel_size=3, padding=1), 41 | SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * 4, kernel_size=3, padding=1), 42 | Conv2D(in_channels=base_net.base_channel * 16, out_channels=3 * 4, kernel_size=3, padding=1) 43 | ]) 44 | 45 | classification_headers = LayerList([ 46 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * num_classes, kernel_size=3, padding=1), 47 | SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * num_classes, kernel_size=3, padding=1), 48 | SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * num_classes, kernel_size=3, padding=1), 49 | Conv2D(in_channels=base_net.base_channel * 16, out_channels=3 * num_classes, kernel_size=3, padding=1) 50 | ]) 51 | 52 | return SSD(num_classes, base_net_model, source_layer_indexes, 53 | extras, classification_headers, regression_headers, is_test=is_test, config=config, device=device) 54 | 55 | 56 | def create_mb_tiny_fd_predictor(net, candidate_size=200, nms_method=None, sigma=0.5, device=None): 57 | predictor = Predictor(net, config.image_size, config.image_mean_test, 58 | config.image_std, 59 | nms_method=nms_method, 60 | iou_threshold=config.iou_threshold, 61 | candidate_size=candidate_size, 62 | sigma=sigma, 63 | device=device) 64 | return predictor 65 | -------------------------------------------------------------------------------- /paddle/vision/ssd/predictor.py: -------------------------------------------------------------------------------- 1 | import paddle 2 | 3 | from ..utils import box_utils 4 | from .data_preprocessing import PredictionTransform 5 | from ..utils.misc import Timer 6 | 7 | 8 | class Predictor: 9 | def __init__(self, net, size, mean=0.0, std=1.0, nms_method=None, 10 | iou_threshold=0.3, filter_threshold=0.01, candidate_size=200, sigma=0.5, device=None): 11 | self.net = net 12 | self.transform = PredictionTransform(size, mean, std) 13 | self.iou_threshold = iou_threshold 14 | self.filter_threshold = filter_threshold 15 | self.candidate_size = candidate_size 16 | self.nms_method = nms_method 17 | 18 | self.sigma = sigma 19 | if device: 20 | self.device = device 21 | else: 22 | self.device = paddle.set_device("cuda" if paddle.is_compiled_with_cuda() else "cpu") 23 | 24 | self.net.to(self.device) 25 | self.net.eval() 26 | 27 | self.timer = Timer() 28 | 29 | def predict(self, image, top_k=-1, prob_threshold=None): 30 | cpu_device = paddle.set_device("cpu") 31 | height, width, _ = 
image.shape 32 | image = self.transform(image) 33 | images = image.unsqueeze(0) 34 | images = images.to(self.device) 35 | with paddle.no_grad(): 36 | for i in range(1): 37 | self.timer.start() 38 | scores, boxes = self.net.forward(images) 39 | print("Inference time: ", self.timer.end()) 40 | boxes = boxes[0] 41 | scores = scores[0] 42 | if not prob_threshold: 43 | prob_threshold = self.filter_threshold 44 | # this version of nms is slower on GPU, so we move data to CPU. 45 | boxes = boxes.to(cpu_device) 46 | scores = scores.to(cpu_device) 47 | picked_box_probs = [] 48 | picked_labels = [] 49 | for class_index in range(1, scores.size(1)): 50 | probs = scores[:, class_index] 51 | mask = probs > prob_threshold 52 | probs = probs[mask] 53 | if probs.size(0) == 0: 54 | continue 55 | subset_boxes = boxes[mask, :] 56 | box_probs = paddle.concat([subset_boxes, probs.reshape(-1, 1)], 1) 57 | box_probs = box_utils.nms(box_probs, self.nms_method, 58 | score_threshold=prob_threshold, 59 | iou_threshold=self.iou_threshold, 60 | sigma=self.sigma, 61 | top_k=top_k, 62 | candidate_size=self.candidate_size) 63 | picked_box_probs.append(box_probs) 64 | picked_labels.extend([class_index] * box_probs.size(0)) 65 | if not picked_box_probs: 66 | return paddle.to_tensor([]), paddle.to_tensor([]), paddle.to_tensor([]) 67 | picked_box_probs = paddle.concat(picked_box_probs) 68 | picked_box_probs[:, 0] *= width 69 | picked_box_probs[:, 1] *= height 70 | picked_box_probs[:, 2] *= width 71 | picked_box_probs[:, 3] *= height 72 | return picked_box_probs[:, :4], paddle.to_tensor(picked_labels), picked_box_probs[:, 4] 73 | -------------------------------------------------------------------------------- /paddle/vision/transforms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/paddle/vision/transforms/__init__.py -------------------------------------------------------------------------------- /paddle/vision/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .misc import * 2 | -------------------------------------------------------------------------------- /paddle/vision/utils/box_utils_numpy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def convert_locations_to_boxes(locations, priors, center_variance, size_variance): 5 | """Convert regressional location results of SSD into boxes in the form of (center_x, center_y, h, w). 6 | 7 | The conversion: 8 | $$predicted\_center * center_variance = \frac {real\_center - prior\_center} {prior\_hw}$$ 9 | $$exp(predicted\_hw * size_variance) = \frac {real\_hw} {prior\_hw}$$ 10 | We do it in the inverse direction here. 11 | Args: 12 | locations (batch_size, num_priors, 4): the regression output of SSD. It will contain the outputs as well. 13 | priors (num_priors, 4) or (batch_size/1, num_priors, 4): prior boxes. 14 | center_variance: a float used to change the scale of center. 15 | size_variance: a float used to change of scale of size. 16 | Returns: 17 | boxes: priors: [[center_x, center_y, h, w]]. All the values 18 | are relative to the image size. 19 | """ 20 | # priors can have one dimension less. 
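
A minimal round-trip check of the encode/decode relationship described in the docstring above, using made-up numbers and plain numpy (it mirrors `convert_boxes_to_locations` followed by `convert_locations_to_boxes` for a single prior):

```python
import numpy as np

center_variance, size_variance = 0.1, 0.2
prior = np.array([0.50, 0.50, 0.20, 0.30])  # center-form prior, relative to image size
box   = np.array([0.55, 0.48, 0.25, 0.27])  # a "real" box in the same form

# encode (what convert_boxes_to_locations does)
loc = np.concatenate([
    (box[:2] - prior[:2]) / prior[2:] / center_variance,
    np.log(box[2:] / prior[2:]) / size_variance,
])

# decode (what convert_locations_to_boxes does)
decoded = np.concatenate([
    loc[:2] * center_variance * prior[2:] + prior[:2],
    np.exp(loc[2:] * size_variance) * prior[2:],
])
assert np.allclose(decoded, box)
```
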
21 | if len(priors.shape) + 1 == len(locations.shape): 22 | priors = np.expand_dims(priors, 0) 23 | return np.concatenate([ 24 | locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2], 25 | np.exp(locations[..., 2:] * size_variance) * priors[..., 2:] 26 | ], axis=len(locations.shape) - 1) 27 | 28 | 29 | def convert_boxes_to_locations(center_form_boxes, center_form_priors, center_variance, size_variance): 30 | # priors can have one dimension less 31 | if len(center_form_priors.shape) + 1 == len(center_form_boxes.shape): 32 | center_form_priors = np.expand_dims(center_form_priors, 0) 33 | return np.concatenate([ 34 | (center_form_boxes[..., :2] - center_form_priors[..., :2]) / center_form_priors[..., 2:] / center_variance, 35 | np.log(center_form_boxes[..., 2:] / center_form_priors[..., 2:]) / size_variance 36 | ], axis=len(center_form_boxes.shape) - 1) 37 | 38 | 39 | def area_of(left_top, right_bottom): 40 | """Compute the areas of rectangles given two corners. 41 | 42 | Args: 43 | left_top (N, 2): left top corner. 44 | right_bottom (N, 2): right bottom corner. 45 | 46 | Returns: 47 | area (N): return the area. 48 | """ 49 | hw = np.clip(right_bottom - left_top, 0.0, None) 50 | return hw[..., 0] * hw[..., 1] 51 | 52 | 53 | def iou_of(boxes0, boxes1, eps=1e-5): 54 | """Return intersection-over-union (Jaccard index) of boxes. 55 | 56 | Args: 57 | boxes0 (N, 4): ground truth boxes. 58 | boxes1 (N or 1, 4): predicted boxes. 59 | eps: a small number to avoid 0 as denominator. 60 | Returns: 61 | iou (N): IoU values. 62 | """ 63 | overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2]) 64 | overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:]) 65 | 66 | overlap_area = area_of(overlap_left_top, overlap_right_bottom) 67 | area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) 68 | area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) 69 | return overlap_area / (area0 + area1 - overlap_area + eps) 70 | 71 | 72 | def center_form_to_corner_form(locations): 73 | return np.concatenate([locations[..., :2] - locations[..., 2:] / 2, 74 | locations[..., :2] + locations[..., 2:] / 2], len(locations.shape) - 1) 75 | 76 | 77 | def corner_form_to_center_form(boxes): 78 | return np.concatenate([ 79 | (boxes[..., :2] + boxes[..., 2:]) / 2, 80 | boxes[..., 2:] - boxes[..., :2] 81 | ], len(boxes.shape) - 1) 82 | 83 | 84 | def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): 85 | """ 86 | 87 | Args: 88 | box_scores (N, 5): boxes in corner-form and probabilities. 89 | iou_threshold: intersection over union threshold. 90 | top_k: keep top_k results. If k <= 0, keep all the results. 91 | candidate_size: only consider the candidates with the highest scores. 
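
For a concrete feel of `iou_of` above, a standalone numeric example using the same arithmetic (no repo imports): two unit squares offset by half a side overlap with IoU = 0.5 / 1.5 = 1/3, which exceeds the repo's default iou_threshold of 0.3, so hard NMS would suppress the lower-scoring of the two.

```python
import numpy as np

a = np.array([0.0, 0.0, 1.0, 1.0])   # corner form: x1, y1, x2, y2
b = np.array([0.5, 0.0, 1.5, 1.0])

inter_lt = np.maximum(a[:2], b[:2])
inter_rb = np.minimum(a[2:], b[2:])
inter = np.prod(np.clip(inter_rb - inter_lt, 0.0, None))
union = 1.0 * 1.0 + 1.0 * 1.0 - inter
print(inter / union)  # 0.333...
```
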
92 | Returns: 93 | picked: a list of indexes of the kept boxes 94 | """ 95 | scores = box_scores[:, -1] 96 | boxes = box_scores[:, :-1] 97 | picked = [] 98 | # _, indexes = scores.sort(descending=True) 99 | indexes = np.argsort(scores) 100 | # indexes = indexes[:candidate_size] 101 | indexes = indexes[-candidate_size:] 102 | while len(indexes) > 0: 103 | # current = indexes[0] 104 | current = indexes[-1] 105 | picked.append(current) 106 | if 0 < top_k == len(picked) or len(indexes) == 1: 107 | break 108 | current_box = boxes[current, :] 109 | # indexes = indexes[1:] 110 | indexes = indexes[:-1] 111 | rest_boxes = boxes[indexes, :] 112 | iou = iou_of( 113 | rest_boxes, 114 | np.expand_dims(current_box, axis=0), 115 | ) 116 | indexes = indexes[iou <= iou_threshold] 117 | 118 | return box_scores[picked, :] 119 | -------------------------------------------------------------------------------- /paddle/vision/utils/misc.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import paddle 4 | 5 | 6 | def str2bool(s): 7 | return s.lower() in ('true', '1') 8 | 9 | 10 | class Timer: 11 | def __init__(self): 12 | self.clock = {} 13 | 14 | def start(self, key="default"): 15 | self.clock[key] = datetime.datetime.now() 16 | 17 | def end(self, key="default"): 18 | if key not in self.clock: 19 | raise Exception(f"{key} is not in the clock.") 20 | interval = datetime.datetime.now() - self.clock[key] 21 | del self.clock[key] 22 | return interval.total_seconds() 23 | 24 | 25 | def save_checkpoint(epoch, net_state_dict, optimizer_state_dict, best_score, checkpoint_path, model_path): 26 | paddle.save({ 27 | 'epoch': epoch, 28 | 'model': net_state_dict, 29 | 'optimizer': optimizer_state_dict, 30 | 'best_score': best_score 31 | }, checkpoint_path) 32 | paddle.save(net_state_dict, model_path) 33 | 34 | 35 | def load_checkpoint(checkpoint_path): 36 | return paddle.load(checkpoint_path) 37 | 38 | 39 | def freeze_net_layers(net): 40 | for param in net.parameters(): 41 | param.stop_gradient = True 42 | 43 | 44 | def store_labels(path, labels): 45 | with open(path, "w") as f: 46 | f.write("\n".join(labels)) 47 | -------------------------------------------------------------------------------- /readme_imgs/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/readme_imgs/1.jpg -------------------------------------------------------------------------------- /readme_imgs/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/readme_imgs/2.jpg -------------------------------------------------------------------------------- /readme_imgs/26.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/readme_imgs/26.jpg -------------------------------------------------------------------------------- /readme_imgs/27.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/readme_imgs/27.jpg 
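
Usage sketch for the `Timer` and checkpoint helpers above. It assumes the script is run from the paddle/ directory so that `vision` is importable; `net`, `optimizer`, `epoch` and `best_score` are placeholders for an existing paddle model, optimizer and training state, and the paths are illustrative.

```python
import paddle
from vision.utils.misc import Timer, save_checkpoint, load_checkpoint

timer = Timer()
timer.start("epoch")
# ... run one training epoch here ...
print(f"epoch took {timer.end('epoch'):.3f}s")

# Placeholders: net / optimizer / epoch / best_score come from your training loop.
# save_checkpoint(epoch, net.state_dict(), optimizer.state_dict(), best_score,
#                 checkpoint_path="checkpoints/last.ckpt",
#                 model_path="checkpoints/last.pdparams")
# state = load_checkpoint("checkpoints/last.ckpt")  # dict with 'epoch', 'model', 'optimizer', 'best_score'
```
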
-------------------------------------------------------------------------------- /readme_imgs/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/readme_imgs/4.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | torch 3 | opencv_python 4 | torchvision 5 | typing 6 | torchstat 7 | torchsummary 8 | ptflops 9 | matplotlib 10 | onnx 11 | onnxruntime -------------------------------------------------------------------------------- /run_video_face_detect.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code uses the pytorch model to detect faces from live video or camera. 3 | """ 4 | import argparse 5 | import sys 6 | import cv2 7 | 8 | from vision.ssd.config.fd_config import define_img_size 9 | 10 | parser = argparse.ArgumentParser( 11 | description='detect_video') 12 | 13 | parser.add_argument('--net_type', default="RFB", type=str, 14 | help='The network architecture ,optional: RFB (higher precision) or slim (faster)') 15 | parser.add_argument('--input_size', default=480, type=int, 16 | help='define network input size,default optional value 128/160/320/480/640/1280') 17 | parser.add_argument('--threshold', default=0.7, type=float, 18 | help='score threshold') 19 | parser.add_argument('--candidate_size', default=1000, type=int, 20 | help='nms candidate size') 21 | parser.add_argument('--path', default="imgs", type=str, 22 | help='imgs dir') 23 | parser.add_argument('--test_device', default="cuda:0", type=str, 24 | help='cuda:0 or cpu') 25 | parser.add_argument('--video_path', default="/home/linzai/Videos/video/16_1.MP4", type=str, 26 | help='path of video') 27 | args = parser.parse_args() 28 | 29 | input_img_size = args.input_size 30 | define_img_size(input_img_size) # must put define_img_size() before 'import create_mb_tiny_fd, create_mb_tiny_fd_predictor' 31 | 32 | from vision.ssd.mb_tiny_fd import create_mb_tiny_fd, create_mb_tiny_fd_predictor 33 | from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd, create_Mb_Tiny_RFB_fd_predictor 34 | from vision.utils.misc import Timer 35 | 36 | label_path = "./models/voc-model-labels.txt" 37 | 38 | net_type = args.net_type 39 | 40 | cap = cv2.VideoCapture(args.video_path) # capture from video 41 | # cap = cv2.VideoCapture(0) # capture from camera 42 | 43 | class_names = [name.strip() for name in open(label_path).readlines()] 44 | num_classes = len(class_names) 45 | test_device = args.test_device 46 | 47 | candidate_size = args.candidate_size 48 | threshold = args.threshold 49 | 50 | if net_type == 'slim': 51 | model_path = "models/pretrained/version-slim-320.pth" 52 | # model_path = "models/pretrained/version-slim-640.pth" 53 | net = create_mb_tiny_fd(len(class_names), is_test=True, device=test_device) 54 | predictor = create_mb_tiny_fd_predictor(net, candidate_size=candidate_size, device=test_device) 55 | elif net_type == 'RFB': 56 | model_path = "models/pretrained/version-RFB-320.pth" 57 | # model_path = "models/pretrained/version-RFB-640.pth" 58 | net = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True, device=test_device) 59 | predictor = create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=candidate_size, device=test_device) 60 | else: 61 | print("The net type is wrong!") 62 | 
sys.exit(1) 63 | net.load(model_path) 64 | 65 | timer = Timer() 66 | sum = 0 67 | while True: 68 | ret, orig_image = cap.read() 69 | if orig_image is None: 70 | print("end") 71 | break 72 | image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) 73 | timer.start() 74 | boxes, labels, probs = predictor.predict(image, candidate_size / 2, threshold) 75 | interval = timer.end() 76 | print('Time: {:.6f}s, Detect Objects: {:d}.'.format(interval, labels.size(0))) 77 | for i in range(boxes.size(0)): 78 | box = boxes[i, :] 79 | label = f" {probs[i]:.2f}" 80 | cv2.rectangle(orig_image, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 4) 81 | 82 | # cv2.putText(orig_image, label, 83 | # (box[0], box[1] - 10), 84 | # cv2.FONT_HERSHEY_SIMPLEX, 85 | # 0.5, # font scale 86 | # (0, 0, 255), 87 | # 2) # line type 88 | orig_image = cv2.resize(orig_image, None, None, fx=0.8, fy=0.8) 89 | sum += boxes.size(0) 90 | cv2.imshow('annotated', orig_image) 91 | if cv2.waitKey(1) & 0xFF == ord('q'): 92 | break 93 | cap.release() 94 | cv2.destroyAllWindows() 95 | print("all face num:{}".format(sum)) 96 | -------------------------------------------------------------------------------- /run_video_face_detect_onnx.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code uses the onnx model to detect faces from live video or cameras. 3 | """ 4 | import time 5 | 6 | import cv2 7 | import numpy as np 8 | import onnx 9 | import vision.utils.box_utils_numpy as box_utils 10 | from caffe2.python.onnx import backend 11 | 12 | # onnx runtime 13 | import onnxruntime as ort 14 | 15 | 16 | def predict(width, height, confidences, boxes, prob_threshold, iou_threshold=0.3, top_k=-1): 17 | boxes = boxes[0] 18 | confidences = confidences[0] 19 | picked_box_probs = [] 20 | picked_labels = [] 21 | for class_index in range(1, confidences.shape[1]): 22 | probs = confidences[:, class_index] 23 | mask = probs > prob_threshold 24 | probs = probs[mask] 25 | if probs.shape[0] == 0: 26 | continue 27 | subset_boxes = boxes[mask, :] 28 | box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1) 29 | box_probs = box_utils.hard_nms(box_probs, 30 | iou_threshold=iou_threshold, 31 | top_k=top_k, 32 | ) 33 | picked_box_probs.append(box_probs) 34 | picked_labels.extend([class_index] * box_probs.shape[0]) 35 | if not picked_box_probs: 36 | return np.array([]), np.array([]), np.array([]) 37 | picked_box_probs = np.concatenate(picked_box_probs) 38 | picked_box_probs[:, 0] *= width 39 | picked_box_probs[:, 1] *= height 40 | picked_box_probs[:, 2] *= width 41 | picked_box_probs[:, 3] *= height 42 | return picked_box_probs[:, :4].astype(np.int32), np.array(picked_labels), picked_box_probs[:, 4] 43 | 44 | 45 | label_path = "models/voc-model-labels.txt" 46 | 47 | onnx_path = "models/onnx/version-RFB-320.onnx" 48 | class_names = [name.strip() for name in open(label_path).readlines()] 49 | 50 | predictor = onnx.load(onnx_path) 51 | onnx.checker.check_model(predictor) 52 | onnx.helper.printable_graph(predictor.graph) 53 | predictor = backend.prepare(predictor, device="CPU") # default CPU 54 | 55 | ort_session = ort.InferenceSession(onnx_path) 56 | input_name = ort_session.get_inputs()[0].name 57 | 58 | cap = cv2.VideoCapture("/home/linzai/Videos/video/16_6.MP4") # capture from camera 59 | 60 | threshold = 0.7 61 | 62 | sum = 0 63 | while True: 64 | ret, orig_image = cap.read() 65 | if orig_image is None: 66 | print("no img") 67 | break 68 | image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) 69 | image = 
cv2.resize(image, (320, 240)) 70 | # image = cv2.resize(image, (640, 480)) 71 | image_mean = np.array([127, 127, 127]) 72 | image = (image - image_mean) / 128 73 | image = np.transpose(image, [2, 0, 1]) 74 | image = np.expand_dims(image, axis=0) 75 | image = image.astype(np.float32) 76 | # confidences, boxes = predictor.run(image) 77 | time_time = time.time() 78 | confidences, boxes = ort_session.run(None, {input_name: image}) 79 | print("cost time:{}".format(time.time() - time_time)) 80 | boxes, labels, probs = predict(orig_image.shape[1], orig_image.shape[0], confidences, boxes, threshold) 81 | for i in range(boxes.shape[0]): 82 | box = boxes[i, :] 83 | label = f"{class_names[labels[i]]}: {probs[i]:.2f}" 84 | 85 | cv2.rectangle(orig_image, (box[0], box[1]), (box[2], box[3]), (255, 255, 0), 4) 86 | 87 | # cv2.putText(orig_image, label, 88 | # (box[0] + 20, box[1] + 40), 89 | # cv2.FONT_HERSHEY_SIMPLEX, 90 | # 1, # font scale 91 | # (255, 0, 255), 92 | # 2) # line type 93 | sum += boxes.shape[0] 94 | orig_image = cv2.resize(orig_image, (0, 0), fx=0.7, fy=0.7) 95 | cv2.imshow('annotated', orig_image) 96 | if cv2.waitKey(1) & 0xFF == ord('q'): 97 | break 98 | cap.release() 99 | cv2.destroyAllWindows() 100 | print("sum:{}".format(sum)) 101 | -------------------------------------------------------------------------------- /tf/README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow implementation of Ultra-Light-Fast-Generic-Face-Detector-1MB with converter 2 | 3 | You can use this script to converter origin model to tensorflow version. 4 | 5 | ## Run 6 | Covert model 7 | ```Python 8 | python3 ./convert_tensorflow.py --net_type 9 | ``` 10 | 11 | Inference on image 12 | ```Python 13 | python3 ./det_image.py --net_type --img_path 14 | ``` 15 | 16 | ## Result 17 | ![img1](https://github.com/jason9075/Ultra-Light-Fast-Generic-Face-Detector_Tensorflow-Model-Converter/blob/master/imgs/test_output_RFB.jpg) 18 | 19 | ## Reference 20 | - [Ultra-Light-Fast-Generic-Face-Detector_Tensorflow-Model-Converter](https://github.com/jason9075/Ultra-Light-Fast-Generic-Face-Detector_Tensorflow-Model-Converter) 21 | -------------------------------------------------------------------------------- /tf/backend/op.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def basic_conv(x, out_ch, kernel_size, stride=(1, 1), padding=0, dilation=1, relu=True, 5 | bn=True, prefix='basic_conv'): 6 | if 0 < padding: 7 | out = tf.keras.layers.ZeroPadding2D(padding=padding, name=f'{prefix}_padding')(x) 8 | else: 9 | out = x 10 | out = tf.keras.layers.Conv2D(out_ch, 11 | kernel_size, 12 | strides=stride, 13 | dilation_rate=dilation, 14 | use_bias=(not bn), 15 | name=f'{prefix}_conv')(out) 16 | if bn: 17 | out = tf.keras.layers.BatchNormalization(epsilon=1e-5, name=f'{prefix}_bn')(out) 18 | if relu: 19 | out = tf.keras.layers.ReLU(name=f'{prefix}_relu')(out) 20 | 21 | return out 22 | 23 | 24 | def basic_rfb(x, in_ch, out_ch, stride=1, scale=0.1, map_reduce=8, vision=1, prefix='basic_rfb'): 25 | inter_ch = in_ch // map_reduce 26 | 27 | branch0 = basic_conv(x, inter_ch, kernel_size=1, stride=1, relu=False, 28 | prefix=f'{prefix}.branch0.0') 29 | branch0 = basic_conv(branch0, 2 * inter_ch, kernel_size=3, stride=stride, padding=1, 30 | prefix=f'{prefix}.branch0.1') 31 | branch0 = basic_conv(branch0, 2 * inter_ch, kernel_size=3, stride=1, dilation=vision + 1, 32 | padding=vision + 1, relu=False, 
prefix=f'{prefix}.branch0.2') 33 | 34 | branch1 = basic_conv(x, inter_ch, kernel_size=1, stride=1, relu=False, 35 | prefix=f'{prefix}.branch1.0') 36 | branch1 = basic_conv(branch1, 2 * inter_ch, kernel_size=3, stride=stride, padding=1, 37 | prefix=f'{prefix}.branch1.1') 38 | branch1 = basic_conv(branch1, 2 * inter_ch, kernel_size=3, stride=1, dilation=vision + 2, 39 | padding=vision + 2, relu=False, prefix=f'{prefix}.branch1.2') 40 | 41 | branch2 = basic_conv(x, inter_ch, kernel_size=1, stride=1, relu=False, 42 | prefix=f'{prefix}.branch2.0') 43 | branch2 = basic_conv(branch2, (inter_ch // 2) * 3, kernel_size=3, stride=1, padding=1, 44 | prefix=f'{prefix}.branch2.1') 45 | branch2 = basic_conv(branch2, 2 * inter_ch, kernel_size=3, stride=stride, padding=1, 46 | prefix=f'{prefix}.branch2.2') 47 | branch2 = basic_conv(branch2, 2 * inter_ch, kernel_size=3, stride=1, dilation=vision + 4, 48 | padding=vision + 4, relu=False, prefix=f'{prefix}.branch2.3') 49 | 50 | out = tf.keras.layers.Concatenate(axis=-1, name=f'{prefix}_cat')([branch0, branch1, branch2]) 51 | out = basic_conv(out, out_ch, kernel_size=1, stride=1, relu=False, prefix=f'{prefix}.convlinear') 52 | shortcut = basic_conv(x, out_ch, kernel_size=1, stride=stride, relu=False, prefix=f'{prefix}.shortcut') 53 | out = tf.multiply(out, scale, name=f'{prefix}_mul') 54 | out = tf.keras.layers.Add(name=f'{prefix}_add')([out, shortcut]) 55 | out = tf.keras.layers.ReLU(name=f'{prefix}_relu')(out) 56 | 57 | return out 58 | 59 | 60 | def separable_conv(x, out_ch, kernel_size, stride, padding, prefix='separable_conv'): 61 | out = tf.keras.layers.ZeroPadding2D(padding=padding, name=f'{prefix}_dconv_padding')(x) 62 | 63 | out = tf.keras.layers.DepthwiseConv2D(kernel_size, 64 | strides=stride, 65 | name=f'{prefix}_dconvbias')(out) 66 | out = tf.keras.layers.ReLU(name=f'{prefix}_relu')(out) 67 | out = tf.keras.layers.Conv2D(out_ch, 1, 68 | name=f'{prefix}_convbias')(out) 69 | 70 | return out 71 | 72 | 73 | def conv_bn(x, out_ch, stride, padding=1, prefix='conv_bn'): 74 | out = tf.keras.layers.ZeroPadding2D(padding=padding, name=f'{prefix}.0_padding')(x) 75 | out = tf.keras.layers.Conv2D(out_ch, 76 | (3, 3), 77 | strides=stride, 78 | use_bias=False, 79 | name=f'{prefix}.0_conv')(out) 80 | out = tf.keras.layers.BatchNormalization(epsilon=1e-5, name=f'{prefix}.1_bn')(out) 81 | out = tf.keras.layers.ReLU(name=f'{prefix}.2_relu')(out) 82 | 83 | return out 84 | 85 | 86 | def conv_dw(x, out_ch, stride, padding=1, prefix='conv_dw'): 87 | out = tf.keras.layers.ZeroPadding2D(padding=padding, name=f'{prefix}.0_padding')(x) 88 | out = tf.keras.layers.DepthwiseConv2D(3, strides=stride, 89 | use_bias=False, 90 | name=f'{prefix}.0_dconv')(out) 91 | out = tf.keras.layers.BatchNormalization(epsilon=1e-5, name=f'{prefix}.1_bn')(out) 92 | out = tf.keras.layers.ReLU(name=f'{prefix}.2_relu')(out) 93 | 94 | out = tf.keras.layers.Conv2D(out_ch, 1, use_bias=False, name=f'{prefix}.3_conv')(out) 95 | out = tf.keras.layers.BatchNormalization(epsilon=1e-5, name=f'{prefix}.4_bn')(out) 96 | out = tf.keras.layers.ReLU(name=f'{prefix}.5_relu')(out) 97 | 98 | return out 99 | -------------------------------------------------------------------------------- /tf/backend/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | import torch 6 | 7 | 8 | def post_processing(reg_list, cls_list, num_classes, image_size, feature_map_wh_list, min_boxes, 9 | center_variance, size_variance, 
10 | conf_threshold=0.6, nms_max_output_size=100, nms_iou_threshold=0.3, top_k=100): 11 | reg_list = [tf.keras.layers.Reshape([-1, 4])(reg) for reg in reg_list] 12 | cls_list = [tf.keras.layers.Reshape([-1, num_classes])(cls) for cls in cls_list] 13 | 14 | reg = tf.keras.layers.Concatenate(axis=1)(reg_list) 15 | cls = tf.keras.layers.Concatenate(axis=1)(cls_list) 16 | 17 | # post process 18 | cls = tf.keras.layers.Softmax(axis=-1)(cls) 19 | loc = decode_regression(reg, image_size, feature_map_wh_list, min_boxes, 20 | center_variance, size_variance) 21 | 22 | result = tf.keras.layers.Concatenate(axis=-1)([cls, loc]) 23 | 24 | # confidence thresholding 25 | mask = conf_threshold < cls[..., 1] 26 | result = tf.boolean_mask(tensor=result, mask=mask) 27 | 28 | # non-maximum suppression 29 | mask = tf.image.non_max_suppression(boxes=result[..., -4:], 30 | scores=result[..., 1], 31 | max_output_size=nms_max_output_size, 32 | iou_threshold=nms_iou_threshold, 33 | name='non_maximum_suppresion') 34 | result = tf.gather(params=result, indices=mask, axis=0) 35 | 36 | # top-k filtering 37 | top_k_value = tf.math.minimum(tf.constant(top_k), tf.shape(result)[0]) 38 | mask = tf.nn.top_k(result[..., 1], k=top_k_value, sorted=True).indices 39 | result = tf.gather(params=result, indices=mask, axis=0) 40 | 41 | return result 42 | 43 | 44 | def decode_regression(reg, image_size, feature_map_w_h_list, min_boxes, 45 | center_variance, size_variance): 46 | priors = [] 47 | for feature_map_w_h, min_box in zip(feature_map_w_h_list, min_boxes): 48 | xy_grid = np.meshgrid(range(feature_map_w_h[0]), range(feature_map_w_h[1])) 49 | xy_grid = np.add(xy_grid, 0.5) 50 | xy_grid[0, :, :] /= feature_map_w_h[0] 51 | xy_grid[1, :, :] /= feature_map_w_h[1] 52 | xy_grid = np.stack(xy_grid, axis=-1) 53 | xy_grid = np.tile(xy_grid, [1, 1, len(min_box)]) 54 | xy_grid = np.reshape(xy_grid, (-1, 2)) 55 | 56 | wh_grid = np.array(min_box) / np.array(image_size)[:, np.newaxis] 57 | wh_grid = np.tile(np.transpose(wh_grid), [np.product(feature_map_w_h), 1]) 58 | 59 | prior = np.concatenate((xy_grid, wh_grid), axis=-1) 60 | priors.append(prior) 61 | 62 | priors = np.concatenate(priors, axis=0) 63 | print(f'priors nums:{priors.shape[0]}') 64 | 65 | priors = tf.constant(priors, dtype=tf.float32, shape=priors.shape, name='priors') 66 | 67 | center_xy = reg[..., :2] * center_variance * priors[..., 2:] + priors[..., :2] 68 | center_wh = tf.exp(reg[..., 2:] * size_variance) * priors[..., 2:] 69 | 70 | # center to corner 71 | start_xy = center_xy - center_wh / 2 72 | end_xy = center_xy + center_wh / 2 73 | 74 | loc = tf.concat([start_xy, end_xy], axis=-1) 75 | loc = tf.clip_by_value(loc, clip_value_min=0.0, clip_value_max=1.0) 76 | 77 | return loc 78 | 79 | 80 | def load_weight(model, torch_path, mapping_table_path): 81 | torch_weights = torch.load(torch_path, map_location=torch.device('cpu')) 82 | 83 | with open(mapping_table_path, 'r') as f: 84 | mapping_table = json.load(f) 85 | mapping_table = {layer['name']: layer['weight'] for layer in mapping_table} 86 | 87 | for layer in model.layers: 88 | if layer.name in mapping_table: 89 | print(f'Set layer: {layer.name}') 90 | layer_type = layer.name.split('_')[-1] 91 | 92 | torch_layer_names = mapping_table[layer.name] 93 | if layer_type == 'conv': 94 | weight = np.array(torch_weights[torch_layer_names[0]]) 95 | weight = np.transpose(weight, [2, 3, 1, 0]) 96 | layer.set_weights([weight]) 97 | elif layer_type == 'dconv': 98 | weight = np.array(torch_weights[torch_layer_names[0]]) 99 | weight = 
np.transpose(weight, [2, 3, 0, 1]) 100 | layer.set_weights([weight]) 101 | elif layer_type == 'bn': 102 | gamma = np.array(torch_weights[torch_layer_names[0]]) 103 | beta = np.array(torch_weights[torch_layer_names[1]]) 104 | running_mean = np.array(torch_weights[torch_layer_names[2]]) 105 | running_var = np.array(torch_weights[torch_layer_names[3]]) 106 | layer.set_weights([gamma, beta, running_mean, running_var]) 107 | elif layer_type == 'convbias': 108 | weight = np.array(torch_weights[torch_layer_names[0]]) 109 | bias = np.array(torch_weights[torch_layer_names[1]]) 110 | weight = np.transpose(weight, [2, 3, 1, 0]) 111 | layer.set_weights([weight, bias]) 112 | elif layer_type == 'dconvbias': 113 | weight = np.array(torch_weights[torch_layer_names[0]]) 114 | bias = np.array(torch_weights[torch_layer_names[1]]) 115 | weight = np.transpose(weight, [2, 3, 0, 1]) 116 | layer.set_weights([weight, bias]) 117 | else: 118 | raise RuntimeError(f'Unknown Layer type \'{layer_type}\'.') 119 | else: 120 | print(f'Ignore layer: {layer.name}') 121 | -------------------------------------------------------------------------------- /tf/convert_tensorflow.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | from tf.backend.utils import load_weight 5 | from tf.model.rfb_320 import create_rfb_net 6 | from tf.model.slim_320 import create_slim_net 7 | 8 | parser = argparse.ArgumentParser( 9 | description='convert model') 10 | 11 | parser.add_argument('--net_type', default="RFB", type=str, 12 | help='The network architecture ,optional: RFB (higher precision) or slim (faster)') 13 | args = parser.parse_args() 14 | 15 | 16 | def main(): 17 | input_shape = (240, 320) # H,W 18 | base_channel = 8 * 2 19 | num_classes = 2 20 | 21 | if args.net_type == 'slim': 22 | torch_path = "../models/pretrained/version-slim-320.pth" 23 | mapping_table = "mapping_tables/slim_320.json" 24 | model = create_slim_net(input_shape, base_channel, num_classes) 25 | elif args.net_type == 'RFB': 26 | torch_path = "../models/pretrained/version-RFB-320.pth" 27 | mapping_table = "mapping_tables/rfb_320.json" 28 | model = create_rfb_net(input_shape, base_channel, num_classes) 29 | else: 30 | print("The net type is wrong!") 31 | sys.exit(1) 32 | 33 | load_weight(model, torch_path, mapping_table) 34 | model.save(f'export_models/{args.net_type}/', include_optimizer=False) 35 | 36 | 37 | if __name__ == '__main__': 38 | main() 39 | -------------------------------------------------------------------------------- /tf/det_image.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | import cv2 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | parser = argparse.ArgumentParser( 9 | description='convert model') 10 | 11 | parser.add_argument('--net_type', default="RFB", type=str, 12 | help='The network architecture ,optional: RFB (higher precision) or slim (faster)') 13 | parser.add_argument('--img_path', default='imgs/test_input.jpg', type=str, 14 | help='Image path for inference') 15 | args = parser.parse_args() 16 | 17 | 18 | def main(): 19 | if args.net_type == 'slim': 20 | model_path = "export_models/slim/" 21 | elif args.net_type == 'RFB': 22 | model_path = "export_models/RFB/" 23 | else: 24 | print("The net type is wrong!") 25 | sys.exit(1) 26 | 27 | model = tf.keras.models.load_model(model_path) 28 | 29 | img = cv2.imread(args.img_path) 30 | h, w, _ = img.shape 31 | img_resize = cv2.resize(img, 
(320, 240)) 32 | img_resize = cv2.cvtColor(img_resize, cv2.COLOR_BGR2RGB) 33 | img_resize = img_resize - 127.0 34 | img_resize = img_resize / 128.0 35 | 36 | results = model.predict(np.expand_dims(img_resize, axis=0)) # result=[background,face,x1,y1,x2,y2] 37 | 38 | for result in results: 39 | start_x = int(result[2] * w) 40 | start_y = int(result[3] * h) 41 | end_x = int(result[4] * w) 42 | end_y = int(result[5] * h) 43 | 44 | cv2.rectangle(img, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2) 45 | 46 | cv2.imwrite(f'imgs/test_output_{args.net_type}.jpg', img) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /tf/export_models/RFB/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/export_models/RFB/saved_model.pb -------------------------------------------------------------------------------- /tf/export_models/RFB/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/export_models/RFB/variables/variables.data-00000-of-00001 -------------------------------------------------------------------------------- /tf/export_models/RFB/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/export_models/RFB/variables/variables.index -------------------------------------------------------------------------------- /tf/export_models/slim/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/export_models/slim/saved_model.pb -------------------------------------------------------------------------------- /tf/export_models/slim/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/export_models/slim/variables/variables.data-00000-of-00001 -------------------------------------------------------------------------------- /tf/export_models/slim/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/export_models/slim/variables/variables.index -------------------------------------------------------------------------------- /tf/imgs/test_input.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/imgs/test_input.jpg -------------------------------------------------------------------------------- /tf/imgs/test_output_RFB.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/imgs/test_output_RFB.jpg -------------------------------------------------------------------------------- /tf/imgs/test_output_origin_RFB.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/imgs/test_output_origin_RFB.jpg -------------------------------------------------------------------------------- /tf/imgs/test_output_origin_slim.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/imgs/test_output_origin_slim.jpg -------------------------------------------------------------------------------- /tf/imgs/test_output_slim.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tf/imgs/test_output_slim.jpg -------------------------------------------------------------------------------- /tf/model/rfb_320.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from tf.backend.op import conv_bn, conv_dw, basic_rfb, separable_conv 4 | from tf.backend.utils import post_processing 5 | 6 | conf_threshold = 0.6 7 | nms_iou_threshold = 0.3 8 | nms_max_output_size = 200 9 | top_k = 100 10 | center_variance = 0.1 11 | size_variance = 0.2 12 | 13 | image_size = [320, 240] # default input size 320*240 14 | feature_map_wh_list = [[40, 30], [20, 15], [10, 8], [5, 4]] # default feature map size 15 | min_boxes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]] 16 | 17 | 18 | def create_rfb_net(input_shape, base_channel, num_classes): 19 | input_node = tf.keras.layers.Input(shape=(input_shape[0], input_shape[1], 3)) 20 | 21 | net = conv_bn(input_node, base_channel, stride=2, prefix='basenet.0') # 120x160 22 | net = conv_dw(net, base_channel * 2, stride=1, prefix='basenet.1') 23 | net = conv_dw(net, base_channel * 2, stride=2, prefix='basenet.2') # 60x80 24 | net = conv_dw(net, base_channel * 2, stride=1, prefix='basenet.3') 25 | net = conv_dw(net, base_channel * 4, stride=2, prefix='basenet.4') # 30x40 26 | net = conv_dw(net, base_channel * 4, stride=1, prefix='basenet.5') 27 | net = conv_dw(net, base_channel * 4, stride=1, prefix='basenet.6') 28 | header_0 = basic_rfb(net, base_channel * 4, base_channel * 4, stride=1, scale=1.0, prefix='basenet.7') 29 | net = conv_dw(header_0, base_channel * 8, stride=2, prefix='basenet.8') # 15x20 30 | net = conv_dw(net, base_channel * 8, stride=1, prefix='basenet.9') 31 | header_1 = conv_dw(net, base_channel * 8, stride=1, prefix='basenet.10') 32 | net = conv_dw(header_1, base_channel * 16, stride=2, prefix='basenet.11') # 8x10 33 | header_2 = conv_dw(net, base_channel * 16, stride=1, prefix='basenet.12') 34 | 35 | out = tf.keras.layers.Conv2D(base_channel * 4, 1, padding='SAME', name='extras_convbias')(header_2) 36 | out = tf.keras.layers.ReLU(name='extras_relu1')(out) 37 | out = separable_conv(out, base_channel * 16, kernel_size=3, stride=2, padding=1, 38 | prefix='extras_sep') 39 | header_3 = tf.keras.layers.ReLU(name='extras_relu2')(out) 40 | 41 | reg_0 = separable_conv(header_0, 3 * 4, kernel_size=3, 
stride=1, padding=1, 42 | prefix='reg_0_sep') 43 | cls_0 = separable_conv(header_0, 3 * num_classes, kernel_size=3, stride=1, padding=1, 44 | prefix='cls_0_sep') 45 | 46 | reg_1 = separable_conv(header_1, 2 * 4, kernel_size=3, stride=1, padding=1, 47 | prefix='reg_1_sep') 48 | cls_1 = separable_conv(header_1, 2 * num_classes, kernel_size=3, stride=1, padding=1, 49 | prefix='cls_1_sep') 50 | 51 | reg_2 = separable_conv(header_2, 2 * 4, kernel_size=3, stride=1, padding=1, 52 | prefix='reg_2_sep') 53 | cls_2 = separable_conv(header_2, 2 * num_classes, kernel_size=3, stride=1, padding=1, 54 | prefix='cls_2_sep') 55 | 56 | reg_3 = tf.keras.layers.Conv2D(3 * 4, kernel_size=3, padding='SAME', 57 | name='reg_3_convbias')(header_3) 58 | cls_3 = tf.keras.layers.Conv2D(3 * num_classes, kernel_size=3, padding='SAME', 59 | name='cls_3_convbias')(header_3) 60 | 61 | result = post_processing([reg_0, reg_1, reg_2, reg_3], 62 | [cls_0, cls_1, cls_2, cls_3], 63 | num_classes, image_size, feature_map_wh_list, min_boxes, 64 | center_variance, size_variance) 65 | 66 | model = tf.keras.Model(inputs=[input_node], outputs=[result]) 67 | model.summary() 68 | 69 | return model 70 | -------------------------------------------------------------------------------- /tf/model/slim_320.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from tf.backend.op import conv_bn, conv_dw, separable_conv 4 | from tf.backend.utils import post_processing 5 | 6 | conf_threshold = 0.6 7 | nms_iou_threshold = 0.3 8 | nms_max_output_size = 200 9 | top_k = 100 10 | center_variance = 0.1 11 | size_variance = 0.2 12 | 13 | image_size = [320, 240] # default input size 320*240 14 | feature_map_wh_list = [[40, 30], [20, 15], [10, 8], [5, 4]] # default feature map size 15 | min_boxes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]] 16 | 17 | 18 | def create_slim_net(input_shape, base_channel, num_classes): 19 | input_node = tf.keras.layers.Input(shape=(input_shape[0], input_shape[1], 3)) 20 | 21 | net = conv_bn(input_node, base_channel, stride=2, prefix='basenet.0') # 120x160 22 | net = conv_dw(net, base_channel * 2, stride=1, prefix='basenet.1') 23 | net = conv_dw(net, base_channel * 2, stride=2, prefix='basenet.2') # 60x80 24 | net = conv_dw(net, base_channel * 2, stride=1, prefix='basenet.3') 25 | net = conv_dw(net, base_channel * 4, stride=2, prefix='basenet.4') # 30x40 26 | net = conv_dw(net, base_channel * 4, stride=1, prefix='basenet.5') 27 | net = conv_dw(net, base_channel * 4, stride=1, prefix='basenet.6') 28 | header_0 = conv_dw(net, base_channel * 4, stride=1, prefix='basenet.7') 29 | net = conv_dw(header_0, base_channel * 8, stride=2, prefix='basenet.8') # 15x20 30 | net = conv_dw(net, base_channel * 8, stride=1, prefix='basenet.9') 31 | header_1 = conv_dw(net, base_channel * 8, stride=1, prefix='basenet.10') 32 | net = conv_dw(header_1, base_channel * 16, stride=2, prefix='basenet.11') # 8x10 33 | header_2 = conv_dw(net, base_channel * 16, stride=1, prefix='basenet.12') 34 | 35 | out = tf.keras.layers.Conv2D(base_channel * 4, 1, padding='SAME', name='extras_convbias')(header_2) 36 | out = tf.keras.layers.ReLU(name='extras_relu1')(out) 37 | out = separable_conv(out, base_channel * 16, kernel_size=3, stride=2, padding=1, 38 | prefix='extras_sep') 39 | header_3 = tf.keras.layers.ReLU(name='extras_relu2')(out) 40 | 41 | reg_0 = separable_conv(header_0, 3 * 4, kernel_size=3, stride=1, padding=1, 42 | prefix='reg_0_sep') 43 | cls_0 = separable_conv(header_0, 3 * 
num_classes, kernel_size=3, stride=1, padding=1, 44 | prefix='cls_0_sep') 45 | 46 | reg_1 = separable_conv(header_1, 2 * 4, kernel_size=3, stride=1, padding=1, 47 | prefix='reg_1_sep') 48 | cls_1 = separable_conv(header_1, 2 * num_classes, kernel_size=3, stride=1, padding=1, 49 | prefix='cls_1_sep') 50 | 51 | reg_2 = separable_conv(header_2, 2 * 4, kernel_size=3, stride=1, padding=1, 52 | prefix='reg_2_sep') 53 | cls_2 = separable_conv(header_2, 2 * num_classes, kernel_size=3, stride=1, padding=1, 54 | prefix='cls_2_sep') 55 | 56 | reg_3 = tf.keras.layers.Conv2D(3 * 4, kernel_size=3, padding='SAME', 57 | name='reg_3_convbias')(header_3) 58 | cls_3 = tf.keras.layers.Conv2D(3 * num_classes, kernel_size=3, padding='SAME', 59 | name='cls_3_convbias')(header_3) 60 | 61 | result = post_processing([reg_0, reg_1, reg_2, reg_3], 62 | [cls_0, cls_1, cls_2, cls_3], 63 | num_classes, image_size, feature_map_wh_list, min_boxes, 64 | center_variance, size_variance) 65 | 66 | model = tf.keras.Model(inputs=[input_node], outputs=[result]) 67 | model.summary() 68 | 69 | return model 70 | -------------------------------------------------------------------------------- /tflite/README.md: -------------------------------------------------------------------------------- 1 | # TFLite implementation of Ultra-Light-Fast-Generic-Face-Detector-1MB 2 | 3 | TFLite model is suitable for edge computing devices. 4 | Please refer to the official [Android Demo](https://github.com/tensorflow/examples/tree/master/lite/examples/image_classification/android) for learning how to deploy the face detection model on your phones. 5 | 6 | ![Dwq3dS.jpg](https://s3.ax1x.com/2020/11/26/Dwq3dS.jpg) 7 | 8 | ## Run 9 | 10 | Inference on image 11 | 12 | ``` bash 13 | python3 inference_test.py --net_type --img_path 14 | ``` 15 | 16 | Inference on video 17 | 18 | ``` bash 19 | python3 inference_test.py --net_type --video_path 20 | ``` 21 | 22 | ## Import 23 | 24 | ``` python 25 | from TFLiteFaceDetector import UltraLightFaceDetecion 26 | 27 | fd = UltraLightFaceDetecion(model_path, 28 | input_size=(320, 240), conf_threshold=0.6, 29 | center_variance=0.1, size_variance=0.2, 30 | nms_max_output_size=200, nms_iou_threshold=0.3) 31 | ``` 32 | 33 | ## Files Tree 34 | 35 | The pretrained weights are converted form `onnx -> protobuf -> tflite`. 36 | 37 | ``` bash 38 | . 
39 | ├── model # keras defined model architecture 40 | │ ├── tflite_RFB_320_without_postprocessing.py 41 | │ └── tflite_slim_320_without_postprocessing.py 42 | ├── pretrained # pretrained model without post-processing 43 | │ ├── version-RFB-320_without_postprocessing.tflite 44 | │ └── version-slim-320_without_postprocessing.tflite 45 | ├── README.md 46 | ├── inference_test.py # detector test script 47 | └── TFLiteFaceDetector.py # class file of the tflite detector 48 | ``` 49 | 50 | ## Special Thanks 51 | 52 | Part of the code for this work is referenced from the following repositories: 53 | 54 | - [Ultra-Light-Fast-Generic-Face-Detector_Tensorflow-Model-Converter](https://github.com/jason9075/Ultra-Light-Fast-Generic-Face-Detector_Tensorflow-Model-Converter) 55 | -------------------------------------------------------------------------------- /tflite/TFLiteFaceDetector.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import cv2 3 | import tensorflow as tf 4 | import numpy as np 5 | 6 | 7 | class UltraLightFaceDetecion(): 8 | def __init__(self, filepath, input_size=(320, 240), conf_threshold=0.6, 9 | center_variance=0.1, size_variance=0.2, 10 | nms_max_output_size=200, nms_iou_threshold=0.3) -> None: 11 | 12 | self._feature_maps = np.array([[40, 30], [20, 15], [10, 8], [5, 4]]) 13 | self._min_boxes = np.array([[10, 16, 24], [32, 48], 14 | [64, 96], [128, 192, 256]]) 15 | 16 | self._resize = partial(cv2.resize, dsize=input_size) 17 | self._input_size = np.array(input_size)[:, None] 18 | 19 | self._anchors_xy, self._anchors_wh = self._generate_anchors() 20 | self._conf_threshold = conf_threshold 21 | self._center_variance = center_variance 22 | self._size_variance = size_variance 23 | self._nms = partial(tf.image.non_max_suppression, 24 | max_output_size=nms_max_output_size, 25 | iou_threshold=nms_iou_threshold) 26 | 27 | # tflite model init 28 | self._interpreter = tf.lite.Interpreter(model_path=filepath) 29 | self._interpreter.allocate_tensors() 30 | 31 | # model details 32 | input_details = self._interpreter.get_input_details() 33 | output_details = self._interpreter.get_output_details() 34 | 35 | # inference helper 36 | self._set_input_tensor = partial(self._interpreter.set_tensor, 37 | input_details[0]["index"]) 38 | self._get_boxes_tensor = partial(self._interpreter.get_tensor, 39 | output_details[0]["index"]) 40 | self._get_scores_tensor = partial(self._interpreter.get_tensor, 41 | output_details[1]["index"]) 42 | 43 | def _generate_anchors(self): 44 | anchors = [] 45 | for feature_map_w_h, min_box in zip(self._feature_maps, self._min_boxes): 46 | 47 | wh_grid = min_box / self._input_size 48 | wh_grid = np.tile(wh_grid.T, (np.prod(feature_map_w_h), 1)) 49 | 50 | xy_grid = np.meshgrid(range(feature_map_w_h[0]), 51 | range(feature_map_w_h[1])) 52 | xy_grid = np.add(xy_grid, 0.5) 53 | 54 | xy_grid /= feature_map_w_h[..., None, None] 55 | 56 | xy_grid = np.stack(xy_grid, axis=-1) 57 | xy_grid = np.tile(xy_grid, [1, 1, len(min_box)]) 58 | xy_grid = xy_grid.reshape(-1, 2) 59 | 60 | prior = np.concatenate((xy_grid, wh_grid), axis=-1) 61 | anchors.append(prior) 62 | 63 | anchors = np.concatenate(anchors, axis=0) 64 | anchors = np.clip(anchors, 0.0, 1.0) 65 | 66 | return anchors[:, :2], anchors[:, 2:] 67 | 68 | def _pre_processing(self, img): 69 | resized = self._resize(img) 70 | image_rgb = resized[..., ::-1] 71 | image_norm = image_rgb.astype(np.float32) 72 | cv2.normalize(image_norm, image_norm, 73 | alpha=-1, beta=1, 
norm_type=cv2.NORM_MINMAX) 74 | return image_norm[None, ...] 75 | 76 | def inference(self, img): 77 | # BGR image to tensor 78 | input_tensor = self._pre_processing(img) 79 | 80 | # set tensor and invoke 81 | self._set_input_tensor(input_tensor) 82 | self._interpreter.invoke() 83 | 84 | # get results 85 | boxes = self._get_boxes_tensor()[0] 86 | scores = self._get_scores_tensor()[0] 87 | 88 | # decode boxes to corner format 89 | boxes, scores = self._post_processing(boxes, scores) 90 | boxes *= np.tile(img.shape[1::-1], 2) 91 | 92 | return boxes, scores 93 | 94 | def _post_processing(self, boxes, scores): 95 | # bounding box regression 96 | boxes = self._decode_regression(boxes) 97 | scores = scores[:, 1] 98 | 99 | # confidence threshold filter 100 | conf_mask = self._conf_threshold < scores 101 | boxes, scores = boxes[conf_mask], scores[conf_mask] 102 | 103 | # non-maximum suppression 104 | nms_mask = self._nms(boxes=boxes, scores=scores) 105 | boxes = np.take(boxes, nms_mask, axis=0) 106 | 107 | return boxes, scores 108 | 109 | def _decode_regression(self, reg): 110 | # bounding box regression 111 | center_xy = reg[:, :2] * self._center_variance * \ 112 | self._anchors_wh + self._anchors_xy 113 | center_wh = np.exp( 114 | reg[:, 2:] * self._size_variance) * self._anchors_wh / 2 115 | 116 | # center to corner 117 | start_xy = center_xy - center_wh 118 | end_xy = center_xy + center_wh 119 | 120 | boxes = np.concatenate((start_xy, end_xy), axis=-1) 121 | boxes = np.clip(boxes, 0.0, 1.0) 122 | 123 | return boxes 124 | -------------------------------------------------------------------------------- /tflite/inference_test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import time 4 | 5 | from TFLiteFaceDetector import UltraLightFaceDetecion 6 | 7 | 8 | parser = argparse.ArgumentParser(description='TFLite Face Detector') 9 | 10 | parser.add_argument('--net_type', default="RFB", type=str, 11 | help='The network architecture ,optional: RFB (higher precision) or slim (faster)') 12 | parser.add_argument('--img_path', type=str, help='Image path for inference') 13 | parser.add_argument('--video_path', type=str, help='Video path for inference') 14 | 15 | args = parser.parse_args() 16 | 17 | 18 | def image_inference(image_path, model_path, color=(125, 255, 0)): 19 | 20 | fd = UltraLightFaceDetecion(model_path, 21 | conf_threshold=0.6) 22 | 23 | img = cv2.imread(image_path) 24 | 25 | boxes, scores = fd.inference(img) 26 | 27 | for result in boxes.astype(int): 28 | cv2.rectangle(img, (result[0], result[1]), 29 | (result[2], result[3]), color, 2) 30 | 31 | cv2.imshow('res', img) 32 | cv2.waitKey(0) 33 | cv2.destroyAllWindows() 34 | 35 | 36 | def video_inference(video, model_path, color=(125, 255, 0)): 37 | 38 | fd = UltraLightFaceDetecion(model_path, 39 | conf_threshold=0.88) 40 | 41 | cap = cv2.VideoCapture(video) 42 | 43 | while True: 44 | ret, frame = cap.read() 45 | 46 | if not ret: 47 | break 48 | 49 | start_time = time.perf_counter() 50 | boxes, scores = fd.inference(frame) 51 | print(time.perf_counter() - start_time) 52 | 53 | for result in boxes.astype(int): 54 | cv2.rectangle(frame, (result[0], result[1]), 55 | (result[2], result[3]), color, 2) 56 | 57 | cv2.imshow('res', frame) 58 | if cv2.waitKey(1) == ord('q'): 59 | break 60 | 61 | cap.release() 62 | cv2.destroyAllWindows() 63 | 64 | 65 | if __name__ == '__main__': 66 | 67 | filepath = f"pretrained/version-{args.net_type}-320_without_postprocessing.tflite" 68 | 69 | 
if args.img_path: 70 | image_inference(args.img_path, filepath) 71 | elif args.video_path: 72 | video_inference(args.video_path, filepath) 73 | else: 74 | print('--ima_path or --video_path must be filled') 75 | -------------------------------------------------------------------------------- /tflite/model/tflite_slim_320_without_postprocessing.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | import sys 4 | sys.path.append("../tf") 5 | from backend.op import conv_bn, conv_dw, separable_conv 6 | 7 | 8 | def create_slim_net(input_shape, base_channel, num_classes): 9 | input_node = tf.keras.layers.Input( 10 | shape=(input_shape[0], input_shape[1], 3)) 11 | 12 | net = conv_bn(input_node, base_channel, stride=2, 13 | prefix='basenet.0') # 120x160 14 | net = conv_dw(net, base_channel * 2, stride=1, prefix='basenet.1') 15 | net = conv_dw(net, base_channel * 2, stride=2, prefix='basenet.2') # 60x80 16 | net = conv_dw(net, base_channel * 2, stride=1, prefix='basenet.3') 17 | net = conv_dw(net, base_channel * 4, stride=2, prefix='basenet.4') # 30x40 18 | net = conv_dw(net, base_channel * 4, stride=1, prefix='basenet.5') 19 | net = conv_dw(net, base_channel * 4, stride=1, prefix='basenet.6') 20 | header_0 = conv_dw(net, base_channel * 4, stride=1, prefix='basenet.7') 21 | net = conv_dw(header_0, base_channel * 8, stride=2, 22 | prefix='basenet.8') # 15x20 23 | net = conv_dw(net, base_channel * 8, stride=1, prefix='basenet.9') 24 | header_1 = conv_dw(net, base_channel * 8, stride=1, prefix='basenet.10') 25 | net = conv_dw(header_1, base_channel * 16, 26 | stride=2, prefix='basenet.11') # 8x10 27 | header_2 = conv_dw(net, base_channel * 16, stride=1, prefix='basenet.12') 28 | 29 | out = tf.keras.layers.Conv2D( 30 | base_channel * 4, 1, padding='SAME', name='extras_convbias')(header_2) 31 | out = tf.keras.layers.ReLU(name='extras_relu1')(out) 32 | out = separable_conv(out, base_channel * 16, kernel_size=3, stride=2, padding=1, 33 | prefix='extras_sep') 34 | header_3 = tf.keras.layers.ReLU(name='extras_relu2')(out) 35 | 36 | reg_0 = separable_conv(header_0, 3 * 4, kernel_size=3, stride=1, padding=1, 37 | prefix='reg_0_sep') 38 | cls_0 = separable_conv(header_0, 3 * num_classes, kernel_size=3, stride=1, padding=1, 39 | prefix='cls_0_sep') 40 | 41 | reg_1 = separable_conv(header_1, 2 * 4, kernel_size=3, stride=1, padding=1, 42 | prefix='reg_1_sep') 43 | cls_1 = separable_conv(header_1, 2 * num_classes, kernel_size=3, stride=1, padding=1, 44 | prefix='cls_1_sep') 45 | 46 | reg_2 = separable_conv(header_2, 2 * 4, kernel_size=3, stride=1, padding=1, 47 | prefix='reg_2_sep') 48 | cls_2 = separable_conv(header_2, 2 * num_classes, kernel_size=3, stride=1, padding=1, 49 | prefix='cls_2_sep') 50 | 51 | reg_3 = tf.keras.layers.Conv2D(3 * 4, kernel_size=3, padding='SAME', 52 | name='reg_3_convbias')(header_3) 53 | cls_3 = tf.keras.layers.Conv2D(3 * num_classes, kernel_size=3, padding='SAME', 54 | name='cls_3_convbias')(header_3) 55 | 56 | reg_list = [tf.keras.layers.Reshape([-1, 4])(reg) for reg in [reg_0, reg_1, reg_2, reg_3]] 57 | cls_list = [tf.keras.layers.Reshape([-1, num_classes])(cls) for cls in [cls_0, cls_1, cls_2, cls_3]] 58 | 59 | reg = tf.keras.layers.Concatenate(axis=1, name='face_boxes')(reg_list) 60 | cls = tf.keras.layers.Concatenate(axis=1)(cls_list) 61 | 62 | cls = tf.keras.layers.Softmax(axis=-1, name='face_scores')(cls) 63 | 64 | model = tf.keras.Model(inputs=[input_node], outputs=[reg, cls]) 65 | 66 | model.summary() 67 | 68 | 
return model 69 | -------------------------------------------------------------------------------- /tflite/pretrained/version-RFB-320_without_postprocessing.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tflite/pretrained/version-RFB-320_without_postprocessing.tflite -------------------------------------------------------------------------------- /tflite/pretrained/version-slim-320_without_postprocessing.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/tflite/pretrained/version-slim-320_without_postprocessing.tflite -------------------------------------------------------------------------------- /train-version-RFB.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | model_root_path="./models/train-version-RFB" 3 | log_dir="$model_root_path/logs" 4 | log="$log_dir/log" 5 | mkdir -p "$log_dir" 6 | 7 | python3 -u train.py \ 8 | --datasets \ 9 | ./data/wider_face_add_lm_10_10 \ 10 | --validation_dataset \ 11 | ./data/wider_face_add_lm_10_10 \ 12 | --net \ 13 | RFB \ 14 | --num_epochs \ 15 | 200 \ 16 | --milestones \ 17 | "95,150" \ 18 | --lr \ 19 | 1e-2 \ 20 | --batch_size \ 21 | 24 \ 22 | --input_size \ 23 | 320 \ 24 | --checkpoint_folder \ 25 | ${model_root_path} \ 26 | --num_workers \ 27 | 0 \ 28 | --log_dir \ 29 | ${log_dir} \ 30 | --cuda_index \ 31 | 0 \ 32 | 2>&1 | tee "$log" 33 | -------------------------------------------------------------------------------- /train-version-slim.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | model_root_path="./models/train-version-slim" 3 | log_dir="$model_root_path/logs" 4 | log="$log_dir/log" 5 | mkdir -p "$log_dir" 6 | 7 | python3 -u train.py \ 8 | --datasets \ 9 | ./data/wider_face_add_lm_10_10 \ 10 | --validation_dataset \ 11 | ./data/wider_face_add_lm_10_10 \ 12 | --net \ 13 | slim \ 14 | --num_epochs \ 15 | 200 \ 16 | --milestones \ 17 | "95,150" \ 18 | --lr \ 19 | 1e-2 \ 20 | --batch_size \ 21 | 24 \ 22 | --input_size \ 23 | 320 \ 24 | --checkpoint_folder \ 25 | ${model_root_path} \ 26 | --num_workers \ 27 | 4 \ 28 | --log_dir \ 29 | ${log_dir} \ 30 | --cuda_index \ 31 | 0 \ 32 | 2>&1 | tee "$log" 33 | -------------------------------------------------------------------------------- /vision/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/vision/__init__.py -------------------------------------------------------------------------------- /vision/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/vision/datasets/__init__.py -------------------------------------------------------------------------------- /vision/datasets/voc_dataset.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pathlib 4 | import xml.etree.ElementTree as ET 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 
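# Minimal usage sketch (illustrative, not part of the original file). The path is a
# placeholder and assumes the VOC-style layout produced by wider_face_2_voc_add_landmark.py:
#
#   dataset = VOCDataset("./data/wider_face_add_lm_10_10", is_test=False)
#   image, boxes, labels = dataset[0]  # RGB image, (N, 4) corner boxes, (N,) class ids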
10 | class VOCDataset: 11 | 12 | def __init__(self, root, transform=None, target_transform=None, is_test=False, keep_difficult=False, label_file=None): 13 | """Dataset for VOC data. 14 | Args: 15 | root: the root of the VOC2007 or VOC2012 dataset, the directory contains the following sub-directories: 16 | Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject. 17 | """ 18 | self.root = pathlib.Path(root) 19 | self.transform = transform 20 | self.target_transform = target_transform 21 | if is_test: 22 | image_sets_file = self.root / "ImageSets/Main/test.txt" 23 | else: 24 | image_sets_file = self.root / "ImageSets/Main/trainval.txt" 25 | self.ids = VOCDataset._read_image_ids(image_sets_file) 26 | self.keep_difficult = keep_difficult 27 | 28 | # if the labels file exists, read in the class names 29 | label_file_name = self.root / "labels.txt" 30 | 31 | if os.path.isfile(label_file_name): 32 | class_string = "" 33 | with open(label_file_name, 'r') as infile: 34 | for line in infile: 35 | class_string += line.rstrip() 36 | 37 | # classes should be a comma separated list 38 | 39 | classes = class_string.split(',') 40 | # prepend BACKGROUND as first class 41 | classes.insert(0, 'BACKGROUND') 42 | classes = [elem.replace(" ", "") for elem in classes] 43 | self.class_names = tuple(classes) 44 | logging.info("VOC Labels read from file: " + str(self.class_names)) 45 | 46 | else: 47 | logging.info("No labels file, using default VOC classes.") 48 | self.class_names = ('BACKGROUND', 49 | 'face') 50 | 51 | self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)} 52 | 53 | def __getitem__(self, index): 54 | image_id = self.ids[index] 55 | boxes, labels, is_difficult = self._get_annotation(image_id) 56 | if not self.keep_difficult: 57 | boxes = boxes[is_difficult == 0] 58 | labels = labels[is_difficult == 0] 59 | image = self._read_image(image_id) 60 | if self.transform: 61 | image, boxes, labels = self.transform(image, boxes, labels) 62 | if self.target_transform: 63 | boxes, labels = self.target_transform(boxes, labels) 64 | return image, boxes, labels 65 | 66 | def get_image(self, index): 67 | image_id = self.ids[index] 68 | image = self._read_image(image_id) 69 | if self.transform: 70 | image, _ = self.transform(image) 71 | return image 72 | 73 | def get_annotation(self, index): 74 | image_id = self.ids[index] 75 | return image_id, self._get_annotation(image_id) 76 | 77 | def __len__(self): 78 | return len(self.ids) 79 | 80 | @staticmethod 81 | def _read_image_ids(image_sets_file): 82 | ids = [] 83 | with open(image_sets_file) as f: 84 | for line in f: 85 | ids.append(line.rstrip()) 86 | return ids 87 | 88 | def _get_annotation(self, image_id): 89 | annotation_file = self.root / f"Annotations/{image_id}.xml" 90 | objects = ET.parse(annotation_file).findall("object") 91 | boxes = [] 92 | labels = [] 93 | is_difficult = [] 94 | for object in objects: 95 | class_name = object.find('name').text.lower().strip() 96 | # we're only concerned with clases in our list 97 | if class_name in self.class_dict: 98 | bbox = object.find('bndbox') 99 | 100 | # VOC dataset format follows Matlab, in which indexes start from 0 101 | x1 = float(bbox.find('xmin').text) - 1 102 | y1 = float(bbox.find('ymin').text) - 1 103 | x2 = float(bbox.find('xmax').text) - 1 104 | y2 = float(bbox.find('ymax').text) - 1 105 | boxes.append([x1, y1, x2, y2]) 106 | 107 | labels.append(self.class_dict[class_name]) 108 | is_difficult_str = object.find('difficult').text 109 | 
is_difficult.append(int(is_difficult_str) if is_difficult_str else 0) 110 | 111 | return (np.array(boxes, dtype=np.float32), 112 | np.array(labels, dtype=np.int64), 113 | np.array(is_difficult, dtype=np.uint8)) 114 | 115 | def _read_image(self, image_id): 116 | image_file = self.root / f"JPEGImages/{image_id}.jpg" 117 | image = cv2.imread(str(image_file)) 118 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 119 | return image 120 | -------------------------------------------------------------------------------- /vision/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/vision/nn/__init__.py -------------------------------------------------------------------------------- /vision/nn/mb_tiny.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class Mb_Tiny(nn.Module): 6 | 7 | def __init__(self, num_classes=2): 8 | super(Mb_Tiny, self).__init__() 9 | self.base_channel = 8 * 2 10 | 11 | def conv_bn(inp, oup, stride): 12 | return nn.Sequential( 13 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 14 | nn.BatchNorm2d(oup), 15 | nn.ReLU(inplace=True) 16 | ) 17 | 18 | def conv_dw(inp, oup, stride): 19 | return nn.Sequential( 20 | nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), 21 | nn.BatchNorm2d(inp), 22 | nn.ReLU(inplace=True), 23 | 24 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 25 | nn.BatchNorm2d(oup), 26 | nn.ReLU(inplace=True), 27 | ) 28 | 29 | self.model = nn.Sequential( 30 | conv_bn(3, self.base_channel, 2), # 160*120 31 | conv_dw(self.base_channel, self.base_channel * 2, 1), 32 | conv_dw(self.base_channel * 2, self.base_channel * 2, 2), # 80*60 33 | conv_dw(self.base_channel * 2, self.base_channel * 2, 1), 34 | conv_dw(self.base_channel * 2, self.base_channel * 4, 2), # 40*30 35 | conv_dw(self.base_channel * 4, self.base_channel * 4, 1), 36 | conv_dw(self.base_channel * 4, self.base_channel * 4, 1), 37 | conv_dw(self.base_channel * 4, self.base_channel * 4, 1), 38 | conv_dw(self.base_channel * 4, self.base_channel * 8, 2), # 20*15 39 | conv_dw(self.base_channel * 8, self.base_channel * 8, 1), 40 | conv_dw(self.base_channel * 8, self.base_channel * 8, 1), 41 | conv_dw(self.base_channel * 8, self.base_channel * 16, 2), # 10*8 42 | conv_dw(self.base_channel * 16, self.base_channel * 16, 1) 43 | ) 44 | self.fc = nn.Linear(1024, num_classes) 45 | 46 | def forward(self, x): 47 | x = self.model(x) 48 | x = F.avg_pool2d(x, 7) 49 | x = x.view(-1, 1024) 50 | x = self.fc(x) 51 | return x 52 | -------------------------------------------------------------------------------- /vision/nn/mb_tiny_RFB.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class BasicConv(nn.Module): 7 | 8 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True): 9 | super(BasicConv, self).__init__() 10 | self.out_channels = out_planes 11 | if bn: 12 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False) 13 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) 14 | self.relu = nn.ReLU(inplace=True) if relu else None 15 | else: 16 | self.conv = 
nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=True) 17 | self.bn = None 18 | self.relu = nn.ReLU(inplace=True) if relu else None 19 | 20 | def forward(self, x): 21 | x = self.conv(x) 22 | if self.bn is not None: 23 | x = self.bn(x) 24 | if self.relu is not None: 25 | x = self.relu(x) 26 | return x 27 | 28 | 29 | class BasicRFB(nn.Module): 30 | 31 | def __init__(self, in_planes, out_planes, stride=1, scale=0.1, map_reduce=8, vision=1, groups=1): 32 | super(BasicRFB, self).__init__() 33 | self.scale = scale 34 | self.out_channels = out_planes 35 | inter_planes = in_planes // map_reduce 36 | 37 | self.branch0 = nn.Sequential( 38 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 39 | BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups), 40 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 1, dilation=vision + 1, relu=False, groups=groups) 41 | ) 42 | self.branch1 = nn.Sequential( 43 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 44 | BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups), 45 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 2, dilation=vision + 2, relu=False, groups=groups) 46 | ) 47 | self.branch2 = nn.Sequential( 48 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 49 | BasicConv(inter_planes, (inter_planes // 2) * 3, kernel_size=3, stride=1, padding=1, groups=groups), 50 | BasicConv((inter_planes // 2) * 3, 2 * inter_planes, kernel_size=3, stride=stride, padding=1, groups=groups), 51 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 4, dilation=vision + 4, relu=False, groups=groups) 52 | ) 53 | 54 | self.ConvLinear = BasicConv(6 * inter_planes, out_planes, kernel_size=1, stride=1, relu=False) 55 | self.shortcut = BasicConv(in_planes, out_planes, kernel_size=1, stride=stride, relu=False) 56 | self.relu = nn.ReLU(inplace=False) 57 | 58 | def forward(self, x): 59 | x0 = self.branch0(x) 60 | x1 = self.branch1(x) 61 | x2 = self.branch2(x) 62 | 63 | out = torch.cat((x0, x1, x2), 1) 64 | out = self.ConvLinear(out) 65 | short = self.shortcut(x) 66 | out = out * self.scale + short 67 | out = self.relu(out) 68 | 69 | return out 70 | 71 | 72 | class Mb_Tiny_RFB(nn.Module): 73 | 74 | def __init__(self, num_classes=2): 75 | super(Mb_Tiny_RFB, self).__init__() 76 | self.base_channel = 8 * 2 77 | 78 | def conv_bn(inp, oup, stride): 79 | return nn.Sequential( 80 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 81 | nn.BatchNorm2d(oup), 82 | nn.ReLU(inplace=True) 83 | ) 84 | 85 | def conv_dw(inp, oup, stride): 86 | return nn.Sequential( 87 | nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), 88 | nn.BatchNorm2d(inp), 89 | nn.ReLU(inplace=True), 90 | 91 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 92 | nn.BatchNorm2d(oup), 93 | nn.ReLU(inplace=True), 94 | ) 95 | 96 | self.model = nn.Sequential( 97 | conv_bn(3, self.base_channel, 2), # 160*120 98 | conv_dw(self.base_channel, self.base_channel * 2, 1), 99 | conv_dw(self.base_channel * 2, self.base_channel * 2, 2), # 80*60 100 | conv_dw(self.base_channel * 2, self.base_channel * 2, 1), 101 | conv_dw(self.base_channel * 2, self.base_channel * 4, 2), # 40*30 102 | conv_dw(self.base_channel * 4, self.base_channel * 
4, 1), 103 | conv_dw(self.base_channel * 4, self.base_channel * 4, 1), 104 | BasicRFB(self.base_channel * 4, self.base_channel * 4, stride=1, scale=1.0), 105 | conv_dw(self.base_channel * 4, self.base_channel * 8, 2), # 20*15 106 | conv_dw(self.base_channel * 8, self.base_channel * 8, 1), 107 | conv_dw(self.base_channel * 8, self.base_channel * 8, 1), 108 | conv_dw(self.base_channel * 8, self.base_channel * 16, 2), # 10*8 109 | conv_dw(self.base_channel * 16, self.base_channel * 16, 1) 110 | ) 111 | self.fc = nn.Linear(1024, num_classes) 112 | 113 | def forward(self, x): 114 | x = self.model(x) 115 | x = F.avg_pool2d(x, 7) 116 | x = x.view(-1, 1024) 117 | x = self.fc(x) 118 | return x 119 | -------------------------------------------------------------------------------- /vision/nn/multibox_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from ..utils import box_utils 6 | 7 | 8 | class MultiboxLoss(nn.Module): 9 | def __init__(self, priors, neg_pos_ratio, 10 | center_variance, size_variance, device): 11 | """Implement SSD Multibox Loss. 12 | 13 | Basically, Multibox loss combines classification loss 14 | and Smooth L1 regression loss. 15 | """ 16 | super(MultiboxLoss, self).__init__() 17 | self.neg_pos_ratio = neg_pos_ratio 18 | self.center_variance = center_variance 19 | self.size_variance = size_variance 20 | self.priors = priors 21 | self.priors.to(device) 22 | 23 | def forward(self, confidence, predicted_locations, labels, gt_locations): 24 | """Compute classification loss and smooth l1 loss. 25 | 26 | Args: 27 | confidence (batch_size, num_priors, num_classes): class predictions. 28 | locations (batch_size, num_priors, 4): predicted locations. 29 | labels (batch_size, num_priors): real labels of all the priors. 30 | boxes (batch_size, num_priors, 4): real boxes corresponding all the priors. 
31 | """ 32 | num_classes = confidence.size(2) 33 | with torch.no_grad(): 34 | # derived from cross_entropy=sum(log(p)) 35 | loss = -F.log_softmax(confidence, dim=2)[:, :, 0] 36 | mask = box_utils.hard_negative_mining(loss, labels, self.neg_pos_ratio) 37 | 38 | confidence = confidence[mask, :] 39 | classification_loss = F.cross_entropy(confidence.reshape(-1, num_classes), labels[mask], reduction='sum') 40 | pos_mask = labels > 0 41 | predicted_locations = predicted_locations[pos_mask, :].reshape(-1, 4) 42 | gt_locations = gt_locations[pos_mask, :].reshape(-1, 4) 43 | smooth_l1_loss = F.smooth_l1_loss(predicted_locations, gt_locations, reduction='sum') # smooth_l1_loss 44 | # smooth_l1_loss = F.mse_loss(predicted_locations, gt_locations, reduction='sum') #l2 loss 45 | num_pos = gt_locations.size(0) 46 | return smooth_l1_loss / num_pos, classification_loss / num_pos 47 | -------------------------------------------------------------------------------- /vision/ssd/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/vision/ssd/__init__.py -------------------------------------------------------------------------------- /vision/ssd/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/vision/ssd/config/__init__.py -------------------------------------------------------------------------------- /vision/ssd/config/fd_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from vision.utils.box_utils import generate_priors 4 | 5 | image_mean_test = image_mean = np.array([127, 127, 127]) 6 | image_std = 128.0 7 | iou_threshold = 0.3 8 | center_variance = 0.1 9 | size_variance = 0.2 10 | 11 | min_boxes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]] 12 | shrinkage_list = [] 13 | image_size = [320, 240] # default input size 320*240 14 | feature_map_w_h_list = [[40, 20, 10, 5], [30, 15, 8, 4]] # default feature map size 15 | priors = [] 16 | 17 | 18 | def define_img_size(size): 19 | global image_size, feature_map_w_h_list, priors 20 | img_size_dict = {128: [128, 96], 21 | 160: [160, 120], 22 | 320: [320, 240], 23 | 480: [480, 360], 24 | 640: [640, 480], 25 | 1280: [1280, 960]} 26 | image_size = img_size_dict[size] 27 | 28 | feature_map_w_h_list_dict = {128: [[16, 8, 4, 2], [12, 6, 3, 2]], 29 | 160: [[20, 10, 5, 3], [15, 8, 4, 2]], 30 | 320: [[40, 20, 10, 5], [30, 15, 8, 4]], 31 | 480: [[60, 30, 15, 8], [45, 23, 12, 6]], 32 | 640: [[80, 40, 20, 10], [60, 30, 15, 8]], 33 | 1280: [[160, 80, 40, 20], [120, 60, 30, 15]]} 34 | feature_map_w_h_list = feature_map_w_h_list_dict[size] 35 | 36 | for i in range(0, len(image_size)): 37 | item_list = [] 38 | for k in range(0, len(feature_map_w_h_list[i])): 39 | item_list.append(image_size[i] / feature_map_w_h_list[i][k]) 40 | shrinkage_list.append(item_list) 41 | priors = generate_priors(feature_map_w_h_list, shrinkage_list, image_size, min_boxes) 42 | -------------------------------------------------------------------------------- /vision/ssd/data_preprocessing.py: -------------------------------------------------------------------------------- 1 | from ..transforms.transforms import * 2 | 3 | 4 | class TrainAugmentation: 5 | def __init__(self, size, mean=0, 
std=1.0): 6 | """ 7 | Args: 8 | size: the size the of final image. 9 | mean: mean pixel value per channel. 10 | """ 11 | self.mean = mean 12 | self.size = size 13 | self.augment = Compose([ 14 | ConvertFromInts(), 15 | PhotometricDistort(), 16 | RandomSampleCrop_v2(), 17 | RandomMirror(), 18 | ToPercentCoords(), 19 | Resize(self.size), 20 | SubtractMeans(self.mean), 21 | lambda img, boxes=None, labels=None: (img / std, boxes, labels), 22 | ToTensor(), 23 | ]) 24 | 25 | def __call__(self, img, boxes, labels): 26 | """ 27 | 28 | Args: 29 | img: the output of cv.imread in RGB layout. 30 | boxes: boundding boxes in the form of (x1, y1, x2, y2). 31 | labels: labels of boxes. 32 | """ 33 | return self.augment(img, boxes, labels) 34 | 35 | 36 | class TestTransform: 37 | def __init__(self, size, mean=0.0, std=1.0): 38 | self.transform = Compose([ 39 | ToPercentCoords(), 40 | Resize(size), 41 | SubtractMeans(mean), 42 | lambda img, boxes=None, labels=None: (img / std, boxes, labels), 43 | ToTensor(), 44 | ]) 45 | 46 | def __call__(self, image, boxes, labels): 47 | return self.transform(image, boxes, labels) 48 | 49 | 50 | class PredictionTransform: 51 | def __init__(self, size, mean=0.0, std=1.0): 52 | self.transform = Compose([ 53 | Resize(size), 54 | SubtractMeans(mean), 55 | lambda img, boxes=None, labels=None: (img / std, boxes, labels), 56 | ToTensor() 57 | ]) 58 | 59 | def __call__(self, image): 60 | image, _, _ = self.transform(image) 61 | return image 62 | -------------------------------------------------------------------------------- /vision/ssd/mb_tiny_RFB_fd.py: -------------------------------------------------------------------------------- 1 | from torch.nn import Conv2d, Sequential, ModuleList, ReLU 2 | 3 | from vision.nn.mb_tiny_RFB import Mb_Tiny_RFB 4 | from vision.ssd.config import fd_config as config 5 | from vision.ssd.predictor import Predictor 6 | from vision.ssd.ssd import SSD 7 | 8 | 9 | def SeperableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0): 10 | """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 
11 | """ 12 | return Sequential( 13 | Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 14 | groups=in_channels, stride=stride, padding=padding), 15 | ReLU(), 16 | Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1), 17 | ) 18 | 19 | 20 | def create_Mb_Tiny_RFB_fd(num_classes, is_test=False, device="cuda"): 21 | base_net = Mb_Tiny_RFB(2) 22 | base_net_model = base_net.model # disable dropout layer 23 | 24 | source_layer_indexes = [ 25 | 8, 26 | 11, 27 | 13 28 | ] 29 | extras = ModuleList([ 30 | Sequential( 31 | Conv2d(in_channels=base_net.base_channel * 16, out_channels=base_net.base_channel * 4, kernel_size=1), 32 | ReLU(), 33 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=base_net.base_channel * 16, kernel_size=3, stride=2, padding=1), 34 | ReLU() 35 | ) 36 | ]) 37 | 38 | regression_headers = ModuleList([ 39 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * 4, kernel_size=3, padding=1), 40 | SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * 4, kernel_size=3, padding=1), 41 | SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * 4, kernel_size=3, padding=1), 42 | Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * 4, kernel_size=3, padding=1) 43 | ]) 44 | 45 | classification_headers = ModuleList([ 46 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * num_classes, kernel_size=3, padding=1), 47 | SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * num_classes, kernel_size=3, padding=1), 48 | SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * num_classes, kernel_size=3, padding=1), 49 | Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * num_classes, kernel_size=3, padding=1) 50 | ]) 51 | 52 | return SSD(num_classes, base_net_model, source_layer_indexes, 53 | extras, classification_headers, regression_headers, is_test=is_test, config=config, device=device) 54 | 55 | 56 | def create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=200, nms_method=None, sigma=0.5, device=None): 57 | predictor = Predictor(net, config.image_size, config.image_mean_test, 58 | config.image_std, 59 | nms_method=nms_method, 60 | iou_threshold=config.iou_threshold, 61 | candidate_size=candidate_size, 62 | sigma=sigma, 63 | device=device) 64 | return predictor 65 | -------------------------------------------------------------------------------- /vision/ssd/mb_tiny_fd.py: -------------------------------------------------------------------------------- 1 | from torch.nn import Conv2d, Sequential, ModuleList, ReLU 2 | 3 | from vision.nn.mb_tiny import Mb_Tiny 4 | from vision.ssd.config import fd_config as config 5 | from vision.ssd.predictor import Predictor 6 | from vision.ssd.ssd import SSD 7 | 8 | 9 | def SeperableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0): 10 | """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 
11 | """ 12 | return Sequential( 13 | Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 14 | groups=in_channels, stride=stride, padding=padding), 15 | ReLU(), 16 | Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1), 17 | ) 18 | 19 | 20 | def create_mb_tiny_fd(num_classes, is_test=False, device="cuda"): 21 | base_net = Mb_Tiny(2) 22 | base_net_model = base_net.model # disable dropout layer 23 | 24 | source_layer_indexes = [ 25 | 8, 26 | 11, 27 | 13 28 | ] 29 | extras = ModuleList([ 30 | Sequential( 31 | Conv2d(in_channels=base_net.base_channel * 16, out_channels=base_net.base_channel * 4, kernel_size=1), 32 | ReLU(), 33 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=base_net.base_channel * 16, kernel_size=3, stride=2, padding=1), 34 | ReLU() 35 | ) 36 | ]) 37 | 38 | regression_headers = ModuleList([ 39 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * 4, kernel_size=3, padding=1), 40 | SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * 4, kernel_size=3, padding=1), 41 | SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * 4, kernel_size=3, padding=1), 42 | Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * 4, kernel_size=3, padding=1) 43 | ]) 44 | 45 | classification_headers = ModuleList([ 46 | SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * num_classes, kernel_size=3, padding=1), 47 | SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * num_classes, kernel_size=3, padding=1), 48 | SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * num_classes, kernel_size=3, padding=1), 49 | Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * num_classes, kernel_size=3, padding=1) 50 | ]) 51 | 52 | return SSD(num_classes, base_net_model, source_layer_indexes, 53 | extras, classification_headers, regression_headers, is_test=is_test, config=config, device=device) 54 | 55 | 56 | def create_mb_tiny_fd_predictor(net, candidate_size=200, nms_method=None, sigma=0.5, device=None): 57 | predictor = Predictor(net, config.image_size, config.image_mean_test, 58 | config.image_std, 59 | nms_method=nms_method, 60 | iou_threshold=config.iou_threshold, 61 | candidate_size=candidate_size, 62 | sigma=sigma, 63 | device=device) 64 | return predictor 65 | -------------------------------------------------------------------------------- /vision/ssd/predictor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..utils import box_utils 4 | from .data_preprocessing import PredictionTransform 5 | from ..utils.misc import Timer 6 | 7 | 8 | class Predictor: 9 | def __init__(self, net, size, mean=0.0, std=1.0, nms_method=None, 10 | iou_threshold=0.3, filter_threshold=0.01, candidate_size=200, sigma=0.5, device=None): 11 | self.net = net 12 | self.transform = PredictionTransform(size, mean, std) 13 | self.iou_threshold = iou_threshold 14 | self.filter_threshold = filter_threshold 15 | self.candidate_size = candidate_size 16 | self.nms_method = nms_method 17 | 18 | self.sigma = sigma 19 | if device: 20 | self.device = device 21 | else: 22 | self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 23 | 24 | self.net.to(self.device) 25 | self.net.eval() 26 | 27 | self.timer = Timer() 28 | 29 | def predict(self, image, top_k=-1, prob_threshold=None): 30 | cpu_device = torch.device("cpu") 31 | height, width, _ = image.shape 32 | 
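        # the prediction transform resizes the image to the configured input size,
        # subtracts the image mean, scales by std and converts it to a tensor
        # (see data_preprocessing.PredictionTransform)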
image = self.transform(image) 33 | images = image.unsqueeze(0) 34 | images = images.to(self.device) 35 | with torch.no_grad(): 36 | for i in range(1): 37 | self.timer.start() 38 | scores, boxes = self.net.forward(images) 39 | print("Inference time: ", self.timer.end()) 40 | boxes = boxes[0] 41 | scores = scores[0] 42 | if not prob_threshold: 43 | prob_threshold = self.filter_threshold 44 | # this version of nms is slower on GPU, so we move data to CPU. 45 | boxes = boxes.to(cpu_device) 46 | scores = scores.to(cpu_device) 47 | picked_box_probs = [] 48 | picked_labels = [] 49 | for class_index in range(1, scores.size(1)): 50 | probs = scores[:, class_index] 51 | mask = probs > prob_threshold 52 | probs = probs[mask] 53 | if probs.size(0) == 0: 54 | continue 55 | subset_boxes = boxes[mask, :] 56 | box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1) 57 | box_probs = box_utils.nms(box_probs, self.nms_method, 58 | score_threshold=prob_threshold, 59 | iou_threshold=self.iou_threshold, 60 | sigma=self.sigma, 61 | top_k=top_k, 62 | candidate_size=self.candidate_size) 63 | picked_box_probs.append(box_probs) 64 | picked_labels.extend([class_index] * box_probs.size(0)) 65 | if not picked_box_probs: 66 | return torch.tensor([]), torch.tensor([]), torch.tensor([]) 67 | picked_box_probs = torch.cat(picked_box_probs) 68 | picked_box_probs[:, 0] *= width 69 | picked_box_probs[:, 1] *= height 70 | picked_box_probs[:, 2] *= width 71 | picked_box_probs[:, 3] *= height 72 | return picked_box_probs[:, :4], torch.tensor(picked_labels), picked_box_probs[:, 4] 73 | -------------------------------------------------------------------------------- /vision/transforms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/vision/transforms/__init__.py -------------------------------------------------------------------------------- /vision/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .misc import * 2 | -------------------------------------------------------------------------------- /vision/utils/box_utils_numpy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def convert_locations_to_boxes(locations, priors, center_variance, 5 | size_variance): 6 | """Convert regressional location results of SSD into boxes in the form of (center_x, center_y, h, w). 7 | 8 | The conversion: 9 | $$predicted\_center * center_variance = \frac {real\_center - prior\_center} {prior\_hw}$$ 10 | $$exp(predicted\_hw * size_variance) = \frac {real\_hw} {prior\_hw}$$ 11 | We do it in the inverse direction here. 12 | Args: 13 | locations (batch_size, num_priors, 4): the regression output of SSD. It will contain the outputs as well. 14 | priors (num_priors, 4) or (batch_size/1, num_priors, 4): prior boxes. 15 | center_variance: a float used to change the scale of center. 16 | size_variance: a float used to change of scale of size. 17 | Returns: 18 | boxes: priors: [[center_x, center_y, h, w]]. All the values 19 | are relative to the image size. 20 | """ 21 | # priors can have one dimension less. 
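    # e.g. priors of shape (num_priors, 4) are broadcast against locations of shape
    # (batch_size, num_priors, 4) by inserting a leading batch axis below.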
22 | if len(priors.shape) + 1 == len(locations.shape): 23 | priors = np.expand_dims(priors, 0) 24 | return np.concatenate([ 25 | locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2], 26 | np.exp(locations[..., 2:] * size_variance) * priors[..., 2:] 27 | ], axis=len(locations.shape) - 1) 28 | 29 | 30 | def convert_boxes_to_locations(center_form_boxes, center_form_priors, center_variance, size_variance): 31 | # priors can have one dimension less 32 | if len(center_form_priors.shape) + 1 == len(center_form_boxes.shape): 33 | center_form_priors = np.expand_dims(center_form_priors, 0) 34 | return np.concatenate([ 35 | (center_form_boxes[..., :2] - center_form_priors[..., :2]) / center_form_priors[..., 2:] / center_variance, 36 | np.log(center_form_boxes[..., 2:] / center_form_priors[..., 2:]) / size_variance 37 | ], axis=len(center_form_boxes.shape) - 1) 38 | 39 | 40 | def area_of(left_top, right_bottom): 41 | """Compute the areas of rectangles given two corners. 42 | 43 | Args: 44 | left_top (N, 2): left top corner. 45 | right_bottom (N, 2): right bottom corner. 46 | 47 | Returns: 48 | area (N): return the area. 49 | """ 50 | hw = np.clip(right_bottom - left_top, 0.0, None) 51 | return hw[..., 0] * hw[..., 1] 52 | 53 | 54 | def iou_of(boxes0, boxes1, eps=1e-5): 55 | """Return intersection-over-union (Jaccard index) of boxes. 56 | 57 | Args: 58 | boxes0 (N, 4): ground truth boxes. 59 | boxes1 (N or 1, 4): predicted boxes. 60 | eps: a small number to avoid 0 as denominator. 61 | Returns: 62 | iou (N): IoU values. 63 | """ 64 | overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2]) 65 | overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:]) 66 | 67 | overlap_area = area_of(overlap_left_top, overlap_right_bottom) 68 | area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) 69 | area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) 70 | return overlap_area / (area0 + area1 - overlap_area + eps) 71 | 72 | 73 | def center_form_to_corner_form(locations): 74 | return np.concatenate([locations[..., :2] - locations[..., 2:] / 2, 75 | locations[..., :2] + locations[..., 2:] / 2], len(locations.shape) - 1) 76 | 77 | 78 | def corner_form_to_center_form(boxes): 79 | return np.concatenate([ 80 | (boxes[..., :2] + boxes[..., 2:]) / 2, 81 | boxes[..., 2:] - boxes[..., :2] 82 | ], len(boxes.shape) - 1) 83 | 84 | 85 | def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): 86 | """ 87 | 88 | Args: 89 | box_scores (N, 5): boxes in corner-form and probabilities. 90 | iou_threshold: intersection over union threshold. 91 | top_k: keep top_k results. If k <= 0, keep all the results. 92 | candidate_size: only consider the candidates with the highest scores. 
93 | Returns: 94 | picked: a list of indexes of the kept boxes 95 | """ 96 | scores = box_scores[:, -1] 97 | boxes = box_scores[:, :-1] 98 | picked = [] 99 | # _, indexes = scores.sort(descending=True) 100 | indexes = np.argsort(scores) 101 | # indexes = indexes[:candidate_size] 102 | indexes = indexes[-candidate_size:] 103 | while len(indexes) > 0: 104 | # current = indexes[0] 105 | current = indexes[-1] 106 | picked.append(current) 107 | if 0 < top_k == len(picked) or len(indexes) == 1: 108 | break 109 | current_box = boxes[current, :] 110 | # indexes = indexes[1:] 111 | indexes = indexes[:-1] 112 | rest_boxes = boxes[indexes, :] 113 | iou = iou_of( 114 | rest_boxes, 115 | np.expand_dims(current_box, axis=0), 116 | ) 117 | indexes = indexes[iou <= iou_threshold] 118 | 119 | return box_scores[picked, :] 120 | -------------------------------------------------------------------------------- /vision/utils/misc.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import torch 4 | 5 | 6 | def str2bool(s): 7 | return s.lower() in ('true', '1') 8 | 9 | 10 | class Timer: 11 | def __init__(self): 12 | self.clock = {} 13 | 14 | def start(self, key="default"): 15 | self.clock[key] = datetime.datetime.now() 16 | 17 | def end(self, key="default"): 18 | if key not in self.clock: 19 | raise Exception(f"{key} is not in the clock.") 20 | interval = datetime.datetime.now() - self.clock[key] 21 | del self.clock[key] 22 | return interval.total_seconds() 23 | 24 | 25 | def save_checkpoint(epoch, net_state_dict, optimizer_state_dict, best_score, checkpoint_path, model_path): 26 | torch.save({ 27 | 'epoch': epoch, 28 | 'model': net_state_dict, 29 | 'optimizer': optimizer_state_dict, 30 | 'best_score': best_score 31 | }, checkpoint_path) 32 | torch.save(net_state_dict, model_path) 33 | 34 | 35 | def load_checkpoint(checkpoint_path): 36 | return torch.load(checkpoint_path) 37 | 38 | 39 | def freeze_net_layers(net): 40 | for param in net.parameters(): 41 | param.requires_grad = False 42 | 43 | 44 | def store_labels(path, labels): 45 | with open(path, "w") as f: 46 | f.write("\n".join(labels)) 47 | -------------------------------------------------------------------------------- /widerface_evaluate/README.md: -------------------------------------------------------------------------------- 1 | # WiderFace-Evaluation 2 | Python Evaluation Code for [Wider Face Dataset](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/) 3 | 4 | 5 | ## Usage 6 | 7 | ##### run evaluation_on_widerface.py 8 | 9 | ```` 10 | python3 evaluation_on_widerface.py 11 | ```` 12 | ##### before evaluating 13 | 14 | ```` 15 | python3 setup.py build_ext --inplace 16 | ```` 17 | 18 | ##### evaluating 19 | 20 | **GroungTruth:** `wider_face_val.mat`, `wider_easy_val.mat`, `wider_medium_val.mat`,`wider_hard_val.mat` 21 | 22 | ```` 23 | python3 evaluation.py -p -g 24 | ```` 25 | 26 | ## Acknowledgements 27 | 28 | some code borrowed from Sergey Karayev 29 | -------------------------------------------------------------------------------- /widerface_evaluate/box_overlaps.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | 
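# note: the np.float alias used below was deprecated in NumPy 1.20 and removed in 1.24;
# on a modern NumPy this needs to be np.float64 instead.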
DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps -------------------------------------------------------------------------------- /widerface_evaluate/evaluation_on_widerface.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ python3 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2019-10-17 4 | # @Author : vealocia 5 | # @FileName: evaluation_on_widerface.py 6 | 7 | import math 8 | import os 9 | import sys 10 | 11 | import cv2 12 | sys.path.append('../') 13 | from vision.ssd.config.fd_config import define_img_size 14 | 15 | input_img_size = 320 # define input size ,default optional(128/160/320/480/640/1280) 16 | define_img_size(input_img_size) # must put define_img_size() before 'import create_mb_tiny_fd, create_mb_tiny_fd_predictor' 17 | 18 | from vision.ssd.mb_tiny_fd import create_mb_tiny_fd, create_mb_tiny_fd_predictor 19 | from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd, create_Mb_Tiny_RFB_fd_predictor 20 | 21 | label_path = "../models/voc-model-labels.txt" 22 | 23 | # net_type = "slim" # inference faster,lower precision 24 | net_type = "RFB" # inference lower,higher precision 25 | 26 | class_names = [name.strip() for name in open(label_path).readlines()] 27 | num_classes = len(class_names) 28 | test_device = "cuda:0" 29 | # test_device = "cpu" 30 | candidate_size = 800 31 | threshold = 0.1 32 | 33 | val_image_root = "/pic/linzai/1080Ti/home_linzai/PycharmProjects/insightface/RetinaFace/data/retinaface/val" # path to widerface valuation image root 34 | val_result_txt_save_root = "./widerface_evaluation/" # result directory 35 | 36 | if net_type == 'slim': 37 | model_path = "../models/pretrained/version-slim-320.pth" 38 | # model_path = "../models/pretrained/version-slim-640.pth" 39 | net = create_mb_tiny_fd(len(class_names), is_test=True, device=test_device) 40 | predictor = create_mb_tiny_fd_predictor(net, candidate_size=candidate_size, device=test_device) 41 | elif net_type == 'RFB': 42 | model_path = "../models/pretrained/version-RFB-320.pth" 43 | # model_path = "../models/pretrained/version-RFB-640.pth" 44 | net = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True, device=test_device) 45 | predictor = create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=candidate_size, device=test_device) 46 | else: 47 | print("The net type is wrong!") 48 | sys.exit(1) 
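# note: label_path, model_path and the '../' sys.path entry above are relative to the
# widerface_evaluate/ directory, so the script is expected to be run from there.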
49 | net.load(model_path) 50 | 51 | counter = 0 52 | for parent, dir_names, file_names in os.walk(val_image_root): 53 | for file_name in file_names: 54 | if not file_name.lower().endswith('jpg'): 55 | continue 56 | im = cv2.imread(os.path.join(parent, file_name), cv2.IMREAD_COLOR) 57 | im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) 58 | boxes, labels, probs = predictor.predict(im, candidate_size / 2, threshold) 59 | 60 | event_name = parent.split('/')[-1] 61 | if not os.path.exists(os.path.join(val_result_txt_save_root, event_name)): 62 | os.makedirs(os.path.join(val_result_txt_save_root, event_name)) 63 | fout = open(os.path.join(val_result_txt_save_root, event_name, file_name.split('.')[0] + '.txt'), 'w') 64 | fout.write(file_name.split('.')[0] + '\n') 65 | fout.write(str(boxes.size(0)) + '\n') 66 | for i in range(boxes.size(0)): 67 | bbox = boxes[i, :] 68 | fout.write('%d %d %d %d %.03f' % (math.floor(bbox[0]), math.floor(bbox[1]), math.ceil(bbox[2] - bbox[0]), math.ceil(bbox[3] - bbox[1]), probs[i] if probs[i] <= 1 else 1) + '\n') 69 | fout.close() 70 | counter += 1 71 | print('[%d] %s is processed.' % (counter, file_name)) 72 | 73 | # note: with score_threshold = 0.11 and hard_nms, MAP of 320-input model on widerface val set is: 0.785/0.695/0.431 74 | -------------------------------------------------------------------------------- /widerface_evaluate/ground_truth/wider_easy_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/widerface_evaluate/ground_truth/wider_easy_val.mat -------------------------------------------------------------------------------- /widerface_evaluate/ground_truth/wider_face_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/widerface_evaluate/ground_truth/wider_face_val.mat -------------------------------------------------------------------------------- /widerface_evaluate/ground_truth/wider_hard_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/widerface_evaluate/ground_truth/wider_hard_val.mat -------------------------------------------------------------------------------- /widerface_evaluate/ground_truth/wider_medium_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/dffdddda9794a50607cba8f318507a28c1c27cab/widerface_evaluate/ground_truth/wider_medium_val.mat -------------------------------------------------------------------------------- /widerface_evaluate/setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | WiderFace evaluation code 3 | author: wondervictor 4 | mail: tianhengcheng@gmail.com 5 | copyright@wondervictor 6 | """ 7 | 8 | from distutils.core import setup, Extension 9 | from Cython.Build import cythonize 10 | import numpy 11 | 12 | package = Extension('bbox', ['box_overlaps.pyx'], include_dirs=[numpy.get_include()]) 13 | setup(ext_modules=cythonize([package])) 14 | --------------------------------------------------------------------------------