├── .editorconfig ├── .gitignore ├── LICENSE ├── README.md ├── go.mod ├── hello.go ├── keliamoniz2.out.png ├── libfacedetection.go ├── libfacedetection ├── CMakeLists.txt ├── ChangeLog ├── LICENSE ├── README.md ├── aarch64-toolchain.cmake ├── example │ ├── libfacedetect.cpp │ └── libfacedetectcnn-example.cpp ├── images │ ├── chloecalmon.png │ ├── cnnresult.png │ ├── keliamoniz1.jpg │ └── keliamoniz2.jpg ├── models │ ├── README.md │ ├── caffe │ │ ├── yufacedetectnet-open-v1.caffemodel │ │ ├── yufacedetectnet-open-v1.prototxt │ │ ├── yufacedetectnet-open-v1.solver.prototxt │ │ └── yufacedetectnet-open-v1.train.prototxt │ └── openvino │ │ ├── yufacedetectnet-open-v1-320x240.bin │ │ └── yufacedetectnet-open-v1-320x240.xml └── src │ ├── facedetectcnn-floatdata.cpp │ ├── facedetectcnn-int8data.cpp │ ├── facedetectcnn-model.cpp │ ├── facedetectcnn.cpp │ └── facedetectcnn.h ├── libfacedetection_capi.cc ├── libfacedetection_capi.h ├── z_facedetectcnn_cc.cc ├── z_facedetectcnn_floatdata.cc ├── z_facedetectcnn_int8data.cc └── z_facedetectcnn_model.cc /.editorconfig: -------------------------------------------------------------------------------- 1 | # Copyright 2018 . All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 
4 | 5 | # http://editorconfig.org/ 6 | 7 | root = true 8 | 9 | # Unix-style newlines with a newline ending every file 10 | [*] 11 | charset = utf-8 12 | end_of_line = lf 13 | trim_trailing_whitespace = true 14 | insert_final_newline = true 15 | 16 | [*] 17 | indent_style = tab 18 | 19 | [*.{go,proto}] 20 | charset = utf-8 21 | indent_style = tab 22 | 23 | # Matches the exact files either package.json or .travis.yml 24 | [{package.json,.travis.yml}] 25 | indent_style = space 26 | indent_size = 2 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /libfacedetection/build 2 | a.out.png 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, chai2010 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | - *Go语言QQ群: 102319854, 1055927514* 2 | - *凹语言(凹读音“Wa”)(The Wa Programming Language): https://github.com/wa-lang/wa* 3 | 4 | ---- 5 | 6 | # [libfacedetection](https://github.com/ShiqiYu/libfacedetection) binding for Go 7 | 8 | - https://godoc.org/github.com/chai2010/libfacedetection-go 9 | - https://github.com/ShiqiYu/libfacedetection 10 | 11 | ## Example ([hello.go](hello.go)) 12 | 13 | ```go 14 | package main 15 | 16 | import ( 17 | "github.com/chai2010/libfacedetection-go" 18 | ) 19 | 20 | func main() { 21 | m := GetImage("./libfacedetection/images/keliamoniz2.jpg") 22 | rgb, w, h := libfacedetection.NewRGBImageFrom(m) 23 | 24 | faces := libfacedetection.DetectFaceRGB(rgb, w, h, w*3) 25 | fmt.Printf("%#v\n", faces) 26 | } 27 | 28 | // output: 29 | // []libfacedetection.Face{ 30 | // libfacedetection.Face{X:183, Y:137, W:150, H:150, Neighbors:94, Angle:0} 31 | // } 32 | ``` 33 | 34 | ![](keliamoniz2.out.png) 35 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | // Copyright 2019 . All rights reserved. 2 | // Use of this source code is governed by a Apache-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | module github.com/chai2010/libfacedetection-go 6 | -------------------------------------------------------------------------------- /hello.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 . All rights reserved. 2 | // Use of this source code is governed by a Apache-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // +build ignore 6 | 7 | package main 8 | 9 | import ( 10 | "fmt" 11 | "image" 12 | "image/color" 13 | "image/draw" 14 | _ "image/jpeg" 15 | "image/png" 16 | "log" 17 | "os" 18 | 19 | "github.com/chai2010/libfacedetection-go" 20 | ) 21 | 22 | func main() { 23 | m := GetImage("./libfacedetection/images/keliamoniz2.jpg") 24 | rgb, w, h := libfacedetection.NewRGBImageFrom(m) 25 | 26 | faces := libfacedetection.DetectFaceRGB(rgb, w, h, w*3) 27 | fmt.Printf("%#v\n", faces) 28 | 29 | if len(faces) > 0 { 30 | b := m.Bounds() 31 | m2 := image.NewRGBA(b) 32 | 33 | for y := b.Min.Y; y < b.Max.Y; y++ { 34 | for x := b.Min.X; x < b.Max.X; x++ { 35 | m2.Set(x, y, m.At(x, y)) 36 | } 37 | } 38 | 39 | x1 := faces[0].X 40 | y1 := faces[0].Y 41 | x2 := faces[0].W + x1 42 | y2 := faces[0].H + y1 43 | 44 | DrawRect(m2, x1, y1, x2, y2) 45 | SaveImage(m2, "a.out.png") 46 | } 47 | } 48 | 49 | func GetImage(path string) image.Image { 50 | r, err := os.Open(path) 51 | if err != nil { 52 | log.Fatal(err) 53 | } 54 | defer r.Close() 55 | 56 | m, _, err := image.Decode(r) 57 | if err != nil { 58 | log.Fatal(err) 59 | } 60 | return m 61 | } 62 | 63 | func SaveImage(m image.Image, path string) { 64 | f, err := os.Create(path) 65 | if err != nil { 66 | log.Fatal(err) 67 | } 68 | defer f.Close() 69 | 70 | err = png.Encode(f, m) 71 | if err != nil { 72 | log.Fatal(err) 73 | } 74 | } 75 | 76 | // DrawHLine draws a horizontal line 77 | func DrawHLine(m draw.Image, x1, y, x2 int) { 78 | for ; x1 <= x2; x1++ { 79 | m.Set(x1, y, color.RGBA{0, 0, 255, 255}) 80 | } 81 | } 82 | 83 | // DrawVLine draws a veritcal 
line 84 | func DrawVLine(m draw.Image, x, y1, y2 int) { 85 | for ; y1 <= y2; y1++ { 86 | m.Set(x, y1, color.RGBA{0, 0, 255, 255}) 87 | } 88 | } 89 | 90 | // DrawRect draws a rectangle utilizing HLine() and VLine() 91 | func DrawRect(m draw.Image, x1, y1, x2, y2 int) { 92 | DrawHLine(m, x1, y1, x2) 93 | DrawHLine(m, x1, y2, x2) 94 | DrawVLine(m, x1, y1, y2) 95 | DrawVLine(m, x2, y1, y2) 96 | } 97 | -------------------------------------------------------------------------------- /keliamoniz2.out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chai2010/libfacedetection-go/80e89f84b0b18fa87abb1de4b48795e9a5505c15/keliamoniz2.out.png -------------------------------------------------------------------------------- /libfacedetection.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 . All rights reserved. 2 | // Use of this source code is governed by a Apache-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package libfacedetection 6 | 7 | //#include "libfacedetection_capi.h" 8 | import "C" 9 | import ( 10 | "image" 11 | "unsafe" 12 | ) 13 | 14 | type Face struct { 15 | X int 16 | Y int 17 | W int 18 | H int 19 | Neighbors int 20 | Angle int 21 | } 22 | 23 | func DetectFaceRGBA(m *image.RGBA) []Face { 24 | if m == nil { 25 | return nil 26 | } 27 | 28 | rv := C.libfacedetection_capi_facedetect_rgba( 29 | (*C.uint8_t)(unsafe.Pointer(&m.Pix[0])), 30 | C.int(m.Rect.Dx()), 31 | C.int(m.Rect.Dy()), 32 | C.int(m.Stride), 33 | ) 34 | defer C.libfacedetection_capi_result_free(rv) 35 | 36 | n := int(C.libfacedetection_capi_result_len(rv)) 37 | if n <= 0 { 38 | return nil 39 | } 40 | 41 | face := make([]Face, n) 42 | for i := 0; i < n; i++ { 43 | var t C.libfacedetection_capi_face_t 44 | C.libfacedetection_capi_result_get(rv, C.int(i), &t) 45 | 46 | face[i].X = int(t.x) 47 | face[i].Y = int(t.y) 48 | face[i].W = int(t.w) 49 | face[i].H = int(t.h) 50 | face[i].Neighbors = int(t.neighbors) 51 | face[i].Angle = int(t.angle) 52 | } 53 | 54 | return face 55 | } 56 | 57 | func DetectFaceRGB(rgb []byte, w, h, stride int) []Face { 58 | if len(rgb) == 0 { 59 | return nil 60 | } 61 | 62 | rv := C.libfacedetection_capi_facedetect_rgb( 63 | (*C.uint8_t)(unsafe.Pointer(&rgb[0])), 64 | C.int(w), 65 | C.int(h), 66 | C.int(stride), 67 | ) 68 | defer C.libfacedetection_capi_result_free(rv) 69 | 70 | n := int(C.libfacedetection_capi_result_len(rv)) 71 | if n <= 0 { 72 | return nil 73 | } 74 | 75 | face := make([]Face, n) 76 | for i := 0; i < n; i++ { 77 | var t C.libfacedetection_capi_face_t 78 | C.libfacedetection_capi_result_get(rv, C.int(i), &t) 79 | 80 | face[i].X = int(t.x) 81 | face[i].Y = int(t.y) 82 | face[i].W = int(t.w) 83 | face[i].H = int(t.h) 84 | face[i].Neighbors = int(t.neighbors) 85 | face[i].Angle = int(t.angle) 86 | } 87 | 88 | return face 89 | } 90 | 91 | func NewRGBImageFrom(m image.Image) (rgb []byte, w, h int) { 92 | b := m.Bounds() 93 | 94 | w = b.Dx() 95 | h = 
b.Dy() 96 | rgb = make([]byte, w*h*3) 97 | 98 | off := 0 99 | for y := b.Min.Y; y < b.Max.Y; y++ { 100 | for x := b.Min.X; x < b.Max.X; x++ { 101 | pr, pg, pb, _ := m.At(x, y).RGBA() 102 | rgb[off+0] = uint8(pr >> 8) 103 | rgb[off+1] = uint8(pg >> 8) 104 | rgb[off+2] = uint8(pb >> 8) 105 | off += 3 106 | } 107 | } 108 | 109 | return 110 | } 111 | -------------------------------------------------------------------------------- /libfacedetection/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # CMakeLists for libfacedetectcnn 2 | 3 | project(facedetection) 4 | 5 | cmake_minimum_required(VERSION 2.8) 6 | 7 | option(ENABLE_INT8 "use int8" OFF) 8 | option(ENABLE_AVX2 "use avx2" OFF) 9 | option(ENABLE_NEON "whether use neon, if use arm please set it on" OFF) 10 | option(DEMO "build the demo" OFF) 11 | 12 | SET(fdt_base_dir ${PROJECT_SOURCE_DIR}) 13 | SET(fdt_src_dir ${fdt_base_dir}/src) 14 | SET(fdt_inc_dir ${fdt_base_dir}/src) 15 | 16 | SET(fdt_lib_name facedetection) 17 | SET(fdt_lib_static ${fdt_lib_name}) 18 | SET(fdt_lib_shared ${fdt_lib_name}_shared) 19 | 20 | FILE(GLOB_RECURSE fdt_source_files ${fdt_src_dir}/*.cpp) 21 | LIST(SORT fdt_source_files) 22 | 23 | if(ENABLE_INT8) 24 | message("using int8") 25 | add_definitions(-D_ENABLE_INT8) 26 | endif() 27 | 28 | if(ENABLE_AVX2) 29 | message("using avx2") 30 | add_definitions(-D_ENABLE_AVX2) 31 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mfma") 32 | endif() 33 | 34 | if(ENABLE_NEON) 35 | message("using arm") 36 | add_definitions(-D_ENABLE_NEON) 37 | endif() 38 | 39 | set(CMAKE_CXX_STANDARD 11) 40 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 41 | set(CMAKE_CXX_EXTENSIONS OFF) 42 | 43 | INCLUDE_DIRECTORIES(${fdt_inc_dir}) 44 | 45 | # Create a static library (.a) 46 | ADD_LIBRARY(${fdt_lib_static} STATIC ${fdt_source_files}) 47 | 48 | # Create a shared library (.so) 49 | ADD_LIBRARY(${fdt_lib_shared} SHARED ${fdt_source_files}) 50 | 
SET_TARGET_PROPERTIES(${fdt_lib_shared} PROPERTIES OUTPUT_NAME "${fdt_lib_name}") 51 | SET_TARGET_PROPERTIES(${fdt_lib_shared} PROPERTIES PREFIX "lib") 52 | 53 | # Create demo. OpenCV is requred. 54 | if (DEMO) 55 | #if(WIN32) 56 | # set(OpenCV_DIR "D:/opencv343/build") # TODO 57 | #endif() 58 | find_package(OpenCV REQUIRED) 59 | include_directories(${OpenCV_INCLUDE_DIRS}) 60 | 61 | set(fdt_demo_files ${fdt_base_dir}/example/libfacedetectcnn-example.cpp) 62 | add_executable(fdt_demo ${fdt_demo_files}) 63 | target_link_libraries(fdt_demo ${fdt_lib_static} ${OpenCV_LIBS}) 64 | endif() 65 | 66 | if (GSTREAMER) 67 | find_package(OpenCV REQUIRED) 68 | 69 | include(FindPkgConfig) 70 | pkg_search_module(GSTREAMER REQUIRED gstreamer-1.0) 71 | pkg_search_module(GSTREAMER_BASE REQUIRED gstreamer-base-1.0) 72 | pkg_search_module(GSTREAMER_VIDEO REQUIRED gstreamer-video-1.0) 73 | 74 | add_library(gstfacedetect SHARED 75 | example/libfacedetect.cpp 76 | ) 77 | 78 | include_directories(gstfacedetect PRIVATE 79 | ${GSTREAMER_INCLUDE_DIRS} 80 | ${GSTREAMER_BASE_INCLUDE_DIRS} 81 | ${GSTREAMER_VIDEO_INCLUDE_DIRS} 82 | ${OpenCV_INCLUDE_DIRS} 83 | ) 84 | 85 | target_link_libraries(gstfacedetect 86 | ${GSTREAMER_LIBRARIES} 87 | ${GSTREAMER_BASE_LIBRARIES} 88 | ${GSTREAMER_VIDEO_LIBRARIES} 89 | ${OpenCV_LIBS} 90 | ${fdt_lib_shared} 91 | ) 92 | 93 | 94 | 95 | endif() 96 | -------------------------------------------------------------------------------- /libfacedetection/ChangeLog: -------------------------------------------------------------------------------- 1 | 2019-03-13 2 | --------------------- 3 | * Release the source code and the model files. Removed the binary libary. 4 | 5 | 2018-11-17 6 | --------------------- 7 | * Replaced the AdaBoost methods with a CNN based one. 8 | 9 | 2017-02-24 10 | --------------------- 11 | * landmark detection speed reaches to 0.8ms per face. The former version is 1.7ms per face. 
12 | 13 | 2017-01-20 14 | --------------------- 15 | * 68-point landmark detection added. 16 | 17 | 2016-11-24 18 | --------------------- 19 | * Added benchmark.cpp which can run face detection in multiple threads using OpenMP. 20 | 21 | 2016-11-16 22 | --------------------- 23 | * Bugs in the previous version were fixed. std::vector was removed from the API because it can cause error. 24 | 25 | 2016-11-10 26 | --------------------- 27 | * The API was updated. std::vector was involved. 28 | * The functions can be called in multiple threads at the same time. 29 | 30 | 2016-10-6 31 | --------------------- 32 | * The algorithm has been speeded up greatly (2x to 3x). 33 | * The true positive rates (FDDB) have been improved 1% to 2% at FP=100. 34 | * Multi-core parallelization has been disabled. The detection time is still the same. 35 | 36 | 2016-9-16 37 | --------------------- 38 | * Speedup again. 39 | * Change function name facedetect_frontal_tmp() to facedetect_frontal_surveillance(). This function now uses a new trained classifier which can achieve a higher detection speed. 40 | 41 | 2016-6-28 42 | --------------------- 43 | * 64-bit dll added since there are so many users request it. 44 | * An easter egg is hidden in the 64-bit dll. Can you find it? 45 | 46 | 2016-6-8 47 | --------------------- 48 | * Speedup 1.2x 49 | * Added an experimental function facedetect_frontal_tmp(). The function can gain a higher detection rate in video surveillance. -------------------------------------------------------------------------------- /libfacedetection/LICENSE: -------------------------------------------------------------------------------- 1 | By downloading, copying, installing or using the software you agree to this license. 2 | If you do not agree to this license, do not download, install, 3 | copy or use the software. 4 | 5 | 6 | License Agreement For libfacedetection 7 | (3-clause BSD License) 8 | 9 | Copyright (c) 2015-2019, Shiqi Yu, all rights reserved. 
10 | shiqi.yu@gmail.com 11 | 12 | Redistribution and use in source and binary forms, with or without modification, 13 | are permitted provided that the following conditions are met: 14 | 15 | * Redistributions of source code must retain the above copyright notice, 16 | this list of conditions and the following disclaimer. 17 | 18 | * Redistributions in binary form must reproduce the above copyright notice, 19 | this list of conditions and the following disclaimer in the documentation 20 | and/or other materials provided with the distribution. 21 | 22 | * Neither the names of the copyright holders nor the names of the contributors 23 | may be used to endorse or promote products derived from this software 24 | without specific prior written permission. 25 | 26 | This software is provided by the copyright holders and contributors "as is" and 27 | any express or implied warranties, including, but not limited to, the implied 28 | warranties of merchantability and fitness for a particular purpose are disclaimed. 29 | In no event shall copyright holders or contributors be liable for any direct, 30 | indirect, incidental, special, exemplary, or consequential damages 31 | (including, but not limited to, procurement of substitute goods or services; 32 | loss of use, data, or profits; or business interruption) however caused 33 | and on any theory of liability, whether in contract, strict liability, 34 | or tort (including negligence or otherwise) arising in any way out of 35 | the use of this software, even if advised of the possibility of such damage. -------------------------------------------------------------------------------- /libfacedetection/README.md: -------------------------------------------------------------------------------- 1 | # libfacedetection 2 | 3 | This is an open source library for CNN-based face detection in images. The CNN model has been converted to static variables in C source files. The source code does not depend on any other libraries. 
What you need is just a C++ compiler. You can compile the source code under Windows, Linux, ARM and any platform with a C++ compiler. 4 | 5 | SIMD instructions are used to speed up the detection. You can enable AVX2 if you use Intel CPU or NEON for ARM. 6 | 7 | The model file has also been provided in directory ./models/. 8 | 9 | examples/libfacedetectcnn-example.cpp shows how to use the library. 10 | 11 | ![Examples](/images/cnnresult.png "Detection example") 12 | 13 | ## How to Compile 14 | 15 | * Please add -O3 to turn on optimizations when you compile the source code using g++. 16 | * Please choose 'Maximize Speed/-O2' when you compile the source code using Microsoft Visual Studio. 17 | 18 | Create a build folder: 19 | 20 | ``` 21 | mkdir build; cd build; rm -rf * 22 | ``` 23 | 24 | ### Cross build for aarch64 25 | 1. set cross compiler for aarch64 (please refer to aarch64-toolchain.cmake) 26 | 2. set opencv path since the example code depends on opencv 27 | 28 | ``` 29 | cmake \ 30 | -DENABLE_INT8=ON \ 31 | -DENABLE_NEON=ON \ 32 | -DCMAKE_BUILD_TYPE=RELEASE \ 33 | -DCMAKE_TOOLCHAIN_FILE=../aarch64-toolchain.cmake \ 34 | .. 35 | 36 | make 37 | ``` 38 | 39 | ### Native build for avx2 40 | ``` 41 | cmake \ 42 | -DENABLE_INT8=ON \ 43 | -DENABLE_AVX2=ON \ 44 | -DCMAKE_BUILD_TYPE=RELEASE \ 45 | -DDEMO=ON \ 46 | .. 
47 | 48 | make 49 | ``` 50 | 51 | ## CNN-based Face Detection on Windows 52 | 53 | | Method |Time | FPS |Time | FPS | 54 | |--------------------|--------------|-------------|--------------|-------------| 55 | | | X64 |X64 | X64 |X64 | 56 | | |Single-thread |Single-thread|Multi-thread |Multi-thread | 57 | |OpenCV Haar+AdaBoost (640x480)| -- | -- | 12.33ms | 81.1 | 58 | |cnn (CPU, 640x480) | 64.21ms | 15.57 | 15.59ms | 64.16 | 59 | |cnn (CPU, 320x240) | 15.23ms | 65.68 | 3.99ms | 250.40 | 60 | |cnn (CPU, 160x120) | 3.47ms | 288.08 | 0.95ms | 1052.20 | 61 | |cnn (CPU, 128x96) | 2.35ms | 425.95 | 0.64ms | 1562.10 | 62 | 63 | * OpenCV Haar+AdaBoost runs with minimal face size 48x48 64 | * Face detection only, and no landmark detection included. 65 | * Minimal face size ~12x12 66 | * Intel(R) Core(TM) i7-7700 CPU @ 3.6GHz. 67 | 68 | ## CNN-based Face Detection on ARM Linux (Raspberry Pi 3 B+) 69 | 70 | | Method |Time | FPS |Time | FPS | 71 | |--------------------|--------------|-------------|--------------|-------------| 72 | | |Single-thread |Single-thread|Multi-thread |Multi-thread | 73 | |cnn (CPU, 640x480) | 512.04ms | 1.95 | 174.89ms | 5.72 | 74 | |cnn (CPU, 320x240) | 123.47ms | 8.10 | 42.13ms | 23.74 | 75 | |cnn (CPU, 160x120) | 27.42ms | 36.47 | 9.75ms | 102.58 | 76 | |cnn (CPU, 128x96) | 17.78ms | 56.24 | 6.12ms | 163.50 | 77 | 78 | * Face detection only, and no landmark detection included. 79 | * Minimal face size ~12x12 80 | * Raspberry Pi 3 B+, Broadcom BCM2837B0, Cortex-A53 (ARMv8) 64-bit SoC @ 1.4GHz 81 | 82 | 83 | ## Author 84 | * Shiqi Yu, 85 | 86 | ## Contributors 87 | * Jia Wu 88 | * Shengyin Wu 89 | * Dong Xu 90 | 91 | ## Acknowledgment 92 | The work is partly supported by the Science Foundation of Shenzhen (Grant No. JCYJ20150324141711699). 
93 | -------------------------------------------------------------------------------- /libfacedetection/aarch64-toolchain.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_SYSTEM_NAME Linux) 2 | set(CMAKE_SYSTEM_VERSION 1) 3 | set(CMAKE_SYSTEM_PROCESSOR "aarch64") 4 | set(CMAKE_CXX_COMPILER "/opt/linaro/gcc-linaro-6.3.1-2017.05-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu-g++") 5 | set(CMAKE_C_COMPILER "/opt/linaro/gcc-linaro-6.3.1-2017.05-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu-gcc") 6 | -------------------------------------------------------------------------------- /libfacedetection/example/libfacedetect.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include "facedetectcnn.h" 7 | 8 | /* 9 | 10 | Howto run? 11 | 12 | GST_DEBUG=3 gst-launch-1.0 filesrc location=input.mp4 ! decodebin \ 13 | ! videorate ! video/x-raw, framerate=8/1 \ 14 | ! videoscale ! video/x-raw, width=640, height=480 \ 15 | ! videoconvert \ 16 | ! identity sync=true \ 17 | ! queue max-size-buffers=3 leaky=1 \ 18 | ! libfacedetect \ 19 | ! videoconvert \ 20 | ! 
xvimagesink sync=false 21 | */ 22 | 23 | 24 | #define TYPE_FACE_DETECT face_detect_get_type() 25 | G_DECLARE_FINAL_TYPE (FaceDetect, face_detect, FACE, DETECT, GstBaseTransform) 26 | 27 | struct _FaceDetect { 28 | GstBaseTransform element; 29 | 30 | guint width; 31 | guint height; 32 | guint thresh; 33 | gboolean boxes; 34 | }; 35 | 36 | G_DEFINE_TYPE (FaceDetect, face_detect, GST_TYPE_BASE_TRANSFORM) 37 | 38 | using namespace cv; 39 | 40 | #define DEFAULT_THRESHHOLD 75 41 | #define DEFAULT_BOXES TRUE 42 | 43 | enum { 44 | PROP_0, 45 | PROP_THRESH, 46 | PROP_BOXES 47 | }; 48 | 49 | static GstStaticPadTemplate sink_factory = GST_STATIC_PAD_TEMPLATE ( 50 | "sink", 51 | GST_PAD_SINK, 52 | GST_PAD_ALWAYS, 53 | GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE("RGB")) 54 | ); 55 | 56 | static GstStaticPadTemplate src_factory = GST_STATIC_PAD_TEMPLATE ( 57 | "src", 58 | GST_PAD_SRC, 59 | GST_PAD_ALWAYS, 60 | GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE("RGB")) 61 | ); 62 | 63 | static void set_property( GObject *object, guint prop_id, const GValue *value, GParamSpec *pspec ) { 64 | FaceDetect *self = FACE_DETECT(object); 65 | switch(prop_id) { 66 | case PROP_THRESH: 67 | self->thresh = g_value_get_uint(value); 68 | break; 69 | case PROP_BOXES: 70 | self->boxes = g_value_get_boolean(value); 71 | break; 72 | default: 73 | G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); 74 | break; 75 | } 76 | } 77 | 78 | static void get_property( GObject *object, guint prop_id, GValue *value, GParamSpec *pspec ) { 79 | FaceDetect *self = FACE_DETECT(object); 80 | 81 | switch(prop_id) { 82 | case PROP_THRESH: 83 | g_value_set_uint(value, self->thresh); 84 | break; 85 | case PROP_BOXES: 86 | g_value_set_boolean(value, self->boxes); 87 | break; 88 | default: 89 | G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); 90 | break; 91 | } 92 | } 93 | 94 | static gboolean set_caps(GstBaseTransform *trans, GstCaps *incaps, GstCaps *outcaps) { 95 | FaceDetect *self = FACE_DETECT(trans); 96 | GstVideoInfo 
info; 97 | 98 | if (gst_video_info_from_caps (&info, incaps) == FALSE) { 99 | return FALSE; 100 | } 101 | 102 | GST_INFO("New Width: %d Height: %d", info.width, info.height); 103 | self->width = info.width; 104 | self->height = info.height; 105 | 106 | return TRUE; 107 | } 108 | 109 | static GstFlowReturn transform_ip(GstBaseTransform *trans, GstBuffer *buf) { 110 | FaceDetect *self = FACE_DETECT(trans); 111 | GST_DEBUG("Processing"); 112 | 113 | GstMapInfo info; 114 | if (gst_buffer_map(buf, &info, GST_MAP_WRITE) == FALSE) { 115 | GST_ERROR("Cannot map buffer"); 116 | return GST_FLOW_ERROR; 117 | } 118 | Size size(self->width, self->height); 119 | Mat image(size, CV_8UC3, (void *) info.data); 120 | 121 | unsigned char *pBuffer = (unsigned char *) malloc(0x20000); 122 | int *pResults = facedetect_cnn(pBuffer, (unsigned char*)(image.ptr(0)), image.cols, image.rows, (int)image.step); 123 | 124 | for(int i = 0; i < (pResults ? *pResults : 0); i++) { 125 | short * p = ((short*)(pResults+1))+142*i; 126 | int x = p[0]; 127 | int y = p[1]; 128 | int w = p[2]; 129 | int h = p[3]; 130 | int neighbors = p[4]; 131 | int angle = p[5]; 132 | 133 | GST_ERROR("face_rect=[%d, %d, %d, %d], neighbors=%d, angle=%d", x,y,w,h,neighbors, angle); 134 | if (neighbors >= self->thresh) { 135 | if (self->boxes) { 136 | rectangle(image, Rect(x, y, w, h), Scalar(0, 255, 0), 2); 137 | } 138 | } 139 | } 140 | 141 | free(pBuffer); 142 | gst_buffer_unmap(buf, &info); 143 | return GST_FLOW_OK; 144 | } 145 | 146 | static void face_detect_init (FaceDetect *self) 147 | { 148 | self->width = 0; 149 | self->height = 0; 150 | self->thresh = DEFAULT_THRESHHOLD; 151 | self->boxes = DEFAULT_BOXES; 152 | } 153 | 154 | static void face_detect_class_init (FaceDetectClass *klass) 155 | { 156 | GObjectClass *object_class = G_OBJECT_CLASS (klass); 157 | GstElementClass *element_class = GST_ELEMENT_CLASS (klass); 158 | GstBaseTransformClass *transform_class = GST_BASE_TRANSFORM_CLASS (klass); 159 | 160 | 
object_class->set_property = set_property; 161 | object_class->get_property = get_property; 162 | 163 | transform_class->set_caps = set_caps; 164 | transform_class->transform_ip = transform_ip; 165 | 166 | g_object_class_install_property(object_class, PROP_THRESH, 167 | g_param_spec_uint( "thresh", "thresh", "Thresh Hold as a percentage", 0, 100, DEFAULT_THRESHHOLD, G_PARAM_READWRITE) 168 | ); 169 | 170 | g_object_class_install_property(object_class, PROP_BOXES, 171 | g_param_spec_boolean( "boxes", "boxes", "Draw boxes", DEFAULT_BOXES, G_PARAM_READWRITE) 172 | ); 173 | 174 | gst_element_class_set_static_metadata(element_class, 175 | "Brightness", 176 | "Sink/Src/Caps", 177 | "Detects faces in images using libfacedetect", 178 | "James Stevenson " 179 | ); 180 | 181 | gst_element_class_add_static_pad_template (element_class, &sink_factory); 182 | gst_element_class_add_static_pad_template (element_class, &src_factory); 183 | } 184 | 185 | 186 | extern "C" gboolean Register_init (GstPlugin *plugin) { 187 | return gst_element_register (plugin, "libfacedetect", GST_RANK_NONE, TYPE_FACE_DETECT); 188 | } 189 | 190 | #ifndef PACKAGE 191 | #define PACKAGE "libfacedetect" 192 | #endif 193 | 194 | GST_PLUGIN_DEFINE ( 195 | GST_VERSION_MAJOR, 196 | GST_VERSION_MINOR, 197 | facedetect, 198 | "Detect faces", 199 | Register_init, 200 | "0.0.0", 201 | "BSD", 202 | "facedetect", 203 | "https://github.com/ShiqiYu/libfacedetection" 204 | ) 205 | -------------------------------------------------------------------------------- /libfacedetection/example/libfacedetectcnn-example.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | By downloading, copying, installing or using the software you agree to this license. 3 | If you do not agree to this license, do not download, install, 4 | copy or use the software. 
5 | 6 | 7 | License Agreement For libfacedetection 8 | (3-clause BSD License) 9 | 10 | Copyright (c) 2018-2019, Shiqi Yu, all rights reserved. 11 | shiqi.yu@gmail.com 12 | 13 | Redistribution and use in source and binary forms, with or without modification, 14 | are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright notice, 20 | this list of conditions and the following disclaimer in the documentation 21 | and/or other materials provided with the distribution. 22 | 23 | * Neither the names of the copyright holders nor the names of the contributors 24 | may be used to endorse or promote products derived from this software 25 | without specific prior written permission. 26 | 27 | This software is provided by the copyright holders and contributors "as is" and 28 | any express or implied warranties, including, but not limited to, the implied 29 | warranties of merchantability and fitness for a particular purpose are disclaimed. 30 | In no event shall copyright holders or contributors be liable for any direct, 31 | indirect, incidental, special, exemplary, or consequential damages 32 | (including, but not limited to, procurement of substitute goods or services; 33 | loss of use, data, or profits; or business interruption) however caused 34 | and on any theory of liability, whether in contract, strict liability, 35 | or tort (including negligence or otherwise) arising in any way out of 36 | the use of this software, even if advised of the possibility of such damage. 37 | */ 38 | 39 | #include 40 | #include 41 | #include "facedetectcnn.h" 42 | 43 | //define the buffer size. Do not change the size! 
44 | #define DETECT_BUFFER_SIZE 0x20000 45 | using namespace cv; 46 | 47 | int main(int argc, char* argv[]) 48 | { 49 | if(argc != 2) 50 | { 51 | printf("Usage: %s \n", argv[0]); 52 | return -1; 53 | } 54 | 55 | //load an image and convert it to gray (single-channel) 56 | Mat image = imread(argv[1]); 57 | if(image.empty()) 58 | { 59 | fprintf(stderr, "Can not load the image file %s.\n", argv[1]); 60 | return -1; 61 | } 62 | 63 | int * pResults = NULL; 64 | //pBuffer is used in the detection functions. 65 | //If you call functions in multiple threads, please create one buffer for each thread! 66 | unsigned char * pBuffer = (unsigned char *)malloc(DETECT_BUFFER_SIZE); 67 | if(!pBuffer) 68 | { 69 | fprintf(stderr, "Can not alloc buffer.\n"); 70 | return -1; 71 | } 72 | 73 | 74 | /////////////////////////////////////////// 75 | // CNN face detection 76 | // Best detection rate 77 | ////////////////////////////////////////// 78 | //!!! The input image must be a RGB one (three-channel) 79 | //!!! DO NOT RELEASE pResults !!! 80 | pResults = facedetect_cnn(pBuffer, (unsigned char*)(image.ptr(0)), image.cols, image.rows, (int)image.step); 81 | 82 | printf("%d faces detected.\n", (pResults ? *pResults : 0)); 83 | Mat result_cnn = image.clone(); 84 | //print the detection results 85 | for(int i = 0; i < (pResults ? 
*pResults : 0); i++) 86 | { 87 | short * p = ((short*)(pResults+1))+142*i; 88 | int x = p[0]; 89 | int y = p[1]; 90 | int w = p[2]; 91 | int h = p[3]; 92 | int neighbors = p[4]; 93 | int angle = p[5]; 94 | 95 | printf("face_rect=[%d, %d, %d, %d], neighbors=%d, angle=%d\n", x,y,w,h,neighbors, angle); 96 | rectangle(result_cnn, Rect(x, y, w, h), Scalar(0, 255, 0), 2); 97 | } 98 | imshow("result_cnn", result_cnn); 99 | 100 | waitKey(); 101 | 102 | //release the buffer 103 | free(pBuffer); 104 | 105 | return 0; 106 | } -------------------------------------------------------------------------------- /libfacedetection/images/chloecalmon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chai2010/libfacedetection-go/80e89f84b0b18fa87abb1de4b48795e9a5505c15/libfacedetection/images/chloecalmon.png -------------------------------------------------------------------------------- /libfacedetection/images/cnnresult.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chai2010/libfacedetection-go/80e89f84b0b18fa87abb1de4b48795e9a5505c15/libfacedetection/images/cnnresult.png -------------------------------------------------------------------------------- /libfacedetection/images/keliamoniz1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chai2010/libfacedetection-go/80e89f84b0b18fa87abb1de4b48795e9a5505c15/libfacedetection/images/keliamoniz1.jpg -------------------------------------------------------------------------------- /libfacedetection/images/keliamoniz2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chai2010/libfacedetection-go/80e89f84b0b18fa87abb1de4b48795e9a5505c15/libfacedetection/images/keliamoniz2.jpg -------------------------------------------------------------------------------- 
/libfacedetection/models/README.md: -------------------------------------------------------------------------------- 1 | # How to use the model files 2 | 3 | You do not need to use the model files in this directory when you run face detection compiled from the C++ source code. If you want to use Caffe or OpenVINO to detect faces, you can use the model files here. -------------------------------------------------------------------------------- /libfacedetection/models/caffe/yufacedetectnet-open-v1.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chai2010/libfacedetection-go/80e89f84b0b18fa87abb1de4b48795e9a5505c15/libfacedetection/models/caffe/yufacedetectnet-open-v1.caffemodel -------------------------------------------------------------------------------- /libfacedetection/models/caffe/yufacedetectnet-open-v1.prototxt: -------------------------------------------------------------------------------- 1 | name: "YuFaceDetectNet" 2 | 3 | input: "data" 4 | 5 | input_shape { 6 | dim: 1 7 | dim: 3 8 | dim: 240 9 | dim: 320 10 | } 11 | 12 | #CONV1########################################################### 13 | 14 | layer { 15 | name: "conv1_1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1_1" 19 | param { 20 | lr_mult: 1.0 21 | decay_mult: 1.0 22 | } 23 | convolution_param { 24 | num_output: 16 25 | pad: 1 26 | stride: 2 27 | kernel_size: 3 28 | weight_filler { 29 | type: "xavier" 30 | } 31 | bias_term: false 32 | } 33 | } 34 | 35 | layer { 36 | name: "relu1_1" 37 | type: "ReLU" 38 | bottom: "conv1_1" 39 | top: "conv1_1" 40 | } 41 | layer { 42 | name: "conv1_2" 43 | type: "Convolution" 44 | bottom: "conv1_1" 45 | top: "conv1_2" 46 | param { 47 | lr_mult: 1.0 48 | decay_mult: 1.0 49 | } 50 | convolution_param { 51 | num_output: 16 52 | pad: 0 53 | kernel_size: 1 54 | weight_filler { 55 | type: "xavier" 56 | } 57 | bias_term: false 58 | } 59 | } 60 | 61 | 62 | layer { 63 | name: 
"relu1_2" 64 | type: "ReLU" 65 | bottom: "conv1_2" 66 | top: "conv1_2" 67 | } 68 | #CONV2########################################################## 69 | layer { 70 | name: "pool1" 71 | type: "Pooling" 72 | bottom: "conv1_2" 73 | top: "pool1" 74 | pooling_param { 75 | pool: MAX 76 | kernel_size: 2 77 | stride: 2 78 | } 79 | } 80 | layer { 81 | name: "conv2_1" 82 | type: "Convolution" 83 | bottom: "pool1" 84 | top: "conv2_1" 85 | param { 86 | lr_mult: 1.0 87 | decay_mult: 1.0 88 | } 89 | convolution_param { 90 | num_output: 16 91 | pad: 1 92 | kernel_size: 3 93 | weight_filler { 94 | type: "xavier" 95 | } 96 | bias_term: false 97 | } 98 | } 99 | 100 | 101 | layer { 102 | name: "relu2_1" 103 | type: "ReLU" 104 | bottom: "conv2_1" 105 | top: "conv2_1" 106 | } 107 | layer { 108 | name: "conv2_2" 109 | type: "Convolution" 110 | bottom: "conv2_1" 111 | top: "conv2_2" 112 | param { 113 | lr_mult: 1.0 114 | decay_mult: 1.0 115 | } 116 | convolution_param { 117 | num_output: 16 118 | pad: 0 119 | kernel_size: 1 120 | weight_filler { 121 | type: "xavier" 122 | } 123 | bias_term: false 124 | } 125 | } 126 | 127 | layer { 128 | name: "relu2_2" 129 | type: "ReLU" 130 | bottom: "conv2_2" 131 | top: "conv2_2" 132 | } 133 | #CONV3########################################################## 134 | 135 | layer { 136 | name: "pool2" 137 | type: "Pooling" 138 | bottom: "conv2_2" 139 | top: "pool2" 140 | pooling_param { 141 | pool: MAX 142 | kernel_size: 2 143 | stride: 2 144 | } 145 | } 146 | layer { 147 | name: "conv3_1" 148 | type: "Convolution" 149 | bottom: "pool2" 150 | top: "conv3_1" 151 | param { 152 | lr_mult: 1.0 153 | decay_mult: 1.0 154 | } 155 | convolution_param { 156 | num_output: 32 157 | pad: 1 158 | kernel_size: 3 159 | weight_filler { 160 | type: "xavier" 161 | } 162 | bias_term: false 163 | } 164 | } 165 | 166 | layer { 167 | name: "relu3_1" 168 | type: "ReLU" 169 | bottom: "conv3_1" 170 | top: "conv3_1" 171 | } 172 | layer { 173 | name: "conv3_2" 174 | type: 
"Convolution" 175 | bottom: "conv3_1" 176 | top: "conv3_2" 177 | param { 178 | lr_mult: 1.0 179 | decay_mult: 1.0 180 | } 181 | convolution_param { 182 | num_output: 32 183 | pad: 0 184 | kernel_size: 1 185 | weight_filler { 186 | type: "xavier" 187 | } 188 | bias_term: false 189 | } 190 | } 191 | 192 | 193 | layer { 194 | name: "relu3_2" 195 | type: "ReLU" 196 | bottom: "conv3_2" 197 | top: "conv3_2" 198 | } 199 | layer { 200 | name: "conv3_3" 201 | type: "Convolution" 202 | bottom: "conv3_2" 203 | top: "conv3_3" 204 | param { 205 | lr_mult: 1.0 206 | decay_mult: 1.0 207 | } 208 | convolution_param { 209 | num_output: 32 210 | pad: 1 211 | kernel_size: 3 212 | weight_filler { 213 | type: "xavier" 214 | } 215 | bias_term: false 216 | } 217 | } 218 | 219 | layer { 220 | name: "relu3_3" 221 | type: "ReLU" 222 | bottom: "conv3_3" 223 | top: "conv3_3" 224 | } 225 | 226 | 227 | #CONV4########################################################## 228 | 229 | layer { 230 | name: "pool3" 231 | type: "Pooling" 232 | bottom: "conv3_3" 233 | top: "pool3" 234 | pooling_param { 235 | pool: MAX 236 | kernel_size: 2 237 | stride: 2 238 | } 239 | } 240 | layer { 241 | name: "conv4_1" 242 | type: "Convolution" 243 | bottom: "pool3" 244 | top: "conv4_1" 245 | param { 246 | lr_mult: 1.0 247 | decay_mult: 1.0 248 | } 249 | convolution_param { 250 | num_output: 64 251 | pad: 1 252 | kernel_size: 3 253 | weight_filler { 254 | type: "xavier" 255 | } 256 | bias_term: false 257 | } 258 | } 259 | 260 | layer { 261 | name: "relu4_1" 262 | type: "ReLU" 263 | bottom: "conv4_1" 264 | top: "conv4_1" 265 | } 266 | layer { 267 | name: "conv4_2" 268 | type: "Convolution" 269 | bottom: "conv4_1" 270 | top: "conv4_2" 271 | param { 272 | lr_mult: 1.0 273 | decay_mult: 1.0 274 | } 275 | convolution_param { 276 | num_output: 64 277 | pad: 0 278 | kernel_size: 1 279 | weight_filler { 280 | type: "xavier" 281 | } 282 | bias_term: false 283 | } 284 | } 285 | 286 | layer { 287 | name: "relu4_2" 288 | type: 
"ReLU" 289 | bottom: "conv4_2" 290 | top: "conv4_2" 291 | } 292 | layer { 293 | name: "conv4_3" 294 | type: "Convolution" 295 | bottom: "conv4_2" 296 | top: "conv4_3" 297 | param { 298 | lr_mult: 1.0 299 | decay_mult: 1.0 300 | } 301 | convolution_param { 302 | num_output: 64 303 | pad: 1 304 | kernel_size: 3 305 | weight_filler { 306 | type: "xavier" 307 | } 308 | bias_term: false 309 | } 310 | } 311 | 312 | layer { 313 | name: "relu4_3" 314 | type: "ReLU" 315 | bottom: "conv4_3" 316 | top: "conv4_3" 317 | } 318 | 319 | #CONV5########################################################## 320 | 321 | layer { 322 | name: "pool4" 323 | type: "Pooling" 324 | bottom: "conv4_3" 325 | top: "pool4" 326 | pooling_param { 327 | pool: MAX 328 | kernel_size: 2 329 | stride: 2 330 | } 331 | } 332 | layer { 333 | name: "conv5_1" 334 | type: "Convolution" 335 | bottom: "pool4" 336 | top: "conv5_1" 337 | param { 338 | lr_mult: 1.0 339 | decay_mult: 1.0 340 | } 341 | convolution_param { 342 | num_output: 128 343 | pad: 1 344 | kernel_size: 3 345 | weight_filler { 346 | type: "xavier" 347 | } 348 | bias_term: false 349 | dilation: 1 350 | } 351 | } 352 | 353 | layer { 354 | name: "relu5_1" 355 | type: "ReLU" 356 | bottom: "conv5_1" 357 | top: "conv5_1" 358 | } 359 | layer { 360 | name: "conv5_2" 361 | type: "Convolution" 362 | bottom: "conv5_1" 363 | top: "conv5_2" 364 | param { 365 | lr_mult: 1.0 366 | decay_mult: 1.0 367 | } 368 | convolution_param { 369 | num_output: 128 370 | pad: 0 371 | kernel_size: 1 372 | weight_filler { 373 | type: "xavier" 374 | } 375 | bias_term: false 376 | dilation: 1 377 | } 378 | } 379 | 380 | layer { 381 | name: "relu5_2" 382 | type: "ReLU" 383 | bottom: "conv5_2" 384 | top: "conv5_2" 385 | } 386 | layer { 387 | name: "conv5_3" 388 | type: "Convolution" 389 | bottom: "conv5_2" 390 | top: "conv5_3" 391 | param { 392 | lr_mult: 1.0 393 | decay_mult: 1.0 394 | } 395 | convolution_param { 396 | num_output: 128 397 | pad: 1 398 | kernel_size: 3 399 | 
weight_filler { 400 | type: "xavier" 401 | } 402 | bias_term: false 403 | dilation: 1 404 | } 405 | } 406 | 407 | layer { 408 | name: "relu5_3" 409 | type: "ReLU" 410 | bottom: "conv5_3" 411 | top: "conv5_3" 412 | } 413 | 414 | #CONV6########################################################## 415 | 416 | layer { 417 | name: "pool5" 418 | type: "Pooling" 419 | bottom: "conv5_3" 420 | top: "pool5" 421 | pooling_param { 422 | pool: MAX 423 | kernel_size: 2 424 | stride: 2 425 | } 426 | } 427 | 428 | layer { 429 | name: "conv6_1" 430 | type: "Convolution" 431 | bottom: "pool5" 432 | top: "conv6_1" 433 | param { 434 | lr_mult: 1.0 435 | decay_mult: 1.0 436 | } 437 | convolution_param { 438 | num_output: 128 439 | pad: 1 440 | kernel_size: 3 441 | weight_filler { 442 | type: "xavier" 443 | } 444 | bias_term: false 445 | dilation: 1 446 | } 447 | } 448 | 449 | layer { 450 | name: "relu6_1" 451 | type: "ReLU" 452 | bottom: "conv6_1" 453 | top: "conv6_1" 454 | } 455 | layer { 456 | name: "conv6_2" 457 | type: "Convolution" 458 | bottom: "conv6_1" 459 | top: "conv6_2" 460 | param { 461 | lr_mult: 1.0 462 | decay_mult: 1.0 463 | } 464 | convolution_param { 465 | num_output: 128 466 | pad: 0 467 | kernel_size: 1 468 | weight_filler { 469 | type: "xavier" 470 | } 471 | bias_term: false 472 | dilation: 1 473 | } 474 | } 475 | 476 | layer { 477 | name: "relu6_2" 478 | type: "ReLU" 479 | bottom: "conv6_2" 480 | top: "conv6_2" 481 | } 482 | layer { 483 | name: "conv6_3" 484 | type: "Convolution" 485 | bottom: "conv6_2" 486 | top: "conv6_3" 487 | param { 488 | lr_mult: 1.0 489 | decay_mult: 1.0 490 | } 491 | convolution_param { 492 | num_output: 128 493 | pad: 1 494 | kernel_size: 3 495 | weight_filler { 496 | type: "xavier" 497 | } 498 | bias_term: false 499 | dilation: 1 500 | } 501 | } 502 | 503 | layer { 504 | name: "relu6_3" 505 | type: "ReLU" 506 | bottom: "conv6_3" 507 | top: "conv6_3" 508 | } 509 | 510 | #PRIORBOX3########################################## 511 | layer { 512 | 
name: "conv3_3_norm" 513 | type: "Normalize" 514 | bottom: "conv3_3" 515 | top: "conv3_3_norm" 516 | norm_param { 517 | across_spatial: false 518 | scale_filler { 519 | type: "constant" 520 | value: 10.0 521 | } 522 | channel_shared: false 523 | } 524 | } 525 | layer { 526 | name: "conv3_3_norm_mbox_loc_new" 527 | type: "Convolution" 528 | bottom: "conv3_3_norm" 529 | top: "conv3_3_norm_mbox_loc_new" 530 | param { 531 | lr_mult: 1.0 532 | decay_mult: 1.0 533 | } 534 | convolution_param { 535 | num_output: 12 536 | pad: 1 537 | kernel_size: 3 538 | stride: 1 539 | weight_filler { 540 | type: "xavier" 541 | } 542 | bias_term: false 543 | } 544 | } 545 | layer { 546 | name: "conv3_3_norm_mbox_loc_perm" 547 | type: "Permute" 548 | bottom: "conv3_3_norm_mbox_loc_new" 549 | top: "conv3_3_norm_mbox_loc_perm" 550 | permute_param { 551 | order: 0 552 | order: 2 553 | order: 3 554 | order: 1 555 | } 556 | } 557 | layer { 558 | name: "conv3_3_norm_mbox_loc_flat" 559 | type: "Flatten" 560 | bottom: "conv3_3_norm_mbox_loc_perm" 561 | top: "conv3_3_norm_mbox_loc_flat" 562 | flatten_param { 563 | axis: 1 564 | } 565 | } 566 | layer { 567 | name: "conv3_3_norm_mbox_conf_new" 568 | type: "Convolution" 569 | bottom: "conv3_3_norm" 570 | top: "conv3_3_norm_mbox_conf_new" 571 | param { 572 | lr_mult: 1.0 573 | decay_mult: 1.0 574 | } 575 | convolution_param { 576 | num_output: 6 577 | pad: 1 578 | kernel_size: 3 579 | stride: 1 580 | weight_filler { 581 | type: "xavier" 582 | } 583 | bias_term: false 584 | } 585 | } 586 | layer { 587 | name: "conv3_3_norm_mbox_conf_perm" 588 | type: "Permute" 589 | bottom: "conv3_3_norm_mbox_conf_new" 590 | top: "conv3_3_norm_mbox_conf_perm" 591 | permute_param { 592 | order: 0 593 | order: 2 594 | order: 3 595 | order: 1 596 | } 597 | } 598 | layer { 599 | name: "conv3_3_norm_mbox_conf_flat" 600 | type: "Flatten" 601 | bottom: "conv3_3_norm_mbox_conf_perm" 602 | top: "conv3_3_norm_mbox_conf_flat" 603 | flatten_param { 604 | axis: 1 605 | } 606 | } 
607 | layer { 608 | 609 | name: "conv3_3_norm_mbox_priorbox" 610 | type: "PriorBox" 611 | bottom: "conv3_3_norm" 612 | bottom: "data" 613 | top: "conv3_3_norm_mbox_priorbox" 614 | prior_box_param { 615 | min_size: 10.0 616 | min_size: 16.0 617 | min_size: 24.0 618 | clip: false 619 | variance: 0.10000000149 620 | variance: 0.10000000149 621 | variance: 0.20000000298 622 | variance: 0.20000000298 623 | step: 8.0 624 | offset: 0.5 625 | } 626 | } 627 | #PRIORBOX4########################################## 628 | layer { 629 | name: "conv4_3_norm" 630 | type: "Normalize" 631 | bottom: "conv4_3" 632 | top: "conv4_3_norm" 633 | norm_param { 634 | across_spatial: false 635 | scale_filler { 636 | type: "constant" 637 | value: 8.0 638 | } 639 | channel_shared: false 640 | } 641 | } 642 | layer { 643 | name: "conv4_3_norm_mbox_loc" 644 | type: "Convolution" 645 | bottom: "conv4_3_norm" 646 | top: "conv4_3_norm_mbox_loc" 647 | param { 648 | lr_mult: 1.0 649 | decay_mult: 1.0 650 | } 651 | convolution_param { 652 | num_output: 8 653 | pad: 1 654 | kernel_size: 3 655 | stride: 1 656 | weight_filler { 657 | type: "xavier" 658 | } 659 | bias_term: false 660 | } 661 | } 662 | layer { 663 | name: "conv4_3_norm_mbox_loc_perm" 664 | type: "Permute" 665 | bottom: "conv4_3_norm_mbox_loc" 666 | top: "conv4_3_norm_mbox_loc_perm" 667 | permute_param { 668 | order: 0 669 | order: 2 670 | order: 3 671 | order: 1 672 | } 673 | } 674 | layer { 675 | name: "conv4_3_norm_mbox_loc_flat" 676 | type: "Flatten" 677 | bottom: "conv4_3_norm_mbox_loc_perm" 678 | top: "conv4_3_norm_mbox_loc_flat" 679 | flatten_param { 680 | axis: 1 681 | } 682 | } 683 | layer { 684 | name: "conv4_3_norm_mbox_conf" 685 | type: "Convolution" 686 | bottom: "conv4_3_norm" 687 | top: "conv4_3_norm_mbox_conf" 688 | param { 689 | lr_mult: 1.0 690 | decay_mult: 1.0 691 | } 692 | convolution_param { 693 | num_output: 4 694 | pad: 1 695 | kernel_size: 3 696 | stride: 1 697 | weight_filler { 698 | type: "xavier" 699 | } 700 | 
bias_term: false 701 | } 702 | } 703 | layer { 704 | name: "conv4_3_norm_mbox_conf_perm" 705 | type: "Permute" 706 | bottom: "conv4_3_norm_mbox_conf" 707 | top: "conv4_3_norm_mbox_conf_perm" 708 | permute_param { 709 | order: 0 710 | order: 2 711 | order: 3 712 | order: 1 713 | } 714 | } 715 | layer { 716 | name: "conv4_3_norm_mbox_conf_flat" 717 | type: "Flatten" 718 | bottom: "conv4_3_norm_mbox_conf_perm" 719 | top: "conv4_3_norm_mbox_conf_flat" 720 | flatten_param { 721 | axis: 1 722 | } 723 | } 724 | layer { 725 | 726 | name: "conv4_3_norm_mbox_priorbox" 727 | type: "PriorBox" 728 | bottom: "conv4_3_norm" 729 | bottom: "data" 730 | top: "conv4_3_norm_mbox_priorbox" 731 | prior_box_param { 732 | min_size: 32.0 733 | min_size: 48.0 734 | clip: false 735 | variance: 0.10000000149 736 | variance: 0.10000000149 737 | variance: 0.20000000298 738 | variance: 0.20000000298 739 | step: 16.0 740 | offset: 0.5 741 | } 742 | } 743 | #PRIORBOX5########################################## 744 | layer { 745 | name: "conv5_3_norm" 746 | type: "Normalize" 747 | bottom: "conv5_3" 748 | top: "conv5_3_norm" 749 | norm_param { 750 | across_spatial: false 751 | scale_filler { 752 | type: "constant" 753 | value: 5.0 754 | } 755 | channel_shared: false 756 | } 757 | } 758 | layer { 759 | name: "conv5_3_norm_mbox_loc" 760 | type: "Convolution" 761 | bottom: "conv5_3_norm" 762 | top: "conv5_3_norm_mbox_loc" 763 | param { 764 | lr_mult: 1.0 765 | decay_mult: 1.0 766 | } 767 | convolution_param { 768 | num_output: 8 769 | pad: 1 770 | kernel_size: 3 771 | stride: 1 772 | weight_filler { 773 | type: "xavier" 774 | } 775 | bias_term: false 776 | } 777 | } 778 | layer { 779 | name: "conv5_3_norm_mbox_loc_perm" 780 | type: "Permute" 781 | bottom: "conv5_3_norm_mbox_loc" 782 | top: "conv5_3_norm_mbox_loc_perm" 783 | permute_param { 784 | order: 0 785 | order: 2 786 | order: 3 787 | order: 1 788 | } 789 | } 790 | layer { 791 | name: "conv5_3_norm_mbox_loc_flat" 792 | type: "Flatten" 793 | bottom: 
"conv5_3_norm_mbox_loc_perm" 794 | top: "conv5_3_norm_mbox_loc_flat" 795 | flatten_param { 796 | axis: 1 797 | } 798 | } 799 | layer { 800 | name: "conv5_3_norm_mbox_conf" 801 | type: "Convolution" 802 | bottom: "conv5_3_norm" 803 | top: "conv5_3_norm_mbox_conf" 804 | param { 805 | lr_mult: 1.0 806 | decay_mult: 1.0 807 | } 808 | convolution_param { 809 | num_output: 4 810 | pad: 1 811 | kernel_size: 3 812 | stride: 1 813 | weight_filler { 814 | type: "xavier" 815 | } 816 | bias_term: false 817 | } 818 | } 819 | layer { 820 | name: "conv5_3_norm_mbox_conf_perm" 821 | type: "Permute" 822 | bottom: "conv5_3_norm_mbox_conf" 823 | top: "conv5_3_norm_mbox_conf_perm" 824 | permute_param { 825 | order: 0 826 | order: 2 827 | order: 3 828 | order: 1 829 | } 830 | } 831 | layer { 832 | name: "conv5_3_norm_mbox_conf_flat" 833 | type: "Flatten" 834 | bottom: "conv5_3_norm_mbox_conf_perm" 835 | top: "conv5_3_norm_mbox_conf_flat" 836 | flatten_param { 837 | axis: 1 838 | } 839 | } 840 | layer { 841 | 842 | name: "conv5_3_norm_mbox_priorbox" 843 | type: "PriorBox" 844 | bottom: "conv5_3_norm" 845 | bottom: "data" 846 | top: "conv5_3_norm_mbox_priorbox" 847 | prior_box_param { 848 | min_size: 64.0 849 | min_size: 96.0 850 | clip: false 851 | variance: 0.10000000149 852 | variance: 0.10000000149 853 | variance: 0.20000000298 854 | variance: 0.20000000298 855 | step: 32.0 856 | offset: 0.5 857 | } 858 | } 859 | 860 | #PRIORBOX6########################################## 861 | layer { 862 | name: "conv6_3_norm" 863 | type: "Normalize" 864 | bottom: "conv6_3" 865 | top: "conv6_3_norm" 866 | norm_param { 867 | across_spatial: false 868 | scale_filler { 869 | type: "constant" 870 | value: 5.0 871 | } 872 | channel_shared: false 873 | } 874 | } 875 | layer { 876 | name: "conv6_3_norm_mbox_loc" 877 | type: "Convolution" 878 | bottom: "conv6_3_norm" 879 | top: "conv6_3_norm_mbox_loc" 880 | param { 881 | lr_mult: 1.0 882 | decay_mult: 1.0 883 | } 884 | convolution_param { 885 | num_output: 
12 886 | pad: 1 887 | kernel_size: 3 888 | stride: 1 889 | weight_filler { 890 | type: "xavier" 891 | } 892 | bias_term: false 893 | } 894 | } 895 | layer { 896 | name: "conv6_3_norm_mbox_loc_perm" 897 | type: "Permute" 898 | bottom: "conv6_3_norm_mbox_loc" 899 | top: "conv6_3_norm_mbox_loc_perm" 900 | permute_param { 901 | order: 0 902 | order: 2 903 | order: 3 904 | order: 1 905 | } 906 | } 907 | layer { 908 | name: "conv6_3_norm_mbox_loc_flat" 909 | type: "Flatten" 910 | bottom: "conv6_3_norm_mbox_loc_perm" 911 | top: "conv6_3_norm_mbox_loc_flat" 912 | flatten_param { 913 | axis: 1 914 | } 915 | } 916 | layer { 917 | name: "conv6_3_norm_mbox_conf" 918 | type: "Convolution" 919 | bottom: "conv6_3_norm" 920 | top: "conv6_3_norm_mbox_conf" 921 | param { 922 | lr_mult: 1.0 923 | decay_mult: 1.0 924 | } 925 | convolution_param { 926 | num_output: 6 927 | pad: 1 928 | kernel_size: 3 929 | stride: 1 930 | weight_filler { 931 | type: "xavier" 932 | } 933 | bias_term: false 934 | } 935 | } 936 | layer { 937 | name: "conv6_3_norm_mbox_conf_perm" 938 | type: "Permute" 939 | bottom: "conv6_3_norm_mbox_conf" 940 | top: "conv6_3_norm_mbox_conf_perm" 941 | permute_param { 942 | order: 0 943 | order: 2 944 | order: 3 945 | order: 1 946 | } 947 | } 948 | layer { 949 | name: "conv6_3_norm_mbox_conf_flat" 950 | type: "Flatten" 951 | bottom: "conv6_3_norm_mbox_conf_perm" 952 | top: "conv6_3_norm_mbox_conf_flat" 953 | flatten_param { 954 | axis: 1 955 | } 956 | } 957 | layer { 958 | 959 | name: "conv6_3_norm_mbox_priorbox" 960 | type: "PriorBox" 961 | bottom: "conv6_3_norm" 962 | bottom: "data" 963 | top: "conv6_3_norm_mbox_priorbox" 964 | prior_box_param { 965 | min_size: 128.0 966 | min_size: 192.0 967 | min_size: 256.0 968 | clip: false 969 | variance: 0.10000000149 970 | variance: 0.10000000149 971 | variance: 0.20000000298 972 | variance: 0.20000000298 973 | step: 64.0 974 | offset: 0.5 975 | } 976 | } 977 | ######################################################## 978 | layer { 
979 | name: "mbox_loc" 980 | type: "Concat" 981 | bottom: "conv3_3_norm_mbox_loc_flat" 982 | bottom: "conv4_3_norm_mbox_loc_flat" 983 | bottom: "conv5_3_norm_mbox_loc_flat" 984 | bottom: "conv6_3_norm_mbox_loc_flat" 985 | top: "mbox_loc" 986 | concat_param { 987 | axis: 1 988 | } 989 | } 990 | layer { 991 | name: "mbox_conf" 992 | type: "Concat" 993 | bottom: "conv3_3_norm_mbox_conf_flat" 994 | bottom: "conv4_3_norm_mbox_conf_flat" 995 | bottom: "conv5_3_norm_mbox_conf_flat" 996 | bottom: "conv6_3_norm_mbox_conf_flat" 997 | top: "mbox_conf" 998 | concat_param { 999 | axis: 1 1000 | } 1001 | } 1002 | layer { 1003 | name: "mbox_priorbox" 1004 | type: "Concat" 1005 | bottom: "conv3_3_norm_mbox_priorbox" 1006 | bottom: "conv4_3_norm_mbox_priorbox" 1007 | bottom: "conv5_3_norm_mbox_priorbox" 1008 | bottom: "conv6_3_norm_mbox_priorbox" 1009 | top: "mbox_priorbox" 1010 | concat_param { 1011 | axis: 2 1012 | } 1013 | } 1014 | 1015 | ##################################################### 1016 | layer { 1017 | name: "mbox_conf_reshape" 1018 | type: "Reshape" 1019 | bottom: "mbox_conf" 1020 | top: "mbox_conf_reshape" 1021 | reshape_param { 1022 | shape { 1023 | dim: 0 1024 | dim: -1 1025 | dim: 2 1026 | } 1027 | } 1028 | } 1029 | layer { 1030 | name: "mbox_conf_softmax" 1031 | type: "Softmax" 1032 | bottom: "mbox_conf_reshape" 1033 | top: "mbox_conf_softmax" 1034 | softmax_param { 1035 | axis: 2 1036 | } 1037 | } 1038 | layer { 1039 | name: "mbox_conf_flatten" 1040 | type: "Flatten" 1041 | bottom: "mbox_conf_softmax" 1042 | top: "mbox_conf_flatten" 1043 | flatten_param { 1044 | axis: 1 1045 | } 1046 | } 1047 | 1048 | layer { 1049 | name: "detection_out" 1050 | type: "DetectionOutput" 1051 | bottom: "mbox_loc" 1052 | bottom: "mbox_conf_flatten" 1053 | bottom: "mbox_priorbox" 1054 | # bottom: "data" 1055 | top: "detection_out" 1056 | include { 1057 | phase: TEST 1058 | } 1059 | transform_param { 1060 | mean_value: 103.94 1061 | mean_value: 116.78 1062 | mean_value: 123.68 1063 | 
} 1064 | detection_output_param { 1065 | num_classes: 2 1066 | share_location: true 1067 | background_label_id: 0 1068 | nms_param { 1069 | nms_threshold: 0.15 1070 | top_k: 100 1071 | } 1072 | code_type: CENTER_SIZE 1073 | keep_top_k: 50 1074 | confidence_threshold: 0.01 1075 | visualize: false 1076 | visualize_threshold: 0.3 1077 | } 1078 | } 1079 | 1080 | 1081 | -------------------------------------------------------------------------------- /libfacedetection/models/caffe/yufacedetectnet-open-v1.solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "./yufacedetectnet-open-v1.train.prototxt" 2 | 3 | base_lr: 0.001 4 | lr_policy: "poly" 5 | power: 2 6 | 7 | display: 10 8 | max_iter: 200000 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | snapshot: 1000 12 | snapshot_prefix: "./models/yufacedetectnet-open-v1" 13 | solver_mode: GPU 14 | debug_info: false 15 | snapshot_after_train: true 16 | average_loss: 10 17 | type: "SGD" 18 | store_blobs_in_old_format: true 19 | -------------------------------------------------------------------------------- /libfacedetection/models/caffe/yufacedetectnet-open-v1.train.prototxt: -------------------------------------------------------------------------------- 1 | name: "YuFaceDetectNet" 2 | 3 | layer { 4 | name: "data" 5 | type: "AnnotatedData" 6 | top: "data" 7 | top: "label" 8 | include { 9 | phase: TRAIN 10 | } 11 | transform_param { 12 | mirror: true 13 | mean_value: 104.0 14 | mean_value: 117.0 15 | mean_value: 123.0 16 | resize_param { 17 | prob: 1.0 18 | resize_mode: WARP 19 | height: 320 20 | width: 320 21 | interp_mode: LINEAR 22 | interp_mode: AREA 23 | interp_mode: NEAREST 24 | interp_mode: CUBIC 25 | interp_mode: LANCZOS4 26 | } 27 | emit_constraint { 28 | emit_type: CENTER 29 | } 30 | distort_param { 31 | brightness_prob: 0.5 32 | brightness_delta: 32.0 33 | contrast_prob: 0.5 34 | contrast_lower: 0.5 35 | contrast_upper: 1.5 36 | hue_prob: 0.5 37 | hue_delta: 
18.0 38 | saturation_prob: 0.5 39 | saturation_lower: 0.5 40 | saturation_upper: 1.5 41 | random_order_prob: 0.0 42 | } 43 | } 44 | data_param { 45 | source: "../FACE/lmdb/FACE_trainval_lmdb/" 46 | batch_size: 16 47 | backend: LMDB 48 | } 49 | annotated_data_param { 50 | batch_sampler { 51 | sampler { 52 | min_scale: 1.0 53 | max_scale: 1.0 54 | min_aspect_ratio: 1.0 55 | max_aspect_ratio: 1.0 56 | } 57 | sample_constraint { 58 | min_object_coverage: 1.0 59 | } 60 | max_sample: 1 61 | max_trials: 50 62 | } 63 | batch_sampler { 64 | sampler { 65 | min_scale: 0.300000011921 66 | max_scale: 1.0 67 | min_aspect_ratio: 1.0 68 | max_aspect_ratio: 1.0 69 | } 70 | sample_constraint { 71 | min_object_coverage: 1.0 72 | } 73 | max_sample: 1 74 | max_trials: 50 75 | } 76 | batch_sampler { 77 | sampler { 78 | min_scale: 0.300000011921 79 | max_scale: 1.0 80 | min_aspect_ratio: 1.0 81 | max_aspect_ratio: 1.0 82 | } 83 | sample_constraint { 84 | min_object_coverage: 1.0 85 | } 86 | max_sample: 1 87 | max_trials: 50 88 | } 89 | batch_sampler { 90 | sampler { 91 | min_scale: 0.300000011921 92 | max_scale: 1.0 93 | min_aspect_ratio: 1.0 94 | max_aspect_ratio: 1.0 95 | } 96 | sample_constraint { 97 | min_object_coverage: 1.0 98 | } 99 | max_sample: 1 100 | max_trials: 50 101 | } 102 | batch_sampler { 103 | sampler { 104 | min_scale: 0.300000011921 105 | max_scale: 1.0 106 | min_aspect_ratio: 1.0 107 | max_aspect_ratio: 1.0 108 | } 109 | sample_constraint { 110 | min_object_coverage: 1.0 111 | } 112 | max_sample: 1 113 | max_trials: 50 114 | } 115 | label_map_file: "../labelmap_face.prototxt" 116 | } 117 | } 118 | #CONV1########################################################### 119 | 120 | layer { 121 | name: "conv1_1" 122 | type: "Convolution" 123 | bottom: "data" 124 | top: "conv1_1" 125 | param { 126 | lr_mult: 1.0 127 | decay_mult: 1.0 128 | } 129 | convolution_param { 130 | num_output: 16 131 | pad: 1 132 | stride: 2 133 | kernel_size: 3 134 | weight_filler { 135 | type: 
"xavier" 136 | } 137 | bias_term: false 138 | } 139 | } 140 | 141 | layer { 142 | name: "relu1_1" 143 | type: "ReLU" 144 | bottom: "conv1_1" 145 | top: "conv1_1" 146 | } 147 | layer { 148 | name: "conv1_2" 149 | type: "Convolution" 150 | bottom: "conv1_1" 151 | top: "conv1_2" 152 | param { 153 | lr_mult: 1.0 154 | decay_mult: 1.0 155 | } 156 | convolution_param { 157 | num_output: 16 158 | pad: 0 159 | kernel_size: 1 160 | weight_filler { 161 | type: "xavier" 162 | } 163 | bias_term: false 164 | } 165 | } 166 | 167 | 168 | layer { 169 | name: "relu1_2" 170 | type: "ReLU" 171 | bottom: "conv1_2" 172 | top: "conv1_2" 173 | } 174 | #CONV2########################################################## 175 | layer { 176 | name: "pool1" 177 | type: "Pooling" 178 | bottom: "conv1_2" 179 | top: "pool1" 180 | pooling_param { 181 | pool: MAX 182 | kernel_size: 2 183 | stride: 2 184 | } 185 | } 186 | layer { 187 | name: "conv2_1" 188 | type: "Convolution" 189 | bottom: "pool1" 190 | top: "conv2_1" 191 | param { 192 | lr_mult: 1.0 193 | decay_mult: 1.0 194 | } 195 | convolution_param { 196 | num_output: 16 197 | pad: 1 198 | kernel_size: 3 199 | weight_filler { 200 | type: "xavier" 201 | } 202 | bias_term: false 203 | } 204 | } 205 | 206 | 207 | layer { 208 | name: "relu2_1" 209 | type: "ReLU" 210 | bottom: "conv2_1" 211 | top: "conv2_1" 212 | } 213 | layer { 214 | name: "conv2_2" 215 | type: "Convolution" 216 | bottom: "conv2_1" 217 | top: "conv2_2" 218 | param { 219 | lr_mult: 1.0 220 | decay_mult: 1.0 221 | } 222 | convolution_param { 223 | num_output: 16 224 | pad: 0 225 | kernel_size: 1 226 | weight_filler { 227 | type: "xavier" 228 | } 229 | bias_term: false 230 | } 231 | } 232 | 233 | layer { 234 | name: "relu2_2" 235 | type: "ReLU" 236 | bottom: "conv2_2" 237 | top: "conv2_2" 238 | } 239 | #CONV3########################################################## 240 | 241 | layer { 242 | name: "pool2" 243 | type: "Pooling" 244 | bottom: "conv2_2" 245 | top: "pool2" 246 | 
pooling_param { 247 | pool: MAX 248 | kernel_size: 2 249 | stride: 2 250 | } 251 | } 252 | layer { 253 | name: "conv3_1" 254 | type: "Convolution" 255 | bottom: "pool2" 256 | top: "conv3_1" 257 | param { 258 | lr_mult: 1.0 259 | decay_mult: 1.0 260 | } 261 | convolution_param { 262 | num_output: 32 263 | pad: 1 264 | kernel_size: 3 265 | weight_filler { 266 | type: "xavier" 267 | } 268 | bias_term: false 269 | } 270 | } 271 | 272 | layer { 273 | name: "relu3_1" 274 | type: "ReLU" 275 | bottom: "conv3_1" 276 | top: "conv3_1" 277 | } 278 | layer { 279 | name: "conv3_2" 280 | type: "Convolution" 281 | bottom: "conv3_1" 282 | top: "conv3_2" 283 | param { 284 | lr_mult: 1.0 285 | decay_mult: 1.0 286 | } 287 | convolution_param { 288 | num_output: 32 289 | pad: 0 290 | kernel_size: 1 291 | weight_filler { 292 | type: "xavier" 293 | } 294 | bias_term: false 295 | } 296 | } 297 | 298 | 299 | layer { 300 | name: "relu3_2" 301 | type: "ReLU" 302 | bottom: "conv3_2" 303 | top: "conv3_2" 304 | } 305 | layer { 306 | name: "conv3_3" 307 | type: "Convolution" 308 | bottom: "conv3_2" 309 | top: "conv3_3" 310 | param { 311 | lr_mult: 1.0 312 | decay_mult: 1.0 313 | } 314 | convolution_param { 315 | num_output: 32 316 | pad: 1 317 | kernel_size: 3 318 | weight_filler { 319 | type: "xavier" 320 | } 321 | bias_term: false 322 | } 323 | } 324 | 325 | layer { 326 | name: "relu3_3" 327 | type: "ReLU" 328 | bottom: "conv3_3" 329 | top: "conv3_3" 330 | } 331 | 332 | 333 | #CONV4########################################################## 334 | 335 | layer { 336 | name: "pool3" 337 | type: "Pooling" 338 | bottom: "conv3_3" 339 | top: "pool3" 340 | pooling_param { 341 | pool: MAX 342 | kernel_size: 2 343 | stride: 2 344 | } 345 | } 346 | layer { 347 | name: "conv4_1" 348 | type: "Convolution" 349 | bottom: "pool3" 350 | top: "conv4_1" 351 | param { 352 | lr_mult: 1.0 353 | decay_mult: 1.0 354 | } 355 | convolution_param { 356 | num_output: 64 357 | pad: 1 358 | kernel_size: 3 359 | 
weight_filler { 360 | type: "xavier" 361 | } 362 | bias_term: false 363 | } 364 | } 365 | 366 | layer { 367 | name: "relu4_1" 368 | type: "ReLU" 369 | bottom: "conv4_1" 370 | top: "conv4_1" 371 | } 372 | layer { 373 | name: "conv4_2" 374 | type: "Convolution" 375 | bottom: "conv4_1" 376 | top: "conv4_2" 377 | param { 378 | lr_mult: 1.0 379 | decay_mult: 1.0 380 | } 381 | convolution_param { 382 | num_output: 64 383 | pad: 0 384 | kernel_size: 1 385 | weight_filler { 386 | type: "xavier" 387 | } 388 | bias_term: false 389 | } 390 | } 391 | 392 | layer { 393 | name: "relu4_2" 394 | type: "ReLU" 395 | bottom: "conv4_2" 396 | top: "conv4_2" 397 | } 398 | layer { 399 | name: "conv4_3" 400 | type: "Convolution" 401 | bottom: "conv4_2" 402 | top: "conv4_3" 403 | param { 404 | lr_mult: 1.0 405 | decay_mult: 1.0 406 | } 407 | convolution_param { 408 | num_output: 64 409 | pad: 1 410 | kernel_size: 3 411 | weight_filler { 412 | type: "xavier" 413 | } 414 | bias_term: false 415 | } 416 | } 417 | 418 | layer { 419 | name: "relu4_3" 420 | type: "ReLU" 421 | bottom: "conv4_3" 422 | top: "conv4_3" 423 | } 424 | 425 | #CONV5########################################################## 426 | 427 | layer { 428 | name: "pool4" 429 | type: "Pooling" 430 | bottom: "conv4_3" 431 | top: "pool4" 432 | pooling_param { 433 | pool: MAX 434 | kernel_size: 2 435 | stride: 2 436 | } 437 | } 438 | layer { 439 | name: "conv5_1" 440 | type: "Convolution" 441 | bottom: "pool4" 442 | top: "conv5_1" 443 | param { 444 | lr_mult: 1.0 445 | decay_mult: 1.0 446 | } 447 | convolution_param { 448 | num_output: 128 449 | pad: 1 450 | kernel_size: 3 451 | weight_filler { 452 | type: "xavier" 453 | } 454 | bias_term: false 455 | dilation: 1 456 | } 457 | } 458 | 459 | layer { 460 | name: "relu5_1" 461 | type: "ReLU" 462 | bottom: "conv5_1" 463 | top: "conv5_1" 464 | } 465 | layer { 466 | name: "conv5_2" 467 | type: "Convolution" 468 | bottom: "conv5_1" 469 | top: "conv5_2" 470 | param { 471 | lr_mult: 1.0 472 | 
decay_mult: 1.0 473 | } 474 | convolution_param { 475 | num_output: 128 476 | pad: 0 477 | kernel_size: 1 478 | weight_filler { 479 | type: "xavier" 480 | } 481 | bias_term: false 482 | dilation: 1 483 | } 484 | } 485 | 486 | layer { 487 | name: "relu5_2" 488 | type: "ReLU" 489 | bottom: "conv5_2" 490 | top: "conv5_2" 491 | } 492 | layer { 493 | name: "conv5_3" 494 | type: "Convolution" 495 | bottom: "conv5_2" 496 | top: "conv5_3" 497 | param { 498 | lr_mult: 1.0 499 | decay_mult: 1.0 500 | } 501 | convolution_param { 502 | num_output: 128 503 | pad: 1 504 | kernel_size: 3 505 | weight_filler { 506 | type: "xavier" 507 | } 508 | bias_term: false 509 | dilation: 1 510 | } 511 | } 512 | 513 | layer { 514 | name: "relu5_3" 515 | type: "ReLU" 516 | bottom: "conv5_3" 517 | top: "conv5_3" 518 | } 519 | 520 | #CONV6########################################################## 521 | 522 | layer { 523 | name: "pool5" 524 | type: "Pooling" 525 | bottom: "conv5_3" 526 | top: "pool5" 527 | pooling_param { 528 | pool: MAX 529 | kernel_size: 2 530 | stride: 2 531 | } 532 | } 533 | 534 | layer { 535 | name: "conv6_1" 536 | type: "Convolution" 537 | bottom: "pool5" 538 | top: "conv6_1" 539 | param { 540 | lr_mult: 1.0 541 | decay_mult: 1.0 542 | } 543 | convolution_param { 544 | num_output: 128 545 | pad: 1 546 | kernel_size: 3 547 | weight_filler { 548 | type: "xavier" 549 | } 550 | bias_term: false 551 | dilation: 1 552 | } 553 | } 554 | 555 | layer { 556 | name: "relu6_1" 557 | type: "ReLU" 558 | bottom: "conv6_1" 559 | top: "conv6_1" 560 | } 561 | layer { 562 | name: "conv6_2" 563 | type: "Convolution" 564 | bottom: "conv6_1" 565 | top: "conv6_2" 566 | param { 567 | lr_mult: 1.0 568 | decay_mult: 1.0 569 | } 570 | convolution_param { 571 | num_output: 128 572 | pad: 0 573 | kernel_size: 1 574 | weight_filler { 575 | type: "xavier" 576 | } 577 | bias_term: false 578 | dilation: 1 579 | } 580 | } 581 | 582 | layer { 583 | name: "relu6_2" 584 | type: "ReLU" 585 | bottom: "conv6_2" 
586 | top: "conv6_2" 587 | } 588 | layer { 589 | name: "conv6_3" 590 | type: "Convolution" 591 | bottom: "conv6_2" 592 | top: "conv6_3" 593 | param { 594 | lr_mult: 1.0 595 | decay_mult: 1.0 596 | } 597 | convolution_param { 598 | num_output: 128 599 | pad: 1 600 | kernel_size: 3 601 | weight_filler { 602 | type: "xavier" 603 | } 604 | bias_term: false 605 | dilation: 1 606 | } 607 | } 608 | 609 | layer { 610 | name: "relu6_3" 611 | type: "ReLU" 612 | bottom: "conv6_3" 613 | top: "conv6_3" 614 | } 615 | 616 | #PRIORBOX3########################################## 617 | layer { 618 | name: "conv3_3_norm" 619 | type: "Normalize" 620 | bottom: "conv3_3" 621 | top: "conv3_3_norm" 622 | norm_param { 623 | across_spatial: false 624 | scale_filler { 625 | type: "constant" 626 | value: 10.0 627 | } 628 | channel_shared: false 629 | } 630 | } 631 | layer { 632 | name: "conv3_3_norm_mbox_loc_new" 633 | type: "Convolution" 634 | bottom: "conv3_3_norm" 635 | top: "conv3_3_norm_mbox_loc_new" 636 | param { 637 | lr_mult: 1.0 638 | decay_mult: 1.0 639 | } 640 | convolution_param { 641 | num_output: 12 642 | pad: 1 643 | kernel_size: 3 644 | stride: 1 645 | weight_filler { 646 | type: "xavier" 647 | } 648 | bias_term: false 649 | } 650 | } 651 | layer { 652 | name: "conv3_3_norm_mbox_loc_perm" 653 | type: "Permute" 654 | bottom: "conv3_3_norm_mbox_loc_new" 655 | top: "conv3_3_norm_mbox_loc_perm" 656 | permute_param { 657 | order: 0 658 | order: 2 659 | order: 3 660 | order: 1 661 | } 662 | } 663 | layer { 664 | name: "conv3_3_norm_mbox_loc_flat" 665 | type: "Flatten" 666 | bottom: "conv3_3_norm_mbox_loc_perm" 667 | top: "conv3_3_norm_mbox_loc_flat" 668 | flatten_param { 669 | axis: 1 670 | } 671 | } 672 | layer { 673 | name: "conv3_3_norm_mbox_conf_new" 674 | type: "Convolution" 675 | bottom: "conv3_3_norm" 676 | top: "conv3_3_norm_mbox_conf_new" 677 | param { 678 | lr_mult: 1.0 679 | decay_mult: 1.0 680 | } 681 | convolution_param { 682 | num_output: 6 683 | pad: 1 684 | 
kernel_size: 3 685 | stride: 1 686 | weight_filler { 687 | type: "xavier" 688 | } 689 | bias_term: false 690 | } 691 | } 692 | layer { 693 | name: "conv3_3_norm_mbox_conf_perm" 694 | type: "Permute" 695 | bottom: "conv3_3_norm_mbox_conf_new" 696 | top: "conv3_3_norm_mbox_conf_perm" 697 | permute_param { 698 | order: 0 699 | order: 2 700 | order: 3 701 | order: 1 702 | } 703 | } 704 | layer { 705 | name: "conv3_3_norm_mbox_conf_flat" 706 | type: "Flatten" 707 | bottom: "conv3_3_norm_mbox_conf_perm" 708 | top: "conv3_3_norm_mbox_conf_flat" 709 | flatten_param { 710 | axis: 1 711 | } 712 | } 713 | layer { 714 | 715 | name: "conv3_3_norm_mbox_priorbox" 716 | type: "PriorBox" 717 | bottom: "conv3_3_norm" 718 | bottom: "data" 719 | top: "conv3_3_norm_mbox_priorbox" 720 | prior_box_param { 721 | min_size: 10.0 722 | min_size: 16.0 723 | min_size: 24.0 724 | clip: false 725 | variance: 0.10000000149 726 | variance: 0.10000000149 727 | variance: 0.20000000298 728 | variance: 0.20000000298 729 | step: 8.0 730 | offset: 0.5 731 | } 732 | } 733 | #PRIORBOX4########################################## 734 | layer { 735 | name: "conv4_3_norm" 736 | type: "Normalize" 737 | bottom: "conv4_3" 738 | top: "conv4_3_norm" 739 | norm_param { 740 | across_spatial: false 741 | scale_filler { 742 | type: "constant" 743 | value: 8.0 744 | } 745 | channel_shared: false 746 | } 747 | } 748 | layer { 749 | name: "conv4_3_norm_mbox_loc" 750 | type: "Convolution" 751 | bottom: "conv4_3_norm" 752 | top: "conv4_3_norm_mbox_loc" 753 | param { 754 | lr_mult: 1.0 755 | decay_mult: 1.0 756 | } 757 | convolution_param { 758 | num_output: 8 759 | pad: 1 760 | kernel_size: 3 761 | stride: 1 762 | weight_filler { 763 | type: "xavier" 764 | } 765 | bias_term: false 766 | } 767 | } 768 | layer { 769 | name: "conv4_3_norm_mbox_loc_perm" 770 | type: "Permute" 771 | bottom: "conv4_3_norm_mbox_loc" 772 | top: "conv4_3_norm_mbox_loc_perm" 773 | permute_param { 774 | order: 0 775 | order: 2 776 | order: 3 777 | 
order: 1 778 | } 779 | } 780 | layer { 781 | name: "conv4_3_norm_mbox_loc_flat" 782 | type: "Flatten" 783 | bottom: "conv4_3_norm_mbox_loc_perm" 784 | top: "conv4_3_norm_mbox_loc_flat" 785 | flatten_param { 786 | axis: 1 787 | } 788 | } 789 | layer { 790 | name: "conv4_3_norm_mbox_conf" 791 | type: "Convolution" 792 | bottom: "conv4_3_norm" 793 | top: "conv4_3_norm_mbox_conf" 794 | param { 795 | lr_mult: 1.0 796 | decay_mult: 1.0 797 | } 798 | convolution_param { 799 | num_output: 4 800 | pad: 1 801 | kernel_size: 3 802 | stride: 1 803 | weight_filler { 804 | type: "xavier" 805 | } 806 | bias_term: false 807 | } 808 | } 809 | layer { 810 | name: "conv4_3_norm_mbox_conf_perm" 811 | type: "Permute" 812 | bottom: "conv4_3_norm_mbox_conf" 813 | top: "conv4_3_norm_mbox_conf_perm" 814 | permute_param { 815 | order: 0 816 | order: 2 817 | order: 3 818 | order: 1 819 | } 820 | } 821 | layer { 822 | name: "conv4_3_norm_mbox_conf_flat" 823 | type: "Flatten" 824 | bottom: "conv4_3_norm_mbox_conf_perm" 825 | top: "conv4_3_norm_mbox_conf_flat" 826 | flatten_param { 827 | axis: 1 828 | } 829 | } 830 | layer { 831 | 832 | name: "conv4_3_norm_mbox_priorbox" 833 | type: "PriorBox" 834 | bottom: "conv4_3_norm" 835 | bottom: "data" 836 | top: "conv4_3_norm_mbox_priorbox" 837 | prior_box_param { 838 | min_size: 32.0 839 | min_size: 48.0 840 | clip: false 841 | variance: 0.10000000149 842 | variance: 0.10000000149 843 | variance: 0.20000000298 844 | variance: 0.20000000298 845 | step: 16.0 846 | offset: 0.5 847 | } 848 | } 849 | #PRIORBOX5########################################## 850 | layer { 851 | name: "conv5_3_norm" 852 | type: "Normalize" 853 | bottom: "conv5_3" 854 | top: "conv5_3_norm" 855 | norm_param { 856 | across_spatial: false 857 | scale_filler { 858 | type: "constant" 859 | value: 5.0 860 | } 861 | channel_shared: false 862 | } 863 | } 864 | layer { 865 | name: "conv5_3_norm_mbox_loc" 866 | type: "Convolution" 867 | bottom: "conv5_3_norm" 868 | top: 
"conv5_3_norm_mbox_loc" 869 | param { 870 | lr_mult: 1.0 871 | decay_mult: 1.0 872 | } 873 | convolution_param { 874 | num_output: 8 875 | pad: 1 876 | kernel_size: 3 877 | stride: 1 878 | weight_filler { 879 | type: "xavier" 880 | } 881 | bias_term: false 882 | } 883 | } 884 | layer { 885 | name: "conv5_3_norm_mbox_loc_perm" 886 | type: "Permute" 887 | bottom: "conv5_3_norm_mbox_loc" 888 | top: "conv5_3_norm_mbox_loc_perm" 889 | permute_param { 890 | order: 0 891 | order: 2 892 | order: 3 893 | order: 1 894 | } 895 | } 896 | layer { 897 | name: "conv5_3_norm_mbox_loc_flat" 898 | type: "Flatten" 899 | bottom: "conv5_3_norm_mbox_loc_perm" 900 | top: "conv5_3_norm_mbox_loc_flat" 901 | flatten_param { 902 | axis: 1 903 | } 904 | } 905 | layer { 906 | name: "conv5_3_norm_mbox_conf" 907 | type: "Convolution" 908 | bottom: "conv5_3_norm" 909 | top: "conv5_3_norm_mbox_conf" 910 | param { 911 | lr_mult: 1.0 912 | decay_mult: 1.0 913 | } 914 | convolution_param { 915 | num_output: 4 916 | pad: 1 917 | kernel_size: 3 918 | stride: 1 919 | weight_filler { 920 | type: "xavier" 921 | } 922 | bias_term: false 923 | } 924 | } 925 | layer { 926 | name: "conv5_3_norm_mbox_conf_perm" 927 | type: "Permute" 928 | bottom: "conv5_3_norm_mbox_conf" 929 | top: "conv5_3_norm_mbox_conf_perm" 930 | permute_param { 931 | order: 0 932 | order: 2 933 | order: 3 934 | order: 1 935 | } 936 | } 937 | layer { 938 | name: "conv5_3_norm_mbox_conf_flat" 939 | type: "Flatten" 940 | bottom: "conv5_3_norm_mbox_conf_perm" 941 | top: "conv5_3_norm_mbox_conf_flat" 942 | flatten_param { 943 | axis: 1 944 | } 945 | } 946 | layer { 947 | 948 | name: "conv5_3_norm_mbox_priorbox" 949 | type: "PriorBox" 950 | bottom: "conv5_3_norm" 951 | bottom: "data" 952 | top: "conv5_3_norm_mbox_priorbox" 953 | prior_box_param { 954 | min_size: 64.0 955 | min_size: 96.0 956 | clip: false 957 | variance: 0.10000000149 958 | variance: 0.10000000149 959 | variance: 0.20000000298 960 | variance: 0.20000000298 961 | step: 32.0 962 
| offset: 0.5 963 | } 964 | } 965 | 966 | #PRIORBOX6########################################## 967 | layer { 968 | name: "conv6_3_norm" 969 | type: "Normalize" 970 | bottom: "conv6_3" 971 | top: "conv6_3_norm" 972 | norm_param { 973 | across_spatial: false 974 | scale_filler { 975 | type: "constant" 976 | value: 5.0 977 | } 978 | channel_shared: false 979 | } 980 | } 981 | layer { 982 | name: "conv6_3_norm_mbox_loc" 983 | type: "Convolution" 984 | bottom: "conv6_3_norm" 985 | top: "conv6_3_norm_mbox_loc" 986 | param { 987 | lr_mult: 1.0 988 | decay_mult: 1.0 989 | } 990 | convolution_param { 991 | num_output: 12 992 | pad: 1 993 | kernel_size: 3 994 | stride: 1 995 | weight_filler { 996 | type: "xavier" 997 | } 998 | bias_term: false 999 | } 1000 | } 1001 | layer { 1002 | name: "conv6_3_norm_mbox_loc_perm" 1003 | type: "Permute" 1004 | bottom: "conv6_3_norm_mbox_loc" 1005 | top: "conv6_3_norm_mbox_loc_perm" 1006 | permute_param { 1007 | order: 0 1008 | order: 2 1009 | order: 3 1010 | order: 1 1011 | } 1012 | } 1013 | layer { 1014 | name: "conv6_3_norm_mbox_loc_flat" 1015 | type: "Flatten" 1016 | bottom: "conv6_3_norm_mbox_loc_perm" 1017 | top: "conv6_3_norm_mbox_loc_flat" 1018 | flatten_param { 1019 | axis: 1 1020 | } 1021 | } 1022 | layer { 1023 | name: "conv6_3_norm_mbox_conf" 1024 | type: "Convolution" 1025 | bottom: "conv6_3_norm" 1026 | top: "conv6_3_norm_mbox_conf" 1027 | param { 1028 | lr_mult: 1.0 1029 | decay_mult: 1.0 1030 | } 1031 | convolution_param { 1032 | num_output: 6 1033 | pad: 1 1034 | kernel_size: 3 1035 | stride: 1 1036 | weight_filler { 1037 | type: "xavier" 1038 | } 1039 | bias_term: false 1040 | } 1041 | } 1042 | layer { 1043 | name: "conv6_3_norm_mbox_conf_perm" 1044 | type: "Permute" 1045 | bottom: "conv6_3_norm_mbox_conf" 1046 | top: "conv6_3_norm_mbox_conf_perm" 1047 | permute_param { 1048 | order: 0 1049 | order: 2 1050 | order: 3 1051 | order: 1 1052 | } 1053 | } 1054 | layer { 1055 | name: "conv6_3_norm_mbox_conf_flat" 1056 | type: 
"Flatten" 1057 | bottom: "conv6_3_norm_mbox_conf_perm" 1058 | top: "conv6_3_norm_mbox_conf_flat" 1059 | flatten_param { 1060 | axis: 1 1061 | } 1062 | } 1063 | layer { 1064 | 1065 | name: "conv6_3_norm_mbox_priorbox" 1066 | type: "PriorBox" 1067 | bottom: "conv6_3_norm" 1068 | bottom: "data" 1069 | top: "conv6_3_norm_mbox_priorbox" 1070 | prior_box_param { 1071 | min_size: 128.0 1072 | min_size: 192.0 1073 | min_size: 256.0 1074 | clip: false 1075 | variance: 0.10000000149 1076 | variance: 0.10000000149 1077 | variance: 0.20000000298 1078 | variance: 0.20000000298 1079 | step: 64.0 1080 | offset: 0.5 1081 | } 1082 | } 1083 | ######################################################## 1084 | layer { 1085 | name: "mbox_loc" 1086 | type: "Concat" 1087 | bottom: "conv3_3_norm_mbox_loc_flat" 1088 | bottom: "conv4_3_norm_mbox_loc_flat" 1089 | bottom: "conv5_3_norm_mbox_loc_flat" 1090 | bottom: "conv6_3_norm_mbox_loc_flat" 1091 | top: "mbox_loc" 1092 | concat_param { 1093 | axis: 1 1094 | } 1095 | } 1096 | layer { 1097 | name: "mbox_conf" 1098 | type: "Concat" 1099 | bottom: "conv3_3_norm_mbox_conf_flat" 1100 | bottom: "conv4_3_norm_mbox_conf_flat" 1101 | bottom: "conv5_3_norm_mbox_conf_flat" 1102 | bottom: "conv6_3_norm_mbox_conf_flat" 1103 | top: "mbox_conf" 1104 | concat_param { 1105 | axis: 1 1106 | } 1107 | } 1108 | layer { 1109 | name: "mbox_priorbox" 1110 | type: "Concat" 1111 | bottom: "conv3_3_norm_mbox_priorbox" 1112 | bottom: "conv4_3_norm_mbox_priorbox" 1113 | bottom: "conv5_3_norm_mbox_priorbox" 1114 | bottom: "conv6_3_norm_mbox_priorbox" 1115 | top: "mbox_priorbox" 1116 | concat_param { 1117 | axis: 2 1118 | } 1119 | } 1120 | 1121 | ##################################################### 1122 | layer { 1123 | 1124 | name: "mbox_loss" 1125 | type: "MultiBoxLoss" 1126 | bottom: "mbox_loc" 1127 | bottom: "mbox_conf" 1128 | bottom: "mbox_priorbox" 1129 | bottom: "label" 1130 | top: "mbox_loss" 1131 | include { 1132 | phase: TRAIN 1133 | } 1134 | propagate_down: true 
1135 | propagate_down: true 1136 | propagate_down: false 1137 | propagate_down: false 1138 | loss_param { 1139 | normalization: VALID 1140 | } 1141 | multibox_loss_param { 1142 | loc_loss_type: L2 1143 | conf_loss_type: SOFTMAX 1144 | loc_weight: 1.0 1145 | num_classes: 2 1146 | share_location: true 1147 | match_type: PER_PREDICTION 1148 | overlap_threshold: 0.34999999404 1149 | use_prior_for_matching: true 1150 | background_label_id: 0 1151 | use_difficult_gt: true 1152 | neg_pos_ratio: 3.0 1153 | neg_overlap: 0.34999999404 1154 | code_type: CENTER_SIZE 1155 | ignore_cross_boundary_bbox: false 1156 | mining_type: MAX_NEGATIVE 1157 | } 1158 | } 1159 | 1160 | -------------------------------------------------------------------------------- /libfacedetection/models/openvino/yufacedetectnet-open-v1-320x240.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chai2010/libfacedetection-go/80e89f84b0b18fa87abb1de4b48795e9a5505c15/libfacedetection/models/openvino/yufacedetectnet-open-v1-320x240.bin -------------------------------------------------------------------------------- /libfacedetection/src/facedetectcnn-model.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | By downloading, copying, installing or using the software you agree to this license. 3 | If you do not agree to this license, do not download, install, 4 | copy or use the software. 5 | 6 | 7 | License Agreement For libfacedetection 8 | (3-clause BSD License) 9 | 10 | Copyright (c) 2018-2019, Shiqi Yu, all rights reserved. 11 | shiqi.yu@gmail.com 12 | 13 | Redistribution and use in source and binary forms, with or without modification, 14 | are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 
18 | 19 | * Redistributions in binary form must reproduce the above copyright notice, 20 | this list of conditions and the following disclaimer in the documentation 21 | and/or other materials provided with the distribution. 22 | 23 | * Neither the names of the copyright holders nor the names of the contributors 24 | may be used to endorse or promote products derived from this software 25 | without specific prior written permission. 26 | 27 | This software is provided by the copyright holders and contributors "as is" and 28 | any express or implied warranties, including, but not limited to, the implied 29 | warranties of merchantability and fitness for a particular purpose are disclaimed. 30 | In no event shall copyright holders or contributors be liable for any direct, 31 | indirect, incidental, special, exemplary, or consequential damages 32 | (including, but not limited to, procurement of substitute goods or services; 33 | loss of use, data, or profits; or business interruption) however caused 34 | and on any theory of liability, whether in contract, strict liability, 35 | or tort (including negligence or otherwise) arising in any way out of 36 | the use of this software, even if advised of the possibility of such damage. 
37 | */ 38 | 39 | 40 | #include "facedetectcnn.h" 41 | #include 42 | #include 43 | 44 | #if 0 45 | #include 46 | #define TIME_START t=(double)cvGetTickCount(); 47 | #define TIME_END(FUNCNAME) t=((double)cvGetTickCount()-t)/((double)cvGetTickFrequency()*1000.); printf(FUNCNAME);printf("=%g\n", t); total+=t; 48 | #define TIME_TOTAL(tt) printf("total time=%g\n", (tt)); 49 | //#define TIME_END(FUNCNAME) t=((double)cvGetTickCount()-t)/((double)cvGetTickFrequency()*1000.); total+=t; 50 | #else 51 | #define TIME_START 52 | #define TIME_END(FUNCNAME) 53 | #define TIME_TOTAL(tt) 54 | #endif 55 | 56 | 57 | #define NUM_CONV_LAYER 24 58 | 59 | #if defined(_ENABLE_INT8_CONV) 60 | extern signed char * param_ppConvCoefInt8[NUM_CONV_LAYER]; 61 | #else 62 | extern float * param_ppConvCoefFloat[NUM_CONV_LAYER]; 63 | #endif 64 | extern float param_pConvCoefScales[NUM_CONV_LAYER]; 65 | 66 | 67 | typedef struct SConvInfo_{ 68 | int pad; 69 | int stride; 70 | int width; 71 | int height; 72 | int channels; 73 | int num; 74 | //float scale; 75 | }SConvInfo; 76 | 77 | Filters param_pFilters[NUM_CONV_LAYER]; //NUM_CONV_LAYER conv layers 78 | 79 | int param_pMean[3] = { 104,117,123 }; 80 | float param_pConv3Norm[32] = { 6.592306137084961f, 6.558613300323486f, 6.324647903442383f, 6.125812530517578f, 6.558647632598877f, 6.526843547821045f, 6.558306694030762f, 6.556366443634033f, 6.638453483581543f, 6.558482646942139f, 6.631646633148193f, 6.536031246185303f, 6.456478118896484f, 6.558736801147461f, 6.567635536193848f, 5.369370937347412f, 6.567112445831299f, 6.5589985847473145f, 6.602363586425781f, 6.558731555938721f, 6.419933795928955f, 6.138179779052734f, 6.250294208526611f, 6.562124252319336f, 6.579089164733887f, 6.553215503692627f, 6.191119194030762f, 3.9663331508636475f, 6.459974765777588f, 6.555095195770264f, 6.545119762420654f, 6.626718997955322f }; 81 | float param_pConv4Norm[64] = { 5.24936056137085f, 5.249833106994629f, 5.201494216918945f, 5.252224445343018f, 5.2574462890625f, 
5.226278305053711f, 5.258802890777588f, 5.254302024841309f, 5.1779465675354f, 5.24658203125f, 5.252774238586426f, 5.248640060424805f, 5.209632396697998f, 5.256057262420654f, 5.198976039886475f, 5.259532928466797f, 5.150023460388184f, 5.225643634796143f, 5.25822114944458f, 5.247387886047363f, 5.2590227127075195f, 5.25047492980957f, 5.264795303344727f, 5.24699592590332f, 5.249448299407959f, 5.205463409423828f, 5.252189636230469f, 5.255984783172607f, 5.209300994873047f, 5.261776447296143f, 5.251255035400391f, 5.254635810852051f, 5.25943660736084f, 5.248631477355957f, 5.285300254821777f, 5.254481792449951f, 5.248084545135498f, 5.249952793121338f, 5.234015941619873f, 5.263407230377197f, 5.245810508728027f, 5.2482500076293945f, 5.102107048034668f, 5.221688747406006f, 5.245429515838623f, 5.251964569091797f, 5.305455684661865f, 5.1277570724487305f, 5.28142786026001f, 5.235960006713867f, 5.205482482910156f, 5.251782417297363f, 5.182538032531738f, 5.27116584777832f, 5.198187351226807f, 5.248366832733154f, 5.254012584686279f, 5.251954555511475f, 5.259944438934326f, 5.248141288757324f, 5.183408737182617f, 5.265050888061523f, 5.24997615814209f, 5.262747287750244f }; 82 | float param_pConv5Norm[128] = { 3.2798357009887695f, 3.279853582382202f, 3.316532850265503f, 3.2797908782958984f, 3.283681631088257f, 3.292001962661743f, 3.2796599864959717f, 3.301004409790039f, 3.279745101928711f, 3.2916154861450195f, 3.304828405380249f, 3.2860844135284424f, 3.2962393760681152f, 3.2914977073669434f, 3.311239719390869f, 3.2796378135681152f, 3.30684757232666f, 3.2795956134796143f, 3.3049705028533936f, 3.2912471294403076f, 3.2799410820007324f, 3.280548095703125f, 3.279585838317871f, 3.3002915382385254f, 3.351128339767456f, 3.285721778869629f, 3.308690071105957f, 3.286360025405884f, 3.286612033843994f, 3.323331117630005f, 3.280801296234131f, 3.279557228088379f, 3.325319290161133f, 3.292274236679077f, 3.284330129623413f, 3.3016438484191895f, 3.305274248123169f, 3.293483257293701f, 
3.2965309619903564f, 3.343932628631592f, 3.2799618244171143f, 3.281886100769043f, 3.2855217456817627f, 3.283858299255371f, 3.299082040786743f, 3.295645236968994f, 3.291444778442383f, 3.279492139816284f, 3.2956502437591553f, 3.3284802436828613f, 3.2878544330596924f, 3.291749954223633f, 3.3015894889831543f, 3.2998600006103516f, 3.2887396812438965f, 3.2832252979278564f, 3.285311698913574f, 3.30757474899292f, 3.284590721130371f, 3.3025388717651367f, 3.2936882972717285f, 3.279754877090454f, 3.307007312774658f, 3.2958528995513916f, 3.3630170822143555f, 3.326841354370117f, 3.2800698280334473f, 3.2920491695404053f, 3.2991254329681396f, 3.309135913848877f, 3.2799878120422363f, 3.2878851890563965f, 3.302861452102661f, 3.315964698791504f, 3.279761791229248f, 3.3086979389190674f, 3.2836644649505615f, 3.29606294631958f, 3.2939038276672363f, 3.296156883239746f, 3.300607204437256f, 3.3293192386627197f, 3.2886781692504883f, 3.292102098464966f, 3.279629945755005f, 3.2798566818237305f, 3.2876806259155273f, 3.281590223312378f, 3.281094789505005f, 3.2978975772857666f, 3.2799761295318604f, 3.3351552486419678f, 3.2866907119750977f, 3.338275671005249f, 3.2797188758850098f, 3.280174493789673f, 3.296318531036377f, 3.281552314758301f, 3.2805323600769043f, 3.294194459915161f, 3.279611349105835f, 3.3100433349609375f, 3.2793779373168945f, 3.2797317504882812f, 3.2823593616485596f, 3.2944772243499756f, 3.280740976333618f, 3.2863688468933105f, 3.299750804901123f, 3.282517194747925f, 3.2863147258758545f, 3.286238193511963f, 3.280881881713867f, 3.2873098850250244f, 3.2873218059539795f, 3.2804245948791504f, 3.306823968887329f, 3.312803030014038f, 3.2798690795898438f, 3.288886785507202f, 3.3185698986053467f, 3.299873113632202f, 3.279698610305786f, 3.3057520389556885f, 3.3125710487365723f, 3.2796096801757812f, 3.2905843257904053f, 3.2902579307556152f }; 83 | float param_pConv6Norm[128] = { 3.2981998920440674f, 3.2801780700683594f, 3.2922229766845703f, 3.2955803871154785f, 3.294820547103882f, 
3.292754888534546f, 3.2865407466888428f, 3.286324977874756f, 3.2815260887145996f, 3.2840657234191895f, 3.282493829727173f, 3.333282470703125f, 3.289398193359375f, 3.2884361743927f, 3.2864975929260254f, 3.2839083671569824f, 3.2875373363494873f, 3.322467565536499f, 3.28637433052063f, 3.286494731903076f, 3.291119337081909f, 3.322329044342041f, 3.2931485176086426f, 3.279733896255493f, 3.2803969383239746f, 3.295511245727539f, 3.2980639934539795f, 3.280717134475708f, 3.2957653999328613f, 3.317964553833008f, 3.2798070907592773f, 3.3057520389556885f, 3.305992841720581f, 3.348924160003662f, 3.291982650756836f, 3.3030447959899902f, 3.288562774658203f, 3.2987096309661865f, 3.2800376415252686f, 3.336385488510132f, 3.2968106269836426f, 3.296558380126953f, 3.2796730995178223f, 3.286644458770752f, 3.2797164916992188f, 3.2899343967437744f, 3.3156163692474365f, 3.3474082946777344f, 3.291513442993164f, 3.3053133487701416f, 3.283517360687256f, 3.305210828781128f, 3.303983211517334f, 3.282759189605713f, 3.280306816101074f, 3.2799665927886963f, 3.285292387008667f, 3.3079776763916016f, 3.29679274559021f, 3.280120849609375f, 3.2796525955200195f, 3.28070068359375f, 3.2877960205078125f, 3.302424192428589f, 3.2920310497283936f, 3.286226511001587f, 3.2799699306488037f, 3.2863574028015137f, 3.324301242828369f, 3.291632890701294f, 3.294870376586914f, 3.289574146270752f, 3.297481060028076f, 3.304020881652832f, 3.2841410636901855f, 3.2966504096984863f, 3.280271530151367f, 3.2840888500213623f, 3.3100359439849854f, 3.282552719116211f, 3.2812161445617676f, 3.3106632232666016f, 3.2951345443725586f, 3.280796766281128f, 3.283688545227051f, 3.328918695449829f, 3.2930126190185547f, 3.291832685470581f, 3.2913215160369873f, 3.282050609588623f, 3.280987501144409f, 3.303039073944092f, 3.2960453033447266f, 3.2923476696014404f, 3.2912893295288086f, 3.283773422241211f, 3.308786392211914f, 3.294309377670288f, 3.2953407764434814f, 3.2821991443634033f, 3.2892892360687256f, 3.2869884967803955f, 
3.3056561946868896f, 3.281874895095825f, 3.337285280227661f, 3.2868685722351074f, 3.2931010723114014f, 3.279754638671875f, 3.2797317504882812f, 3.3095757961273193f, 3.2800772190093994f, 3.299697160720825f, 3.2798924446105957f, 3.2848010063171387f, 3.2815306186676025f, 3.29996919631958f, 3.29498553276062f, 3.317089557647705f, 3.2876529693603516f, 3.2813613414764404f, 3.290708065032959f, 3.2828986644744873f, 3.2800583839416504f, 3.3065834045410156f, 3.287909746170044f, 3.2797391414642334f, 3.3226301670074463f, 3.323007345199585f }; 84 | 85 | SConvInfo param_pConvInfo[NUM_CONV_LAYER] = { 86 | //conv1_1 87 | { 1, 2, 3, 3, 3, 16 }, 88 | //conv1_2 89 | { 0, 1, 1, 1, 16, 16 }, 90 | //conv2_1 91 | { 1, 1, 3, 3, 16, 16 }, 92 | //conv2_2 93 | { 0, 1, 1, 1, 16, 16 }, 94 | //conv3_1 95 | { 1, 1, 3, 3, 16, 32 }, 96 | //conv2_2 97 | { 0, 1, 1, 1, 32, 32 }, 98 | //conv3_3 99 | { 1, 1, 3, 3, 32, 32 }, 100 | //conv4_1 101 | { 1, 1, 3, 3, 32, 64 }, 102 | //conv4_2 103 | { 0, 1, 1, 1, 64, 64 }, 104 | //conv4_3 105 | { 1, 1, 3, 3, 64, 64 }, 106 | //conv5_1 107 | { 1, 1, 3, 3, 64,128 }, 108 | //conv5_2 109 | { 0, 1, 1, 1,128,128 }, 110 | //conv5_3 111 | { 1, 1, 3, 3,128,128 }, 112 | //conv6_1 113 | { 1, 1, 3, 3,128,128 }, 114 | //conv6_2 115 | { 0, 1, 1, 1,128,128 }, 116 | //conv6_3 117 | { 1, 1, 3, 3,128,128 }, 118 | //loc3 119 | { 1, 1, 3, 3, 32, 12 }, 120 | //conf3 121 | { 1, 1, 3, 3, 32, 6 }, 122 | //loc4 123 | { 1, 1, 3, 3, 64, 8 }, 124 | //conf4 125 | { 1, 1, 3, 3, 64, 4 }, 126 | //loc5 127 | { 1, 1, 3, 3,128, 8 }, 128 | //conf5 129 | { 1, 1, 3, 3,128, 4 }, 130 | //loc6 131 | { 1, 1, 3, 3,128, 12 }, 132 | //conf6 133 | { 1, 1, 3, 3,128, 6 }, 134 | }; 135 | 136 | bool param_initialized = false; 137 | 138 | void init_parameters() 139 | { 140 | //set filters 0 141 | { 142 | int i = 0; 143 | param_pFilters[i].stride = 1;// param_pConvInfo[i].stride; 144 | param_pFilters[i].pad = 0;// param_pConvInfo[i].pad; 145 | param_pFilters[i].scale = param_pConvCoefScales[i]; 146 | int offset = 
param_pConvInfo[i].width * param_pConvInfo[i].height * param_pConvInfo[i].channels; 147 | 148 | for(int ff = 0; ff < param_pConvInfo[i].num; ff++) 149 | { 150 | CDataBlob * b3x3 = new CDataBlob(param_pConvInfo[i].width, param_pConvInfo[i].height, param_pConvInfo[i].channels); 151 | CDataBlob * b1x1 = new CDataBlob(); 152 | #if defined(_ENABLE_INT8_CONV) 153 | b3x3->setInt8DataFromCaffeFormat(param_ppConvCoefInt8[i] + ff * offset, 154 | param_pConvInfo[i].width, param_pConvInfo[i].height, param_pConvInfo[i].channels); 155 | blob2vector(b3x3, b1x1, false); 156 | #else 157 | b3x3->setFloatDataFromCaffeFormat(param_ppConvCoefFloat[i] + ff * offset, 158 | param_pConvInfo[i].width, param_pConvInfo[i].height, param_pConvInfo[i].channels); 159 | blob2vector(b3x3, b1x1, true); 160 | #endif 161 | delete b3x3; 162 | b3x3 = 0; 163 | param_pFilters[i].filters.push_back(b1x1); 164 | } 165 | } 166 | //set the rest 167 | for(int i = 1; i < NUM_CONV_LAYER; i++) 168 | { 169 | param_pFilters[i].stride = param_pConvInfo[i].stride; 170 | param_pFilters[i].pad = param_pConvInfo[i].pad; 171 | param_pFilters[i].scale = param_pConvCoefScales[i]; 172 | int offset = param_pConvInfo[i].width * param_pConvInfo[i].height * param_pConvInfo[i].channels; 173 | 174 | for(int ff = 0; ff < param_pConvInfo[i].num; ff++) 175 | { 176 | CDataBlob * b = new CDataBlob(param_pConvInfo[i].width, param_pConvInfo[i].height, param_pConvInfo[i].channels); 177 | #if defined(_ENABLE_INT8_CONV) 178 | b->setInt8DataFromCaffeFormat(param_ppConvCoefInt8[i] + ff * offset, 179 | param_pConvInfo[i].width, param_pConvInfo[i].height, param_pConvInfo[i].channels); 180 | #else 181 | b->setFloatDataFromCaffeFormat(param_ppConvCoefFloat[i] + ff * offset, 182 | param_pConvInfo[i].width, param_pConvInfo[i].height, param_pConvInfo[i].channels); 183 | #endif 184 | param_pFilters[i].filters.push_back(b); 185 | } 186 | } 187 | } 188 | 189 | vector objectdetect_cnn(unsigned char * rgbImageData, int width, int height, int step) 190 | 
{ 191 | CDataBlob inputImage; 192 | CDataBlob pConvDataBlobs[NUM_CONV_LAYER]; 193 | CDataBlob pool1, pool2, pool3, pool4, pool5; 194 | CDataBlob conv3norm, conv4norm, conv5norm, conv6norm; 195 | CDataBlob conv3priorbox, conv4priorbox, conv5priorbox, conv6priorbox; 196 | CDataBlob conv3priorbox_flat, conv4priorbox_flat, conv5priorbox_flat, conv6priorbox_flat, mbox_priorbox; 197 | CDataBlob conv3loc_flat, conv4loc_flat, conv5loc_flat, conv6loc_flat, mbox_loc; 198 | CDataBlob conv3conf_flat, conv4conf_flat, conv5conf_flat, conv6conf_flat, mbox_conf; 199 | 200 | double total = 0.0; 201 | double t = 0.0; 202 | 203 | TIME_START; 204 | if (!param_initialized) 205 | { 206 | init_parameters(); 207 | param_initialized = true; 208 | } 209 | TIME_END("init"); 210 | 211 | 212 | total = 0.0; 213 | 214 | TIME_START; 215 | //inputImage.setDataFromImage(rgbImageData, width, height, 3, step, param_pMean); 216 | inputImage.setDataFrom3x3S2P1to1x1S1P0FromImage(rgbImageData, width, height, 3, step, param_pMean); 217 | TIME_END("convert data"); 218 | 219 | 220 | /***************CONV1*********************/ 221 | int convidx = 0; 222 | TIME_START; 223 | convolution(&inputImage, param_pFilters + convidx, pConvDataBlobs + convidx); 224 | TIME_END("conv11"); 225 | TIME_START; 226 | relu(pConvDataBlobs+convidx); 227 | TIME_END("relu11"); 228 | 229 | convidx++; 230 | TIME_START; 231 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 232 | TIME_END("conv12"); 233 | TIME_START; 234 | relu(pConvDataBlobs+convidx); 235 | TIME_END("relu12"); 236 | 237 | TIME_START; 238 | maxpooling2x2S2(pConvDataBlobs+convidx, &pool1); 239 | TIME_END("pool1"); 240 | 241 | /***************CONV2*********************/ 242 | convidx++; 243 | TIME_START; 244 | convolution(&pool1, param_pFilters+convidx, pConvDataBlobs+convidx); 245 | TIME_END("conv21"); 246 | TIME_START; 247 | relu(pConvDataBlobs+convidx); 248 | TIME_END("relu21"); 249 | 250 | convidx++; 251 | TIME_START; 252 | 
convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 253 | TIME_END("conv22"); 254 | TIME_START 255 | relu(pConvDataBlobs+convidx); 256 | TIME_END("relu22"); 257 | 258 | TIME_START; 259 | maxpooling2x2S2(pConvDataBlobs+convidx, &pool2); 260 | TIME_END("pool2"); 261 | 262 | /***************CONV3*********************/ 263 | convidx++; 264 | TIME_START; 265 | convolution(&pool2, param_pFilters+convidx, pConvDataBlobs+convidx); 266 | TIME_END("conv31"); 267 | TIME_START; 268 | relu(pConvDataBlobs+convidx); 269 | TIME_END("relu31"); 270 | 271 | convidx++; 272 | TIME_START; 273 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 274 | TIME_END("conv32"); 275 | TIME_START; 276 | relu(pConvDataBlobs+convidx); 277 | TIME_END("relu32"); 278 | 279 | convidx++; 280 | TIME_START; 281 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 282 | TIME_END("conv33"); 283 | TIME_START; 284 | relu(pConvDataBlobs+convidx); 285 | TIME_END("relu33"); 286 | 287 | TIME_START; 288 | maxpooling2x2S2(pConvDataBlobs+convidx, &pool3); 289 | TIME_END("pool3"); 290 | 291 | /***************CONV4*********************/ 292 | convidx++; 293 | TIME_START; 294 | convolution(&pool3, param_pFilters+convidx, pConvDataBlobs+convidx); 295 | TIME_END("conv41"); 296 | TIME_START 297 | relu(pConvDataBlobs+convidx); 298 | TIME_END("relu41"); 299 | 300 | convidx++; 301 | TIME_START; 302 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 303 | TIME_END("conv42"); 304 | TIME_START; 305 | relu(pConvDataBlobs+convidx); 306 | TIME_END("relu42"); 307 | 308 | convidx++; 309 | TIME_START; 310 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 311 | TIME_END("conv43"); 312 | TIME_START; 313 | relu(pConvDataBlobs+convidx); 314 | TIME_END("relu43"); 315 | 316 | TIME_START; 317 | maxpooling2x2S2(pConvDataBlobs+convidx, &pool4); 318 | 
TIME_END("pool4"); 319 | 320 | /***************CONV5*********************/ 321 | convidx++; 322 | TIME_START; 323 | convolution(&pool4, param_pFilters+convidx, pConvDataBlobs+convidx); 324 | TIME_END("conv51"); 325 | TIME_START; 326 | relu(pConvDataBlobs+convidx); 327 | TIME_END("relu51"); 328 | 329 | convidx++; 330 | TIME_START; 331 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 332 | TIME_END("conv52"); 333 | TIME_START 334 | relu(pConvDataBlobs+convidx); 335 | TIME_END("relu52"); 336 | 337 | convidx++; 338 | TIME_START; 339 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 340 | TIME_END("conv53"); 341 | TIME_START; 342 | relu(pConvDataBlobs+convidx); 343 | TIME_END("relu53"); 344 | 345 | TIME_START; 346 | maxpooling2x2S2(pConvDataBlobs+convidx, &pool5); 347 | TIME_END("pool5"); 348 | 349 | /***************CONV6*********************/ 350 | convidx++; 351 | TIME_START; 352 | convolution(&pool5, param_pFilters+convidx, pConvDataBlobs+convidx); 353 | TIME_END("conv61"); 354 | TIME_START; 355 | relu(pConvDataBlobs+convidx); 356 | TIME_END("relu61"); 357 | 358 | convidx++; 359 | TIME_START; 360 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 361 | TIME_END("conv62"); 362 | TIME_START 363 | relu(pConvDataBlobs+convidx); 364 | TIME_END("relu62"); 365 | 366 | convidx++; 367 | TIME_START; 368 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 369 | TIME_END("conv63"); 370 | TIME_START; 371 | relu(pConvDataBlobs+convidx); 372 | TIME_END("relu63"); 373 | 374 | /***************PRIORBOX3*********************/ 375 | int conv3idx = 6; 376 | TIME_START; 377 | normalize(pConvDataBlobs+ conv3idx, param_pConv3Norm); 378 | TIME_END("norm3"); 379 | 380 | convidx++; 381 | TIME_START 382 | convolution(pConvDataBlobs+ conv3idx, param_pFilters+convidx, pConvDataBlobs+convidx); 383 | TIME_END("prior3 loc"); 384 | 385 | convidx++; 
386 | TIME_START; 387 | convolution(pConvDataBlobs+ conv3idx, param_pFilters+convidx, pConvDataBlobs+convidx); 388 | TIME_END("prior3 conf"); 389 | 390 | TIME_START; 391 | float pSizes3[3] = {10, 16, 24}; 392 | priorbox(pConvDataBlobs+ conv3idx, &inputImage, 3, pSizes3, &conv3priorbox); 393 | TIME_END("prior3"); 394 | 395 | /***************PRIORBOX4*********************/ 396 | int conv4idx = 9; 397 | TIME_START; 398 | normalize(pConvDataBlobs + conv4idx, param_pConv4Norm); 399 | TIME_END("norm4"); 400 | 401 | convidx++; 402 | TIME_START 403 | convolution(pConvDataBlobs + conv4idx, param_pFilters + convidx, pConvDataBlobs + convidx); 404 | TIME_END("prior4 loc"); 405 | 406 | convidx++; 407 | TIME_START; 408 | convolution(pConvDataBlobs + conv4idx, param_pFilters + convidx, pConvDataBlobs + convidx); 409 | TIME_END("prior4 conf"); 410 | 411 | TIME_START; 412 | float pSizes4[2] = { 32, 48}; 413 | priorbox(pConvDataBlobs + conv4idx, &inputImage, 2, pSizes4, &conv4priorbox); 414 | TIME_END("prior4"); 415 | 416 | /***************PRIORBOX5*********************/ 417 | int conv5idx = 12; 418 | TIME_START; 419 | normalize(pConvDataBlobs + conv5idx, param_pConv5Norm); 420 | TIME_END("norm5"); 421 | 422 | convidx++; 423 | TIME_START 424 | convolution(pConvDataBlobs + conv5idx, param_pFilters + convidx, pConvDataBlobs + convidx); 425 | TIME_END("prior5 loc"); 426 | 427 | convidx++; 428 | TIME_START; 429 | convolution(pConvDataBlobs + conv5idx, param_pFilters + convidx, pConvDataBlobs + convidx); 430 | TIME_END("prior5 conf"); 431 | 432 | TIME_START; 433 | float pSizes5[2] = { 64, 96 }; 434 | priorbox(pConvDataBlobs + conv5idx, &inputImage, 2, pSizes5, &conv5priorbox); 435 | TIME_END("prior5"); 436 | 437 | /***************PRIORBOX6*********************/ 438 | int conv6idx = 15; 439 | TIME_START; 440 | normalize(pConvDataBlobs + conv6idx, param_pConv5Norm); 441 | TIME_END("norm6"); 442 | 443 | convidx++; 444 | TIME_START 445 | convolution(pConvDataBlobs + conv6idx, param_pFilters 
+ convidx, pConvDataBlobs + convidx); 446 | TIME_END("prior6 loc"); 447 | 448 | convidx++; 449 | TIME_START; 450 | convolution(pConvDataBlobs + conv6idx, param_pFilters + convidx, pConvDataBlobs + convidx); 451 | TIME_END("prior6 conf"); 452 | 453 | TIME_START; 454 | float pSizes6[3] = { 128, 192, 256 }; 455 | priorbox(pConvDataBlobs + conv6idx, &inputImage, 3, pSizes6, &conv6priorbox); 456 | TIME_END("prior6"); 457 | 458 | 459 | 460 | TIME_START; 461 | blob2vector(&conv3priorbox, &conv3priorbox_flat, true); 462 | blob2vector(pConvDataBlobs + 16, &conv3loc_flat, true); 463 | blob2vector(pConvDataBlobs + 17, &conv3conf_flat, true); 464 | 465 | blob2vector(&conv4priorbox, &conv4priorbox_flat, true); 466 | blob2vector(pConvDataBlobs + 18, &conv4loc_flat, true); 467 | blob2vector(pConvDataBlobs + 19, &conv4conf_flat, true); 468 | 469 | blob2vector(&conv5priorbox, &conv5priorbox_flat, true); 470 | blob2vector(pConvDataBlobs + 20, &conv5loc_flat, true); 471 | blob2vector(pConvDataBlobs + 21, &conv5conf_flat, true); 472 | 473 | blob2vector(&conv6priorbox, &conv6priorbox_flat, true); 474 | blob2vector(pConvDataBlobs + 22, &conv6loc_flat, true); 475 | blob2vector(pConvDataBlobs + 23, &conv6conf_flat, true); 476 | TIME_END("prior flat"); 477 | 478 | 479 | 480 | TIME_START 481 | concat4(&conv3priorbox_flat, &conv4priorbox_flat, &conv5priorbox_flat, &conv6priorbox_flat, &mbox_priorbox); 482 | concat4(&conv3loc_flat, &conv4loc_flat, &conv5loc_flat, &conv6loc_flat, &mbox_loc); 483 | concat4(&conv3conf_flat, &conv4conf_flat, &conv5conf_flat, &conv6conf_flat, &mbox_conf); 484 | TIME_END("concat prior") 485 | 486 | TIME_START 487 | softmax1vector2class(&mbox_conf); 488 | TIME_END("softmax") 489 | 490 | 491 | CDataBlob facesInfo; 492 | TIME_START; 493 | detection_output(&mbox_priorbox, &mbox_loc, &mbox_conf, 0.3f, 0.5f, 100, 50, &facesInfo); 494 | TIME_END("detection output") 495 | 496 | 497 | 498 | TIME_START; 499 | std::vector faces; 500 | for (int i = 0; i < facesInfo.width; i++) 
501 | { 502 | float score = facesInfo.getElementFloat(i, 0, 0); 503 | float bbxmin = facesInfo.getElementFloat(i, 0, 1); 504 | float bbymin = facesInfo.getElementFloat(i, 0, 2); 505 | float bbxmax = facesInfo.getElementFloat(i, 0, 3); 506 | float bbymax = facesInfo.getElementFloat(i, 0, 4); 507 | FaceRect r; 508 | r.score = score; 509 | //r.x = int(bbxmin * width + 0.5f); 510 | //r.y = int(bbymin * height + 0.5f); 511 | //r.w = int((bbxmax - bbxmin) * width + 0.5f); 512 | //r.h = int((bbymax - bbymin) * height + 0.5f); 513 | 514 | r.w = int( ((bbxmax - bbxmin) * width + (bbymax - bbymin) * height + 1) / 2); 515 | r.h = r.w; 516 | r.x = int(((bbxmin + bbxmax) * width - r.w + 0.5f) / 2); 517 | r.y = int(((bbymin + bbymax) * height - r.h + 0.5f) / 2); 518 | 519 | faces.push_back(r); 520 | } 521 | TIME_END("copy result"); 522 | 523 | TIME_TOTAL(total); 524 | 525 | return faces; 526 | } 527 | int * facedetect_cnn(unsigned char * result_buffer, //buffer memory for storing face detection results, !!its size must be 0x20000 Bytes!! 528 | unsigned char * rgb_image_data, int width, int height, int step) //input image, it must be RGB (three-channel) image! 
{
#ifdef __CALL_LIMIT__
	// Optional compile-time cap on how many detections may be performed.
	static int call_count = 0;
#endif

	if (!result_buffer)
	{
		fprintf(stderr, "%s: null buffer memory.\n", __FUNCTION__);
		return NULL;
	}
	//clear memory
	//memset(result_buffer, 0, 0x20000);
	// Only the leading 4-byte face counter is cleared, not the whole buffer.
	result_buffer[0] = 0;
	result_buffer[1] = 0;
	result_buffer[2] = 0;
	result_buffer[3] = 0;

	// NOTE(review): the element type was lost in extraction -- presumably
	// vector<FaceRect>; confirm against the upstream source.
	vector faces = objectdetect_cnn(rgb_image_data, width, height, step);

	double t, total=0;
	TIME_START;

	// The result buffer holds at most 256 face records.
	int num_faces =(int)faces.size();
	num_faces = MIN(num_faces, 256);

	int * pCount = (int *)result_buffer;
	pCount[0] = num_faces;

	// Each record is 142 shorts (284 bytes) starting 4 bytes in:
	// x, y, w, h, then score*score*100; the remainder is left untouched here.
	for (int i = 0; i < num_faces; i++)
	{
		short * p = ((short*)(result_buffer + 4)) + 142 * i;
		p[0] = (short)faces[i].x;
		p[1] = (short)faces[i].y;
		p[2] = (short)faces[i].w;
		p[3] = (short)faces[i].h;
		p[4] = (short)(faces[i].score * faces[i].score * 100);
	}
#ifdef __CALL_LIMIT__
	// Once the call budget is exhausted, silently wipe the results.
	if(call_count>1814403)
	{
		memset(result_buffer, 0 , 4+284*num_faces);
	}
	else
		call_count++;
#endif

	TIME_END("call detection");
	return pCount;
}
--------------------------------------------------------------------------------
/libfacedetection/src/facedetectcnn.cpp:
--------------------------------------------------------------------------------
/*
By downloading, copying, installing or using the software you agree to this license.
If you do not agree to this license, do not download, install,
copy or use the software.


                  License Agreement For libfacedetection
                     (3-clause BSD License)

Copyright (c) 2018-2019, Shiqi Yu, all rights reserved.
11 | shiqi.yu@gmail.com 12 | 13 | Redistribution and use in source and binary forms, with or without modification, 14 | are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright notice, 20 | this list of conditions and the following disclaimer in the documentation 21 | and/or other materials provided with the distribution. 22 | 23 | * Neither the names of the copyright holders nor the names of the contributors 24 | may be used to endorse or promote products derived from this software 25 | without specific prior written permission. 26 | 27 | This software is provided by the copyright holders and contributors "as is" and 28 | any express or implied warranties, including, but not limited to, the implied 29 | warranties of merchantability and fitness for a particular purpose are disclaimed. 30 | In no event shall copyright holders or contributors be liable for any direct, 31 | indirect, incidental, special, exemplary, or consequential damages 32 | (including, but not limited to, procurement of substitute goods or services; 33 | loss of use, data, or profits; or business interruption) however caused 34 | and on any theory of liability, whether in contract, strict liability, 35 | or tort (including negligence or otherwise) arising in any way out of 36 | the use of this software, even if advised of the possibility of such damage. 
*/

#include "facedetectcnn.h"
// NOTE(review): the header names of the following includes were lost during
// extraction (angle brackets stripped); restore from the upstream file.
#include
#include
#include
#include //for FLT_EPSION
#include //for stable_sort, sort

// Accessor for one lane of an AVX __m256 vector (MSVC exposes .m256_f32).
#if defined( __WIN__) || defined(_WINDOWS)
#define SSE_256ELEMENT(vec, idx) vec.m256_f32[(idx)]
#else
#define SSE_256ELEMENT(vec, idx) vec[(idx)]
#endif

// OpenMP 4.0 (201307) or newer enables "#pragma omp simd" paths below.
#if !defined(_ENABLE_OPENMP_SIMD) && ((defined(_OPENMP) && (_OPENMP >= 201307L)))
#  define _ENABLE_OPENMP_SIMD
#elif defined(__cilk)
#  define _ENABLE_CILKPLUS
#endif

// Axis-aligned box; fields suggest coordinates normalized to [0,1],
// consistent with the priorbox/detection_output usage elsewhere in the file.
typedef struct NormalizedBBox_
{
	float xmin;
	float ymin;
	float xmax;
	float ymax;
} NormalizedBBox;


// Allocate size bytes aligned to _MALLOC_ALIGN. The raw malloc pointer is
// stored in the word just before the returned pointer so myFree_ can free it.
void* myAlloc(size_t size)
{
	char *ptr, *ptr0;
	ptr0 = (char*)malloc(
		(size_t)(size + _MALLOC_ALIGN * ((size >= 4096) + 1) + sizeof(char*)));

	if (!ptr0)
		return 0;

	// align the pointer
	ptr = (char*)(((size_t)(ptr0 + sizeof(char*) + 1) + _MALLOC_ALIGN - 1) & ~(size_t)(_MALLOC_ALIGN - 1));
	*(char**)(ptr - sizeof(char*)) = ptr0;

	return ptr;
}


// Release memory obtained from myAlloc by recovering the stashed raw pointer.
// Misaligned pointers are silently ignored.
void myFree_(void* ptr)
{
	// Pointer must be aligned by _MALLOC_ALIGN
	if (ptr)
	{
		if (((size_t)ptr & (_MALLOC_ALIGN - 1)) != 0)
			return;
		free(*((char**)ptr - 1));
	}

}


// Dot product of two float vectors; NEON, AVX2 and scalar variants.
// NOTE(review): the NEON path assumes num is a multiple of 4 -- confirm
// the blob channel padding guarantees this.
inline float dotProductFloatChGeneral(float* p1, float * p2, int num, int lengthInBytes)
{
#if defined(_ENABLE_NEON) && !defined(_ENABLE_INT8_CONV)
	float sum = 0.0f;
	float32x4_t a, b;
	float32x4_t result_vec;

	result_vec = vdupq_n_f32(0); //zeros
	for (int i = 0; i < num; i += 4)
	{
		a = vld1q_f32(p1 + i);
		b = vld1q_f32(p2 + i);
		result_vec = vmlaq_f32(result_vec, a, b);
	}
	sum += vgetq_lane_f32(result_vec, 0);
	sum += vgetq_lane_f32(result_vec, 1);
	sum += vgetq_lane_f32(result_vec, 2);
	sum += vgetq_lane_f32(result_vec, 3);

	return sum;

#elif defined(_ENABLE_AVX2) && !defined(_ENABLE_INT8_CONV)
	float sum = 0;
	// AVX2 path iterates by lengthInBytes (the padded channel step),
	// not num, 8 floats per step.
	int end = lengthInBytes / sizeof(float);

	__m256 sumvec = _mm256_setzero_ps();
	__m256 avec, bvec;
	for (int i = 0; i < end; i += 8)
	{
		avec = _mm256_load_ps(p1 + i);
		bvec = _mm256_load_ps(p2 + i);
		//_mm256_fmadd_ps needs FMA support
		//but _mm256_add_ps and _mm256_mul_ps only need AVX

		//sumvec = _mm256_add_ps(sumvec, _mm256_mul_ps(avec, bvec));

		//fmadd is faster than add+mul
		sumvec = _mm256_fmadd_ps(avec, bvec, sumvec);

		//Note: _mm256_dp_ps is much slower than the previou line of code
	}
	// Two horizontal adds leave lane sums in elements 0 and 4.
	sumvec = _mm256_hadd_ps(sumvec, sumvec);
	sumvec = _mm256_hadd_ps(sumvec, sumvec);
	sum += SSE_256ELEMENT(sumvec, 0);
	sum += SSE_256ELEMENT(sumvec, 4);

	return sum;

#else
	// Portable scalar fallback.
	float sum = 0;

#if defined(_ENABLE_OPENMP_SIMD)
#pragma omp simd reduction(+:sum)
#endif
	for (int i = 0; i < num; i++)
	{
		sum += (p1[i] * p2[i]);
	}
	return sum;
#endif
}

// Dot product of two signed-char vectors with int accumulation;
// NEON, AVX2 and scalar variants.
inline int dotProductInt8ChGeneral(signed char * p1, signed char * p2, int num, int lengthInBytes)
{
#if defined(_ENABLE_NEON) && defined(_ENABLE_INT8_CONV)
	// Earlier 16-bit NEON implementation kept for reference:
	//int sum = 0;
	//int16x8_t a, b;
	//int16x8_t result_vec;
	//int32x4_t d;
	//

	//result_vec = vdupq_n_s16(0); //zeros
	//for (int i = 0; i < num; i += 8)
	//{
	//	a = vld1q_s16(p1 + i);
	//	b = vld1q_s16(p2 + i);
	//	result_vec = vmlaq_s16(result_vec, a, b);
	//}
	//d = vpaddlq_s16(result_vec);
	//sum += vgetq_lane_s32(d, 0);
	//sum += vgetq_lane_s32(d, 1);
	//sum += vgetq_lane_s32(d, 2);
	//sum += vgetq_lane_s32(d, 3);

	//return sum;

	int sum = 0;
	int8x8x2_t a, b;
	int16x8_t result_vec;
	int32x4_t d;


	result_vec = vdupq_n_s16(0);
	//zeros
	// Processes 16 bytes per iteration via de-interleaving loads.
	// NOTE(review): 16-bit accumulation can overflow for long vectors of
	// large-magnitude int8 products -- presumably bounded by the model; verify.
	for (int i = 0; i < num; i += 16)
	{
		a = vld2_s8(p1 + i);
		b = vld2_s8(p2 + i);
		result_vec = vmlal_s8(result_vec, a.val[0], b.val[0]);
		result_vec = vmlal_s8(result_vec, a.val[1], b.val[1]);
	}
	d = vpaddlq_s16(result_vec);
	sum += vgetq_lane_s32(d, 0);
	sum += vgetq_lane_s32(d, 1);
	sum += vgetq_lane_s32(d, 2);
	sum += vgetq_lane_s32(d, 3);

	return sum;

#elif defined(_ENABLE_AVX2) && defined(_ENABLE_INT8_CONV)
	int sum = 0;
	int i = 0;

	short sumarray[16];

	__m256i temp_sum;
	__m128i ac, bc;
	__m256i as, bs;
	for (; i < num; i += 16)
	{
		// Widen 16 int8 values to int16 before multiplying.
		ac = _mm_load_si128((__m128i*)(p1 + i));
		bc = _mm_load_si128((__m128i*)(p2 + i));
		as = _mm256_cvtepi8_epi16(ac);
		bs = _mm256_cvtepi8_epi16(bc);
		temp_sum = _mm256_mullo_epi16(as, bs);
		temp_sum = _mm256_hadd_epi16(temp_sum, temp_sum);
		temp_sum = _mm256_hadd_epi16(temp_sum, temp_sum);
		//temp_sum = _mm256_hadd_epi16(temp_sum, temp_sum);
		_mm256_store_si256((__m256i*)sumarray, temp_sum);
		//sum += ((int)(sumarray[0]) + (int)(sumarray[8]));
		sum += ((int)(sumarray[0]) + (int)(sumarray[1]) + +(int)(sumarray[8]) + (int)(sumarray[9]));
	}
	return sum;
#else

	// Portable scalar fallback with full int precision.
	int sum = 0;

#if defined(_ENABLE_OPENMP_SIMD)
#pragma omp simd reduction(+:sum)
#endif
	for (int i = 0; i < num; i++)
	{
		sum += ( int(p1[i]) * int(p2[i]));
	}
	return sum;
#endif
}

// 1x1, pad 0, stride 1 convolution on float data: each output channel is a
// dot product over all input channels at the same pixel.
bool convolutionFloat1x1P0S1(const CDataBlob *inputData, const Filters* filters, CDataBlob *outputData)
{
#if defined(_OPENMP)
#pragma omp parallel for
#endif
	for (int row = 0; row < outputData->height; row++)
	{
		for (int col = 0; col < outputData->width; col++)
		{
			float * pOut = (outputData->data_float + (row*outputData->width +
				col)*outputData->floatChannelStepInByte / sizeof(float));
			float * pIn = (inputData->data_float + (row*inputData->width + col)*inputData->floatChannelStepInByte / sizeof(float));
			for (int ch = 0; ch < outputData->channels; ch++)
			{
				float * pF = (float*)(filters->filters[ch]->data_float);
				pOut[ch] = dotProductFloatChGeneral(pIn, pF, inputData->channels, inputData->floatChannelStepInByte);
			}
		}
	}
	return true;
}

// Int8 counterpart of the 1x1 convolution above; accumulates in int and
// stores the raw (unscaled) result as float.
bool convolutionInt81x1P0S1(const CDataBlob *inputData, const Filters* filters, CDataBlob *outputData)
{
#if defined(_OPENMP)
#pragma omp parallel for
#endif
	for (int row = 0; row < outputData->height; row++)
	{
		for (int col = 0; col < outputData->width; col++)
		{
			float * pOut = (outputData->data_float + (row*outputData->width + col)*outputData->floatChannelStepInByte / sizeof(float));
			signed char * pIn = (inputData->data_int8 + (row*inputData->width + col)*inputData->int8ChannelStepInByte / sizeof(char));
			for (int ch = 0; ch < outputData->channels; ch++)
			{
				signed char * pF = (filters->filters[ch]->data_int8);
				pOut[ch] = (float)dotProductInt8ChGeneral(pIn, pF, inputData->channels, inputData->int8ChannelStepInByte);
			}
		}
	}
	return true;
}



// 3x3 convolution, pad 1, arbitrary channel count, float data. The window is
// processed one image row at a time so border rows/columns can be clipped.
bool convolutionFloat3x3P1ChGeneral(const CDataBlob *inputData, const Filters* filters, CDataBlob *outputData)
{
#if defined(_OPENMP)
#pragma omp parallel for
#endif
	for (int row = 0; row < outputData->height; row++)
	{
		int elementStepInFloat = inputData->floatChannelStepInByte/sizeof(float);
		int stride = filters->stride;
		int src_centery = row * stride;
		for (int col = 0; col < outputData->width; col++)
		{
			// Clip the 3-wide horizontal window to the image border.
			int srcx_start = col * stride - 1;
			int srcx_end = srcx_start + 3;
			srcx_start = MAX(0, srcx_start);
			srcx_end = MIN(srcx_end,
				inputData->width);
			int num_pixels = srcx_end - srcx_start;
			int num_pixels_infloat = (srcx_end - srcx_start) * elementStepInFloat;

			for (int ch = 0; ch < outputData->channels; ch++)
			{
				int srcy = src_centery - 1;

				float * pIn = (inputData->data_float + (srcy *inputData->width + srcx_start)*elementStepInFloat);
				float * pF = (filters->filters[ch]->data_float) + (srcx_start - col*stride + 1) * elementStepInFloat;
				float * pOut = (outputData->data_float + (row*outputData->width + col)*outputData->floatChannelStepInByte / sizeof(float));
				pOut[ch] = 0; //the new created blob is not zeros, clear it first

				// Top row of the 3x3 window (skipped above the image).
				{
					if (srcy >= 0)
					{
						pOut[ch] += dotProductFloatChGeneral(pIn,
							pF,
							num_pixels_infloat,
							num_pixels_infloat * sizeof(float));
					}
				}
				// Middle row.
				{
					srcy++;
					{
						pIn += (inputData->width * elementStepInFloat);
						pOut[ch] += dotProductFloatChGeneral(pIn,
							pF + ( 3 * elementStepInFloat),
							num_pixels_infloat,
							num_pixels_infloat * sizeof(float));
					}
				}
				// Bottom row (skipped below the image).
				{
					srcy++;
					if (srcy < inputData->height)
					{
						pIn += (inputData->width * elementStepInFloat);
						pOut[ch] += dotProductFloatChGeneral(pIn,
							pF + ( 6 * elementStepInFloat ),
							num_pixels_infloat,
							num_pixels_infloat * sizeof(float));
					}
				}
			}
		}
	}
	return true;
}

// Int8 counterpart of the 3x3 pad-1 convolution above.
bool convolutionInt83x3P1ChGeneral(const CDataBlob *inputData, const Filters* filters, CDataBlob *outputData)
{
#if defined(_OPENMP)
#pragma omp parallel for
#endif
	for (int row = 0; row < outputData->height; row++)
	{
		int elementStep = inputData->int8ChannelStepInByte;
		int stride = filters->stride;
		int src_centery = row * stride;
		for (int col = 0; col < outputData->width; col++)
		{
			int srcx_start = col * stride - 1;
			int srcx_end = srcx_start + 3;

			srcx_start = MAX(0, srcx_start);
			srcx_end = MIN(srcx_end, inputData->width);
			int num_pixels_inbytes = (srcx_end - srcx_start) * elementStep;

			for (int ch = 0; ch < outputData->channels; ch++)
			{
				int srcy = src_centery - 1;

				signed char * pIn = (inputData->data_int8 + (srcy *inputData->width + srcx_start)*elementStep);
				signed char * pF = (filters->filters[ch]->data_int8) + ( (srcx_start - col*stride + 1)) * elementStep;
				float * pOut = (outputData->data_float + (row*outputData->width + col)*outputData->floatChannelStepInByte / sizeof(float));
				pOut[ch] = 0;//the new created blob is not zeros, clear it first

				// Top row of the 3x3 window (skipped above the image).
				{
					if (srcy >= 0)
					{
						pOut[ch] += dotProductInt8ChGeneral(pIn,
							pF,
							num_pixels_inbytes,
							num_pixels_inbytes);
					}
				}
				// Middle row.
				{
					srcy++;
					{
						pIn += (inputData->width * elementStep);
						pOut[ch] += dotProductInt8ChGeneral(pIn,
							pF + (3 * elementStep),
							num_pixels_inbytes,
							num_pixels_inbytes);
					}
				}
				// Bottom row (skipped below the image).
				{
					srcy++;
					if (srcy < inputData->height)
					{
						pIn += (inputData->width * elementStep);
						pOut[ch] += dotProductInt8ChGeneral(pIn,
							pF + (6 * elementStep),
							num_pixels_inbytes,
							num_pixels_inbytes);
					}
				}
			}
		}
	}
	return true;
}

// Quantize the blob's float data into its int8 buffer using a symmetric
// scale of 127/max(|x|); records the scale in int8float_scale and marks the
// int8 data valid. Returns early if the int8 data is already valid.
bool convertFloat2Int8(CDataBlob * dataBlob)
{
	if (dataBlob->data_float == NULL || dataBlob->data_int8 == NULL)
	{
		cerr << __FUNCTION__ << ": The input data is null."
			<< endl;
		return false;
	}

	float maxval = -FLT_MAX;
#if defined(_ENABLE_NEON)
	float32x4_t maxvalvec = vdupq_n_f32(-FLT_MAX);
	float32x4_t scalevec;
#elif defined(_ENABLE_AVX2)
	//__m256 maxvalvec = _mm256_set1_ps(-FLT_MAX);
	__m256 scalevec;
#endif

	float scale = 1.f;

	// Skip requantization when the int8 copy is already up to date.
	if (dataBlob->int8_data_valid)
		return true;

	// Pass 1: find the maximum absolute value over all elements.
	for (int row = 0; row < dataBlob->height; row++)
	{
		for (int col = 0; col < dataBlob->width; col++)
		{
			float * pF = (dataBlob->data_float + (row*dataBlob->width + col)*dataBlob->floatChannelStepInByte / sizeof(float));

#if defined(_ENABLE_NEON)
			for (int ch = 0; ch < dataBlob->channels; ch+=4)
			{
				float32x4_t a;
				a = vld1q_f32(pF + ch);
				a = vabsq_f32(a);
				maxvalvec = vmaxq_f32(maxvalvec, a);
			}
#else

#if defined(_ENABLE_OPENMP_SIMD)
#pragma omp simd reduction(max:maxval)
#endif
			for (int ch = 0; ch < dataBlob->channels; ch++)
			{
				float tmp;
				//tmp = fabs(pF[ch]);
				//maxval = MAX(maxval, tmp);
				// Branch-free absolute value: multiply by +-1.
				tmp = pF[ch];
				tmp = tmp * ((tmp > 0) * 2 - 1);
				maxval = MAX(maxval, tmp);
			}
#endif
		}
	}
#if defined(_ENABLE_NEON)
	// Reduce the four NEON lanes into the scalar maximum.
	{
		float tmp;
		tmp = vgetq_lane_f32(maxvalvec, 0);
		maxval = MAX(maxval, tmp);
		tmp = vgetq_lane_f32(maxvalvec, 1);
		maxval = MAX(maxval, tmp);
		tmp = vgetq_lane_f32(maxvalvec, 2);
		maxval = MAX(maxval, tmp);
		tmp = vgetq_lane_f32(maxvalvec, 3);
		maxval = MAX(maxval, tmp);
	}
#endif
	scale = 127.f / (maxval + FLT_EPSILON);

#if defined(_ENABLE_NEON)
	scalevec = vdupq_n_f32(scale);
#elif defined(_ENABLE_AVX2)
	scalevec = _mm256_set1_ps(scale);
#endif

	// Pass 2: scale and round-to-nearest each element into the int8 buffer.
#if defined(_OPENMP)
#pragma omp parallel for
#endif
	for (int row = 0; row < dataBlob->height; row++)
	{
		for (int col = 0; col < dataBlob->width; col++)
		{
			float * pF = (dataBlob->data_float + (row*dataBlob->width + col)*dataBlob->floatChannelStepInByte / sizeof(float));
			signed char * pI = (dataBlob->data_int8 + (row*dataBlob->width + col)*dataBlob->int8ChannelStepInByte / sizeof(char));

#if defined(_ENABLE_NEON)
			for (int ch = 0; ch < dataBlob->channels; ch+=4)
			{
				float tmp;
				float32x4_t a = vld1q_f32(pF + ch);
				float32x4_t resultvec = vmulq_f32(a, scalevec);

				////becuase Floating-point to integer conversions "vcvtq_s32_f32" use round towards zero.
				////but we need round to nearest
				////so we cannot use the following NEON instructions
				//int32x4_t int32resultvec = vcvtq_s32_f32(resultvec);
				//int16x4_t int16resultvec = vqmovn_s32(int32resultvec);
				//vst1_s16(pI + ch, int16resultvec);

				tmp = vgetq_lane_f32(resultvec, 0);
				pI[ch] = (signed char)(tmp + ((tmp>0) - 0.5f));
				tmp = vgetq_lane_f32(resultvec, 1);
				pI[ch+1] = (signed char)(tmp + ((tmp>0) - 0.5f));
				tmp = vgetq_lane_f32(resultvec, 2);
				pI[ch+2] = (signed char)(tmp + ((tmp>0) - 0.5f));
				tmp = vgetq_lane_f32(resultvec, 3);
				pI[ch+3] = (signed char)(tmp + ((tmp>0) - 0.5f));
			}
#else
#if defined(_ENABLE_OPENMP_SIMD)
#pragma omp simd
#endif
			for (int ch = 0; ch < dataBlob->channels; ch++)
			{
				float tmp;
				//pI[ch] = (signed char)round(pF[ch] * scale);
				//to speedup round() using the following code
				tmp = pF[ch];
				pI[ch] = (signed char)(tmp * scale + ((tmp>0)-0.5f));
			}
#endif
		}
	}
	dataBlob->int8float_scale = scale;
	dataBlob->int8_data_valid = true;

	return true;
}


// Dispatch a convolution: validates filters, computes the output size
// (only 1x1 p0 s1 and 3x3 p1 s{1,2} are supported), optionally quantizes the
// input, runs the matching kernel, and rescales int8 results back to float.
bool convolution(CDataBlob *inputData, const Filters* filters, CDataBlob *outputData)
{
	if (inputData->data_float == NULL || inputData->data_int8 == NULL)
	{
		cerr <<
			__FUNCTION__ << ": The input data is null." << endl;
		return false;
	}
	// NOTE(review): several diagnostics below carry typos kept verbatim
	// ("There is not filters", "Onle", "Unspported", "Unsported") -- they are
	// runtime strings and must not be silently edited in a review pass.
	if (filters->filters.size() == 0)
	{
		cerr << __FUNCTION__ << ": There is not filters." << endl;
		return false;
	}
	//check filters' dimensions
	int filterW = filters->filters[0]->width;
	int filterH = filters->filters[0]->height;
	int filterC = filters->filters[0]->channels;
	int filterS = filters->stride;
	int filterP = filters->pad;

	int outputW = 0;
	int outputH = 0;
	int outputC = (int)filters->filters.size();

	// All filters must share the first filter's dimensions.
	for (int i = 1; i < outputC; i++)
	{
		if ((filterW != filters->filters[i]->width) ||
			(filterH != filters->filters[i]->height) ||
			(filterC != filters->filters[i]->channels))
		{
			cerr << __FUNCTION__ << ": The filters must be the same size." << endl;
			return false;
		}
	}

	if (filterC != inputData->channels)
	{
		cerr << __FUNCTION__ << ": The number of channels of filters must be the same with the input data. " << filterC << " vs " << inputData->channels << endl;
		return false;
	}

	//calculate the output dimension
	if (filterW == 1 && filterH == 1) //1x1 filters
	{
		if (filterS != 1)
		{
			cerr << __FUNCTION__ << ": Onle stride = 1 is supported for 1x1 filters." << endl;
			return false;
		}
		if (filterP != 0)
		{
			cerr << __FUNCTION__ << ": Onle pad = 0 is supported for 1x1 filters."
				<< endl;
			return false;
		}
		// 1x1, pad 0, stride 1: output size equals input size.
		outputW = inputData->width;
		outputH = inputData->height;

	}
	else if (filterW == 3 && filterH == 3) //3x3 filters
	{
		if (filterS == 1 && filterP == 1)
		{
			outputW = inputData->width;
			outputH = inputData->height;
		}
		else if (filterS == 2 && filterP == 1)
		{
			// Stride 2 halves each dimension, rounding up.
			outputW = (inputData->width + 1) / 2;
			outputH = (inputData->height + 1) / 2;
		}
		else
		{
			cerr << __FUNCTION__ << ": Unspported filter stride=" << filterS << " or pad=" << filterP << endl;
			cerr << __FUNCTION__ << ": For 3x3 filters, only pad=1 and stride={1,2} are supported." << endl;
			return false;
		}
	}
	else
	{
		cerr << __FUNCTION__ << ": Unsported filter size." << endl;
		return false;
	}

	if (outputW < 1 || outputH < 1)
	{
		cerr << __FUNCTION__ << ": The size of the output is not correct. (" << outputW << ", " << outputH << ")."
			<< endl;
		return false;
	}

	outputData->create(outputW, outputH, outputC);

	// Debug helper (disabled): print the input value range before convolving.
	/*
	{
		float maxval = -FLT_MAX;
		float minval = FLT_MAX;

		for (int row = 0; row < inputData->height; row++)
		{
			for (int col = 0; col < inputData->width; col++)
			{
				float * pF = (inputData->data_float + (row*inputData->width + col)*inputData->floatChannelStepInByte / sizeof(float));
				for (int ch = 0; ch < inputData->channels; ch++)
				{
					maxval = MAX(maxval, pF[ch]);
					minval = MIN(minval, pF[ch]);
				}
			}
		}
		cout << "\t\t\t\tconv range [min, max]=[" << minval << ", " << maxval << "]" << endl;
	}
	*/
#if defined(_ENABLE_INT8_CONV)
	// Quantize the input once; no-op if its int8 copy is already valid.
	convertFloat2Int8(inputData);
#endif

	// Dispatch to the kernel matching the filter size and build flavor.
	if (filterW == 1 && filterH == 1) //1x1 filters
	{
#if defined(_ENABLE_INT8_CONV)
		convolutionInt81x1P0S1(inputData, filters, outputData);
#else
		convolutionFloat1x1P0S1(inputData, filters, outputData);
#endif
	}
	else if (filterW == 3 && filterH == 3) //3x3 filters
	{
#if defined(_ENABLE_INT8_CONV)
		convolutionInt83x3P1ChGeneral(inputData, filters, outputData);
#else
		convolutionFloat3x3P1ChGeneral(inputData, filters, outputData);
#endif
	}

#if defined(_ENABLE_INT8_CONV)
	// Undo both quantization scales (input blob and filter bank).
	scale(outputData, 1.0f / (inputData->int8float_scale * filters->scale));
#endif

	return true;
}

//only 2X2 S2 is supported
// 2x2 stride-2 max pooling over the float data, window clipped at borders.
bool maxpooling2x2S2(const CDataBlob *inputData, CDataBlob *outputData)
{
	if (inputData->data_float == NULL)
	{
		cerr << __FUNCTION__ << ": The input data is null."
			<< endl;
		return false;
	}
	// NOTE(review): the template argument of static_cast was lost in
	// extraction (presumably static_cast<int>) -- restore from upstream.
	int outputW = static_cast(ceil((inputData->width - 3.0f) / 2)) + 1;
	int outputH = static_cast(ceil((inputData->height - 3.0f) / 2)) + 1;
	int outputC = inputData->channels;

	if (outputW < 1 || outputH < 1)
	{
		cerr << __FUNCTION__ << ": The size of the output is not correct. (" << outputW << ", " << outputH << ")." << endl;
		return false;
	}

	int elementStep = inputData->floatChannelStepInByte / sizeof(float);
	int lineElementStep = inputData->width * elementStep;

	outputData->create(outputW, outputH, outputC);

	for (int row = 0; row < outputData->height; row++)
	{
		for (int col = 0; col < outputData->width; col++)
		{
			// Collect the element offsets of the (up to 4) window pixels,
			// clipping the 2x2 window at the right/bottom borders.
			int inputMatOffsetsInElement[4];
			int elementCount = 0;

			int hstart = row * 2;
			int wstart = col * 2;
			int hend = MIN(hstart + 2, inputData->height);
			int wend = MIN(wstart + 2, inputData->width);

			for (int fy = hstart; fy < hend; fy++)
				for (int fx = wstart; fx < wend; fx++)
				{
					inputMatOffsetsInElement[elementCount++] = (fy *inputData->width + fx) * inputData->floatChannelStepInByte / sizeof(float);
				}

			float * pOut = outputData->data_float + (row*outputData->width + col) * outputData->floatChannelStepInByte / sizeof(float);
			float * pIn = inputData->data_float;

#if defined(_ENABLE_NEON)
			for (int ch = 0; ch < outputData->channels; ch += 4)
			{
				float32x4_t a;
				float32x4_t maxval = vld1q_f32(pIn + ch + inputMatOffsetsInElement[0]);
				for (int el = 1; el < elementCount; el++)
				{
					a = vld1q_f32(pIn + ch + inputMatOffsetsInElement[el]);
					maxval = vmaxq_f32(maxval, a);
				}
				vst1q_f32(pOut + ch, maxval);
			}
#elif defined(_ENABLE_AVX2)
			for (int ch = 0; ch < outputData->channels; ch += 8)
			{
				__m256 a;
				__m256 maxval = _mm256_load_ps(pIn + ch +
					inputMatOffsetsInElement[0]);
				for (int el = 1; el < elementCount; el++)
				{
					a = _mm256_load_ps(pIn + ch + inputMatOffsetsInElement[el]);
					maxval = _mm256_max_ps(maxval, a);
				}
				_mm256_store_ps(pOut + ch, maxval);
			}
#else

			// Scalar fallback: per-channel maximum over the window pixels.
			for (int ch = 0; ch < outputData->channels; ch++)
			{
				float maxval = pIn[ch + inputMatOffsetsInElement[0]];
#if defined(_ENABLE_OPENMP_SIMD)
#pragma omp simd reduction(max:maxval)
#endif
				for (int el = 1; el < elementCount; el++)
				{
					maxval = MAX(maxval, pIn[ch + inputMatOffsetsInElement[el]]);
				}
				pOut[ch] = maxval;
			}
#endif
		}
	}

	return true;
}



// Concatenate four same-sized blobs along the channel axis.
bool concat4(const CDataBlob *inputData1, const CDataBlob *inputData2, const CDataBlob *inputData3, const CDataBlob *inputData4, CDataBlob *outputData)
{
	if ((inputData1->data_float == NULL) || (inputData2->data_float == NULL) || (inputData3->data_float == NULL) || (inputData4->data_float == NULL))
	{
		cerr << __FUNCTION__ << ": The input data is null." << endl;
		return false;
	}

	if ((inputData1->width != inputData2->width) ||
		(inputData1->height != inputData2->height) ||
		(inputData1->width != inputData3->width) ||
		(inputData1->height != inputData3->height) ||
		(inputData1->width != inputData4->width) ||
		(inputData1->height != inputData4->height))
	{
		// NOTE(review): message says "three inputs" but four are compared;
		// the string is kept verbatim here (runtime text).
		cerr << __FUNCTION__ << ": The three inputs must have the same size." << endl;
		return false;
	}
	int outputW = inputData1->width;
	int outputH = inputData1->height;
	int outputC = inputData1->channels + inputData2->channels + inputData3->channels + inputData4->channels;

	if (outputW < 1 || outputH < 1 || outputC < 1)
	{
		cerr << __FUNCTION__ << ": The size of the output is not correct. (" << outputW << ", " << outputH << ", " << outputC << ")."
<< endl; 797 | return false; 798 | } 799 | 800 | outputData->create(outputW, outputH, outputC); 801 | 802 | for (int row = 0; row < outputData->height; row++) 803 | { 804 | for (int col = 0; col < outputData->width; col++) 805 | { 806 | float * pOut = (outputData->data_float + (row*outputData->width + col)*outputData->floatChannelStepInByte / sizeof(float)); 807 | float * pIn1 = (inputData1->data_float + (row*inputData1->width + col)*inputData1->floatChannelStepInByte / sizeof(float)); 808 | float * pIn2 = (inputData2->data_float + (row*inputData2->width + col)*inputData2->floatChannelStepInByte / sizeof(float)); 809 | float * pIn3 = (inputData3->data_float + (row*inputData3->width + col)*inputData3->floatChannelStepInByte / sizeof(float)); 810 | float * pIn4 = (inputData4->data_float + (row*inputData4->width + col)*inputData4->floatChannelStepInByte / sizeof(float)); 811 | 812 | memcpy(pOut, pIn1, sizeof(float)* inputData1->channels); 813 | memcpy(pOut + inputData1->channels, pIn2, sizeof(float)* inputData2->channels); 814 | memcpy(pOut + inputData1->channels + inputData2->channels, pIn3, sizeof(float)* inputData3->channels); 815 | memcpy(pOut + inputData1->channels + inputData2->channels + inputData3->channels, pIn4, sizeof(float)* inputData4->channels); 816 | } 817 | } 818 | return true; 819 | } 820 | 821 | bool scale(CDataBlob * dataBlob, float scale) 822 | { 823 | if (dataBlob->data_float == NULL || dataBlob->data_int8 == NULL) 824 | { 825 | cerr << __FUNCTION__ << ": The input data is null." 
<< endl; 826 | return false; 827 | } 828 | 829 | for (int row = 0; row < dataBlob->height; row++) 830 | { 831 | for (int col = 0; col < dataBlob->width; col++) 832 | { 833 | float * pF = (dataBlob->data_float + (row*dataBlob->width + col)*dataBlob->floatChannelStepInByte / sizeof(float)); 834 | #if defined(_ENABLE_NEON) 835 | float32x4_t a, bscale; 836 | float32x4_t result_vec; 837 | 838 | bscale = vdupq_n_f32(scale); 839 | for (int ch = 0; ch < dataBlob->channels; ch+=4) 840 | { 841 | a = vld1q_f32(pF + ch); 842 | result_vec = vmulq_f32(a, bscale); 843 | vst1q_f32(pF + ch, result_vec); 844 | } 845 | #elif defined(_ENABLE_AVX2) 846 | __m256 a, bscale; 847 | 848 | bscale = _mm256_set1_ps(scale); 849 | for (int ch = 0; ch < dataBlob->channels; ch += 8) 850 | { 851 | a = _mm256_load_ps(pF + ch); 852 | a = _mm256_mul_ps(a, bscale); 853 | _mm256_store_ps(pF + ch, a); 854 | } 855 | 856 | #else 857 | #if defined(_ENABLE_OPENMP_SIMD) 858 | #pragma omp simd 859 | #endif 860 | for (int ch = 0; ch < dataBlob->channels; ch++) 861 | { 862 | pF[ch] *= scale; 863 | } 864 | #endif 865 | } 866 | } 867 | return true; 868 | } 869 | 870 | bool relu(const CDataBlob *inputOutputData) 871 | { 872 | if (inputOutputData->data_float == NULL) 873 | { 874 | cerr << __FUNCTION__ << ": The input data is null." 
<< endl; 875 | return false; 876 | } 877 | 878 | 879 | for (int row = 0; row < inputOutputData->height; row++) 880 | { 881 | for (int col = 0; col < inputOutputData->width; col++) 882 | { 883 | float * pData = (float*)(inputOutputData->data_float + (row*inputOutputData->width + col)*inputOutputData->floatChannelStepInByte / sizeof(float)); 884 | 885 | #if defined(_ENABLE_NEON) 886 | float32x4_t a, bzeros; 887 | float32x4_t result_vec; 888 | 889 | bzeros = vdupq_n_f32(0); //zeros 890 | for (int ch = 0; ch < inputOutputData->channels; ch += 4) 891 | { 892 | a = vld1q_f32(pData + ch); 893 | result_vec = vmaxq_f32(a, bzeros); 894 | vst1q_f32(pData + ch, result_vec); 895 | } 896 | #elif defined(_ENABLE_AVX2) 897 | __m256 a, bzeros; 898 | 899 | bzeros = _mm256_setzero_ps(); //zeros 900 | for (int ch = 0; ch < inputOutputData->channels; ch += 8) 901 | { 902 | a = _mm256_load_ps(pData + ch); 903 | a = _mm256_max_ps(a, bzeros); 904 | _mm256_store_ps(pData + ch, a); 905 | } 906 | #else 907 | #if defined(_ENABLE_OPENMP_SIMD) 908 | #pragma omp simd 909 | #endif 910 | for (int ch = 0; ch < inputOutputData->channels; ch++) 911 | pData[ch] = MAX(pData[ch], 0); 912 | #endif 913 | } 914 | } 915 | return true; 916 | } 917 | 918 | bool priorbox(const CDataBlob * featureData, const CDataBlob * imageData, int num_sizes, float * pWinSizes, CDataBlob * outputData) 919 | { 920 | if ((featureData->data_float == NULL) || 921 | imageData->data_float == NULL|| 922 | pWinSizes == NULL) 923 | { 924 | cerr << __FUNCTION__ << ": The input data is null." 
<< endl; 925 | return false; 926 | } 927 | 928 | int feature_width = featureData->width; 929 | int feature_height = featureData->height; 930 | int image_width = imageData->width * 2; 931 | int image_height = imageData->height * 2; 932 | 933 | float step_w = static_cast(image_width) / feature_width; 934 | float step_h = static_cast(image_height) / feature_height; 935 | 936 | float * output_data = outputData->data_float; 937 | 938 | // outputData->create(feature_width, feature_height, num_sizes * 4 * 2); 939 | outputData->create(feature_width, feature_height, num_sizes * 4); 940 | 941 | for (int h = 0; h < feature_height; ++h) 942 | { 943 | for (int w = 0; w < feature_width; ++w) 944 | { 945 | float * pOut = (float*)(outputData->data_float + ( h * outputData->width + w) * outputData->floatChannelStepInByte / sizeof(float)); 946 | int idx = 0; 947 | //priorbox 948 | for (int s = 0; s < num_sizes; s++) 949 | { 950 | float min_size_ = pWinSizes[s]; 951 | float box_width, box_height; 952 | box_width = box_height = min_size_; 953 | 954 | float center_x = w * step_w + step_w / 2.0f; 955 | float center_y = h * step_h + step_h / 2.0f; 956 | // xmin 957 | pOut[idx++] = (center_x - box_width / 2.f) / image_width; 958 | // ymin 959 | pOut[idx++] = (center_y - box_height / 2.f) / image_height; 960 | // xmax 961 | pOut[idx++] = (center_x + box_width / 2.f) / image_width; 962 | // ymax 963 | pOut[idx++] = (center_y + box_height / 2.f) / image_height; 964 | 965 | } 966 | } 967 | } 968 | 969 | 970 | return true; 971 | } 972 | 973 | bool normalize(CDataBlob * inputOutputData, float * pScale) 974 | { 975 | if ((inputOutputData->data_float == NULL) || pScale == NULL) 976 | { 977 | cerr << __FUNCTION__ << ": The input data is null." 
<< endl; 978 | return false; 979 | } 980 | 981 | 982 | for (int row = 0; row < inputOutputData->height; row++) 983 | { 984 | for (int col = 0; col < inputOutputData->width; col++) 985 | { 986 | float * pData = (float*)(inputOutputData->data_float + (row*inputOutputData->width + col)*inputOutputData->floatChannelStepInByte / sizeof(float)); 987 | float sum = FLT_EPSILON; 988 | float s = 0; 989 | #if defined(_ENABLE_NEON) 990 | float32x4_t a, b, cscale; 991 | float32x4_t result_vec; 992 | for (int ch = 0; ch < inputOutputData->channels; ch += 4) 993 | { 994 | a = vld1q_f32(pData + ch); 995 | result_vec = vmulq_f32(a, a); 996 | sum += vgetq_lane_f32(result_vec, 0); 997 | sum += vgetq_lane_f32(result_vec, 1); 998 | sum += vgetq_lane_f32(result_vec, 2); 999 | sum += vgetq_lane_f32(result_vec, 3); 1000 | } 1001 | 1002 | s = 1.0f/sqrt(sum); 1003 | cscale = vdupq_n_f32(s); 1004 | 1005 | for (int ch = 0; ch < inputOutputData->channels; ch += 4) 1006 | { 1007 | a = vld1q_f32(pData + ch); 1008 | b = vld1q_f32(pScale + ch); 1009 | 1010 | result_vec = vmulq_f32(a, b); 1011 | result_vec = vmulq_f32(result_vec, cscale); 1012 | vst1q_f32(pData + ch, result_vec); 1013 | } 1014 | #elif defined(_ENABLE_AVX2) 1015 | __m256 a, b, cscale; 1016 | __m256 result_vec; 1017 | for (int ch = 0; ch < inputOutputData->channels; ch += 8) 1018 | { 1019 | a = _mm256_load_ps(pData + ch); 1020 | a = _mm256_mul_ps(a, a); 1021 | a = _mm256_hadd_ps(a, a); 1022 | a = _mm256_hadd_ps(a, a); 1023 | sum += SSE_256ELEMENT(a, 0); 1024 | sum += SSE_256ELEMENT(a, 4); 1025 | } 1026 | 1027 | s = 1.0f / sqrt(sum); 1028 | cscale = _mm256_set1_ps(s); 1029 | 1030 | for (int ch = 0; ch < inputOutputData->channels; ch += 8) 1031 | { 1032 | a = _mm256_load_ps(pData + ch); 1033 | b = _mm256_load_ps(pScale + ch); 1034 | 1035 | result_vec = _mm256_mul_ps(a, b); 1036 | result_vec = _mm256_mul_ps(result_vec, cscale); 1037 | _mm256_store_ps(pData + ch, result_vec); 1038 | } 1039 | #else 1040 | 1041 | #if 
defined(_ENABLE_OPENMP_SIMD) 1042 | #pragma omp simd reduction(+:sum) 1043 | #endif 1044 | for (int ch = 0; ch < inputOutputData->channels; ch++) 1045 | sum += (pData[ch] * pData[ch]); 1046 | 1047 | s = 1.0f/sqrt(sum); 1048 | #if defined(_ENABLE_OPENMP_SIMD) 1049 | #pragma omp simd 1050 | #endif 1051 | for (int ch = 0; ch < inputOutputData->channels; ch++) 1052 | pData[ch] = pData[ch] * pScale[ch] * s; 1053 | #endif 1054 | } 1055 | } 1056 | return true; 1057 | 1058 | } 1059 | 1060 | bool softmax1vector2class(const CDataBlob *inputOutputData) 1061 | { 1062 | if (inputOutputData->data_float == NULL) 1063 | { 1064 | cerr << __FUNCTION__ << ": The input data is null." << endl; 1065 | return false; 1066 | } 1067 | 1068 | if(inputOutputData->width != 1 || inputOutputData->height != 1) 1069 | { 1070 | cerr << __FUNCTION__ << ": The input data must be Cx1x1." << endl; 1071 | return false; 1072 | } 1073 | 1074 | int num = inputOutputData->channels; 1075 | float * pData = (inputOutputData->data_float); 1076 | 1077 | #if defined(_OPENMP) 1078 | #pragma omp parallel for 1079 | #endif 1080 | for(int i = 0; i < num; i+= 2) 1081 | { 1082 | float v1 = pData[i]; 1083 | float v2 = pData[i+1]; 1084 | float vm = MAX(v1, v2); 1085 | v1 -= vm; 1086 | v2 -= vm; 1087 | v1 = expf(v1); 1088 | v2 = expf(v2); 1089 | vm = v1 + v2; 1090 | pData[i] = v1/vm; 1091 | pData[i+1] = v2/vm; 1092 | } 1093 | return true; 1094 | } 1095 | 1096 | bool blob2vector(const CDataBlob * inputData, CDataBlob * outputData, bool isFloat) 1097 | { 1098 | if (inputData->data_float == NULL) 1099 | { 1100 | cerr << __FUNCTION__ << ": The input data is null." 
<< endl; 1101 | return false; 1102 | } 1103 | 1104 | outputData->create(1, 1, inputData->width * inputData->height * inputData->channels); 1105 | 1106 | if (isFloat) 1107 | { 1108 | int bytesOfAChannel = inputData->channels * sizeof(float); 1109 | float * pOut = outputData->data_float; 1110 | for (int row = 0; row < inputData->height; row++) 1111 | { 1112 | for (int col = 0; col < inputData->width; col++) 1113 | { 1114 | float * pIn = (inputData->data_float + (row*inputData->width + col)*inputData->floatChannelStepInByte / sizeof(float)); 1115 | memcpy(pOut, pIn, bytesOfAChannel); 1116 | pOut += inputData->channels; 1117 | } 1118 | } 1119 | } 1120 | else 1121 | { 1122 | int bytesOfAChannel = inputData->channels * sizeof(char); 1123 | signed char * pOut = outputData->data_int8; 1124 | for (int row = 0; row < inputData->height; row++) 1125 | { 1126 | for (int col = 0; col < inputData->width; col++) 1127 | { 1128 | signed char * pIn = (inputData->data_int8 + (row*inputData->width + col)*inputData->int8ChannelStepInByte / sizeof(char)); 1129 | memcpy(pOut, pIn, bytesOfAChannel); 1130 | pOut += inputData->channels; 1131 | } 1132 | } 1133 | } 1134 | 1135 | return true; 1136 | 1137 | } 1138 | 1139 | void IntersectBBox(const NormalizedBBox& bbox1, const NormalizedBBox& bbox2, 1140 | NormalizedBBox* intersect_bbox) 1141 | { 1142 | if (bbox2.xmin > bbox1.xmax || bbox2.xmax < bbox1.xmin || 1143 | bbox2.ymin > bbox1.ymax || bbox2.ymax < bbox1.ymin) 1144 | { 1145 | // Return [0, 0, 0, 0] if there is no intersection. 
1146 | intersect_bbox->xmin = 0; 1147 | intersect_bbox->ymin = 0; 1148 | intersect_bbox->xmax = 0; 1149 | intersect_bbox->ymax = 0; 1150 | } 1151 | else 1152 | { 1153 | intersect_bbox->xmin = (std::max(bbox1.xmin, bbox2.xmin)); 1154 | intersect_bbox->ymin = (std::max(bbox1.ymin, bbox2.ymin)); 1155 | intersect_bbox->xmax = (std::min(bbox1.xmax, bbox2.xmax)); 1156 | intersect_bbox->ymax = (std::min(bbox1.ymax, bbox2.ymax)); 1157 | } 1158 | } 1159 | 1160 | float JaccardOverlap(const NormalizedBBox& bbox1, const NormalizedBBox& bbox2) 1161 | { 1162 | NormalizedBBox intersect_bbox; 1163 | IntersectBBox(bbox1, bbox2, &intersect_bbox); 1164 | float intersect_width, intersect_height; 1165 | intersect_width = intersect_bbox.xmax - intersect_bbox.xmin; 1166 | intersect_height = intersect_bbox.ymax - intersect_bbox.ymin; 1167 | 1168 | if (intersect_width > 0 && intersect_height > 0) 1169 | { 1170 | float intersect_size = intersect_width * intersect_height; 1171 | float bsize1 = (bbox1.xmax - bbox1.xmin)*(bbox1.ymax - bbox1.ymin); 1172 | float bsize2 = (bbox2.xmax - bbox2.xmin)*(bbox2.ymax - bbox2.ymin); 1173 | return intersect_size / ( bsize1 + bsize2 - intersect_size); 1174 | } 1175 | else 1176 | { 1177 | return 0.f; 1178 | } 1179 | } 1180 | 1181 | bool SortScoreBBoxPairDescend(const pair& pair1, const pair& pair2) 1182 | { 1183 | return pair1.first > pair2.first; 1184 | } 1185 | 1186 | 1187 | bool detection_output(const CDataBlob * priorbox, const CDataBlob * loc, const CDataBlob * conf, float overlap_threshold, float confidence_threshold, int top_k, int keep_top_k, CDataBlob * outputData) 1188 | { 1189 | if (priorbox->data_float == NULL || loc->data_float == NULL || conf->data_float == NULL) 1190 | { 1191 | cerr << __FUNCTION__ << ": The input data is null." << endl; 1192 | return 0; 1193 | } 1194 | 1195 | if (priorbox->channels != loc->channels || loc->channels != conf->channels*2 ) 1196 | { 1197 | cerr << __FUNCTION__ << ": The sizes of the inputs are not match." 
<< endl; 1198 | return 0; 1199 | } 1200 | 1201 | float prior_variance[4] = {0.1f, 0.1f, 0.2f, 0.2f}; 1202 | float * pPriorBox = priorbox->data_float; 1203 | float * pLoc = loc->data_float; 1204 | float * pConf = conf->data_float; 1205 | 1206 | vector > score_bbox_vec; 1207 | vector > final_score_bbox_vec; 1208 | 1209 | //get the candidates those are > confidence_threshold 1210 | for(int i = 1; i < conf->channels; i+=2) 1211 | { 1212 | if(pConf[i] > confidence_threshold) 1213 | { 1214 | float fx1 = pPriorBox[i*2-2]; 1215 | float fy1 = pPriorBox[i*2-1]; 1216 | float fx2 = pPriorBox[i*2]; 1217 | float fy2 = pPriorBox[i*2+1]; 1218 | 1219 | float locx1 = pLoc[i * 2 - 2]; 1220 | float locy1 = pLoc[i * 2 - 1]; 1221 | float locx2 = pLoc[i * 2]; 1222 | float locy2 = pLoc[i * 2 + 1]; 1223 | 1224 | float prior_width = fx2 - fx1; 1225 | float prior_height = fy2 - fy1; 1226 | float prior_center_x = (fx1 + fx2)/2; 1227 | float prior_center_y = (fy1 + fy2)/2; 1228 | 1229 | float box_centerx = prior_variance[0] * locx1 * prior_width + prior_center_x; 1230 | float box_centery = prior_variance[1] * locy1 * prior_height + prior_center_y; 1231 | float box_width = expf(prior_variance[2] * locx2) * prior_width; 1232 | float box_height = expf(prior_variance[3] * locy2) * prior_height; 1233 | 1234 | fx1 = box_centerx - box_width / 2.f; 1235 | fy1 = box_centery - box_height /2.f; 1236 | fx2 = box_centerx + box_width / 2.f; 1237 | fy2 = box_centery + box_height /2.f; 1238 | 1239 | fx1 = MAX(0, fx1); 1240 | fy1 = MAX(0, fy1); 1241 | fx2 = MIN(1.f, fx2); 1242 | fy2 = MIN(1.f, fy2); 1243 | 1244 | NormalizedBBox bb; 1245 | bb.xmin = fx1; 1246 | bb.ymin = fy1; 1247 | bb.xmax = fx2; 1248 | bb.ymax = fy2; 1249 | 1250 | score_bbox_vec.push_back(std::make_pair(pConf[i], bb)); 1251 | } 1252 | } 1253 | 1254 | //Sort the score pair according to the scores in descending order 1255 | std::stable_sort(score_bbox_vec.begin(), score_bbox_vec.end(), SortScoreBBoxPairDescend); 1256 | 1257 | // Keep top_k 
scores if needed. 1258 | if (top_k > -1 && top_k < score_bbox_vec.size()) { 1259 | score_bbox_vec.resize(top_k); 1260 | } 1261 | 1262 | //Do NMS 1263 | final_score_bbox_vec.clear(); 1264 | while (score_bbox_vec.size() != 0) { 1265 | const NormalizedBBox bb1 = score_bbox_vec.front().second; 1266 | bool keep = true; 1267 | for (int k = 0; k < final_score_bbox_vec.size(); ++k) 1268 | { 1269 | if (keep) 1270 | { 1271 | const NormalizedBBox bb2 = final_score_bbox_vec[k].second; 1272 | float overlap = JaccardOverlap(bb1, bb2); 1273 | keep = (overlap <= overlap_threshold); 1274 | } 1275 | else 1276 | { 1277 | break; 1278 | } 1279 | } 1280 | if (keep) { 1281 | final_score_bbox_vec.push_back(score_bbox_vec.front()); 1282 | } 1283 | score_bbox_vec.erase(score_bbox_vec.begin()); 1284 | } 1285 | if (keep_top_k > -1 && keep_top_k < final_score_bbox_vec.size()) { 1286 | final_score_bbox_vec.resize(keep_top_k); 1287 | } 1288 | 1289 | //copy the results to the output blob 1290 | int num_faces = (int)final_score_bbox_vec.size(); 1291 | if (num_faces == 0) 1292 | outputData->setNULL(); 1293 | else 1294 | { 1295 | outputData->create(num_faces, 1, 5); 1296 | for (int fi = 0; fi < num_faces; fi++) 1297 | { 1298 | pair pp = final_score_bbox_vec[fi]; 1299 | float * pOut = (outputData->data_float + fi * outputData->floatChannelStepInByte / sizeof(float)); 1300 | pOut[0] = pp.first; 1301 | pOut[1] = pp.second.xmin; 1302 | pOut[2] = pp.second.ymin; 1303 | pOut[3] = pp.second.xmax; 1304 | pOut[4] = pp.second.ymax; 1305 | } 1306 | } 1307 | 1308 | return true; 1309 | } 1310 | 1311 | -------------------------------------------------------------------------------- /libfacedetection/src/facedetectcnn.h: -------------------------------------------------------------------------------- 1 | /* 2 | By downloading, copying, installing or using the software you agree to this license. 3 | If you do not agree to this license, do not download, install, 4 | copy or use the software. 
5 | 6 | 7 | License Agreement For libfacedetection 8 | (3-clause BSD License) 9 | 10 | Copyright (c) 2018-2019, Shiqi Yu, all rights reserved. 11 | shiqi.yu@gmail.com 12 | 13 | Redistribution and use in source and binary forms, with or without modification, 14 | are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright notice, 20 | this list of conditions and the following disclaimer in the documentation 21 | and/or other materials provided with the distribution. 22 | 23 | * Neither the names of the copyright holders nor the names of the contributors 24 | may be used to endorse or promote products derived from this software 25 | without specific prior written permission. 26 | 27 | This software is provided by the copyright holders and contributors "as is" and 28 | any express or implied warranties, including, but not limited to, the implied 29 | warranties of merchantability and fitness for a particular purpose are disclaimed. 30 | In no event shall copyright holders or contributors be liable for any direct, 31 | indirect, incidental, special, exemplary, or consequential damages 32 | (including, but not limited to, procurement of substitute goods or services; 33 | loss of use, data, or profits; or business interruption) however caused 34 | and on any theory of liability, whether in contract, strict liability, 35 | or tort (including negligence or otherwise) arising in any way out of 36 | the use of this software, even if advised of the possibility of such damage. 
37 | */ 38 | 39 | #pragma once 40 | 41 | //#define _ENABLE_AVX2 //Please enable it if X64 CPU 42 | //#define _ENABLE_NEON //Please enable it if ARM CPU 43 | 44 | 45 | int * facedetect_cnn(unsigned char * result_buffer, //buffer memory for storing face detection results, !!its size must be 0x20000 Bytes!! 46 | unsigned char * rgb_image_data, int width, int height, int step); //input image, it must be RGB (three-channel) image! 47 | 48 | 49 | 50 | 51 | //DO NOT EDIT the following code if you don't really understand it. 52 | 53 | #if defined(_ENABLE_AVX2) 54 | #include 55 | #endif 56 | 57 | #if defined(_ENABLE_NEON) 58 | #include "arm_neon.h" 59 | #define _ENABLE_INT8_CONV 60 | #endif 61 | 62 | #if defined(_ENABLE_AVX2) 63 | #define _MALLOC_ALIGN 256 64 | #else 65 | #define _MALLOC_ALIGN 128 66 | #endif 67 | 68 | #if defined(_ENABLE_AVX2)&& defined(_ENABLE_NEON) 69 | #error Cannot enable the two of SSE2 AVX and NEON at the same time. 70 | #endif 71 | 72 | 73 | #if defined(_OPENMP) 74 | #include 75 | #endif 76 | 77 | 78 | #include 79 | #include 80 | #include 81 | 82 | using namespace std; 83 | 84 | void* myAlloc(size_t size); 85 | void myFree_(void* ptr); 86 | #define myFree(ptr) (myFree_(*(ptr)), *(ptr)=0); 87 | 88 | #ifndef MIN 89 | # define MIN(a,b) ((a) > (b) ? (b) : (a)) 90 | #endif 91 | 92 | #ifndef MAX 93 | # define MAX(a,b) ((a) < (b) ? 
(b) : (a)) 94 | #endif 95 | 96 | typedef struct FaceRect_ 97 | { 98 | float score; 99 | int x; 100 | int y; 101 | int w; 102 | int h; 103 | }FaceRect; 104 | 105 | 106 | class CDataBlob 107 | { 108 | public: 109 | float * data_float; 110 | signed char * data_int8; 111 | int width; 112 | int height; 113 | int channels; 114 | int floatChannelStepInByte; 115 | int int8ChannelStepInByte; 116 | float int8float_scale; 117 | bool int8_data_valid; 118 | public: 119 | CDataBlob() { 120 | data_float = 0; 121 | data_int8 = 0; 122 | width = 0; 123 | height = 0; 124 | channels = 0; 125 | floatChannelStepInByte = 0; 126 | int8ChannelStepInByte = 0; 127 | int8float_scale = 1.0f; 128 | int8_data_valid = false; 129 | } 130 | CDataBlob(int w, int h, int c) 131 | { 132 | data_float = 0; 133 | data_int8 = 0; 134 | create(w, h, c); 135 | } 136 | ~CDataBlob() 137 | { 138 | setNULL(); 139 | } 140 | 141 | void setNULL() 142 | { 143 | if (data_float) 144 | myFree(&data_float); 145 | if (data_int8) 146 | myFree(&data_int8); 147 | width = height = channels = floatChannelStepInByte = int8ChannelStepInByte = 0; 148 | int8float_scale = 1.0f; 149 | int8_data_valid = false; 150 | } 151 | bool create(int w, int h, int c) 152 | { 153 | setNULL(); 154 | 155 | width = w; 156 | height = h; 157 | channels = c; 158 | //alloc space for float array 159 | int remBytes = (sizeof(float)* channels) % (_MALLOC_ALIGN / 8); 160 | if (remBytes == 0) 161 | floatChannelStepInByte = channels * sizeof(float); 162 | else 163 | floatChannelStepInByte = (channels * sizeof(float)) + (_MALLOC_ALIGN / 8) - remBytes; 164 | data_float = (float*)myAlloc(width * height * floatChannelStepInByte); 165 | 166 | //alloc space for int8 array 167 | remBytes = (sizeof(char)* channels) % (_MALLOC_ALIGN / 8); 168 | if (remBytes == 0) 169 | int8ChannelStepInByte = channels * sizeof(char); 170 | else 171 | int8ChannelStepInByte = (channels * sizeof(char)) + (_MALLOC_ALIGN / 8) - remBytes; 172 | data_int8 = (signed char*)myAlloc(width * 
height * int8ChannelStepInByte); 173 | 174 | if (data_float == NULL) 175 | { 176 | cerr << "Cannot alloc memeory for float data blob: " 177 | << width << "*" 178 | << height << "*" 179 | << channels << endl; 180 | return false; 181 | } 182 | 183 | if (data_int8 == NULL) 184 | { 185 | cerr << "Cannot alloc memeory for uint8 data blob: " 186 | << width << "*" 187 | << height << "*" 188 | << channels << endl; 189 | return false; 190 | } 191 | 192 | //memset(data_float, 0, width * height * floatChannelStepInByte); 193 | //memset(data_int8, 0, width * height * int8ChannelStepInByte); 194 | 195 | //the following code is faster than memset 196 | //but not only the padding bytes are set to zero. 197 | //BE CAREFUL!!! 198 | //#if defined(_OPENMP) 199 | //#pragma omp parallel for 200 | //#endif 201 | for (int r = 0; r < this->height; r++) 202 | { 203 | for (int c = 0; c < this->width; c++) 204 | { 205 | int pixel_end = this->floatChannelStepInByte / sizeof(float); 206 | float * pF = (float*)(this->data_float + (r * this->width + c) * this->floatChannelStepInByte/sizeof(float)); 207 | for (int ch = this->channels; ch < pixel_end; ch++) 208 | pF[ch] = 0; 209 | 210 | pixel_end = this->int8ChannelStepInByte / sizeof(char); 211 | char * pI = (char*)(this->data_int8 + (r * this->width + c) * this->int8ChannelStepInByte/sizeof(char)); 212 | for (int ch = this->channels; ch < pixel_end; ch++) 213 | pI[ch] = 0; 214 | } 215 | } 216 | 217 | return true; 218 | } 219 | 220 | bool setInt8DataFromCaffeFormat(signed char * pData, int dataWidth, int dataHeight, int dataChannels) 221 | { 222 | if (pData == NULL) 223 | { 224 | cerr << "The input image data is null." << endl; 225 | return false; 226 | } 227 | if (dataWidth != this->width || 228 | dataHeight != this->height || 229 | dataChannels != this->channels) 230 | { 231 | cerr << "The dim of the data can not match that of the Blob." 
<< endl; 232 | return false; 233 | } 234 | //create(dataWidth, dataHeight, dataChannels); 235 | 236 | for(int row = 0; row < height; row++) 237 | for (int col = 0; col < width; col++) 238 | { 239 | signed char * p = (this->data_int8 + (width * row + col) * int8ChannelStepInByte /sizeof(char)); 240 | for (int ch = 0; ch < channels; ch++) 241 | { 242 | p[ch] = pData[ch * height * width + row * width + col]; 243 | } 244 | } 245 | return true; 246 | } 247 | bool setFloatDataFromCaffeFormat(float * pData, int dataWidth, int dataHeight, int dataChannels) 248 | { 249 | if (pData == NULL) 250 | { 251 | cerr << "The input image data is null." << endl; 252 | return false; 253 | } 254 | if (dataWidth != this->width || 255 | dataHeight != this->height || 256 | dataChannels != this->channels) 257 | { 258 | cerr << "The dim of the data can not match that of the Blob." << endl; 259 | return false; 260 | } 261 | //create(dataWidth, dataHeight, dataChannels); 262 | 263 | for (int row = 0; row < height; row++) 264 | for (int col = 0; col < width; col++) 265 | { 266 | float * p = (this->data_float + (width * row + col) * floatChannelStepInByte / sizeof(float)); 267 | for (int ch = 0; ch < channels; ch++) 268 | { 269 | p[ch] = pData[ch * height * width + row * width + col]; 270 | } 271 | } 272 | return true; 273 | } 274 | 275 | bool setDataFromImage(const unsigned char * imgData, int imgWidth, int imgHeight, int imgChannels, int imgWidthStep, 276 | int * pChannelMean) 277 | { 278 | if (imgData == NULL) 279 | { 280 | cerr << "The input image data is null." << endl; 281 | return false; 282 | } 283 | if (pChannelMean == NULL) 284 | { 285 | cerr << "The mean values is null." 
<< endl; 286 | return false; 287 | } 288 | create(imgWidth, imgHeight, imgChannels); 289 | 290 | //#if defined(_OPENMP) 291 | //#pragma omp parallel for 292 | //#endif 293 | for (int r = 0; r < imgHeight; r++) 294 | { 295 | for (int c = 0; c < imgWidth; c++) 296 | { 297 | const unsigned char * pImgData = imgData + imgWidthStep * r + imgChannels * c; 298 | float * pBlobData = this->data_float + (this->width * r + c) * this->floatChannelStepInByte /sizeof(float); 299 | for (int ch = 0; ch < imgChannels; ch++) 300 | pBlobData[ch] = (float)(pImgData[ch] - pChannelMean[ch]); 301 | } 302 | } 303 | return true; 304 | } 305 | bool setDataFrom3x3S2P1to1x1S1P0FromImage(const unsigned char * imgData, int imgWidth, int imgHeight, int imgChannels, int imgWidthStep, 306 | int * pChannelMean) 307 | { 308 | if (imgData == NULL) 309 | { 310 | cerr << "The input image data is null." << endl; 311 | return false; 312 | } 313 | if (pChannelMean == NULL) 314 | { 315 | cerr << "The mean values is null." << endl; 316 | return false; 317 | } 318 | if (imgChannels != 3) 319 | { 320 | cerr << "The input image must be a 3-channel RGB image." << endl; 321 | return false; 322 | } 323 | 324 | create((imgWidth+1)/2, (imgHeight+1)/2, 27); 325 | //since the pixel assignment cannot fill all the elements in the blob. 
326 | //some elements in the blob should be initialized to 0 327 | memset(data_float, 0, width * height * floatChannelStepInByte); 328 | 329 | #if defined(_OPENMP) 330 | #pragma omp parallel for 331 | #endif 332 | for (int r = 0; r < this->height; r++) 333 | { 334 | for (int c = 0; c < this->width; c++) 335 | { 336 | float * pData = this->data_float + (r * this->width + c) * this->floatChannelStepInByte / sizeof(float); 337 | for (int fy = -1; fy <= 1; fy++) 338 | { 339 | int srcy = r * 2 + fy; 340 | 341 | if (srcy < 0 || srcy >= imgHeight) //out of the range of the image 342 | continue; 343 | 344 | for (int fx = -1; fx <= 1; fx++) 345 | { 346 | int srcx = c * 2 + fx; 347 | 348 | if (srcx < 0 || srcx >= imgWidth) //out of the range of the image 349 | continue; 350 | 351 | const unsigned char * pImgData = imgData + imgWidthStep * srcy + imgChannels * srcx; 352 | 353 | int output_channel_offset = ((fy + 1) * 3 + fx + 1) * 3; //3x3 filters, 3-channel image 354 | 355 | pData[output_channel_offset] = (float)(pImgData[0] - pChannelMean[0]); 356 | pData[output_channel_offset+1] = (float)(pImgData[1] - pChannelMean[1]); 357 | pData[output_channel_offset+2] = (float)(pImgData[2] - pChannelMean[2]); 358 | 359 | } 360 | 361 | } 362 | } 363 | } 364 | return true; 365 | } 366 | float getElementFloat(int x, int y, int channel) 367 | { 368 | if (this->data_float) 369 | { 370 | if (x >= 0 && x < this->width && 371 | y >= 0 && y < this->height && 372 | channel >= 0 && channel < this->channels) 373 | { 374 | float * p = (float*)(this->data_float + (y*this->width + x)*this->floatChannelStepInByte / sizeof(float)); 375 | return p[channel]; 376 | } 377 | } 378 | 379 | return 0.f; 380 | } 381 | int getElementint8(int x, int y, int channel) 382 | { 383 | if (this->data_int8 && this->int8_data_valid) 384 | { 385 | if (x >= 0 && x < this->width && 386 | y >= 0 && y < this->height && 387 | channel >= 0 && channel < this->channels) 388 | { 389 | signed char * p = this->data_int8 + 
(y*this->width + x)*this->int8ChannelStepInByte/sizeof(char); 390 | return p[channel]; 391 | } 392 | } 393 | 394 | return 0; 395 | } 396 | 397 | friend ostream &operator<<(ostream &output, const CDataBlob &dataBlob) 398 | { 399 | output << "DataBlob Size (Width, Height, Channel) = (" 400 | << dataBlob.width 401 | << ", " << dataBlob.height 402 | << ", " << dataBlob.channels 403 | << ")" << endl; 404 | for (int ch = 0; ch < dataBlob.channels; ch++) 405 | { 406 | output << "Channel " << ch << ": " << endl; 407 | 408 | for (int row = 0; row < dataBlob.height; row++) 409 | { 410 | output << "("; 411 | for (int col = 0; col < dataBlob.width; col++) 412 | { 413 | float * p = (dataBlob.data_float + (dataBlob.width * row + col) * dataBlob.floatChannelStepInByte/sizeof(float)); 414 | output << p[ch]; 415 | if (col != dataBlob.width - 1) 416 | output << ", "; 417 | } 418 | output << ")" << endl; 419 | } 420 | } 421 | 422 | return output; 423 | } 424 | }; 425 | 426 | class Filters { 427 | public: 428 | vector filters; 429 | int pad; 430 | int stride; 431 | float scale; //element * scale = original value 432 | }; 433 | 434 | bool convolution(CDataBlob *inputData, const Filters* filters, CDataBlob *outputData); 435 | bool maxpooling2x2S2(const CDataBlob *inputData, CDataBlob *outputData); 436 | bool concat4(const CDataBlob *inputData1, const CDataBlob *inputData2, const CDataBlob *inputData3, const CDataBlob *inputData4, CDataBlob *outputData); 437 | bool scale(CDataBlob * dataBlob, float scale); 438 | bool relu(const CDataBlob *inputOutputData); 439 | bool priorbox(const CDataBlob * featureData, const CDataBlob * imageData, int num_sizes, float * pWinSizes, CDataBlob * outputData); 440 | bool normalize(CDataBlob * inputOutputData, float * pScale); 441 | bool blob2vector(const CDataBlob * inputData, CDataBlob * outputData, bool isFloat); 442 | bool detection_output(const CDataBlob * priorbox, const CDataBlob * loc, const CDataBlob * conf, float overlap_threshold, float 
confidence_threshold, int top_k, int keep_top_k, CDataBlob * outputData); 443 | /* the input data for softmax must be a vector, the data stored in a multi-channel blob with size 1x1 */ 444 | bool softmax1vector2class(const CDataBlob *inputOutputData); 445 | 446 | vector objectdetect_cnn(unsigned char * rgbImageData, int with, int height, int step); 447 | -------------------------------------------------------------------------------- /libfacedetection_capi.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2019 . All rights reserved. 2 | // Use of this source code is governed by a Apache-style 3 | // license that can be found in the LICENSE file. 4 | 5 | extern "C" { 6 | #include "libfacedetection_capi.h" 7 | } 8 | 9 | #include "./libfacedetection/src/facedetectcnn.h" 10 | 11 | #include 12 | 13 | // define the buffer size. Do not change the size! 14 | #define DETECT_BUFFER_SIZE 0x20000 15 | 16 | struct libfacedetection_capi_result_t { 17 | std::string *sBuffer; 18 | int * result; 19 | 20 | libfacedetection_capi_result_t(std::string* s, int* p): sBuffer(s), result(p) { 21 | // 22 | } 23 | ~libfacedetection_capi_result_t() { 24 | delete this->sBuffer; 25 | } 26 | }; 27 | 28 | void libfacedetection_capi_result_free( 29 | libfacedetection_capi_result_t* p 30 | ) { 31 | delete p; 32 | } 33 | 34 | libfacedetection_capi_result_t* libfacedetection_capi_facedetect_rgb( 35 | uint8_t * rgb, int width, int height, int step 36 | ) { 37 | std::string* sBuffer = new std::string(); 38 | sBuffer->resize(DETECT_BUFFER_SIZE); 39 | 40 | unsigned char* pBuffer = (unsigned char *)sBuffer->data(); 41 | int* pResults = facedetect_cnn(pBuffer, rgb, width, height, step); 42 | return new libfacedetection_capi_result_t(sBuffer, pResults); 43 | } 44 | 45 | libfacedetection_capi_result_t* libfacedetection_capi_facedetect_rgba( 46 | uint8_t* rgba, int width, int height, int step 47 | ) { 48 | std::string rgbBuffer; 49 | 
rgbBuffer.resize(width*height*3); 50 | 51 | uint8_t* rgb = (uint8_t*)rgbBuffer.data(); 52 | for(int y = 0; y < height; y++) { 53 | for(int x = 0; x < width; x++) { 54 | *rgb++ = *rgba++; // r 55 | *rgb++ = *rgba++; // g 56 | *rgb++ = *rgba++; // b 57 | rgba++; // a 58 | } 59 | } 60 | 61 | std::string* sBuffer = new std::string(); 62 | sBuffer->resize(DETECT_BUFFER_SIZE); 63 | 64 | unsigned char* pBuffer = (unsigned char *)sBuffer->data(); 65 | int* pResults = facedetect_cnn(pBuffer, rgb, width, height, step); 66 | return new libfacedetection_capi_result_t(sBuffer, pResults); 67 | } 68 | 69 | int libfacedetection_capi_result_len(libfacedetection_capi_result_t* self) { 70 | int* pResults = self->result; 71 | return pResults? *pResults: 0; 72 | } 73 | 74 | libfacedetection_capi_bool_t libfacedetection_capi_result_get( 75 | libfacedetection_capi_result_t* self, int i, 76 | libfacedetection_capi_face_t* face 77 | ) { 78 | int* pResults = self->result; 79 | int n = pResults? *pResults: 0; 80 | 81 | if(i < 0 || i >= n) return 0; 82 | 83 | short * p = ((short*)(pResults+1))+142*i; 84 | face->x = p[0]; 85 | face->y = p[1]; 86 | face->w = p[2]; 87 | face->h = p[3]; 88 | face->neighbors = p[4]; 89 | face->angle = p[5]; 90 | 91 | return 1; 92 | } 93 | -------------------------------------------------------------------------------- /libfacedetection_capi.h: -------------------------------------------------------------------------------- 1 | // Copyright 2019 . All rights reserved. 2 | // Use of this source code is governed by a Apache-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | #ifndef libfacedetection_capi_h_ 6 | #define libfacedetection_capi_h_ 7 | 8 | #include 9 | #include 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | typedef int libfacedetection_capi_bool_t; 16 | typedef struct libfacedetection_capi_face_t libfacedetection_capi_face_t; 17 | typedef struct libfacedetection_capi_result_t libfacedetection_capi_result_t; 18 | 19 | struct libfacedetection_capi_face_t { 20 | int x; 21 | int y; 22 | int w; 23 | int h; 24 | int neighbors; 25 | int angle; 26 | }; 27 | 28 | libfacedetection_capi_result_t* libfacedetection_capi_facedetect_rgb( 29 | uint8_t* rgb, int width, int height, int step 30 | ); 31 | libfacedetection_capi_result_t* libfacedetection_capi_facedetect_rgba( 32 | uint8_t* rgba, int width, int height, int step 33 | ); 34 | 35 | int libfacedetection_capi_result_len( 36 | libfacedetection_capi_result_t* p 37 | ); 38 | libfacedetection_capi_bool_t libfacedetection_capi_result_get( 39 | libfacedetection_capi_result_t* p, int i, 40 | libfacedetection_capi_face_t* face 41 | ); 42 | void libfacedetection_capi_result_free( 43 | libfacedetection_capi_result_t* p 44 | ); 45 | 46 | #ifdef __cplusplus 47 | } 48 | #endif 49 | #endif // libfacedetection_capi_h_ 50 | -------------------------------------------------------------------------------- /z_facedetectcnn_cc.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2019 . All rights reserved. 2 | // Use of this source code is governed by a Apache-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #include "./libfacedetection/src/facedetectcnn.cpp" 6 | -------------------------------------------------------------------------------- /z_facedetectcnn_floatdata.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2019 . All rights reserved. 
// Use of this source code is governed by an Apache-style
// license that can be found in the LICENSE file.

// Build shim: compiles the vendored libfacedetection source as part of
// this package.
#include "./libfacedetection/src/facedetectcnn-floatdata.cpp"
--------------------------------------------------------------------------------
/z_facedetectcnn_int8data.cc:
--------------------------------------------------------------------------------
// Copyright 2019 . All rights reserved.
// Use of this source code is governed by an Apache-style
// license that can be found in the LICENSE file.

// Build shim: compiles the vendored libfacedetection source as part of
// this package.
#include "./libfacedetection/src/facedetectcnn-int8data.cpp"
--------------------------------------------------------------------------------
/z_facedetectcnn_model.cc:
--------------------------------------------------------------------------------
// Copyright 2019 . All rights reserved.
// Use of this source code is governed by an Apache-style
// license that can be found in the LICENSE file.

// Build shim: compiles the vendored libfacedetection model-weights source
// as part of this package.
#include "./libfacedetection/src/facedetectcnn-model.cpp"
--------------------------------------------------------------------------------