├── .editorconfig ├── .gitignore ├── LICENSE ├── README.md ├── go.mod ├── hello.go ├── keliamoniz2.out.png ├── libfacedetection.go ├── libfacedetection ├── CMakeLists.txt ├── ChangeLog ├── LICENSE ├── README.md ├── aarch64-toolchain.cmake ├── example │ ├── libfacedetect.cpp │ └── libfacedetectcnn-example.cpp ├── images │ ├── chloecalmon.png │ ├── cnnresult.png │ ├── keliamoniz1.jpg │ └── keliamoniz2.jpg ├── models │ ├── README.md │ ├── caffe │ │ ├── yufacedetectnet-open-v1.caffemodel │ │ ├── yufacedetectnet-open-v1.prototxt │ │ ├── yufacedetectnet-open-v1.solver.prototxt │ │ └── yufacedetectnet-open-v1.train.prototxt │ └── openvino │ │ ├── yufacedetectnet-open-v1-320x240.bin │ │ └── yufacedetectnet-open-v1-320x240.xml └── src │ ├── facedetectcnn-floatdata.cpp │ ├── facedetectcnn-int8data.cpp │ ├── facedetectcnn-model.cpp │ ├── facedetectcnn.cpp │ └── facedetectcnn.h ├── libfacedetection_capi.cc ├── libfacedetection_capi.h ├── z_facedetectcnn_cc.cc ├── z_facedetectcnn_floatdata.cc ├── z_facedetectcnn_int8data.cc └── z_facedetectcnn_model.cc /.editorconfig: -------------------------------------------------------------------------------- 1 | # Copyright 2018 . All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 
4 | 5 | # http://editorconfig.org/ 6 | 7 | root = true 8 | 9 | # Unix-style newlines with a newline ending every file 10 | [*] 11 | charset = utf-8 12 | end_of_line = lf 13 | trim_trailing_whitespace = true 14 | insert_final_newline = true 15 | 16 | [*] 17 | indent_style = tab 18 | 19 | [*.{go,proto}] 20 | charset = utf-8 21 | indent_style = tab 22 | 23 | # Matches the exact files either package.json or .travis.yml 24 | [{package.json,.travis.yml}] 25 | indent_style = space 26 | indent_size = 2 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /libfacedetection/build 2 | a.out.png 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, chai2010 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | - *Go语言QQ群: 102319854, 1055927514* 2 | - *凹语言(凹读音“Wa”)(The Wa Programming Language): https://github.com/wa-lang/wa* 3 | 4 | ---- 5 | 6 | # [libfacedetection](https://github.com/ShiqiYu/libfacedetection) binding for Go 7 | 8 | - https://godoc.org/github.com/chai2010/libfacedetection-go 9 | - https://github.com/ShiqiYu/libfacedetection 10 | 11 | ## Example ([hello.go](hello.go)) 12 | 13 | ```go 14 | package main 15 | 16 | import ( 17 | "github.com/chai2010/libfacedetection-go" 18 | ) 19 | 20 | func main() { 21 | m := GetImage("./libfacedetection/images/keliamoniz2.jpg") 22 | rgb, w, h := libfacedetection.NewRGBImageFrom(m) 23 | 24 | faces := libfacedetection.DetectFaceRGB(rgb, w, h, w*3) 25 | fmt.Printf("%#v\n", faces) 26 | } 27 | 28 | // output: 29 | // []libfacedetection.Face{ 30 | // libfacedetection.Face{X:183, Y:137, W:150, H:150, Neighbors:94, Angle:0} 31 | // } 32 | ``` 33 | 34 | ![](keliamoniz2.out.png) 35 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | // Copyright 2019 . All rights reserved. 2 | // Use of this source code is governed by a Apache-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | module github.com/chai2010/libfacedetection-go 6 | -------------------------------------------------------------------------------- /hello.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 . All rights reserved. 2 | // Use of this source code is governed by a Apache-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // +build ignore 6 | 7 | package main 8 | 9 | import ( 10 | "fmt" 11 | "image" 12 | "image/color" 13 | "image/draw" 14 | _ "image/jpeg" 15 | "image/png" 16 | "log" 17 | "os" 18 | 19 | "github.com/chai2010/libfacedetection-go" 20 | ) 21 | 22 | func main() { 23 | m := GetImage("./libfacedetection/images/keliamoniz2.jpg") 24 | rgb, w, h := libfacedetection.NewRGBImageFrom(m) 25 | 26 | faces := libfacedetection.DetectFaceRGB(rgb, w, h, w*3) 27 | fmt.Printf("%#v\n", faces) 28 | 29 | if len(faces) > 0 { 30 | b := m.Bounds() 31 | m2 := image.NewRGBA(b) 32 | 33 | for y := b.Min.Y; y < b.Max.Y; y++ { 34 | for x := b.Min.X; x < b.Max.X; x++ { 35 | m2.Set(x, y, m.At(x, y)) 36 | } 37 | } 38 | 39 | x1 := faces[0].X 40 | y1 := faces[0].Y 41 | x2 := faces[0].W + x1 42 | y2 := faces[0].H + y1 43 | 44 | DrawRect(m2, x1, y1, x2, y2) 45 | SaveImage(m2, "a.out.png") 46 | } 47 | } 48 | 49 | func GetImage(path string) image.Image { 50 | r, err := os.Open(path) 51 | if err != nil { 52 | log.Fatal(err) 53 | } 54 | defer r.Close() 55 | 56 | m, _, err := image.Decode(r) 57 | if err != nil { 58 | log.Fatal(err) 59 | } 60 | return m 61 | } 62 | 63 | func SaveImage(m image.Image, path string) { 64 | f, err := os.Create(path) 65 | if err != nil { 66 | log.Fatal(err) 67 | } 68 | defer f.Close() 69 | 70 | err = png.Encode(f, m) 71 | if err != nil { 72 | log.Fatal(err) 73 | } 74 | } 75 | 76 | // DrawHLine draws a horizontal line 77 | func DrawHLine(m draw.Image, x1, y, x2 int) { 78 | for ; x1 <= x2; x1++ { 79 | m.Set(x1, y, color.RGBA{0, 0, 255, 255}) 80 | } 81 | } 82 | 83 | // DrawVLine draws a veritcal 
line 84 | func DrawVLine(m draw.Image, x, y1, y2 int) { 85 | for ; y1 <= y2; y1++ { 86 | m.Set(x, y1, color.RGBA{0, 0, 255, 255}) 87 | } 88 | } 89 | 90 | // DrawRect draws a rectangle utilizing HLine() and VLine() 91 | func DrawRect(m draw.Image, x1, y1, x2, y2 int) { 92 | DrawHLine(m, x1, y1, x2) 93 | DrawHLine(m, x1, y2, x2) 94 | DrawVLine(m, x1, y1, y2) 95 | DrawVLine(m, x2, y1, y2) 96 | } 97 | -------------------------------------------------------------------------------- /keliamoniz2.out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chai2010/libfacedetection-go/80e89f84b0b18fa87abb1de4b48795e9a5505c15/keliamoniz2.out.png -------------------------------------------------------------------------------- /libfacedetection.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 . All rights reserved. 2 | // Use of this source code is governed by a Apache-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package libfacedetection 6 | 7 | //#include "libfacedetection_capi.h" 8 | import "C" 9 | import ( 10 | "image" 11 | "unsafe" 12 | ) 13 | 14 | type Face struct { 15 | X int 16 | Y int 17 | W int 18 | H int 19 | Neighbors int 20 | Angle int 21 | } 22 | 23 | func DetectFaceRGBA(m *image.RGBA) []Face { 24 | if m == nil { 25 | return nil 26 | } 27 | 28 | rv := C.libfacedetection_capi_facedetect_rgba( 29 | (*C.uint8_t)(unsafe.Pointer(&m.Pix[0])), 30 | C.int(m.Rect.Dx()), 31 | C.int(m.Rect.Dy()), 32 | C.int(m.Stride), 33 | ) 34 | defer C.libfacedetection_capi_result_free(rv) 35 | 36 | n := int(C.libfacedetection_capi_result_len(rv)) 37 | if n <= 0 { 38 | return nil 39 | } 40 | 41 | face := make([]Face, n) 42 | for i := 0; i < n; i++ { 43 | var t C.libfacedetection_capi_face_t 44 | C.libfacedetection_capi_result_get(rv, C.int(i), &t) 45 | 46 | face[i].X = int(t.x) 47 | face[i].Y = int(t.y) 48 | face[i].W = int(t.w) 49 | face[i].H = int(t.h) 50 | face[i].Neighbors = int(t.neighbors) 51 | face[i].Angle = int(t.angle) 52 | } 53 | 54 | return face 55 | } 56 | 57 | func DetectFaceRGB(rgb []byte, w, h, stride int) []Face { 58 | if len(rgb) == 0 { 59 | return nil 60 | } 61 | 62 | rv := C.libfacedetection_capi_facedetect_rgb( 63 | (*C.uint8_t)(unsafe.Pointer(&rgb[0])), 64 | C.int(w), 65 | C.int(h), 66 | C.int(stride), 67 | ) 68 | defer C.libfacedetection_capi_result_free(rv) 69 | 70 | n := int(C.libfacedetection_capi_result_len(rv)) 71 | if n <= 0 { 72 | return nil 73 | } 74 | 75 | face := make([]Face, n) 76 | for i := 0; i < n; i++ { 77 | var t C.libfacedetection_capi_face_t 78 | C.libfacedetection_capi_result_get(rv, C.int(i), &t) 79 | 80 | face[i].X = int(t.x) 81 | face[i].Y = int(t.y) 82 | face[i].W = int(t.w) 83 | face[i].H = int(t.h) 84 | face[i].Neighbors = int(t.neighbors) 85 | face[i].Angle = int(t.angle) 86 | } 87 | 88 | return face 89 | } 90 | 91 | func NewRGBImageFrom(m image.Image) (rgb []byte, w, h int) { 92 | b := m.Bounds() 93 | 94 | w = b.Dx() 95 | h = 
b.Dy() 96 | rgb = make([]byte, w*h*3) 97 | 98 | off := 0 99 | for y := b.Min.Y; y < b.Max.Y; y++ { 100 | for x := b.Min.X; x < b.Max.X; x++ { 101 | pr, pg, pb, _ := m.At(x, y).RGBA() 102 | rgb[off+0] = uint8(pr >> 8) 103 | rgb[off+1] = uint8(pg >> 8) 104 | rgb[off+2] = uint8(pb >> 8) 105 | off += 3 106 | } 107 | } 108 | 109 | return 110 | } 111 | -------------------------------------------------------------------------------- /libfacedetection/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # CMakeLists for libfacedetectcnn 2 | 3 | project(facedetection) 4 | 5 | cmake_minimum_required(VERSION 2.8) 6 | 7 | option(ENABLE_INT8 "use int8" OFF) 8 | option(ENABLE_AVX2 "use avx2" OFF) 9 | option(ENABLE_NEON "whether use neon, if use arm please set it on" OFF) 10 | option(DEMO "build the demo" OFF) 11 | 12 | SET(fdt_base_dir ${PROJECT_SOURCE_DIR}) 13 | SET(fdt_src_dir ${fdt_base_dir}/src) 14 | SET(fdt_inc_dir ${fdt_base_dir}/src) 15 | 16 | SET(fdt_lib_name facedetection) 17 | SET(fdt_lib_static ${fdt_lib_name}) 18 | SET(fdt_lib_shared ${fdt_lib_name}_shared) 19 | 20 | FILE(GLOB_RECURSE fdt_source_files ${fdt_src_dir}/*.cpp) 21 | LIST(SORT fdt_source_files) 22 | 23 | if(ENABLE_INT8) 24 | message("using int8") 25 | add_definitions(-D_ENABLE_INT8) 26 | endif() 27 | 28 | if(ENABLE_AVX2) 29 | message("using avx2") 30 | add_definitions(-D_ENABLE_AVX2) 31 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mfma") 32 | endif() 33 | 34 | if(ENABLE_NEON) 35 | message("using arm") 36 | add_definitions(-D_ENABLE_NEON) 37 | endif() 38 | 39 | set(CMAKE_CXX_STANDARD 11) 40 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 41 | set(CMAKE_CXX_EXTENSIONS OFF) 42 | 43 | INCLUDE_DIRECTORIES(${fdt_inc_dir}) 44 | 45 | # Create a static library (.a) 46 | ADD_LIBRARY(${fdt_lib_static} STATIC ${fdt_source_files}) 47 | 48 | # Create a shared library (.so) 49 | ADD_LIBRARY(${fdt_lib_shared} SHARED ${fdt_source_files}) 50 | 
SET_TARGET_PROPERTIES(${fdt_lib_shared} PROPERTIES OUTPUT_NAME "${fdt_lib_name}") 51 | SET_TARGET_PROPERTIES(${fdt_lib_shared} PROPERTIES PREFIX "lib") 52 | 53 | # Create demo. OpenCV is requred. 54 | if (DEMO) 55 | #if(WIN32) 56 | # set(OpenCV_DIR "D:/opencv343/build") # TODO 57 | #endif() 58 | find_package(OpenCV REQUIRED) 59 | include_directories(${OpenCV_INCLUDE_DIRS}) 60 | 61 | set(fdt_demo_files ${fdt_base_dir}/example/libfacedetectcnn-example.cpp) 62 | add_executable(fdt_demo ${fdt_demo_files}) 63 | target_link_libraries(fdt_demo ${fdt_lib_static} ${OpenCV_LIBS}) 64 | endif() 65 | 66 | if (GSTREAMER) 67 | find_package(OpenCV REQUIRED) 68 | 69 | include(FindPkgConfig) 70 | pkg_search_module(GSTREAMER REQUIRED gstreamer-1.0) 71 | pkg_search_module(GSTREAMER_BASE REQUIRED gstreamer-base-1.0) 72 | pkg_search_module(GSTREAMER_VIDEO REQUIRED gstreamer-video-1.0) 73 | 74 | add_library(gstfacedetect SHARED 75 | example/libfacedetect.cpp 76 | ) 77 | 78 | include_directories(gstfacedetect PRIVATE 79 | ${GSTREAMER_INCLUDE_DIRS} 80 | ${GSTREAMER_BASE_INCLUDE_DIRS} 81 | ${GSTREAMER_VIDEO_INCLUDE_DIRS} 82 | ${OpenCV_INCLUDE_DIRS} 83 | ) 84 | 85 | target_link_libraries(gstfacedetect 86 | ${GSTREAMER_LIBRARIES} 87 | ${GSTREAMER_BASE_LIBRARIES} 88 | ${GSTREAMER_VIDEO_LIBRARIES} 89 | ${OpenCV_LIBS} 90 | ${fdt_lib_shared} 91 | ) 92 | 93 | 94 | 95 | endif() 96 | -------------------------------------------------------------------------------- /libfacedetection/ChangeLog: -------------------------------------------------------------------------------- 1 | 2019-03-13 2 | --------------------- 3 | * Release the source code and the model files. Removed the binary libary. 4 | 5 | 2018-11-17 6 | --------------------- 7 | * Replaced the AdaBoost methods with a CNN based one. 8 | 9 | 2017-02-24 10 | --------------------- 11 | * landmark detection speed reaches to 0.8ms per face. The former version is 1.7ms per face. 
12 | 13 | 2017-01-20 14 | --------------------- 15 | * 68-point landmark detection added. 16 | 17 | 2016-11-24 18 | --------------------- 19 | * Added benchmark.cpp which can run face detection in multiple threads using OpenMP. 20 | 21 | 2016-11-16 22 | --------------------- 23 | * Bugs in the previous version were fixed. std::vector was removed from the API because it can cause error. 24 | 25 | 2016-11-10 26 | --------------------- 27 | * The API was updated. std::vector was involved. 28 | * The functions can be called in multiple threads at the same time. 29 | 30 | 2016-10-6 31 | --------------------- 32 | * The algorithm has been speeded up greatly (2x to 3x). 33 | * The true positive rates (FDDB) have been improved 1% to 2% at FP=100. 34 | * Multi-core parallelization has been disabled. The detection time is still the same. 35 | 36 | 2016-9-16 37 | --------------------- 38 | * Speedup again. 39 | * Change function name facedetect_frontal_tmp() to facedetect_frontal_surveillance(). This function now uses a new trained classifier which can achieve a higher detection speed. 40 | 41 | 2016-6-28 42 | --------------------- 43 | * 64-bit dll added since there are so many users request it. 44 | * An easter egg is hidden in the 64-bit dll. Can you find it? 45 | 46 | 2016-6-8 47 | --------------------- 48 | * Speedup 1.2x 49 | * Added an experimental function facedetect_frontal_tmp(). The function can gain a higher detection rate in video surveillance. -------------------------------------------------------------------------------- /libfacedetection/LICENSE: -------------------------------------------------------------------------------- 1 | By downloading, copying, installing or using the software you agree to this license. 2 | If you do not agree to this license, do not download, install, 3 | copy or use the software. 4 | 5 | 6 | License Agreement For libfacedetection 7 | (3-clause BSD License) 8 | 9 | Copyright (c) 2015-2019, Shiqi Yu, all rights reserved. 
10 | shiqi.yu@gmail.com 11 | 12 | Redistribution and use in source and binary forms, with or without modification, 13 | are permitted provided that the following conditions are met: 14 | 15 | * Redistributions of source code must retain the above copyright notice, 16 | this list of conditions and the following disclaimer. 17 | 18 | * Redistributions in binary form must reproduce the above copyright notice, 19 | this list of conditions and the following disclaimer in the documentation 20 | and/or other materials provided with the distribution. 21 | 22 | * Neither the names of the copyright holders nor the names of the contributors 23 | may be used to endorse or promote products derived from this software 24 | without specific prior written permission. 25 | 26 | This software is provided by the copyright holders and contributors "as is" and 27 | any express or implied warranties, including, but not limited to, the implied 28 | warranties of merchantability and fitness for a particular purpose are disclaimed. 29 | In no event shall copyright holders or contributors be liable for any direct, 30 | indirect, incidental, special, exemplary, or consequential damages 31 | (including, but not limited to, procurement of substitute goods or services; 32 | loss of use, data, or profits; or business interruption) however caused 33 | and on any theory of liability, whether in contract, strict liability, 34 | or tort (including negligence or otherwise) arising in any way out of 35 | the use of this software, even if advised of the possibility of such damage. -------------------------------------------------------------------------------- /libfacedetection/README.md: -------------------------------------------------------------------------------- 1 | # libfacedetection 2 | 3 | This is an open source library for CNN-based face detection in images. The CNN model has been converted to static variables in C source files. The source code does not depend on any other libraries. 
What you need is just a C++ compiler. You can compile the source code under Windows, Linux, ARM and any platform with a C++ compiler. 4 | 5 | SIMD instructions are used to speed up the detection. You can enable AVX2 if you use Intel CPU or NEON for ARM. 6 | 7 | The model file has also been provided in directory ./models/. 8 | 9 | examples/libfacedetectcnn-example.cpp shows how to use the library. 10 | 11 | ![Examples](/images/cnnresult.png "Detection example") 12 | 13 | ## How to Compile 14 | 15 | * Please add -O3 to turn on optimizations when you compile the source code using g++. 16 | * Please choose 'Maximize Speed/-O2' when you compile the source code using Microsoft Visual Studio. 17 | 18 | Create a build folder: 19 | 20 | ``` 21 | mkdir build; cd build; rm -rf * 22 | ``` 23 | 24 | ### Cross build for aarch64 25 | 1. set cross compiler for aarch64 (please refer to aarch64-toolchain.cmake) 26 | 2. set opencv path since the example code depends on opencv 27 | 28 | ``` 29 | cmake \ 30 | -DENABLE_INT8=ON \ 31 | -DENABLE_NEON=ON \ 32 | -DCMAKE_BUILD_TYPE=RELEASE \ 33 | -DCMAKE_TOOLCHAIN_FILE=../aarch64-toolchain.cmake \ 34 | .. 35 | 36 | make 37 | ``` 38 | 39 | ### Native build for avx2 40 | ``` 41 | cmake \ 42 | -DENABLE_INT8=ON \ 43 | -DENABLE_AVX2=ON \ 44 | -DCMAKE_BUILD_TYPE=RELEASE \ 45 | -DDEMO=ON \ 46 | .. 
47 | 48 | make 49 | ``` 50 | 51 | ## CNN-based Face Detection on Windows 52 | 53 | | Method |Time | FPS |Time | FPS | 54 | |--------------------|--------------|-------------|--------------|-------------| 55 | | | X64 |X64 | X64 |X64 | 56 | | |Single-thread |Single-thread|Multi-thread |Multi-thread | 57 | |OpenCV Haar+AdaBoost (640x480)| -- | -- | 12.33ms | 81.1 | 58 | |cnn (CPU, 640x480) | 64.21ms | 15.57 | 15.59ms | 64.16 | 59 | |cnn (CPU, 320x240) | 15.23ms | 65.68 | 3.99ms | 250.40 | 60 | |cnn (CPU, 160x120) | 3.47ms | 288.08 | 0.95ms | 1052.20 | 61 | |cnn (CPU, 128x96) | 2.35ms | 425.95 | 0.64ms | 1562.10 | 62 | 63 | * OpenCV Haar+AdaBoost runs with minimal face size 48x48 64 | * Face detection only, and no landmark detection included. 65 | * Minimal face size ~12x12 66 | * Intel(R) Core(TM) i7-7700 CPU @ 3.6GHz. 67 | 68 | ## CNN-based Face Detection on ARM Linux (Raspberry Pi 3 B+) 69 | 70 | | Method |Time | FPS |Time | FPS | 71 | |--------------------|--------------|-------------|--------------|-------------| 72 | | |Single-thread |Single-thread|Multi-thread |Multi-thread | 73 | |cnn (CPU, 640x480) | 512.04ms | 1.95 | 174.89ms | 5.72 | 74 | |cnn (CPU, 320x240) | 123.47ms | 8.10 | 42.13ms | 23.74 | 75 | |cnn (CPU, 160x120) | 27.42ms | 36.47 | 9.75ms | 102.58 | 76 | |cnn (CPU, 128x96) | 17.78ms | 56.24 | 6.12ms | 163.50 | 77 | 78 | * Face detection only, and no landmark detection included. 79 | * Minimal face size ~12x12 80 | * Raspberry Pi 3 B+, Broadcom BCM2837B0, Cortex-A53 (ARMv8) 64-bit SoC @ 1.4GHz 81 | 82 | 83 | ## Author 84 | * Shiqi Yu, 85 | 86 | ## Contributors 87 | * Jia Wu 88 | * Shengyin Wu 89 | * Dong Xu 90 | 91 | ## Acknowledgment 92 | The work is partly supported by the Science Foundation of Shenzhen (Grant No. JCYJ20150324141711699). 
93 | -------------------------------------------------------------------------------- /libfacedetection/aarch64-toolchain.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_SYSTEM_NAME Linux) 2 | set(CMAKE_SYSTEM_VERSION 1) 3 | set(CMAKE_SYSTEM_PROCESSOR "aarch64") 4 | set(CMAKE_CXX_COMPILER "/opt/linaro/gcc-linaro-6.3.1-2017.05-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu-g++") 5 | set(CMAKE_C_COMPILER "/opt/linaro/gcc-linaro-6.3.1-2017.05-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu-gcc") 6 | -------------------------------------------------------------------------------- /libfacedetection/example/libfacedetect.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include "facedetectcnn.h" 7 | 8 | /* 9 | 10 | Howto run? 11 | 12 | GST_DEBUG=3 gst-launch-1.0 filesrc location=input.mp4 ! decodebin \ 13 | ! videorate ! video/x-raw, framerate=8/1 \ 14 | ! videoscale ! video/x-raw, width=640, height=480 \ 15 | ! videoconvert \ 16 | ! identity sync=true \ 17 | ! queue max-size-buffers=3 leaky=1 \ 18 | ! libfacedetect \ 19 | ! videoconvert \ 20 | ! 
xvimagesink sync=false 21 | */ 22 | 23 | 24 | #define TYPE_FACE_DETECT face_detect_get_type() 25 | G_DECLARE_FINAL_TYPE (FaceDetect, face_detect, FACE, DETECT, GstBaseTransform) 26 | 27 | struct _FaceDetect { 28 | GstBaseTransform element; 29 | 30 | guint width; 31 | guint height; 32 | guint thresh; 33 | gboolean boxes; 34 | }; 35 | 36 | G_DEFINE_TYPE (FaceDetect, face_detect, GST_TYPE_BASE_TRANSFORM) 37 | 38 | using namespace cv; 39 | 40 | #define DEFAULT_THRESHHOLD 75 41 | #define DEFAULT_BOXES TRUE 42 | 43 | enum { 44 | PROP_0, 45 | PROP_THRESH, 46 | PROP_BOXES 47 | }; 48 | 49 | static GstStaticPadTemplate sink_factory = GST_STATIC_PAD_TEMPLATE ( 50 | "sink", 51 | GST_PAD_SINK, 52 | GST_PAD_ALWAYS, 53 | GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE("RGB")) 54 | ); 55 | 56 | static GstStaticPadTemplate src_factory = GST_STATIC_PAD_TEMPLATE ( 57 | "src", 58 | GST_PAD_SRC, 59 | GST_PAD_ALWAYS, 60 | GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE("RGB")) 61 | ); 62 | 63 | static void set_property( GObject *object, guint prop_id, const GValue *value, GParamSpec *pspec ) { 64 | FaceDetect *self = FACE_DETECT(object); 65 | switch(prop_id) { 66 | case PROP_THRESH: 67 | self->thresh = g_value_get_uint(value); 68 | break; 69 | case PROP_BOXES: 70 | self->boxes = g_value_get_boolean(value); 71 | break; 72 | default: 73 | G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); 74 | break; 75 | } 76 | } 77 | 78 | static void get_property( GObject *object, guint prop_id, GValue *value, GParamSpec *pspec ) { 79 | FaceDetect *self = FACE_DETECT(object); 80 | 81 | switch(prop_id) { 82 | case PROP_THRESH: 83 | g_value_set_uint(value, self->thresh); 84 | break; 85 | case PROP_BOXES: 86 | g_value_set_boolean(value, self->boxes); 87 | break; 88 | default: 89 | G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); 90 | break; 91 | } 92 | } 93 | 94 | static gboolean set_caps(GstBaseTransform *trans, GstCaps *incaps, GstCaps *outcaps) { 95 | FaceDetect *self = FACE_DETECT(trans); 96 | GstVideoInfo 
info; 97 | 98 | if (gst_video_info_from_caps (&info, incaps) == FALSE) { 99 | return FALSE; 100 | } 101 | 102 | GST_INFO("New Width: %d Height: %d", info.width, info.height); 103 | self->width = info.width; 104 | self->height = info.height; 105 | 106 | return TRUE; 107 | } 108 | 109 | static GstFlowReturn transform_ip(GstBaseTransform *trans, GstBuffer *buf) { 110 | FaceDetect *self = FACE_DETECT(trans); 111 | GST_DEBUG("Processing"); 112 | 113 | GstMapInfo info; 114 | if (gst_buffer_map(buf, &info, GST_MAP_WRITE) == FALSE) { 115 | GST_ERROR("Cannot map buffer"); 116 | return GST_FLOW_ERROR; 117 | } 118 | Size size(self->width, self->height); 119 | Mat image(size, CV_8UC3, (void *) info.data); 120 | 121 | unsigned char *pBuffer = (unsigned char *) malloc(0x20000); 122 | int *pResults = facedetect_cnn(pBuffer, (unsigned char*)(image.ptr(0)), image.cols, image.rows, (int)image.step); 123 | 124 | for(int i = 0; i < (pResults ? *pResults : 0); i++) { 125 | short * p = ((short*)(pResults+1))+142*i; 126 | int x = p[0]; 127 | int y = p[1]; 128 | int w = p[2]; 129 | int h = p[3]; 130 | int neighbors = p[4]; 131 | int angle = p[5]; 132 | 133 | GST_ERROR("face_rect=[%d, %d, %d, %d], neighbors=%d, angle=%d", x,y,w,h,neighbors, angle); 134 | if (neighbors >= self->thresh) { 135 | if (self->boxes) { 136 | rectangle(image, Rect(x, y, w, h), Scalar(0, 255, 0), 2); 137 | } 138 | } 139 | } 140 | 141 | free(pBuffer); 142 | gst_buffer_unmap(buf, &info); 143 | return GST_FLOW_OK; 144 | } 145 | 146 | static void face_detect_init (FaceDetect *self) 147 | { 148 | self->width = 0; 149 | self->height = 0; 150 | self->thresh = DEFAULT_THRESHHOLD; 151 | self->boxes = DEFAULT_BOXES; 152 | } 153 | 154 | static void face_detect_class_init (FaceDetectClass *klass) 155 | { 156 | GObjectClass *object_class = G_OBJECT_CLASS (klass); 157 | GstElementClass *element_class = GST_ELEMENT_CLASS (klass); 158 | GstBaseTransformClass *transform_class = GST_BASE_TRANSFORM_CLASS (klass); 159 | 160 | 
object_class->set_property = set_property; 161 | object_class->get_property = get_property; 162 | 163 | transform_class->set_caps = set_caps; 164 | transform_class->transform_ip = transform_ip; 165 | 166 | g_object_class_install_property(object_class, PROP_THRESH, 167 | g_param_spec_uint( "thresh", "thresh", "Thresh Hold as a percentage", 0, 100, DEFAULT_THRESHHOLD, G_PARAM_READWRITE) 168 | ); 169 | 170 | g_object_class_install_property(object_class, PROP_BOXES, 171 | g_param_spec_boolean( "boxes", "boxes", "Draw boxes", DEFAULT_BOXES, G_PARAM_READWRITE) 172 | ); 173 | 174 | gst_element_class_set_static_metadata(element_class, 175 | "Brightness", 176 | "Sink/Src/Caps", 177 | "Detects faces in images using libfacedetect", 178 | "James Stevenson " 179 | ); 180 | 181 | gst_element_class_add_static_pad_template (element_class, &sink_factory); 182 | gst_element_class_add_static_pad_template (element_class, &src_factory); 183 | } 184 | 185 | 186 | extern "C" gboolean Register_init (GstPlugin *plugin) { 187 | return gst_element_register (plugin, "libfacedetect", GST_RANK_NONE, TYPE_FACE_DETECT); 188 | } 189 | 190 | #ifndef PACKAGE 191 | #define PACKAGE "libfacedetect" 192 | #endif 193 | 194 | GST_PLUGIN_DEFINE ( 195 | GST_VERSION_MAJOR, 196 | GST_VERSION_MINOR, 197 | facedetect, 198 | "Detect faces", 199 | Register_init, 200 | "0.0.0", 201 | "BSD", 202 | "facedetect", 203 | "https://github.com/ShiqiYu/libfacedetection" 204 | ) 205 | -------------------------------------------------------------------------------- /libfacedetection/example/libfacedetectcnn-example.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | By downloading, copying, installing or using the software you agree to this license. 3 | If you do not agree to this license, do not download, install, 4 | copy or use the software. 
5 | 6 | 7 | License Agreement For libfacedetection 8 | (3-clause BSD License) 9 | 10 | Copyright (c) 2018-2019, Shiqi Yu, all rights reserved. 11 | shiqi.yu@gmail.com 12 | 13 | Redistribution and use in source and binary forms, with or without modification, 14 | are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright notice, 20 | this list of conditions and the following disclaimer in the documentation 21 | and/or other materials provided with the distribution. 22 | 23 | * Neither the names of the copyright holders nor the names of the contributors 24 | may be used to endorse or promote products derived from this software 25 | without specific prior written permission. 26 | 27 | This software is provided by the copyright holders and contributors "as is" and 28 | any express or implied warranties, including, but not limited to, the implied 29 | warranties of merchantability and fitness for a particular purpose are disclaimed. 30 | In no event shall copyright holders or contributors be liable for any direct, 31 | indirect, incidental, special, exemplary, or consequential damages 32 | (including, but not limited to, procurement of substitute goods or services; 33 | loss of use, data, or profits; or business interruption) however caused 34 | and on any theory of liability, whether in contract, strict liability, 35 | or tort (including negligence or otherwise) arising in any way out of 36 | the use of this software, even if advised of the possibility of such damage. 37 | */ 38 | 39 | #include 40 | #include 41 | #include "facedetectcnn.h" 42 | 43 | //define the buffer size. Do not change the size! 
44 | #define DETECT_BUFFER_SIZE 0x20000 45 | using namespace cv; 46 | 47 | int main(int argc, char* argv[]) 48 | { 49 | if(argc != 2) 50 | { 51 | printf("Usage: %s \n", argv[0]); 52 | return -1; 53 | } 54 | 55 | //load an image and convert it to gray (single-channel) 56 | Mat image = imread(argv[1]); 57 | if(image.empty()) 58 | { 59 | fprintf(stderr, "Can not load the image file %s.\n", argv[1]); 60 | return -1; 61 | } 62 | 63 | int * pResults = NULL; 64 | //pBuffer is used in the detection functions. 65 | //If you call functions in multiple threads, please create one buffer for each thread! 66 | unsigned char * pBuffer = (unsigned char *)malloc(DETECT_BUFFER_SIZE); 67 | if(!pBuffer) 68 | { 69 | fprintf(stderr, "Can not alloc buffer.\n"); 70 | return -1; 71 | } 72 | 73 | 74 | /////////////////////////////////////////// 75 | // CNN face detection 76 | // Best detection rate 77 | ////////////////////////////////////////// 78 | //!!! The input image must be a RGB one (three-channel) 79 | //!!! DO NOT RELEASE pResults !!! 80 | pResults = facedetect_cnn(pBuffer, (unsigned char*)(image.ptr(0)), image.cols, image.rows, (int)image.step); 81 | 82 | printf("%d faces detected.\n", (pResults ? *pResults : 0)); 83 | Mat result_cnn = image.clone(); 84 | //print the detection results 85 | for(int i = 0; i < (pResults ? 
*pResults : 0); i++) 86 | { 87 | short * p = ((short*)(pResults+1))+142*i; 88 | int x = p[0]; 89 | int y = p[1]; 90 | int w = p[2]; 91 | int h = p[3]; 92 | int neighbors = p[4]; 93 | int angle = p[5]; 94 | 95 | printf("face_rect=[%d, %d, %d, %d], neighbors=%d, angle=%d\n", x,y,w,h,neighbors, angle); 96 | rectangle(result_cnn, Rect(x, y, w, h), Scalar(0, 255, 0), 2); 97 | } 98 | imshow("result_cnn", result_cnn); 99 | 100 | waitKey(); 101 | 102 | //release the buffer 103 | free(pBuffer); 104 | 105 | return 0; 106 | } -------------------------------------------------------------------------------- /libfacedetection/images/chloecalmon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chai2010/libfacedetection-go/80e89f84b0b18fa87abb1de4b48795e9a5505c15/libfacedetection/images/chloecalmon.png -------------------------------------------------------------------------------- /libfacedetection/images/cnnresult.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chai2010/libfacedetection-go/80e89f84b0b18fa87abb1de4b48795e9a5505c15/libfacedetection/images/cnnresult.png -------------------------------------------------------------------------------- /libfacedetection/images/keliamoniz1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chai2010/libfacedetection-go/80e89f84b0b18fa87abb1de4b48795e9a5505c15/libfacedetection/images/keliamoniz1.jpg -------------------------------------------------------------------------------- /libfacedetection/images/keliamoniz2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chai2010/libfacedetection-go/80e89f84b0b18fa87abb1de4b48795e9a5505c15/libfacedetection/images/keliamoniz2.jpg -------------------------------------------------------------------------------- 
/libfacedetection/models/README.md: -------------------------------------------------------------------------------- 1 | # How to use the model files 2 | 3 | You do not need to use the model files in this directory when you run face detection compiled from the C++ source code. If you want to use Caffe or OpenVINO to detect faces, you can use the model files here. -------------------------------------------------------------------------------- /libfacedetection/models/caffe/yufacedetectnet-open-v1.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chai2010/libfacedetection-go/80e89f84b0b18fa87abb1de4b48795e9a5505c15/libfacedetection/models/caffe/yufacedetectnet-open-v1.caffemodel -------------------------------------------------------------------------------- /libfacedetection/models/caffe/yufacedetectnet-open-v1.prototxt: -------------------------------------------------------------------------------- 1 | name: "YuFaceDetectNet" 2 | 3 | input: "data" 4 | 5 | input_shape { 6 | dim: 1 7 | dim: 3 8 | dim: 240 9 | dim: 320 10 | } 11 | 12 | #CONV1########################################################### 13 | 14 | layer { 15 | name: "conv1_1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1_1" 19 | param { 20 | lr_mult: 1.0 21 | decay_mult: 1.0 22 | } 23 | convolution_param { 24 | num_output: 16 25 | pad: 1 26 | stride: 2 27 | kernel_size: 3 28 | weight_filler { 29 | type: "xavier" 30 | } 31 | bias_term: false 32 | } 33 | } 34 | 35 | layer { 36 | name: "relu1_1" 37 | type: "ReLU" 38 | bottom: "conv1_1" 39 | top: "conv1_1" 40 | } 41 | layer { 42 | name: "conv1_2" 43 | type: "Convolution" 44 | bottom: "conv1_1" 45 | top: "conv1_2" 46 | param { 47 | lr_mult: 1.0 48 | decay_mult: 1.0 49 | } 50 | convolution_param { 51 | num_output: 16 52 | pad: 0 53 | kernel_size: 1 54 | weight_filler { 55 | type: "xavier" 56 | } 57 | bias_term: false 58 | } 59 | } 60 | 61 | 62 | layer { 63 | name: 
"relu1_2" 64 | type: "ReLU" 65 | bottom: "conv1_2" 66 | top: "conv1_2" 67 | } 68 | #CONV2########################################################## 69 | layer { 70 | name: "pool1" 71 | type: "Pooling" 72 | bottom: "conv1_2" 73 | top: "pool1" 74 | pooling_param { 75 | pool: MAX 76 | kernel_size: 2 77 | stride: 2 78 | } 79 | } 80 | layer { 81 | name: "conv2_1" 82 | type: "Convolution" 83 | bottom: "pool1" 84 | top: "conv2_1" 85 | param { 86 | lr_mult: 1.0 87 | decay_mult: 1.0 88 | } 89 | convolution_param { 90 | num_output: 16 91 | pad: 1 92 | kernel_size: 3 93 | weight_filler { 94 | type: "xavier" 95 | } 96 | bias_term: false 97 | } 98 | } 99 | 100 | 101 | layer { 102 | name: "relu2_1" 103 | type: "ReLU" 104 | bottom: "conv2_1" 105 | top: "conv2_1" 106 | } 107 | layer { 108 | name: "conv2_2" 109 | type: "Convolution" 110 | bottom: "conv2_1" 111 | top: "conv2_2" 112 | param { 113 | lr_mult: 1.0 114 | decay_mult: 1.0 115 | } 116 | convolution_param { 117 | num_output: 16 118 | pad: 0 119 | kernel_size: 1 120 | weight_filler { 121 | type: "xavier" 122 | } 123 | bias_term: false 124 | } 125 | } 126 | 127 | layer { 128 | name: "relu2_2" 129 | type: "ReLU" 130 | bottom: "conv2_2" 131 | top: "conv2_2" 132 | } 133 | #CONV3########################################################## 134 | 135 | layer { 136 | name: "pool2" 137 | type: "Pooling" 138 | bottom: "conv2_2" 139 | top: "pool2" 140 | pooling_param { 141 | pool: MAX 142 | kernel_size: 2 143 | stride: 2 144 | } 145 | } 146 | layer { 147 | name: "conv3_1" 148 | type: "Convolution" 149 | bottom: "pool2" 150 | top: "conv3_1" 151 | param { 152 | lr_mult: 1.0 153 | decay_mult: 1.0 154 | } 155 | convolution_param { 156 | num_output: 32 157 | pad: 1 158 | kernel_size: 3 159 | weight_filler { 160 | type: "xavier" 161 | } 162 | bias_term: false 163 | } 164 | } 165 | 166 | layer { 167 | name: "relu3_1" 168 | type: "ReLU" 169 | bottom: "conv3_1" 170 | top: "conv3_1" 171 | } 172 | layer { 173 | name: "conv3_2" 174 | type: 
"Convolution" 175 | bottom: "conv3_1" 176 | top: "conv3_2" 177 | param { 178 | lr_mult: 1.0 179 | decay_mult: 1.0 180 | } 181 | convolution_param { 182 | num_output: 32 183 | pad: 0 184 | kernel_size: 1 185 | weight_filler { 186 | type: "xavier" 187 | } 188 | bias_term: false 189 | } 190 | } 191 | 192 | 193 | layer { 194 | name: "relu3_2" 195 | type: "ReLU" 196 | bottom: "conv3_2" 197 | top: "conv3_2" 198 | } 199 | layer { 200 | name: "conv3_3" 201 | type: "Convolution" 202 | bottom: "conv3_2" 203 | top: "conv3_3" 204 | param { 205 | lr_mult: 1.0 206 | decay_mult: 1.0 207 | } 208 | convolution_param { 209 | num_output: 32 210 | pad: 1 211 | kernel_size: 3 212 | weight_filler { 213 | type: "xavier" 214 | } 215 | bias_term: false 216 | } 217 | } 218 | 219 | layer { 220 | name: "relu3_3" 221 | type: "ReLU" 222 | bottom: "conv3_3" 223 | top: "conv3_3" 224 | } 225 | 226 | 227 | #CONV4########################################################## 228 | 229 | layer { 230 | name: "pool3" 231 | type: "Pooling" 232 | bottom: "conv3_3" 233 | top: "pool3" 234 | pooling_param { 235 | pool: MAX 236 | kernel_size: 2 237 | stride: 2 238 | } 239 | } 240 | layer { 241 | name: "conv4_1" 242 | type: "Convolution" 243 | bottom: "pool3" 244 | top: "conv4_1" 245 | param { 246 | lr_mult: 1.0 247 | decay_mult: 1.0 248 | } 249 | convolution_param { 250 | num_output: 64 251 | pad: 1 252 | kernel_size: 3 253 | weight_filler { 254 | type: "xavier" 255 | } 256 | bias_term: false 257 | } 258 | } 259 | 260 | layer { 261 | name: "relu4_1" 262 | type: "ReLU" 263 | bottom: "conv4_1" 264 | top: "conv4_1" 265 | } 266 | layer { 267 | name: "conv4_2" 268 | type: "Convolution" 269 | bottom: "conv4_1" 270 | top: "conv4_2" 271 | param { 272 | lr_mult: 1.0 273 | decay_mult: 1.0 274 | } 275 | convolution_param { 276 | num_output: 64 277 | pad: 0 278 | kernel_size: 1 279 | weight_filler { 280 | type: "xavier" 281 | } 282 | bias_term: false 283 | } 284 | } 285 | 286 | layer { 287 | name: "relu4_2" 288 | type: 
"ReLU" 289 | bottom: "conv4_2" 290 | top: "conv4_2" 291 | } 292 | layer { 293 | name: "conv4_3" 294 | type: "Convolution" 295 | bottom: "conv4_2" 296 | top: "conv4_3" 297 | param { 298 | lr_mult: 1.0 299 | decay_mult: 1.0 300 | } 301 | convolution_param { 302 | num_output: 64 303 | pad: 1 304 | kernel_size: 3 305 | weight_filler { 306 | type: "xavier" 307 | } 308 | bias_term: false 309 | } 310 | } 311 | 312 | layer { 313 | name: "relu4_3" 314 | type: "ReLU" 315 | bottom: "conv4_3" 316 | top: "conv4_3" 317 | } 318 | 319 | #CONV5########################################################## 320 | 321 | layer { 322 | name: "pool4" 323 | type: "Pooling" 324 | bottom: "conv4_3" 325 | top: "pool4" 326 | pooling_param { 327 | pool: MAX 328 | kernel_size: 2 329 | stride: 2 330 | } 331 | } 332 | layer { 333 | name: "conv5_1" 334 | type: "Convolution" 335 | bottom: "pool4" 336 | top: "conv5_1" 337 | param { 338 | lr_mult: 1.0 339 | decay_mult: 1.0 340 | } 341 | convolution_param { 342 | num_output: 128 343 | pad: 1 344 | kernel_size: 3 345 | weight_filler { 346 | type: "xavier" 347 | } 348 | bias_term: false 349 | dilation: 1 350 | } 351 | } 352 | 353 | layer { 354 | name: "relu5_1" 355 | type: "ReLU" 356 | bottom: "conv5_1" 357 | top: "conv5_1" 358 | } 359 | layer { 360 | name: "conv5_2" 361 | type: "Convolution" 362 | bottom: "conv5_1" 363 | top: "conv5_2" 364 | param { 365 | lr_mult: 1.0 366 | decay_mult: 1.0 367 | } 368 | convolution_param { 369 | num_output: 128 370 | pad: 0 371 | kernel_size: 1 372 | weight_filler { 373 | type: "xavier" 374 | } 375 | bias_term: false 376 | dilation: 1 377 | } 378 | } 379 | 380 | layer { 381 | name: "relu5_2" 382 | type: "ReLU" 383 | bottom: "conv5_2" 384 | top: "conv5_2" 385 | } 386 | layer { 387 | name: "conv5_3" 388 | type: "Convolution" 389 | bottom: "conv5_2" 390 | top: "conv5_3" 391 | param { 392 | lr_mult: 1.0 393 | decay_mult: 1.0 394 | } 395 | convolution_param { 396 | num_output: 128 397 | pad: 1 398 | kernel_size: 3 399 | 
weight_filler { 400 | type: "xavier" 401 | } 402 | bias_term: false 403 | dilation: 1 404 | } 405 | } 406 | 407 | layer { 408 | name: "relu5_3" 409 | type: "ReLU" 410 | bottom: "conv5_3" 411 | top: "conv5_3" 412 | } 413 | 414 | #CONV6########################################################## 415 | 416 | layer { 417 | name: "pool5" 418 | type: "Pooling" 419 | bottom: "conv5_3" 420 | top: "pool5" 421 | pooling_param { 422 | pool: MAX 423 | kernel_size: 2 424 | stride: 2 425 | } 426 | } 427 | 428 | layer { 429 | name: "conv6_1" 430 | type: "Convolution" 431 | bottom: "pool5" 432 | top: "conv6_1" 433 | param { 434 | lr_mult: 1.0 435 | decay_mult: 1.0 436 | } 437 | convolution_param { 438 | num_output: 128 439 | pad: 1 440 | kernel_size: 3 441 | weight_filler { 442 | type: "xavier" 443 | } 444 | bias_term: false 445 | dilation: 1 446 | } 447 | } 448 | 449 | layer { 450 | name: "relu6_1" 451 | type: "ReLU" 452 | bottom: "conv6_1" 453 | top: "conv6_1" 454 | } 455 | layer { 456 | name: "conv6_2" 457 | type: "Convolution" 458 | bottom: "conv6_1" 459 | top: "conv6_2" 460 | param { 461 | lr_mult: 1.0 462 | decay_mult: 1.0 463 | } 464 | convolution_param { 465 | num_output: 128 466 | pad: 0 467 | kernel_size: 1 468 | weight_filler { 469 | type: "xavier" 470 | } 471 | bias_term: false 472 | dilation: 1 473 | } 474 | } 475 | 476 | layer { 477 | name: "relu6_2" 478 | type: "ReLU" 479 | bottom: "conv6_2" 480 | top: "conv6_2" 481 | } 482 | layer { 483 | name: "conv6_3" 484 | type: "Convolution" 485 | bottom: "conv6_2" 486 | top: "conv6_3" 487 | param { 488 | lr_mult: 1.0 489 | decay_mult: 1.0 490 | } 491 | convolution_param { 492 | num_output: 128 493 | pad: 1 494 | kernel_size: 3 495 | weight_filler { 496 | type: "xavier" 497 | } 498 | bias_term: false 499 | dilation: 1 500 | } 501 | } 502 | 503 | layer { 504 | name: "relu6_3" 505 | type: "ReLU" 506 | bottom: "conv6_3" 507 | top: "conv6_3" 508 | } 509 | 510 | #PRIORBOX3########################################## 511 | layer { 512 | 
name: "conv3_3_norm" 513 | type: "Normalize" 514 | bottom: "conv3_3" 515 | top: "conv3_3_norm" 516 | norm_param { 517 | across_spatial: false 518 | scale_filler { 519 | type: "constant" 520 | value: 10.0 521 | } 522 | channel_shared: false 523 | } 524 | } 525 | layer { 526 | name: "conv3_3_norm_mbox_loc_new" 527 | type: "Convolution" 528 | bottom: "conv3_3_norm" 529 | top: "conv3_3_norm_mbox_loc_new" 530 | param { 531 | lr_mult: 1.0 532 | decay_mult: 1.0 533 | } 534 | convolution_param { 535 | num_output: 12 536 | pad: 1 537 | kernel_size: 3 538 | stride: 1 539 | weight_filler { 540 | type: "xavier" 541 | } 542 | bias_term: false 543 | } 544 | } 545 | layer { 546 | name: "conv3_3_norm_mbox_loc_perm" 547 | type: "Permute" 548 | bottom: "conv3_3_norm_mbox_loc_new" 549 | top: "conv3_3_norm_mbox_loc_perm" 550 | permute_param { 551 | order: 0 552 | order: 2 553 | order: 3 554 | order: 1 555 | } 556 | } 557 | layer { 558 | name: "conv3_3_norm_mbox_loc_flat" 559 | type: "Flatten" 560 | bottom: "conv3_3_norm_mbox_loc_perm" 561 | top: "conv3_3_norm_mbox_loc_flat" 562 | flatten_param { 563 | axis: 1 564 | } 565 | } 566 | layer { 567 | name: "conv3_3_norm_mbox_conf_new" 568 | type: "Convolution" 569 | bottom: "conv3_3_norm" 570 | top: "conv3_3_norm_mbox_conf_new" 571 | param { 572 | lr_mult: 1.0 573 | decay_mult: 1.0 574 | } 575 | convolution_param { 576 | num_output: 6 577 | pad: 1 578 | kernel_size: 3 579 | stride: 1 580 | weight_filler { 581 | type: "xavier" 582 | } 583 | bias_term: false 584 | } 585 | } 586 | layer { 587 | name: "conv3_3_norm_mbox_conf_perm" 588 | type: "Permute" 589 | bottom: "conv3_3_norm_mbox_conf_new" 590 | top: "conv3_3_norm_mbox_conf_perm" 591 | permute_param { 592 | order: 0 593 | order: 2 594 | order: 3 595 | order: 1 596 | } 597 | } 598 | layer { 599 | name: "conv3_3_norm_mbox_conf_flat" 600 | type: "Flatten" 601 | bottom: "conv3_3_norm_mbox_conf_perm" 602 | top: "conv3_3_norm_mbox_conf_flat" 603 | flatten_param { 604 | axis: 1 605 | } 606 | } 
607 | layer { 608 | 609 | name: "conv3_3_norm_mbox_priorbox" 610 | type: "PriorBox" 611 | bottom: "conv3_3_norm" 612 | bottom: "data" 613 | top: "conv3_3_norm_mbox_priorbox" 614 | prior_box_param { 615 | min_size: 10.0 616 | min_size: 16.0 617 | min_size: 24.0 618 | clip: false 619 | variance: 0.10000000149 620 | variance: 0.10000000149 621 | variance: 0.20000000298 622 | variance: 0.20000000298 623 | step: 8.0 624 | offset: 0.5 625 | } 626 | } 627 | #PRIORBOX4########################################## 628 | layer { 629 | name: "conv4_3_norm" 630 | type: "Normalize" 631 | bottom: "conv4_3" 632 | top: "conv4_3_norm" 633 | norm_param { 634 | across_spatial: false 635 | scale_filler { 636 | type: "constant" 637 | value: 8.0 638 | } 639 | channel_shared: false 640 | } 641 | } 642 | layer { 643 | name: "conv4_3_norm_mbox_loc" 644 | type: "Convolution" 645 | bottom: "conv4_3_norm" 646 | top: "conv4_3_norm_mbox_loc" 647 | param { 648 | lr_mult: 1.0 649 | decay_mult: 1.0 650 | } 651 | convolution_param { 652 | num_output: 8 653 | pad: 1 654 | kernel_size: 3 655 | stride: 1 656 | weight_filler { 657 | type: "xavier" 658 | } 659 | bias_term: false 660 | } 661 | } 662 | layer { 663 | name: "conv4_3_norm_mbox_loc_perm" 664 | type: "Permute" 665 | bottom: "conv4_3_norm_mbox_loc" 666 | top: "conv4_3_norm_mbox_loc_perm" 667 | permute_param { 668 | order: 0 669 | order: 2 670 | order: 3 671 | order: 1 672 | } 673 | } 674 | layer { 675 | name: "conv4_3_norm_mbox_loc_flat" 676 | type: "Flatten" 677 | bottom: "conv4_3_norm_mbox_loc_perm" 678 | top: "conv4_3_norm_mbox_loc_flat" 679 | flatten_param { 680 | axis: 1 681 | } 682 | } 683 | layer { 684 | name: "conv4_3_norm_mbox_conf" 685 | type: "Convolution" 686 | bottom: "conv4_3_norm" 687 | top: "conv4_3_norm_mbox_conf" 688 | param { 689 | lr_mult: 1.0 690 | decay_mult: 1.0 691 | } 692 | convolution_param { 693 | num_output: 4 694 | pad: 1 695 | kernel_size: 3 696 | stride: 1 697 | weight_filler { 698 | type: "xavier" 699 | } 700 | 
bias_term: false 701 | } 702 | } 703 | layer { 704 | name: "conv4_3_norm_mbox_conf_perm" 705 | type: "Permute" 706 | bottom: "conv4_3_norm_mbox_conf" 707 | top: "conv4_3_norm_mbox_conf_perm" 708 | permute_param { 709 | order: 0 710 | order: 2 711 | order: 3 712 | order: 1 713 | } 714 | } 715 | layer { 716 | name: "conv4_3_norm_mbox_conf_flat" 717 | type: "Flatten" 718 | bottom: "conv4_3_norm_mbox_conf_perm" 719 | top: "conv4_3_norm_mbox_conf_flat" 720 | flatten_param { 721 | axis: 1 722 | } 723 | } 724 | layer { 725 | 726 | name: "conv4_3_norm_mbox_priorbox" 727 | type: "PriorBox" 728 | bottom: "conv4_3_norm" 729 | bottom: "data" 730 | top: "conv4_3_norm_mbox_priorbox" 731 | prior_box_param { 732 | min_size: 32.0 733 | min_size: 48.0 734 | clip: false 735 | variance: 0.10000000149 736 | variance: 0.10000000149 737 | variance: 0.20000000298 738 | variance: 0.20000000298 739 | step: 16.0 740 | offset: 0.5 741 | } 742 | } 743 | #PRIORBOX5########################################## 744 | layer { 745 | name: "conv5_3_norm" 746 | type: "Normalize" 747 | bottom: "conv5_3" 748 | top: "conv5_3_norm" 749 | norm_param { 750 | across_spatial: false 751 | scale_filler { 752 | type: "constant" 753 | value: 5.0 754 | } 755 | channel_shared: false 756 | } 757 | } 758 | layer { 759 | name: "conv5_3_norm_mbox_loc" 760 | type: "Convolution" 761 | bottom: "conv5_3_norm" 762 | top: "conv5_3_norm_mbox_loc" 763 | param { 764 | lr_mult: 1.0 765 | decay_mult: 1.0 766 | } 767 | convolution_param { 768 | num_output: 8 769 | pad: 1 770 | kernel_size: 3 771 | stride: 1 772 | weight_filler { 773 | type: "xavier" 774 | } 775 | bias_term: false 776 | } 777 | } 778 | layer { 779 | name: "conv5_3_norm_mbox_loc_perm" 780 | type: "Permute" 781 | bottom: "conv5_3_norm_mbox_loc" 782 | top: "conv5_3_norm_mbox_loc_perm" 783 | permute_param { 784 | order: 0 785 | order: 2 786 | order: 3 787 | order: 1 788 | } 789 | } 790 | layer { 791 | name: "conv5_3_norm_mbox_loc_flat" 792 | type: "Flatten" 793 | bottom: 
"conv5_3_norm_mbox_loc_perm" 794 | top: "conv5_3_norm_mbox_loc_flat" 795 | flatten_param { 796 | axis: 1 797 | } 798 | } 799 | layer { 800 | name: "conv5_3_norm_mbox_conf" 801 | type: "Convolution" 802 | bottom: "conv5_3_norm" 803 | top: "conv5_3_norm_mbox_conf" 804 | param { 805 | lr_mult: 1.0 806 | decay_mult: 1.0 807 | } 808 | convolution_param { 809 | num_output: 4 810 | pad: 1 811 | kernel_size: 3 812 | stride: 1 813 | weight_filler { 814 | type: "xavier" 815 | } 816 | bias_term: false 817 | } 818 | } 819 | layer { 820 | name: "conv5_3_norm_mbox_conf_perm" 821 | type: "Permute" 822 | bottom: "conv5_3_norm_mbox_conf" 823 | top: "conv5_3_norm_mbox_conf_perm" 824 | permute_param { 825 | order: 0 826 | order: 2 827 | order: 3 828 | order: 1 829 | } 830 | } 831 | layer { 832 | name: "conv5_3_norm_mbox_conf_flat" 833 | type: "Flatten" 834 | bottom: "conv5_3_norm_mbox_conf_perm" 835 | top: "conv5_3_norm_mbox_conf_flat" 836 | flatten_param { 837 | axis: 1 838 | } 839 | } 840 | layer { 841 | 842 | name: "conv5_3_norm_mbox_priorbox" 843 | type: "PriorBox" 844 | bottom: "conv5_3_norm" 845 | bottom: "data" 846 | top: "conv5_3_norm_mbox_priorbox" 847 | prior_box_param { 848 | min_size: 64.0 849 | min_size: 96.0 850 | clip: false 851 | variance: 0.10000000149 852 | variance: 0.10000000149 853 | variance: 0.20000000298 854 | variance: 0.20000000298 855 | step: 32.0 856 | offset: 0.5 857 | } 858 | } 859 | 860 | #PRIORBOX6########################################## 861 | layer { 862 | name: "conv6_3_norm" 863 | type: "Normalize" 864 | bottom: "conv6_3" 865 | top: "conv6_3_norm" 866 | norm_param { 867 | across_spatial: false 868 | scale_filler { 869 | type: "constant" 870 | value: 5.0 871 | } 872 | channel_shared: false 873 | } 874 | } 875 | layer { 876 | name: "conv6_3_norm_mbox_loc" 877 | type: "Convolution" 878 | bottom: "conv6_3_norm" 879 | top: "conv6_3_norm_mbox_loc" 880 | param { 881 | lr_mult: 1.0 882 | decay_mult: 1.0 883 | } 884 | convolution_param { 885 | num_output: 
12 886 | pad: 1 887 | kernel_size: 3 888 | stride: 1 889 | weight_filler { 890 | type: "xavier" 891 | } 892 | bias_term: false 893 | } 894 | } 895 | layer { 896 | name: "conv6_3_norm_mbox_loc_perm" 897 | type: "Permute" 898 | bottom: "conv6_3_norm_mbox_loc" 899 | top: "conv6_3_norm_mbox_loc_perm" 900 | permute_param { 901 | order: 0 902 | order: 2 903 | order: 3 904 | order: 1 905 | } 906 | } 907 | layer { 908 | name: "conv6_3_norm_mbox_loc_flat" 909 | type: "Flatten" 910 | bottom: "conv6_3_norm_mbox_loc_perm" 911 | top: "conv6_3_norm_mbox_loc_flat" 912 | flatten_param { 913 | axis: 1 914 | } 915 | } 916 | layer { 917 | name: "conv6_3_norm_mbox_conf" 918 | type: "Convolution" 919 | bottom: "conv6_3_norm" 920 | top: "conv6_3_norm_mbox_conf" 921 | param { 922 | lr_mult: 1.0 923 | decay_mult: 1.0 924 | } 925 | convolution_param { 926 | num_output: 6 927 | pad: 1 928 | kernel_size: 3 929 | stride: 1 930 | weight_filler { 931 | type: "xavier" 932 | } 933 | bias_term: false 934 | } 935 | } 936 | layer { 937 | name: "conv6_3_norm_mbox_conf_perm" 938 | type: "Permute" 939 | bottom: "conv6_3_norm_mbox_conf" 940 | top: "conv6_3_norm_mbox_conf_perm" 941 | permute_param { 942 | order: 0 943 | order: 2 944 | order: 3 945 | order: 1 946 | } 947 | } 948 | layer { 949 | name: "conv6_3_norm_mbox_conf_flat" 950 | type: "Flatten" 951 | bottom: "conv6_3_norm_mbox_conf_perm" 952 | top: "conv6_3_norm_mbox_conf_flat" 953 | flatten_param { 954 | axis: 1 955 | } 956 | } 957 | layer { 958 | 959 | name: "conv6_3_norm_mbox_priorbox" 960 | type: "PriorBox" 961 | bottom: "conv6_3_norm" 962 | bottom: "data" 963 | top: "conv6_3_norm_mbox_priorbox" 964 | prior_box_param { 965 | min_size: 128.0 966 | min_size: 192.0 967 | min_size: 256.0 968 | clip: false 969 | variance: 0.10000000149 970 | variance: 0.10000000149 971 | variance: 0.20000000298 972 | variance: 0.20000000298 973 | step: 64.0 974 | offset: 0.5 975 | } 976 | } 977 | ######################################################## 978 | layer { 
979 | name: "mbox_loc" 980 | type: "Concat" 981 | bottom: "conv3_3_norm_mbox_loc_flat" 982 | bottom: "conv4_3_norm_mbox_loc_flat" 983 | bottom: "conv5_3_norm_mbox_loc_flat" 984 | bottom: "conv6_3_norm_mbox_loc_flat" 985 | top: "mbox_loc" 986 | concat_param { 987 | axis: 1 988 | } 989 | } 990 | layer { 991 | name: "mbox_conf" 992 | type: "Concat" 993 | bottom: "conv3_3_norm_mbox_conf_flat" 994 | bottom: "conv4_3_norm_mbox_conf_flat" 995 | bottom: "conv5_3_norm_mbox_conf_flat" 996 | bottom: "conv6_3_norm_mbox_conf_flat" 997 | top: "mbox_conf" 998 | concat_param { 999 | axis: 1 1000 | } 1001 | } 1002 | layer { 1003 | name: "mbox_priorbox" 1004 | type: "Concat" 1005 | bottom: "conv3_3_norm_mbox_priorbox" 1006 | bottom: "conv4_3_norm_mbox_priorbox" 1007 | bottom: "conv5_3_norm_mbox_priorbox" 1008 | bottom: "conv6_3_norm_mbox_priorbox" 1009 | top: "mbox_priorbox" 1010 | concat_param { 1011 | axis: 2 1012 | } 1013 | } 1014 | 1015 | ##################################################### 1016 | layer { 1017 | name: "mbox_conf_reshape" 1018 | type: "Reshape" 1019 | bottom: "mbox_conf" 1020 | top: "mbox_conf_reshape" 1021 | reshape_param { 1022 | shape { 1023 | dim: 0 1024 | dim: -1 1025 | dim: 2 1026 | } 1027 | } 1028 | } 1029 | layer { 1030 | name: "mbox_conf_softmax" 1031 | type: "Softmax" 1032 | bottom: "mbox_conf_reshape" 1033 | top: "mbox_conf_softmax" 1034 | softmax_param { 1035 | axis: 2 1036 | } 1037 | } 1038 | layer { 1039 | name: "mbox_conf_flatten" 1040 | type: "Flatten" 1041 | bottom: "mbox_conf_softmax" 1042 | top: "mbox_conf_flatten" 1043 | flatten_param { 1044 | axis: 1 1045 | } 1046 | } 1047 | 1048 | layer { 1049 | name: "detection_out" 1050 | type: "DetectionOutput" 1051 | bottom: "mbox_loc" 1052 | bottom: "mbox_conf_flatten" 1053 | bottom: "mbox_priorbox" 1054 | # bottom: "data" 1055 | top: "detection_out" 1056 | include { 1057 | phase: TEST 1058 | } 1059 | transform_param { 1060 | mean_value: 103.94 1061 | mean_value: 116.78 1062 | mean_value: 123.68 1063 | 
} 1064 | detection_output_param { 1065 | num_classes: 2 1066 | share_location: true 1067 | background_label_id: 0 1068 | nms_param { 1069 | nms_threshold: 0.15 1070 | top_k: 100 1071 | } 1072 | code_type: CENTER_SIZE 1073 | keep_top_k: 50 1074 | confidence_threshold: 0.01 1075 | visualize: false 1076 | visualize_threshold: 0.3 1077 | } 1078 | } 1079 | 1080 | 1081 | -------------------------------------------------------------------------------- /libfacedetection/models/caffe/yufacedetectnet-open-v1.solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "./yufacedetectnet-open-v1.train.prototxt" 2 | 3 | base_lr: 0.001 4 | lr_policy: "poly" 5 | power: 2 6 | 7 | display: 10 8 | max_iter: 200000 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | snapshot: 1000 12 | snapshot_prefix: "./models/yufacedetectnet-open-v1" 13 | solver_mode: GPU 14 | debug_info: false 15 | snapshot_after_train: true 16 | average_loss: 10 17 | type: "SGD" 18 | store_blobs_in_old_format: true 19 | -------------------------------------------------------------------------------- /libfacedetection/models/caffe/yufacedetectnet-open-v1.train.prototxt: -------------------------------------------------------------------------------- 1 | name: "YuFaceDetectNet" 2 | 3 | layer { 4 | name: "data" 5 | type: "AnnotatedData" 6 | top: "data" 7 | top: "label" 8 | include { 9 | phase: TRAIN 10 | } 11 | transform_param { 12 | mirror: true 13 | mean_value: 104.0 14 | mean_value: 117.0 15 | mean_value: 123.0 16 | resize_param { 17 | prob: 1.0 18 | resize_mode: WARP 19 | height: 320 20 | width: 320 21 | interp_mode: LINEAR 22 | interp_mode: AREA 23 | interp_mode: NEAREST 24 | interp_mode: CUBIC 25 | interp_mode: LANCZOS4 26 | } 27 | emit_constraint { 28 | emit_type: CENTER 29 | } 30 | distort_param { 31 | brightness_prob: 0.5 32 | brightness_delta: 32.0 33 | contrast_prob: 0.5 34 | contrast_lower: 0.5 35 | contrast_upper: 1.5 36 | hue_prob: 0.5 37 | hue_delta: 
18.0 38 | saturation_prob: 0.5 39 | saturation_lower: 0.5 40 | saturation_upper: 1.5 41 | random_order_prob: 0.0 42 | } 43 | } 44 | data_param { 45 | source: "../FACE/lmdb/FACE_trainval_lmdb/" 46 | batch_size: 16 47 | backend: LMDB 48 | } 49 | annotated_data_param { 50 | batch_sampler { 51 | sampler { 52 | min_scale: 1.0 53 | max_scale: 1.0 54 | min_aspect_ratio: 1.0 55 | max_aspect_ratio: 1.0 56 | } 57 | sample_constraint { 58 | min_object_coverage: 1.0 59 | } 60 | max_sample: 1 61 | max_trials: 50 62 | } 63 | batch_sampler { 64 | sampler { 65 | min_scale: 0.300000011921 66 | max_scale: 1.0 67 | min_aspect_ratio: 1.0 68 | max_aspect_ratio: 1.0 69 | } 70 | sample_constraint { 71 | min_object_coverage: 1.0 72 | } 73 | max_sample: 1 74 | max_trials: 50 75 | } 76 | batch_sampler { 77 | sampler { 78 | min_scale: 0.300000011921 79 | max_scale: 1.0 80 | min_aspect_ratio: 1.0 81 | max_aspect_ratio: 1.0 82 | } 83 | sample_constraint { 84 | min_object_coverage: 1.0 85 | } 86 | max_sample: 1 87 | max_trials: 50 88 | } 89 | batch_sampler { 90 | sampler { 91 | min_scale: 0.300000011921 92 | max_scale: 1.0 93 | min_aspect_ratio: 1.0 94 | max_aspect_ratio: 1.0 95 | } 96 | sample_constraint { 97 | min_object_coverage: 1.0 98 | } 99 | max_sample: 1 100 | max_trials: 50 101 | } 102 | batch_sampler { 103 | sampler { 104 | min_scale: 0.300000011921 105 | max_scale: 1.0 106 | min_aspect_ratio: 1.0 107 | max_aspect_ratio: 1.0 108 | } 109 | sample_constraint { 110 | min_object_coverage: 1.0 111 | } 112 | max_sample: 1 113 | max_trials: 50 114 | } 115 | label_map_file: "../labelmap_face.prototxt" 116 | } 117 | } 118 | #CONV1########################################################### 119 | 120 | layer { 121 | name: "conv1_1" 122 | type: "Convolution" 123 | bottom: "data" 124 | top: "conv1_1" 125 | param { 126 | lr_mult: 1.0 127 | decay_mult: 1.0 128 | } 129 | convolution_param { 130 | num_output: 16 131 | pad: 1 132 | stride: 2 133 | kernel_size: 3 134 | weight_filler { 135 | type: 
"xavier" 136 | } 137 | bias_term: false 138 | } 139 | } 140 | 141 | layer { 142 | name: "relu1_1" 143 | type: "ReLU" 144 | bottom: "conv1_1" 145 | top: "conv1_1" 146 | } 147 | layer { 148 | name: "conv1_2" 149 | type: "Convolution" 150 | bottom: "conv1_1" 151 | top: "conv1_2" 152 | param { 153 | lr_mult: 1.0 154 | decay_mult: 1.0 155 | } 156 | convolution_param { 157 | num_output: 16 158 | pad: 0 159 | kernel_size: 1 160 | weight_filler { 161 | type: "xavier" 162 | } 163 | bias_term: false 164 | } 165 | } 166 | 167 | 168 | layer { 169 | name: "relu1_2" 170 | type: "ReLU" 171 | bottom: "conv1_2" 172 | top: "conv1_2" 173 | } 174 | #CONV2########################################################## 175 | layer { 176 | name: "pool1" 177 | type: "Pooling" 178 | bottom: "conv1_2" 179 | top: "pool1" 180 | pooling_param { 181 | pool: MAX 182 | kernel_size: 2 183 | stride: 2 184 | } 185 | } 186 | layer { 187 | name: "conv2_1" 188 | type: "Convolution" 189 | bottom: "pool1" 190 | top: "conv2_1" 191 | param { 192 | lr_mult: 1.0 193 | decay_mult: 1.0 194 | } 195 | convolution_param { 196 | num_output: 16 197 | pad: 1 198 | kernel_size: 3 199 | weight_filler { 200 | type: "xavier" 201 | } 202 | bias_term: false 203 | } 204 | } 205 | 206 | 207 | layer { 208 | name: "relu2_1" 209 | type: "ReLU" 210 | bottom: "conv2_1" 211 | top: "conv2_1" 212 | } 213 | layer { 214 | name: "conv2_2" 215 | type: "Convolution" 216 | bottom: "conv2_1" 217 | top: "conv2_2" 218 | param { 219 | lr_mult: 1.0 220 | decay_mult: 1.0 221 | } 222 | convolution_param { 223 | num_output: 16 224 | pad: 0 225 | kernel_size: 1 226 | weight_filler { 227 | type: "xavier" 228 | } 229 | bias_term: false 230 | } 231 | } 232 | 233 | layer { 234 | name: "relu2_2" 235 | type: "ReLU" 236 | bottom: "conv2_2" 237 | top: "conv2_2" 238 | } 239 | #CONV3########################################################## 240 | 241 | layer { 242 | name: "pool2" 243 | type: "Pooling" 244 | bottom: "conv2_2" 245 | top: "pool2" 246 | 
pooling_param { 247 | pool: MAX 248 | kernel_size: 2 249 | stride: 2 250 | } 251 | } 252 | layer { 253 | name: "conv3_1" 254 | type: "Convolution" 255 | bottom: "pool2" 256 | top: "conv3_1" 257 | param { 258 | lr_mult: 1.0 259 | decay_mult: 1.0 260 | } 261 | convolution_param { 262 | num_output: 32 263 | pad: 1 264 | kernel_size: 3 265 | weight_filler { 266 | type: "xavier" 267 | } 268 | bias_term: false 269 | } 270 | } 271 | 272 | layer { 273 | name: "relu3_1" 274 | type: "ReLU" 275 | bottom: "conv3_1" 276 | top: "conv3_1" 277 | } 278 | layer { 279 | name: "conv3_2" 280 | type: "Convolution" 281 | bottom: "conv3_1" 282 | top: "conv3_2" 283 | param { 284 | lr_mult: 1.0 285 | decay_mult: 1.0 286 | } 287 | convolution_param { 288 | num_output: 32 289 | pad: 0 290 | kernel_size: 1 291 | weight_filler { 292 | type: "xavier" 293 | } 294 | bias_term: false 295 | } 296 | } 297 | 298 | 299 | layer { 300 | name: "relu3_2" 301 | type: "ReLU" 302 | bottom: "conv3_2" 303 | top: "conv3_2" 304 | } 305 | layer { 306 | name: "conv3_3" 307 | type: "Convolution" 308 | bottom: "conv3_2" 309 | top: "conv3_3" 310 | param { 311 | lr_mult: 1.0 312 | decay_mult: 1.0 313 | } 314 | convolution_param { 315 | num_output: 32 316 | pad: 1 317 | kernel_size: 3 318 | weight_filler { 319 | type: "xavier" 320 | } 321 | bias_term: false 322 | } 323 | } 324 | 325 | layer { 326 | name: "relu3_3" 327 | type: "ReLU" 328 | bottom: "conv3_3" 329 | top: "conv3_3" 330 | } 331 | 332 | 333 | #CONV4########################################################## 334 | 335 | layer { 336 | name: "pool3" 337 | type: "Pooling" 338 | bottom: "conv3_3" 339 | top: "pool3" 340 | pooling_param { 341 | pool: MAX 342 | kernel_size: 2 343 | stride: 2 344 | } 345 | } 346 | layer { 347 | name: "conv4_1" 348 | type: "Convolution" 349 | bottom: "pool3" 350 | top: "conv4_1" 351 | param { 352 | lr_mult: 1.0 353 | decay_mult: 1.0 354 | } 355 | convolution_param { 356 | num_output: 64 357 | pad: 1 358 | kernel_size: 3 359 | 
weight_filler { 360 | type: "xavier" 361 | } 362 | bias_term: false 363 | } 364 | } 365 | 366 | layer { 367 | name: "relu4_1" 368 | type: "ReLU" 369 | bottom: "conv4_1" 370 | top: "conv4_1" 371 | } 372 | layer { 373 | name: "conv4_2" 374 | type: "Convolution" 375 | bottom: "conv4_1" 376 | top: "conv4_2" 377 | param { 378 | lr_mult: 1.0 379 | decay_mult: 1.0 380 | } 381 | convolution_param { 382 | num_output: 64 383 | pad: 0 384 | kernel_size: 1 385 | weight_filler { 386 | type: "xavier" 387 | } 388 | bias_term: false 389 | } 390 | } 391 | 392 | layer { 393 | name: "relu4_2" 394 | type: "ReLU" 395 | bottom: "conv4_2" 396 | top: "conv4_2" 397 | } 398 | layer { 399 | name: "conv4_3" 400 | type: "Convolution" 401 | bottom: "conv4_2" 402 | top: "conv4_3" 403 | param { 404 | lr_mult: 1.0 405 | decay_mult: 1.0 406 | } 407 | convolution_param { 408 | num_output: 64 409 | pad: 1 410 | kernel_size: 3 411 | weight_filler { 412 | type: "xavier" 413 | } 414 | bias_term: false 415 | } 416 | } 417 | 418 | layer { 419 | name: "relu4_3" 420 | type: "ReLU" 421 | bottom: "conv4_3" 422 | top: "conv4_3" 423 | } 424 | 425 | #CONV5########################################################## 426 | 427 | layer { 428 | name: "pool4" 429 | type: "Pooling" 430 | bottom: "conv4_3" 431 | top: "pool4" 432 | pooling_param { 433 | pool: MAX 434 | kernel_size: 2 435 | stride: 2 436 | } 437 | } 438 | layer { 439 | name: "conv5_1" 440 | type: "Convolution" 441 | bottom: "pool4" 442 | top: "conv5_1" 443 | param { 444 | lr_mult: 1.0 445 | decay_mult: 1.0 446 | } 447 | convolution_param { 448 | num_output: 128 449 | pad: 1 450 | kernel_size: 3 451 | weight_filler { 452 | type: "xavier" 453 | } 454 | bias_term: false 455 | dilation: 1 456 | } 457 | } 458 | 459 | layer { 460 | name: "relu5_1" 461 | type: "ReLU" 462 | bottom: "conv5_1" 463 | top: "conv5_1" 464 | } 465 | layer { 466 | name: "conv5_2" 467 | type: "Convolution" 468 | bottom: "conv5_1" 469 | top: "conv5_2" 470 | param { 471 | lr_mult: 1.0 472 | 
decay_mult: 1.0 473 | } 474 | convolution_param { 475 | num_output: 128 476 | pad: 0 477 | kernel_size: 1 478 | weight_filler { 479 | type: "xavier" 480 | } 481 | bias_term: false 482 | dilation: 1 483 | } 484 | } 485 | 486 | layer { 487 | name: "relu5_2" 488 | type: "ReLU" 489 | bottom: "conv5_2" 490 | top: "conv5_2" 491 | } 492 | layer { 493 | name: "conv5_3" 494 | type: "Convolution" 495 | bottom: "conv5_2" 496 | top: "conv5_3" 497 | param { 498 | lr_mult: 1.0 499 | decay_mult: 1.0 500 | } 501 | convolution_param { 502 | num_output: 128 503 | pad: 1 504 | kernel_size: 3 505 | weight_filler { 506 | type: "xavier" 507 | } 508 | bias_term: false 509 | dilation: 1 510 | } 511 | } 512 | 513 | layer { 514 | name: "relu5_3" 515 | type: "ReLU" 516 | bottom: "conv5_3" 517 | top: "conv5_3" 518 | } 519 | 520 | #CONV6########################################################## 521 | 522 | layer { 523 | name: "pool5" 524 | type: "Pooling" 525 | bottom: "conv5_3" 526 | top: "pool5" 527 | pooling_param { 528 | pool: MAX 529 | kernel_size: 2 530 | stride: 2 531 | } 532 | } 533 | 534 | layer { 535 | name: "conv6_1" 536 | type: "Convolution" 537 | bottom: "pool5" 538 | top: "conv6_1" 539 | param { 540 | lr_mult: 1.0 541 | decay_mult: 1.0 542 | } 543 | convolution_param { 544 | num_output: 128 545 | pad: 1 546 | kernel_size: 3 547 | weight_filler { 548 | type: "xavier" 549 | } 550 | bias_term: false 551 | dilation: 1 552 | } 553 | } 554 | 555 | layer { 556 | name: "relu6_1" 557 | type: "ReLU" 558 | bottom: "conv6_1" 559 | top: "conv6_1" 560 | } 561 | layer { 562 | name: "conv6_2" 563 | type: "Convolution" 564 | bottom: "conv6_1" 565 | top: "conv6_2" 566 | param { 567 | lr_mult: 1.0 568 | decay_mult: 1.0 569 | } 570 | convolution_param { 571 | num_output: 128 572 | pad: 0 573 | kernel_size: 1 574 | weight_filler { 575 | type: "xavier" 576 | } 577 | bias_term: false 578 | dilation: 1 579 | } 580 | } 581 | 582 | layer { 583 | name: "relu6_2" 584 | type: "ReLU" 585 | bottom: "conv6_2" 
586 | top: "conv6_2" 587 | } 588 | layer { 589 | name: "conv6_3" 590 | type: "Convolution" 591 | bottom: "conv6_2" 592 | top: "conv6_3" 593 | param { 594 | lr_mult: 1.0 595 | decay_mult: 1.0 596 | } 597 | convolution_param { 598 | num_output: 128 599 | pad: 1 600 | kernel_size: 3 601 | weight_filler { 602 | type: "xavier" 603 | } 604 | bias_term: false 605 | dilation: 1 606 | } 607 | } 608 | 609 | layer { 610 | name: "relu6_3" 611 | type: "ReLU" 612 | bottom: "conv6_3" 613 | top: "conv6_3" 614 | } 615 | 616 | #PRIORBOX3########################################## 617 | layer { 618 | name: "conv3_3_norm" 619 | type: "Normalize" 620 | bottom: "conv3_3" 621 | top: "conv3_3_norm" 622 | norm_param { 623 | across_spatial: false 624 | scale_filler { 625 | type: "constant" 626 | value: 10.0 627 | } 628 | channel_shared: false 629 | } 630 | } 631 | layer { 632 | name: "conv3_3_norm_mbox_loc_new" 633 | type: "Convolution" 634 | bottom: "conv3_3_norm" 635 | top: "conv3_3_norm_mbox_loc_new" 636 | param { 637 | lr_mult: 1.0 638 | decay_mult: 1.0 639 | } 640 | convolution_param { 641 | num_output: 12 642 | pad: 1 643 | kernel_size: 3 644 | stride: 1 645 | weight_filler { 646 | type: "xavier" 647 | } 648 | bias_term: false 649 | } 650 | } 651 | layer { 652 | name: "conv3_3_norm_mbox_loc_perm" 653 | type: "Permute" 654 | bottom: "conv3_3_norm_mbox_loc_new" 655 | top: "conv3_3_norm_mbox_loc_perm" 656 | permute_param { 657 | order: 0 658 | order: 2 659 | order: 3 660 | order: 1 661 | } 662 | } 663 | layer { 664 | name: "conv3_3_norm_mbox_loc_flat" 665 | type: "Flatten" 666 | bottom: "conv3_3_norm_mbox_loc_perm" 667 | top: "conv3_3_norm_mbox_loc_flat" 668 | flatten_param { 669 | axis: 1 670 | } 671 | } 672 | layer { 673 | name: "conv3_3_norm_mbox_conf_new" 674 | type: "Convolution" 675 | bottom: "conv3_3_norm" 676 | top: "conv3_3_norm_mbox_conf_new" 677 | param { 678 | lr_mult: 1.0 679 | decay_mult: 1.0 680 | } 681 | convolution_param { 682 | num_output: 6 683 | pad: 1 684 | 
kernel_size: 3 685 | stride: 1 686 | weight_filler { 687 | type: "xavier" 688 | } 689 | bias_term: false 690 | } 691 | } 692 | layer { 693 | name: "conv3_3_norm_mbox_conf_perm" 694 | type: "Permute" 695 | bottom: "conv3_3_norm_mbox_conf_new" 696 | top: "conv3_3_norm_mbox_conf_perm" 697 | permute_param { 698 | order: 0 699 | order: 2 700 | order: 3 701 | order: 1 702 | } 703 | } 704 | layer { 705 | name: "conv3_3_norm_mbox_conf_flat" 706 | type: "Flatten" 707 | bottom: "conv3_3_norm_mbox_conf_perm" 708 | top: "conv3_3_norm_mbox_conf_flat" 709 | flatten_param { 710 | axis: 1 711 | } 712 | } 713 | layer { 714 | 715 | name: "conv3_3_norm_mbox_priorbox" 716 | type: "PriorBox" 717 | bottom: "conv3_3_norm" 718 | bottom: "data" 719 | top: "conv3_3_norm_mbox_priorbox" 720 | prior_box_param { 721 | min_size: 10.0 722 | min_size: 16.0 723 | min_size: 24.0 724 | clip: false 725 | variance: 0.10000000149 726 | variance: 0.10000000149 727 | variance: 0.20000000298 728 | variance: 0.20000000298 729 | step: 8.0 730 | offset: 0.5 731 | } 732 | } 733 | #PRIORBOX4########################################## 734 | layer { 735 | name: "conv4_3_norm" 736 | type: "Normalize" 737 | bottom: "conv4_3" 738 | top: "conv4_3_norm" 739 | norm_param { 740 | across_spatial: false 741 | scale_filler { 742 | type: "constant" 743 | value: 8.0 744 | } 745 | channel_shared: false 746 | } 747 | } 748 | layer { 749 | name: "conv4_3_norm_mbox_loc" 750 | type: "Convolution" 751 | bottom: "conv4_3_norm" 752 | top: "conv4_3_norm_mbox_loc" 753 | param { 754 | lr_mult: 1.0 755 | decay_mult: 1.0 756 | } 757 | convolution_param { 758 | num_output: 8 759 | pad: 1 760 | kernel_size: 3 761 | stride: 1 762 | weight_filler { 763 | type: "xavier" 764 | } 765 | bias_term: false 766 | } 767 | } 768 | layer { 769 | name: "conv4_3_norm_mbox_loc_perm" 770 | type: "Permute" 771 | bottom: "conv4_3_norm_mbox_loc" 772 | top: "conv4_3_norm_mbox_loc_perm" 773 | permute_param { 774 | order: 0 775 | order: 2 776 | order: 3 777 | 
order: 1 778 | } 779 | } 780 | layer { 781 | name: "conv4_3_norm_mbox_loc_flat" 782 | type: "Flatten" 783 | bottom: "conv4_3_norm_mbox_loc_perm" 784 | top: "conv4_3_norm_mbox_loc_flat" 785 | flatten_param { 786 | axis: 1 787 | } 788 | } 789 | layer { 790 | name: "conv4_3_norm_mbox_conf" 791 | type: "Convolution" 792 | bottom: "conv4_3_norm" 793 | top: "conv4_3_norm_mbox_conf" 794 | param { 795 | lr_mult: 1.0 796 | decay_mult: 1.0 797 | } 798 | convolution_param { 799 | num_output: 4 800 | pad: 1 801 | kernel_size: 3 802 | stride: 1 803 | weight_filler { 804 | type: "xavier" 805 | } 806 | bias_term: false 807 | } 808 | } 809 | layer { 810 | name: "conv4_3_norm_mbox_conf_perm" 811 | type: "Permute" 812 | bottom: "conv4_3_norm_mbox_conf" 813 | top: "conv4_3_norm_mbox_conf_perm" 814 | permute_param { 815 | order: 0 816 | order: 2 817 | order: 3 818 | order: 1 819 | } 820 | } 821 | layer { 822 | name: "conv4_3_norm_mbox_conf_flat" 823 | type: "Flatten" 824 | bottom: "conv4_3_norm_mbox_conf_perm" 825 | top: "conv4_3_norm_mbox_conf_flat" 826 | flatten_param { 827 | axis: 1 828 | } 829 | } 830 | layer { 831 | 832 | name: "conv4_3_norm_mbox_priorbox" 833 | type: "PriorBox" 834 | bottom: "conv4_3_norm" 835 | bottom: "data" 836 | top: "conv4_3_norm_mbox_priorbox" 837 | prior_box_param { 838 | min_size: 32.0 839 | min_size: 48.0 840 | clip: false 841 | variance: 0.10000000149 842 | variance: 0.10000000149 843 | variance: 0.20000000298 844 | variance: 0.20000000298 845 | step: 16.0 846 | offset: 0.5 847 | } 848 | } 849 | #PRIORBOX5########################################## 850 | layer { 851 | name: "conv5_3_norm" 852 | type: "Normalize" 853 | bottom: "conv5_3" 854 | top: "conv5_3_norm" 855 | norm_param { 856 | across_spatial: false 857 | scale_filler { 858 | type: "constant" 859 | value: 5.0 860 | } 861 | channel_shared: false 862 | } 863 | } 864 | layer { 865 | name: "conv5_3_norm_mbox_loc" 866 | type: "Convolution" 867 | bottom: "conv5_3_norm" 868 | top: 
"conv5_3_norm_mbox_loc" 869 | param { 870 | lr_mult: 1.0 871 | decay_mult: 1.0 872 | } 873 | convolution_param { 874 | num_output: 8 875 | pad: 1 876 | kernel_size: 3 877 | stride: 1 878 | weight_filler { 879 | type: "xavier" 880 | } 881 | bias_term: false 882 | } 883 | } 884 | layer { 885 | name: "conv5_3_norm_mbox_loc_perm" 886 | type: "Permute" 887 | bottom: "conv5_3_norm_mbox_loc" 888 | top: "conv5_3_norm_mbox_loc_perm" 889 | permute_param { 890 | order: 0 891 | order: 2 892 | order: 3 893 | order: 1 894 | } 895 | } 896 | layer { 897 | name: "conv5_3_norm_mbox_loc_flat" 898 | type: "Flatten" 899 | bottom: "conv5_3_norm_mbox_loc_perm" 900 | top: "conv5_3_norm_mbox_loc_flat" 901 | flatten_param { 902 | axis: 1 903 | } 904 | } 905 | layer { 906 | name: "conv5_3_norm_mbox_conf" 907 | type: "Convolution" 908 | bottom: "conv5_3_norm" 909 | top: "conv5_3_norm_mbox_conf" 910 | param { 911 | lr_mult: 1.0 912 | decay_mult: 1.0 913 | } 914 | convolution_param { 915 | num_output: 4 916 | pad: 1 917 | kernel_size: 3 918 | stride: 1 919 | weight_filler { 920 | type: "xavier" 921 | } 922 | bias_term: false 923 | } 924 | } 925 | layer { 926 | name: "conv5_3_norm_mbox_conf_perm" 927 | type: "Permute" 928 | bottom: "conv5_3_norm_mbox_conf" 929 | top: "conv5_3_norm_mbox_conf_perm" 930 | permute_param { 931 | order: 0 932 | order: 2 933 | order: 3 934 | order: 1 935 | } 936 | } 937 | layer { 938 | name: "conv5_3_norm_mbox_conf_flat" 939 | type: "Flatten" 940 | bottom: "conv5_3_norm_mbox_conf_perm" 941 | top: "conv5_3_norm_mbox_conf_flat" 942 | flatten_param { 943 | axis: 1 944 | } 945 | } 946 | layer { 947 | 948 | name: "conv5_3_norm_mbox_priorbox" 949 | type: "PriorBox" 950 | bottom: "conv5_3_norm" 951 | bottom: "data" 952 | top: "conv5_3_norm_mbox_priorbox" 953 | prior_box_param { 954 | min_size: 64.0 955 | min_size: 96.0 956 | clip: false 957 | variance: 0.10000000149 958 | variance: 0.10000000149 959 | variance: 0.20000000298 960 | variance: 0.20000000298 961 | step: 32.0 962 
| offset: 0.5 963 | } 964 | } 965 | 966 | #PRIORBOX6########################################## 967 | layer { 968 | name: "conv6_3_norm" 969 | type: "Normalize" 970 | bottom: "conv6_3" 971 | top: "conv6_3_norm" 972 | norm_param { 973 | across_spatial: false 974 | scale_filler { 975 | type: "constant" 976 | value: 5.0 977 | } 978 | channel_shared: false 979 | } 980 | } 981 | layer { 982 | name: "conv6_3_norm_mbox_loc" 983 | type: "Convolution" 984 | bottom: "conv6_3_norm" 985 | top: "conv6_3_norm_mbox_loc" 986 | param { 987 | lr_mult: 1.0 988 | decay_mult: 1.0 989 | } 990 | convolution_param { 991 | num_output: 12 992 | pad: 1 993 | kernel_size: 3 994 | stride: 1 995 | weight_filler { 996 | type: "xavier" 997 | } 998 | bias_term: false 999 | } 1000 | } 1001 | layer { 1002 | name: "conv6_3_norm_mbox_loc_perm" 1003 | type: "Permute" 1004 | bottom: "conv6_3_norm_mbox_loc" 1005 | top: "conv6_3_norm_mbox_loc_perm" 1006 | permute_param { 1007 | order: 0 1008 | order: 2 1009 | order: 3 1010 | order: 1 1011 | } 1012 | } 1013 | layer { 1014 | name: "conv6_3_norm_mbox_loc_flat" 1015 | type: "Flatten" 1016 | bottom: "conv6_3_norm_mbox_loc_perm" 1017 | top: "conv6_3_norm_mbox_loc_flat" 1018 | flatten_param { 1019 | axis: 1 1020 | } 1021 | } 1022 | layer { 1023 | name: "conv6_3_norm_mbox_conf" 1024 | type: "Convolution" 1025 | bottom: "conv6_3_norm" 1026 | top: "conv6_3_norm_mbox_conf" 1027 | param { 1028 | lr_mult: 1.0 1029 | decay_mult: 1.0 1030 | } 1031 | convolution_param { 1032 | num_output: 6 1033 | pad: 1 1034 | kernel_size: 3 1035 | stride: 1 1036 | weight_filler { 1037 | type: "xavier" 1038 | } 1039 | bias_term: false 1040 | } 1041 | } 1042 | layer { 1043 | name: "conv6_3_norm_mbox_conf_perm" 1044 | type: "Permute" 1045 | bottom: "conv6_3_norm_mbox_conf" 1046 | top: "conv6_3_norm_mbox_conf_perm" 1047 | permute_param { 1048 | order: 0 1049 | order: 2 1050 | order: 3 1051 | order: 1 1052 | } 1053 | } 1054 | layer { 1055 | name: "conv6_3_norm_mbox_conf_flat" 1056 | type: 
"Flatten" 1057 | bottom: "conv6_3_norm_mbox_conf_perm" 1058 | top: "conv6_3_norm_mbox_conf_flat" 1059 | flatten_param { 1060 | axis: 1 1061 | } 1062 | } 1063 | layer { 1064 | 1065 | name: "conv6_3_norm_mbox_priorbox" 1066 | type: "PriorBox" 1067 | bottom: "conv6_3_norm" 1068 | bottom: "data" 1069 | top: "conv6_3_norm_mbox_priorbox" 1070 | prior_box_param { 1071 | min_size: 128.0 1072 | min_size: 192.0 1073 | min_size: 256.0 1074 | clip: false 1075 | variance: 0.10000000149 1076 | variance: 0.10000000149 1077 | variance: 0.20000000298 1078 | variance: 0.20000000298 1079 | step: 64.0 1080 | offset: 0.5 1081 | } 1082 | } 1083 | ######################################################## 1084 | layer { 1085 | name: "mbox_loc" 1086 | type: "Concat" 1087 | bottom: "conv3_3_norm_mbox_loc_flat" 1088 | bottom: "conv4_3_norm_mbox_loc_flat" 1089 | bottom: "conv5_3_norm_mbox_loc_flat" 1090 | bottom: "conv6_3_norm_mbox_loc_flat" 1091 | top: "mbox_loc" 1092 | concat_param { 1093 | axis: 1 1094 | } 1095 | } 1096 | layer { 1097 | name: "mbox_conf" 1098 | type: "Concat" 1099 | bottom: "conv3_3_norm_mbox_conf_flat" 1100 | bottom: "conv4_3_norm_mbox_conf_flat" 1101 | bottom: "conv5_3_norm_mbox_conf_flat" 1102 | bottom: "conv6_3_norm_mbox_conf_flat" 1103 | top: "mbox_conf" 1104 | concat_param { 1105 | axis: 1 1106 | } 1107 | } 1108 | layer { 1109 | name: "mbox_priorbox" 1110 | type: "Concat" 1111 | bottom: "conv3_3_norm_mbox_priorbox" 1112 | bottom: "conv4_3_norm_mbox_priorbox" 1113 | bottom: "conv5_3_norm_mbox_priorbox" 1114 | bottom: "conv6_3_norm_mbox_priorbox" 1115 | top: "mbox_priorbox" 1116 | concat_param { 1117 | axis: 2 1118 | } 1119 | } 1120 | 1121 | ##################################################### 1122 | layer { 1123 | 1124 | name: "mbox_loss" 1125 | type: "MultiBoxLoss" 1126 | bottom: "mbox_loc" 1127 | bottom: "mbox_conf" 1128 | bottom: "mbox_priorbox" 1129 | bottom: "label" 1130 | top: "mbox_loss" 1131 | include { 1132 | phase: TRAIN 1133 | } 1134 | propagate_down: true 
1135 | propagate_down: true 1136 | propagate_down: false 1137 | propagate_down: false 1138 | loss_param { 1139 | normalization: VALID 1140 | } 1141 | multibox_loss_param { 1142 | loc_loss_type: L2 1143 | conf_loss_type: SOFTMAX 1144 | loc_weight: 1.0 1145 | num_classes: 2 1146 | share_location: true 1147 | match_type: PER_PREDICTION 1148 | overlap_threshold: 0.34999999404 1149 | use_prior_for_matching: true 1150 | background_label_id: 0 1151 | use_difficult_gt: true 1152 | neg_pos_ratio: 3.0 1153 | neg_overlap: 0.34999999404 1154 | code_type: CENTER_SIZE 1155 | ignore_cross_boundary_bbox: false 1156 | mining_type: MAX_NEGATIVE 1157 | } 1158 | } 1159 | 1160 | -------------------------------------------------------------------------------- /libfacedetection/models/openvino/yufacedetectnet-open-v1-320x240.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chai2010/libfacedetection-go/80e89f84b0b18fa87abb1de4b48795e9a5505c15/libfacedetection/models/openvino/yufacedetectnet-open-v1-320x240.bin -------------------------------------------------------------------------------- /libfacedetection/src/facedetectcnn-model.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | By downloading, copying, installing or using the software you agree to this license. 3 | If you do not agree to this license, do not download, install, 4 | copy or use the software. 5 | 6 | 7 | License Agreement For libfacedetection 8 | (3-clause BSD License) 9 | 10 | Copyright (c) 2018-2019, Shiqi Yu, all rights reserved. 11 | shiqi.yu@gmail.com 12 | 13 | Redistribution and use in source and binary forms, with or without modification, 14 | are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 
18 | 19 | * Redistributions in binary form must reproduce the above copyright notice, 20 | this list of conditions and the following disclaimer in the documentation 21 | and/or other materials provided with the distribution. 22 | 23 | * Neither the names of the copyright holders nor the names of the contributors 24 | may be used to endorse or promote products derived from this software 25 | without specific prior written permission. 26 | 27 | This software is provided by the copyright holders and contributors "as is" and 28 | any express or implied warranties, including, but not limited to, the implied 29 | warranties of merchantability and fitness for a particular purpose are disclaimed. 30 | In no event shall copyright holders or contributors be liable for any direct, 31 | indirect, incidental, special, exemplary, or consequential damages 32 | (including, but not limited to, procurement of substitute goods or services; 33 | loss of use, data, or profits; or business interruption) however caused 34 | and on any theory of liability, whether in contract, strict liability, 35 | or tort (including negligence or otherwise) arising in any way out of 36 | the use of this software, even if advised of the possibility of such damage. 
37 | */ 38 | 39 | 40 | #include "facedetectcnn.h" 41 | #include 42 | #include 43 | 44 | #if 0 45 | #include 46 | #define TIME_START t=(double)cvGetTickCount(); 47 | #define TIME_END(FUNCNAME) t=((double)cvGetTickCount()-t)/((double)cvGetTickFrequency()*1000.); printf(FUNCNAME);printf("=%g\n", t); total+=t; 48 | #define TIME_TOTAL(tt) printf("total time=%g\n", (tt)); 49 | //#define TIME_END(FUNCNAME) t=((double)cvGetTickCount()-t)/((double)cvGetTickFrequency()*1000.); total+=t; 50 | #else 51 | #define TIME_START 52 | #define TIME_END(FUNCNAME) 53 | #define TIME_TOTAL(tt) 54 | #endif 55 | 56 | 57 | #define NUM_CONV_LAYER 24 58 | 59 | #if defined(_ENABLE_INT8_CONV) 60 | extern signed char * param_ppConvCoefInt8[NUM_CONV_LAYER]; 61 | #else 62 | extern float * param_ppConvCoefFloat[NUM_CONV_LAYER]; 63 | #endif 64 | extern float param_pConvCoefScales[NUM_CONV_LAYER]; 65 | 66 | 67 | typedef struct SConvInfo_{ 68 | int pad; 69 | int stride; 70 | int width; 71 | int height; 72 | int channels; 73 | int num; 74 | //float scale; 75 | }SConvInfo; 76 | 77 | Filters param_pFilters[NUM_CONV_LAYER]; //NUM_CONV_LAYER conv layers 78 | 79 | int param_pMean[3] = { 104,117,123 }; 80 | float param_pConv3Norm[32] = { 6.592306137084961f, 6.558613300323486f, 6.324647903442383f, 6.125812530517578f, 6.558647632598877f, 6.526843547821045f, 6.558306694030762f, 6.556366443634033f, 6.638453483581543f, 6.558482646942139f, 6.631646633148193f, 6.536031246185303f, 6.456478118896484f, 6.558736801147461f, 6.567635536193848f, 5.369370937347412f, 6.567112445831299f, 6.5589985847473145f, 6.602363586425781f, 6.558731555938721f, 6.419933795928955f, 6.138179779052734f, 6.250294208526611f, 6.562124252319336f, 6.579089164733887f, 6.553215503692627f, 6.191119194030762f, 3.9663331508636475f, 6.459974765777588f, 6.555095195770264f, 6.545119762420654f, 6.626718997955322f }; 81 | float param_pConv4Norm[64] = { 5.24936056137085f, 5.249833106994629f, 5.201494216918945f, 5.252224445343018f, 5.2574462890625f, 
5.226278305053711f, 5.258802890777588f, 5.254302024841309f, 5.1779465675354f, 5.24658203125f, 5.252774238586426f, 5.248640060424805f, 5.209632396697998f, 5.256057262420654f, 5.198976039886475f, 5.259532928466797f, 5.150023460388184f, 5.225643634796143f, 5.25822114944458f, 5.247387886047363f, 5.2590227127075195f, 5.25047492980957f, 5.264795303344727f, 5.24699592590332f, 5.249448299407959f, 5.205463409423828f, 5.252189636230469f, 5.255984783172607f, 5.209300994873047f, 5.261776447296143f, 5.251255035400391f, 5.254635810852051f, 5.25943660736084f, 5.248631477355957f, 5.285300254821777f, 5.254481792449951f, 5.248084545135498f, 5.249952793121338f, 5.234015941619873f, 5.263407230377197f, 5.245810508728027f, 5.2482500076293945f, 5.102107048034668f, 5.221688747406006f, 5.245429515838623f, 5.251964569091797f, 5.305455684661865f, 5.1277570724487305f, 5.28142786026001f, 5.235960006713867f, 5.205482482910156f, 5.251782417297363f, 5.182538032531738f, 5.27116584777832f, 5.198187351226807f, 5.248366832733154f, 5.254012584686279f, 5.251954555511475f, 5.259944438934326f, 5.248141288757324f, 5.183408737182617f, 5.265050888061523f, 5.24997615814209f, 5.262747287750244f }; 82 | float param_pConv5Norm[128] = { 3.2798357009887695f, 3.279853582382202f, 3.316532850265503f, 3.2797908782958984f, 3.283681631088257f, 3.292001962661743f, 3.2796599864959717f, 3.301004409790039f, 3.279745101928711f, 3.2916154861450195f, 3.304828405380249f, 3.2860844135284424f, 3.2962393760681152f, 3.2914977073669434f, 3.311239719390869f, 3.2796378135681152f, 3.30684757232666f, 3.2795956134796143f, 3.3049705028533936f, 3.2912471294403076f, 3.2799410820007324f, 3.280548095703125f, 3.279585838317871f, 3.3002915382385254f, 3.351128339767456f, 3.285721778869629f, 3.308690071105957f, 3.286360025405884f, 3.286612033843994f, 3.323331117630005f, 3.280801296234131f, 3.279557228088379f, 3.325319290161133f, 3.292274236679077f, 3.284330129623413f, 3.3016438484191895f, 3.305274248123169f, 3.293483257293701f, 
3.2965309619903564f, 3.343932628631592f, 3.2799618244171143f, 3.281886100769043f, 3.2855217456817627f, 3.283858299255371f, 3.299082040786743f, 3.295645236968994f, 3.291444778442383f, 3.279492139816284f, 3.2956502437591553f, 3.3284802436828613f, 3.2878544330596924f, 3.291749954223633f, 3.3015894889831543f, 3.2998600006103516f, 3.2887396812438965f, 3.2832252979278564f, 3.285311698913574f, 3.30757474899292f, 3.284590721130371f, 3.3025388717651367f, 3.2936882972717285f, 3.279754877090454f, 3.307007312774658f, 3.2958528995513916f, 3.3630170822143555f, 3.326841354370117f, 3.2800698280334473f, 3.2920491695404053f, 3.2991254329681396f, 3.309135913848877f, 3.2799878120422363f, 3.2878851890563965f, 3.302861452102661f, 3.315964698791504f, 3.279761791229248f, 3.3086979389190674f, 3.2836644649505615f, 3.29606294631958f, 3.2939038276672363f, 3.296156883239746f, 3.300607204437256f, 3.3293192386627197f, 3.2886781692504883f, 3.292102098464966f, 3.279629945755005f, 3.2798566818237305f, 3.2876806259155273f, 3.281590223312378f, 3.281094789505005f, 3.2978975772857666f, 3.2799761295318604f, 3.3351552486419678f, 3.2866907119750977f, 3.338275671005249f, 3.2797188758850098f, 3.280174493789673f, 3.296318531036377f, 3.281552314758301f, 3.2805323600769043f, 3.294194459915161f, 3.279611349105835f, 3.3100433349609375f, 3.2793779373168945f, 3.2797317504882812f, 3.2823593616485596f, 3.2944772243499756f, 3.280740976333618f, 3.2863688468933105f, 3.299750804901123f, 3.282517194747925f, 3.2863147258758545f, 3.286238193511963f, 3.280881881713867f, 3.2873098850250244f, 3.2873218059539795f, 3.2804245948791504f, 3.306823968887329f, 3.312803030014038f, 3.2798690795898438f, 3.288886785507202f, 3.3185698986053467f, 3.299873113632202f, 3.279698610305786f, 3.3057520389556885f, 3.3125710487365723f, 3.2796096801757812f, 3.2905843257904053f, 3.2902579307556152f }; 83 | float param_pConv6Norm[128] = { 3.2981998920440674f, 3.2801780700683594f, 3.2922229766845703f, 3.2955803871154785f, 3.294820547103882f, 
3.292754888534546f, 3.2865407466888428f, 3.286324977874756f, 3.2815260887145996f, 3.2840657234191895f, 3.282493829727173f, 3.333282470703125f, 3.289398193359375f, 3.2884361743927f, 3.2864975929260254f, 3.2839083671569824f, 3.2875373363494873f, 3.322467565536499f, 3.28637433052063f, 3.286494731903076f, 3.291119337081909f, 3.322329044342041f, 3.2931485176086426f, 3.279733896255493f, 3.2803969383239746f, 3.295511245727539f, 3.2980639934539795f, 3.280717134475708f, 3.2957653999328613f, 3.317964553833008f, 3.2798070907592773f, 3.3057520389556885f, 3.305992841720581f, 3.348924160003662f, 3.291982650756836f, 3.3030447959899902f, 3.288562774658203f, 3.2987096309661865f, 3.2800376415252686f, 3.336385488510132f, 3.2968106269836426f, 3.296558380126953f, 3.2796730995178223f, 3.286644458770752f, 3.2797164916992188f, 3.2899343967437744f, 3.3156163692474365f, 3.3474082946777344f, 3.291513442993164f, 3.3053133487701416f, 3.283517360687256f, 3.305210828781128f, 3.303983211517334f, 3.282759189605713f, 3.280306816101074f, 3.2799665927886963f, 3.285292387008667f, 3.3079776763916016f, 3.29679274559021f, 3.280120849609375f, 3.2796525955200195f, 3.28070068359375f, 3.2877960205078125f, 3.302424192428589f, 3.2920310497283936f, 3.286226511001587f, 3.2799699306488037f, 3.2863574028015137f, 3.324301242828369f, 3.291632890701294f, 3.294870376586914f, 3.289574146270752f, 3.297481060028076f, 3.304020881652832f, 3.2841410636901855f, 3.2966504096984863f, 3.280271530151367f, 3.2840888500213623f, 3.3100359439849854f, 3.282552719116211f, 3.2812161445617676f, 3.3106632232666016f, 3.2951345443725586f, 3.280796766281128f, 3.283688545227051f, 3.328918695449829f, 3.2930126190185547f, 3.291832685470581f, 3.2913215160369873f, 3.282050609588623f, 3.280987501144409f, 3.303039073944092f, 3.2960453033447266f, 3.2923476696014404f, 3.2912893295288086f, 3.283773422241211f, 3.308786392211914f, 3.294309377670288f, 3.2953407764434814f, 3.2821991443634033f, 3.2892892360687256f, 3.2869884967803955f, 
3.3056561946868896f, 3.281874895095825f, 3.337285280227661f, 3.2868685722351074f, 3.2931010723114014f, 3.279754638671875f, 3.2797317504882812f, 3.3095757961273193f, 3.2800772190093994f, 3.299697160720825f, 3.2798924446105957f, 3.2848010063171387f, 3.2815306186676025f, 3.29996919631958f, 3.29498553276062f, 3.317089557647705f, 3.2876529693603516f, 3.2813613414764404f, 3.290708065032959f, 3.2828986644744873f, 3.2800583839416504f, 3.3065834045410156f, 3.287909746170044f, 3.2797391414642334f, 3.3226301670074463f, 3.323007345199585f }; 84 | 85 | SConvInfo param_pConvInfo[NUM_CONV_LAYER] = { 86 | //conv1_1 87 | { 1, 2, 3, 3, 3, 16 }, 88 | //conv1_2 89 | { 0, 1, 1, 1, 16, 16 }, 90 | //conv2_1 91 | { 1, 1, 3, 3, 16, 16 }, 92 | //conv2_2 93 | { 0, 1, 1, 1, 16, 16 }, 94 | //conv3_1 95 | { 1, 1, 3, 3, 16, 32 }, 96 | //conv2_2 97 | { 0, 1, 1, 1, 32, 32 }, 98 | //conv3_3 99 | { 1, 1, 3, 3, 32, 32 }, 100 | //conv4_1 101 | { 1, 1, 3, 3, 32, 64 }, 102 | //conv4_2 103 | { 0, 1, 1, 1, 64, 64 }, 104 | //conv4_3 105 | { 1, 1, 3, 3, 64, 64 }, 106 | //conv5_1 107 | { 1, 1, 3, 3, 64,128 }, 108 | //conv5_2 109 | { 0, 1, 1, 1,128,128 }, 110 | //conv5_3 111 | { 1, 1, 3, 3,128,128 }, 112 | //conv6_1 113 | { 1, 1, 3, 3,128,128 }, 114 | //conv6_2 115 | { 0, 1, 1, 1,128,128 }, 116 | //conv6_3 117 | { 1, 1, 3, 3,128,128 }, 118 | //loc3 119 | { 1, 1, 3, 3, 32, 12 }, 120 | //conf3 121 | { 1, 1, 3, 3, 32, 6 }, 122 | //loc4 123 | { 1, 1, 3, 3, 64, 8 }, 124 | //conf4 125 | { 1, 1, 3, 3, 64, 4 }, 126 | //loc5 127 | { 1, 1, 3, 3,128, 8 }, 128 | //conf5 129 | { 1, 1, 3, 3,128, 4 }, 130 | //loc6 131 | { 1, 1, 3, 3,128, 12 }, 132 | //conf6 133 | { 1, 1, 3, 3,128, 6 }, 134 | }; 135 | 136 | bool param_initialized = false; 137 | 138 | void init_parameters() 139 | { 140 | //set filters 0 141 | { 142 | int i = 0; 143 | param_pFilters[i].stride = 1;// param_pConvInfo[i].stride; 144 | param_pFilters[i].pad = 0;// param_pConvInfo[i].pad; 145 | param_pFilters[i].scale = param_pConvCoefScales[i]; 146 | int offset = 
param_pConvInfo[i].width * param_pConvInfo[i].height * param_pConvInfo[i].channels; 147 | 148 | for(int ff = 0; ff < param_pConvInfo[i].num; ff++) 149 | { 150 | CDataBlob * b3x3 = new CDataBlob(param_pConvInfo[i].width, param_pConvInfo[i].height, param_pConvInfo[i].channels); 151 | CDataBlob * b1x1 = new CDataBlob(); 152 | #if defined(_ENABLE_INT8_CONV) 153 | b3x3->setInt8DataFromCaffeFormat(param_ppConvCoefInt8[i] + ff * offset, 154 | param_pConvInfo[i].width, param_pConvInfo[i].height, param_pConvInfo[i].channels); 155 | blob2vector(b3x3, b1x1, false); 156 | #else 157 | b3x3->setFloatDataFromCaffeFormat(param_ppConvCoefFloat[i] + ff * offset, 158 | param_pConvInfo[i].width, param_pConvInfo[i].height, param_pConvInfo[i].channels); 159 | blob2vector(b3x3, b1x1, true); 160 | #endif 161 | delete b3x3; 162 | b3x3 = 0; 163 | param_pFilters[i].filters.push_back(b1x1); 164 | } 165 | } 166 | //set the rest 167 | for(int i = 1; i < NUM_CONV_LAYER; i++) 168 | { 169 | param_pFilters[i].stride = param_pConvInfo[i].stride; 170 | param_pFilters[i].pad = param_pConvInfo[i].pad; 171 | param_pFilters[i].scale = param_pConvCoefScales[i]; 172 | int offset = param_pConvInfo[i].width * param_pConvInfo[i].height * param_pConvInfo[i].channels; 173 | 174 | for(int ff = 0; ff < param_pConvInfo[i].num; ff++) 175 | { 176 | CDataBlob * b = new CDataBlob(param_pConvInfo[i].width, param_pConvInfo[i].height, param_pConvInfo[i].channels); 177 | #if defined(_ENABLE_INT8_CONV) 178 | b->setInt8DataFromCaffeFormat(param_ppConvCoefInt8[i] + ff * offset, 179 | param_pConvInfo[i].width, param_pConvInfo[i].height, param_pConvInfo[i].channels); 180 | #else 181 | b->setFloatDataFromCaffeFormat(param_ppConvCoefFloat[i] + ff * offset, 182 | param_pConvInfo[i].width, param_pConvInfo[i].height, param_pConvInfo[i].channels); 183 | #endif 184 | param_pFilters[i].filters.push_back(b); 185 | } 186 | } 187 | } 188 | 189 | vector objectdetect_cnn(unsigned char * rgbImageData, int width, int height, int step) 190 | 
{ 191 | CDataBlob inputImage; 192 | CDataBlob pConvDataBlobs[NUM_CONV_LAYER]; 193 | CDataBlob pool1, pool2, pool3, pool4, pool5; 194 | CDataBlob conv3norm, conv4norm, conv5norm, conv6norm; 195 | CDataBlob conv3priorbox, conv4priorbox, conv5priorbox, conv6priorbox; 196 | CDataBlob conv3priorbox_flat, conv4priorbox_flat, conv5priorbox_flat, conv6priorbox_flat, mbox_priorbox; 197 | CDataBlob conv3loc_flat, conv4loc_flat, conv5loc_flat, conv6loc_flat, mbox_loc; 198 | CDataBlob conv3conf_flat, conv4conf_flat, conv5conf_flat, conv6conf_flat, mbox_conf; 199 | 200 | double total = 0.0; 201 | double t = 0.0; 202 | 203 | TIME_START; 204 | if (!param_initialized) 205 | { 206 | init_parameters(); 207 | param_initialized = true; 208 | } 209 | TIME_END("init"); 210 | 211 | 212 | total = 0.0; 213 | 214 | TIME_START; 215 | //inputImage.setDataFromImage(rgbImageData, width, height, 3, step, param_pMean); 216 | inputImage.setDataFrom3x3S2P1to1x1S1P0FromImage(rgbImageData, width, height, 3, step, param_pMean); 217 | TIME_END("convert data"); 218 | 219 | 220 | /***************CONV1*********************/ 221 | int convidx = 0; 222 | TIME_START; 223 | convolution(&inputImage, param_pFilters + convidx, pConvDataBlobs + convidx); 224 | TIME_END("conv11"); 225 | TIME_START; 226 | relu(pConvDataBlobs+convidx); 227 | TIME_END("relu11"); 228 | 229 | convidx++; 230 | TIME_START; 231 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 232 | TIME_END("conv12"); 233 | TIME_START; 234 | relu(pConvDataBlobs+convidx); 235 | TIME_END("relu12"); 236 | 237 | TIME_START; 238 | maxpooling2x2S2(pConvDataBlobs+convidx, &pool1); 239 | TIME_END("pool1"); 240 | 241 | /***************CONV2*********************/ 242 | convidx++; 243 | TIME_START; 244 | convolution(&pool1, param_pFilters+convidx, pConvDataBlobs+convidx); 245 | TIME_END("conv21"); 246 | TIME_START; 247 | relu(pConvDataBlobs+convidx); 248 | TIME_END("relu21"); 249 | 250 | convidx++; 251 | TIME_START; 252 | 
convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 253 | TIME_END("conv22"); 254 | TIME_START 255 | relu(pConvDataBlobs+convidx); 256 | TIME_END("relu22"); 257 | 258 | TIME_START; 259 | maxpooling2x2S2(pConvDataBlobs+convidx, &pool2); 260 | TIME_END("pool2"); 261 | 262 | /***************CONV3*********************/ 263 | convidx++; 264 | TIME_START; 265 | convolution(&pool2, param_pFilters+convidx, pConvDataBlobs+convidx); 266 | TIME_END("conv31"); 267 | TIME_START; 268 | relu(pConvDataBlobs+convidx); 269 | TIME_END("relu31"); 270 | 271 | convidx++; 272 | TIME_START; 273 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 274 | TIME_END("conv32"); 275 | TIME_START; 276 | relu(pConvDataBlobs+convidx); 277 | TIME_END("relu32"); 278 | 279 | convidx++; 280 | TIME_START; 281 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 282 | TIME_END("conv33"); 283 | TIME_START; 284 | relu(pConvDataBlobs+convidx); 285 | TIME_END("relu33"); 286 | 287 | TIME_START; 288 | maxpooling2x2S2(pConvDataBlobs+convidx, &pool3); 289 | TIME_END("pool3"); 290 | 291 | /***************CONV4*********************/ 292 | convidx++; 293 | TIME_START; 294 | convolution(&pool3, param_pFilters+convidx, pConvDataBlobs+convidx); 295 | TIME_END("conv41"); 296 | TIME_START 297 | relu(pConvDataBlobs+convidx); 298 | TIME_END("relu41"); 299 | 300 | convidx++; 301 | TIME_START; 302 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 303 | TIME_END("conv42"); 304 | TIME_START; 305 | relu(pConvDataBlobs+convidx); 306 | TIME_END("relu42"); 307 | 308 | convidx++; 309 | TIME_START; 310 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 311 | TIME_END("conv43"); 312 | TIME_START; 313 | relu(pConvDataBlobs+convidx); 314 | TIME_END("relu43"); 315 | 316 | TIME_START; 317 | maxpooling2x2S2(pConvDataBlobs+convidx, &pool4); 318 | 
TIME_END("pool4"); 319 | 320 | /***************CONV5*********************/ 321 | convidx++; 322 | TIME_START; 323 | convolution(&pool4, param_pFilters+convidx, pConvDataBlobs+convidx); 324 | TIME_END("conv51"); 325 | TIME_START; 326 | relu(pConvDataBlobs+convidx); 327 | TIME_END("relu51"); 328 | 329 | convidx++; 330 | TIME_START; 331 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 332 | TIME_END("conv52"); 333 | TIME_START 334 | relu(pConvDataBlobs+convidx); 335 | TIME_END("relu52"); 336 | 337 | convidx++; 338 | TIME_START; 339 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 340 | TIME_END("conv53"); 341 | TIME_START; 342 | relu(pConvDataBlobs+convidx); 343 | TIME_END("relu53"); 344 | 345 | TIME_START; 346 | maxpooling2x2S2(pConvDataBlobs+convidx, &pool5); 347 | TIME_END("pool5"); 348 | 349 | /***************CONV6*********************/ 350 | convidx++; 351 | TIME_START; 352 | convolution(&pool5, param_pFilters+convidx, pConvDataBlobs+convidx); 353 | TIME_END("conv61"); 354 | TIME_START; 355 | relu(pConvDataBlobs+convidx); 356 | TIME_END("relu61"); 357 | 358 | convidx++; 359 | TIME_START; 360 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 361 | TIME_END("conv62"); 362 | TIME_START 363 | relu(pConvDataBlobs+convidx); 364 | TIME_END("relu62"); 365 | 366 | convidx++; 367 | TIME_START; 368 | convolution(pConvDataBlobs+convidx-1, param_pFilters+convidx, pConvDataBlobs+convidx); 369 | TIME_END("conv63"); 370 | TIME_START; 371 | relu(pConvDataBlobs+convidx); 372 | TIME_END("relu63"); 373 | 374 | /***************PRIORBOX3*********************/ 375 | int conv3idx = 6; 376 | TIME_START; 377 | normalize(pConvDataBlobs+ conv3idx, param_pConv3Norm); 378 | TIME_END("norm3"); 379 | 380 | convidx++; 381 | TIME_START 382 | convolution(pConvDataBlobs+ conv3idx, param_pFilters+convidx, pConvDataBlobs+convidx); 383 | TIME_END("prior3 loc"); 384 | 385 | convidx++; 
386 | TIME_START; 387 | convolution(pConvDataBlobs+ conv3idx, param_pFilters+convidx, pConvDataBlobs+convidx); 388 | TIME_END("prior3 conf"); 389 | 390 | TIME_START; 391 | float pSizes3[3] = {10, 16, 24}; 392 | priorbox(pConvDataBlobs+ conv3idx, &inputImage, 3, pSizes3, &conv3priorbox); 393 | TIME_END("prior3"); 394 | 395 | /***************PRIORBOX4*********************/ 396 | int conv4idx = 9; 397 | TIME_START; 398 | normalize(pConvDataBlobs + conv4idx, param_pConv4Norm); 399 | TIME_END("norm4"); 400 | 401 | convidx++; 402 | TIME_START 403 | convolution(pConvDataBlobs + conv4idx, param_pFilters + convidx, pConvDataBlobs + convidx); 404 | TIME_END("prior4 loc"); 405 | 406 | convidx++; 407 | TIME_START; 408 | convolution(pConvDataBlobs + conv4idx, param_pFilters + convidx, pConvDataBlobs + convidx); 409 | TIME_END("prior4 conf"); 410 | 411 | TIME_START; 412 | float pSizes4[2] = { 32, 48}; 413 | priorbox(pConvDataBlobs + conv4idx, &inputImage, 2, pSizes4, &conv4priorbox); 414 | TIME_END("prior4"); 415 | 416 | /***************PRIORBOX5*********************/ 417 | int conv5idx = 12; 418 | TIME_START; 419 | normalize(pConvDataBlobs + conv5idx, param_pConv5Norm); 420 | TIME_END("norm5"); 421 | 422 | convidx++; 423 | TIME_START 424 | convolution(pConvDataBlobs + conv5idx, param_pFilters + convidx, pConvDataBlobs + convidx); 425 | TIME_END("prior5 loc"); 426 | 427 | convidx++; 428 | TIME_START; 429 | convolution(pConvDataBlobs + conv5idx, param_pFilters + convidx, pConvDataBlobs + convidx); 430 | TIME_END("prior5 conf"); 431 | 432 | TIME_START; 433 | float pSizes5[2] = { 64, 96 }; 434 | priorbox(pConvDataBlobs + conv5idx, &inputImage, 2, pSizes5, &conv5priorbox); 435 | TIME_END("prior5"); 436 | 437 | /***************PRIORBOX6*********************/ 438 | int conv6idx = 15; 439 | TIME_START; 440 | normalize(pConvDataBlobs + conv6idx, param_pConv5Norm); 441 | TIME_END("norm6"); 442 | 443 | convidx++; 444 | TIME_START 445 | convolution(pConvDataBlobs + conv6idx, param_pFilters 
+ convidx, pConvDataBlobs + convidx); 446 | TIME_END("prior6 loc"); 447 | 448 | convidx++; 449 | TIME_START; 450 | convolution(pConvDataBlobs + conv6idx, param_pFilters + convidx, pConvDataBlobs + convidx); 451 | TIME_END("prior6 conf"); 452 | 453 | TIME_START; 454 | float pSizes6[3] = { 128, 192, 256 }; 455 | priorbox(pConvDataBlobs + conv6idx, &inputImage, 3, pSizes6, &conv6priorbox); 456 | TIME_END("prior6"); 457 | 458 | 459 | 460 | TIME_START; 461 | blob2vector(&conv3priorbox, &conv3priorbox_flat, true); 462 | blob2vector(pConvDataBlobs + 16, &conv3loc_flat, true); 463 | blob2vector(pConvDataBlobs + 17, &conv3conf_flat, true); 464 | 465 | blob2vector(&conv4priorbox, &conv4priorbox_flat, true); 466 | blob2vector(pConvDataBlobs + 18, &conv4loc_flat, true); 467 | blob2vector(pConvDataBlobs + 19, &conv4conf_flat, true); 468 | 469 | blob2vector(&conv5priorbox, &conv5priorbox_flat, true); 470 | blob2vector(pConvDataBlobs + 20, &conv5loc_flat, true); 471 | blob2vector(pConvDataBlobs + 21, &conv5conf_flat, true); 472 | 473 | blob2vector(&conv6priorbox, &conv6priorbox_flat, true); 474 | blob2vector(pConvDataBlobs + 22, &conv6loc_flat, true); 475 | blob2vector(pConvDataBlobs + 23, &conv6conf_flat, true); 476 | TIME_END("prior flat"); 477 | 478 | 479 | 480 | TIME_START 481 | concat4(&conv3priorbox_flat, &conv4priorbox_flat, &conv5priorbox_flat, &conv6priorbox_flat, &mbox_priorbox); 482 | concat4(&conv3loc_flat, &conv4loc_flat, &conv5loc_flat, &conv6loc_flat, &mbox_loc); 483 | concat4(&conv3conf_flat, &conv4conf_flat, &conv5conf_flat, &conv6conf_flat, &mbox_conf); 484 | TIME_END("concat prior") 485 | 486 | TIME_START 487 | softmax1vector2class(&mbox_conf); 488 | TIME_END("softmax") 489 | 490 | 491 | CDataBlob facesInfo; 492 | TIME_START; 493 | detection_output(&mbox_priorbox, &mbox_loc, &mbox_conf, 0.3f, 0.5f, 100, 50, &facesInfo); 494 | TIME_END("detection output") 495 | 496 | 497 | 498 | TIME_START; 499 | std::vector faces; 500 | for (int i = 0; i < facesInfo.width; i++) 
501 | { 502 | float score = facesInfo.getElementFloat(i, 0, 0); 503 | float bbxmin = facesInfo.getElementFloat(i, 0, 1); 504 | float bbymin = facesInfo.getElementFloat(i, 0, 2); 505 | float bbxmax = facesInfo.getElementFloat(i, 0, 3); 506 | float bbymax = facesInfo.getElementFloat(i, 0, 4); 507 | FaceRect r; 508 | r.score = score; 509 | //r.x = int(bbxmin * width + 0.5f); 510 | //r.y = int(bbymin * height + 0.5f); 511 | //r.w = int((bbxmax - bbxmin) * width + 0.5f); 512 | //r.h = int((bbymax - bbymin) * height + 0.5f); 513 | 514 | r.w = int( ((bbxmax - bbxmin) * width + (bbymax - bbymin) * height + 1) / 2); 515 | r.h = r.w; 516 | r.x = int(((bbxmin + bbxmax) * width - r.w + 0.5f) / 2); 517 | r.y = int(((bbymin + bbymax) * height - r.h + 0.5f) / 2); 518 | 519 | faces.push_back(r); 520 | } 521 | TIME_END("copy result"); 522 | 523 | TIME_TOTAL(total); 524 | 525 | return faces; 526 | } 527 | int * facedetect_cnn(unsigned char * result_buffer, //buffer memory for storing face detection results, !!its size must be 0x20000 Bytes!! 528 | unsigned char * rgb_image_data, int width, int height, int step) //input image, it must be RGB (three-channel) image! 
{
#ifdef __CALL_LIMIT__
	// Optional compile-time cap on how many detections may be performed.
	static int call_count = 0;
#endif

	if (!result_buffer)
	{
		fprintf(stderr, "%s: null buffer memory.\n", __FUNCTION__);
		return NULL;
	}
	//clear memory
	//memset(result_buffer, 0, 0x20000);
	// Only the leading 4-byte face counter is cleared, not the whole buffer.
	result_buffer[0] = 0;
	result_buffer[1] = 0;
	result_buffer[2] = 0;
	result_buffer[3] = 0;

	// NOTE(review): the element type was lost in extraction -- presumably
	// vector<FaceRect>; confirm against the upstream source.
	vector faces = objectdetect_cnn(rgb_image_data, width, height, step);

	double t, total=0;
	TIME_START;

	// The result buffer holds at most 256 face records.
	int num_faces =(int)faces.size();
	num_faces = MIN(num_faces, 256);

	int * pCount = (int *)result_buffer;
	pCount[0] = num_faces;

	// Each record is 142 shorts (284 bytes) starting 4 bytes in:
	// x, y, w, h, then score*score*100; the remainder is left untouched here.
	for (int i = 0; i < num_faces; i++)
	{
		short * p = ((short*)(result_buffer + 4)) + 142 * i;
		p[0] = (short)faces[i].x;
		p[1] = (short)faces[i].y;
		p[2] = (short)faces[i].w;
		p[3] = (short)faces[i].h;
		p[4] = (short)(faces[i].score * faces[i].score * 100);
	}
#ifdef __CALL_LIMIT__
	// Once the call budget is exhausted, silently wipe the results.
	if(call_count>1814403)
	{
		memset(result_buffer, 0 , 4+284*num_faces);
	}
	else
		call_count++;
#endif

	TIME_END("call detection");
	return pCount;
}
--------------------------------------------------------------------------------
/libfacedetection/src/facedetectcnn.cpp:
--------------------------------------------------------------------------------
/*
By downloading, copying, installing or using the software you agree to this license.
If you do not agree to this license, do not download, install,
copy or use the software.


                  License Agreement For libfacedetection
                     (3-clause BSD License)

Copyright (c) 2018-2019, Shiqi Yu, all rights reserved.
11 | shiqi.yu@gmail.com 12 | 13 | Redistribution and use in source and binary forms, with or without modification, 14 | are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright notice, 20 | this list of conditions and the following disclaimer in the documentation 21 | and/or other materials provided with the distribution. 22 | 23 | * Neither the names of the copyright holders nor the names of the contributors 24 | may be used to endorse or promote products derived from this software 25 | without specific prior written permission. 26 | 27 | This software is provided by the copyright holders and contributors "as is" and 28 | any express or implied warranties, including, but not limited to, the implied 29 | warranties of merchantability and fitness for a particular purpose are disclaimed. 30 | In no event shall copyright holders or contributors be liable for any direct, 31 | indirect, incidental, special, exemplary, or consequential damages 32 | (including, but not limited to, procurement of substitute goods or services; 33 | loss of use, data, or profits; or business interruption) however caused 34 | and on any theory of liability, whether in contract, strict liability, 35 | or tort (including negligence or otherwise) arising in any way out of 36 | the use of this software, even if advised of the possibility of such damage. 
*/

#include "facedetectcnn.h"
// NOTE(review): the header names of the following includes were lost during
// extraction (angle brackets stripped); restore from the upstream file.
#include
#include
#include
#include //for FLT_EPSION
#include //for stable_sort, sort

// Accessor for one lane of an AVX __m256 vector (MSVC exposes .m256_f32).
#if defined( __WIN__) || defined(_WINDOWS)
#define SSE_256ELEMENT(vec, idx) vec.m256_f32[(idx)]
#else
#define SSE_256ELEMENT(vec, idx) vec[(idx)]
#endif

// OpenMP 4.0 (201307) or newer enables "#pragma omp simd" paths below.
#if !defined(_ENABLE_OPENMP_SIMD) && ((defined(_OPENMP) && (_OPENMP >= 201307L)))
#  define _ENABLE_OPENMP_SIMD
#elif defined(__cilk)
#  define _ENABLE_CILKPLUS
#endif

// Axis-aligned box; fields suggest coordinates normalized to [0,1],
// consistent with the priorbox/detection_output usage elsewhere in the file.
typedef struct NormalizedBBox_
{
	float xmin;
	float ymin;
	float xmax;
	float ymax;
} NormalizedBBox;


// Allocate size bytes aligned to _MALLOC_ALIGN. The raw malloc pointer is
// stored in the word just before the returned pointer so myFree_ can free it.
void* myAlloc(size_t size)
{
	char *ptr, *ptr0;
	ptr0 = (char*)malloc(
		(size_t)(size + _MALLOC_ALIGN * ((size >= 4096) + 1) + sizeof(char*)));

	if (!ptr0)
		return 0;

	// align the pointer
	ptr = (char*)(((size_t)(ptr0 + sizeof(char*) + 1) + _MALLOC_ALIGN - 1) & ~(size_t)(_MALLOC_ALIGN - 1));
	*(char**)(ptr - sizeof(char*)) = ptr0;

	return ptr;
}


// Release memory obtained from myAlloc by recovering the stashed raw pointer.
// Misaligned pointers are silently ignored.
void myFree_(void* ptr)
{
	// Pointer must be aligned by _MALLOC_ALIGN
	if (ptr)
	{
		if (((size_t)ptr & (_MALLOC_ALIGN - 1)) != 0)
			return;
		free(*((char**)ptr - 1));
	}

}


// Dot product of two float vectors; NEON, AVX2 and scalar variants.
// NOTE(review): the NEON path assumes num is a multiple of 4 -- confirm
// the blob channel padding guarantees this.
inline float dotProductFloatChGeneral(float* p1, float * p2, int num, int lengthInBytes)
{
#if defined(_ENABLE_NEON) && !defined(_ENABLE_INT8_CONV)
	float sum = 0.0f;
	float32x4_t a, b;
	float32x4_t result_vec;

	result_vec = vdupq_n_f32(0); //zeros
	for (int i = 0; i < num; i += 4)
	{
		a = vld1q_f32(p1 + i);
		b = vld1q_f32(p2 + i);
		result_vec = vmlaq_f32(result_vec, a, b);
	}
	sum += vgetq_lane_f32(result_vec, 0);
	sum += vgetq_lane_f32(result_vec, 1);
	sum += vgetq_lane_f32(result_vec, 2);
	sum += vgetq_lane_f32(result_vec, 3);

	return sum;

#elif defined(_ENABLE_AVX2) && !defined(_ENABLE_INT8_CONV)
	float sum = 0;
	// AVX2 path iterates by lengthInBytes (the padded channel step),
	// not num, 8 floats per step.
	int end = lengthInBytes / sizeof(float);

	__m256 sumvec = _mm256_setzero_ps();
	__m256 avec, bvec;
	for (int i = 0; i < end; i += 8)
	{
		avec = _mm256_load_ps(p1 + i);
		bvec = _mm256_load_ps(p2 + i);
		//_mm256_fmadd_ps needs FMA support
		//but _mm256_add_ps and _mm256_mul_ps only need AVX

		//sumvec = _mm256_add_ps(sumvec, _mm256_mul_ps(avec, bvec));

		//fmadd is faster than add+mul
		sumvec = _mm256_fmadd_ps(avec, bvec, sumvec);

		//Note: _mm256_dp_ps is much slower than the previou line of code
	}
	// Two horizontal adds leave lane sums in elements 0 and 4.
	sumvec = _mm256_hadd_ps(sumvec, sumvec);
	sumvec = _mm256_hadd_ps(sumvec, sumvec);
	sum += SSE_256ELEMENT(sumvec, 0);
	sum += SSE_256ELEMENT(sumvec, 4);

	return sum;

#else
	// Portable scalar fallback.
	float sum = 0;

#if defined(_ENABLE_OPENMP_SIMD)
#pragma omp simd reduction(+:sum)
#endif
	for (int i = 0; i < num; i++)
	{
		sum += (p1[i] * p2[i]);
	}
	return sum;
#endif
}

// Dot product of two signed-char vectors with int accumulation;
// NEON, AVX2 and scalar variants.
inline int dotProductInt8ChGeneral(signed char * p1, signed char * p2, int num, int lengthInBytes)
{
#if defined(_ENABLE_NEON) && defined(_ENABLE_INT8_CONV)
	// Earlier 16-bit NEON implementation kept for reference:
	//int sum = 0;
	//int16x8_t a, b;
	//int16x8_t result_vec;
	//int32x4_t d;
	//

	//result_vec = vdupq_n_s16(0); //zeros
	//for (int i = 0; i < num; i += 8)
	//{
	//	a = vld1q_s16(p1 + i);
	//	b = vld1q_s16(p2 + i);
	//	result_vec = vmlaq_s16(result_vec, a, b);
	//}
	//d = vpaddlq_s16(result_vec);
	//sum += vgetq_lane_s32(d, 0);
	//sum += vgetq_lane_s32(d, 1);
	//sum += vgetq_lane_s32(d, 2);
	//sum += vgetq_lane_s32(d, 3);

	//return sum;

	int sum = 0;
	int8x8x2_t a, b;
	int16x8_t result_vec;
	int32x4_t d;


	result_vec = vdupq_n_s16(0);
	//zeros
	// Processes 16 bytes per iteration via de-interleaving loads.
	// NOTE(review): 16-bit accumulation can overflow for long vectors of
	// large-magnitude int8 products -- presumably bounded by the model; verify.
	for (int i = 0; i < num; i += 16)
	{
		a = vld2_s8(p1 + i);
		b = vld2_s8(p2 + i);
		result_vec = vmlal_s8(result_vec, a.val[0], b.val[0]);
		result_vec = vmlal_s8(result_vec, a.val[1], b.val[1]);
	}
	d = vpaddlq_s16(result_vec);
	sum += vgetq_lane_s32(d, 0);
	sum += vgetq_lane_s32(d, 1);
	sum += vgetq_lane_s32(d, 2);
	sum += vgetq_lane_s32(d, 3);

	return sum;

#elif defined(_ENABLE_AVX2) && defined(_ENABLE_INT8_CONV)
	int sum = 0;
	int i = 0;

	short sumarray[16];

	__m256i temp_sum;
	__m128i ac, bc;
	__m256i as, bs;
	for (; i < num; i += 16)
	{
		// Widen 16 int8 values to int16 before multiplying.
		ac = _mm_load_si128((__m128i*)(p1 + i));
		bc = _mm_load_si128((__m128i*)(p2 + i));
		as = _mm256_cvtepi8_epi16(ac);
		bs = _mm256_cvtepi8_epi16(bc);
		temp_sum = _mm256_mullo_epi16(as, bs);
		temp_sum = _mm256_hadd_epi16(temp_sum, temp_sum);
		temp_sum = _mm256_hadd_epi16(temp_sum, temp_sum);
		//temp_sum = _mm256_hadd_epi16(temp_sum, temp_sum);
		_mm256_store_si256((__m256i*)sumarray, temp_sum);
		//sum += ((int)(sumarray[0]) + (int)(sumarray[8]));
		sum += ((int)(sumarray[0]) + (int)(sumarray[1]) + +(int)(sumarray[8]) + (int)(sumarray[9]));
	}
	return sum;
#else

	// Portable scalar fallback with full int precision.
	int sum = 0;

#if defined(_ENABLE_OPENMP_SIMD)
#pragma omp simd reduction(+:sum)
#endif
	for (int i = 0; i < num; i++)
	{
		sum += ( int(p1[i]) * int(p2[i]));
	}
	return sum;
#endif
}

// 1x1, pad 0, stride 1 convolution on float data: each output channel is a
// dot product over all input channels at the same pixel.
bool convolutionFloat1x1P0S1(const CDataBlob *inputData, const Filters* filters, CDataBlob *outputData)
{
#if defined(_OPENMP)
#pragma omp parallel for
#endif
	for (int row = 0; row < outputData->height; row++)
	{
		for (int col = 0; col < outputData->width; col++)
		{
			float * pOut = (outputData->data_float + (row*outputData->width +
				col)*outputData->floatChannelStepInByte / sizeof(float));
			float * pIn = (inputData->data_float + (row*inputData->width + col)*inputData->floatChannelStepInByte / sizeof(float));
			for (int ch = 0; ch < outputData->channels; ch++)
			{
				float * pF = (float*)(filters->filters[ch]->data_float);
				pOut[ch] = dotProductFloatChGeneral(pIn, pF, inputData->channels, inputData->floatChannelStepInByte);
			}
		}
	}
	return true;
}

// Int8 counterpart of the 1x1 convolution above; accumulates in int and
// stores the raw (unscaled) result as float.
bool convolutionInt81x1P0S1(const CDataBlob *inputData, const Filters* filters, CDataBlob *outputData)
{
#if defined(_OPENMP)
#pragma omp parallel for
#endif
	for (int row = 0; row < outputData->height; row++)
	{
		for (int col = 0; col < outputData->width; col++)
		{
			float * pOut = (outputData->data_float + (row*outputData->width + col)*outputData->floatChannelStepInByte / sizeof(float));
			signed char * pIn = (inputData->data_int8 + (row*inputData->width + col)*inputData->int8ChannelStepInByte / sizeof(char));
			for (int ch = 0; ch < outputData->channels; ch++)
			{
				signed char * pF = (filters->filters[ch]->data_int8);
				pOut[ch] = (float)dotProductInt8ChGeneral(pIn, pF, inputData->channels, inputData->int8ChannelStepInByte);
			}
		}
	}
	return true;
}



// 3x3 convolution, pad 1, arbitrary channel count, float data. The window is
// processed one image row at a time so border rows/columns can be clipped.
bool convolutionFloat3x3P1ChGeneral(const CDataBlob *inputData, const Filters* filters, CDataBlob *outputData)
{
#if defined(_OPENMP)
#pragma omp parallel for
#endif
	for (int row = 0; row < outputData->height; row++)
	{
		int elementStepInFloat = inputData->floatChannelStepInByte/sizeof(float);
		int stride = filters->stride;
		int src_centery = row * stride;
		for (int col = 0; col < outputData->width; col++)
		{
			// Clip the 3-wide horizontal window to the image border.
			int srcx_start = col * stride - 1;
			int srcx_end = srcx_start + 3;
			srcx_start = MAX(0, srcx_start);
			srcx_end = MIN(srcx_end,
				inputData->width);
			int num_pixels = srcx_end - srcx_start;
			int num_pixels_infloat = (srcx_end - srcx_start) * elementStepInFloat;

			for (int ch = 0; ch < outputData->channels; ch++)
			{
				int srcy = src_centery - 1;

				float * pIn = (inputData->data_float + (srcy *inputData->width + srcx_start)*elementStepInFloat);
				float * pF = (filters->filters[ch]->data_float) + (srcx_start - col*stride + 1) * elementStepInFloat;
				float * pOut = (outputData->data_float + (row*outputData->width + col)*outputData->floatChannelStepInByte / sizeof(float));
				pOut[ch] = 0; //the new created blob is not zeros, clear it first

				// Top row of the 3x3 window (skipped above the image).
				{
					if (srcy >= 0)
					{
						pOut[ch] += dotProductFloatChGeneral(pIn,
							pF,
							num_pixels_infloat,
							num_pixels_infloat * sizeof(float));
					}
				}
				// Middle row.
				{
					srcy++;
					{
						pIn += (inputData->width * elementStepInFloat);
						pOut[ch] += dotProductFloatChGeneral(pIn,
							pF + ( 3 * elementStepInFloat),
							num_pixels_infloat,
							num_pixels_infloat * sizeof(float));
					}
				}
				// Bottom row (skipped below the image).
				{
					srcy++;
					if (srcy < inputData->height)
					{
						pIn += (inputData->width * elementStepInFloat);
						pOut[ch] += dotProductFloatChGeneral(pIn,
							pF + ( 6 * elementStepInFloat ),
							num_pixels_infloat,
							num_pixels_infloat * sizeof(float));
					}
				}
			}
		}
	}
	return true;
}

// Int8 counterpart of the 3x3 pad-1 convolution above.
bool convolutionInt83x3P1ChGeneral(const CDataBlob *inputData, const Filters* filters, CDataBlob *outputData)
{
#if defined(_OPENMP)
#pragma omp parallel for
#endif
	for (int row = 0; row < outputData->height; row++)
	{
		int elementStep = inputData->int8ChannelStepInByte;
		int stride = filters->stride;
		int src_centery = row * stride;
		for (int col = 0; col < outputData->width; col++)
		{
			int srcx_start = col * stride - 1;
			int srcx_end = srcx_start + 3;

			srcx_start = MAX(0, srcx_start);
			srcx_end = MIN(srcx_end, inputData->width);
			int num_pixels_inbytes = (srcx_end - srcx_start) * elementStep;

			for (int ch = 0; ch < outputData->channels; ch++)
			{
				int srcy = src_centery - 1;

				signed char * pIn = (inputData->data_int8 + (srcy *inputData->width + srcx_start)*elementStep);
				signed char * pF = (filters->filters[ch]->data_int8) + ( (srcx_start - col*stride + 1)) * elementStep;
				float * pOut = (outputData->data_float + (row*outputData->width + col)*outputData->floatChannelStepInByte / sizeof(float));
				pOut[ch] = 0;//the new created blob is not zeros, clear it first

				// Top row of the 3x3 window (skipped above the image).
				{
					if (srcy >= 0)
					{
						pOut[ch] += dotProductInt8ChGeneral(pIn,
							pF,
							num_pixels_inbytes,
							num_pixels_inbytes);
					}
				}
				// Middle row.
				{
					srcy++;
					{
						pIn += (inputData->width * elementStep);
						pOut[ch] += dotProductInt8ChGeneral(pIn,
							pF + (3 * elementStep),
							num_pixels_inbytes,
							num_pixels_inbytes);
					}
				}
				// Bottom row (skipped below the image).
				{
					srcy++;
					if (srcy < inputData->height)
					{
						pIn += (inputData->width * elementStep);
						pOut[ch] += dotProductInt8ChGeneral(pIn,
							pF + (6 * elementStep),
							num_pixels_inbytes,
							num_pixels_inbytes);
					}
				}
			}
		}
	}
	return true;
}

// Quantize the blob's float data into its int8 buffer using a symmetric
// scale of 127/max(|x|); records the scale in int8float_scale and marks the
// int8 data valid. Returns early if the int8 data is already valid.
bool convertFloat2Int8(CDataBlob * dataBlob)
{
	if (dataBlob->data_float == NULL || dataBlob->data_int8 == NULL)
	{
		cerr << __FUNCTION__ << ": The input data is null."
			<< endl;
		return false;
	}

	float maxval = -FLT_MAX;
#if defined(_ENABLE_NEON)
	float32x4_t maxvalvec = vdupq_n_f32(-FLT_MAX);
	float32x4_t scalevec;
#elif defined(_ENABLE_AVX2)
	//__m256 maxvalvec = _mm256_set1_ps(-FLT_MAX);
	__m256 scalevec;
#endif

	float scale = 1.f;

	// Skip requantization when the int8 copy is already up to date.
	if (dataBlob->int8_data_valid)
		return true;

	// Pass 1: find the maximum absolute value over all elements.
	for (int row = 0; row < dataBlob->height; row++)
	{
		for (int col = 0; col < dataBlob->width; col++)
		{
			float * pF = (dataBlob->data_float + (row*dataBlob->width + col)*dataBlob->floatChannelStepInByte / sizeof(float));

#if defined(_ENABLE_NEON)
			for (int ch = 0; ch < dataBlob->channels; ch+=4)
			{
				float32x4_t a;
				a = vld1q_f32(pF + ch);
				a = vabsq_f32(a);
				maxvalvec = vmaxq_f32(maxvalvec, a);
			}
#else

#if defined(_ENABLE_OPENMP_SIMD)
#pragma omp simd reduction(max:maxval)
#endif
			for (int ch = 0; ch < dataBlob->channels; ch++)
			{
				float tmp;
				//tmp = fabs(pF[ch]);
				//maxval = MAX(maxval, tmp);
				// Branch-free absolute value: multiply by +-1.
				tmp = pF[ch];
				tmp = tmp * ((tmp > 0) * 2 - 1);
				maxval = MAX(maxval, tmp);
			}
#endif
		}
	}
#if defined(_ENABLE_NEON)
	// Reduce the four NEON lanes into the scalar maximum.
	{
		float tmp;
		tmp = vgetq_lane_f32(maxvalvec, 0);
		maxval = MAX(maxval, tmp);
		tmp = vgetq_lane_f32(maxvalvec, 1);
		maxval = MAX(maxval, tmp);
		tmp = vgetq_lane_f32(maxvalvec, 2);
		maxval = MAX(maxval, tmp);
		tmp = vgetq_lane_f32(maxvalvec, 3);
		maxval = MAX(maxval, tmp);
	}
#endif
	scale = 127.f / (maxval + FLT_EPSILON);

#if defined(_ENABLE_NEON)
	scalevec = vdupq_n_f32(scale);
#elif defined(_ENABLE_AVX2)
	scalevec = _mm256_set1_ps(scale);
#endif

	// Pass 2: scale and round-to-nearest each element into the int8 buffer.
#if defined(_OPENMP)
#pragma omp parallel for
#endif
	for (int row = 0; row < dataBlob->height; row++)
	{
		for (int col = 0; col < dataBlob->width; col++)
		{
			float * pF = (dataBlob->data_float + (row*dataBlob->width + col)*dataBlob->floatChannelStepInByte / sizeof(float));
			signed char * pI = (dataBlob->data_int8 + (row*dataBlob->width + col)*dataBlob->int8ChannelStepInByte / sizeof(char));

#if defined(_ENABLE_NEON)
			for (int ch = 0; ch < dataBlob->channels; ch+=4)
			{
				float tmp;
				float32x4_t a = vld1q_f32(pF + ch);
				float32x4_t resultvec = vmulq_f32(a, scalevec);

				////becuase Floating-point to integer conversions "vcvtq_s32_f32" use round towards zero.
				////but we need round to nearest
				////so we cannot use the following NEON instructions
				//int32x4_t int32resultvec = vcvtq_s32_f32(resultvec);
				//int16x4_t int16resultvec = vqmovn_s32(int32resultvec);
				//vst1_s16(pI + ch, int16resultvec);

				tmp = vgetq_lane_f32(resultvec, 0);
				pI[ch] = (signed char)(tmp + ((tmp>0) - 0.5f));
				tmp = vgetq_lane_f32(resultvec, 1);
				pI[ch+1] = (signed char)(tmp + ((tmp>0) - 0.5f));
				tmp = vgetq_lane_f32(resultvec, 2);
				pI[ch+2] = (signed char)(tmp + ((tmp>0) - 0.5f));
				tmp = vgetq_lane_f32(resultvec, 3);
				pI[ch+3] = (signed char)(tmp + ((tmp>0) - 0.5f));
			}
#else
#if defined(_ENABLE_OPENMP_SIMD)
#pragma omp simd
#endif
			for (int ch = 0; ch < dataBlob->channels; ch++)
			{
				float tmp;
				//pI[ch] = (signed char)round(pF[ch] * scale);
				//to speedup round() using the following code
				tmp = pF[ch];
				pI[ch] = (signed char)(tmp * scale + ((tmp>0)-0.5f));
			}
#endif
		}
	}
	dataBlob->int8float_scale = scale;
	dataBlob->int8_data_valid = true;

	return true;
}


// Dispatch a convolution: validates filters, computes the output size
// (only 1x1 p0 s1 and 3x3 p1 s{1,2} are supported), optionally quantizes the
// input, runs the matching kernel, and rescales int8 results back to float.
bool convolution(CDataBlob *inputData, const Filters* filters, CDataBlob *outputData)
{
	if (inputData->data_float == NULL || inputData->data_int8 == NULL)
	{
		cerr <<
			__FUNCTION__ << ": The input data is null." << endl;
		return false;
	}
	// NOTE(review): several diagnostics below carry typos kept verbatim
	// ("There is not filters", "Onle", "Unspported", "Unsported") -- they are
	// runtime strings and must not be silently edited in a review pass.
	if (filters->filters.size() == 0)
	{
		cerr << __FUNCTION__ << ": There is not filters." << endl;
		return false;
	}
	//check filters' dimensions
	int filterW = filters->filters[0]->width;
	int filterH = filters->filters[0]->height;
	int filterC = filters->filters[0]->channels;
	int filterS = filters->stride;
	int filterP = filters->pad;

	int outputW = 0;
	int outputH = 0;
	int outputC = (int)filters->filters.size();

	// All filters must share the first filter's dimensions.
	for (int i = 1; i < outputC; i++)
	{
		if ((filterW != filters->filters[i]->width) ||
			(filterH != filters->filters[i]->height) ||
			(filterC != filters->filters[i]->channels))
		{
			cerr << __FUNCTION__ << ": The filters must be the same size." << endl;
			return false;
		}
	}

	if (filterC != inputData->channels)
	{
		cerr << __FUNCTION__ << ": The number of channels of filters must be the same with the input data. " << filterC << " vs " << inputData->channels << endl;
		return false;
	}

	//calculate the output dimension
	if (filterW == 1 && filterH == 1) //1x1 filters
	{
		if (filterS != 1)
		{
			cerr << __FUNCTION__ << ": Onle stride = 1 is supported for 1x1 filters." << endl;
			return false;
		}
		if (filterP != 0)
		{
			cerr << __FUNCTION__ << ": Onle pad = 0 is supported for 1x1 filters."
				<< endl;
			return false;
		}
		// 1x1, pad 0, stride 1: output size equals input size.
		outputW = inputData->width;
		outputH = inputData->height;

	}
	else if (filterW == 3 && filterH == 3) //3x3 filters
	{
		if (filterS == 1 && filterP == 1)
		{
			outputW = inputData->width;
			outputH = inputData->height;
		}
		else if (filterS == 2 && filterP == 1)
		{
			// Stride 2 halves each dimension, rounding up.
			outputW = (inputData->width + 1) / 2;
			outputH = (inputData->height + 1) / 2;
		}
		else
		{
			cerr << __FUNCTION__ << ": Unspported filter stride=" << filterS << " or pad=" << filterP << endl;
			cerr << __FUNCTION__ << ": For 3x3 filters, only pad=1 and stride={1,2} are supported." << endl;
			return false;
		}
	}
	else
	{
		cerr << __FUNCTION__ << ": Unsported filter size." << endl;
		return false;
	}

	if (outputW < 1 || outputH < 1)
	{
		cerr << __FUNCTION__ << ": The size of the output is not correct. (" << outputW << ", " << outputH << ")."
			<< endl;
		return false;
	}

	outputData->create(outputW, outputH, outputC);

	// Debug helper (disabled): print the input value range before convolving.
	/*
	{
		float maxval = -FLT_MAX;
		float minval = FLT_MAX;

		for (int row = 0; row < inputData->height; row++)
		{
			for (int col = 0; col < inputData->width; col++)
			{
				float * pF = (inputData->data_float + (row*inputData->width + col)*inputData->floatChannelStepInByte / sizeof(float));
				for (int ch = 0; ch < inputData->channels; ch++)
				{
					maxval = MAX(maxval, pF[ch]);
					minval = MIN(minval, pF[ch]);
				}
			}
		}
		cout << "\t\t\t\tconv range [min, max]=[" << minval << ", " << maxval << "]" << endl;
	}
	*/
#if defined(_ENABLE_INT8_CONV)
	// Quantize the input once; no-op if its int8 copy is already valid.
	convertFloat2Int8(inputData);
#endif

	// Dispatch to the kernel matching the filter size and build flavor.
	if (filterW == 1 && filterH == 1) //1x1 filters
	{
#if defined(_ENABLE_INT8_CONV)
		convolutionInt81x1P0S1(inputData, filters, outputData);
#else
		convolutionFloat1x1P0S1(inputData, filters, outputData);
#endif
	}
	else if (filterW == 3 && filterH == 3) //3x3 filters
	{
#if defined(_ENABLE_INT8_CONV)
		convolutionInt83x3P1ChGeneral(inputData, filters, outputData);
#else
		convolutionFloat3x3P1ChGeneral(inputData, filters, outputData);
#endif
	}

#if defined(_ENABLE_INT8_CONV)
	// Undo both quantization scales (input blob and filter bank).
	scale(outputData, 1.0f / (inputData->int8float_scale * filters->scale));
#endif

	return true;
}

//only 2X2 S2 is supported
// 2x2 stride-2 max pooling over the float data, window clipped at borders.
bool maxpooling2x2S2(const CDataBlob *inputData, CDataBlob *outputData)
{
	if (inputData->data_float == NULL)
	{
		cerr << __FUNCTION__ << ": The input data is null."
			<< endl;
		return false;
	}
	// NOTE(review): the template argument of static_cast was lost in
	// extraction (presumably static_cast<int>) -- restore from upstream.
	int outputW = static_cast(ceil((inputData->width - 3.0f) / 2)) + 1;
	int outputH = static_cast(ceil((inputData->height - 3.0f) / 2)) + 1;
	int outputC = inputData->channels;

	if (outputW < 1 || outputH < 1)
	{
		cerr << __FUNCTION__ << ": The size of the output is not correct. (" << outputW << ", " << outputH << ")." << endl;
		return false;
	}

	int elementStep = inputData->floatChannelStepInByte / sizeof(float);
	int lineElementStep = inputData->width * elementStep;

	outputData->create(outputW, outputH, outputC);

	for (int row = 0; row < outputData->height; row++)
	{
		for (int col = 0; col < outputData->width; col++)
		{
			// Collect the element offsets of the (up to 4) window pixels,
			// clipping the 2x2 window at the right/bottom borders.
			int inputMatOffsetsInElement[4];
			int elementCount = 0;

			int hstart = row * 2;
			int wstart = col * 2;
			int hend = MIN(hstart + 2, inputData->height);
			int wend = MIN(wstart + 2, inputData->width);

			for (int fy = hstart; fy < hend; fy++)
				for (int fx = wstart; fx < wend; fx++)
				{
					inputMatOffsetsInElement[elementCount++] = (fy *inputData->width + fx) * inputData->floatChannelStepInByte / sizeof(float);
				}

			float * pOut = outputData->data_float + (row*outputData->width + col) * outputData->floatChannelStepInByte / sizeof(float);
			float * pIn = inputData->data_float;

#if defined(_ENABLE_NEON)
			for (int ch = 0; ch < outputData->channels; ch += 4)
			{
				float32x4_t a;
				float32x4_t maxval = vld1q_f32(pIn + ch + inputMatOffsetsInElement[0]);
				for (int el = 1; el < elementCount; el++)
				{
					a = vld1q_f32(pIn + ch + inputMatOffsetsInElement[el]);
					maxval = vmaxq_f32(maxval, a);
				}
				vst1q_f32(pOut + ch, maxval);
			}
#elif defined(_ENABLE_AVX2)
			for (int ch = 0; ch < outputData->channels; ch += 8)
			{
				__m256 a;
				__m256 maxval = _mm256_load_ps(pIn + ch +
					inputMatOffsetsInElement[0]);
				for (int el = 1; el < elementCount; el++)
				{
					a = _mm256_load_ps(pIn + ch + inputMatOffsetsInElement[el]);
					maxval = _mm256_max_ps(maxval, a);
				}
				_mm256_store_ps(pOut + ch, maxval);
			}
#else

			// Scalar fallback: per-channel maximum over the window pixels.
			for (int ch = 0; ch < outputData->channels; ch++)
			{
				float maxval = pIn[ch + inputMatOffsetsInElement[0]];
#if defined(_ENABLE_OPENMP_SIMD)
#pragma omp simd reduction(max:maxval)
#endif
				for (int el = 1; el < elementCount; el++)
				{
					maxval = MAX(maxval, pIn[ch + inputMatOffsetsInElement[el]]);
				}
				pOut[ch] = maxval;
			}
#endif
		}
	}

	return true;
}



// Concatenate four same-sized blobs along the channel axis.
bool concat4(const CDataBlob *inputData1, const CDataBlob *inputData2, const CDataBlob *inputData3, const CDataBlob *inputData4, CDataBlob *outputData)
{
	if ((inputData1->data_float == NULL) || (inputData2->data_float == NULL) || (inputData3->data_float == NULL) || (inputData4->data_float == NULL))
	{
		cerr << __FUNCTION__ << ": The input data is null." << endl;
		return false;
	}

	if ((inputData1->width != inputData2->width) ||
		(inputData1->height != inputData2->height) ||
		(inputData1->width != inputData3->width) ||
		(inputData1->height != inputData3->height) ||
		(inputData1->width != inputData4->width) ||
		(inputData1->height != inputData4->height))
	{
		// NOTE(review): message says "three inputs" but four are compared;
		// the string is kept verbatim here (runtime text).
		cerr << __FUNCTION__ << ": The three inputs must have the same size." << endl;
		return false;
	}
	int outputW = inputData1->width;
	int outputH = inputData1->height;
	int outputC = inputData1->channels + inputData2->channels + inputData3->channels + inputData4->channels;

	if (outputW < 1 || outputH < 1 || outputC < 1)
	{
		cerr << __FUNCTION__ << ": The size of the output is not correct. (" << outputW << ", " << outputH << ", " << outputC << ")."
<< endl; 797 | return false; 798 | } 799 | 800 | outputData->create(outputW, outputH, outputC); 801 | 802 | for (int row = 0; row < outputData->height; row++) 803 | { 804 | for (int col = 0; col < outputData->width; col++) 805 | { 806 | float * pOut = (outputData->data_float + (row*outputData->width + col)*outputData->floatChannelStepInByte / sizeof(float)); 807 | float * pIn1 = (inputData1->data_float + (row*inputData1->width + col)*inputData1->floatChannelStepInByte / sizeof(float)); 808 | float * pIn2 = (inputData2->data_float + (row*inputData2->width + col)*inputData2->floatChannelStepInByte / sizeof(float)); 809 | float * pIn3 = (inputData3->data_float + (row*inputData3->width + col)*inputData3->floatChannelStepInByte / sizeof(float)); 810 | float * pIn4 = (inputData4->data_float + (row*inputData4->width + col)*inputData4->floatChannelStepInByte / sizeof(float)); 811 | 812 | memcpy(pOut, pIn1, sizeof(float)* inputData1->channels); 813 | memcpy(pOut + inputData1->channels, pIn2, sizeof(float)* inputData2->channels); 814 | memcpy(pOut + inputData1->channels + inputData2->channels, pIn3, sizeof(float)* inputData3->channels); 815 | memcpy(pOut + inputData1->channels + inputData2->channels + inputData3->channels, pIn4, sizeof(float)* inputData4->channels); 816 | } 817 | } 818 | return true; 819 | } 820 | 821 | bool scale(CDataBlob * dataBlob, float scale) 822 | { 823 | if (dataBlob->data_float == NULL || dataBlob->data_int8 == NULL) 824 | { 825 | cerr << __FUNCTION__ << ": The input data is null." 
<< endl; 826 | return false; 827 | } 828 | 829 | for (int row = 0; row < dataBlob->height; row++) 830 | { 831 | for (int col = 0; col < dataBlob->width; col++) 832 | { 833 | float * pF = (dataBlob->data_float + (row*dataBlob->width + col)*dataBlob->floatChannelStepInByte / sizeof(float)); 834 | #if defined(_ENABLE_NEON) 835 | float32x4_t a, bscale; 836 | float32x4_t result_vec; 837 | 838 | bscale = vdupq_n_f32(scale); 839 | for (int ch = 0; ch < dataBlob->channels; ch+=4) 840 | { 841 | a = vld1q_f32(pF + ch); 842 | result_vec = vmulq_f32(a, bscale); 843 | vst1q_f32(pF + ch, result_vec); 844 | } 845 | #elif defined(_ENABLE_AVX2) 846 | __m256 a, bscale; 847 | 848 | bscale = _mm256_set1_ps(scale); 849 | for (int ch = 0; ch < dataBlob->channels; ch += 8) 850 | { 851 | a = _mm256_load_ps(pF + ch); 852 | a = _mm256_mul_ps(a, bscale); 853 | _mm256_store_ps(pF + ch, a); 854 | } 855 | 856 | #else 857 | #if defined(_ENABLE_OPENMP_SIMD) 858 | #pragma omp simd 859 | #endif 860 | for (int ch = 0; ch < dataBlob->channels; ch++) 861 | { 862 | pF[ch] *= scale; 863 | } 864 | #endif 865 | } 866 | } 867 | return true; 868 | } 869 | 870 | bool relu(const CDataBlob *inputOutputData) 871 | { 872 | if (inputOutputData->data_float == NULL) 873 | { 874 | cerr << __FUNCTION__ << ": The input data is null." 
<< endl; 875 | return false; 876 | } 877 | 878 | 879 | for (int row = 0; row < inputOutputData->height; row++) 880 | { 881 | for (int col = 0; col < inputOutputData->width; col++) 882 | { 883 | float * pData = (float*)(inputOutputData->data_float + (row*inputOutputData->width + col)*inputOutputData->floatChannelStepInByte / sizeof(float)); 884 | 885 | #if defined(_ENABLE_NEON) 886 | float32x4_t a, bzeros; 887 | float32x4_t result_vec; 888 | 889 | bzeros = vdupq_n_f32(0); //zeros 890 | for (int ch = 0; ch < inputOutputData->channels; ch += 4) 891 | { 892 | a = vld1q_f32(pData + ch); 893 | result_vec = vmaxq_f32(a, bzeros); 894 | vst1q_f32(pData + ch, result_vec); 895 | } 896 | #elif defined(_ENABLE_AVX2) 897 | __m256 a, bzeros; 898 | 899 | bzeros = _mm256_setzero_ps(); //zeros 900 | for (int ch = 0; ch < inputOutputData->channels; ch += 8) 901 | { 902 | a = _mm256_load_ps(pData + ch); 903 | a = _mm256_max_ps(a, bzeros); 904 | _mm256_store_ps(pData + ch, a); 905 | } 906 | #else 907 | #if defined(_ENABLE_OPENMP_SIMD) 908 | #pragma omp simd 909 | #endif 910 | for (int ch = 0; ch < inputOutputData->channels; ch++) 911 | pData[ch] = MAX(pData[ch], 0); 912 | #endif 913 | } 914 | } 915 | return true; 916 | } 917 | 918 | bool priorbox(const CDataBlob * featureData, const CDataBlob * imageData, int num_sizes, float * pWinSizes, CDataBlob * outputData) 919 | { 920 | if ((featureData->data_float == NULL) || 921 | imageData->data_float == NULL|| 922 | pWinSizes == NULL) 923 | { 924 | cerr << __FUNCTION__ << ": The input data is null." 
<< endl; 925 | return false; 926 | } 927 | 928 | int feature_width = featureData->width; 929 | int feature_height = featureData->height; 930 | int image_width = imageData->width * 2; 931 | int image_height = imageData->height * 2; 932 | 933 | float step_w = static_cast(image_width) / feature_width; 934 | float step_h = static_cast(image_height) / feature_height; 935 | 936 | float * output_data = outputData->data_float; 937 | 938 | // outputData->create(feature_width, feature_height, num_sizes * 4 * 2); 939 | outputData->create(feature_width, feature_height, num_sizes * 4); 940 | 941 | for (int h = 0; h < feature_height; ++h) 942 | { 943 | for (int w = 0; w < feature_width; ++w) 944 | { 945 | float * pOut = (float*)(outputData->data_float + ( h * outputData->width + w) * outputData->floatChannelStepInByte / sizeof(float)); 946 | int idx = 0; 947 | //priorbox 948 | for (int s = 0; s < num_sizes; s++) 949 | { 950 | float min_size_ = pWinSizes[s]; 951 | float box_width, box_height; 952 | box_width = box_height = min_size_; 953 | 954 | float center_x = w * step_w + step_w / 2.0f; 955 | float center_y = h * step_h + step_h / 2.0f; 956 | // xmin 957 | pOut[idx++] = (center_x - box_width / 2.f) / image_width; 958 | // ymin 959 | pOut[idx++] = (center_y - box_height / 2.f) / image_height; 960 | // xmax 961 | pOut[idx++] = (center_x + box_width / 2.f) / image_width; 962 | // ymax 963 | pOut[idx++] = (center_y + box_height / 2.f) / image_height; 964 | 965 | } 966 | } 967 | } 968 | 969 | 970 | return true; 971 | } 972 | 973 | bool normalize(CDataBlob * inputOutputData, float * pScale) 974 | { 975 | if ((inputOutputData->data_float == NULL) || pScale == NULL) 976 | { 977 | cerr << __FUNCTION__ << ": The input data is null." 
<< endl; 978 | return false; 979 | } 980 | 981 | 982 | for (int row = 0; row < inputOutputData->height; row++) 983 | { 984 | for (int col = 0; col < inputOutputData->width; col++) 985 | { 986 | float * pData = (float*)(inputOutputData->data_float + (row*inputOutputData->width + col)*inputOutputData->floatChannelStepInByte / sizeof(float)); 987 | float sum = FLT_EPSILON; 988 | float s = 0; 989 | #if defined(_ENABLE_NEON) 990 | float32x4_t a, b, cscale; 991 | float32x4_t result_vec; 992 | for (int ch = 0; ch < inputOutputData->channels; ch += 4) 993 | { 994 | a = vld1q_f32(pData + ch); 995 | result_vec = vmulq_f32(a, a); 996 | sum += vgetq_lane_f32(result_vec, 0); 997 | sum += vgetq_lane_f32(result_vec, 1); 998 | sum += vgetq_lane_f32(result_vec, 2); 999 | sum += vgetq_lane_f32(result_vec, 3); 1000 | } 1001 | 1002 | s = 1.0f/sqrt(sum); 1003 | cscale = vdupq_n_f32(s); 1004 | 1005 | for (int ch = 0; ch < inputOutputData->channels; ch += 4) 1006 | { 1007 | a = vld1q_f32(pData + ch); 1008 | b = vld1q_f32(pScale + ch); 1009 | 1010 | result_vec = vmulq_f32(a, b); 1011 | result_vec = vmulq_f32(result_vec, cscale); 1012 | vst1q_f32(pData + ch, result_vec); 1013 | } 1014 | #elif defined(_ENABLE_AVX2) 1015 | __m256 a, b, cscale; 1016 | __m256 result_vec; 1017 | for (int ch = 0; ch < inputOutputData->channels; ch += 8) 1018 | { 1019 | a = _mm256_load_ps(pData + ch); 1020 | a = _mm256_mul_ps(a, a); 1021 | a = _mm256_hadd_ps(a, a); 1022 | a = _mm256_hadd_ps(a, a); 1023 | sum += SSE_256ELEMENT(a, 0); 1024 | sum += SSE_256ELEMENT(a, 4); 1025 | } 1026 | 1027 | s = 1.0f / sqrt(sum); 1028 | cscale = _mm256_set1_ps(s); 1029 | 1030 | for (int ch = 0; ch < inputOutputData->channels; ch += 8) 1031 | { 1032 | a = _mm256_load_ps(pData + ch); 1033 | b = _mm256_load_ps(pScale + ch); 1034 | 1035 | result_vec = _mm256_mul_ps(a, b); 1036 | result_vec = _mm256_mul_ps(result_vec, cscale); 1037 | _mm256_store_ps(pData + ch, result_vec); 1038 | } 1039 | #else 1040 | 1041 | #if 
defined(_ENABLE_OPENMP_SIMD) 1042 | #pragma omp simd reduction(+:sum) 1043 | #endif 1044 | for (int ch = 0; ch < inputOutputData->channels; ch++) 1045 | sum += (pData[ch] * pData[ch]); 1046 | 1047 | s = 1.0f/sqrt(sum); 1048 | #if defined(_ENABLE_OPENMP_SIMD) 1049 | #pragma omp simd 1050 | #endif 1051 | for (int ch = 0; ch < inputOutputData->channels; ch++) 1052 | pData[ch] = pData[ch] * pScale[ch] * s; 1053 | #endif 1054 | } 1055 | } 1056 | return true; 1057 | 1058 | } 1059 | 1060 | bool softmax1vector2class(const CDataBlob *inputOutputData) 1061 | { 1062 | if (inputOutputData->data_float == NULL) 1063 | { 1064 | cerr << __FUNCTION__ << ": The input data is null." << endl; 1065 | return false; 1066 | } 1067 | 1068 | if(inputOutputData->width != 1 || inputOutputData->height != 1) 1069 | { 1070 | cerr << __FUNCTION__ << ": The input data must be Cx1x1." << endl; 1071 | return false; 1072 | } 1073 | 1074 | int num = inputOutputData->channels; 1075 | float * pData = (inputOutputData->data_float); 1076 | 1077 | #if defined(_OPENMP) 1078 | #pragma omp parallel for 1079 | #endif 1080 | for(int i = 0; i < num; i+= 2) 1081 | { 1082 | float v1 = pData[i]; 1083 | float v2 = pData[i+1]; 1084 | float vm = MAX(v1, v2); 1085 | v1 -= vm; 1086 | v2 -= vm; 1087 | v1 = expf(v1); 1088 | v2 = expf(v2); 1089 | vm = v1 + v2; 1090 | pData[i] = v1/vm; 1091 | pData[i+1] = v2/vm; 1092 | } 1093 | return true; 1094 | } 1095 | 1096 | bool blob2vector(const CDataBlob * inputData, CDataBlob * outputData, bool isFloat) 1097 | { 1098 | if (inputData->data_float == NULL) 1099 | { 1100 | cerr << __FUNCTION__ << ": The input data is null." 
<< endl; 1101 | return false; 1102 | } 1103 | 1104 | outputData->create(1, 1, inputData->width * inputData->height * inputData->channels); 1105 | 1106 | if (isFloat) 1107 | { 1108 | int bytesOfAChannel = inputData->channels * sizeof(float); 1109 | float * pOut = outputData->data_float; 1110 | for (int row = 0; row < inputData->height; row++) 1111 | { 1112 | for (int col = 0; col < inputData->width; col++) 1113 | { 1114 | float * pIn = (inputData->data_float + (row*inputData->width + col)*inputData->floatChannelStepInByte / sizeof(float)); 1115 | memcpy(pOut, pIn, bytesOfAChannel); 1116 | pOut += inputData->channels; 1117 | } 1118 | } 1119 | } 1120 | else 1121 | { 1122 | int bytesOfAChannel = inputData->channels * sizeof(char); 1123 | signed char * pOut = outputData->data_int8; 1124 | for (int row = 0; row < inputData->height; row++) 1125 | { 1126 | for (int col = 0; col < inputData->width; col++) 1127 | { 1128 | signed char * pIn = (inputData->data_int8 + (row*inputData->width + col)*inputData->int8ChannelStepInByte / sizeof(char)); 1129 | memcpy(pOut, pIn, bytesOfAChannel); 1130 | pOut += inputData->channels; 1131 | } 1132 | } 1133 | } 1134 | 1135 | return true; 1136 | 1137 | } 1138 | 1139 | void IntersectBBox(const NormalizedBBox& bbox1, const NormalizedBBox& bbox2, 1140 | NormalizedBBox* intersect_bbox) 1141 | { 1142 | if (bbox2.xmin > bbox1.xmax || bbox2.xmax < bbox1.xmin || 1143 | bbox2.ymin > bbox1.ymax || bbox2.ymax < bbox1.ymin) 1144 | { 1145 | // Return [0, 0, 0, 0] if there is no intersection. 
1146 | intersect_bbox->xmin = 0; 1147 | intersect_bbox->ymin = 0; 1148 | intersect_bbox->xmax = 0; 1149 | intersect_bbox->ymax = 0; 1150 | } 1151 | else 1152 | { 1153 | intersect_bbox->xmin = (std::max(bbox1.xmin, bbox2.xmin)); 1154 | intersect_bbox->ymin = (std::max(bbox1.ymin, bbox2.ymin)); 1155 | intersect_bbox->xmax = (std::min(bbox1.xmax, bbox2.xmax)); 1156 | intersect_bbox->ymax = (std::min(bbox1.ymax, bbox2.ymax)); 1157 | } 1158 | } 1159 | 1160 | float JaccardOverlap(const NormalizedBBox& bbox1, const NormalizedBBox& bbox2) 1161 | { 1162 | NormalizedBBox intersect_bbox; 1163 | IntersectBBox(bbox1, bbox2, &intersect_bbox); 1164 | float intersect_width, intersect_height; 1165 | intersect_width = intersect_bbox.xmax - intersect_bbox.xmin; 1166 | intersect_height = intersect_bbox.ymax - intersect_bbox.ymin; 1167 | 1168 | if (intersect_width > 0 && intersect_height > 0) 1169 | { 1170 | float intersect_size = intersect_width * intersect_height; 1171 | float bsize1 = (bbox1.xmax - bbox1.xmin)*(bbox1.ymax - bbox1.ymin); 1172 | float bsize2 = (bbox2.xmax - bbox2.xmin)*(bbox2.ymax - bbox2.ymin); 1173 | return intersect_size / ( bsize1 + bsize2 - intersect_size); 1174 | } 1175 | else 1176 | { 1177 | return 0.f; 1178 | } 1179 | } 1180 | 1181 | bool SortScoreBBoxPairDescend(const pair& pair1, const pair& pair2) 1182 | { 1183 | return pair1.first > pair2.first; 1184 | } 1185 | 1186 | 1187 | bool detection_output(const CDataBlob * priorbox, const CDataBlob * loc, const CDataBlob * conf, float overlap_threshold, float confidence_threshold, int top_k, int keep_top_k, CDataBlob * outputData) 1188 | { 1189 | if (priorbox->data_float == NULL || loc->data_float == NULL || conf->data_float == NULL) 1190 | { 1191 | cerr << __FUNCTION__ << ": The input data is null." << endl; 1192 | return 0; 1193 | } 1194 | 1195 | if (priorbox->channels != loc->channels || loc->channels != conf->channels*2 ) 1196 | { 1197 | cerr << __FUNCTION__ << ": The sizes of the inputs are not match." 
<< endl; 1198 | return 0; 1199 | } 1200 | 1201 | float prior_variance[4] = {0.1f, 0.1f, 0.2f, 0.2f}; 1202 | float * pPriorBox = priorbox->data_float; 1203 | float * pLoc = loc->data_float; 1204 | float * pConf = conf->data_float; 1205 | 1206 | vector > score_bbox_vec; 1207 | vector > final_score_bbox_vec; 1208 | 1209 | //get the candidates those are > confidence_threshold 1210 | for(int i = 1; i < conf->channels; i+=2) 1211 | { 1212 | if(pConf[i] > confidence_threshold) 1213 | { 1214 | float fx1 = pPriorBox[i*2-2]; 1215 | float fy1 = pPriorBox[i*2-1]; 1216 | float fx2 = pPriorBox[i*2]; 1217 | float fy2 = pPriorBox[i*2+1]; 1218 | 1219 | float locx1 = pLoc[i * 2 - 2]; 1220 | float locy1 = pLoc[i * 2 - 1]; 1221 | float locx2 = pLoc[i * 2]; 1222 | float locy2 = pLoc[i * 2 + 1]; 1223 | 1224 | float prior_width = fx2 - fx1; 1225 | float prior_height = fy2 - fy1; 1226 | float prior_center_x = (fx1 + fx2)/2; 1227 | float prior_center_y = (fy1 + fy2)/2; 1228 | 1229 | float box_centerx = prior_variance[0] * locx1 * prior_width + prior_center_x; 1230 | float box_centery = prior_variance[1] * locy1 * prior_height + prior_center_y; 1231 | float box_width = expf(prior_variance[2] * locx2) * prior_width; 1232 | float box_height = expf(prior_variance[3] * locy2) * prior_height; 1233 | 1234 | fx1 = box_centerx - box_width / 2.f; 1235 | fy1 = box_centery - box_height /2.f; 1236 | fx2 = box_centerx + box_width / 2.f; 1237 | fy2 = box_centery + box_height /2.f; 1238 | 1239 | fx1 = MAX(0, fx1); 1240 | fy1 = MAX(0, fy1); 1241 | fx2 = MIN(1.f, fx2); 1242 | fy2 = MIN(1.f, fy2); 1243 | 1244 | NormalizedBBox bb; 1245 | bb.xmin = fx1; 1246 | bb.ymin = fy1; 1247 | bb.xmax = fx2; 1248 | bb.ymax = fy2; 1249 | 1250 | score_bbox_vec.push_back(std::make_pair(pConf[i], bb)); 1251 | } 1252 | } 1253 | 1254 | //Sort the score pair according to the scores in descending order 1255 | std::stable_sort(score_bbox_vec.begin(), score_bbox_vec.end(), SortScoreBBoxPairDescend); 1256 | 1257 | // Keep top_k 
scores if needed. 1258 | if (top_k > -1 && top_k < score_bbox_vec.size()) { 1259 | score_bbox_vec.resize(top_k); 1260 | } 1261 | 1262 | //Do NMS 1263 | final_score_bbox_vec.clear(); 1264 | while (score_bbox_vec.size() != 0) { 1265 | const NormalizedBBox bb1 = score_bbox_vec.front().second; 1266 | bool keep = true; 1267 | for (int k = 0; k < final_score_bbox_vec.size(); ++k) 1268 | { 1269 | if (keep) 1270 | { 1271 | const NormalizedBBox bb2 = final_score_bbox_vec[k].second; 1272 | float overlap = JaccardOverlap(bb1, bb2); 1273 | keep = (overlap <= overlap_threshold); 1274 | } 1275 | else 1276 | { 1277 | break; 1278 | } 1279 | } 1280 | if (keep) { 1281 | final_score_bbox_vec.push_back(score_bbox_vec.front()); 1282 | } 1283 | score_bbox_vec.erase(score_bbox_vec.begin()); 1284 | } 1285 | if (keep_top_k > -1 && keep_top_k < final_score_bbox_vec.size()) { 1286 | final_score_bbox_vec.resize(keep_top_k); 1287 | } 1288 | 1289 | //copy the results to the output blob 1290 | int num_faces = (int)final_score_bbox_vec.size(); 1291 | if (num_faces == 0) 1292 | outputData->setNULL(); 1293 | else 1294 | { 1295 | outputData->create(num_faces, 1, 5); 1296 | for (int fi = 0; fi < num_faces; fi++) 1297 | { 1298 | pair pp = final_score_bbox_vec[fi]; 1299 | float * pOut = (outputData->data_float + fi * outputData->floatChannelStepInByte / sizeof(float)); 1300 | pOut[0] = pp.first; 1301 | pOut[1] = pp.second.xmin; 1302 | pOut[2] = pp.second.ymin; 1303 | pOut[3] = pp.second.xmax; 1304 | pOut[4] = pp.second.ymax; 1305 | } 1306 | } 1307 | 1308 | return true; 1309 | } 1310 | 1311 | -------------------------------------------------------------------------------- /libfacedetection/src/facedetectcnn.h: -------------------------------------------------------------------------------- 1 | /* 2 | By downloading, copying, installing or using the software you agree to this license. 3 | If you do not agree to this license, do not download, install, 4 | copy or use the software. 
5 | 6 | 7 | License Agreement For libfacedetection 8 | (3-clause BSD License) 9 | 10 | Copyright (c) 2018-2019, Shiqi Yu, all rights reserved. 11 | shiqi.yu@gmail.com 12 | 13 | Redistribution and use in source and binary forms, with or without modification, 14 | are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright notice, 20 | this list of conditions and the following disclaimer in the documentation 21 | and/or other materials provided with the distribution. 22 | 23 | * Neither the names of the copyright holders nor the names of the contributors 24 | may be used to endorse or promote products derived from this software 25 | without specific prior written permission. 26 | 27 | This software is provided by the copyright holders and contributors "as is" and 28 | any express or implied warranties, including, but not limited to, the implied 29 | warranties of merchantability and fitness for a particular purpose are disclaimed. 30 | In no event shall copyright holders or contributors be liable for any direct, 31 | indirect, incidental, special, exemplary, or consequential damages 32 | (including, but not limited to, procurement of substitute goods or services; 33 | loss of use, data, or profits; or business interruption) however caused 34 | and on any theory of liability, whether in contract, strict liability, 35 | or tort (including negligence or otherwise) arising in any way out of 36 | the use of this software, even if advised of the possibility of such damage. 
37 | */ 38 | 39 | #pragma once 40 | 41 | //#define _ENABLE_AVX2 //Please enable it if X64 CPU 42 | //#define _ENABLE_NEON //Please enable it if ARM CPU 43 | 44 | 45 | int * facedetect_cnn(unsigned char * result_buffer, //buffer memory for storing face detection results, !!its size must be 0x20000 Bytes!! 46 | unsigned char * rgb_image_data, int width, int height, int step); //input image, it must be RGB (three-channel) image! 47 | 48 | 49 | 50 | 51 | //DO NOT EDIT the following code if you don't really understand it. 52 | 53 | #if defined(_ENABLE_AVX2) 54 | #include 55 | #endif 56 | 57 | #if defined(_ENABLE_NEON) 58 | #include "arm_neon.h" 59 | #define _ENABLE_INT8_CONV 60 | #endif 61 | 62 | #if defined(_ENABLE_AVX2) 63 | #define _MALLOC_ALIGN 256 64 | #else 65 | #define _MALLOC_ALIGN 128 66 | #endif 67 | 68 | #if defined(_ENABLE_AVX2)&& defined(_ENABLE_NEON) 69 | #error Cannot enable the two of SSE2 AVX and NEON at the same time. 70 | #endif 71 | 72 | 73 | #if defined(_OPENMP) 74 | #include 75 | #endif 76 | 77 | 78 | #include 79 | #include 80 | #include 81 | 82 | using namespace std; 83 | 84 | void* myAlloc(size_t size); 85 | void myFree_(void* ptr); 86 | #define myFree(ptr) (myFree_(*(ptr)), *(ptr)=0); 87 | 88 | #ifndef MIN 89 | # define MIN(a,b) ((a) > (b) ? (b) : (a)) 90 | #endif 91 | 92 | #ifndef MAX 93 | # define MAX(a,b) ((a) < (b) ? 
(b) : (a)) 94 | #endif 95 | 96 | typedef struct FaceRect_ 97 | { 98 | float score; 99 | int x; 100 | int y; 101 | int w; 102 | int h; 103 | }FaceRect; 104 | 105 | 106 | class CDataBlob 107 | { 108 | public: 109 | float * data_float; 110 | signed char * data_int8; 111 | int width; 112 | int height; 113 | int channels; 114 | int floatChannelStepInByte; 115 | int int8ChannelStepInByte; 116 | float int8float_scale; 117 | bool int8_data_valid; 118 | public: 119 | CDataBlob() { 120 | data_float = 0; 121 | data_int8 = 0; 122 | width = 0; 123 | height = 0; 124 | channels = 0; 125 | floatChannelStepInByte = 0; 126 | int8ChannelStepInByte = 0; 127 | int8float_scale = 1.0f; 128 | int8_data_valid = false; 129 | } 130 | CDataBlob(int w, int h, int c) 131 | { 132 | data_float = 0; 133 | data_int8 = 0; 134 | create(w, h, c); 135 | } 136 | ~CDataBlob() 137 | { 138 | setNULL(); 139 | } 140 | 141 | void setNULL() 142 | { 143 | if (data_float) 144 | myFree(&data_float); 145 | if (data_int8) 146 | myFree(&data_int8); 147 | width = height = channels = floatChannelStepInByte = int8ChannelStepInByte = 0; 148 | int8float_scale = 1.0f; 149 | int8_data_valid = false; 150 | } 151 | bool create(int w, int h, int c) 152 | { 153 | setNULL(); 154 | 155 | width = w; 156 | height = h; 157 | channels = c; 158 | //alloc space for float array 159 | int remBytes = (sizeof(float)* channels) % (_MALLOC_ALIGN / 8); 160 | if (remBytes == 0) 161 | floatChannelStepInByte = channels * sizeof(float); 162 | else 163 | floatChannelStepInByte = (channels * sizeof(float)) + (_MALLOC_ALIGN / 8) - remBytes; 164 | data_float = (float*)myAlloc(width * height * floatChannelStepInByte); 165 | 166 | //alloc space for int8 array 167 | remBytes = (sizeof(char)* channels) % (_MALLOC_ALIGN / 8); 168 | if (remBytes == 0) 169 | int8ChannelStepInByte = channels * sizeof(char); 170 | else 171 | int8ChannelStepInByte = (channels * sizeof(char)) + (_MALLOC_ALIGN / 8) - remBytes; 172 | data_int8 = (signed char*)myAlloc(width * 
height * int8ChannelStepInByte); 173 | 174 | if (data_float == NULL) 175 | { 176 | cerr << "Cannot alloc memeory for float data blob: " 177 | << width << "*" 178 | << height << "*" 179 | << channels << endl; 180 | return false; 181 | } 182 | 183 | if (data_int8 == NULL) 184 | { 185 | cerr << "Cannot alloc memeory for uint8 data blob: " 186 | << width << "*" 187 | << height << "*" 188 | << channels << endl; 189 | return false; 190 | } 191 | 192 | //memset(data_float, 0, width * height * floatChannelStepInByte); 193 | //memset(data_int8, 0, width * height * int8ChannelStepInByte); 194 | 195 | //the following code is faster than memset 196 | //but not only the padding bytes are set to zero. 197 | //BE CAREFUL!!! 198 | //#if defined(_OPENMP) 199 | //#pragma omp parallel for 200 | //#endif 201 | for (int r = 0; r < this->height; r++) 202 | { 203 | for (int c = 0; c < this->width; c++) 204 | { 205 | int pixel_end = this->floatChannelStepInByte / sizeof(float); 206 | float * pF = (float*)(this->data_float + (r * this->width + c) * this->floatChannelStepInByte/sizeof(float)); 207 | for (int ch = this->channels; ch < pixel_end; ch++) 208 | pF[ch] = 0; 209 | 210 | pixel_end = this->int8ChannelStepInByte / sizeof(char); 211 | char * pI = (char*)(this->data_int8 + (r * this->width + c) * this->int8ChannelStepInByte/sizeof(char)); 212 | for (int ch = this->channels; ch < pixel_end; ch++) 213 | pI[ch] = 0; 214 | } 215 | } 216 | 217 | return true; 218 | } 219 | 220 | bool setInt8DataFromCaffeFormat(signed char * pData, int dataWidth, int dataHeight, int dataChannels) 221 | { 222 | if (pData == NULL) 223 | { 224 | cerr << "The input image data is null." << endl; 225 | return false; 226 | } 227 | if (dataWidth != this->width || 228 | dataHeight != this->height || 229 | dataChannels != this->channels) 230 | { 231 | cerr << "The dim of the data can not match that of the Blob." 
<< endl; 232 | return false; 233 | } 234 | //create(dataWidth, dataHeight, dataChannels); 235 | 236 | for(int row = 0; row < height; row++) 237 | for (int col = 0; col < width; col++) 238 | { 239 | signed char * p = (this->data_int8 + (width * row + col) * int8ChannelStepInByte /sizeof(char)); 240 | for (int ch = 0; ch < channels; ch++) 241 | { 242 | p[ch] = pData[ch * height * width + row * width + col]; 243 | } 244 | } 245 | return true; 246 | } 247 | bool setFloatDataFromCaffeFormat(float * pData, int dataWidth, int dataHeight, int dataChannels) 248 | { 249 | if (pData == NULL) 250 | { 251 | cerr << "The input image data is null." << endl; 252 | return false; 253 | } 254 | if (dataWidth != this->width || 255 | dataHeight != this->height || 256 | dataChannels != this->channels) 257 | { 258 | cerr << "The dim of the data can not match that of the Blob." << endl; 259 | return false; 260 | } 261 | //create(dataWidth, dataHeight, dataChannels); 262 | 263 | for (int row = 0; row < height; row++) 264 | for (int col = 0; col < width; col++) 265 | { 266 | float * p = (this->data_float + (width * row + col) * floatChannelStepInByte / sizeof(float)); 267 | for (int ch = 0; ch < channels; ch++) 268 | { 269 | p[ch] = pData[ch * height * width + row * width + col]; 270 | } 271 | } 272 | return true; 273 | } 274 | 275 | bool setDataFromImage(const unsigned char * imgData, int imgWidth, int imgHeight, int imgChannels, int imgWidthStep, 276 | int * pChannelMean) 277 | { 278 | if (imgData == NULL) 279 | { 280 | cerr << "The input image data is null." << endl; 281 | return false; 282 | } 283 | if (pChannelMean == NULL) 284 | { 285 | cerr << "The mean values is null." 
<< endl; 286 | return false; 287 | } 288 | create(imgWidth, imgHeight, imgChannels); 289 | 290 | //#if defined(_OPENMP) 291 | //#pragma omp parallel for 292 | //#endif 293 | for (int r = 0; r < imgHeight; r++) 294 | { 295 | for (int c = 0; c < imgWidth; c++) 296 | { 297 | const unsigned char * pImgData = imgData + imgWidthStep * r + imgChannels * c; 298 | float * pBlobData = this->data_float + (this->width * r + c) * this->floatChannelStepInByte /sizeof(float); 299 | for (int ch = 0; ch < imgChannels; ch++) 300 | pBlobData[ch] = (float)(pImgData[ch] - pChannelMean[ch]); 301 | } 302 | } 303 | return true; 304 | } 305 | bool setDataFrom3x3S2P1to1x1S1P0FromImage(const unsigned char * imgData, int imgWidth, int imgHeight, int imgChannels, int imgWidthStep, 306 | int * pChannelMean) 307 | { 308 | if (imgData == NULL) 309 | { 310 | cerr << "The input image data is null." << endl; 311 | return false; 312 | } 313 | if (pChannelMean == NULL) 314 | { 315 | cerr << "The mean values is null." << endl; 316 | return false; 317 | } 318 | if (imgChannels != 3) 319 | { 320 | cerr << "The input image must be a 3-channel RGB image." << endl; 321 | return false; 322 | } 323 | 324 | create((imgWidth+1)/2, (imgHeight+1)/2, 27); 325 | //since the pixel assignment cannot fill all the elements in the blob. 
326 | //some elements in the blob should be initialized to 0 327 | memset(data_float, 0, width * height * floatChannelStepInByte); 328 | 329 | #if defined(_OPENMP) 330 | #pragma omp parallel for 331 | #endif 332 | for (int r = 0; r < this->height; r++) 333 | { 334 | for (int c = 0; c < this->width; c++) 335 | { 336 | float * pData = this->data_float + (r * this->width + c) * this->floatChannelStepInByte / sizeof(float); 337 | for (int fy = -1; fy <= 1; fy++) 338 | { 339 | int srcy = r * 2 + fy; 340 | 341 | if (srcy < 0 || srcy >= imgHeight) //out of the range of the image 342 | continue; 343 | 344 | for (int fx = -1; fx <= 1; fx++) 345 | { 346 | int srcx = c * 2 + fx; 347 | 348 | if (srcx < 0 || srcx >= imgWidth) //out of the range of the image 349 | continue; 350 | 351 | const unsigned char * pImgData = imgData + imgWidthStep * srcy + imgChannels * srcx; 352 | 353 | int output_channel_offset = ((fy + 1) * 3 + fx + 1) * 3; //3x3 filters, 3-channel image 354 | 355 | pData[output_channel_offset] = (float)(pImgData[0] - pChannelMean[0]); 356 | pData[output_channel_offset+1] = (float)(pImgData[1] - pChannelMean[1]); 357 | pData[output_channel_offset+2] = (float)(pImgData[2] - pChannelMean[2]); 358 | 359 | } 360 | 361 | } 362 | } 363 | } 364 | return true; 365 | } 366 | float getElementFloat(int x, int y, int channel) 367 | { 368 | if (this->data_float) 369 | { 370 | if (x >= 0 && x < this->width && 371 | y >= 0 && y < this->height && 372 | channel >= 0 && channel < this->channels) 373 | { 374 | float * p = (float*)(this->data_float + (y*this->width + x)*this->floatChannelStepInByte / sizeof(float)); 375 | return p[channel]; 376 | } 377 | } 378 | 379 | return 0.f; 380 | } 381 | int getElementint8(int x, int y, int channel) 382 | { 383 | if (this->data_int8 && this->int8_data_valid) 384 | { 385 | if (x >= 0 && x < this->width && 386 | y >= 0 && y < this->height && 387 | channel >= 0 && channel < this->channels) 388 | { 389 | signed char * p = this->data_int8 + 
(y*this->width + x)*this->int8ChannelStepInByte/sizeof(char); 390 | return p[channel]; 391 | } 392 | } 393 | 394 | return 0; 395 | } 396 | 397 | friend ostream &operator<<(ostream &output, const CDataBlob &dataBlob) 398 | { 399 | output << "DataBlob Size (Width, Height, Channel) = (" 400 | << dataBlob.width 401 | << ", " << dataBlob.height 402 | << ", " << dataBlob.channels 403 | << ")" << endl; 404 | for (int ch = 0; ch < dataBlob.channels; ch++) 405 | { 406 | output << "Channel " << ch << ": " << endl; 407 | 408 | for (int row = 0; row < dataBlob.height; row++) 409 | { 410 | output << "("; 411 | for (int col = 0; col < dataBlob.width; col++) 412 | { 413 | float * p = (dataBlob.data_float + (dataBlob.width * row + col) * dataBlob.floatChannelStepInByte/sizeof(float)); 414 | output << p[ch]; 415 | if (col != dataBlob.width - 1) 416 | output << ", "; 417 | } 418 | output << ")" << endl; 419 | } 420 | } 421 | 422 | return output; 423 | } 424 | }; 425 | 426 | class Filters { 427 | public: 428 | vector filters; 429 | int pad; 430 | int stride; 431 | float scale; //element * scale = original value 432 | }; 433 | 434 | bool convolution(CDataBlob *inputData, const Filters* filters, CDataBlob *outputData); 435 | bool maxpooling2x2S2(const CDataBlob *inputData, CDataBlob *outputData); 436 | bool concat4(const CDataBlob *inputData1, const CDataBlob *inputData2, const CDataBlob *inputData3, const CDataBlob *inputData4, CDataBlob *outputData); 437 | bool scale(CDataBlob * dataBlob, float scale); 438 | bool relu(const CDataBlob *inputOutputData); 439 | bool priorbox(const CDataBlob * featureData, const CDataBlob * imageData, int num_sizes, float * pWinSizes, CDataBlob * outputData); 440 | bool normalize(CDataBlob * inputOutputData, float * pScale); 441 | bool blob2vector(const CDataBlob * inputData, CDataBlob * outputData, bool isFloat); 442 | bool detection_output(const CDataBlob * priorbox, const CDataBlob * loc, const CDataBlob * conf, float overlap_threshold, float 
confidence_threshold, int top_k, int keep_top_k, CDataBlob * outputData); 443 | /* the input data for softmax must be a vector, the data stored in a multi-channel blob with size 1x1 */ 444 | bool softmax1vector2class(const CDataBlob *inputOutputData); 445 | 446 | vector objectdetect_cnn(unsigned char * rgbImageData, int with, int height, int step); 447 | -------------------------------------------------------------------------------- /libfacedetection_capi.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2019 . All rights reserved. 2 | // Use of this source code is governed by a Apache-style 3 | // license that can be found in the LICENSE file. 4 | 5 | extern "C" { 6 | #include "libfacedetection_capi.h" 7 | } 8 | 9 | #include "./libfacedetection/src/facedetectcnn.h" 10 | 11 | #include 12 | 13 | // define the buffer size. Do not change the size! 14 | #define DETECT_BUFFER_SIZE 0x20000 15 | 16 | struct libfacedetection_capi_result_t { 17 | std::string *sBuffer; 18 | int * result; 19 | 20 | libfacedetection_capi_result_t(std::string* s, int* p): sBuffer(s), result(p) { 21 | // 22 | } 23 | ~libfacedetection_capi_result_t() { 24 | delete this->sBuffer; 25 | } 26 | }; 27 | 28 | void libfacedetection_capi_result_free( 29 | libfacedetection_capi_result_t* p 30 | ) { 31 | delete p; 32 | } 33 | 34 | libfacedetection_capi_result_t* libfacedetection_capi_facedetect_rgb( 35 | uint8_t * rgb, int width, int height, int step 36 | ) { 37 | std::string* sBuffer = new std::string(); 38 | sBuffer->resize(DETECT_BUFFER_SIZE); 39 | 40 | unsigned char* pBuffer = (unsigned char *)sBuffer->data(); 41 | int* pResults = facedetect_cnn(pBuffer, rgb, width, height, step); 42 | return new libfacedetection_capi_result_t(sBuffer, pResults); 43 | } 44 | 45 | libfacedetection_capi_result_t* libfacedetection_capi_facedetect_rgba( 46 | uint8_t* rgba, int width, int height, int step 47 | ) { 48 | std::string rgbBuffer; 49 | 
rgbBuffer.resize(width*height*3); 50 | 51 | uint8_t* rgb = (uint8_t*)rgbBuffer.data(); 52 | for(int y = 0; y < height; y++) { 53 | for(int x = 0; x < width; x++) { 54 | *rgb++ = *rgba++; // r 55 | *rgb++ = *rgba++; // g 56 | *rgb++ = *rgba++; // b 57 | rgba++; // a 58 | } 59 | } 60 | 61 | std::string* sBuffer = new std::string(); 62 | sBuffer->resize(DETECT_BUFFER_SIZE); 63 | 64 | unsigned char* pBuffer = (unsigned char *)sBuffer->data(); 65 | int* pResults = facedetect_cnn(pBuffer, rgb, width, height, step); 66 | return new libfacedetection_capi_result_t(sBuffer, pResults); 67 | } 68 | 69 | int libfacedetection_capi_result_len(libfacedetection_capi_result_t* self) { 70 | int* pResults = self->result; 71 | return pResults? *pResults: 0; 72 | } 73 | 74 | libfacedetection_capi_bool_t libfacedetection_capi_result_get( 75 | libfacedetection_capi_result_t* self, int i, 76 | libfacedetection_capi_face_t* face 77 | ) { 78 | int* pResults = self->result; 79 | int n = pResults? *pResults: 0; 80 | 81 | if(i < 0 || i >= n) return 0; 82 | 83 | short * p = ((short*)(pResults+1))+142*i; 84 | face->x = p[0]; 85 | face->y = p[1]; 86 | face->w = p[2]; 87 | face->h = p[3]; 88 | face->neighbors = p[4]; 89 | face->angle = p[5]; 90 | 91 | return 1; 92 | } 93 | -------------------------------------------------------------------------------- /libfacedetection_capi.h: -------------------------------------------------------------------------------- 1 | // Copyright 2019 . All rights reserved. 2 | // Use of this source code is governed by a Apache-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | #ifndef libfacedetection_capi_h_ 6 | #define libfacedetection_capi_h_ 7 | 8 | #include 9 | #include 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | typedef int libfacedetection_capi_bool_t; 16 | typedef struct libfacedetection_capi_face_t libfacedetection_capi_face_t; 17 | typedef struct libfacedetection_capi_result_t libfacedetection_capi_result_t; 18 | 19 | struct libfacedetection_capi_face_t { 20 | int x; 21 | int y; 22 | int w; 23 | int h; 24 | int neighbors; 25 | int angle; 26 | }; 27 | 28 | libfacedetection_capi_result_t* libfacedetection_capi_facedetect_rgb( 29 | uint8_t* rgb, int width, int height, int step 30 | ); 31 | libfacedetection_capi_result_t* libfacedetection_capi_facedetect_rgba( 32 | uint8_t* rgba, int width, int height, int step 33 | ); 34 | 35 | int libfacedetection_capi_result_len( 36 | libfacedetection_capi_result_t* p 37 | ); 38 | libfacedetection_capi_bool_t libfacedetection_capi_result_get( 39 | libfacedetection_capi_result_t* p, int i, 40 | libfacedetection_capi_face_t* face 41 | ); 42 | void libfacedetection_capi_result_free( 43 | libfacedetection_capi_result_t* p 44 | ); 45 | 46 | #ifdef __cplusplus 47 | } 48 | #endif 49 | #endif // libfacedetection_capi_h_ 50 | -------------------------------------------------------------------------------- /z_facedetectcnn_cc.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2019 . All rights reserved. 2 | // Use of this source code is governed by a Apache-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #include "./libfacedetection/src/facedetectcnn.cpp" 6 | -------------------------------------------------------------------------------- /z_facedetectcnn_floatdata.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2019 . All rights reserved. 
// Use of this source code is governed by an Apache-style
// license that can be found in the LICENSE file.

// Build shim: compiles the vendored libfacedetection source as part of
// this package.
#include "./libfacedetection/src/facedetectcnn-floatdata.cpp"
--------------------------------------------------------------------------------
/z_facedetectcnn_int8data.cc:
--------------------------------------------------------------------------------
// Copyright 2019 . All rights reserved.
// Use of this source code is governed by an Apache-style
// license that can be found in the LICENSE file.

// Build shim: compiles the vendored libfacedetection source as part of
// this package.
#include "./libfacedetection/src/facedetectcnn-int8data.cpp"
--------------------------------------------------------------------------------
/z_facedetectcnn_model.cc:
--------------------------------------------------------------------------------
// Copyright 2019 . All rights reserved.
// Use of this source code is governed by an Apache-style
// license that can be found in the LICENSE file.

// Build shim: compiles the vendored libfacedetection model-weights source
// as part of this package.
#include "./libfacedetection/src/facedetectcnn-model.cpp"
--------------------------------------------------------------------------------