├── .DS_Store
├── .gitignore
├── CMakeLists.txt
├── README.md
├── cmake_install.cmake
├── common.cpp
├── common.h
├── cudaUtility.h
├── imageBuffer.h
├── kernel.cu
├── main.cpp
├── mathFunctions.cpp
├── mathFunctions.cu
├── mathFunctions.h
├── model
└── pelee
│ ├── pelee_deploy_iplugin.prototxt
│ ├── pelee_merged.caffemodel
│ └── pelee_merged.prototxt
├── pluginImplement.cpp
├── pluginImplement.h
├── tensorNet.cpp
├── tensorNet.h
├── testPic
└── test.png
├── testVideo
└── test.avi
└── util
├── cuda
├── cudaMappedMemory.h
├── cudaNormalize.cu
├── cudaNormalize.h
├── cudaOverlay.cu
├── cudaOverlay.h
├── cudaRGB.cu
├── cudaRGB.h
├── cudaResize.cu
├── cudaResize.h
├── cudaUtility.h
├── cudaYUV-NV12.cu
├── cudaYUV-YUYV.cu
├── cudaYUV-YV12.cu
└── cudaYUV.h
├── loadImage.cpp
└── loadImage.h
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eric612/Pelee-Seg-TensorRT/05bf0b31c5891adaf64f40b784ef4a1927d68862/.DS_Store
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## General
2 | jobs/*
3 | CMakeFiles/*
4 | 3rdparty/*
5 | cmake/*
6 | tools/*
7 | lib/*
8 | build/*
9 |
10 | # Compiled Object files
11 | *.slo
12 | *.lo
13 | *.o
14 | *.cuo
15 |
16 | # Compiled Dynamic libraries
17 | *.so
18 | *.dylib
19 |
20 | # Compiled Static libraries
21 | *.lai
22 | *.la
23 | *.a
24 |
25 | # Compiled protocol buffers
26 | *.pb.h
27 | *.pb.cc
28 | *_pb2.py
29 |
30 | # Compiled python
31 | *.pyc
32 |
33 | # Compiled MATLAB
34 | *.mex*
35 |
36 | # IPython notebook checkpoints
37 | .ipynb_checkpoints
38 |
39 | # Editor temporaries
40 | *.swp
41 | *~
42 |
43 | # Sublime Text settings
44 | *.sublime-workspace
45 | *.sublime-project
46 |
47 | # Eclipse Project settings
48 | *.*project
49 | .settings
50 |
51 | # QtCreator files
52 | *.user
53 |
54 | # PyCharm files
55 | .idea
56 |
57 | # OSX dir files
58 | .DS_Store
59 |
60 | ## Caffe
61 |
62 | # User's build configuration
63 | Makefile.config
64 | Makefile
65 |
66 | # Data and models are either
67 | # 1. reference, and not casually committed
68 | # 2. custom, and live on their own unless they're deliberately contributed
69 | data/*
70 | models/*
71 | *.caffemodel
72 | *.caffemodel.h5
73 | *.solverstate
74 | *.solverstate.h5
75 | *.binaryproto
76 | *leveldb
77 | *lmdb
78 |
79 | # build, distribute, and bins (+ python proto bindings)
80 | build
81 | .build_debug/*
82 | .build_release/*
83 | distribute/*
84 | *.testbin
85 | *.bin
86 | python/caffe/proto/
87 | cmake_build
88 | .cmake_build
89 |
90 | # Generated documentation
91 | docs/_site
92 | docs/gathered
93 | _site
94 | doxygen
95 | docs/dev
96 |
97 | # LevelDB files
98 | *.sst
99 | *.ldb
100 | LOCK
101 | LOG*
102 | CURRENT
103 | MANIFEST-*
104 |
105 |
106 | *.tar.gz
107 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Build configuration for the Pelee TensorRT demo: the CUDA/TensorRT inference
2 | # library `inferLib` plus the `pelee` executable that links against it.
3 | cmake_minimum_required(VERSION 2.8)
4 | project(pelee)
5 |
6 | # Kept as a raw flag: CMAKE_CXX_STANDARD is not available at the declared CMake 2.8 minimum.
7 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
8 |
9 | # OpenMP is optional; its flags are only added when the package is found.
10 | find_package(OpenMP)
11 | if(OPENMP_FOUND)
12 |   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
13 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
14 |   set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
15 | endif()
16 | set(BUILD_DEPS "YES" CACHE BOOL "If YES, will install dependencies into sandbox. Automatically reset to NO after dependencies are installed.")
17 |
18 | # All build products are collected under <binary dir>/build.
19 | set(PROJECT_OUTPUT_DIR ${PROJECT_BINARY_DIR}/build)
20 | set(PROJECT_INCLUDE_DIR ${PROJECT_OUTPUT_DIR}/include)
21 |
22 | file(MAKE_DIRECTORY ${PROJECT_INCLUDE_DIR})
23 | file(MAKE_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin)
24 |
25 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin)
26 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib)
27 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib)
28 |
29 | message("The runtime libraries are included in ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
30 | message("The library files are included in ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}")
31 |
32 | message("-- system arch: ${CMAKE_SYSTEM_PROCESSOR}")
33 | message("-- output path: ${PROJECT_OUTPUT_DIR}")
34 |
35 | # cuda_add_library() below is provided by FindCUDA, so CUDA is mandatory.
36 | find_package(CUDA REQUIRED)
37 | find_package(OpenCV REQUIRED)
38 | message(" -- CUDA and Opencv Found ")
39 | message(" -- opencv_version ${OpenCV_VERSION}")
40 |
41 | set(CUDA_NVCC_FLAGS
42 |   ${CUDA_NVCC_FLAGS};--disable-warnings;
43 |   -O3
44 |   -gencode arch=compute_30,code=sm_30
45 |   -gencode arch=compute_35,code=sm_35
46 |   -gencode arch=compute_50,code=sm_50
47 |   -gencode arch=compute_50,code=compute_50
48 |   -gencode arch=compute_52,code=sm_52
49 |   -gencode arch=compute_61,code=sm_61
50 |   -gencode arch=compute_62,code=sm_62
51 | )
52 |
53 | file(GLOB sources *.cu *.cpp util/*.cpp util/cuda/*.cu)
54 | file(GLOB includes util/*.h util/cuda/*.h)
55 | # main.cpp is compiled into the `pelee` executable below; keep it out of the
56 | # shared library so `main` and its globals are not defined twice.
57 | list(REMOVE_ITEM sources "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp")
58 |
59 | include_directories(${PROJECT_INCLUDE_DIR}/util)
60 | # Headers live in the source tree; using the source dir keeps out-of-source builds working.
61 | include_directories(${PROJECT_SOURCE_DIR}/util)
62 | include_directories(${OpenCV_INCLUDE_DIRS})
63 |
64 | link_directories(${OpenCV_LIBRARY_DIRS})
65 |
66 | # NOTE: target_link_libraries() on inferLib stays keyword-less because FindCUDA links the
67 | # CUDA runtime with the plain signature, and CMake forbids mixing signatures on one target.
68 | cuda_add_library(inferLib SHARED ${sources})
69 |
70 | # TensorRT libraries are located instead of hard-coded: the default system paths are
71 | # searched first, then the aarch64 (Jetson) and x86_64 multiarch directories.
72 | # Set TENSORRT_ROOT to point at a non-standard installation.
73 | set(TENSORRT_LIBS)
74 | foreach(trt_name nvcaffe_parser nvinfer nvinfer_plugin nvparsers)
75 |   find_library(TRT_${trt_name}_LIBRARY
76 |     NAMES ${trt_name}
77 |     HINTS ${TENSORRT_ROOT}
78 |     PATH_SUFFIXES lib lib64
79 |     PATHS /usr/lib/aarch64-linux-gnu /usr/lib/x86_64-linux-gnu)
80 |   if(NOT TRT_${trt_name}_LIBRARY)
81 |     message(FATAL_ERROR "TensorRT library '${trt_name}' not found; set TENSORRT_ROOT to the TensorRT install prefix")
82 |   endif()
83 |   list(APPEND TENSORRT_LIBS ${TRT_${trt_name}_LIBRARY})
84 | endforeach()
85 | target_link_libraries(inferLib ${TENSORRT_LIBS})
86 |
87 | # transfer all headers to the include directory
88 | foreach(include ${includes})
89 |   message("-- Copying ${include}")
90 |   configure_file(${include} ${PROJECT_INCLUDE_DIR} COPYONLY)
91 | endforeach()
92 |
93 | ## install
94 | foreach(include ${includes})
95 |   install(FILES "${include}" DESTINATION include/inferLib)
96 | endforeach()
97 |
98 | add_executable(pelee main.cpp)
99 | target_link_libraries(pelee inferLib ${OpenCV_LIBS})
100 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Pelee-TensorRT
2 |
3 | **Accelerate Pelee with TensorRT**
4 | Pelee: A Real-Time Object Detection System on Mobile Devices (NeurIPS 2018)
5 |
6 | **TensorRT-Pelee can run over 70FPS(11ms) on Jetson TX2(FP32)**
7 |
8 | ---
9 |
10 | **Performance:**
11 | Jetson TX2: 72 FPS, 13.2~11 ms (FP32)
12 | Titan V: 200FPS, 5 ms (FP32)
13 |
14 | **Requirements:**
15 |
16 | 1.TensorRT4 (Jetpack 3.3 on TX2)
17 | 2.CUDA 9.0
18 | 3.cudnn7
19 |
20 | ---
21 |
22 | **Run:**
23 |
24 | ```shell
25 | cmake .
26 | make
27 | ./build/bin/pelee
28 | ```
29 |
30 | ---
31 |
32 | **Reference:**
33 |
34 | https://github.com/Ghustwb/MobileNet-SSD-TensorRT
35 |
36 | ---
37 |
38 | **TODO:**
39 | - [ ] FP16 Implementation
40 | - [ ] Change Custom layers IPlugin to IPluginExt
41 |
42 |
43 |
44 |
45 | **The bug has been fixed**
46 |
47 | 
48 |
--------------------------------------------------------------------------------
/cmake_install.cmake:
--------------------------------------------------------------------------------
1 | # Install script for directory: /home/nvidia/TRT-Pelee (NOTE(review): this looks like a CMake-generated build artifact with machine-specific absolute paths — confirm it is meant to be version-controlled)
2 |
3 | # Default the install prefix to /usr/local when the caller did not define one, then strip a single trailing slash.
4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX)
5 | set(CMAKE_INSTALL_PREFIX "/usr/local")
6 | endif()
7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")
8 |
9 | # Derive the install configuration name from BUILD_TYPE (leading non-identifier characters removed); empty when BUILD_TYPE is unset.
10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
11 | if(BUILD_TYPE)
12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" ""
13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}")
14 | else()
15 | set(CMAKE_INSTALL_CONFIG_NAME "")
16 | endif()
17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"")
18 | endif()
19 |
20 | # Take the component to install from COMPONENT when CMAKE_INSTALL_COMPONENT is not already set; otherwise leave it unset.
21 | if(NOT CMAKE_INSTALL_COMPONENT)
22 | if(COMPONENT)
23 | message(STATUS "Install component: \"${COMPONENT}\"")
24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}")
25 | else()
26 | set(CMAKE_INSTALL_COMPONENT)
27 | endif()
28 | endif()
29 |
30 | # Install shared libraries without execute permission? Defaults CMAKE_INSTALL_SO_NO_EXE to "1" unless already defined.
31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
32 | set(CMAKE_INSTALL_SO_NO_EXE "1")
33 | endif()
34 |
35 | if(NOT CMAKE_INSTALL_COMPONENT OR "${CMAKE_INSTALL_COMPONENT}" STREQUAL "Unspecified")
36 | file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/include/inferLib" TYPE FILE FILES "/home/nvidia/TRT-Pelee/util/loadImage.h")
37 | endif()
38 |
39 | if(NOT CMAKE_INSTALL_COMPONENT OR "${CMAKE_INSTALL_COMPONENT}" STREQUAL "Unspecified")
40 | file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/include/inferLib" TYPE FILE FILES "/home/nvidia/TRT-Pelee/util/cuda/cudaOverlay.h")
41 | endif()
42 |
43 | if(NOT CMAKE_INSTALL_COMPONENT OR "${CMAKE_INSTALL_COMPONENT}" STREQUAL "Unspecified")
44 | file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/include/inferLib" TYPE FILE FILES "/home/nvidia/TRT-Pelee/util/cuda/cudaResize.h")
45 | endif()
46 |
47 | if(NOT CMAKE_INSTALL_COMPONENT OR "${CMAKE_INSTALL_COMPONENT}" STREQUAL "Unspecified")
48 | file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/include/inferLib" TYPE FILE FILES "/home/nvidia/TRT-Pelee/util/cuda/cudaRGB.h")
49 | endif()
50 |
51 | if(NOT CMAKE_INSTALL_COMPONENT OR "${CMAKE_INSTALL_COMPONENT}" STREQUAL "Unspecified")
52 | file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/include/inferLib" TYPE FILE FILES "/home/nvidia/TRT-Pelee/util/cuda/cudaYUV.h")
53 | endif()
54 |
55 | if(NOT CMAKE_INSTALL_COMPONENT OR "${CMAKE_INSTALL_COMPONENT}" STREQUAL "Unspecified")
56 | file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/include/inferLib" TYPE FILE FILES "/home/nvidia/TRT-Pelee/util/cuda/cudaMappedMemory.h")
57 | endif()
58 |
59 | if(NOT CMAKE_INSTALL_COMPONENT OR "${CMAKE_INSTALL_COMPONENT}" STREQUAL "Unspecified")
60 | file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/include/inferLib" TYPE FILE FILES "/home/nvidia/TRT-Pelee/util/cuda/cudaNormalize.h")
61 | endif()
62 |
63 | if(NOT CMAKE_INSTALL_COMPONENT OR "${CMAKE_INSTALL_COMPONENT}" STREQUAL "Unspecified")
64 | file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/include/inferLib" TYPE FILE FILES "/home/nvidia/TRT-Pelee/util/cuda/cudaUtility.h")
65 | endif()
66 |
67 | if(CMAKE_INSTALL_COMPONENT)
68 | set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt")
69 | else()
70 | set(CMAKE_INSTALL_MANIFEST "install_manifest.txt")
71 | endif()
72 |
73 | string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT
74 | "${CMAKE_INSTALL_MANIFEST_FILES}")
75 | file(WRITE "/home/nvidia/TRT-Pelee/${CMAKE_INSTALL_MANIFEST}"
76 | "${CMAKE_INSTALL_MANIFEST_CONTENT}")
77 |
--------------------------------------------------------------------------------
/common.cpp:
--------------------------------------------------------------------------------
1 | #include "common.h"
2 | // Searches each entry of `directories` for `input` (candidate path = directory + input),
3 | // retrying with up to MAX_DEPTH - 1 additional "../" prefixes per directory.
4 | // Returns the first path that can be opened; asserts when no candidate opens.
5 | std::string locateFile(const std::string& input, const std::vector<std::string>& directories)
6 | {
7 |     const int MAX_DEPTH{10};
8 |     std::string file;  // stays empty until a candidate has been opened successfully
9 |     for (const auto& dir : directories)
10 |     {
11 |         std::string candidate = dir + input;
12 |         std::cout << candidate << std::endl;
13 |         for (int depth = 0; depth < MAX_DEPTH && file.empty(); depth++)
14 |         {
15 |             // A stream that opens means the candidate exists and is readable.
16 |             if (std::ifstream(candidate).is_open()) file = candidate;
17 |             else candidate = "../" + candidate;
18 |         }
19 |         if (!file.empty()) break;
20 |     }
21 |     std::cout << file << std::endl;
22 |     assert(!file.empty() && "Could not find a file due to it not existing in the data directory.");
23 |     return file;
24 | }
25 | // Reads inH*inW raw bytes from a binary PGM file into `buffer`, after skipping four header tokens.
26 | void readPGMFile(const std::string& fileName, uint8_t *buffer, int inH, int inW)
27 | {
28 |     std::ifstream infile(fileName, std::ifstream::binary);
29 |     assert(infile.is_open() && "Attempting to read from a file that is not open.");
30 |     std::string magic, h, w, max;  // header tokens are parsed only to advance the stream
31 |     infile >> magic >> h >> w >> max;
32 |     infile.seekg(1, infile.cur);  // skip the single byte that follows the last header token
33 |     infile.read(reinterpret_cast<char*>(buffer), inH*inW);  // istream::read requires char*
34 | }
35 |
36 | /*********************************/
37 | /* Updated date: 2018.3.7
38 | /*This is my own implementation of the detectout layer code, because I met a mistake with the detectout api of
39 | /*tensorrt3.0 a few months ago. You can use the detectout api of tensorrt3.0 correctly by adding an extra output
40 | /*in the deploy prototxt file. Please refer to my deploy prototxt.
41 | /********************************/
42 | // Retrieve all location predictions.
43 | // loc_data is read as [num_preds_per_class][num_loc_classes][4] floats; for every
44 | // (prediction, class) pair a coordinate vector is appended to loc_preds.
45 | // NOTE(review): labelbbox is not reset between classes, so when num_loc_classes > 1
46 | // each later entry also contains the earlier classes' coordinates — confirm intent.
47 | void GetLocPredictions(const float* loc_data,
48 |     const int num_preds_per_class, const int num_loc_classes,
49 |     std::vector<std::vector<float> >* loc_preds) {
50 |   for (int p = 0; p < num_preds_per_class; ++p) {
51 |     const float* pred = loc_data + p * num_loc_classes * 4;
52 |     std::vector<float> labelbbox;
53 |     for (int c = 0; c < num_loc_classes; ++c) {
54 |       const float* box = pred + c * 4;
55 |       labelbbox.insert(labelbbox.end(), box, box + 4);
56 |       loc_preds->push_back(labelbbox);
57 |     }
58 |   }
59 | }
60 |
61 | // Retrieve all confidences.
62 | // conf_data is read as [num_preds_per_class][num_classes] floats; one vector of
63 | // num_classes scores is appended to conf_preds per prediction.
64 | void GetConfidenceScores(const float* conf_data,
65 |     const int num_preds_per_class, const int num_classes,
66 |     std::vector<std::vector<float> >* conf_preds) {
67 |   for (int p = 0; p < num_preds_per_class; ++p) {
68 |     const float* scores = conf_data + p * num_classes;  // scores of prediction p
69 |     std::vector<float> conf_classes;
70 |     for (int c = 0; c < num_classes; ++c) conf_classes.push_back(scores[c]);
71 |     conf_preds->push_back(conf_classes);
72 |   }
73 | }
74 |
75 | // Retrieve all prior bboxes. bboxes and variances
76 | //   prior_data      - num_priors boxes (4 floats each) followed by num_priors
77 | //                     variance quadruples (4 floats each)
78 | //   num_priors      - number of prior boxes stored in prior_data
79 | //   prior_bboxes    - output; one 4-float vector appended per prior box
80 | //   prior_variances - output; one 4-float vector appended per variance quadruple
81 | //   (both outputs are appended to, not cleared)
82 | void GetPriorBBoxes(const float* prior_data, const int num_priors,
83 |     std::vector<std::vector<float> >* prior_bboxes,
84 |     std::vector<std::vector<float> >* prior_variances) {
85 |   // First half of the buffer: box coordinates.
86 |   for (int i = 0; i < num_priors; ++i) {
87 |     const float* box = prior_data + i * 4;
88 |     const std::vector<float> prior_bbox(box, box + 4);
89 |     prior_bboxes->push_back(prior_bbox);
90 |   }
91 |
92 |   // Second half of the buffer: variances, offset by num_priors * 4 floats.
93 |   for (int i = 0; i < num_priors; ++i) {
94 |     const float* variance = prior_data + (num_priors + i) * 4;
95 |     const std::vector<float> prior_variance(variance, variance + 4);
96 |     prior_variances->push_back(prior_variance);
97 |   }
98 | }
99 |
100 | /* code_type: 0 = CORNER; 1 = CENTER_SIZE; 2 = CORNER_SIZE
101 | *
102 | */
103 | void DecodeBBox(
104 | const vector& prior_bbox, const vector& prior_variance,
105 | const int code_type, const bool variance_encoded_in_target,
106 | const bool clip_bbox, const vector& bbox,
107 | vector* decode_bbox) {
108 | if (0 == code_type) {
109 | if (variance_encoded_in_target) {
110 | // variance is encoded in target, we simply need to add the offset
111 | // predictions.
112 | decode_bbox->push_back(prior_bbox[0] + bbox[0]);
113 | decode_bbox->push_back(prior_bbox[1] + bbox[1]);
114 | decode_bbox->push_back(prior_bbox[2] + bbox[2]);
115 | decode_bbox->push_back(prior_bbox[3] + bbox[3]);
116 | } else {
117 | // variance is encoded in bbox, we need to scale the offset accordingly.
118 | decode_bbox->push_back(
119 | prior_bbox[0]+ prior_variance[0] * bbox[0]);
120 | decode_bbox->push_back(
121 | prior_bbox[1] + prior_variance[1] * bbox[1]);
122 | decode_bbox->push_back(
123 | prior_bbox[2] + prior_variance[2] * bbox[2]);
124 | decode_bbox->push_back(
125 | prior_bbox[3] + prior_variance[3] * bbox[3]);
126 | }
127 | } else if (1 == code_type) {
128 | float prior_width = prior_bbox[2] - prior_bbox[0];
129 | //CHECK_GT(prior_width, 0);
130 | float prior_height = prior_bbox[3] - prior_bbox[1];
131 | //CHECK_GT(prior_height, 0);
132 | float prior_center_x = (prior_bbox[0] + prior_bbox[2]) / 2.;
133 | float prior_center_y = (prior_bbox[1] + prior_bbox[3]) / 2.;
134 |
135 | float decode_bbox_center_x, decode_bbox_center_y;
136 | float decode_bbox_width, decode_bbox_height;
137 | if (variance_encoded_in_target) {
138 | // variance is encoded in target, we simply need to retore the offset
139 | // predictions.
140 | decode_bbox_center_x = bbox[0] * prior_width + prior_center_x;
141 | decode_bbox_center_y = bbox[1] * prior_height + prior_center_y;
142 | decode_bbox_width = exp(bbox[2]) * prior_width;
143 | decode_bbox_height = exp(bbox[3]) * prior_height;
144 | } else {
145 | // variance is encoded in bbox, we need to scale the offset accordingly.
146 | decode_bbox_center_x =
147 | prior_variance[0] * bbox[0] * prior_width + prior_center_x;
148 | decode_bbox_center_y =
149 | prior_variance[1] * bbox[1] * prior_height + prior_center_y;
150 | decode_bbox_width =
151 | exp(prior_variance[2] * bbox[2]) * prior_width;
152 | decode_bbox_height =
153 | exp(prior_variance[3] * bbox[3]) * prior_height;
154 | }
155 |
156 | decode_bbox->push_back(decode_bbox_center_x - decode_bbox_width / 2.);
157 | decode_bbox->push_back(decode_bbox_center_y - decode_bbox_height / 2.);
158 | decode_bbox->push_back(decode_bbox_center_x + decode_bbox_width / 2.);
159 | decode_bbox->push_back(decode_bbox_center_y + decode_bbox_height / 2.);
160 | } else if (2 == code_type) {
161 | float prior_width = prior_bbox[2] - prior_bbox[0];
162 | //CHECK_GT(prior_width, 0);
163 | float prior_height = prior_bbox[3] - prior_bbox[1];
164 | //CHECK_GT(prior_height, 0);
165 | if (variance_encoded_in_target) {
166 | // variance is encoded in target, we simply need to add the offset
167 | // predictions.
168 | decode_bbox->push_back(prior_bbox[0] + bbox[0] * prior_width);
169 | decode_bbox->push_back(prior_bbox[1] + bbox[1] * prior_height);
170 | decode_bbox->push_back(prior_bbox[2] + bbox[2] * prior_width);
171 | decode_bbox->push_back(prior_bbox[3] + bbox[3] * prior_height);
172 | } else {
173 | // variance is encoded in bbox, we need to scale the offset accordingly.
174 | decode_bbox->push_back(
175 | prior_bbox[0] + prior_variance[0] * bbox[0] * prior_width);
176 | decode_bbox->push_back(
177 | prior_bbox[1] + prior_variance[1] * bbox[1] * prior_height);
178 | decode_bbox->push_back(
179 | prior_bbox[2] + prior_variance[2] * bbox[2] * prior_width);
180 | decode_bbox->push_back(
181 | prior_bbox[3] + prior_variance[3] * bbox[3] * prior_height);
182 | }
183 | } else {
184 | std::cout<< "Unknown LocLossType."< >& prior_bboxes,
195 | const vector >& prior_variances,
196 | const int code_type, const bool variance_encoded_in_target,
197 | const bool clip_bbox, const vector >& bboxes,
198 | vector >* decode_bboxes) {
199 | //CHECK_EQ(prior_bboxes.size(), prior_variances.size());
200 | //CHECK_EQ(prior_bboxes.size(), bboxes.size());
201 | int num_bboxes = prior_bboxes.size();
202 |
203 | for (int i = 0; i < num_bboxes; ++i) {
204 | vector decode_bbox;
205 | DecodeBBox(prior_bboxes[i], prior_variances[i], code_type,
206 | variance_encoded_in_target, clip_bbox, bboxes[i], &decode_bbox);
207 | decode_bboxes->push_back(decode_bbox);
208 | }
209 | }
210 |
211 | // Transposes class scores from [prior][class] order (data) into [class][prior] order
212 | // (new_data), then replaces the scores of every prior with their softmax over the classes.
213 | void ConfData(const float* data, const int num_classes, const int num_prior, float* new_data) {
214 |   // Transpose: new_data[c * num_prior + p] = data[p * num_classes + c].
215 |   for (int c = 0; c < num_classes; ++c) {
216 |     for (int p = 0; p < num_prior; ++p) {
217 |       new_data[c * num_prior + p] = data[p * num_classes + c];
218 |     }
219 |   }
220 |   //softmax (the per-prior maximum is subtracted before exponentiating)
221 |   for (int p = 0; p < num_prior; ++p) {
222 |     float _max = new_data[p];//new_data[0*num_prior + p]
223 |     for (int c = 1; c < num_classes; ++c) {
224 |       _max = std::max(_max, new_data[c*num_prior + p]);
225 |     }
226 |     // The normaliser must be floating point: an int accumulator truncates every partial sum.
227 |     float sum = 0.f;
228 |     for (int c = 0; c < num_classes; ++c) {
229 |       sum += exp(new_data[c*num_prior + p]-_max);
230 |     }
231 |     for (int j = 0; j < num_classes; ++j) {
232 |       new_data[j*num_prior + p] = exp(new_data[j*num_prior + p]-_max)/sum;
233 |     }
234 |   }
235 | }
236 | // Decodes num_priors location predictions (loc_data) against their prior boxes and
237 | // writes one [xmin, ymin, xmax, ymax] box per prior to bbox_data.
238 | // prior_data holds 4 * num_priors box coordinates followed by 4 * num_priors variances.
239 | // code_type: 0 = CORNER, 1 = CENTER_SIZE, 2 = CORNER_SIZE. share_location,
240 | // num_loc_classes, background_label_id and clip_bbox are accepted but not used here.
241 | template <typename Dtype>
242 | void DecodeBBoxes_2(const Dtype* loc_data, const Dtype* prior_data,
243 |     const int code_type, const bool variance_encoded_in_target,
244 |     const int num_priors, const bool share_location,
245 |     const int num_loc_classes, const int background_label_id,
246 |     const bool clip_bbox, Dtype* bbox_data) {
247 |   const Dtype* variance = prior_data + 4 * num_priors;  // variances follow the boxes
248 |   if(code_type == 0){
249 |     for(int p = 0; p < num_priors; p++) {
250 |       for (int i = 0; i < 4; i++) {
251 |         const int k = 4 * p + i;
252 |         // Fix: the variance scales the offset (as in DecodeBBox); it was being added.
253 |         const Dtype offset = variance_encoded_in_target ? loc_data[k] : variance[k] * loc_data[k];
254 |         bbox_data[k] = prior_data[k] + offset;
255 |       }
256 |     }
257 |   }else if(code_type == 1){
258 |     for(int p = 0; p < num_priors; p++) {
259 |       const Dtype* prior = prior_data + 4 * p;
260 |       const Dtype* loc = loc_data + 4 * p;
261 |       float prior_width = prior[2] - prior[0];
262 |       float prior_height = prior[3] - prior[1];
263 |       float prior_center_x = (prior[0] + prior[2]) / 2.;
264 |       float prior_center_y = (prior[1] + prior[3]) / 2.;
265 |       float decode_bbox_center_x, decode_bbox_center_y;
266 |       float decode_bbox_width, decode_bbox_height;
267 |       if (variance_encoded_in_target) {
268 |         decode_bbox_center_x = loc[0] * prior_width + prior_center_x;
269 |         decode_bbox_center_y = loc[1] * prior_height + prior_center_y;
270 |         decode_bbox_width = exp(loc[2]) * prior_width;
271 |         decode_bbox_height = exp(loc[3]) * prior_height;
272 |       }else{
273 |         decode_bbox_center_x = variance[4 * p + 0] * loc[0] * prior_width + prior_center_x;
274 |         decode_bbox_center_y = variance[4 * p + 1] * loc[1] * prior_height + prior_center_y;
275 |         decode_bbox_width = exp(variance[4 * p + 2] * loc[2]) * prior_width;
276 |         decode_bbox_height = exp(variance[4 * p + 3] * loc[3]) * prior_height;
277 |       }
278 |       bbox_data[4 * p + 0] = (decode_bbox_center_x - decode_bbox_width / 2.);
279 |       bbox_data[4 * p + 1] = (decode_bbox_center_y - decode_bbox_height / 2.);
280 |       bbox_data[4 * p + 2] = (decode_bbox_center_x + decode_bbox_width / 2.);
281 |       bbox_data[4 * p + 3] = (decode_bbox_center_y + decode_bbox_height / 2.);
282 |     }
283 |   }else if(code_type == 2){
284 |     for(int p = 0; p < num_priors; p++) {
285 |       const Dtype* prior = prior_data + 4 * p;
286 |       const Dtype* loc = loc_data + 4 * p;
287 |       float prior_width = prior[2] - prior[0];
288 |       float prior_height = prior[3] - prior[1];
289 |       if (variance_encoded_in_target) {
290 |         // Fix: corner offsets are added to the prior corners directly (as in
291 |         // DecodeBBox); xmax/ymax previously went through a stray exp().
292 |         bbox_data[4 * p + 0] = prior[0] + loc[0] * prior_width;
293 |         bbox_data[4 * p + 1] = prior[1] + loc[1] * prior_height;
294 |         bbox_data[4 * p + 2] = prior[2] + loc[2] * prior_width;
295 |         bbox_data[4 * p + 3] = prior[3] + loc[3] * prior_height;
296 |       }else {
297 |         bbox_data[4 * p + 0] = prior[0] + variance[4 * p + 0] * loc[0] * prior_width;
298 |         bbox_data[4 * p + 1] = prior[1] + variance[4 * p + 1] * loc[1] * prior_height;
299 |         bbox_data[4 * p + 2] = prior[2] + variance[4 * p + 2] * loc[2] * prior_width;
300 |         bbox_data[4 * p + 3] = prior[3] + variance[4 * p + 3] * loc[3] * prior_height;
301 |       }
302 |     }
303 |   }else{
304 |     std::cout << "Unknown LocLossType." << std::endl;
305 |   }
306 | }
307 |
308 | // Area of bbox = [xmin, ymin, xmax, ymax]. Returns 0 for an inverted box.
309 | // When `normalized` is false, 1 is added to both side lengths before multiplying.
310 | template <typename Dtype>
311 | Dtype BBoxSize(const Dtype* bbox, const bool normalized = true) {
312 |   // If bbox is invalid (e.g. xmax < xmin or ymax < ymin), return 0.
313 |   if (bbox[2] < bbox[0] || bbox[3] < bbox[1]) {
314 |     return Dtype(0.);
315 |   }
316 |   const Dtype width = bbox[2] - bbox[0];
317 |   const Dtype height = bbox[3] - bbox[1];
318 |   if (normalized) {
319 |     return width * height;
320 |   }
321 |   // If bbox is not within range [0, 1].
322 |   return (width + 1) * (height + 1);
323 | }
324 |
325 | // Intersection-over-union of two [xmin, ymin, xmax, ymax] boxes.
326 | // Returns 0 when the boxes do not overlap.
327 | template <typename Dtype>
328 | Dtype JaccardOverlap(const Dtype* bbox1, const Dtype* bbox2) {
329 |   // Disjoint boxes share no area.
330 |   if (bbox2[0] > bbox1[2] || bbox2[2] < bbox1[0] ||
331 |       bbox2[1] > bbox1[3] || bbox2[3] < bbox1[1]) {
332 |     return Dtype(0.);
333 |   }
334 |   // Corners of the intersection rectangle.
335 |   const Dtype inter_xmin = std::max(bbox1[0], bbox2[0]);
336 |   const Dtype inter_ymin = std::max(bbox1[1], bbox2[1]);
337 |   const Dtype inter_xmax = std::min(bbox1[2], bbox2[2]);
338 |   const Dtype inter_ymax = std::min(bbox1[3], bbox2[3]);
339 |
340 |   const Dtype inter_size = (inter_xmax - inter_xmin) * (inter_ymax - inter_ymin);
341 |   const Dtype bbox1_size = BBoxSize(bbox1);
342 |   const Dtype bbox2_size = BBoxSize(bbox2);
343 |
344 |   // union = area1 + area2 - intersection
345 |   return inter_size / (bbox1_size + bbox2_size - inter_size);
346 | }
347 | // Orders (score, payload) pairs by descending score; used as the std::sort comparator below.
348 | // NOTE(review): template arguments reconstructed from usage and upstream Caffe SSD — confirm.
349 | template <typename T>
350 | bool SortScorePairDescend(const std::pair<float, T>& pair1,
351 |                           const std::pair<float, T>& pair2) {
352 |   return pair1.first > pair2.first;
353 | }
354 |
355 | // Appends a (score, index) pair for every score above `threshold`, sorts the vector by
356 | // descending score and truncates it to the best top_k entries when top_k > -1.
357 | template <typename Dtype>
358 | void GetMaxScoreIndex(const Dtype* scores, const int num, const float threshold,
359 |       const int top_k, std::vector<std::pair<Dtype, int> >* score_index_vec) {
360 |   // Generate index score pairs.
361 |   for (int i = 0; i < num; ++i) {
362 |     if (scores[i] > threshold) {
363 |       score_index_vec->push_back(std::make_pair(scores[i], i));
364 |     }
365 |   }
366 |   // Sort the score pair according to the scores in descending order
367 |   std::sort(score_index_vec->begin(), score_index_vec->end(), SortScorePairDescend<int>);
368 |   // Keep top_k scores if needed (top_k is known non-negative before the unsigned compare).
369 |   if (top_k > -1 && static_cast<size_t>(top_k) < score_index_vec->size()) {
370 |     score_index_vec->resize(top_k);
371 |   }
372 | }
373 |
374 | template
375 | void ApplyNMSFast(const Dtype* bboxes, const Dtype* scores, const int num,
376 | const float score_threshold, const float nms_threshold,
377 | const float eta, const int top_k, vector* indices) {
378 | // Get top_k scores (with corresponding indices).
379 | vector > score_index_vec;
380 | //float n1 = cv::getTickCount();
381 | GetMaxScoreIndex(scores, num, score_threshold, top_k, &score_index_vec);
382 | // n1 = (cv::getTickCount()-n1) / cv::getTickFrequency();
383 | //printf("======n==1 Forward_DetectionOutputLayer time is %f \n", n1);
384 |
385 | // Do nms.
386 | float adaptive_threshold = nms_threshold;
387 | indices->clear();
388 | //float n2 = cv::getTickCount();
389 | std::cout<<"======n==n" <size(); ++k) {
394 | if (keep) {
395 | const int kept_idx = (*indices)[k];
396 | float overlap = JaccardOverlap(bboxes + idx * 4, bboxes + kept_idx * 4);
397 | keep = overlap <= adaptive_threshold;
398 | } else {
399 | break;
400 | }
401 | }
402 | if (keep) {
403 | indices->push_back(idx);
404 | }
405 | score_index_vec.erase(score_index_vec.begin());
406 | if (keep && eta < 1 && adaptive_threshold > 0.5) {
407 | adaptive_threshold *= eta;
408 | }
409 | }
410 | //n2 = (cv::getTickCount()-n2) / cv::getTickFrequency();
411 | //printf("======n==2 Forward_DetectionOutputLayer time is %f \n", n2);
412 | }
413 |
414 |
415 | void Forward_DetectionOutputLayer(float* loc_data, float* conf_data, float* prior_data, int num_priors_, int num_classes_, vector >* detecions) {
416 | // Retrieve all location predictions.
417 | /*vector> all_loc_preds;
418 | GetLocPredictions(loc_data, num_priors_, num_loc_classes_, &all_loc_preds);
419 | // Retrieve all confidences.
420 | vector > all_conf_scores;
421 | GetConfidenceScores(conf_data, num_priors_, num_classes_,
422 | &all_conf_scores);
423 | // Retrieve all prior bboxes.
424 | vector> prior_bboxes;
425 | vector> prior_variances;
426 | GetPriorBBoxes(prior_data, num_priors_, &prior_bboxes, &prior_variances);
427 | // Decode all loc predictions to bboxes.
428 | vector> all_decode_bboxes;
429 | //const bool clip_bbox = false;
430 | DecodeBBoxes(prior_bboxes, prior_variances, code_type_,
431 | variance_encoded_in_target_, clip_bbox, all_loc_preds,
432 | &all_decode_bboxes);*/
433 |
434 |
435 | int num_kept = 0;
436 | vector