├── .gitignore ├── CMakeLists.txt ├── README.md ├── a.jpeg ├── demo.cpp ├── models └── nanodet.rknn ├── res.jpg ├── rknn_api_sdk ├── CMakeLists.txt ├── include │ ├── Net.hpp │ └── testtime.hpp ├── rknn_api │ ├── arm │ │ ├── include │ │ │ └── rknn_api.h │ │ └── lib64 │ │ │ └── librknn_api.so │ └── x86 │ │ ├── include │ │ └── rknn_api.h │ │ └── lib64 │ │ └── librknn_api.so ├── src │ └── Net.cpp └── test │ ├── test.cpp │ └── testtime.hpp └── rknn_nanodet ├── CMakeLists.txt ├── include └── NanoDet.hpp ├── src └── NanoDet.cpp └── test └── test.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | /build 4 | .vscode 5 | .cache 6 | 7 | # Compiled Object files 8 | *.slo 9 | *.lo 10 | *.o 11 | *.obj 12 | 13 | # Precompiled Headers 14 | *.gch 15 | *.pch 16 | 17 | # Compiled Dynamic libraries 18 | # *.so 19 | *.dylib 20 | *.dll 21 | 22 | # Fortran module files 23 | *.mod 24 | *.smod 25 | 26 | # Compiled Static libraries 27 | *.lai 28 | *.la 29 | *.a 30 | *.lib 31 | 32 | # Executables 33 | *.exe 34 | *.out 35 | *.app 36 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(nanodet_rknn_demo) 2 | 3 | 4 | 5 | add_subdirectory(rknn_api_sdk) 6 | include_directories(${Rknn_Net_INCLUDE_DIRS}) 7 | link_libraries(${Rknn_Net_LIBS}) 8 | 9 | add_subdirectory(rknn_nanodet) 10 | include_directories(${NanoDet_INCLUDE_DIRS}) 11 | link_libraries(${NanoDet_LIBS}) 12 | 13 | find_package(OpenCV 4 REQUIRED) 14 | 15 | add_executable( 16 | nanodet_rknn_demo 17 | ./demo.cpp 18 | ) 19 | 20 | 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nanodet_rknn 2 | nanodet_rknn on rk3399pro platform 3 | 4 | # dependence 5 | 6 | Make sure your platform has installed 
rknn_toolkit v1.6.0, please refer to [link](http://t.rock-chips.com/forum.php?mod=forumdisplay) 7 | 8 | - opencv 4+ (3+ is ok but not tested) 9 | 10 | # usage 11 | 12 | ``` shell 13 | mkdir build 14 | cd build 15 | cmake .. 16 | make 17 | ``` 18 | 19 | # results 20 | 21 | ![](./a.jpeg) 22 | ![](./res.jpg) -------------------------------------------------------------------------------- /a.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sologala/nanodet_rknn/448c93c9cfb90d71506cf15cda606a9006ef7efc/a.jpeg -------------------------------------------------------------------------------- /demo.cpp: -------------------------------------------------------------------------------- 1 | #include "NanoDet.hpp" 2 | #include 3 | #include "testtime.hpp" 4 | #include 5 | #include 6 | #include 7 | using namespace std; 8 | 9 | int main() 10 | { 11 | const string model_path = "../models/nanodet.rknn"; 12 | NanoDet net(model_path); 13 | net.Input_Output_Configuration(); 14 | cv::Mat img = cv::imread("../a.jpeg"); 15 | 16 | int height = img.rows, width = img.cols; 17 | 18 | TestTimeVar; 19 | TestTimeTic; 20 | auto res = net.detect(img); 21 | TestTimeTocFPS("forward"); 22 | 23 | for (auto some_class_bbxs : res) 24 | { 25 | for (auto bbx : some_class_bbxs) 26 | { 27 | cv::Rect bbx_(bbx.x1, bbx.y1, bbx.x2 - bbx.x1, bbx.y2 - bbx.y1); 28 | cv::rectangle(img, bbx_, {0, 0, 225}); 29 | char text[256]; 30 | sprintf(text, "%s %.1f%%", net.labels_[bbx.label].c_str(), bbx.score * 100); 31 | int baseLine = 0; 32 | cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine); 33 | 34 | int x = (bbx.x1); 35 | int y = (bbx.y1) - label_size.height - baseLine; 36 | if (y < 0) 37 | y = 0; 38 | if (x + label_size.width > img.cols) 39 | x = img.cols - label_size.width; 40 | cv::putText(img, text, cv::Point(x, y + label_size.height), 41 | cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255)); 42 | } 43 | 
} 44 | cv::imshow("res", img); 45 | cv::imwrite("res.jpg", img); 46 | cv::waitKey(0); 47 | } -------------------------------------------------------------------------------- /models/nanodet.rknn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sologala/nanodet_rknn/448c93c9cfb90d71506cf15cda606a9006ef7efc/models/nanodet.rknn -------------------------------------------------------------------------------- /res.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sologala/nanodet_rknn/448c93c9cfb90d71506cf15cda606a9006ef7efc/res.jpg -------------------------------------------------------------------------------- /rknn_api_sdk/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | project(Rknn_Net) 4 | if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") 5 | set(ARCH_DIR x86) 6 | else() 7 | set(ARCH_DIR arm) 8 | endif() 9 | 10 | set(RKNN_API_HEADERS 11 | ${CMAKE_CURRENT_SOURCE_DIR}/rknn_api/${ARCH_DIR}/include/ 12 | ) 13 | set(RKNN_API_LIBS 14 | ${CMAKE_CURRENT_SOURCE_DIR}/rknn_api/${ARCH_DIR}/lib64/librknn_api.so 15 | pthread 16 | ) 17 | include_directories( 18 | ${RKNN_API_HEADERS} 19 | ./include/ 20 | ) 21 | file(GLOB Rknn_Net_API_SRCS "./src/*.c*") 22 | 23 | add_library( 24 | ${PROJECT_NAME} 25 | ${Rknn_Net_API_SRCS} 26 | ) 27 | target_link_libraries( 28 | ${PROJECT_NAME} 29 | ${RKNN_API_LIBS} 30 | ) 31 | 32 | set(Rknn_Net_INCLUDE_DIRS 33 | ${CMAKE_CURRENT_SOURCE_DIR}/include/ 34 | ${RKNN_API_HEADERS} 35 | PARENT_SCOPE 36 | ) 37 | set(Rknn_Net_LIBS 38 | ${PROJECT_NAME} 39 | ${CMAKE_CURRENT_SOURCE_DIR}/rknn_api/${ARCH_DIR}/lib64/librknn_api.so 40 | PARENT_SCOPE 41 | ) 42 | 43 | 44 | -------------------------------------------------------------------------------- /rknn_api_sdk/include/Net.hpp: 
-------------------------------------------------------------------------------- 1 | #ifndef NET_H 2 | #define NET_H 3 | #include 4 | #include 5 | 6 | #include "rknn_api.h" 7 | 8 | #pragma once 9 | namespace RKNN_NET 10 | { 11 | class Net 12 | { 13 | public: 14 | Net(const std::string _model_path); 15 | virtual ~Net(); 16 | virtual void Input_Output_Configuration(); 17 | void Forward(std::vector _datas); 18 | 19 | protected: 20 | void *model_; 21 | rknn_context ctx_; 22 | std::string modelPath_; 23 | int n_inputs_, n_outputs_; 24 | 25 | rknn_input *inputs_; 26 | rknn_tensor_attr *inputs_attr; 27 | rknn_output *outputs_; 28 | rknn_tensor_attr *outputs_attr; 29 | float **outputs_buffer; 30 | }; 31 | } // namespace RKNN_NET 32 | #endif -------------------------------------------------------------------------------- /rknn_api_sdk/include/testtime.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #define __DoTestTime 5 | 6 | #ifdef __DoTestTime 7 | #define TestTimeVar std::chrono::steady_clock::time_point testTime_t1, testTime_t2; 8 | #define TestTimeVarSum(x) double x = 0; 9 | 10 | #define TestTimeTic testTime_t1 = std::chrono::steady_clock::now(); 11 | 12 | #define TestTimeToc(str) \ 13 | testTime_t2 = std::chrono::steady_clock::now(); \ 14 | printf("%s: %.1f ms\n", str, \ 15 | 1000 * std::chrono::duration_cast>(testTime_t2 - testTime_t1).count()); 16 | 17 | #define TestTimeTocFPS(str) \ 18 | testTime_t2 = std::chrono::steady_clock::now(); \ 19 | printf("%s FPS: %.1f\n", str, \ 20 | 1.0 / std::chrono::duration_cast>(testTime_t2 - testTime_t1).count()); 21 | 22 | #define TestTimeTocSum(x) \ 23 | testTime_t2 = std::chrono::steady_clock::now(); \ 24 | x += 1000 * std::chrono::duration_cast>(testTime_t2 - testTime_t1).count(); 25 | 26 | #define TestTimePrintf(str, x) printf("%s: %.1f\n", str, x); 27 | 28 | #else 29 | #define TestTimeVar 30 | #define TestTimeVarSum(x) 31 | #define TestTimeTic 32 | 33 
| #define TestTimeToc(str) 34 | #define TestTimeTocSum(x) 35 | #define TestTimePrintf(str, x) 36 | #endif 37 | -------------------------------------------------------------------------------- /rknn_api_sdk/rknn_api/arm/include/rknn_api.h: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | * 3 | * Copyright (c) 2017 - 2018 by Rockchip Corp. All rights reserved. 4 | * 5 | * The material in this file is confidential and contains trade secrets 6 | * of Rockchip Corporation. This is proprietary information owned by 7 | * Rockchip Corporation. No part of this work may be disclosed, 8 | * reproduced, copied, transmitted, or used in any way for any purpose, 9 | * without the express written permission of Rockchip Corporation. 10 | * 11 | *****************************************************************************/ 12 | 13 | 14 | #ifndef _RKNN_API_H 15 | #define _RKNN_API_H 16 | 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | #include 22 | 23 | /* RKNN API Version */ 24 | #define API_VERSION "1.6.1" 25 | 26 | /* 27 | Definition of extended flag for rknn_init. 28 | */ 29 | /* set high priority context. */ 30 | #define RKNN_FLAG_PRIOR_HIGH 0x00000000 31 | 32 | /* set medium priority context */ 33 | #define RKNN_FLAG_PRIOR_MEDIUM 0x00000001 34 | 35 | /* set low priority context. */ 36 | #define RKNN_FLAG_PRIOR_LOW 0x00000002 37 | 38 | /* asynchronous mode. 39 | when enable, rknn_outputs_get will not block for too long because it directly retrieves the result of 40 | the previous frame which can increase the frame rate on single-threaded mode, but at the cost of 41 | rknn_outputs_get not retrieves the result of the current frame. 42 | in multi-threaded mode you do not need to turn this mode on. */ 43 | #define RKNN_FLAG_ASYNC_MASK 0x00000004 44 | 45 | /* collect performance mode. 
46 | when enable, you can get detailed performance reports via rknn_query(ctx, RKNN_QUERY_PERF_DETAIL, ...), 47 | but it will reduce the frame rate. */ 48 | #define RKNN_FLAG_COLLECT_PERF_MASK 0x00000008 49 | 50 | /* You can store the rknn model under NPU, 51 | * when you call rknn_init(), you can pass the filename of model instead of model data. 52 | * Then you can hide your model and be invisible to the end user. 53 | * */ 54 | #define RKNN_FLAG_LOAD_MODEL_IN_NPU 0x00000010 55 | 56 | /* 57 | Error code returned by the RKNN API. 58 | */ 59 | #define RKNN_SUCC 0 /* execute succeed. */ 60 | #define RKNN_ERR_FAIL -1 /* execute failed. */ 61 | #define RKNN_ERR_TIMEOUT -2 /* execute timeout. */ 62 | #define RKNN_ERR_DEVICE_UNAVAILABLE -3 /* device is unavailable. */ 63 | #define RKNN_ERR_MALLOC_FAIL -4 /* memory malloc fail. */ 64 | #define RKNN_ERR_PARAM_INVALID -5 /* parameter is invalid. */ 65 | #define RKNN_ERR_MODEL_INVALID -6 /* model is invalid. */ 66 | #define RKNN_ERR_CTX_INVALID -7 /* context is invalid. */ 67 | #define RKNN_ERR_INPUT_INVALID -8 /* input is invalid. */ 68 | #define RKNN_ERR_OUTPUT_INVALID -9 /* output is invalid. */ 69 | #define RKNN_ERR_DEVICE_UNMATCH -10 /* the device is unmatch, please update rknn sdk 70 | and npu driver/firmware. */ 71 | #define RKNN_ERR_INCOMPATILE_PRE_COMPILE_MODEL -11 /* This RKNN model use pre_compile mode, but not compatible with current driver. */ 72 | #define RKNN_ERR_INCOMPATILE_OPTIMIZATION_LEVEL_VERSION -12 /* This RKNN model set optimization level, but not compatible with current driver. */ 73 | #define RKNN_ERR_TARGET_PLATFORM_UNMATCH -13 /* This RKNN model set target platform, but not compatible with current platform. */ 74 | /* 75 | Definition for tensor 76 | */ 77 | #define RKNN_MAX_DIMS 16 /* maximum dimension of tensor. */ 78 | #define RKNN_MAX_NAME_LEN 256 /* maximum name lenth of tensor. */ 79 | #define RKNN_MAX_NUM_CHANNEL 128 /* maximum channel number of graph input tensor. 
*/ 80 | 81 | /* 82 | Definition for deivce id 83 | */ 84 | #define RKNN_MAX_DEVS 256 /* maximum number of device. */ 85 | #define RKNN_MAX_DEV_LEN 64 /* maximum id/type lenth of device. */ 86 | 87 | typedef uint64_t rknn_context; 88 | 89 | 90 | /* 91 | The query command for rknn_query 92 | */ 93 | typedef enum _rknn_query_cmd { 94 | RKNN_QUERY_IN_OUT_NUM = 0, /* query the number of input & output tensor. */ 95 | RKNN_QUERY_INPUT_ATTR, /* query the attribute of input tensor. */ 96 | RKNN_QUERY_OUTPUT_ATTR, /* query the attribute of output tensor. */ 97 | RKNN_QUERY_PERF_DETAIL, /* query the detail performance, need set 98 | RKNN_FLAG_COLLECT_PERF_MASK when call rknn_init, 99 | this query needs to be valid after rknn_outputs_get. */ 100 | RKNN_QUERY_PERF_RUN, /* query the time of run, 101 | this query needs to be valid after rknn_outputs_get. */ 102 | RKNN_QUERY_SDK_VERSION, /* query the sdk & driver version */ 103 | 104 | RKNN_QUERY_CMD_MAX 105 | } rknn_query_cmd; 106 | 107 | /* 108 | the tensor data type. 109 | */ 110 | typedef enum _rknn_tensor_type { 111 | RKNN_TENSOR_FLOAT32 = 0, /* data type is float32. */ 112 | RKNN_TENSOR_FLOAT16, /* data type is float16. */ 113 | RKNN_TENSOR_INT8, /* data type is int8. */ 114 | RKNN_TENSOR_UINT8, /* data type is uint8. */ 115 | RKNN_TENSOR_INT16, /* data type is int16. */ 116 | 117 | RKNN_TENSOR_TYPE_MAX 118 | } rknn_tensor_type; 119 | 120 | /* 121 | the quantitative type. 122 | */ 123 | typedef enum _rknn_tensor_qnt_type { 124 | RKNN_TENSOR_QNT_NONE = 0, /* none. */ 125 | RKNN_TENSOR_QNT_DFP, /* dynamic fixed point. */ 126 | RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC, /* asymmetric affine. */ 127 | 128 | RKNN_TENSOR_QNT_MAX 129 | } rknn_tensor_qnt_type; 130 | 131 | /* 132 | the tensor data format. 133 | */ 134 | typedef enum _rknn_tensor_format { 135 | RKNN_TENSOR_NCHW = 0, /* data format is NCHW. */ 136 | RKNN_TENSOR_NHWC, /* data format is NHWC. 
*/ 137 | 138 | RKNN_TENSOR_FORMAT_MAX 139 | } rknn_tensor_format; 140 | 141 | /* 142 | the information for RKNN_QUERY_IN_OUT_NUM. 143 | */ 144 | typedef struct _rknn_input_output_num { 145 | uint32_t n_input; /* the number of input. */ 146 | uint32_t n_output; /* the number of output. */ 147 | } rknn_input_output_num; 148 | 149 | /* 150 | the information for RKNN_QUERY_INPUT_ATTR / RKNN_QUERY_OUTPUT_ATTR. 151 | */ 152 | typedef struct _rknn_tensor_attr { 153 | uint32_t index; /* input parameter, the index of input/output tensor, 154 | need set before call rknn_query. */ 155 | 156 | uint32_t n_dims; /* the number of dimensions. */ 157 | uint32_t dims[RKNN_MAX_DIMS]; /* the dimensions array. */ 158 | char name[RKNN_MAX_NAME_LEN]; /* the name of tensor. */ 159 | 160 | uint32_t n_elems; /* the number of elements. */ 161 | uint32_t size; /* the bytes size of tensor. */ 162 | 163 | rknn_tensor_format fmt; /* the data format of tensor. */ 164 | rknn_tensor_type type; /* the data type of tensor. */ 165 | rknn_tensor_qnt_type qnt_type; /* the quantitative type of tensor. */ 166 | int8_t fl; /* fractional length for RKNN_TENSOR_QNT_DFP. */ 167 | uint32_t zp; /* zero point for RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC. */ 168 | float scale; /* scale for RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC. */ 169 | } rknn_tensor_attr; 170 | 171 | /* 172 | the information for RKNN_QUERY_PERF_DETAIL. 173 | */ 174 | typedef struct _rknn_perf_detail { 175 | char* perf_data; /* the string pointer of perf detail. don't need free it by user. */ 176 | uint64_t data_len; /* the string length. */ 177 | } rknn_perf_detail; 178 | 179 | /* 180 | the information for RKNN_QUERY_PERF_RUN. 181 | */ 182 | typedef struct _rknn_perf_run { 183 | int64_t run_duration; /* real inference time (us) */ 184 | } rknn_perf_run; 185 | 186 | /* 187 | the information for RKNN_QUERY_SDK_VERSION. 188 | */ 189 | typedef struct _rknn_sdk_version { 190 | char api_version[256]; /* the version of rknn api. 
*/ 191 | char drv_version[256]; /* the version of rknn driver. */ 192 | } rknn_sdk_version; 193 | 194 | /* 195 | the information for rknn_find_devices. 196 | */ 197 | typedef struct _rknn_devices_id { 198 | uint32_t n_devices; /* the number of devices. */ 199 | char types[RKNN_MAX_DEVS][RKNN_MAX_DEV_LEN]; /* the array of device type. */ 200 | char ids[RKNN_MAX_DEVS][RKNN_MAX_DEV_LEN]; /* the array of device ID. */ 201 | } rknn_devices_id; 202 | 203 | /* 204 | the input information for rknn_input_set. 205 | */ 206 | typedef struct _rknn_input { 207 | uint32_t index; /* the input index. */ 208 | void* buf; /* the input buf for index. */ 209 | uint32_t size; /* the size of input buf. */ 210 | uint8_t pass_through; /* pass through mode. 211 | if TRUE, the buf data is passed directly to the input node of the rknn model 212 | without any conversion. the following variables do not need to be set. 213 | if FALSE, the buf data is converted into an input consistent with the model 214 | according to the following type and fmt. so the following variables 215 | need to be set.*/ 216 | rknn_tensor_type type; /* the data type of input buf. */ 217 | rknn_tensor_format fmt; /* the data format of input buf. 218 | currently the internal input format of NPU is NCHW by default. 219 | so entering NCHW data can avoid the format conversion in the driver. */ 220 | } rknn_input; 221 | 222 | /* 223 | the output information for rknn_outputs_get. 224 | */ 225 | typedef struct _rknn_output { 226 | uint8_t want_float; /* want transfer output data to float */ 227 | uint8_t is_prealloc; /* whether buf is pre-allocated. 228 | if TRUE, the following variables need to be set. 229 | if FALSE, the following variables do not need to be set. */ 230 | uint32_t index; /* the output index. */ 231 | void* buf; /* the output buf for index. 232 | when is_prealloc = FALSE and rknn_outputs_release called, 233 | this buf pointer will be free and don't use it anymore. 
*/ 234 | uint32_t size; /* the size of output buf. */ 235 | } rknn_output; 236 | 237 | /* 238 | the extend information for rknn_init. 239 | */ 240 | typedef struct _rknn_init_extend { 241 | char* device_id; /* input parameter, indicate which device selected. if only one 242 | device connected, can set nullptr. */ 243 | } rknn_init_extend; 244 | 245 | /* 246 | the extend information for rknn_run. 247 | */ 248 | typedef struct _rknn_run_extend { 249 | uint64_t frame_id; /* output parameter, indicate current frame id of run. */ 250 | } rknn_run_extend; 251 | 252 | /* 253 | the extend information for rknn_outputs_get. 254 | */ 255 | typedef struct _rknn_output_extend { 256 | uint64_t frame_id; /* output parameter, indicate the frame id of outputs, corresponds to 257 | struct rknn_run_extend.frame_id.*/ 258 | } rknn_output_extend; 259 | 260 | 261 | /* rknn_find_devices 262 | 263 | find the devices that connected to host. 264 | 265 | input: 266 | rknn_devices_id* pdevs the pointer of devices information structure. 267 | return: 268 | int error code. 269 | */ 270 | int rknn_find_devices(rknn_devices_id* pdevs); 271 | 272 | 273 | /* rknn_init 274 | 275 | initial the context and load the rknn model. 276 | 277 | input: 278 | rknn_context* context the pointer of context handle. 279 | void* model pointer to the rknn model. 280 | uint32_t size the size of rknn model. 281 | uint32_t flag extend flag, see the define of RKNN_FLAG_XXX_XXX. 282 | return: 283 | int error code. 284 | */ 285 | int rknn_init(rknn_context* context, void* model, uint32_t size, uint32_t flag); 286 | 287 | 288 | /* rknn_init2 289 | 290 | initial the context and load the rknn model (version 2). 291 | 292 | input: 293 | rknn_context* context the pointer of context handle. 294 | void* model pointer to the rknn model. 295 | uint32_t size the size of rknn model. 296 | uint32_t flag extend flag, see the define of RKNN_FLAG_XXX_XXX. 297 | rknn_init_extend* extend the extend information of init. 
298 | return: 299 | int error code. 300 | */ 301 | int rknn_init2(rknn_context* context, void* model, uint32_t size, uint32_t flag, rknn_init_extend* extend); 302 | 303 | 304 | /* rknn_destroy 305 | 306 | unload the rknn model and destroy the context. 307 | 308 | input: 309 | rknn_context context the handle of context. 310 | return: 311 | int error code. 312 | */ 313 | int rknn_destroy(rknn_context context); 314 | 315 | 316 | /* rknn_query 317 | 318 | query the information about model or others. see rknn_query_cmd. 319 | 320 | input: 321 | rknn_context context the handle of context. 322 | rknn_query_cmd cmd the command of query. 323 | void* info the buffer point of information. 324 | uint32_t size the size of information. 325 | return: 326 | int error code. 327 | */ 328 | int rknn_query(rknn_context context, rknn_query_cmd cmd, void* info, uint32_t size); 329 | 330 | 331 | /* rknn_inputs_set 332 | 333 | set inputs information by input index of rknn model. 334 | inputs information see rknn_input. 335 | 336 | input: 337 | rknn_context context the handle of context. 338 | uint32_t n_inputs the number of inputs. 339 | rknn_input inputs[] the arrays of inputs information, see rknn_input. 340 | return: 341 | int error code 342 | */ 343 | int rknn_inputs_set(rknn_context context, uint32_t n_inputs, rknn_input inputs[]); 344 | 345 | 346 | /* rknn_run 347 | 348 | run the model to execute inference. 349 | this function does not block normally, but it blocks when more than 3 inferences 350 | are not obtained by rknn_outputs_get. 351 | 352 | input: 353 | rknn_context context the handle of context. 354 | rknn_run_extend* extend the extend information of run. 355 | return: 356 | int error code. 357 | */ 358 | int rknn_run(rknn_context context, rknn_run_extend* extend); 359 | 360 | 361 | /* rknn_outputs_get 362 | 363 | wait the inference to finish and get the outputs. 364 | this function will block until inference finish. 365 | the results will set to outputs[]. 
366 | 367 | input: 368 | rknn_context context the handle of context. 369 | uint32_t n_outputs the number of outputs. 370 | rknn_output outputs[] the arrays of output, see rknn_output. 371 | rknn_output_extend* the extend information of output. 372 | return: 373 | int error code. 374 | */ 375 | int rknn_outputs_get(rknn_context context, uint32_t n_outputs, rknn_output outputs[], rknn_output_extend* extend); 376 | 377 | 378 | /* rknn_outputs_release 379 | 380 | release the outputs that get by rknn_outputs_get. 381 | after called, the rknn_output[x].buf get from rknn_outputs_get will 382 | also be free when rknn_output[x].is_prealloc = FALSE. 383 | 384 | input: 385 | rknn_context context the handle of context. 386 | uint32_t n_ouputs the number of outputs. 387 | rknn_output outputs[] the arrays of output. 388 | return: 389 | int error code 390 | */ 391 | int rknn_outputs_release(rknn_context context, uint32_t n_ouputs, rknn_output outputs[]); 392 | 393 | #ifdef __cplusplus 394 | } //extern "C" 395 | #endif 396 | 397 | #endif //_RKNN_API_H 398 | -------------------------------------------------------------------------------- /rknn_api_sdk/rknn_api/arm/lib64/librknn_api.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sologala/nanodet_rknn/448c93c9cfb90d71506cf15cda606a9006ef7efc/rknn_api_sdk/rknn_api/arm/lib64/librknn_api.so -------------------------------------------------------------------------------- /rknn_api_sdk/rknn_api/x86/include/rknn_api.h: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | * 3 | * Copyright (c) 2017 - 2018 by Rockchip Corp. All rights reserved. 4 | * 5 | * The material in this file is confidential and contains trade secrets 6 | * of Rockchip Corporation. This is proprietary information owned by 7 | * Rockchip Corporation. 
No part of this work may be disclosed, 8 | * reproduced, copied, transmitted, or used in any way for any purpose, 9 | * without the express written permission of Rockchip Corporation. 10 | * 11 | *****************************************************************************/ 12 | 13 | 14 | #ifndef _RKNN_API_H 15 | #define _RKNN_API_H 16 | 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | #include 22 | 23 | /* RKNN API Version */ 24 | #define API_VERSION "1.6.1" 25 | 26 | /* 27 | Definition of extended flag for rknn_init. 28 | */ 29 | /* set high priority context. */ 30 | #define RKNN_FLAG_PRIOR_HIGH 0x00000000 31 | 32 | /* set medium priority context */ 33 | #define RKNN_FLAG_PRIOR_MEDIUM 0x00000001 34 | 35 | /* set low priority context. */ 36 | #define RKNN_FLAG_PRIOR_LOW 0x00000002 37 | 38 | /* asynchronous mode. 39 | when enable, rknn_outputs_get will not block for too long because it directly retrieves the result of 40 | the previous frame which can increase the frame rate on single-threaded mode, but at the cost of 41 | rknn_outputs_get not retrieves the result of the current frame. 42 | in multi-threaded mode you do not need to turn this mode on. */ 43 | #define RKNN_FLAG_ASYNC_MASK 0x00000004 44 | 45 | /* collect performance mode. 46 | when enable, you can get detailed performance reports via rknn_query(ctx, RKNN_QUERY_PERF_DETAIL, ...), 47 | but it will reduce the frame rate. */ 48 | #define RKNN_FLAG_COLLECT_PERF_MASK 0x00000008 49 | 50 | /* You can store the rknn model under NPU, 51 | * when you call rknn_init(), you can pass the filename of model instead of model data. 52 | * Then you can hide your model and be invisible to the end user. 53 | * */ 54 | #define RKNN_FLAG_LOAD_MODEL_IN_NPU 0x00000010 55 | 56 | /* 57 | Error code returned by the RKNN API. 58 | */ 59 | #define RKNN_SUCC 0 /* execute succeed. */ 60 | #define RKNN_ERR_FAIL -1 /* execute failed. */ 61 | #define RKNN_ERR_TIMEOUT -2 /* execute timeout. 
*/ 62 | #define RKNN_ERR_DEVICE_UNAVAILABLE -3 /* device is unavailable. */ 63 | #define RKNN_ERR_MALLOC_FAIL -4 /* memory malloc fail. */ 64 | #define RKNN_ERR_PARAM_INVALID -5 /* parameter is invalid. */ 65 | #define RKNN_ERR_MODEL_INVALID -6 /* model is invalid. */ 66 | #define RKNN_ERR_CTX_INVALID -7 /* context is invalid. */ 67 | #define RKNN_ERR_INPUT_INVALID -8 /* input is invalid. */ 68 | #define RKNN_ERR_OUTPUT_INVALID -9 /* output is invalid. */ 69 | #define RKNN_ERR_DEVICE_UNMATCH -10 /* the device is unmatch, please update rknn sdk 70 | and npu driver/firmware. */ 71 | #define RKNN_ERR_INCOMPATILE_PRE_COMPILE_MODEL -11 /* This RKNN model use pre_compile mode, but not compatible with current driver. */ 72 | #define RKNN_ERR_INCOMPATILE_OPTIMIZATION_LEVEL_VERSION -12 /* This RKNN model set optimization level, but not compatible with current driver. */ 73 | #define RKNN_ERR_TARGET_PLATFORM_UNMATCH -13 /* This RKNN model set target platform, but not compatible with current platform. */ 74 | /* 75 | Definition for tensor 76 | */ 77 | #define RKNN_MAX_DIMS 16 /* maximum dimension of tensor. */ 78 | #define RKNN_MAX_NAME_LEN 256 /* maximum name lenth of tensor. */ 79 | #define RKNN_MAX_NUM_CHANNEL 128 /* maximum channel number of graph input tensor. */ 80 | 81 | /* 82 | Definition for deivce id 83 | */ 84 | #define RKNN_MAX_DEVS 256 /* maximum number of device. */ 85 | #define RKNN_MAX_DEV_LEN 64 /* maximum id/type lenth of device. */ 86 | 87 | typedef uint64_t rknn_context; 88 | 89 | 90 | /* 91 | The query command for rknn_query 92 | */ 93 | typedef enum _rknn_query_cmd { 94 | RKNN_QUERY_IN_OUT_NUM = 0, /* query the number of input & output tensor. */ 95 | RKNN_QUERY_INPUT_ATTR, /* query the attribute of input tensor. */ 96 | RKNN_QUERY_OUTPUT_ATTR, /* query the attribute of output tensor. 
*/ 97 | RKNN_QUERY_PERF_DETAIL, /* query the detail performance, need set 98 | RKNN_FLAG_COLLECT_PERF_MASK when call rknn_init, 99 | this query needs to be valid after rknn_outputs_get. */ 100 | RKNN_QUERY_PERF_RUN, /* query the time of run, 101 | this query needs to be valid after rknn_outputs_get. */ 102 | RKNN_QUERY_SDK_VERSION, /* query the sdk & driver version */ 103 | 104 | RKNN_QUERY_CMD_MAX 105 | } rknn_query_cmd; 106 | 107 | /* 108 | the tensor data type. 109 | */ 110 | typedef enum _rknn_tensor_type { 111 | RKNN_TENSOR_FLOAT32 = 0, /* data type is float32. */ 112 | RKNN_TENSOR_FLOAT16, /* data type is float16. */ 113 | RKNN_TENSOR_INT8, /* data type is int8. */ 114 | RKNN_TENSOR_UINT8, /* data type is uint8. */ 115 | RKNN_TENSOR_INT16, /* data type is int16. */ 116 | 117 | RKNN_TENSOR_TYPE_MAX 118 | } rknn_tensor_type; 119 | 120 | /* 121 | the quantitative type. 122 | */ 123 | typedef enum _rknn_tensor_qnt_type { 124 | RKNN_TENSOR_QNT_NONE = 0, /* none. */ 125 | RKNN_TENSOR_QNT_DFP, /* dynamic fixed point. */ 126 | RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC, /* asymmetric affine. */ 127 | 128 | RKNN_TENSOR_QNT_MAX 129 | } rknn_tensor_qnt_type; 130 | 131 | /* 132 | the tensor data format. 133 | */ 134 | typedef enum _rknn_tensor_format { 135 | RKNN_TENSOR_NCHW = 0, /* data format is NCHW. */ 136 | RKNN_TENSOR_NHWC, /* data format is NHWC. */ 137 | 138 | RKNN_TENSOR_FORMAT_MAX 139 | } rknn_tensor_format; 140 | 141 | /* 142 | the information for RKNN_QUERY_IN_OUT_NUM. 143 | */ 144 | typedef struct _rknn_input_output_num { 145 | uint32_t n_input; /* the number of input. */ 146 | uint32_t n_output; /* the number of output. */ 147 | } rknn_input_output_num; 148 | 149 | /* 150 | the information for RKNN_QUERY_INPUT_ATTR / RKNN_QUERY_OUTPUT_ATTR. 151 | */ 152 | typedef struct _rknn_tensor_attr { 153 | uint32_t index; /* input parameter, the index of input/output tensor, 154 | need set before call rknn_query. */ 155 | 156 | uint32_t n_dims; /* the number of dimensions. 
*/ 157 | uint32_t dims[RKNN_MAX_DIMS]; /* the dimensions array. */ 158 | char name[RKNN_MAX_NAME_LEN]; /* the name of tensor. */ 159 | 160 | uint32_t n_elems; /* the number of elements. */ 161 | uint32_t size; /* the bytes size of tensor. */ 162 | 163 | rknn_tensor_format fmt; /* the data format of tensor. */ 164 | rknn_tensor_type type; /* the data type of tensor. */ 165 | rknn_tensor_qnt_type qnt_type; /* the quantitative type of tensor. */ 166 | int8_t fl; /* fractional length for RKNN_TENSOR_QNT_DFP. */ 167 | uint32_t zp; /* zero point for RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC. */ 168 | float scale; /* scale for RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC. */ 169 | } rknn_tensor_attr; 170 | 171 | /* 172 | the information for RKNN_QUERY_PERF_DETAIL. 173 | */ 174 | typedef struct _rknn_perf_detail { 175 | char* perf_data; /* the string pointer of perf detail. don't need free it by user. */ 176 | uint64_t data_len; /* the string length. */ 177 | } rknn_perf_detail; 178 | 179 | /* 180 | the information for RKNN_QUERY_PERF_RUN. 181 | */ 182 | typedef struct _rknn_perf_run { 183 | int64_t run_duration; /* real inference time (us) */ 184 | } rknn_perf_run; 185 | 186 | /* 187 | the information for RKNN_QUERY_SDK_VERSION. 188 | */ 189 | typedef struct _rknn_sdk_version { 190 | char api_version[256]; /* the version of rknn api. */ 191 | char drv_version[256]; /* the version of rknn driver. */ 192 | } rknn_sdk_version; 193 | 194 | /* 195 | the information for rknn_find_devices. 196 | */ 197 | typedef struct _rknn_devices_id { 198 | uint32_t n_devices; /* the number of devices. */ 199 | char types[RKNN_MAX_DEVS][RKNN_MAX_DEV_LEN]; /* the array of device type. */ 200 | char ids[RKNN_MAX_DEVS][RKNN_MAX_DEV_LEN]; /* the array of device ID. */ 201 | } rknn_devices_id; 202 | 203 | /* 204 | the input information for rknn_input_set. 205 | */ 206 | typedef struct _rknn_input { 207 | uint32_t index; /* the input index. */ 208 | void* buf; /* the input buf for index. 
*/ 209 | uint32_t size; /* the size of input buf. */ 210 | uint8_t pass_through; /* pass through mode. 211 | if TRUE, the buf data is passed directly to the input node of the rknn model 212 | without any conversion. the following variables do not need to be set. 213 | if FALSE, the buf data is converted into an input consistent with the model 214 | according to the following type and fmt. so the following variables 215 | need to be set.*/ 216 | rknn_tensor_type type; /* the data type of input buf. */ 217 | rknn_tensor_format fmt; /* the data format of input buf. 218 | currently the internal input format of NPU is NCHW by default. 219 | so entering NCHW data can avoid the format conversion in the driver. */ 220 | } rknn_input; 221 | 222 | /* 223 | the output information for rknn_outputs_get. 224 | */ 225 | typedef struct _rknn_output { 226 | uint8_t want_float; /* want transfer output data to float */ 227 | uint8_t is_prealloc; /* whether buf is pre-allocated. 228 | if TRUE, the following variables need to be set. 229 | if FALSE, the following variables do not need to be set. */ 230 | uint32_t index; /* the output index. */ 231 | void* buf; /* the output buf for index. 232 | when is_prealloc = FALSE and rknn_outputs_release called, 233 | this buf pointer will be free and don't use it anymore. */ 234 | uint32_t size; /* the size of output buf. */ 235 | } rknn_output; 236 | 237 | /* 238 | the extend information for rknn_init. 239 | */ 240 | typedef struct _rknn_init_extend { 241 | char* device_id; /* input parameter, indicate which device selected. if only one 242 | device connected, can set nullptr. */ 243 | } rknn_init_extend; 244 | 245 | /* 246 | the extend information for rknn_run. 247 | */ 248 | typedef struct _rknn_run_extend { 249 | uint64_t frame_id; /* output parameter, indicate current frame id of run. */ 250 | } rknn_run_extend; 251 | 252 | /* 253 | the extend information for rknn_outputs_get. 
254 | */ 255 | typedef struct _rknn_output_extend { 256 | uint64_t frame_id; /* output parameter, indicate the frame id of outputs, corresponds to 257 | struct rknn_run_extend.frame_id.*/ 258 | } rknn_output_extend; 259 | 260 | 261 | /* rknn_find_devices 262 | 263 | find the devices that connected to host. 264 | 265 | input: 266 | rknn_devices_id* pdevs the pointer of devices information structure. 267 | return: 268 | int error code. 269 | */ 270 | int rknn_find_devices(rknn_devices_id* pdevs); 271 | 272 | 273 | /* rknn_init 274 | 275 | initial the context and load the rknn model. 276 | 277 | input: 278 | rknn_context* context the pointer of context handle. 279 | void* model pointer to the rknn model. 280 | uint32_t size the size of rknn model. 281 | uint32_t flag extend flag, see the define of RKNN_FLAG_XXX_XXX. 282 | return: 283 | int error code. 284 | */ 285 | int rknn_init(rknn_context* context, void* model, uint32_t size, uint32_t flag); 286 | 287 | 288 | /* rknn_init2 289 | 290 | initial the context and load the rknn model (version 2). 291 | 292 | input: 293 | rknn_context* context the pointer of context handle. 294 | void* model pointer to the rknn model. 295 | uint32_t size the size of rknn model. 296 | uint32_t flag extend flag, see the define of RKNN_FLAG_XXX_XXX. 297 | rknn_init_extend* extend the extend information of init. 298 | return: 299 | int error code. 300 | */ 301 | int rknn_init2(rknn_context* context, void* model, uint32_t size, uint32_t flag, rknn_init_extend* extend); 302 | 303 | 304 | /* rknn_destroy 305 | 306 | unload the rknn model and destroy the context. 307 | 308 | input: 309 | rknn_context context the handle of context. 310 | return: 311 | int error code. 312 | */ 313 | int rknn_destroy(rknn_context context); 314 | 315 | 316 | /* rknn_query 317 | 318 | query the information about model or others. see rknn_query_cmd. 319 | 320 | input: 321 | rknn_context context the handle of context. 322 | rknn_query_cmd cmd the command of query. 
323 | void* info the buffer point of information. 324 | uint32_t size the size of information. 325 | return: 326 | int error code. 327 | */ 328 | int rknn_query(rknn_context context, rknn_query_cmd cmd, void* info, uint32_t size); 329 | 330 | 331 | /* rknn_inputs_set 332 | 333 | set inputs information by input index of rknn model. 334 | inputs information see rknn_input. 335 | 336 | input: 337 | rknn_context context the handle of context. 338 | uint32_t n_inputs the number of inputs. 339 | rknn_input inputs[] the arrays of inputs information, see rknn_input. 340 | return: 341 | int error code 342 | */ 343 | int rknn_inputs_set(rknn_context context, uint32_t n_inputs, rknn_input inputs[]); 344 | 345 | 346 | /* rknn_run 347 | 348 | run the model to execute inference. 349 | this function does not block normally, but it blocks when more than 3 inferences 350 | are not obtained by rknn_outputs_get. 351 | 352 | input: 353 | rknn_context context the handle of context. 354 | rknn_run_extend* extend the extend information of run. 355 | return: 356 | int error code. 357 | */ 358 | int rknn_run(rknn_context context, rknn_run_extend* extend); 359 | 360 | 361 | /* rknn_outputs_get 362 | 363 | wait the inference to finish and get the outputs. 364 | this function will block until inference finish. 365 | the results will set to outputs[]. 366 | 367 | input: 368 | rknn_context context the handle of context. 369 | uint32_t n_outputs the number of outputs. 370 | rknn_output outputs[] the arrays of output, see rknn_output. 371 | rknn_output_extend* the extend information of output. 372 | return: 373 | int error code. 374 | */ 375 | int rknn_outputs_get(rknn_context context, uint32_t n_outputs, rknn_output outputs[], rknn_output_extend* extend); 376 | 377 | 378 | /* rknn_outputs_release 379 | 380 | release the outputs that get by rknn_outputs_get. 381 | after called, the rknn_output[x].buf get from rknn_outputs_get will 382 | also be free when rknn_output[x].is_prealloc = FALSE. 
383 | 384 | input: 385 | rknn_context context the handle of context. 386 | uint32_t n_ouputs the number of outputs. 387 | rknn_output outputs[] the arrays of output. 388 | return: 389 | int error code 390 | */ 391 | int rknn_outputs_release(rknn_context context, uint32_t n_ouputs, rknn_output outputs[]); 392 | 393 | #ifdef __cplusplus 394 | } //extern "C" 395 | #endif 396 | 397 | #endif //_RKNN_API_H 398 | -------------------------------------------------------------------------------- /rknn_api_sdk/rknn_api/x86/lib64/librknn_api.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sologala/nanodet_rknn/448c93c9cfb90d71506cf15cda606a9006ef7efc/rknn_api_sdk/rknn_api/x86/lib64/librknn_api.so -------------------------------------------------------------------------------- /rknn_api_sdk/src/Net.cpp: -------------------------------------------------------------------------------- 1 | #include "Net.hpp" 2 | 3 | #include 4 | #include 5 | 6 | #include "rknn_api.h" 7 | #include "testtime.hpp" 8 | namespace RKNN_NET { 9 | Net::Net(const std::string _model_path) : modelPath_(_model_path) { 10 | printf("Loading~ rknn model\n %s", modelPath_.c_str()); 11 | FILE *fp = fopen(modelPath_.c_str(), "rb"); 12 | if (fp == NULL) { 13 | printf("fopen %s fail!\n", modelPath_.c_str()); 14 | exit(-1); 15 | } 16 | fseek(fp, 0, SEEK_END); 17 | 18 | int model_len = ftell(fp); 19 | model_ = malloc(model_len); 20 | fseek(fp, 0, SEEK_SET); 21 | if (model_len != fread(model_, 1, model_len, fp)) { 22 | printf("fread %s fail!\n", modelPath_.c_str()); 23 | free(model_); 24 | exit(-1); 25 | } 26 | int ret = 0; 27 | 28 | // char device_name[255] = "TDs33101190500578"; 29 | // rknn_init_extend device; 30 | // device.device_id = device_name; 31 | 32 | // query all avialiable devices 33 | rknn_devices_id devids; 34 | ret = rknn_find_devices(&devids); 35 | 36 | printf("n_devices = %d \n", devids.n_devices); 37 | for (int i = 0; i < 
devids.n_devices; ++i) { 38 | printf("%d : type : %s , id %s\n", i, devids.types[i], devids.ids[i]); 39 | } 40 | 41 | // ret = rknn_init2(&ctx_, model_, model_len, RKNN_FLAG_PRIOR_MEDIUM, &device); 42 | ret = rknn_init(&ctx_, model_, model_len, RKNN_FLAG_PRIOR_MEDIUM); 43 | if (ret < 0) { 44 | printf("rknn_init fail! ret=%d\n", ret); 45 | exit(-1); 46 | } 47 | 48 | rknn_input_output_num io_num; 49 | ret = rknn_query(ctx_, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num)); 50 | if (ret < 0) { 51 | printf("rknn_query fail! ret=%d\n", ret); 52 | exit(-1); 53 | } 54 | printf("inp : %d , output : %d\n", io_num.n_input, io_num.n_output); 55 | inputs_attr = new rknn_tensor_attr[io_num.n_input]; 56 | printf("-------------[input %d ]-------------\n", io_num.n_input); 57 | for (int i = 0, sz = io_num.n_input; i < sz; ++i) { 58 | inputs_attr[i].index = i; 59 | ret = rknn_query(ctx_, RKNN_QUERY_INPUT_ATTR, &inputs_attr[i], sizeof(inputs_attr[i])); 60 | if (ret < 0) { 61 | printf("rknn_query fail! ret=%d\n", ret); 62 | exit(-1); 63 | } 64 | printf("%d : (", i); 65 | for (int j = 0; j < inputs_attr[i].n_dims; ++j) { 66 | if (j) printf(", "); 67 | printf("%d", inputs_attr[i].dims[j]); 68 | } 69 | printf(") "); 70 | } 71 | printf("\n-------------[output %d ]-------------\n", io_num.n_output); 72 | outputs_attr = new rknn_tensor_attr[io_num.n_output]; 73 | for (int i = 0, sz = io_num.n_output; i < sz; ++i) { 74 | outputs_attr[i].index = i; 75 | ret = rknn_query(ctx_, RKNN_QUERY_OUTPUT_ATTR, &outputs_attr[i], sizeof(outputs_attr[i])); 76 | if (ret < 0) { 77 | printf("rknn_query fail! 
ret=%d\n", ret); 78 | exit(-1); 79 | } 80 | printf("%d : (", i); 81 | for (int j = 0; j < outputs_attr[i].n_dims; ++j) { 82 | if (j) printf(", "); 83 | printf("%d", outputs_attr[i].dims[j]); 84 | } 85 | printf(") "); 86 | } 87 | printf("\n"); 88 | 89 | n_inputs_ = io_num.n_input; 90 | n_outputs_ = io_num.n_output; 91 | } 92 | 93 | #define __DO_TESTTIME__ 94 | 95 | void Net::Forward(std::vector _datas) { 96 | #ifdef __DO_TESTTIME__ 97 | TestTimeVar; 98 | TestTimeTic; 99 | #endif 100 | assert(_datas.size() == n_inputs_); 101 | for (int i = 0; i < n_inputs_; ++i) { 102 | inputs_[i].buf = _datas[i]; 103 | inputs_[i].pass_through = false; 104 | } 105 | 106 | int ret = rknn_inputs_set(ctx_, n_inputs_, inputs_); 107 | if (ret < 0) { 108 | printf("rknn_input_set fail! ret=%d\n", ret); 109 | exit(-1); 110 | } 111 | #ifdef __DO_TESTTIME__ 112 | TestTimeToc("set input : "); 113 | TestTimeTic; 114 | #endif 115 | ret = rknn_run(ctx_, NULL); 116 | if (ret < 0) { 117 | printf("rknn_run fail! ret=%d\n", ret); 118 | exit(-1); 119 | } 120 | 121 | ret = rknn_outputs_get(ctx_, n_outputs_, outputs_, NULL); 122 | if (ret < 0) { 123 | printf("rknn_outputs_get fail! 
ret=%d\n", ret); 124 | exit(-1); 125 | } 126 | #ifdef __DO_TESTTIME__ 127 | TestTimeToc("inference : "); 128 | #endif 129 | } 130 | 131 | void Net::Input_Output_Configuration() { 132 | // 配置 input 与 output 数组 133 | inputs_ = new rknn_input[n_inputs_]; 134 | outputs_ = new rknn_output[n_outputs_]; 135 | 136 | for (int i = 0; i < n_inputs_; ++i) { 137 | inputs_[i].index = i; 138 | inputs_[i].buf = NULL; 139 | inputs_[i].size = inputs_attr[i].n_elems * sizeof(char); 140 | inputs_[i].pass_through = false; 141 | inputs_[i].type = RKNN_TENSOR_UINT8; 142 | inputs_[i].fmt = RKNN_TENSOR_NHWC; 143 | } 144 | 145 | outputs_buffer = new float *[n_outputs_]; 146 | 147 | for (int i = 0; i < n_outputs_; ++i) { 148 | outputs_[i].want_float = true; 149 | outputs_[i].is_prealloc = true; 150 | outputs_[i].index = i; 151 | outputs_[i].size = outputs_attr[i].n_elems * sizeof(float); 152 | outputs_buffer[i] = new float[outputs_[i].size]; 153 | outputs_[i].buf = (void *)outputs_buffer[i]; 154 | memset(outputs_[i].buf, 0, sizeof(outputs_[i].buf)); 155 | } 156 | } 157 | 158 | Net::~Net() { 159 | printf("Free net~~~ \n"); 160 | // free output buffer ; 161 | for (int i = 0; i < n_outputs_; ++i) { 162 | delete[] outputs_buffer[i]; 163 | } 164 | if (outputs_buffer) delete[] outputs_buffer; 165 | if (outputs_) delete[] outputs_; 166 | if (inputs_) delete[] inputs_; 167 | if (inputs_attr) delete[] inputs_attr; 168 | if (outputs_attr) delete[] outputs_attr; 169 | rknn_destroy(ctx_); 170 | // delete model 171 | free(model_); 172 | } 173 | } // namespace RKNN_NET -------------------------------------------------------------------------------- /rknn_api_sdk/test/test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Net.hpp" 4 | #include "testtime.hpp" 5 | using namespace std; 6 | 7 | int main(int argc, const char** argv) { 8 | const int input_shape[4] = {1, 3, 320, 320}; 9 | TestTimeVar; 10 | TestTimeTic; 11 | 12 | for (int i = 0; i 
< 10000000; ++i) { 13 | TestTimeTic; 14 | RKNN_NET::Net net("../nanodet.rknn"); 15 | net.Input_Output_Configuration(); 16 | TestTimeToc("load"); 17 | cv::Mat img = cv::Mat::zeros(320, 320, CV_8UC3); 18 | net.Forward({img.data}); 19 | TestTimeToc("run"); 20 | } 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /rknn_api_sdk/test/testtime.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #define __DoTestTime 5 | 6 | #ifdef __DoTestTime 7 | #define TestTimeVar std::chrono::steady_clock::time_point testTime_t1, testTime_t2; 8 | #define TestTimeVarSum(x) double x = 0; 9 | 10 | #define TestTimeTic testTime_t1 = std::chrono::steady_clock::now(); 11 | 12 | #define TestTimeToc(str) \ 13 | testTime_t2 = std::chrono::steady_clock::now(); \ 14 | printf("%s: %.1f ms\n", str, \ 15 | 1000 * std::chrono::duration_cast>(testTime_t2 - testTime_t1).count()); 16 | 17 | #define TestTimeTocFPS(str) \ 18 | testTime_t2 = std::chrono::steady_clock::now(); \ 19 | printf("%s FPS: %.1f\n", str, \ 20 | 1.0 / std::chrono::duration_cast>(testTime_t2 - testTime_t1).count()); 21 | 22 | #define TestTimeTocSum(x) \ 23 | testTime_t2 = std::chrono::steady_clock::now(); \ 24 | x += 1000 * std::chrono::duration_cast>(testTime_t2 - testTime_t1).count(); 25 | 26 | #define TestTimePrintf(str, x) printf("%s: %.1f\n", str, x); 27 | 28 | #else 29 | #define TestTimeVar 30 | #define TestTimeVarSum(x) 31 | #define TestTimeTic 32 | 33 | #define TestTimeToc(str) 34 | #define TestTimeTocSum(x) 35 | #define TestTimePrintf(str, x) 36 | #endif 37 | -------------------------------------------------------------------------------- /rknn_nanodet/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | project(NanoDet) 3 | 4 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") 6 | 7 | 8 | file(GLOB SRCS 
"./src/*.c*") 9 | 10 | find_package(OpenCV REQUIRED) 11 | 12 | # message("find opencv version : "${OpenCV_VERSION}) 13 | 14 | # add_subdirectory(../rknn_api_sdk) 15 | 16 | include_directories( 17 | include/ 18 | ${Rknn_Net_INCLUDE_DIRS} 19 | ${OpenCV_INCLUDE_DIRS} 20 | ) 21 | 22 | add_library( 23 | ${PROJECT_NAME} 24 | ${SRCS} 25 | ) 26 | 27 | target_link_libraries( 28 | ${PROJECT_NAME} 29 | ${Rknn_Net_LIBS} 30 | ${OpenCV_LIBS} 31 | ) 32 | 33 | 34 | set(NanoDet_INCLUDE_DIRS 35 | ${CMAKE_CURRENT_SOURCE_DIR}/include/ 36 | # ${Rknn_Net_HEADERS} 37 | PARENT_SCOPE 38 | ) 39 | 40 | set(NanoDet_LIBS 41 | ${PROJECT_NAME} 42 | # ${CMAKE_CURRENT_SOURCE_DIR}/rknn_api/${ARCH_DIR}/lib64/librknn_api.so 43 | PARENT_SCOPE 44 | ) 45 | 46 | -------------------------------------------------------------------------------- /rknn_nanodet/include/NanoDet.hpp: -------------------------------------------------------------------------------- 1 | #ifndef NANODET_H 2 | #define NANODET_H 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "Net.hpp" 9 | #include "rknn_api.h" 10 | #pragma once 11 | 12 | typedef struct BoxInfo 13 | { 14 | float x1; 15 | float y1; 16 | float x2; 17 | float y2; 18 | float score; 19 | int label; 20 | } BoxInfo; 21 | 22 | class NanoDet : public RKNN_NET::Net 23 | { 24 | public: 25 | NanoDet(const std::string _modelpath); 26 | ~NanoDet(); 27 | std::vector> detect(const cv::Mat _img); 28 | void Input_Output_Configuration(); 29 | void decode_infer(float *cls_pred, float *&dis_pred, int stride, float threshold, std::vector> &results); 30 | 31 | BoxInfo disPred2Bbox(const float *&dfl_det, int label, float score, int x, int y, int stride); 32 | 33 | void nms(std::vector &input_boxes, float NMS_THRESH); 34 | 35 | std::vector labels_{ 36 | "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", 37 | "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", 38 | "bird", "cat", "dog", "horse", "sheep", "cow", 
"elephant", 39 | "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", 40 | "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", 41 | "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", 42 | "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", 43 | "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", 44 | "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", 45 | "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", 46 | "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", 47 | "teddy bear", "hair drier", "toothbrush"}; 48 | 49 | private: 50 | std::vector stride_ = {8, 16, 32}; 51 | const int reg_max_ = 7; 52 | const int intput_w = 320; 53 | const int intput_h = 320; 54 | const float score_threshold_ = 0.35; 55 | const float mean_vals[3] = {103.53f, 116.28f, 123.675f}; 56 | const float norm_vals[3] = {0.017429f, 0.017507f, 0.017125f}; 57 | }; 58 | #endif -------------------------------------------------------------------------------- /rknn_nanodet/src/NanoDet.cpp: -------------------------------------------------------------------------------- 1 | #include "NanoDet.hpp" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "Net.hpp" 9 | using namespace std; 10 | 11 | inline float fast_exp(float x) 12 | { 13 | union 14 | { 15 | uint32_t i; 16 | float f; 17 | } v{}; 18 | v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f); 19 | return v.f; 20 | } 21 | 22 | inline float sigmoid(float x) { return 1.0f / (1.0f + fast_exp(-x)); } 23 | 24 | template 25 | int activation_function_softmax(const _Tp *src, _Tp *dst, int length) 26 | { 27 | const _Tp alpha = *std::max_element(src, src + length); 28 | _Tp denominator{0}; 29 | 30 | for (int i = 0; i < length; ++i) 31 | { 32 | dst[i] = fast_exp(src[i] - alpha); 33 | denominator += dst[i]; 34 | } 35 | 36 | for (int i = 0; i < length; ++i) 
37 | { 38 | dst[i] /= denominator; 39 | } 40 | 41 | return 0; 42 | } 43 | 44 | NanoDet::~NanoDet() {} 45 | 46 | NanoDet::NanoDet(const string _modelpath) : RKNN_NET::Net(_modelpath) 47 | { 48 | // do s 49 | Input_Output_Configuration(); 50 | } 51 | 52 | std::vector> NanoDet::detect(const cv::Mat _img) 53 | { 54 | cv::Mat resizeed_img; 55 | float fx = 1, fy = 1; 56 | if (_img.rows != intput_w || _img.cols != intput_h) 57 | { 58 | cv::resize(_img, resizeed_img, cv::Size(intput_w, intput_h)); 59 | fx = (_img.cols * 1.0) / intput_w; 60 | fy = (_img.rows * 1.0) / intput_h; 61 | } 62 | else 63 | { 64 | resizeed_img = _img; 65 | } 66 | RKNN_NET::Net::Forward({resizeed_img.data}); 67 | std::vector> res; 68 | res.resize(labels_.size()); 69 | for (int i = 0; i < stride_.size(); ++i) 70 | { 71 | const int idx_class = i; 72 | const int idx_bbx = i + 3; 73 | 74 | float *bbx_pred = (float *)outputs_[idx_bbx].buf; 75 | float *class_pred = (float *)outputs_[idx_class].buf; 76 | // get output 77 | this->decode_infer(class_pred, bbx_pred, stride_[i], score_threshold_, res); 78 | } 79 | 80 | for (int i = 0; i < labels_.size(); ++i) 81 | { 82 | nms(res[i], 0.5); 83 | for (auto &_bbx : res[i]) 84 | { 85 | _bbx.x1 *= fx; 86 | _bbx.x2 *= fx; 87 | _bbx.y1 *= fy; 88 | _bbx.y2 *= fy; 89 | } 90 | } 91 | return res; 92 | } 93 | 94 | void NanoDet::nms(std::vector &input_boxes, float NMS_THRESH) 95 | { 96 | std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) 97 | { return a.score > b.score; }); 98 | std::vector vArea(input_boxes.size()); 99 | for (int i = 0; i < int(input_boxes.size()); ++i) 100 | { 101 | vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) * (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1); 102 | } 103 | for (int i = 0; i < int(input_boxes.size()); ++i) 104 | { 105 | for (int j = i + 1; j < int(input_boxes.size());) 106 | { 107 | float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1); 108 | float yy1 = (std::max)(input_boxes[i].y1, 
input_boxes[j].y1); 109 | float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2); 110 | float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2); 111 | float w = (std::max)(float(0), xx2 - xx1 + 1); 112 | float h = (std::max)(float(0), yy2 - yy1 + 1); 113 | float inter = w * h; 114 | float ovr = inter / (vArea[i] + vArea[j] - inter); 115 | if (ovr >= NMS_THRESH) 116 | { 117 | input_boxes.erase(input_boxes.begin() + j); 118 | vArea.erase(vArea.begin() + j); 119 | } 120 | else 121 | { 122 | j++; 123 | } 124 | } 125 | } 126 | } 127 | 128 | void NanoDet::Input_Output_Configuration() 129 | { 130 | // 配置 input 与 output 数组 131 | inputs_ = new rknn_input[n_inputs_]; 132 | outputs_ = new rknn_output[n_outputs_]; 133 | 134 | for (int i = 0; i < n_inputs_; ++i) 135 | { 136 | inputs_[i].index = i; 137 | inputs_[i].buf = NULL; 138 | inputs_[i].size = inputs_attr[i].n_elems * sizeof(char); 139 | inputs_[i].pass_through = false; 140 | inputs_[i].type = RKNN_TENSOR_UINT8; 141 | inputs_[i].fmt = RKNN_TENSOR_NHWC; 142 | } 143 | 144 | outputs_buffer = new float *[n_outputs_]; 145 | 146 | for (int i = 0; i < n_outputs_; ++i) 147 | { 148 | outputs_[i].want_float = true; 149 | outputs_[i].is_prealloc = true; 150 | outputs_[i].index = i; 151 | outputs_[i].size = outputs_attr[i].n_elems * sizeof(float); 152 | outputs_buffer[i] = new float[outputs_[i].size]; 153 | outputs_[i].buf = (void *)outputs_buffer[i]; 154 | memset(outputs_[i].buf, 0, sizeof(outputs_[i].buf)); 155 | } 156 | } 157 | 158 | void NanoDet::decode_infer(float *cls_pred, float *&dis_pred, int stride, float threshold, 159 | std::vector> &results) 160 | { 161 | int feature_h = 320 / stride; 162 | int feature_w = 320 / stride; 163 | // cv::Mat debug_heatmap = cv::Mat::zeros(feature_h, feature_w, CV_8UC3); 164 | for (int idx = 0; idx < feature_h * feature_w; idx++) 165 | { 166 | int row = idx / feature_w; 167 | int col = idx % feature_w; 168 | 169 | float score = 0; 170 | int cur_label = 0; 171 | 172 | // 找一个最大的score 
173 | for (int label = 0, num_class_ = labels_.size(); label < num_class_; label++) 174 | { 175 | if (cls_pred[idx * num_class_ + label] > score) 176 | { 177 | score = cls_pred[idx * num_class_ + label]; 178 | cur_label = label; 179 | } 180 | } 181 | if (score > threshold) 182 | { 183 | if (cur_label == 0 || 1) 184 | { 185 | // std::cout << row << "," << col << " label:" << cur_label << " score:" << score << std::endl; 186 | const float *bbox_pred = dis_pred + idx * (reg_max_ + 1) * 4; 187 | results[cur_label].push_back(this->disPred2Bbox(bbox_pred, cur_label, score, col, row, stride)); 188 | } 189 | 190 | // debug_heatmap.at(row, col)[0] = 255; 191 | } 192 | } 193 | // cv::imshow("debug", debug_heatmap); 194 | // cv::waitKey(0); 195 | } 196 | 197 | BoxInfo NanoDet::disPred2Bbox(const float *&bbox_pred, int label, float score, int x, int y, int stride) 198 | { 199 | float ct_x = (x + 0.5) * stride; 200 | float ct_y = (y + 0.5) * stride; 201 | std::vector dis_pred; 202 | dis_pred.resize(4); 203 | for (int i = 0; i < 4; i++) 204 | { 205 | float dis = 0; 206 | float *dis_after_sm = new float[reg_max_ + 1]; 207 | activation_function_softmax(bbox_pred + i * (reg_max_ + 1), dis_after_sm, reg_max_ + 1); 208 | for (int j = 0; j < reg_max_ + 1; j++) 209 | { 210 | dis += j * dis_after_sm[j]; 211 | } 212 | dis *= stride; 213 | // std::cout << "dis:" << dis << std::endl; 214 | dis_pred[i] = dis; 215 | delete[] dis_after_sm; 216 | } 217 | float xmin = (std::max)(ct_x - dis_pred[0], .0f); 218 | float ymin = (std::max)(ct_y - dis_pred[1], .0f); 219 | float xmax = (std::min)(ct_x + dis_pred[2], float(intput_w)); 220 | float ymax = (std::min)(ct_y + dis_pred[3], float(intput_h)); 221 | 222 | // std::cout << xmin << "," << ymin << "," << xmax << "," << xmax << "," << std::endl; 223 | return BoxInfo{xmin, ymin, xmax, ymax, score, label}; 224 | } 225 | -------------------------------------------------------------------------------- /rknn_nanodet/test/test.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "NanoDet.hpp" 15 | #include "testtime.hpp" 16 | using namespace std; 17 | 18 | int main(int argc, const char** argv) { 19 | const string model_path = "/home/toybrick/osnet_pedestrain_detection/models/nanodet.rknn"; 20 | const string seq_path = "/home/toybrick/VisDrone/SOT/VisDrone2018-SOT-test-dev/sequences/uav0000021_00000_s"; 21 | 22 | vector all_imgs; 23 | cv::glob(seq_path + "/*.jpg", all_imgs); 24 | // create net 25 | 26 | vector> trackers; 27 | while (1) { 28 | trackers.clear(); 29 | cv::Mat img = cv::imread(all_imgs[0]); 30 | vector> newtrackers; 31 | for (int i = 0; i < 1000; ++i) { 32 | cv::TrackerKCF::Params paras; 33 | paras.detect_thresh = 0.35; 34 | cv::Ptr pTck = cv::TrackerKCF::create(paras); 35 | pTck->init(img, cv::Rect(30, 30, 50, 60)); 36 | newtrackers.push_back(pTck); 37 | } 38 | usleep(1000000); 39 | trackers = newtrackers; 40 | } 41 | 42 | NanoDet net(model_path, ""); 43 | 44 | int nframe = all_imgs.size(); 45 | TestTimeVar; 46 | for (int i = 0; i < all_imgs.size(); ++i) { 47 | cv::Mat img = cv::imread(all_imgs[i]); 48 | 49 | int imgw = img.cols, imgh = img.rows; 50 | cout << imgw << " " << imgh << endl; 51 | std::vector allbbx; 52 | TestTimeTic; 53 | for (int i = 0; i < imgw / 320 - 1; i++) { 54 | for (int j = 0; j < imgh / 320 - 1; j++) { 55 | cv::Mat patch = img(cv::Rect(cv::Point(i * 320, j * 320), cv::Point((i + 1) * 320, (j + 1) * 320))); 56 | std::vector bbx = net.detect(patch); 57 | cv::imshow("patch", patch); 58 | cout << bbx.size() << endl; 59 | for (auto _bbx : bbx) { 60 | _bbx += cv::Point2i(i * 320, j * 320); 61 | allbbx.push_back(_bbx); 62 | } 63 | cv::waitKey(10); 64 | } 65 | } 66 | 67 | TestTimeTocFPS("run : "); 68 | cout << allbbx.size() << endl; 69 | for (auto _bbx : allbbx) { 70 
| cv::rectangle(img, _bbx, {0, 0, 225}); 71 | // cout << _bbx << endl; 72 | } 73 | // cv::imwrite("a.jpeg", img); 74 | cv::imshow("dsaf", img); 75 | cv::waitKey(10); 76 | } 77 | return 0; 78 | } 79 | --------------------------------------------------------------------------------