├── .gitignore ├── CMakeLists.txt ├── CMakePresets.json ├── LICENSE ├── README.md ├── bus.jpg ├── img ├── result.jpg └── speed.jpg ├── include ├── common.h ├── rga_utils.h ├── rknn.h └── rknn_model.h ├── install_rknpu.sh ├── main.cpp ├── report.md ├── rknn.cpp ├── runtime └── Linux │ ├── librknn_api │ └── include │ │ ├── rknn_api.h │ │ ├── rknn_custom_op.h │ │ └── rknn_matmul_api.h │ └── rknn_server │ └── aarch64 │ └── usr │ └── bin │ ├── restart_rknn.sh │ ├── rknn_server │ └── start_rknn.sh ├── src ├── common.cpp ├── rga_utils.cpp └── rknn_model.cpp └── yolo11s.rknn /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # CMakeLists.txt: CMake project for rknn (YOLO11 inference on RKNPU).
2 | # Sources and project-specific logic are defined here.
3 | cmake_minimum_required(VERSION 3.12)
4 | 
5 | # Enable Hot Reload for MSVC compilers if supported.
6 | # NOTE(review): restored generator expression — the extracted copy had lost
7 | # every '<' character, which made it invalid CMake.
8 | if(POLICY CMP0141)
9 |   cmake_policy(SET CMP0141 NEW)
10 |   set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT "$<IF:$<AND:$<C_COMPILER_ID:MSVC>,$<CXX_COMPILER_ID:MSVC>>,$<$<CONFIG:Debug,RelWithDebInfo>:EditAndContinue>,$<$<CONFIG:Debug,RelWithDebInfo>:ProgramDatabase>>")
11 | endif()
12 | 
13 | project("rknn" LANGUAGES CXX)
14 | 
15 | find_package(OpenCV REQUIRED)
16 | find_package(OpenMP REQUIRED)
17 | find_package(TBB REQUIRED)   # sudo apt-get install libtbb-dev
18 | 
19 | # Rockchip libraries have no CMake package config; locate them directly.
20 | find_library(RGA_LIBRARY NAMES rga)
21 | find_library(RKNN_API_LIBRARY NAMES rknnrt PATHS /usr/lib)
22 | 
23 | if(NOT RGA_LIBRARY)
24 |   message(FATAL_ERROR "Could not find librga")
25 | endif()
26 | if(NOT RKNN_API_LIBRARY)
27 |   message(FATAL_ERROR "Could not find librknnrt.so")
28 | endif()
29 | 
30 | # Explicit source list (preferred over file(GLOB), which silently misses
31 | # new files until the next re-configure). Keep in sync with src/.
32 | add_executable(rknn
33 |   rknn.cpp
34 |   src/common.cpp
35 |   src/rga_utils.cpp
36 |   src/rknn_model.cpp
37 | )
38 | 
39 | set_target_properties(rknn PROPERTIES
40 |   CXX_STANDARD 20
41 |   CXX_STANDARD_REQUIRED ON
42 | )
43 | 
44 | # Target-scoped include paths instead of directory-wide include_directories().
45 | target_include_directories(rknn PRIVATE
46 |   ${OpenCV_INCLUDE_DIRS}
47 |   ${CMAKE_SOURCE_DIR}/include
48 |   /usr/include/rga
49 | )
50 | 
51 | # Single keyworded target_link_libraries call; OpenMP is REQUIRED above,
52 | # so the imported target is always available here.
53 | target_link_libraries(rknn PRIVATE
54 |   OpenMP::OpenMP_CXX
55 |   ${OpenCV_LIBS}
56 |   TBB::tbb
57 |   ${RGA_LIBRARY}
58 |   ${RKNN_API_LIBRARY}
59 | )
60 | 
61 | # -O3 scoped to this target instead of appending to global CMAKE_CXX_FLAGS.
62 | target_compile_options(rknn PRIVATE -O3)
63 | -------------------------------------------------------------------------------- /CMakePresets.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 3, 3 | "configurePresets": [ 4 | { 5 | "name": "windows-base", 6 | "hidden": true, 7 | "generator": "Ninja", 8 | "binaryDir": "${sourceDir}/out/build/${presetName}", 9 | "installDir": "${sourceDir}/out/install/${presetName}", 10 | "cacheVariables": { 11 | "CMAKE_C_COMPILER": "cl.exe", 12 | "CMAKE_CXX_COMPILER": "cl.exe" 13 | }, 14 | "condition": { 15 | "type": "equals", 16 | "lhs": "${hostSystemName}", 17 | "rhs": "Windows" 18 | } 19 | }, 20 | { 21 | "name": "x64-debug", 22 | "displayName": "x64 Debug", 23 | "inherits": "windows-base", 24 | "architecture": { 25 | "value": "x64", 26 | "strategy": "external" 27 | }, 28 | "cacheVariables": { 29 | "CMAKE_BUILD_TYPE": "Debug" 30 | } 31 | }, 32 | { 33 | "name": "x64-release", 34 | "displayName":
"x64 Release", 35 | "inherits": "x64-debug", 36 | "cacheVariables": { 37 | "CMAKE_BUILD_TYPE": "Release" 38 | } 39 | }, 40 | { 41 | "name": "x86-debug", 42 | "displayName": "x86 Debug", 43 | "inherits": "windows-base", 44 | "architecture": { 45 | "value": "x86", 46 | "strategy": "external" 47 | }, 48 | "cacheVariables": { 49 | "CMAKE_BUILD_TYPE": "Debug" 50 | } 51 | }, 52 | { 53 | "name": "x86-release", 54 | "displayName": "x86 Release", 55 | "inherits": "x86-debug", 56 | "cacheVariables": { 57 | "CMAKE_BUILD_TYPE": "Release" 58 | } 59 | }, 60 | { 61 | "name": "linux-debug", 62 | "displayName": "Linux Debug", 63 | "generator": "Ninja", 64 | "binaryDir": "${sourceDir}/out/build/${presetName}", 65 | "installDir": "${sourceDir}/out/install/${presetName}", 66 | "cacheVariables": { 67 | "CMAKE_BUILD_TYPE": "Debug" 68 | }, 69 | "condition": { 70 | "type": "equals", 71 | "lhs": "${hostSystemName}", 72 | "rhs": "Linux" 73 | }, 74 | "vendor": { 75 | "microsoft.com/VisualStudioRemoteSettings/CMake/1.0": { 76 | "sourceDir": "$env{HOME}/.vs/$ms{projectDirName}" 77 | } 78 | } 79 | }, 80 | { 81 | "name": "orangepi", 82 | "displayName": "orangepi", 83 | "generator": "Ninja", 84 | "binaryDir": "${sourceDir}/out/build/${presetName}", 85 | "installDir": "${sourceDir}/out/install/${presetName}", 86 | "cacheVariables": { 87 | "CMAKE_BUILD_TYPE": "Debug", 88 | "CMAKE_CXX_COMPILER": "/usr/bin/g++", 89 | "CMAKE_C_COMPILER": "/usr/bin/gcc", 90 | //"CMAKE_PREFIX_PATH": "/usr/local", 91 | "CMAKE_INCLUDE_PATH": "/usr/include", 92 | "CMAKE_LIBRARY_PATH": "/usr/lib" 93 | }, 94 | "condition": { 95 | "type": "equals", 96 | "lhs": "${hostSystemName}", 97 | "rhs": "Linux" 98 | }, 99 | "vendor": { 100 | "microsoft.com/VisualStudioRemoteSettings/CMake/1.0": { 101 | "sourceDir": "$env{HOME}/.vs/$ms{projectDirName}" 102 | } 103 | } 104 | }, 105 | { 106 | "name": "macos-debug", 107 | "displayName": "macOS Debug", 108 | "generator": "Ninja", 109 | "binaryDir": "${sourceDir}/out/build/${presetName}", 110 
| "installDir": "${sourceDir}/out/install/${presetName}", 111 | "cacheVariables": { 112 | "CMAKE_BUILD_TYPE": "Debug" 113 | }, 114 | "condition": { 115 | "type": "equals", 116 | "lhs": "${hostSystemName}", 117 | "rhs": "Darwin" 118 | }, 119 | "vendor": { 120 | "microsoft.com/VisualStudioRemoteSettings/CMake/1.0": { 121 | "sourceDir": "$env{HOME}/.vs/$ms{projectDirName}" 122 | } 123 | } 124 | } 125 | ] 126 | } 127 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 
28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 
62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 
102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 
133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. 
You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | Copyright (C) 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rknn-cpp-yolo 2 | This project implements YOLOv11 inference on the RK3588 platform using the RKNN framework. With deep optimization of the official code and RGA hardware acceleration for image preprocessing, it achieves a stable 25 FPS for YOLOv11s without overclocking and core binding, showcasing efficient real-time object detection for embedded applications. 
3 | 4 | 本项目基于RKNN框架,在RK3588平台上实现了YOLOv11推理。通过对官方代码的深度优化和RGA硬件加速预处理,YOLOv11s在未超频和绑定大核的情况下,稳定达到25帧/秒,为嵌入式实时目标检测提供了高效解决方案。 5 | **** 6 | 7 | # YOLOv11 on RK3588 with RKNN 8 | 9 | ## Features 10 | - **YOLOv11 Inference**: Optimized implementation for RK3588. 11 | - **RGA Preprocessing**: Utilizes hardware acceleration for image processing. 12 | - **CMake Build System**: Easy to configure and build. 13 | - **High Performance**: Achieves 25 FPS without overclocking or core binding. 14 | - **Zero-Copy API**: Reduces inference overhead for better efficiency. 15 | - **RK3588 Optimization**: Supports concurrent inference across three NPU cores (requires custom thread pool implementation). 16 | 17 | ## Report: Inference Results and Speed 18 | 19 | ![Result](https://github.com/yuunnn-w/rknn-cpp-yolo/blob/main/img/result.jpg) 20 | 21 | ![Speed](https://github.com/yuunnn-w/rknn-cpp-yolo/blob/main/img/speed.jpg) 22 | 23 | ## Prerequisites 24 | - RK3588 development board 25 | - RKNPU Driver (version >= 0.9.6) 26 | - RKNN SDK 27 | - CMake (version 3.10 or higher) 28 | - OpenCV (for image handling, optional) 29 | 30 | ## Build Instructions 31 | 32 | 1. **Clone the repository:** 33 | ```bash 34 | git clone https://github.com/yuunnn-w/rknn-cpp-yolo.git 35 | cd rknn-cpp-yolo 36 | ``` 37 | 38 | 2. **Install dependencies:** 39 | ```bash 40 | sudo apt-get update 41 | sudo apt-get install -y build-essential gcc g++ gdb cmake ninja-build git libopencv-dev zlib1g-dev librga-dev ninja-build libomp-dev 42 | ``` 43 | 44 | 3. **Install RKNN SDK:** 45 | ```bash 46 | sudo bash install_rknpu.sh 47 | ``` 48 | 49 | 4. **Create a build directory:** 50 | ```bash 51 | mkdir build 52 | cd build 53 | ``` 54 | 55 | 5. **Configure the project with CMake:** 56 | ```bash 57 | cmake .. 58 | ``` 59 | 60 | 6. **Build the project:** 61 | ```bash 62 | make 63 | ``` 64 | 65 | 7. 
**Run the inference:** 66 | ```bash 67 | ./rknn 68 | ``` 69 | 70 | ## Usage 71 | After building the project, you can run the inference by executing the generated binary. Ensure that the RKNN model and test images are correctly placed in the specified paths. 72 | 73 | ## Optimization 74 | The project includes several optimizations to achieve high performance on the RK3588 platform: 75 | - Efficient use of RGA for image preprocessing. 76 | - Memory and computation optimizations in the inference pipeline. 77 | - Zero-Copy API to minimize memory overhead. 78 | - Support for concurrent NPU core utilization (requires thread pool implementation). 79 | 80 | ## Attention 81 | 82 | Please note that this project only provides an example of image-based inference in the `rknn.cpp` file. If you need to perform real-time inference in more complex application scenarios, you will need to implement it yourself. 83 | 84 | Additionally, this project is purely an experimental demo and is not responsible for any products or issues. The final interpretation right belongs to **yuunnn_w**. 85 | 86 | ## License 87 | This project is licensed under the GNU General Public License v3.0. See the [LICENSE](LICENSE) file for details. 88 | 89 | ## Acknowledgments 90 | - Thanks to the RKNN team for their framework and support. 91 | - Inspired by the official YOLOv11 implementation. 92 | 93 | For any questions or contributions, feel free to open an issue or submit a pull request. 94 | 95 | ## Contact Me 96 | 97 | If you have any questions, suggestions, or would like to contribute to this project, feel free to reach out! You can contact me through the following channels: 98 | 99 | - **Email**: [jiaxinsugar@gmail.com](mailto:jiaxinsugar@gmail.com) 100 | - **GitHub**: [yuunnn-w](https://github.com/yuunnn-w) 101 | 102 | I’m always open to discussions, collaborations, and feedback. Let’s make this project even better together! 
🚀 103 | -------------------------------------------------------------------------------- /bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/bus.jpg -------------------------------------------------------------------------------- /img/result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/img/result.jpg -------------------------------------------------------------------------------- /img/speed.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/img/speed.jpg -------------------------------------------------------------------------------- /include/common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/include/common.h -------------------------------------------------------------------------------- /include/rga_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/include/rga_utils.h -------------------------------------------------------------------------------- /include/rknn.h: -------------------------------------------------------------------------------- 1 |  2 | /* 3 | #include 4 | #include 5 | #include 6 | #include // 用于计时 7 | #include // 用于 setprecision 8 | #include "rknn_model.h" 9 | 10 | int main() { 11 | // 初始化模型 12 | std::string model_path = "/root/.vs/rknn/yolov9c.rknn"; 13 | rknn_model model(model_path); 14 | 15 | // 查询模型信息 16 | model.query_model_info(); 17 | 18 | // 打印量化和反量化信息 19 | 
model.print_quantization_info(); 20 | 21 | // 获取输入和输出属性 22 | const std::vector& input_attrs = model.get_input_attrs(); 23 | const std::vector& output_attrs = model.get_output_attrs(); 24 | 25 | 26 | 27 | // 打印输入张量形状 28 | std::cout << "Input Tensor Shapes:" << std::endl; 29 | for (const auto& attr : input_attrs) { 30 | std::cout << " Name: " << attr.name << ", Shape: "; 31 | for (uint32_t j = 0; j < attr.n_dims; ++j) { 32 | std::cout << attr.dims[j] << " "; 33 | } 34 | std::cout << std::endl; 35 | } 36 | // 设置模型输入参数 37 | std::vector inputs(input_attrs.size()); 38 | for (size_t i = 0; i < input_attrs.size(); ++i) { 39 | inputs[i].index = i; 40 | inputs[i].buf = new float[input_attrs[i].n_elems]; // 分配内存 41 | std::memset(inputs[i].buf, 0, input_attrs[i].n_elems * sizeof(float)); // 初始化为0 42 | inputs[i].size = input_attrs[i].n_elems; // * sizeof(float) 43 | inputs[i].pass_through = 0; // 用于指定输入数据是否直接传递给模型的输入节点 44 | inputs[i].type = RKNN_TENSOR_INT8;//input_attrs[i].type; // 输入数据类型 RKNN_TENSOR_INT8 45 | inputs[i].fmt = RKNN_TENSOR_NHWC;//input_attrs[i].fmt; // 输入数据格式 46 | // fmt: rknn_tensor_format类型,常见的有RKNN_TENSOR_NCHW、RKNN_TENSOR_NHWC、RKNN_TENSOR_NCHW_VEC、RKNN_TENSOR_UNDEFINED 47 | std::cout << "inputs[i].size: " << input_attrs[i].n_elems << std::endl; 48 | // 打印设置的输入参数 49 | std::cout << "Setting input parameter for tensor " << i << ":" << std::endl; 50 | std::cout << " Index: " << inputs[i].index << std::endl; 51 | std::cout << " Buffer size: " << inputs[i].size << " bytes" << std::endl; 52 | std::cout << " Pass through: " << static_cast(inputs[i].pass_through) << std::endl; 53 | std::cout << " Data type: " << inputs[i].type << std::endl; 54 | std::cout << " Data format: " << inputs[i].fmt << std::endl; //RKNN_TENSOR_NHWC 55 | } 56 | model.set_input(inputs); 57 | 58 | // 运行模型推理100次并计时 59 | const int num_runs = 20; 60 | std::chrono::duration total_time(0); 61 | 62 | for (int i = 0; i < num_runs; ++i) { 63 | auto start_time = std::chrono::high_resolution_clock::now(); 
64 | model.run(); 65 | auto end_time = std::chrono::high_resolution_clock::now(); 66 | total_time += end_time - start_time; 67 | } 68 | 69 | // 计算平均推理时间 70 | double average_time = total_time.count() / num_runs; 71 | std::cout << "Average inference time over " << num_runs << " runs: " << std::fixed << std::setprecision(10) << average_time << " ms" << std::endl; 72 | 73 | // 查询模型推理的逐层耗时 74 | rknn_perf_detail perf_detail; 75 | int ret = rknn_query(model.get_context(), RKNN_QUERY_PERF_DETAIL, &perf_detail, sizeof(perf_detail)); 76 | if (ret == RKNN_SUCC) { 77 | std::cout << "Model inference layer-wise performance details (in microseconds, 2 decimal places):" << std::endl; 78 | std::cout << perf_detail.perf_data << std::endl; 79 | } 80 | else { 81 | std::cerr << "Failed to query performance details." << std::endl; 82 | } 83 | 84 | // 查询模型推理的总耗时 85 | rknn_perf_run perf_run; 86 | ret = rknn_query(model.get_context(), RKNN_QUERY_PERF_RUN, &perf_run, sizeof(perf_run)); 87 | if (ret == RKNN_SUCC) { 88 | std::cout << "Total inference time (in milliseconds, 4 decimal places): " << std::fixed << std::setprecision(4) << static_cast(perf_run.run_duration) / 1000.0 << " ms" << std::endl; 89 | } 90 | else { 91 | std::cerr << "Failed to query total inference time." 
<< std::endl; 92 | } 93 | // 获取模型输出 94 | std::vector outputs(output_attrs.size()); 95 | for (size_t i = 0; i < output_attrs.size(); ++i) { 96 | outputs[i].index = i; 97 | outputs[i].is_prealloc = 0; 98 | outputs[i].want_float = 1; 99 | } 100 | model.get_output(outputs); 101 | 102 | // 打印输出张量形状 103 | std::cout << "Output Tensor Shapes:" << std::endl; 104 | for (const auto& attr : output_attrs) { 105 | std::cout << " Name: " << attr.name << ", Shape: "; 106 | for (uint32_t j = 0; j < attr.n_dims; ++j) { 107 | std::cout << attr.dims[j] << " "; 108 | } 109 | std::cout << std::endl; 110 | } 111 | 112 | // 释放输出资源 113 | model.release_output(outputs); 114 | 115 | // 释放输入数据内存 116 | for (auto& input : inputs) { 117 | delete[] static_cast(input.buf); 118 | } 119 | return 0; 120 | } 121 | */ 122 | 123 | #include 124 | #include 125 | #include 126 | #include // 用于计时 127 | #include // 用于 setprecision 128 | #include // 包含 OpenCV 头文件 129 | #include "rknn_model.h" 130 | 131 | int main() { 132 | // 初始化模型 133 | std::string model_path = "/root/.vs/rknn/yolov11s.rknn"; // yolov9c.rknn 134 | rknn_model model(model_path); 135 | 136 | // 查询模型信息 137 | int ctx_index = 0; // 假设使用第一个上下文 138 | // 打印量化和反量化信息 139 | // model.print_quantization_info(); 140 | 141 | // 生成填充为0的640x640x3的图像 142 | cv::Mat input_image = cv::Mat::ones(640, 640, CV_8UC3); 143 | //printf("Start inference..."); 144 | // 预热推理5次 145 | for (int i = 0; i < 5; ++i) { 146 | model.run_inference(input_image, ctx_index); 147 | } 148 | 149 | // 进行10次推理并统计时间 150 | std::vector inference_times; 151 | for (int i = 0; i < 1; ++i) { 152 | auto start = std::chrono::high_resolution_clock::now(); 153 | model.run_inference(input_image, ctx_index); 154 | auto end = std::chrono::high_resolution_clock::now(); 155 | std::chrono::duration elapsed = end - start; 156 | inference_times.push_back(elapsed.count()); 157 | } 158 | 159 | // 计算平均推理时间 160 | double total_time = 0.0; 161 | for (double time : inference_times) { 162 | total_time += time; 163 | } 
164 | double average_time = total_time / inference_times.size(); 165 | 166 | // 输出推理时间 167 | std::cout << "Function call time: " << std::fixed << std::setprecision(4) << average_time << " ms" << std::endl; 168 | 169 | 170 | 171 | // 打印输出向量的形状和大小 172 | //std::cout << "Output vector shape: [" << output.size() << "]" << std::endl; 173 | 174 | // 打印平均推理时间,精确到毫秒,保留四位小数 175 | //std::cout << "Average inference time: " << std::fixed << std::setprecision(4) << average_time << " ms" << std::endl; 176 | 177 | 178 | 179 | 180 | 181 | // 打印输出向量的形状和大小 182 | //std::cout << "Output vector shape: [" << output.size() << "]" << std::endl; 183 | 184 | // 重塑输出数据为 [8400, 84] 185 | //std::vector> reshaped_output(8400, std::vector(84)); 186 | //std::memcpy(reshaped_output.data(), output.data(), 8400 * 84 * sizeof(float)); 187 | 188 | // 查询模型推理的逐层耗时 189 | /* 190 | rknn_perf_detail perf_detail; 191 | int ret = rknn_query(model.get_context(ctx_index), RKNN_QUERY_PERF_DETAIL, &perf_detail, sizeof(perf_detail)); 192 | if (ret == RKNN_SUCC) { 193 | std::cout << "Model inference layer-wise performance details (in microseconds, 2 decimal places):" << std::endl; 194 | std::cout << perf_detail.perf_data << std::endl; 195 | } 196 | else { 197 | std::cerr << "Failed to query performance details." << std::endl; 198 | } 199 | */ 200 | // 查询模型推理的总耗时 201 | 202 | 203 | rknn_perf_run perf_run; 204 | int ret = rknn_query(model.get_context(ctx_index), RKNN_QUERY_PERF_RUN, &perf_run, sizeof(perf_run)); 205 | if (ret == RKNN_SUCC) { 206 | std::cout << "Real inference time: " << std::fixed << std::setprecision(4) << static_cast(perf_run.run_duration) / 1000.0 << " ms" << std::endl; 207 | } 208 | else { 209 | std::cerr << "Failed to query total inference time." 
<< std::endl; 210 | } 211 | 212 | 213 | return 0; 214 | } -------------------------------------------------------------------------------- /include/rknn_model.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/include/rknn_model.h -------------------------------------------------------------------------------- /install_rknpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 定义源目录和目标目录 4 | SOURCE_DIR_BIN="./runtime/Linux/rknn_server/aarch64/usr/bin" 5 | DEST_DIR_BIN="/usr/bin" 6 | 7 | SOURCE_DIR_INCLUDE="./runtime/Linux/librknn_api/include" 8 | DEST_DIR_INCLUDE="/usr/include" 9 | 10 | SOURCE_DIR_LIB="./runtime/Linux/librknn_api/aarch64" 11 | DEST_DIR_LIB="/usr/lib" 12 | 13 | # 检查源目录是否存在 14 | check_source_dir() { 15 | if [ ! -d "$1" ]; then 16 | echo "源目录 $1 不存在。" 17 | exit 1 18 | fi 19 | } 20 | 21 | # 检查目标目录是否存在 22 | check_dest_dir() { 23 | if [ ! -d "$1" ]; then 24 | echo "目标目录 $1 不存在。" 25 | exit 1 26 | fi 27 | } 28 | 29 | # 复制文件 30 | copy_files() { 31 | local source_dir=$1 32 | local dest_dir=$2 33 | cp -r "$source_dir"/* "$dest_dir" 34 | if [ $? 
-eq 0 ]; then 35 | echo "文件从 $source_dir 复制到 $dest_dir 成功。" 36 | else 37 | echo "文件从 $source_dir 复制到 $dest_dir 失败。" 38 | exit 1 39 | fi 40 | } 41 | 42 | # 检查并复制 rknn_server 文件 43 | check_source_dir "$SOURCE_DIR_BIN" 44 | check_dest_dir "$DEST_DIR_BIN" 45 | copy_files "$SOURCE_DIR_BIN" "$DEST_DIR_BIN" 46 | 47 | # 检查并复制 include 文件 48 | check_source_dir "$SOURCE_DIR_INCLUDE" 49 | check_dest_dir "$DEST_DIR_INCLUDE" 50 | copy_files "$SOURCE_DIR_INCLUDE" "$DEST_DIR_INCLUDE" 51 | 52 | # 检查并复制 lib 文件 53 | check_source_dir "$SOURCE_DIR_LIB" 54 | check_dest_dir "$DEST_DIR_LIB" 55 | copy_files "$SOURCE_DIR_LIB" "$DEST_DIR_LIB" 56 | 57 | echo "所有文件复制成功。" -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/main.cpp -------------------------------------------------------------------------------- /report.md: -------------------------------------------------------------------------------- 1 | ### Operator Time Consuming Ranking Table 2 | 3 | | OpType | CallNumber | CPUTime(us) | GPUTime(us) | NPUTime(us) | TotalTime(us) | TimeRatio(%) | 4 | |--------------------|------------|-------------|-------------|-------------|---------------|--------------| 5 | | ConvExSwish | 138 | 0 | 0 | 80201 | 80201 | 76.94% | 6 | | Concat | 38 | 0 | 0 | 5868 | 5868 | 5.63% | 7 | | Split | 15 | 0 | 0 | 5270 | 5270 | 5.06% | 8 | | AveragePool | 5 | 0 | 0 | 4203 | 4203 | 4.03% | 9 | | exSoftmax13 | 1 | 0 | 0 | 2169 | 2169 | 2.08% | 10 | | MaxPool | 8 | 0 | 0 | 2159 | 2159 | 2.07% | 11 | | Add | 18 | 0 | 0 | 1237 | 1237 | 1.19% | 12 | | Conv | 7 | 0 | 0 | 771 | 771 | 0.74% | 13 | | Resize | 2 | 0 | 0 | 663 | 663 | 0.64% | 14 | | Reshape | 5 | 568 | 0 | 6 | 574 | 0.55% | 15 | | Transpose | 2 | 0 | 0 | 486 | 486 | 0.47% | 16 | | Sigmoid | 1 | 0 | 0 | 354 | 354 | 0.34% | 17 | | Mul | 2 | 0 | 0 
| 148 | 148 | 0.14% | 18 | | Sub | 2 | 0 | 0 | 99 | 99 | 0.09% | 19 | | OutputOperator | 1 | 30 | 0 | 0 | 30 | 0.03% | 20 | | InputOperator | 1 | 4 | 0 | 0 | 4 | 0.00% | 21 | 22 | ### Total Inference Time 23 | - **Total Inference Time (in milliseconds, 4 decimal places):** 113.4130 ms 24 | 25 | ### Summary and Analysis 26 | 27 | 1. **Dominant Operators:** 28 | - The operator `ConvExSwish` is the most time-consuming, accounting for **76.94%** of the total inference time. This indicates that the model spends the majority of its time in convolutional operations followed by a swish activation function. 29 | - The `Concat` and `Split` operators also consume a significant portion of the inference time, with **5.63%** and **5.06%** respectively. 30 | 31 | 2. **Less Time-Consuming Operators:** 32 | - Operators like `Reshape`, `Transpose`, and `Sigmoid` consume relatively less time, each contributing less than **1%** of the total inference time. 33 | - The `OutputOperator` and `InputOperator` are the least time-consuming, with **0.03%** and **0.00%** respectively. 34 | 35 | 3. **CPU vs. NPU Utilization:** 36 | - The majority of the inference time is spent on the NPU, with **103,634 microseconds** (**103.634 milliseconds**) spent on NPU operations. 37 | - The CPU time is minimal, with only **602 microseconds** (**0.602 milliseconds**) spent on CPU operations. 38 | 39 | 4. **Potential Optimization Areas:** 40 | - Given that `ConvExSwish` is the most time-consuming operator, optimizing the convolutional layers or exploring alternative activation functions could potentially reduce the inference time. 41 | - The `Concat` and `Split` operators, while not as dominant as `ConvExSwish`, still contribute a significant portion of the inference time. Optimizing these operations could also lead to performance improvements. 42 | 43 | 5. 
**Overall Performance:** 44 | - The total inference time of **113.4130 milliseconds** indicates that the model is performing inference within a reasonable time frame for many real-time applications. However, further optimizations could reduce this time, making the model even more suitable for latency-sensitive applications. 45 | 46 | ### Conclusion 47 | The inference time is primarily dominated by convolutional operations (`ConvExSwish`), followed by concatenation (`Concat`) and splitting (`Split`) operations. The model efficiently utilizes the NPU, with minimal CPU involvement. To further optimize performance, focusing on reducing the time spent in convolutional layers and concatenation/splitting operations could yield significant improvements. 48 | 49 | 50 | ### 操作耗时排名表 51 | 52 | | 操作类型 | 调用次数 | CPU时间(us) | GPU时间(us) | NPU时间(us) | 总时间(us) | 时间比例(%) | 53 | |--------------------|------------|-------------|-------------|-------------|---------------|--------------| 54 | | ConvExSwish | 138 | 0 | 0 | 80201 | 80201 | 76.94% | 55 | | Concat | 38 | 0 | 0 | 5868 | 5868 | 5.63% | 56 | | Split | 15 | 0 | 0 | 5270 | 5270 | 5.06% | 57 | | AveragePool | 5 | 0 | 0 | 4203 | 4203 | 4.03% | 58 | | exSoftmax13 | 1 | 0 | 0 | 2169 | 2169 | 2.08% | 59 | | MaxPool | 8 | 0 | 0 | 2159 | 2159 | 2.07% | 60 | | Add | 18 | 0 | 0 | 1237 | 1237 | 1.19% | 61 | | Conv | 7 | 0 | 0 | 771 | 771 | 0.74% | 62 | | Resize | 2 | 0 | 0 | 663 | 663 | 0.64% | 63 | | Reshape | 5 | 568 | 0 | 6 | 574 | 0.55% | 64 | | Transpose | 2 | 0 | 0 | 486 | 486 | 0.47% | 65 | | Sigmoid | 1 | 0 | 0 | 354 | 354 | 0.34% | 66 | | Mul | 2 | 0 | 0 | 148 | 148 | 0.14% | 67 | | Sub | 2 | 0 | 0 | 99 | 99 | 0.09% | 68 | | OutputOperator | 1 | 30 | 0 | 0 | 30 | 0.03% | 69 | | InputOperator | 1 | 4 | 0 | 0 | 4 | 0.00% | 70 | 71 | ### 总推理时间 72 | - **总推理时间(以毫秒为单位,保留四位小数):** 113.4130 ms 73 | 74 | ### 总结与分析 75 | 76 | 1. 
**主要操作:** 77 | - 操作 `ConvExSwish` 是最耗时的,占总推理时间的 **76.94%**。这表明模型大部分时间都花在卷积操作后跟一个 swish 激活函数上。 78 | - `Concat` 和 `Split` 操作也消耗了相当一部分推理时间,分别为 **5.63%** 和 **5.06%**。 79 | 80 | 2. **耗时较少的操作:** 81 | - 像 `Reshape`、`Transpose` 和 `Sigmoid` 这样的操作消耗的时间相对较少,每个对总推理时间的贡献都不到 **1%**。 82 | - `OutputOperator` 和 `InputOperator` 是最不耗时的,分别为 **0.03%** 和 **0.00%**。 83 | 84 | 3. **CPU与NPU利用率:** 85 | - 大部分推理时间都花在了NPU上,**103,634 微秒**(**103.634 毫秒**)用于NPU操作。 86 | - CPU时间非常少,只有 **602 微秒**(**0.602 毫秒**)用于CPU操作。 87 | 88 | 4. **潜在优化领域:** 89 | - 鉴于 `ConvExSwish` 是最耗时的操作,优化卷积层或探索替代激活函数可能会减少推理时间。 90 | - `Concat` 和 `Split` 操作虽然不如 `ConvExSwish` 占主导地位,但仍占相当一部分推理时间。优化这些操作也可能导致性能提升。 91 | 92 | 5. **整体性能:** 93 | - 总推理时间为 **113.4130 毫秒**,表明模型在许多实时应用中进行推理的时间框架是合理的。然而,进一步的优化可以减少这个时间,使模型更适合对延迟敏感的应用。 94 | 95 | ### 结论 96 | 推理时间主要由卷积操作(`ConvExSwish`)主导,其次是连接(`Concat`)和分割(`Split`)操作。模型有效地利用了NPU,CPU参与度最小。为了进一步优化性能,专注于减少卷积层和连接/分割操作所花费的时间可能会带来显著的改进。 97 | 98 | ### Model size: 27.7908 MB 99 | ### SDK API Version: 2.3.0 (c949ad889d@2024-11-07T11:35:33) 100 | ### Driver Version: 0.9.8 101 | 102 | Total Operator Elapsed Per Frame Time(us): 131201 103 | Total Memory Read/Write Per Frame Size(KB): 235155.92 104 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 105 | --------------------------------------------------------------------------------------------------- 106 | Operator Time Consuming Ranking Table 107 | --------------------------------------------------------------------------------------------------- 108 | OpType CallNumber CPUTime(us) GPUTime(us) NPUTime(us) TotalTime(us) TimeRatio(%) 109 | --------------------------------------------------------------------------------------------------- 110 | ConvExSwish 138 0 0 95657 95657 72.91% 111 | Concat 38 0 0 10339 10339 7.88% 112 | Split 15 0 0 6891 6891 5.25% 113 | AveragePool 5 0 0 4817 4817 
3.67% 114 | Add 18 0 0 3539 3539 2.70% 115 | MaxPool 8 0 0 3101 3101 2.36% 116 | exSoftmax13 1 0 0 2344 2344 1.79% 117 | Conv 7 0 0 1168 1168 0.89% 118 | Reshape 5 1065 0 7 1072 0.82% 119 | Resize 2 0 0 845 845 0.64% 120 | Transpose 2 0 0 626 626 0.48% 121 | Sigmoid 1 0 0 398 398 0.30% 122 | Sub 2 0 0 202 202 0.15% 123 | Mul 2 0 0 150 150 0.11% 124 | OutputOperator 1 38 0 0 38 0.03% 125 | InputOperator 1 14 0 0 14 0.01% 126 | --------------------------------------------------------------------------------------------------- 127 | Total 1117 0 130084 131201 128 | --------------------------------------------------------------------------------------------------- 129 | 130 | Total inference time (in milliseconds, 4 decimal places): 151.6830 ms -------------------------------------------------------------------------------- /rknn.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include // 用于计时 5 | #include // 用于 setprecision 6 | #include // 包含 OpenCV 头文件 7 | #include "rknn_model.h" 8 | #include "rga_utils.h" 9 | 10 | int main() { 11 | // 初始化模型 12 | std::string model_path = "yolo11s.rknn"; // yolov9c.rknn 13 | rknn_model model(model_path); 14 | 15 | int ctx_index = 0; // 使用第一个上下文 16 | 17 | std::string image_path = "bus.jpg"; 18 | cv::Mat image = cv::imread(image_path); 19 | if (image.empty()) { 20 | std::cerr << "Failed to read the image: " << image_path << std::endl; 21 | return -1; 22 | } 23 | cv::cvtColor(image, image, cv::COLOR_BGR2RGB); 24 | 25 | // 打印图像的尺寸和数据类型 26 | std::cout << "Image size: " << image.size() << std::endl; 27 | std::cout << "Image type: " << image.type() << std::endl; 28 | 29 | // 打印图像的数据类型名称 30 | std::string type_name; 31 | switch (image.type()) { 32 | case CV_8U: type_name = "CV_8U"; break; 33 | case CV_8S: type_name = "CV_8S"; break; 34 | case CV_16U: type_name = "CV_16U"; break; 35 | case CV_16S: type_name = "CV_16S"; break; 36 | case CV_32S: type_name = "CV_32S"; 
break; 37 | case CV_32F: type_name = "CV_32F"; break; 38 | case CV_64F: type_name = "CV_64F"; break; 39 | case CV_8UC3: type_name = "CV_8UC3"; break; 40 | default: type_name = "Unknown"; break; 41 | } 42 | std::cout << "Image type name: " << type_name << std::endl; 43 | 44 | const int num_inferences = 100; 45 | double total_time_ms = 0.0; 46 | object_detect_result_list od_results; 47 | 48 | for (int i = 0; i < num_inferences; ++i) { 49 | auto start = std::chrono::high_resolution_clock::now(); 50 | 51 | // 运行推理 52 | int ret = model.run_inference(image, ctx_index, &od_results); 53 | auto end = std::chrono::high_resolution_clock::now(); 54 | if (ret < 0) { 55 | printf("rknn_run fail! ret=%d\n", ret); 56 | return -1; 57 | } 58 | std::chrono::duration elapsed = end - start; 59 | total_time_ms += elapsed.count(); 60 | } 61 | 62 | double avg_time_ms = total_time_ms / num_inferences; 63 | double fps = num_inferences / (total_time_ms / 1000.0); 64 | 65 | std::cout << std::fixed << std::setprecision(10); // 设置小数点后十位 66 | std::cout << "\nAverage inference time over " << num_inferences << " runs: " 67 | << avg_time_ms << " ms" << std::endl; 68 | 69 | std::cout << std::fixed << std::setprecision(2); // 设置小数点后两位 70 | std::cout << "Frames per second (FPS): " << fps << std::endl; 71 | 72 | /* //打印最后一次推理的结果(如果你需要) 73 | for (int i = 0; i < od_results.count; ++i) { 74 | object_detect_result result = od_results.results[i]; 75 | printf("Object %d:\n", i + 1); 76 | printf(" Box: (%d, %d, %d, %d)\n", 77 | result.box.left, 78 | result.box.top, 79 | result.box.right, 80 | result.box.bottom); 81 | printf(" Class ID: %d\n", result.cls_id); 82 | printf(" Confidence: %.2f\n", result.prop); 83 | } 84 | */ 85 | 86 | // 创建一个 RGB 格式的副本用于绘制 87 | cv::Mat image_rgb = image.clone(); 88 | cv::cvtColor(image_rgb, image_rgb, cv::COLOR_RGB2BGR); // 转换回 BGR 格式以便显示正确颜色 89 | 90 | 91 | // 定义向下偏移量 92 | int offset = 50; 93 | // 绘制检测框 94 | for (int i = 0; i < od_results.count; ++i) { 95 | object_detect_result 
result = od_results.results[i]; 96 | 97 | // 绘制矩形框 98 | cv::Rect rect(result.box.left, result.box.top, result.box.right - result.box.left, result.box.bottom - result.box.top); 99 | cv::rectangle(image_rgb, rect, cv::Scalar(0, 255, 0), 2); // 绿色框线 100 | 101 | // 添加文本标签 102 | std::ostringstream label; 103 | label << "ID: " << result.cls_id << " Conf: " << std::fixed << std::setprecision(2) << result.prop; 104 | int baseLine = 0; 105 | cv::Size label_size = cv::getTextSize(label.str(), cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); 106 | cv::rectangle(image_rgb, cv::Point(result.box.left, result.box.top - label_size.height), 107 | cv::Point(result.box.left + label_size.width, result.box.top + baseLine), 108 | cv::Scalar(0, 255, 0), -1); // 填充背景 109 | cv::putText(image_rgb, label.str(), cv::Point(result.box.left, result.box.top), 110 | cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0), 1, cv::LINE_AA); // 黑色文字 111 | } 112 | 113 | // 保存结果图像 114 | cv::imwrite("result.jpg", image_rgb); 115 | 116 | return 0; 117 | } -------------------------------------------------------------------------------- /runtime/Linux/librknn_api/include/rknn_api.h: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | * 3 | * Copyright (c) 2017 - 2022 by Rockchip Corp. All rights reserved. 4 | * 5 | * The material in this file is confidential and contains trade secrets 6 | * of Rockchip Corporation. This is proprietary information owned by 7 | * Rockchip Corporation. No part of this work may be disclosed, 8 | * reproduced, copied, transmitted, or used in any way for any purpose, 9 | * without the express written permission of Rockchip Corporation. 
10 | * 11 | *****************************************************************************/ 12 | 13 | 14 | #ifndef _RKNN_API_H 15 | #define _RKNN_API_H 16 | 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | #include 22 | 23 | /* 24 | Definition of extended flag for rknn_init. 25 | */ 26 | /* set high priority context. */ 27 | #define RKNN_FLAG_PRIOR_HIGH 0x00000000 28 | 29 | /* set medium priority context */ 30 | #define RKNN_FLAG_PRIOR_MEDIUM 0x00000001 31 | 32 | /* set low priority context. */ 33 | #define RKNN_FLAG_PRIOR_LOW 0x00000002 34 | 35 | /* asynchronous mode. 36 | when enable, rknn_outputs_get will not block for too long because it directly retrieves the result of 37 | the previous frame which can increase the frame rate on single-threaded mode, but at the cost of 38 | rknn_outputs_get not retrieves the result of the current frame. 39 | in multi-threaded mode you do not need to turn this mode on. */ 40 | #define RKNN_FLAG_ASYNC_MASK 0x00000004 41 | 42 | /* collect performance mode. 43 | when enable, you can get detailed performance reports via rknn_query(ctx, RKNN_QUERY_PERF_DETAIL, ...), 44 | but it will reduce the frame rate. 
*/ 45 | #define RKNN_FLAG_COLLECT_PERF_MASK 0x00000008 46 | 47 | /* allocate all memory in outside, includes weight/internal/inputs/outputs */ 48 | #define RKNN_FLAG_MEM_ALLOC_OUTSIDE 0x00000010 49 | 50 | /* weight sharing with the same network structure */ 51 | #define RKNN_FLAG_SHARE_WEIGHT_MEM 0x00000020 52 | 53 | /* send fence fd from outside */ 54 | #define RKNN_FLAG_FENCE_IN_OUTSIDE 0x00000040 55 | 56 | /* get fence fd from inside */ 57 | #define RKNN_FLAG_FENCE_OUT_OUTSIDE 0x00000080 58 | 59 | /* dummy init flag: could only get total_weight_size and total_internal_size by rknn_query*/ 60 | #define RKNN_FLAG_COLLECT_MODEL_INFO_ONLY 0x00000100 61 | 62 | /* allocate internal memory in outside */ 63 | #define RKNN_FLAG_INTERNAL_ALLOC_OUTSIDE 0x00000200 64 | 65 | /* set GPU as the preferred execution backend When the operator is not supported by the NPU */ 66 | #define RKNN_FLAG_EXECUTE_FALLBACK_PRIOR_DEVICE_GPU 0x00000400 67 | 68 | /* enable allocate sram type buffers */ 69 | #define RKNN_FLAG_ENABLE_SRAM 0x00000800 70 | 71 | /* sram type buffers are shared among different contexts */ 72 | #define RKNN_FLAG_SHARE_SRAM 0x00001000 73 | 74 | /* default nice -19, this flag can disable default priority */ 75 | #define RKNN_FLAG_DISABLE_PROC_HIGH_PRIORITY 0x00002000 76 | 77 | /* don't flush input buffer cache, the user must ensure that the input tensor has flushed the cache before calling rknn_run. 78 | !!! Don't use this flags when you call rknn_inputs_set() to set input data. */ 79 | #define RKNN_FLAG_DISABLE_FLUSH_INPUT_MEM_CACHE 0x00004000 80 | 81 | /* Don't invalid output buffer cache. 82 | Users cannot directly access output_mem->virt_addr, 83 | which will cause cache consistency problems. 84 | If you want to use output_mem->virt_addr, 85 | you must use rknn_mem_sync (ctx, mem, RKNN_MEMORY_SYNC_FROM_DEVICE) to flush the cache. 
86 | This flags is generally used when the output data of the NPU is not accessed by the CPU, 87 | but is accessed by the GPU or RGA to reduce the time required to flush the cache. 88 | !!! Don't use this flags when you call rknn_outputs_get() to get output data.*/ 89 | #define RKNN_FLAG_DISABLE_FLUSH_OUTPUT_MEM_CACHE 0x00008000 90 | 91 | /* This flag is used when the model data buffer is allocated by NPU, and can be accessed by NPU directly. */ 92 | #define RKNN_FLAG_MODEL_BUFFER_ZERO_COPY 0x00010000 93 | 94 | /* This flag is a memory allocation flag, which is used in rknn_create_mem2() when no context is available. */ 95 | #define RKNN_MEM_FLAG_ALLOC_NO_CONTEXT 0x00020000 96 | 97 | 98 | /* 99 | Error code returned by the RKNN API. 100 | */ 101 | #define RKNN_SUCC 0 /* execute succeed. */ 102 | #define RKNN_ERR_FAIL -1 /* execute failed. */ 103 | #define RKNN_ERR_TIMEOUT -2 /* execute timeout. */ 104 | #define RKNN_ERR_DEVICE_UNAVAILABLE -3 /* device is unavailable. */ 105 | #define RKNN_ERR_MALLOC_FAIL -4 /* memory malloc fail. */ 106 | #define RKNN_ERR_PARAM_INVALID -5 /* parameter is invalid. */ 107 | #define RKNN_ERR_MODEL_INVALID -6 /* model is invalid. */ 108 | #define RKNN_ERR_CTX_INVALID -7 /* context is invalid. */ 109 | #define RKNN_ERR_INPUT_INVALID -8 /* input is invalid. */ 110 | #define RKNN_ERR_OUTPUT_INVALID -9 /* output is invalid. */ 111 | #define RKNN_ERR_DEVICE_UNMATCH -10 /* the device is unmatch, please update rknn sdk 112 | and npu driver/firmware. */ 113 | #define RKNN_ERR_INCOMPATILE_PRE_COMPILE_MODEL -11 /* This RKNN model use pre_compile mode, but not compatible with current driver. */ 114 | #define RKNN_ERR_INCOMPATILE_OPTIMIZATION_LEVEL_VERSION -12 /* This RKNN model set optimization level, but not compatible with current driver. */ 115 | #define RKNN_ERR_TARGET_PLATFORM_UNMATCH -13 /* This RKNN model set target platform, but not compatible with current platform. 
*/ 116 | 117 | /* 118 | Definition for tensor 119 | */ 120 | #define RKNN_MAX_DIMS 16 /* maximum dimension of tensor. */ 121 | #define RKNN_MAX_NUM_CHANNEL 15 /* maximum channel number of input tensor. */ 122 | #define RKNN_MAX_NAME_LEN 256 /* maximum name lenth of tensor. */ 123 | #define RKNN_MAX_DYNAMIC_SHAPE_NUM 512 /* maximum number of dynamic shape for each input. */ 124 | 125 | #ifdef __arm__ 126 | typedef uint32_t rknn_context; 127 | #else 128 | typedef uint64_t rknn_context; 129 | #endif 130 | 131 | 132 | /* 133 | The query command for rknn_query 134 | */ 135 | typedef enum _rknn_query_cmd { 136 | RKNN_QUERY_IN_OUT_NUM = 0, /* query the number of input & output tensor. */ 137 | RKNN_QUERY_INPUT_ATTR = 1, /* query the attribute of input tensor. */ 138 | RKNN_QUERY_OUTPUT_ATTR = 2, /* query the attribute of output tensor. */ 139 | RKNN_QUERY_PERF_DETAIL = 3, /* query the detail performance, need set 140 | RKNN_FLAG_COLLECT_PERF_MASK when call rknn_init, 141 | this query needs to be valid after rknn_outputs_get. */ 142 | RKNN_QUERY_PERF_RUN = 4, /* query the time of run, 143 | this query needs to be valid after rknn_outputs_get. */ 144 | RKNN_QUERY_SDK_VERSION = 5, /* query the sdk & driver version */ 145 | 146 | RKNN_QUERY_MEM_SIZE = 6, /* query the weight & internal memory size */ 147 | RKNN_QUERY_CUSTOM_STRING = 7, /* query the custom string */ 148 | 149 | RKNN_QUERY_NATIVE_INPUT_ATTR = 8, /* query the attribute of native input tensor. */ 150 | RKNN_QUERY_NATIVE_OUTPUT_ATTR = 9, /* query the attribute of native output tensor. */ 151 | 152 | RKNN_QUERY_NATIVE_NC1HWC2_INPUT_ATTR = 8, /* query the attribute of native input tensor. */ 153 | RKNN_QUERY_NATIVE_NC1HWC2_OUTPUT_ATTR = 9, /* query the attribute of native output tensor. */ 154 | 155 | RKNN_QUERY_NATIVE_NHWC_INPUT_ATTR = 10, /* query the attribute of native input tensor. */ 156 | RKNN_QUERY_NATIVE_NHWC_OUTPUT_ATTR = 11, /* query the attribute of native output tensor. 
*/ 157 | 158 | RKNN_QUERY_DEVICE_MEM_INFO = 12, /* query the attribute of rknn memory information. */ 159 | 160 | RKNN_QUERY_INPUT_DYNAMIC_RANGE = 13, /* query the dynamic shape range of rknn input tensor. */ 161 | RKNN_QUERY_CURRENT_INPUT_ATTR = 14, /* query the current shape of rknn input tensor, only valid for dynamic rknn model*/ 162 | RKNN_QUERY_CURRENT_OUTPUT_ATTR = 15, /* query the current shape of rknn output tensor, only valid for dynamic rknn model*/ 163 | 164 | RKNN_QUERY_CURRENT_NATIVE_INPUT_ATTR = 16, /* query the current native shape of rknn input tensor, only valid for dynamic rknn model*/ 165 | RKNN_QUERY_CURRENT_NATIVE_OUTPUT_ATTR = 17, /* query the current native shape of rknn output tensor, only valid for dynamic rknn model*/ 166 | 167 | 168 | RKNN_QUERY_CMD_MAX 169 | } rknn_query_cmd; 170 | 171 | /* 172 | the tensor data type. 173 | */ 174 | typedef enum _rknn_tensor_type { 175 | RKNN_TENSOR_FLOAT32 = 0, /* data type is float32. */ 176 | RKNN_TENSOR_FLOAT16, /* data type is float16. */ 177 | RKNN_TENSOR_INT8, /* data type is int8. */ 178 | RKNN_TENSOR_UINT8, /* data type is uint8. */ 179 | RKNN_TENSOR_INT16, /* data type is int16. */ 180 | RKNN_TENSOR_UINT16, /* data type is uint16. */ 181 | RKNN_TENSOR_INT32, /* data type is int32. */ 182 | RKNN_TENSOR_UINT32, /* data type is uint32. */ 183 | RKNN_TENSOR_INT64, /* data type is int64. 
*/ 184 | RKNN_TENSOR_BOOL, 185 | RKNN_TENSOR_INT4, 186 | RKNN_TENSOR_BFLOAT16, 187 | 188 | RKNN_TENSOR_TYPE_MAX 189 | } rknn_tensor_type; 190 | 191 | inline static const char* get_type_string(rknn_tensor_type type) 192 | { 193 | switch(type) { 194 | case RKNN_TENSOR_FLOAT32: return "FP32"; 195 | case RKNN_TENSOR_FLOAT16: return "FP16"; 196 | case RKNN_TENSOR_INT8: return "INT8"; 197 | case RKNN_TENSOR_UINT8: return "UINT8"; 198 | case RKNN_TENSOR_INT16: return "INT16"; 199 | case RKNN_TENSOR_UINT16: return "UINT16"; 200 | case RKNN_TENSOR_INT32: return "INT32"; 201 | case RKNN_TENSOR_UINT32: return "UINT32"; 202 | case RKNN_TENSOR_INT64: return "INT64"; 203 | case RKNN_TENSOR_BOOL: return "BOOL"; 204 | case RKNN_TENSOR_INT4: return "INT4"; 205 | case RKNN_TENSOR_BFLOAT16: return "BF16"; 206 | default: return "UNKNOW"; 207 | } 208 | } 209 | 210 | /* 211 | the quantitative type. 212 | */ 213 | typedef enum _rknn_tensor_qnt_type { 214 | RKNN_TENSOR_QNT_NONE = 0, /* none. */ 215 | RKNN_TENSOR_QNT_DFP, /* dynamic fixed point. */ 216 | RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC, /* asymmetric affine. */ 217 | 218 | RKNN_TENSOR_QNT_MAX 219 | } rknn_tensor_qnt_type; 220 | 221 | inline static const char* get_qnt_type_string(rknn_tensor_qnt_type type) 222 | { 223 | switch(type) { 224 | case RKNN_TENSOR_QNT_NONE: return "NONE"; 225 | case RKNN_TENSOR_QNT_DFP: return "DFP"; 226 | case RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC: return "AFFINE"; 227 | default: return "UNKNOW"; 228 | } 229 | } 230 | 231 | /* 232 | the tensor data format. 233 | */ 234 | typedef enum _rknn_tensor_format { 235 | RKNN_TENSOR_NCHW = 0, /* data format is NCHW. */ 236 | RKNN_TENSOR_NHWC, /* data format is NHWC. */ 237 | RKNN_TENSOR_NC1HWC2, /* data format is NC1HWC2. */ 238 | RKNN_TENSOR_UNDEFINED, 239 | 240 | RKNN_TENSOR_FORMAT_MAX 241 | } rknn_tensor_format; 242 | 243 | /* 244 | the mode of running on target NPU core. 
245 | */ 246 | typedef enum _rknn_core_mask { 247 | RKNN_NPU_CORE_AUTO = 0, /* default, run on NPU core randomly. */ 248 | RKNN_NPU_CORE_0 = 1, /* run on NPU core 0. */ 249 | RKNN_NPU_CORE_1 = 2, /* run on NPU core 1. */ 250 | RKNN_NPU_CORE_2 = 4, /* run on NPU core 2. */ 251 | RKNN_NPU_CORE_0_1 = RKNN_NPU_CORE_0 | RKNN_NPU_CORE_1, /* run on NPU core 0 and core 1. */ 252 | RKNN_NPU_CORE_0_1_2 = RKNN_NPU_CORE_0_1 | RKNN_NPU_CORE_2, /* run on NPU core 0 and core 1 and core 2. */ 253 | RKNN_NPU_CORE_ALL = 0xffff, /* auto choice, run on NPU cores depending on platform */ 254 | 255 | RKNN_NPU_CORE_UNDEFINED, 256 | } rknn_core_mask; 257 | 258 | inline static const char* get_format_string(rknn_tensor_format fmt) 259 | { 260 | switch(fmt) { 261 | case RKNN_TENSOR_NCHW: return "NCHW"; 262 | case RKNN_TENSOR_NHWC: return "NHWC"; 263 | case RKNN_TENSOR_NC1HWC2: return "NC1HWC2"; 264 | case RKNN_TENSOR_UNDEFINED: return "UNDEFINED"; 265 | default: return "UNKNOW"; 266 | } 267 | } 268 | 269 | /* 270 | the information for RKNN_QUERY_IN_OUT_NUM. 271 | */ 272 | typedef struct _rknn_input_output_num { 273 | uint32_t n_input; /* the number of input. */ 274 | uint32_t n_output; /* the number of output. */ 275 | } rknn_input_output_num; 276 | 277 | /* 278 | the information for RKNN_QUERY_INPUT_ATTR / RKNN_QUERY_OUTPUT_ATTR. 279 | */ 280 | typedef struct _rknn_tensor_attr { 281 | uint32_t index; /* input parameter, the index of input/output tensor, 282 | need set before call rknn_query. */ 283 | 284 | uint32_t n_dims; /* the number of dimensions. */ 285 | uint32_t dims[RKNN_MAX_DIMS]; /* the dimensions array. */ 286 | char name[RKNN_MAX_NAME_LEN]; /* the name of tensor. */ 287 | 288 | uint32_t n_elems; /* the number of elements. */ 289 | uint32_t size; /* the bytes size of tensor. */ 290 | 291 | rknn_tensor_format fmt; /* the data format of tensor. */ 292 | rknn_tensor_type type; /* the data type of tensor. */ 293 | rknn_tensor_qnt_type qnt_type; /* the quantitative type of tensor. 
*/ 294 | int8_t fl; /* fractional length for RKNN_TENSOR_QNT_DFP. */ 295 | int32_t zp; /* zero point for RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC. */ 296 | float scale; /* scale for RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC. */ 297 | 298 | uint32_t w_stride; /* the stride of tensor along the width dimention of input, 299 | Note: it is read-only, 0 means equal to width. */ 300 | uint32_t size_with_stride; /* the bytes size of tensor with stride. */ 301 | 302 | uint8_t pass_through; /* pass through mode, for rknn_set_io_mem interface. 303 | if TRUE, the buf data is passed directly to the input node of the rknn model 304 | without any conversion. the following variables do not need to be set. 305 | if FALSE, the buf data is converted into an input consistent with the model 306 | according to the following type and fmt. so the following variables 307 | need to be set.*/ 308 | uint32_t h_stride; /* the stride along the height dimention of input, 309 | Note: it is write-only, if it was set to 0, h_stride = height. */ 310 | } rknn_tensor_attr; 311 | 312 | typedef struct _rknn_input_range { 313 | uint32_t index; /* input parameter, the index of input/output tensor, 314 | need set before call rknn_query. */ 315 | uint32_t shape_number; /* the number of shape. */ 316 | rknn_tensor_format fmt; /* the data format of tensor. */ 317 | char name[RKNN_MAX_NAME_LEN]; /* the name of tensor. */ 318 | uint32_t dyn_range[RKNN_MAX_DYNAMIC_SHAPE_NUM][RKNN_MAX_DIMS]; /* the dynamic input dimensions range. */ 319 | uint32_t n_dims; /* the number of dimensions. */ 320 | 321 | } rknn_input_range; 322 | 323 | /* 324 | the information for RKNN_QUERY_PERF_DETAIL. 325 | */ 326 | typedef struct _rknn_perf_detail { 327 | char* perf_data; /* the string pointer of perf detail. don't need free it by user. */ 328 | uint64_t data_len; /* the string length. */ 329 | } rknn_perf_detail; 330 | 331 | /* 332 | the information for RKNN_QUERY_PERF_RUN. 
333 | */ 334 | typedef struct _rknn_perf_run { 335 | int64_t run_duration; /* real inference time (us) */ 336 | } rknn_perf_run; 337 | 338 | /* 339 | the information for RKNN_QUERY_SDK_VERSION. 340 | */ 341 | typedef struct _rknn_sdk_version { 342 | char api_version[256]; /* the version of rknn api. */ 343 | char drv_version[256]; /* the version of rknn driver. */ 344 | } rknn_sdk_version; 345 | 346 | /* 347 | the information for RKNN_QUERY_MEM_SIZE. 348 | */ 349 | typedef struct _rknn_mem_size { 350 | uint32_t total_weight_size; /* the weight memory size */ 351 | uint32_t total_internal_size; /* the internal memory size, exclude inputs/outputs */ 352 | uint64_t total_dma_allocated_size; /* total dma memory allocated size */ 353 | uint32_t total_sram_size; /* total system sram size reserved for rknn */ 354 | uint32_t free_sram_size; /* free system sram size reserved for rknn */ 355 | uint32_t reserved[10]; /* reserved */ 356 | } rknn_mem_size; 357 | 358 | /* 359 | the information for RKNN_QUERY_CUSTOM_STRING. 360 | */ 361 | typedef struct _rknn_custom_string { 362 | char string[1024]; /* the string of custom, lengths max to 1024 bytes */ 363 | } rknn_custom_string; 364 | 365 | /* 366 | The flags of rknn_tensor_mem. 367 | */ 368 | typedef enum _rknn_tensor_mem_flags { 369 | RKNN_TENSOR_MEMORY_FLAGS_ALLOC_INSIDE = 1, /*Used to mark in rknn_destroy_mem() whether it is necessary to release the "mem" pointer itself. 370 | If the flag RKNN_TENSOR_MEMORY_FLAGS_ALLOC_INSIDE is set, rknn_destroy_mem() will call free(mem).*/ 371 | RKNN_TENSOR_MEMORY_FLAGS_FROM_FD = 2, /*Used to mark in rknn_create_mem_from_fd() whether it is necessary to release the "mem" pointer itself. 372 | If the flag RKNN_TENSOR_MEMORY_FLAGS_FROM_FD is set, rknn_destroy_mem() will call free(mem).*/ 373 | RKNN_TENSOR_MEMORY_FLAGS_FROM_PHYS = 3, /*Used to mark in rknn_create_mem_from_phys() whether it is necessary to release the "mem" pointer itself. 
374 | If the flag RKNN_TENSOR_MEMORY_FLAGS_FROM_PHYS is set, rknn_destroy_mem() will call free(mem).*/ 375 | RKNN_TENSOR_MEMORY_FLAGS_UNKNOWN 376 | } rknn_tensor_mem_flags; 377 | 378 | /* 379 | The mode to sync cacheable rknn memory. 380 | */ 381 | typedef enum _rknn_mem_alloc_flags { 382 | RKNN_FLAG_MEMORY_FLAGS_DEFAULT = 0 << 0, /* Same with RKNN_FLAG_MEMORY_CACHEABLE */ 383 | RKNN_FLAG_MEMORY_CACHEABLE = 1 << 0, /* Create Cacheable memory. */ 384 | RKNN_FLAG_MEMORY_NON_CACHEABLE = 1 << 1, /* Create NON-Cacheable memory. */ 385 | } rknn_mem_alloc_flags; 386 | 387 | /* 388 | The mode to sync cacheable rknn memory. 389 | */ 390 | typedef enum _rknn_mem_sync_mode { 391 | RKNN_MEMORY_SYNC_TO_DEVICE = 0x1, /* the mode used for consistency of device access after CPU accesses data. */ 392 | RKNN_MEMORY_SYNC_FROM_DEVICE = 0x2, /* the mode used for consistency of CPU access after device accesses data. */ 393 | RKNN_MEMORY_SYNC_BIDIRECTIONAL = RKNN_MEMORY_SYNC_TO_DEVICE | RKNN_MEMORY_SYNC_FROM_DEVICE, /* the mode used for consistency of data access 394 | between device and CPU in both directions. */ 395 | } rknn_mem_sync_mode; 396 | 397 | /* 398 | the memory information of tensor. 399 | */ 400 | typedef struct _rknn_tensor_memory { 401 | void* virt_addr; /* the virtual address of tensor buffer. */ 402 | uint64_t phys_addr; /* the physical address of tensor buffer. */ 403 | int32_t fd; /* the fd of tensor buffer. */ 404 | int32_t offset; /* indicates the offset of the memory. */ 405 | uint32_t size; /* the size of tensor buffer. */ 406 | uint32_t flags; /* the flags of tensor buffer, reserved */ 407 | void * priv_data; /* the private data of tensor buffer. */ 408 | } rknn_tensor_mem; 409 | 410 | /* 411 | the input information for rknn_input_set. 412 | */ 413 | typedef struct _rknn_input { 414 | uint32_t index; /* the input index. */ 415 | void* buf; /* the input buf for index. */ 416 | uint32_t size; /* the size of input buf. 
*/ 417 | uint8_t pass_through; /* pass through mode. 418 | if TRUE, the buf data is passed directly to the input node of the rknn model 419 | without any conversion. the following variables do not need to be set. 420 | if FALSE, the buf data is converted into an input consistent with the model 421 | according to the following type and fmt. so the following variables 422 | need to be set.*/ 423 | rknn_tensor_type type; /* the data type of input buf. */ 424 | rknn_tensor_format fmt; /* the data format of input buf. 425 | currently the internal input format of NPU is NCHW by default. 426 | so entering NCHW data can avoid the format conversion in the driver. */ 427 | } rknn_input; 428 | 429 | /* 430 | the output information for rknn_outputs_get. 431 | */ 432 | typedef struct _rknn_output { 433 | uint8_t want_float; /* want transfer output data to float */ 434 | uint8_t is_prealloc; /* whether buf is pre-allocated. 435 | if TRUE, the following variables need to be set. 436 | if FALSE, the following variables do not need to be set. */ 437 | uint32_t index; /* the output index. */ 438 | void* buf; /* the output buf for index. 439 | when is_prealloc = FALSE and rknn_outputs_release called, 440 | this buf pointer will be free and don't use it anymore. */ 441 | uint32_t size; /* the size of output buf. */ 442 | } rknn_output; 443 | 444 | /* 445 | the extend information for rknn_init. 446 | */ 447 | typedef struct _rknn_init_extend { 448 | rknn_context ctx; /* rknn context */ 449 | int32_t real_model_offset; /* real rknn model file offset, only valid when init context with rknn file path and zero-copy model model */ 450 | uint32_t real_model_size; /* real rknn model file size, only valid when init context with rknn file path and zero-copy model model */ 451 | int32_t model_buffer_fd; /* the fd of model buffer. 
*/ 452 | uint32_t model_buffer_flags; /* the flags of model_buffer */ 453 | uint8_t reserved[112]; /* reserved */ 454 | } rknn_init_extend; 455 | 456 | /* 457 | the extend information for rknn_run. 458 | */ 459 | typedef struct _rknn_run_extend { 460 | uint64_t frame_id; /* output parameter, indicate current frame id of run. */ 461 | int32_t non_block; /* block flag of run, 0 is block else 1 is non block */ 462 | int32_t timeout_ms; /* timeout for block mode, in milliseconds */ 463 | int32_t fence_fd; /* fence fd from other unit */ 464 | } rknn_run_extend; 465 | 466 | /* 467 | the extend information for rknn_outputs_get. 468 | */ 469 | typedef struct _rknn_output_extend { 470 | uint64_t frame_id; /* output parameter, indicate the frame id of outputs, corresponds to 471 | struct rknn_run_extend.frame_id.*/ 472 | } rknn_output_extend; 473 | 474 | 475 | /* rknn_init 476 | 477 | initial the context and load the rknn model. 478 | 479 | input: 480 | rknn_context* context the pointer of context handle. 481 | void* model if size > 0, pointer to the rknn model, if size = 0, filepath to the rknn model. 482 | uint32_t size the size of rknn model. 483 | uint32_t flag extend flag, see the define of RKNN_FLAG_XXX_XXX. 484 | rknn_init_extend* extend the extend information of init. 485 | return: 486 | int error code. 487 | */ 488 | int rknn_init(rknn_context* context, void* model, uint32_t size, uint32_t flag, rknn_init_extend* extend); 489 | 490 | /* rknn_dup_context 491 | 492 | initial the context and load the rknn model. 493 | 494 | input: 495 | rknn_context* context_in the pointer of context in handle. 496 | rknn_context* context_out the pointer of context out handle. 497 | return: 498 | int error code. 499 | */ 500 | int rknn_dup_context(rknn_context* context_in, rknn_context* context_out); 501 | 502 | /* rknn_destroy 503 | 504 | unload the rknn model and destroy the context. 505 | 506 | input: 507 | rknn_context context the handle of context. 
508 | return: 509 | int error code. 510 | */ 511 | int rknn_destroy(rknn_context context); 512 | 513 | 514 | /* rknn_query 515 | 516 | query the information about model or others. see rknn_query_cmd. 517 | 518 | input: 519 | rknn_context context the handle of context. 520 | rknn_query_cmd cmd the command of query. 521 | void* info the buffer point of information. 522 | uint32_t size the size of information. 523 | return: 524 | int error code. 525 | */ 526 | int rknn_query(rknn_context context, rknn_query_cmd cmd, void* info, uint32_t size); 527 | 528 | 529 | /* rknn_inputs_set 530 | 531 | set inputs information by input index of rknn model. 532 | inputs information see rknn_input. 533 | 534 | input: 535 | rknn_context context the handle of context. 536 | uint32_t n_inputs the number of inputs. 537 | rknn_input inputs[] the arrays of inputs information, see rknn_input. 538 | return: 539 | int error code 540 | */ 541 | int rknn_inputs_set(rknn_context context, uint32_t n_inputs, rknn_input inputs[]); 542 | 543 | /* 544 | rknn_set_batch_core_num 545 | 546 | set rknn batch core_num. 547 | 548 | input: 549 | rknn_context context the handle of context. 550 | int core_num the core number. 551 | return: 552 | int error code. 553 | 554 | */ 555 | int rknn_set_batch_core_num(rknn_context context, int core_num); 556 | 557 | /* rknn_set_core_mask 558 | 559 | set rknn core mask.(only supported on RK3588 now) 560 | 561 | RKNN_NPU_CORE_AUTO: auto mode, default value 562 | RKNN_NPU_CORE_0: core 0 mode 563 | RKNN_NPU_CORE_1: core 1 mode 564 | RKNN_NPU_CORE_2: core 2 mode 565 | RKNN_NPU_CORE_0_1: combine core 0/1 mode 566 | RKNN_NPU_CORE_0_1_2: combine core 0/1/2 mode 567 | RKNN_NPU_CORE_ALL: auto mode, select multiple npu cores to run depending on platform 568 | 569 | 570 | input: 571 | rknn_context context the handle of context. 572 | rknn_core_mask core_mask the core mask. 573 | return: 574 | int error code. 
575 | */ 576 | int rknn_set_core_mask(rknn_context context, rknn_core_mask core_mask); 577 | 578 | /* rknn_run 579 | 580 | run the model to execute inference. 581 | 582 | input: 583 | rknn_context context the handle of context. 584 | rknn_run_extend* extend the extend information of run. 585 | return: 586 | int error code. 587 | */ 588 | int rknn_run(rknn_context context, rknn_run_extend* extend); 589 | 590 | 591 | /* rknn_wait 592 | 593 | wait the model after execute inference. 594 | 595 | input: 596 | rknn_context context the handle of context. 597 | rknn_run_extend* extend the extend information of run. 598 | return: 599 | int error code. 600 | */ 601 | int rknn_wait(rknn_context context, rknn_run_extend* extend); 602 | 603 | 604 | /* rknn_outputs_get 605 | 606 | wait the inference to finish and get the outputs. 607 | this function will block until inference finish. 608 | the results will set to outputs[]. 609 | 610 | input: 611 | rknn_context context the handle of context. 612 | uint32_t n_outputs the number of outputs. 613 | rknn_output outputs[] the arrays of output, see rknn_output. 614 | rknn_output_extend* the extend information of output. 615 | return: 616 | int error code. 617 | */ 618 | int rknn_outputs_get(rknn_context context, uint32_t n_outputs, rknn_output outputs[], rknn_output_extend* extend); 619 | 620 | 621 | /* rknn_outputs_release 622 | 623 | release the outputs that get by rknn_outputs_get. 624 | after called, the rknn_output[x].buf get from rknn_outputs_get will 625 | also be free when rknn_output[x].is_prealloc = FALSE. 626 | 627 | input: 628 | rknn_context context the handle of context. 629 | uint32_t n_ouputs the number of outputs. 630 | rknn_output outputs[] the arrays of output. 
631 | return: 632 | int error code 633 | */ 634 | int rknn_outputs_release(rknn_context context, uint32_t n_ouputs, rknn_output outputs[]); 635 | 636 | 637 | /* new api for zero copy */ 638 | 639 | /* rknn_create_mem_from_phys (memory allocated outside) 640 | 641 | initialize tensor memory from physical address. 642 | 643 | input: 644 | rknn_context ctx the handle of context. 645 | uint64_t phys_addr physical address. 646 | void *virt_addr virtual address. 647 | uint32_t size the size of tensor buffer. 648 | return: 649 | rknn_tensor_mem the pointer of tensor memory information. 650 | */ 651 | rknn_tensor_mem* rknn_create_mem_from_phys(rknn_context ctx, uint64_t phys_addr, void *virt_addr, uint32_t size); 652 | 653 | 654 | /* rknn_create_mem_from_fd (memory allocated outside) 655 | 656 | initialize tensor memory from file description. 657 | 658 | input: 659 | rknn_context ctx the handle of context. 660 | int32_t fd file description. 661 | void *virt_addr virtual address. 662 | uint32_t size the size of tensor buffer. 663 | int32_t offset indicates the offset of the memory (virt_addr without offset). 664 | return: 665 | rknn_tensor_mem the pointer of tensor memory information. 666 | */ 667 | rknn_tensor_mem* rknn_create_mem_from_fd(rknn_context ctx, int32_t fd, void *virt_addr, uint32_t size, int32_t offset); 668 | 669 | 670 | /* rknn_create_mem_from_mb_blk (memory allocated outside) 671 | 672 | create tensor memory from mb_blk. 673 | 674 | input: 675 | rknn_context ctx the handle of context. 676 | void *mb_blk mb_blk allocate from system api. 677 | int32_t offset indicates the offset of the memory. 678 | return: 679 | rknn_tensor_mem the pointer of tensor memory information. 680 | */ 681 | rknn_tensor_mem* rknn_create_mem_from_mb_blk(rknn_context ctx, void *mb_blk, int32_t offset); 682 | 683 | 684 | /* rknn_create_mem (memory allocated inside) 685 | 686 | create tensor memory. 687 | 688 | input: 689 | rknn_context ctx the handle of context. 
690 | uint32_t size the size of tensor buffer. 691 | return: 692 | rknn_tensor_mem the pointer of tensor memory information. 693 | */ 694 | rknn_tensor_mem* rknn_create_mem(rknn_context ctx, uint32_t size); 695 | 696 | /* rknn_create_mem2 (memory allocated inside) 697 | 698 | create tensor memory. 699 | 700 | input: 701 | rknn_context ctx the handle of context. 702 | uint64_t size the size of tensor buffer. 703 | uint64_t alloc_flags control the memory is cacheable 704 | return: 705 | rknn_tensor_mem the pointer of tensor memory information. 706 | */ 707 | rknn_tensor_mem* rknn_create_mem2(rknn_context ctx, uint64_t size, uint64_t alloc_flags); 708 | 709 | /* rknn_destroy_mem (support allocate inside and outside) 710 | 711 | destroy tensor memory. 712 | 713 | input: 714 | rknn_context ctx the handle of context. 715 | rknn_tensor_mem *mem the pointer of tensor memory information. 716 | return: 717 | int error code 718 | */ 719 | int rknn_destroy_mem(rknn_context ctx, rknn_tensor_mem *mem); 720 | 721 | 722 | /* rknn_set_weight_mem 723 | 724 | set the weight memory. 725 | 726 | input: 727 | rknn_context ctx the handle of context. 728 | rknn_tensor_mem *mem the array of tensor memory information 729 | return: 730 | int error code. 731 | */ 732 | int rknn_set_weight_mem(rknn_context ctx, rknn_tensor_mem *mem); 733 | 734 | 735 | /* rknn_set_internal_mem 736 | 737 | set the internal memory. 738 | 739 | input: 740 | rknn_context ctx the handle of context. 741 | rknn_tensor_mem *mem the array of tensor memory information 742 | return: 743 | int error code. 744 | */ 745 | int rknn_set_internal_mem(rknn_context ctx, rknn_tensor_mem *mem); 746 | 747 | 748 | /* rknn_set_io_mem 749 | 750 | set the input and output tensors buffer. 751 | 752 | input: 753 | rknn_context ctx the handle of context. 754 | rknn_tensor_mem *mem the array of tensor memory information. 755 | rknn_tensor_attr *attr the attribute of input or output tensor buffer. 756 | return: 757 | int error code. 
758 | */ 759 | int rknn_set_io_mem(rknn_context ctx, rknn_tensor_mem *mem, rknn_tensor_attr *attr); 760 | 761 | /* rknn_set_input_shape(deprecated) 762 | 763 | set the input tensor shape (only valid for dynamic shape rknn model). 764 | 765 | input: 766 | rknn_context ctx the handle of context. 767 | rknn_tensor_attr *attr the attribute of input or output tensor buffer. 768 | return: 769 | int error code. 770 | */ 771 | int rknn_set_input_shape(rknn_context ctx, rknn_tensor_attr* attr); 772 | 773 | /* rknn_set_input_shapes 774 | 775 | set all the input tensor shapes. graph will run under current set of input shapes after rknn_set_input_shapes.(only valid for dynamic shape rknn model). 776 | 777 | input: 778 | rknn_context ctx the handle of context. 779 | uint32_t n_inputs the number of inputs. 780 | rknn_tensor_attr attr[] the attribute array of all input tensors. 781 | return: 782 | int error code. 783 | */ 784 | int rknn_set_input_shapes(rknn_context ctx, uint32_t n_inputs, rknn_tensor_attr attr[]); 785 | 786 | /* rknn_mem_sync 787 | 788 | sync cacheable rknn memory when both cpu and device access data. 789 | 790 | input: 791 | rknn_context context the handle of context. 792 | rknn_tensor_mem *mem the pointer of tensor memory information. 793 | rknn_mem_sync_mode mode the mode of sync cache. 794 | return: 795 | int error code. 796 | */ 797 | int rknn_mem_sync(rknn_context context, rknn_tensor_mem* mem, rknn_mem_sync_mode mode); 798 | 799 | #ifdef __cplusplus 800 | } //extern "C" 801 | #endif 802 | 803 | #endif //_RKNN_API_H 804 | -------------------------------------------------------------------------------- /runtime/Linux/librknn_api/include/rknn_custom_op.h: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | * 3 | * Copyright (c) 2017 - 2023 by Rockchip Corp. All rights reserved. 
4 | * 5 | * The material in this file is confidential and contains trade secrets 6 | * of Rockchip Corporation. This is proprietary information owned by 7 | * Rockchip Corporation. No part of this work may be disclosed, 8 | * reproduced, copied, transmitted, or used in any way for any purpose, 9 | * without the express written permission of Rockchip Corporation. 10 | * 11 | *****************************************************************************/ 12 | 13 | #ifndef _RKNN_CUSTOM_OP_H 14 | #define _RKNN_CUSTOM_OP_H 15 | 16 | #ifdef __cplusplus 17 | extern "C" { 18 | #endif 19 | 20 | #include "rknn_api.h" 21 | 22 | #include 23 | 24 | /* 25 | Error code returned by the RKNN Custom Operator API. 26 | */ 27 | #define RKNN_WARNING_SKIP_CUSTOM_OP_COMPUTE -14 /* if custom op init callback funtion return this code and op type is supported by RKNN, it will use RKNN implementation. */ 28 | 29 | #define RKNN_CUSTOM_OP_MAX_STR_LEN 64 30 | #define RKNN_CUSTOM_OP_MAX_VALUE_LEN 32 31 | #define RKNN_CUSTOM_OP_EXPORT __attribute__((visibility("default"))) 32 | 33 | #ifdef __arm__ 34 | typedef uint32_t rknn_custom_op_interal_context; 35 | #else 36 | typedef uint64_t rknn_custom_op_interal_context; 37 | #endif 38 | /* 39 | the backend execution device of custom operator. 
40 | */ 41 | typedef enum _rknn_target_type 42 | { 43 | RKNN_TARGET_TYPE_CPU = 1, /* backend device is cpu */ 44 | RKNN_TARGET_TYPE_GPU = 2, /* backend device is gpu */ 45 | RKNN_TARGET_TYPE_MAX 46 | } rknn_target_type; 47 | 48 | typedef struct _rknn_gpu_op_context 49 | { 50 | void* cl_context; 51 | void* cl_command_queue; 52 | void* cl_kernel; 53 | 54 | } rknn_gpu_op_context; 55 | 56 | typedef struct _rknn_custom_op_context 57 | { 58 | rknn_target_type target; /* custom op backend target */ 59 | rknn_custom_op_interal_context internal_ctx; /* the context of custom op*/ 60 | rknn_gpu_op_context gpu_ctx; /* the gpu context of custom op */ 61 | void* priv_data; /* the private data managed by user */ 62 | } rknn_custom_op_context; 63 | 64 | typedef struct _rknn_custom_op_tensor 65 | { 66 | rknn_tensor_attr attr; /* the attribute of tensor buffer. */ 67 | rknn_tensor_mem mem; /* the memory information of tensor. */ 68 | } rknn_custom_op_tensor; 69 | 70 | typedef struct _rknn_custom_op_attr 71 | { 72 | char name[RKNN_MAX_NAME_LEN]; /* the name of operator atrributes. */ 73 | rknn_tensor_type dtype; /* the data type of operator attributes, indicate the 'array' type. */ 74 | uint32_t n_elems; /* the number of 'array'. */ 75 | void* data; /* the array pointer of operator attributes, the data type of each element is determined by type. */ 76 | } rknn_custom_op_attr; 77 | 78 | /* 79 | the information of custom operator to add to the rknn_context. 80 | */ 81 | typedef struct _rknn_custom_op 82 | { 83 | uint32_t version; /* custom op version */ 84 | rknn_target_type target; /* custom op backend target */ 85 | char op_type[RKNN_MAX_NAME_LEN]; /* custom op type */ 86 | 87 | char cl_kernel_name[RKNN_MAX_NAME_LEN]; /* the opencl kernel name used by custom op */ 88 | char* cl_kernel_source; /* if cl_source_size > 0, pointer to the cl kernel source string, if cl_source_size = 0, 89 | filepath to the cl kernel file. 
*/ 90 | uint64_t cl_source_size; /* the size of cl_kernel_source */ 91 | char cl_build_options[RKNN_MAX_NAME_LEN]; /* the options for opencl to build clProgram used by custom op */ 92 | 93 | /** 94 | * The callback function sets that the users need to code 95 | */ 96 | int (*init)(rknn_custom_op_context* op_ctx, rknn_custom_op_tensor* inputs, uint32_t n_inputs, 97 | rknn_custom_op_tensor* outputs, uint32_t n_outputs); /* [optional] custom op kernel init falllback function*/ 98 | int (*prepare)(rknn_custom_op_context* op_ctx, rknn_custom_op_tensor* inputs, uint32_t n_inputs, 99 | rknn_custom_op_tensor* outputs, uint32_t n_outputs); /* [optional] custom op kernel prepare falllback function*/ 100 | int (*compute)(rknn_custom_op_context* op_ctx, rknn_custom_op_tensor* inputs, uint32_t n_inputs, 101 | rknn_custom_op_tensor* outputs, uint32_t n_outputs); /* [required] custom op kernel compute falllback function */ 102 | int (*compute_native)(rknn_custom_op_context* op_ctx, rknn_custom_op_tensor* inputs, uint32_t n_inputs, 103 | rknn_custom_op_tensor* outputs, uint32_t n_outputs); /* [optional] custom op kernel compute with native attribute falllback function */ 104 | int (*destroy)(rknn_custom_op_context* op_ctx); /* [optional] custom op kernel compute falllback function */ 105 | 106 | } rknn_custom_op; 107 | 108 | /** 109 | * dlopen custom op with so required this function 110 | */ 111 | typedef rknn_custom_op* (*get_custom_op_func)(); 112 | 113 | /* rknn_register_custom_ops 114 | 115 | Register custom operators to rknn_context. 116 | Steps to use a custom op: 117 | 1. Create a rknn_custom_op structure array and fill in it. 118 | 2. Setup prepare/compute/compute_native/destroy callback function and add them to the 119 | rknn_custom_op.(compute is required and other function is optional, compute_native is not supported now, set it 120 | to nullptr) 121 | 3. Call rknn_register_custom_ops to register the op type after rknn_init. 
122 | input: 123 | rknn_context ctx the handle of context. 124 | rknn_custom_op* op the custom operator array, each of which contains operator information and calllback function. 125 | uint32_t custom_op_num the length of rknn_custom_op array. 126 | return: 127 | int error code. 128 | */ 129 | int rknn_register_custom_ops(rknn_context ctx, rknn_custom_op* op, uint32_t custom_op_num); 130 | 131 | /* rknn_custom_op_get_op_attr 132 | 133 | input: 134 | rknn_custom_op_context* op_ctx the handle of custom op context. 135 | const char* attr_name the attribute name of operator. 136 | rknn_custom_op_attr* op_attr the data and information of operator attributes. 137 | */ 138 | void rknn_custom_op_get_op_attr(rknn_custom_op_context* op_ctx, const char* attr_name, rknn_custom_op_attr* op_attr); 139 | 140 | #ifdef __cplusplus 141 | } // extern "C" 142 | #endif 143 | 144 | #endif //_RKNN_CUSTOM_OP_H 145 | -------------------------------------------------------------------------------- /runtime/Linux/librknn_api/include/rknn_matmul_api.h: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | * 3 | * Copyright (c) 2017 - 2018 by Rockchip Corp. All rights reserved. 4 | * 5 | * The material in this file is confidential and contains trade secrets 6 | * of Rockchip Corporation. This is proprietary information owned by 7 | * Rockchip Corporation. No part of this work may be disclosed, 8 | * reproduced, copied, transmitted, or used in any way for any purpose, 9 | * without the express written permission of Rockchip Corporation. 
10 | * 11 | *****************************************************************************/ 12 | 13 | #ifndef _RKNN_MATMUL_API_H 14 | #define _RKNN_MATMUL_API_H 15 | 16 | #ifdef __cplusplus 17 | extern "C" { 18 | #endif 19 | 20 | #include "rknn_api.h" 21 | 22 | typedef rknn_context rknn_matmul_ctx; 23 | 24 | typedef enum _rknn_matmul_quant_type 25 | { 26 | RKNN_QUANT_TYPE_PER_LAYER_SYM = 0, 27 | RKNN_QUANT_TYPE_PER_LAYER_ASYM = 1, 28 | RKNN_QUANT_TYPE_PER_CHANNEL_SYM = 2, 29 | RKNN_QUANT_TYPE_PER_CHANNEL_ASYM = 3, 30 | RKNN_QUANT_TYPE_PER_GROUP_SYM = 4, 31 | RKNN_QUANT_TYPE_PER_GROUP_ASYM = 5, 32 | } rknn_matmul_quant_type; 33 | 34 | typedef struct _rknn_quant_params 35 | { 36 | char name[RKNN_MAX_NAME_LEN]; 37 | 38 | // matmul tensor scale 39 | float* scale; 40 | int32_t scale_len; 41 | 42 | // matmul tensor zero point 43 | int32_t* zp; 44 | int32_t zp_len; 45 | 46 | } rknn_quant_params; 47 | 48 | typedef enum _rknn_matmul_type 49 | { 50 | RKNN_FLOAT16_MM_FLOAT16_TO_FLOAT32 = 1, 51 | RKNN_INT8_MM_INT8_TO_INT32 = 2, 52 | RKNN_INT8_MM_INT8_TO_INT8 = 3, 53 | RKNN_FLOAT16_MM_FLOAT16_TO_FLOAT16 = 4, 54 | RKNN_FLOAT16_MM_INT8_TO_FLOAT32 = 5, 55 | RKNN_FLOAT16_MM_INT8_TO_FLOAT16 = 6, 56 | RKNN_FLOAT16_MM_INT4_TO_FLOAT32 = 7, 57 | RKNN_FLOAT16_MM_INT4_TO_FLOAT16 = 8, 58 | RKNN_INT8_MM_INT8_TO_FLOAT32 = 9, 59 | RKNN_INT4_MM_INT4_TO_INT16 = 10, 60 | RKNN_INT8_MM_INT4_TO_INT32 = 11, 61 | RKNN_FLOAT16_MM_INT4_TO_BFLOAT16 = 12, 62 | } rknn_matmul_type; 63 | 64 | inline static const char* get_matmul_type_string(rknn_matmul_type type) 65 | { 66 | switch (type) { 67 | case RKNN_FLOAT16_MM_FLOAT16_TO_FLOAT32: 68 | return "RKNN_FLOAT16_MM_FLOAT16_TO_FLOAT32"; 69 | case RKNN_INT8_MM_INT8_TO_INT32: 70 | return "RKNN_INT8_MM_INT8_TO_INT32"; 71 | case RKNN_INT8_MM_INT8_TO_INT8: 72 | return "RKNN_INT8_MM_INT8_TO_INT8"; 73 | case RKNN_FLOAT16_MM_FLOAT16_TO_FLOAT16: 74 | return "RKNN_FLOAT16_MM_FLOAT16_TO_FLOAT16"; 75 | case RKNN_FLOAT16_MM_INT8_TO_FLOAT32: 76 | return 
"RKNN_FLOAT16_MM_INT8_TO_FLOAT32"; 77 | case RKNN_FLOAT16_MM_INT8_TO_FLOAT16: 78 | return "RKNN_FLOAT16_MM_INT8_TO_FLOAT16"; 79 | case RKNN_INT4_MM_INT4_TO_INT16: 80 | return "RKNN_INT4_MM_INT4_TO_INT16"; 81 | case RKNN_FLOAT16_MM_INT4_TO_FLOAT32: 82 | return "RKNN_FLOAT16_MM_INT4_TO_FLOAT32"; 83 | case RKNN_FLOAT16_MM_INT4_TO_FLOAT16: 84 | return "RKNN_FLOAT16_MM_INT4_TO_FLOAT16"; 85 | case RKNN_INT8_MM_INT4_TO_INT32: 86 | return "RKNN_INT8_MM_INT4_TO_INT32"; 87 | case RKNN_INT8_MM_INT8_TO_FLOAT32: 88 | return "RKNN_INT8_MM_INT8_TO_FLOAT32"; 89 | case RKNN_FLOAT16_MM_INT4_TO_BFLOAT16: 90 | return "RKNN_FLOAT16_MM_INT4_TO_BFLOAT16"; 91 | default: 92 | return "UNKNOW"; 93 | } 94 | } 95 | 96 | typedef struct _rknn_matmul_tensor_attr 97 | { 98 | char name[RKNN_MAX_NAME_LEN]; 99 | 100 | // indicate A(M, K) or B(K, N) or C(M, N) 101 | uint32_t n_dims; 102 | uint32_t dims[RKNN_MAX_DIMS]; 103 | 104 | // matmul tensor size 105 | uint32_t size; 106 | 107 | // matmul tensor data type 108 | // int8 : A, B 109 | // int32: C 110 | rknn_tensor_type type; 111 | 112 | } rknn_matmul_tensor_attr; 113 | 114 | typedef struct _rknn_matmul_io_attr 115 | { 116 | // indicate A(M, K) or B(K, N) or C(M, N) 117 | rknn_matmul_tensor_attr A; 118 | rknn_matmul_tensor_attr B; 119 | rknn_matmul_tensor_attr C; 120 | } rknn_matmul_io_attr; 121 | 122 | /* 123 | matmul dynamic shape struct 124 | */ 125 | typedef struct _rknn_matmul_shape 126 | { 127 | int32_t M; 128 | int32_t K; 129 | int32_t N; 130 | } rknn_matmul_shape; 131 | 132 | /* 133 | the layout of matmul input/output tensor. 
134 | */ 135 | typedef enum 136 | { 137 | RKNN_MM_LAYOUT_NORM = 0, 138 | RKNN_MM_LAYOUT_NATIVE = 1, 139 | RKNN_MM_LAYOUT_TP_NORM = 2, 140 | } rknn_matmul_layout; 141 | 142 | /* 143 | matmul information struct 144 | */ 145 | typedef struct rknn_matmul_info_t 146 | { 147 | int32_t M; 148 | int32_t K; // limit: RK3566/3568: int8 type must be aligned with 32byte, float16 type must be aligned with 16byte; 149 | // RK3562: int8 type must be aligned with 32byte, float16 type must be aligned with 32byte; 150 | // RK3588/3576: int8 type must be aligned with 32byte, float16 type must be aligned with 32byte, 151 | // int4 type must be aligned with 32byte; 152 | int32_t N; // limit: RK3566/3568: int8 type must be aligned with 16byte, float16 type must be aligned with 8byte; 153 | // RK3562: int8 type must be aligned with 16byte, float16 type must be aligned with 8byte; 154 | // RK3588/3576: int8 type must be aligned with 32byte, float16 type must be aligned with 16byte, 155 | // int4 type must be aligned with 64byte; 156 | // matmul data type 157 | // int4: int4(A) x int4(B) -> int16(C) 158 | // int8: int8(A) x int8(B) -> int32(C) 159 | // float16: float16(A) x float16(B) -> float32(C) 160 | rknn_matmul_type type; 161 | 162 | // matmul native layout for B 163 | // 0: normal layout 164 | // 1: native layout 165 | int16_t B_layout; 166 | 167 | // matmul quant type for B 168 | // A and C only support per layer 169 | // 0: per layer 170 | // 1: per channel 171 | // 2: per group 172 | int16_t B_quant_type; 173 | 174 | // matmul native layout for A and C 175 | // 0: normal layout 176 | // 1: native layout 177 | int16_t AC_layout; 178 | 179 | // matmul quant type for A and C, only support 0 180 | int16_t AC_quant_type; 181 | 182 | // iommu domain id, each domain has 4GB of space 183 | int32_t iommu_domain_id; 184 | 185 | // B_quant_type set 2, group size is enable 186 | int16_t group_size; 187 | 188 | // reserved field 189 | int8_t reserved[34]; 190 | } rknn_matmul_info; 191 | 192 | 
/* rknn_matmul_create 193 | 194 | params: 195 | rknn_matmul_ctx *ctx the handle of context. 196 | rknn_matmul_info *info the matmal information. 197 | rknn_matmul_io_attr *io_attr inputs/output attribute 198 | return: 199 | int error code 200 | */ 201 | int rknn_matmul_create(rknn_matmul_ctx* ctx, rknn_matmul_info* info, rknn_matmul_io_attr* io_attr); 202 | 203 | /* rknn_matmul_create_dynamic_shape 204 | 205 | params: 206 | rknn_matmul_ctx *ctx the handle of context. 207 | rknn_matmul_info *info the matmal information. 208 | int shape_num the supported shape number of matmul. 209 | rknn_matmul_shape dynamic_shapes[] the supported M,K,N shape struct array. 210 | rknn_matmul_io_attr *io_attr the array of inputs and output attribute 211 | return: 212 | int error code 213 | */ 214 | /* 215 | 原来的info.M, K, N无效 216 | */ 217 | int rknn_matmul_create_dynamic_shape(rknn_matmul_ctx* ctx, rknn_matmul_info* info, int shape_num, 218 | rknn_matmul_shape dynamic_shapes[], rknn_matmul_io_attr io_attrs[]); 219 | 220 | /* rknn_matmul_set_io_mem 221 | 222 | params: 223 | rknn_matmul_ctx ctx the handle of context. 224 | rknn_tensor_mem *mem the pointer of tensor memory information. 225 | rknn_matmul_tensor_attr *attr the attribute of input or output tensor buffer. 226 | return: 227 | int error code. 
228 | 229 | formula: 230 | C = A * B, 231 | 232 | limit: 233 | K max: k <= 10240 234 | K limit: RK3566/3568: int8 type must be aligned with 32byte, float16 type must be aligned with 16byte; 235 | RK3562: int8 type must be aligned with 32byte, float16 type must be aligned with 32byte; 236 | RK3588/3576: int8 type must be aligned with 32byte, float16 type must be aligned with 32byte, 237 | int4 type must be aligned with 32byte; 238 | N limit: RK3566/3568: int8 type must be aligned with 16byte, float16 type must be aligned with 8byte; 239 | RK3562: int8 type must be aligned with 16byte, float16 type must be aligned with 8byte; 240 | RK3588/3576: int8 type must be aligned with 32byte, float16 type must be aligned with 16byte, 241 | int4 type must be aligned with 64byte; 242 | A shape: M x K 243 | normal layout: (M, K) 244 | [M1K1, M1K2, ..., M1Kk, 245 | M2K1, M2K2, ..., M2Kk, 246 | ... 247 | MmK1, MmK2, ..., MmKk] 248 | for RK3566/3568: 249 | int8: 250 | native layout: (K / 8, M, 8) 251 | [K1M1, K2M1, ..., K8M1, 252 | K9M2, K10M2, ..., K16M2, 253 | ... 254 | K(k-7)Mm, K(k-6)Mm, ..., KkMm] 255 | float16: 256 | native layout: (K / 4, M, 4) 257 | [K1M1, K2M1, ..., K4M1, 258 | K9M2, K10M2, ..., K8M2, 259 | ... 260 | K(k-3)Mm, K(k-2)Mm, ..., KkMm] 261 | for RK3562: 262 | int8: 263 | native layout: (K / 16, M, 16) 264 | [K1M1, K2M1, ..., K16M1, 265 | K17M2, K18M2, ..., K32M2, 266 | ... 267 | K(k-15)Mm, K(k-14)Mm, ..., KkMm] 268 | float16: 269 | native layout: (K / 8, M, 8) 270 | [K1M1, K2M1, ..., K8M1, 271 | K9M2, K10M2, ..., K16M2, 272 | ... 273 | K(k-7)Mm, K(k-6)Mm, ..., KkMm] 274 | for RK3588/3576: 275 | int4: 276 | native layout: (K / 32, M, 32) 277 | [K1M1, K2M1, ..., K32M1, 278 | K33M2, K10M2, ..., K64M2, 279 | ... 280 | K(k-31)Mm, K(k-30)Mm, ..., KkMm] 281 | int8: 282 | native layout: (K / 16, M, 16) 283 | [K1M1, K2M1, ..., K16M1, 284 | K17M2, K18M2, ..., K32M2, 285 | ... 
286 | K(k-15)Mm, K(k-14)Mm, ..., KkMm] 287 | float16: 288 | native layout: (K / 8, M, 8) 289 | [K1M1, K2M1, ..., K8M1, 290 | K9M2, K10M2, ..., K16M2, 291 | ... 292 | K(k-7)Mm, K(k-6)Mm, ..., KkMm] 293 | B shape: K x N 294 | normal layout: (K, N) 295 | [K1N1, K1N2, ..., K1Nn, 296 | K2N1, K2N2, ..., K2Nn, 297 | ... 298 | KkN1, KkN2, ..., KkNn] 299 | for RK3566/3568: 300 | int8: 301 | native layout: (N / 16, K / 32, 16, 32) 302 | [K1N1, K2N1, ..., K32N1, 303 | K1N2, K2N2, ..., K32N2, 304 | ... 305 | K1N16, K2N16, ..., K32N16, 306 | K33N1, K34N1, ..., K64N1, 307 | K33N2, K34N2, ..., K64N2, 308 | ... 309 | K(k-31)N16, K(k-30)N16, ..., KkN16, 310 | K1N17, K2N17, ..., K32N17, 311 | K1N18, K2N18, ..., K32N18, 312 | ... 313 | K(k-31)Nn, K(k-30)Nn, ..., KkNn] 314 | float16: 315 | native layout: (N / 8, K / 16, 8, 16) 316 | [K1N1, K2N1, ..., K16N1, 317 | K1N2, K2N2, ..., K16N2, 318 | ... 319 | K1N8, K2N8, ..., K16N8, 320 | K17N1, K18N1, ..., K32N1, 321 | K17N2, K18N2, ..., K32N2, 322 | ... 323 | K(k-15)N8, K(k-30)N8, ..., KkN8, 324 | K1N9, K2N9, ..., K16N9, 325 | K1N10, K2N10, ..., K16N10, 326 | ... 327 | K(k-15)Nn, K(k-14)Nn, ..., KkNn] 328 | for RK3562: 329 | int8: 330 | native layout: (N / 16, K / 32, 16, 32) 331 | [K1N1, K2N1, ..., K32N1, 332 | K1N2, K2N2, ..., K32N2, 333 | ... 334 | K1N16, K2N16, ..., K32N16, 335 | K33N1, K34N1, ..., K64N1, 336 | K33N2, K34N2, ..., K64N2, 337 | ... 338 | K(k-31)N16, K(k-30)N16, ..., KkN16, 339 | K1N17, K2N17, ..., K32N17, 340 | K1N18, K2N18, ..., K32N18, 341 | ... 342 | K(k-31)Nn, K(k-30)Nn, ..., KkNn] 343 | float16: 344 | native layout: (N / 8, K / 32, 8, 32) 345 | [K1N1, K2N1, ..., K32N1, 346 | K1N2, K2N2, ..., K32N2, 347 | ... 348 | K1N8, K2N8, ..., K32N8, 349 | K33N1, K34N1, ..., K64N1, 350 | K33N2, K34N2, ..., K64N2, 351 | ... 352 | K(k-31)N8, K(k-30)N8, ..., KkN8, 353 | K1N9, K2N9, ..., K16N9, 354 | K1N10, K2N10, ..., K16N10, 355 | ... 
356 | K(k-31)Nn, K(k-30)Nn, ..., KkNn] 357 | for RK3588: 358 | when K > 8192, the B data will be split into T segments. 359 | int T = std::ceil(K / 8192); 360 | For example: normal layout -> native layout 361 | K = 20488, N = 4096, T = 3, the data will be split into 3 segments. 362 | subN = rknn_matmul_io_attr.B.dims[2]; 363 | subK = rknn_matmul_io_attr.B.dims[3]; 364 | (8196, 4096) (4096 / subN, 8196 / subK, subN, subK) 365 | (K, N) = (20488, 4096) -> (8196, 4096) -> (4096 / subN, 8196 / subK, subN, subK) 366 | normal layout (4096, 4096) (4096 / subN, 4096 / subK, subN, subK) 367 | T normal layout T native layout 368 | It is recommended to use the rknn_B_normal_layout_to_native_layout interface for direct data conversion. 369 | for RK3576: 370 | when K > 4096, the B data will be split into T segments. 371 | int T = std::ceil(K / 4096); 372 | For example: normal layout -> native layout 373 | K = 10240, N = 2048, T = 3, the data will be split into 3 segments. 374 | subN = rknn_matmul_io_attr.B.dims[2]; 375 | subK = rknn_matmul_io_attr.B.dims[3]; 376 | (4096, 2048) (2048 / subN, 4096 / subK, subN, subK) 377 | (K, N) = (10240, 2048) -> (4096, 2048) -> (2048 / subN, 4096 / subK, subN, subK) 378 | normal layout (2048, 2048) (2048 / subN, 2048 / subK, subN, subK) 379 | T normal layout T native layout 380 | It is recommended to use the rknn_B_normal_layout_to_native_layout interface for direct data conversion. 381 | for RK3588/3576: 382 | int4: 383 | native layout: (N / 64, K / 32, 64, 32) 384 | [K1N1, K2N1, ..., K32N1, 385 | K1N2, K2N2, ..., K32N2, 386 | ... 387 | K1N64, K2N64, ..., K32N64, 388 | K33N1, K34N1, ..., K64N1, 389 | K33N2, K34N2, ..., K64N2, 390 | ... 391 | K(k-31)N64, K(k-30)N64, ..., KkN64, 392 | K1N65, K2N65, ..., K32N65, 393 | K1N66, K2N66, ..., K32N66, 394 | ... 395 | K(k-31)Nn, K(k-30)Nn, ..., KkNn] 396 | int8: 397 | native layout: (N / 32, K / 32, 32, 32) 398 | [K1N1, K2N1, ..., K32N1, 399 | K1N2, K2N2, ..., K32N2, 400 | ... 
401 | K1N32, K2N32, ..., K32N32, 402 | K33N1, K34N1, ..., K64N1, 403 | K33N2, K34N2, ..., K64N2, 404 | ... 405 | K(k-31)N32, K(k-30)N32, ..., KkN32, 406 | K1N33, K2N33, ..., K32N33, 407 | K1N34, K2N34, ..., K32N34, 408 | ... 409 | K(k-31)Nn, K(k-30)Nn, ..., KkNn] 410 | float16: 411 | native layout: (N / 16, K / 32, 16, 32) 412 | [K1N1, K2N1, ..., K32N1, 413 | K1N2, K2N2, ..., K32N2, 414 | ... 415 | K1N16, K2N16, ..., K32N16, 416 | K33N1, K34N1, ..., K64N1, 417 | K33N2, K34N2, ..., K64N2, 418 | ... 419 | K(k-31)N16, K(k-30)N16, ..., KkN16, 420 | K1N17, K2N17, ..., K32N17, 421 | K1N18, K2N18, ..., K32N18, 422 | ... 423 | K(k-31)Nn, K(k-30)Nn, ..., KkNn] 424 | C shape: M x N 425 | normal layout: (M, N) 426 | [M1N1, M1N2, ..., M1Nn, 427 | M2N1, M2N2, ..., M2Nn, 428 | ... 429 | MmN1, MmN2, ..., MmNn] 430 | native layout: (N / 4, M, 4) 431 | [N1M1, N2M1, ..., N4M1, 432 | N5M2, N6M2, ..., N8M2, 433 | ... 434 | N(n-3)Mm, N(n-2)Mm, ..., NnMm] 435 | for RK3588: 436 | int4: 437 | native layout: (N / 8, M, 8) 438 | [N1M1, N2M1, ..., N8M1, 439 | N9M2, N10M2, ..., N16M2, 440 | ... 441 | N(n-7)Mm, N(n-6)Mm, ..., NnMm] 442 | */ 443 | int rknn_matmul_set_io_mem(rknn_matmul_ctx ctx, rknn_tensor_mem* mem, rknn_matmul_tensor_attr* attr); 444 | 445 | /* rknn_matmul_set_core_mask 446 | 447 | set rknn core mask.(only support RK3588 in current) 448 | 449 | RKNN_NPU_CORE_AUTO: auto mode, default value 450 | RKNN_NPU_CORE_0: core 0 mode 451 | RKNN_NPU_CORE_1: core 1 mode 452 | RKNN_NPU_CORE_2: core 2 mode 453 | RKNN_NPU_CORE_0_1: combine core 0/1 mode 454 | RKNN_NPU_CORE_0_1_2: combine core 0/1/2 mode 455 | 456 | input: 457 | rknn_matmul_ctx context the handle of context. 458 | rknn_core_mask core_mask the core mask. 459 | return: 460 | int error code. 
461 | */ 462 | int rknn_matmul_set_core_mask(rknn_matmul_ctx context, rknn_core_mask core_mask); 463 | 464 | /* rknn_matmul_set_quant_params 465 | 466 | set quant params.(only support matmul type RKNN_INT8_MM_INT8_TO_INT8, RKNN_INT8_MM_INT8_TO_INT32) 467 | 468 | input: 469 | rknn_matmul_ctx context the handle of context. 470 | rknn_quant_params params quant params. 471 | return: 472 | int error code. 473 | */ 474 | int rknn_matmul_set_quant_params(rknn_matmul_ctx context, rknn_quant_params* params); 475 | 476 | /* rknn_matmul_get_quant_params 477 | 478 | get per channel quant params.(only support matmul type RKNN_INT8_MM_INT8_TO_INT32) 479 | 480 | input: 481 | rknn_matmul_ctx context the handle of context. 482 | rknn_quant_params params quant params. 483 | float scale get scale for user. 484 | return: 485 | int error code. 486 | */ 487 | int rknn_matmul_get_quant_params(rknn_matmul_ctx ctx, rknn_quant_params* params, float* scale); 488 | 489 | /* rknn_matmul_set_dynamic_shape 490 | 491 | set the matmul input/output shape. matmul will run under current input shape after rknn_matmul_set_dynamic_shape, 492 | only support M dynamicly now. 493 | 494 | input: 495 | rknn_matmul_ctx ctx the handle of context. 496 | rknn_matmul_shape* shape the M,K,N shape of matmul currently 497 | return: 498 | int error code. 499 | */ 500 | int rknn_matmul_set_dynamic_shape(rknn_matmul_ctx ctx, rknn_matmul_shape* shape); 501 | 502 | /* rknn_matmul_run 503 | 504 | run the matmul in blocking mode 505 | 506 | params: 507 | rknn_matmul_ctx ctx the handle of context. 508 | return: 509 | int error code. 510 | */ 511 | int rknn_matmul_run(rknn_matmul_ctx ctx); 512 | 513 | /* rknn_matmul_destroy 514 | 515 | destroy the matmul context 516 | 517 | params: 518 | rknn_matmul_ctx ctx the handle of context. 519 | return: 520 | int error code. 
521 | */ 522 | int rknn_matmul_destroy(rknn_matmul_ctx ctx); 523 | 524 | /* rknn_B_normal_layout_to_native_layout 525 | 526 | change the B normal layout buffer to native layout buffer 527 | 528 | params: 529 | void* B_input B normal layout buffer. 530 | void* B_output B native layout buffer. 531 | int K K 532 | int N N 533 | rknn_matmul_info info matmul info 534 | return: 535 | int error code. 536 | */ 537 | int rknn_B_normal_layout_to_native_layout(void* B_input, void* B_output, int K, int N, rknn_matmul_info* info); 538 | 539 | #ifdef __cplusplus 540 | } // extern "C" 541 | #endif 542 | 543 | #endif // _RKNN_MATMUL_API_H -------------------------------------------------------------------------------- /runtime/Linux/rknn_server/aarch64/usr/bin/restart_rknn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | killall start_rknn.sh > /dev/null 2>&1 4 | killall rknn_server > /dev/null 2>&1 5 | start_rknn.sh & -------------------------------------------------------------------------------- /runtime/Linux/rknn_server/aarch64/usr/bin/rknn_server: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/runtime/Linux/rknn_server/aarch64/usr/bin/rknn_server -------------------------------------------------------------------------------- /runtime/Linux/rknn_server/aarch64/usr/bin/start_rknn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | while true 4 | do 5 | sleep 1 6 | rknn_server #>/dev/null 2>&1 7 | done 8 | -------------------------------------------------------------------------------- /src/common.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/src/common.cpp 
-------------------------------------------------------------------------------- /src/rga_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/src/rga_utils.cpp -------------------------------------------------------------------------------- /src/rknn_model.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/src/rknn_model.cpp -------------------------------------------------------------------------------- /yolo11s.rknn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/yolo11s.rknn --------------------------------------------------------------------------------