├── .gitignore ├── CMakeLists.txt ├── CMakePresets.json ├── LICENSE ├── README.md ├── bus.jpg ├── img ├── result.jpg └── speed.jpg ├── include ├── common.h ├── rga_utils.h ├── rknn.h └── rknn_model.h ├── install_rknpu.sh ├── main.cpp ├── report.md ├── rknn.cpp ├── runtime └── Linux │ ├── librknn_api │ └── include │ │ ├── rknn_api.h │ │ ├── rknn_custom_op.h │ │ └── rknn_matmul_api.h │ └── rknn_server │ └── aarch64 │ └── usr │ └── bin │ ├── restart_rknn.sh │ ├── rknn_server │ └── start_rknn.sh ├── src ├── common.cpp ├── rga_utils.cpp └── rknn_model.cpp └── yolo11s.rknn /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # CMakeLists.txt: CMake project for rknn (YOLO11 inference on RKNPU).
2 | # Sources and project-specific logic are defined here.
3 | cmake_minimum_required(VERSION 3.12)
4 | 
5 | # Enable Hot Reload for MSVC compilers if supported.
6 | # NOTE(review): restored generator expression — the extracted copy had lost
7 | # every '<' character, which made it invalid CMake.
8 | if(POLICY CMP0141)
9 |   cmake_policy(SET CMP0141 NEW)
10 |   set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT "$<IF:$<AND:$<C_COMPILER_ID:MSVC>,$<CXX_COMPILER_ID:MSVC>>,$<$<CONFIG:Debug,RelWithDebInfo>:EditAndContinue>,$<$<CONFIG:Debug,RelWithDebInfo>:ProgramDatabase>>")
11 | endif()
12 | 
13 | project("rknn" LANGUAGES CXX)
14 | 
15 | find_package(OpenCV REQUIRED)
16 | find_package(OpenMP REQUIRED)
17 | find_package(TBB REQUIRED)   # sudo apt-get install libtbb-dev
18 | 
19 | # Rockchip libraries have no CMake package config; locate them directly.
20 | find_library(RGA_LIBRARY NAMES rga)
21 | find_library(RKNN_API_LIBRARY NAMES rknnrt PATHS /usr/lib)
22 | 
23 | if(NOT RGA_LIBRARY)
24 |   message(FATAL_ERROR "Could not find librga")
25 | endif()
26 | if(NOT RKNN_API_LIBRARY)
27 |   message(FATAL_ERROR "Could not find librknnrt.so")
28 | endif()
29 | 
30 | # Explicit source list (preferred over file(GLOB), which silently misses
31 | # new files until the next re-configure). Keep in sync with src/.
32 | add_executable(rknn
33 |   rknn.cpp
34 |   src/common.cpp
35 |   src/rga_utils.cpp
36 |   src/rknn_model.cpp
37 | )
38 | 
39 | set_target_properties(rknn PROPERTIES
40 |   CXX_STANDARD 20
41 |   CXX_STANDARD_REQUIRED ON
42 | )
43 | 
44 | # Target-scoped include paths instead of directory-wide include_directories().
45 | target_include_directories(rknn PRIVATE
46 |   ${OpenCV_INCLUDE_DIRS}
47 |   ${CMAKE_SOURCE_DIR}/include
48 |   /usr/include/rga
49 | )
50 | 
51 | # Single keyworded target_link_libraries call; OpenMP is REQUIRED above,
52 | # so the imported target is always available here.
53 | target_link_libraries(rknn PRIVATE
54 |   OpenMP::OpenMP_CXX
55 |   ${OpenCV_LIBS}
56 |   TBB::tbb
57 |   ${RGA_LIBRARY}
58 |   ${RKNN_API_LIBRARY}
59 | )
60 | 
61 | # -O3 scoped to this target instead of appending to global CMAKE_CXX_FLAGS.
62 | target_compile_options(rknn PRIVATE -O3)
63 | -------------------------------------------------------------------------------- /CMakePresets.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 3, 3 | "configurePresets": [ 4 | { 5 | "name": "windows-base", 6 | "hidden": true, 7 | "generator": "Ninja", 8 | "binaryDir": "${sourceDir}/out/build/${presetName}", 9 | "installDir": "${sourceDir}/out/install/${presetName}", 10 | "cacheVariables": { 11 | "CMAKE_C_COMPILER": "cl.exe", 12 | "CMAKE_CXX_COMPILER": "cl.exe" 13 | }, 14 | "condition": { 15 | "type": "equals", 16 | "lhs": "${hostSystemName}", 17 | "rhs": "Windows" 18 | } 19 | }, 20 | { 21 | "name": "x64-debug", 22 | "displayName": "x64 Debug", 23 | "inherits": "windows-base", 24 | "architecture": { 25 | "value": "x64", 26 | "strategy": "external" 27 | }, 28 | "cacheVariables": { 29 | "CMAKE_BUILD_TYPE": "Debug" 30 | } 31 | }, 32 | { 33 | "name": "x64-release", 34 | "displayName":
"x64 Release", 35 | "inherits": "x64-debug", 36 | "cacheVariables": { 37 | "CMAKE_BUILD_TYPE": "Release" 38 | } 39 | }, 40 | { 41 | "name": "x86-debug", 42 | "displayName": "x86 Debug", 43 | "inherits": "windows-base", 44 | "architecture": { 45 | "value": "x86", 46 | "strategy": "external" 47 | }, 48 | "cacheVariables": { 49 | "CMAKE_BUILD_TYPE": "Debug" 50 | } 51 | }, 52 | { 53 | "name": "x86-release", 54 | "displayName": "x86 Release", 55 | "inherits": "x86-debug", 56 | "cacheVariables": { 57 | "CMAKE_BUILD_TYPE": "Release" 58 | } 59 | }, 60 | { 61 | "name": "linux-debug", 62 | "displayName": "Linux Debug", 63 | "generator": "Ninja", 64 | "binaryDir": "${sourceDir}/out/build/${presetName}", 65 | "installDir": "${sourceDir}/out/install/${presetName}", 66 | "cacheVariables": { 67 | "CMAKE_BUILD_TYPE": "Debug" 68 | }, 69 | "condition": { 70 | "type": "equals", 71 | "lhs": "${hostSystemName}", 72 | "rhs": "Linux" 73 | }, 74 | "vendor": { 75 | "microsoft.com/VisualStudioRemoteSettings/CMake/1.0": { 76 | "sourceDir": "$env{HOME}/.vs/$ms{projectDirName}" 77 | } 78 | } 79 | }, 80 | { 81 | "name": "orangepi", 82 | "displayName": "orangepi", 83 | "generator": "Ninja", 84 | "binaryDir": "${sourceDir}/out/build/${presetName}", 85 | "installDir": "${sourceDir}/out/install/${presetName}", 86 | "cacheVariables": { 87 | "CMAKE_BUILD_TYPE": "Debug", 88 | "CMAKE_CXX_COMPILER": "/usr/bin/g++", 89 | "CMAKE_C_COMPILER": "/usr/bin/gcc", 90 | //"CMAKE_PREFIX_PATH": "/usr/local", 91 | "CMAKE_INCLUDE_PATH": "/usr/include", 92 | "CMAKE_LIBRARY_PATH": "/usr/lib" 93 | }, 94 | "condition": { 95 | "type": "equals", 96 | "lhs": "${hostSystemName}", 97 | "rhs": "Linux" 98 | }, 99 | "vendor": { 100 | "microsoft.com/VisualStudioRemoteSettings/CMake/1.0": { 101 | "sourceDir": "$env{HOME}/.vs/$ms{projectDirName}" 102 | } 103 | } 104 | }, 105 | { 106 | "name": "macos-debug", 107 | "displayName": "macOS Debug", 108 | "generator": "Ninja", 109 | "binaryDir": "${sourceDir}/out/build/${presetName}", 110 
| "installDir": "${sourceDir}/out/install/${presetName}", 111 | "cacheVariables": { 112 | "CMAKE_BUILD_TYPE": "Debug" 113 | }, 114 | "condition": { 115 | "type": "equals", 116 | "lhs": "${hostSystemName}", 117 | "rhs": "Darwin" 118 | }, 119 | "vendor": { 120 | "microsoft.com/VisualStudioRemoteSettings/CMake/1.0": { 121 | "sourceDir": "$env{HOME}/.vs/$ms{projectDirName}" 122 | } 123 | } 124 | } 125 | ] 126 | } 127 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 
28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 
62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 
102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 
133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. 
You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | Copyright (C) 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rknn-cpp-yolo 2 | This project implements YOLOv11 inference on the RK3588 platform using the RKNN framework. With deep optimization of the official code and RGA hardware acceleration for image preprocessing, it achieves a stable 25 FPS for YOLOv11s without overclocking and core binding, showcasing efficient real-time object detection for embedded applications. 
3 | 4 | 本项目基于RKNN框架,在RK3588平台上实现了YOLOv11推理。通过对官方代码的深度优化和RGA硬件加速预处理,YOLOv11s在未超频和绑定大核的情况下,稳定达到25帧/秒,为嵌入式实时目标检测提供了高效解决方案。 5 | **** 6 | 7 | # YOLOv11 on RK3588 with RKNN 8 | 9 | ## Features 10 | - **YOLOv11 Inference**: Optimized implementation for RK3588. 11 | - **RGA Preprocessing**: Utilizes hardware acceleration for image processing. 12 | - **CMake Build System**: Easy to configure and build. 13 | - **High Performance**: Achieves 25 FPS without overclocking or core binding. 14 | - **Zero-Copy API**: Reduces inference overhead for better efficiency. 15 | - **RK3588 Optimization**: Supports concurrent inference across three NPU cores (requires custom thread pool implementation). 16 | 17 | ## Report: Inference Results and Speed 18 | 19 | ![Result](https://github.com/yuunnn-w/rknn-cpp-yolo/blob/main/img/result.jpg) 20 | 21 | ![Speed](https://github.com/yuunnn-w/rknn-cpp-yolo/blob/main/img/speed.jpg) 22 | 23 | ## Prerequisites 24 | - RK3588 development board 25 | - RKNPU Driver (version >= 0.9.6) 26 | - RKNN SDK 27 | - CMake (version 3.10 or higher) 28 | - OpenCV (for image handling, optional) 29 | 30 | ## Build Instructions 31 | 32 | 1. **Clone the repository:** 33 | ```bash 34 | git clone https://github.com/yuunnn-w/rknn-cpp-yolo.git 35 | cd rknn-cpp-yolo 36 | ``` 37 | 38 | 2. **Install dependencies:** 39 | ```bash 40 | sudo apt-get update 41 | sudo apt-get install -y build-essential gcc g++ gdb cmake ninja-build git libopencv-dev zlib1g-dev librga-dev ninja-build libomp-dev 42 | ``` 43 | 44 | 3. **Install RKNN SDK:** 45 | ```bash 46 | sudo bash install_rknpu.sh 47 | ``` 48 | 49 | 4. **Create a build directory:** 50 | ```bash 51 | mkdir build 52 | cd build 53 | ``` 54 | 55 | 5. **Configure the project with CMake:** 56 | ```bash 57 | cmake .. 58 | ``` 59 | 60 | 6. **Build the project:** 61 | ```bash 62 | make 63 | ``` 64 | 65 | 7. 
**Run the inference:** 66 | ```bash 67 | ./rknn 68 | ``` 69 | 70 | ## Usage 71 | After building the project, you can run the inference by executing the generated binary. Ensure that the RKNN model and test images are correctly placed in the specified paths. 72 | 73 | ## Optimization 74 | The project includes several optimizations to achieve high performance on the RK3588 platform: 75 | - Efficient use of RGA for image preprocessing. 76 | - Memory and computation optimizations in the inference pipeline. 77 | - Zero-Copy API to minimize memory overhead. 78 | - Support for concurrent NPU core utilization (requires thread pool implementation). 79 | 80 | ## Attention 81 | 82 | Please note that this project only provides an example of image-based inference in the `rknn.cpp` file. If you need to perform real-time inference in more complex application scenarios, you will need to implement it yourself. 83 | 84 | Additionally, this project is purely an experimental demo and is not responsible for any products or issues. The final interpretation right belongs to **yuunnn_w**. 85 | 86 | ## License 87 | This project is licensed under the GNU General Public License v3.0. See the [LICENSE](LICENSE) file for details. 88 | 89 | ## Acknowledgments 90 | - Thanks to the RKNN team for their framework and support. 91 | - Inspired by the official YOLOv11 implementation. 92 | 93 | For any questions or contributions, feel free to open an issue or submit a pull request. 94 | 95 | ## Contact Me 96 | 97 | If you have any questions, suggestions, or would like to contribute to this project, feel free to reach out! You can contact me through the following channels: 98 | 99 | - **Email**: [jiaxinsugar@gmail.com](mailto:jiaxinsugar@gmail.com) 100 | - **GitHub**: [yuunnn-w](https://github.com/yuunnn-w) 101 | 102 | I’m always open to discussions, collaborations, and feedback. Let’s make this project even better together! 
🚀 103 | -------------------------------------------------------------------------------- /bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/bus.jpg -------------------------------------------------------------------------------- /img/result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/img/result.jpg -------------------------------------------------------------------------------- /img/speed.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/img/speed.jpg -------------------------------------------------------------------------------- /include/common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/include/common.h -------------------------------------------------------------------------------- /include/rga_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/include/rga_utils.h -------------------------------------------------------------------------------- /include/rknn.h: -------------------------------------------------------------------------------- 1 |  2 | /* 3 | #include 4 | #include 5 | #include 6 | #include // 用于计时 7 | #include // 用于 setprecision 8 | #include "rknn_model.h" 9 | 10 | int main() { 11 | // 初始化模型 12 | std::string model_path = "/root/.vs/rknn/yolov9c.rknn"; 13 | rknn_model model(model_path); 14 | 15 | // 查询模型信息 16 | model.query_model_info(); 17 | 18 | // 打印量化和反量化信息 19 | 
model.print_quantization_info(); 20 | 21 | // 获取输入和输出属性 22 | const std::vector& input_attrs = model.get_input_attrs(); 23 | const std::vector& output_attrs = model.get_output_attrs(); 24 | 25 | 26 | 27 | // 打印输入张量形状 28 | std::cout << "Input Tensor Shapes:" << std::endl; 29 | for (const auto& attr : input_attrs) { 30 | std::cout << " Name: " << attr.name << ", Shape: "; 31 | for (uint32_t j = 0; j < attr.n_dims; ++j) { 32 | std::cout << attr.dims[j] << " "; 33 | } 34 | std::cout << std::endl; 35 | } 36 | // 设置模型输入参数 37 | std::vector inputs(input_attrs.size()); 38 | for (size_t i = 0; i < input_attrs.size(); ++i) { 39 | inputs[i].index = i; 40 | inputs[i].buf = new float[input_attrs[i].n_elems]; // 分配内存 41 | std::memset(inputs[i].buf, 0, input_attrs[i].n_elems * sizeof(float)); // 初始化为0 42 | inputs[i].size = input_attrs[i].n_elems; // * sizeof(float) 43 | inputs[i].pass_through = 0; // 用于指定输入数据是否直接传递给模型的输入节点 44 | inputs[i].type = RKNN_TENSOR_INT8;//input_attrs[i].type; // 输入数据类型 RKNN_TENSOR_INT8 45 | inputs[i].fmt = RKNN_TENSOR_NHWC;//input_attrs[i].fmt; // 输入数据格式 46 | // fmt: rknn_tensor_format类型,常见的有RKNN_TENSOR_NCHW、RKNN_TENSOR_NHWC、RKNN_TENSOR_NCHW_VEC、RKNN_TENSOR_UNDEFINED 47 | std::cout << "inputs[i].size: " << input_attrs[i].n_elems << std::endl; 48 | // 打印设置的输入参数 49 | std::cout << "Setting input parameter for tensor " << i << ":" << std::endl; 50 | std::cout << " Index: " << inputs[i].index << std::endl; 51 | std::cout << " Buffer size: " << inputs[i].size << " bytes" << std::endl; 52 | std::cout << " Pass through: " << static_cast(inputs[i].pass_through) << std::endl; 53 | std::cout << " Data type: " << inputs[i].type << std::endl; 54 | std::cout << " Data format: " << inputs[i].fmt << std::endl; //RKNN_TENSOR_NHWC 55 | } 56 | model.set_input(inputs); 57 | 58 | // 运行模型推理100次并计时 59 | const int num_runs = 20; 60 | std::chrono::duration total_time(0); 61 | 62 | for (int i = 0; i < num_runs; ++i) { 63 | auto start_time = std::chrono::high_resolution_clock::now(); 
64 | model.run(); 65 | auto end_time = std::chrono::high_resolution_clock::now(); 66 | total_time += end_time - start_time; 67 | } 68 | 69 | // 计算平均推理时间 70 | double average_time = total_time.count() / num_runs; 71 | std::cout << "Average inference time over " << num_runs << " runs: " << std::fixed << std::setprecision(10) << average_time << " ms" << std::endl; 72 | 73 | // 查询模型推理的逐层耗时 74 | rknn_perf_detail perf_detail; 75 | int ret = rknn_query(model.get_context(), RKNN_QUERY_PERF_DETAIL, &perf_detail, sizeof(perf_detail)); 76 | if (ret == RKNN_SUCC) { 77 | std::cout << "Model inference layer-wise performance details (in microseconds, 2 decimal places):" << std::endl; 78 | std::cout << perf_detail.perf_data << std::endl; 79 | } 80 | else { 81 | std::cerr << "Failed to query performance details." << std::endl; 82 | } 83 | 84 | // 查询模型推理的总耗时 85 | rknn_perf_run perf_run; 86 | ret = rknn_query(model.get_context(), RKNN_QUERY_PERF_RUN, &perf_run, sizeof(perf_run)); 87 | if (ret == RKNN_SUCC) { 88 | std::cout << "Total inference time (in milliseconds, 4 decimal places): " << std::fixed << std::setprecision(4) << static_cast(perf_run.run_duration) / 1000.0 << " ms" << std::endl; 89 | } 90 | else { 91 | std::cerr << "Failed to query total inference time." 
<< std::endl; 92 | } 93 | // 获取模型输出 94 | std::vector outputs(output_attrs.size()); 95 | for (size_t i = 0; i < output_attrs.size(); ++i) { 96 | outputs[i].index = i; 97 | outputs[i].is_prealloc = 0; 98 | outputs[i].want_float = 1; 99 | } 100 | model.get_output(outputs); 101 | 102 | // 打印输出张量形状 103 | std::cout << "Output Tensor Shapes:" << std::endl; 104 | for (const auto& attr : output_attrs) { 105 | std::cout << " Name: " << attr.name << ", Shape: "; 106 | for (uint32_t j = 0; j < attr.n_dims; ++j) { 107 | std::cout << attr.dims[j] << " "; 108 | } 109 | std::cout << std::endl; 110 | } 111 | 112 | // 释放输出资源 113 | model.release_output(outputs); 114 | 115 | // 释放输入数据内存 116 | for (auto& input : inputs) { 117 | delete[] static_cast(input.buf); 118 | } 119 | return 0; 120 | } 121 | */ 122 | 123 | #include 124 | #include 125 | #include 126 | #include // 用于计时 127 | #include // 用于 setprecision 128 | #include // 包含 OpenCV 头文件 129 | #include "rknn_model.h" 130 | 131 | int main() { 132 | // 初始化模型 133 | std::string model_path = "/root/.vs/rknn/yolov11s.rknn"; // yolov9c.rknn 134 | rknn_model model(model_path); 135 | 136 | // 查询模型信息 137 | int ctx_index = 0; // 假设使用第一个上下文 138 | // 打印量化和反量化信息 139 | // model.print_quantization_info(); 140 | 141 | // 生成填充为0的640x640x3的图像 142 | cv::Mat input_image = cv::Mat::ones(640, 640, CV_8UC3); 143 | //printf("Start inference..."); 144 | // 预热推理5次 145 | for (int i = 0; i < 5; ++i) { 146 | model.run_inference(input_image, ctx_index); 147 | } 148 | 149 | // 进行10次推理并统计时间 150 | std::vector inference_times; 151 | for (int i = 0; i < 1; ++i) { 152 | auto start = std::chrono::high_resolution_clock::now(); 153 | model.run_inference(input_image, ctx_index); 154 | auto end = std::chrono::high_resolution_clock::now(); 155 | std::chrono::duration elapsed = end - start; 156 | inference_times.push_back(elapsed.count()); 157 | } 158 | 159 | // 计算平均推理时间 160 | double total_time = 0.0; 161 | for (double time : inference_times) { 162 | total_time += time; 163 | } 
164 | double average_time = total_time / inference_times.size(); 165 | 166 | // 输出推理时间 167 | std::cout << "Function call time: " << std::fixed << std::setprecision(4) << average_time << " ms" << std::endl; 168 | 169 | 170 | 171 | // 打印输出向量的形状和大小 172 | //std::cout << "Output vector shape: [" << output.size() << "]" << std::endl; 173 | 174 | // 打印平均推理时间,精确到毫秒,保留四位小数 175 | //std::cout << "Average inference time: " << std::fixed << std::setprecision(4) << average_time << " ms" << std::endl; 176 | 177 | 178 | 179 | 180 | 181 | // 打印输出向量的形状和大小 182 | //std::cout << "Output vector shape: [" << output.size() << "]" << std::endl; 183 | 184 | // 重塑输出数据为 [8400, 84] 185 | //std::vector> reshaped_output(8400, std::vector(84)); 186 | //std::memcpy(reshaped_output.data(), output.data(), 8400 * 84 * sizeof(float)); 187 | 188 | // 查询模型推理的逐层耗时 189 | /* 190 | rknn_perf_detail perf_detail; 191 | int ret = rknn_query(model.get_context(ctx_index), RKNN_QUERY_PERF_DETAIL, &perf_detail, sizeof(perf_detail)); 192 | if (ret == RKNN_SUCC) { 193 | std::cout << "Model inference layer-wise performance details (in microseconds, 2 decimal places):" << std::endl; 194 | std::cout << perf_detail.perf_data << std::endl; 195 | } 196 | else { 197 | std::cerr << "Failed to query performance details." << std::endl; 198 | } 199 | */ 200 | // 查询模型推理的总耗时 201 | 202 | 203 | rknn_perf_run perf_run; 204 | int ret = rknn_query(model.get_context(ctx_index), RKNN_QUERY_PERF_RUN, &perf_run, sizeof(perf_run)); 205 | if (ret == RKNN_SUCC) { 206 | std::cout << "Real inference time: " << std::fixed << std::setprecision(4) << static_cast(perf_run.run_duration) / 1000.0 << " ms" << std::endl; 207 | } 208 | else { 209 | std::cerr << "Failed to query total inference time." 
<< std::endl; 210 | } 211 | 212 | 213 | return 0; 214 | } -------------------------------------------------------------------------------- /include/rknn_model.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/include/rknn_model.h -------------------------------------------------------------------------------- /install_rknpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 定义源目录和目标目录 4 | SOURCE_DIR_BIN="./runtime/Linux/rknn_server/aarch64/usr/bin" 5 | DEST_DIR_BIN="/usr/bin" 6 | 7 | SOURCE_DIR_INCLUDE="./runtime/Linux/librknn_api/include" 8 | DEST_DIR_INCLUDE="/usr/include" 9 | 10 | SOURCE_DIR_LIB="./runtime/Linux/librknn_api/aarch64" 11 | DEST_DIR_LIB="/usr/lib" 12 | 13 | # 检查源目录是否存在 14 | check_source_dir() { 15 | if [ ! -d "$1" ]; then 16 | echo "源目录 $1 不存在。" 17 | exit 1 18 | fi 19 | } 20 | 21 | # 检查目标目录是否存在 22 | check_dest_dir() { 23 | if [ ! -d "$1" ]; then 24 | echo "目标目录 $1 不存在。" 25 | exit 1 26 | fi 27 | } 28 | 29 | # 复制文件 30 | copy_files() { 31 | local source_dir=$1 32 | local dest_dir=$2 33 | cp -r "$source_dir"/* "$dest_dir" 34 | if [ $? 
-eq 0 ]; then 35 | echo "文件从 $source_dir 复制到 $dest_dir 成功。" 36 | else 37 | echo "文件从 $source_dir 复制到 $dest_dir 失败。" 38 | exit 1 39 | fi 40 | } 41 | 42 | # 检查并复制 rknn_server 文件 43 | check_source_dir "$SOURCE_DIR_BIN" 44 | check_dest_dir "$DEST_DIR_BIN" 45 | copy_files "$SOURCE_DIR_BIN" "$DEST_DIR_BIN" 46 | 47 | # 检查并复制 include 文件 48 | check_source_dir "$SOURCE_DIR_INCLUDE" 49 | check_dest_dir "$DEST_DIR_INCLUDE" 50 | copy_files "$SOURCE_DIR_INCLUDE" "$DEST_DIR_INCLUDE" 51 | 52 | # 检查并复制 lib 文件 53 | check_source_dir "$SOURCE_DIR_LIB" 54 | check_dest_dir "$DEST_DIR_LIB" 55 | copy_files "$SOURCE_DIR_LIB" "$DEST_DIR_LIB" 56 | 57 | echo "所有文件复制成功。" -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/main.cpp -------------------------------------------------------------------------------- /report.md: -------------------------------------------------------------------------------- 1 | ### Operator Time Consuming Ranking Table 2 | 3 | | OpType | CallNumber | CPUTime(us) | GPUTime(us) | NPUTime(us) | TotalTime(us) | TimeRatio(%) | 4 | |--------------------|------------|-------------|-------------|-------------|---------------|--------------| 5 | | ConvExSwish | 138 | 0 | 0 | 80201 | 80201 | 76.94% | 6 | | Concat | 38 | 0 | 0 | 5868 | 5868 | 5.63% | 7 | | Split | 15 | 0 | 0 | 5270 | 5270 | 5.06% | 8 | | AveragePool | 5 | 0 | 0 | 4203 | 4203 | 4.03% | 9 | | exSoftmax13 | 1 | 0 | 0 | 2169 | 2169 | 2.08% | 10 | | MaxPool | 8 | 0 | 0 | 2159 | 2159 | 2.07% | 11 | | Add | 18 | 0 | 0 | 1237 | 1237 | 1.19% | 12 | | Conv | 7 | 0 | 0 | 771 | 771 | 0.74% | 13 | | Resize | 2 | 0 | 0 | 663 | 663 | 0.64% | 14 | | Reshape | 5 | 568 | 0 | 6 | 574 | 0.55% | 15 | | Transpose | 2 | 0 | 0 | 486 | 486 | 0.47% | 16 | | Sigmoid | 1 | 0 | 0 | 354 | 354 | 0.34% | 17 | | Mul | 2 | 0 | 0 
| 148 | 148 | 0.14% | 18 | | Sub | 2 | 0 | 0 | 99 | 99 | 0.09% | 19 | | OutputOperator | 1 | 30 | 0 | 0 | 30 | 0.03% | 20 | | InputOperator | 1 | 4 | 0 | 0 | 4 | 0.00% | 21 | 22 | ### Total Inference Time 23 | - **Total Inference Time (in milliseconds, 4 decimal places):** 113.4130 ms 24 | 25 | ### Summary and Analysis 26 | 27 | 1. **Dominant Operators:** 28 | - The operator `ConvExSwish` is the most time-consuming, accounting for **76.94%** of the total inference time. This indicates that the model spends the majority of its time in convolutional operations followed by a swish activation function. 29 | - The `Concat` and `Split` operators also consume a significant portion of the inference time, with **5.63%** and **5.06%** respectively. 30 | 31 | 2. **Less Time-Consuming Operators:** 32 | - Operators like `Reshape`, `Transpose`, and `Sigmoid` consume relatively less time, each contributing less than **1%** of the total inference time. 33 | - The `OutputOperator` and `InputOperator` are the least time-consuming, with **0.03%** and **0.00%** respectively. 34 | 35 | 3. **CPU vs. NPU Utilization:** 36 | - The majority of the inference time is spent on the NPU, with **103,634 microseconds** (**103.634 milliseconds**) spent on NPU operations. 37 | - The CPU time is minimal, with only **602 microseconds** (**0.602 milliseconds**) spent on CPU operations. 38 | 39 | 4. **Potential Optimization Areas:** 40 | - Given that `ConvExSwish` is the most time-consuming operator, optimizing the convolutional layers or exploring alternative activation functions could potentially reduce the inference time. 41 | - The `Concat` and `Split` operators, while not as dominant as `ConvExSwish`, still contribute a significant portion of the inference time. Optimizing these operations could also lead to performance improvements. 42 | 43 | 5. 
**Overall Performance:** 44 | - The total inference time of **113.4130 milliseconds** indicates that the model is performing inference within a reasonable time frame for many real-time applications. However, further optimizations could reduce this time, making the model even more suitable for latency-sensitive applications. 45 | 46 | ### Conclusion 47 | The inference time is primarily dominated by convolutional operations (`ConvExSwish`), followed by concatenation (`Concat`) and splitting (`Split`) operations. The model efficiently utilizes the NPU, with minimal CPU involvement. To further optimize performance, focusing on reducing the time spent in convolutional layers and concatenation/splitting operations could yield significant improvements. 48 | 49 | 50 | ### 操作耗时排名表 51 | 52 | | 操作类型 | 调用次数 | CPU时间(us) | GPU时间(us) | NPU时间(us) | 总时间(us) | 时间比例(%) | 53 | |--------------------|------------|-------------|-------------|-------------|---------------|--------------| 54 | | ConvExSwish | 138 | 0 | 0 | 80201 | 80201 | 76.94% | 55 | | Concat | 38 | 0 | 0 | 5868 | 5868 | 5.63% | 56 | | Split | 15 | 0 | 0 | 5270 | 5270 | 5.06% | 57 | | AveragePool | 5 | 0 | 0 | 4203 | 4203 | 4.03% | 58 | | exSoftmax13 | 1 | 0 | 0 | 2169 | 2169 | 2.08% | 59 | | MaxPool | 8 | 0 | 0 | 2159 | 2159 | 2.07% | 60 | | Add | 18 | 0 | 0 | 1237 | 1237 | 1.19% | 61 | | Conv | 7 | 0 | 0 | 771 | 771 | 0.74% | 62 | | Resize | 2 | 0 | 0 | 663 | 663 | 0.64% | 63 | | Reshape | 5 | 568 | 0 | 6 | 574 | 0.55% | 64 | | Transpose | 2 | 0 | 0 | 486 | 486 | 0.47% | 65 | | Sigmoid | 1 | 0 | 0 | 354 | 354 | 0.34% | 66 | | Mul | 2 | 0 | 0 | 148 | 148 | 0.14% | 67 | | Sub | 2 | 0 | 0 | 99 | 99 | 0.09% | 68 | | OutputOperator | 1 | 30 | 0 | 0 | 30 | 0.03% | 69 | | InputOperator | 1 | 4 | 0 | 0 | 4 | 0.00% | 70 | 71 | ### 总推理时间 72 | - **总推理时间(以毫秒为单位,保留四位小数):** 113.4130 ms 73 | 74 | ### 总结与分析 75 | 76 | 1. 
**主要操作:** 77 | - 操作 `ConvExSwish` 是最耗时的,占总推理时间的 **76.94%**。这表明模型大部分时间都花在卷积操作后跟一个 swish 激活函数上。 78 | - `Concat` 和 `Split` 操作也消耗了相当一部分推理时间,分别为 **5.63%** 和 **5.06%**。 79 | 80 | 2. **耗时较少的操作:** 81 | - 像 `Reshape`、`Transpose` 和 `Sigmoid` 这样的操作消耗的时间相对较少,每个对总推理时间的贡献都不到 **1%**。 82 | - `OutputOperator` 和 `InputOperator` 是最不耗时的,分别为 **0.03%** 和 **0.00%**。 83 | 84 | 3. **CPU与NPU利用率:** 85 | - 大部分推理时间都花在了NPU上,**103,634 微秒**(**103.634 毫秒**)用于NPU操作。 86 | - CPU时间非常少,只有 **602 微秒**(**0.602 毫秒**)用于CPU操作。 87 | 88 | 4. **潜在优化领域:** 89 | - 鉴于 `ConvExSwish` 是最耗时的操作,优化卷积层或探索替代激活函数可能会减少推理时间。 90 | - `Concat` 和 `Split` 操作虽然不如 `ConvExSwish` 占主导地位,但仍占相当一部分推理时间。优化这些操作也可能导致性能提升。 91 | 92 | 5. **整体性能:** 93 | - 总推理时间为 **113.4130 毫秒**,表明模型在许多实时应用中进行推理的时间框架是合理的。然而,进一步的优化可以减少这个时间,使模型更适合对延迟敏感的应用。 94 | 95 | ### 结论 96 | 推理时间主要由卷积操作(`ConvExSwish`)主导,其次是连接(`Concat`)和分割(`Split`)操作。模型有效地利用了NPU,CPU参与度最小。为了进一步优化性能,专注于减少卷积层和连接/分割操作所花费的时间可能会带来显著的改进。 97 | 98 | ### Model size: 27.7908 MB 99 | ### SDK API Version: 2.3.0 (c949ad889d@2024-11-07T11:35:33) 100 | ### Driver Version: 0.9.8 101 | 102 | Total Operator Elapsed Per Frame Time(us): 131201 103 | Total Memory Read/Write Per Frame Size(KB): 235155.92 104 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 105 | --------------------------------------------------------------------------------------------------- 106 | Operator Time Consuming Ranking Table 107 | --------------------------------------------------------------------------------------------------- 108 | OpType CallNumber CPUTime(us) GPUTime(us) NPUTime(us) TotalTime(us) TimeRatio(%) 109 | --------------------------------------------------------------------------------------------------- 110 | ConvExSwish 138 0 0 95657 95657 72.91% 111 | Concat 38 0 0 10339 10339 7.88% 112 | Split 15 0 0 6891 6891 5.25% 113 | AveragePool 5 0 0 4817 4817 
3.67% 114 | Add 18 0 0 3539 3539 2.70% 115 | MaxPool 8 0 0 3101 3101 2.36% 116 | exSoftmax13 1 0 0 2344 2344 1.79% 117 | Conv 7 0 0 1168 1168 0.89% 118 | Reshape 5 1065 0 7 1072 0.82% 119 | Resize 2 0 0 845 845 0.64% 120 | Transpose 2 0 0 626 626 0.48% 121 | Sigmoid 1 0 0 398 398 0.30% 122 | Sub 2 0 0 202 202 0.15% 123 | Mul 2 0 0 150 150 0.11% 124 | OutputOperator 1 38 0 0 38 0.03% 125 | InputOperator 1 14 0 0 14 0.01% 126 | --------------------------------------------------------------------------------------------------- 127 | Total 1117 0 130084 131201 128 | --------------------------------------------------------------------------------------------------- 129 | 130 | Total inference time (in milliseconds, 4 decimal places): 151.6830 ms -------------------------------------------------------------------------------- /rknn.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include // 用于计时 5 | #include // 用于 setprecision 6 | #include // 包含 OpenCV 头文件 7 | #include "rknn_model.h" 8 | #include "rga_utils.h" 9 | 10 | int main() { 11 | // 初始化模型 12 | std::string model_path = "yolo11s.rknn"; // yolov9c.rknn 13 | rknn_model model(model_path); 14 | 15 | int ctx_index = 0; // 使用第一个上下文 16 | 17 | std::string image_path = "bus.jpg"; 18 | cv::Mat image = cv::imread(image_path); 19 | if (image.empty()) { 20 | std::cerr << "Failed to read the image: " << image_path << std::endl; 21 | return -1; 22 | } 23 | cv::cvtColor(image, image, cv::COLOR_BGR2RGB); 24 | 25 | // 打印图像的尺寸和数据类型 26 | std::cout << "Image size: " << image.size() << std::endl; 27 | std::cout << "Image type: " << image.type() << std::endl; 28 | 29 | // 打印图像的数据类型名称 30 | std::string type_name; 31 | switch (image.type()) { 32 | case CV_8U: type_name = "CV_8U"; break; 33 | case CV_8S: type_name = "CV_8S"; break; 34 | case CV_16U: type_name = "CV_16U"; break; 35 | case CV_16S: type_name = "CV_16S"; break; 36 | case CV_32S: type_name = "CV_32S"; 
break; 37 | case CV_32F: type_name = "CV_32F"; break; 38 | case CV_64F: type_name = "CV_64F"; break; 39 | case CV_8UC3: type_name = "CV_8UC3"; break; 40 | default: type_name = "Unknown"; break; 41 | } 42 | std::cout << "Image type name: " << type_name << std::endl; 43 | 44 | const int num_inferences = 100; 45 | double total_time_ms = 0.0; 46 | object_detect_result_list od_results; 47 | 48 | for (int i = 0; i < num_inferences; ++i) { 49 | auto start = std::chrono::high_resolution_clock::now(); 50 | 51 | // 运行推理 52 | int ret = model.run_inference(image, ctx_index, &od_results); 53 | auto end = std::chrono::high_resolution_clock::now(); 54 | if (ret < 0) { 55 | printf("rknn_run fail! ret=%d\n", ret); 56 | return -1; 57 | } 58 | std::chrono::duration elapsed = end - start; 59 | total_time_ms += elapsed.count(); 60 | } 61 | 62 | double avg_time_ms = total_time_ms / num_inferences; 63 | double fps = num_inferences / (total_time_ms / 1000.0); 64 | 65 | std::cout << std::fixed << std::setprecision(10); // 设置小数点后十位 66 | std::cout << "\nAverage inference time over " << num_inferences << " runs: " 67 | << avg_time_ms << " ms" << std::endl; 68 | 69 | std::cout << std::fixed << std::setprecision(2); // 设置小数点后两位 70 | std::cout << "Frames per second (FPS): " << fps << std::endl; 71 | 72 | /* //打印最后一次推理的结果(如果你需要) 73 | for (int i = 0; i < od_results.count; ++i) { 74 | object_detect_result result = od_results.results[i]; 75 | printf("Object %d:\n", i + 1); 76 | printf(" Box: (%d, %d, %d, %d)\n", 77 | result.box.left, 78 | result.box.top, 79 | result.box.right, 80 | result.box.bottom); 81 | printf(" Class ID: %d\n", result.cls_id); 82 | printf(" Confidence: %.2f\n", result.prop); 83 | } 84 | */ 85 | 86 | // 创建一个 RGB 格式的副本用于绘制 87 | cv::Mat image_rgb = image.clone(); 88 | cv::cvtColor(image_rgb, image_rgb, cv::COLOR_RGB2BGR); // 转换回 BGR 格式以便显示正确颜色 89 | 90 | 91 | // 定义向下偏移量 92 | int offset = 50; 93 | // 绘制检测框 94 | for (int i = 0; i < od_results.count; ++i) { 95 | object_detect_result 
result = od_results.results[i]; 96 | 97 | // 绘制矩形框 98 | cv::Rect rect(result.box.left, result.box.top, result.box.right - result.box.left, result.box.bottom - result.box.top); 99 | cv::rectangle(image_rgb, rect, cv::Scalar(0, 255, 0), 2); // 绿色框线 100 | 101 | // 添加文本标签 102 | std::ostringstream label; 103 | label << "ID: " << result.cls_id << " Conf: " << std::fixed << std::setprecision(2) << result.prop; 104 | int baseLine = 0; 105 | cv::Size label_size = cv::getTextSize(label.str(), cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); 106 | cv::rectangle(image_rgb, cv::Point(result.box.left, result.box.top - label_size.height), 107 | cv::Point(result.box.left + label_size.width, result.box.top + baseLine), 108 | cv::Scalar(0, 255, 0), -1); // 填充背景 109 | cv::putText(image_rgb, label.str(), cv::Point(result.box.left, result.box.top), 110 | cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0), 1, cv::LINE_AA); // 黑色文字 111 | } 112 | 113 | // 保存结果图像 114 | cv::imwrite("result.jpg", image_rgb); 115 | 116 | return 0; 117 | } -------------------------------------------------------------------------------- /runtime/Linux/librknn_api/include/rknn_api.h: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | * 3 | * Copyright (c) 2017 - 2022 by Rockchip Corp. All rights reserved. 4 | * 5 | * The material in this file is confidential and contains trade secrets 6 | * of Rockchip Corporation. This is proprietary information owned by 7 | * Rockchip Corporation. No part of this work may be disclosed, 8 | * reproduced, copied, transmitted, or used in any way for any purpose, 9 | * without the express written permission of Rockchip Corporation. 
10 | * 11 | *****************************************************************************/ 12 | 13 | 14 | #ifndef _RKNN_API_H 15 | #define _RKNN_API_H 16 | 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | #include 22 | 23 | /* 24 | Definition of extended flag for rknn_init. 25 | */ 26 | /* set high priority context. */ 27 | #define RKNN_FLAG_PRIOR_HIGH 0x00000000 28 | 29 | /* set medium priority context */ 30 | #define RKNN_FLAG_PRIOR_MEDIUM 0x00000001 31 | 32 | /* set low priority context. */ 33 | #define RKNN_FLAG_PRIOR_LOW 0x00000002 34 | 35 | /* asynchronous mode. 36 | when enable, rknn_outputs_get will not block for too long because it directly retrieves the result of 37 | the previous frame which can increase the frame rate on single-threaded mode, but at the cost of 38 | rknn_outputs_get not retrieves the result of the current frame. 39 | in multi-threaded mode you do not need to turn this mode on. */ 40 | #define RKNN_FLAG_ASYNC_MASK 0x00000004 41 | 42 | /* collect performance mode. 43 | when enable, you can get detailed performance reports via rknn_query(ctx, RKNN_QUERY_PERF_DETAIL, ...), 44 | but it will reduce the frame rate. 
*/ 45 | #define RKNN_FLAG_COLLECT_PERF_MASK 0x00000008 46 | 47 | /* allocate all memory in outside, includes weight/internal/inputs/outputs */ 48 | #define RKNN_FLAG_MEM_ALLOC_OUTSIDE 0x00000010 49 | 50 | /* weight sharing with the same network structure */ 51 | #define RKNN_FLAG_SHARE_WEIGHT_MEM 0x00000020 52 | 53 | /* send fence fd from outside */ 54 | #define RKNN_FLAG_FENCE_IN_OUTSIDE 0x00000040 55 | 56 | /* get fence fd from inside */ 57 | #define RKNN_FLAG_FENCE_OUT_OUTSIDE 0x00000080 58 | 59 | /* dummy init flag: could only get total_weight_size and total_internal_size by rknn_query*/ 60 | #define RKNN_FLAG_COLLECT_MODEL_INFO_ONLY 0x00000100 61 | 62 | /* allocate internal memory in outside */ 63 | #define RKNN_FLAG_INTERNAL_ALLOC_OUTSIDE 0x00000200 64 | 65 | /* set GPU as the preferred execution backend When the operator is not supported by the NPU */ 66 | #define RKNN_FLAG_EXECUTE_FALLBACK_PRIOR_DEVICE_GPU 0x00000400 67 | 68 | /* enable allocate sram type buffers */ 69 | #define RKNN_FLAG_ENABLE_SRAM 0x00000800 70 | 71 | /* sram type buffers are shared among different contexts */ 72 | #define RKNN_FLAG_SHARE_SRAM 0x00001000 73 | 74 | /* default nice -19, this flag can disable default priority */ 75 | #define RKNN_FLAG_DISABLE_PROC_HIGH_PRIORITY 0x00002000 76 | 77 | /* don't flush input buffer cache, the user must ensure that the input tensor has flushed the cache before calling rknn_run. 78 | !!! Don't use this flags when you call rknn_inputs_set() to set input data. */ 79 | #define RKNN_FLAG_DISABLE_FLUSH_INPUT_MEM_CACHE 0x00004000 80 | 81 | /* Don't invalid output buffer cache. 82 | Users cannot directly access output_mem->virt_addr, 83 | which will cause cache consistency problems. 84 | If you want to use output_mem->virt_addr, 85 | you must use rknn_mem_sync (ctx, mem, RKNN_MEMORY_SYNC_FROM_DEVICE) to flush the cache. 
86 | This flags is generally used when the output data of the NPU is not accessed by the CPU, 87 | but is accessed by the GPU or RGA to reduce the time required to flush the cache. 88 | !!! Don't use this flags when you call rknn_outputs_get() to get output data.*/ 89 | #define RKNN_FLAG_DISABLE_FLUSH_OUTPUT_MEM_CACHE 0x00008000 90 | 91 | /* This flag is used when the model data buffer is allocated by NPU, and can be accessed by NPU directly. */ 92 | #define RKNN_FLAG_MODEL_BUFFER_ZERO_COPY 0x00010000 93 | 94 | /* This flag is a memory allocation flag, which is used in rknn_create_mem2() when no context is available. */ 95 | #define RKNN_MEM_FLAG_ALLOC_NO_CONTEXT 0x00020000 96 | 97 | 98 | /* 99 | Error code returned by the RKNN API. 100 | */ 101 | #define RKNN_SUCC 0 /* execute succeed. */ 102 | #define RKNN_ERR_FAIL -1 /* execute failed. */ 103 | #define RKNN_ERR_TIMEOUT -2 /* execute timeout. */ 104 | #define RKNN_ERR_DEVICE_UNAVAILABLE -3 /* device is unavailable. */ 105 | #define RKNN_ERR_MALLOC_FAIL -4 /* memory malloc fail. */ 106 | #define RKNN_ERR_PARAM_INVALID -5 /* parameter is invalid. */ 107 | #define RKNN_ERR_MODEL_INVALID -6 /* model is invalid. */ 108 | #define RKNN_ERR_CTX_INVALID -7 /* context is invalid. */ 109 | #define RKNN_ERR_INPUT_INVALID -8 /* input is invalid. */ 110 | #define RKNN_ERR_OUTPUT_INVALID -9 /* output is invalid. */ 111 | #define RKNN_ERR_DEVICE_UNMATCH -10 /* the device is unmatch, please update rknn sdk 112 | and npu driver/firmware. */ 113 | #define RKNN_ERR_INCOMPATILE_PRE_COMPILE_MODEL -11 /* This RKNN model use pre_compile mode, but not compatible with current driver. */ 114 | #define RKNN_ERR_INCOMPATILE_OPTIMIZATION_LEVEL_VERSION -12 /* This RKNN model set optimization level, but not compatible with current driver. */ 115 | #define RKNN_ERR_TARGET_PLATFORM_UNMATCH -13 /* This RKNN model set target platform, but not compatible with current platform. 
*/ 116 | 117 | /* 118 | Definition for tensor 119 | */ 120 | #define RKNN_MAX_DIMS 16 /* maximum dimension of tensor. */ 121 | #define RKNN_MAX_NUM_CHANNEL 15 /* maximum channel number of input tensor. */ 122 | #define RKNN_MAX_NAME_LEN 256 /* maximum name lenth of tensor. */ 123 | #define RKNN_MAX_DYNAMIC_SHAPE_NUM 512 /* maximum number of dynamic shape for each input. */ 124 | 125 | #ifdef __arm__ 126 | typedef uint32_t rknn_context; 127 | #else 128 | typedef uint64_t rknn_context; 129 | #endif 130 | 131 | 132 | /* 133 | The query command for rknn_query 134 | */ 135 | typedef enum _rknn_query_cmd { 136 | RKNN_QUERY_IN_OUT_NUM = 0, /* query the number of input & output tensor. */ 137 | RKNN_QUERY_INPUT_ATTR = 1, /* query the attribute of input tensor. */ 138 | RKNN_QUERY_OUTPUT_ATTR = 2, /* query the attribute of output tensor. */ 139 | RKNN_QUERY_PERF_DETAIL = 3, /* query the detail performance, need set 140 | RKNN_FLAG_COLLECT_PERF_MASK when call rknn_init, 141 | this query needs to be valid after rknn_outputs_get. */ 142 | RKNN_QUERY_PERF_RUN = 4, /* query the time of run, 143 | this query needs to be valid after rknn_outputs_get. */ 144 | RKNN_QUERY_SDK_VERSION = 5, /* query the sdk & driver version */ 145 | 146 | RKNN_QUERY_MEM_SIZE = 6, /* query the weight & internal memory size */ 147 | RKNN_QUERY_CUSTOM_STRING = 7, /* query the custom string */ 148 | 149 | RKNN_QUERY_NATIVE_INPUT_ATTR = 8, /* query the attribute of native input tensor. */ 150 | RKNN_QUERY_NATIVE_OUTPUT_ATTR = 9, /* query the attribute of native output tensor. */ 151 | 152 | RKNN_QUERY_NATIVE_NC1HWC2_INPUT_ATTR = 8, /* query the attribute of native input tensor. */ 153 | RKNN_QUERY_NATIVE_NC1HWC2_OUTPUT_ATTR = 9, /* query the attribute of native output tensor. */ 154 | 155 | RKNN_QUERY_NATIVE_NHWC_INPUT_ATTR = 10, /* query the attribute of native input tensor. */ 156 | RKNN_QUERY_NATIVE_NHWC_OUTPUT_ATTR = 11, /* query the attribute of native output tensor. 
*/ 157 | 158 | RKNN_QUERY_DEVICE_MEM_INFO = 12, /* query the attribute of rknn memory information. */ 159 | 160 | RKNN_QUERY_INPUT_DYNAMIC_RANGE = 13, /* query the dynamic shape range of rknn input tensor. */ 161 | RKNN_QUERY_CURRENT_INPUT_ATTR = 14, /* query the current shape of rknn input tensor, only valid for dynamic rknn model*/ 162 | RKNN_QUERY_CURRENT_OUTPUT_ATTR = 15, /* query the current shape of rknn output tensor, only valid for dynamic rknn model*/ 163 | 164 | RKNN_QUERY_CURRENT_NATIVE_INPUT_ATTR = 16, /* query the current native shape of rknn input tensor, only valid for dynamic rknn model*/ 165 | RKNN_QUERY_CURRENT_NATIVE_OUTPUT_ATTR = 17, /* query the current native shape of rknn output tensor, only valid for dynamic rknn model*/ 166 | 167 | 168 | RKNN_QUERY_CMD_MAX 169 | } rknn_query_cmd; 170 | 171 | /* 172 | the tensor data type. 173 | */ 174 | typedef enum _rknn_tensor_type { 175 | RKNN_TENSOR_FLOAT32 = 0, /* data type is float32. */ 176 | RKNN_TENSOR_FLOAT16, /* data type is float16. */ 177 | RKNN_TENSOR_INT8, /* data type is int8. */ 178 | RKNN_TENSOR_UINT8, /* data type is uint8. */ 179 | RKNN_TENSOR_INT16, /* data type is int16. */ 180 | RKNN_TENSOR_UINT16, /* data type is uint16. */ 181 | RKNN_TENSOR_INT32, /* data type is int32. */ 182 | RKNN_TENSOR_UINT32, /* data type is uint32. */ 183 | RKNN_TENSOR_INT64, /* data type is int64. 
*/ 184 | RKNN_TENSOR_BOOL, 185 | RKNN_TENSOR_INT4, 186 | RKNN_TENSOR_BFLOAT16, 187 | 188 | RKNN_TENSOR_TYPE_MAX 189 | } rknn_tensor_type; 190 | 191 | inline static const char* get_type_string(rknn_tensor_type type) 192 | { 193 | switch(type) { 194 | case RKNN_TENSOR_FLOAT32: return "FP32"; 195 | case RKNN_TENSOR_FLOAT16: return "FP16"; 196 | case RKNN_TENSOR_INT8: return "INT8"; 197 | case RKNN_TENSOR_UINT8: return "UINT8"; 198 | case RKNN_TENSOR_INT16: return "INT16"; 199 | case RKNN_TENSOR_UINT16: return "UINT16"; 200 | case RKNN_TENSOR_INT32: return "INT32"; 201 | case RKNN_TENSOR_UINT32: return "UINT32"; 202 | case RKNN_TENSOR_INT64: return "INT64"; 203 | case RKNN_TENSOR_BOOL: return "BOOL"; 204 | case RKNN_TENSOR_INT4: return "INT4"; 205 | case RKNN_TENSOR_BFLOAT16: return "BF16"; 206 | default: return "UNKNOW"; 207 | } 208 | } 209 | 210 | /* 211 | the quantitative type. 212 | */ 213 | typedef enum _rknn_tensor_qnt_type { 214 | RKNN_TENSOR_QNT_NONE = 0, /* none. */ 215 | RKNN_TENSOR_QNT_DFP, /* dynamic fixed point. */ 216 | RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC, /* asymmetric affine. */ 217 | 218 | RKNN_TENSOR_QNT_MAX 219 | } rknn_tensor_qnt_type; 220 | 221 | inline static const char* get_qnt_type_string(rknn_tensor_qnt_type type) 222 | { 223 | switch(type) { 224 | case RKNN_TENSOR_QNT_NONE: return "NONE"; 225 | case RKNN_TENSOR_QNT_DFP: return "DFP"; 226 | case RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC: return "AFFINE"; 227 | default: return "UNKNOW"; 228 | } 229 | } 230 | 231 | /* 232 | the tensor data format. 233 | */ 234 | typedef enum _rknn_tensor_format { 235 | RKNN_TENSOR_NCHW = 0, /* data format is NCHW. */ 236 | RKNN_TENSOR_NHWC, /* data format is NHWC. */ 237 | RKNN_TENSOR_NC1HWC2, /* data format is NC1HWC2. */ 238 | RKNN_TENSOR_UNDEFINED, 239 | 240 | RKNN_TENSOR_FORMAT_MAX 241 | } rknn_tensor_format; 242 | 243 | /* 244 | the mode of running on target NPU core. 
245 | */ 246 | typedef enum _rknn_core_mask { 247 | RKNN_NPU_CORE_AUTO = 0, /* default, run on NPU core randomly. */ 248 | RKNN_NPU_CORE_0 = 1, /* run on NPU core 0. */ 249 | RKNN_NPU_CORE_1 = 2, /* run on NPU core 1. */ 250 | RKNN_NPU_CORE_2 = 4, /* run on NPU core 2. */ 251 | RKNN_NPU_CORE_0_1 = RKNN_NPU_CORE_0 | RKNN_NPU_CORE_1, /* run on NPU core 0 and core 1. */ 252 | RKNN_NPU_CORE_0_1_2 = RKNN_NPU_CORE_0_1 | RKNN_NPU_CORE_2, /* run on NPU core 0 and core 1 and core 2. */ 253 | RKNN_NPU_CORE_ALL = 0xffff, /* auto choice, run on NPU cores depending on platform */ 254 | 255 | RKNN_NPU_CORE_UNDEFINED, 256 | } rknn_core_mask; 257 | 258 | inline static const char* get_format_string(rknn_tensor_format fmt) 259 | { 260 | switch(fmt) { 261 | case RKNN_TENSOR_NCHW: return "NCHW"; 262 | case RKNN_TENSOR_NHWC: return "NHWC"; 263 | case RKNN_TENSOR_NC1HWC2: return "NC1HWC2"; 264 | case RKNN_TENSOR_UNDEFINED: return "UNDEFINED"; 265 | default: return "UNKNOW"; 266 | } 267 | } 268 | 269 | /* 270 | the information for RKNN_QUERY_IN_OUT_NUM. 271 | */ 272 | typedef struct _rknn_input_output_num { 273 | uint32_t n_input; /* the number of input. */ 274 | uint32_t n_output; /* the number of output. */ 275 | } rknn_input_output_num; 276 | 277 | /* 278 | the information for RKNN_QUERY_INPUT_ATTR / RKNN_QUERY_OUTPUT_ATTR. 279 | */ 280 | typedef struct _rknn_tensor_attr { 281 | uint32_t index; /* input parameter, the index of input/output tensor, 282 | need set before call rknn_query. */ 283 | 284 | uint32_t n_dims; /* the number of dimensions. */ 285 | uint32_t dims[RKNN_MAX_DIMS]; /* the dimensions array. */ 286 | char name[RKNN_MAX_NAME_LEN]; /* the name of tensor. */ 287 | 288 | uint32_t n_elems; /* the number of elements. */ 289 | uint32_t size; /* the bytes size of tensor. */ 290 | 291 | rknn_tensor_format fmt; /* the data format of tensor. */ 292 | rknn_tensor_type type; /* the data type of tensor. */ 293 | rknn_tensor_qnt_type qnt_type; /* the quantitative type of tensor. 
*/ 294 | int8_t fl; /* fractional length for RKNN_TENSOR_QNT_DFP. */ 295 | int32_t zp; /* zero point for RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC. */ 296 | float scale; /* scale for RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC. */ 297 | 298 | uint32_t w_stride; /* the stride of tensor along the width dimention of input, 299 | Note: it is read-only, 0 means equal to width. */ 300 | uint32_t size_with_stride; /* the bytes size of tensor with stride. */ 301 | 302 | uint8_t pass_through; /* pass through mode, for rknn_set_io_mem interface. 303 | if TRUE, the buf data is passed directly to the input node of the rknn model 304 | without any conversion. the following variables do not need to be set. 305 | if FALSE, the buf data is converted into an input consistent with the model 306 | according to the following type and fmt. so the following variables 307 | need to be set.*/ 308 | uint32_t h_stride; /* the stride along the height dimention of input, 309 | Note: it is write-only, if it was set to 0, h_stride = height. */ 310 | } rknn_tensor_attr; 311 | 312 | typedef struct _rknn_input_range { 313 | uint32_t index; /* input parameter, the index of input/output tensor, 314 | need set before call rknn_query. */ 315 | uint32_t shape_number; /* the number of shape. */ 316 | rknn_tensor_format fmt; /* the data format of tensor. */ 317 | char name[RKNN_MAX_NAME_LEN]; /* the name of tensor. */ 318 | uint32_t dyn_range[RKNN_MAX_DYNAMIC_SHAPE_NUM][RKNN_MAX_DIMS]; /* the dynamic input dimensions range. */ 319 | uint32_t n_dims; /* the number of dimensions. */ 320 | 321 | } rknn_input_range; 322 | 323 | /* 324 | the information for RKNN_QUERY_PERF_DETAIL. 325 | */ 326 | typedef struct _rknn_perf_detail { 327 | char* perf_data; /* the string pointer of perf detail. don't need free it by user. */ 328 | uint64_t data_len; /* the string length. */ 329 | } rknn_perf_detail; 330 | 331 | /* 332 | the information for RKNN_QUERY_PERF_RUN. 
333 | */ 334 | typedef struct _rknn_perf_run { 335 | int64_t run_duration; /* real inference time (us) */ 336 | } rknn_perf_run; 337 | 338 | /* 339 | the information for RKNN_QUERY_SDK_VERSION. 340 | */ 341 | typedef struct _rknn_sdk_version { 342 | char api_version[256]; /* the version of rknn api. */ 343 | char drv_version[256]; /* the version of rknn driver. */ 344 | } rknn_sdk_version; 345 | 346 | /* 347 | the information for RKNN_QUERY_MEM_SIZE. 348 | */ 349 | typedef struct _rknn_mem_size { 350 | uint32_t total_weight_size; /* the weight memory size */ 351 | uint32_t total_internal_size; /* the internal memory size, exclude inputs/outputs */ 352 | uint64_t total_dma_allocated_size; /* total dma memory allocated size */ 353 | uint32_t total_sram_size; /* total system sram size reserved for rknn */ 354 | uint32_t free_sram_size; /* free system sram size reserved for rknn */ 355 | uint32_t reserved[10]; /* reserved */ 356 | } rknn_mem_size; 357 | 358 | /* 359 | the information for RKNN_QUERY_CUSTOM_STRING. 360 | */ 361 | typedef struct _rknn_custom_string { 362 | char string[1024]; /* the string of custom, lengths max to 1024 bytes */ 363 | } rknn_custom_string; 364 | 365 | /* 366 | The flags of rknn_tensor_mem. 367 | */ 368 | typedef enum _rknn_tensor_mem_flags { 369 | RKNN_TENSOR_MEMORY_FLAGS_ALLOC_INSIDE = 1, /*Used to mark in rknn_destroy_mem() whether it is necessary to release the "mem" pointer itself. 370 | If the flag RKNN_TENSOR_MEMORY_FLAGS_ALLOC_INSIDE is set, rknn_destroy_mem() will call free(mem).*/ 371 | RKNN_TENSOR_MEMORY_FLAGS_FROM_FD = 2, /*Used to mark in rknn_create_mem_from_fd() whether it is necessary to release the "mem" pointer itself. 372 | If the flag RKNN_TENSOR_MEMORY_FLAGS_FROM_FD is set, rknn_destroy_mem() will call free(mem).*/ 373 | RKNN_TENSOR_MEMORY_FLAGS_FROM_PHYS = 3, /*Used to mark in rknn_create_mem_from_phys() whether it is necessary to release the "mem" pointer itself. 
374 | If the flag RKNN_TENSOR_MEMORY_FLAGS_FROM_PHYS is set, rknn_destroy_mem() will call free(mem).*/ 375 | RKNN_TENSOR_MEMORY_FLAGS_UNKNOWN 376 | } rknn_tensor_mem_flags; 377 | 378 | /* 379 | The mode to sync cacheable rknn memory. 380 | */ 381 | typedef enum _rknn_mem_alloc_flags { 382 | RKNN_FLAG_MEMORY_FLAGS_DEFAULT = 0 << 0, /* Same with RKNN_FLAG_MEMORY_CACHEABLE */ 383 | RKNN_FLAG_MEMORY_CACHEABLE = 1 << 0, /* Create Cacheable memory. */ 384 | RKNN_FLAG_MEMORY_NON_CACHEABLE = 1 << 1, /* Create NON-Cacheable memory. */ 385 | } rknn_mem_alloc_flags; 386 | 387 | /* 388 | The mode to sync cacheable rknn memory. 389 | */ 390 | typedef enum _rknn_mem_sync_mode { 391 | RKNN_MEMORY_SYNC_TO_DEVICE = 0x1, /* the mode used for consistency of device access after CPU accesses data. */ 392 | RKNN_MEMORY_SYNC_FROM_DEVICE = 0x2, /* the mode used for consistency of CPU access after device accesses data. */ 393 | RKNN_MEMORY_SYNC_BIDIRECTIONAL = RKNN_MEMORY_SYNC_TO_DEVICE | RKNN_MEMORY_SYNC_FROM_DEVICE, /* the mode used for consistency of data access 394 | between device and CPU in both directions. */ 395 | } rknn_mem_sync_mode; 396 | 397 | /* 398 | the memory information of tensor. 399 | */ 400 | typedef struct _rknn_tensor_memory { 401 | void* virt_addr; /* the virtual address of tensor buffer. */ 402 | uint64_t phys_addr; /* the physical address of tensor buffer. */ 403 | int32_t fd; /* the fd of tensor buffer. */ 404 | int32_t offset; /* indicates the offset of the memory. */ 405 | uint32_t size; /* the size of tensor buffer. */ 406 | uint32_t flags; /* the flags of tensor buffer, reserved */ 407 | void * priv_data; /* the private data of tensor buffer. */ 408 | } rknn_tensor_mem; 409 | 410 | /* 411 | the input information for rknn_input_set. 412 | */ 413 | typedef struct _rknn_input { 414 | uint32_t index; /* the input index. */ 415 | void* buf; /* the input buf for index. */ 416 | uint32_t size; /* the size of input buf. 
*/ 417 | uint8_t pass_through; /* pass through mode. 418 | if TRUE, the buf data is passed directly to the input node of the rknn model 419 | without any conversion. the following variables do not need to be set. 420 | if FALSE, the buf data is converted into an input consistent with the model 421 | according to the following type and fmt. so the following variables 422 | need to be set.*/ 423 | rknn_tensor_type type; /* the data type of input buf. */ 424 | rknn_tensor_format fmt; /* the data format of input buf. 425 | currently the internal input format of NPU is NCHW by default. 426 | so entering NCHW data can avoid the format conversion in the driver. */ 427 | } rknn_input; 428 | 429 | /* 430 | the output information for rknn_outputs_get. 431 | */ 432 | typedef struct _rknn_output { 433 | uint8_t want_float; /* want transfer output data to float */ 434 | uint8_t is_prealloc; /* whether buf is pre-allocated. 435 | if TRUE, the following variables need to be set. 436 | if FALSE, the following variables do not need to be set. */ 437 | uint32_t index; /* the output index. */ 438 | void* buf; /* the output buf for index. 439 | when is_prealloc = FALSE and rknn_outputs_release called, 440 | this buf pointer will be free and don't use it anymore. */ 441 | uint32_t size; /* the size of output buf. */ 442 | } rknn_output; 443 | 444 | /* 445 | the extend information for rknn_init. 446 | */ 447 | typedef struct _rknn_init_extend { 448 | rknn_context ctx; /* rknn context */ 449 | int32_t real_model_offset; /* real rknn model file offset, only valid when init context with rknn file path and zero-copy model model */ 450 | uint32_t real_model_size; /* real rknn model file size, only valid when init context with rknn file path and zero-copy model model */ 451 | int32_t model_buffer_fd; /* the fd of model buffer. 
*/ 452 | uint32_t model_buffer_flags; /* the flags of model_buffer */ 453 | uint8_t reserved[112]; /* reserved */ 454 | } rknn_init_extend; 455 | 456 | /* 457 | the extend information for rknn_run. 458 | */ 459 | typedef struct _rknn_run_extend { 460 | uint64_t frame_id; /* output parameter, indicate current frame id of run. */ 461 | int32_t non_block; /* block flag of run, 0 is block else 1 is non block */ 462 | int32_t timeout_ms; /* timeout for block mode, in milliseconds */ 463 | int32_t fence_fd; /* fence fd from other unit */ 464 | } rknn_run_extend; 465 | 466 | /* 467 | the extend information for rknn_outputs_get. 468 | */ 469 | typedef struct _rknn_output_extend { 470 | uint64_t frame_id; /* output parameter, indicate the frame id of outputs, corresponds to 471 | struct rknn_run_extend.frame_id.*/ 472 | } rknn_output_extend; 473 | 474 | 475 | /* rknn_init 476 | 477 | initial the context and load the rknn model. 478 | 479 | input: 480 | rknn_context* context the pointer of context handle. 481 | void* model if size > 0, pointer to the rknn model, if size = 0, filepath to the rknn model. 482 | uint32_t size the size of rknn model. 483 | uint32_t flag extend flag, see the define of RKNN_FLAG_XXX_XXX. 484 | rknn_init_extend* extend the extend information of init. 485 | return: 486 | int error code. 487 | */ 488 | int rknn_init(rknn_context* context, void* model, uint32_t size, uint32_t flag, rknn_init_extend* extend); 489 | 490 | /* rknn_dup_context 491 | 492 | initial the context and load the rknn model. 493 | 494 | input: 495 | rknn_context* context_in the pointer of context in handle. 496 | rknn_context* context_out the pointer of context out handle. 497 | return: 498 | int error code. 499 | */ 500 | int rknn_dup_context(rknn_context* context_in, rknn_context* context_out); 501 | 502 | /* rknn_destroy 503 | 504 | unload the rknn model and destroy the context. 505 | 506 | input: 507 | rknn_context context the handle of context. 
508 | return: 509 | int error code. 510 | */ 511 | int rknn_destroy(rknn_context context); 512 | 513 | 514 | /* rknn_query 515 | 516 | query the information about model or others. see rknn_query_cmd. 517 | 518 | input: 519 | rknn_context context the handle of context. 520 | rknn_query_cmd cmd the command of query. 521 | void* info the buffer point of information. 522 | uint32_t size the size of information. 523 | return: 524 | int error code. 525 | */ 526 | int rknn_query(rknn_context context, rknn_query_cmd cmd, void* info, uint32_t size); 527 | 528 | 529 | /* rknn_inputs_set 530 | 531 | set inputs information by input index of rknn model. 532 | inputs information see rknn_input. 533 | 534 | input: 535 | rknn_context context the handle of context. 536 | uint32_t n_inputs the number of inputs. 537 | rknn_input inputs[] the arrays of inputs information, see rknn_input. 538 | return: 539 | int error code 540 | */ 541 | int rknn_inputs_set(rknn_context context, uint32_t n_inputs, rknn_input inputs[]); 542 | 543 | /* 544 | rknn_set_batch_core_num 545 | 546 | set rknn batch core_num. 547 | 548 | input: 549 | rknn_context context the handle of context. 550 | int core_num the core number. 551 | return: 552 | int error code. 553 | 554 | */ 555 | int rknn_set_batch_core_num(rknn_context context, int core_num); 556 | 557 | /* rknn_set_core_mask 558 | 559 | set rknn core mask.(only supported on RK3588 now) 560 | 561 | RKNN_NPU_CORE_AUTO: auto mode, default value 562 | RKNN_NPU_CORE_0: core 0 mode 563 | RKNN_NPU_CORE_1: core 1 mode 564 | RKNN_NPU_CORE_2: core 2 mode 565 | RKNN_NPU_CORE_0_1: combine core 0/1 mode 566 | RKNN_NPU_CORE_0_1_2: combine core 0/1/2 mode 567 | RKNN_NPU_CORE_ALL: auto mode, select multiple npu cores to run depending on platform 568 | 569 | 570 | input: 571 | rknn_context context the handle of context. 572 | rknn_core_mask core_mask the core mask. 573 | return: 574 | int error code. 
575 | */ 576 | int rknn_set_core_mask(rknn_context context, rknn_core_mask core_mask); 577 | 578 | /* rknn_run 579 | 580 | run the model to execute inference. 581 | 582 | input: 583 | rknn_context context the handle of context. 584 | rknn_run_extend* extend the extend information of run. 585 | return: 586 | int error code. 587 | */ 588 | int rknn_run(rknn_context context, rknn_run_extend* extend); 589 | 590 | 591 | /* rknn_wait 592 | 593 | wait the model after execute inference. 594 | 595 | input: 596 | rknn_context context the handle of context. 597 | rknn_run_extend* extend the extend information of run. 598 | return: 599 | int error code. 600 | */ 601 | int rknn_wait(rknn_context context, rknn_run_extend* extend); 602 | 603 | 604 | /* rknn_outputs_get 605 | 606 | wait the inference to finish and get the outputs. 607 | this function will block until inference finish. 608 | the results will set to outputs[]. 609 | 610 | input: 611 | rknn_context context the handle of context. 612 | uint32_t n_outputs the number of outputs. 613 | rknn_output outputs[] the arrays of output, see rknn_output. 614 | rknn_output_extend* the extend information of output. 615 | return: 616 | int error code. 617 | */ 618 | int rknn_outputs_get(rknn_context context, uint32_t n_outputs, rknn_output outputs[], rknn_output_extend* extend); 619 | 620 | 621 | /* rknn_outputs_release 622 | 623 | release the outputs that get by rknn_outputs_get. 624 | after called, the rknn_output[x].buf get from rknn_outputs_get will 625 | also be free when rknn_output[x].is_prealloc = FALSE. 626 | 627 | input: 628 | rknn_context context the handle of context. 629 | uint32_t n_ouputs the number of outputs. 630 | rknn_output outputs[] the arrays of output. 
631 | return: 632 | int error code 633 | */ 634 | int rknn_outputs_release(rknn_context context, uint32_t n_ouputs, rknn_output outputs[]); 635 | 636 | 637 | /* new api for zero copy */ 638 | 639 | /* rknn_create_mem_from_phys (memory allocated outside) 640 | 641 | initialize tensor memory from physical address. 642 | 643 | input: 644 | rknn_context ctx the handle of context. 645 | uint64_t phys_addr physical address. 646 | void *virt_addr virtual address. 647 | uint32_t size the size of tensor buffer. 648 | return: 649 | rknn_tensor_mem the pointer of tensor memory information. 650 | */ 651 | rknn_tensor_mem* rknn_create_mem_from_phys(rknn_context ctx, uint64_t phys_addr, void *virt_addr, uint32_t size); 652 | 653 | 654 | /* rknn_create_mem_from_fd (memory allocated outside) 655 | 656 | initialize tensor memory from file description. 657 | 658 | input: 659 | rknn_context ctx the handle of context. 660 | int32_t fd file description. 661 | void *virt_addr virtual address. 662 | uint32_t size the size of tensor buffer. 663 | int32_t offset indicates the offset of the memory (virt_addr without offset). 664 | return: 665 | rknn_tensor_mem the pointer of tensor memory information. 666 | */ 667 | rknn_tensor_mem* rknn_create_mem_from_fd(rknn_context ctx, int32_t fd, void *virt_addr, uint32_t size, int32_t offset); 668 | 669 | 670 | /* rknn_create_mem_from_mb_blk (memory allocated outside) 671 | 672 | create tensor memory from mb_blk. 673 | 674 | input: 675 | rknn_context ctx the handle of context. 676 | void *mb_blk mb_blk allocate from system api. 677 | int32_t offset indicates the offset of the memory. 678 | return: 679 | rknn_tensor_mem the pointer of tensor memory information. 680 | */ 681 | rknn_tensor_mem* rknn_create_mem_from_mb_blk(rknn_context ctx, void *mb_blk, int32_t offset); 682 | 683 | 684 | /* rknn_create_mem (memory allocated inside) 685 | 686 | create tensor memory. 687 | 688 | input: 689 | rknn_context ctx the handle of context. 
690 | uint32_t size the size of tensor buffer. 691 | return: 692 | rknn_tensor_mem the pointer of tensor memory information. 693 | */ 694 | rknn_tensor_mem* rknn_create_mem(rknn_context ctx, uint32_t size); 695 | 696 | /* rknn_create_mem2 (memory allocated inside) 697 | 698 | create tensor memory. 699 | 700 | input: 701 | rknn_context ctx the handle of context. 702 | uint64_t size the size of tensor buffer. 703 | uint64_t alloc_flags control the memory is cacheable 704 | return: 705 | rknn_tensor_mem the pointer of tensor memory information. 706 | */ 707 | rknn_tensor_mem* rknn_create_mem2(rknn_context ctx, uint64_t size, uint64_t alloc_flags); 708 | 709 | /* rknn_destroy_mem (support allocate inside and outside) 710 | 711 | destroy tensor memory. 712 | 713 | input: 714 | rknn_context ctx the handle of context. 715 | rknn_tensor_mem *mem the pointer of tensor memory information. 716 | return: 717 | int error code 718 | */ 719 | int rknn_destroy_mem(rknn_context ctx, rknn_tensor_mem *mem); 720 | 721 | 722 | /* rknn_set_weight_mem 723 | 724 | set the weight memory. 725 | 726 | input: 727 | rknn_context ctx the handle of context. 728 | rknn_tensor_mem *mem the array of tensor memory information 729 | return: 730 | int error code. 731 | */ 732 | int rknn_set_weight_mem(rknn_context ctx, rknn_tensor_mem *mem); 733 | 734 | 735 | /* rknn_set_internal_mem 736 | 737 | set the internal memory. 738 | 739 | input: 740 | rknn_context ctx the handle of context. 741 | rknn_tensor_mem *mem the array of tensor memory information 742 | return: 743 | int error code. 744 | */ 745 | int rknn_set_internal_mem(rknn_context ctx, rknn_tensor_mem *mem); 746 | 747 | 748 | /* rknn_set_io_mem 749 | 750 | set the input and output tensors buffer. 751 | 752 | input: 753 | rknn_context ctx the handle of context. 754 | rknn_tensor_mem *mem the array of tensor memory information. 755 | rknn_tensor_attr *attr the attribute of input or output tensor buffer. 756 | return: 757 | int error code. 
758 | */ 759 | int rknn_set_io_mem(rknn_context ctx, rknn_tensor_mem *mem, rknn_tensor_attr *attr); 760 | 761 | /* rknn_set_input_shape(deprecated) 762 | 763 | set the input tensor shape (only valid for dynamic shape rknn model). 764 | 765 | input: 766 | rknn_context ctx the handle of context. 767 | rknn_tensor_attr *attr the attribute of input or output tensor buffer. 768 | return: 769 | int error code. 770 | */ 771 | int rknn_set_input_shape(rknn_context ctx, rknn_tensor_attr* attr); 772 | 773 | /* rknn_set_input_shapes 774 | 775 | set all the input tensor shapes. graph will run under current set of input shapes after rknn_set_input_shapes.(only valid for dynamic shape rknn model). 776 | 777 | input: 778 | rknn_context ctx the handle of context. 779 | uint32_t n_inputs the number of inputs. 780 | rknn_tensor_attr attr[] the attribute array of all input tensors. 781 | return: 782 | int error code. 783 | */ 784 | int rknn_set_input_shapes(rknn_context ctx, uint32_t n_inputs, rknn_tensor_attr attr[]); 785 | 786 | /* rknn_mem_sync 787 | 788 | sync cacheable rknn memory when both cpu and device access data. 789 | 790 | input: 791 | rknn_context context the handle of context. 792 | rknn_tensor_mem *mem the pointer of tensor memory information. 793 | rknn_mem_sync_mode mode the mode of sync cache. 794 | return: 795 | int error code. 796 | */ 797 | int rknn_mem_sync(rknn_context context, rknn_tensor_mem* mem, rknn_mem_sync_mode mode); 798 | 799 | #ifdef __cplusplus 800 | } //extern "C" 801 | #endif 802 | 803 | #endif //_RKNN_API_H 804 | -------------------------------------------------------------------------------- /runtime/Linux/librknn_api/include/rknn_custom_op.h: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | * 3 | * Copyright (c) 2017 - 2023 by Rockchip Corp. All rights reserved. 
4 | * 5 | * The material in this file is confidential and contains trade secrets 6 | * of Rockchip Corporation. This is proprietary information owned by 7 | * Rockchip Corporation. No part of this work may be disclosed, 8 | * reproduced, copied, transmitted, or used in any way for any purpose, 9 | * without the express written permission of Rockchip Corporation. 10 | * 11 | *****************************************************************************/ 12 | 13 | #ifndef _RKNN_CUSTOM_OP_H 14 | #define _RKNN_CUSTOM_OP_H 15 | 16 | #ifdef __cplusplus 17 | extern "C" { 18 | #endif 19 | 20 | #include "rknn_api.h" 21 | 22 | #include 23 | 24 | /* 25 | Error code returned by the RKNN Custom Operator API. 26 | */ 27 | #define RKNN_WARNING_SKIP_CUSTOM_OP_COMPUTE -14 /* if custom op init callback funtion return this code and op type is supported by RKNN, it will use RKNN implementation. */ 28 | 29 | #define RKNN_CUSTOM_OP_MAX_STR_LEN 64 30 | #define RKNN_CUSTOM_OP_MAX_VALUE_LEN 32 31 | #define RKNN_CUSTOM_OP_EXPORT __attribute__((visibility("default"))) 32 | 33 | #ifdef __arm__ 34 | typedef uint32_t rknn_custom_op_interal_context; 35 | #else 36 | typedef uint64_t rknn_custom_op_interal_context; 37 | #endif 38 | /* 39 | the backend execution device of custom operator. 
40 | */ 41 | typedef enum _rknn_target_type 42 | { 43 | RKNN_TARGET_TYPE_CPU = 1, /* backend device is cpu */ 44 | RKNN_TARGET_TYPE_GPU = 2, /* backend device is gpu */ 45 | RKNN_TARGET_TYPE_MAX 46 | } rknn_target_type; 47 | 48 | typedef struct _rknn_gpu_op_context 49 | { 50 | void* cl_context; 51 | void* cl_command_queue; 52 | void* cl_kernel; 53 | 54 | } rknn_gpu_op_context; 55 | 56 | typedef struct _rknn_custom_op_context 57 | { 58 | rknn_target_type target; /* custom op backend target */ 59 | rknn_custom_op_interal_context internal_ctx; /* the context of custom op*/ 60 | rknn_gpu_op_context gpu_ctx; /* the gpu context of custom op */ 61 | void* priv_data; /* the private data managed by user */ 62 | } rknn_custom_op_context; 63 | 64 | typedef struct _rknn_custom_op_tensor 65 | { 66 | rknn_tensor_attr attr; /* the attribute of tensor buffer. */ 67 | rknn_tensor_mem mem; /* the memory information of tensor. */ 68 | } rknn_custom_op_tensor; 69 | 70 | typedef struct _rknn_custom_op_attr 71 | { 72 | char name[RKNN_MAX_NAME_LEN]; /* the name of operator atrributes. */ 73 | rknn_tensor_type dtype; /* the data type of operator attributes, indicate the 'array' type. */ 74 | uint32_t n_elems; /* the number of 'array'. */ 75 | void* data; /* the array pointer of operator attributes, the data type of each element is determined by type. */ 76 | } rknn_custom_op_attr; 77 | 78 | /* 79 | the information of custom operator to add to the rknn_context. 80 | */ 81 | typedef struct _rknn_custom_op 82 | { 83 | uint32_t version; /* custom op version */ 84 | rknn_target_type target; /* custom op backend target */ 85 | char op_type[RKNN_MAX_NAME_LEN]; /* custom op type */ 86 | 87 | char cl_kernel_name[RKNN_MAX_NAME_LEN]; /* the opencl kernel name used by custom op */ 88 | char* cl_kernel_source; /* if cl_source_size > 0, pointer to the cl kernel source string, if cl_source_size = 0, 89 | filepath to the cl kernel file. 
*/ 90 | uint64_t cl_source_size; /* the size of cl_kernel_source */ 91 | char cl_build_options[RKNN_MAX_NAME_LEN]; /* the options for opencl to build clProgram used by custom op */ 92 | 93 | /** 94 | * The callback function sets that the users need to code 95 | */ 96 | int (*init)(rknn_custom_op_context* op_ctx, rknn_custom_op_tensor* inputs, uint32_t n_inputs, 97 | rknn_custom_op_tensor* outputs, uint32_t n_outputs); /* [optional] custom op kernel init falllback function*/ 98 | int (*prepare)(rknn_custom_op_context* op_ctx, rknn_custom_op_tensor* inputs, uint32_t n_inputs, 99 | rknn_custom_op_tensor* outputs, uint32_t n_outputs); /* [optional] custom op kernel prepare falllback function*/ 100 | int (*compute)(rknn_custom_op_context* op_ctx, rknn_custom_op_tensor* inputs, uint32_t n_inputs, 101 | rknn_custom_op_tensor* outputs, uint32_t n_outputs); /* [required] custom op kernel compute falllback function */ 102 | int (*compute_native)(rknn_custom_op_context* op_ctx, rknn_custom_op_tensor* inputs, uint32_t n_inputs, 103 | rknn_custom_op_tensor* outputs, uint32_t n_outputs); /* [optional] custom op kernel compute with native attribute falllback function */ 104 | int (*destroy)(rknn_custom_op_context* op_ctx); /* [optional] custom op kernel compute falllback function */ 105 | 106 | } rknn_custom_op; 107 | 108 | /** 109 | * dlopen custom op with so required this function 110 | */ 111 | typedef rknn_custom_op* (*get_custom_op_func)(); 112 | 113 | /* rknn_register_custom_ops 114 | 115 | Register custom operators to rknn_context. 116 | Steps to use a custom op: 117 | 1. Create a rknn_custom_op structure array and fill in it. 118 | 2. Setup prepare/compute/compute_native/destroy callback function and add them to the 119 | rknn_custom_op.(compute is required and other function is optional, compute_native is not supported now, set it 120 | to nullptr) 121 | 3. Call rknn_register_custom_ops to register the op type after rknn_init. 
122 | input: 123 | rknn_context ctx the handle of context. 124 | rknn_custom_op* op the custom operator array, each of which contains operator information and calllback function. 125 | uint32_t custom_op_num the length of rknn_custom_op array. 126 | return: 127 | int error code. 128 | */ 129 | int rknn_register_custom_ops(rknn_context ctx, rknn_custom_op* op, uint32_t custom_op_num); 130 | 131 | /* rknn_custom_op_get_op_attr 132 | 133 | input: 134 | rknn_custom_op_context* op_ctx the handle of custom op context. 135 | const char* attr_name the attribute name of operator. 136 | rknn_custom_op_attr* op_attr the data and information of operator attributes. 137 | */ 138 | void rknn_custom_op_get_op_attr(rknn_custom_op_context* op_ctx, const char* attr_name, rknn_custom_op_attr* op_attr); 139 | 140 | #ifdef __cplusplus 141 | } // extern "C" 142 | #endif 143 | 144 | #endif //_RKNN_CUSTOM_OP_H 145 | -------------------------------------------------------------------------------- /runtime/Linux/librknn_api/include/rknn_matmul_api.h: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | * 3 | * Copyright (c) 2017 - 2018 by Rockchip Corp. All rights reserved. 4 | * 5 | * The material in this file is confidential and contains trade secrets 6 | * of Rockchip Corporation. This is proprietary information owned by 7 | * Rockchip Corporation. No part of this work may be disclosed, 8 | * reproduced, copied, transmitted, or used in any way for any purpose, 9 | * without the express written permission of Rockchip Corporation. 
10 | * 11 | *****************************************************************************/ 12 | 13 | #ifndef _RKNN_MATMUL_API_H 14 | #define _RKNN_MATMUL_API_H 15 | 16 | #ifdef __cplusplus 17 | extern "C" { 18 | #endif 19 | 20 | #include "rknn_api.h" 21 | 22 | typedef rknn_context rknn_matmul_ctx; 23 | 24 | typedef enum _rknn_matmul_quant_type 25 | { 26 | RKNN_QUANT_TYPE_PER_LAYER_SYM = 0, 27 | RKNN_QUANT_TYPE_PER_LAYER_ASYM = 1, 28 | RKNN_QUANT_TYPE_PER_CHANNEL_SYM = 2, 29 | RKNN_QUANT_TYPE_PER_CHANNEL_ASYM = 3, 30 | RKNN_QUANT_TYPE_PER_GROUP_SYM = 4, 31 | RKNN_QUANT_TYPE_PER_GROUP_ASYM = 5, 32 | } rknn_matmul_quant_type; 33 | 34 | typedef struct _rknn_quant_params 35 | { 36 | char name[RKNN_MAX_NAME_LEN]; 37 | 38 | // matmul tensor scale 39 | float* scale; 40 | int32_t scale_len; 41 | 42 | // matmul tensor zero point 43 | int32_t* zp; 44 | int32_t zp_len; 45 | 46 | } rknn_quant_params; 47 | 48 | typedef enum _rknn_matmul_type 49 | { 50 | RKNN_FLOAT16_MM_FLOAT16_TO_FLOAT32 = 1, 51 | RKNN_INT8_MM_INT8_TO_INT32 = 2, 52 | RKNN_INT8_MM_INT8_TO_INT8 = 3, 53 | RKNN_FLOAT16_MM_FLOAT16_TO_FLOAT16 = 4, 54 | RKNN_FLOAT16_MM_INT8_TO_FLOAT32 = 5, 55 | RKNN_FLOAT16_MM_INT8_TO_FLOAT16 = 6, 56 | RKNN_FLOAT16_MM_INT4_TO_FLOAT32 = 7, 57 | RKNN_FLOAT16_MM_INT4_TO_FLOAT16 = 8, 58 | RKNN_INT8_MM_INT8_TO_FLOAT32 = 9, 59 | RKNN_INT4_MM_INT4_TO_INT16 = 10, 60 | RKNN_INT8_MM_INT4_TO_INT32 = 11, 61 | RKNN_FLOAT16_MM_INT4_TO_BFLOAT16 = 12, 62 | } rknn_matmul_type; 63 | 64 | inline static const char* get_matmul_type_string(rknn_matmul_type type) 65 | { 66 | switch (type) { 67 | case RKNN_FLOAT16_MM_FLOAT16_TO_FLOAT32: 68 | return "RKNN_FLOAT16_MM_FLOAT16_TO_FLOAT32"; 69 | case RKNN_INT8_MM_INT8_TO_INT32: 70 | return "RKNN_INT8_MM_INT8_TO_INT32"; 71 | case RKNN_INT8_MM_INT8_TO_INT8: 72 | return "RKNN_INT8_MM_INT8_TO_INT8"; 73 | case RKNN_FLOAT16_MM_FLOAT16_TO_FLOAT16: 74 | return "RKNN_FLOAT16_MM_FLOAT16_TO_FLOAT16"; 75 | case RKNN_FLOAT16_MM_INT8_TO_FLOAT32: 76 | return 
"RKNN_FLOAT16_MM_INT8_TO_FLOAT32"; 77 | case RKNN_FLOAT16_MM_INT8_TO_FLOAT16: 78 | return "RKNN_FLOAT16_MM_INT8_TO_FLOAT16"; 79 | case RKNN_INT4_MM_INT4_TO_INT16: 80 | return "RKNN_INT4_MM_INT4_TO_INT16"; 81 | case RKNN_FLOAT16_MM_INT4_TO_FLOAT32: 82 | return "RKNN_FLOAT16_MM_INT4_TO_FLOAT32"; 83 | case RKNN_FLOAT16_MM_INT4_TO_FLOAT16: 84 | return "RKNN_FLOAT16_MM_INT4_TO_FLOAT16"; 85 | case RKNN_INT8_MM_INT4_TO_INT32: 86 | return "RKNN_INT8_MM_INT4_TO_INT32"; 87 | case RKNN_INT8_MM_INT8_TO_FLOAT32: 88 | return "RKNN_INT8_MM_INT8_TO_FLOAT32"; 89 | case RKNN_FLOAT16_MM_INT4_TO_BFLOAT16: 90 | return "RKNN_FLOAT16_MM_INT4_TO_BFLOAT16"; 91 | default: 92 | return "UNKNOW"; 93 | } 94 | } 95 | 96 | typedef struct _rknn_matmul_tensor_attr 97 | { 98 | char name[RKNN_MAX_NAME_LEN]; 99 | 100 | // indicate A(M, K) or B(K, N) or C(M, N) 101 | uint32_t n_dims; 102 | uint32_t dims[RKNN_MAX_DIMS]; 103 | 104 | // matmul tensor size 105 | uint32_t size; 106 | 107 | // matmul tensor data type 108 | // int8 : A, B 109 | // int32: C 110 | rknn_tensor_type type; 111 | 112 | } rknn_matmul_tensor_attr; 113 | 114 | typedef struct _rknn_matmul_io_attr 115 | { 116 | // indicate A(M, K) or B(K, N) or C(M, N) 117 | rknn_matmul_tensor_attr A; 118 | rknn_matmul_tensor_attr B; 119 | rknn_matmul_tensor_attr C; 120 | } rknn_matmul_io_attr; 121 | 122 | /* 123 | matmul dynamic shape struct 124 | */ 125 | typedef struct _rknn_matmul_shape 126 | { 127 | int32_t M; 128 | int32_t K; 129 | int32_t N; 130 | } rknn_matmul_shape; 131 | 132 | /* 133 | the layout of matmul input/output tensor. 
134 | */ 135 | typedef enum 136 | { 137 | RKNN_MM_LAYOUT_NORM = 0, 138 | RKNN_MM_LAYOUT_NATIVE = 1, 139 | RKNN_MM_LAYOUT_TP_NORM = 2, 140 | } rknn_matmul_layout; 141 | 142 | /* 143 | matmul information struct 144 | */ 145 | typedef struct rknn_matmul_info_t 146 | { 147 | int32_t M; 148 | int32_t K; // limit: RK3566/3568: int8 type must be aligned with 32byte, float16 type must be aligned with 16byte; 149 | // RK3562: int8 type must be aligned with 32byte, float16 type must be aligned with 32byte; 150 | // RK3588/3576: int8 type must be aligned with 32byte, float16 type must be aligned with 32byte, 151 | // int4 type must be aligned with 32byte; 152 | int32_t N; // limit: RK3566/3568: int8 type must be aligned with 16byte, float16 type must be aligned with 8byte; 153 | // RK3562: int8 type must be aligned with 16byte, float16 type must be aligned with 8byte; 154 | // RK3588/3576: int8 type must be aligned with 32byte, float16 type must be aligned with 16byte, 155 | // int4 type must be aligned with 64byte; 156 | // matmul data type 157 | // int4: int4(A) x int4(B) -> int16(C) 158 | // int8: int8(A) x int8(B) -> int32(C) 159 | // float16: float16(A) x float16(B) -> float32(C) 160 | rknn_matmul_type type; 161 | 162 | // matmul native layout for B 163 | // 0: normal layout 164 | // 1: native layout 165 | int16_t B_layout; 166 | 167 | // matmul quant type for B 168 | // A and C only support per layer 169 | // 0: per layer 170 | // 1: per channel 171 | // 2: per group 172 | int16_t B_quant_type; 173 | 174 | // matmul native layout for A and C 175 | // 0: normal layout 176 | // 1: native layout 177 | int16_t AC_layout; 178 | 179 | // matmul quant type for A and C, only support 0 180 | int16_t AC_quant_type; 181 | 182 | // iommu domain id, each domain has 4GB of space 183 | int32_t iommu_domain_id; 184 | 185 | // B_quant_type set 2, group size is enable 186 | int16_t group_size; 187 | 188 | // reserved field 189 | int8_t reserved[34]; 190 | } rknn_matmul_info; 191 | 192 | 
/* rknn_matmul_create 193 | 194 | params: 195 | rknn_matmul_ctx *ctx the handle of context. 196 | rknn_matmul_info *info the matmal information. 197 | rknn_matmul_io_attr *io_attr inputs/output attribute 198 | return: 199 | int error code 200 | */ 201 | int rknn_matmul_create(rknn_matmul_ctx* ctx, rknn_matmul_info* info, rknn_matmul_io_attr* io_attr); 202 | 203 | /* rknn_matmul_create_dynamic_shape 204 | 205 | params: 206 | rknn_matmul_ctx *ctx the handle of context. 207 | rknn_matmul_info *info the matmal information. 208 | int shape_num the supported shape number of matmul. 209 | rknn_matmul_shape dynamic_shapes[] the supported M,K,N shape struct array. 210 | rknn_matmul_io_attr *io_attr the array of inputs and output attribute 211 | return: 212 | int error code 213 | */ 214 | /* 215 | 原来的info.M, K, N无效 216 | */ 217 | int rknn_matmul_create_dynamic_shape(rknn_matmul_ctx* ctx, rknn_matmul_info* info, int shape_num, 218 | rknn_matmul_shape dynamic_shapes[], rknn_matmul_io_attr io_attrs[]); 219 | 220 | /* rknn_matmul_set_io_mem 221 | 222 | params: 223 | rknn_matmul_ctx ctx the handle of context. 224 | rknn_tensor_mem *mem the pointer of tensor memory information. 225 | rknn_matmul_tensor_attr *attr the attribute of input or output tensor buffer. 226 | return: 227 | int error code. 
228 | 229 | formula: 230 | C = A * B, 231 | 232 | limit: 233 | K max: k <= 10240 234 | K limit: RK3566/3568: int8 type must be aligned with 32byte, float16 type must be aligned with 16byte; 235 | RK3562: int8 type must be aligned with 32byte, float16 type must be aligned with 32byte; 236 | RK3588/3576: int8 type must be aligned with 32byte, float16 type must be aligned with 32byte, 237 | int4 type must be aligned with 32byte; 238 | N limit: RK3566/3568: int8 type must be aligned with 16byte, float16 type must be aligned with 8byte; 239 | RK3562: int8 type must be aligned with 16byte, float16 type must be aligned with 8byte; 240 | RK3588/3576: int8 type must be aligned with 32byte, float16 type must be aligned with 16byte, 241 | int4 type must be aligned with 64byte; 242 | A shape: M x K 243 | normal layout: (M, K) 244 | [M1K1, M1K2, ..., M1Kk, 245 | M2K1, M2K2, ..., M2Kk, 246 | ... 247 | MmK1, MmK2, ..., MmKk] 248 | for RK3566/3568: 249 | int8: 250 | native layout: (K / 8, M, 8) 251 | [K1M1, K2M1, ..., K8M1, 252 | K9M2, K10M2, ..., K16M2, 253 | ... 254 | K(k-7)Mm, K(k-6)Mm, ..., KkMm] 255 | float16: 256 | native layout: (K / 4, M, 4) 257 | [K1M1, K2M1, ..., K4M1, 258 | K9M2, K10M2, ..., K8M2, 259 | ... 260 | K(k-3)Mm, K(k-2)Mm, ..., KkMm] 261 | for RK3562: 262 | int8: 263 | native layout: (K / 16, M, 16) 264 | [K1M1, K2M1, ..., K16M1, 265 | K17M2, K18M2, ..., K32M2, 266 | ... 267 | K(k-15)Mm, K(k-14)Mm, ..., KkMm] 268 | float16: 269 | native layout: (K / 8, M, 8) 270 | [K1M1, K2M1, ..., K8M1, 271 | K9M2, K10M2, ..., K16M2, 272 | ... 273 | K(k-7)Mm, K(k-6)Mm, ..., KkMm] 274 | for RK3588/3576: 275 | int4: 276 | native layout: (K / 32, M, 32) 277 | [K1M1, K2M1, ..., K32M1, 278 | K33M2, K10M2, ..., K64M2, 279 | ... 280 | K(k-31)Mm, K(k-30)Mm, ..., KkMm] 281 | int8: 282 | native layout: (K / 16, M, 16) 283 | [K1M1, K2M1, ..., K16M1, 284 | K17M2, K18M2, ..., K32M2, 285 | ... 
286 | K(k-15)Mm, K(k-14)Mm, ..., KkMm] 287 | float16: 288 | native layout: (K / 8, M, 8) 289 | [K1M1, K2M1, ..., K8M1, 290 | K9M2, K10M2, ..., K16M2, 291 | ... 292 | K(k-7)Mm, K(k-6)Mm, ..., KkMm] 293 | B shape: K x N 294 | normal layout: (K, N) 295 | [K1N1, K1N2, ..., K1Nn, 296 | K2N1, K2N2, ..., K2Nn, 297 | ... 298 | KkN1, KkN2, ..., KkNn] 299 | for RK3566/3568: 300 | int8: 301 | native layout: (N / 16, K / 32, 16, 32) 302 | [K1N1, K2N1, ..., K32N1, 303 | K1N2, K2N2, ..., K32N2, 304 | ... 305 | K1N16, K2N16, ..., K32N16, 306 | K33N1, K34N1, ..., K64N1, 307 | K33N2, K34N2, ..., K64N2, 308 | ... 309 | K(k-31)N16, K(k-30)N16, ..., KkN16, 310 | K1N17, K2N17, ..., K32N17, 311 | K1N18, K2N18, ..., K32N18, 312 | ... 313 | K(k-31)Nn, K(k-30)Nn, ..., KkNn] 314 | float16: 315 | native layout: (N / 8, K / 16, 8, 16) 316 | [K1N1, K2N1, ..., K16N1, 317 | K1N2, K2N2, ..., K16N2, 318 | ... 319 | K1N8, K2N8, ..., K16N8, 320 | K17N1, K18N1, ..., K32N1, 321 | K17N2, K18N2, ..., K32N2, 322 | ... 323 | K(k-15)N8, K(k-30)N8, ..., KkN8, 324 | K1N9, K2N9, ..., K16N9, 325 | K1N10, K2N10, ..., K16N10, 326 | ... 327 | K(k-15)Nn, K(k-14)Nn, ..., KkNn] 328 | for RK3562: 329 | int8: 330 | native layout: (N / 16, K / 32, 16, 32) 331 | [K1N1, K2N1, ..., K32N1, 332 | K1N2, K2N2, ..., K32N2, 333 | ... 334 | K1N16, K2N16, ..., K32N16, 335 | K33N1, K34N1, ..., K64N1, 336 | K33N2, K34N2, ..., K64N2, 337 | ... 338 | K(k-31)N16, K(k-30)N16, ..., KkN16, 339 | K1N17, K2N17, ..., K32N17, 340 | K1N18, K2N18, ..., K32N18, 341 | ... 342 | K(k-31)Nn, K(k-30)Nn, ..., KkNn] 343 | float16: 344 | native layout: (N / 8, K / 32, 8, 32) 345 | [K1N1, K2N1, ..., K32N1, 346 | K1N2, K2N2, ..., K32N2, 347 | ... 348 | K1N8, K2N8, ..., K32N8, 349 | K33N1, K34N1, ..., K64N1, 350 | K33N2, K34N2, ..., K64N2, 351 | ... 352 | K(k-31)N8, K(k-30)N8, ..., KkN8, 353 | K1N9, K2N9, ..., K16N9, 354 | K1N10, K2N10, ..., K16N10, 355 | ... 
356 | K(k-31)Nn, K(k-30)Nn, ..., KkNn] 357 | for RK3588: 358 | when K > 8192, the B data will be split into T segments. 359 | int T = std::ceil(K / 8192); 360 | For example: normal layout -> native layout 361 | K = 20488, N = 4096, T = 3, the data will be split into 3 segments. 362 | subN = rknn_matmul_io_attr.B.dims[2]; 363 | subK = rknn_matmul_io_attr.B.dims[3]; 364 | (8196, 4096) (4096 / subN, 8196 / subK, subN, subK) 365 | (K, N) = (20488, 4096) -> (8196, 4096) -> (4096 / subN, 8196 / subK, subN, subK) 366 | normal layout (4096, 4096) (4096 / subN, 4096 / subK, subN, subK) 367 | T normal layout T native layout 368 | It is recommended to use the rknn_B_normal_layout_to_native_layout interface for direct data conversion. 369 | for RK3576: 370 | when K > 4096, the B data will be split into T segments. 371 | int T = std::ceil(K / 4096); 372 | For example: normal layout -> native layout 373 | K = 10240, N = 2048, T = 3, the data will be split into 3 segments. 374 | subN = rknn_matmul_io_attr.B.dims[2]; 375 | subK = rknn_matmul_io_attr.B.dims[3]; 376 | (4096, 2048) (2048 / subN, 4096 / subK, subN, subK) 377 | (K, N) = (10240, 2048) -> (4096, 2048) -> (2048 / subN, 4096 / subK, subN, subK) 378 | normal layout (2048, 2048) (2048 / subN, 2048 / subK, subN, subK) 379 | T normal layout T native layout 380 | It is recommended to use the rknn_B_normal_layout_to_native_layout interface for direct data conversion. 381 | for RK3588/3576: 382 | int4: 383 | native layout: (N / 64, K / 32, 64, 32) 384 | [K1N1, K2N1, ..., K32N1, 385 | K1N2, K2N2, ..., K32N2, 386 | ... 387 | K1N64, K2N64, ..., K32N64, 388 | K33N1, K34N1, ..., K64N1, 389 | K33N2, K34N2, ..., K64N2, 390 | ... 391 | K(k-31)N64, K(k-30)N64, ..., KkN64, 392 | K1N65, K2N65, ..., K32N65, 393 | K1N66, K2N66, ..., K32N66, 394 | ... 395 | K(k-31)Nn, K(k-30)Nn, ..., KkNn] 396 | int8: 397 | native layout: (N / 32, K / 32, 32, 32) 398 | [K1N1, K2N1, ..., K32N1, 399 | K1N2, K2N2, ..., K32N2, 400 | ... 
401 | K1N32, K2N32, ..., K32N32, 402 | K33N1, K34N1, ..., K64N1, 403 | K33N2, K34N2, ..., K64N2, 404 | ... 405 | K(k-31)N32, K(k-30)N32, ..., KkN32, 406 | K1N33, K2N33, ..., K32N33, 407 | K1N34, K2N34, ..., K32N34, 408 | ... 409 | K(k-31)Nn, K(k-30)Nn, ..., KkNn] 410 | float16: 411 | native layout: (N / 16, K / 32, 16, 32) 412 | [K1N1, K2N1, ..., K32N1, 413 | K1N2, K2N2, ..., K32N2, 414 | ... 415 | K1N16, K2N16, ..., K32N16, 416 | K33N1, K34N1, ..., K64N1, 417 | K33N2, K34N2, ..., K64N2, 418 | ... 419 | K(k-31)N16, K(k-30)N16, ..., KkN16, 420 | K1N17, K2N17, ..., K32N17, 421 | K1N18, K2N18, ..., K32N18, 422 | ... 423 | K(k-31)Nn, K(k-30)Nn, ..., KkNn] 424 | C shape: M x N 425 | normal layout: (M, N) 426 | [M1N1, M1N2, ..., M1Nn, 427 | M2N1, M2N2, ..., M2Nn, 428 | ... 429 | MmN1, MmN2, ..., MmNn] 430 | native layout: (N / 4, M, 4) 431 | [N1M1, N2M1, ..., N4M1, 432 | N5M2, N6M2, ..., N8M2, 433 | ... 434 | N(n-3)Mm, N(n-2)Mm, ..., NnMm] 435 | for RK3588: 436 | int4: 437 | native layout: (N / 8, M, 8) 438 | [N1M1, N2M1, ..., N8M1, 439 | N9M2, N10M2, ..., N16M2, 440 | ... 441 | N(n-7)Mm, N(n-6)Mm, ..., NnMm] 442 | */ 443 | int rknn_matmul_set_io_mem(rknn_matmul_ctx ctx, rknn_tensor_mem* mem, rknn_matmul_tensor_attr* attr); 444 | 445 | /* rknn_matmul_set_core_mask 446 | 447 | set rknn core mask.(only support RK3588 in current) 448 | 449 | RKNN_NPU_CORE_AUTO: auto mode, default value 450 | RKNN_NPU_CORE_0: core 0 mode 451 | RKNN_NPU_CORE_1: core 1 mode 452 | RKNN_NPU_CORE_2: core 2 mode 453 | RKNN_NPU_CORE_0_1: combine core 0/1 mode 454 | RKNN_NPU_CORE_0_1_2: combine core 0/1/2 mode 455 | 456 | input: 457 | rknn_matmul_ctx context the handle of context. 458 | rknn_core_mask core_mask the core mask. 459 | return: 460 | int error code. 
461 | */ 462 | int rknn_matmul_set_core_mask(rknn_matmul_ctx context, rknn_core_mask core_mask); 463 | 464 | /* rknn_matmul_set_quant_params 465 | 466 | set quant params.(only support matmul type RKNN_INT8_MM_INT8_TO_INT8, RKNN_INT8_MM_INT8_TO_INT32) 467 | 468 | input: 469 | rknn_matmul_ctx context the handle of context. 470 | rknn_quant_params params quant params. 471 | return: 472 | int error code. 473 | */ 474 | int rknn_matmul_set_quant_params(rknn_matmul_ctx context, rknn_quant_params* params); 475 | 476 | /* rknn_matmul_get_quant_params 477 | 478 | get per channel quant params.(only support matmul type RKNN_INT8_MM_INT8_TO_INT32) 479 | 480 | input: 481 | rknn_matmul_ctx context the handle of context. 482 | rknn_quant_params params quant params. 483 | float scale get scale for user. 484 | return: 485 | int error code. 486 | */ 487 | int rknn_matmul_get_quant_params(rknn_matmul_ctx ctx, rknn_quant_params* params, float* scale); 488 | 489 | /* rknn_matmul_set_dynamic_shape 490 | 491 | set the matmul input/output shape. matmul will run under current input shape after rknn_matmul_set_dynamic_shape, 492 | only support M dynamicly now. 493 | 494 | input: 495 | rknn_matmul_ctx ctx the handle of context. 496 | rknn_matmul_shape* shape the M,K,N shape of matmul currently 497 | return: 498 | int error code. 499 | */ 500 | int rknn_matmul_set_dynamic_shape(rknn_matmul_ctx ctx, rknn_matmul_shape* shape); 501 | 502 | /* rknn_matmul_run 503 | 504 | run the matmul in blocking mode 505 | 506 | params: 507 | rknn_matmul_ctx ctx the handle of context. 508 | return: 509 | int error code. 510 | */ 511 | int rknn_matmul_run(rknn_matmul_ctx ctx); 512 | 513 | /* rknn_matmul_destroy 514 | 515 | destroy the matmul context 516 | 517 | params: 518 | rknn_matmul_ctx ctx the handle of context. 519 | return: 520 | int error code. 
521 | */ 522 | int rknn_matmul_destroy(rknn_matmul_ctx ctx); 523 | 524 | /* rknn_B_normal_layout_to_native_layout 525 | 526 | change the B normal layout buffer to native layout buffer 527 | 528 | params: 529 | void* B_input B normal layout buffer. 530 | void* B_output B native layout buffer. 531 | int K K 532 | int N N 533 | rknn_matmul_info info matmul info 534 | return: 535 | int error code. 536 | */ 537 | int rknn_B_normal_layout_to_native_layout(void* B_input, void* B_output, int K, int N, rknn_matmul_info* info); 538 | 539 | #ifdef __cplusplus 540 | } // extern "C" 541 | #endif 542 | 543 | #endif // _RKNN_MATMUL_API_H -------------------------------------------------------------------------------- /runtime/Linux/rknn_server/aarch64/usr/bin/restart_rknn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | killall start_rknn.sh > /dev/null 2>&1 4 | killall rknn_server > /dev/null 2>&1 5 | start_rknn.sh & -------------------------------------------------------------------------------- /runtime/Linux/rknn_server/aarch64/usr/bin/rknn_server: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/runtime/Linux/rknn_server/aarch64/usr/bin/rknn_server -------------------------------------------------------------------------------- /runtime/Linux/rknn_server/aarch64/usr/bin/start_rknn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | while true 4 | do 5 | sleep 1 6 | rknn_server #>/dev/null 2>&1 7 | done 8 | -------------------------------------------------------------------------------- /src/common.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/src/common.cpp 
-------------------------------------------------------------------------------- /src/rga_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/src/rga_utils.cpp -------------------------------------------------------------------------------- /src/rknn_model.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/src/rknn_model.cpp -------------------------------------------------------------------------------- /yolo11s.rknn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuunnn-w/rknn-cpp-yolo/470b663456ca437ab6cc256bbc04c3b4e1797c7a/yolo11s.rknn --------------------------------------------------------------------------------