├── .gitattributes ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── README_onnx_plngin.md ├── images ├── coco_1.jpg └── render.jpg ├── main.cpp ├── run_yolov5.sh ├── src ├── application │ ├── CMakeLists.txt │ └── yolov5 │ │ ├── yolo.cpp │ │ └── yolo.h ├── module │ ├── CMakeLists.txt │ ├── builder │ │ ├── trt_builder.cpp │ │ └── trt_builder.h │ ├── common │ │ ├── cuda_tools.cpp │ │ ├── cuda_tools.h │ │ ├── ilogger.cpp │ │ ├── ilogger.h │ │ └── utils.h │ ├── core │ │ ├── async_infer.h │ │ ├── monopoly_allocator.h │ │ ├── trt_tensor.cpp │ │ └── trt_tensor.h │ └── infer │ │ ├── trt_infer.cpp │ │ └── trt_infer.h └── onnxplugin │ ├── CMakeLists.txt │ ├── include │ ├── SiLUPlugin.h │ ├── checkMacrosPlugin.h │ ├── kernel.h │ └── plugin.h │ └── src │ ├── SiLU.cu │ └── SiLUPlugin.cpp └── weights ├── yolov5n.engine ├── yolov5n.onnx ├── yolov5n.plugin.engine └── yolov5n.plugin.onnx /.gitattributes: -------------------------------------------------------------------------------- 1 | *.pt filter=lfs diff=lfs merge=lfs -text 2 | *.onnx filter=lfs diff=lfs merge=lfs -text 3 | *.serialized filter=lfs diff=lfs merge=lfs -text 4 | *.engine filter=lfs diff=lfs merge=lfs -text 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | build 3 | lib 4 | .vscode 5 | *.so 6 | model/* 7 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # check the cmake version 2 | cmake_minimum_required(VERSION 3.5) 3 | # project name 4 | project(yolov5tensorrt) 5 | # output directory for executables 6 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/bin) 7 | 8 | option(CUDA_USE_STATIC_CUDA_RUNTIME OFF) 9 | set(CMAKE_CXX_STANDARD 11) 10 | set(CMAKE_BUILD_TYPE Debug) 11 | 12 | set(CUDA_GEN_CODE "-gencode=arch=compute_86,code=sm_86") 13 | 14 | #cuda 15 | find_package(CUDA REQUIRED) 16 | # custom OpenCV path 17 | set(OpenCV_DIR /home/ls/softwares/opencv-4.5.5/build) 18 | # find opencv 19 | find_package(OpenCV REQUIRED) 20 | if(NOT OpenCV_FOUND) 21 | message(FATAL_ERROR "OpenCV not found!") 22 | endif(NOT OpenCV_FOUND) 23 | #tensorrt 24 | # set(TensorRT_DIR /home/ls/softwares/TensorRT-8.2.3.0) 25 | set(TensorRT_DIR /home/ls/softwares/TensorRT) 26 | 27 | include_directories( 28 | ${OpenCV_INCLUDE_DIRS} 29 | ${CUDA_INCLUDE_DIRS} 30 | ${TensorRT_DIR}/include 31 | ) 32 | 33 | link_directories( 34 | ${TensorRT_DIR}/lib 35 | /usr/local/cuda/lib64 36 | ) 37 | 38 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -O0 -Wfatal-errors -pthread -w -g") 39 | set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -std=c++11 -O0 -Xcompiler -fPIC -g -w ${CUDA_GEN_CODE}") 40 | 41 | # add the sub-project components 42 | add_subdirectory(${CMAKE_SOURCE_DIR}/src/module) 43 | add_subdirectory(${CMAKE_SOURCE_DIR}/src/application) 44 | add_subdirectory(${CMAKE_SOURCE_DIR}/src/onnxplugin) 45 | # library search path 46 | link_directories(${CMAKE_SOURCE_DIR}/lib) 47 | # link the dependent libraries 48 | link_libraries(module) 49 | link_libraries(application) 50 | link_libraries(onnxplugin) 51 | 52 | ADD_EXECUTABLE(${PROJECT_NAME} main.cpp) 53 | target_link_libraries(${PROJECT_NAME} nvinfer nvonnxparser) 54 | target_link_libraries(${PROJECT_NAME} cuda cublas cudart cudnn) 55 | target_link_libraries(${PROJECT_NAME} pthread) 56 | target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS}) 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 
62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. 
A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 
186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 
246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. 
A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 
360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 
476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. 
You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 
583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## yolov5_tensorrt 2 | *yolov5 deployment* 3 | 4 | ### environment 5 | - ubuntu 20.04 6 | - cuda 11.6.2 7 | - tensorrt 8.2.3.0 8 | - pytorch 1.10 9 | 10 | ### C++ 11 | ``` 12 | mkdir build 13 | cd build 14 | cmake .. 15 | make 16 | ``` 17 | 18 | ## TENSORRT ONNX PLUGIN 19 | 20 | ### STEP1:add a plugin layer in onnx 21 | * in the project [yolov5](https://github.com/ZJU-lishuang/yolov5_convert/tree/main/yolov5) 22 | 23 | `export PYTHONPATH="$PWD" && python models/export_plugin_onnx.py --weights ./weights/yolov5s.pt --img 640 --batch 1` 24 | 25 | ### STEP2:do constant folding 26 | 27 | #### install polygraphy 28 | * refer to [Polygraphy](https://github.com/NVIDIA/TensorRT/tree/master/tools/Polygraphy) 29 | 30 | #### constant folding 31 | `polygraphy surgeon sanitize model.onnx --fold-constants --output model_folded.onnx` 32 | 33 | 34 | ### STEP3(Optional):add the plugin layer in onnx-tensorrt 35 | Add the following code to `builtin_op_importers.cpp` in onnx-tensorrt. 36 | It helps the ONNX parser map the plugin layer to a TensorRT plugin. 
```c++ 38 | DEFINE_BUILTIN_OP_IMPORTER(SiLU) 39 | { 40 | std::vector<nvinfer1::ITensor*> inputTensors; 41 | std::vector<ShapedWeights> weights; 42 | for(int i = 0; i < inputs.size(); ++i){ 43 | auto& item = inputs.at(i); 44 | if(item.is_tensor()){ 45 | nvinfer1::ITensor* input = &convertToTensor(item, ctx); 46 | inputTensors.push_back(input); 47 | }else{ 48 | weights.push_back(item.weights()); 49 | } 50 | } 51 | 52 | LOG_VERBOSE("call silu plugin: "); 53 | const std::string pluginName = "SiLU"; 54 | const std::string pluginVersion = "1"; 55 | 56 | LOG_INFO("Searching for plugin: " << pluginName << ", plugin_version: " << pluginVersion); 57 | printf("node.name() = %s\n", node.name().c_str()); 58 | 59 | // Create plugin from registry 60 | const auto mPluginRegistry = getPluginRegistry(); 61 | const auto pluginCreator 62 | = mPluginRegistry->getPluginCreator(pluginName.c_str(), pluginVersion.c_str()); 63 | 64 | ASSERT(pluginCreator != nullptr, ErrorCode::kINVALID_VALUE); 65 | 66 | std::vector<nvinfer1::PluginField> f; 67 | nvinfer1::PluginFieldCollection fc; 68 | fc.nbFields = f.size(); 69 | fc.fields = f.data(); 70 | 71 | auto plugin = pluginCreator->createPlugin(node.name().c_str(), &fc); 72 | 73 | ASSERT(plugin != nullptr && "SiLU plugin was not found in the plugin registry!", 74 | ErrorCode::kUNSUPPORTED_NODE); 75 | 76 | // auto layer = ctx->network()->addPluginV2(&tensors[0], int(tensors.size()), *plugin); 77 | auto layer = ctx->network()->addPluginV2(inputTensors.data(), inputTensors.size(), *plugin); 78 | nvinfer1::ITensor* output = layer->getOutput(0); 79 | 80 | RETURN_FIRST_OUTPUT(layer); 81 | 82 | } 83 | ``` 84 | 85 | ### STEP4:add the plugin layer in TensorRT 86 | Add the plugin layer in TensorRT by registering it with `REGISTER_TENSORRT_PLUGIN`. 87 | Example: [SiLUPlugin.h](src/onnxplugin/include/SiLUPlugin.h) 88 | 89 | -------------------------------------------------------------------------------- /README_onnx_plngin.md: -------------------------------------------------------------------------------- 1 | ## TensorRT onnx plugin 2 | 3 | ### Dependencies 4 | 5 | - [TensorRT open source libraries (master branch)](https://github.com/NVIDIA/TensorRT/tree/21.04) 6 | 7 | ### pytorch 8 | Add a custom layer to the PyTorch model. 9 | 10 | The following is an example. 
```python 12 | import torch 13 | import torch.nn.functional as F 14 | import torch.nn as nn 15 | 16 | class SiLUImplementation(torch.autograd.Function): 17 | # The key point: for a custom op implemented with autograd.Function, adding a static symbolic method is enough; its parameters (apart from g) must match those of forward (apart from ctx) 18 | # "SiLU" is used as the plugin name 19 | @staticmethod 20 | def symbolic(g, input): 21 | return g.op("SiLU", input) 22 | 23 | def forward(ctx, x): 24 | return x * torch.sigmoid(x) 25 | 26 | # backward is omitted here 27 | 28 | class customSiLU(nn.Module): 29 | def forward(self, x): 30 | return SiLUImplementation.apply(x) 31 | 32 | 33 | class FooModel(torch.nn.Module): 34 | def __init__(self): 35 | super(FooModel, self).__init__() 36 | self.SiLU = customSiLU() 37 | 38 | def forward(self, input1, input2): 39 | return input2 + self.SiLU(input1) 40 | 41 | 42 | dummy_input1 = torch.zeros((1, 3, 3, 3)) 43 | dummy_input2 = torch.zeros((1, 1, 3, 3)) 44 | model = FooModel() 45 | 46 | # Two inputs are shown here for demonstration; you can define as many inputs as you need 47 | # Newer torch versions need operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK to export custom layers; see the official torch.onnx documentation 48 | torch.onnx.export(model, (dummy_input1, dummy_input2), 'test.onnx', verbose=True, opset_version=12, 49 | operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK) 50 | ``` 51 | 52 | ### onnx 53 | Because `do_constant_folding` can only be set to True when `operator_export_type` is `ONNX`, the model needs to be constant-folded with a separate tool. 54 | 55 | Refer to [onnx-tensorrt](https://github.com/onnx/onnx-tensorrt/blob/master/docs/faq.md#inputsat0-must-be-an-initializer-or-inputsat0is_weights) 56 | 57 | `polygraphy surgeon sanitize model.onnx --fold-constants --output model_folded.onnx` 58 | 59 | Right now there is a `FallbackPluginImporter` in builtin_op_importers.cpp. 60 | 61 | Any op that is not supported natively is imported as a plugin. 62 | 63 | It is therefore not necessary to add the plugin layer to onnx-tensorrt yourself; a simplified sketch of what that fallback path amounts to is shown below. 
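The fallback essentially uses the ONNX op type as the TensorRT plugin name. The sketch below shows the same idea through the public TensorRT C++ API; it is not the actual onnx-tensorrt source, the tensor names and input shape are made up, and it assumes a TensorRT 8.x build with the SiLU plugin already registered (for example by linking this repository's onnxplugin library).

```c++
// Sketch: what the parser's fallback effectively does for an unknown "SiLU" node --
// look up a plugin creator whose name matches the op type and add an IPluginV2 layer.
#include <NvInfer.h>
#include <iostream>

class Logger : public nvinfer1::ILogger {
    void log(Severity severity, const char* msg) noexcept override {
        if (severity <= Severity::kWARNING) std::cout << msg << std::endl;
    }
} gLogger;

int main() {
    auto* builder = nvinfer1::createInferBuilder(gLogger);
    auto* network = builder->createNetworkV2(
        1U << static_cast<int>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH));

    // Hypothetical input tensor standing in for the plugin node's input.
    nvinfer1::ITensor* input = network->addInput(
        "input", nvinfer1::DataType::kFLOAT, nvinfer1::Dims4{1, 3, 640, 640});

    // Plugin name == ONNX op type, version "1", no extra plugin fields.
    auto* creator = getPluginRegistry()->getPluginCreator("SiLU", "1");
    if (creator == nullptr) {
        std::cout << "SiLU plugin creator not registered" << std::endl;
        return 1;
    }
    nvinfer1::PluginFieldCollection fc{0, nullptr};
    auto* plugin = creator->createPlugin("silu_node", &fc);

    auto* layer = network->addPluginV2(&input, 1, *plugin);
    layer->getOutput(0)->setName("output");
    std::cout << "Added plugin layer: " << layer->getName() << std::endl;
    return 0;  // cleanup omitted for brevity
}
```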
64 | 65 | ### tensorrt 66 | 67 | add the plugin layer in tensorrt by using `REGISTER_TENSORRT_PLUGIN` 68 | 69 | 70 | -------------------------------------------------------------------------------- /images/coco_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZJU-lishuang/yolov5_tensorrt/312787f096bacde243bea4798527aa09a2208f65/images/coco_1.jpg -------------------------------------------------------------------------------- /images/render.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZJU-lishuang/yolov5_tensorrt/312787f096bacde243bea4798527aa09a2208f65/images/render.jpg -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include "src/module/builder/trt_builder.h" 2 | #include "src/module/infer/trt_infer.h" 3 | #include "src/module/core/trt_tensor.h" 4 | #include "src/module/common/ilogger.h" 5 | #include "src/application/yolov5/yolo.h" 6 | #include "src/onnxplugin/include/SiLUPlugin.h" 7 | #include 8 | 9 | #include 10 | 11 | using namespace TRT; 12 | 13 | static bool exists(const std::string& path){ 14 | return access(path.c_str(), R_OK) == 0; 15 | } 16 | 17 | void set_device(int device_id) { 18 | if (device_id == -1) 19 | return; 20 | 21 | checkCudaRuntime(cudaSetDevice(device_id)); 22 | } 23 | 24 | static void test_tensor1(){ 25 | 26 | size_t cpu_bytes = 1024; 27 | size_t gpu_bytes = 2048; 28 | 29 | /////////////////////////////////////////////////////////////////// 30 | // 封装效果,自动分配和释放 31 | TRT::MixMemory memory; 32 | void* host_ptr = memory.cpu(cpu_bytes); 33 | void* device_ptr = memory.gpu(gpu_bytes); 34 | 35 | /////////////////////////////////////////////////////////////////// 36 | // 不封装效果 37 | // void* host_ptr = nullptr; 38 | // void* device_ptr = nullptr; 39 | // cudaMallocHost(&host_ptr, cpu_bytes); 40 | // cudaMalloc(&device_ptr, gpu_bytes); 41 | 42 | // cudaFreeHost(&host_ptr); 43 | // cudaFree(&device_ptr); 44 | /////////////////////////////////////////////////////////////////// 45 | } 46 | 47 | static void test_tensor2(){ 48 | 49 | /////////////////////////////////////////////////////////////////// 50 | /* 内存的自动复制,依靠head属性标记数据最新的位置 51 | 若访问的数据不是最新的,则会自动发生复制操作 */ 52 | TRT::Tensor tensor({1, 3, 5, 5},nullptr); 53 | INFO("tensor.head = %s", TRT::data_head_string(tensor.head())); /* 输出 Init,内存没有分配 */ 54 | 55 | tensor.cpu()[0] = 512; /* 访问cpu时,分配cpu内存 */ 56 | INFO("tensor.head = %s", TRT::data_head_string(tensor.head())); /* 输出 Host */ 57 | 58 | float* device_ptr = tensor.gpu(); /* 访问gpu时,最新数据在Host,发生复制动作并标记最新数据在Device */ 59 | INFO("tensor.head = %s", TRT::data_head_string(tensor.head())); /* 输出 Device */ 60 | //INFO("device_ptr[0] = %f", device_ptr[0]); /* 输出 512.00000,由于gpu内存修改为cudaMalloc,这里无法直接访问 */ 61 | } 62 | 63 | static void test_tensor3(){ 64 | 65 | /////////////////////////////////////////////////////////////////// 66 | /* 计算维度的偏移量 */ 67 | TRT::Tensor tensor({1, 3, 5, 5, 2, 5},nullptr); 68 | auto ptr_origin = tensor.cpu(); 69 | auto ptr_channel2 = tensor.cpu(0, 2, 3, 2, 1, 3); 70 | 71 | INFO("Offset = %d", ptr_channel2 - ptr_origin); /* 输出678 */ 72 | INFO("Offset = %d", tensor.offset(0, 2, 3, 2, 1, 3)); /* 输出678 */ 73 | 74 | int offset_compute = ((((0 * 3 + 2) * 5 + 3) * 5 + 2) * 2 + 1) * 5 + 3; 75 | INFO("Compute = %d", offset_compute); /* 输出678 */ 76 | } 77 | 78 | static void lesson1(){ 79 | 
std::string onnx_file = "weights/yolov5n.onnx"; 80 | std::string engine_file = "weights/yolov5n.engine"; 81 | auto mode = Mode::FP32; 82 | unsigned int max_batch_size = 16; 83 | size_t max_workspace_size = 1<<30; 84 | compile(mode,max_batch_size,onnx_file,engine_file); 85 | } 86 | 87 | static void lesson2(){ 88 | int gpuid = 0; 89 | /* 设置使用GPU */ 90 | set_device(gpuid); 91 | 92 | // std::string onnx_file = "../weights/yolov5n.onnx"; 93 | // std::string engine_file = "../weights/yolov5n.engine"; 94 | std::string onnx_file = "../weights/yolov5n.plugin.onnx"; 95 | std::string engine_file = "../weights/yolov5n.plugin.engine"; 96 | if(!exists(engine_file)){ 97 | auto mode = Mode::FP32; 98 | unsigned int max_batch_size = 16; 99 | size_t max_workspace_size = 1<<30; 100 | compile(mode,max_batch_size,onnx_file,engine_file); 101 | } 102 | 103 | std::shared_ptr infer(new TRTInferImpl()); 104 | infer->load(engine_file); 105 | if(infer == nullptr){ 106 | printf("Engine %s load failed", engine_file.c_str()); 107 | // 解除主线程阻塞,模型加载失败 108 | return; 109 | } 110 | /* 打印引擎相关信息 */ 111 | infer->print(); 112 | 113 | /* 获取引擎的相关信息 */ 114 | int max_batch_size = infer->get_max_batch_size(); 115 | auto input = infer->tensor("images"); 116 | auto output = infer->tensor("output"); 117 | int num_classes = output->size(2) - 5; 118 | 119 | int input_width_ = input->size(3); 120 | int input_height_ = input->size(2); 121 | CUStream stream_ = infer->get_stream(); 122 | 123 | input->resize_single_dim(0, max_batch_size).to_gpu(); 124 | int infer_batch_size = 1; 125 | input->resize_single_dim(0, infer_batch_size); 126 | 127 | size_t size_image = input_width_ * input_height_ * 3; 128 | auto workspace = input->get_data(); 129 | float* image_device = (float*)workspace->gpu(size_image); 130 | 131 | auto image = cv::imread("../images/coco_1.jpg"); 132 | std::vector data = YOLOV5::v5prepareImage(image,input_width_,input_height_); 133 | 134 | checkCudaRuntime(cudaMemcpyAsync(image_device, data.data(), size_image*sizeof(float), cudaMemcpyHostToDevice, stream_)); 135 | 136 | /* 开始推理 */ 137 | infer->forward(false); 138 | 139 | std::vector result; 140 | 141 | float confidence_threshold=0.5; 142 | int num_boxes = output->size(1); 143 | for(int b=0;bcpu(b); 145 | for(int num_box=0;num_box r_w) { 170 | w = input_w; 171 | h = r_w * image.rows; 172 | x = 0; 173 | y = (input_h - h) / 2; 174 | } else { 175 | w = r_h * image.cols; 176 | h = input_h; 177 | x = (input_w - w) / 2; 178 | y = 0; 179 | } 180 | cv::Mat re(h, w, CV_8UC3); 181 | cv::resize(image, re, re.size(), 0, 0, cv::INTER_LINEAR); 182 | //show result in image 183 | for (auto it: result){ 184 | float score = it.prob; 185 | int xmin=it.x-it.w/2-x; 186 | int xmax=it.x+it.w/2-x; 187 | int ymin=it.y-it.h/2-y; 188 | int ymax=it.y+it.h/2-y; 189 | cv::rectangle(re, cv::Point(xmin, ymin), cv::Point(xmax, ymax), cv::Scalar(255, 204,0), 3); 190 | cv::putText(re, std::to_string(score), cv::Point(xmin, ymin), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0,204,255)); 191 | } 192 | 193 | cv::imwrite("../images/render.jpg", re); 194 | 195 | } 196 | 197 | static const char* cocolabels[] = { 198 | "person", "bicycle", "car", "motorcycle", "airplane", 199 | "bus", "train", "truck", "boat", "traffic light", "fire hydrant", 200 | "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", 201 | "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", 202 | "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", 203 | "snowboard", "sports ball", "kite", "baseball bat", "baseball 
glove", 204 | "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", 205 | "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", 206 | "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", 207 | "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", 208 | "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", 209 | "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", 210 | "scissors", "teddy bear", "hair drier", "toothbrush" 211 | }; 212 | 213 | static std::tuple hsv2bgr(float h, float s, float v){ 214 | const int h_i = static_cast(h * 6); 215 | const float f = h * 6 - h_i; 216 | const float p = v * (1 - s); 217 | const float q = v * (1 - f*s); 218 | const float t = v * (1 - (1 - f) * s); 219 | float r, g, b; 220 | switch (h_i) { 221 | case 0:r = v; g = t; b = p;break; 222 | case 1:r = q; g = v; b = p;break; 223 | case 2:r = p; g = v; b = t;break; 224 | case 3:r = p; g = q; b = v;break; 225 | case 4:r = t; g = p; b = v;break; 226 | case 5:r = v; g = p; b = q;break; 227 | default:r = 1; g = 1; b = 1;break;} 228 | return std::make_tuple(static_cast(b * 255), static_cast(g * 255), static_cast(r * 255)); 229 | } 230 | 231 | static std::tuple random_color(int id){ 232 | float h_plane = ((((unsigned int)id << 2) ^ 0x937151) % 100) / 100.0f;; 233 | float s_plane = ((((unsigned int)id << 3) ^ 0x315793) % 100) / 100.0f; 234 | return hsv2bgr(h_plane, s_plane, 1); 235 | } 236 | 237 | static void lesson3(){ 238 | std::string engine_file = "../weights/yolov5n.engine"; 239 | float confidence_threshold = 0.4f; 240 | float nms_threshold = 0.5f; 241 | int gpuid = 0; 242 | //create infer 243 | auto yolo = YOLOV5::create_infer(engine_file,gpuid,confidence_threshold,nms_threshold); 244 | 245 | auto image = cv::imread("../images/coco_1.jpg"); 246 | // 提交图片并获取结果 247 | auto objs = yolo->commit(image).get(); 248 | 249 | int w, h, x=0, y=0; 250 | int input_w=640; 251 | int input_h=640; 252 | float r_w = input_w / (image.cols*1.0); 253 | float r_h = input_h / (image.rows*1.0); 254 | if (r_h > r_w) { 255 | w = input_w; 256 | h = r_w * image.rows; 257 | x = 0; 258 | y = (input_h - h) / 2; 259 | } else { 260 | w = r_h * image.cols; 261 | h = input_h; 262 | x = (input_w - w) / 2; 263 | y = 0; 264 | } 265 | 266 | cv::Mat re(h, w, CV_8UC3); 267 | cv::resize(image, re, re.size(), 0, 0, cv::INTER_LINEAR); 268 | 269 | for(auto& obj : objs){ 270 | obj.left=obj.left-x; 271 | obj.top=obj.top-y; 272 | obj.right=obj.right-x; 273 | obj.bottom=obj.bottom-y; 274 | uint8_t b, g, r; 275 | std::tie(b, g, r) = random_color(obj.class_label); 276 | cv::rectangle(re, cv::Point(obj.left, obj.top), cv::Point(obj.right, obj.bottom), cv::Scalar(b, g, r), 5); 277 | 278 | auto name = cocolabels[obj.class_label]; 279 | auto caption = cv::format("%s %.2f", name, obj.confidence); 280 | int width = cv::getTextSize(caption, 0, 1, 2, nullptr).width + 10; 281 | cv::rectangle(re, cv::Point(obj.left-3, obj.top-33), cv::Point(obj.left + width, obj.top), cv::Scalar(b, g, r), -1); 282 | cv::putText(re, caption, cv::Point(obj.left, obj.top-5), 0, 1, cv::Scalar::all(0), 2, 16); 283 | } 284 | 285 | printf("Save result to infer.jpg, %d objects\n", objs.size()); 286 | cv::imwrite(cv::format("../images/render.jpg"), re); 287 | 288 | } 289 | 290 | int main(){ 291 | 292 | // lesson1(); 293 | // lesson2(); 294 | lesson3(); 295 | // test_tensor1(); 296 | // test_tensor2(); 297 | // test_tensor3(); 298 | return 0; 299 | } 
-------------------------------------------------------------------------------- /run_yolov5.sh: -------------------------------------------------------------------------------- 1 | sudo docker run --gpus "device=0" -it --rm --net=host --shm-size=1g --ipc=host \ 2 | -v$(pwd)/:/workspace/yolov5_tensorrt \ 3 | -v/home/ls/softwares/TensorRT:/workspace/TensorRT \ 4 | -v/home/ls/softwares/cudnn:/workspace/cudnn \ 5 | -v/home/ls/softwares/opencv-4.5.5:/workspace/opencv \ 6 | -w /workspace/yolov5_tensorrt nvcr.io/nvidia/pytorch:22.03-py3 -------------------------------------------------------------------------------- /src/application/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 指定CMake版本 2 | cmake_minimum_required(VERSION 3.5) 3 | # 指定项目名称 4 | project(application) 5 | 6 | # 指定头文件目录 7 | # include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) 8 | # 指定源文件目录 9 | file(GLOB_RECURSE SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) 10 | 11 | # 打印cmake当前目录地址&源文件目录地址 12 | message(application_CMAKE_CURRENT_SOURCE_DIR => ${CMAKE_CURRENT_SOURCE_DIR}) 13 | # message( application_SOURCE_FILES => ${SOURCE_FILES}) 14 | 15 | # 设置环境变量,编译用到的源文件全部都要放到这里,否则编译能够通过, 16 | # 但是执行的时候会出现各种问题,比如"symbol lookup error xxxxx , undefined symbol" 17 | set(ALL_SRCS ${SOURCE_FILES}) 18 | # message(application_ALL_SRCS => ${ALL_SRCS}) 19 | 20 | #设置生成库保存位置 21 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/lib) 22 | 23 | # 生成so包 24 | # SHARED ->生成动态库 25 | # STATIC ->生成静态库 26 | message(application_PROJECT_NAME => ${PROJECT_NAME}) 27 | add_library(${PROJECT_NAME} SHARED ${ALL_SRCS}) -------------------------------------------------------------------------------- /src/application/yolov5/yolo.cpp: -------------------------------------------------------------------------------- 1 | #include "yolo.h" 2 | #include "../../module/core/async_infer.h" 3 | #include "../../module/infer/trt_infer.h" 4 | namespace YOLOV5{ 5 | 6 | using namespace TRT; 7 | 8 | void set_device(int device_id) { 9 | if (device_id == -1) 10 | return; 11 | 12 | checkCudaRuntime(cudaSetDevice(device_id)); 13 | } 14 | 15 | //* load_infer函数返回了一个TRTInferImpl类 16 | std::shared_ptr load_infer(const std::string& file) { 17 | /* 实例化一个推理对象 */ 18 | std::shared_ptr infer(new TRTInferImpl()); 19 | /* 加载trt文件,并反序列化,这里包含了模型的输入输出的绑定和流的设定 */ 20 | if (!infer->load(file)) 21 | infer.reset(); 22 | return infer; 23 | } 24 | 25 | using ThreadSafedAsyncInferImpl = ThreadSafeAsyncInfer 26 | < 27 | cv::Mat, // input 28 | BoxArray, // output 29 | std::tuple, // start param 30 | int // additional 31 | >; 32 | 33 | class YoloTRTInferImpl : public Infer, public ThreadSafedAsyncInferImpl{ 34 | public: 35 | virtual ~YoloTRTInferImpl(){ 36 | stop(); 37 | } 38 | 39 | virtual bool startup(const std::string& file,int gpuid,float confidence_threshold,float nms_threshold){ 40 | confidence_threshold_ = confidence_threshold; 41 | nms_threshold_ = nms_threshold; 42 | return ThreadSafedAsyncInferImpl::startup(std::make_tuple(file,gpuid)); 43 | } 44 | 45 | virtual void worker(std::promise& result) override{ 46 | std::string file = std::get<0>(start_param_); 47 | int gpuid = std::get<1>(start_param_); 48 | set_device(gpuid); 49 | auto engine = load_infer(file); 50 | engine->print(); 51 | 52 | int max_batch_size = engine->get_max_batch_size(); 53 | auto input = engine->tensor("images"); 54 | auto output = engine->tensor("output"); 55 | int num_classes = output->size(2) - 5; 56 | 57 | input_width_ = input->size(3); 58 | input_height_ = 
input->size(2); 59 | 60 | tensor_allocator_ = std::make_shared>(max_batch_size * 2); 61 | stream_ = engine->get_stream(); 62 | gpu_ = gpuid; 63 | 64 | result.set_value(true); 65 | input->resize_single_dim(0, max_batch_size).to_gpu(); 66 | 67 | std::vector fetch_jobs; 68 | 69 | while(get_jobs_and_wait(fetch_jobs, max_batch_size)){ 70 | /* 一旦进来说明有图片数据 ,获取图片的张数 */ 71 | int infer_batch_size = fetch_jobs.size(); 72 | input->resize_single_dim(0, infer_batch_size); 73 | /* 下面从队列取出job,把对应的仿射矩阵和预处理好的图片数据送到模型的输入 */ 74 | /* 其中input就是engine对象的方法,该方法实际上是把预处理的数据传给engine的内部属性inputs_ */ 75 | for(int ibatch = 0; ibatch < infer_batch_size; ++ibatch){ 76 | auto& job = fetch_jobs[ibatch]; 77 | auto& mono = job.mono_tensor->data(); 78 | input->copy_from_gpu(input->offset(ibatch), mono->gpu(), mono->count()); 79 | job.mono_tensor->release(); 80 | } 81 | /* 开始推理 */ 82 | engine->forward(false); 83 | /* 下面进行解码 */ 84 | for(int ibatch = 0; ibatch < infer_batch_size; ++ibatch){ 85 | auto& job = fetch_jobs[ibatch];/* 图片数据 */ 86 | float* image_based_output = output->gpu(ibatch); 87 | auto& image_based_boxes = job.output; 88 | 89 | std::vector result; 90 | float confidence_threshold=0.5; 91 | int num_boxes = output->size(1); 92 | for(int b=0;bcpu(b); 94 | for(int num_box=0;num_boxset_value(image_based_boxes); 125 | } 126 | fetch_jobs.clear(); 127 | } 128 | stream_ = nullptr; 129 | tensor_allocator_.reset(); 130 | INFO("Engine destroy."); 131 | 132 | } 133 | 134 | virtual bool preprocess(Job& job,const cv::Mat& image) override{ 135 | if(tensor_allocator_ == nullptr){ 136 | INFOE("tensor_allocator_ is nullptr"); 137 | return false; 138 | } 139 | 140 | job.mono_tensor = tensor_allocator_->query(); 141 | if(job.mono_tensor == nullptr){ 142 | INFOE("Tensor allocator query failed."); 143 | return false; 144 | } 145 | 146 | /* 配置gpu */ 147 | AutoDevice auto_device(gpu_); 148 | /* 获取job里面的tensor的数据地址,第一次为nullptr */ 149 | /* 这里需要理解的不是创建了新的tensor对象,只是把job的tensor地址拿出来使用,数据还是job指定的 */ 150 | auto& tensor = job.mono_tensor->data(); 151 | if(tensor == nullptr){ 152 | // not init 153 | tensor = std::make_shared(); 154 | tensor->set_workspace(std::make_shared()); 155 | } 156 | /* 把tensor和流绑定,后续都会使用这个流进行处理,流的创建也是在模型创建时创建 */ 157 | tensor->set_stream(stream_); 158 | /* 把tensor resize一下,此时的tensor还未填充数据 */ 159 | tensor->resize(1, 3, input_height_, input_width_); 160 | 161 | size_t size_image = input_width_ * input_height_ * 3; 162 | auto workspace = tensor->get_data(); 163 | float* gpu_workspace = (float*)workspace->gpu(size_image*sizeof(float)); 164 | float* image_device = gpu_workspace; 165 | 166 | float* cpu_workspace = (float*)workspace->cpu(size_image*sizeof(float)); 167 | float* image_host = cpu_workspace; 168 | 169 | std::vector data = YOLOV5::v5prepareImage(image,input_width_,input_height_); 170 | memcpy(image_host, data.data(), size_image*sizeof(float)); 171 | // checkCudaRuntime(cudaMemcpyAsync(image_device, data.data(), size_image*sizeof(float), cudaMemcpyHostToDevice, stream_)); 172 | // checkCudaRuntime(cudaMemcpyAsync(image_device, image_host, size_image*sizeof(float), cudaMemcpyHostToDevice, stream_)); 173 | checkCudaRuntime(cudaMemcpyAsync(image_device, image_host, size_image*sizeof(float), cudaMemcpyHostToDevice, stream_)); 174 | 175 | return true; 176 | } 177 | 178 | virtual std::vector> commits(const std::vector& images) override{ 179 | return ThreadSafedAsyncInferImpl::commits(images); 180 | } 181 | 182 | virtual std::shared_future commit(const cv::Mat& image) override{ 183 | return 
ThreadSafedAsyncInferImpl::commit(image); 184 | } 185 | 186 | private: 187 | int input_width_ = 0; 188 | int input_height_ = 0; 189 | int gpu_ = 0; 190 | float confidence_threshold_ = 0; 191 | float nms_threshold_ = 0; 192 | cudaStream_t stream_ = nullptr; 193 | }; 194 | 195 | std::shared_ptr create_infer(const std::string& engine_file, int gpuid, float confidence_threshold, float nms_threshold){ 196 | /* 创建一个推理实例,该实例具备了引擎的创建、加载模型,反序列化,创建线程等一系列操作, */ 197 | std::shared_ptr instance(new YoloTRTInferImpl()); 198 | if(!instance->startup(engine_file, gpuid, confidence_threshold, nms_threshold)){ 199 | instance.reset(); 200 | } 201 | return instance; 202 | } 203 | 204 | std::vector v5prepareImage(const cv::Mat &image,const int input_w,const int input_h){ 205 | 206 | int w, h, x, y; 207 | // int input_w=IMAGE_WIDTH; 208 | // int input_h=IMAGE_HEIGHT; 209 | float r_w = input_w / (image.cols*1.0); 210 | float r_h = input_h / (image.rows*1.0); 211 | if (r_h > r_w) { 212 | w = input_w; 213 | h = r_w * image.rows; 214 | x = 0; 215 | y = (input_h - h) / 2; 216 | } else { 217 | w = r_h * image.cols; 218 | h = input_h; 219 | x = (input_w - w) / 2; 220 | y = 0; 221 | } 222 | cv::Mat re(h, w, CV_8UC3); 223 | cv::resize(image, re, re.size(), 0, 0, cv::INTER_LINEAR); 224 | cv::Mat out(input_h, input_w, CV_8UC3, cv::Scalar(128, 128, 128)); 225 | re.copyTo(out(cv::Rect(x, y, re.cols, re.rows))); 226 | out.convertTo(out, CV_32FC3, 1.0 / 255); 227 | int channels=3; 228 | std::vector img; 229 | std::vector data(channels* input_h * input_w); 230 | 231 | if (out.isContinuous()) 232 | img.assign((float*)out.datastart, (float*)out.dataend); 233 | 234 | for (int c = 0; c < channels; c++) { 235 | for (int j = 0, hw = input_h * input_w; j < hw; j++) { 236 | data[c * hw + j] = img[channels * j + 2 - c]; 237 | } 238 | } 239 | return data; 240 | } 241 | 242 | float IOUCalculate(const DetectRes &det_a, const DetectRes &det_b) { 243 | cv::Point2f center_a(det_a.x, det_a.y); 244 | cv::Point2f center_b(det_b.x, det_b.y); 245 | cv::Point2f left_up(std::min(det_a.x - det_a.w / 2, det_b.x - det_b.w / 2), 246 | std::min(det_a.y - det_a.h / 2, det_b.y - det_b.h / 2)); 247 | cv::Point2f right_down(std::max(det_a.x + det_a.w / 2, det_b.x + det_b.w / 2), 248 | std::max(det_a.y + det_a.h / 2, det_b.y + det_b.h / 2)); 249 | float distance_d = (center_a - center_b).x * (center_a - center_b).x + (center_a - center_b).y * (center_a - center_b).y; 250 | float distance_c = (left_up - right_down).x * (left_up - right_down).x + (left_up - right_down).y * (left_up - right_down).y; 251 | float inter_l = det_a.x - det_a.w / 2 > det_b.x - det_b.w / 2 ? det_a.x - det_a.w / 2 : det_b.x - det_b.w / 2; 252 | float inter_t = det_a.y - det_a.h / 2 > det_b.y - det_b.h / 2 ? det_a.y - det_a.h / 2 : det_b.y - det_b.h / 2; 253 | float inter_r = det_a.x + det_a.w / 2 < det_b.x + det_b.w / 2 ? det_a.x + det_a.w / 2 : det_b.x + det_b.w / 2; 254 | float inter_b = det_a.y + det_a.h / 2 < det_b.y + det_b.h / 2 ? 
det_a.y + det_a.h / 2 : det_b.y + det_b.h / 2; 255 | if (inter_b < inter_t || inter_r < inter_l) 256 | return 0; 257 | float inter_area = (inter_b - inter_t) * (inter_r - inter_l); 258 | float union_area = det_a.w * det_a.h + det_b.w * det_b.h - inter_area; 259 | if (union_area == 0) 260 | return 0; 261 | else 262 | return inter_area / union_area - distance_d / distance_c; 263 | } 264 | 265 | void NmsDetect(std::vector &detections) { 266 | sort(detections.begin(), detections.end(), [=](const DetectRes &left, const DetectRes &right) { 267 | return left.prob > right.prob; 268 | }); 269 | 270 | for (int i = 0; i < (int)detections.size(); i++) 271 | for (int j = i + 1; j < (int)detections.size(); j++) 272 | { 273 | if (detections[i].classes == detections[j].classes) 274 | { 275 | float iou = IOUCalculate(detections[i], detections[j]); 276 | if (iou > 0.5) 277 | detections[j].prob = 0; 278 | } 279 | } 280 | 281 | detections.erase(std::remove_if(detections.begin(), detections.end(), [](const DetectRes &det) 282 | { return det.prob == 0; }), detections.end()); 283 | } 284 | 285 | 286 | 287 | } -------------------------------------------------------------------------------- /src/application/yolov5/yolo.h: -------------------------------------------------------------------------------- 1 | #ifndef YOLO_H 2 | #define YOLO_H 3 | 4 | #include 5 | #include 6 | #include 7 | namespace YOLOV5{ 8 | 9 | struct Box{ 10 | float left, top, right, bottom, confidence; 11 | int class_label; 12 | 13 | Box() = default; 14 | 15 | Box(float left, float top, float right, float bottom, float confidence, int class_label) 16 | :left(left), top(top), right(right), bottom(bottom), confidence(confidence), class_label(class_label){} 17 | }; 18 | 19 | typedef std::vector BoxArray; 20 | 21 | class Infer{ 22 | public: 23 | virtual std::shared_future commit(const cv::Mat& image) = 0; 24 | virtual std::vector> commits(const std::vector& images) = 0; 25 | }; 26 | 27 | std::shared_ptr create_infer(const std::string& engine_file, int gpuid, float confidence_threshold, float nms_threshold); 28 | 29 | struct DetectRes{ 30 | int classes; 31 | float x; 32 | float y; 33 | float w; 34 | float h; 35 | float prob; 36 | }; 37 | 38 | std::vector v5prepareImage(const cv::Mat &image,const int input_w,const int input_h); 39 | 40 | void NmsDetect(std::vector &detections); 41 | 42 | } 43 | 44 | #endif -------------------------------------------------------------------------------- /src/module/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 指定CMake版本 2 | cmake_minimum_required(VERSION 3.5) 3 | # 指定项目名称 4 | project(module) 5 | 6 | # 指定头文件目录 7 | # include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) 8 | # 指定源文件目录 9 | file(GLOB_RECURSE SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) 10 | 11 | # 打印cmake当前目录地址&源文件目录地址 12 | message(module_CMAKE_CURRENT_SOURCE_DIR => ${CMAKE_CURRENT_SOURCE_DIR}) 13 | # message( module_SOURCE_FILES => ${SOURCE_FILES}) 14 | 15 | # 设置环境变量,编译用到的源文件全部都要放到这里,否则编译能够通过, 16 | # 但是执行的时候会出现各种问题,比如"symbol lookup error xxxxx , undefined symbol" 17 | set(ALL_SRCS ${SOURCE_FILES}) 18 | # message(module_ALL_SRCS => ${ALL_SRCS}) 19 | 20 | #设置生成库保存位置 21 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/lib) 22 | 23 | # 生成so包 24 | # SHARED ->生成动态库 25 | # STATIC ->生成静态库 26 | message(module_PROJECT_NAME => ${PROJECT_NAME}) 27 | add_library(${PROJECT_NAME} SHARED ${ALL_SRCS}) -------------------------------------------------------------------------------- 
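The next file implements TRT::compile(), which parses an ONNX model, builds a TensorRT engine with an explicit-batch optimization profile (kMIN = kOPT = 1, kMAX = max_batch_size), and serializes it to disk. A minimal sketch of calling it from user code follows; it mirrors the engine-building call near the top of main.cpp. The FP16 mode and the output file name are assumptions of this sketch (FP16 only emits a warning, then still sets the flag, when fast FP16 is unavailable; INT8 currently returns false because no calibrator is wired in):

#include "trt_builder.h"   // actual include path depends on the target's include directories

int build_fp16_engine(){
    bool ok = TRT::compile(TRT::Mode::FP16,              // precision flag passed to the builder config
                           8,                             // batch size baked into the kMAX profile dimension
                           "weights/yolov5n.onnx",        // source ONNX model
                           "weights/yolov5n.fp16.engine", // hypothetical output path
                           1u << 30);                     // 1 GB workspace
    return ok ? 0 : 1;                                    // compile() returns false on any parser/builder failure
}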
/src/module/builder/trt_builder.cpp: -------------------------------------------------------------------------------- 1 | #include "trt_builder.h" 2 | #include "../common/ilogger.h" 3 | #include "../common/utils.h" 4 | #include "../common/cuda_tools.h" 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace nvinfer1; 10 | using namespace std; 11 | 12 | namespace TRT{ 13 | 14 | static string join_dims(const vector& dims){ 15 | stringstream output; 16 | char buf[64]; 17 | const char* fmts[] = {"%d", " x %d"}; 18 | for(int i = 0; i < dims.size(); ++i){ 19 | snprintf(buf, sizeof(buf), fmts[i != 0], dims[i]); 20 | output << buf; 21 | } 22 | return output.str(); 23 | } 24 | 25 | const char* mode_string(Mode type){ 26 | switch (type){ 27 | case Mode::FP32: 28 | return "FP32"; 29 | case Mode::FP16: 30 | return "FP16"; 31 | case Mode::INT8: 32 | return "INT8"; 33 | default: 34 | return "UknowCompileMode"; 35 | } 36 | } 37 | 38 | bool compile( 39 | Mode mode, 40 | unsigned int max_batch_size, 41 | const string& source_onnx, 42 | const string& saveto, 43 | size_t max_workspace_size){ 44 | 45 | if(mode == Mode::INT8){ 46 | INFOE("int8process must not nullptr, when in int8 mode."); 47 | return false; 48 | } 49 | 50 | INFO("Compile %s %s.", mode_string(mode), source_onnx.c_str()); 51 | shared_ptr builder(createInferBuilder(gLogger), destroy_nvidia_pointer); 52 | if (builder == nullptr) { 53 | INFOE("Can not create builder."); 54 | return false; 55 | } 56 | 57 | shared_ptr config(builder->createBuilderConfig(), destroy_nvidia_pointer); 58 | if (mode == Mode::FP16) { 59 | if (!builder->platformHasFastFp16()) { 60 | INFOW("Platform not have fast fp16 support"); 61 | } 62 | config->setFlag(BuilderFlag::kFP16); 63 | } 64 | else if (mode == Mode::INT8) { 65 | if (!builder->platformHasFastInt8()) { 66 | INFOW("Platform not have fast int8 support"); 67 | } 68 | config->setFlag(BuilderFlag::kINT8); 69 | } 70 | 71 | shared_ptr network; 72 | shared_ptr onnxParser; 73 | const auto explicitBatch = 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); 74 | network = shared_ptr(builder->createNetworkV2(explicitBatch), destroy_nvidia_pointer); 75 | 76 | //from onnx is not markOutput 77 | onnxParser.reset(nvonnxparser::createParser(*network, gLogger), destroy_nvidia_pointer); 78 | if (onnxParser == nullptr) { 79 | INFOE("Can not create parser."); 80 | return false; 81 | } 82 | 83 | if (!onnxParser->parseFromFile(source_onnx.c_str(), 1)) { 84 | INFOE("Can not parse OnnX file: %s", source_onnx.c_str()); 85 | return false; 86 | } 87 | 88 | auto inputTensor = network->getInput(0); 89 | auto inputDims = inputTensor->getDimensions(); 90 | 91 | INFO("Input shape is %s", join_dims(vector(inputDims.d, inputDims.d + inputDims.nbDims)).c_str()); 92 | INFO("Set max batch size = %d", max_batch_size); 93 | INFO("Set max workspace size = %.2f MB", max_workspace_size / 1024.0f / 1024.0f); 94 | 95 | int net_num_input = network->getNbInputs(); 96 | INFO("Network has %d inputs:", net_num_input); 97 | vector input_names(net_num_input); 98 | for(int i = 0; i < net_num_input; ++i){ 99 | auto tensor = network->getInput(i); 100 | auto dims = tensor->getDimensions(); 101 | auto dims_str = join_dims(vector(dims.d, dims.d+dims.nbDims)); 102 | INFO(" %d.[%s] shape is %s", i, tensor->getName(), dims_str.c_str()); 103 | 104 | input_names[i] = tensor->getName(); 105 | } 106 | 107 | int net_num_output = network->getNbOutputs(); 108 | INFO("Network has %d outputs:", net_num_output); 109 | for(int i = 0; i < net_num_output; 
++i){ 110 | auto tensor = network->getOutput(i); 111 | auto dims = tensor->getDimensions(); 112 | auto dims_str = join_dims(vector(dims.d, dims.d+dims.nbDims)); 113 | INFO(" %d.[%s] shape is %s", i, tensor->getName(), dims_str.c_str()); 114 | } 115 | 116 | int net_num_layers = network->getNbLayers(); 117 | INFO("Network has %d layers", net_num_layers); 118 | builder->setMaxBatchSize(max_batch_size); 119 | config->setMaxWorkspaceSize(max_workspace_size); 120 | 121 | auto profile = builder->createOptimizationProfile(); 122 | for(int i = 0; i < net_num_input; ++i){ 123 | auto input = network->getInput(i); 124 | auto input_dims = input->getDimensions(); 125 | input_dims.d[0] = 1; 126 | profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMIN, input_dims); 127 | profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kOPT, input_dims); 128 | input_dims.d[0] = max_batch_size; 129 | profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMAX, input_dims); 130 | } 131 | config->addOptimizationProfile(profile); 132 | 133 | INFO("Building engine..."); 134 | auto time_start = chrono::duration_cast(chrono::system_clock::now().time_since_epoch()).count(); 135 | shared_ptr engine(builder->buildEngineWithConfig(*network, *config), destroy_nvidia_pointer); 136 | if (engine == nullptr) { 137 | INFOE("engine is nullptr"); 138 | return false; 139 | } 140 | 141 | auto time_end = chrono::duration_cast(chrono::system_clock::now().time_since_epoch()).count(); 142 | INFO("Build done %lld ms !", time_end - time_start); 143 | 144 | // serialize the engine, then close everything down 145 | shared_ptr seridata(engine->serialize(), destroy_nvidia_pointer); 146 | return save_file(saveto, seridata->data(), seridata->size()); 147 | } 148 | 149 | }// namespace TRT -------------------------------------------------------------------------------- /src/module/builder/trt_builder.h: -------------------------------------------------------------------------------- 1 | #ifndef TRT_BUILDER_H 2 | #define TRT_BUILDER_H 3 | 4 | #include 5 | #include 6 | 7 | namespace TRT{ 8 | 9 | enum class Mode:int{ 10 | FP32, 11 | FP16, 12 | INT8 13 | }; 14 | 15 | bool compile( 16 | Mode mode, 17 | unsigned int max_batch_size, 18 | const std::string& source_onnx, 19 | const std::string& saveto, 20 | size_t max_workspace_size = 1<<30 21 | ); 22 | 23 | }// namespace TRT 24 | 25 | #endif -------------------------------------------------------------------------------- /src/module/common/cuda_tools.cpp: -------------------------------------------------------------------------------- 1 | #include "cuda_tools.h" 2 | 3 | bool check_runtime(cudaError_t e, const char* call, int line, const char *file){ 4 | if (e != cudaSuccess) { 5 | INFOE("CUDA Runtime error %s # %s, code = %s [ %d ] in file %s:%d", call, cudaGetErrorString(e), cudaGetErrorName(e), e, file, line); 6 | return false; 7 | } 8 | return true; 9 | } 10 | 11 | bool check_device_id(int device_id){ 12 | int device_count = -1; 13 | checkCudaRuntime(cudaGetDeviceCount(&device_count)); 14 | if(device_id < 0 || device_id >= device_count){ 15 | INFOE("Invalid device id: %d, count = %d", device_id, device_count); 16 | return false; 17 | } 18 | return true; 19 | } 20 | 21 | AutoDevice::AutoDevice(int device_id){ 22 | 23 | cudaGetDevice(&old_); 24 | checkCudaRuntime(cudaSetDevice(device_id)); 25 | } 26 | 27 | AutoDevice::~AutoDevice(){ 28 | checkCudaRuntime(cudaSetDevice(old_)); 29 | } 
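// ---------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of the library): checkCudaRuntime
// wraps any CUDA runtime call, logging the call text, file:line and the CUDA
// error string when it fails; AutoDevice switches the current device for the
// enclosing scope and restores the previous one in its destructor.
// ---------------------------------------------------------------------------
static void cuda_tools_usage_example(){
    AutoDevice guard(0);                          // cudaSetDevice(0) now, restore the old device on scope exit
    float* device_buffer = nullptr;
    checkCudaRuntime(cudaMalloc((void**)&device_buffer, 1024 * sizeof(float)));
    checkCudaRuntime(cudaMemset(device_buffer, 0, 1024 * sizeof(float)));
    checkCudaRuntime(cudaFree(device_buffer));
}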
-------------------------------------------------------------------------------- /src/module/common/cuda_tools.h: -------------------------------------------------------------------------------- 1 | #ifndef CUDA_TOOLS_H 2 | #define CUDA_TOOLS_H 3 | 4 | #include "ilogger.h" 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace nvinfer1; 10 | 11 | #define Assert(op) \ 12 | do{ \ 13 | bool cond = !(!(op)); \ 14 | if(!cond){ \ 15 | INFOF("Assert failed, " #op); \ 16 | } \ 17 | }while(false) 18 | 19 | #define checkCudaRuntime(call) check_runtime(call, #call, __LINE__, __FILE__) 20 | 21 | bool check_runtime(cudaError_t e, const char* call, int iLine, const char *szFile); 22 | 23 | bool check_device_id(int device_id); 24 | 25 | class Logger : public ILogger { 26 | public: 27 | virtual void log(Severity severity, const char* msg) noexcept override { 28 | 29 | if (severity == Severity::kINTERNAL_ERROR) { 30 | INFOE("NVInfer INTERNAL_ERROR: %s", msg); 31 | abort(); 32 | }else if (severity == Severity::kERROR) { 33 | INFOE("NVInfer: %s", msg); 34 | } 35 | else if (severity == Severity::kWARNING) { 36 | INFOW("NVInfer: %s", msg); 37 | } 38 | else if (severity == Severity::kINFO) { 39 | INFOD("NVInfer: %s", msg); 40 | } 41 | else { 42 | INFOD("%s", msg); 43 | } 44 | } 45 | }; 46 | 47 | static Logger gLogger; 48 | 49 | template 50 | static void destroy_nvidia_pointer(_T* ptr) { 51 | if (ptr) ptr->destroy(); 52 | } 53 | 54 | /* 构造时设置当前gpuid,析构时修改为原来的gpuid */ 55 | class AutoDevice{ 56 | public: 57 | AutoDevice(int device_id = 0); 58 | virtual ~AutoDevice(); 59 | 60 | private: 61 | int old_ = -1; 62 | }; 63 | 64 | #endif -------------------------------------------------------------------------------- /src/module/common/ilogger.cpp: -------------------------------------------------------------------------------- 1 | #include "ilogger.h" 2 | #include 3 | #include 4 | 5 | using namespace std; 6 | 7 | namespace iLogger{ 8 | static string file_name(const string& path, bool include_suffix){ 9 | 10 | if (path.empty()) return ""; 11 | 12 | int p = path.rfind('/'); 13 | 14 | p += 1; 15 | 16 | //include suffix 17 | if (include_suffix) 18 | return path.substr(p); 19 | 20 | int u = path.rfind('.'); 21 | if (u == -1) 22 | return path.substr(p); 23 | 24 | if (u <= p) u = path.size(); 25 | return path.substr(p, u - p); 26 | } 27 | 28 | static const char* level_string(LogLevel level){ 29 | switch (level){ 30 | case LogLevel::Debug: return "debug"; 31 | case LogLevel::Verbose: return "verbo"; 32 | case LogLevel::Info: return "info"; 33 | case LogLevel::Warning: return "warn"; 34 | case LogLevel::Error: return "error"; 35 | case LogLevel::Fatal: return "fatal"; 36 | default: return "unknow"; 37 | } 38 | } 39 | 40 | void __log_func(const char* file, int line, LogLevel level, const char* fmt, ...){ 41 | 42 | if(level > CURRENT_LOG_LEVEL) 43 | return; 44 | 45 | va_list vl; 46 | va_start(vl, fmt); 47 | 48 | char buffer[2048]; 49 | string filename = file_name(file, true); 50 | int n = snprintf(buffer, sizeof(buffer), "[%s][%s:%d]:", level_string(level), filename.c_str(), line); 51 | vsnprintf(buffer + n, sizeof(buffer) - n, fmt, vl); 52 | 53 | fprintf(stdout, "%s\n", buffer); 54 | if (level == LogLevel::Fatal) { 55 | fflush(stdout); 56 | abort(); 57 | } 58 | } 59 | 60 | } -------------------------------------------------------------------------------- /src/module/common/ilogger.h: -------------------------------------------------------------------------------- 1 | #ifndef ILOGGER_HPP 2 | #define ILOGGER_HPP 3 | 4 | 
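// Minimal printf-style logging used across the project: the INFO* macros below
// capture the call site (__FILE__/__LINE__), messages more verbose than
// CURRENT_LOG_LEVEL are dropped, and INFOF (Fatal) flushes stdout and aborts
// the process (see ilogger.cpp).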
namespace iLogger{ 5 | 6 | enum class LogLevel : int{ 7 | Debug = 5, 8 | Verbose = 4, 9 | Info = 3, 10 | Warning = 2, 11 | Error = 1, 12 | Fatal = 0 13 | }; 14 | 15 | /* 修改这个level来实现修改日志输出级别 */ 16 | #define CURRENT_LOG_LEVEL LogLevel::Info 17 | // 可变参数宏__VA_ARGS__: 宏可以接受可变数目的参数 18 | #define INFOD(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Debug, __VA_ARGS__) 19 | #define INFOV(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Verbose, __VA_ARGS__) 20 | #define INFO(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Info, __VA_ARGS__) 21 | #define INFOW(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Warning, __VA_ARGS__) 22 | #define INFOE(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Error, __VA_ARGS__) 23 | #define INFOF(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Fatal, __VA_ARGS__) 24 | 25 | void __log_func(const char* file, int line, LogLevel level, const char* fmt, ...); 26 | 27 | } 28 | 29 | #endif -------------------------------------------------------------------------------- /src/module/common/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | 4 | #include 5 | 6 | namespace TRT{ 7 | 8 | template 9 | struct TrtDestroyer 10 | { 11 | void operator()(T* t) 12 | { 13 | t->destroy(); 14 | } 15 | }; 16 | 17 | template 18 | using TrtUniquePtr = std::unique_ptr>; 19 | 20 | static bool save_file(const std::string& file, const void* data, size_t length){ 21 | 22 | FILE* f = fopen(file.c_str(), "wb"); 23 | if (!f) return false; 24 | 25 | if (data && length > 0){ 26 | if (fwrite(data, 1, length, f) != length){ 27 | fclose(f); 28 | return false; 29 | } 30 | } 31 | fclose(f); 32 | return true; 33 | } 34 | 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /src/module/core/async_infer.h: -------------------------------------------------------------------------------- 1 | #ifndef ASYNC_INFER_H 2 | #define ASYNC_INFER_H 3 | 4 | #include 5 | #include "monopoly_allocator.h" 6 | #include "trt_tensor.h" 7 | 8 | template,class JobAdditional=int> 9 | class ThreadSafeAsyncInfer{ 10 | public: 11 | struct Job{ 12 | Input input; 13 | Output output; 14 | JobAdditional additional; 15 | MonopolyAllocator::MonopolyDataPointer mono_tensor; 16 | std::shared_ptr> pro; 17 | }; 18 | 19 | virtual ~ThreadSafeAsyncInfer(){ 20 | stop(); 21 | } 22 | 23 | void stop(){ 24 | run_ = false; 25 | cond_.notify_all(); 26 | { 27 | std::unique_lock l(jobs_lock_); 28 | while(!jobs_.empty()){ 29 | auto& item = jobs_.front(); 30 | if(item.pro) 31 | item.pro->set_value(Output()); 32 | jobs_.pop(); 33 | } 34 | }; 35 | 36 | if(worker_){ 37 | worker_->join(); 38 | worker_.reset(); 39 | } 40 | } 41 | 42 | bool startup(const StartParam& param){ 43 | run_ = true; 44 | std::promise pro; 45 | start_param_ = param; 46 | worker_ = std::make_shared(&ThreadSafeAsyncInfer::worker,this,std::ref(pro)); 47 | return pro.get_future().get(); 48 | } 49 | 50 | virtual std::shared_future commit(const Input& input){ 51 | Job job; 52 | job.pro = std::make_shared>(); 53 | if(!preprocess(job,input)){ 54 | job.pro->set_value(Output()); 55 | return job.pro->get_future(); 56 | } 57 | 58 | { 59 | std::unique_lock l(jobs_lock_); 60 | jobs_.push(job); 61 | }; 62 | cond_.notify_one(); 63 | return job.pro->get_future(); 64 | } 65 | 66 | virtual std::vector> commits(const std::vector& inputs){ 67 | int batch_size = 
std::min((int)inputs.size(),this->tensor_allocator_->capacity()); 68 | std::vector jobs(inputs.size()); 69 | std::vector> results(inputs.size()); 70 | 71 | int nepoch = (inputs.size() + batch_size - 1)/batch_size; 72 | for(int epoch = 0;epoch < nepoch; ++ epoch){ 73 | int begin = epoch * batch_size; 74 | int end = std::min((int)inputs.size(),begin + batch_size); 75 | for(int i = begin; i < end; ++i){ 76 | Job& job = jobs[i]; 77 | job.pro = std::make_shared>(); 78 | if(!preprocess(job,inputs[i])){ 79 | job.pro->set_value(Output()); 80 | } 81 | results[i] = job.pro->get_future(); 82 | } 83 | 84 | { 85 | std::unique_lock l(jobs_lock_); 86 | for(int i = begin; i < end; ++i){ 87 | jobs_.emplace(std::move(jobs[i])); 88 | }; 89 | } 90 | cond_.notify_one(); 91 | } 92 | return results; 93 | } 94 | 95 | protected: 96 | virtual void worker(std::promise& result) = 0; 97 | virtual bool preprocess(Job& job,const Input& input) = 0; 98 | 99 | virtual bool get_jobs_and_wait(std::vector& fetch_jobs,int max_size){ 100 | std::unique_lock l(jobs_lock_); 101 | cond_.wait(l, [&](){ 102 | return !run_ || !jobs_.empty(); 103 | }); 104 | if(!run_) return false; 105 | 106 | fetch_jobs.clear(); 107 | for(int i = 0;i < max_size && !jobs_.empty();++i){ 108 | fetch_jobs.emplace_back(std::move(jobs_.front())); 109 | jobs_.pop(); 110 | } 111 | return true; 112 | } 113 | 114 | virtual bool get_job_and_wait(Job& fetch_job){ 115 | std::unique_lock l(jobs_lock_); 116 | cond_.wait(l,[&](){ 117 | return !run_ || !jobs_.empty(); 118 | }); 119 | 120 | if(!run_) return false; 121 | 122 | fetch_job = std::move(jobs_.front()); 123 | jobs_.pop(); 124 | return true; 125 | } 126 | 127 | StartParam start_param_; 128 | std::atomic run_; 129 | std::mutex jobs_lock_; 130 | std::queue jobs_; 131 | std::shared_ptr worker_; 132 | std::condition_variable cond_; 133 | std::shared_ptr> tensor_allocator_; 134 | }; 135 | 136 | 137 | #endif -------------------------------------------------------------------------------- /src/module/core/monopoly_allocator.h: -------------------------------------------------------------------------------- 1 | #ifndef MONOPOLY_ALLOCATOR_H 2 | #define MONOPOLY_ALLOCATOR_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | class MonopolyAllocator{ 11 | public: 12 | class MonopolyData{ 13 | public: 14 | std::shared_ptr<_ItemType>& data(){return data_;} 15 | void release(){manager_->release_one(this);} 16 | 17 | private: 18 | MonopolyData(MonopolyAllocator* pmanager){manager_ = pmanager;} 19 | 20 | friend class MonopolyAllocator; 21 | MonopolyAllocator* manager_ = nullptr; 22 | std::shared_ptr<_ItemType> data_; 23 | bool available_ = true; 24 | }; 25 | typedef std::shared_ptr MonopolyDataPointer; 26 | 27 | MonopolyAllocator(int size){ 28 | capacity_ = size; 29 | num_available_ = size; 30 | datas_.resize(size); 31 | 32 | for(int i = 0;i < size; ++i){ 33 | datas_[i] = std::shared_ptr(new MonopolyData(this)); 34 | } 35 | } 36 | 37 | virtual ~MonopolyAllocator(){ 38 | run_ = false; 39 | cv_.notify_all(); // 唤醒所有的等待(wait)线程。如果当前没有等待线程,则该函数什么也不做。 40 | 41 | std::unique_lock l(lock_); 42 | // lambda函数 [外部变量访问方式说明符](参数表){语句块} 43 | // 当收到其他线程通知且num_wait_thread_ == 0时,才解除阻塞 44 | // 当num_wait_thread_ != 0时,阻塞当前线程 45 | cv_exit_.wait(l,[&](){ 46 | return num_wait_thread_ == 0; 47 | }); 48 | } 49 | 50 | MonopolyDataPointer query(int timeout = 10000){ 51 | std::unique_lock l(lock_); 52 | if(!run_) return nullptr; 53 | 54 | if(num_available_ == 0){ 55 | num_wait_thread_++; 56 | 57 | // 指定一个时间,超时后且返回值为true才解除阻塞 
58 | auto state = cv_.wait_for(l,std::chrono::milliseconds(timeout),[&](){ 59 | return num_available_ > 0 || !run_; 60 | }); 61 | 62 | num_wait_thread_--; 63 | // 唤醒某个等待(wait)线程。如果当前没有等待线程,则该函数什么也不做,如果同时存在多个等待线程,则唤醒某个线程是不确定的(unspecified)。 64 | cv_exit_.notify_one(); 65 | 66 | if(!state || num_available_ == 0 || !run_) 67 | return nullptr; 68 | } 69 | 70 | auto item = std::find_if(datas_.begin(),datas_.end(),[](MonopolyDataPointer& item){ 71 | return item->available_; 72 | }); 73 | if(item == datas_.end()) 74 | return nullptr; 75 | 76 | (*item)->available_ = false; 77 | num_available_--; 78 | return *item; 79 | } 80 | 81 | int num_available(){ 82 | return num_available_; 83 | } 84 | 85 | int capacity(){ 86 | return capacity_; 87 | } 88 | 89 | private: 90 | void release_one(MonopolyData* prq){ 91 | std::unique_lock l(lock_); 92 | if(!prq->available_){ 93 | prq->available_ = true; 94 | num_available_++; 95 | cv_.notify_one(); 96 | } 97 | 98 | } 99 | // 互斥锁 100 | std::mutex lock_; 101 | // 条件变量 102 | std::condition_variable cv_; 103 | std::condition_variable cv_exit_; 104 | std::vector datas_; 105 | int capacity_ = 0; 106 | /* 107 | volatile提醒编译器它后面所定义的变量随时都有可能改变, 108 | 因此编译后的程序每次需要存储或读取这个变量的时候,告诉编译器对该变量不做优化, 109 | 都会直接从变量内存地址中读取数据,从而可以提供对特殊地址的稳定访问。 110 | 111 | 如果没有volatile关键字,则编译器可能优化读取和存储, 112 | 可能暂时使用寄存器中的值,如果这个变量由别的程序更新了的话, 113 | 将出现不一致的现象。 114 | (简洁的说就是:volatile关键词影响编译器编译的结果, 115 | 用volatile声明的变量表示该变量随时可能发生变化, 116 | 与该变量有关的运算,不要进行编译优化,以免出错) 117 | */ 118 | volatile int num_available_ = 0; 119 | volatile int num_wait_thread_ = 0; 120 | volatile bool run_ = true; 121 | }; 122 | 123 | #endif -------------------------------------------------------------------------------- /src/module/core/trt_tensor.cpp: -------------------------------------------------------------------------------- 1 | // #include 2 | #include 3 | #include "trt_tensor.h" 4 | #include "../common/cuda_tools.h" 5 | 6 | using namespace std; 7 | namespace TRT{ 8 | 9 | inline static int get_device(int device_id){ 10 | if(device_id != CURRENT_DEVICE_ID){ 11 | check_device_id(device_id); 12 | return device_id; 13 | } 14 | 15 | checkCudaRuntime(cudaGetDevice(&device_id)); 16 | return device_id; 17 | } 18 | 19 | MixMemory::MixMemory(int device_id){ 20 | device_id_ = get_device(device_id); 21 | } 22 | 23 | MixMemory::MixMemory(void* cpu,size_t cpu_size,void* gpu,size_t gpu_size){ 24 | reference_data(cpu,cpu_size,gpu,gpu_size); 25 | } 26 | 27 | void MixMemory::reference_data(void* cpu,size_t cpu_size,void* gpu,size_t gpu_size){ 28 | release_all(); 29 | 30 | if(cpu == nullptr || cpu_size == 0){ 31 | cpu = nullptr; 32 | cpu_size = 0; 33 | } 34 | 35 | if(gpu ==nullptr || gpu_size == 0){ 36 | gpu = nullptr; 37 | gpu_size = 0; 38 | } 39 | 40 | this->cpu_ = cpu; 41 | this->cpu_size_ = cpu_size; 42 | this->gpu_ = gpu; 43 | this->gpu_size_ = gpu_size; 44 | 45 | // 判断内存块是否属于MixMemory管理 46 | this->owner_cpu_ = !(cpu && cpu_size > 0); 47 | this->owner_gpu_ = !(gpu && gpu_size > 0); 48 | checkCudaRuntime(cudaGetDevice(&device_id_)); 49 | } 50 | 51 | MixMemory::~MixMemory(){ 52 | release_all(); 53 | } 54 | 55 | void* MixMemory::gpu(size_t size){ 56 | if(gpu_size_ < size){ 57 | release_gpu(); 58 | 59 | gpu_size_ = size; 60 | AutoDevice auto_device_exchange(device_id_); 61 | checkCudaRuntime(cudaMalloc(&gpu_,size)); 62 | checkCudaRuntime(cudaMemset(gpu_,0,size)); 63 | } 64 | return gpu_; 65 | } 66 | 67 | void* MixMemory::cpu(size_t size){ 68 | if(cpu_size_ < size){ 69 | release_cpu(); 70 | 71 | cpu_size_ = size; 72 | AutoDevice 
auto_device_exchange(device_id_); 73 | // 锁页内存 74 | checkCudaRuntime(cudaMallocHost(&cpu_,size)); 75 | Assert(cpu_ != nullptr); 76 | memset(cpu_,0,size); 77 | } 78 | return cpu_; 79 | } 80 | 81 | void MixMemory::release_cpu(){ 82 | if(cpu_){ 83 | if(owner_cpu_){ 84 | AutoDevice auto_device_exchange(device_id_); 85 | checkCudaRuntime(cudaFreeHost(cpu_)); 86 | } 87 | cpu_ = nullptr; 88 | } 89 | cpu_size_ = 0; 90 | } 91 | 92 | void MixMemory::release_gpu(){ 93 | if(gpu_){ 94 | if(owner_gpu_){ 95 | AutoDevice auto_device_exchange(device_id_); 96 | checkCudaRuntime(cudaFree(gpu_)); 97 | } 98 | gpu_ = nullptr; 99 | } 100 | gpu_size_ = 0; 101 | } 102 | 103 | void MixMemory::release_all(){ 104 | release_cpu(); 105 | release_gpu(); 106 | } 107 | 108 | const char* data_head_string(DataHead dh){ 109 | switch(dh){ 110 | case DataHead::Init: return "Init"; 111 | case DataHead::Device: return "Device"; 112 | case DataHead::Host: return "Host"; 113 | default: return "Unknow"; 114 | } 115 | } 116 | 117 | Tensor::Tensor(int n,int c,int h,int w,shared_ptr data,int device_id){ 118 | this->device_id_ = get_device(device_id); 119 | descriptor_string_[0]=0; 120 | setup_data(data); 121 | resize(n,c,h,w); 122 | } 123 | 124 | Tensor::Tensor(const vector& dims,shared_ptr data,int device_id){ 125 | this->device_id_ = get_device(device_id); 126 | descriptor_string_[0]=0; 127 | setup_data(data); 128 | resize(dims); 129 | } 130 | 131 | Tensor::Tensor(int ndims,const int* dims,shared_ptr data,int device_id){ 132 | this->device_id_ = get_device(device_id); 133 | descriptor_string_[0] = 0; 134 | setup_data(data); 135 | resize(ndims, dims); 136 | } 137 | 138 | Tensor::Tensor(shared_ptr data,int device_id){ 139 | shape_string_[0]=0; 140 | descriptor_string_[0]=0; 141 | this->device_id_=get_device(device_id); 142 | setup_data(data); 143 | } 144 | 145 | Tensor::~Tensor(){ 146 | release(); 147 | } 148 | 149 | const char* Tensor::descriptor() const{ 150 | char* descriptor_ptr = (char*)descriptor_string_; 151 | int device_id = device(); 152 | snprintf(descriptor_ptr,sizeof(descriptor_string_), 153 | "Tensor:%p, %s, CUDA:%d", 154 | data_.get(), 155 | shape_string_, 156 | device_id 157 | ); 158 | return descriptor_ptr; 159 | } 160 | 161 | Tensor& Tensor::compute_shape_string(){ 162 | shape_string_[0] = 0; 163 | char* buffer = shape_string_; 164 | size_t buffer_size = sizeof(shape_string_); 165 | for(int i=0;i data){ 179 | data_ = data; 180 | if(data_ ==nullptr){ 181 | data_ = make_shared(device_id_); 182 | }else{ 183 | device_id_ = data_->device_id(); 184 | } 185 | 186 | head_ = DataHead::Init; 187 | if(data_->cpu()){ 188 | head_ = DataHead::Host; 189 | } 190 | 191 | if(data_->gpu()){ 192 | head_ = DataHead::Device; 193 | } 194 | } 195 | 196 | Tensor& Tensor::copy_from_gpu(size_t offset,const void* src,size_t num_element,int device_id){ 197 | if(head_ == DataHead::Init) 198 | to_gpu(false); 199 | 200 | size_t offset_location = offset * element_size(); 201 | if(offset_location >= bytes_){ 202 | INFOE("Offset location[%lld] >= bytes_[%lld], out of range", offset_location, bytes_); 203 | return *this; 204 | } 205 | 206 | size_t copyed_bytes = num_element * element_size(); 207 | size_t remain_bytes = bytes_ - offset_location; 208 | if(copyed_bytes > remain_bytes){ 209 | INFOE("Copyed bytes[%lld] > remain bytes[%lld], out of range", copyed_bytes, remain_bytes); 210 | return *this; 211 | } 212 | 213 | if(head_ == DataHead::Device){ 214 | int current_device_id = get_device(device_id); 215 | int gpu_device_id = device(); 216 | 
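// Source buffer lives on another GPU: use an async peer-to-peer copy;
// otherwise use a plain device-to-device async copy on this tensor's stream.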
if(current_device_id != gpu_device_id){ 217 | checkCudaRuntime(cudaMemcpyPeerAsync(gpu() + offset_location, gpu_device_id, src, current_device_id, copyed_bytes, stream_)); 218 | } 219 | else{ 220 | checkCudaRuntime(cudaMemcpyAsync(gpu() + offset_location, src, copyed_bytes, cudaMemcpyDeviceToDevice, stream_)); 221 | } 222 | }else if(head_ == DataHead::Host){ 223 | AutoDevice auto_device_exchange(this->device()); 224 | checkCudaRuntime(cudaMemcpyAsync(cpu() + offset_location, src, copyed_bytes, cudaMemcpyDeviceToHost, stream_)); 225 | }else{ 226 | INFOE("Unsupport head type %d", head_); 227 | } 228 | return *this; 229 | } 230 | 231 | Tensor& Tensor::release(){ 232 | data_->release_all(); 233 | shape_.clear(); 234 | bytes_ = 0; 235 | head_ = DataHead::Init; 236 | return *this; 237 | } 238 | 239 | bool Tensor::empty() const{ 240 | return data_->cpu() == nullptr && data_->gpu() == nullptr; 241 | } 242 | 243 | int Tensor::count(int start_axis) const{ 244 | if(start_axis >=0 && start_axis < shape_.size()){ 245 | int size = 1; 246 | for(int i=start_axis;i& dims){ 256 | return resize(dims.size(),dims.data()); 257 | } 258 | 259 | int Tensor::numel() const{ 260 | int value = shape_.empty() ? 0 : 1; 261 | for(int i=0;i= 0 && idim < shape_.size()); 269 | 270 | auto new_shape = shape_; 271 | new_shape[idim] = size; 272 | return resize(new_shape); 273 | } 274 | 275 | Tensor& Tensor::resize(int ndims,const int* dims){ 276 | vector setup_dims(ndims); 277 | for(int i=0;ishape_ = setup_dims; 286 | this->adajust_memory_by_update_dims_or_type(); 287 | this->compute_shape_string(); 288 | return *this; 289 | } 290 | 291 | Tensor& Tensor::adajust_memory_by_update_dims_or_type(){ 292 | int needed_size = this->numel() * element_size(); 293 | if(needed_size > this->bytes_){ 294 | head_ = DataHead::Init; 295 | } 296 | this->bytes_ = needed_size; 297 | return *this; 298 | } 299 | 300 | Tensor& Tensor::synchronize(){ 301 | AutoDevice auto_device_exchange(this->device()); 302 | checkCudaRuntime(cudaStreamSynchronize(stream_)); 303 | return *this; 304 | } 305 | 306 | Tensor& Tensor::to_gpu(bool copy){ 307 | if(head_ == DataHead::Device) 308 | return *this; 309 | head_ = DataHead::Device; 310 | data_->gpu(bytes_); 311 | if (copy && data_->cpu() != nullptr){ 312 | AutoDevice auto_device_exchange(this->device()); 313 | checkCudaRuntime(cudaMemcpyAsync(data_->gpu(),data_->cpu(),bytes_,cudaMemcpyHostToDevice,stream_)); 314 | } 315 | return *this; 316 | } 317 | 318 | Tensor& Tensor::to_cpu(bool copy){ 319 | if(head_ == DataHead::Host) 320 | return *this; 321 | 322 | head_ = DataHead::Host; 323 | data_->cpu(bytes_); 324 | if(copy && data_->gpu() != nullptr){ 325 | AutoDevice auto_device_exchange(this->device()); 326 | checkCudaRuntime(cudaMemcpyAsync(data_->cpu(), data_->gpu(), bytes_, cudaMemcpyDeviceToHost, stream_)); 327 | checkCudaRuntime(cudaStreamSynchronize(stream_)); 328 | } 329 | return *this; 330 | } 331 | 332 | int Tensor::offset_array(size_t size,const int* index_array) const{ 333 | Assert(size <= shape_.size()); 334 | int value = 0; 335 | for(int i=0;i& index_array) const{ 346 | return offset_array(index_array.size(),index_array.data()); 347 | } 348 | 349 | } -------------------------------------------------------------------------------- /src/module/core/trt_tensor.h: -------------------------------------------------------------------------------- 1 | #ifndef TRT_TENSOR_H 2 | #define TRT_TENSOR_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define CURRENT_DEVICE_ID -1 10 | 11 | struct 
CUstream_st; 12 | typedef CUstream_st CUStreamRaw; 13 | 14 | 15 | namespace TRT{ 16 | 17 | typedef CUStreamRaw *CUStream; 18 | 19 | enum class DataHead:int { 20 | Init = 0, 21 | Device = 1, 22 | Host = 2 23 | }; 24 | 25 | const char* data_head_string(DataHead dh); 26 | 27 | // cpu和gpu混合内存管理 28 | class MixMemory{ 29 | public: 30 | MixMemory(int device_id = CURRENT_DEVICE_ID); 31 | MixMemory(void* cpu,size_t cpu_size,void* gpu,size_t gpu_size); 32 | // 虚函数 33 | virtual ~MixMemory(); 34 | 35 | void* gpu(size_t size); 36 | void* cpu(size_t size); 37 | void release_gpu(); 38 | void release_cpu(); 39 | void release_all(); 40 | 41 | inline bool owner_gpu() const{return owner_gpu_;} 42 | inline bool owner_cpu() const{return owner_cpu_;} 43 | inline size_t cpu_size() const{return cpu_size_;} 44 | inline size_t gpu_size() const{return gpu_size_;} 45 | inline int device_id() const{return device_id_;} 46 | inline void* gpu() const{return gpu_;} 47 | inline void* cpu() const{return cpu_;} 48 | 49 | void reference_data(void* cpu,size_t cpu_size,void* gpu,size_t gpu_size); 50 | 51 | private: 52 | void* cpu_ = nullptr; 53 | size_t cpu_size_ = 0; 54 | bool owner_cpu_ = true; 55 | 56 | int device_id_ = 0; 57 | void* gpu_ = nullptr; 58 | size_t gpu_size_ = 0; 59 | bool owner_gpu_ = true; 60 | }; 61 | 62 | class Tensor{ 63 | public: 64 | // = delete --> 禁用成员函数 65 | Tensor(const Tensor& other) = delete; 66 | Tensor& operator =(const Tensor& other) = delete; 67 | // explicit阻止隐式转换 68 | explicit Tensor(std::shared_ptr data = nullptr,int device_id = CURRENT_DEVICE_ID); 69 | explicit Tensor(int n,int c,int h,int w,std::shared_ptr data = nullptr, int device_id = CURRENT_DEVICE_ID); 70 | explicit Tensor(int ndims,const int* dims,std::shared_ptr data = nullptr, int device_id = CURRENT_DEVICE_ID); 71 | explicit Tensor(const std::vector& dims,std::shared_ptr data = nullptr,int device_id = CURRENT_DEVICE_ID); 72 | virtual ~Tensor(); 73 | 74 | int numel() const; 75 | inline int ndims() const{return shape_.size();} 76 | inline int size(int index) const{return shape_[index];} 77 | inline int shape(int index) const{return shape_[index];} 78 | 79 | inline int batch() const{return shape_[0];} 80 | inline int channel() const{return shape_[1];} 81 | inline int height() const{return shape_[2];} 82 | inline int width() const{return shape_[3];} 83 | 84 | inline const std::vector& dims() const{return shape_;} 85 | inline int bytes() const{return bytes_;} 86 | inline int bytes(int start_axis) const{return count(start_axis)*element_size();} 87 | inline int element_size() const{return sizeof(float);} 88 | inline DataHead head() const{return head_;} 89 | 90 | std::shared_ptr clone() const; 91 | Tensor& release(); 92 | Tensor& set_to(float value); 93 | bool empty() const; 94 | 95 | // 想要知道包中有多少元素时,可以使用sizeof...运算符,该运算符返回一个常量表达式,并且不会对其实参求值 96 | template 97 | int offset(int index,_Args... index_args){ 98 | const int index_array[] = {index,index_args...}; 99 | return offset_array(sizeof...(index_args)+1,index_array); 100 | } 101 | 102 | int offset_array(const std::vector& index) const; 103 | int offset_array(size_t size,const int* index_array) const; 104 | 105 | template 106 | Tensor& resize(int dim_size,_Args... 
dim_size_args){ 107 | const int dim_size_array[] = {dim_size,dim_size_args...}; 108 | return resize(sizeof...(dim_size_args)+1,dim_size_array); 109 | } 110 | 111 | Tensor& resize(int ndims,const int* dims); 112 | Tensor& resize(const std::vector& dims); 113 | Tensor& resize_single_dim(int idim,int size); 114 | int count(int start_axis = 0) const; 115 | int device() const{return device_id_;} 116 | 117 | Tensor& to_gpu(bool copy=true); 118 | Tensor& to_cpu(bool copy=true); 119 | inline void* cpu() const{((Tensor*)this)->to_cpu();return data_->cpu();} 120 | inline void* gpu() const{((Tensor*)this)->to_gpu();return data_->gpu();} 121 | 122 | template 123 | inline const DType* cpu() const{return (DType*)cpu();} 124 | template 125 | inline DType* cpu() {return (DType*)cpu();} 126 | template 127 | inline DType* cpu(int i,_Args&&... args) {return cpu()+offset(i,args...);} 128 | 129 | template 130 | inline const DType* gpu() const{return (DType*)gpu();} 131 | template 132 | inline DType* gpu() {return (DType*)gpu();} 133 | template 134 | inline DType* gpu(int i,_Args&&... args) {return gpu()+offset(i,args...);} 135 | 136 | template 137 | inline DType& at(int i,_Args&&... args) {return *(gpu()+offset(i,args...));} 138 | 139 | std::shared_ptr get_data() const{return data_;} 140 | std::shared_ptr get_workspace() const{return workspace_;} 141 | Tensor& set_workspace(std::shared_ptr workspace) {workspace_ = workspace; return *this;} 142 | 143 | CUStream get_stream() const{return stream_;} 144 | Tensor& set_stream(CUStream stream) {stream_ = stream;return *this;} 145 | 146 | Tensor& set_mat (int n,const cv::Mat& image); 147 | Tensor& set_norm_mat (int n,const cv::Mat& image,float mean[3],float std[3]); 148 | cv::Mat at_mat(int n=0,int c=0) {return cv::Mat(height(),width(),CV_32F,cpu(n,c));} 149 | 150 | Tensor& synchronize(); 151 | const char* shape_string() const{return shape_string_;} 152 | const char* descriptor() const; 153 | 154 | Tensor& copy_from_gpu(size_t offset,const void* src,size_t num_element,int device_id = CURRENT_DEVICE_ID); 155 | 156 | private: 157 | Tensor& compute_shape_string(); 158 | Tensor& adajust_memory_by_update_dims_or_type(); 159 | void setup_data(std::shared_ptr data); 160 | 161 | std::vector shape_; 162 | size_t bytes_ = 0; 163 | DataHead head_ = DataHead::Init; 164 | CUStream stream_ = nullptr; 165 | int device_id_ = 0; 166 | char shape_string_[100]; 167 | char descriptor_string_[100]; 168 | std::shared_ptr data_; 169 | std::shared_ptr workspace_; 170 | }; 171 | 172 | } 173 | 174 | #endif -------------------------------------------------------------------------------- /src/module/infer/trt_infer.cpp: -------------------------------------------------------------------------------- 1 | #include "trt_infer.h" 2 | #include 3 | 4 | namespace TRT 5 | { 6 | 7 | static std::vector load_file(const std::string& file){ 8 | 9 | std::ifstream in(file, std::ios::in | std::ios::binary); 10 | if (!in.is_open()) 11 | return {}; 12 | 13 | in.seekg(0, std::ios::end); 14 | size_t length = in.tellg(); 15 | 16 | std::vector data; 17 | if (length > 0){ 18 | in.seekg(0, std::ios::beg); 19 | data.resize(length); 20 | 21 | in.read((char*)&data[0], length); 22 | } 23 | in.close(); 24 | return data; 25 | } 26 | 27 | TRTInferImpl::~TRTInferImpl(){ 28 | destroy(); 29 | } 30 | 31 | void TRTInferImpl::destroy(){ 32 | int old_device = 0; 33 | checkCudaRuntime(cudaGetDevice(&old_device)); 34 | checkCudaRuntime(cudaSetDevice(device_)); 35 | this->context_.reset(); 36 | this->blobsNameMapper_.clear(); 37 | 
this->outputs_.clear(); 38 | this->inputs_.clear(); 39 | this->inputs_name_.clear(); 40 | this->outputs_name_.clear(); 41 | checkCudaRuntime(cudaSetDevice(old_device)); 42 | } 43 | 44 | void TRTInferImpl::print(){ 45 | if(!context_){ 46 | INFOW("Infer print,nullptr."); 47 | return; 48 | } 49 | 50 | INFO("Infer %p detail",this); 51 | INFO("\tMax Batch Size: %d",this->get_max_batch_size()); 52 | INFO("\tInputs: %d",inputs_.size()); 53 | for(int i = 0;i < inputs_.size();++i){ 54 | auto& tensor = inputs_[i]; 55 | auto& name = inputs_name_[i]; 56 | INFO("\t\t%d.%s : shape {%s}", i ,name.c_str(), tensor->shape_string()); 57 | } 58 | 59 | INFO("\tOutputs: %d",outputs_.size()); 60 | for(int i = 0;i < outputs_.size();++i){ 61 | auto& tensor = outputs_[i]; 62 | auto& name = outputs_name_[i]; 63 | INFO("\t\t%d.%s : shape {%s}", i, name.c_str(), tensor->shape_string()); 64 | } 65 | } 66 | 67 | std::shared_ptr> TRTInferImpl::serial_engine(){ 68 | auto memory = this->context_->engine_->serialize(); 69 | auto output = std::make_shared>((uint8_t*)memory->data(),(uint8_t*)memory->data()+memory->size()); 70 | memory->destroy(); 71 | return output; 72 | } 73 | 74 | bool TRTInferImpl::load_from_memory(const void* pdata,size_t size){ 75 | if(pdata == nullptr || size == 0) 76 | return false; 77 | 78 | context_.reset(new EngineContext()); 79 | 80 | if(!context_->build_model(pdata, size)){ 81 | context_.reset(); 82 | return false; 83 | } 84 | 85 | workspace_.reset(new MixMemory()); 86 | cudaGetDevice(&device_); 87 | build_engine_input_and_outputs_mapper(); 88 | return true; 89 | } 90 | 91 | bool TRTInferImpl::load(const std::string& file){ 92 | auto data = load_file(file); 93 | if(data.empty()) 94 | return false; 95 | 96 | context_.reset(new EngineContext()); 97 | 98 | if(!context_->build_model(data.data(), data.size())){ 99 | context_.reset(); 100 | return false; 101 | } 102 | 103 | workspace_.reset(new MixMemory()); 104 | cudaGetDevice(&device_); 105 | build_engine_input_and_outputs_mapper(); 106 | return true; 107 | } 108 | 109 | size_t TRTInferImpl::get_device_memory_size(){ 110 | EngineContext* context = (EngineContext*)this->context_.get(); 111 | return context->context_->getEngine().getDeviceMemorySize(); 112 | } 113 | 114 | void TRTInferImpl::build_engine_input_and_outputs_mapper(){ 115 | EngineContext* context = (EngineContext*)this->context_.get(); 116 | int nbBindings = context->engine_->getNbBindings(); 117 | int max_batchsize = context->engine_->getMaxBatchSize(); 118 | 119 | inputs_.clear(); 120 | inputs_name_.clear(); 121 | outputs_.clear(); 122 | outputs_name_.clear(); 123 | orderdBlobs_.clear(); 124 | bindingsPtr_.clear(); 125 | blobsNameMapper_.clear(); 126 | for(int i = 0;i < nbBindings;++i){ 127 | auto dims = context->engine_->getBindingDimensions(i); 128 | auto type = context->engine_->getBindingDataType(i); 129 | const char* bindingName = context->engine_->getBindingName(i); 130 | dims.d[0] = max_batchsize; 131 | auto newTensor = std::make_shared(dims.nbDims,dims.d); 132 | newTensor->set_stream(this->context_->stream_); 133 | newTensor->set_workspace(this->workspace_); 134 | if(context->engine_->bindingIsInput(i)){ 135 | inputs_.push_back(newTensor); 136 | inputs_name_.push_back(bindingName); 137 | inputs_map_to_ordered_index_.push_back(orderdBlobs_.size()); 138 | } 139 | else{ 140 | outputs_.push_back(newTensor); 141 | outputs_name_.push_back(bindingName); 142 | outputs_map_to_ordered_index_.push_back(orderdBlobs_.size()); 143 | } 144 | blobsNameMapper_[bindingName] = i; 145 | 
orderdBlobs_.push_back(newTensor); 146 | } 147 | bindingsPtr_.resize(orderdBlobs_.size()); 148 | } 149 | 150 | void TRTInferImpl::set_stream(CUStream stream){ 151 | this->context_->set_stream(stream); 152 | for(auto& t:orderdBlobs_) 153 | t->set_stream(stream); 154 | } 155 | 156 | CUStream TRTInferImpl::get_stream(){ 157 | return this->context_->stream_; 158 | } 159 | 160 | int TRTInferImpl::device(){ 161 | return device_; 162 | } 163 | 164 | void TRTInferImpl::synchronize(){ 165 | checkCudaRuntime(cudaStreamSynchronize(context_->stream_)); 166 | } 167 | 168 | bool TRTInferImpl::is_output_name(const std::string& name){ 169 | return std::find(outputs_name_.begin(),outputs_name_.end(),name) != outputs_name_.end(); 170 | } 171 | 172 | bool TRTInferImpl::is_input_name(const std::string& name){ 173 | return std::find(inputs_name_.begin(),inputs_name_.end(),name) != inputs_name_.end(); 174 | } 175 | 176 | void TRTInferImpl::forward(bool sync){ 177 | EngineContext* context = (EngineContext*) context_.get(); 178 | int inputBatchSize = inputs_[0]->size(0); 179 | for(int i = 0;i < context->engine_->getNbBindings();++i){ 180 | auto dims = context->engine_->getBindingDimensions(i); 181 | auto type = context->engine_->getBindingDataType(i); 182 | dims.d[0] = inputBatchSize; 183 | if(context->engine_->bindingIsInput(i)){ 184 | context->context_->setBindingDimensions(i,dims); 185 | } 186 | } 187 | 188 | for(int i = 0;i < outputs_.size();++i){ 189 | outputs_[i]->resize_single_dim(0,inputBatchSize); 190 | outputs_[i]->to_gpu(false); 191 | } 192 | 193 | for(int i = 0;i < orderdBlobs_.size();++i){ 194 | bindingsPtr_[i] = orderdBlobs_[i]->gpu(); 195 | } 196 | 197 | void** bindingsptr = bindingsPtr_.data(); 198 | bool excute_result = context->context_->enqueueV2(bindingsptr, context->stream_,nullptr); 199 | if(!excute_result){ 200 | auto code = cudaGetLastError(); 201 | INFOF("execute fail, code %d[%s], message %s", code, cudaGetErrorName(code), cudaGetErrorString(code)); 202 | } 203 | 204 | if(sync){ 205 | synchronize(); 206 | } 207 | } 208 | 209 | std::shared_ptr TRTInferImpl::get_workspace(){ 210 | return workspace_; 211 | } 212 | 213 | int TRTInferImpl::num_input(){ 214 | return this->inputs_.size(); 215 | } 216 | 217 | int TRTInferImpl::num_output(){ 218 | return this->outputs_.size(); 219 | } 220 | 221 | void TRTInferImpl::set_input(int index,std::shared_ptr tensor){ 222 | Assert(index >= 0 && index < inputs_.size()); 223 | this->inputs_[index] = tensor; 224 | 225 | int order_index = inputs_map_to_ordered_index_[index]; 226 | this->orderdBlobs_[order_index] = tensor; 227 | } 228 | 229 | void TRTInferImpl::set_output(int index,std::shared_ptr tensor){ 230 | Assert(index >= 0 && index < outputs_.size()); 231 | this->outputs_[index] = tensor; 232 | 233 | int order_index = outputs_map_to_ordered_index_[index]; 234 | this->orderdBlobs_[order_index] = tensor; 235 | } 236 | 237 | std::shared_ptr TRTInferImpl::input(int index){ 238 | Assert(index >= 0 && index < inputs_name_.size()); 239 | return this->inputs_[index]; 240 | } 241 | 242 | std::string TRTInferImpl::get_input_name(int index){ 243 | Assert(index >= 0 && index < inputs_name_.size()); 244 | return inputs_name_[index]; 245 | } 246 | 247 | std::shared_ptr TRTInferImpl::output(int index) { 248 | Assert(index >= 0 && index < outputs_.size()); 249 | return outputs_[index]; 250 | } 251 | 252 | std::string TRTInferImpl::get_output_name(int index){ 253 | Assert(index >= 0 && index < outputs_name_.size()); 254 | return outputs_name_[index]; 255 | } 256 | 257 | 
int TRTInferImpl::get_max_batch_size(){ 258 | Assert(this->context_ != nullptr); 259 | return this->context_->engine_->getMaxBatchSize(); 260 | } 261 | 262 | std::shared_ptr TRTInferImpl::tensor(const std::string& name){ 263 | Assert(this->blobsNameMapper_.find(name) != this->blobsNameMapper_.end()); 264 | return orderdBlobs_[blobsNameMapper_[name]]; 265 | } 266 | 267 | } // namespace TRT 268 | -------------------------------------------------------------------------------- /src/module/infer/trt_infer.h: -------------------------------------------------------------------------------- 1 | #ifndef TRT_INFER_H 2 | #define TRT_INFER_H 3 | 4 | #include "../core/trt_tensor.h" 5 | #include "../common/cuda_tools.h" 6 | 7 | namespace TRT 8 | { 9 | 10 | class EngineContext{ 11 | public: 12 | virtual ~EngineContext() {destroy();} 13 | 14 | void set_stream(CUStream stream){ 15 | if(owner_stream_){ 16 | if(stream_){ 17 | cudaStreamDestroy(stream_); 18 | } 19 | owner_stream_ = false; 20 | } 21 | stream_ = stream; 22 | } 23 | 24 | bool build_model(const void* pdata,size_t size){ 25 | destroy(); 26 | 27 | if(pdata == nullptr || size == 0) 28 | return false; 29 | 30 | owner_stream_ = true; 31 | checkCudaRuntime(cudaStreamCreate(&stream_)); 32 | if(stream_ == nullptr) 33 | return false; 34 | 35 | runtime_ = std::shared_ptr(createInferRuntime(gLogger), destroy_nvidia_pointer); 36 | if (runtime_ == nullptr) 37 | return false; 38 | 39 | engine_ = std::shared_ptr(runtime_->deserializeCudaEngine(pdata, size, nullptr), destroy_nvidia_pointer); 40 | if (engine_ == nullptr) 41 | return false; 42 | 43 | context_ = std::shared_ptr(engine_->createExecutionContext(), destroy_nvidia_pointer); 44 | return context_ != nullptr; 45 | } 46 | 47 | CUStream stream_ = nullptr; 48 | bool owner_stream_ = false; 49 | std::shared_ptr context_; 50 | std::shared_ptr engine_; 51 | std::shared_ptr runtime_ = nullptr; 52 | 53 | private: 54 | void destroy(){ 55 | context_.reset(); 56 | engine_.reset(); 57 | runtime_.reset(); 58 | 59 | if(owner_stream_){ 60 | if(stream_){ 61 | cudaStreamDestroy(stream_); 62 | } 63 | } 64 | stream_ = nullptr; 65 | } 66 | }; 67 | 68 | class TRTInferImpl{ 69 | public: 70 | virtual ~TRTInferImpl(); 71 | 72 | bool load(const std::string& file); 73 | bool load_from_memory(const void* pdata,size_t size); 74 | void destroy(); 75 | void forward(bool sync); 76 | int get_max_batch_size(); 77 | CUStream get_stream(); 78 | void set_stream(CUStream stream); 79 | void synchronize(); 80 | size_t get_device_memory_size(); 81 | std::shared_ptr get_workspace(); 82 | std::shared_ptr input(int index = 0); 83 | std::string get_input_name(int index = 0); 84 | std::shared_ptr output(int index = 0); 85 | std::string get_output_name(int index = 0); 86 | std::shared_ptr tensor(const std::string& name); 87 | bool is_output_name(const std::string& name); 88 | bool is_input_name(const std::string& name); 89 | void set_input(int index,std::shared_ptr tensor); 90 | void set_output(int index,std::shared_ptr tensor); 91 | std::shared_ptr> serial_engine(); 92 | 93 | void print(); 94 | 95 | int num_output(); 96 | int num_input(); 97 | int device(); 98 | 99 | private: 100 | void build_engine_input_and_outputs_mapper(); 101 | 102 | std::vector> inputs_; 103 | std::vector> outputs_; 104 | std::vector inputs_map_to_ordered_index_; 105 | std::vector outputs_map_to_ordered_index_; 106 | std::vector inputs_name_; 107 | std::vector outputs_name_; 108 | std::vector> orderdBlobs_; 109 | std::map blobsNameMapper_; 110 | std::shared_ptr context_; 111 | 
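// Raw device pointers collected from orderdBlobs_ just before enqueueV2();
// one entry per engine binding, in binding order.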
std::vector<void*> bindingsPtr_; 112 | std::shared_ptr<MixMemory> workspace_; 113 | int device_ = 0; 114 | 115 | 116 | }; 117 | 118 | } // namespace TRT 119 | 120 | 121 | #endif -------------------------------------------------------------------------------- /src/onnxplugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Minimum CMake version 2 | cmake_minimum_required(VERSION 3.5) 3 | # Project name 4 | project(onnxplugin) 5 | 6 | # Header include directory 7 | # include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) 8 | # Source file directories 9 | # GLOB_RECURSE: collect every .cpp file as SOURCE_FILES 10 | 11 | file(GLOB_RECURSE cpp_srcs ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) 12 | file(GLOB_RECURSE cuda_srcs ${CMAKE_CURRENT_SOURCE_DIR}/*.cu) 13 | 14 | # Print the current CMake directory and the collected source files 15 | message(module_CMAKE_CURRENT_SOURCE_DIR => ${CMAKE_CURRENT_SOURCE_DIR}) 16 | message( module_CPP_FILES => ${cpp_srcs}) 17 | message( module_CUDA_FILES => ${cuda_srcs}) 18 | 19 | # Every source file used by the build must be listed here; otherwise compilation still succeeds, 20 | # but the program fails at runtime with errors such as "symbol lookup error xxxxx , undefined symbol" 21 | set(ALL_SRCS ${cpp_srcs} ${cuda_srcs}) 22 | message(module_ALL_SRCS => ${ALL_SRCS}) 23 | 24 | # Output directory for the generated library 25 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/lib) 26 | 27 | # Build the .so package 28 | # SHARED -> shared library 29 | # STATIC -> static library 30 | message(module_PROJECT_NAME => ${PROJECT_NAME}) 31 | cuda_add_library(${PROJECT_NAME} SHARED ${ALL_SRCS}) 32 | # Link the related CUDA libraries 33 | # find_library(CUDNN_LIB cudnn HINTS 34 | # ${CUDA_TOOLKIT_ROOT_DIR} ${CUDNN_ROOT_DIR} PATH_SUFFIXES lib64 lib) 35 | # find_library(CUBLAS_LIB cublas HINTS 36 | # ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES lib64 lib lib/stubs) 37 | # find_library(CUBLASLT_LIB cublasLt HINTS 38 | # ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES lib64 lib lib/stubs) 39 | # find_library(CUDART_LIB cudart HINTS ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES lib lib64) 40 | # target_link_libraries(${PROJECT_NAME} 41 | # ${CUBLAS_LIB} 42 | # ${CUBLASLT_LIB} 43 | # ${CUDART_LIB} 44 | # ${CUDNN_LIB} 45 | # ${CMAKE_DL_LIBS} 46 | # ) 47 | -------------------------------------------------------------------------------- /src/onnxplugin/include/SiLUPlugin.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | #ifndef TRT_SILU_PLUGIN_H 18 | #define TRT_SILU_PLUGIN_H 19 | #include "NvInfer.h" 20 | #include "kernel.h" 21 | // #include "plugin.h" 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | namespace nvinfer1 28 | { 29 | namespace plugin 30 | { 31 | 32 | class SILU : public IPluginV2DynamicExt 33 | { 34 | public: 35 | SILU(); 36 | 37 | SILU(const void* buffer, size_t length); 38 | 39 | ~SILU() override = default; 40 | 41 | int getNbOutputs() const TRT_NOEXCEPT override; 42 | 43 | // Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) TRT_NOEXCEPT override; 44 | 45 | DimsExprs getOutputDimensions(int outputIndex, const DimsExprs* inputs, int nbInputs, 46 | IExprBuilder& exprBuilder) TRT_NOEXCEPT override; 47 | 48 | int initialize() TRT_NOEXCEPT override; 49 | 50 | void terminate() TRT_NOEXCEPT override; 51 | 52 | // size_t getWorkspaceSize(int maxBatchSize) const TRT_NOEXCEPT override; 53 | size_t getWorkspaceSize(const PluginTensorDesc* inputs, int32_t nbInputs, const PluginTensorDesc* outputs, 54 | int32_t nbOutputs) const TRT_NOEXCEPT override; 55 | 56 | // int enqueue( 57 | // int batchSize, const void* const* inputs, void* TRT_CONST_ENQUEUE* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT override; 58 | 59 | int32_t enqueue(const PluginTensorDesc* inputDesc, const PluginTensorDesc* outputDesc, 60 | const void* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT override; 61 | 62 | size_t getSerializationSize() const TRT_NOEXCEPT override; 63 | 64 | void serialize(void* buffer) const TRT_NOEXCEPT override; 65 | 66 | // bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const TRT_NOEXCEPT override; 67 | bool supportsFormatCombination(int32_t pos, const PluginTensorDesc* inOut, int32_t nbInputs, int32_t nbOutputs) TRT_NOEXCEPT override; 68 | 69 | 70 | const char* getPluginType() const TRT_NOEXCEPT override; 71 | 72 | const char* getPluginVersion() const TRT_NOEXCEPT override; 73 | 74 | void destroy() TRT_NOEXCEPT override; 75 | 76 | IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override; 77 | 78 | void setPluginNamespace(const char* pluginNamespace) TRT_NOEXCEPT override; 79 | 80 | const char* getPluginNamespace() const TRT_NOEXCEPT override; 81 | 82 | DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const TRT_NOEXCEPT override; 83 | 84 | // bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const TRT_NOEXCEPT override; 85 | 86 | // bool canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOEXCEPT override; 87 | 88 | void attachToContext( 89 | cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) TRT_NOEXCEPT override; 90 | 91 | // void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) TRT_NOEXCEPT override; 92 | void configurePlugin(const DynamicPluginTensorDesc* in, int32_t nbInputs,const DynamicPluginTensorDesc* out, int32_t nbOutputs) TRT_NOEXCEPT override; 93 | 94 | void detachFromContext() TRT_NOEXCEPT override; 95 | 96 | int input_size_; 97 | 98 | private: 99 | const char* mPluginNamespace; 100 | // int mBatchDim; 101 | pluginStatus_t SiLUInference_cpu(const int n, const float* input, float* output); 102 | 103 | }; 104 | 105 | class SiLUPluginCreator : public IPluginCreator 106 | { 107 | public: 108 | SiLUPluginCreator(); 109 | 110 | ~SiLUPluginCreator() 
override = default; 111 | 112 | const char* getPluginName() const TRT_NOEXCEPT override; 113 | 114 | const char* getPluginVersion() const TRT_NOEXCEPT override; 115 | 116 | const PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override; 117 | 118 | IPluginV2DynamicExt* createPlugin(const char* name, const PluginFieldCollection* fc) TRT_NOEXCEPT override; 119 | 120 | IPluginV2DynamicExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) TRT_NOEXCEPT override; 121 | 122 | void setPluginNamespace(const char* libNamespace) TRT_NOEXCEPT override 123 | { 124 | mNamespace = libNamespace; 125 | } 126 | 127 | const char* getPluginNamespace() const TRT_NOEXCEPT override 128 | { 129 | return mNamespace.c_str(); 130 | } 131 | 132 | 133 | private: 134 | std::string mNamespace; 135 | static PluginFieldCollection mFC; 136 | static std::vector mPluginAttributes; 137 | }; 138 | 139 | REGISTER_TENSORRT_PLUGIN(SiLUPluginCreator); 140 | 141 | } // namespace plugin 142 | } // namespace nvinfer1 143 | 144 | #endif // TRT_SILU_PLUGIN_H 145 | -------------------------------------------------------------------------------- /src/onnxplugin/include/checkMacrosPlugin.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef CHECK_MACROS_PLUGIN_H 18 | #define CHECK_MACROS_PLUGIN_H 19 | 20 | #include "NvInfer.h" 21 | #include 22 | 23 | #ifndef TRT_CHECK_MACROS_H 24 | #ifndef TRT_TUT_HELPERS_H 25 | 26 | #ifdef _MSC_VER 27 | #define FN_NAME __FUNCTION__ 28 | #else 29 | #define FN_NAME __func__ 30 | #endif 31 | #if __cplusplus < 201103L 32 | #define OVERRIDE 33 | #define NORETURN 34 | #else 35 | #define OVERRIDE override 36 | #define NORETURN [[noreturn]] 37 | #endif 38 | #if NV_TENSORRT_MAJOR >= 8 39 | #define TRT_NOEXCEPT noexcept 40 | #define TRT_CONST_ENQUEUE const 41 | #else 42 | #define TRT_NOEXCEPT 43 | #define TRT_CONST_ENQUEUE 44 | #endif 45 | 46 | #endif // TRT_TUT_HELPERS_H 47 | #endif // TRT_CHECK_MACROS_H 48 | 49 | namespace nvinfer1 50 | { 51 | namespace plugin 52 | { 53 | template 54 | class LogStream : public std::ostream 55 | { 56 | class Buf : public std::stringbuf 57 | { 58 | public: 59 | int sync() override; 60 | }; 61 | 62 | Buf buffer; 63 | 64 | public: 65 | LogStream() 66 | : std::ostream(&buffer){}; 67 | }; 68 | 69 | extern LogStream gLogError; 70 | extern LogStream gLogWarning; 71 | extern LogStream gLogInfo; 72 | extern LogStream gLogVerbose; 73 | 74 | void reportAssertion(const char* msg, const char* file, int line); 75 | void logError(const char* msg, const char* file, const char* fn, int line); 76 | 77 | NORETURN void throwCudaError(const char* file, const char* function, int line, int status, const char* msg = nullptr); 78 | NORETURN void throwCudnnError(const char* file, const char* function, int line, int status, const char* msg = nullptr); 79 | NORETURN void throwCublasError(const char* file, const char* function, int line, int status, const char* msg = nullptr); 80 | 81 | class TRTException : public std::exception 82 | { 83 | public: 84 | TRTException(const char* fl, const char* fn, int ln, int st, const char* msg, const char* nm) 85 | : file(fl) 86 | , function(fn) 87 | , line(ln) 88 | , status(st) 89 | , message(msg) 90 | , name(nm) 91 | { 92 | } 93 | virtual void log(std::ostream& logStream) const; 94 | void setMessage(const char* msg) 95 | { 96 | message = msg; 97 | } 98 | 99 | protected: 100 | const char* file{nullptr}; 101 | const char* function{nullptr}; 102 | int line{0}; 103 | int status{0}; 104 | const char* message{nullptr}; 105 | const char* name{nullptr}; 106 | }; 107 | 108 | class CudaError : public TRTException 109 | { 110 | public: 111 | CudaError(const char* fl, const char* fn, int ln, int stat, const char* msg = nullptr) 112 | : TRTException(fl, fn, ln, stat, msg, "Cuda") 113 | { 114 | } 115 | }; 116 | 117 | class CudnnError : public TRTException 118 | { 119 | public: 120 | CudnnError(const char* fl, const char* fn, int ln, int stat, const char* msg = nullptr) 121 | : TRTException(fl, fn, ln, stat, msg, "Cudnn") 122 | { 123 | } 124 | }; 125 | 126 | class CublasError : public TRTException 127 | { 128 | public: 129 | CublasError(const char* fl, const char* fn, int ln, int stat, const char* msg = nullptr) 130 | : TRTException(fl, fn, ln, stat, msg, "cuBLAS") 131 | { 132 | } 133 | }; 134 | 135 | } // namespace plugin 136 | 137 | } // namespace nvinfer1 138 | 139 | #ifndef TRT_CHECK_MACROS_H 140 | #ifndef TRT_TUT_HELPERS_H 141 | 142 | #define API_CHECK(condition) \ 143 | { \ 144 | if ((condition) == false) \ 145 | { \ 146 | nvinfer1::plugin::logError(#condition, __FILE__, FN_NAME, __LINE__); \ 147 | return; \ 148 | } \ 149 | } 150 | 151 | #define API_CHECK_RETVAL(condition, retval) \ 152 | { \ 153 | if ((condition) == false) \ 154 | { \ 155 | 
nvinfer1::plugin::logError(#condition, __FILE__, FN_NAME, __LINE__); \ 156 | return retval; \ 157 | } \ 158 | } 159 | 160 | #define API_CHECK_WEIGHTS(Name) \ 161 | API_CHECK((Name).values != nullptr); \ 162 | API_CHECK((Name).count > 0); \ 163 | API_CHECK(int((Name).type) >= 0 && int((Name).type) < EnumMax()); 164 | 165 | #define API_CHECK_WEIGHTS0(Name) \ 166 | API_CHECK((Name).count >= 0); \ 167 | API_CHECK((Name).count > 0 ? ((Name).values != nullptr) : ((Name).values == nullptr)); \ 168 | API_CHECK(int((Name).type) >= 0 && int((Name).type) < EnumMax()); 169 | 170 | #define API_CHECK_WEIGHTS_RETVAL(Name, retval) \ 171 | API_CHECK_RETVAL((Name).values != nullptr, retval); \ 172 | API_CHECK_RETVAL((Name).count > 0, retval); \ 173 | API_CHECK_RETVAL(int((Name).type) >= 0 && int((Name).type) < EnumMax(), retval); 174 | 175 | #define API_CHECK_WEIGHTS0_RETVAL(Name, retval) \ 176 | API_CHECK_RETVAL((Name).count >= 0, retval); \ 177 | API_CHECK_RETVAL((Name).count > 0 ? ((Name).values != nullptr) : ((Name).values == nullptr), retval); \ 178 | API_CHECK_RETVAL(int((Name).type) >= 0 && int((Name).type) < EnumMax(), retval); 179 | 180 | #define API_CHECK_NULL(param) API_CHECK((param) != nullptr) 181 | #define API_CHECK_NULL_RETVAL(param, retval) API_CHECK_RETVAL((param) != nullptr, retval) 182 | #define API_CHECK_NULL_RET_NULL(ptr) API_CHECK_NULL_RETVAL(ptr, nullptr) 183 | 184 | #define API_CHECK_ENUM_RANGE(Type, val) API_CHECK(int(val) >= 0 && int(val) < EnumMax()) 185 | #define API_CHECK_ENUM_RANGE_RETVAL(Type, val, retval) \ 186 | API_CHECK_RETVAL(int(val) >= 0 && int(val) < EnumMax(), retval) 187 | 188 | #define CUBLASASSERTMSG(status_, msg) \ 189 | { \ 190 | auto s_ = status_; \ 191 | if (s_ != CUBLAS_STATUS_SUCCESS) \ 192 | { \ 193 | nvinfer1::plugin::throwCublasError(__FILE__, FN_NAME, __LINE__, s_, msg); \ 194 | } \ 195 | } 196 | 197 | #define CUBLASASSERT(status_) \ 198 | { \ 199 | auto s_ = status_; \ 200 | if (s_ != CUBLAS_STATUS_SUCCESS) \ 201 | { \ 202 | nvinfer1::plugin::throwCublasError(__FILE__, FN_NAME, __LINE__, s_); \ 203 | } \ 204 | } 205 | 206 | #define CUDNNASSERTMSG(status_, msg) \ 207 | { \ 208 | auto s_ = status_; \ 209 | if (s_ != CUDNN_STATUS_SUCCESS) \ 210 | { \ 211 | nvinfer1::plugin::throwCudnnError(__FILE__, FN_NAME, __LINE__, s_, msg); \ 212 | } \ 213 | } 214 | 215 | #define CUDNNASSERT(status_) \ 216 | { \ 217 | auto s_ = status_; \ 218 | if (s_ != CUDNN_STATUS_SUCCESS) \ 219 | { \ 220 | const char* msg = cudnnGetErrorString(s_); \ 221 | nvinfer1::plugin::throwCudnnError(__FILE__, FN_NAME, __LINE__, s_, msg); \ 222 | } \ 223 | } 224 | 225 | #define CUASSERTMSG(status_, msg) \ 226 | { \ 227 | auto s_ = status_; \ 228 | if (s_ != cudaSuccess) \ 229 | { \ 230 | nvinfer1::plugin::throwCudaError(__FILE__, FN_NAME, __LINE__, s_, msg); \ 231 | } \ 232 | } 233 | 234 | #define CUASSERT(status_) \ 235 | { \ 236 | auto s_ = status_; \ 237 | if (s_ != cudaSuccess) \ 238 | { \ 239 | const char* msg = cudaGetErrorString(s_); \ 240 | nvinfer1::plugin::throwCudaError(__FILE__, FN_NAME, __LINE__, s_, msg); \ 241 | } \ 242 | } 243 | 244 | #define ASSERT(assertion) \ 245 | { \ 246 | if (!(assertion)) \ 247 | { \ 248 | nvinfer1::plugin::reportAssertion(#assertion, __FILE__, __LINE__); \ 249 | } \ 250 | } 251 | 252 | #define FAIL(msg) \ 253 | { \ 254 | nvinfer1::plugin::reportAssertion(msg, __FILE__, __LINE__); \ 255 | } 256 | 257 | #define CUERRORMSG(status_) \ 258 | { \ 259 | auto s_ = status_; \ 260 | if (s_ != 0) \ 261 | nvinfer1::plugin::logError(#status_ " failure.", __FILE__, 
FN_NAME, __LINE__); \ 262 | } 263 | 264 | #endif // TRT_TUT_HELPERS_H 265 | #endif // TRT_CHECK_MACROS_H 266 | 267 | #endif /*CHECK_MACROS_PLUGIN_H*/ 268 | -------------------------------------------------------------------------------- /src/onnxplugin/include/kernel.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TRT_KERNEL_H 18 | #define TRT_KERNEL_H 19 | 20 | #include "cublas_v2.h" 21 | #include "plugin.h" 22 | #include 23 | #include 24 | #include 25 | 26 | using namespace nvinfer1; 27 | using namespace nvinfer1::plugin; 28 | #define DEBUG_ENABLE 0 29 | 30 | #ifndef TRT_RPNLAYER_H 31 | typedef enum 32 | { 33 | NCHW = 0, 34 | NC4HW = 1 35 | } DLayout_t; 36 | 37 | pluginStatus_t SiLUInference(cudaStream_t stream, int n, const void* input, void* output); 38 | 39 | #endif // TRT_RPNLAYER_H 40 | #endif 41 | -------------------------------------------------------------------------------- /src/onnxplugin/include/plugin.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef TRT_PLUGIN_H 18 | #define TRT_PLUGIN_H 19 | #include "checkMacrosPlugin.h" 20 | 21 | #include "NvInferPlugin.h" 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #ifndef TRT_LEGACY_PLUGIN_H 30 | // Enumerator for status 31 | typedef enum 32 | { 33 | STATUS_SUCCESS = 0, 34 | STATUS_FAILURE = 1, 35 | STATUS_BAD_PARAM = 2, 36 | STATUS_NOT_SUPPORTED = 3, 37 | STATUS_NOT_INITIALIZED = 4 38 | } pluginStatus_t; 39 | 40 | namespace nvinfer1 41 | { 42 | namespace plugin 43 | { 44 | 45 | class BasePlugin : public IPluginV2 46 | { 47 | protected: 48 | void setPluginNamespace(const char* libNamespace) TRT_NOEXCEPT override 49 | { 50 | mNamespace = libNamespace; 51 | } 52 | 53 | const char* getPluginNamespace() const TRT_NOEXCEPT override 54 | { 55 | return mNamespace.c_str(); 56 | } 57 | 58 | std::string mNamespace; 59 | }; 60 | 61 | class BaseCreator : public IPluginCreator 62 | { 63 | public: 64 | void setPluginNamespace(const char* libNamespace) TRT_NOEXCEPT override 65 | { 66 | mNamespace = libNamespace; 67 | } 68 | 69 | const char* getPluginNamespace() const TRT_NOEXCEPT override 70 | { 71 | return mNamespace.c_str(); 72 | } 73 | 74 | protected: 75 | std::string mNamespace; 76 | }; 77 | 78 | // Write values into buffer 79 | template 80 | void write(char*& buffer, const T& val) 81 | { 82 | std::memcpy(buffer, &val, sizeof(T)); 83 | buffer += sizeof(T); 84 | } 85 | 86 | // Read values from buffer 87 | template 88 | T read(const char*& buffer) 89 | { 90 | T val{}; 91 | std::memcpy(&val, buffer, sizeof(T)); 92 | buffer += sizeof(T); 93 | return val; 94 | } 95 | 96 | } // namespace plugin 97 | } // namespace nvinfer1 98 | 99 | #ifndef DEBUG 100 | #ifndef TRT_CHECK_MACROS_H 101 | #ifndef TRT_TUT_HELPERS_H 102 | 103 | #define CHECK(status) \ 104 | do \ 105 | { \ 106 | if (status != 0) \ 107 | abort(); \ 108 | } while (0) 109 | 110 | #define ASSERT_PARAM(exp) \ 111 | do \ 112 | { \ 113 | if (!(exp)) \ 114 | return STATUS_BAD_PARAM; \ 115 | } while (0) 116 | 117 | #define ASSERT_FAILURE(exp) \ 118 | do \ 119 | { \ 120 | if (!(exp)) \ 121 | return STATUS_FAILURE; \ 122 | } while (0) 123 | 124 | #define CSC(call, err) \ 125 | do \ 126 | { \ 127 | cudaError_t cudaStatus = call; \ 128 | if (cudaStatus != cudaSuccess) \ 129 | { \ 130 | return err; \ 131 | } \ 132 | } while (0) 133 | 134 | #define DEBUG_PRINTF(...) \ 135 | do \ 136 | { \ 137 | } while (0) 138 | 139 | #else 140 | 141 | #define ASSERT_PARAM(exp) \ 142 | do \ 143 | { \ 144 | if (!(exp)) \ 145 | { \ 146 | fprintf(stderr, "Bad param - " #exp ", %s:%d\n", __FILE__, __LINE__); \ 147 | return STATUS_BAD_PARAM; \ 148 | } \ 149 | } while (0) 150 | 151 | #define ASSERT_FAILURE(exp) \ 152 | do \ 153 | { \ 154 | if (!(exp)) \ 155 | { \ 156 | fprintf(stderr, "Failure - " #exp ", %s:%d\n", __FILE__, __LINE__); \ 157 | return STATUS_FAILURE; \ 158 | } \ 159 | } while (0) 160 | 161 | #define CSC(call, err) \ 162 | do \ 163 | { \ 164 | cudaError_t cudaStatus = call; \ 165 | if (cudaStatus != cudaSuccess) \ 166 | { \ 167 | printf("%s %d CUDA FAIL %s\n", __FILE__, __LINE__, cudaGetErrorString(cudaStatus)); \ 168 | return err; \ 169 | } \ 170 | } while (0) 171 | 172 | #define CHECK(status) \ 173 | { \ 174 | if (status != 0) \ 175 | { \ 176 | DEBUG_PRINTF("%s %d CUDA FAIL %s\n", __FILE__, __LINE__, cudaGetErrorString(status)); \ 177 | abort(); \ 178 | } \ 179 | } 180 | 181 | #define DEBUG_PRINTF(...) 
\ 182 | do \ 183 | { \ 184 | printf(__VA_ARGS__); \ 185 | } while (0) 186 | 187 | #endif // TRT_TUT_HELPERS_H 188 | #endif // TRT_CHECK_MACROS_H 189 | #endif // TRT_LEGACY_PLUGIN_H 190 | #endif 191 | 192 | #endif // TRT_PLUGIN_H 193 | -------------------------------------------------------------------------------- /src/onnxplugin/src/SiLU.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "../include/kernel.h" 18 | #include 19 | 20 | __device__ float Logist_kernel(float data) { return 1.0f / (1.0f + expf(-data)); }; 21 | 22 | template <unsigned nthdsPerCTA> 23 | __launch_bounds__(nthdsPerCTA) __global__ 24 | void SiLUKernel(const int n, const float* input, float* output) 25 | { 26 | for (int i = blockIdx.x * nthdsPerCTA + threadIdx.x; i < n; i += gridDim.x * nthdsPerCTA) 27 | { 28 | output[i] = input[i] * Logist_kernel(input[i]); 29 | } 30 | } 31 | 32 | pluginStatus_t SiLUGPU(cudaStream_t stream, const int n, const void* input, void* output) 33 | { 34 | const int BS = 512; 35 | const int GS = (n + BS - 1) / BS; 36 | SiLUKernel<BS><<<GS, BS, 0, stream>>>(n, 37 | (const float*) input, 38 | (float*) output); 39 | return STATUS_SUCCESS; 40 | } 41 | 42 | pluginStatus_t SiLUInference( 43 | cudaStream_t stream, const int n, const void* input, void* output) 44 | { 45 | return SiLUGPU(stream, n, (const float*) input, (float*) output); 46 | } 47 | -------------------------------------------------------------------------------- /src/onnxplugin/src/SiLUPlugin.cpp: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
16 | */ 17 | 18 | #include "../include/SiLUPlugin.h" 19 | #include "../include/checkMacrosPlugin.h" 20 | 21 | using namespace nvinfer1; 22 | using nvinfer1::plugin::SiLUPluginCreator; 23 | using nvinfer1::plugin::SILU; 24 | 25 | static const char* SILU_PLUGIN_VERSION{"1"}; 26 | static const char* SILU_PLUGIN_NAME{"SiLU"}; 27 | PluginFieldCollection SiLUPluginCreator::mFC{}; 28 | std::vector SiLUPluginCreator::mPluginAttributes; 29 | 30 | // LeakyReLU {{{ 31 | SILU::SILU() 32 | { 33 | } 34 | 35 | SILU::SILU(const void* buffer, size_t length) 36 | { 37 | // const char *d = reinterpret_cast(buffer), *a = d; 38 | // mBatchDim = read(d); 39 | // ASSERT(d == a + length); 40 | assert(length==sizeof(input_size_)); 41 | input_size_ = *reinterpret_cast(buffer); 42 | 43 | } 44 | 45 | int SILU::getNbOutputs() const TRT_NOEXCEPT 46 | { 47 | return 1; 48 | } 49 | 50 | // Dims SILU::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) TRT_NOEXCEPT 51 | // { 52 | // ASSERT(nbInputDims == 1); 53 | // ASSERT(index == 0); 54 | // input_size_ = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2]; 55 | // return inputs[0]; 56 | // } 57 | 58 | DimsExprs SILU::getOutputDimensions( 59 | int32_t outputIndex, const DimsExprs* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) TRT_NOEXCEPT 60 | { 61 | ASSERT(nbInputs == 1); 62 | ASSERT(outputIndex == 0); 63 | // input_size_ = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2]; 64 | input_size_ = exprBuilder.operation(DimensionOperation::kPROD, 65 | *exprBuilder.operation(DimensionOperation::kPROD, *inputs[0].d[1], *inputs[0].d[2]), 66 | *inputs[0].d[3])->getConstantValue(); 67 | // [batch,channel,height,width] batch is not Constant 68 | // printf("inputs[0].d[0]=%d\n",inputs[0].d[0]->getConstantValue()); 69 | // printf("inputs[0].d[1]=%d\n",inputs[0].d[1]->getConstantValue()); 70 | // printf("inputs[0].d[2]=%d\n",inputs[0].d[2]->getConstantValue()); 71 | // printf("inputs[0].d[3]=%d\n",inputs[0].d[3]->getConstantValue()); 72 | // printf("input_size_=%d\n",input_size_); 73 | return inputs[0]; 74 | } 75 | 76 | __device__ float Logist_kernel_cpu(float data) { return 1.0f / (1.0f + expf(-data)); }; 77 | 78 | 79 | pluginStatus_t SILU::SiLUInference_cpu(const int n, const float* input, float* output) 80 | { 81 | printf("SiLUInference_cpu start\n"); 82 | for (int i =0; i < n; i += 1) 83 | { 84 | printf("SiLUInference_cpu id=%d\n",i); 85 | output[i] = input[i] * Logist_kernel_cpu(input[i]); 86 | } 87 | return STATUS_SUCCESS; 88 | } 89 | 90 | int32_t SILU::enqueue(const PluginTensorDesc* inputDesc, 91 | const PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, 92 | cudaStream_t stream) TRT_NOEXCEPT 93 | { 94 | const int batchSize = inputDesc[0].dims.d[0]; 95 | inputDesc[0].dims; 96 | const void* inputData = inputs[0]; 97 | void* outputData = outputs[0]; 98 | pluginStatus_t status = SiLUInference(stream, batchSize*input_size_, inputData, outputData); 99 | // pluginStatus_t status = SiLUInference_cpu(batchSize*input_size_, (const float*) inputData, (float*) outputData); 100 | ASSERT(status == STATUS_SUCCESS); 101 | return status; 102 | } 103 | 104 | // int SILU::enqueue(int batchSize, const void* const* inputs, void* TRT_CONST_ENQUEUE* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT 105 | // { 106 | // const void* inputData = inputs[0]; 107 | // void* outputData = outputs[0]; 108 | // pluginStatus_t status = SiLUInference(stream, batchSize*input_size_, inputData, outputData); 109 | // // pluginStatus_t status 
= SiLUInference_cpu(batchSize*input_size_, (const float*) inputData, (float*) outputData); 110 | // ASSERT(status == STATUS_SUCCESS); 111 | // return status; 112 | // } 113 | 114 | size_t SILU::getSerializationSize() const TRT_NOEXCEPT 115 | { 116 | // mNegSlope, mBatchDim 117 | // return sizeof(float) + sizeof(int); 118 | // return sizeof(int); 119 | return sizeof(input_size_); 120 | } 121 | 122 | // Set plugin namespace 123 | void SILU::setPluginNamespace(const char* pluginNamespace) TRT_NOEXCEPT 124 | { 125 | mPluginNamespace = pluginNamespace; 126 | } 127 | 128 | const char* SILU::getPluginNamespace() const TRT_NOEXCEPT 129 | { 130 | return mPluginNamespace; 131 | } 132 | 133 | // Return the DataType of the plugin output at the requested index 134 | DataType SILU::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const TRT_NOEXCEPT 135 | { 136 | return DataType::kFLOAT; 137 | } 138 | 139 | // Return true if output tensor is broadcast across a batch. 140 | // bool SILU::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const TRT_NOEXCEPT 141 | // { 142 | // return false; 143 | // } 144 | 145 | // Return true if plugin can use input that is broadcast across batch without replication. 146 | // bool SILU::canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOEXCEPT 147 | // { 148 | // return false; 149 | // } 150 | 151 | void SILU::configurePlugin(const DynamicPluginTensorDesc* in, int32_t nbInputs,const DynamicPluginTensorDesc* out, int32_t nbOutputs) TRT_NOEXCEPT 152 | { 153 | // gLogVerbose << "SiLU configurePlugin\n"; 154 | } 155 | 156 | // void SILU::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) TRT_NOEXCEPT 157 | // { 158 | // // ASSERT(mBatchDim == 1); 159 | // // for (int i = 0; i dims.nbDims; ++i) 160 | // // { 161 | // // mBatchDim *= in->dims.d[i]; 162 | // // } 163 | // } 164 | 165 | void SILU::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) TRT_NOEXCEPT 166 | { 167 | } 168 | 169 | // Detach the plugin object from its execution context. 
170 | void SILU::detachFromContext() TRT_NOEXCEPT {} 171 | 172 | void SILU::serialize(void* buffer) const TRT_NOEXCEPT 173 | { 174 | // char *d = reinterpret_cast(buffer), *a = d; 175 | // write(d, mBatchDim); 176 | // ASSERT(d == a + getSerializationSize()); 177 | *reinterpret_cast(buffer)=input_size_; 178 | } 179 | 180 | // void SILU::configureWithFormat( 181 | // const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int) 182 | // { 183 | // ASSERT(type == DataType::kFLOAT && format == PluginFormat::kNCHW); 184 | // ASSERT(mBatchDim == 1); 185 | // ASSERT(nbOutputs == 1); 186 | // for (int i = 0; i < inputDims[0].nbDims; ++i) 187 | // { 188 | // mBatchDim *= inputDims[0].d[i]; 189 | // } 190 | // } 191 | 192 | // bool SILU::supportsFormat(DataType type, PluginFormat format) const 193 | // { 194 | // return (type == DataType::kFLOAT && format == PluginFormat::kNCHW); 195 | // } 196 | 197 | // bool SILU::supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const TRT_NOEXCEPT 198 | // { 199 | // // ASSERT(mBatchDim == 1); 200 | // // for (int i = 0; i dims.nbDims; ++i) 201 | // // { 202 | // // mBatchDim *= inOut->dims.d[i]; 203 | // // } 204 | // return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; 205 | // } 206 | 207 | bool SILU::supportsFormatCombination(int32_t pos, const PluginTensorDesc* inOut, int32_t nbInputs, int32_t nbOutputs) TRT_NOEXCEPT 208 | { 209 | return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; 210 | } 211 | 212 | int SILU::initialize() TRT_NOEXCEPT 213 | { 214 | return 0; 215 | } 216 | 217 | void SILU::terminate() TRT_NOEXCEPT {} 218 | 219 | // size_t SILU::getWorkspaceSize(int maxBatchSize) const TRT_NOEXCEPT 220 | // { 221 | // return 0; 222 | // } 223 | 224 | size_t SILU::getWorkspaceSize(const PluginTensorDesc* inputs, int32_t nbInputs, 225 | const PluginTensorDesc* outputs, int32_t nbOutputs) const TRT_NOEXCEPT 226 | { 227 | return 0; 228 | } 229 | 230 | const char* SILU::getPluginType() const TRT_NOEXCEPT 231 | { 232 | return SILU_PLUGIN_NAME; 233 | } 234 | 235 | const char* SILU::getPluginVersion() const TRT_NOEXCEPT 236 | { 237 | return SILU_PLUGIN_VERSION; 238 | } 239 | 240 | void SILU::destroy() TRT_NOEXCEPT 241 | { 242 | delete this; 243 | } 244 | 245 | IPluginV2DynamicExt* SILU::clone() const TRT_NOEXCEPT 246 | { 247 | SILU* plugin = new SILU(); 248 | plugin->input_size_ = input_size_; 249 | plugin->setPluginNamespace(mPluginNamespace); 250 | return plugin; 251 | } 252 | 253 | SiLUPluginCreator::SiLUPluginCreator() 254 | { 255 | // mPluginAttributes.emplace_back(PluginField("negSlope", nullptr, PluginFieldType::kFLOAT32, 1)); 256 | mPluginAttributes.clear(); 257 | 258 | mFC.nbFields = mPluginAttributes.size(); 259 | mFC.fields = mPluginAttributes.data(); 260 | } 261 | 262 | const char* SiLUPluginCreator::getPluginName() const TRT_NOEXCEPT 263 | { 264 | return SILU_PLUGIN_NAME; 265 | } 266 | 267 | const char* SiLUPluginCreator::getPluginVersion() const TRT_NOEXCEPT 268 | { 269 | return SILU_PLUGIN_VERSION; 270 | } 271 | 272 | const PluginFieldCollection* SiLUPluginCreator::getFieldNames() TRT_NOEXCEPT 273 | { 274 | return &mFC; 275 | } 276 | 277 | IPluginV2DynamicExt* SiLUPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) TRT_NOEXCEPT 278 | { 279 | // const PluginField* fields = fc->fields; 280 | // ASSERT(fc->nbFields == 1); 281 | // ASSERT(fields[0].type == 
PluginFieldType::kFLOAT32); 282 | // negSlope = *(static_cast(fields[0].data)); 283 | 284 | // return new SILU(); 285 | SILU* obj = new SILU(); 286 | obj->setPluginNamespace(mNamespace.c_str()); 287 | return obj; 288 | } 289 | 290 | IPluginV2DynamicExt* SiLUPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) TRT_NOEXCEPT 291 | { 292 | // This object will be deleted when the network is destroyed, which will 293 | // call LReluPlugin::destroy() 294 | // return new SILU(serialData, serialLength); 295 | SILU* obj = new SILU(serialData, serialLength); 296 | obj->setPluginNamespace(mNamespace.c_str()); 297 | return obj; 298 | } 299 | // LeakReLU }}} 300 | -------------------------------------------------------------------------------- /weights/yolov5n.engine: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZJU-lishuang/yolov5_tensorrt/312787f096bacde243bea4798527aa09a2208f65/weights/yolov5n.engine -------------------------------------------------------------------------------- /weights/yolov5n.onnx: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:08c7d1ee0cc657514388fdab6f9ef44a6a461697d34719e34fbc3f63ae534c4c 3 | size 7697369 4 | -------------------------------------------------------------------------------- /weights/yolov5n.plugin.engine: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZJU-lishuang/yolov5_tensorrt/312787f096bacde243bea4798527aa09a2208f65/weights/yolov5n.plugin.engine -------------------------------------------------------------------------------- /weights/yolov5n.plugin.onnx: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2e7d688d9a80ffd86007b2ff14d4f2b2a0788e282d97c0e3678512833108af56 3 | size 7691077 4 | --------------------------------------------------------------------------------
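A quick way to sanity-check the custom SiLU kernel is to call the SiLUInference() launcher declared in src/onnxplugin/include/kernel.h directly and compare its output against a CPU reference. The harness below is a minimal sketch, not part of the repository: the file name silu_check.cu is hypothetical, and it assumes it is compiled by nvcc together with src/onnxplugin/src/SiLU.cu, with the project's include directory and the TensorRT headers on the include path (kernel.h pulls in plugin.h and NvInferPlugin.h).

// silu_check.cu -- hypothetical standalone check, not shipped with this repository.
// Assumed build command (paths are placeholders):
//   nvcc -I src/onnxplugin/include -I <TensorRT>/include silu_check.cu src/onnxplugin/src/SiLU.cu -o silu_check
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>
#include <cuda_runtime.h>
#include "kernel.h"   // declares pluginStatus_t SiLUInference(cudaStream_t, int, const void*, void*)

int main()
{
    const int n = 1 << 12;
    std::vector<float> host_in(n), host_out(n, 0.0f);
    for (int i = 0; i < n; ++i)
        host_in[i] = 0.01f * (i - n / 2);            // sample values around zero

    // CUDA error checking omitted for brevity; this is a sketch, not production code.
    float* dev_in = nullptr;
    float* dev_out = nullptr;
    cudaMalloc(&dev_in, n * sizeof(float));
    cudaMalloc(&dev_out, n * sizeof(float));
    cudaMemcpy(dev_in, host_in.data(), n * sizeof(float), cudaMemcpyHostToDevice);

    cudaStream_t stream = nullptr;
    cudaStreamCreate(&stream);

    // Same entry point the plugin's enqueue() uses: SiLUInference -> SiLUGPU -> SiLUKernel.
    pluginStatus_t status = SiLUInference(stream, n, dev_in, dev_out);
    cudaStreamSynchronize(stream);
    cudaMemcpy(host_out.data(), dev_out, n * sizeof(float), cudaMemcpyDeviceToHost);

    // CPU reference: silu(x) = x * sigmoid(x) = x / (1 + exp(-x)).
    float max_err = 0.0f;
    for (int i = 0; i < n; ++i)
    {
        float ref = host_in[i] / (1.0f + std::exp(-host_in[i]));
        max_err = std::max(max_err, std::fabs(ref - host_out[i]));
    }
    printf("status=%d  max_abs_err=%g\n", (int) status, max_err);

    cudaFree(dev_in);
    cudaFree(dev_out);
    cudaStreamDestroy(stream);
    return (status == STATUS_SUCCESS && max_err < 1e-4f) ? 0 : 1;
}

Because enqueue() in SiLUPlugin.cpp forwards batchSize * input_size_ elements to this same launcher, a mismatch here would also show up as wrong activations in the plugin-based engine.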
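For completeness, a sketch of consuming the plugin engine outside this project. Everything below is an illustration under stated assumptions rather than the repository's own code: the file name load_plugin_engine.cpp and the hard-coded engine path are hypothetical, and it presumes TensorRT 8 (two-argument deserializeCudaEngine) with libonnxplugin.so linked in, so the static REGISTER_TENSORRT_PLUGIN(SiLUPluginCreator) in SiLUPlugin.h registers the SiLU plugin before deserialization.

// load_plugin_engine.cpp -- hypothetical usage sketch, not part of the repository.
#include <cstdio>
#include <fstream>
#include <iterator>
#include <vector>
#include <NvInfer.h>

// Minimal logger implementation, just for this sketch.
class SimpleLogger : public nvinfer1::ILogger
{
    void log(Severity severity, const char* msg) noexcept override
    {
        if (severity <= Severity::kWARNING)
            printf("[TRT] %s\n", msg);
    }
};

int main()
{
    // Path is an assumption; point it at the serialized engine you want to load.
    std::ifstream file("weights/yolov5n.plugin.engine", std::ios::binary);
    std::vector<char> data((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());

    SimpleLogger logger;
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);
    nvinfer1::ICudaEngine* engine = runtime ? runtime->deserializeCudaEngine(data.data(), data.size()) : nullptr;
    nvinfer1::IExecutionContext* context = engine ? engine->createExecutionContext() : nullptr;

    printf("engine=%p context=%p\n", (void*) engine, (void*) context);
    // Cleanup omitted; EngineContext in trt_infer.h shows the shared_ptr-based ownership the project itself uses.
    return (engine && context) ? 0 : 1;
}

If deserialization fails with a message about an unknown plugin type "SiLU", the creator was most likely never registered in the process; loading or linking the onnxplugin shared library is what triggers the REGISTER_TENSORRT_PLUGIN registration.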