├── .gitignore ├── Benchmark.py ├── LICENSE ├── README.md ├── calibration └── README.md ├── cpp ├── README.md ├── jetson_csi │ ├── CMakeLists.txt │ ├── README.md │ ├── csi_detect.cpp │ ├── gstreamer.cpp │ ├── gstreamer.h │ ├── labels_coco.yaml │ ├── preprocess.cu │ ├── preprocess.h │ ├── trt_infer.cpp │ ├── trt_infer.h │ ├── utils_detection.cpp │ └── utils_detection.h ├── kp_jetson_csi │ ├── CMakeLists.txt │ ├── README.md │ ├── csi_kp_detect.cpp │ ├── gstreamer.cpp │ ├── gstreamer.h │ ├── labels_det.yaml │ ├── points_link.yaml │ ├── preprocess.cu │ ├── preprocess.h │ ├── trt_infer.cpp │ ├── trt_infer.h │ ├── utils_detection.cpp │ └── utils_detection.h └── video_detect │ ├── CMakeLists.txt │ ├── README.md │ ├── labels_coco.yaml │ ├── main.cpp │ ├── preprocess.cu │ ├── preprocess.h │ ├── trt_infer.cpp │ ├── trt_infer.h │ ├── utils_detection.cpp │ └── utils_detection.h ├── doc └── yolov5s_det.png ├── labels_coco.yaml ├── labels_voc.yaml ├── models_onnx └── README.md ├── models_trt └── README.md ├── onnx2trt.py ├── requirements.txt ├── utils ├── calibrator.py ├── trt_infer.py └── utils_detection.py ├── yolo_detect_v1.py ├── yolo_detect_v2.py └── yolox_detect.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | __pycache__ 3 | *.mp4 4 | *.onnx 5 | *.engine 6 | calibration -------------------------------------------------------------------------------- /Benchmark.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------- 2 | # 这个脚本向你展示了如何使用 tensorRT 对导出的模型进行推理,并进行速度测试 3 | # 目前 GPU 上 tensorRT 是跑的最快的部署框架 ... 4 | # --------------------------------------------------------------- 5 | 6 | import time 7 | import numpy as np 8 | import tensorrt as trt 9 | 10 | from tqdm import tqdm 11 | from utils import trt_infer 12 | 13 | # int8 / fp32 ~ 70% 14 | # trt > ppq > fp32 15 | 16 | # Nvidia Nsight Performance Profile 17 | ENGINE_PATH = './models_trt/yolov5s.engine' 18 | BATCH_SIZE = 1 19 | INPUT_SHAPE = [BATCH_SIZE, 3, 512, 512] 20 | BENCHMARK_SAMPLES = 12800 21 | 22 | print(f'Benchmark with {ENGINE_PATH}') 23 | logger = trt.Logger(trt.Logger.ERROR) 24 | with open(ENGINE_PATH, 'rb') as f, trt.Runtime(logger) as runtime: 25 | engine = runtime.deserialize_cuda_engine(f.read()) 26 | 27 | with engine.create_execution_context() as context: 28 | inputs, outputs, bindings, stream = trt_infer.allocate_buffers(context.engine) 29 | inputs[0].host = np.zeros(shape=INPUT_SHAPE, dtype=np.float32) 30 | 31 | t1 = time.time() 32 | for _ in tqdm(range(BENCHMARK_SAMPLES), desc=f'Benchmark ...'): 33 | trt_infer.do_inference( 34 | context, bindings=bindings, inputs=inputs, 35 | outputs=outputs, stream=stream, batch_size=BATCH_SIZE) 36 | 37 | t2 = time.time() 38 | t = (t2 - t1)*1000/BENCHMARK_SAMPLES 39 | print(f"{t:0.5f}ms") 40 | 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # yolov5_TensorRT_inference 2 | 记录yolov5的TensorRT量化(fp16, int8)及推理代码。经实测可运行于Jetson平台,可将yolov5s、yolov8s这类的小模型部署在Jetson nano 4g上用于摄像头的检测。 3 |
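As a quick orientation before the detailed steps below, here is a minimal Python sketch of how an exported `.engine` file is consumed for inference. It mirrors the `utils.trt_infer` calls used by `Benchmark.py` in this repo (`allocate_buffers` / `do_inference`); the engine path and the 1×3×512×512 zero input are just the example values taken from that script — a real run would copy a preprocessed video frame into the input buffer.

```python
import numpy as np
import tensorrt as trt
from utils import trt_infer

ENGINE_PATH = './models_trt/yolov5s.engine'   # produced by onnx2trt.py

logger = trt.Logger(trt.Logger.ERROR)
with open(ENGINE_PATH, 'rb') as f, trt.Runtime(logger) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())

with engine.create_execution_context() as context:
    inputs, outputs, bindings, stream = trt_infer.allocate_buffers(context.engine)
    # a preprocessed frame (resized, normalized, CHW layout) would be copied in here
    inputs[0].host = np.zeros((1, 3, 512, 512), dtype=np.float32)
    preds = trt_infer.do_inference(
        context, bindings=bindings, inputs=inputs,
        outputs=outputs, stream=stream, batch_size=1)
```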
4 | 5 |
6 | 7 | CPP: 8 | [视频目标检测](https://github.com/MadaoFY/yolov5_TensorRT_inference/tree/main/cpp/video_detect) 9 | [jetson nano摄像头目标检测](https://github.com/MadaoFY/yolov5_TensorRT_inference/tree/main/cpp/jetson_csi) 10 | [jetson nano摄像头人体关键点检测](https://github.com/MadaoFY/yolov5_TensorRT_inference/tree/main/cpp/kp_jetson_csi) 11 | 12 | 模型支持: 13 | yolov5 14 | yolov7 15 | yolov8 16 | yolox(不可在生成的engine中添加nms模块) 17 | 18 | 温馨提示:本人使用的TensrRT版本为8.4.3.1,为保证成功运行,你的TensorRT大版本最好在8.4。具体环境依赖请参考```requirements.txt``` 19 | 20 | 项目文件如下: 21 | ```bash 22 | |-yolov5_TensorRT_inference 23 | |-calibration # 默认情况下用于存放int8量化校准集的文件夹 24 | |-cpp # c++推理代码,有jetson nano上用的代码 25 | |-doc # 单纯用来存放文本的文件夹 26 | |-models_onnx # 默认情况下用于存放onnx模型的文件夹 27 | |-models_trt # 默认情况下用于存放量化后生成的trt模型的文件夹 28 | |-utils # 存放utils的文件夹 29 | |-Benchmark.py # 测试trt模型速度的脚本 30 | |-labels_coco.yaml # coco数据集类别标签 31 | |-labels_voc.yaml # voc数据集类别标签 32 | |-onnx2trt.py # onnx模型转engine的脚本,已添加EfficientNMS算子的支持 33 | |-yolo_detect_v1.py # 不带nms算子的视频检测脚本 34 | |-yolo_detect_v2.py # 带nms算子的视频检测脚本,该脚本使用的trt模型添加了EfficientNMS算子 35 | |-yolox_detect.py # yolovx的视频检测脚本 36 | ``` 37 | 38 | 以下将使用yolov5s模型演示如何量化及用于视频的推理。 39 | ## 数据准备 40 | 使用yolov5官方提供的coco训练模型,已导出为onnx。这里使用voc2012作为校准集,仅用来演示,你可以下载coco数据集作为你的校准集。 41 | 42 | yolov5s.onnx:https://pan.baidu.com/s/1eYaU3ndVpwexL4k6goxjHg 43 | 提取码: sduf 44 | 45 | voc2012:https://pan.baidu.com/s/1rICWiczIv_GyrYIrEj1p3Q 46 | 提取码: 4pgx 47 | 48 | 视频源:https://pan.baidu.com/s/1HBIjz6019vn9qfoKPIuV2A 49 | 提取码: fbfh 50 | 51 | ## 量化(onnx2trt.py) 52 | 你需要从yolov5、yolov7、yolox的官方库导出相应onnx模型,从第三方实现的库中导出的yolo onnx模型不保证适用,注意导出的onnx不包含nms部分。如果你想把nms算子加入到engine中,add_nms设置为True。默认将onnx模型放置于models_onnx文件夹,导出的trt模型可保存于models_trt文件夹。如果你想使用int8量化,你需要从训练集中准备至少500张图片作为校准集,图片放置于calibration文件夹。 53 | 54 | ```shell 55 | python onnx2trt.py --onnx_dir ./models_onnx/yolov5s.onnx --engine_dir ./models_trt/yolov5s.engine --int8 True --imgs_dir ./calibration 56 | ``` 57 | 参数说明: 58 | - ```--onnx_dir``` onnx模型路径 59 | - ```--engine_dir``` trt模型的保存路径 60 | - ```--min_shape``` 最小的shape 61 | - ```--opt_shape``` 优化的shape 62 | - ```--max_shape``` 最大的shape 63 | - ```--fp16``` 是否使用fp16量化 64 | - ```--int8``` 是否使用int8量化 65 | - ```--imgs_dir``` 校准集路径 66 | - ```--n_iteration``` int8量化校准轮次 67 | - ```--cache_file``` 是否生成cache 68 | - ```--yolov8_head``` 是否为yolov8的检测头(注意,yolov8的输出与yolov5不一样) 69 | - ```--add_nms``` 添加EfficientNMS算子 70 | - ```--conf_thres``` nms的置信度设置 71 | - ```--iou_thres``` nms的iou设置 72 | - ```--max_det``` nms输出的最大检测数量 73 | 74 | 更详细参数说明可以在脚本中查看。 75 | 76 | ## 视频推理 77 | ### 1.不带EfficientNMS算子的推理脚本(yolo_detect_v1.py) 78 | 你需要准备一个模型输出类别的labels文件,具体可参考仓库的labels_coco.yaml文件。本演示中用到模型为coco训练的yolov5s模型,所以需要用到相对应的coco类别。如果你使用的是yolov5、yolov7模型,运行yolo_detect_v1.py脚本,yolox模型运行yolox_detect.py脚本。以yolov5s.engine推理为例。 79 | ```shell 80 | python yolo_detect_v1.py --video_dir ./sample_1080p_h265.mp4 --engine_dir ./models_trt/yolov5s.engine --labels ./labels_coco.yaml 81 | ``` 82 | 83 | - ```--video_dir``` 视频源路径 84 | - ```--engine_dir``` trt模型路径 85 | - ```--labels``` 模型labels文件 86 | - ```--conf_thres``` nms的置信度设置 87 | - ```--iou_thres``` nms的iou设置 88 | - ```--max_det``` nms输出的最大检测数量 89 | 90 | ### 2.带EfficientNMS算子的推理脚本(yolo_detect_v2.py) 91 | yolo_detect_v2.py脚本里的所使用trt模型已添加EfficientNMS算子,所以无需在对nms参数进行设置 92 | ```shell 93 | python yolo_detect_v2.py --video_dir ./sample_1080p_h265.mp4 --engine_dir ./models_trt/yolov7_nms.engine --labels ./labels_coco.yaml 94 | ``` 95 | 96 | - ```--video_dir``` 视频源路径 97 | - ```--engine_dir``` trt模型路径 98 | - ```--labels``` 模型labels文件 99 | 100 | 101 | ## 
其他相关 102 | 可能TensoRT安装是最消耗时间的事情、、、 103 | TensoRT:https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html#installing 104 | https://developer.nvidia.com/tensorrt 105 | 106 | Trt_sample: https://github.com/NVIDIA/trt-samples-for-hackathon-cn/tree/master/cookbook 107 | 108 | yolox:https://github.com/Megvii-BaseDetection/YOLOX 109 | yolov5:https://github.com/ultralytics/yolov5 110 | yolov7:https://github.com/WongKinYiu/yolov7 111 | yolov8: https://github.com/ultralytics/ultralytics 112 | 113 | 114 | -------------------------------------------------------------------------------- /calibration/README.md: -------------------------------------------------------------------------------- 1 | 用于存放量化用的校准集 2 | -------------------------------------------------------------------------------- /cpp/README.md: -------------------------------------------------------------------------------- 1 | # cpp_inference 2 | c++的TensorRT推理代码。jetson_csi为jetson nano的摄像头检测代码。 3 | 4 | 模型支持:yolov5、yolov7、yolov8 5 | 6 | -------------------------------------------------------------------------------- /cpp/jetson_csi/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | set(CMAKE_CXX_STANDARD 14) 4 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 5 | set(CMAKE_CXX_EXTENSIONS ON) 6 | 7 | project(yolo_detect C CXX) 8 | 9 | add_definitions(-DAPI_EXPORTS) 10 | option(CUDA_USE_STATIC_CUDA_RUNTIME OFF) 11 | # SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -g2 -ggdb") 12 | if(NOT CMAKE_BUILD_TYPE) 13 | SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3") 14 | endif() 15 | set(src_list csi_detect.cpp utils_detection.cpp utils_detection.h trt_infer.cpp trt_infer.h gstreamer.cpp gstreamer.h preprocess.cu preprocess.h) 16 | 17 | # CUDA 18 | # TODO(Call for PR): make cmake compatible with Windows 19 | set(CMAKE_CUDA_COMPILER /usr/local/cuda-10.2/bin/nvcc) 20 | enable_language(CUDA) 21 | find_package(CUDA REQUIRED) 22 | message(STATUS " libraries: ${CUDA_LIBRARIES}") 23 | message(STATUS " include path: ${CUDA_INCLUDE_DIRS}") 24 | 25 | 26 | # include and link dirs of cuda and tensorrt, you need adapt them if yours are different 27 | include_directories(/usr/local/cuda-10.2/include/) 28 | link_directories(/usr/local/cuda-10.2/lib64/) 29 | 30 | 31 | # tensorrt 32 | # set(TRT_DIR J:/tensorrt/TensorRT-8.4.3.1) 33 | # set(TRT_INCLUDE_DIRS ${TRT_DIR}/include/) 34 | # set(TRT_LIB_DIRS ${TRT_DIR}/lib/) 35 | 36 | # include_directories(${TRT_INCLUDE_DIRS}) 37 | # link_directories(${TRT_LIB_DIRS}) 38 | 39 | #include_directories(${PROJECT_SOURCE_DIR}/) 40 | #file(GLOB_RECURSE SRCS ${PROJECT_SOURCE_DIR}/*.cpp ${PROJECT_SOURCE_DIR}/*.h) 41 | 42 | # opencv 43 | find_package(OpenCV REQUIRED) 44 | include_directories( ${OpenCV_INCLUDE_DIRS} ) 45 | 46 | add_executable(${PROJECT_NAME} ${src_list}) 47 | target_link_libraries(${PROJECT_NAME} nvinfer) 48 | target_link_libraries(${PROJECT_NAME} cudart) 49 | target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS}) 50 | -------------------------------------------------------------------------------- /cpp/jetson_csi/README.md: -------------------------------------------------------------------------------- 1 | # jetson_csi 2 | 用jetson nano摄像头目标检测的c++代码。 3 | 用cmake编译后,运行yolo_detect。 4 | 5 | ```shell 6 | yolo_detect --engine_dir=./yolov5s.engine --labels=./labels_coco.yaml 7 | ``` 8 | 9 | 参数说明: 10 | - ```--engine_dir``` trt模型的保存路径 11 | - ```--labels``` 模型labels文件 12 | - ```--conf_thres``` nms的置信度设置 13 | - ```--iou_thres``` 
nms的iou设置 14 | - ```--max_det``` nms输出的最大检测数量 15 | 16 | 更详细参数说明可以在csi_detect.cpp中查看。 17 | -------------------------------------------------------------------------------- /cpp/jetson_csi/csi_detect.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MadaoFY/yolov5_TensorRT_inference/4cc1ec7316c63b101da3c842c1f98dc82c90e70c/cpp/jetson_csi/csi_detect.cpp -------------------------------------------------------------------------------- /cpp/jetson_csi/gstreamer.cpp: -------------------------------------------------------------------------------- 1 | #include "gstreamer.h" 2 | 3 | 4 | std::string gs_pipeline(int capture_width, int capture_height, int display_width, int display_height, int framerate, int flip_method) 5 | { 6 | std::string result = "nvarguscamerasrc ! video/x-raw(memory:NVMM), width=(int)" + std::to_string(capture_width) + 7 | ", height=(int)" + std::to_string(capture_height) + 8 | ", format=(string)NV12, framerate=(fraction)" + std::to_string(framerate) + 9 | "/1 ! nvvidconv flip-method=" + std::to_string(flip_method) + 10 | " ! video/x-raw, width=(int)" + std::to_string(display_width) + 11 | ", height=(int)" + std::to_string(display_height) + 12 | ", format=(string)BGRx ! videoconvert ! video/x-raw, format=(string)BGR ! appsink"; 13 | 14 | return result; 15 | } 16 | -------------------------------------------------------------------------------- /cpp/jetson_csi/gstreamer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | std::string gs_pipeline(int capture_width, int capture_height, int display_width, int display_height, int framerate, int flip_method); 6 | -------------------------------------------------------------------------------- /cpp/jetson_csi/labels_coco.yaml: -------------------------------------------------------------------------------- 1 | 0: person 2 | 1: bicycle 3 | 2: car 4 | 3: motorcycle 5 | 4: airplane 6 | 5: bus 7 | 6: train 8 | 7: truck 9 | 8: boat 10 | 9: traffic light 11 | 10: fire hydrant 12 | 11: stop sign 13 | 12: parking meter 14 | 13: bench 15 | 14: bird 16 | 15: cat 17 | 16: dog 18 | 17: horse 19 | 18: sheep 20 | 19: cow 21 | 20: elephant 22 | 21: bear 23 | 22: zebra 24 | 23: giraffe 25 | 24: backpack 26 | 25: umbrella 27 | 26: handbag 28 | 27: tie 29 | 28: suitcase 30 | 29: frisbee 31 | 30: skis 32 | 31: snowboard 33 | 32: sports ball 34 | 33: kite 35 | 34: baseball bat 36 | 35: baseball glove 37 | 36: skateboard 38 | 37: surfboard 39 | 38: tennis racket 40 | 39: bottle 41 | 40: wine glass 42 | 41: cup 43 | 42: fork 44 | 43: knife 45 | 44: spoon 46 | 45: bowl 47 | 46: banana 48 | 47: apple 49 | 48: sandwich 50 | 49: orange 51 | 50: broccoli 52 | 51: carrot 53 | 52: hot dog 54 | 53: pizza 55 | 54: donut 56 | 55: cake 57 | 56: chair 58 | 57: couch 59 | 58: potted plant 60 | 59: bed 61 | 60: dining table 62 | 61: toilet 63 | 62: tv 64 | 63: laptop 65 | 64: mouse 66 | 65: remote 67 | 66: keyboard 68 | 67: cell phone 69 | 68: microwave 70 | 69: oven 71 | 70: toaster 72 | 71: sink 73 | 72: refrigerator 74 | 73: book 75 | 74: clock 76 | 75: vase 77 | 76: scissors 78 | 77: teddy bear 79 | 78: hair drier 80 | 79: toothbrush 81 | -------------------------------------------------------------------------------- /cpp/jetson_csi/preprocess.cu: -------------------------------------------------------------------------------- 1 | #include "preprocess.h" 2 | 3 | #include 4 | 5 | 6 | __global__ void 
warpaffine_nearest_bgrbgr2rrggbb_kernel( 7 | uint8_t* src, int src_step_size, int src_width, 8 | int src_height, float* dst, int dst_width, 9 | int dst_height, uint8_t const_value_st, 10 | AffineMatrix d2s, int h_p, int w_p) 11 | { 12 | int dx = blockDim.x * blockIdx.x + threadIdx.x; 13 | int dy = blockDim.y * blockIdx.y + threadIdx.y; 14 | if (dx >= dst_width || dy >= dst_height) return; 15 | 16 | float m_x1 = d2s.value[0]; 17 | float m_y1 = d2s.value[1]; 18 | float m_z1 = d2s.value[2]; 19 | float m_x2 = d2s.value[3]; 20 | float m_y2 = d2s.value[4]; 21 | float m_z2 = d2s.value[5]; 22 | 23 | float c0, c1, c2; 24 | if (dy < h_p || dy >(dst_height - h_p) || dx < w_p || dx >(dst_width - w_p)) 25 | { 26 | // out of range 27 | c0 = const_value_st; 28 | c1 = const_value_st; 29 | c2 = const_value_st; 30 | } 31 | else 32 | { 33 | float src_x = m_x1 * (dx + 0.5f) + m_y1 * dy + m_z1 - 0.5f; 34 | float src_y = m_x2 * dx + m_y2 * (dy + 0.5f) + m_z2 - 0.5f; 35 | 36 | int sy_1 = floorf(src_y + 0.5f); 37 | int sx_1 = floorf(src_x + 0.5f); 38 | 39 | uint8_t const_value[] = { const_value_st, const_value_st, const_value_st }; 40 | uint8_t* p = const_value; 41 | 42 | if (sy_1 >= 0 && sy_1 <= src_height && sx_1 >=0 && sx_1 <= src_width) 43 | { 44 | p = src + sy_1 * src_step_size + sx_1 * 3; 45 | } 46 | 47 | c0 = p[0]; 48 | c1 = p[1]; 49 | c2 = p[2]; 50 | } 51 | 52 | // normalization 53 | c0 /= 255.0f; 54 | c1 /= 255.0f; 55 | c2 /= 255.0f; 56 | 57 | // bgrbgrbgr to rrrgggbbb 58 | int area = dst_width * dst_height; 59 | float* pdst_c0 = dst + dy * dst_width + dx; 60 | pdst_c0[0] = c2; 61 | pdst_c0[area] = c1; 62 | pdst_c0[2 * area] = c0; 63 | } 64 | 65 | __global__ void warpaffine_bilinear_bgrbgr2rrggbb_kernel( 66 | uint8_t* src, int src_step_size, int src_width, 67 | int src_height, float* dst, int dst_width, 68 | int dst_height, uint8_t const_value_st, 69 | AffineMatrix d2s, int h_p, int w_p) 70 | { 71 | int dx = blockDim.x * blockIdx.x + threadIdx.x; 72 | int dy = blockDim.y * blockIdx.y + threadIdx.y; 73 | if (dx >= dst_width || dy >= dst_height) return; 74 | 75 | float m_x1 = d2s.value[0]; 76 | float m_y1 = d2s.value[1]; 77 | float m_z1 = d2s.value[2]; 78 | float m_x2 = d2s.value[3]; 79 | float m_y2 = d2s.value[4]; 80 | float m_z2 = d2s.value[5]; 81 | 82 | float c0, c1, c2; 83 | if (dy < h_p || dy >(dst_height - h_p) || dx < w_p || dx >(dst_width - w_p)) 84 | { 85 | // out of range 86 | c0 = const_value_st; 87 | c1 = const_value_st; 88 | c2 = const_value_st; 89 | } 90 | else 91 | { 92 | float src_x = m_x1 * (dx + 0.5f) + m_y1 * dy + m_z1 - 0.5f; 93 | float src_y = m_x2 * dx + m_y2 * (dy + 0.5f) + m_z2 - 0.5f; 94 | 95 | int sy_1 = floorf(src_y); 96 | int sx_1 = floorf(src_x); 97 | int sy_2 = sy_1 + 1; 98 | int sx_2 = sx_1 + 1; 99 | 100 | uint8_t const_value[] = { const_value_st, const_value_st, const_value_st }; 101 | float a2 = src_y - sy_1; 102 | float a1 = 1.0f - a2; 103 | float b2 = src_x - sx_1; 104 | float b1 = 1.0f - b2; 105 | float w11 = a1 * b1; 106 | float w12 = a1 * b2; 107 | float w21 = a2 * b1; 108 | float w22 = a2 * b2; 109 | uint8_t* p11 = const_value; 110 | uint8_t* p12 = const_value; 111 | uint8_t* p21 = const_value; 112 | uint8_t* p22 = const_value; 113 | 114 | /*if (sy_1 >= 0) { 115 | if (sx_1 >= 0)*/ 116 | p11 = src + sy_1 * src_step_size + sx_1 * 3; 117 | 118 | //if (sx_2 < src_width) 119 | p12 = src + sy_1 * src_step_size + sx_2 * 3; 120 | //} 121 | 122 | /*if (sy_2 < src_height) { 123 | if (sx_1 >= 0)*/ 124 | p21 = src + sy_2 * src_step_size + sx_1 * 3; 125 | 126 | /*if (sx_2 < 
src_width)*/ 127 | p22 = src + sy_2 * src_step_size + sx_2 * 3; 128 | //} 129 | 130 | c0 = w11 * p11[0] + w12 * p12[0] + w21 * p21[0] + w22 * p22[0] + 0.5f; 131 | c1 = w11 * p11[1] + w12 * p12[1] + w21 * p21[1] + w22 * p22[1] + 0.5f; 132 | c2 = w11 * p11[2] + w12 * p12[2] + w21 * p21[2] + w22 * p22[2] + 0.5f; 133 | } 134 | 135 | // normalization 136 | c0 /= 255.0f; 137 | c1 /= 255.0f; 138 | c2 /= 255.0f; 139 | 140 | // bgrbgrbgr to rrrgggbbb 141 | int area = dst_width * dst_height; 142 | float* pdst_c0 = dst + dy * dst_width + dx; 143 | pdst_c0[0] = c2; 144 | pdst_c0[area] = c1; 145 | pdst_c0[2 * area] = c0; 146 | } 147 | 148 | 149 | void cuda_preprocess(cv::Mat& image, preproc_struct& image_trans, std::vector& bufferH, 150 | std::vector& bufferD, std::vector& bindingsize, cudaStream_t& stream, cv::Size resize) 151 | { 152 | int h, w, h_p, w_p; 153 | 154 | float scale = cv::min((float)resize.height / (float)image.rows, (float)resize.width / (float)image.cols); 155 | scale = cv::min(scale, 1.1f); 156 | 157 | h = image.rows * scale; 158 | w = image.cols * scale; 159 | h_p = (resize.height - h) * 0.5f; 160 | w_p = (resize.width - w) * 0.5f; 161 | 162 | image_trans.scale = scale; 163 | image_trans.h_p = h_p; 164 | image_trans.w_p = w_p; 165 | 166 | // copy data to device memory 167 | // memcpy(bufferH[2], image.data, bindingsize[2]); 168 | // cudaMemcpyAsync(bufferD[2], bufferH[2], bindingsize[2], cudaMemcpyHostToDevice, stream); 169 | cudaMemcpyAsync(bufferD[2], image.data, bindingsize[2], cudaMemcpyHostToDevice, stream); 170 | 171 | AffineMatrix s2d, d2s; 172 | 173 | /*s2d.value[0] = scale; 174 | s2d.value[1] = 0; 175 | s2d.value[2] = (resize.width - scale * image.cols + scale - 1) * 0.5f; 176 | s2d.value[3] = 0; 177 | s2d.value[4] = scale; 178 | s2d.value[5] = (resize.height - scale * image.rows + scale - 1) * 0.5f;*/ 179 | 180 | d2s.value[0] = 1.0f / scale; 181 | d2s.value[1] = 0; 182 | d2s.value[2] = (image.cols - resize.width / scale + d2s.value[0] - 1) * 0.5f; 183 | d2s.value[3] = 0; 184 | d2s.value[4] = 1.0f / scale; 185 | d2s.value[5] = (image.rows - resize.height / scale + d2s.value[0] - 1) * 0.5f; 186 | 187 | /*cv::Mat m2x3_s2d(2, 3, CV_32F, s2d.value); 188 | cv::Mat m2x3_d2s(2, 3, CV_32F, d2s.value); 189 | cv::invertAffineTransform(m2x3_s2d, m2x3_d2s); 190 | memcpy(d2s.value, m2x3_d2s.ptr(0), sizeof(d2s.value));*/ 191 | 192 | dim3 block(128, 1); 193 | dim3 grid((resize.width + block.x - 1) / block.x, (resize.height + block.y - 1) / block.y); 194 | 195 | warpaffine_nearest_bgrbgr2rrggbb_kernel <<< grid, block, 0, stream >>> ( 196 | (uint8_t*)bufferD[2], image.cols * 3, image.cols, 197 | image.rows, (float*)bufferD[0], resize.width, 198 | resize.height, 0, d2s, h_p, w_p); 199 | } 200 | -------------------------------------------------------------------------------- /cpp/jetson_csi/preprocess.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "utils_detection.h" 4 | 5 | #include 6 | #include 7 | 8 | struct AffineMatrix 9 | { 10 | float value[6]; 11 | }; 12 | 13 | void cuda_preprocess(cv::Mat& image, preproc_struct& image_trans, std::vector& bufferH, 14 | std::vector& bufferD, std::vector& bindingsize, cudaStream_t& stream, cv::Size resize); 15 | -------------------------------------------------------------------------------- /cpp/jetson_csi/trt_infer.cpp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MadaoFY/yolov5_TensorRT_inference/4cc1ec7316c63b101da3c842c1f98dc82c90e70c/cpp/jetson_csi/trt_infer.cpp -------------------------------------------------------------------------------- /cpp/jetson_csi/trt_infer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | 12 | class Logger : public nvinfer1::ILogger 13 | { 14 | public: 15 | Severity reportableSeverity; 16 | 17 | Logger(Severity severity = Severity::kINFO); 18 | void log(Severity severity, const char* msg) noexcept override; 19 | }; 20 | 21 | 22 | bool load_engine(nvinfer1::IRuntime*& runtime, nvinfer1::ICudaEngine*& engine, const std::string& engine_dir, 23 | nvinfer1::ILogger& gLogger); 24 | 25 | void allocate_buffers(nvinfer1::ICudaEngine*& engine, std::vector& bufferH, std::vector& bufferD, std::vector& bindingsize, 26 | cv::Size img_size); 27 | 28 | float* do_inference(nvinfer1::IExecutionContext*& context, std::vector& bufferH, const std::vector& bufferD, 29 | cudaStream_t& stream, const std::vector& BindingSize); 30 | 31 | 32 | class yolo_trt_det 33 | { 34 | private: 35 | 36 | nvinfer1::IRuntime* _runtime = nullptr; 37 | nvinfer1::ICudaEngine* _engine = nullptr; 38 | nvinfer1::IExecutionContext* _context = nullptr; 39 | 40 | std::unordered_map catid_labels; 41 | color_dicts catid_colors; 42 | cv::Size img_size; 43 | cv::Size set_size; 44 | bool v8_head; 45 | 46 | std::vector cpu_buffer; 47 | std::vector gpu_buffer; 48 | std::vector BindingSize; 49 | cudaStream_t stream; 50 | 51 | public: 52 | yolo_trt_det(const std::string& engine_dir, const std::string& labels_dir, cv::Size img_size); 53 | ~yolo_trt_det(); 54 | 55 | std::vector draw_batch(std::vector& image_list, float conf, float iou, int max_det); 56 | 57 | cv::Mat draw(cv::Mat& image, float conf, float iou, int max_det); 58 | }; 59 | -------------------------------------------------------------------------------- /cpp/jetson_csi/utils_detection.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MadaoFY/yolov5_TensorRT_inference/4cc1ec7316c63b101da3c842c1f98dc82c90e70c/cpp/jetson_csi/utils_detection.cpp -------------------------------------------------------------------------------- /cpp/jetson_csi/utils_detection.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | 11 | struct color_dicts 12 | { 13 | std::unordered_map> color_map; 14 | std::vector catid; 15 | 16 | color_dicts(const std::unordered_map& catid_labels); 17 | }; 18 | 19 | 20 | struct preproc_struct 21 | { 22 | float* img = nullptr; 23 | float scale; 24 | int h_p; 25 | int w_p; 26 | 27 | ~preproc_struct(); 28 | }; 29 | 30 | 31 | 32 | std::unordered_map yaml_load_labels(const std::string& dir = "data.yaml"); 33 | 34 | void preprocess(cv::Mat& image, preproc_struct& image_trans, cv::Size resize); 35 | 36 | void fliter_boxes(float* const boxes, bool v8_head, const std::array& output_shape, float conf_thres, 37 | std::vector& keep_boxes, std::vector& keep_scores, std::vector& keep_classes); 38 | 39 | void scale_boxes(cv::Rect& box, const preproc_struct& preproc_res); 40 | 41 | void draw_boxes(cv::Mat image, const cv::Rect& box, float score, int class_id, 42 | std::unordered_map catid_labels, color_dicts& color_dicts); 43 | 44 | void 
imgresize(const cv::Mat& image, cv::Mat& input_image, float scale, cv::Size resize); 45 | 46 | template 47 | static bool SortScorePairDescend(const std::pair& pair1, const std::pair& pair2); 48 | 49 | template 50 | void max_score_idx(const std::vector& scores, float score_thres, T scores_idxs); 51 | 52 | float get_iou(const cv::Rect& bbox1, const cv::Rect& bbox2); 53 | 54 | void base_nms(const std::vector& bboxes, const std::vector& scores, const std::vector& catid, 55 | float score_threshold, float nms_threshold, std::vector& indices, int limit); 56 | -------------------------------------------------------------------------------- /cpp/kp_jetson_csi/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | set(CMAKE_CXX_STANDARD 14) 4 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 5 | set(CMAKE_CXX_EXTENSIONS ON) 6 | 7 | project(yolo_detect C CXX) 8 | 9 | add_definitions(-DAPI_EXPORTS) 10 | option(CUDA_USE_STATIC_CUDA_RUNTIME OFF) 11 | # SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -g2 -ggdb") 12 | if(NOT CMAKE_BUILD_TYPE) 13 | SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3") 14 | endif() 15 | set(src_list csi_kp_detect.cpp utils_detection.cpp utils_detection.h trt_infer.cpp trt_infer.h gstreamer.cpp gstreamer.h preprocess.cu preprocess.h) 16 | 17 | # CUDA 18 | # TODO(Call for PR): make cmake compatible with Windows 19 | set(CMAKE_CUDA_COMPILER /usr/local/cuda-10.2/bin/nvcc) 20 | enable_language(CUDA) 21 | find_package(CUDA REQUIRED) 22 | message(STATUS " libraries: ${CUDA_LIBRARIES}") 23 | message(STATUS " include path: ${CUDA_INCLUDE_DIRS}") 24 | 25 | 26 | # include and link dirs of cuda and tensorrt, you need adapt them if yours are different 27 | include_directories(/usr/local/cuda-10.2/include/) 28 | link_directories(/usr/local/cuda-10.2/lib64/) 29 | 30 | 31 | # tensorrt 32 | # set(TRT_DIR J:/tensorrt/TensorRT-8.4.3.1) 33 | # set(TRT_INCLUDE_DIRS ${TRT_DIR}/include/) 34 | # set(TRT_LIB_DIRS ${TRT_DIR}/lib/) 35 | 36 | # include_directories(${TRT_INCLUDE_DIRS}) 37 | # link_directories(${TRT_LIB_DIRS}) 38 | 39 | #include_directories(${PROJECT_SOURCE_DIR}/) 40 | #file(GLOB_RECURSE SRCS ${PROJECT_SOURCE_DIR}/*.cpp ${PROJECT_SOURCE_DIR}/*.h) 41 | 42 | # opencv 43 | find_package(OpenCV REQUIRED) 44 | include_directories( ${OpenCV_INCLUDE_DIRS} ) 45 | 46 | add_executable(${PROJECT_NAME} ${src_list}) 47 | target_link_libraries(${PROJECT_NAME} nvinfer) 48 | target_link_libraries(${PROJECT_NAME} cudart) 49 | target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS}) 50 | -------------------------------------------------------------------------------- /cpp/kp_jetson_csi/README.md: -------------------------------------------------------------------------------- 1 | # kp_jetson_csi 2 | 在jetson nano 4g上使用yolov5和hrnet进行摄像头人体关键点检测。 3 | 我对hrnet进行了轻量化改造,使其能在算力有限的平台上运行。替换上mobilenetv2的backbone后用coco2017数据集进行了训练,可满足单目标的人体关键点检测需求。 4 | 后续有时间可能会更新关键点检测模型,当然如果没时间魔改出更快更准的模型的话就算了... 
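The keypoint model outputs one heatmap per keypoint (17 for COCO, matching the indices used in `points_link.yaml`); the C++ code decodes these heatmaps back into image coordinates in `get_final_preds()` (see `utils_detection.cpp`). Below is a rough, language-agnostic illustration of that decoding step, shown in Python for brevity. It assumes a `(17, H, W)` heatmap array and the detected person box, and it ignores the letterbox-padding correction that the real code applies via `preproc_struct`.

```python
import numpy as np

def decode_heatmaps(heatmaps, box_xywh):
    """Map per-keypoint heatmap peaks back into the original image.

    heatmaps : (K, H, W) float array from the keypoint model (K = 17 for COCO).
    box_xywh : (x, y, w, h) of the detected person box in the original image.
    Returns keypoint coordinates (K, 2) and their confidence scores (K,).
    """
    x0, y0, bw, bh = box_xywh
    k, h, w = heatmaps.shape
    coords = np.zeros((k, 2), dtype=np.float32)
    scores = np.zeros(k, dtype=np.float32)
    for i in range(k):
        # peak location of the i-th heatmap = most likely position of keypoint i
        py, px = np.unravel_index(np.argmax(heatmaps[i]), (h, w))
        scores[i] = heatmaps[i, py, px]
        # scale the heatmap peak into box coordinates, then offset by the box origin
        coords[i] = (x0 + (px + 0.5) * bw / w, y0 + (py + 0.5) * bh / h)
    return coords, scores
```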
5 | 6 | ## 数据准备 7 | 相比于目标检测,这里需要多提供一个关键点检测的engine和关键点链接信息。 8 | 你可以使用我提供的以下两个onnx模型,在运行的设备上生成engine。或者自己训练一个专门用于检测人的yolo模型,和一个用于关键点检测的hrnet模型。 9 | 10 | yolov5s_person.onnx:https://pan.baidu.com/s/1mgbFLOENiIaTmfsyc2RtVw 11 | 提取码:qei0 12 | 13 | Myhrnet.onnx:https://pan.baidu.com/s/1rIR_CjOuu6qzaWsoirfP3A 14 | 提取码:43dw 15 | 16 | points_link.yaml文件里记录的是关键点的链接信息,用于绘图。 17 | 18 | 用cmake编译后,运行yolo_detect。 19 | 20 | ```shell 21 | yolo_detect --det_engine_dir=./yolov5s_person.engine --kp_engine_dir=./Myhrnet.engine --labels=./labels_det.yaml --pointlinker=./points_link.yaml 22 | ``` 23 | 24 | 参数说明: 25 | - ```--det_engine_dir``` 目标检测trt模型的保存路径 26 | - ```--kp_engine_dir``` 关键点检测trt模型的保存路径 27 | - ```--labels``` 模型labels的yaml文件 28 | - ```--pointlinker``` 关键点链接的yaml文件 29 | - ```--conf_thres``` nms的置信度设置 30 | - ```--iou_thres``` nms的iou设置 31 | - ```--max_det``` 输出的最大检测数量 32 | - ```--skip``` 隔帧检测帧数 33 | 34 | 更详细参数说明可以在csi_kp_detect.cpp中查看。 35 | -------------------------------------------------------------------------------- /cpp/kp_jetson_csi/csi_kp_detect.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MadaoFY/yolov5_TensorRT_inference/4cc1ec7316c63b101da3c842c1f98dc82c90e70c/cpp/kp_jetson_csi/csi_kp_detect.cpp -------------------------------------------------------------------------------- /cpp/kp_jetson_csi/gstreamer.cpp: -------------------------------------------------------------------------------- 1 | #include "gstreamer.h" 2 | 3 | 4 | std::string gs_pipeline(int capture_width, int capture_height, int display_width, int display_height, int framerate, int flip_method) 5 | { 6 | std::string result = "nvarguscamerasrc ! video/x-raw(memory:NVMM), width=(int)" + std::to_string(capture_width) + 7 | ", height=(int)" + std::to_string(capture_height) + 8 | ", format=(string)NV12, framerate=(fraction)" + std::to_string(framerate) + 9 | "/1 ! nvvidconv flip-method=" + std::to_string(flip_method) + 10 | " ! video/x-raw, width=(int)" + std::to_string(display_width) + 11 | ", height=(int)" + std::to_string(display_height) + 12 | ", format=(string)BGRx ! videoconvert ! video/x-raw, format=(string)BGR ! 
appsink"; 13 | 14 | return result; 15 | } 16 | -------------------------------------------------------------------------------- /cpp/kp_jetson_csi/gstreamer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | std::string gs_pipeline(int capture_width, int capture_height, int display_width, int display_height, int framerate, int flip_method); 6 | -------------------------------------------------------------------------------- /cpp/kp_jetson_csi/labels_det.yaml: -------------------------------------------------------------------------------- 1 | 0: person 2 | -------------------------------------------------------------------------------- /cpp/kp_jetson_csi/points_link.yaml: -------------------------------------------------------------------------------- 1 | 15: 13 2 | 13: 11 3 | 16: 14 4 | 14: 12 5 | 12: 11 6 | 11: 5 7 | 6: 12 8 | 5: 6 9 | 7: 5 10 | 8: 6 11 | 9: 7 12 | 10: 8 13 | 1: 2 14 | 0: 1 15 | 0: 2 16 | 1: 3 17 | 2: 4 18 | 3: 5 19 | 4: 6 20 | -------------------------------------------------------------------------------- /cpp/kp_jetson_csi/preprocess.cu: -------------------------------------------------------------------------------- 1 | #include "preprocess.h" 2 | 3 | #include 4 | 5 | 6 | __global__ void warpaffine_nearest_bgrbgr2rrggbb_kernel( 7 | uint8_t* src, int src_step_size, int src_width, 8 | int src_height, float* dst, int dst_width, 9 | int dst_height, uint8_t const_value_st, 10 | AffineMatrix d2s, int h_p, int w_p) 11 | { 12 | int dx = blockDim.x * blockIdx.x + threadIdx.x; 13 | int dy = blockDim.y * blockIdx.y + threadIdx.y; 14 | if (dx >= dst_width || dy >= dst_height) return; 15 | 16 | float m_x1 = d2s.value[0]; 17 | float m_y1 = d2s.value[1]; 18 | float m_z1 = d2s.value[2]; 19 | float m_x2 = d2s.value[3]; 20 | float m_y2 = d2s.value[4]; 21 | float m_z2 = d2s.value[5]; 22 | 23 | float c0, c1, c2; 24 | if (dy < h_p || dy >(dst_height - h_p) || dx < w_p || dx >(dst_width - w_p)) 25 | { 26 | // out of range 27 | c0 = const_value_st; 28 | c1 = const_value_st; 29 | c2 = const_value_st; 30 | } 31 | else 32 | { 33 | float src_x = m_x1 * (dx + 0.5f) + m_y1 * dy + m_z1 - 0.5f; 34 | float src_y = m_x2 * dx + m_y2 * (dy + 0.5f) + m_z2 - 0.5f; 35 | 36 | int sy_1 = floorf(src_y + 0.5f); 37 | int sx_1 = floorf(src_x + 0.5f); 38 | 39 | uint8_t const_value[] = { const_value_st, const_value_st, const_value_st }; 40 | uint8_t* p = const_value; 41 | 42 | if (sy_1 >= 0 && sy_1 <= src_height && sx_1 >=0 && sx_1 <= src_width) 43 | { 44 | p = src + sy_1 * src_step_size + sx_1 * 3; 45 | } 46 | 47 | c0 = p[0]; 48 | c1 = p[1]; 49 | c2 = p[2]; 50 | } 51 | 52 | // normalization 53 | c0 /= 255.0f; 54 | c1 /= 255.0f; 55 | c2 /= 255.0f; 56 | 57 | // bgrbgrbgr to rrrgggbbb 58 | int area = dst_width * dst_height; 59 | float* pdst_c0 = dst + dy * dst_width + dx; 60 | pdst_c0[0] = c2; 61 | pdst_c0[area] = c1; 62 | pdst_c0[2 * area] = c0; 63 | } 64 | 65 | __global__ void warpaffine_bilinear_bgrbgr2rrggbb_kernel( 66 | uint8_t* src, int src_step_size, int src_width, 67 | int src_height, float* dst, int dst_width, 68 | int dst_height, uint8_t const_value_st, 69 | AffineMatrix d2s, int h_p, int w_p) 70 | { 71 | int dx = blockDim.x * blockIdx.x + threadIdx.x; 72 | int dy = blockDim.y * blockIdx.y + threadIdx.y; 73 | if (dx >= dst_width || dy >= dst_height) return; 74 | 75 | float m_x1 = d2s.value[0]; 76 | float m_y1 = d2s.value[1]; 77 | float m_z1 = d2s.value[2]; 78 | float m_x2 = d2s.value[3]; 79 | float m_y2 = d2s.value[4]; 80 | float 
m_z2 = d2s.value[5]; 81 | 82 | float c0, c1, c2; 83 | if (dy < h_p || dy >(dst_height - h_p) || dx < w_p || dx >(dst_width - w_p)) 84 | { 85 | // out of range 86 | c0 = const_value_st; 87 | c1 = const_value_st; 88 | c2 = const_value_st; 89 | } 90 | else 91 | { 92 | float src_x = m_x1 * (dx + 0.5f) + m_y1 * dy + m_z1 - 0.5f; 93 | float src_y = m_x2 * dx + m_y2 * (dy + 0.5f) + m_z2 - 0.5f; 94 | 95 | int sy_1 = floorf(src_y); 96 | int sx_1 = floorf(src_x); 97 | int sy_2 = sy_1 + 1; 98 | int sx_2 = sx_1 + 1; 99 | 100 | uint8_t const_value[] = { const_value_st, const_value_st, const_value_st }; 101 | float a2 = src_y - sy_1; 102 | float a1 = 1.0f - a2; 103 | float b2 = src_x - sx_1; 104 | float b1 = 1.0f - b2; 105 | float w11 = a1 * b1; 106 | float w12 = a1 * b2; 107 | float w21 = a2 * b1; 108 | float w22 = a2 * b2; 109 | uint8_t* p11 = const_value; 110 | uint8_t* p12 = const_value; 111 | uint8_t* p21 = const_value; 112 | uint8_t* p22 = const_value; 113 | 114 | /*if (sy_1 >= 0) { 115 | if (sx_1 >= 0)*/ 116 | p11 = src + sy_1 * src_step_size + sx_1 * 3; 117 | 118 | //if (sx_2 < src_width) 119 | p12 = src + sy_1 * src_step_size + sx_2 * 3; 120 | //} 121 | 122 | /*if (sy_2 < src_height) { 123 | if (sx_1 >= 0)*/ 124 | p21 = src + sy_2 * src_step_size + sx_1 * 3; 125 | 126 | /*if (sx_2 < src_width)*/ 127 | p22 = src + sy_2 * src_step_size + sx_2 * 3; 128 | //} 129 | 130 | c0 = w11 * p11[0] + w12 * p12[0] + w21 * p21[0] + w22 * p22[0] + 0.5f; 131 | c1 = w11 * p11[1] + w12 * p12[1] + w21 * p21[1] + w22 * p22[1] + 0.5f; 132 | c2 = w11 * p11[2] + w12 * p12[2] + w21 * p21[2] + w22 * p22[2] + 0.5f; 133 | } 134 | 135 | // normalization 136 | c0 /= 255.0f; 137 | c1 /= 255.0f; 138 | c2 /= 255.0f; 139 | 140 | // bgrbgrbgr to rrrgggbbb 141 | int area = dst_width * dst_height; 142 | float* pdst_c0 = dst + dy * dst_width + dx; 143 | pdst_c0[0] = c2; 144 | pdst_c0[area] = c1; 145 | pdst_c0[2 * area] = c0; 146 | } 147 | 148 | 149 | void cuda_preprocess(cv::Mat& image, preproc_struct& image_trans, std::vector& bufferH, 150 | std::vector& bufferD, std::vector& bindingsize, cudaStream_t& stream, cv::Size resize) 151 | { 152 | int h, w, h_p, w_p; 153 | 154 | float scale = cv::min((float)resize.height / (float)image.rows, (float)resize.width / (float)image.cols); 155 | scale = cv::min(scale, 1.1f); 156 | 157 | h = image.rows * scale; 158 | w = image.cols * scale; 159 | h_p = (resize.height - h) * 0.5f; 160 | w_p = (resize.width - w) * 0.5f; 161 | 162 | image_trans.scale = scale; 163 | image_trans.ori_h = image.rows; 164 | image_trans.ori_w = image.cols; 165 | image_trans.h_p = h_p; 166 | image_trans.w_p = w_p; 167 | 168 | // copy data to device memory 169 | cudaMemcpyAsync(bufferD[2], image.data, bindingsize[2], cudaMemcpyHostToDevice, stream); 170 | 171 | AffineMatrix d2s; 172 | 173 | d2s.value[0] = 1.0f / scale; 174 | d2s.value[1] = 0; 175 | d2s.value[2] = (image.cols - resize.width / scale + d2s.value[0] - 1) * 0.5f; 176 | d2s.value[3] = 0; 177 | d2s.value[4] = 1.0f / scale; 178 | d2s.value[5] = (image.rows - resize.height / scale + d2s.value[0] - 1) * 0.5f; 179 | 180 | // AffineMatrix s2d; 181 | 182 | /*s2d.value[0] = scale; 183 | s2d.value[1] = 0; 184 | s2d.value[2] = (resize.width - scale * image.cols + scale - 1) * 0.5f; 185 | 186 | s2d.value[3] = 0; 187 | s2d.value[4] = scale; 188 | s2d.value[5] = (resize.height - scale * image.rows + scale - 1) * 0.5f;*/ 189 | 190 | /*cv::Mat m2x3_s2d(2, 3, CV_32F, s2d.value); 191 | cv::Mat m2x3_d2s(2, 3, CV_32F, d2s.value); 192 | cv::invertAffineTransform(m2x3_s2d, m2x3_d2s); 
193 | memcpy(d2s.value, m2x3_d2s.ptr(0), sizeof(d2s.value));*/ 194 | 195 | dim3 block(128, 1); 196 | dim3 grid((resize.width + block.x - 1) / block.x, (resize.height + block.y - 1) / block.y); 197 | 198 | warpaffine_nearest_bgrbgr2rrggbb_kernel <<< grid, block, 0, stream >>> ( 199 | (uint8_t*)bufferD[2], image.cols * 3, image.cols, 200 | image.rows, (float*)bufferD[0], resize.width, 201 | resize.height, 0, d2s, h_p, w_p); 202 | } 203 | -------------------------------------------------------------------------------- /cpp/kp_jetson_csi/preprocess.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "utils_detection.h" 4 | 5 | #include 6 | #include 7 | 8 | struct AffineMatrix 9 | { 10 | float value[6]; 11 | }; 12 | 13 | void cuda_preprocess(cv::Mat& image, preproc_struct& image_trans, std::vector& bufferH, 14 | std::vector& bufferD, std::vector& bindingsize, cudaStream_t& stream, cv::Size resize); 15 | -------------------------------------------------------------------------------- /cpp/kp_jetson_csi/trt_infer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MadaoFY/yolov5_TensorRT_inference/4cc1ec7316c63b101da3c842c1f98dc82c90e70c/cpp/kp_jetson_csi/trt_infer.cpp -------------------------------------------------------------------------------- /cpp/kp_jetson_csi/trt_infer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | 12 | class Logger : public nvinfer1::ILogger 13 | { 14 | public: 15 | Severity reportableSeverity; 16 | 17 | Logger(Severity severity = Severity::kINFO); 18 | void log(Severity severity, const char* msg) noexcept override; 19 | }; 20 | 21 | 22 | bool load_engine(nvinfer1::IRuntime*& runtime, nvinfer1::ICudaEngine*& engine, const std::string& engine_dir, 23 | nvinfer1::ILogger& gLogger); 24 | 25 | void allocate_buffers(nvinfer1::ICudaEngine*& engine, std::vector& bufferH, std::vector& bufferD, std::vector& bindingsize); 26 | 27 | float* det_inference(nvinfer1::IExecutionContext*& context, std::vector& bufferH, const std::vector& bufferD, 28 | const std::vector& BindingSize, cudaStream_t& stream); 29 | 30 | float* kp_inference(nvinfer1::IExecutionContext*& context, std::vector& bufferH, const std::vector& bufferD, 31 | const std::vector& BindingSize, cudaStream_t& stream); 32 | 33 | 34 | class yolo_trt_det 35 | { 36 | private: 37 | 38 | nvinfer1::IRuntime* det_runtime = nullptr; 39 | nvinfer1::ICudaEngine* det_engine = nullptr; 40 | nvinfer1::IExecutionContext* det_context = nullptr; 41 | 42 | nvinfer1::IRuntime* kp_runtime = nullptr; 43 | nvinfer1::ICudaEngine* kp_engine = nullptr; 44 | nvinfer1::IExecutionContext* kp_context = nullptr; 45 | 46 | std::unordered_map catid_labels; 47 | std::vector> points_linker; 48 | color_dicts catid_colors; 49 | cv::Size img_resize; 50 | cv::Size kp_img_resize; 51 | 52 | bool v8_head; 53 | 54 | std::vector det_bufferh; 55 | std::vector det_bufferd; 56 | std::vector det_bindingsize; 57 | 58 | std::vector kp_bufferh; 59 | std::vector kp_bufferd; 60 | std::vector kp_bindingsize; 61 | cudaStream_t stream; 62 | 63 | int skip; 64 | std::vector< int > nms_idx; 65 | std::vector nms_boxes; 66 | std::vector nms_scores; 67 | std::vector nms_catid; 68 | 69 | uint64_t infer_times; 70 | uint32_t frams_num; 71 | 72 | public: 73 | yolo_trt_det() = default; 74 | yolo_trt_det(const 
std::string & det_engine_dir, const std::string & kp_engine_dir, const std::string & labels_dir, 75 | const std::string & pointlinker_dir, cv::Size img_size); 76 | ~yolo_trt_det(); 77 | 78 | cv::Mat draw(cv::Mat & image, float conf, float iou, int max_det, int skip); 79 | }; 80 | -------------------------------------------------------------------------------- /cpp/kp_jetson_csi/utils_detection.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MadaoFY/yolov5_TensorRT_inference/4cc1ec7316c63b101da3c842c1f98dc82c90e70c/cpp/kp_jetson_csi/utils_detection.cpp -------------------------------------------------------------------------------- /cpp/kp_jetson_csi/utils_detection.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | 11 | struct color_dicts 12 | { 13 | std::unordered_map> color_map; 14 | std::vector catid; 15 | 16 | color_dicts() {}; 17 | color_dicts(const std::unordered_map& catid_labels); 18 | }; 19 | 20 | 21 | struct preproc_struct 22 | { 23 | float* img = nullptr; 24 | float scale; 25 | int ori_h; 26 | int ori_w; 27 | int h_p; 28 | int w_p; 29 | 30 | ~preproc_struct(); 31 | }; 32 | 33 | 34 | 35 | std::unordered_map yaml_load_labels(const std::string& dir = "data.yaml"); 36 | 37 | std::vector> yaml_load_points_link(const std::string& dir); 38 | 39 | void preprocess(cv::Mat& image, preproc_struct& image_trans, const cv::Size& resize); 40 | 41 | void fliter_boxes(float* const boxes, bool v8_head, const std::array& output_shape, const float& conf_thres, 42 | std::vector& keep_boxes, std::vector& keep_scores, std::vector& keep_classes); 43 | 44 | void scale_boxes(cv::Rect& box, const preproc_struct& preproc_res); 45 | 46 | void draw_boxes(cv::Mat image, const cv::Rect& box, const float& score, const int& class_id, 47 | std::unordered_map catid_labels, color_dicts& color_dicts); 48 | 49 | void imgresize(const cv::Mat& image, cv::Mat& input_image, const float& scale, cv::Size resize); 50 | 51 | template 52 | static bool SortScorePairDescend(const std::pair& pair1, const std::pair& pair2); 53 | 54 | template 55 | void max_score_idx(const std::vector& scores, const float& score_thres, T& scores_idxs); 56 | 57 | float get_iou(const cv::Rect& bbox1, const cv::Rect& bbox2); 58 | 59 | void base_nms(const std::vector& bboxes, const std::vector& scores, const std::vector& catid, 60 | const float& score_threshold, const float& nms_threshold, std::vector& indices, const int& limit); 61 | 62 | void get_final_preds(float* const heatmaps, preproc_struct& keypoints_trans, const std::array& output_shape, 63 | const cv::Rect& bbox, std::vector& keypoints_scorce, std::vector& keypoints); 64 | 65 | void draw_keypoints(cv::Mat image, const std::vector& keypoints, std::vector& keypoints_score, 66 | float score, const std::vector>& points_linker); 67 | -------------------------------------------------------------------------------- /cpp/video_detect/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | set(CMAKE_CXX_STANDARD 14) 4 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 5 | set(CMAKE_CXX_EXTENSIONS ON) 6 | 7 | project(yolo_tensorrt C CXX) 8 | 9 | add_definitions(-DAPI_EXPORTS) 10 | option(CUDA_USE_STATIC_CUDA_RUNTIME OFF) 11 | 12 | if(NOT CMAKE_BUILD_TYPE) 13 | set(CMAKE_BUILD_TYPE Release CACHE STRING "Build Type" FORCE) 14 | 
set(CMAKE_CXX_FLAGS_Release "$ENV{CXXFLAGS} -O3 -Wall") 15 | endif() 16 | 17 | 18 | set(src_list main.cpp utils_detection.cpp utils_detection.h trt_infer.cpp trt_infer.h preprocess.cu preprocess.h) 19 | 20 | # TODO(Call for PR): make cmake compatible with Windows 21 | set(CMAKE_CUDA_COMPILER E:/NV/cuda11.7/bin/nvcc) 22 | enable_language(CUDA) 23 | 24 | # CUDA 25 | # TODO(Call for PR): make cmake compatible with Windows 26 | find_package(CUDA REQUIRED) 27 | message(STATUS " libraries: ${CUDA_LIBRARIES}") 28 | message(STATUS " include path: ${CUDA_INCLUDE_DIRS}") 29 | 30 | 31 | # include and link dirs of cuda and tensorrt, you need adapt them if yours are different 32 | include_directories(E:/NV/cuda11.7/include/) 33 | link_directories(E:/NV/cuda11.7/lib/x64/) 34 | 35 | 36 | # tensorrt 37 | set(TRT_DIR J:/tensorrt/TensorRT-8.4.3.1) 38 | set(TRT_INCLUDE_DIRS ${TRT_DIR}/include/) 39 | set(TRT_LIB_DIRS ${TRT_DIR}/lib/) 40 | 41 | include_directories(${TRT_INCLUDE_DIRS}) 42 | 43 | 44 | # opencv 45 | set(CMAKE_PREFIX_PATH E:/opencv/build/x64/vc16/lib) 46 | find_package(OpenCV REQUIRED) 47 | include_directories( ${OpenCV_INCLUDE_DIRS} ) 48 | 49 | add_executable(${PROJECT_NAME} ${src_list}) 50 | target_link_libraries(${PROJECT_NAME} nvinfer) 51 | target_link_libraries(${PROJECT_NAME} cudart) 52 | target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS}) -------------------------------------------------------------------------------- /cpp/video_detect/README.md: -------------------------------------------------------------------------------- 1 | # video_detect 2 | 用于视频目标检测的c++代码。 3 | 用cmake编译后,运行yolo_detect。 4 | 5 | ```shell 6 | yolo_detect --engine_dir=./yolov5s.engine --video_dir=./sample_1080p_h265.mp4 --labels=./labels_coco.yaml 7 | ``` 8 | 9 | 参数说明: 10 | - ```--engine_dir``` trt模型的保存路径 11 | - ```--video_dir``` 视频源路径 12 | - ```--labels``` 模型labels文件 13 | - ```--conf_thres``` nms的置信度设置 14 | - ```--iou_thres``` nms的iou设置 15 | - ```--max_det``` nms输出的最大检测数量 16 | 17 | 更详细参数说明可以在main.cpp中查看。 18 | -------------------------------------------------------------------------------- /cpp/video_detect/labels_coco.yaml: -------------------------------------------------------------------------------- 1 | 0: person 2 | 1: bicycle 3 | 2: car 4 | 3: motorcycle 5 | 4: airplane 6 | 5: bus 7 | 6: train 8 | 7: truck 9 | 8: boat 10 | 9: traffic light 11 | 10: fire hydrant 12 | 11: stop sign 13 | 12: parking meter 14 | 13: bench 15 | 14: bird 16 | 15: cat 17 | 16: dog 18 | 17: horse 19 | 18: sheep 20 | 19: cow 21 | 20: elephant 22 | 21: bear 23 | 22: zebra 24 | 23: giraffe 25 | 24: backpack 26 | 25: umbrella 27 | 26: handbag 28 | 27: tie 29 | 28: suitcase 30 | 29: frisbee 31 | 30: skis 32 | 31: snowboard 33 | 32: sports ball 34 | 33: kite 35 | 34: baseball bat 36 | 35: baseball glove 37 | 36: skateboard 38 | 37: surfboard 39 | 38: tennis racket 40 | 39: bottle 41 | 40: wine glass 42 | 41: cup 43 | 42: fork 44 | 43: knife 45 | 44: spoon 46 | 45: bowl 47 | 46: banana 48 | 47: apple 49 | 48: sandwich 50 | 49: orange 51 | 50: broccoli 52 | 51: carrot 53 | 52: hot dog 54 | 53: pizza 55 | 54: donut 56 | 55: cake 57 | 56: chair 58 | 57: couch 59 | 58: potted plant 60 | 59: bed 61 | 60: dining table 62 | 61: toilet 63 | 62: tv 64 | 63: laptop 65 | 64: mouse 66 | 65: remote 67 | 66: keyboard 68 | 67: cell phone 69 | 68: microwave 70 | 69: oven 71 | 70: toaster 72 | 71: sink 73 | 72: refrigerator 74 | 73: book 75 | 74: clock 76 | 75: vase 77 | 76: scissors 78 | 77: teddy bear 79 | 78: hair drier 80 | 79: toothbrush 81 | 
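The map above is the file passed with `--labels`: each entry is simply `class id: class name`. The Python scripts read it as YAML, and the C++ programs declare `yaml_load_labels()` for the same purpose. A minimal sketch of loading it (assuming PyYAML is available; the repo's own loaders may differ in detail):

```python
import yaml

with open('labels_coco.yaml', 'r', encoding='utf-8') as f:
    catid_labels = yaml.safe_load(f)   # -> {0: 'person', 1: 'bicycle', ..., 79: 'toothbrush'}

print(catid_labels[0])   # person
```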
-------------------------------------------------------------------------------- /cpp/video_detect/main.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MadaoFY/yolov5_TensorRT_inference/4cc1ec7316c63b101da3c842c1f98dc82c90e70c/cpp/video_detect/main.cpp -------------------------------------------------------------------------------- /cpp/video_detect/preprocess.cu: -------------------------------------------------------------------------------- 1 | #include "preprocess.h" 2 | 3 | #include 4 | 5 | 6 | __global__ void warpaffine_nearest_bgrbgr2rrggbb_kernel( 7 | uint8_t* src, int src_step_size, int src_width, 8 | int src_height, float* dst, int dst_width, 9 | int dst_height, uint8_t const_value_st, 10 | AffineMatrix d2s, int h_p, int w_p) 11 | { 12 | int dx = blockDim.x * blockIdx.x + threadIdx.x; 13 | int dy = blockDim.y * blockIdx.y + threadIdx.y; 14 | if (dx >= dst_width || dy >= dst_height) return; 15 | 16 | float m_x1 = d2s.value[0]; 17 | float m_y1 = d2s.value[1]; 18 | float m_z1 = d2s.value[2]; 19 | float m_x2 = d2s.value[3]; 20 | float m_y2 = d2s.value[4]; 21 | float m_z2 = d2s.value[5]; 22 | 23 | float c0, c1, c2; 24 | if (dy < h_p || dy >(dst_height - h_p) || dx < w_p || dx >(dst_width - w_p)) 25 | { 26 | // out of range 27 | c0 = const_value_st; 28 | c1 = const_value_st; 29 | c2 = const_value_st; 30 | } 31 | else 32 | { 33 | float src_x = m_x1 * (dx + 0.5f) + m_y1 * dy + m_z1 - 0.5f; 34 | float src_y = m_x2 * dx + m_y2 * (dy + 0.5f) + m_z2 - 0.5f; 35 | 36 | int sy_1 = floorf(src_y + 0.5f); 37 | int sx_1 = floorf(src_x + 0.5f); 38 | 39 | uint8_t const_value[] = { const_value_st, const_value_st, const_value_st }; 40 | uint8_t* p = const_value; 41 | 42 | if (sy_1 >= 0 && sy_1 <= src_height && sx_1 >=0 && sx_1 <= src_width) 43 | { 44 | p = src + sy_1 * src_step_size + sx_1 * 3; 45 | } 46 | 47 | c0 = p[0]; 48 | c1 = p[1]; 49 | c2 = p[2]; 50 | } 51 | 52 | // normalization 53 | c0 = c0 / 255.0f; 54 | c1 = c1 / 255.0f; 55 | c2 = c2 / 255.0f; 56 | 57 | // bgrbgrbgr to rrrgggbbb 58 | int area = dst_width * dst_height; 59 | float* pdst_c0 = dst + dy * dst_width + dx; 60 | pdst_c0[0] = c2; 61 | pdst_c0[area] = c1; 62 | pdst_c0[2 * area] = c0; 63 | } 64 | 65 | __global__ void warpaffine_bilinear_bgrbgr2rrggbb_kernel( 66 | uint8_t* src, int src_step_size, int src_width, 67 | int src_height, float* dst, int dst_width, 68 | int dst_height, uint8_t const_value_st, 69 | AffineMatrix d2s, int h_p, int w_p) 70 | { 71 | int dx = blockDim.x * blockIdx.x + threadIdx.x; 72 | int dy = blockDim.y * blockIdx.y + threadIdx.y; 73 | if (dx >= dst_width || dy >= dst_height) return; 74 | 75 | float m_x1 = d2s.value[0]; 76 | float m_y1 = d2s.value[1]; 77 | float m_z1 = d2s.value[2]; 78 | float m_x2 = d2s.value[3]; 79 | float m_y2 = d2s.value[4]; 80 | float m_z2 = d2s.value[5]; 81 | 82 | float c0, c1, c2; 83 | if (dy < h_p || dy >(dst_height - h_p) || dx < w_p || dx >(dst_width - w_p)) 84 | { 85 | // out of range 86 | c0 = const_value_st; 87 | c1 = const_value_st; 88 | c2 = const_value_st; 89 | } 90 | else 91 | { 92 | float src_x = m_x1 * (dx + 0.5f) + m_y1 * dy + m_z1 - 0.5f; 93 | float src_y = m_x2 * dx + m_y2 * (dy + 0.5f) + m_z2 - 0.5f; 94 | 95 | int sy_1 = floorf(src_y); 96 | int sx_1 = floorf(src_x); 97 | int sy_2 = sy_1 + 1; 98 | int sx_2 = sx_1 + 1; 99 | 100 | uint8_t const_value[] = { const_value_st, const_value_st, const_value_st }; 101 | float a2 = src_y - sy_1; 102 | float a1 = 1.0f - a2; 103 | float b2 = src_x - sx_1; 104 | float 
b1 = 1.0f - b2; 105 | float w11 = a1 * b1; 106 | float w12 = a1 * b2; 107 | float w21 = a2 * b1; 108 | float w22 = a2 * b2; 109 | uint8_t* p11 = const_value; 110 | uint8_t* p12 = const_value; 111 | uint8_t* p21 = const_value; 112 | uint8_t* p22 = const_value; 113 | 114 | /*if (sy_1 >= 0) { 115 | if (sx_1 >= 0)*/ 116 | p11 = src + sy_1 * src_step_size + sx_1 * 3; 117 | 118 | //if (sx_2 < src_width) 119 | p12 = src + sy_1 * src_step_size + sx_2 * 3; 120 | //} 121 | 122 | /*if (sy_2 < src_height) { 123 | if (sx_1 >= 0)*/ 124 | p21 = src + sy_2 * src_step_size + sx_1 * 3; 125 | 126 | /*if (sx_2 < src_width)*/ 127 | p22 = src + sy_2 * src_step_size + sx_2 * 3; 128 | //} 129 | 130 | c0 = w11 * p11[0] + w12 * p12[0] + w21 * p21[0] + w22 * p22[0] + 0.5f; 131 | c1 = w11 * p11[1] + w12 * p12[1] + w21 * p21[1] + w22 * p22[1] + 0.5f; 132 | c2 = w11 * p11[2] + w12 * p12[2] + w21 * p21[2] + w22 * p22[2] + 0.5f; 133 | } 134 | 135 | // normalization 136 | c0 /= 255.0f; 137 | c1 /= 255.0f; 138 | c2 /= 255.0f; 139 | 140 | // bgrbgrbgr to rrrgggbbb 141 | int area = dst_width * dst_height; 142 | float* pdst_c0 = dst + dy * dst_width + dx; 143 | dst[dy * dst_width + dx] = c2; 144 | dst[dy * dst_width + dx + area] = c1; 145 | dst[dy * dst_width + dx + 2 * area] = c0; 146 | } 147 | 148 | 149 | void cuda_preprocess(cv::Mat& image, preproc_struct& image_trans, std::vector& bufferH, 150 | std::vector& bufferD, std::vector& bindingsize, cudaStream_t& stream, cv::Size resize) 151 | { 152 | int h, w, h_p, w_p; 153 | 154 | float scale = cv::min((float)resize.height / (float)image.rows, (float)resize.width / (float)image.cols); 155 | scale = cv::min(scale, 1.1f); 156 | 157 | h = image.rows * scale; 158 | w = image.cols * scale; 159 | h_p = (resize.height - h) * 0.5f; 160 | w_p = (resize.width - w) * 0.5f; 161 | 162 | image_trans.scale = scale; 163 | image_trans.h_p = h_p; 164 | image_trans.w_p = w_p; 165 | 166 | // copy data to device memory 167 | memcpy(bufferH[2], image.data, bindingsize[2]); 168 | cudaMemcpyAsync(bufferD[2], bufferH[2], bindingsize[2], cudaMemcpyHostToDevice, stream); 169 | 170 | // AffineMatrix s2d; 171 | /*s2d.value[0] = scale; 172 | s2d.value[1] = 0; 173 | s2d.value[2] = (resize.width - scale * image.cols + scale - 1) * 0.5f; 174 | s2d.value[3] = 0; 175 | s2d.value[4] = scale; 176 | s2d.value[5] = (resize.height - scale * image.rows + scale - 1) * 0.5f;*/ 177 | 178 | /*cv::Mat m2x3_s2d(2, 3, CV_32F, s2d.value); 179 | cv::Mat m2x3_d2s(2, 3, CV_32F, d2s.value); 180 | cv::invertAffineTransform(m2x3_s2d, m2x3_d2s); 181 | memcpy(d2s.value, m2x3_d2s.ptr(0), sizeof(d2s.value));*/ 182 | 183 | AffineMatrix d2s; 184 | 185 | d2s.value[0] = 1.0f / scale; 186 | d2s.value[1] = 0; 187 | d2s.value[2] = (image.cols - resize.width / scale + d2s.value[0] - 1) * 0.5f; 188 | d2s.value[3] = 0; 189 | d2s.value[4] = 1.0f / scale; 190 | d2s.value[5] = (image.rows - resize.height / scale + d2s.value[0] - 1) * 0.5f; 191 | 192 | dim3 block(128, 1); 193 | dim3 grid((resize.width + block.x - 1) / block.x, (resize.height + block.y - 1) / block.y); 194 | 195 | warpaffine_nearest_bgrbgr2rrggbb_kernel <<< grid, block, 0, stream >>> ( 196 | (uint8_t*)bufferD[2], image.cols * 3, image.cols, 197 | image.rows, (float*)bufferD[0], resize.width, 198 | resize.height, 0, d2s, h_p, w_p); 199 | } -------------------------------------------------------------------------------- /cpp/video_detect/preprocess.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "utils_detection.h" 
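// Summary of the GPU preprocessing implemented in preprocess.cu above: the frame is
// letterboxed with scale = min(resize / original, 1.1), padded symmetrically by h_p / w_p
// pixels, sampled through the inverse (dst -> src) affine matrix d2s, converted from
// interleaved BGR to planar RGB, and divided by 255. scale, h_p and w_p are written back
// into preproc_struct so scale_boxes() can later map detections onto the original frame.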
4 | 5 | #include 6 | #include 7 | 8 | struct AffineMatrix 9 | { 10 | float value[6]; 11 | }; 12 | 13 | void cuda_preprocess(cv::Mat& image, preproc_struct& image_trans, std::vector& bufferH, 14 | std::vector& bufferD, std::vector& bindingsize, cudaStream_t& stream, cv::Size resize); -------------------------------------------------------------------------------- /cpp/video_detect/trt_infer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MadaoFY/yolov5_TensorRT_inference/4cc1ec7316c63b101da3c842c1f98dc82c90e70c/cpp/video_detect/trt_infer.cpp -------------------------------------------------------------------------------- /cpp/video_detect/trt_infer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | 12 | class Logger : public nvinfer1::ILogger 13 | { 14 | public: 15 | Severity reportableSeverity; 16 | 17 | Logger(Severity severity = Severity::kINFO); 18 | void log(Severity severity, const char* msg) noexcept override; 19 | }; 20 | 21 | 22 | bool load_engine(nvinfer1::IRuntime*& runtime, nvinfer1::ICudaEngine*& engine, const std::string& engine_dir, 23 | nvinfer1::ILogger& gLogger); 24 | 25 | void allocate_buffers(nvinfer1::ICudaEngine*& engine, 26 | std::vector& bufferH, std::vector& bufferD, std::vector& bindingsize, cv::Size img_size); 27 | 28 | float* do_inference(nvinfer1::IExecutionContext*& context, std::vector& bufferH, const std::vector& bufferD, 29 | cudaStream_t& stream, const std::vector& BindingSize); 30 | 31 | 32 | class yolo_trt_det 33 | { 34 | private: 35 | 36 | nvinfer1::IRuntime* _runtime = nullptr; 37 | nvinfer1::ICudaEngine* _engine = nullptr; 38 | nvinfer1::IExecutionContext* _context = nullptr; 39 | 40 | std::unordered_map catid_labels; 41 | color_dicts catid_colors; 42 | cv::Size set_size; 43 | bool v8_head; 44 | 45 | std::vector cpu_buffer; 46 | std::vector gpu_buffer; 47 | std::vector BindingSize; 48 | cudaStream_t stream; 49 | 50 | public: 51 | yolo_trt_det(const std::string& engine_dir, const std::string& labels_dir, cv::Size img_size); 52 | ~yolo_trt_det(); 53 | 54 | //std::vector draw_batch(std::vector& image_list, float conf, float iou, int max_det); 55 | 56 | cv::Mat draw(cv::Mat& image, float conf, float iou, int max_det); 57 | }; -------------------------------------------------------------------------------- /cpp/video_detect/utils_detection.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MadaoFY/yolov5_TensorRT_inference/4cc1ec7316c63b101da3c842c1f98dc82c90e70c/cpp/video_detect/utils_detection.cpp -------------------------------------------------------------------------------- /cpp/video_detect/utils_detection.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | 11 | struct color_dicts 12 | { 13 | std::unordered_map> color_map; 14 | std::vector catid; 15 | 16 | color_dicts(const std::unordered_map& catid_labels); 17 | }; 18 | 19 | 20 | struct preproc_struct 21 | { 22 | float* img = nullptr; 23 | float scale; 24 | int h_p; 25 | int w_p; 26 | 27 | ~preproc_struct(); 28 | }; 29 | 30 | 31 | 32 | std::unordered_map yaml_load_labels(const std::string& dir = "data.yaml"); 33 | 34 | void preprocess(cv::Mat& image, preproc_struct& image_trans, cv::Size 
resize); 35 | 36 | void fliter_boxes(float* const boxes, bool v8_head, const std::array& output_shape, float conf_thres, 37 | std::vector& keep_boxes, std::vector& keep_scores, std::vector& keep_classes); 38 | 39 | void scale_boxes(cv::Rect& box, const preproc_struct& preproc_res); 40 | 41 | void draw_boxes(cv::Mat image, const cv::Rect& box, float score, int class_id, 42 | std::unordered_map catid_labels, color_dicts& color_dicts); 43 | 44 | void imgresize(const cv::Mat& image, cv::Mat& input_image, float scale, cv::Size resize); 45 | 46 | template 47 | static bool SortScorePairDescend(const std::pair& pair1, const std::pair& pair2); 48 | 49 | template 50 | void max_score_idx(const std::vector& scores, float score_thres, T scores_idxs); 51 | 52 | float get_iou(const cv::Rect& bbox1, const cv::Rect& bbox2); 53 | 54 | void base_nms(const std::vector& bboxes, const std::vector& scores, const std::vector& catid, float score_threshold, float nms_threshold, std::vector& indices, int limit); 55 | -------------------------------------------------------------------------------- /doc/yolov5s_det.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MadaoFY/yolov5_TensorRT_inference/4cc1ec7316c63b101da3c842c1f98dc82c90e70c/doc/yolov5s_det.png -------------------------------------------------------------------------------- /labels_coco.yaml: -------------------------------------------------------------------------------- 1 | 0: person 2 | 1: bicycle 3 | 2: car 4 | 3: motorcycle 5 | 4: airplane 6 | 5: bus 7 | 6: train 8 | 7: truck 9 | 8: boat 10 | 9: traffic light 11 | 10: fire hydrant 12 | 11: stop sign 13 | 12: parking meter 14 | 13: bench 15 | 14: bird 16 | 15: cat 17 | 16: dog 18 | 17: horse 19 | 18: sheep 20 | 19: cow 21 | 20: elephant 22 | 21: bear 23 | 22: zebra 24 | 23: giraffe 25 | 24: backpack 26 | 25: umbrella 27 | 26: handbag 28 | 27: tie 29 | 28: suitcase 30 | 29: frisbee 31 | 30: skis 32 | 31: snowboard 33 | 32: sports ball 34 | 33: kite 35 | 34: baseball bat 36 | 35: baseball glove 37 | 36: skateboard 38 | 37: surfboard 39 | 38: tennis racket 40 | 39: bottle 41 | 40: wine glass 42 | 41: cup 43 | 42: fork 44 | 43: knife 45 | 44: spoon 46 | 45: bowl 47 | 46: banana 48 | 47: apple 49 | 48: sandwich 50 | 49: orange 51 | 50: broccoli 52 | 51: carrot 53 | 52: hot dog 54 | 53: pizza 55 | 54: donut 56 | 55: cake 57 | 56: chair 58 | 57: couch 59 | 58: potted plant 60 | 59: bed 61 | 60: dining table 62 | 61: toilet 63 | 62: tv 64 | 63: laptop 65 | 64: mouse 66 | 65: remote 67 | 66: keyboard 68 | 67: cell phone 69 | 68: microwave 70 | 69: oven 71 | 70: toaster 72 | 71: sink 73 | 72: refrigerator 74 | 73: book 75 | 74: clock 76 | 75: vase 77 | 76: scissors 78 | 77: teddy bear 79 | 78: hair drier 80 | 79: toothbrush 81 | -------------------------------------------------------------------------------- /labels_voc.yaml: -------------------------------------------------------------------------------- 1 | 0: bus 2 | 1: train 3 | 2: cow 4 | 3: diningtable 5 | 4: motorbike 6 | 5: horse 7 | 6: sofa 8 | 7: bicycle 9 | 8: tvmonitor 10 | 9: aeroplane 11 | 10: boat 12 | 11: sheep 13 | 12: pottedplant 14 | 13: bird 15 | 14: cat 16 | 15: bottle 17 | 16: dog 18 | 17: car 19 | 18: chair 20 | 19: person 21 | -------------------------------------------------------------------------------- /models_onnx/README.md: -------------------------------------------------------------------------------- 1 | 用于存放待量化的onnx 2 | 
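Before moving on to the Python tooling, it helps to spell out the first filtering stage shared by both implementations. `fliter_boxes` (declared in `cpp/video_detect/utils_detection.h` above) and `non_max_suppression` (in `utils/utils_detection.py` below) start from the raw YOLO output of shape `[bs, num_boxes, 4 + 1 + num_classes]` for a v5-style head. Below is a NumPy sketch of that step under the v5 layout (xywh box, objectness, per-class scores); the function and variable names are illustrative, not part of the repo:

```python
import numpy as np

def filter_boxes_v5(pred, conf_thres=0.25):
    """pred: (num_boxes, 4 + 1 + nc) raw v5-style output for one image."""
    obj = pred[:, 4:5]                    # objectness
    cls_conf = pred[:, 5:] * obj          # conf = obj_conf * cls_conf
    class_id = cls_conf.argmax(axis=1)
    score = cls_conf[np.arange(len(class_id)), class_id]
    keep = score > conf_thres
    # boxes are still xywh here; NMS and the xywh -> xyxy conversion happen afterwards
    return pred[keep, :4], score[keep], class_id[keep]

# Hypothetical usage with a (8500, 85) COCO output:
# boxes, scores, classes = filter_boxes_v5(raw_output[0])
```

For the v8-style head there is no objectness column, so the class scores start at column 4, which is exactly the `v8_head` switch carried through both code paths.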
-------------------------------------------------------------------------------- /models_trt/README.md: -------------------------------------------------------------------------------- 1 | 用于存放导出后的engine 2 | -------------------------------------------------------------------------------- /onnx2trt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import numpy as np 4 | import tensorrt as trt 5 | 6 | 7 | from utils import calibrator 8 | 9 | __all__ = [ 10 | 'build_engine', 11 | 'onnx2trt' 12 | ] 13 | 14 | 15 | def AddEfficientNMSPlugin(conf_thres=0.25, iou_thres=0.45, max_det=200, box_coding=1): 16 | """ 17 | 添加efficientNMS 18 | 19 | score_threshold: score_thresh 20 | iou_threshold: iou_thresh 21 | max_output_boxes: detections_per_img 22 | box_coding: 0->[x1, y1, x2, y2], 1->[x, y, w, h] 23 | """ 24 | for c in trt.get_plugin_registry().plugin_creator_list: 25 | if c.name == "EfficientNMS_TRT": 26 | print(f'Succeeded finding {c.name}') 27 | parameter = [ 28 | trt.PluginField("score_threshold", np.float32(conf_thres), trt.PluginFieldType.FLOAT32), 29 | trt.PluginField("iou_threshold", np.float32(iou_thres), trt.PluginFieldType.FLOAT32), 30 | trt.PluginField("max_output_boxes", np.int32(max_det), trt.PluginFieldType.INT32), 31 | trt.PluginField("background_class", np.int32(-1), trt.PluginFieldType.INT32), # background_class: -1, no background class 32 | trt.PluginField("score_activation", np.int32(0), trt.PluginFieldType.INT32), # score_activation: 0->False, 1->True 33 | trt.PluginField("box_coding", np.int32(box_coding), trt.PluginFieldType.INT32) 34 | ] 35 | return c.create_plugin(c.name, trt.PluginFieldCollection(parameter)) 36 | return None 37 | 38 | 39 | def build_engine( 40 | onnx_file, model_engine, min_shape, opt_shape, max_shape, 41 | fp16=False, int8=False, imgs_dir=None, imgs_list=None, n_iteration=128, cache_file=None, 42 | v8_head=False, add_nms=False, conf_thres=0.25, iou_thres=0.45, max_det=200, box_coding=1 43 | ): 44 | logger = trt.Logger(trt.Logger.ERROR) 45 | trt.init_libnvinfer_plugins(logger, namespace="") 46 | builder = trt.Builder(logger) 47 | network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) 48 | config = builder.create_builder_config() 49 | config.max_workspace_size = (4 << 30) 50 | # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 4 << 30) 51 | 52 | # Parse model file 53 | parser = trt.OnnxParser(network, logger) 54 | if not os.path.exists(onnx_file): 55 | print("ONNX file is not exists!") 56 | exit() 57 | print("Succeeded finding .onnx file!") 58 | with open(onnx_file, "rb") as model: 59 | if not parser.parse(model.read()): 60 | print("Failed parsing .onnx file!") 61 | for error in range(parser.num_errors): 62 | print(parser.get_error(error)) 63 | exit() 64 | else: 65 | print("Succeeded parsing .onnx file!") 66 | 67 | if v8_head: 68 | outputTensor = network.get_output(0) 69 | print(f'v8 {outputTensor.name} shape:{outputTensor.shape}') 70 | network.unmark_output(outputTensor) 71 | outputTensor = network.add_shuffle(outputTensor) 72 | outputTensor.first_transpose = (0, 2, 1) 73 | network.mark_output(outputTensor.get_output(0)) 74 | 75 | # 添加nms算子 76 | if add_nms: 77 | """ 78 | 对原输出进行预处理,拆分成 目标框数据 和 类别置信度数据 两个矩阵,背景置信度要与类别置信度相乘。 79 | [1, 8500, 4 + 1 + 80] ——> [1, 8500, 4] + [1, 8500, 1 + 80] ——> [1, 8500, 4] + [1, 8500, 80] 80 | """ 81 | outputTensor = network.get_output(0) 82 | print(f'{outputTensor.name} shape:{outputTensor.shape}') 83 | bs, 
num_boxes, det_res = outputTensor.shape 84 | network.unmark_output(outputTensor) 85 | xycwh = network.add_slice(outputTensor, (0, 0, 0), (bs, num_boxes, 4), (1, 1, 1)) 86 | if v8_head: 87 | obj = network.add_slice( 88 | outputTensor, (0, 0, 4), (bs, num_boxes, det_res - 4), (1, 1, 1) 89 | ) 90 | else: 91 | scores = network.add_slice(outputTensor, (0, 0, 4), (bs, num_boxes, 1), (1, 1, 1)) 92 | obj = network.add_slice(outputTensor, (0, 0, 5), (bs, num_boxes, det_res - 5), (1, 1, 1)) 93 | obj = network.add_elementwise( 94 | scores.get_output(0), obj.get_output(0), trt.ElementWiseOperation.PROD 95 | ) 96 | print('Add EfficientNMS_TRT!') 97 | nms = AddEfficientNMSPlugin(conf_thres, iou_thres, max_det, box_coding) 98 | pluginlayer = network.add_plugin_v2([xycwh.get_output(0), obj.get_output(0)], nms) 99 | pluginlayer.get_output(0).name = "num_dets" 100 | pluginlayer.get_output(1).name = "det_boxes" 101 | pluginlayer.get_output(2).name = "det_scores" 102 | pluginlayer.get_output(3).name = "det_classes" 103 | for i in range(4): 104 | network.mark_output(pluginlayer.get_output(i)) 105 | 106 | inputTensor = network.get_input(0) 107 | print(f'{inputTensor.name} shape:{inputTensor.shape}') 108 | batch, c, h, w = inputTensor.shape 109 | if batch != -1: 110 | min_shape[0], opt_shape[0], max_shape[0] = batch, batch, batch 111 | if c != -1: 112 | min_shape[1], opt_shape[1], max_shape[1] = c, c, c 113 | if h != -1: 114 | min_shape[-2], opt_shape[-2], max_shape[-2] = h, h, h 115 | if w != -1: 116 | min_shape[-1], opt_shape[-1], max_shape[-1] = w, w, w 117 | 118 | profile = builder.create_optimization_profile() 119 | profile.set_shape(inputTensor.name, min_shape, opt_shape, max_shape) 120 | config.add_optimization_profile(profile) 121 | 122 | # Quantization 123 | if fp16: 124 | config.set_flag(trt.BuilderFlag.FP16) 125 | if int8 and imgs_dir: 126 | config.set_flag(trt.BuilderFlag.INT8) 127 | if imgs_list is None: 128 | imgs_list = os.listdir(imgs_dir) 129 | config.int8_calibrator = calibrator.MyCalibrator( 130 | calibrationpath=imgs_dir, 131 | imgslist=imgs_list, 132 | nCalibration=n_iteration, 133 | inputShape=max_shape, 134 | cacheFile=cache_file 135 | ) 136 | 137 | 138 | print('Now, engine is building!') 139 | plan = builder.build_serialized_network(network, config) 140 | if plan is None: 141 | print("Failed building engine!") 142 | # exit() 143 | with open(model_engine, "wb") as f: 144 | f.write(plan) 145 | print('Engine has been built!!!') 146 | 147 | runtime = trt.Runtime(logger) 148 | return runtime.deserialize_cuda_engine(plan) 149 | 150 | 151 | class onnx2trt: 152 | """ 153 | Parses an ONNX graph and builds a TensorRT engine from it. 
154 | """ 155 | def __init__(self, verbose=False): 156 | 157 | self.logger = trt.Logger(trt.Logger.ERROR) 158 | if verbose: 159 | self.logger = trt.Logger(trt.Logger.INFO) 160 | self.logger.min_severity = trt.Logger.Severity.VERBOSE 161 | 162 | trt.init_libnvinfer_plugins(self.logger, namespace="") 163 | 164 | self.builder = trt.Builder(self.logger) 165 | self.config = self.builder.create_builder_config() 166 | self.config.max_workspace_size = (4 << 30) 167 | # self.config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 4 << 30) 168 | 169 | self.network = None 170 | self.profile = None 171 | self.parser = None 172 | 173 | self.FP16 = False 174 | self.INT8 = False 175 | 176 | def create_network( 177 | self, onnx_dir, v8_head=False, add_nms=False, conf_thres=0.25, iou_thres=0.45, max_det=200, box_coding=1 178 | ): 179 | 180 | self.network = self.builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) 181 | # Parse model file 182 | self.parser = trt.OnnxParser(self.network, self.logger) 183 | if not os.path.exists(onnx_dir): 184 | print("ONNX file is not exists!") 185 | exit() 186 | print("Succeeded finding .onnx file!") 187 | with open(onnx_dir, "rb") as model: 188 | if not self.parser.parse(model.read()): 189 | print("Failed parsing .onnx file!") 190 | for error in range(self.parser.num_errors): 191 | print(self.parser.get_error(error)) 192 | exit() 193 | else: 194 | print("Succeeded parsing .onnx file!") 195 | 196 | if v8_head: 197 | outputTensor = self.network.get_output(0) 198 | print(f'v8 {outputTensor.name} shape:{outputTensor.shape}') 199 | self.network.unmark_output(outputTensor) 200 | outputTensor = self.network.add_shuffle(outputTensor) 201 | # (bs, det_res, num_boxes ) -> (bs, num_boxes, det_res) 202 | outputTensor.first_transpose = (0, 2, 1) 203 | self.network.mark_output(outputTensor.get_output(0)) 204 | 205 | # 添加nms算子 206 | if add_nms: 207 | """ 208 | 对原输出进行预处理,拆分成 目标框数据 和 类别置信度数据 两个矩阵,背景置信度要与类别置信度相乘。 209 | [1, 8500, 4 + 1 + 80] ——> [1, 8500, 4] + [1, 8500, 1 + 80] ——> [1, 8500, 4] + [1, 8500, 80] 210 | """ 211 | outputTensor = self.network.get_output(0) 212 | print(f'{outputTensor.name} shape:{outputTensor.shape}') 213 | bs, num_boxes, det_res = outputTensor.shape 214 | self.network.unmark_output(outputTensor) 215 | xycwh = self.network.add_slice(outputTensor, (0, 0, 0), (bs, num_boxes, 4), (1, 1, 1)) 216 | if v8_head: 217 | obj = self.network.add_slice( 218 | outputTensor, (0, 0, 4), (bs, num_boxes, det_res - 4), (1, 1, 1) 219 | ) 220 | else: 221 | scores = self.network.add_slice(outputTensor, (0, 0, 4), (bs, num_boxes, 1), (1, 1, 1)) 222 | obj = self.network.add_slice(outputTensor, (0, 0, 5), (bs, num_boxes, det_res - 5), (1, 1, 1)) 223 | obj = self.network.add_elementwise( 224 | scores.get_output(0), obj.get_output(0), trt.ElementWiseOperation.PROD 225 | ) 226 | print('Add EfficientNMS_TRT!') 227 | nms = AddEfficientNMSPlugin(conf_thres, iou_thres, max_det, box_coding) 228 | pluginlayer = self.network.add_plugin_v2([xycwh.get_output(0), obj.get_output(0)], nms) 229 | pluginlayer.get_output(0).name = "num_dets" 230 | pluginlayer.get_output(1).name = "det_boxes" 231 | pluginlayer.get_output(2).name = "det_scores" 232 | pluginlayer.get_output(3).name = "det_classes" 233 | for i in range(4): 234 | self.network.mark_output(pluginlayer.get_output(i)) 235 | 236 | 237 | def create_engine(self, engine_dir, min_shape, opt_shape, max_shape, fp16=False, int8=False, 238 | imgs_dir=None, n_iteration=128, cache_file=None): 239 | 240 | self.FP16 = fp16 241 | 
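# Note: FP16 and INT8 may be enabled together, in which case the builder picks the
# faster precision per layer; INT8 additionally requires the calibrator configured
# below, with imgs_dir pointing at a set of representative images.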
self.INT8 = int8 242 | 243 | inputTensor = self.network.get_input(0) 244 | print(f'{inputTensor.name} shape:{inputTensor.shape}') 245 | batch, c, h, w = inputTensor.shape 246 | if batch != -1: 247 | min_shape[0], opt_shape[0], max_shape[0] = batch, batch, batch 248 | if c != -1: 249 | min_shape[1], opt_shape[1], max_shape[1] = c, c, c 250 | if h != -1: 251 | min_shape[-2], opt_shape[-2], max_shape[-2] = h, h, h 252 | if w != -1: 253 | min_shape[-1], opt_shape[-1], max_shape[-1] = w, w, w 254 | 255 | self.profile = self.builder.create_optimization_profile() 256 | self.profile.set_shape(inputTensor.name, min_shape, opt_shape, max_shape) 257 | self.config.add_optimization_profile(self.profile) 258 | 259 | # Quantization 260 | if self.FP16: 261 | self.config.set_flag(trt.BuilderFlag.FP16) 262 | if self.INT8: 263 | assert imgs_dir ,'If you choice int8, you should also set imgs_dir for the calibration' 264 | self.config.set_flag(trt.BuilderFlag.INT8) 265 | imgs_list = os.listdir(imgs_dir) 266 | calib = calibrator.MyCalibrator( 267 | calibrationpath=imgs_dir, 268 | imgslist=imgs_list, 269 | nCalibration=n_iteration, 270 | inputShape=max_shape, 271 | cacheFile=cache_file 272 | ) 273 | self.config.int8_calibrator = calib 274 | 275 | print('Now, engine is building...') 276 | t1 = time.time() 277 | plan = self.builder.build_serialized_network(self.network, self.config) 278 | t2 = time.time() 279 | print(f'{(t2 - t1)/60:0.2f}min') 280 | if plan is None: 281 | print("Failed building engine!") 282 | # exit() 283 | with open(engine_dir, "wb") as f: 284 | f.write(plan) 285 | print('Engine has been built!!!') 286 | 287 | runtime = trt.Runtime(self.logger) 288 | return runtime.deserialize_cuda_engine(plan) 289 | 290 | 291 | def main(args): 292 | 293 | onnx_dir = args.onnx_dir 294 | engine_dir = args.engine_dir 295 | if engine_dir is None: 296 | engine_dir = f"./models_trt/{onnx_dir.split('/')[-1].replace('onnx', 'engine')}" 297 | 298 | yolo_engine = onnx2trt() 299 | yolo_engine.create_network( 300 | onnx_dir, 301 | v8_head=args.yolov8_head, 302 | add_nms=args.add_nms, 303 | conf_thres=args.conf_thres, 304 | iou_thres=args.iou_thres, 305 | max_det=args.max_det 306 | ) 307 | 308 | yolo_engine.create_engine( 309 | engine_dir, 310 | min_shape=args.min_shape, 311 | opt_shape=args.opt_shape, 312 | max_shape=args.max_shape, 313 | fp16=args.fp16, 314 | int8=args.int8, 315 | imgs_dir=args.imgs_dir, 316 | n_iteration=args.n_iteration, 317 | cache_file=args.cache_file 318 | ) 319 | 320 | 321 | if __name__ == '__main__': 322 | import argparse 323 | 324 | parser = argparse.ArgumentParser(description=__doc__) 325 | # onnx模型 326 | parser.add_argument('--onnx_dir', type=str, default='./models_onnx/yolov5s.onnx', help='onnx path') 327 | # engine模型保存地址 328 | parser.add_argument('--engine_dir', type=str, default=None, help='engine path') 329 | # 最小的输入shape 330 | parser.add_argument('--min_shape', nargs='+', type=int, default=[1, 3, 512, 512], 331 | help='input min shape [batch, channel, height, width]') 332 | # 最佳优化的输入shape 333 | parser.add_argument('--opt_shape', nargs='+', type=int, default=[1, 3, 512, 512], 334 | help='input opt shape [batch, channel, height, width]') 335 | # 最大的输入shape 336 | parser.add_argument('--max_shape', nargs='+', type=int, default=[1, 3, 512, 512], 337 | help='input max shape [batch, channel, height, width]') 338 | # 是否使用fp16量化 339 | parser.add_argument('--fp16', type=bool, default=True, choices=[True, False], 340 | help='TensorRt FP16 half-precision export') 341 | # 是否使用int8量化 342 | 
parser.add_argument('--int8', type=bool, default=False, choices=[True, False], 343 | help='TensorRt INT8 quantization') 344 | # int8量化校准集位置 345 | parser.add_argument('--imgs_dir', default='./calibration', help='Dataset for int8 calibration') 346 | # 校准的轮次 347 | parser.add_argument('--n_iteration', type=int, default=512, help='Iteration for int8 calibration') 348 | # cache保存位置 349 | parser.add_argument('--cache_file', default=None, help='Int8 cache path') 350 | # 是否为yolov8的检测头 351 | parser.add_argument('--yolov8_head', type=bool, default=True, choices=[True, False], help='yolov8_head or not') 352 | # 是否添加nms 353 | parser.add_argument('--add_nms', type=bool, default=False, choices=[True, False], help='add efficientNMS') 354 | # 只有得分大于置信度的预测框会被保留下来 355 | parser.add_argument('--conf_thres', type=float, default=0.25, help='confidence threshold') 356 | # 非极大抑制所用到的nms_iou大小 357 | parser.add_argument('--iou_thres', type=float, default=0.45, help='NMS IoU threshold') 358 | # 目标框数量限制 359 | parser.add_argument('--max_det', type=int, default=200, help='maximum detections per image') 360 | 361 | args = parser.parse_args() 362 | print(args) 363 | 364 | main(args) 365 | 366 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | PyYAML 2 | numpy>=1.21.0 3 | opencv-python>=4.1.1 4 | onnx>=1.10.2 5 | torch>=1.10.2+cu113 6 | torchvision>=0.11.3 7 | 8 | 9 | pycuda<2021.1 # old CUDA python API (not recommended), replaced by cuda-python 10 | nvidia-pyindex 11 | tensorrt == 8.4.3.1 # https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html#downloading 12 | cuda-python 13 | -------------------------------------------------------------------------------- /utils/calibrator.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 as cv 3 | import numpy as np 4 | import pycuda.autoinit 5 | import pycuda.driver as cuda 6 | from cuda import cudart 7 | import tensorrt as trt 8 | 9 | if cudart: 10 | cudart.cudaDeviceSynchronize() 11 | 12 | __all__ = [ 13 | 'MyCalibrator', 14 | 'MyCalibrator_v2' 15 | ] 16 | 17 | def trans(img, size): 18 | crop_shape = min(img.shape[:2]) 19 | img = img[:crop_shape - 1, :crop_shape - 1, :] 20 | img = cv.resize(img, size) 21 | img /= 255.0 22 | return img 23 | 24 | 25 | class MyCalibrator(trt.IInt8EntropyCalibrator2): 26 | """pycuda""" 27 | def __init__(self, calibrationpath, imgslist, nCalibration, inputShape, cacheFile): 28 | trt.IInt8EntropyCalibrator2.__init__(self) 29 | self.calibrationpath = calibrationpath 30 | self.imgslist = imgslist 31 | self.nCalibration = nCalibration 32 | self.shape = inputShape # (N,C,H,W) 33 | self.buffeSize = trt.volume(inputShape) * trt.float32.itemsize 34 | self.cacheFile = cacheFile 35 | self.dIn = cuda.mem_alloc(self.buffeSize) 36 | self.oneBatch = self.batchGenerator() 37 | 38 | print(int(self.dIn)) 39 | 40 | # def __del__(self): 41 | # cudart.cudaFree(self.dIn) 42 | 43 | def batchGenerator(self): 44 | for i in range(self.nCalibration): 45 | print("> calibration %d" % i) 46 | subImageList = np.random.choice(self.imgslist, self.shape[0], replace=False) 47 | # self.imgslist = list(set(self.imgslist) - set(subImageList)) 48 | yield np.ascontiguousarray(self.loadImages(subImageList)) 49 | 50 | def loadImages(self, imageList): 51 | res = np.empty(self.shape, dtype=np.float32) 52 | for i in range(self.shape[0]): 53 | path = os.path.join(self.calibrationpath, 
imageList[i]) 54 | img = cv.imread(path) 55 | img = cv.cvtColor(img, cv.COLOR_BGR2RGB).astype(np.float32) 56 | img = trans(img, self.shape[-2:]).transpose((2, 0, 1)) 57 | res[i] = img 58 | return res 59 | 60 | def get_batch_size(self): # do NOT change name 61 | return self.shape[0] 62 | 63 | def get_batch(self, nameList=None, inputNodeName=None): # do NOT change name 64 | try: 65 | data = next(self.oneBatch) 66 | # cudart.cudaMemcpy(self.dIn, data.ctypes.data, self.buffeSize, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice) 67 | cuda.memcpy_htod(self.dIn, data.ravel()) 68 | return [int(self.dIn)] 69 | except StopIteration: 70 | return None 71 | 72 | def read_calibration_cache(self): # do NOT change name 73 | if os.path.exists(self.cacheFile): 74 | print("Succeed finding cahce file: %s" % (self.cacheFile)) 75 | with open(self.cacheFile, "rb") as f: 76 | cache = f.read() 77 | return cache 78 | else: 79 | print("Failed finding int8 cache!") 80 | return 81 | 82 | def write_calibration_cache(self, cache): # do NOT change name 83 | with open(self.cacheFile, "wb") as f: 84 | f.write(cache) 85 | print("Succeed saving int8 cache!") 86 | 87 | 88 | class MyCalibrator_v2(trt.IInt8EntropyCalibrator2): 89 | """cuda-python""" 90 | def __init__(self, calibrationpath, imgslist, nCalibration, inputShape, cacheFile): 91 | trt.IInt8EntropyCalibrator2.__init__(self) 92 | self.calibrationpath = calibrationpath 93 | self.imgslist = imgslist 94 | self.nCalibration = nCalibration 95 | self.shape = inputShape # (N,C,H,W) 96 | self.buffeSize = trt.volume(inputShape) * trt.float32.itemsize 97 | self.cacheFile = cacheFile 98 | _, self.dIn = cudart.cudaMalloc(self.buffeSize) 99 | self.oneBatch = self.batchGenerator() 100 | 101 | print(int(self.dIn)) 102 | 103 | def __del__(self): 104 | cudart.cudaFree(self.dIn) 105 | 106 | def batchGenerator(self): 107 | for i in range(self.nCalibration): 108 | print("> calibration %d" % i) 109 | subImageList = np.random.choice(self.imgslist, self.shape[0], replace=False) 110 | # self.imgslist = list(set(self.imgslist) - set(subImageList)) 111 | yield np.ascontiguousarray(self.loadImages(subImageList)) 112 | 113 | def loadImages(self, imageList): 114 | res = np.empty(self.shape, dtype=np.float32) 115 | for i in range(self.shape[0]): 116 | path = os.path.join(self.calibrationpath, imageList[i]) 117 | img = cv.imread(path) 118 | img = cv.cvtColor(img, cv.COLOR_BGR2RGB).astype(np.float32) 119 | img = trans(img, self.shape[-2:]).transpose((2, 0, 1)) 120 | res[i] = img 121 | return res 122 | 123 | def get_batch_size(self): # do NOT change name 124 | return self.shape[0] 125 | 126 | def get_batch(self, nameList=None, inputNodeName=None): # do NOT change name 127 | try: 128 | data = next(self.oneBatch) 129 | cudart.cudaMemcpy(self.dIn, data.ctypes.data, self.buffeSize, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice) 130 | return [int(self.dIn)] 131 | except StopIteration: 132 | return None 133 | 134 | def read_calibration_cache(self): # do NOT change name 135 | if os.path.exists(self.cacheFile): 136 | print("Succeed finding cahce file: %s" % (self.cacheFile)) 137 | with open(self.cacheFile, "rb") as f: 138 | cache = f.read() 139 | return cache 140 | else: 141 | print("Failed finding int8 cache!") 142 | return 143 | 144 | def write_calibration_cache(self, cache): # do NOT change name 145 | with open(self.cacheFile, "wb") as f: 146 | f.write(cache) 147 | print("Succeed saving int8 cache!") 148 | -------------------------------------------------------------------------------- /utils/trt_infer.py: 
-------------------------------------------------------------------------------- 1 | # 2 | # SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | import argparse 19 | import logging 20 | import os 21 | import sys 22 | 23 | import numpy as np 24 | import pycuda.autoinit 25 | import pycuda.driver as cuda 26 | import tensorrt as trt 27 | 28 | try: 29 | # Sometimes python does not understand FileNotFoundError 30 | FileNotFoundError 31 | except NameError: 32 | FileNotFoundError = IOError 33 | 34 | EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) 35 | logging.basicConfig(level=logging.INFO) 36 | logging.getLogger("EngineBuilder").setLevel(logging.INFO) 37 | log = logging.getLogger("EngineBuilder") 38 | 39 | def GiB(val): 40 | return val * 1 << 30 41 | 42 | 43 | def add_help(description): 44 | parser = argparse.ArgumentParser(description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter) 45 | args, _ = parser.parse_known_args() 46 | 47 | 48 | def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[], err_msg=""): 49 | ''' 50 | Parses sample arguments. 51 | 52 | Args: 53 | description (str): Description of the sample. 54 | subfolder (str): The subfolder containing data relevant to this sample 55 | find_files (str): A list of filenames to find. Each filename will be replaced with an absolute path. 56 | 57 | Returns: 58 | str: Path of data directory. 59 | ''' 60 | 61 | # Standard command-line arguments for all samples. 62 | kDEFAULT_DATA_ROOT = os.path.join(os.sep, "usr", "src", "tensorrt", "data") 63 | parser = argparse.ArgumentParser(description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter) 64 | parser.add_argument("-d", "--datadir", help="Location of the TensorRT sample data directory, and any additional data directories.", action="append", default=[kDEFAULT_DATA_ROOT]) 65 | args, _ = parser.parse_known_args() 66 | 67 | def get_data_path(data_dir): 68 | # If the subfolder exists, append it to the path, otherwise use the provided path as-is. 69 | data_path = os.path.join(data_dir, subfolder) 70 | if not os.path.exists(data_path): 71 | if data_dir != kDEFAULT_DATA_ROOT: 72 | print("WARNING: " + data_path + " does not exist. Trying " + data_dir + " instead.") 73 | data_path = data_dir 74 | # Make sure data directory exists. 75 | if not (os.path.exists(data_path)) and data_dir != kDEFAULT_DATA_ROOT: 76 | print("WARNING: {:} does not exist. 
Please provide the correct data path with the -d option.".format(data_path)) 77 | return data_path 78 | 79 | data_paths = [get_data_path(data_dir) for data_dir in args.datadir] 80 | return data_paths, locate_files(data_paths, find_files, err_msg) 81 | 82 | def locate_files(data_paths, filenames, err_msg=""): 83 | """ 84 | Locates the specified files in the specified data directories. 85 | If a file exists in multiple data directories, the first directory is used. 86 | 87 | Args: 88 | data_paths (List[str]): The data directories. 89 | filename (List[str]): The names of the files to find. 90 | 91 | Returns: 92 | List[str]: The absolute paths of the files. 93 | 94 | Raises: 95 | FileNotFoundError if a file could not be located. 96 | """ 97 | found_files = [None] * len(filenames) 98 | for data_path in data_paths: 99 | # Find all requested files. 100 | for index, (found, filename) in enumerate(zip(found_files, filenames)): 101 | if not found: 102 | file_path = os.path.abspath(os.path.join(data_path, filename)) 103 | if os.path.exists(file_path): 104 | found_files[index] = file_path 105 | 106 | # Check that all files were found 107 | for f, filename in zip(found_files, filenames): 108 | if not f or not os.path.exists(f): 109 | raise FileNotFoundError("Could not find {:}. Searched in data paths: {:}\n{:}".format(filename, data_paths, err_msg)) 110 | return found_files 111 | 112 | def load_engine(engine_path): 113 | # TRT_LOGGER = trt.Logger(trt.Logger.WARNING) # INFO 114 | logger = trt.Logger(trt.Logger.ERROR) 115 | trt.init_libnvinfer_plugins(logger, '') 116 | with open(engine_path, 'rb') as f, trt.Runtime(logger) as runtime: 117 | return runtime.deserialize_cuda_engine(f.read()) 118 | 119 | # Simple helper data class that's a little nicer to use than a 2-tuple. 120 | class HostDeviceMem(object): 121 | def __init__(self, host_mem, device_mem): 122 | self.host = host_mem 123 | self.device = device_mem 124 | 125 | def __str__(self): 126 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) 127 | 128 | def __repr__(self): 129 | return self.__str__() 130 | 131 | 132 | # Allocates all buffers required for an engine, i.e. host/device inputs/outputs. 133 | def allocate_buffers(engine): 134 | inputs = [] 135 | outputs = [] 136 | bindings = [] 137 | stream = cuda.Stream() 138 | for binding in engine: 139 | size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size 140 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 141 | # Allocate host and device buffers 142 | host_mem = cuda.pagelocked_empty(size, dtype) 143 | device_mem = cuda.mem_alloc(host_mem.nbytes) 144 | # Append the device buffer to device bindings. 145 | bindings.append(int(device_mem)) 146 | # Append to the appropriate list. 
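# Note: engine.get_binding_shape() reports -1 for dynamic dimensions, so the sizes
# computed above are only valid for engines built with fully static shapes;
# allocate_buffers_v2() below asks the execution context for the concrete shapes instead.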
147 | if engine.binding_is_input(binding): 148 | inputs.append(HostDeviceMem(host_mem, device_mem)) 149 | else: 150 | outputs.append(HostDeviceMem(host_mem, device_mem)) 151 | return inputs, outputs, bindings, stream 152 | 153 | 154 | def allocate_buffers_v2(context): 155 | inputs = [] 156 | outputs = [] 157 | bindings = [] 158 | stream = cuda.Stream() 159 | for idx, binding in enumerate(context.engine): 160 | # size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size 161 | size = trt.volume(context.get_binding_shape(idx)) 162 | dtype = trt.nptype(context.engine.get_binding_dtype(idx)) 163 | # Allocate host and device buffers 164 | host_mem = cuda.pagelocked_empty(size, dtype) 165 | device_mem = cuda.mem_alloc(host_mem.nbytes) 166 | # Append the device buffer to device bindings. 167 | bindings.append(int(device_mem)) 168 | # Append to the appropriate list. 169 | if context.engine.binding_is_input(binding): 170 | inputs.append(HostDeviceMem(host_mem, device_mem)) 171 | else: 172 | outputs.append(HostDeviceMem(host_mem, device_mem)) 173 | return inputs, outputs, bindings, stream 174 | 175 | 176 | # This function is generalized for multiple inputs/outputs. 177 | # inputs and outputs are expected to be lists of HostDeviceMem objects. 178 | def do_inference(context, bindings, inputs, outputs, stream, batch_size=1): 179 | # Transfer input data to the GPU. 180 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] 181 | # Run inference. 182 | context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle) 183 | # Transfer predictions back from the GPU. 184 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] 185 | # Synchronize the stream 186 | stream.synchronize() 187 | # Return only the host outputs. 188 | return [out.host for out in outputs] 189 | 190 | # This function is generalized for multiple inputs/outputs for full dimension networks. 191 | # inputs and outputs are expected to be lists of HostDeviceMem objects. 192 | def do_inference_v2(context, bindings, inputs, outputs, stream): 193 | # Transfer input data to the GPU. 194 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] 195 | # Run inference. 196 | context.execute_async_v2(bindings=bindings, stream_handle=stream.handle) 197 | # Transfer predictions back from the GPU. 198 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] 199 | # Synchronize the stream 200 | stream.synchronize() 201 | # Return only the host outputs. 202 | return [out.host for out in outputs] 203 | 204 | 205 | class EngineBuilder: 206 | """ 207 | Parses an ONNX graph and builds a TensorRT engine from it. 208 | """ 209 | 210 | def __init__(self, verbose=False): 211 | """ 212 | :param verbose: If enabled, a higher verbosity level will be set on the TensorRT logger. 213 | """ 214 | self.trt_logger = trt.Logger(trt.Logger.INFO) 215 | if verbose: 216 | self.trt_logger.min_severity = trt.Logger.Severity.VERBOSE 217 | 218 | trt.init_libnvinfer_plugins(self.trt_logger, namespace="") 219 | 220 | self.builder = trt.Builder(self.trt_logger) 221 | self.config = self.builder.create_builder_config() 222 | self.config.max_workspace_size = 8 * (2 ** 30) # 8 GB 223 | 224 | self.batch_size = None 225 | self.network = None 226 | self.parser = None 227 | 228 | def create_network(self, onnx_path): 229 | """ 230 | Parse the ONNX graph and create the corresponding TensorRT network definition. 231 | :param onnx_path: The path to the ONNX graph to load. 
232 | """ 233 | network_flags = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) 234 | 235 | self.network = self.builder.create_network(network_flags) 236 | self.parser = trt.OnnxParser(self.network, self.trt_logger) 237 | 238 | onnx_path = os.path.realpath(onnx_path) 239 | with open(onnx_path, "rb") as f: 240 | if not self.parser.parse(f.read()): 241 | log.error("Failed to load ONNX file: {}".format(onnx_path)) 242 | for error in range(self.parser.num_errors): 243 | log.error(self.parser.get_error(error)) 244 | sys.exit(1) 245 | 246 | inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)] 247 | outputs = [self.network.get_output(i) for i in range(self.network.num_outputs)] 248 | 249 | log.info("Network Description") 250 | for input in inputs: 251 | self.batch_size = input.shape[0] 252 | log.info("Input '{}' with shape {} and dtype {}".format(input.name, input.shape, input.dtype)) 253 | for output in outputs: 254 | log.info("Output '{}' with shape {} and dtype {}".format(output.name, output.shape, output.dtype)) 255 | assert self.batch_size > 0 256 | self.builder.max_batch_size = self.batch_size 257 | 258 | def create_engine(self, engine_path, precision, calib_input=None, calib_cache=None, calib_num_images=25000, 259 | calib_batch_size=8, calib_preprocessor=None): 260 | """ 261 | Build the TensorRT engine and serialize it to disk. 262 | :param engine_path: The path where to serialize the engine to. 263 | :param precision: The datatype to use for the engine, either 'fp32', 'fp16' or 'int8'. 264 | :param calib_input: The path to a directory holding the calibration images. 265 | :param calib_cache: The path where to write the calibration cache to, or if it already exists, load it from. 266 | :param calib_num_images: The maximum number of images to use for calibration. 267 | :param calib_batch_size: The batch size to use for the calibration process. 268 | :param calib_preprocessor: The ImageBatcher preprocessor algorithm to use. 269 | """ 270 | engine_path = os.path.realpath(engine_path) 271 | engine_dir = os.path.dirname(engine_path) 272 | os.makedirs(engine_dir, exist_ok=True) 273 | log.info("Building {} Engine in {}".format(precision, engine_path)) 274 | 275 | inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)] 276 | 277 | if precision == "fp16": 278 | if not self.builder.platform_has_fast_fp16: 279 | log.warning("FP16 is not supported natively on this platform/device") 280 | else: 281 | self.config.set_flag(trt.BuilderFlag.FP16) 282 | 283 | with self.builder.build_engine(self.network, self.config) as engine, open(engine_path, "wb") as f: 284 | log.info("Serializing engine to file: {:}".format(engine_path)) 285 | f.write(engine.serialize()) 286 | 287 | 288 | class EngineBuilder_v2: 289 | """ 290 | Parses an ONNX graph and builds a TensorRT engine from it. 291 | """ 292 | 293 | def __init__(self, verbose=False): 294 | """ 295 | :param verbose: If enabled, a higher verbosity level will be set on the TensorRT logger. 
296 | """ 297 | self.trt_logger = trt.Logger(trt.Logger.INFO) 298 | if verbose: 299 | self.trt_logger.min_severity = trt.Logger.Severity.VERBOSE 300 | 301 | trt.init_libnvinfer_plugins(self.trt_logger, namespace="") 302 | 303 | self.builder = trt.Builder(self.trt_logger) 304 | self.config = self.builder.create_builder_config() 305 | self.config.max_workspace_size = 8 * (2 ** 30) # 8 GB 306 | 307 | self.batch_size = None 308 | self.network = None 309 | self.parser = None 310 | 311 | def create_network(self, onnx_path, get_inputs): 312 | """ 313 | Parse the ONNX graph and create the corresponding TensorRT network definition. 314 | :param onnx_path: The path to the ONNX graph to load. 315 | """ 316 | network_flags = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) 317 | 318 | self.network = self.builder.create_network(network_flags) 319 | self.parser = trt.OnnxParser(self.network, self.trt_logger) 320 | 321 | onnx_path = os.path.realpath(onnx_path) 322 | with open(onnx_path, "rb") as f: 323 | if not self.parser.parse(f.read()): 324 | log.error("Failed to load ONNX file: {}".format(onnx_path)) 325 | for error in range(self.parser.num_errors): 326 | log.error(self.parser.get_error(error)) 327 | sys.exit(1) 328 | 329 | inputs = [] 330 | for i, shape in enumerate(get_inputs): 331 | self.network.get_input(i).shape = shape 332 | inputs.append(shape) 333 | # inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)] 334 | outputs = [self.network.get_output(i) for i in range(self.network.num_outputs)] 335 | 336 | log.info("Network Description") 337 | for input in inputs: 338 | self.batch_size = input.shape[0] 339 | log.info("Input '{}' with shape {} and dtype {}".format(input.name, input.shape, input.dtype)) 340 | for output in outputs: 341 | log.info("Output '{}' with shape {} and dtype {}".format(output.name, output.shape, output.dtype)) 342 | assert self.batch_size > 0 343 | self.builder.max_batch_size = self.batch_size 344 | 345 | def create_engine(self, engine_path, precision, calib_input=None, calib_cache=None, calib_num_images=25000, 346 | calib_batch_size=8, calib_preprocessor=None): 347 | """ 348 | Build the TensorRT engine and serialize it to disk. 349 | :param engine_path: The path where to serialize the engine to. 350 | :param precision: The datatype to use for the engine, either 'fp32', 'fp16' or 'int8'. 351 | :param calib_input: The path to a directory holding the calibration images. 352 | :param calib_cache: The path where to write the calibration cache to, or if it already exists, load it from. 353 | :param calib_num_images: The maximum number of images to use for calibration. 354 | :param calib_batch_size: The batch size to use for the calibration process. 355 | :param calib_preprocessor: The ImageBatcher preprocessor algorithm to use. 
356 | """ 357 | engine_path = os.path.realpath(engine_path) 358 | engine_dir = os.path.dirname(engine_path) 359 | os.makedirs(engine_dir, exist_ok=True) 360 | log.info("Building {} Engine in {}".format(precision, engine_path)) 361 | 362 | inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)] 363 | 364 | if precision == "fp16": 365 | if not self.builder.platform_has_fast_fp16: 366 | log.warning("FP16 is not supported natively on this platform/device") 367 | else: 368 | self.config.set_flag(trt.BuilderFlag.FP16) 369 | 370 | with self.builder.build_engine(self.network, self.config) as engine, open(engine_path, "wb") as f: 371 | log.info("Serializing engine to file: {:}".format(engine_path)) 372 | f.write(engine.serialize()) -------------------------------------------------------------------------------- /utils/utils_detection.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import json 3 | import torch 4 | import cv2 as cv 5 | import numpy as np 6 | import torchvision 7 | 8 | 9 | def yaml_load(file='data.yaml'): 10 | # Single-line safe yaml loading 11 | with open(file, errors='ignore') as f: 12 | return yaml.safe_load(f) 13 | 14 | 15 | def json_load(file='data.json'): 16 | with open(file, "r") as f: 17 | return json.load(f) 18 | 19 | 20 | def xyxy2xywh(x): 21 | # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right 22 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 23 | y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center 24 | y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center 25 | y[:, 2] = x[:, 2] - x[:, 0] # width 26 | y[:, 3] = x[:, 3] - x[:, 1] # height 27 | return y 28 | 29 | 30 | def xywh2xyxy(x): 31 | # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 32 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 33 | y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x 34 | y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y 35 | y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x 36 | y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y 37 | return y 38 | 39 | 40 | def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): 41 | # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 42 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 43 | y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw # top left x 44 | y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh # top left y 45 | y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw # bottom right x 46 | y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh # bottom right y 47 | return y 48 | 49 | 50 | def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): 51 | # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right 52 | if clip: 53 | clip_boxes(x, (h - eps, w - eps)) # warning: inplace clip 54 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 55 | y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w # x center 56 | y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h # y center 57 | y[:, 2] = (x[:, 2] - x[:, 0]) / w # width 58 | y[:, 3] = (x[:, 3] - x[:, 1]) / h # height 59 | return y 60 | 61 | 62 | def xyn2xy(x, w=640, h=640, padw=0, padh=0): 63 | # Convert normalized segments into pixel segments, shape (n,2) 64 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 65 | y[:, 0] = w * x[:, 0] + padw # top left x 66 | y[:, 1] = h * x[:, 1] + padh # top left y 67 | return y 68 | 69 | 70 | def letterbox_image(image, 
return_padding=False): 71 | """ 72 | 为保持h,w的一致,对图片短边两侧进行等距离padding 73 | """ 74 | h, w = image.shape[:2] 75 | 76 | if h > w: 77 | p = int((h - w) // 2) 78 | image = cv.copyMakeBorder(image, 0, 0, p, (h - w - p), cv.BORDER_CONSTANT, value=0) 79 | else: 80 | p = int((w - h) // 2) 81 | image = cv.copyMakeBorder(image, p, (w - h - p), 0, 0, cv.BORDER_CONSTANT, value=0) 82 | 83 | if return_padding: 84 | return image, p 85 | else: 86 | return image 87 | 88 | def image_trans(img, size): 89 | scale = min((size[0] / img.shape[0]), (size[1] / img.shape[1]), 1.1) 90 | new_size = (int(img.shape[1] * scale), int(img.shape[0] * scale)) 91 | # img_new = cv.resize(img, new_size, interpolation=cv.INTER_NEAREST) 92 | img_new = cv.resize(img, new_size, interpolation=cv.INTER_LINEAR) 93 | top = round((size[0] - new_size[1]) * 0.5) 94 | bottom = (size[0] - new_size[1]) - top 95 | left = round((size[1] - new_size[0]) * 0.5) 96 | right = (size[1] - new_size[0]) - left 97 | img_new = cv.copyMakeBorder(img_new, top, bottom, left, right, cv.BORDER_CONSTANT, value=0) 98 | img_new = img_new.transpose((2, 0, 1))[::-1] 99 | img_new = np.expand_dims(img_new, 0) 100 | img_new = np.ascontiguousarray(img_new).astype(np.float32) 101 | img_new = img_new / 255.0 102 | return img_new 103 | 104 | 105 | def scale_bboxes(bboxes, img_ori_hw, img_det_hw): 106 | assert len(img_ori_hw) == len(img_ori_hw) 107 | 108 | scale = max(img_ori_hw[0] / img_det_hw[0], img_ori_hw[1] / img_det_hw[1]) 109 | bboxes[:, :4] = bboxes[:, :4] * scale 110 | 111 | h_bias = (max(img_ori_hw) - img_ori_hw[0]) / 2.0 112 | w_bias = (max(img_ori_hw) - img_ori_hw[1]) / 2.0 113 | 114 | bboxes[:, [0, 2]] -= w_bias 115 | bboxes[:, [1, 3]] -= h_bias 116 | 117 | clip_boxes(bboxes, img_ori_hw) 118 | 119 | return bboxes 120 | 121 | 122 | def scale_bboxes_v2(bboxes, img_ori_hw, img_det_hw, p): 123 | assert len(img_ori_hw) == len(img_ori_hw) 124 | 125 | scale = max(img_ori_hw[0] / img_det_hw[0], img_ori_hw[1] / img_det_hw[1]) 126 | bboxes[:, :4] = bboxes[:, :4] * scale 127 | if img_ori_hw[0] > img_ori_hw[1]: 128 | bboxes[:, [0, 2]] -= p 129 | else: 130 | bboxes[:, [1, 3]] -= p 131 | 132 | clip_boxes(bboxes, img_ori_hw) 133 | 134 | return bboxes 135 | 136 | 137 | def clip_boxes(boxes, shape): 138 | # Clip boxes (xyxy) to image shape (height, width) 139 | if isinstance(boxes, torch.Tensor): # faster individually 140 | boxes[:, 0].clamp_(0, shape[1]) # x1 141 | boxes[:, 1].clamp_(0, shape[0]) # y1 142 | boxes[:, 2].clamp_(0, shape[1]) # x2 143 | boxes[:, 3].clamp_(0, shape[0]) # y2 144 | else: # np.array (faster grouped) 145 | boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 146 | boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 147 | 148 | 149 | 150 | def box_area(box): 151 | # box = xyxy(4,n) 152 | return (box[2] - box[0]) * (box[3] - box[1]) 153 | 154 | 155 | def box_iou(box1, box2, eps=1e-7): 156 | # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) 157 | (a1, a2), (b1, b2) = box1[:, None].chunk(2, 2), box2.chunk(2, 1) 158 | inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2) 159 | 160 | # IoU = inter / (area1 + area2 - inter) 161 | return inter / (box_area(box1.T)[:, None] + box_area(box2.T) - inter + eps) 162 | 163 | 164 | def draw_boxes(img, boxes, scores, labels, catid_labels, textscale=1, color_dicts=None): 165 | boxes = tuple(boxes.astype('int')) 166 | if color_dicts is None: 167 | color_dicts = {k:(0,0,255) for k in labels.keys} 168 | 169 | text_size, _ = cv.getTextSize(f'{catid_labels[labels]}:{scores:.2f}', 
fontFace=cv.FONT_HERSHEY_DUPLEX, 170 | fontScale=textscale, thickness=1) 171 | text_w, text_h = text_size 172 | img0 = cv.rectangle(img, boxes[:2], boxes[2:], thickness=2, lineType=cv.LINE_AA, color=color_dicts[labels]) 173 | img0 = cv.rectangle(img0, boxes[:2], (boxes[0] + text_w + 1, boxes[1] + text_h + 2), 174 | thickness=-1, color=color_dicts[labels]) 175 | img0 = cv.putText(img0, f'{catid_labels[labels]}:{scores:.2f}', 176 | (boxes[0], boxes[1] + text_h), 177 | fontFace=cv.FONT_HERSHEY_DUPLEX, fontScale=textscale, thickness=1, 178 | lineType=cv.LINE_AA, 179 | color=(255, 255, 255) 180 | ) 181 | return img0 182 | 183 | 184 | def non_max_suppression(prediction, 185 | v8_head=False, 186 | conf_thres=0.25, 187 | iou_thres=0.45, 188 | agnostic=False, 189 | max_det=300): 190 | bs = prediction.shape[0] # batch size 191 | # Settings 192 | # min_wh = 2 # (pixels) minimum box width and height 193 | max_wh = 7680 # (pixels) maximum box width and height 194 | max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() 195 | redundant = True # require redundant detections 196 | merge = False # use merge-NMS 197 | output = [np.zeros((0, 6), dtype=np.float32)] * bs 198 | if not v8_head: 199 | xc = prediction[..., 4] > conf_thres # candidates 200 | else: 201 | xc = prediction[..., 4:].max(2) > conf_thres # candidates 202 | for xi, x in enumerate(prediction): # image index, image inference 203 | # If none remain process next image 204 | if not x.shape[0]: 205 | continue 206 | # Apply constraints 207 | # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height 208 | x = x[xc[xi]] # confidence 209 | if not v8_head: 210 | # Compute conf 211 | x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf 212 | j = x[:, 5:].argmax(axis=1, keepdims=True) 213 | conf = x[:, 5:] 214 | else: 215 | j = x[:, 4:].argmax(axis=1, keepdims=True) 216 | conf = x[:, 4:] 217 | conf = conf[range(len(j)), j.ravel()].reshape(-1, 1) 218 | # Detections matrix nx6 (xywh, conf, cls) 219 | x = np.concatenate((x[:,:4], conf, j), 1)[conf.ravel() > conf_thres] 220 | # Check shape 221 | n = x.shape[0] # number of boxes 222 | if not n: # no boxes 223 | continue 224 | elif n > max_nms: # excess boxes 225 | x = x[x[:, 4].argsort()[::-1][:max_nms]] # sort by confidence 226 | 227 | # Batched NMS 228 | # c = x[:, 5:6] * (0 if agnostic else max_wh) # classes 229 | # boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores 230 | # i = cv.dnn.NMSBoxes(boxes, scores, conf_thres, iou_thres) 231 | c = x[:, 5].ravel().astype("int32") 232 | i = cv.dnn.NMSBoxesBatched(x[:, :4], x[:, 4], c, conf_thres, iou_thres, None, 0) 233 | if i.shape[0] > max_det: # limit detections 234 | i = i[:max_det] 235 | if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) 236 | # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) 237 | iou = box_iou(x[:, :4][i], x[:, :4]) > iou_thres # iou matrix 238 | weights = iou * x[:, 4][None] # box weights 239 | x[i, :4] = np.matmul(weights, x[:, :4]) / weights.sum(1, keepdim=True) # merged boxes 240 | if redundant: 241 | i = i[iou.sum(1) > 1] # require redundancy 242 | 243 | output[xi] = xywh2xyxy(x[i]) 244 | return output 245 | 246 | 247 | def non_max_suppression_torch(prediction, 248 | v8_head=False, 249 | conf_thres=0.25, 250 | iou_thres=0.45, 251 | agnostic=False, 252 | max_det=300): 253 | bs = prediction.shape[0] # batch size 254 | # Settings 255 | # min_wh = 2 # (pixels) minimum box width and height 256 | max_wh = 7680 # (pixels) maximum box width and height 
257 | max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() 258 | redundant = True # require redundant detections 259 | merge = False # use merge-NMS 260 | output = [torch.zeros((0, 6), device=prediction.device)] * bs 261 | if not v8_head: 262 | xc = prediction[..., 4] > conf_thres # candidates 263 | else: 264 | xc = prediction[..., 4:].max(2) > conf_thres # candidates 265 | for xi, x in enumerate(prediction): # image index, image inference 266 | # If none remain process next image 267 | if not x.shape[0]: 268 | continue 269 | # Apply constraints 270 | # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height 271 | x = x[xc[xi]] # confidence 272 | if not v8_head: 273 | # Compute conf 274 | x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf 275 | # Detections matrix nx6 (xywh, conf, cls) 276 | conf, j = x[:, 5:].max(1, keepdim=True) 277 | else: 278 | # Detections matrix nx6 (xywh, conf, cls) 279 | conf, j = x[:, 4:].max(1, keepdim=True) 280 | 281 | # Box (center x, center y, width, height) to (x1, y1, x2, y2) 282 | box = xywh2xyxy(x[:, :4]) 283 | x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] 284 | # Check shape 285 | n = x.shape[0] # number of boxes 286 | if not n: # no boxes 287 | continue 288 | elif n > max_nms: # excess boxes 289 | x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence 290 | 291 | # Batched NMS 292 | c = x[:, 5:6] * (0 if agnostic else max_wh) # classes 293 | boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores 294 | i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS 295 | if i.shape[0] > max_det: # limit detections 296 | i = i[:max_det] 297 | if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) 298 | # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) 299 | iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix 300 | weights = iou * scores[None] # box weights 301 | x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes 302 | if redundant: 303 | i = i[iou.sum(1) > 1] # require redundancy 304 | 305 | output[xi] = x[i] 306 | return output 307 | 308 | 309 | def yolox_postprocess(outputs, img_size, p6=False): 310 | 311 | grids = [] 312 | expanded_strides = [] 313 | 314 | if not p6: 315 | strides = [8, 16, 32] 316 | else: 317 | strides = [8, 16, 32, 64] 318 | 319 | hsizes = [img_size[0] // stride for stride in strides] 320 | wsizes = [img_size[1] // stride for stride in strides] 321 | 322 | for hsize, wsize, stride in zip(hsizes, wsizes, strides): 323 | xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize)) 324 | grid = np.stack((xv, yv), 2).reshape(1, -1, 2) 325 | grids.append(grid) 326 | shape = grid.shape[:2] 327 | expanded_strides.append(np.full((*shape, 1), stride)) 328 | 329 | grids = np.concatenate(grids, 1) 330 | expanded_strides = np.concatenate(expanded_strides, 1) 331 | outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides 332 | outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides 333 | 334 | return outputs 335 | 336 | 337 | class Colors: 338 | # Ultralytics color palette https://ultralytics.com/ 339 | def __init__(self, id_and_obj): 340 | base_hexs = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB', 341 | '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7') 342 | n = len(id_and_obj) / len(base_hexs) 343 | if n > 1: 344 | n = int(n) + 1 345 | base_hexs *= n 346 | 347 | 
self.obj_id = tuple(id_and_obj.keys())
348 |         self.hex = base_hexs[:len(self.obj_id)]
349 |         self.id_and_hex = {k: v for k, v in zip(self.obj_id, self.hex)}
350 |
351 |     def get_id_and_colors(self):
352 |         id_and_colors = {k: self.hex2rgb(f'#{v}') for k, v in self.id_and_hex.items()}
353 |         return id_and_colors
354 |
355 |     def hex2rgb(self, h):  # rgb order
356 |         return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))
357 |
358 |
359 |
--------------------------------------------------------------------------------
/yolo_detect_v1.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import torch
4 | import cv2 as cv
5 | import numpy as np
6 |
7 | from utils import trt_infer
8 | from utils.trt_infer import load_engine
9 | from utils.utils_detection import yaml_load, image_trans, scale_bboxes, non_max_suppression, Colors, draw_boxes, \
10 |     non_max_suppression_torch
11 |
12 |
13 | class yolo_engine_det:
14 |     def __init__(self, engine_dir, catid_labels):
15 |         self.engine = load_engine(engine_dir)
16 |         self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
17 |         self.context = self.engine.create_execution_context()
18 |         self.resize = self.engine.get_binding_shape(0)[2:]
19 |         self.colors = self.get_colors_dict(catid_labels)
20 |         self.labels = catid_labels
21 |         self.v8_head = False
22 |         self.nms = non_max_suppression
23 |
24 |         if self.engine.get_binding_shape(1)[-1] - len(catid_labels) == 4:
25 |             self.v8_head = True
26 |
27 |         # self.context.set_binding_shape(0, [1, 3, self.resize[0], self.resize[1]])
28 |         self.inputs = None
29 |         self.outputs = None
30 |         self.bindings = None
31 |         self.stream = None
32 |
33 |         self.inputs, self.outputs, self.bindings, self.stream = trt_infer.allocate_buffers_v2(self.context)
34 |
35 |     @staticmethod
36 |     def get_colors_dict(catid_labels):
37 |         color_dicts = Colors(catid_labels)
38 |         return color_dicts.get_id_and_colors()
39 |
40 |
41 |     def draw(self, frame, conf=0.25, iou=0.45, max_det=200):
42 |         x = image_trans(frame, self.resize)
43 |         np.copyto(self.inputs[0].host, x.ravel())
44 |         # self.inputs[0].host = x.ravel()
45 |         t1 = time.time()
46 |         pred = trt_infer.do_inference_v2(
47 |             self.context, bindings=self.bindings, inputs=self.inputs, outputs=self.outputs, stream=self.stream
48 |         )
49 |         pred = pred[0].reshape(self.context.get_binding_shape(1))
50 |         pred = self.nms(pred, v8_head=self.v8_head, conf_thres=conf, iou_thres=iou, agnostic=False, max_det=max_det)[0]
51 |         t2 = time.time()
52 |         fps = round(1.0 / (t2 - t1))
53 |         times = round((t2 - t1) * 1000, 3)
54 |         pred = scale_bboxes(pred, frame.shape[:2], self.resize)
55 |         for i in pred:
56 |             # pred: x1, y1, x2, y2, conf, labels
57 |             frame = draw_boxes(frame, i[:4], i[4], i[5], self.labels, 0.7, self.colors)
58 |         frame = cv.putText(frame, f'fps: {fps}', (10, 30), fontFace=cv.FONT_HERSHEY_SIMPLEX, fontScale=1, thickness=2,
59 |                            lineType=cv.LINE_AA, color=(255, 0, 255))
60 |         return frame, times, pred
61 |
62 |
63 |
64 | def main(args):
65 |     times = []
66 |     # detection class labels
67 |     catid_labels = yaml_load(args.labels)
68 |     # video source
69 |     vc = cv.VideoCapture(args.video_dir)
70 |     # load the engine
71 |     yolo_draw = yolo_engine_det(args.engine_dir, catid_labels)
72 |
73 |     # read the video frame by frame
74 |     while vc.isOpened():
75 |         ret, frame = vc.read()
76 |
77 |         if ret is True:
78 |             frame, t, _ = yolo_draw.draw(frame, conf=args.conf_thres, iou=args.iou_thres, max_det=args.max_det)
79 |             print(f'{t}ms')
80 |             times.append(t)
81 |             cv.imshow('video', frame)
82 |
83 |             if cv.waitKey(30) & 0xFF == 27:
84 |                 break
85 |         else:
86 |             break
87 |     print(np.mean(times))
88 |     vc.release()
89 |     cv.destroyAllWindows()
90 |
91 |
92 | if __name__ == "__main__":
93 |     import argparse
94 |
95 |     parser = argparse.ArgumentParser(description=__doc__)
96 |     # object class labels
97 |     parser.add_argument('--labels', type=str, default='./labels_coco.yaml', help='obj labels')
98 |     # video path
99 |     parser.add_argument('--video_dir', type=str, default='sample_1080p_h265.mp4',
100 |                         help='video path')
101 |     # engine model path
102 |     parser.add_argument('--engine_dir', type=str, default='./models_trt/yolov5s.engine',
103 |                         help='engine path')
104 |     # only predictions with a score above the confidence threshold are kept
105 |     parser.add_argument('--conf_thres', type=float, default=0.25, help='confidence threshold')
106 |     # IoU threshold used by non-maximum suppression
107 |     parser.add_argument('--iou_thres', type=float, default=0.45, help='NMS IoU threshold')
108 |     # maximum number of detection boxes per image
109 |     parser.add_argument('--max_det', type=int, default=200, help='maximum detections per image')
110 |
111 |     args = parser.parse_args()
112 |     print(args)
113 |
114 |     main(args)
115 |
116 |
--------------------------------------------------------------------------------
/yolo_detect_v2.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import torch
4 | import cv2 as cv
5 | import numpy as np
6 |
7 | from utils import trt_infer
8 | from utils.trt_infer import load_engine
9 | from utils.utils_detection import yaml_load, image_trans, scale_bboxes, non_max_suppression, Colors, draw_boxes
10 |
11 |
12 | class yolo_engine_det:
13 |     def __init__(self, engine_dir, catid_labels):
14 |         self.engine = load_engine(engine_dir)
15 |         self.context = self.engine.create_execution_context()
16 |         self.resize = self.engine.get_binding_shape(0)[2:]
17 |         self.colors = self.get_colors_dict(catid_labels)
18 |         self.labels = catid_labels
19 |
20 |         # self.context.set_binding_shape(0, [1, 3, self.resize[0], self.resize[1]])
21 |         self.inputs = None
22 |         self.outputs = None
23 |         self.bindings = None
24 |         self.stream = None
25 |
26 |         self.inputs, self.outputs, self.bindings, self.stream = trt_infer.allocate_buffers_v2(self.context)
27 |
28 |     @staticmethod
29 |     def get_colors_dict(catid_labels):
30 |         color_dicts = Colors(catid_labels)
31 |         return color_dicts.get_id_and_colors()
32 |
33 |
34 |     def draw(self, frame):
35 |         x = image_trans(frame, self.resize)
36 |         np.copyto(self.inputs[0].host, x.ravel())
37 |         t1 = time.time()
38 |         pred = trt_infer.do_inference_v2(
39 |             self.context, bindings=self.bindings, inputs=self.inputs, outputs=self.outputs, stream=self.stream
40 |         )
41 |         t2 = time.time()
42 |         fps = int(1.0 / (t2 - t1))
43 |         times = round((t2 - t1) * 1000, 3)
44 |         num_det, boxes, conf, labels = pred
45 |         num_det = num_det[0]
46 |         if num_det > 0:
47 |             # conf = conf[:num_det]
48 |             # labels = labels[:num_det]
49 |             boxes = boxes[:num_det * 4].reshape(-1, 4)
50 |             boxes = scale_bboxes(boxes, frame.shape[:2], self.resize)
51 |             for i in range(num_det):
52 |                 frame = draw_boxes(frame, boxes[i], conf[i], labels[i], self.labels, 0.7, self.colors)
53 |         frame = cv.putText(frame, f'fps: {fps}', (10, 30), fontFace=cv.FONT_HERSHEY_SIMPLEX, fontScale=1, thickness=2,
54 |                            lineType=cv.LINE_AA, color=(255, 0, 255))
55 |         return frame, times
56 |
57 |
58 | def main(args):
59 |     times = []
60 |     # detection class labels
61 |     catid_labels = yaml_load(args.labels)
62 |     # video source
63 |     vc = cv.VideoCapture(args.video_dir)
64 |     # load the engine
65 |     yolo_draw = yolo_engine_det(
66 |         args.engine_dir, catid_labels
67 |     )
68 |
69 | # 循环读取视频中的每一帧 70 | while vc.isOpened(): 71 | ret, frame = vc.read() 72 | 73 | if ret is True: 74 | frame, t = yolo_draw.draw(frame) 75 | print(f'{t}ms') 76 | times.append(t) 77 | cv.imshow('video', frame) 78 | 79 | if cv.waitKey(30) & 0xFF == 27: 80 | break 81 | else: 82 | break 83 | print(np.mean(times)) 84 | vc.release() 85 | cv.destroyAllWindows() 86 | 87 | 88 | if __name__ == "__main__": 89 | import argparse 90 | 91 | parser = argparse.ArgumentParser(description=__doc__) 92 | # 目标类别标签 93 | parser.add_argument('--labels', type=str, default='./labels_coco.yaml', help='obj labels') 94 | # video地址 95 | parser.add_argument('--video_dir', type=str, default='sample_1080p_h265.mp4', 96 | help='video path') 97 | # engine模型地址 98 | parser.add_argument('--engine_dir', type=str, default='./models_trt/yolov7_nms.engine', 99 | help='engine path') 100 | 101 | 102 | args = parser.parse_args() 103 | print(args) 104 | 105 | main(args) 106 | 107 | -------------------------------------------------------------------------------- /yolox_detect.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | import cv2 as cv 5 | import numpy as np 6 | 7 | from utils import trt_infer 8 | from utils.trt_infer import load_engine 9 | from utils.utils_detection import yaml_load, image_trans, scale_bboxes, non_max_suppression_torch, yolox_postprocess, \ 10 | Colors, draw_boxes 11 | 12 | 13 | class yolox_engine_det: 14 | def __init__(self, engine_dir, catid_labels): 15 | self.engine = load_engine(engine_dir) 16 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 17 | self.context = self.engine.create_execution_context() 18 | self.resize = self.engine.get_binding_shape(0)[2:] 19 | self.colors = self.get_colors_dict(catid_labels) 20 | self.labels = catid_labels 21 | self.nms = non_max_suppression_torch 22 | 23 | # self.context.set_binding_shape(0, [1, 3, self.resize[0], self.resize[1]]) 24 | self.inputs = None 25 | self.outputs = None 26 | self.bindings = None 27 | self.stream = None 28 | 29 | self.inputs, self.outputs, self.bindings, self.stream = trt_infer.allocate_buffers_v2(self.context) 30 | 31 | @staticmethod 32 | def get_colors_dict(catid_labels): 33 | color_dicts = Colors(catid_labels) 34 | return color_dicts.get_id_and_colors() 35 | 36 | 37 | def draw(self, frame, conf=0.25, iou=0.45, max_det=200): 38 | x = image_trans(frame, self.resize) 39 | np.copyto(self.inputs[0].host, x.ravel()) 40 | t1 = time.time() 41 | pred = trt_infer.do_inference_v2( 42 | self.context, bindings=self.bindings, inputs=self.inputs, outputs=self.outputs, stream=self.stream 43 | ) 44 | pred = pred[0].reshape(self.context.get_binding_shape(1)) 45 | pred = yolox_postprocess(pred, self.resize, p6=False) 46 | pred = torch.from_numpy(pred).to(self.device) 47 | pred = self.nms(pred, False, conf_thres=conf, iou_thres=iou, agnostic=False, max_det=max_det)[0] 48 | t2 = time.time() 49 | fps = int(1.0 / (t2 - t1)) 50 | pred = scale_bboxes(pred, frame.shape[:2], self.resize) 51 | pred = pred.cpu().numpy() 52 | for i in pred: 53 | # pred: x1, y1, x2, y2, conf, labels 54 | # bbox = tuple(i[:4].astype('int')) 55 | # frame = cv.rectangle(frame, bbox[:2], bbox[2:], thickness=2, lineType=cv.LINE_AA, 56 | # color=self.colors[i[-1]] 57 | # ) 58 | # frame = cv.putText(frame, f'{self.labels[i[-1]]}:{i[-2]:.2f}', (bbox[0] + 5, bbox[1] + 30), 59 | # fontFace=cv.FONT_HERSHEY_DUPLEX, fontScale=1, thickness=1, lineType=cv.LINE_AA, 60 | # color = 
61 |             #                    )
62 |             frame = draw_boxes(frame, i[:4], i[4], i[5], self.labels, 0.7, self.colors)
63 |         frame = cv.putText(frame, f'fps: {fps}', (10, 30), fontFace=cv.FONT_HERSHEY_SIMPLEX, fontScale=1, thickness=2,
64 |                            lineType=cv.LINE_AA, color=(255, 0, 255))
65 |         return frame
66 |
67 |
68 | def main(args):
69 |     # detection class labels
70 |     catid_labels = yaml_load(args.labels)['labels']
71 |     # video source
72 |     vc = cv.VideoCapture(args.video_dir)
73 |     # load the engine
74 |     yolo_draw = yolox_engine_det(args.engine_dir, catid_labels)
75 |
76 |     # read the video frame by frame
77 |     while vc.isOpened():
78 |         ret, frame = vc.read()
79 |
80 |         if ret is True:
81 |             frame = yolo_draw.draw(
82 |                 frame, conf=args.conf_thres, iou=args.iou_thres, max_det=args.max_det
83 |             )
84 |             cv.imshow('video', frame)
85 |
86 |             if cv.waitKey(30) & 0xFF == 27:
87 |                 break
88 |         else:
89 |             break
90 |
91 |     vc.release()
92 |     cv.destroyAllWindows()
93 |
94 |
95 | if __name__ == "__main__":
96 |     import argparse
97 |
98 |     parser = argparse.ArgumentParser(description=__doc__)
99 |     # object class labels
100 |     parser.add_argument('--labels', type=str, default='./labels_coco.yaml', help='obj labels')
101 |     # video path
102 |     parser.add_argument('--video_dir', type=str, default='sample_1080p_h265.mp4',
103 |                         help='video path')
104 |     # engine model path
105 |     parser.add_argument('--engine_dir', type=str, default='./models_trt/yolox_s.engine',
106 |                         help='engine path')
107 |     # only predictions with a score above the confidence threshold are kept
108 |     parser.add_argument('--conf_thres', type=float, default=0.25, help='confidence threshold')
109 |     # IoU threshold used by non-maximum suppression
110 |     parser.add_argument('--iou_thres', type=float, default=0.45, help='NMS IoU threshold')
111 |     # maximum number of detection boxes per image
112 |     parser.add_argument('--max_det', type=int, default=200, help='maximum detections per image')
113 |
114 |     args = parser.parse_args()
115 |     print(args)
116 |
117 |     main(args)
118 |
--------------------------------------------------------------------------------
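
For quick experimentation, the video scripts above can be reduced to a single-image run. The sketch below is assembled only from the utilities shipped in this repository (load_engine, allocate_buffers_v2, do_inference_v2, image_trans, non_max_suppression, scale_bboxes, draw_boxes) and mirrors yolo_detect_v1.py; the engine path, image path, output filename, and thresholds are placeholders, and it assumes a YOLOv5-style engine with the same input/output bindings.

import cv2 as cv
import numpy as np

from utils import trt_infer
from utils.trt_infer import load_engine
from utils.utils_detection import yaml_load, image_trans, scale_bboxes, non_max_suppression, Colors, draw_boxes

ENGINE_PATH = './models_trt/yolov5s.engine'  # placeholder engine path
IMAGE_PATH = 'sample.jpg'                    # placeholder image path

catid_labels = yaml_load('./labels_coco.yaml')
colors = Colors(catid_labels).get_id_and_colors()

# load the engine and allocate host/device buffers once
engine = load_engine(ENGINE_PATH)
context = engine.create_execution_context()
resize = engine.get_binding_shape(0)[2:]
inputs, outputs, bindings, stream = trt_infer.allocate_buffers_v2(context)

# preprocess: letterbox to the engine input size, CHW, normalized float32
frame = cv.imread(IMAGE_PATH)
x = image_trans(frame, resize)
np.copyto(inputs[0].host, x.ravel())

# run inference and decode with the NumPy NMS used by yolo_detect_v1.py
pred = trt_infer.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
pred = pred[0].reshape(context.get_binding_shape(1))
pred = non_max_suppression(pred, v8_head=False, conf_thres=0.25, iou_thres=0.45, max_det=200)[0]

# map boxes back to the original image and draw them
pred = scale_bboxes(pred, frame.shape[:2], resize)
for det in pred:  # det: x1, y1, x2, y2, conf, cls
    frame = draw_boxes(frame, det[:4], det[4], det[5], catid_labels, 0.7, colors)
cv.imwrite('result.jpg', frame)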