├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README-en.md
├── README.md
├── assets
│   ├── bytetrack.png
│   └── effect.gif
├── bytetrack
│   ├── include
│   │   ├── BYTETracker.h
│   │   ├── STrack.h
│   │   ├── dataType.h
│   │   ├── kalmanFilter.h
│   │   ├── lapjv.h
│   │   └── logging.h
│   └── src
│       ├── BYTETracker.cpp
│       ├── STrack.cpp
│       ├── kalmanFilter.cpp
│       ├── lapjv.cpp
│       └── utils.cpp
├── main.cpp
├── tensorrtx-yolov8
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── images
│   │   ├── 10001.jpg
│   │   └── 10002.jpeg
│   ├── include
│   │   ├── block.h
│   │   ├── calibrator.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.h
│   │   ├── postprocess.h
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── main.cpp
│   ├── plugin
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── src
│   │   ├── block.cpp
│   │   ├── calibrator.cpp
│   │   ├── model.cpp
│   │   ├── postprocess.cpp
│   │   ├── postprocess.cu
│   │   └── preprocess.cu
│   └── yolov8_trt.py
├── videos
│   └── demo.mp4
└── yolo
    ├── CMakeLists.txt
    ├── README.md
    ├── include
    │   ├── config.h
    │   ├── macros.h
    │   ├── postprocess.h
    │   ├── preprocess.h
    │   ├── public.h
    │   ├── types.h
    │   ├── utils.h
    │   └── yolov8_lib.h
    ├── main.cpp
    ├── plugin
    │   ├── yololayer.cu
    │   └── yololayer.h
    └── src
        ├── postprocess.cpp
        ├── preprocess.cu
        └── yolov8_lib.cpp

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Prerequisites
*.d

# Compiled Object files
*.slo
*.lo
*.o
*.obj

# Precompiled Headers
*.gch
*.pch

# Compiled Dynamic libraries
*.so
*.dylib
*.dll

# Fortran module files
*.mod
*.smod

# Compiled Static libraries
*.lai
*.la
*.a
*.lib

# Executables
*.exe
*.out
*.app

--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.10)

project(yolov8_bytetrack)

add_definitions(-std=c++11)
add_definitions(-DAPI_EXPORTS)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Release)

include_directories("/usr/include/eigen3")

# include and link dirs of cuda and tensorrt, you may need to adapt them if yours differ
# ============= cuda =============
find_package(CUDA REQUIRED)
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)

# ============= tensorrt =============
if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    message("Embed_platform on")
    include_directories(/usr/include/aarch64-linux-gnu)
    link_directories(/usr/lib/aarch64-linux-gnu)
else()
    message("Embed_platform off")
    include_directories(/usr/include/x86_64-linux-gnu)
    link_directories(/usr/lib/x86_64-linux-gnu)
endif()

# ============= opencv =============
find_package(OpenCV)
include_directories(${OpenCV_INCLUDE_DIRS})

# =========== bytetrack lib ===========
include_directories(${PROJECT_SOURCE_DIR}/bytetrack/include)
file(GLOB_RECURSE SRCS01 ${PROJECT_SOURCE_DIR}/bytetrack/src/*.cpp)
add_library(bytetrack SHARED ${SRCS01})
target_link_libraries(bytetrack cudart nvinfer ${OpenCV_LIBS})

# ============= yolov8 lib =============
include_directories(${PROJECT_SOURCE_DIR}/yolo/include)
include_directories(${PROJECT_SOURCE_DIR}/yolo/plugin)
file(GLOB_RECURSE SRCS02 ${PROJECT_SOURCE_DIR}/yolo/src/*.cpp ${PROJECT_SOURCE_DIR}/yolo/src/*.cu ${PROJECT_SOURCE_DIR}/yolo/plugin/*.cu)
cuda_add_library(yolo_infer SHARED ${SRCS02})
target_link_libraries(yolo_infer nvinfer cudart ${OpenCV_LIBS})

# ======== main execute file ========
add_executable(main ${PROJECT_SOURCE_DIR}/main.cpp)
target_link_libraries(main bytetrack yolo_infer)

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2023 emptysoal

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README-en.md:
--------------------------------------------------------------------------------
# TensorRT C++ API deployment of YOLOv8 + ByteTrack

- My other TensorRT project covering the YOLOv8 tasks: [YOLOv8 detection, key points, segmentation, tracking](https://github.com/emptysoal/TensorRT-YOLOv8)

## Introduction

- Based on `TensorRT-v8`, deploys `YOLOv8` + `ByteTrack`;

- Supports the `Jetson` series as well as `Linux x86_64` servers.

Main work I have done:

1. Referring to [tensorrtx](https://github.com/wang-xinyu/tensorrtx/tree/master/yolov8), convert the model `.pth` -> `.engine`, extract the inference part of the code, and encapsulate it into a C++ class that other projects can call easily (see the usage sketch after this list);
2. Replaced the preprocessing with my own CUDA preprocessing;
3. Removed the CUDA post-processing, because in my tests it was not significantly faster than CPU post-processing;
4. Greatly reduced the `conf_thres` hyperparameter of the post-processing NMS; this follows from the principle of `ByteTrack` tracking and is very important;
5. Compiled the `YOLOv8` inference into a dynamic link library to decouple the projects;
6. Referring to the official [ByteTrack TensorRT deploy](https://github.com/ifzhang/ByteTrack/tree/main/deploy/TensorRT/cpp), modified its interface to the `YOLO` detector;
7. Also compiled `ByteTrack` into a dynamic link library, decoupling the projects further;
8. Added category filtering: you can set the categories you want to track at line 8 of `main.cpp`.
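Roughly, `main.cpp` wires the two libraries together as sketched below. The detector-side names (`YoloDetecter`, `DetectResult`, shown commented out) are placeholders, not necessarily the real API exported by `yolo/include/yolov8_lib.h`; the tracker-side types (`BYTETracker`, `Object`, `STrack`) are the real ones from `bytetrack/include/BYTETracker.h`:

```cpp
#include <algorithm>
#include <opencv2/opencv.hpp>
#include "BYTETracker.h"

int main()
{
    std::vector<int> trackClasses{0};  // e.g. only track class 0 (person)

    cv::VideoCapture cap("../videos/demo.mp4");
    BYTETracker tracker(30, 30);  // frame rate, track buffer

    cv::Mat frame;
    while (cap.read(frame))
    {
        // 1. Run the TensorRT detector (hypothetical API):
        // std::vector<DetectResult> dets = detecter.inference(frame);

        // 2. Keep only the classes of interest and convert to ByteTrack's input type:
        std::vector<Object> objects;
        // for (auto& det : dets)
        //     if (std::count(trackClasses.begin(), trackClasses.end(), det.classId))
        //         objects.push_back(Object{det.rect, det.classId, det.conf});

        // 3. Update the tracker and draw the tracks it returns:
        std::vector<STrack> tracks = tracker.update(objects);
        for (auto& t : tracks)
        {
            cv::Scalar color = tracker.get_color(t.track_id);
            cv::rectangle(frame, cv::Rect(t.tlwh[0], t.tlwh[1], t.tlwh[2], t.tlwh[3]), color, 2);
            cv::putText(frame, std::to_string(t.track_id), cv::Point(t.tlwh[0], t.tlwh[1] - 5),
                        0, 0.6, color, 2);
        }
    }
    return 0;
}
```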
## Effect

![](./assets/effect.gif)

## Environment

1. Base requirements:

   - `TensorRT 8.0+`
   - `OpenCV 3.4.0+`

2. My running environment on `Jetson Nano` is as follows:

   - The flashed system image is `Jetpack 4.6.1`, whose stock environment is:

     | CUDA | cuDNN | TensorRT | OpenCV |
     | ---- | ----- | -------- | ------ |
     | 10.2 | 8.2   | 8.2.1    | 4.1.1  |

   - Install Eigen:

   ```bash
   apt install libeigen3-dev
   ```

## Model conversion

Goal: get the serialized TensorRT engine file, with suffix `.engine`.

- First get the model file in `wts` format, link: [yolov8s.wts](https://pan.baidu.com/s/16d_MqVlUxnjOhLxVyjQy8w), extraction code: gsqm
- Then follow these steps:

```bash
cd {TensorRT-YOLOv8-ByteTrack}/tensorrtx-yolov8/
mkdir build
cd build
cp {path/to/yolov8s.wts} .
cmake ..
make
./yolov8 -s yolov8s.wts yolov8s.engine s

cd ../../
mkdir yolo/engine
cp tensorrtx-yolov8/build/yolov8s.engine yolo/engine
```

## Run tracking

- Follow these steps:

```bash
mkdir build
cd build
cmake ..
make
./main ../videos/demo.mp4  # the path to your own video
```

Afterwards `result.mp4`, a video file showing the tracking result, will be in the `build` directory.

If you want the tracked video to play in real time, you can uncomment line 94 of `main.cpp`.

## Reference

- [tensorrtx](https://github.com/wang-xinyu/tensorrtx/tree/master/yolov8)

- [ByteTrack](https://github.com/ifzhang/ByteTrack)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deploying YOLOv8 + ByteTrack with the TensorRT C++ API

- My other project deploying the various YOLOv8 tasks with TensorRT: [YOLOv8 detection, key points, segmentation, tracking](https://github.com/emptysoal/TensorRT-YOLOv8)

## 1. Introduction

- Object tracking with `YOLOv8` + `ByteTrack`, deployed on top of `TensorRT-v8`;

- Can be deployed on `Jetson`-series embedded devices, or on `Linux x86_64` servers.

My main work:

1. Referring to the [tensorrtx](https://github.com/wang-xinyu/tensorrtx/tree/master/yolov8) project, convert the model `.pth` -> `.engine`, extract the **inference part of the code and encapsulate it as a C++ class** that other projects can call easily;
2. Replaced the preprocessing with my own CUDA preprocessing;
3. Dropped the CUDA post-processing, since tests showed it is not significantly faster than CPU post-processing;
4. **Greatly reduced the `conf_thres` hyperparameter** of the post-processing NMS; this follows from the principle behind `ByteTrack` and is **very important** (see the excerpt after this list);
5. Compiled the `YOLOv8` inference into a dynamic link library to decouple the projects;
6. Referring to the official [ByteTrack TensorRT deployment](https://github.com/ifzhang/ByteTrack/tree/main/deploy/TensorRT/cpp), modified its interface to the `YOLO` detector;
7. Also compiled `ByteTrack` into a dynamic link library, decoupling the projects further;
8. Added category filtering; you can set the categories you want to track at line 8 of `main.cpp`.
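Why item 4 matters: the detector's NMS confidence threshold must stay low, because `BYTETracker::update()` itself separates high- and low-score detections, and the low-score ones are exactly what the second association stage consumes. The relevant excerpt from `bytetrack/src/BYTETracker.cpp`:

```cpp
float score = objects[i].prob;

STrack strack(STrack::tlbr_to_tlwh(tlbr_), score);
if (score >= track_thresh)            // 0.5 by default: used in the first association
    detections.push_back(strack);
else
    detections_low.push_back(strack); // kept for the second association
```

If the detector already discards everything below, say, 0.5, `detections_low` stays empty and ByteTrack degenerates into a single-stage matcher.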
## 2. Effect

![](./assets/effect.gif)

## 3. Environment

1. Base requirements:

   - `TensorRT 8.0+`
   - `OpenCV 3.4.0+`

2. My running environment on `Jetson Nano`:

   - The flashed system image is `Jetpack 4.6.1`, whose stock environment is:

     | CUDA | cuDNN | TensorRT | OpenCV |
     | ---- | ----- | -------- | ------ |
     | 10.2 | 8.2   | 8.2.1    | 4.1.1  |

   There is plenty of material online about how to flash the image onto a `Jetson Nano`, so I will not repeat it here; just note that when downloading the `Jetpack` image you should choose version 4.6.1, which ships with TensorRT v8.

   - Install the `Eigen` library:

   ```bash
   apt install libeigen3-dev
   ```

3. On a server, make sure the base environment versions above are satisfied, then simply install the `Eigen` library.

Tip: whatever the device, remember to verify the library paths in the `CMakeLists.txt` files.

## 4. Model conversion

Goal: obtain the serialized `TensorRT` file, with suffix `.engine`.

- First get the model file in `wts` format, link: [yolov8s.wts](https://pan.baidu.com/s/16d_MqVlUxnjOhLxVyjQy8w), extraction code: gsqm

- Then execute the following steps:

```bash
cd {TensorRT-YOLOv8-ByteTrack}/tensorrtx-yolov8/
mkdir build
cd build
cp {path/to/yolov8s.wts} .
cmake ..
make
./yolov8 -s yolov8s.wts yolov8s.engine s

cd ../../
mkdir yolo/engine
cp tensorrtx-yolov8/build/yolov8s.engine yolo/engine
```

## 5. Running the project

- Compile and run the object-tracking code
- Proceed as follows:

```bash
mkdir build
cd build
cmake ..
make
./main ../videos/demo.mp4  # pass the path of your own video
```

Afterwards `result.mp4`, the video file with the tracking result, is produced in the `build` directory.

If you want the tracked video to play in real time, uncomment line 94 of `main.cpp`.

## 6. References

Mainly referenced the following projects:

- [tensorrtx](https://github.com/wang-xinyu/tensorrtx/tree/master/yolov8)

- [ByteTrack](https://github.com/ifzhang/ByteTrack)

--------------------------------------------------------------------------------
/assets/bytetrack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emptysoal/TensorRT-YOLOv8-ByteTrack/cec012e0672dd3d1f79a8dc29875be9c56feedb5/assets/bytetrack.png

--------------------------------------------------------------------------------
/assets/effect.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emptysoal/TensorRT-YOLOv8-ByteTrack/cec012e0672dd3d1f79a8dc29875be9c56feedb5/assets/effect.gif

--------------------------------------------------------------------------------
/bytetrack/include/BYTETracker.h:
--------------------------------------------------------------------------------
#pragma once

#include "STrack.h"

struct Object
{
    cv::Rect_<float> rect;
    int label;
    float prob;
};

class BYTETracker
{
public:
    BYTETracker(int frame_rate = 30, int track_buffer = 30);
    ~BYTETracker();

    vector<STrack> update(const vector<Object>& objects);
    Scalar get_color(int idx);

private:
    vector<STrack*> joint_stracks(vector<STrack*> &tlista, vector<STrack> &tlistb);
    vector<STrack> joint_stracks(vector<STrack> &tlista, vector<STrack> &tlistb);

    vector<STrack> sub_stracks(vector<STrack> &tlista, vector<STrack> &tlistb);
    void remove_duplicate_stracks(vector<STrack> &resa, vector<STrack> &resb, vector<STrack> &stracksa, vector<STrack> &stracksb);

    void linear_assignment(vector<vector<float> > &cost_matrix, int cost_matrix_size, int cost_matrix_size_size, float thresh,
        vector<vector<int> > &matches, vector<int> &unmatched_a, vector<int> &unmatched_b);
    vector<vector<float> > iou_distance(vector<STrack*> &atracks, vector<STrack> &btracks, int &dist_size, int &dist_size_size);
    vector<vector<float> > iou_distance(vector<STrack> &atracks, vector<STrack> &btracks);
    vector<vector<float> > ious(vector<vector<float> > &atlbrs, vector<vector<float> > &btlbrs);

    double lapjv(const vector<vector<float> > &cost, vector<int> &rowsol, vector<int> &colsol,
        bool extend_cost = false, float cost_limit = LONG_MAX, bool return_cost = true);

private:

    float track_thresh;
    float high_thresh;
    float match_thresh;
    int frame_id;
    int max_time_lost;

    vector<STrack> tracked_stracks;
    vector<STrack> lost_stracks;
    vector<STrack> removed_stracks;
    byte_kalman::KalmanFilter kalman_filter;
};

--------------------------------------------------------------------------------
/bytetrack/include/STrack.h:
--------------------------------------------------------------------------------
#pragma once

#include <opencv2/opencv.hpp>
#include "kalmanFilter.h"

using namespace cv;
using namespace std;

enum TrackState { New = 0, Tracked, Lost, Removed };

class STrack
{
public:
    STrack(vector<float> tlwh_, float score);
    ~STrack();

    vector<float> static tlbr_to_tlwh(vector<float> &tlbr);
    void static multi_predict(vector<STrack*> &stracks, byte_kalman::KalmanFilter &kalman_filter);
    void static_tlwh();
    void static_tlbr();
    vector<float> tlwh_to_xyah(vector<float> tlwh_tmp);
    vector<float> to_xyah();
    void mark_lost();
    void mark_removed();
    int next_id();
    int end_frame();

    void activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id);
    void re_activate(STrack &new_track, int frame_id, bool new_id = false);
    void update(STrack &new_track, int frame_id);

public:
    bool is_activated;
    int track_id;
    int state;

    vector<float> _tlwh;
    vector<float> tlwh;
    vector<float> tlbr;
    int frame_id;
    int tracklet_len;
    int start_frame;

    KAL_MEAN mean;
    KAL_COVA covariance;
    float score;

private:
    byte_kalman::KalmanFilter kalman_filter;
};
--------------------------------------------------------------------------------
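`STrack` keeps the same box in three layouts: `tlwh` (top-left corner plus size), `tlbr` (two corners) and `xyah` (center, aspect ratio, height), the last being what the Kalman filter tracks. A worked example of the conversions implemented in `bytetrack/src/STrack.cpp`:

```cpp
// One box, three layouts (numbers chosen for illustration):
std::vector<float> tlwh{200.f, 100.f, 100.f, 50.f};  // top-left x, y, width, height
// STrack::static_tlbr:  tlbr = {x1, y1, x1 + w, y1 + h}       = {200, 100, 300, 150}
// STrack::tlwh_to_xyah: xyah = {x1 + w/2, y1 + h/2, w/h, h}   = {250, 125, 2.0, 50}
```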
/bytetrack/include/dataType.h:
--------------------------------------------------------------------------------
#pragma once

#include <cstddef>
#include <vector>

#include <Eigen/Core>
#include <Eigen/Dense>
typedef Eigen::Matrix<float, 1, 4, Eigen::RowMajor> DETECTBOX;
typedef Eigen::Matrix<float, -1, 4, Eigen::RowMajor> DETECTBOXSS;
typedef Eigen::Matrix<float, 1, 128, Eigen::RowMajor> FEATURE;
typedef Eigen::Matrix<float, Eigen::Dynamic, 128, Eigen::RowMajor> FEATURESS;
//typedef std::vector<FEATURE> FEATURESS;

//Kalmanfilter
//typedef Eigen::Matrix<float, 8, 8, Eigen::RowMajor> KAL_FILTER;
typedef Eigen::Matrix<float, 1, 8, Eigen::RowMajor> KAL_MEAN;
typedef Eigen::Matrix<float, 8, 8, Eigen::RowMajor> KAL_COVA;
typedef Eigen::Matrix<float, 1, 4, Eigen::RowMajor> KAL_HMEAN;
typedef Eigen::Matrix<float, 4, 4, Eigen::RowMajor> KAL_HCOVA;
using KAL_DATA = std::pair<KAL_MEAN, KAL_COVA>;
using KAL_HDATA = std::pair<KAL_HMEAN, KAL_HCOVA>;

//main
using RESULT_DATA = std::pair<int, DETECTBOX>;

//tracker:
using TRACKER_DATA = std::pair<int, FEATURESS>;
using MATCH_DATA = std::pair<int, int>;
typedef struct t {
    std::vector<MATCH_DATA> matches;
    std::vector<int> unmatched_tracks;
    std::vector<int> unmatched_detections;
} TRACHER_MATCHD;

//linear_assignment:
typedef Eigen::Matrix<float, -1, -1, Eigen::RowMajor> DYNAMICM;
--------------------------------------------------------------------------------
/bytetrack/include/kalmanFilter.h:
--------------------------------------------------------------------------------
#pragma once

#include "dataType.h"

namespace byte_kalman
{
    class KalmanFilter
    {
    public:
        static const double chi2inv95[10];
        KalmanFilter();
        KAL_DATA initiate(const DETECTBOX& measurement);
        void predict(KAL_MEAN& mean, KAL_COVA& covariance);
        KAL_HDATA project(const KAL_MEAN& mean, const KAL_COVA& covariance);
        KAL_DATA update(const KAL_MEAN& mean,
            const KAL_COVA& covariance,
            const DETECTBOX& measurement);

        Eigen::Matrix<float, 1, -1> gating_distance(
            const KAL_MEAN& mean,
            const KAL_COVA& covariance,
            const std::vector<DETECTBOX>& measurements,
            bool only_position = false);

    private:
        Eigen::Matrix<float, 8, 8, Eigen::RowMajor> _motion_mat;
        Eigen::Matrix<float, 4, 8, Eigen::RowMajor> _update_mat;
        float _std_weight_position;
        float _std_weight_velocity;
    };
}
--------------------------------------------------------------------------------
/bytetrack/include/lapjv.h:
--------------------------------------------------------------------------------
#ifndef LAPJV_H
#define LAPJV_H

#define LARGE 1000000

#if !defined TRUE
#define TRUE 1
#endif
#if !defined FALSE
#define FALSE 0
#endif

#define NEW(x, t, n) if ((x = (t *)malloc(sizeof(t) * (n))) == 0) { return -1; }
#define FREE(x) if (x != 0) { free(x); x = 0; }
#define SWAP_INDICES(a, b) { int_t _temp_index = a; a = b; b = _temp_index; }

#if 0
#include <assert.h>
#define ASSERT(cond) assert(cond) 20 | #define PRINTF(fmt, ...) printf(fmt, ##__VA_ARGS__) 21 | #define PRINT_COST_ARRAY(a, n) \ 22 | while (1) { \ 23 | printf(#a" = ["); \ 24 | if ((n) > 0) { \ 25 | printf("%f", (a)[0]); \ 26 | for (uint_t j = 1; j < n; j++) { \ 27 | printf(", %f", (a)[j]); \ 28 | } \ 29 | } \ 30 | printf("]\n"); \ 31 | break; \ 32 | } 33 | #define PRINT_INDEX_ARRAY(a, n) \ 34 | while (1) { \ 35 | printf(#a" = ["); \ 36 | if ((n) > 0) { \ 37 | printf("%d", (a)[0]); \ 38 | for (uint_t j = 1; j < n; j++) { \ 39 | printf(", %d", (a)[j]); \ 40 | } \ 41 | } \ 42 | printf("]\n"); \ 43 | break; \ 44 | } 45 | #else 46 | #define ASSERT(cond) 47 | #define PRINTF(fmt, ...) 48 | #define PRINT_COST_ARRAY(a, n) 49 | #define PRINT_INDEX_ARRAY(a, n) 50 | #endif 51 | 52 | 53 | typedef signed int int_t; 54 | typedef unsigned int uint_t; 55 | typedef double cost_t; 56 | typedef char boolean; 57 | typedef enum fp_t { FP_1 = 1, FP_2 = 2, FP_DYNAMIC = 3 } fp_t; 58 | 59 | extern int_t lapjv_internal( 60 | const uint_t n, cost_t *cost[], 61 | int_t *x, int_t *y); 62 | 63 | #endif // LAPJV_H -------------------------------------------------------------------------------- /bytetrack/include/logging.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
*/

#ifndef TENSORRT_LOGGING_H
#define TENSORRT_LOGGING_H

#include "NvInferRuntimeCommon.h"
#include <cassert>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <sstream>
#include <string>

using Severity = nvinfer1::ILogger::Severity;

class LogStreamConsumerBuffer : public std::stringbuf
{
public:
    LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog)
        : mOutput(stream)
        , mPrefix(prefix)
        , mShouldLog(shouldLog)
    {
    }

    LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other)
        : mOutput(other.mOutput)
    {
    }

    ~LogStreamConsumerBuffer()
    {
        // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence
        // std::streambuf::pptr() gives a pointer to the current position of the output sequence
        // if the pointer to the beginning is not equal to the pointer to the current position,
        // call putOutput() to log the output to the stream
        if (pbase() != pptr())
        {
            putOutput();
        }
    }

    // synchronizes the stream buffer and returns 0 on success
    // synchronizing the stream buffer consists of inserting the buffer contents into the stream,
    // resetting the buffer and flushing the stream
    virtual int sync()
    {
        putOutput();
        return 0;
    }

    void putOutput()
    {
        if (mShouldLog)
        {
            // prepend timestamp
            std::time_t timestamp = std::time(nullptr);
            tm* tm_local = std::localtime(&timestamp);
            std::cout << "[";
            std::cout << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/";
            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
            std::cout << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-";
            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
            // std::stringbuf::str() gets the string contents of the buffer
            // insert the buffer contents pre-appended by the appropriate prefix into the stream
            mOutput << mPrefix << str();
            // set the buffer to empty
            str("");
            // flush the stream
            mOutput.flush();
        }
    }

    void setShouldLog(bool shouldLog)
    {
        mShouldLog = shouldLog;
    }

private:
    std::ostream& mOutput;
    std::string mPrefix;
    bool mShouldLog;
};

//!
//! \class LogStreamConsumerBase
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer
//!
class LogStreamConsumerBase
{
public:
    LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog)
        : mBuffer(stream, prefix, shouldLog)
    {
    }

protected:
    LogStreamConsumerBuffer mBuffer;
};

//!
//! \class LogStreamConsumer
//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages.
//! Order of base classes is LogStreamConsumerBase and then std::ostream.
//! This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field
//! in LogStreamConsumer and then the address of the buffer is passed to std::ostream.
//!
This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream. 125 | //! Please do not change the order of the parent classes. 126 | //! 127 | class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream 128 | { 129 | public: 130 | //! \brief Creates a LogStreamConsumer which logs messages with level severity. 131 | //! Reportable severity determines if the messages are severe enough to be logged. 132 | LogStreamConsumer(Severity reportableSeverity, Severity severity) 133 | : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity) 134 | , std::ostream(&mBuffer) // links the stream buffer with the stream 135 | , mShouldLog(severity <= reportableSeverity) 136 | , mSeverity(severity) 137 | { 138 | } 139 | 140 | LogStreamConsumer(LogStreamConsumer&& other) 141 | : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog) 142 | , std::ostream(&mBuffer) // links the stream buffer with the stream 143 | , mShouldLog(other.mShouldLog) 144 | , mSeverity(other.mSeverity) 145 | { 146 | } 147 | 148 | void setReportableSeverity(Severity reportableSeverity) 149 | { 150 | mShouldLog = mSeverity <= reportableSeverity; 151 | mBuffer.setShouldLog(mShouldLog); 152 | } 153 | 154 | private: 155 | static std::ostream& severityOstream(Severity severity) 156 | { 157 | return severity >= Severity::kINFO ? std::cout : std::cerr; 158 | } 159 | 160 | static std::string severityPrefix(Severity severity) 161 | { 162 | switch (severity) 163 | { 164 | case Severity::kINTERNAL_ERROR: return "[F] "; 165 | case Severity::kERROR: return "[E] "; 166 | case Severity::kWARNING: return "[W] "; 167 | case Severity::kINFO: return "[I] "; 168 | case Severity::kVERBOSE: return "[V] "; 169 | default: assert(0); return ""; 170 | } 171 | } 172 | 173 | bool mShouldLog; 174 | Severity mSeverity; 175 | }; 176 | 177 | //! \class Logger 178 | //! 179 | //! \brief Class which manages logging of TensorRT tools and samples 180 | //! 181 | //! \details This class provides a common interface for TensorRT tools and samples to log information to the console, 182 | //! and supports logging two types of messages: 183 | //! 184 | //! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal) 185 | //! - Test pass/fail messages 186 | //! 187 | //! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is 188 | //! that the logic for controlling the verbosity and formatting of sample output is centralized in one location. 189 | //! 190 | //! In the future, this class could be extended to support dumping test results to a file in some standard format 191 | //! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run). 192 | //! 193 | //! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger 194 | //! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT 195 | //! library and messages coming from the sample. 196 | //! 197 | //! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the 198 | //! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger 199 | //! object. 
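//!
//! Example usage (a sketch; createInferBuilder is the standard TensorRT factory):
//!     Logger gLogger;
//!     auto builder = nvinfer1::createInferBuilder(gLogger.getTRTLogger());
//!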
200 | 201 | class Logger : public nvinfer1::ILogger 202 | { 203 | public: 204 | Logger(Severity severity = Severity::kWARNING) 205 | : mReportableSeverity(severity) 206 | { 207 | } 208 | 209 | //! 210 | //! \enum TestResult 211 | //! \brief Represents the state of a given test 212 | //! 213 | enum class TestResult 214 | { 215 | kRUNNING, //!< The test is running 216 | kPASSED, //!< The test passed 217 | kFAILED, //!< The test failed 218 | kWAIVED //!< The test was waived 219 | }; 220 | 221 | //! 222 | //! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger 223 | //! \return The nvinfer1::ILogger associated with this Logger 224 | //! 225 | //! TODO Once all samples are updated to use this method to register the logger with TensorRT, 226 | //! we can eliminate the inheritance of Logger from ILogger 227 | //! 228 | nvinfer1::ILogger& getTRTLogger() 229 | { 230 | return *this; 231 | } 232 | 233 | //! 234 | //! \brief Implementation of the nvinfer1::ILogger::log() virtual method 235 | //! 236 | //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the 237 | //! inheritance from nvinfer1::ILogger 238 | //! 239 | void log(Severity severity, const char* msg) noexcept override 240 | { 241 | LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl; 242 | } 243 | 244 | //! 245 | //! \brief Method for controlling the verbosity of logging output 246 | //! 247 | //! \param severity The logger will only emit messages that have severity of this level or higher. 248 | //! 249 | void setReportableSeverity(Severity severity) 250 | { 251 | mReportableSeverity = severity; 252 | } 253 | 254 | //! 255 | //! \brief Opaque handle that holds logging information for a particular test 256 | //! 257 | //! This object is an opaque handle to information used by the Logger to print test results. 258 | //! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used 259 | //! with Logger::reportTest{Start,End}(). 260 | //! 261 | class TestAtom 262 | { 263 | public: 264 | TestAtom(TestAtom&&) = default; 265 | 266 | private: 267 | friend class Logger; 268 | 269 | TestAtom(bool started, const std::string& name, const std::string& cmdline) 270 | : mStarted(started) 271 | , mName(name) 272 | , mCmdline(cmdline) 273 | { 274 | } 275 | 276 | bool mStarted; 277 | std::string mName; 278 | std::string mCmdline; 279 | }; 280 | 281 | //! 282 | //! \brief Define a test for logging 283 | //! 284 | //! \param[in] name The name of the test. This should be a string starting with 285 | //! "TensorRT" and containing dot-separated strings containing 286 | //! the characters [A-Za-z0-9_]. 287 | //! For example, "TensorRT.sample_googlenet" 288 | //! \param[in] cmdline The command line used to reproduce the test 289 | // 290 | //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). 291 | //! 292 | static TestAtom defineTest(const std::string& name, const std::string& cmdline) 293 | { 294 | return TestAtom(false, name, cmdline); 295 | } 296 | 297 | //! 298 | //! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments 299 | //! as input 300 | //! 301 | //! \param[in] name The name of the test 302 | //! \param[in] argc The number of command-line arguments 303 | //! \param[in] argv The array of command-line arguments (given as C strings) 304 | //! 305 | //! 
\return a TestAtom that can be used in Logger::reportTest{Start,End}(). 306 | static TestAtom defineTest(const std::string& name, int argc, char const* const* argv) 307 | { 308 | auto cmdline = genCmdlineString(argc, argv); 309 | return defineTest(name, cmdline); 310 | } 311 | 312 | //! 313 | //! \brief Report that a test has started. 314 | //! 315 | //! \pre reportTestStart() has not been called yet for the given testAtom 316 | //! 317 | //! \param[in] testAtom The handle to the test that has started 318 | //! 319 | static void reportTestStart(TestAtom& testAtom) 320 | { 321 | reportTestResult(testAtom, TestResult::kRUNNING); 322 | assert(!testAtom.mStarted); 323 | testAtom.mStarted = true; 324 | } 325 | 326 | //! 327 | //! \brief Report that a test has ended. 328 | //! 329 | //! \pre reportTestStart() has been called for the given testAtom 330 | //! 331 | //! \param[in] testAtom The handle to the test that has ended 332 | //! \param[in] result The result of the test. Should be one of TestResult::kPASSED, 333 | //! TestResult::kFAILED, TestResult::kWAIVED 334 | //! 335 | static void reportTestEnd(const TestAtom& testAtom, TestResult result) 336 | { 337 | assert(result != TestResult::kRUNNING); 338 | assert(testAtom.mStarted); 339 | reportTestResult(testAtom, result); 340 | } 341 | 342 | static int reportPass(const TestAtom& testAtom) 343 | { 344 | reportTestEnd(testAtom, TestResult::kPASSED); 345 | return EXIT_SUCCESS; 346 | } 347 | 348 | static int reportFail(const TestAtom& testAtom) 349 | { 350 | reportTestEnd(testAtom, TestResult::kFAILED); 351 | return EXIT_FAILURE; 352 | } 353 | 354 | static int reportWaive(const TestAtom& testAtom) 355 | { 356 | reportTestEnd(testAtom, TestResult::kWAIVED); 357 | return EXIT_SUCCESS; 358 | } 359 | 360 | static int reportTest(const TestAtom& testAtom, bool pass) 361 | { 362 | return pass ? reportPass(testAtom) : reportFail(testAtom); 363 | } 364 | 365 | Severity getReportableSeverity() const 366 | { 367 | return mReportableSeverity; 368 | } 369 | 370 | private: 371 | //! 372 | //! \brief returns an appropriate string for prefixing a log message with the given severity 373 | //! 374 | static const char* severityPrefix(Severity severity) 375 | { 376 | switch (severity) 377 | { 378 | case Severity::kINTERNAL_ERROR: return "[F] "; 379 | case Severity::kERROR: return "[E] "; 380 | case Severity::kWARNING: return "[W] "; 381 | case Severity::kINFO: return "[I] "; 382 | case Severity::kVERBOSE: return "[V] "; 383 | default: assert(0); return ""; 384 | } 385 | } 386 | 387 | //! 388 | //! \brief returns an appropriate string for prefixing a test result message with the given result 389 | //! 390 | static const char* testResultString(TestResult result) 391 | { 392 | switch (result) 393 | { 394 | case TestResult::kRUNNING: return "RUNNING"; 395 | case TestResult::kPASSED: return "PASSED"; 396 | case TestResult::kFAILED: return "FAILED"; 397 | case TestResult::kWAIVED: return "WAIVED"; 398 | default: assert(0); return ""; 399 | } 400 | } 401 | 402 | //! 403 | //! \brief returns an appropriate output stream (cout or cerr) to use with the given severity 404 | //! 405 | static std::ostream& severityOstream(Severity severity) 406 | { 407 | return severity >= Severity::kINFO ? std::cout : std::cerr; 408 | } 409 | 410 | //! 411 | //! \brief method that implements logging test results 412 | //! 
    static void reportTestResult(const TestAtom& testAtom, TestResult result)
    {
        severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
                                         << testAtom.mCmdline << std::endl;
    }

    //!
    //! \brief generate a command line string from the given (argc, argv) values
    //!
    static std::string genCmdlineString(int argc, char const* const* argv)
    {
        std::stringstream ss;
        for (int i = 0; i < argc; i++)
        {
            if (i > 0)
                ss << " ";
            ss << argv[i];
        }
        return ss.str();
    }

    Severity mReportableSeverity;
};

namespace
{

//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE
//!
//! Example usage:
//!
//!     LOG_VERBOSE(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
{
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
}

//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO
//!
//! Example usage:
//!
//!     LOG_INFO(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_INFO(const Logger& logger)
{
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
}

//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING
//!
//! Example usage:
//!
//!     LOG_WARN(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_WARN(const Logger& logger)
{
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
}

//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR
//!
//! Example usage:
//!
//!     LOG_ERROR(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_ERROR(const Logger& logger)
{
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
}

//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR
//        ("fatal" severity)
//!
//! Example usage:
//!
//!     LOG_FATAL(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_FATAL(const Logger& logger)
{
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR);
}

} // anonymous namespace

#endif // TENSORRT_LOGGING_H

--------------------------------------------------------------------------------
/bytetrack/src/BYTETracker.cpp:
--------------------------------------------------------------------------------
#include "BYTETracker.h"
#include <fstream>

BYTETracker::BYTETracker(int frame_rate, int track_buffer)
{
    track_thresh = 0.5;
    high_thresh = 0.6;
    match_thresh = 0.8;

    frame_id = 0;
    max_time_lost = int(frame_rate / 30.0 * track_buffer);
    cout << "Init ByteTrack!" << endl;
}

BYTETracker::~BYTETracker()
{
}
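// Worked example of the constructor's arithmetic: with the defaults frame_rate = 30
// and track_buffer = 30, max_time_lost = int(30 / 30.0 * 30) = 30 frames, i.e. a lost
// track survives roughly one second of video before Step 5 of update() removes it.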
vector<STrack> BYTETracker::update(const vector<Object>& objects)
{

    ////////////////// Step 1: Get detections //////////////////
    this->frame_id++;
    vector<STrack> activated_stracks;
    vector<STrack> refind_stracks;
    vector<STrack> removed_stracks;
    vector<STrack> lost_stracks;
    vector<STrack> detections;
    vector<STrack> detections_low;

    vector<STrack> detections_cp;
    vector<STrack> tracked_stracks_swap;
    vector<STrack> resa, resb;
    vector<STrack> output_stracks;

    vector<STrack*> unconfirmed;
    vector<STrack*> tracked_stracks;
    vector<STrack*> strack_pool;
    vector<STrack*> r_tracked_stracks;

    if (objects.size() > 0)
    {
        for (int i = 0; i < objects.size(); i++)
        {
            vector<float> tlbr_;
            tlbr_.resize(4);
            tlbr_[0] = objects[i].rect.x;
            tlbr_[1] = objects[i].rect.y;
            tlbr_[2] = objects[i].rect.x + objects[i].rect.width;
            tlbr_[3] = objects[i].rect.y + objects[i].rect.height;

            float score = objects[i].prob;

            STrack strack(STrack::tlbr_to_tlwh(tlbr_), score);
            if (score >= track_thresh)
            {
                detections.push_back(strack);
            }
            else
            {
                detections_low.push_back(strack);
            }

        }
    }

    // Add newly detected tracklets to tracked_stracks
    for (int i = 0; i < this->tracked_stracks.size(); i++)
    {
        if (!this->tracked_stracks[i].is_activated)
            unconfirmed.push_back(&this->tracked_stracks[i]);
        else
            tracked_stracks.push_back(&this->tracked_stracks[i]);
    }

    ////////////////// Step 2: First association, with IoU //////////////////
    strack_pool = joint_stracks(tracked_stracks, this->lost_stracks);
    STrack::multi_predict(strack_pool, this->kalman_filter);

    vector<vector<float> > dists;
    int dist_size = 0, dist_size_size = 0;
    dists = iou_distance(strack_pool, detections, dist_size, dist_size_size);

    vector<vector<int> > matches;
    vector<int> u_track, u_detection;
    linear_assignment(dists, dist_size, dist_size_size, match_thresh, matches, u_track, u_detection);

    for (int i = 0; i < matches.size(); i++)
    {
        STrack *track = strack_pool[matches[i][0]];
        STrack *det = &detections[matches[i][1]];
        if (track->state == TrackState::Tracked)
        {
            track->update(*det, this->frame_id);
            activated_stracks.push_back(*track);
        }
        else
        {
            track->re_activate(*det, this->frame_id, false);
            refind_stracks.push_back(*track);
        }
    }

    ////////////////// Step 3: Second association, using low score dets //////////////////
    for (int i = 0; i < u_detection.size(); i++)
    {
        detections_cp.push_back(detections[u_detection[i]]);
    }
    detections.clear();
    detections.assign(detections_low.begin(), detections_low.end());

    for (int i = 0; i < u_track.size(); i++)
    {
        if (strack_pool[u_track[i]]->state == TrackState::Tracked)
        {
            r_tracked_stracks.push_back(strack_pool[u_track[i]]);
        }
    }

    dists.clear();
    dists = iou_distance(r_tracked_stracks, detections, dist_size, dist_size_size);

    matches.clear();
    u_track.clear();
    u_detection.clear();
    linear_assignment(dists, dist_size, dist_size_size, 0.5, matches, u_track, u_detection);

    for (int i = 0; i < matches.size(); i++)
    {
        STrack *track = r_tracked_stracks[matches[i][0]];
        STrack *det = &detections[matches[i][1]];
        if (track->state == TrackState::Tracked)
        {
            track->update(*det, this->frame_id);
            activated_stracks.push_back(*track);
        }
        else
        {
            track->re_activate(*det, this->frame_id, false);
            refind_stracks.push_back(*track);
        }
    }

    for (int i = 0; i < u_track.size(); i++)
    {
        STrack *track = r_tracked_stracks[u_track[i]];
        if (track->state != TrackState::Lost)
        {
            track->mark_lost();
            lost_stracks.push_back(*track);
        }
    }

    // Deal with unconfirmed tracks, usually tracks with only one beginning frame
    detections.clear();
    detections.assign(detections_cp.begin(), detections_cp.end());

    dists.clear();
    dists = iou_distance(unconfirmed, detections, dist_size, dist_size_size);

    matches.clear();
    vector<int> u_unconfirmed;
    u_detection.clear();
    linear_assignment(dists, dist_size, dist_size_size, 0.7, matches, u_unconfirmed, u_detection);

    for (int i = 0; i < matches.size(); i++)
    {
        unconfirmed[matches[i][0]]->update(detections[matches[i][1]], this->frame_id);
        activated_stracks.push_back(*unconfirmed[matches[i][0]]);
    }

    for (int i = 0; i < u_unconfirmed.size(); i++)
    {
        STrack *track = unconfirmed[u_unconfirmed[i]];
        track->mark_removed();
        removed_stracks.push_back(*track);
    }

    ////////////////// Step 4: Init new stracks //////////////////
    for (int i = 0; i < u_detection.size(); i++)
    {
        STrack *track = &detections[u_detection[i]];
        if (track->score < this->high_thresh)
            continue;
        track->activate(this->kalman_filter, this->frame_id);
        activated_stracks.push_back(*track);
    }

    ////////////////// Step 5: Update state //////////////////
    for (int i = 0; i < this->lost_stracks.size(); i++)
    {
        if (this->frame_id - this->lost_stracks[i].end_frame() > this->max_time_lost)
        {
            this->lost_stracks[i].mark_removed();
            removed_stracks.push_back(this->lost_stracks[i]);
        }
    }

    for (int i = 0; i < this->tracked_stracks.size(); i++)
    {
        if (this->tracked_stracks[i].state == TrackState::Tracked)
        {
            tracked_stracks_swap.push_back(this->tracked_stracks[i]);
        }
    }
    this->tracked_stracks.clear();
    this->tracked_stracks.assign(tracked_stracks_swap.begin(), tracked_stracks_swap.end());

    this->tracked_stracks = joint_stracks(this->tracked_stracks, activated_stracks);
    this->tracked_stracks = joint_stracks(this->tracked_stracks, refind_stracks);

    //std::cout << activated_stracks.size() << std::endl;

    this->lost_stracks = sub_stracks(this->lost_stracks, this->tracked_stracks);
    for (int i = 0; i < lost_stracks.size(); i++)
    {
        this->lost_stracks.push_back(lost_stracks[i]);
    }

    this->lost_stracks = sub_stracks(this->lost_stracks, this->removed_stracks);
    for (int i = 0; i < removed_stracks.size(); i++)
    {
        this->removed_stracks.push_back(removed_stracks[i]);
    }

    remove_duplicate_stracks(resa, resb, this->tracked_stracks, this->lost_stracks);

    this->tracked_stracks.clear();
    this->tracked_stracks.assign(resa.begin(), resa.end());
    this->lost_stracks.clear();
    this->lost_stracks.assign(resb.begin(), resb.end());

    for (int i = 0; i < this->tracked_stracks.size(); i++)
    {
        if (this->tracked_stracks[i].is_activated)
        {
            output_stracks.push_back(this->tracked_stracks[i]);
        }
    }
    return output_stracks;
}
--------------------------------------------------------------------------------
/bytetrack/src/STrack.cpp:
--------------------------------------------------------------------------------
#include "STrack.h"

STrack::STrack(vector<float> tlwh_, float score)
{
    _tlwh.resize(4);
    _tlwh.assign(tlwh_.begin(), tlwh_.end());

    is_activated = false;
    track_id = 0;
    state = TrackState::New;

    tlwh.resize(4);
    tlbr.resize(4);

    static_tlwh();
    static_tlbr();
    frame_id = 0;
    tracklet_len = 0;
    this->score = score;
    start_frame = 0;
}

STrack::~STrack()
{
}

void STrack::activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id)
{
    this->kalman_filter = kalman_filter;
    this->track_id = this->next_id();

    vector<float> _tlwh_tmp(4);
    _tlwh_tmp[0] = this->_tlwh[0];
    _tlwh_tmp[1] = this->_tlwh[1];
    _tlwh_tmp[2] = this->_tlwh[2];
    _tlwh_tmp[3] = this->_tlwh[3];
    vector<float> xyah = tlwh_to_xyah(_tlwh_tmp);
    DETECTBOX xyah_box;
    xyah_box[0] = xyah[0];
    xyah_box[1] = xyah[1];
    xyah_box[2] = xyah[2];
    xyah_box[3] = xyah[3];
    auto mc = this->kalman_filter.initiate(xyah_box);
    this->mean = mc.first;
    this->covariance = mc.second;

    static_tlwh();
    static_tlbr();

    this->tracklet_len = 0;
    this->state = TrackState::Tracked;
    if (frame_id == 1)
    {
        this->is_activated = true;
    }
    //this->is_activated = true;
    this->frame_id = frame_id;
    this->start_frame = frame_id;
}

void STrack::re_activate(STrack &new_track, int frame_id, bool new_id)
{
    vector<float> xyah = tlwh_to_xyah(new_track.tlwh);
    DETECTBOX xyah_box;
    xyah_box[0] = xyah[0];
    xyah_box[1] = xyah[1];
    xyah_box[2] = xyah[2];
    xyah_box[3] = xyah[3];
    auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box);
    this->mean = mc.first;
    this->covariance = mc.second;

    static_tlwh();
    static_tlbr();

    this->tracklet_len = 0;
    this->state = TrackState::Tracked;
    this->is_activated = true;
    this->frame_id = frame_id;
    this->score = new_track.score;
    if (new_id)
        this->track_id = next_id();
}

void STrack::update(STrack &new_track, int frame_id)
{
    this->frame_id = frame_id;
    this->tracklet_len++;

    vector<float> xyah = tlwh_to_xyah(new_track.tlwh);
    DETECTBOX xyah_box;
    xyah_box[0] = xyah[0];
    xyah_box[1] = xyah[1];
    xyah_box[2] = xyah[2];
    xyah_box[3] = xyah[3];

    auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box);
    this->mean = mc.first;
    this->covariance = mc.second;

    static_tlwh();
    static_tlbr();

    this->state = TrackState::Tracked;
    this->is_activated = true;

    this->score = new_track.score;
}

void STrack::static_tlwh()
{
    if (this->state == TrackState::New)
    {
        tlwh[0] = _tlwh[0];
        tlwh[1] = _tlwh[1];
        tlwh[2] = _tlwh[2];
        tlwh[3] = _tlwh[3];
        return;
    }

    tlwh[0] = mean[0];
    tlwh[1] = mean[1];
    tlwh[2] = mean[2];
    tlwh[3] = mean[3];

    tlwh[2] *= tlwh[3];
    tlwh[0] -= tlwh[2] / 2;
    tlwh[1] -= tlwh[3] / 2;
}

void STrack::static_tlbr()
{
    tlbr.clear();
    tlbr.assign(tlwh.begin(), tlwh.end());
    tlbr[2] += tlbr[0];
    tlbr[3] += tlbr[1];
}

vector<float> STrack::tlwh_to_xyah(vector<float> tlwh_tmp)
{
    vector<float> tlwh_output = tlwh_tmp;
    tlwh_output[0] += tlwh_output[2] / 2;
    tlwh_output[1] += tlwh_output[3] / 2;
    tlwh_output[2] /= tlwh_output[3];
    return tlwh_output;
}

vector<float> STrack::to_xyah()
{
    return tlwh_to_xyah(tlwh);
}

vector<float> STrack::tlbr_to_tlwh(vector<float> &tlbr)
{
    tlbr[2] -= tlbr[0];
    tlbr[3] -= tlbr[1];
    return tlbr;
}

void STrack::mark_lost()
{
    state = TrackState::Lost;
}

void STrack::mark_removed()
{
    state = TrackState::Removed;
}

int STrack::next_id()
{
    static int _count = 0;
    _count++;
    return _count;
}

int STrack::end_frame()
{
    return this->frame_id;
}

void STrack::multi_predict(vector<STrack*> &stracks, byte_kalman::KalmanFilter &kalman_filter)
{
    for (int i = 0; i < stracks.size(); i++)
    {
        if (stracks[i]->state != TrackState::Tracked)
        {
            stracks[i]->mean[7] = 0;
        }
        kalman_filter.predict(stracks[i]->mean, stracks[i]->covariance);
        stracks[i]->static_tlwh();
        stracks[i]->static_tlbr();
    }
}
--------------------------------------------------------------------------------
/bytetrack/src/kalmanFilter.cpp:
--------------------------------------------------------------------------------
#include "kalmanFilter.h"
#include <Eigen/Cholesky>

namespace byte_kalman
{
    const double KalmanFilter::chi2inv95[10] = {
        0,
        3.8415,
        5.9915,
        7.8147,
        9.4877,
        11.070,
        12.592,
        14.067,
        15.507,
        16.919
    };
    KalmanFilter::KalmanFilter()
    {
        int ndim = 4;
        double dt = 1.;

        _motion_mat = Eigen::MatrixXf::Identity(8, 8);
        for (int i = 0; i < ndim; i++) {
            _motion_mat(i, ndim + i) = dt;
        }
        _update_mat = Eigen::MatrixXf::Identity(4, 8);

        this->_std_weight_position = 1. / 20;
        this->_std_weight_velocity = 1. / 160;
    }
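    // The state is 8-dimensional: the measured (cx, cy, a, h) from xyah plus their
    // velocities. With dt = 1, _motion_mat is the constant-velocity transition matrix
    // (identity plus the x_i += v_i coupling set in the loop above), and _update_mat
    // is the observation matrix projecting the state back onto (cx, cy, a, h).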
    KAL_DATA KalmanFilter::initiate(const DETECTBOX &measurement)
    {
        DETECTBOX mean_pos = measurement;
        DETECTBOX mean_vel;
        for (int i = 0; i < 4; i++) mean_vel(i) = 0;

        KAL_MEAN mean;
        for (int i = 0; i < 8; i++) {
            if (i < 4) mean(i) = mean_pos(i);
            else mean(i) = mean_vel(i - 4);
        }

        KAL_MEAN std;
        std(0) = 2 * _std_weight_position * measurement[3];
        std(1) = 2 * _std_weight_position * measurement[3];
        std(2) = 1e-2;
        std(3) = 2 * _std_weight_position * measurement[3];
        std(4) = 10 * _std_weight_velocity * measurement[3];
        std(5) = 10 * _std_weight_velocity * measurement[3];
        std(6) = 1e-5;
        std(7) = 10 * _std_weight_velocity * measurement[3];

        KAL_MEAN tmp = std.array().square();
        KAL_COVA var = tmp.asDiagonal();
        return std::make_pair(mean, var);
    }

    void KalmanFilter::predict(KAL_MEAN &mean, KAL_COVA &covariance)
    {
        //revise the data;
        DETECTBOX std_pos;
        std_pos << _std_weight_position * mean(3),
            _std_weight_position * mean(3),
            1e-2,
            _std_weight_position * mean(3);
        DETECTBOX std_vel;
        std_vel << _std_weight_velocity * mean(3),
            _std_weight_velocity * mean(3),
            1e-5,
            _std_weight_velocity * mean(3);
        KAL_MEAN tmp;
        tmp.block<1, 4>(0, 0) = std_pos;
        tmp.block<1, 4>(0, 4) = std_vel;
        tmp = tmp.array().square();
        KAL_COVA motion_cov = tmp.asDiagonal();
        KAL_MEAN mean1 = this->_motion_mat * mean.transpose();
        KAL_COVA covariance1 = this->_motion_mat * covariance * (_motion_mat.transpose());
        covariance1 += motion_cov;

        mean = mean1;
        covariance = covariance1;
    }

    KAL_HDATA KalmanFilter::project(const KAL_MEAN &mean, const KAL_COVA &covariance)
    {
        DETECTBOX std;
        std << _std_weight_position * mean(3), _std_weight_position * mean(3),
            1e-1, _std_weight_position * mean(3);
        KAL_HMEAN mean1 = _update_mat * mean.transpose();
        KAL_HCOVA covariance1 = _update_mat * covariance * (_update_mat.transpose());
        Eigen::Matrix<float, 4, 4> diag = std.asDiagonal();
        diag = diag.array().square().matrix();
        covariance1 += diag;
        // covariance1.diagonal() << diag;
        return std::make_pair(mean1, covariance1);
    }

    KAL_DATA
    KalmanFilter::update(
        const KAL_MEAN &mean,
        const KAL_COVA &covariance,
        const DETECTBOX &measurement)
    {
        KAL_HDATA pa = project(mean, covariance);
        KAL_HMEAN projected_mean = pa.first;
        KAL_HCOVA projected_cov = pa.second;

        //chol_factor, lower =
        //scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False)
        //kalmain_gain =
        //scipy.linalg.cho_solve((cho_factor, lower),
        //np.dot(covariance, self._upadte_mat.T).T,
        //check_finite=False).T
        Eigen::Matrix<float, 4, 8> B = (covariance * (_update_mat.transpose())).transpose();
        Eigen::Matrix<float, 8, 4> kalman_gain = (projected_cov.llt().solve(B)).transpose(); // eg.8x4
        Eigen::Matrix<float, 1, 4> innovation = measurement - projected_mean; //eg.1x4
        auto tmp = innovation * (kalman_gain.transpose());
        KAL_MEAN new_mean = (mean.array() + tmp.array()).matrix();
        KAL_COVA new_covariance = covariance - kalman_gain * projected_cov * (kalman_gain.transpose());
        return std::make_pair(new_mean, new_covariance);
    }
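    // The update above is the textbook Kalman correction, solved with Cholesky (llt)
    // instead of an explicit inverse:
    //   S  = H P H^T + R            (projected_cov, with R added inside project())
    //   K  = P H^T S^-1             (kalman_gain)
    //   x' = x + (z - H x) K^T      (new_mean; row-vector convention)
    //   P' = P - K S K^T            (new_covariance)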
    Eigen::Matrix<float, 1, -1>
    KalmanFilter::gating_distance(
        const KAL_MEAN &mean,
        const KAL_COVA &covariance,
        const std::vector<DETECTBOX> &measurements,
        bool only_position)
    {
        KAL_HDATA pa = this->project(mean, covariance);
        if (only_position) {
            printf("not implement!");
            exit(0);
        }
        KAL_HMEAN mean1 = pa.first;
        KAL_HCOVA covariance1 = pa.second;

        // Eigen::Matrix<float, -1, 4, Eigen::RowMajor> d(size, 4);
        DETECTBOXSS d(measurements.size(), 4);
        int pos = 0;
        for (DETECTBOX box : measurements) {
            d.row(pos++) = box - mean1;
        }
        Eigen::Matrix<float, -1, -1, Eigen::RowMajor> factor = covariance1.llt().matrixL();
        Eigen::Matrix<float, -1, -1> z = factor.triangularView<Eigen::Lower>().solve<Eigen::OnTheRight>(d).transpose();
        auto zz = ((z.array()) * (z.array())).matrix();
        auto square_maha = zz.colwise().sum();
        return square_maha;
    }
}
--------------------------------------------------------------------------------
/bytetrack/src/lapjv.cpp:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "lapjv.h"

/** Column-reduction and reduction transfer for a dense cost matrix.
 */
int_t _ccrrt_dense(const uint_t n, cost_t *cost[],
                   int_t *free_rows, int_t *x, int_t *y, cost_t *v)
{
    int_t n_free_rows;
    boolean *unique;

    for (uint_t i = 0; i < n; i++) {
        x[i] = -1;
        v[i] = LARGE;
        y[i] = 0;
    }
    for (uint_t i = 0; i < n; i++) {
        for (uint_t j = 0; j < n; j++) {
            const cost_t c = cost[i][j];
            if (c < v[j]) {
                v[j] = c;
                y[j] = i;
            }
            PRINTF("i=%d, j=%d, c[i,j]=%f, v[j]=%f y[j]=%d\n", i, j, c, v[j], y[j]);
        }
    }
    PRINT_COST_ARRAY(v, n);
    PRINT_INDEX_ARRAY(y, n);
    NEW(unique, boolean, n);
    memset(unique, TRUE, n);
    {
        int_t j = n;
        do {
            j--;
            const int_t i = y[j];
            if (x[i] < 0) {
                x[i] = j;
            }
            else {
                unique[i] = FALSE;
                y[j] = -1;
            }
        } while (j > 0);
    }
    n_free_rows = 0;
    for (uint_t i = 0; i < n; i++) {
        if (x[i] < 0) {
            free_rows[n_free_rows++] = i;
        }
        else if (unique[i]) {
            const int_t j = x[i];
            cost_t min = LARGE;
            for (uint_t j2 = 0; j2 < n; j2++) {
                if (j2 == (uint_t)j) {
                    continue;
                }
                const cost_t c = cost[i][j2] - v[j2];
                if (c < min) {
                    min = c;
                }
            }
            PRINTF("v[%d] = %f - %f\n", j, v[j], min);
            v[j] -= min;
        }
    }
    FREE(unique);
    return n_free_rows;
}


/** Augmenting row reduction for a dense cost matrix.
75 | */ 76 | int_t _carr_dense( 77 | const uint_t n, cost_t *cost[], 78 | const uint_t n_free_rows, 79 | int_t *free_rows, int_t *x, int_t *y, cost_t *v) 80 | { 81 | uint_t current = 0; 82 | int_t new_free_rows = 0; 83 | uint_t rr_cnt = 0; 84 | PRINT_INDEX_ARRAY(x, n); 85 | PRINT_INDEX_ARRAY(y, n); 86 | PRINT_COST_ARRAY(v, n); 87 | PRINT_INDEX_ARRAY(free_rows, n_free_rows); 88 | while (current < n_free_rows) { 89 | int_t i0; 90 | int_t j1, j2; 91 | cost_t v1, v2, v1_new; 92 | boolean v1_lowers; 93 | 94 | rr_cnt++; 95 | PRINTF("current = %d rr_cnt = %d\n", current, rr_cnt); 96 | const int_t free_i = free_rows[current++]; 97 | j1 = 0; 98 | v1 = cost[free_i][0] - v[0]; 99 | j2 = -1; 100 | v2 = LARGE; 101 | for (uint_t j = 1; j < n; j++) { 102 | PRINTF("%d = %f %d = %f\n", j1, v1, j2, v2); 103 | const cost_t c = cost[free_i][j] - v[j]; 104 | if (c < v2) { 105 | if (c >= v1) { 106 | v2 = c; 107 | j2 = j; 108 | } 109 | else { 110 | v2 = v1; 111 | v1 = c; 112 | j2 = j1; 113 | j1 = j; 114 | } 115 | } 116 | } 117 | i0 = y[j1]; 118 | v1_new = v[j1] - (v2 - v1); 119 | v1_lowers = v1_new < v[j1]; 120 | PRINTF("%d %d 1=%d,%f 2=%d,%f v1'=%f(%d,%g) \n", free_i, i0, j1, v1, j2, v2, v1_new, v1_lowers, v[j1] - v1_new); 121 | if (rr_cnt < current * n) { 122 | if (v1_lowers) { 123 | v[j1] = v1_new; 124 | } 125 | else if (i0 >= 0 && j2 >= 0) { 126 | j1 = j2; 127 | i0 = y[j2]; 128 | } 129 | if (i0 >= 0) { 130 | if (v1_lowers) { 131 | free_rows[--current] = i0; 132 | } 133 | else { 134 | free_rows[new_free_rows++] = i0; 135 | } 136 | } 137 | } 138 | else { 139 | PRINTF("rr_cnt=%d >= %d (current=%d * n=%d)\n", rr_cnt, current * n, current, n); 140 | if (i0 >= 0) { 141 | free_rows[new_free_rows++] = i0; 142 | } 143 | } 144 | x[free_i] = j1; 145 | y[j1] = free_i; 146 | } 147 | return new_free_rows; 148 | } 149 | 150 | 151 | /** Find columns with minimum d[j] and put them on the SCAN list. 152 | */ 153 | uint_t _find_dense(const uint_t n, uint_t lo, cost_t *d, int_t *cols, int_t *y) 154 | { 155 | uint_t hi = lo + 1; 156 | cost_t mind = d[cols[lo]]; 157 | for (uint_t k = hi; k < n; k++) { 158 | int_t j = cols[k]; 159 | if (d[j] <= mind) { 160 | if (d[j] < mind) { 161 | hi = lo; 162 | mind = d[j]; 163 | } 164 | cols[k] = cols[hi]; 165 | cols[hi++] = j; 166 | } 167 | } 168 | return hi; 169 | } 170 | 171 | 172 | // Scan all columns in TODO starting from arbitrary column in SCAN 173 | // and try to decrease d of the TODO columns using the SCAN column. 174 | int_t _scan_dense(const uint_t n, cost_t *cost[], 175 | uint_t *plo, uint_t*phi, 176 | cost_t *d, int_t *cols, int_t *pred, 177 | int_t *y, cost_t *v) 178 | { 179 | uint_t lo = *plo; 180 | uint_t hi = *phi; 181 | cost_t h, cred_ij; 182 | 183 | while (lo != hi) { 184 | int_t j = cols[lo++]; 185 | const int_t i = y[j]; 186 | const cost_t mind = d[j]; 187 | h = cost[i][j] - v[j] - mind; 188 | PRINTF("i=%d j=%d h=%f\n", i, j, h); 189 | // For all columns in TODO 190 | for (uint_t k = hi; k < n; k++) { 191 | j = cols[k]; 192 | cred_ij = cost[i][j] - v[j] - h; 193 | if (cred_ij < d[j]) { 194 | d[j] = cred_ij; 195 | pred[j] = i; 196 | if (cred_ij == mind) { 197 | if (y[j] < 0) { 198 | return j; 199 | } 200 | cols[k] = cols[hi]; 201 | cols[hi++] = j; 202 | } 203 | } 204 | } 205 | } 206 | *plo = lo; 207 | *phi = hi; 208 | return -1; 209 | } 210 | 211 | 212 | /** Single iteration of modified Dijkstra shortest path algorithm as explained in the JV paper. 213 | * 214 | * This is a dense matrix version. 215 | * 216 | * \return The closest free column index. 
217 | */ 218 | int_t find_path_dense( 219 | const uint_t n, cost_t *cost[], 220 | const int_t start_i, 221 | int_t *y, cost_t *v, 222 | int_t *pred) 223 | { 224 | uint_t lo = 0, hi = 0; 225 | int_t final_j = -1; 226 | uint_t n_ready = 0; 227 | int_t *cols; 228 | cost_t *d; 229 | 230 | NEW(cols, int_t, n); 231 | NEW(d, cost_t, n); 232 | 233 | for (uint_t i = 0; i < n; i++) { 234 | cols[i] = i; 235 | pred[i] = start_i; 236 | d[i] = cost[start_i][i] - v[i]; 237 | } 238 | PRINT_COST_ARRAY(d, n); 239 | while (final_j == -1) { 240 | // No columns left on the SCAN list. 241 | if (lo == hi) { 242 | PRINTF("%d..%d -> find\n", lo, hi); 243 | n_ready = lo; 244 | hi = _find_dense(n, lo, d, cols, y); 245 | PRINTF("check %d..%d\n", lo, hi); 246 | PRINT_INDEX_ARRAY(cols, n); 247 | for (uint_t k = lo; k < hi; k++) { 248 | const int_t j = cols[k]; 249 | if (y[j] < 0) { 250 | final_j = j; 251 | } 252 | } 253 | } 254 | if (final_j == -1) { 255 | PRINTF("%d..%d -> scan\n", lo, hi); 256 | final_j = _scan_dense( 257 | n, cost, &lo, &hi, d, cols, pred, y, v); 258 | PRINT_COST_ARRAY(d, n); 259 | PRINT_INDEX_ARRAY(cols, n); 260 | PRINT_INDEX_ARRAY(pred, n); 261 | } 262 | } 263 | 264 | PRINTF("found final_j=%d\n", final_j); 265 | PRINT_INDEX_ARRAY(cols, n); 266 | { 267 | const cost_t mind = d[cols[lo]]; 268 | for (uint_t k = 0; k < n_ready; k++) { 269 | const int_t j = cols[k]; 270 | v[j] += d[j] - mind; 271 | } 272 | } 273 | 274 | FREE(cols); 275 | FREE(d); 276 | 277 | return final_j; 278 | } 279 | 280 | 281 | /** Augment for a dense cost matrix. 282 | */ 283 | int_t _ca_dense( 284 | const uint_t n, cost_t *cost[], 285 | const uint_t n_free_rows, 286 | int_t *free_rows, int_t *x, int_t *y, cost_t *v) 287 | { 288 | int_t *pred; 289 | 290 | NEW(pred, int_t, n); 291 | 292 | for (int_t *pfree_i = free_rows; pfree_i < free_rows + n_free_rows; pfree_i++) { 293 | int_t i = -1, j; 294 | uint_t k = 0; 295 | 296 | PRINTF("looking at free_i=%d\n", *pfree_i); 297 | j = find_path_dense(n, cost, *pfree_i, y, v, pred); 298 | ASSERT(j >= 0); 299 | ASSERT(j < n); 300 | while (i != *pfree_i) { 301 | PRINTF("augment %d\n", j); 302 | PRINT_INDEX_ARRAY(pred, n); 303 | i = pred[j]; 304 | PRINTF("y[%d]=%d -> %d\n", j, y[j], i); 305 | y[j] = i; 306 | PRINT_INDEX_ARRAY(x, n); 307 | SWAP_INDICES(j, x[i]); 308 | k++; 309 | if (k >= n) { 310 | ASSERT(FALSE); 311 | } 312 | } 313 | } 314 | FREE(pred); 315 | return 0; 316 | } 317 | 318 | 319 | /** Solve dense sparse LAP. 
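 * Pipeline: column reduction with reduction transfer (_ccrrt_dense), then up to two rounds of augmenting row reduction (_carr_dense), and finally shortest augmenting paths (_ca_dense) for any rows still free; returns 0 on success.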
320 | */ 321 | int lapjv_internal( 322 | const uint_t n, cost_t *cost[], 323 | int_t *x, int_t *y) 324 | { 325 | int ret; 326 | int_t *free_rows; 327 | cost_t *v; 328 | 329 | NEW(free_rows, int_t, n); 330 | NEW(v, cost_t, n); 331 | ret = _ccrrt_dense(n, cost, free_rows, x, y, v); 332 | int i = 0; 333 | while (ret > 0 && i < 2) { 334 | ret = _carr_dense(n, cost, ret, free_rows, x, y, v); 335 | i++; 336 | } 337 | if (ret > 0) { 338 | ret = _ca_dense(n, cost, ret, free_rows, x, y, v); 339 | } 340 | FREE(v); 341 | FREE(free_rows); 342 | return ret; 343 | } -------------------------------------------------------------------------------- /bytetrack/src/utils.cpp: -------------------------------------------------------------------------------- 1 | #include "BYTETracker.h" 2 | #include "lapjv.h" 3 | 4 | vector BYTETracker::joint_stracks(vector &tlista, vector &tlistb) 5 | { 6 | map exists; 7 | vector res; 8 | for (int i = 0; i < tlista.size(); i++) 9 | { 10 | exists.insert(pair(tlista[i]->track_id, 1)); 11 | res.push_back(tlista[i]); 12 | } 13 | for (int i = 0; i < tlistb.size(); i++) 14 | { 15 | int tid = tlistb[i].track_id; 16 | if (!exists[tid] || exists.count(tid) == 0) 17 | { 18 | exists[tid] = 1; 19 | res.push_back(&tlistb[i]); 20 | } 21 | } 22 | return res; 23 | } 24 | 25 | vector BYTETracker::joint_stracks(vector &tlista, vector &tlistb) 26 | { 27 | map exists; 28 | vector res; 29 | for (int i = 0; i < tlista.size(); i++) 30 | { 31 | exists.insert(pair(tlista[i].track_id, 1)); 32 | res.push_back(tlista[i]); 33 | } 34 | for (int i = 0; i < tlistb.size(); i++) 35 | { 36 | int tid = tlistb[i].track_id; 37 | if (!exists[tid] || exists.count(tid) == 0) 38 | { 39 | exists[tid] = 1; 40 | res.push_back(tlistb[i]); 41 | } 42 | } 43 | return res; 44 | } 45 | 46 | vector BYTETracker::sub_stracks(vector &tlista, vector &tlistb) 47 | { 48 | map stracks; 49 | for (int i = 0; i < tlista.size(); i++) 50 | { 51 | stracks.insert(pair(tlista[i].track_id, tlista[i])); 52 | } 53 | for (int i = 0; i < tlistb.size(); i++) 54 | { 55 | int tid = tlistb[i].track_id; 56 | if (stracks.count(tid) != 0) 57 | { 58 | stracks.erase(tid); 59 | } 60 | } 61 | 62 | vector res; 63 | std::map::iterator it; 64 | for (it = stracks.begin(); it != stracks.end(); ++it) 65 | { 66 | res.push_back(it->second); 67 | } 68 | 69 | return res; 70 | } 71 | 72 | void BYTETracker::remove_duplicate_stracks(vector &resa, vector &resb, vector &stracksa, vector &stracksb) 73 | { 74 | vector > pdist = iou_distance(stracksa, stracksb); 75 | vector > pairs; 76 | for (int i = 0; i < pdist.size(); i++) 77 | { 78 | for (int j = 0; j < pdist[i].size(); j++) 79 | { 80 | if (pdist[i][j] < 0.15) 81 | { 82 | pairs.push_back(pair(i, j)); 83 | } 84 | } 85 | } 86 | 87 | vector dupa, dupb; 88 | for (int i = 0; i < pairs.size(); i++) 89 | { 90 | int timep = stracksa[pairs[i].first].frame_id - stracksa[pairs[i].first].start_frame; 91 | int timeq = stracksb[pairs[i].second].frame_id - stracksb[pairs[i].second].start_frame; 92 | if (timep > timeq) 93 | dupb.push_back(pairs[i].second); 94 | else 95 | dupa.push_back(pairs[i].first); 96 | } 97 | 98 | for (int i = 0; i < stracksa.size(); i++) 99 | { 100 | vector::iterator iter = find(dupa.begin(), dupa.end(), i); 101 | if (iter == dupa.end()) 102 | { 103 | resa.push_back(stracksa[i]); 104 | } 105 | } 106 | 107 | for (int i = 0; i < stracksb.size(); i++) 108 | { 109 | vector::iterator iter = find(dupb.begin(), dupb.end(), i); 110 | if (iter == dupb.end()) 111 | { 112 | resb.push_back(stracksb[i]); 113 | } 114 | } 115 | } 
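// Illustrative note (not called from this file): the helpers above are set
// operations on track pools used by BYTETracker::update(). A sketch of a
// typical call, with hypothetical variable names:
//
//   vector<STrack> tracked_clean, lost_clean;
//   remove_duplicate_stracks(tracked_clean, lost_clean, tracked_stracks, lost_stracks);
//
// A pair counts as a duplicate when its IoU distance is below 0.15 (i.e. the
// boxes overlap with IoU > 0.85); the longer-lived track of the pair is kept.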
116 | 117 | void BYTETracker::linear_assignment(vector > &cost_matrix, int cost_matrix_size, int cost_matrix_size_size, float thresh, 118 | vector > &matches, vector &unmatched_a, vector &unmatched_b) 119 | { 120 | if (cost_matrix.size() == 0) 121 | { 122 | for (int i = 0; i < cost_matrix_size; i++) 123 | { 124 | unmatched_a.push_back(i); 125 | } 126 | for (int i = 0; i < cost_matrix_size_size; i++) 127 | { 128 | unmatched_b.push_back(i); 129 | } 130 | return; 131 | } 132 | 133 | vector rowsol; vector colsol; 134 | float c = lapjv(cost_matrix, rowsol, colsol, true, thresh); 135 | for (int i = 0; i < rowsol.size(); i++) 136 | { 137 | if (rowsol[i] >= 0) 138 | { 139 | vector match; 140 | match.push_back(i); 141 | match.push_back(rowsol[i]); 142 | matches.push_back(match); 143 | } 144 | else 145 | { 146 | unmatched_a.push_back(i); 147 | } 148 | } 149 | 150 | for (int i = 0; i < colsol.size(); i++) 151 | { 152 | if (colsol[i] < 0) 153 | { 154 | unmatched_b.push_back(i); 155 | } 156 | } 157 | } 158 | 159 | vector > BYTETracker::ious(vector > &atlbrs, vector > &btlbrs) 160 | { 161 | vector > ious; 162 | if (atlbrs.size()*btlbrs.size() == 0) 163 | return ious; 164 | 165 | ious.resize(atlbrs.size()); 166 | for (int i = 0; i < ious.size(); i++) 167 | { 168 | ious[i].resize(btlbrs.size()); 169 | } 170 | 171 | //bbox_ious 172 | for (int k = 0; k < btlbrs.size(); k++) 173 | { 174 | vector ious_tmp; 175 | float box_area = (btlbrs[k][2] - btlbrs[k][0] + 1)*(btlbrs[k][3] - btlbrs[k][1] + 1); 176 | for (int n = 0; n < atlbrs.size(); n++) 177 | { 178 | float iw = min(atlbrs[n][2], btlbrs[k][2]) - max(atlbrs[n][0], btlbrs[k][0]) + 1; 179 | if (iw > 0) 180 | { 181 | float ih = min(atlbrs[n][3], btlbrs[k][3]) - max(atlbrs[n][1], btlbrs[k][1]) + 1; 182 | if(ih > 0) 183 | { 184 | float ua = (atlbrs[n][2] - atlbrs[n][0] + 1)*(atlbrs[n][3] - atlbrs[n][1] + 1) + box_area - iw * ih; 185 | ious[n][k] = iw * ih / ua; 186 | } 187 | else 188 | { 189 | ious[n][k] = 0.0; 190 | } 191 | } 192 | else 193 | { 194 | ious[n][k] = 0.0; 195 | } 196 | } 197 | } 198 | 199 | return ious; 200 | } 201 | 202 | vector > BYTETracker::iou_distance(vector &atracks, vector &btracks, int &dist_size, int &dist_size_size) 203 | { 204 | vector > cost_matrix; 205 | if (atracks.size() * btracks.size() == 0) 206 | { 207 | dist_size = atracks.size(); 208 | dist_size_size = btracks.size(); 209 | return cost_matrix; 210 | } 211 | vector > atlbrs, btlbrs; 212 | for (int i = 0; i < atracks.size(); i++) 213 | { 214 | atlbrs.push_back(atracks[i]->tlbr); 215 | } 216 | for (int i = 0; i < btracks.size(); i++) 217 | { 218 | btlbrs.push_back(btracks[i].tlbr); 219 | } 220 | 221 | dist_size = atracks.size(); 222 | dist_size_size = btracks.size(); 223 | 224 | vector > _ious = ious(atlbrs, btlbrs); 225 | 226 | for (int i = 0; i < _ious.size();i++) 227 | { 228 | vector _iou; 229 | for (int j = 0; j < _ious[i].size(); j++) 230 | { 231 | _iou.push_back(1 - _ious[i][j]); 232 | } 233 | cost_matrix.push_back(_iou); 234 | } 235 | 236 | return cost_matrix; 237 | } 238 | 239 | vector > BYTETracker::iou_distance(vector &atracks, vector &btracks) 240 | { 241 | vector > atlbrs, btlbrs; 242 | for (int i = 0; i < atracks.size(); i++) 243 | { 244 | atlbrs.push_back(atracks[i].tlbr); 245 | } 246 | for (int i = 0; i < btracks.size(); i++) 247 | { 248 | btlbrs.push_back(btracks[i].tlbr); 249 | } 250 | 251 | vector > _ious = ious(atlbrs, btlbrs); 252 | vector > cost_matrix; 253 | for (int i = 0; i < _ious.size(); i++) 254 | { 255 | vector _iou; 256 | for (int j = 0; j < 
_ious[i].size(); j++) 257 | { 258 | _iou.push_back(1 - _ious[i][j]); 259 | } 260 | cost_matrix.push_back(_iou); 261 | } 262 | 263 | return cost_matrix; 264 | } 265 | 266 | double BYTETracker::lapjv(const vector > &cost, vector &rowsol, vector &colsol, 267 | bool extend_cost, float cost_limit, bool return_cost) 268 | { 269 | vector > cost_c; 270 | cost_c.assign(cost.begin(), cost.end()); 271 | 272 | vector > cost_c_extended; 273 | 274 | int n_rows = cost.size(); 275 | int n_cols = cost[0].size(); 276 | rowsol.resize(n_rows); 277 | colsol.resize(n_cols); 278 | 279 | int n = 0; 280 | if (n_rows == n_cols) 281 | { 282 | n = n_rows; 283 | } 284 | else 285 | { 286 | if (!extend_cost) 287 | { 288 | cout << "set extend_cost=True" << endl; 289 | system("pause"); 290 | exit(0); 291 | } 292 | } 293 | 294 | if (extend_cost || cost_limit < LONG_MAX) 295 | { 296 | n = n_rows + n_cols; 297 | cost_c_extended.resize(n); 298 | for (int i = 0; i < cost_c_extended.size(); i++) 299 | cost_c_extended[i].resize(n); 300 | 301 | if (cost_limit < LONG_MAX) 302 | { 303 | for (int i = 0; i < cost_c_extended.size(); i++) 304 | { 305 | for (int j = 0; j < cost_c_extended[i].size(); j++) 306 | { 307 | cost_c_extended[i][j] = cost_limit / 2.0; 308 | } 309 | } 310 | } 311 | else 312 | { 313 | float cost_max = -1; 314 | for (int i = 0; i < cost_c.size(); i++) 315 | { 316 | for (int j = 0; j < cost_c[i].size(); j++) 317 | { 318 | if (cost_c[i][j] > cost_max) 319 | cost_max = cost_c[i][j]; 320 | } 321 | } 322 | for (int i = 0; i < cost_c_extended.size(); i++) 323 | { 324 | for (int j = 0; j < cost_c_extended[i].size(); j++) 325 | { 326 | cost_c_extended[i][j] = cost_max + 1; 327 | } 328 | } 329 | } 330 | 331 | for (int i = n_rows; i < cost_c_extended.size(); i++) 332 | { 333 | for (int j = n_cols; j < cost_c_extended[i].size(); j++) 334 | { 335 | cost_c_extended[i][j] = 0; 336 | } 337 | } 338 | for (int i = 0; i < n_rows; i++) 339 | { 340 | for (int j = 0; j < n_cols; j++) 341 | { 342 | cost_c_extended[i][j] = cost_c[i][j]; 343 | } 344 | } 345 | 346 | cost_c.clear(); 347 | cost_c.assign(cost_c_extended.begin(), cost_c_extended.end()); 348 | } 349 | 350 | double **cost_ptr; 351 | cost_ptr = new double *[sizeof(double *) * n]; 352 | for (int i = 0; i < n; i++) 353 | cost_ptr[i] = new double[sizeof(double) * n]; 354 | 355 | for (int i = 0; i < n; i++) 356 | { 357 | for (int j = 0; j < n; j++) 358 | { 359 | cost_ptr[i][j] = cost_c[i][j]; 360 | } 361 | } 362 | 363 | int* x_c = new int[sizeof(int) * n]; 364 | int *y_c = new int[sizeof(int) * n]; 365 | 366 | int ret = lapjv_internal(n, cost_ptr, x_c, y_c); 367 | if (ret != 0) 368 | { 369 | cout << "Calculate Wrong!" 
<< endl; 370 | system("pause"); 371 | exit(0); 372 | } 373 | 374 | double opt = 0.0; 375 | 376 | if (n != n_rows) 377 | { 378 | for (int i = 0; i < n; i++) 379 | { 380 | if (x_c[i] >= n_cols) 381 | x_c[i] = -1; 382 | if (y_c[i] >= n_rows) 383 | y_c[i] = -1; 384 | } 385 | for (int i = 0; i < n_rows; i++) 386 | { 387 | rowsol[i] = x_c[i]; 388 | } 389 | for (int i = 0; i < n_cols; i++) 390 | { 391 | colsol[i] = y_c[i]; 392 | } 393 | 394 | if (return_cost) 395 | { 396 | for (int i = 0; i < rowsol.size(); i++) 397 | { 398 | if (rowsol[i] != -1) 399 | { 400 | //cout << i << "\t" << rowsol[i] << "\t" << cost_ptr[i][rowsol[i]] << endl; 401 | opt += cost_ptr[i][rowsol[i]]; 402 | } 403 | } 404 | } 405 | } 406 | else if (return_cost) 407 | { 408 | for (int i = 0; i < rowsol.size(); i++) 409 | { 410 | opt += cost_ptr[i][rowsol[i]]; 411 | } 412 | } 413 | 414 | for (int i = 0; i < n; i++) 415 | { 416 | delete[]cost_ptr[i]; 417 | } 418 | delete[]cost_ptr; 419 | delete[]x_c; 420 | delete[]y_c; 421 | 422 | return opt; 423 | } 424 | 425 | Scalar BYTETracker::get_color(int idx) 426 | { 427 | idx += 3; 428 | return Scalar(37 * idx % 255, 17 * idx % 255, 29 * idx % 255); 429 | } -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "yolov8_lib.h" 4 | #include "BYTETracker.h" 5 | 6 | 7 | // 需要跟踪的类别,可以根据自己需求调整,筛选自己想要跟踪的对象的种类(以下对应COCO数据集类别索引) 8 | std::vector trackClasses {0, 1, 2, 3, 5, 7}; // person, bicycle, car, motorcycle, bus, truck 9 | 10 | bool isTrackingClass(int class_id){ 11 | for (auto& c : trackClasses){ 12 | if (class_id == c) return true; 13 | } 14 | return false; 15 | } 16 | 17 | int run(char* videoPath) 18 | { 19 | // read video 20 | std::string input_video_path = std::string(videoPath); 21 | cv::VideoCapture cap(input_video_path); 22 | if ( !cap.isOpened() ) return 0; 23 | 24 | int img_w = cap.get(CAP_PROP_FRAME_WIDTH); 25 | int img_h = cap.get(CAP_PROP_FRAME_HEIGHT); 26 | int fps = cap.get(CAP_PROP_FPS); 27 | long nFrame = static_cast(cap.get(CAP_PROP_FRAME_COUNT)); 28 | cout << "Total frames: " << nFrame << endl; 29 | 30 | cv::VideoWriter writer("result.mp4", VideoWriter::fourcc('m', 'p', '4', 'v'), fps, Size(img_w, img_h)); 31 | 32 | // YOLOv8 predictor 33 | std::string trtFile = "../yolo/engine/yolov8s.engine"; 34 | YoloDetecter detecter(trtFile); 35 | 36 | // ByteTrack tracker 37 | BYTETracker tracker(fps, 30); 38 | 39 | cv::Mat img; 40 | int num_frames = 0; 41 | int total_ms = 0; 42 | while (true) 43 | { 44 | if(!cap.read(img)) break; 45 | num_frames ++; 46 | if (num_frames % 20 == 0) 47 | { 48 | cout << "Processing frame " << num_frames << " (" << num_frames * 1000000 / total_ms << " fps)" << endl; 49 | } 50 | if (img.empty()) break; 51 | 52 | auto start = std::chrono::system_clock::now(); 53 | 54 | // yolo inference 55 | std::vector res = detecter.inference(img); 56 | 57 | // yolo output format to bytetrack input format, and filter bbox by class id 58 | std::vector objects; 59 | for (long unsigned int j = 0; j < res.size(); j++) 60 | { 61 | cv::Rect r = res[j].tlwh; 62 | float conf = (float)res[j].conf; 63 | int class_id = (int)res[j].class_id; 64 | 65 | if (isTrackingClass(class_id)){ 66 | cv::Rect_ rect((float)r.x, (float)r.y, (float)r.width, (float)r.height); 67 | Object obj {rect, class_id, conf}; 68 | objects.push_back(obj); 69 | } 70 | } 71 | 72 | // track 73 | std::vector output_stracks = tracker.update(objects); 74 | 75 | 
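// Note: update() performs ByteTrack's two-stage association (high-score
// detections are matched first, then low-score detections rescue the
// remaining unmatched tracks) and returns only the currently activated
// tracks, each carrying a persistent track_id and a tlwh box, which the
// drawing code below relies on.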
auto end = std::chrono::system_clock::now(); 76 | total_ms = total_ms + std::chrono::duration_cast<std::chrono::microseconds>(end - start).count(); 77 | 78 | for (int i = 0; i < output_stracks.size(); i++) 79 | { 80 | std::vector<float> tlwh = output_stracks[i].tlwh; 81 | // bool vertical = tlwh[2] / tlwh[3] > 1.6; 82 | // if (tlwh[2] * tlwh[3] > 20 && !vertical) 83 | if (tlwh[2] * tlwh[3] > 20) 84 | { 85 | cv::Scalar s = tracker.get_color(output_stracks[i].track_id); 86 | cv::putText(img, cv::format("%d", output_stracks[i].track_id), cv::Point(tlwh[0], tlwh[1] - 5), 87 | 0, 0.6, cv::Scalar(0, 0, 255), 2, cv::LINE_AA); 88 | cv::rectangle(img, cv::Rect(tlwh[0], tlwh[1], tlwh[2], tlwh[3]), s, 2); 89 | } 90 | } 91 | cv::putText(img, cv::format("frame: %d fps: %d num: %ld", num_frames, num_frames * 1000000 / total_ms, output_stracks.size()), 92 | cv::Point(0, 30), 0, 0.6, cv::Scalar(0, 0, 255), 2, cv::LINE_AA); 93 | writer.write(img); 94 | 95 | // cv::imshow("img", img); 96 | int c = cv::waitKey(1); 97 | if (c == 27) break; // ESC to exit 98 | } 99 | 100 | cap.release(); 101 | std::cout << "FPS: " << num_frames * 1000000 / total_ms << std::endl; 102 | 103 | return 0; 104 | } 105 | 106 | 107 | int main(int argc, char *argv[]) 108 | { 109 | if (argc != 2) 110 | { 111 | std::cerr << "Arguments not right!" << std::endl; 112 | std::cerr << "Usage: ./main [video path]" << std::endl; 113 | std::cerr << "Example: ./main ./test_videos/demo.mp4" << std::endl; 114 | return -1; 115 | } 116 | 117 | return run(argv[1]); 118 | } 119 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | project(yolov8) 4 | 5 | add_definitions(-std=c++11) 6 | add_definitions(-DAPI_EXPORTS) 7 | set(CMAKE_CXX_STANDARD 11) 8 | set(CMAKE_BUILD_TYPE Debug) 9 | 10 | set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc) 11 | enable_language(CUDA) 12 | 13 | include_directories(${PROJECT_SOURCE_DIR}/include) 14 | include_directories(${PROJECT_SOURCE_DIR}/plugin) 15 | 16 | # include and link dirs of cuda and tensorrt, you need to adapt them if yours are different 17 | # ============= cuda ============ 18 | find_package(CUDA REQUIRED) 19 | include_directories(/usr/local/cuda/include) 20 | link_directories(/usr/local/cuda/lib64) 21 | 22 | # ============= tensorrt ============ 23 | if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") 24 | message("Embed_platform on") 25 | include_directories(/usr/include/aarch64-linux-gnu) 26 | link_directories(/usr/lib/aarch64-linux-gnu) 27 | else() 28 | message("Embed_platform off") 29 | include_directories(/usr/include/x86_64-linux-gnu) 30 | link_directories(/usr/lib/x86_64-linux-gnu) 31 | endif() 32 | 33 | add_library(myplugins SHARED ${PROJECT_SOURCE_DIR}/plugin/yololayer.cu) 34 | target_link_libraries(myplugins nvinfer cudart) 35 | 36 | find_package(OpenCV) 37 | include_directories(${OpenCV_INCLUDE_DIRS}) 38 | 39 | 40 | file(GLOB_RECURSE SRCS ${PROJECT_SOURCE_DIR}/src/*.cpp ${PROJECT_SOURCE_DIR}/src/*.cu) 41 | add_executable(yolov8 ${PROJECT_SOURCE_DIR}/main.cpp ${SRCS}) 42 | 43 | target_link_libraries(yolov8 nvinfer) 44 | target_link_libraries(yolov8 cudart) 45 | target_link_libraries(yolov8 myplugins) 46 | target_link_libraries(yolov8 ${OpenCV_LIBS}) 47 | 48 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/README.md: -------------------------------------------------------------------------------- 1 | # 
yolov8 2 | 3 | The PyTorch implementation is [ultralytics/yolov8](https://github.com/ultralytics/ultralytics/tree/main/ultralytics). 4 | 5 | The TensorRT code is derived from [xiaocao-tian/yolov8_tensorrt](https://github.com/xiaocao-tian/yolov8_tensorrt). 6 | 7 | 8 | ## Requirements 9 | 10 | - TensorRT 8.0+ 11 | - OpenCV 3.4.0+ 12 | 13 | ## Different versions of yolov8 14 | 15 | Currently, we support yolov8. 16 | 17 | - For yolov8, download the .pt file from [https://github.com/ultralytics/assets/releases](https://github.com/ultralytics/assets/releases), then follow the How to Run steps on this page. 18 | 19 | ## Config 20 | 21 | - Choose the model n/s/m/l/x from the command line arguments. 22 | - Check more configs in [include/config.h](./include/config.h) 23 | 24 | ## How to Run, yolov8n as example 25 | 26 | 1. Generate .wts from the PyTorch .pt, or download .wts from the model zoo 27 | 28 | ``` 29 | // download https://github.com/ultralytics/assets/releases/yolov8n.pt 30 | cp {tensorrtx}/yolov8/gen_wts.py {ultralytics}/ultralytics 31 | cd {ultralytics}/ultralytics 32 | python gen_wts.py 33 | // a file 'yolov8n.wts' will be generated. 34 | ``` 35 | 36 | 2. Build tensorrtx/yolov8 and run 37 | 38 | ``` 39 | cd {tensorrtx}/yolov8/ 40 | // update kNumClass in config.h if your model is trained on a custom dataset 41 | mkdir build 42 | cd build 43 | cp {ultralytics}/ultralytics/yolov8n.wts {tensorrtx}/yolov8/build 44 | cmake .. 45 | make 46 | sudo ./yolov8 -s [.wts] [.engine] [n/s/m/l/x] // serialize model to plan file 47 | sudo ./yolov8 -d [.engine] [image folder] [c/g] // deserialize and run inference, the images in [image folder] will be processed. 48 | // For example yolov8n 49 | sudo ./yolov8 -s yolov8n.wts yolov8n.engine n 50 | sudo ./yolov8 -d yolov8n.engine ../images c // cpu postprocess 51 | sudo ./yolov8 -d yolov8n.engine ../images g // gpu postprocess 52 | 53 | ``` 54 | 3. Check the generated images, e.g. _zidane.jpg and _bus.jpg 55 | 56 | 4. Optional: load and run the TensorRT model in Python 57 | 58 | ``` 59 | // install python-tensorrt, pycuda, etc. 60 | // ensure yolov8n.engine and libmyplugins.so have been built 61 | python yolov8_trt.py 62 | ``` 63 | 64 | ## INT8 Quantization 65 | 66 | 1. Prepare calibration images; you can randomly select around 1,000 images from your training set. For COCO, you can also download the calibration images `coco_calib` from [GoogleDrive](https://drive.google.com/drive/folders/1s7jE9DtOngZMzJC1uL307J2MiaGwdRSI?usp=sharing) or [BaiduPan](https://pan.baidu.com/s/1GOm_-JobpyLMAqZWCDUhKg), pwd: a9wh 67 | 68 | 2. Unzip it in yolov8/build 69 | 70 | 3. Set the macro `USE_INT8` in config.h and rebuild 71 | 72 | 4. Serialize the model and test, as shown below 73 | 74 | 
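The INT8 workflow reuses the serialize/test commands from step 2 of How to Run; calibration happens during serialization and the computed scales are cached in a calibration table so later builds can reuse them. A minimal sketch (the `yolov8n-int8.engine` name is just an example):

```
// with USE_INT8 enabled in config.h and the project rebuilt:
sudo ./yolov8 -s yolov8n.wts yolov8n-int8.engine n   // calibrates on the coco_calib images while serializing
sudo ./yolov8 -d yolov8n-int8.engine ../images c     // test the quantized engine
```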
77 | 78 | ## More Information 79 | 80 | See the readme in [home page.](https://github.com/wang-xinyu/tensorrtx) 81 | 82 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/gen_wts.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | import os 4 | import struct 5 | import torch 6 | 7 | pt_file = "./weights/yolov8s.pt" 8 | wts_file = "./weights/yolov8s.wts" 9 | 10 | # Initialize 11 | device = 'cpu' 12 | 13 | # Load model 14 | model = torch.load(pt_file, map_location=device)['model'].float() # load to FP32 15 | 16 | anchor_grid = model.model[-1].anchors * model.model[-1].stride[..., None, None] 17 | 18 | delattr(model.model[-1], 'anchors') 19 | 20 | model.to(device).eval() 21 | 22 | with open(wts_file, 'w') as f: 23 | f.write('{}\n'.format(len(model.state_dict().keys()))) 24 | for k, v in model.state_dict().items(): 25 | vr = v.reshape(-1).cpu().numpy() 26 | f.write('{} {} '.format(k, len(vr))) 27 | for vv in vr: 28 | f.write(' ') 29 | f.write(struct.pack('>f', float(vv)).hex()) 30 | f.write('\n') 31 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/images/10001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emptysoal/TensorRT-YOLOv8-ByteTrack/cec012e0672dd3d1f79a8dc29875be9c56feedb5/tensorrtx-yolov8/images/10001.jpg -------------------------------------------------------------------------------- /tensorrtx-yolov8/images/10002.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emptysoal/TensorRT-YOLOv8-ByteTrack/cec012e0672dd3d1f79a8dc29875be9c56feedb5/tensorrtx-yolov8/images/10002.jpeg -------------------------------------------------------------------------------- /tensorrtx-yolov8/include/block.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include "NvInfer.h" 6 | 7 | std::map loadWeights(const std::string file); 8 | 9 | nvinfer1::IElementWiseLayer* convBnSiLU(nvinfer1::INetworkDefinition* network, std::map weightMap, 10 | nvinfer1::ITensor& input, int ch, int k, int s, int p, std::string lname); 11 | 12 | nvinfer1::IElementWiseLayer* C2F(nvinfer1::INetworkDefinition* network, std::map weightMap, 13 | nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname); 14 | 15 | nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network, std::map weightMap, 16 | nvinfer1::ITensor& input, int c1, int c2, int k, std::string lname); 17 | 18 | nvinfer1::IShuffleLayer* DFL(nvinfer1::INetworkDefinition* network, std::map weightMap, 19 | nvinfer1::ITensor& input, int ch, int grid, int k, int s, int p, std::string lname); 20 | 21 | nvinfer1::IPluginV2Layer* addYoLoLayer(nvinfer1::INetworkDefinition *network, std::vector dets); 22 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/include/calibrator.h: -------------------------------------------------------------------------------- 1 | #ifndef ENTROPY_CALIBRATOR_H 2 | #define ENTROPY_CALIBRATOR_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "macros.h" 8 | 9 | //! \class Int8EntropyCalibrator2 10 | //! 11 | //! \brief Implements Entropy calibrator 2. 12 | //! CalibrationAlgoType is kENTROPY_CALIBRATION_2. 13 | //! 
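//!
//! Usage sketch (illustrative, not part of this header): during an INT8
//! engine build the calibrator is attached to the builder config, e.g.
//!
//!   Int8EntropyCalibrator2 calibrator(1, kInputW, kInputH, "./coco_calib/", "int8calib.table", kInputTensorName);
//!   config->setFlag(nvinfer1::BuilderFlag::kINT8);
//!   config->setInt8Calibrator(&calibrator);
//!
//! TensorRT then pulls preprocessed batches through getBatch() and caches the
//! computed scales via writeCalibrationCache(), so subsequent builds can skip
//! calibration by reading the cache back.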
14 | class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 15 | { 16 | public: 17 | Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache = true); 18 | virtual ~Int8EntropyCalibrator2(); 19 | int getBatchSize() const TRT_NOEXCEPT override; 20 | bool getBatch(void* bindings[], const char* names[], int nbBindings) TRT_NOEXCEPT override; 21 | const void* readCalibrationCache(size_t& length) TRT_NOEXCEPT override; 22 | void writeCalibrationCache(const void* cache, size_t length) TRT_NOEXCEPT override; 23 | 24 | private: 25 | int batchsize_; 26 | int input_w_; 27 | int input_h_; 28 | int img_idx_; 29 | std::string img_dir_; 30 | std::vector img_files_; 31 | size_t input_count_; 32 | std::string calib_table_name_; 33 | const char* input_blob_name_; 34 | bool read_cache_; 35 | void* device_input_; 36 | std::vector calib_cache_; 37 | }; 38 | 39 | #endif // ENTROPY_CALIBRATOR_H 40 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/include/config.h: -------------------------------------------------------------------------------- 1 | #define USE_FP16 2 | //#define USE_INT8 3 | 4 | const static char *kInputTensorName = "images"; 5 | const static char *kOutputTensorName = "output"; 6 | const static int kNumClass = 80; 7 | const static int kBatchSize = 1; 8 | const static int kGpuId = 0; 9 | const static int kInputH = 640; 10 | const static int kInputW = 640; 11 | const static float kNmsThresh = 0.45f; 12 | const static float kConfThresh = 0.5f; 13 | const static int kMaxInputImageSize = 3000 * 3000; 14 | const static int kMaxNumOutputBbox = 1000; 15 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/include/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef TRTX_CUDA_UTILS_H_ 2 | #define TRTX_CUDA_UTILS_H_ 3 | 4 | #include 5 | 6 | #ifndef CUDA_CHECK 7 | #define CUDA_CHECK(callstr)\ 8 | {\ 9 | cudaError_t error_code = callstr;\ 10 | if (error_code != cudaSuccess) {\ 11 | std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__;\ 12 | assert(0);\ 13 | }\ 14 | } 15 | #endif // CUDA_CHECK 16 | 17 | #endif // TRTX_CUDA_UTILS_H_ 18 | 19 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/include/logging.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef TENSORRT_LOGGING_H 18 | #define TENSORRT_LOGGING_H 19 | 20 | #include "NvInferRuntimeCommon.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include "macros.h" 29 | 30 | using Severity = nvinfer1::ILogger::Severity; 31 | 32 | class LogStreamConsumerBuffer : public std::stringbuf 33 | { 34 | public: 35 | LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog) 36 | : mOutput(stream) 37 | , mPrefix(prefix) 38 | , mShouldLog(shouldLog) 39 | { 40 | } 41 | 42 | LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) 43 | : mOutput(other.mOutput) 44 | { 45 | } 46 | 47 | ~LogStreamConsumerBuffer() 48 | { 49 | // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence 50 | // std::streambuf::pptr() gives a pointer to the current position of the output sequence 51 | // if the pointer to the beginning is not equal to the pointer to the current position, 52 | // call putOutput() to log the output to the stream 53 | if (pbase() != pptr()) 54 | { 55 | putOutput(); 56 | } 57 | } 58 | 59 | // synchronizes the stream buffer and returns 0 on success 60 | // synchronizing the stream buffer consists of inserting the buffer contents into the stream, 61 | // resetting the buffer and flushing the stream 62 | virtual int sync() 63 | { 64 | putOutput(); 65 | return 0; 66 | } 67 | 68 | void putOutput() 69 | { 70 | if (mShouldLog) 71 | { 72 | // prepend timestamp 73 | std::time_t timestamp = std::time(nullptr); 74 | tm* tm_local = std::localtime(×tamp); 75 | std::cout << "["; 76 | std::cout << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/"; 77 | std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/"; 78 | std::cout << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-"; 79 | std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":"; 80 | std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":"; 81 | std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] "; 82 | // std::stringbuf::str() gets the string contents of the buffer 83 | // insert the buffer contents pre-appended by the appropriate prefix into the stream 84 | mOutput << mPrefix << str(); 85 | // set the buffer to empty 86 | str(""); 87 | // flush the stream 88 | mOutput.flush(); 89 | } 90 | } 91 | 92 | void setShouldLog(bool shouldLog) 93 | { 94 | mShouldLog = shouldLog; 95 | } 96 | 97 | private: 98 | std::ostream& mOutput; 99 | std::string mPrefix; 100 | bool mShouldLog; 101 | }; 102 | 103 | //! 104 | //! \class LogStreamConsumerBase 105 | //! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer 106 | //! 107 | class LogStreamConsumerBase 108 | { 109 | public: 110 | LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog) 111 | : mBuffer(stream, prefix, shouldLog) 112 | { 113 | } 114 | 115 | protected: 116 | LogStreamConsumerBuffer mBuffer; 117 | }; 118 | 119 | //! 120 | //! \class LogStreamConsumer 121 | //! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages. 122 | //! Order of base classes is LogStreamConsumerBase and then std::ostream. 123 | //! This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field 124 | //! in LogStreamConsumer and then the address of the buffer is passed to std::ostream. 125 | //! 
This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream. 126 | //! Please do not change the order of the parent classes. 127 | //! 128 | class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream 129 | { 130 | public: 131 | //! \brief Creates a LogStreamConsumer which logs messages with level severity. 132 | //! Reportable severity determines if the messages are severe enough to be logged. 133 | LogStreamConsumer(Severity reportableSeverity, Severity severity) 134 | : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity) 135 | , std::ostream(&mBuffer) // links the stream buffer with the stream 136 | , mShouldLog(severity <= reportableSeverity) 137 | , mSeverity(severity) 138 | { 139 | } 140 | 141 | LogStreamConsumer(LogStreamConsumer&& other) 142 | : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog) 143 | , std::ostream(&mBuffer) // links the stream buffer with the stream 144 | , mShouldLog(other.mShouldLog) 145 | , mSeverity(other.mSeverity) 146 | { 147 | } 148 | 149 | void setReportableSeverity(Severity reportableSeverity) 150 | { 151 | mShouldLog = mSeverity <= reportableSeverity; 152 | mBuffer.setShouldLog(mShouldLog); 153 | } 154 | 155 | private: 156 | static std::ostream& severityOstream(Severity severity) 157 | { 158 | return severity >= Severity::kINFO ? std::cout : std::cerr; 159 | } 160 | 161 | static std::string severityPrefix(Severity severity) 162 | { 163 | switch (severity) 164 | { 165 | case Severity::kINTERNAL_ERROR: return "[F] "; 166 | case Severity::kERROR: return "[E] "; 167 | case Severity::kWARNING: return "[W] "; 168 | case Severity::kINFO: return "[I] "; 169 | case Severity::kVERBOSE: return "[V] "; 170 | default: assert(0); return ""; 171 | } 172 | } 173 | 174 | bool mShouldLog; 175 | Severity mSeverity; 176 | }; 177 | 178 | //! \class Logger 179 | //! 180 | //! \brief Class which manages logging of TensorRT tools and samples 181 | //! 182 | //! \details This class provides a common interface for TensorRT tools and samples to log information to the console, 183 | //! and supports logging two types of messages: 184 | //! 185 | //! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal) 186 | //! - Test pass/fail messages 187 | //! 188 | //! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is 189 | //! that the logic for controlling the verbosity and formatting of sample output is centralized in one location. 190 | //! 191 | //! In the future, this class could be extended to support dumping test results to a file in some standard format 192 | //! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run). 193 | //! 194 | //! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger 195 | //! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT 196 | //! library and messages coming from the sample. 197 | //! 198 | //! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the 199 | //! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger 200 | //! object. 
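// Typical use in this project: a global `Logger gLogger;` is declared in
// main.cpp and passed to createInferBuilder()/createInferRuntime(), so
// TensorRT messages at or above the reportable severity (kWARNING by
// default below) are printed with the [TRT] prefix.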
201 | 202 | class Logger : public nvinfer1::ILogger 203 | { 204 | public: 205 | Logger(Severity severity = Severity::kWARNING) 206 | : mReportableSeverity(severity) 207 | { 208 | } 209 | 210 | //! 211 | //! \enum TestResult 212 | //! \brief Represents the state of a given test 213 | //! 214 | enum class TestResult 215 | { 216 | kRUNNING, //!< The test is running 217 | kPASSED, //!< The test passed 218 | kFAILED, //!< The test failed 219 | kWAIVED //!< The test was waived 220 | }; 221 | 222 | //! 223 | //! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger 224 | //! \return The nvinfer1::ILogger associated with this Logger 225 | //! 226 | //! TODO Once all samples are updated to use this method to register the logger with TensorRT, 227 | //! we can eliminate the inheritance of Logger from ILogger 228 | //! 229 | nvinfer1::ILogger& getTRTLogger() 230 | { 231 | return *this; 232 | } 233 | 234 | //! 235 | //! \brief Implementation of the nvinfer1::ILogger::log() virtual method 236 | //! 237 | //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the 238 | //! inheritance from nvinfer1::ILogger 239 | //! 240 | void log(Severity severity, const char* msg) TRT_NOEXCEPT override 241 | { 242 | LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl; 243 | } 244 | 245 | //! 246 | //! \brief Method for controlling the verbosity of logging output 247 | //! 248 | //! \param severity The logger will only emit messages that have severity of this level or higher. 249 | //! 250 | void setReportableSeverity(Severity severity) 251 | { 252 | mReportableSeverity = severity; 253 | } 254 | 255 | //! 256 | //! \brief Opaque handle that holds logging information for a particular test 257 | //! 258 | //! This object is an opaque handle to information used by the Logger to print test results. 259 | //! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used 260 | //! with Logger::reportTest{Start,End}(). 261 | //! 262 | class TestAtom 263 | { 264 | public: 265 | TestAtom(TestAtom&&) = default; 266 | 267 | private: 268 | friend class Logger; 269 | 270 | TestAtom(bool started, const std::string& name, const std::string& cmdline) 271 | : mStarted(started) 272 | , mName(name) 273 | , mCmdline(cmdline) 274 | { 275 | } 276 | 277 | bool mStarted; 278 | std::string mName; 279 | std::string mCmdline; 280 | }; 281 | 282 | //! 283 | //! \brief Define a test for logging 284 | //! 285 | //! \param[in] name The name of the test. This should be a string starting with 286 | //! "TensorRT" and containing dot-separated strings containing 287 | //! the characters [A-Za-z0-9_]. 288 | //! For example, "TensorRT.sample_googlenet" 289 | //! \param[in] cmdline The command line used to reproduce the test 290 | // 291 | //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). 292 | //! 293 | static TestAtom defineTest(const std::string& name, const std::string& cmdline) 294 | { 295 | return TestAtom(false, name, cmdline); 296 | } 297 | 298 | //! 299 | //! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments 300 | //! as input 301 | //! 302 | //! \param[in] name The name of the test 303 | //! \param[in] argc The number of command-line arguments 304 | //! \param[in] argv The array of command-line arguments (given as C strings) 305 | //! 306 | //! 
\return a TestAtom that can be used in Logger::reportTest{Start,End}(). 307 | static TestAtom defineTest(const std::string& name, int argc, char const* const* argv) 308 | { 309 | auto cmdline = genCmdlineString(argc, argv); 310 | return defineTest(name, cmdline); 311 | } 312 | 313 | //! 314 | //! \brief Report that a test has started. 315 | //! 316 | //! \pre reportTestStart() has not been called yet for the given testAtom 317 | //! 318 | //! \param[in] testAtom The handle to the test that has started 319 | //! 320 | static void reportTestStart(TestAtom& testAtom) 321 | { 322 | reportTestResult(testAtom, TestResult::kRUNNING); 323 | assert(!testAtom.mStarted); 324 | testAtom.mStarted = true; 325 | } 326 | 327 | //! 328 | //! \brief Report that a test has ended. 329 | //! 330 | //! \pre reportTestStart() has been called for the given testAtom 331 | //! 332 | //! \param[in] testAtom The handle to the test that has ended 333 | //! \param[in] result The result of the test. Should be one of TestResult::kPASSED, 334 | //! TestResult::kFAILED, TestResult::kWAIVED 335 | //! 336 | static void reportTestEnd(const TestAtom& testAtom, TestResult result) 337 | { 338 | assert(result != TestResult::kRUNNING); 339 | assert(testAtom.mStarted); 340 | reportTestResult(testAtom, result); 341 | } 342 | 343 | static int reportPass(const TestAtom& testAtom) 344 | { 345 | reportTestEnd(testAtom, TestResult::kPASSED); 346 | return EXIT_SUCCESS; 347 | } 348 | 349 | static int reportFail(const TestAtom& testAtom) 350 | { 351 | reportTestEnd(testAtom, TestResult::kFAILED); 352 | return EXIT_FAILURE; 353 | } 354 | 355 | static int reportWaive(const TestAtom& testAtom) 356 | { 357 | reportTestEnd(testAtom, TestResult::kWAIVED); 358 | return EXIT_SUCCESS; 359 | } 360 | 361 | static int reportTest(const TestAtom& testAtom, bool pass) 362 | { 363 | return pass ? reportPass(testAtom) : reportFail(testAtom); 364 | } 365 | 366 | Severity getReportableSeverity() const 367 | { 368 | return mReportableSeverity; 369 | } 370 | 371 | private: 372 | //! 373 | //! \brief returns an appropriate string for prefixing a log message with the given severity 374 | //! 375 | static const char* severityPrefix(Severity severity) 376 | { 377 | switch (severity) 378 | { 379 | case Severity::kINTERNAL_ERROR: return "[F] "; 380 | case Severity::kERROR: return "[E] "; 381 | case Severity::kWARNING: return "[W] "; 382 | case Severity::kINFO: return "[I] "; 383 | case Severity::kVERBOSE: return "[V] "; 384 | default: assert(0); return ""; 385 | } 386 | } 387 | 388 | //! 389 | //! \brief returns an appropriate string for prefixing a test result message with the given result 390 | //! 391 | static const char* testResultString(TestResult result) 392 | { 393 | switch (result) 394 | { 395 | case TestResult::kRUNNING: return "RUNNING"; 396 | case TestResult::kPASSED: return "PASSED"; 397 | case TestResult::kFAILED: return "FAILED"; 398 | case TestResult::kWAIVED: return "WAIVED"; 399 | default: assert(0); return ""; 400 | } 401 | } 402 | 403 | //! 404 | //! \brief returns an appropriate output stream (cout or cerr) to use with the given severity 405 | //! 406 | static std::ostream& severityOstream(Severity severity) 407 | { 408 | return severity >= Severity::kINFO ? std::cout : std::cerr; 409 | } 410 | 411 | //! 412 | //! \brief method that implements logging test results 413 | //! 
414 | static void reportTestResult(const TestAtom& testAtom, TestResult result) 415 | { 416 | severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # " 417 | << testAtom.mCmdline << std::endl; 418 | } 419 | 420 | //! 421 | //! \brief generate a command line string from the given (argc, argv) values 422 | //! 423 | static std::string genCmdlineString(int argc, char const* const* argv) 424 | { 425 | std::stringstream ss; 426 | for (int i = 0; i < argc; i++) 427 | { 428 | if (i > 0) 429 | ss << " "; 430 | ss << argv[i]; 431 | } 432 | return ss.str(); 433 | } 434 | 435 | Severity mReportableSeverity; 436 | }; 437 | 438 | namespace 439 | { 440 | 441 | //! 442 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE 443 | //! 444 | //! Example usage: 445 | //! 446 | //! LOG_VERBOSE(logger) << "hello world" << std::endl; 447 | //! 448 | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) 449 | { 450 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE); 451 | } 452 | 453 | //! 454 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO 455 | //! 456 | //! Example usage: 457 | //! 458 | //! LOG_INFO(logger) << "hello world" << std::endl; 459 | //! 460 | inline LogStreamConsumer LOG_INFO(const Logger& logger) 461 | { 462 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO); 463 | } 464 | 465 | //! 466 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING 467 | //! 468 | //! Example usage: 469 | //! 470 | //! LOG_WARN(logger) << "hello world" << std::endl; 471 | //! 472 | inline LogStreamConsumer LOG_WARN(const Logger& logger) 473 | { 474 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING); 475 | } 476 | 477 | //! 478 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR 479 | //! 480 | //! Example usage: 481 | //! 482 | //! LOG_ERROR(logger) << "hello world" << std::endl; 483 | //! 484 | inline LogStreamConsumer LOG_ERROR(const Logger& logger) 485 | { 486 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR); 487 | } 488 | 489 | //! 490 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR 491 | // ("fatal" severity) 492 | //! 493 | //! Example usage: 494 | //! 495 | //! LOG_FATAL(logger) << "hello world" << std::endl; 496 | //! 
497 | inline LogStreamConsumer LOG_FATAL(const Logger& logger) 498 | { 499 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR); 500 | } 501 | 502 | } // anonymous namespace 503 | 504 | #endif // TENSORRT_LOGGING_H 505 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/include/macros.h: -------------------------------------------------------------------------------- 1 | #ifndef __MACROS_H 2 | #define __MACROS_H 3 | 4 | #include "NvInfer.h" 5 | 6 | #ifdef API_EXPORTS 7 | #if defined(_MSC_VER) 8 | #define API __declspec(dllexport) 9 | #else 10 | #define API __attribute__((visibility("default"))) 11 | #endif 12 | #else 13 | 14 | #if defined(_MSC_VER) 15 | #define API __declspec(dllimport) 16 | #else 17 | #define API 18 | #endif 19 | #endif // API_EXPORTS 20 | 21 | #if NV_TENSORRT_MAJOR >= 8 22 | #define TRT_NOEXCEPT noexcept 23 | #define TRT_CONST_ENQUEUE const 24 | #else 25 | #define TRT_NOEXCEPT 26 | #define TRT_CONST_ENQUEUE 27 | #endif 28 | 29 | #endif // __MACROS_H 30 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/include/model.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "NvInfer.h" 3 | #include 4 | #include 5 | 6 | nvinfer1::IHostMemory* buildEngineYolov8n(nvinfer1::IBuilder* builder, 7 | nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, const std::string& wts_path); 8 | 9 | nvinfer1::IHostMemory* buildEngineYolov8s(nvinfer1::IBuilder* builder, 10 | nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, const std::string& wts_path); 11 | 12 | nvinfer1::IHostMemory* buildEngineYolov8m(nvinfer1::IBuilder* builder, 13 | nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, const std::string& wts_path); 14 | 15 | nvinfer1::IHostMemory* buildEngineYolov8l(nvinfer1::IBuilder* builder, 16 | nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, const std::string& wts_path); 17 | 18 | nvinfer1::IHostMemory* buildEngineYolov8x(nvinfer1::IBuilder* builder, 19 | nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, const std::string& wts_path); 20 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/include/postprocess.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "types.h" 4 | #include "NvInfer.h" 5 | #include 6 | 7 | cv::Rect get_rect(cv::Mat& img, float bbox[4]); 8 | 9 | void nms(std::vector& res, float *output, float conf_thresh, float nms_thresh = 0.5); 10 | 11 | void batch_nms(std::vector>& batch_res, float *output, int batch_size, int output_size, float conf_thresh, float nms_thresh = 0.5); 12 | 13 | void draw_bbox(std::vector &img_batch, std::vector> &res_batch); 14 | 15 | void batch_process(std::vector> &res_batch, const float* decode_ptr_host, int batch_size, int bbox_element, const std::vector& img_batch); 16 | 17 | void process_decode_ptr_host(std::vector &res, const float* decode_ptr_host, int bbox_element, cv::Mat& img, int count); 18 | 19 | void cuda_decode(float* predict, int num_bboxes, float confidence_threshold,float* parray,int max_objects, cudaStream_t stream); 20 | 21 | void cuda_nms(float* parray, float nms_threshold, int max_objects, cudaStream_t stream); 22 | 23 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/include/preprocess.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "NvInfer.h" 5 | #include "types.h" 6 | #include 7 | 8 | 9 | void cuda_preprocess_init(int max_image_size); 10 | 11 | void cuda_preprocess_destroy(); 12 | 13 | void cuda_preprocess(uint8_t *src, int src_width, int src_height, float *dst, int dst_width, int dst_height, cudaStream_t stream); 14 | 15 | void cuda_batch_preprocess(std::vector &img_batch, float *dst, int dst_width, int dst_height, cudaStream_t stream); 16 | 17 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/include/types.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "config.h" 3 | 4 | struct alignas(float) Detection { 5 | //center_x center_y w h 6 | float bbox[4]; 7 | float conf; // bbox_conf * cls_conf 8 | float class_id; 9 | }; 10 | 11 | struct AffineMatrix { 12 | float value[6]; 13 | }; 14 | 15 | const int bbox_element = sizeof(AffineMatrix) / sizeof(float)+1; // left, top, right, bottom, confidence, class, keepflag 16 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/include/utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int input_h) { 6 | int w, h, x, y; 7 | float r_w = input_w / (img.cols*1.0); 8 | float r_h = input_h / (img.rows*1.0); 9 | if (r_h > r_w) { 10 | w = input_w; 11 | h = r_w * img.rows; 12 | x = 0; 13 | y = (input_h - h) / 2; 14 | } else { 15 | w = r_h * img.cols; 16 | h = input_h; 17 | x = (input_w - w) / 2; 18 | y = 0; 19 | } 20 | cv::Mat re(h, w, CV_8UC3); 21 | cv::resize(img, re, re.size(), 0, 0, cv::INTER_LINEAR); 22 | cv::Mat out(input_h, input_w, CV_8UC3, cv::Scalar(128, 128, 128)); 23 | re.copyTo(out(cv::Rect(x, y, re.cols, re.rows))); 24 | return out; 25 | } 26 | 27 | static inline int read_files_in_dir(const char *p_dir_name, std::vector &file_names) { 28 | DIR *p_dir = opendir(p_dir_name); 29 | if (p_dir == nullptr) { 30 | return -1; 31 | } 32 | 33 | struct dirent* p_file = nullptr; 34 | while ((p_file = readdir(p_dir)) != nullptr) { 35 | if (strcmp(p_file->d_name, ".") != 0 && 36 | strcmp(p_file->d_name, "..") != 0) { 37 | //std::string cur_file_name(p_dir_name); 38 | //cur_file_name += "/"; 39 | //cur_file_name += p_file->d_name; 40 | std::string cur_file_name(p_file->d_name); 41 | file_names.push_back(cur_file_name); 42 | } 43 | } 44 | 45 | closedir(p_dir); 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/main.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include "model.h" 6 | #include "utils.h" 7 | #include "preprocess.h" 8 | #include "postprocess.h" 9 | #include "cuda_utils.h" 10 | #include "logging.h" 11 | 12 | Logger gLogger; 13 | using namespace nvinfer1; 14 | const int kOutputSize = kMaxNumOutputBbox * sizeof(Detection) / sizeof(float) + 1; 15 | 16 | void serialize_engine(std::string &wts_name, std::string &engine_name, std::string &sub_type) { 17 | IBuilder *builder = createInferBuilder(gLogger); 18 | IBuilderConfig *config = builder->createBuilderConfig(); 19 | IHostMemory *serialized_engine = nullptr; 20 | 21 | if (sub_type == "n") { 22 | serialized_engine = 
buildEngineYolov8n(builder, config, DataType::kFLOAT, wts_name); 23 | } else if (sub_type == "s") { 24 | serialized_engine = buildEngineYolov8s(builder, config, DataType::kFLOAT, wts_name); 25 | } else if (sub_type == "m") { 26 | serialized_engine = buildEngineYolov8m(builder, config, DataType::kFLOAT, wts_name); 27 | } else if (sub_type == "l") { 28 | serialized_engine = buildEngineYolov8l(builder, config, DataType::kFLOAT, wts_name); 29 | } else if (sub_type == "x") { 30 | serialized_engine = buildEngineYolov8x(builder, config, DataType::kFLOAT, wts_name); 31 | } 32 | 33 | assert(serialized_engine); 34 | std::ofstream p(engine_name, std::ios::binary); 35 | if (!p) { 36 | std::cout << "could not open plan output file" << std::endl; 37 | assert(false); 38 | } 39 | p.write(reinterpret_cast(serialized_engine->data()), serialized_engine->size()); 40 | 41 | delete builder; 42 | delete config; 43 | delete serialized_engine; 44 | } 45 | 46 | 47 | void deserialize_engine(std::string &engine_name, IRuntime **runtime, ICudaEngine **engine, IExecutionContext **context) { 48 | std::ifstream file(engine_name, std::ios::binary); 49 | if (!file.good()) { 50 | std::cerr << "read " << engine_name << " error!" << std::endl; 51 | assert(false); 52 | } 53 | size_t size = 0; 54 | file.seekg(0, file.end); 55 | size = file.tellg(); 56 | file.seekg(0, file.beg); 57 | char *serialized_engine = new char[size]; 58 | assert(serialized_engine); 59 | file.read(serialized_engine, size); 60 | file.close(); 61 | 62 | *runtime = createInferRuntime(gLogger); 63 | assert(*runtime); 64 | *engine = (*runtime)->deserializeCudaEngine(serialized_engine, size); 65 | assert(*engine); 66 | *context = (*engine)->createExecutionContext(); 67 | assert(*context); 68 | delete[] serialized_engine; 69 | } 70 | 71 | void prepare_buffer(ICudaEngine *engine, float **input_buffer_device, float **output_buffer_device, 72 | float **output_buffer_host, float **decode_ptr_host, float **decode_ptr_device, std::string cuda_post_process) { 73 | assert(engine->getNbBindings() == 2); 74 | // In order to bind the buffers, we need to know the names of the input and output tensors. 
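// For this engine, the lookups below resolve to buffers[0] = "images" (kInputTensorName) and buffers[1] = "output" (kOutputTensorName), as the assertions verify.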
75 | // Note that indices are guaranteed to be less than IEngine::getNbBindings() 76 | const int inputIndex = engine->getBindingIndex(kInputTensorName); 77 | const int outputIndex = engine->getBindingIndex(kOutputTensorName); 78 | assert(inputIndex == 0); 79 | assert(outputIndex == 1); 80 | // Create GPU buffers on device 81 | CUDA_CHECK(cudaMalloc((void **) input_buffer_device, kBatchSize * 3 * kInputH * kInputW * sizeof(float))); 82 | CUDA_CHECK(cudaMalloc((void **) output_buffer_device, kBatchSize * kOutputSize * sizeof(float))); 83 | if (cuda_post_process == "c") { 84 | *output_buffer_host = new float[kBatchSize * kOutputSize]; 85 | } else if (cuda_post_process == "g") { 86 | if (kBatchSize > 1) { 87 | std::cerr << "Do not yet support GPU post processing for multiple batches" << std::endl; 88 | exit(0); 89 | } 90 | // Allocate memory for decode_ptr_host and copy to device 91 | *decode_ptr_host = new float[1 + kMaxNumOutputBbox * bbox_element]; 92 | CUDA_CHECK(cudaMalloc((void **)decode_ptr_device, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element))); 93 | } 94 | } 95 | 96 | void infer(IExecutionContext &context, cudaStream_t &stream, void **buffers, float *output, int batchsize, float* decode_ptr_host, float* decode_ptr_device, int model_bboxes, std::string cuda_post_process) { 97 | // infer on the batch asynchronously, and DMA output back to host 98 | // auto start = std::chrono::system_clock::now(); 99 | context.enqueue(batchsize, buffers, stream, nullptr); 100 | if (cuda_post_process == "c") { 101 | CUDA_CHECK(cudaMemcpyAsync(output, buffers[1], batchsize * kOutputSize * sizeof(float), cudaMemcpyDeviceToHost,stream)); 102 | // auto end = std::chrono::system_clock::now(); 103 | // std::cout << "inference time: " << std::chrono::duration_cast(end - start).count() << "ms" << std::endl; 104 | } else if (cuda_post_process == "g") { 105 | CUDA_CHECK(cudaMemsetAsync(decode_ptr_device, 0, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element), stream)); 106 | cuda_decode((float *)buffers[1], model_bboxes, kConfThresh, decode_ptr_device, kMaxNumOutputBbox, stream); 107 | cuda_nms(decode_ptr_device, kNmsThresh, kMaxNumOutputBbox, stream);//cuda nms 108 | CUDA_CHECK(cudaMemcpyAsync(decode_ptr_host, decode_ptr_device, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element), cudaMemcpyDeviceToHost, stream)); 109 | // auto end = std::chrono::system_clock::now(); 110 | // std::cout << "inference and gpu postprocess time: " << std::chrono::duration_cast(end - start).count() << "ms" << std::endl; 111 | } 112 | 113 | CUDA_CHECK(cudaStreamSynchronize(stream)); 114 | } 115 | 116 | 117 | bool parse_args(int argc, char **argv, std::string &wts, std::string &engine, std::string &img_dir, std::string &sub_type, std::string &cuda_post_process) { 118 | if (argc < 4) return false; 119 | if (std::string(argv[1]) == "-s" && argc == 5) { 120 | wts = std::string(argv[2]); 121 | engine = std::string(argv[3]); 122 | sub_type = std::string(argv[4]); 123 | } else if (std::string(argv[1]) == "-d" && argc == 5) { 124 | engine = std::string(argv[2]); 125 | img_dir = std::string(argv[3]); 126 | cuda_post_process = std::string(argv[4]); 127 | } else { 128 | return false; 129 | } 130 | return true; 131 | } 132 | 133 | int main(int argc, char **argv) { 134 | cudaSetDevice(kGpuId); 135 | std::string wts_name = ""; 136 | std::string engine_name = ""; 137 | std::string img_dir; 138 | std::string sub_type = ""; 139 | std::string cuda_post_process=""; 140 | int model_bboxes; 141 | 142 | if (!parse_args(argc, argv, wts_name, 
engine_name, img_dir, sub_type, cuda_post_process)) {
143 | std::cerr << "Arguments not right!" << std::endl;
144 | std::cerr << "./yolov8 -s [.wts] [.engine] [n/s/m/l/x]  // serialize model to plan file" << std::endl;
145 | std::cerr << "./yolov8 -d [.engine] ../samples [c/g]  // deserialize plan file and run inference" << std::endl;
146 | return -1;
147 | }
148 | 
149 | // Create a model using the API directly and serialize it to a file
150 | if (!wts_name.empty()) {
151 | serialize_engine(wts_name, engine_name, sub_type);
152 | return 0;
153 | }
154 | 
155 | // Deserialize the engine from file
156 | IRuntime *runtime = nullptr;
157 | ICudaEngine *engine = nullptr;
158 | IExecutionContext *context = nullptr;
159 | deserialize_engine(engine_name, &runtime, &engine, &context);
160 | cudaStream_t stream;
161 | CUDA_CHECK(cudaStreamCreate(&stream));
162 | cuda_preprocess_init(kMaxInputImageSize);
163 | auto out_dims = engine->getBindingDimensions(1);
164 | model_bboxes = out_dims.d[0];
165 | // Prepare cpu and gpu buffers
166 | float *device_buffers[2];
167 | float *output_buffer_host = nullptr;
168 | float *decode_ptr_host = nullptr;
169 | float *decode_ptr_device = nullptr;
170 | 
171 | // Read images from directory
172 | std::vector<std::string> file_names;
173 | if (read_files_in_dir(img_dir.c_str(), file_names) < 0) {
174 | std::cerr << "read_files_in_dir failed." << std::endl;
175 | return -1;
176 | }
177 | 
178 | prepare_buffer(engine, &device_buffers[0], &device_buffers[1], &output_buffer_host, &decode_ptr_host, &decode_ptr_device, cuda_post_process);
179 | 
180 | // batch predict
181 | for (size_t i = 0; i < file_names.size(); i += kBatchSize) {
182 | // Get a batch of images
183 | std::vector<cv::Mat> img_batch;
184 | std::vector<std::string> img_name_batch;
185 | for (size_t j = i; j < i + kBatchSize && j < file_names.size(); j++) {
186 | cv::Mat img = cv::imread(img_dir + "/" + file_names[j]);
187 | img_batch.push_back(img);
188 | img_name_batch.push_back(file_names[j]);
189 | }
190 | auto start = std::chrono::system_clock::now();
191 | // Preprocess
192 | cuda_batch_preprocess(img_batch, device_buffers[0], kInputW, kInputH, stream);
193 | // Run inference
194 | infer(*context, stream, (void **)device_buffers, output_buffer_host, kBatchSize, decode_ptr_host, decode_ptr_device, model_bboxes, cuda_post_process);
195 | std::vector<std::vector<Detection>> res_batch;
196 | if (cuda_post_process == "c") {
197 | // NMS
198 | batch_nms(res_batch, output_buffer_host, img_batch.size(), kOutputSize, kConfThresh, kNmsThresh);
199 | } else if (cuda_post_process == "g") {
200 | // Process gpu decode and nms results
201 | batch_process(res_batch, decode_ptr_host, img_batch.size(), bbox_element, img_batch);
202 | }
203 | auto end = std::chrono::system_clock::now();
204 | std::cout << "inference and postprocess time: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
205 | // Draw bounding boxes
206 | draw_bbox(img_batch, res_batch);
207 | // Save images
208 | for (size_t j = 0; j < img_batch.size(); j++) {
209 | cv::imwrite("_" + img_name_batch[j], img_batch[j]);
210 | }
211 | }
212 | 
213 | // Release stream and buffers
214 | cudaStreamDestroy(stream);
215 | CUDA_CHECK(cudaFree(device_buffers[0]));
216 | CUDA_CHECK(cudaFree(device_buffers[1]));
217 | CUDA_CHECK(cudaFree(decode_ptr_device));
218 | delete[] decode_ptr_host;
219 | delete[] output_buffer_host;
220 | cuda_preprocess_destroy();
221 | // Destroy the engine
222 | delete context;
223 | delete engine;
224 | delete runtime;
225 | 
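// [Usage sketch, not part of the original source] The two modes accepted by
// parse_args() above are typically driven like this (paths are illustrative):
//
//     ./yolov8 -s yolov8s.wts yolov8s.engine s    // build and serialize the engine
//     ./yolov8 -d yolov8s.engine ../images c      // run inference, CPU postprocess
//     ./yolov8 -d yolov8s.engine ../images g      // run inference, GPU decode + NMS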
234 | 
235 | return 0;
236 | }
237 | 
238 | 
-------------------------------------------------------------------------------- /tensorrtx-yolov8/plugin/yololayer.cu: --------------------------------------------------------------------------------
1 | #include "yololayer.h"
2 | #include "types.h"
3 | #include 
4 | #include 
5 | #include "cuda_utils.h"
6 | #include 
7 | #include 
8 | 
9 | namespace Tn {
10 | template <typename T>
11 | void write(char*& buffer, const T& val) {
12 | *reinterpret_cast<T*>(buffer) = val;
13 | buffer += sizeof(T);
14 | }
15 | 
16 | template <typename T>
17 | void read(const char*& buffer, T& val) {
18 | val = *reinterpret_cast<const T*>(buffer);
19 | buffer += sizeof(T);
20 | }
21 | }  // namespace Tn
22 | 
23 | 
24 | namespace nvinfer1 {
25 | YoloLayerPlugin::YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut) {
26 | mClassCount = classCount;
27 | mYoloV8NetWidth = netWidth;
28 | mYoloV8netHeight = netHeight;
29 | mMaxOutObject = maxOut;
30 | }
31 | 
32 | YoloLayerPlugin::~YoloLayerPlugin() {}
33 | 
34 | YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length) {
35 | using namespace Tn;
36 | const char* d = reinterpret_cast<const char*>(data), * a = d;
37 | read(d, mClassCount);
38 | read(d, mThreadCount);
39 | read(d, mYoloV8NetWidth);
40 | read(d, mYoloV8netHeight);
41 | read(d, mMaxOutObject);
42 | 
43 | assert(d == a + length);
44 | }
45 | 
46 | void YoloLayerPlugin::serialize(void* buffer) const TRT_NOEXCEPT {
47 | 
48 | using namespace Tn;
49 | char* d = static_cast<char*>(buffer), * a = d;
50 | write(d, mClassCount);
51 | write(d, mThreadCount);
52 | write(d, mYoloV8NetWidth);
53 | write(d, mYoloV8netHeight);
54 | write(d, mMaxOutObject);
55 | 
56 | assert(d == a + getSerializationSize());
57 | }
58 | 
59 | size_t YoloLayerPlugin::getSerializationSize() const TRT_NOEXCEPT {
60 | return sizeof(mClassCount) + sizeof(mThreadCount) + sizeof(mYoloV8netHeight) + sizeof(mYoloV8NetWidth) + sizeof(mMaxOutObject);
61 | }
62 | 
63 | int YoloLayerPlugin::initialize() TRT_NOEXCEPT {
64 | return 0;
65 | }
66 | 
67 | nvinfer1::Dims YoloLayerPlugin::getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) TRT_NOEXCEPT {
68 | int total_size = mMaxOutObject * sizeof(Detection) / sizeof(float);
69 | return nvinfer1::Dims3(total_size + 1, 1, 1);
70 | }
71 | 
72 | void YoloLayerPlugin::setPluginNamespace(const char* pluginNamespace) TRT_NOEXCEPT {
73 | mPluginNamespace = pluginNamespace;
74 | }
75 | 
76 | const char* YoloLayerPlugin::getPluginNamespace() const TRT_NOEXCEPT {
77 | return mPluginNamespace;
78 | }
79 | 
80 | nvinfer1::DataType YoloLayerPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const TRT_NOEXCEPT {
81 | return nvinfer1::DataType::kFLOAT;
82 | }
83 | 
84 | bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const TRT_NOEXCEPT {
85 | 
86 | return false;
87 | }
88 | 
89 | bool YoloLayerPlugin::canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOEXCEPT {
90 | 
91 | return false;
92 | }
93 | 
94 | void YoloLayerPlugin::configurePlugin(nvinfer1::PluginTensorDesc const* in, int nbInput, nvinfer1::PluginTensorDesc const* out, int nbOutput) TRT_NOEXCEPT {};
95 | 
96 | void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext,
IGpuAllocator* gpuAllocator) TRT_NOEXCEPT {}; 97 | 98 | void YoloLayerPlugin::detachFromContext() TRT_NOEXCEPT {} 99 | 100 | const char* YoloLayerPlugin::getPluginType() const TRT_NOEXCEPT { 101 | 102 | return "YoloLayer_TRT"; 103 | } 104 | 105 | const char* YoloLayerPlugin::getPluginVersion() const TRT_NOEXCEPT { 106 | return "1"; 107 | } 108 | 109 | void YoloLayerPlugin::destroy() TRT_NOEXCEPT { 110 | 111 | delete this; 112 | } 113 | 114 | nvinfer1::IPluginV2IOExt* YoloLayerPlugin::clone() const TRT_NOEXCEPT { 115 | 116 | YoloLayerPlugin* p = new YoloLayerPlugin(mClassCount, mYoloV8NetWidth, mYoloV8netHeight, mMaxOutObject); 117 | p->setPluginNamespace(mPluginNamespace); 118 | return p; 119 | } 120 | 121 | int YoloLayerPlugin::enqueue(int batchSize, const void* TRT_CONST_ENQUEUE* inputs, void* const* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT { 122 | 123 | forwardGpu((const float* const*)inputs, (float*)outputs[0], stream, mYoloV8netHeight, mYoloV8NetWidth, batchSize); 124 | return 0; 125 | } 126 | 127 | 128 | __device__ float Logist(float data) { return 1.0f / (1.0f + expf(-data)); }; 129 | 130 | __global__ void CalDetection(const float* input, float* output, int numElements, int maxoutobject, 131 | const int grid_h, int grid_w, const int stride, int classes, int outputElem) { 132 | int idx = threadIdx.x + blockDim.x * blockIdx.x; 133 | if (idx >= numElements) return; 134 | 135 | int total_grid = grid_h * grid_w; 136 | int info_len = 4 + classes; 137 | int batchIdx = idx / total_grid; 138 | int elemIdx = idx % total_grid; 139 | const float* curInput = input + batchIdx * total_grid * info_len; 140 | int outputIdx = batchIdx * outputElem; 141 | 142 | int class_id = 0; 143 | float max_cls_prob = 0.0; 144 | for (int i = 4; i < info_len; i++) { 145 | float p = Logist(curInput[elemIdx + i * total_grid]); 146 | if (p > max_cls_prob) { 147 | max_cls_prob = p; 148 | class_id = i - 4; 149 | } 150 | } 151 | 152 | if (max_cls_prob < 0.1) return; 153 | 154 | int count = (int)atomicAdd(output + outputIdx, 1); 155 | if (count >= maxoutobject) return; 156 | char* data = (char*)(output + outputIdx) + sizeof(float) + count * sizeof(Detection); 157 | Detection* det = (Detection*)(data); 158 | 159 | int row = elemIdx / grid_w; 160 | int col = elemIdx % grid_w; 161 | 162 | det->conf = max_cls_prob; 163 | det->class_id = class_id; 164 | det->bbox[0] = (col + 0.5f - curInput[elemIdx + 0 * total_grid]) * stride; 165 | det->bbox[1] = (row + 0.5f - curInput[elemIdx + 1 * total_grid]) * stride; 166 | det->bbox[2] = (col + 0.5f + curInput[elemIdx + 2 * total_grid]) * stride; 167 | det->bbox[3] = (row + 0.5f + curInput[elemIdx + 3 * total_grid]) * stride; 168 | } 169 | 170 | void YoloLayerPlugin::forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int mYoloV8netHeight,int mYoloV8NetWidth, int batchSize) { 171 | int outputElem = 1 + mMaxOutObject * sizeof(Detection) / sizeof(float); 172 | cudaMemsetAsync(output, 0, sizeof(float), stream); 173 | for (int idx = 0; idx < batchSize; ++idx) { 174 | CUDA_CHECK(cudaMemsetAsync(output + idx * outputElem, 0, sizeof(float), stream)); 175 | } 176 | int numElem = 0; 177 | int grids[3][2] = { {mYoloV8netHeight / 8, mYoloV8NetWidth / 8}, {mYoloV8netHeight / 16, mYoloV8NetWidth / 16}, {mYoloV8netHeight / 32, mYoloV8NetWidth / 32} }; 178 | int strides[] = { 8, 16, 32 }; 179 | for (unsigned int i = 0; i < 3; i++) { 180 | int grid_h = grids[i][0]; 181 | int grid_w = grids[i][1]; 182 | int stride = strides[i]; 183 | numElem = grid_h * grid_w * 
batchSize;
184 | if (numElem < mThreadCount) mThreadCount = numElem;
185 | 
186 | CalDetection << <(numElem + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream >> >
187 | (inputs[i], output, numElem, mMaxOutObject, grid_h, grid_w, stride, mClassCount, outputElem);
188 | }
189 | }
190 | 
191 | PluginFieldCollection YoloPluginCreator::mFC{};
192 | std::vector<PluginField> YoloPluginCreator::mPluginAttributes;
193 | 
194 | YoloPluginCreator::YoloPluginCreator() {
195 | mPluginAttributes.clear();
196 | mFC.nbFields = mPluginAttributes.size();
197 | mFC.fields = mPluginAttributes.data();
198 | }
199 | 
200 | const char* YoloPluginCreator::getPluginName() const TRT_NOEXCEPT {
201 | return "YoloLayer_TRT";
202 | }
203 | 
204 | const char* YoloPluginCreator::getPluginVersion() const TRT_NOEXCEPT {
205 | return "1";
206 | }
207 | 
208 | const PluginFieldCollection* YoloPluginCreator::getFieldNames() TRT_NOEXCEPT {
209 | return &mFC;
210 | }
211 | 
212 | IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) TRT_NOEXCEPT {
213 | assert(fc->nbFields == 1);
214 | assert(strcmp(fc->fields[0].name, "netinfo") == 0);
215 | int* p_netinfo = (int*)(fc->fields[0].data);
216 | int class_count = p_netinfo[0];
217 | int input_w = p_netinfo[1];
218 | int input_h = p_netinfo[2];
219 | int max_output_object_count = p_netinfo[3];
220 | YoloLayerPlugin* obj = new YoloLayerPlugin(class_count, input_w, input_h, max_output_object_count);
221 | obj->setPluginNamespace(mNamespace.c_str());
222 | return obj;
223 | }
224 | 
225 | IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) TRT_NOEXCEPT {
226 | // This object will be deleted when the network is destroyed, which will
227 | // call YoloLayerPlugin::destroy()
228 | YoloLayerPlugin* obj = new YoloLayerPlugin(serialData, serialLength);
229 | obj->setPluginNamespace(mNamespace.c_str());
230 | return obj;
231 | }
232 | 
233 | } // namespace nvinfer1
234 | 
-------------------------------------------------------------------------------- /tensorrtx-yolov8/plugin/yololayer.h: --------------------------------------------------------------------------------
1 | #pragma once
2 | #include "macros.h"
3 | #include "NvInfer.h"
4 | #include <string>
5 | #include <vector>
6 | #include "macros.h"
7 | namespace nvinfer1 {
8 | class API YoloLayerPlugin : public IPluginV2IOExt {
9 | public:
10 | YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut);
11 | YoloLayerPlugin(const void* data, size_t length);
12 | ~YoloLayerPlugin();
13 | 
14 | int getNbOutputs() const TRT_NOEXCEPT override {
15 | return 1;
16 | }
17 | 
18 | nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) TRT_NOEXCEPT override;
19 | 
20 | int initialize() TRT_NOEXCEPT override;
21 | 
22 | virtual void terminate() TRT_NOEXCEPT override {}
23 | 
24 | virtual size_t getWorkspaceSize(int maxBatchSize) const TRT_NOEXCEPT override { return 0; }
25 | 
26 | virtual int enqueue(int batchSize, const void* const* inputs, void* TRT_CONST_ENQUEUE* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT override;
27 | 
28 | virtual size_t getSerializationSize() const TRT_NOEXCEPT override;
29 | 
30 | virtual void serialize(void* buffer) const TRT_NOEXCEPT override;
31 | 
32 | bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const TRT_NOEXCEPT override {
33 | return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
34 | }
35 | 
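// [Editor's sketch, not part of the original source] The (data, length)
// constructor and serialize() walk the same five ints in the same order
// (mClassCount, mThreadCount, mYoloV8NetWidth, mYoloV8netHeight, mMaxOutObject),
// so a round trip through a raw buffer reproduces the plugin:
//
//     // illustrative only; both ends assert they consumed exactly n bytes
//     size_t n = plugin.getSerializationSize();
//     std::vector<char> buf(n);
//     plugin.serialize(buf.data());
//     YoloLayerPlugin copy(buf.data(), n);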
36 | 37 | const char* getPluginType() const TRT_NOEXCEPT override; 38 | 39 | const char* getPluginVersion() const TRT_NOEXCEPT override; 40 | 41 | void destroy() TRT_NOEXCEPT override; 42 | 43 | IPluginV2IOExt* clone() const TRT_NOEXCEPT override; 44 | 45 | void setPluginNamespace(const char* pluginNamespace) TRT_NOEXCEPT override; 46 | 47 | const char* getPluginNamespace() const TRT_NOEXCEPT override; 48 | 49 | nvinfer1::DataType getOutputDataType(int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const TRT_NOEXCEPT; 50 | 51 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const TRT_NOEXCEPT override; 52 | 53 | bool canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOEXCEPT override; 54 | 55 | void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) TRT_NOEXCEPT override; 56 | 57 | void configurePlugin(PluginTensorDesc const* in, int32_t nbInput, PluginTensorDesc const* out, int32_t nbOutput) TRT_NOEXCEPT override; 58 | 59 | void detachFromContext() TRT_NOEXCEPT override; 60 | 61 | private: 62 | void forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int mYoloV8netHeight, int mYoloV8NetWidth, int batchSize); 63 | int mThreadCount = 256; 64 | const char* mPluginNamespace; 65 | int mClassCount; 66 | int mYoloV8NetWidth; 67 | int mYoloV8netHeight; 68 | int mMaxOutObject; 69 | }; 70 | 71 | class API YoloPluginCreator : public IPluginCreator { 72 | public: 73 | YoloPluginCreator(); 74 | ~YoloPluginCreator() override = default; 75 | 76 | const char* getPluginName() const TRT_NOEXCEPT override; 77 | 78 | const char* getPluginVersion() const TRT_NOEXCEPT override; 79 | 80 | const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override; 81 | 82 | nvinfer1::IPluginV2IOExt* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) TRT_NOEXCEPT override; 83 | 84 | nvinfer1::IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) TRT_NOEXCEPT override; 85 | 86 | void setPluginNamespace(const char* libNamespace) TRT_NOEXCEPT override { 87 | mNamespace = libNamespace; 88 | } 89 | 90 | const char* getPluginNamespace() const TRT_NOEXCEPT override { 91 | return mNamespace.c_str(); 92 | } 93 | 94 | private: 95 | std::string mNamespace; 96 | static PluginFieldCollection mFC; 97 | static std::vector mPluginAttributes; 98 | }; 99 | REGISTER_TENSORRT_PLUGIN(YoloPluginCreator); 100 | } // namespace nvinfer1 101 | 102 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/src/block.cpp: -------------------------------------------------------------------------------- 1 | #include "block.h" 2 | #include "yololayer.h" 3 | #include "config.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | std::map loadWeights(const std::string file){ 10 | std::cout << "Loading weights: " << file << std::endl; 11 | std::map WeightMap; 12 | 13 | std::ifstream input(file); 14 | assert(input.is_open() && "Unable to load weight file. 
please check if the .wts file path is right!!!!!!"); 15 | 16 | int32_t count; 17 | input>>count ; 18 | assert(count > 0 && "Invalid weight map file."); 19 | 20 | while(count--){ 21 | nvinfer1::Weights wt{nvinfer1::DataType::kFLOAT, nullptr, 0}; 22 | uint32_t size; 23 | 24 | std::string name; 25 | input >> name >> std::dec >> size; 26 | wt.type = nvinfer1::DataType::kFLOAT; 27 | 28 | uint32_t* val = reinterpret_cast(malloc(sizeof(val) * size)); 29 | for(uint32_t x = 0, y = size; x < y; x++){ 30 | input >> std::hex >> val[x]; 31 | } 32 | wt.values = val; 33 | wt.count = size; 34 | WeightMap[name] = wt; 35 | } 36 | return WeightMap; 37 | } 38 | 39 | 40 | static nvinfer1::IScaleLayer* addBatchNorm2d(nvinfer1::INetworkDefinition* network, std::map weightMap, 41 | nvinfer1::ITensor& input, std::string lname, float eps){ 42 | float* gamma = (float*)weightMap[lname + ".weight"].values; 43 | float* beta = (float*)weightMap[lname + ".bias"].values; 44 | float* mean = (float*)weightMap[lname + ".running_mean"].values; 45 | float* var = (float*)weightMap[lname + ".running_var"].values; 46 | int len = weightMap[lname + ".running_var"].count; 47 | 48 | float* scval = reinterpret_cast(malloc(sizeof(float) * len)); 49 | for(int i = 0; i < len; i++){ 50 | scval[i] = gamma[i] / sqrt(var[i] + eps); 51 | } 52 | nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, scval, len}; 53 | 54 | float* shval = reinterpret_cast(malloc(sizeof(float) * len)); 55 | for(int i = 0; i < len; i++){ 56 | shval[i] = beta[i] - mean[i] * gamma[i] / sqrt(var[i] + eps); 57 | } 58 | nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, shval, len}; 59 | 60 | float* pval = reinterpret_cast(malloc(sizeof(float) * len)); 61 | for (int i = 0; i < len; i++) { 62 | pval[i] = 1.0; 63 | } 64 | nvinfer1::Weights power{ nvinfer1::DataType::kFLOAT, pval, len }; 65 | weightMap[lname + ".scale"] = scale; 66 | weightMap[lname + ".shift"] = shift; 67 | weightMap[lname + ".power"] = power; 68 | nvinfer1::IScaleLayer* output = network->addScale(input, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); 69 | assert(output); 70 | return output; 71 | } 72 | 73 | 74 | nvinfer1::IElementWiseLayer* convBnSiLU(nvinfer1::INetworkDefinition* network, std::map weightMap, 75 | nvinfer1::ITensor& input, int ch, int k, int s, int p, std::string lname){ 76 | nvinfer1::Weights bias_empty{nvinfer1::DataType::kFLOAT, nullptr, 0}; 77 | nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(input, ch, nvinfer1::DimsHW{k, k}, weightMap[lname+".conv.weight"], bias_empty); 78 | assert(conv); 79 | conv->setStrideNd(nvinfer1::DimsHW{s, s}); 80 | conv->setPaddingNd(nvinfer1::DimsHW{p, p}); 81 | 82 | nvinfer1::IScaleLayer* bn = addBatchNorm2d(network, weightMap, *conv->getOutput(0), lname+".bn", 1e-5); 83 | 84 | nvinfer1::IActivationLayer* sigmoid = network->addActivation(*bn->getOutput(0), nvinfer1::ActivationType::kSIGMOID); 85 | nvinfer1::IElementWiseLayer* ew = network->addElementWise(*bn->getOutput(0), *sigmoid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD); 86 | assert(ew); 87 | return ew; 88 | } 89 | 90 | 91 | nvinfer1::ILayer* bottleneck(nvinfer1::INetworkDefinition* network, std::map weightMap, 92 | nvinfer1::ITensor& input, int c1, int c2, bool shortcut, float e, std::string lname){ 93 | nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, input, c2, 3, 1, 1, lname+".cv1"); 94 | nvinfer1::IElementWiseLayer* conv2 = convBnSiLU(network, weightMap, *conv1->getOutput(0), c2, 3, 1, 1, lname+".cv2"); 95 | 96 | if(shortcut && c1 == c2){ 97 | 
nvinfer1::IElementWiseLayer* ew = network->addElementWise(input, *conv2->getOutput(0), nvinfer1::ElementWiseOperation::kSUM); 98 | return ew; 99 | } 100 | return conv2; 101 | } 102 | 103 | 104 | nvinfer1::IElementWiseLayer* C2F(nvinfer1::INetworkDefinition* network, std::map weightMap, 105 | nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname){ 106 | int c_ = (float)c2 * e; 107 | 108 | nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, input, 2* c_, 1, 1, 0, lname+".cv1"); 109 | nvinfer1::Dims d = conv1->getOutput(0)->getDimensions(); 110 | 111 | nvinfer1::ISliceLayer* split1 = network->addSlice(*conv1->getOutput(0), nvinfer1::Dims3{0,0,0}, nvinfer1::Dims3{d.d[0]/2, d.d[1], d.d[2]}, nvinfer1::Dims3{1,1,1}); 112 | nvinfer1::ISliceLayer* split2 = network->addSlice(*conv1->getOutput(0), nvinfer1::Dims3{d.d[0]/2,0,0}, nvinfer1::Dims3{d.d[0]/2, d.d[1], d.d[2]}, nvinfer1::Dims3{1,1,1}); 113 | nvinfer1::ITensor* inputTensor0[] = {split1->getOutput(0), split2->getOutput(0)}; 114 | nvinfer1::IConcatenationLayer* cat = network->addConcatenation(inputTensor0, 2); 115 | nvinfer1::ITensor* y1 = split2->getOutput(0); 116 | for(int i = 0; i < n; i++){ 117 | auto* b = bottleneck(network, weightMap, *y1, c_, c_, shortcut, 1.0, lname+".m." + std::to_string(i)); 118 | y1 = b->getOutput(0); 119 | 120 | nvinfer1::ITensor* inputTensors[] = {cat->getOutput(0), b->getOutput(0)}; 121 | cat = network->addConcatenation(inputTensors, 2); 122 | } 123 | 124 | nvinfer1::IElementWiseLayer* conv2 = convBnSiLU(network, weightMap, *cat->getOutput(0), c2, 1, 1, 0, lname+".cv2"); 125 | 126 | return conv2; 127 | } 128 | 129 | 130 | nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network, std::map weightMap, 131 | nvinfer1::ITensor& input, int c1, int c2, int k, std::string lname){ 132 | int c_ = c1 / 2; 133 | 134 | nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, input, c_, 1, 1, 0, lname+".cv1"); 135 | 136 | nvinfer1::IPoolingLayer* pool1 = network->addPoolingNd(*conv1->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{k,k}); 137 | pool1->setStrideNd(nvinfer1::DimsHW{1, 1}); 138 | pool1->setPaddingNd(nvinfer1::DimsHW{ k / 2, k / 2 }); 139 | nvinfer1::IPoolingLayer* pool2 = network->addPoolingNd(*pool1->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{k,k}); 140 | pool2->setStrideNd(nvinfer1::DimsHW{1, 1}); 141 | pool2->setPaddingNd(nvinfer1::DimsHW{ k / 2, k / 2 }); 142 | nvinfer1::IPoolingLayer* pool3 = network->addPoolingNd(*pool2->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{k,k}); 143 | pool3->setStrideNd(nvinfer1::DimsHW{1, 1}); 144 | pool3->setPaddingNd(nvinfer1::DimsHW{ k / 2, k / 2 }); 145 | nvinfer1::ITensor* inputTensors[] = {conv1->getOutput(0), pool1->getOutput(0), pool2->getOutput(0), pool3->getOutput(0)}; 146 | nvinfer1::IConcatenationLayer* cat = network->addConcatenation(inputTensors, 4); 147 | nvinfer1::IElementWiseLayer* conv2 = convBnSiLU(network, weightMap, *cat->getOutput(0), c2, 1, 1, 0, lname+".cv2"); 148 | return conv2; 149 | } 150 | 151 | 152 | nvinfer1::IShuffleLayer* DFL(nvinfer1::INetworkDefinition* network, std::map weightMap, 153 | nvinfer1::ITensor& input, int ch, int grid, int k, int s, int p, std::string lname){ 154 | 155 | nvinfer1::IShuffleLayer* shuffle1 = network->addShuffle(input); 156 | shuffle1->setReshapeDimensions(nvinfer1::Dims3{4, 16, grid}); 157 | shuffle1->setSecondTranspose(nvinfer1::Permutation{1, 0, 2}); 158 | nvinfer1::ISoftMaxLayer* softmax = 
network->addSoftMax(*shuffle1->getOutput(0)); 159 | 160 | nvinfer1::Weights bias_empty{nvinfer1::DataType::kFLOAT, nullptr, 0}; 161 | nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(*softmax->getOutput(0), 1, nvinfer1::DimsHW{1, 1}, weightMap[lname], bias_empty); 162 | conv->setStrideNd(nvinfer1::DimsHW{s, s}); 163 | conv->setPaddingNd(nvinfer1::DimsHW{p, p}); 164 | 165 | nvinfer1::IShuffleLayer* shuffle2 = network->addShuffle(*conv->getOutput(0)); 166 | shuffle2->setReshapeDimensions(nvinfer1::Dims2{4, grid}); 167 | 168 | return shuffle2; 169 | } 170 | 171 | 172 | nvinfer1::IPluginV2Layer* addYoLoLayer(nvinfer1::INetworkDefinition *network, std::vector dets) { 173 | auto creator = getPluginRegistry()->getPluginCreator("YoloLayer_TRT", "1"); 174 | 175 | nvinfer1::PluginField plugin_fields[1]; 176 | int netinfo[4] = {kNumClass, kInputW, kInputH, kMaxNumOutputBbox}; 177 | plugin_fields[0].data = netinfo; 178 | plugin_fields[0].length = 4; 179 | plugin_fields[0].name = "netinfo"; 180 | plugin_fields[0].type = nvinfer1::PluginFieldType::kFLOAT32; 181 | 182 | 183 | nvinfer1::PluginFieldCollection plugin_data; 184 | plugin_data.nbFields = 1; 185 | plugin_data.fields = plugin_fields; 186 | nvinfer1::IPluginV2 *plugin_obj = creator->createPlugin("yololayer", &plugin_data); 187 | std::vector input_tensors; 188 | for (auto det: dets) { 189 | input_tensors.push_back(det->getOutput(0)); 190 | } 191 | auto yolo = network->addPluginV2(&input_tensors[0], input_tensors.size(), *plugin_obj); 192 | return yolo; 193 | } 194 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/src/calibrator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "calibrator.h" 6 | #include "cuda_utils.h" 7 | #include "utils.h" 8 | 9 | Int8EntropyCalibrator2::Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, 10 | const char* input_blob_name, bool read_cache) 11 | : batchsize_(batchsize) 12 | , input_w_(input_w) 13 | , input_h_(input_h) 14 | , img_idx_(0) 15 | , img_dir_(img_dir) 16 | , calib_table_name_(calib_table_name) 17 | , input_blob_name_(input_blob_name) 18 | , read_cache_(read_cache) 19 | { 20 | input_count_ = 3 * input_w * input_h * batchsize; 21 | CUDA_CHECK(cudaMalloc(&device_input_, input_count_ * sizeof(float))); 22 | read_files_in_dir(img_dir, img_files_); 23 | } 24 | 25 | Int8EntropyCalibrator2::~Int8EntropyCalibrator2() 26 | { 27 | CUDA_CHECK(cudaFree(device_input_)); 28 | } 29 | 30 | int Int8EntropyCalibrator2::getBatchSize() const TRT_NOEXCEPT 31 | { 32 | return batchsize_; 33 | } 34 | 35 | bool Int8EntropyCalibrator2::getBatch(void* bindings[], const char* names[], int nbBindings) TRT_NOEXCEPT 36 | { 37 | if (img_idx_ + batchsize_ > (int)img_files_.size()) { 38 | return false; 39 | } 40 | 41 | std::vector input_imgs_; 42 | for (int i = img_idx_; i < img_idx_ + batchsize_; i++) { 43 | std::cout << img_files_[i] << " " << i << std::endl; 44 | cv::Mat temp = cv::imread(img_dir_ + img_files_[i]); 45 | if (temp.empty()){ 46 | std::cerr << "Fatal error: image cannot open!" 
<< std::endl; 47 | return false; 48 | } 49 | cv::Mat pr_img = preprocess_img(temp, input_w_, input_h_); 50 | input_imgs_.push_back(pr_img); 51 | } 52 | img_idx_ += batchsize_; 53 | cv::Mat blob = cv::dnn::blobFromImages(input_imgs_, 1.0 / 255.0, cv::Size(input_w_, input_h_), cv::Scalar(0, 0, 0), true, false); 54 | CUDA_CHECK(cudaMemcpy(device_input_, blob.ptr(0), input_count_ * sizeof(float), cudaMemcpyHostToDevice)); 55 | assert(!strcmp(names[0], input_blob_name_)); 56 | bindings[0] = device_input_; 57 | return true; 58 | } 59 | 60 | const void* Int8EntropyCalibrator2::readCalibrationCache(size_t& length) TRT_NOEXCEPT 61 | { 62 | std::cout << "reading calib cache: " << calib_table_name_ << std::endl; 63 | calib_cache_.clear(); 64 | std::ifstream input(calib_table_name_, std::ios::binary); 65 | input >> std::noskipws; 66 | if (read_cache_ && input.good()) 67 | { 68 | std::copy(std::istream_iterator(input), std::istream_iterator(), std::back_inserter(calib_cache_)); 69 | } 70 | length = calib_cache_.size(); 71 | return length ? calib_cache_.data() : nullptr; 72 | } 73 | 74 | void Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, size_t length) TRT_NOEXCEPT 75 | { 76 | std::cout << "writing calib cache: " << calib_table_name_ << " size: " << length << std::endl; 77 | std::ofstream output(calib_table_name_, std::ios::binary); 78 | output.write(reinterpret_cast(cache), length); 79 | } 80 | 81 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/src/postprocess.cpp: -------------------------------------------------------------------------------- 1 | #include "postprocess.h" 2 | 3 | 4 | cv::Rect get_rect(cv::Mat &img, float bbox[4]) { 5 | float l, r, t, b; 6 | float r_w = kInputW / (img.cols * 1.0); 7 | float r_h = kInputH / (img.rows * 1.0); 8 | 9 | if (r_h > r_w) { 10 | l = bbox[0]; 11 | r = bbox[2]; 12 | t = bbox[1] - (kInputH - r_w * img.rows) / 2; 13 | b = bbox[3] - (kInputH - r_w * img.rows) / 2; 14 | l = l / r_w; 15 | r = r / r_w; 16 | t = t / r_w; 17 | b = b / r_w; 18 | } else { 19 | l = bbox[0] - (kInputW - r_h * img.cols) / 2; 20 | r = bbox[2] - (kInputW - r_h * img.cols) / 2; 21 | t = bbox[1]; 22 | b = bbox[3]; 23 | l = l / r_h; 24 | r = r / r_h; 25 | t = t / r_h; 26 | b = b / r_h; 27 | } 28 | return cv::Rect(round(l), round(t), round(r - l), round(b - t)); 29 | } 30 | 31 | static float iou(float lbox[4], float rbox[4]) { 32 | float interBox[] = { 33 | (std::max)(lbox[0], rbox[0]), //left 34 | (std::min)(lbox[2], rbox[2]), //right 35 | (std::max)(lbox[1], rbox[1]), //top 36 | (std::min)(lbox[3], rbox[3]), //bottom 37 | }; 38 | 39 | if (interBox[2] > interBox[3] || interBox[0] > interBox[1]) 40 | return 0.0f; 41 | 42 | float interBoxS = (interBox[1] - interBox[0]) * (interBox[3] - interBox[2]); 43 | float unionBoxS = (lbox[2] - lbox[0]) * (lbox[3] - lbox[1]) + (rbox[2] - rbox[0]) * (rbox[3] - rbox[1]) - interBoxS; 44 | return interBoxS / unionBoxS; 45 | } 46 | 47 | static bool cmp(const Detection &a, const Detection &b) { 48 | return a.conf > b.conf; 49 | } 50 | 51 | void nms(std::vector &res, float *output, float conf_thresh, float nms_thresh) { 52 | int det_size = sizeof(Detection) / sizeof(float); 53 | std::map> m; 54 | 55 | for (int i = 0; i < output[0]; i++) { 56 | if (output[1 + det_size * i + 4] <= conf_thresh) continue; 57 | Detection det; 58 | memcpy(&det, &output[1 + det_size * i], det_size * sizeof(float)); 59 | if (m.count(det.class_id) == 0) m.emplace(det.class_id, std::vector()); 60 | 
m[det.class_id].push_back(det);
61 | }
62 | for (auto it = m.begin(); it != m.end(); it++) {
63 | auto &dets = it->second;
64 | std::sort(dets.begin(), dets.end(), cmp);
65 | for (size_t i = 0; i < dets.size(); ++i) {
66 | auto &item = dets[i];
67 | res.push_back(item);
68 | for (size_t n = i + 1; n < dets.size(); ++n) {
69 | if (iou(item.bbox, dets[n].bbox) > nms_thresh) {
70 | dets.erase(dets.begin() + n);
71 | --n;
72 | }
73 | }
74 | }
75 | }
76 | }
77 | 
78 | void batch_nms(std::vector<std::vector<Detection>> &res_batch, float *output, int batch_size, int output_size,
79 | float conf_thresh, float nms_thresh) {
80 | res_batch.resize(batch_size);
81 | for (int i = 0; i < batch_size; i++) {
82 | nms(res_batch[i], &output[i * output_size], conf_thresh, nms_thresh);
83 | }
84 | }
85 | 
86 | void process_decode_ptr_host(std::vector<Detection> &res, const float* decode_ptr_host, int bbox_element, cv::Mat& img, int count) {
87 | Detection det;
88 | for (int i = 0; i < count; i++) {
89 | int basic_pos = 1 + i * bbox_element;
90 | int keep_flag = decode_ptr_host[basic_pos + 6];
91 | if (keep_flag == 1) {
92 | det.bbox[0] = decode_ptr_host[basic_pos + 0];
93 | det.bbox[1] = decode_ptr_host[basic_pos + 1];
94 | det.bbox[2] = decode_ptr_host[basic_pos + 2];
95 | det.bbox[3] = decode_ptr_host[basic_pos + 3];
96 | det.conf = decode_ptr_host[basic_pos + 4];
97 | det.class_id = decode_ptr_host[basic_pos + 5];
98 | res.push_back(det);
99 | }
100 | }
101 | }
102 | 
103 | void batch_process(std::vector<std::vector<Detection>> &res_batch, const float* decode_ptr_host, int batch_size, int bbox_element, const std::vector<cv::Mat>& img_batch) {
104 | res_batch.resize(batch_size);
105 | int count = static_cast<int>(*decode_ptr_host);
106 | count = std::min(count, kMaxNumOutputBbox);
107 | for (int i = 0; i < batch_size; i++) {
108 | auto& img = const_cast<cv::Mat&>(img_batch[i]);
109 | process_decode_ptr_host(res_batch[i], &decode_ptr_host[i * count], bbox_element, img, count);
110 | }
111 | }
112 | 
113 | void draw_bbox(std::vector<cv::Mat> &img_batch, std::vector<std::vector<Detection>> &res_batch) {
114 | for (size_t i = 0; i < img_batch.size(); i++) {
115 | auto &res = res_batch[i];
116 | cv::Mat img = img_batch[i];
117 | for (size_t j = 0; j < res.size(); j++) {
118 | cv::Rect r = get_rect(img, res[j].bbox);
119 | cv::rectangle(img, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
120 | cv::putText(img, std::to_string((int) res[j].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN,
121 | 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
122 | }
123 | }
124 | }
125 | 
-------------------------------------------------------------------------------- /tensorrtx-yolov8/src/postprocess.cu: --------------------------------------------------------------------------------
1 | //
2 | // Created by lindsay on 23-7-17.
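// [Editor's layout note, not part of the original source] decode_kernel and
// nms_kernel below share one flat float buffer. Assuming bbox_element (defined
// in types.h, not shown here) is 7, matching the seven values written per box:
//
//     parray[0]                             -> running box count (atomicAdd target)
//     parray[1 + i*bbox_element + 0..3]     -> left, top, right, bottom
//     parray[1 + i*bbox_element + 4]        -> confidence
//     parray[1 + i*bbox_element + 5]        -> class label
//     parray[1 + i*bbox_element + 6]        -> keep flag (1 = keep, 0 = suppressed)
//
// process_decode_ptr_host() in postprocess.cpp reads this same layout on the host.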
3 | // 4 | #include "types.h" 5 | #include "postprocess.h" 6 | 7 | static __global__ void 8 | decode_kernel(float *predict, int num_bboxes, float confidence_threshold, float *parray, int max_objects) { 9 | 10 | float count = predict[0]; 11 | int position = (blockDim.x * blockIdx.x + threadIdx.x); 12 | if (position >= count) 13 | return; 14 | float *pitem = predict + 1 + position * 6; 15 | int index = atomicAdd(parray, 1); 16 | if (index >= max_objects) 17 | return; 18 | float confidence = pitem[4]; 19 | if (confidence < confidence_threshold) 20 | return; 21 | float left = pitem[0]; 22 | float top = pitem[1]; 23 | float right = pitem[2]; 24 | float bottom = pitem[3]; 25 | float label = pitem[5]; 26 | float *pout_item = parray + 1 + index * bbox_element; 27 | *pout_item++ = left; 28 | *pout_item++ = top; 29 | *pout_item++ = right; 30 | *pout_item++ = bottom; 31 | *pout_item++ = confidence; 32 | *pout_item++ = label; 33 | *pout_item++ = 1; // 1 = keep, 0 = ignore 34 | } 35 | 36 | static __device__ float 37 | box_iou(float aleft, float atop, float aright, float abottom, float bleft, float btop, float bright, float bbottom) { 38 | 39 | float cleft = max(aleft, bleft); 40 | float ctop = max(atop, btop); 41 | float cright = min(aright, bright); 42 | float cbottom = min(abottom, bbottom); 43 | 44 | float c_area = max(cright - cleft, 0.0f) * max(cbottom - ctop, 0.0f); 45 | if (c_area == 0.0f) 46 | return 0.0f; 47 | 48 | float a_area = max(0.0f, aright - aleft) * max(0.0f, abottom - atop); 49 | float b_area = max(0.0f, bright - bleft) * max(0.0f, bbottom - btop); 50 | return c_area / (a_area + b_area - c_area); 51 | } 52 | 53 | static __global__ void nms_kernel(float *bboxes, int max_objects, float threshold) { 54 | 55 | int position = (blockDim.x * blockIdx.x + threadIdx.x); 56 | int count = bboxes[0]; 57 | 58 | // float count = 0.0f; 59 | if (position >= count) 60 | return; 61 | 62 | float *pcurrent = bboxes + 1 + position * bbox_element; 63 | for (int i = 1; i < count; ++i) { 64 | float *pitem = bboxes + 1 + i * bbox_element; 65 | if (i == position || pcurrent[5] != pitem[5]) continue; 66 | 67 | if (pitem[4] >= pcurrent[4]) { 68 | if (pitem[4] == pcurrent[4] && i < position) 69 | continue; 70 | 71 | float iou = box_iou( 72 | pcurrent[0], pcurrent[1], pcurrent[2], pcurrent[3], 73 | pitem[0], pitem[1], pitem[2], pitem[3] 74 | ); 75 | 76 | if (iou > threshold) { 77 | pcurrent[6] = 0; 78 | return; 79 | } 80 | } 81 | } 82 | } 83 | 84 | void cuda_decode(float *predict, int num_bboxes, float confidence_threshold, float *parray, int max_objects, 85 | cudaStream_t stream) { 86 | int block = 256; 87 | int grid = ceil(num_bboxes / (float) block); 88 | decode_kernel << < 89 | grid, block, 0, stream >> > ((float *) predict, num_bboxes, confidence_threshold, parray, max_objects); 90 | 91 | } 92 | 93 | void cuda_nms(float *parray, float nms_threshold, int max_objects, cudaStream_t stream) { 94 | int block = max_objects < 256 ? 
max_objects : 256; 95 | int grid = ceil(max_objects / (float) block); 96 | nms_kernel << < grid, block, 0, stream >> > (parray, max_objects, nms_threshold); 97 | 98 | } 99 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/src/preprocess.cu: -------------------------------------------------------------------------------- 1 | #include "preprocess.h" 2 | #include "cuda_utils.h" 3 | 4 | static uint8_t *img_buffer_host = nullptr; 5 | static uint8_t *img_buffer_device = nullptr; 6 | 7 | 8 | __global__ void 9 | warpaffine_kernel(uint8_t *src, int src_line_size, int src_width, int src_height, float *dst, int dst_width, 10 | int dst_height, uint8_t const_value_st, AffineMatrix d2s, int edge) { 11 | int position = blockDim.x * blockIdx.x + threadIdx.x; 12 | if (position >= edge) return; 13 | 14 | float m_x1 = d2s.value[0]; 15 | float m_y1 = d2s.value[1]; 16 | float m_z1 = d2s.value[2]; 17 | float m_x2 = d2s.value[3]; 18 | float m_y2 = d2s.value[4]; 19 | float m_z2 = d2s.value[5]; 20 | 21 | int dx = position % dst_width; 22 | int dy = position / dst_width; 23 | float src_x = m_x1 * dx + m_y1 * dy + m_z1 + 0.5f; 24 | float src_y = m_x2 * dx + m_y2 * dy + m_z2 + 0.5f; 25 | float c0, c1, c2; 26 | 27 | if (src_x <= -1 || src_x >= src_width || src_y <= -1 || src_y >= src_height) { 28 | // out of range 29 | c0 = const_value_st; 30 | c1 = const_value_st; 31 | c2 = const_value_st; 32 | } else { 33 | int y_low = floorf(src_y); 34 | int x_low = floorf(src_x); 35 | int y_high = y_low + 1; 36 | int x_high = x_low + 1; 37 | 38 | uint8_t const_value[] = {const_value_st, const_value_st, const_value_st}; 39 | float ly = src_y - y_low; 40 | float lx = src_x - x_low; 41 | float hy = 1 - ly; 42 | float hx = 1 - lx; 43 | float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; 44 | uint8_t *v1 = const_value; 45 | uint8_t *v2 = const_value; 46 | uint8_t *v3 = const_value; 47 | uint8_t *v4 = const_value; 48 | 49 | if (y_low >= 0) { 50 | if (x_low >= 0) 51 | v1 = src + y_low * src_line_size + x_low * 3; 52 | 53 | if (x_high < src_width) 54 | v2 = src + y_low * src_line_size + x_high * 3; 55 | } 56 | 57 | if (y_high < src_height) { 58 | if (x_low >= 0) 59 | v3 = src + y_high * src_line_size + x_low * 3; 60 | 61 | if (x_high < src_width) 62 | v4 = src + y_high * src_line_size + x_high * 3; 63 | } 64 | 65 | c0 = w1 * v1[0] + w2 * v2[0] + w3 * v3[0] + w4 * v4[0]; 66 | c1 = w1 * v1[1] + w2 * v2[1] + w3 * v3[1] + w4 * v4[1]; 67 | c2 = w1 * v1[2] + w2 * v2[2] + w3 * v3[2] + w4 * v4[2]; 68 | } 69 | 70 | // bgr to rgb 71 | float t = c2; 72 | c2 = c0; 73 | c0 = t; 74 | 75 | // normalization 76 | c0 = c0 / 255.0f; 77 | c1 = c1 / 255.0f; 78 | c2 = c2 / 255.0f; 79 | 80 | // rgbrgbrgb to rrrgggbbb 81 | int area = dst_width * dst_height; 82 | float *pdst_c0 = dst + dy * dst_width + dx; 83 | float *pdst_c1 = pdst_c0 + area; 84 | float *pdst_c2 = pdst_c1 + area; 85 | *pdst_c0 = c0; 86 | *pdst_c1 = c1; 87 | *pdst_c2 = c2; 88 | } 89 | 90 | 91 | 92 | 93 | void cuda_preprocess(uint8_t *src, int src_width, int src_height, float *dst, int dst_width, int dst_height, 94 | cudaStream_t stream) { 95 | int img_size = src_width * src_height * 3; 96 | // copy data to pinned memory 97 | memcpy(img_buffer_host, src, img_size); 98 | // copy data to device memory 99 | CUDA_CHECK(cudaMemcpyAsync(img_buffer_device, img_buffer_host, img_size, cudaMemcpyHostToDevice, stream)); 100 | 101 | AffineMatrix s2d, d2s; 102 | float scale = std::min(dst_height / (float) src_height, dst_width / (float) src_width); 
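// [Worked example, not part of the original source] For a 1920x1080 source and a
// 640x640 destination: scale = min(640/1080, 640/1920) = 1/3, so the source maps
// to a 640x360 letterboxed region and s2d becomes
//
//     x' = x/3           (s2d.value[0..2] = 1/3, 0, 0)
//     y' = y/3 + 140     (s2d.value[3..5] = 0, 1/3, 140)
//
// The kernel walks destination pixels, so it needs the inverse mapping; that is
// why m2x3_s2d is inverted into d2s below before being handed to the kernel.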
103 | 104 | s2d.value[0] = scale; 105 | s2d.value[1] = 0; 106 | s2d.value[2] = -scale * src_width * 0.5 + dst_width * 0.5; 107 | s2d.value[3] = 0; 108 | s2d.value[4] = scale; 109 | s2d.value[5] = -scale * src_height * 0.5 + dst_height * 0.5; 110 | cv::Mat m2x3_s2d(2, 3, CV_32F, s2d.value); 111 | cv::Mat m2x3_d2s(2, 3, CV_32F, d2s.value); 112 | cv::invertAffineTransform(m2x3_s2d, m2x3_d2s); 113 | 114 | memcpy(d2s.value, m2x3_d2s.ptr(0), sizeof(d2s.value)); 115 | 116 | int jobs = dst_height * dst_width; 117 | int threads = 256; 118 | int blocks = ceil(jobs / (float) threads); 119 | warpaffine_kernel<<>>( 120 | img_buffer_device, src_width * 3, src_width, 121 | src_height, dst, dst_width, 122 | dst_height, 128, d2s, jobs); 123 | } 124 | 125 | 126 | void cuda_batch_preprocess(std::vector &img_batch, 127 | float *dst, int dst_width, int dst_height, 128 | cudaStream_t stream) { 129 | int dst_size = dst_width * dst_height * 3; 130 | for (size_t i = 0; i < img_batch.size(); i++) { 131 | cuda_preprocess(img_batch[i].ptr(), img_batch[i].cols, img_batch[i].rows, &dst[dst_size * i], dst_width, 132 | dst_height, stream); 133 | CUDA_CHECK(cudaStreamSynchronize(stream)); 134 | } 135 | } 136 | 137 | 138 | 139 | 140 | 141 | void cuda_preprocess_init(int max_image_size) { 142 | // prepare input data in pinned memory 143 | CUDA_CHECK(cudaMallocHost((void **) &img_buffer_host, max_image_size * 3)); 144 | // prepare input data in device memory 145 | CUDA_CHECK(cudaMalloc((void **) &img_buffer_device, max_image_size * 3)); 146 | } 147 | 148 | void cuda_preprocess_destroy() { 149 | CUDA_CHECK(cudaFree(img_buffer_device)); 150 | CUDA_CHECK(cudaFreeHost(img_buffer_host)); 151 | } 152 | 153 | 154 | 155 | 156 | -------------------------------------------------------------------------------- /tensorrtx-yolov8/yolov8_trt.py: -------------------------------------------------------------------------------- 1 | """ 2 | An example that uses TensorRT's Python api to make inferences. 3 | """ 4 | import ctypes 5 | import os 6 | import shutil 7 | import random 8 | import sys 9 | import threading 10 | import time 11 | import cv2 12 | import numpy as np 13 | import pycuda.autoinit 14 | import pycuda.driver as cuda 15 | import tensorrt as trt 16 | 17 | CONF_THRESH = 0.5 18 | IOU_THRESHOLD = 0.4 19 | 20 | 21 | def get_img_path_batches(batch_size, img_dir): 22 | ret = [] 23 | batch = [] 24 | for root, dirs, files in os.walk(img_dir): 25 | for name in files: 26 | if len(batch) == batch_size: 27 | ret.append(batch) 28 | batch = [] 29 | batch.append(os.path.join(root, name)) 30 | if len(batch) > 0: 31 | ret.append(batch) 32 | return ret 33 | 34 | 35 | def plot_one_box(x, img, color=None, label=None, line_thickness=None): 36 | """ 37 | description: Plots one bounding box on image img, 38 | this function comes from YoLov8 project. 
39 | param: 40 | x: a box likes [x1,y1,x2,y2] 41 | img: a opencv image object 42 | color: color to draw rectangle, such as (0,255,0) 43 | label: str 44 | line_thickness: int 45 | return: 46 | no return 47 | 48 | """ 49 | tl = ( 50 | line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 51 | ) # line/font thickness 52 | color = color or [random.randint(0, 255) for _ in range(3)] 53 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) 54 | cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) 55 | if label: 56 | tf = max(tl - 1, 1) # font thickness 57 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] 58 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 59 | cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled 60 | cv2.putText( 61 | img, 62 | label, 63 | (c1[0], c1[1] - 2), 64 | 0, 65 | tl / 3, 66 | [225, 255, 255], 67 | thickness=tf, 68 | lineType=cv2.LINE_AA, 69 | ) 70 | 71 | 72 | class YoLov8TRT(object): 73 | """ 74 | description: A YOLOv8 class that warps TensorRT ops, preprocess and postprocess ops. 75 | """ 76 | 77 | def __init__(self, engine_file_path): 78 | # Create a Context on this device, 79 | self.ctx = cuda.Device(0).make_context() 80 | stream = cuda.Stream() 81 | TRT_LOGGER = trt.Logger(trt.Logger.INFO) 82 | runtime = trt.Runtime(TRT_LOGGER) 83 | 84 | # Deserialize the engine from file 85 | with open(engine_file_path, "rb") as f: 86 | engine = runtime.deserialize_cuda_engine(f.read()) 87 | context = engine.create_execution_context() 88 | 89 | host_inputs = [] 90 | cuda_inputs = [] 91 | host_outputs = [] 92 | cuda_outputs = [] 93 | bindings = [] 94 | 95 | for binding in engine: 96 | print('bingding:', binding, engine.get_binding_shape(binding)) 97 | size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size 98 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 99 | # Allocate host and device buffers 100 | host_mem = cuda.pagelocked_empty(size, dtype) 101 | cuda_mem = cuda.mem_alloc(host_mem.nbytes) 102 | # Append the device buffer to device bindings. 103 | bindings.append(int(cuda_mem)) 104 | # Append to the appropriate list. 105 | if engine.binding_is_input(binding): 106 | self.input_w = engine.get_binding_shape(binding)[-1] 107 | self.input_h = engine.get_binding_shape(binding)[-2] 108 | host_inputs.append(host_mem) 109 | cuda_inputs.append(cuda_mem) 110 | else: 111 | host_outputs.append(host_mem) 112 | cuda_outputs.append(cuda_mem) 113 | 114 | # Store 115 | self.stream = stream 116 | self.context = context 117 | self.engine = engine 118 | self.host_inputs = host_inputs 119 | self.cuda_inputs = cuda_inputs 120 | self.host_outputs = host_outputs 121 | self.cuda_outputs = cuda_outputs 122 | self.bindings = bindings 123 | self.batch_size = engine.max_batch_size 124 | 125 | def infer(self, raw_image_generator): 126 | threading.Thread.__init__(self) 127 | # Make self the active context, pushing it on top of the context stack. 
128 | self.ctx.push() 129 | # Restore 130 | stream = self.stream 131 | context = self.context 132 | engine = self.engine 133 | host_inputs = self.host_inputs 134 | cuda_inputs = self.cuda_inputs 135 | host_outputs = self.host_outputs 136 | cuda_outputs = self.cuda_outputs 137 | bindings = self.bindings 138 | # Do image preprocess 139 | batch_image_raw = [] 140 | batch_origin_h = [] 141 | batch_origin_w = [] 142 | batch_input_image = np.empty(shape=[self.batch_size, 3, self.input_h, self.input_w]) 143 | for i, image_raw in enumerate(raw_image_generator): 144 | input_image, image_raw, origin_h, origin_w = self.preprocess_image(image_raw) 145 | batch_image_raw.append(image_raw) 146 | batch_origin_h.append(origin_h) 147 | batch_origin_w.append(origin_w) 148 | np.copyto(batch_input_image[i], input_image) 149 | batch_input_image = np.ascontiguousarray(batch_input_image) 150 | 151 | # Copy input image to host buffer 152 | np.copyto(host_inputs[0], batch_input_image.ravel()) 153 | start = time.time() 154 | # Transfer input data to the GPU. 155 | cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream) 156 | # Run inference. 157 | context.execute_async(batch_size=self.batch_size, bindings=bindings, stream_handle=stream.handle) 158 | # Transfer predictions back from the GPU. 159 | cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream) 160 | # Synchronize the stream 161 | stream.synchronize() 162 | end = time.time() 163 | # Remove any context from the top of the context stack, deactivating it. 164 | self.ctx.pop() 165 | # Here we use the first row of output in that batch_size = 1 166 | output = host_outputs[0] 167 | # Do postprocess 168 | for i in range(self.batch_size): 169 | result_boxes, result_scores, result_classid = self.post_process( 170 | output[i * 6001: (i + 1) * 6001], batch_origin_h[i], batch_origin_w[i] 171 | ) 172 | # Draw rectangles and labels on the original image 173 | for j in range(len(result_boxes)): 174 | box = result_boxes[j] 175 | plot_one_box( 176 | box, 177 | batch_image_raw[i], 178 | label="{}:{:.2f}".format( 179 | categories[int(result_classid[j])], result_scores[j] 180 | ), 181 | ) 182 | return batch_image_raw, end - start 183 | 184 | def destroy(self): 185 | # Remove any context from the top of the context stack, deactivating it. 186 | self.ctx.pop() 187 | 188 | def get_raw_image(self, image_path_batch): 189 | """ 190 | description: Read an image from image path 191 | """ 192 | for img_path in image_path_batch: 193 | yield cv2.imread(img_path) 194 | 195 | def get_raw_image_zeros(self, image_path_batch=None): 196 | """ 197 | description: Ready data for warmup 198 | """ 199 | for _ in range(self.batch_size): 200 | yield np.zeros([self.input_h, self.input_w, 3], dtype=np.uint8) 201 | 202 | def preprocess_image(self, raw_bgr_image): 203 | """ 204 | description: Convert BGR image to RGB, 205 | resize and pad it to target size, normalize to [0,1], 206 | transform to NCHW format. 
207 | param: 208 | input_image_path: str, image path 209 | return: 210 | image: the processed image 211 | image_raw: the original image 212 | h: original height 213 | w: original width 214 | """ 215 | image_raw = raw_bgr_image 216 | h, w, c = image_raw.shape 217 | image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB) 218 | # Calculate widht and height and paddings 219 | r_w = self.input_w / w 220 | r_h = self.input_h / h 221 | if r_h > r_w: 222 | tw = self.input_w 223 | th = int(r_w * h) 224 | tx1 = tx2 = 0 225 | ty1 = int((self.input_h - th) / 2) 226 | ty2 = self.input_h - th - ty1 227 | else: 228 | tw = int(r_h * w) 229 | th = self.input_h 230 | tx1 = int((self.input_w - tw) / 2) 231 | tx2 = self.input_w - tw - tx1 232 | ty1 = ty2 = 0 233 | # Resize the image with long side while maintaining ratio 234 | image = cv2.resize(image, (tw, th)) 235 | # Pad the short side with (128,128,128) 236 | image = cv2.copyMakeBorder( 237 | image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, None, (128, 128, 128) 238 | ) 239 | image = image.astype(np.float32) 240 | # Normalize to [0,1] 241 | image /= 255.0 242 | # HWC to CHW format: 243 | image = np.transpose(image, [2, 0, 1]) 244 | # CHW to NCHW format 245 | image = np.expand_dims(image, axis=0) 246 | # Convert the image to row-major order, also known as "C order": 247 | image = np.ascontiguousarray(image) 248 | return image, image_raw, h, w 249 | 250 | def xywh2xyxy(self, origin_h, origin_w, x): 251 | """ 252 | description: Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 253 | param: 254 | origin_h: height of original image 255 | origin_w: width of original image 256 | x: A boxes numpy, each row is a box [center_x, center_y, w, h] 257 | return: 258 | y: A boxes numpy, each row is a box [x1, y1, x2, y2] 259 | """ 260 | y = np.zeros_like(x) 261 | r_w = self.input_w / origin_w 262 | r_h = self.input_h / origin_h 263 | if r_h > r_w: 264 | y[:, 0] = x[:, 0] 265 | y[:, 2] = x[:, 2] 266 | y[:, 1] = x[:, 1] - (self.input_h - r_w * origin_h) / 2 267 | y[:, 3] = x[:, 3] - (self.input_h - r_w * origin_h) / 2 268 | y /= r_w 269 | else: 270 | y[:, 0] = x[:, 0] - (self.input_w - r_h * origin_w) / 2 271 | y[:, 2] = x[:, 2] - (self.input_w - r_h * origin_w) / 2 272 | y[:, 1] = x[:, 1] 273 | y[:, 3] = x[:, 3] 274 | y /= r_h 275 | 276 | return y 277 | 278 | def post_process(self, output, origin_h, origin_w): 279 | """ 280 | description: postprocess the prediction 281 | param: 282 | output: A numpy likes [num_boxes,cx,cy,w,h,conf,cls_id, cx,cy,w,h,conf,cls_id, ...] 
283 | origin_h: height of original image 284 | origin_w: width of original image 285 | return: 286 | result_boxes: finally boxes, a boxes numpy, each row is a box [x1, y1, x2, y2] 287 | result_scores: finally scores, a numpy, each element is the score correspoing to box 288 | result_classid: finally classid, a numpy, each element is the classid correspoing to box 289 | """ 290 | # Get the num of boxes detected 291 | num = int(output[0]) 292 | # Reshape to a two dimentional ndarray 293 | pred = np.reshape(output[1:], (-1, 6))[:num, :] 294 | # Do nms 295 | boxes = self.non_max_suppression(pred, origin_h, origin_w, conf_thres=CONF_THRESH, nms_thres=IOU_THRESHOLD) 296 | result_boxes = boxes[:, :4] if len(boxes) else np.array([]) 297 | result_scores = boxes[:, 4] if len(boxes) else np.array([]) 298 | result_classid = boxes[:, 5] if len(boxes) else np.array([]) 299 | return result_boxes, result_scores, result_classid 300 | 301 | def bbox_iou(self, box1, box2, x1y1x2y2=True): 302 | """ 303 | description: compute the IoU of two bounding boxes 304 | param: 305 | box1: A box coordinate (can be (x1, y1, x2, y2) or (x, y, w, h)) 306 | box2: A box coordinate (can be (x1, y1, x2, y2) or (x, y, w, h)) 307 | x1y1x2y2: select the coordinate format 308 | return: 309 | iou: computed iou 310 | """ 311 | if not x1y1x2y2: 312 | # Transform from center and width to exact coordinates 313 | b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 314 | b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 315 | b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 316 | b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 317 | else: 318 | # Get the coordinates of bounding boxes 319 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] 320 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] 321 | 322 | # Get the coordinates of the intersection rectangle 323 | inter_rect_x1 = np.maximum(b1_x1, b2_x1) 324 | inter_rect_y1 = np.maximum(b1_y1, b2_y1) 325 | inter_rect_x2 = np.minimum(b1_x2, b2_x2) 326 | inter_rect_y2 = np.minimum(b1_y2, b2_y2) 327 | # Intersection area 328 | inter_area = np.clip(inter_rect_x2 - inter_rect_x1 + 1, 0, None) * \ 329 | np.clip(inter_rect_y2 - inter_rect_y1 + 1, 0, None) 330 | # Union Area 331 | b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) 332 | b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) 333 | 334 | iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) 335 | 336 | return iou 337 | 338 | def non_max_suppression(self, prediction, origin_h, origin_w, conf_thres=0.5, nms_thres=0.4): 339 | """ 340 | description: Removes detections with lower object confidence score than 'conf_thres' and performs 341 | Non-Maximum Suppression to further filter detections. 
342 | param: 343 | prediction: detections, (x1, y1, x2, y2, conf, cls_id) 344 | origin_h: original image height 345 | origin_w: original image width 346 | conf_thres: a confidence threshold to filter detections 347 | nms_thres: a iou threshold to filter detections 348 | return: 349 | boxes: output after nms with the shape (x1, y1, x2, y2, conf, cls_id) 350 | """ 351 | # Get the boxes that score > CONF_THRESH 352 | boxes = prediction[prediction[:, 4] >= conf_thres] 353 | # Trandform bbox from [center_x, center_y, w, h] to [x1, y1, x2, y2] 354 | boxes[:, :4] = self.xywh2xyxy(origin_h, origin_w, boxes[:, :4]) 355 | # clip the coordinates 356 | boxes[:, 0] = np.clip(boxes[:, 0], 0, origin_w - 1) 357 | boxes[:, 2] = np.clip(boxes[:, 2], 0, origin_w - 1) 358 | boxes[:, 1] = np.clip(boxes[:, 1], 0, origin_h - 1) 359 | boxes[:, 3] = np.clip(boxes[:, 3], 0, origin_h - 1) 360 | # Object confidence 361 | confs = boxes[:, 4] 362 | # Sort by the confs 363 | boxes = boxes[np.argsort(-confs)] 364 | # Perform non-maximum suppression 365 | keep_boxes = [] 366 | while boxes.shape[0]: 367 | large_overlap = self.bbox_iou(np.expand_dims(boxes[0, :4], 0), boxes[:, :4]) > nms_thres 368 | label_match = boxes[0, -1] == boxes[:, -1] 369 | # Indices of boxes with lower confidence scores, large IOUs and matching labels 370 | invalid = large_overlap & label_match 371 | keep_boxes += [boxes[0]] 372 | boxes = boxes[~invalid] 373 | boxes = np.stack(keep_boxes, 0) if len(keep_boxes) else np.array([]) 374 | return boxes 375 | 376 | 377 | class inferThread(threading.Thread): 378 | def __init__(self, yolov8_wrapper, image_path_batch): 379 | threading.Thread.__init__(self) 380 | self.yolov8_wrapper = yolov8_wrapper 381 | self.image_path_batch = image_path_batch 382 | 383 | def run(self): 384 | batch_image_raw, use_time = self.yolov8_wrapper.infer(self.yolov8_wrapper.get_raw_image(self.image_path_batch)) 385 | for i, img_path in enumerate(self.image_path_batch): 386 | parent, filename = os.path.split(img_path) 387 | save_name = os.path.join('output', filename) 388 | # Save image 389 | cv2.imwrite(save_name, batch_image_raw[i]) 390 | print('input->{}, time->{:.2f}ms, saving into output/'.format(self.image_path_batch, use_time * 1000)) 391 | 392 | 393 | class warmUpThread(threading.Thread): 394 | def __init__(self, yolov8_wrapper): 395 | threading.Thread.__init__(self) 396 | self.yolov8_wrapper = yolov8_wrapper 397 | 398 | def run(self): 399 | batch_image_raw, use_time = self.yolov8_wrapper.infer(self.yolov8_wrapper.get_raw_image_zeros()) 400 | print('warm_up->{}, time->{:.2f}ms'.format(batch_image_raw[0].shape, use_time * 1000)) 401 | 402 | 403 | if __name__ == "__main__": 404 | # load custom plugin and engine 405 | PLUGIN_LIBRARY = "build/libmyplugins.so" 406 | engine_file_path = "yolov8n.engine" 407 | 408 | if len(sys.argv) > 1: 409 | engine_file_path = sys.argv[1] 410 | if len(sys.argv) > 2: 411 | PLUGIN_LIBRARY = sys.argv[2] 412 | 413 | ctypes.CDLL(PLUGIN_LIBRARY) 414 | 415 | # load coco labels 416 | 417 | categories = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", 418 | "traffic light", 419 | "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", 420 | "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", 421 | "frisbee", 422 | "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", 423 | "surfboard", 424 | "tennis racket", "bottle", "wine glass", "cup", "fork", 
"knife", "spoon", "bowl", "banana", "apple", 425 | "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", 426 | "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", 427 | "cell phone", 428 | "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", 429 | "teddy bear", 430 | "hair drier", "toothbrush"] 431 | 432 | if os.path.exists('output/'): 433 | shutil.rmtree('output/') 434 | os.makedirs('output/') 435 | # a YoLov8TRT instance 436 | yolov8_wrapper = YoLov8TRT(engine_file_path) 437 | try: 438 | print('batch size is', yolov8_wrapper.batch_size) 439 | 440 | image_dir = "samples/" 441 | image_path_batches = get_img_path_batches(yolov8_wrapper.batch_size, image_dir) 442 | 443 | for i in range(10): 444 | # create a new thread to do warm_up 445 | thread1 = warmUpThread(yolov8_wrapper) 446 | thread1.start() 447 | thread1.join() 448 | for batch in image_path_batches: 449 | # create a new thread to do inference 450 | thread1 = inferThread(yolov8_wrapper, batch) 451 | thread1.start() 452 | thread1.join() 453 | finally: 454 | # destroy the instance 455 | yolov8_wrapper.destroy() 456 | -------------------------------------------------------------------------------- /videos/demo.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emptysoal/TensorRT-YOLOv8-ByteTrack/cec012e0672dd3d1f79a8dc29875be9c56feedb5/videos/demo.mp4 -------------------------------------------------------------------------------- /yolo/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | project(yolov8_trt_infer) 4 | 5 | add_definitions(-std=c++11) 6 | add_definitions(-DAPI_EXPORTS) 7 | set(CMAKE_CXX_STANDARD 11) 8 | set(CMAKE_BUILD_TYPE release) 9 | 10 | include_directories(${PROJECT_SOURCE_DIR}/include) 11 | include_directories(${PROJECT_SOURCE_DIR}/plugin) 12 | 13 | find_package(CUDA REQUIRED) 14 | 15 | # include and link dirs of cuda and tensorrt, you need adapt them if yours are different 16 | # ============= cuda ============ 17 | include_directories(/usr/local/cuda/include) 18 | link_directories(/usr/local/cuda/lib64) 19 | 20 | # ============= tensorrt ============ 21 | if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") 22 | message("Embed_platform on") 23 | include_directories(/usr/include/aarch64-linux-gnu) 24 | link_directories(/usr/lib/aarch64-linux-gnu) 25 | else() 26 | message("Embed_platform off") 27 | include_directories(/usr/include/x86_64-linux-gnu) 28 | link_directories(/usr/lib/x86_64-linux-gnu) 29 | endif() 30 | 31 | # ============ opencv =========== 32 | find_package(OpenCV) 33 | include_directories(${OpenCV_INCLUDE_DIRS}) 34 | 35 | # ====== yolo infer shared lib ====== 36 | file(GLOB_RECURSE SRCS ${PROJECT_SOURCE_DIR}/src/*.cpp ${PROJECT_SOURCE_DIR}/src/*.cu ${PROJECT_SOURCE_DIR}/plugin/*.cu) 37 | cuda_add_library(yolo_infer SHARED ${SRCS}) 38 | target_link_libraries(yolo_infer nvinfer cudart ${OpenCV_LIBS}) 39 | set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib) 40 | 41 | # ======== main execute file ======== 42 | link_directories(${PROJECT_SOURCE_DIR}/lib) 43 | add_executable(main ${PROJECT_SOURCE_DIR}/main.cpp) 44 | target_link_libraries(main yolo_infer) 45 | set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}) 46 | -------------------------------------------------------------------------------- /yolo/README.md: 
--------------------------------------------------------------------------------
1 | # Encapsulated YOLOv8 TensorRT Inference
2 | 
3 | ## 1. Overview
4 | 
5 | - Runs `YOLOv8` inference on top of `TensorRT-v8`;
6 | 
7 | - Can be deployed on embedded `Jetson`-series devices as well as on `Linux x86_64` servers.
8 | 
9 | The main work done here:
10 | 
11 | 1. Following the [tensorrtx](https://github.com/wang-xinyu/tensorrtx/tree/master/yolov8) project for the model conversion (`.pth` -> `.engine`), the inference code was extracted and **wrapped in a C++ class** so that other projects can call it easily;
12 | 2. The preprocessing was replaced with my own CUDA implementation;
13 | 3. The CUDA postprocessing was removed, because in testing it was not noticeably faster than CPU postprocessing;
14 | 4. `YOLOv8` inference is built as a shared library, to decouple it from the rest of the project.
15 | 
16 | Highlights:
17 | 
18 | - To use `YOLOv8` inference in another project, the 3 lines of code below are all that is needed:
19 | 
20 | ```C++
21 | // load the model
22 | std::string trtFile = "./engine/yolov8s.engine";
23 | YoloDetecter detecter(trtFile);
24 | 
25 | // inference with TensorRT
26 | std::vector<DetectResult> res = detecter.inference(img);
27 | ```
28 | 
29 | ## 2. Environment
30 | 
31 | 1. Basic requirements:
32 | 
33 |    - `TensorRT 8.0+`
34 |    - `OpenCV 3.4.0+`
35 | 
36 | 2. My own setup on a `Jetson Nano`:
37 | 
38 |    - System image `Jetpack 4.6.1`, which ships with the following stack:
39 | 
40 | | CUDA | cuDNN | TensorRT | OpenCV |
41 | | ---- | ----- | -------- | ------ |
42 | | 10.2 | 8.2   | 8.2.1    | 4.1.1  |
43 | 
44 | There is plenty of material online on how to flash a `Jetson Nano` image, so it is not repeated here; just make sure to choose version 4.6.1 when downloading the `Jetpack` image, since that version ships TensorRT v8.
45 | 
46 | Tip: whatever the device, remember to verify the library paths in the `CMakeLists.txt` file.
47 | 
48 | ## 3. Model Conversion
49 | 
50 | Goal: convert a `YOLOv8` `.pth` detection model into a serialized `TensorRT` plan file with the `.engine` suffix.
51 | 
52 | Steps:
53 | 
54 | 1. Follow the [tensorrtx](https://github.com/wang-xinyu/tensorrtx/tree/master/yolov8) instructions, with two caveats from my own testing:
55 |    - When copying the `gen_wts.py` file, copying it into the top-level `ultralytics` directory of `YOLOv8` is enough; there is no need to install `YOLOv8`, nor to use the second-level directory its instructions describe;
56 |    - Remember to adjust the input and output paths in `gen_wts.py`.
57 | 
58 | 2. This produces the `yolov8s.engine` file (I used the YOLOv8-s model; other variants work as well).
59 | 
60 | 3. Create an `engine` directory in this project and place the converted model file in it.
61 | 
62 | ## 4. Running the Project
63 | 
64 | - Build and run
65 | - Proceed as follows:
66 | 
67 | ```bash
68 | mkdir build
69 | cd build
70 | cmake ..
71 | make
72 | cd ..
73 | ./main ./images  # pass the directory containing your images
74 | ```
75 | 
76 | 
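For reference, a minimal sketch of wiring the same detector into a video loop instead of a directory of images (the engine path is the one used above; `./videos/demo.mp4` is just this repo's sample clip, and any source `cv::VideoCapture` can open works):

```C++
#include <opencv2/opencv.hpp>
#include "yolov8_lib.h"

int main()
{
    // load the engine once, reuse it for every frame
    YoloDetecter detecter("./engine/yolov8s.engine");

    cv::VideoCapture cap("./videos/demo.mp4");
    cv::Mat frame;
    while (cap.read(frame))
    {
        std::vector<DetectResult> res = detecter.inference(frame);
        for (size_t i = 0; i < res.size(); ++i)
            cv::rectangle(frame, res[i].tlwh, cv::Scalar(255, 0, 255), 2);
        // display or encode the annotated frame here
    }
    return 0;
}
```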
--------------------------------------------------------------------------------
/yolo/include/config.h:
--------------------------------------------------------------------------------
1 | #define USE_FP16
2 | //#define USE_INT8
3 | 
4 | const static char *kInputTensorName = "images";
5 | const static char *kOutputTensorName = "output";
6 | const static int kNumClass = 80;
7 | const static int kBatchSize = 1;
8 | const static int kGpuId = 0;
9 | const static int kInputH = 640;
10 | const static int kInputW = 640;
11 | const static float kNmsThresh = 0.45f;
12 | const static float kConfThresh = 0.01f;
13 | const static int kMaxInputImageSize = 3000 * 3000;
14 | const static int kMaxNumOutputBbox = 1000;
15 | 
--------------------------------------------------------------------------------
/yolo/include/macros.h:
--------------------------------------------------------------------------------
1 | #ifndef __MACROS_H
2 | #define __MACROS_H
3 | 
4 | #include "NvInfer.h"
5 | 
6 | #ifdef API_EXPORTS
7 | #if defined(_MSC_VER)
8 | #define API __declspec(dllexport)
9 | #else
10 | #define API __attribute__((visibility("default")))
11 | #endif
12 | #else
13 | 
14 | #if defined(_MSC_VER)
15 | #define API __declspec(dllimport)
16 | #else
17 | #define API
18 | #endif
19 | #endif  // API_EXPORTS
20 | 
21 | #if NV_TENSORRT_MAJOR >= 8
22 | #define TRT_NOEXCEPT noexcept
23 | #define TRT_CONST_ENQUEUE const
24 | #else
25 | #define TRT_NOEXCEPT
26 | #define TRT_CONST_ENQUEUE
27 | #endif
28 | 
29 | #endif  // __MACROS_H
30 | 
--------------------------------------------------------------------------------
/yolo/include/postprocess.h:
--------------------------------------------------------------------------------
1 | #ifndef POSTPROCESS_H
2 | #define POSTPROCESS_H
3 | 
4 | #include "types.h"
5 | #include <opencv2/opencv.hpp>
6 | 
7 | cv::Rect get_rect(cv::Mat& img, float bbox[4]);
8 | 
9 | void nms(std::vector<Detection>& res, float *output, float conf_thresh, float nms_thresh = 0.5);
10 | 
11 | #endif  // POSTPROCESS_H
12 | 
--------------------------------------------------------------------------------
/yolo/include/preprocess.h:
--------------------------------------------------------------------------------
1 | #ifndef PREPROCESS_H
2 | #define PREPROCESS_H
3 | 
4 | #include <opencv2/opencv.hpp>
5 | #include <cuda_runtime.h>
6 | 
7 | void preprocess(const cv::Mat& srcImg, float* dstData, const int dstHeight, const int dstWidth);
8 | /*
9 |     srcImg: source image for inference
10 |     dstData: data after preprocess (resize / bgr to rgb / hwc to chw / normalize)
11 |     dstHeight: CNN input height
12 |     dstWidth: CNN input width
13 | */
14 | 
15 | #endif  // PREPROCESS_H
16 | 
--------------------------------------------------------------------------------
/yolo/include/public.h:
--------------------------------------------------------------------------------
1 | #ifndef PUBLIC_H
2 | #define PUBLIC_H
3 | 
4 | #include <algorithm>
5 | #include <cassert>
6 | #include <chrono>
7 | #include <cmath>
8 | #include <cstring>
9 | #include <fstream>
10 | #include <iostream>
11 | #include <map>
12 | #include <string>
13 | #include <vector>
14 | 
15 | #include <cuda_runtime.h>
16 | #include <NvInfer.h>
17 | #include <NvInferRuntime.h>
18 | #include <opencv2/opencv.hpp>
19 | 
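// usage: wrap any CUDA runtime call whose status should be checked, e.g.
//   CUDA_CHECK(cudaMalloc(&devPtr, nBytes));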
20 | #define CUDA_CHECK(call) check(call, __LINE__, __FILE__)
21 | 
22 | inline bool check(cudaError_t e, int iLine, const char *szFile)
23 | {
24 |     if (e != cudaSuccess)
25 |     {
26 |         std::cout << "CUDA runtime API error " << cudaGetErrorName(e) << " at line " << iLine << " in file " << szFile << std::endl;
27 |         return false;
28 |     }
29 |     return true;
30 | }
31 | 
32 | using namespace nvinfer1;
33 | 
34 | 
35 | class Logger : public ILogger
36 | {
37 | public:
38 |     Severity reportableSeverity;
39 | 
40 |     Logger(Severity severity = Severity::kINFO):
41 |         reportableSeverity(severity) {}
42 | 
43 |     void log(Severity severity, const char *msg) noexcept override
44 |     {
45 |         if (severity > reportableSeverity)
46 |         {
47 |             return;
48 |         }
49 |         switch (severity)
50 |         {
51 |         case Severity::kINTERNAL_ERROR:
52 |             std::cerr << "INTERNAL_ERROR: ";
53 |             break;
54 |         case Severity::kERROR:
55 |             std::cerr << "ERROR: ";
56 |             break;
57 |         case Severity::kWARNING:
58 |             std::cerr << "WARNING: ";
59 |             break;
60 |         case Severity::kINFO:
61 |             std::cerr << "INFO: ";
62 |             break;
63 |         default:
64 |             std::cerr << "VERBOSE: ";
65 |             break;
66 |         }
67 |         std::cerr << msg << std::endl;
68 |     }
69 | };
70 | 
71 | #endif  // PUBLIC_H
72 | 
--------------------------------------------------------------------------------
/yolo/include/types.h:
--------------------------------------------------------------------------------
1 | #ifndef TYPES_H
2 | #define TYPES_H
3 | 
4 | #include "config.h"
5 | 
6 | struct alignas(float) Detection {
7 |     // x1, y1, x2, y2: box corners in network-input coordinates, decoded in yololayer.cu
8 |     float bbox[4];
9 |     float conf;  // bbox_conf * cls_conf
10 |     float class_id;
11 | };
12 | 
13 | struct AffineMatrix {
14 |     float value[6];
15 | };
16 | 
17 | const int bbox_element = sizeof(AffineMatrix) / sizeof(float) + 1;  // left, top, right, bottom, confidence, class, keepflag
18 | 
19 | #endif  // TYPES_H
20 | 
--------------------------------------------------------------------------------
/yolo/include/utils.h:
--------------------------------------------------------------------------------
1 | #ifndef UTILS_H
2 | #define UTILS_H
3 | 
4 | #include <dirent.h>
5 | #include <opencv2/opencv.hpp>
6 | 
7 | static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int input_h) {
8 |     int w, h, x, y;
9 |     float r_w = input_w / (img.cols * 1.0);
10 |     float r_h = input_h / (img.rows * 1.0);
11 |     if (r_h > r_w) {
12 |         w = input_w;
13 |         h = r_w * img.rows;
14 |         x = 0;
15 |         y = (input_h - h) / 2;
16 |     } else {
17 |         w = r_h * img.cols;
18 |         h = input_h;
19 |         x = (input_w - w) / 2;
20 |         y = 0;
21 |     }
22 |     cv::Mat re(h, w, CV_8UC3);
23 |     cv::resize(img, re, re.size(), 0, 0, cv::INTER_LINEAR);
24 |     cv::Mat out(input_h, input_w, CV_8UC3, cv::Scalar(128, 128, 128));
25 |     re.copyTo(out(cv::Rect(x, y, re.cols, re.rows)));
26 |     return out;
27 | }
28 | 
29 | static inline int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &file_names) {
30 |     DIR *p_dir = opendir(p_dir_name);
31 |     if (p_dir == nullptr) {
32 |         return -1;
33 |     }
34 | 
35 |     struct dirent* p_file = nullptr;
36 |     while ((p_file = readdir(p_dir)) != nullptr) {
37 |         if (strcmp(p_file->d_name, ".") != 0 &&
38 |             strcmp(p_file->d_name, "..") != 0) {
39 |             //std::string cur_file_name(p_dir_name);
40 |             //cur_file_name += "/";
41 |             //cur_file_name += p_file->d_name;
42 |             std::string cur_file_name(p_file->d_name);
43 |             file_names.push_back(cur_file_name);
44 |         }
45 |     }
46 | 
47 |     closedir(p_dir);
48 |     return 0;
49 | }
50 | 
51 | #endif  // UTILS_H
52 | 
--------------------------------------------------------------------------------
/yolo/include/yolov8_lib.h:
--------------------------------------------------------------------------------
1 | #ifndef YOLOV8_LIB
2 | #define YOLOV8_LIB
3 | 
4 | #include <opencv2/opencv.hpp>
5 | #include "public.h"
6 | #include "yololayer.h"
7 | 
8 | using namespace nvinfer1;
9 | 
10 | 
11 | struct DetectResult
12 | {
13 |     cv::Rect tlwh;  // top left width height
14 |     float conf;
15 |     int class_id;
16 | };
17 | 
18 | 
19 | class YoloDetecter
20 | {
21 | public:
22 |     YoloDetecter(const std::string trtFile);
23 |     ~YoloDetecter();
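    // run detection on one BGR image of any size; boxes are returned in the
    // original image's coordinate system as top-left x/y plus width/height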
24 |     std::vector<DetectResult> inference(cv::Mat& img);
25 | 
26 | private:
27 |     void deserialize_engine();
28 |     void inference();
29 | 
30 | private:
31 |     Logger gLogger;
32 |     std::string trtFile_;
33 | 
34 |     ICudaEngine * engine;
35 |     IRuntime * runtime;
36 |     IExecutionContext * context;
37 | 
38 |     cudaStream_t stream;
39 | 
40 |     int kOutputSize;
41 |     std::vector<int> vTensorSize;  // bytes of input and output
42 |     float * inputData;
43 |     float * outputData;
44 |     std::vector<void *> vBufferD;
45 | };
46 | 
47 | #endif  // YOLOV8_LIB
48 | 
--------------------------------------------------------------------------------
/yolo/main.cpp:
--------------------------------------------------------------------------------
1 | #include <iostream>
2 | #include "utils.h"
3 | #include "yolov8_lib.h"
4 | 
5 | 
6 | int run(char* imageDir)
7 | {
8 |     // get image file names for inference
9 |     std::vector<std::string> file_names;
10 |     if (read_files_in_dir(imageDir, file_names) < 0) {
11 |         std::cout << "read_files_in_dir failed." << std::endl;
12 |         return -1;
13 |     }
14 | 
15 |     // create detecter, and load engine plan
16 |     std::string trtFile = "./engine/yolov8s.engine";
17 |     YoloDetecter detecter(trtFile);
18 | 
19 |     // inference
20 |     for (long unsigned int i = 0; i < file_names.size(); i++)
21 |     {
22 |         std::string imagePath = std::string(imageDir) + "/" + file_names[i];
23 |         cv::Mat img = cv::imread(imagePath, cv::IMREAD_COLOR);
24 |         if (img.empty()) continue;
25 | 
26 |         std::vector<DetectResult> res = detecter.inference(img);
27 | 
28 |         // draw result on image
29 |         for (long unsigned int j = 0; j < res.size(); j++)
30 |         {
31 |             cv::Rect r = res[j].tlwh;
32 |             cv::rectangle(img, r, cv::Scalar(255, 0, 255), 2);
33 |             cv::putText(img, std::to_string(res[j].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0, 0, 255), 2);
34 |         }
35 | 
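        // the annotated image is written to the current working directory, prefixed with "_"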
<< std::endl; 39 | } 40 | 41 | return 0; 42 | } 43 | 44 | int main(int argc, char *argv[]) 45 | { 46 | if (argc != 2) { 47 | printf("This program need 1 argument\n"); 48 | printf("Usage: ./main [image dir]\n"); 49 | printf("Example: ./main ./images\n"); 50 | return 1; 51 | } 52 | 53 | return run(argv[1]); 54 | } 55 | -------------------------------------------------------------------------------- /yolo/plugin/yololayer.cu: -------------------------------------------------------------------------------- 1 | #include "yololayer.h" 2 | #include "types.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace Tn { 9 | template 10 | void write(char*& buffer, const T& val) { 11 | *reinterpret_cast(buffer) = val; 12 | buffer += sizeof(T); 13 | } 14 | 15 | template 16 | void read(const char*& buffer, T& val) { 17 | val = *reinterpret_cast(buffer); 18 | buffer += sizeof(T); 19 | } 20 | } // namespace Tn 21 | 22 | 23 | namespace nvinfer1 { 24 | YoloLayerPlugin::YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut) { 25 | mClassCount = classCount; 26 | mYoloV8NetWidth = netWidth; 27 | mYoloV8netHeight = netHeight; 28 | mMaxOutObject = maxOut; 29 | } 30 | 31 | YoloLayerPlugin::~YoloLayerPlugin() {} 32 | 33 | YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length) { 34 | using namespace Tn; 35 | const char* d = reinterpret_cast(data), * a = d; 36 | read(d, mClassCount); 37 | read(d, mThreadCount); 38 | read(d, mYoloV8NetWidth); 39 | read(d, mYoloV8netHeight); 40 | read(d, mMaxOutObject); 41 | 42 | assert(d == a + length); 43 | } 44 | 45 | void YoloLayerPlugin::serialize(void* buffer) const TRT_NOEXCEPT { 46 | 47 | using namespace Tn; 48 | char* d = static_cast(buffer), * a = d; 49 | write(d, mClassCount); 50 | write(d, mThreadCount); 51 | write(d, mYoloV8NetWidth); 52 | write(d, mYoloV8netHeight); 53 | write(d, mMaxOutObject); 54 | 55 | assert(d == a + getSerializationSize()); 56 | } 57 | 58 | size_t YoloLayerPlugin::getSerializationSize() const TRT_NOEXCEPT { 59 | return sizeof(mClassCount) + sizeof(mThreadCount) + sizeof(mYoloV8netHeight) + sizeof(mYoloV8NetWidth) + sizeof(mMaxOutObject); 60 | } 61 | 62 | int YoloLayerPlugin::initialize() TRT_NOEXCEPT { 63 | return 0; 64 | } 65 | 66 | nvinfer1::Dims YoloLayerPlugin::getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) TRT_NOEXCEPT { 67 | int total_size = mMaxOutObject * sizeof(Detection) / sizeof(float); 68 | return nvinfer1::Dims3(total_size + 1, 1, 1); 69 | } 70 | 71 | void YoloLayerPlugin::setPluginNamespace(const char* pluginNamespace) TRT_NOEXCEPT { 72 | mPluginNamespace = pluginNamespace; 73 | } 74 | 75 | const char* YoloLayerPlugin::getPluginNamespace() const TRT_NOEXCEPT { 76 | return mPluginNamespace; 77 | } 78 | 79 | nvinfer1::DataType YoloLayerPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const TRT_NOEXCEPT { 80 | return nvinfer1::DataType::kFLOAT; 81 | } 82 | 83 | bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const TRT_NOEXCEPT { 84 | 85 | return false; 86 | } 87 | 88 | bool YoloLayerPlugin::canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOEXCEPT { 89 | 90 | return false; 91 | } 92 | 93 | void YoloLayerPlugin::configurePlugin(nvinfer1::PluginTensorDesc const* in, int nbInput, nvinfer1::PluginTensorDesc const* out, int nbOutput) TRT_NOEXCEPT {}; 94 | 95 | void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, 
    __global__ void CalDetection(const float* input, float* output, int numElements, int maxoutobject,
            const int grid_h, int grid_w, const int stride, int classes, int outputElem) {
        int idx = threadIdx.x + blockDim.x * blockIdx.x;
        if (idx >= numElements) return;

        int total_grid = grid_h * grid_w;
        int info_len = 4 + classes;
        int batchIdx = idx / total_grid;
        int elemIdx = idx % total_grid;
        const float* curInput = input + batchIdx * total_grid * info_len;
        int outputIdx = batchIdx * outputElem;

        int class_id = 0;
        float max_cls_prob = 0.0;
        for (int i = 4; i < info_len; i++) {
            float p = Logist(curInput[elemIdx + i * total_grid]);
            if (p > max_cls_prob) {
                max_cls_prob = p;
                class_id = i - 4;
            }
        }

        if (max_cls_prob < 0.1) return;

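        // Output layout per image: [count, Detection0, Detection1, ...]; the
        // first float holds the number of boxes written, bumped atomically.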
        int count = (int)atomicAdd(output + outputIdx, 1);
        if (count >= maxoutobject) return;
        char* data = (char*)(output + outputIdx) + sizeof(float) + count * sizeof(Detection);
        Detection* det = (Detection*)(data);

        int row = elemIdx / grid_w;
        int col = elemIdx % grid_w;

        det->conf = max_cls_prob;
        det->class_id = class_id;
        det->bbox[0] = (col + 0.5f - curInput[elemIdx + 0 * total_grid]) * stride;
        det->bbox[1] = (row + 0.5f - curInput[elemIdx + 1 * total_grid]) * stride;
        det->bbox[2] = (col + 0.5f + curInput[elemIdx + 2 * total_grid]) * stride;
        det->bbox[3] = (row + 0.5f + curInput[elemIdx + 3 * total_grid]) * stride;
    }

    void YoloLayerPlugin::forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int mYoloV8netHeight, int mYoloV8NetWidth, int batchSize) {
        int outputElem = 1 + mMaxOutObject * sizeof(Detection) / sizeof(float);
        cudaMemsetAsync(output, 0, sizeof(float), stream);
        for (int idx = 0; idx < batchSize; ++idx) {
            CUDA_CHECK(cudaMemsetAsync(output + idx * outputElem, 0, sizeof(float), stream));
        }
        int numElem = 0;
        int grids[3][2] = { {mYoloV8netHeight / 8, mYoloV8NetWidth / 8}, {mYoloV8netHeight / 16, mYoloV8NetWidth / 16}, {mYoloV8netHeight / 32, mYoloV8NetWidth / 32} };
        int strides[] = { 8, 16, 32 };
        for (unsigned int i = 0; i < 3; i++) {
            int grid_h = grids[i][0];
            int grid_w = grids[i][1];
            int stride = strides[i];
            numElem = grid_h * grid_w * batchSize;
            if (numElem < mThreadCount) mThreadCount = numElem;

            CalDetection<<<(numElem + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>>
                (inputs[i], output, numElem, mMaxOutObject, grid_h, grid_w, stride, mClassCount, outputElem);
        }
    }

    PluginFieldCollection YoloPluginCreator::mFC{};
    std::vector<PluginField> YoloPluginCreator::mPluginAttributes;

    YoloPluginCreator::YoloPluginCreator() {
        mPluginAttributes.clear();
        mFC.nbFields = mPluginAttributes.size();
        mFC.fields = mPluginAttributes.data();
    }

    const char* YoloPluginCreator::getPluginName() const TRT_NOEXCEPT {
        return "YoloLayer_TRT";
    }

    const char* YoloPluginCreator::getPluginVersion() const TRT_NOEXCEPT {
        return "1";
    }

    const PluginFieldCollection* YoloPluginCreator::getFieldNames() TRT_NOEXCEPT {
        return &mFC;
    }

    IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) TRT_NOEXCEPT {
        assert(fc->nbFields == 1);
        assert(strcmp(fc->fields[0].name, "netinfo") == 0);
        int* p_netinfo = (int*)(fc->fields[0].data);
        int class_count = p_netinfo[0];
        int input_w = p_netinfo[1];
        int input_h = p_netinfo[2];
        int max_output_object_count = p_netinfo[3];
        YoloLayerPlugin* obj = new YoloLayerPlugin(class_count, input_w, input_h, max_output_object_count);
        obj->setPluginNamespace(mNamespace.c_str());
        return obj;
    }

    IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) TRT_NOEXCEPT {
        // This object will be deleted when the network is destroyed, which will
        // call YoloLayerPlugin::destroy()
        YoloLayerPlugin* obj = new YoloLayerPlugin(serialData, serialLength);
        obj->setPluginNamespace(mNamespace.c_str());
        return obj;
    }

}  // namespace nvinfer1

--------------------------------------------------------------------------------
/yolo/plugin/yololayer.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "macros.h"
3 | #include "NvInfer.h"
4 | #include <string>
5 | #include <vector>
6 | #include "macros.h"
7 | #include "public.h"
8 | namespace nvinfer1 {
9 | class API YoloLayerPlugin : public IPluginV2IOExt {
10 |    public:
11 |     YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut);
12 |     YoloLayerPlugin(const void* data, size_t length);
13 |     ~YoloLayerPlugin();
14 | 
15 |     int getNbOutputs() const TRT_NOEXCEPT override {
16 |         return 1;
17 |     }
18 | 
19 |     nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) TRT_NOEXCEPT override;
20 | 
21 |     int initialize() TRT_NOEXCEPT override;
22 | 
23 |     virtual void terminate() TRT_NOEXCEPT override {}
24 | 
25 |     virtual size_t getWorkspaceSize(int maxBatchSize) const TRT_NOEXCEPT override { return 0; }
26 | 
27 |     virtual int enqueue(int batchSize, const void* const* inputs, void* TRT_CONST_ENQUEUE* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT override;
28 | 
29 |     virtual size_t getSerializationSize() const TRT_NOEXCEPT override;
30 | 
31 |     virtual void serialize(void* buffer) const TRT_NOEXCEPT override;
32 | 
33 |     bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const TRT_NOEXCEPT override {
34 |         return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
35 |     }
36 | 
37 | 
38 |     const char* getPluginType() const TRT_NOEXCEPT override;
39 | 
40 |     const char* getPluginVersion() const TRT_NOEXCEPT override;
41 | 
42 |     void destroy() TRT_NOEXCEPT override;
43 | 
44 |     IPluginV2IOExt* clone() const TRT_NOEXCEPT override;
45 | 
46 |     void setPluginNamespace(const char* pluginNamespace) TRT_NOEXCEPT override;
47 | 
48 |     const char* getPluginNamespace() const TRT_NOEXCEPT override;
49 | 
50 |     nvinfer1::DataType getOutputDataType(int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const TRT_NOEXCEPT override;
51 | 
52 |     bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const TRT_NOEXCEPT override;
53 | 
54 |     bool canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOEXCEPT override;
55 | 
56 |     void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) TRT_NOEXCEPT override;
57 | 
58 |     void configurePlugin(PluginTensorDesc const* in, int32_t nbInput, PluginTensorDesc const* out, int32_t nbOutput) TRT_NOEXCEPT override;
59 | 
60 |     void detachFromContext() TRT_NOEXCEPT override;
61 | 
62 |    private:
63 |     void forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int mYoloV8netHeight, int mYoloV8NetWidth, int batchSize);
64 |     int mThreadCount = 256;
65 |     const char* mPluginNamespace;
66 |     int mClassCount;
67 |     int mYoloV8NetWidth;
68 |     int mYoloV8netHeight;
69 |     int mMaxOutObject;
70 | };
71 | 
72 | class API YoloPluginCreator : public IPluginCreator {
73 |    public:
74 |     YoloPluginCreator();
75 |     ~YoloPluginCreator() override = default;
76 | 
77 |     const char* getPluginName() const TRT_NOEXCEPT override;
78 | 
79 |     const char* getPluginVersion() const TRT_NOEXCEPT override;
80 | 
81 |     const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override;
82 | 
83 |     nvinfer1::IPluginV2IOExt* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) TRT_NOEXCEPT override;
84 | 
85 |     nvinfer1::IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) TRT_NOEXCEPT override;
86 | 
87 |     void setPluginNamespace(const char* libNamespace) TRT_NOEXCEPT override {
88 |         mNamespace = libNamespace;
89 |     }
90 | 
91 |     const char* getPluginNamespace() const TRT_NOEXCEPT override {
92 |         return mNamespace.c_str();
93 |     }
94 | 
95 |    private:
96 |     std::string mNamespace;
97 |     static PluginFieldCollection mFC;
98 |     static std::vector<PluginField> mPluginAttributes;
99 | };
100 | REGISTER_TENSORRT_PLUGIN(YoloPluginCreator);
101 | }  // namespace nvinfer1
102 | 
--------------------------------------------------------------------------------
/yolo/src/postprocess.cpp:
--------------------------------------------------------------------------------
1 | #include "postprocess.h"
2 | 
3 | 
4 | cv::Rect get_rect(cv::Mat &img, float bbox[4]) {
5 |     float l, r, t, b;
6 |     float r_w = kInputW / (img.cols * 1.0);
7 |     float r_h = kInputH / (img.rows * 1.0);
8 | 
9 |     if (r_h > r_w) {
10 |         l = bbox[0];
11 |         r = bbox[2];
12 |         t = bbox[1] - (kInputH - r_w * img.rows) / 2;
13 |         b = bbox[3] - (kInputH - r_w * img.rows) / 2;
14 |         l = l / r_w;
15 |         r = r / r_w;
16 |         t = t / r_w;
17 |         b = b / r_w;
18 |     } else {
19 |         l = bbox[0] - (kInputW - r_h * img.cols) / 2;
20 |         r = bbox[2] - (kInputW - r_h * img.cols) / 2;
21 |         t = bbox[1];
22 |         b = bbox[3];
23 |         l = l / r_h;
24 |         r = r / r_h;
25 |         t = t / r_h;
26 |         b = b / r_h;
27 |     }
28 |     return cv::Rect(round(l), round(t), round(r - l), round(b - t));
29 | }
30 | 
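// Axis-aligned IoU of two corner-format boxes [x1, y1, x2, y2]; interBox
// holds (left, right, top, bottom) of the intersection rectangle.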
static float iou(float lbox[4], float rbox[4]) {
    float interBox[] = {
        (std::max)(lbox[0], rbox[0]),  // left
        (std::min)(lbox[2], rbox[2]),  // right
        (std::max)(lbox[1], rbox[1]),  // top
        (std::min)(lbox[3], rbox[3]),  // bottom
    };

    if (interBox[2] > interBox[3] || interBox[0] > interBox[1])
        return 0.0f;

    float interBoxS = (interBox[1] - interBox[0]) * (interBox[3] - interBox[2]);
    float unionBoxS = (lbox[2] - lbox[0]) * (lbox[3] - lbox[1]) + (rbox[2] - rbox[0]) * (rbox[3] - rbox[1]) - interBoxS;
    return interBoxS / unionBoxS;
}

static bool cmp(const Detection &a, const Detection &b) {
    return a.conf > b.conf;
}

void nms(std::vector<Detection> &res, float *output, float conf_thresh, float nms_thresh) {
    int det_size = sizeof(Detection) / sizeof(float);
    std::map<float, std::vector<Detection>> m;

    for (int i = 0; i < output[0]; i++) {
        if (output[1 + det_size * i + 4] <= conf_thresh) continue;
        Detection det;
        memcpy(&det, &output[1 + det_size * i], det_size * sizeof(float));
        if (m.count(det.class_id) == 0) m.emplace(det.class_id, std::vector<Detection>());
        m[det.class_id].push_back(det);
    }
    for (auto it = m.begin(); it != m.end(); it++) {
        auto &dets = it->second;
        std::sort(dets.begin(), dets.end(), cmp);
        for (size_t m = 0; m < dets.size(); ++m) {
            auto &item = dets[m];
            res.push_back(item);
            for (size_t n = m + 1; n < dets.size(); ++n) {
                if (iou(item.bbox, dets[n].bbox) > nms_thresh) {
                    dets.erase(dets.begin() + n);
                    --n;
                }
            }
        }
    }
}

--------------------------------------------------------------------------------
/yolo/src/preprocess.cu:
--------------------------------------------------------------------------------
1 | #include "preprocess.h"
2 | 
3 | 
4 | __global__ void letterbox(const uchar* srcData, const int srcH, const int srcW, uchar* tgtData,
5 |     const int tgtH, const int tgtW, const int rszH, const int rszW, const int startY, const int startX)
6 | {
7 |     int ix = threadIdx.x + blockDim.x * blockIdx.x;
8 |     int iy = threadIdx.y + blockDim.y * blockIdx.y;
9 |     int idx = ix + iy * tgtW;
10 |     int idx3 = idx * 3;
11 | 
12 |     if ( ix >= tgtW || iy >= tgtH ) return;  // thread out of target range
13 |     // gray region on target image
14 |     if ( iy < startY || iy > (startY + rszH - 1) ) {
15 |         tgtData[idx3] = 128;
16 |         tgtData[idx3 + 1] = 128;
17 |         tgtData[idx3 + 2] = 128;
18 |         return;
19 |     }
20 |     if ( ix < startX || ix > (startX + rszW - 1) ){
21 |         tgtData[idx3] = 128;
22 |         tgtData[idx3 + 1] = 128;
23 |         tgtData[idx3 + 2] = 128;
24 |         return;
25 |     }
26 | 
27 |     float scaleY = (float)rszH / (float)srcH;
28 |     float scaleX = (float)rszW / (float)srcW;
29 | 
30 |     // (ix, iy) is the coordinate on the target image
31 |     // (beforeX, beforeY) is the corresponding coordinate on the source image
32 |     float beforeX = float(ix - startX + 0.5) / scaleX - 0.5;
33 |     float beforeY = float(iy - startY + 0.5) / scaleY - 0.5;
34 |     // the four neighboring source pixels around that coordinate:
35 |     // truncate to get the nearest top-left vertex
36 |     int topY = static_cast<int>(beforeY);
37 |     int bottomY = topY + 1;
38 |     int leftX = static_cast<int>(beforeX);
39 |     int rightX = leftX + 1;
40 |     // fractional parts of the source coordinate, used as bilinear weights
41 |     float u = beforeX - leftX;
42 |     float v = beforeY - topY;
43 | 
44 |     if (topY >= srcH - 1 && leftX >= srcW - 1)  // bottom-right corner
45 |     {
46 |         for (int k = 0; k < 3; k++)
47 |         {
48 |             tgtData[idx3 + k] = (1. - u) * (1. - v) * srcData[(leftX + topY * srcW) * 3 + k];
49 |         }
50 |     }
51 |     else if (topY >= srcH - 1)  // last row
52 |     {
53 |         for (int k = 0; k < 3; k++)
54 |         {
55 |             tgtData[idx3 + k]
56 |                 = (1. - u) * (1. - v) * srcData[(leftX + topY * srcW) * 3 + k]
57 |                 + (u) * (1. - v) * srcData[(rightX + topY * srcW) * 3 + k];
58 |         }
59 |     }
60 |     else if (leftX >= srcW - 1)  // last column
61 |     {
62 |         for (int k = 0; k < 3; k++)
63 |         {
64 |             tgtData[idx3 + k]
65 |                 = (1. - u) * (1. - v) * srcData[(leftX + topY * srcW) * 3 + k]
66 |                 + (1. - u) * (v) * srcData[(leftX + bottomY * srcW) * 3 + k];
67 |         }
68 |     }
69 |     else  // general case: not in the last row or last column
70 |     {
71 |         for (int k = 0; k < 3; k++)
72 |         {
73 |             tgtData[idx3 + k]
74 |                 = (1. - u) * (1. - v) * srcData[(leftX + topY * srcW) * 3 + k]
75 |                 + (u) * (1. - v) * srcData[(rightX + topY * srcW) * 3 + k]
76 |                 + (1. - u) * (v) * srcData[(leftX + bottomY * srcW) * 3 + k]
77 |                 + u * v * srcData[(rightX + bottomY * srcW) * 3 + k];
78 |         }
79 |     }
80 | }
81 | 
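// Convert interleaved BGR bytes to planar RGB floats in [0, 1]:
// plane 0 gets R, plane 1 gets G, plane 2 gets B (HWC -> CHW).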
82 | __global__ void process(const uchar* srcData, float* tgtData, const int h, const int w)
83 | {
84 |     int ix = threadIdx.x + blockIdx.x * blockDim.x;
85 |     int iy = threadIdx.y + blockIdx.y * blockDim.y;
86 |     int idx = ix + iy * w;
87 |     int idx3 = idx * 3;
88 | 
89 |     if (ix < w && iy < h)
90 |     {
91 |         tgtData[idx] = (float)srcData[idx3 + 2] / 255.0;  // R pixel
92 |         tgtData[idx + h * w] = (float)srcData[idx3 + 1] / 255.0;  // G pixel
93 |         tgtData[idx + h * w * 2] = (float)srcData[idx3] / 255.0;  // B pixel
94 |     }
95 | }
96 | 
97 | void preprocess(const cv::Mat& srcImg, float* dstData, const int dstHeight, const int dstWidth)
98 | {
99 |     int srcHeight = srcImg.rows;
100 |     int srcWidth = srcImg.cols;
101 |     int srcElements = srcHeight * srcWidth * 3;
102 |     int dstElements = dstHeight * dstWidth * 3;
103 | 
104 |     // target data on device
105 |     float* dstDevData;
106 |     cudaMalloc((void**)&dstDevData, sizeof(float) * dstElements);
107 |     // middle image data on device ( for bilinear resize )
108 |     uchar* midDevData;
109 |     cudaMalloc((void**)&midDevData, sizeof(uchar) * dstElements);
110 |     // source images data on device
111 |     uchar* srcDevData;
112 |     cudaMalloc((void**)&srcDevData, sizeof(uchar) * srcElements);
113 |     cudaMemcpy(srcDevData, srcImg.data, sizeof(uchar) * srcElements, cudaMemcpyHostToDevice);
114 | 
115 |     // calculate width and height after resize
116 |     int w, h, x, y;
117 |     float r_w = dstWidth / (srcWidth * 1.0);
118 |     float r_h = dstHeight / (srcHeight * 1.0);
119 |     if (r_h > r_w) {
120 |         w = dstWidth;
121 |         h = r_w * srcHeight;
122 |         x = 0;
123 |         y = (dstHeight - h) / 2;
124 |     }
125 |     else {
126 |         w = r_h * srcWidth;
127 |         h = dstHeight;
128 |         x = (dstWidth - w) / 2;
129 |         y = 0;
130 |     }
131 | 
132 |     dim3 blockSize(32, 32);
133 |     dim3 gridSize((dstWidth + blockSize.x - 1) / blockSize.x, (dstHeight + blockSize.y - 1) / blockSize.y);
134 | 
135 |     // letterbox and resize
136 |     letterbox<<<gridSize, blockSize>>>(srcDevData, srcHeight, srcWidth, midDevData, dstHeight, dstWidth, h, w, y, x);
137 |     cudaDeviceSynchronize();
138 |     // hwc to chw / bgr to rgb / normalize
139 |     process<<<gridSize, blockSize>>>(midDevData, dstDevData, dstHeight, dstWidth);
140 | 
141 |     cudaMemcpy(dstData, dstDevData, sizeof(float) * dstElements, cudaMemcpyDeviceToHost);
142 | 
143 |     cudaFree(srcDevData);
144 |     cudaFree(midDevData);
145 |     cudaFree(dstDevData);
146 | }
147 | 
--------------------------------------------------------------------------------
/yolo/src/yolov8_lib.cpp:
--------------------------------------------------------------------------------
1 | #include <fstream>
2 | #include <iostream>
3 | 
4 | #include "yolov8_lib.h"
5 | #include "preprocess.h"
6 | #include "postprocess.h"
7 | 
8 | using namespace nvinfer1;
9 | 
10 | 
11 | YoloDetecter::YoloDetecter(const std::string trtFile): trtFile_(trtFile)
12 | {
13 |     gLogger = Logger(ILogger::Severity::kERROR);
14 |     cudaSetDevice(kGpuId);
15 | 
16 |     // load engine
17 |     deserialize_engine();
18 | 
19 |     CUDA_CHECK(cudaStreamCreate(&stream));
20 | 
21 |     // bytes of input and output
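    // output tensor = 1 leading float (the valid box count, see nms()) followed
    // by up to kMaxNumOutputBbox raw Detection structs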
22 |     kOutputSize = kMaxNumOutputBbox * sizeof(Detection) / sizeof(float) + 1;
23 |     vTensorSize.resize(2, 0);
24 |     vTensorSize[0] = 3 * kInputH * kInputW * sizeof(float);
25 |     vTensorSize[1] = kOutputSize * sizeof(float);
26 | 
27 |     // prepare input data and output data ---------------------------
28 |     inputData = new float[3 * kInputH * kInputW];
29 |     outputData = new float[kOutputSize];
30 | 
31 |     // prepare input and output space on device
32 |     vBufferD.resize(2, nullptr);
33 |     for (int i = 0; i < 2; i++)
34 |     {
35 |         CUDA_CHECK(cudaMalloc(&vBufferD[i], vTensorSize[i]));
36 |     }
37 | }
38 | 
39 | void YoloDetecter::deserialize_engine()
40 | {
41 |     std::ifstream file(trtFile_, std::ios::binary);
42 |     if (!file.good()){
43 |         std::cerr << "read " << trtFile_ << " error!" << std::endl;
44 |         assert(false);
45 |     }
46 |     size_t size = 0;
47 |     file.seekg(0, file.end);
48 |     size = file.tellg();
49 |     file.seekg(0, file.beg);
50 |     char* serialized_engine = new char[size];
51 |     assert(serialized_engine);
52 |     file.read(serialized_engine, size);
53 |     file.close();
54 | 
55 |     runtime = createInferRuntime(gLogger);
56 |     engine = runtime->deserializeCudaEngine(serialized_engine, size);
57 |     context = engine->createExecutionContext();
58 |     delete[] serialized_engine;
59 | }
60 | 
61 | YoloDetecter::~YoloDetecter()
62 | {
63 |     cudaStreamDestroy(stream);
64 | 
65 |     for (int i = 0; i < 2; ++i)
66 |     {
67 |         CUDA_CHECK(cudaFree(vBufferD[i]));
68 |     }
69 | 
70 |     delete context;
71 |     delete engine;
72 |     delete runtime;
73 | 
74 |     delete [] inputData;
75 |     delete [] outputData;
76 | }
77 | 
78 | void YoloDetecter::inference()
79 | {
80 |     CUDA_CHECK(cudaMemcpyAsync(vBufferD[0], (void *)inputData, vTensorSize[0], cudaMemcpyHostToDevice, stream));
81 |     context->enqueue(1, vBufferD.data(), stream, nullptr);
82 |     CUDA_CHECK(cudaMemcpyAsync((void *)outputData, vBufferD[1], vTensorSize[1], cudaMemcpyDeviceToHost, stream));
83 |     CUDA_CHECK(cudaStreamSynchronize(stream));
84 | }
85 | 
86 | std::vector<DetectResult> YoloDetecter::inference(cv::Mat& img)
87 | {
88 |     preprocess(img, inputData, kInputH, kInputW);  // put image data on inputData
89 | 
90 |     inference();
91 | 
92 |     std::vector<Detection> res;
93 |     nms(res, outputData, kConfThresh, kNmsThresh);
94 | 
95 |     std::vector<DetectResult> final_res;
96 |     for (size_t j = 0; j < res.size(); j++)
97 |     {
98 |         cv::Rect r = get_rect(img, res[j].bbox);
99 |         DetectResult single_res {r, res[j].conf, (int)res[j].class_id};
100 |         final_res.push_back(single_res);
101 |     }
102 | 
103 |     return final_res;
104 | }
105 | 
--------------------------------------------------------------------------------