├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── assets └── sample_yolov8_bytetrack.gif ├── build_opencv.sh ├── models ├── engine │ └── .gitkeep ├── onnx │ ├── .gitkeep │ ├── deepsort.onnx │ └── yolov8n.onnx └── to_export │ └── .gitkeep ├── sample_video ├── .gitkeep └── sample_1.webm └── srcs ├── bytetrack ├── basetrack.py ├── byte_tracker.py ├── kalman_filter.py └── matching.py ├── config.py ├── deep_sort ├── __init__.py ├── configs │ └── deep_sort.yaml ├── deep_sort │ ├── __init__.py │ ├── deep │ │ ├── __init__.py │ │ ├── checkpoint │ │ │ └── .gitkeep │ │ └── feature_extractor_trt.py │ ├── deep_sort_trt.py │ └── sort │ │ ├── __init__.py │ │ ├── detection.py │ │ ├── iou_matching.py │ │ ├── kalman_filter.py │ │ ├── linear_assignment.py │ │ ├── nn_matching.py │ │ ├── preprocessing.py │ │ ├── track.py │ │ └── tracker.py └── utils │ ├── __init__.py │ ├── asserts.py │ ├── draw.py │ ├── evaluation.py │ ├── io.py │ ├── json_logger.py │ ├── log.py │ ├── parser.py │ └── tools.py ├── models ├── __init__.py ├── api.py ├── common.py ├── cudart_api.py ├── engine.py ├── pycuda_api.py ├── torch_utils.py └── utils.py ├── tracker_trt.py ├── yolov8_bytetrack_trt.py └── yolov8_deepsort_trt.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | 163 | 164 | *.pt 165 | *.pth 166 | *.engine 167 | *.pkl 168 | *.h5 169 | *.npy 170 | *.npz 171 | opencv_build 172 | *.mp4 173 | *.avi 174 | tracking_bytetrack_output.txt 175 | run_bt.py 176 | *.llc 177 | *.webm 178 | 179 | .idea 180 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "refs/YOLOv8-TensorRT"] 2 | path = refs/YOLOv8-TensorRT 3 | url = https://github.com/triple-Mu/YOLOv8-TensorRT 4 | [submodule "refs/deepsort_tensorrt"] 5 | path = refs/deepsort_tensorrt 6 | url = https://github.com/GesilaA/deepsort_tensorrt 7 | [submodule "refs/opencv"] 8 | path = refs/opencv 9 | url = https://github.com/opencv/opencv.git 10 | [submodule "refs/opencv_contrib"] 11 | path = refs/opencv_contrib 12 | url = https://github.com/opencv/opencv_contrib.git 13 | [submodule "refs/ByteTrack"] 14 | path = refs/ByteTrack 15 | url = https://github.com/ifzhang/ByteTrack.git 16 | [submodule "refs/BoostTrack"] 17 | path = refs/BoostTrack 18 | url = https://github.com/vukasin-stanojevic/BoostTrack 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Bang Nguyen Anh 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 
all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | # YOLO Object Tracking TensorRT 4 | 5 |
6 | 7 | 8 | This project uses OpenCV to capture video from a camera or a video file, **YOLOv8 TensorRT** to detect objects, and **DeepSORT TensorRT** or **BYTETrack** to track them. 9 | 10 | Both **NVIDIA dGPU** and **NVIDIA Jetson** devices are supported. 11 | 12 | ## Demo 13 | 14 | ### OpenCV + YOLOv8 + BYTETrack on NVIDIA GeForce GTX 1660Ti 15 | ![](assets/sample_yolov8_bytetrack.gif) 16 | 17 | 18 | 19 | 20 | ## Performance 21 | 22 | ### Full pipeline: OpenCV + YOLOv8 + DeepSORT TensorRT 23 | End-to-end pipeline: OpenCV captures the video, YOLOv8 TensorRT detects objects, and DeepSORT TensorRT tracks them. 24 | 25 | | Model | Device | FPS | 26 | | --- | --- | --- | 27 | | OpenCV + YOLOv8n + DeepSORT | NVIDIA dGPU GTX 1660Ti 6Gb | ~ | 28 | | OpenCV + YOLOv8n + DeepSORT | NVIDIA Jetson Xavier NX 8Gb | ~ | 29 | | OpenCV + YOLOv8n + DeepSORT | NVIDIA Jetson Orin Nano 8Gb | ~34 | 30 | 31 | ### YOLOv8 TensorRT model 32 | 33 | Speed of the YOLOv8 TensorRT engine measured with `trtexec` from TensorRT 34 | 35 | (`/usr/src/tensorrt/bin/trtexec` on NVIDIA Jetson) 36 | 37 | > batch size = 1 38 | 39 | | Model | Device | Throughput (qps) | Latency (ms) | 40 | | --- | --- | --- | --- | 41 | | `yolov8n.engine` | NVIDIA dGPU GTX 1660Ti 6Gb | ~419.742 | ~2.91736 | 42 | | `yolov8n.engine` | NVIDIA Jetson Xavier NX 8Gb | ~ | ~ | 43 | | `yolov8n.engine` | NVIDIA Jetson Orin Nano 8Gb | ~137.469 | ~137.469 | 44 | 45 | ### DeepSORT TensorRT model 46 | 47 | Speed of the DeepSORT TensorRT engine measured with `trtexec` from TensorRT 48 | 49 | (`/usr/src/tensorrt/bin/trtexec` on NVIDIA Jetson) 50 | 51 | > batch size = 1 52 | 53 | | Model | Device | Throughput (qps) | Latency (ms) | 54 | | --- | --- | --- | --- | 55 | | `deepsort.engine` | NVIDIA dGPU GTX 1660Ti 6Gb | ~614.738 | ~1.52197 | 56 | | `deepsort.engine` | NVIDIA Jetson Xavier NX 8Gb | ~ | ~ | 57 | | `deepsort.engine` | NVIDIA Jetson Orin Nano 8Gb | ~546.135 | ~1.82227 | 58 | 59 | ## For NVIDIA dGPU 60 | 61 | ### Environment 62 | 63 | - NVIDIA CUDA: 11.4 64 | - NVIDIA TensorRT: 8.5.2 65 | 66 | 67 | #### Clone repository 68 | 69 | Clone the repository and its submodules 70 | 71 | ```bash 72 | git clone --recurse-submodules https://github.com/nabang1010/YOLOv8_DeepSORT_TensorRT.git 73 | ``` 74 | 75 | #### Prepare environment 76 | 77 | Create a new environment 78 | 79 | ```bash 80 | conda create -n yolov8_ds python=3.8 81 | ``` 82 | 83 | Activate the environment 84 | 85 | ```bash 86 | conda activate yolov8_ds 87 | ``` 88 | 89 | ### Prepare models 90 | 91 | Go to **`refs/YOLOv8-TensorRT`** and install the requirements for exporting models 92 | 93 | ```bash 94 | cd refs/YOLOv8-TensorRT 95 | pip3 install -r requirements.txt 96 | pip3 install tensorrt easydict pycuda lap cython_bbox 97 | ``` 98 | Install `python3-libnvinfer` 99 | 100 | ```bash 101 | sudo apt-get install python3-libnvinfer 102 | ``` 103 | 104 | Download the YOLOv8 weights from [ultralytics](https://github.com/ultralytics/ultralytics) here: [yolov8n.pt](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n.pt) and save them in the **`models/to_export`** folder 105 | 106 | **Export YOLOv8 ONNX model** 107 | 108 | In **`refs/YOLOv8-TensorRT`** run the following command to export the YOLOv8 ONNX model 109 | 110 | ```bash 111 | python3 export-det.py \ 112 | --weights ../../models/to_export/yolov8n.pt \ 113 | --iou-thres 0.65 \ 114 | --conf-thres 0.25 \ 115 | --topk 100 \ 116 | --opset 11 \ 117 | --sim \ 118 | --input-shape 1 3 640 640 \ 119 | --device cuda:0 120 | ``` 121 |
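Optionally, sanity-check the exported ONNX graph before building the engine. The snippet below is a minimal sketch: it assumes `onnxruntime` is installed (`pip3 install onnxruntime`, which is not part of the steps above) and that the model is still at the path used by the export command; it only prints the detector's input/output signatures (the input should be `1x3x640x640`).

```python
# Minimal ONNX sanity check (assumes `pip3 install onnxruntime`).
import onnxruntime as ort

sess = ort.InferenceSession(
    "../../models/to_export/yolov8n.onnx",
    providers=["CPUExecutionProvider"],
)

# Print the graph signature to confirm the export looks as expected.
for inp in sess.get_inputs():
    print("input :", inp.name, inp.shape, inp.type)
for out in sess.get_outputs():
    print("output:", out.name, out.shape, out.type)
```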
122 | The output `.onnx` model will be saved in the **`models/to_export`** folder; move it to the **`models/onnx`** folder 123 | ```bash 124 | mv ../../models/to_export/yolov8n.onnx ../../models/onnx/yolov8n.onnx 125 | ``` 126 | **Export YOLOv8 TensorRT model** 127 | 128 | In **`refs/YOLOv8-TensorRT`** run the following command to build the YOLOv8 TensorRT engine 129 | 130 | ```bash 131 | python3 build.py \ 132 | --weights ../../models/onnx/yolov8n.onnx \ 133 | --iou-thres 0.65 \ 134 | --conf-thres 0.25 \ 135 | --topk 100 \ 136 | --fp16 \ 137 | --device cuda:0 138 | ``` 139 | The output `.engine` model will be saved in the **`models/onnx`** folder; move it to the **`models/engine`** folder 140 | 141 | ```bash 142 | mv ../../models/onnx/yolov8n.engine ../../models/engine/yolov8n.engine 143 | ``` 144 | 145 | **Build OpenCV** 146 | 147 | ```bash 148 | bash build_opencv.sh 149 | ``` 150 | 151 | **Export DeepSORT TensorRT model *(if you use BYTETrack, skip this step)*** 152 | 153 | 154 | Install `libeigen3-dev` 155 | ```bash 156 | apt-get install libeigen3-dev 157 | ``` 158 | Go to **`refs/deepsort_tensorrt`** and run the following commands to build `onnx2engine` 159 | 160 | ```bash 161 | cd refs/deepsort_tensorrt 162 | mkdir build 163 | cd build 164 | cmake .. 165 | make -j$(nproc) 166 | 167 | ``` 168 | 169 | > If you hit the error `fatal error: Eigen/Core: No such file or directory`, replace `#include <Eigen/Core>` with `#include <eigen3/Eigen/Core>` in the files of that repo that include it (`datatype.h`, `kalmanfilter.cpp`) and rebuild. 170 | 171 | > If you hit the error `error: looser exception specification on overriding virtual function 'virtual void Logger::log(nvinfer1::ILogger::Severity ...'`, add `noexcept` before `override` in `logger.h` line 239 and rebuild. 172 | 173 | Run the following command to export the DeepSORT TensorRT model 174 | 175 | ```bash 176 | ./build/onnx2engine ../../models/onnx/deepsort.onnx ../../models/engine/deepsort.engine 177 | ``` 178 | ### Run script 179 | 180 | **Go to the `srcs` folder** 181 | 182 | ```bash 183 | cd srcs 184 | ``` 185 | 186 | **Run YOLOv8 + DeepSORT** 187 | 188 | ```bash 189 | python3 yolov8_deepsort_trt.py --show 190 | 191 | ``` 192 | **Run YOLOv8 + BYTETrack** 193 | 194 | ```bash 195 | python3 yolov8_bytetrack_trt.py --show 196 | 197 | ``` 198 |
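A note on tracker parameters: the vendored `BYTETracker` in `srcs/bytetrack/byte_tracker.py` reads its thresholds from an `args` object with `track_thresh`, `track_buffer`, `match_thresh` and `mot20` attributes. The sketch below only illustrates that interface; the attribute names come from the vendored code, while the values shown are common ByteTrack defaults and are not necessarily what the repo's scripts pass in.

```python
# Sketch of the argument object BYTETracker expects (attribute names are taken
# from srcs/bytetrack/byte_tracker.py; the values are typical ByteTrack defaults,
# not necessarily what this repo's scripts use). Run from inside srcs/ with the
# environment prepared above (lap, cython_bbox, torch installed).
from types import SimpleNamespace

import numpy as np
from bytetrack.byte_tracker import BYTETracker

args = SimpleNamespace(
    track_thresh=0.5,   # high-confidence detection threshold
    track_buffer=30,    # frames a lost track is kept before removal
    match_thresh=0.8,   # IoU matching threshold for the first association
    mot20=False,        # keep score fusion enabled (MOT20 mode off)
)
tracker = BYTETracker(args, frame_rate=30)

# Per frame: detections as an (N, 5) array of [x1, y1, x2, y2, score];
# passing the frame size for both img_info and img_size leaves boxes unscaled.
dets = np.array([[100, 120, 180, 300, 0.87]], dtype=np.float32)
online_targets = tracker.update(dets, (720, 1280), (720, 1280))
for t in online_targets:
    print(t.track_id, t.tlbr, t.score)
```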
199 | ## For NVIDIA Jetson Device 200 | 201 | ***Coming soon*** 202 | 203 | 204 | --- 205 | 206 | # References 207 | 208 | - [ultralytics](https://github.com/ultralytics/ultralytics) 209 | - [YOLOv8-TensorRT](https://github.com/triple-Mu/YOLOv8-TensorRT) 210 | - [deepsort_tensorrt](https://github.com/GesilaA/deepsort_tensorrt) 211 | - [yolov5_deepsort_tensorrt](https://github.com/cong/yolov5_deepsort_tensorrt) 212 | - [ByteTrack](https://github.com/ifzhang/ByteTrack) 213 | 214 | # Star History 215 | 216 | 217 | 218 | 219 | 220 | Star History Chart 221 | 222 | 223 | -------------------------------------------------------------------------------- /assets/sample_yolov8_bytetrack.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/assets/sample_yolov8_bytetrack.gif -------------------------------------------------------------------------------- /build_opencv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd refs/opencv 4 | sed -i 's|include <Eigen/Core>|include <eigen3/Eigen/Core>|g' modules/core/include/opencv2/core/private.hpp 5 | rm -rf build 6 | mkdir build && cd build 7 | apt install -y build-essential cmake git pkg-config libgtk-3-dev \ 8 | libavcodec-dev libavformat-dev libswscale-dev libv4l-dev \ 9 | libxvidcore-dev libx264-dev libjpeg-dev libpng-dev libtiff-dev \ 10 | gfortran openexr libatlas-base-dev python3-dev python3-numpy \ 11 | libtbb2 libtbb-dev libdc1394-22-dev 12 | cmake -D CMAKE_BUILD_TYPE=RELEASE \ 13 | -D CMAKE_INSTALL_PREFIX=/usr/local \ 14 | -D EIGEN_INCLUDE_PATH=/usr/include/eigen3 \ 15 | -D ENABLE_FAST_MATH=1 \ 16 | -D CUDA_FAST_MATH=1 \ 17 | -D WITH_CUBLAS=1 \ 18 | -D OPENCV_GENERATE_PKGCONFIG=ON \ 19 | -D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules \ 20 | -D WITH_GSTREAMER=ON \ 21 | -D WITH_V4L=ON \ 22 | -D WITH_LIBV4L=ON \ 23 | -D BUILD_opencv_python2=ON \ 24 | -D BUILD_opencv_python3=ON \ 25 | ../ 26 | # -D WITH_CUDA=ON \ 27 | make -j$(nproc) 28 | make install 29 | ldconfig -v 30 | 31 | -------------------------------------------------------------------------------- /models/engine/.gitkeep: -------------------------------------------------------------------------------- 1 | Store NVIDIA TensorRT Engine models -------------------------------------------------------------------------------- /models/onnx/.gitkeep: -------------------------------------------------------------------------------- 1 | Store ONNX models to export -------------------------------------------------------------------------------- /models/onnx/deepsort.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/models/onnx/deepsort.onnx -------------------------------------------------------------------------------- /models/onnx/yolov8n.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/models/onnx/yolov8n.onnx -------------------------------------------------------------------------------- /models/to_export/.gitkeep: -------------------------------------------------------------------------------- 1 | Store models to export -------------------------------------------------------------------------------- /sample_video/.gitkeep: -------------------------------------------------------------------------------- 1 | Store sample video -------------------------------------------------------------------------------- /sample_video/sample_1.webm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/sample_video/sample_1.webm -------------------------------------------------------------------------------- /srcs/bytetrack/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args):
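# Implemented by concrete tracks, e.g. STrack.activate(kalman_filter, frame_id) in byte_tracker.py.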
40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed -------------------------------------------------------------------------------- /srcs/bytetrack/byte_tracker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import deque 3 | import os 4 | import os.path as osp 5 | import copy 6 | import torch 7 | import torch.nn.functional as F 8 | 9 | from .kalman_filter import KalmanFilter 10 | from .matching import iou_distance, fuse_score, linear_assignment 11 | from .basetrack import BaseTrack, TrackState 12 | 13 | class STrack(BaseTrack): 14 | shared_kalman = KalmanFilter() 15 | def __init__(self, tlwh, score): 16 | 17 | # wait activate 18 | self._tlwh = np.asarray(tlwh, dtype=np.float) 19 | self.kalman_filter = None 20 | self.mean, self.covariance = None, None 21 | self.is_activated = False 22 | 23 | self.score = score 24 | self.tracklet_len = 0 25 | 26 | def predict(self): 27 | mean_state = self.mean.copy() 28 | if self.state != TrackState.Tracked: 29 | mean_state[7] = 0 30 | self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) 31 | 32 | @staticmethod 33 | def multi_predict(stracks): 34 | if len(stracks) > 0: 35 | multi_mean = np.asarray([st.mean.copy() for st in stracks]) 36 | multi_covariance = np.asarray([st.covariance for st in stracks]) 37 | for i, st in enumerate(stracks): 38 | if st.state != TrackState.Tracked: 39 | multi_mean[i][7] = 0 40 | multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) 41 | for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): 42 | stracks[i].mean = mean 43 | stracks[i].covariance = cov 44 | 45 | def activate(self, kalman_filter, frame_id): 46 | """Start a new tracklet""" 47 | self.kalman_filter = kalman_filter 48 | self.track_id = self.next_id() 49 | self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) 50 | 51 | self.tracklet_len = 0 52 | self.state = TrackState.Tracked 53 | if frame_id == 1: 54 | self.is_activated = True 55 | # self.is_activated = True 56 | self.frame_id = frame_id 57 | self.start_frame = frame_id 58 | 59 | def re_activate(self, new_track, frame_id, new_id=False): 60 | self.mean, self.covariance = self.kalman_filter.update( 61 | self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) 62 | ) 63 | self.tracklet_len = 0 64 | self.state = TrackState.Tracked 65 | self.is_activated = True 66 | self.frame_id = frame_id 67 | if new_id: 68 | self.track_id = self.next_id() 69 | self.score = new_track.score 70 | 71 | def update(self, new_track, frame_id): 72 | """ 73 | Update a matched track 74 | :type new_track: STrack 75 | :type frame_id: int 76 | :type update_feature: bool 77 | :return: 78 | """ 79 | self.frame_id = frame_id 80 | self.tracklet_len += 1 81 | 82 | new_tlwh = new_track.tlwh 83 | self.mean, self.covariance = self.kalman_filter.update( 84 | self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) 85 | self.state = TrackState.Tracked 86 | self.is_activated = True 87 | 88 | self.score = new_track.score 89 | 90 | @property 91 | # @jit(nopython=True) 92 | def tlwh(self): 93 | """Get current position in bounding box format `(top left x, top left y, 94 | width, height)`. 
95 | """ 96 | if self.mean is None: 97 | return self._tlwh.copy() 98 | ret = self.mean[:4].copy() 99 | ret[2] *= ret[3] 100 | ret[:2] -= ret[2:] / 2 101 | return ret 102 | 103 | @property 104 | # @jit(nopython=True) 105 | def tlbr(self): 106 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 107 | `(top left, bottom right)`. 108 | """ 109 | ret = self.tlwh.copy() 110 | ret[2:] += ret[:2] 111 | return ret 112 | 113 | @staticmethod 114 | # @jit(nopython=True) 115 | def tlwh_to_xyah(tlwh): 116 | """Convert bounding box to format `(center x, center y, aspect ratio, 117 | height)`, where the aspect ratio is `width / height`. 118 | """ 119 | ret = np.asarray(tlwh).copy() 120 | ret[:2] += ret[2:] / 2 121 | ret[2] /= ret[3] 122 | return ret 123 | 124 | def to_xyah(self): 125 | return self.tlwh_to_xyah(self.tlwh) 126 | 127 | @staticmethod 128 | # @jit(nopython=True) 129 | def tlbr_to_tlwh(tlbr): 130 | ret = np.asarray(tlbr).copy() 131 | ret[2:] -= ret[:2] 132 | return ret 133 | 134 | @staticmethod 135 | # @jit(nopython=True) 136 | def tlwh_to_tlbr(tlwh): 137 | ret = np.asarray(tlwh).copy() 138 | ret[2:] += ret[:2] 139 | return ret 140 | 141 | def __repr__(self): 142 | return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) 143 | 144 | 145 | class BYTETracker(object): 146 | def __init__(self, args, frame_rate=30): 147 | self.tracked_stracks = [] # type: list[STrack] 148 | self.lost_stracks = [] # type: list[STrack] 149 | self.removed_stracks = [] # type: list[STrack] 150 | 151 | self.frame_id = 0 152 | self.args = args 153 | #self.det_thresh = args.track_thresh 154 | self.det_thresh = args.track_thresh + 0.1 155 | self.buffer_size = int(frame_rate / 30.0 * args.track_buffer) 156 | self.max_time_lost = self.buffer_size 157 | self.kalman_filter = KalmanFilter() 158 | 159 | def update(self, output_results, img_info, img_size): 160 | self.frame_id += 1 161 | activated_starcks = [] 162 | refind_stracks = [] 163 | lost_stracks = [] 164 | removed_stracks = [] 165 | 166 | if output_results.shape[1] == 5: 167 | scores = output_results[:, 4] 168 | bboxes = output_results[:, :4] 169 | else: 170 | output_results = output_results.cpu().numpy() 171 | scores = output_results[:, 4] * output_results[:, 5] 172 | bboxes = output_results[:, :4] # x1y1x2y2 173 | img_h, img_w = img_info[0], img_info[1] 174 | scale = min(img_size[0] / float(img_h), img_size[1] / float(img_w)) 175 | bboxes /= scale 176 | 177 | remain_inds = scores > self.args.track_thresh 178 | inds_low = scores > 0.1 179 | inds_high = scores < self.args.track_thresh 180 | 181 | inds_second = np.logical_and(inds_low, inds_high) 182 | dets_second = bboxes[inds_second] 183 | dets = bboxes[remain_inds] 184 | scores_keep = scores[remain_inds] 185 | scores_second = scores[inds_second] 186 | 187 | if len(dets) > 0: 188 | '''Detections''' 189 | detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for 190 | (tlbr, s) in zip(dets, scores_keep)] 191 | else: 192 | detections = [] 193 | 194 | ''' Add newly detected tracklets to tracked_stracks''' 195 | unconfirmed = [] 196 | tracked_stracks = [] # type: list[STrack] 197 | for track in self.tracked_stracks: 198 | if not track.is_activated: 199 | unconfirmed.append(track) 200 | else: 201 | tracked_stracks.append(track) 202 | 203 | ''' Step 2: First association, with high score detection boxes''' 204 | strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) 205 | # Predict the current location with KF 206 | STrack.multi_predict(strack_pool) 207 | dists = 
iou_distance(strack_pool, detections) 208 | if not self.args.mot20: 209 | dists = fuse_score(dists, detections) 210 | matches, u_track, u_detection = linear_assignment(dists, thresh=self.args.match_thresh) 211 | 212 | for itracked, idet in matches: 213 | track = strack_pool[itracked] 214 | det = detections[idet] 215 | if track.state == TrackState.Tracked: 216 | track.update(detections[idet], self.frame_id) 217 | activated_starcks.append(track) 218 | else: 219 | track.re_activate(det, self.frame_id, new_id=False) 220 | refind_stracks.append(track) 221 | 222 | ''' Step 3: Second association, with low score detection boxes''' 223 | # association the untrack to the low score detections 224 | if len(dets_second) > 0: 225 | '''Detections''' 226 | detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for 227 | (tlbr, s) in zip(dets_second, scores_second)] 228 | else: 229 | detections_second = [] 230 | r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] 231 | dists = iou_distance(r_tracked_stracks, detections_second) 232 | matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5) 233 | for itracked, idet in matches: 234 | track = r_tracked_stracks[itracked] 235 | det = detections_second[idet] 236 | if track.state == TrackState.Tracked: 237 | track.update(det, self.frame_id) 238 | activated_starcks.append(track) 239 | else: 240 | track.re_activate(det, self.frame_id, new_id=False) 241 | refind_stracks.append(track) 242 | 243 | for it in u_track: 244 | track = r_tracked_stracks[it] 245 | if not track.state == TrackState.Lost: 246 | track.mark_lost() 247 | lost_stracks.append(track) 248 | 249 | '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' 250 | detections = [detections[i] for i in u_detection] 251 | dists = iou_distance(unconfirmed, detections) 252 | if not self.args.mot20: 253 | dists = fuse_score(dists, detections) 254 | matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7) 255 | for itracked, idet in matches: 256 | unconfirmed[itracked].update(detections[idet], self.frame_id) 257 | activated_starcks.append(unconfirmed[itracked]) 258 | for it in u_unconfirmed: 259 | track = unconfirmed[it] 260 | track.mark_removed() 261 | removed_stracks.append(track) 262 | 263 | """ Step 4: Init new stracks""" 264 | for inew in u_detection: 265 | track = detections[inew] 266 | if track.score < self.det_thresh: 267 | continue 268 | track.activate(self.kalman_filter, self.frame_id) 269 | activated_starcks.append(track) 270 | """ Step 5: Update state""" 271 | for track in self.lost_stracks: 272 | if self.frame_id - track.end_frame > self.max_time_lost: 273 | track.mark_removed() 274 | removed_stracks.append(track) 275 | 276 | # print('Ramained match {} s'.format(t4-t3)) 277 | 278 | self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] 279 | self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) 280 | self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) 281 | self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) 282 | self.lost_stracks.extend(lost_stracks) 283 | self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) 284 | self.removed_stracks.extend(removed_stracks) 285 | self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) 286 | # get scores of lost tracks 287 | output_stracks = [track for track in self.tracked_stracks if 
track.is_activated] 288 | 289 | return output_stracks 290 | 291 | 292 | def joint_stracks(tlista, tlistb): 293 | exists = {} 294 | res = [] 295 | for t in tlista: 296 | exists[t.track_id] = 1 297 | res.append(t) 298 | for t in tlistb: 299 | tid = t.track_id 300 | if not exists.get(tid, 0): 301 | exists[tid] = 1 302 | res.append(t) 303 | return res 304 | 305 | 306 | def sub_stracks(tlista, tlistb): 307 | stracks = {} 308 | for t in tlista: 309 | stracks[t.track_id] = t 310 | for t in tlistb: 311 | tid = t.track_id 312 | if stracks.get(tid, 0): 313 | del stracks[tid] 314 | return list(stracks.values()) 315 | 316 | 317 | def remove_duplicate_stracks(stracksa, stracksb): 318 | pdist = iou_distance(stracksa, stracksb) 319 | pairs = np.where(pdist < 0.15) 320 | dupa, dupb = list(), list() 321 | for p, q in zip(*pairs): 322 | timep = stracksa[p].frame_id - stracksa[p].start_frame 323 | timeq = stracksb[q].frame_id - stracksb[q].start_frame 324 | if timep > timeq: 325 | dupb.append(q) 326 | else: 327 | dupa.append(p) 328 | resa = [t for i, t in enumerate(stracksa) if not i in dupa] 329 | resb = [t for i, t in enumerate(stracksb) if not i in dupb] 330 | return resa, resb -------------------------------------------------------------------------------- /srcs/bytetrack/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 
70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. Unobserved velocities are initialized to 0 mean. 105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | #mean = np.dot(self._motion_mat, mean) 120 | mean = np.dot(mean, self._motion_mat.T) 121 | covariance = np.linalg.multi_dot(( 122 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 123 | 124 | return mean, covariance 125 | 126 | def project(self, mean, covariance): 127 | """Project state distribution to measurement space. 128 | 129 | Parameters 130 | ---------- 131 | mean : ndarray 132 | The state's mean vector (8 dimensional array). 133 | covariance : ndarray 134 | The state's covariance matrix (8x8 dimensional). 135 | 136 | Returns 137 | ------- 138 | (ndarray, ndarray) 139 | Returns the projected mean and covariance matrix of the given state 140 | estimate. 141 | 142 | """ 143 | std = [ 144 | self._std_weight_position * mean[3], 145 | self._std_weight_position * mean[3], 146 | 1e-1, 147 | self._std_weight_position * mean[3]] 148 | innovation_cov = np.diag(np.square(std)) 149 | 150 | mean = np.dot(self._update_mat, mean) 151 | covariance = np.linalg.multi_dot(( 152 | self._update_mat, covariance, self._update_mat.T)) 153 | return mean, covariance + innovation_cov 154 | 155 | def multi_predict(self, mean, covariance): 156 | """Run Kalman filter prediction step (Vectorized version). 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The Nx8 dimensional mean matrix of the object states at the previous 161 | time step. 162 | covariance : ndarray 163 | The Nx8x8 dimensional covariance matrics of the object states at the 164 | previous time step. 165 | Returns 166 | ------- 167 | (ndarray, ndarray) 168 | Returns the mean vector and covariance matrix of the predicted 169 | state. Unobserved velocities are initialized to 0 mean. 
170 | """ 171 | std_pos = [ 172 | self._std_weight_position * mean[:, 3], 173 | self._std_weight_position * mean[:, 3], 174 | 1e-2 * np.ones_like(mean[:, 3]), 175 | self._std_weight_position * mean[:, 3]] 176 | std_vel = [ 177 | self._std_weight_velocity * mean[:, 3], 178 | self._std_weight_velocity * mean[:, 3], 179 | 1e-5 * np.ones_like(mean[:, 3]), 180 | self._std_weight_velocity * mean[:, 3]] 181 | sqr = np.square(np.r_[std_pos, std_vel]).T 182 | 183 | motion_cov = [] 184 | for i in range(len(mean)): 185 | motion_cov.append(np.diag(sqr[i])) 186 | motion_cov = np.asarray(motion_cov) 187 | 188 | mean = np.dot(mean, self._motion_mat.T) 189 | left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) 190 | covariance = np.dot(left, self._motion_mat.T) + motion_cov 191 | 192 | return mean, covariance 193 | 194 | def update(self, mean, covariance, measurement): 195 | """Run Kalman filter correction step. 196 | 197 | Parameters 198 | ---------- 199 | mean : ndarray 200 | The predicted state's mean vector (8 dimensional). 201 | covariance : ndarray 202 | The state's covariance matrix (8x8 dimensional). 203 | measurement : ndarray 204 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 205 | is the center position, a the aspect ratio, and h the height of the 206 | bounding box. 207 | 208 | Returns 209 | ------- 210 | (ndarray, ndarray) 211 | Returns the measurement-corrected state distribution. 212 | 213 | """ 214 | projected_mean, projected_cov = self.project(mean, covariance) 215 | 216 | chol_factor, lower = scipy.linalg.cho_factor( 217 | projected_cov, lower=True, check_finite=False) 218 | kalman_gain = scipy.linalg.cho_solve( 219 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 220 | check_finite=False).T 221 | innovation = measurement - projected_mean 222 | 223 | new_mean = mean + np.dot(innovation, kalman_gain.T) 224 | new_covariance = covariance - np.linalg.multi_dot(( 225 | kalman_gain, projected_cov, kalman_gain.T)) 226 | return new_mean, new_covariance 227 | 228 | def gating_distance(self, mean, covariance, measurements, 229 | only_position=False, metric='maha'): 230 | """Compute gating distance between state distribution and measurements. 231 | A suitable distance threshold can be obtained from `chi2inv95`. If 232 | `only_position` is False, the chi-square distribution has 4 degrees of 233 | freedom, otherwise 2. 234 | Parameters 235 | ---------- 236 | mean : ndarray 237 | Mean vector over the state distribution (8 dimensional). 238 | covariance : ndarray 239 | Covariance of the state distribution (8x8 dimensional). 240 | measurements : ndarray 241 | An Nx4 dimensional matrix of N measurements, each in 242 | format (x, y, a, h) where (x, y) is the bounding box center 243 | position, a the aspect ratio, and h the height. 244 | only_position : Optional[bool] 245 | If True, distance computation is done with respect to the bounding 246 | box center position only. 247 | Returns 248 | ------- 249 | ndarray 250 | Returns an array of length N, where the i-th element contains the 251 | squared Mahalanobis distance between (mean, covariance) and 252 | `measurements[i]`. 
253 | """ 254 | mean, covariance = self.project(mean, covariance) 255 | if only_position: 256 | mean, covariance = mean[:2], covariance[:2, :2] 257 | measurements = measurements[:, :2] 258 | 259 | d = measurements - mean 260 | if metric == 'gaussian': 261 | return np.sum(d * d, axis=1) 262 | elif metric == 'maha': 263 | cholesky_factor = np.linalg.cholesky(covariance) 264 | z = scipy.linalg.solve_triangular( 265 | cholesky_factor, d.T, lower=True, check_finite=False, 266 | overwrite_b=True) 267 | squared_maha = np.sum(z * z, axis=0) 268 | return squared_maha 269 | else: 270 | raise ValueError('invalid distance metric') -------------------------------------------------------------------------------- /srcs/bytetrack/matching.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import scipy 4 | import lap 5 | from scipy.spatial.distance import cdist 6 | 7 | from cython_bbox import bbox_overlaps as bbox_ious 8 | from .kalman_filter import chi2inv95 9 | import time 10 | 11 | def merge_matches(m1, m2, shape): 12 | O,P,Q = shape 13 | m1 = np.asarray(m1) 14 | m2 = np.asarray(m2) 15 | 16 | M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) 17 | M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) 18 | 19 | mask = M1*M2 20 | match = mask.nonzero() 21 | match = list(zip(match[0], match[1])) 22 | unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) 23 | unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) 24 | 25 | return match, unmatched_O, unmatched_Q 26 | 27 | 28 | def _indices_to_matches(cost_matrix, indices, thresh): 29 | matched_cost = cost_matrix[tuple(zip(*indices))] 30 | matched_mask = (matched_cost <= thresh) 31 | 32 | matches = indices[matched_mask] 33 | unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) 34 | unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) 35 | 36 | return matches, unmatched_a, unmatched_b 37 | 38 | 39 | def linear_assignment(cost_matrix, thresh): 40 | if cost_matrix.size == 0: 41 | return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) 42 | matches, unmatched_a, unmatched_b = [], [], [] 43 | cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) 44 | for ix, mx in enumerate(x): 45 | if mx >= 0: 46 | matches.append([ix, mx]) 47 | unmatched_a = np.where(x < 0)[0] 48 | unmatched_b = np.where(y < 0)[0] 49 | matches = np.asarray(matches) 50 | return matches, unmatched_a, unmatched_b 51 | 52 | 53 | def ious(atlbrs, btlbrs): 54 | """ 55 | Compute cost based on IoU 56 | :type atlbrs: list[tlbr] | np.ndarray 57 | :type atlbrs: list[tlbr] | np.ndarray 58 | 59 | :rtype ious np.ndarray 60 | """ 61 | ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) 62 | if ious.size == 0: 63 | return ious 64 | 65 | ious = bbox_ious( 66 | np.ascontiguousarray(atlbrs, dtype=np.float), 67 | np.ascontiguousarray(btlbrs, dtype=np.float) 68 | ) 69 | 70 | return ious 71 | 72 | 73 | def iou_distance(atracks, btracks): 74 | """ 75 | Compute cost based on IoU 76 | :type atracks: list[STrack] 77 | :type btracks: list[STrack] 78 | 79 | :rtype cost_matrix np.ndarray 80 | """ 81 | 82 | if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): 83 | atlbrs = atracks 84 | btlbrs = btracks 85 | else: 86 | atlbrs = [track.tlbr for track in atracks] 87 | btlbrs = 
[track.tlbr for track in btracks] 88 | _ious = ious(atlbrs, btlbrs) 89 | cost_matrix = 1 - _ious 90 | 91 | return cost_matrix 92 | 93 | def v_iou_distance(atracks, btracks): 94 | """ 95 | Compute cost based on IoU 96 | :type atracks: list[STrack] 97 | :type btracks: list[STrack] 98 | 99 | :rtype cost_matrix np.ndarray 100 | """ 101 | 102 | if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): 103 | atlbrs = atracks 104 | btlbrs = btracks 105 | else: 106 | atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks] 107 | btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks] 108 | _ious = ious(atlbrs, btlbrs) 109 | cost_matrix = 1 - _ious 110 | 111 | return cost_matrix 112 | 113 | def embedding_distance(tracks, detections, metric='cosine'): 114 | """ 115 | :param tracks: list[STrack] 116 | :param detections: list[BaseTrack] 117 | :param metric: 118 | :return: cost_matrix np.ndarray 119 | """ 120 | 121 | cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) 122 | if cost_matrix.size == 0: 123 | return cost_matrix 124 | det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) 125 | #for i, track in enumerate(tracks): 126 | #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) 127 | track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) 128 | cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features 129 | return cost_matrix 130 | 131 | 132 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): 133 | if cost_matrix.size == 0: 134 | return cost_matrix 135 | gating_dim = 2 if only_position else 4 136 | gating_threshold = chi2inv95[gating_dim] 137 | measurements = np.asarray([det.to_xyah() for det in detections]) 138 | for row, track in enumerate(tracks): 139 | gating_distance = kf.gating_distance( 140 | track.mean, track.covariance, measurements, only_position) 141 | cost_matrix[row, gating_distance > gating_threshold] = np.inf 142 | return cost_matrix 143 | 144 | 145 | def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98): 146 | if cost_matrix.size == 0: 147 | return cost_matrix 148 | gating_dim = 2 if only_position else 4 149 | gating_threshold = chi2inv95[gating_dim] 150 | measurements = np.asarray([det.to_xyah() for det in detections]) 151 | for row, track in enumerate(tracks): 152 | gating_distance = kf.gating_distance( 153 | track.mean, track.covariance, measurements, only_position, metric='maha') 154 | cost_matrix[row, gating_distance > gating_threshold] = np.inf 155 | cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance 156 | return cost_matrix 157 | 158 | 159 | def fuse_iou(cost_matrix, tracks, detections): 160 | if cost_matrix.size == 0: 161 | return cost_matrix 162 | reid_sim = 1 - cost_matrix 163 | iou_dist = iou_distance(tracks, detections) 164 | iou_sim = 1 - iou_dist 165 | fuse_sim = reid_sim * (1 + iou_sim) / 2 166 | det_scores = np.array([det.score for det in detections]) 167 | det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) 168 | #fuse_sim = fuse_sim * (1 + det_scores) / 2 169 | fuse_cost = 1 - fuse_sim 170 | return fuse_cost 171 | 172 | 173 | def fuse_score(cost_matrix, detections): 174 | if cost_matrix.size == 0: 175 | return cost_matrix 176 | iou_sim = 1 - cost_matrix 177 | det_scores = np.array([det.score for 
det in detections]) 178 | det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) 179 | fuse_sim = iou_sim * det_scores 180 | fuse_cost = 1 - fuse_sim 181 | return fuse_cost -------------------------------------------------------------------------------- /srcs/config.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | 5 | random.seed(0) 6 | 7 | # detection model classes 8 | CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 9 | 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 10 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 11 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 12 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 13 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 14 | 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 15 | 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 16 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 17 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 18 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 19 | 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 20 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 21 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush') 22 | 23 | # colors for per classes 24 | COLORS = { 25 | cls: [random.randint(0, 255) for _ in range(3)] 26 | for i, cls in enumerate(CLASSES) 27 | } 28 | 29 | # colors for segment masks 30 | MASK_COLORS = np.array([(255, 56, 56), (255, 157, 151), (255, 112, 31), 31 | (255, 178, 29), (207, 210, 49), (72, 249, 10), 32 | (146, 204, 23), (61, 219, 134), (26, 147, 52), 33 | (0, 212, 187), (44, 153, 168), (0, 194, 255), 34 | (52, 69, 147), (100, 115, 255), (0, 24, 236), 35 | (132, 56, 255), (82, 0, 133), (203, 56, 255), 36 | (255, 149, 200), (255, 55, 199)], 37 | dtype=np.float32) / 255. 
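# Illustrative sketch (assumed usage, not taken from this repo's scripts) of how
# CLASSES and COLORS map a class id to a label and a drawing colour, e.g. with OpenCV:
#
#   import cv2
#   name = CLASSES[int(cls_id)]          # class id -> label string
#   color = COLORS[name]                 # label string -> BGR colour
#   cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
#   cv2.putText(frame, f'{name} {int(track_id)}', (x1, y1 - 5),
#               cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)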
38 | 39 | KPS_COLORS = [[0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], 40 | [255, 128, 0], [255, 128, 0], [255, 128, 0], [255, 128, 0], 41 | [255, 128, 0], [255, 128, 0], [51, 153, 255], [51, 153, 255], 42 | [51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255]] 43 | 44 | SKELETON = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], 45 | [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], 46 | [2, 4], [3, 5], [4, 6], [5, 7]] 47 | 48 | LIMB_COLORS = [[51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255], 49 | [255, 51, 255], [255, 51, 255], [255, 51, 255], [255, 128, 0], 50 | [255, 128, 0], [255, 128, 0], [255, 128, 0], [255, 128, 0], 51 | [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], 52 | [0, 255, 0], [0, 255, 0]] 53 | 54 | # alpha for segment masks 55 | ALPHA = 0.5 56 | -------------------------------------------------------------------------------- /srcs/deep_sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/srcs/deep_sort/__init__.py -------------------------------------------------------------------------------- /srcs/deep_sort/configs/deep_sort.yaml: -------------------------------------------------------------------------------- 1 | DEEPSORT: 2 | REID_CKPT: "/home/sai/yolov8_deepsort_jetson/model/deepsort.engine" 3 | MAX_DIST: 0.3 4 | MIN_CONFIDENCE: 0.3 5 | NMS_MAX_OVERLAP: 0.5 6 | MAX_IOU_DISTANCE: 0.7 7 | MAX_AGE: 70 8 | N_INIT: 3 9 | NN_BUDGET: 100 10 | 11 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/__init__.py: -------------------------------------------------------------------------------- 1 | from .deep_sort_trt import DeepSort 2 | 3 | __all__ = ['DeepSort', 'build_tracker'] 4 | 5 | 6 | def build_tracker(cfg, use_cuda): 7 | return DeepSort(cfg.DEEPSORT.REID_CKPT, 8 | max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, 9 | nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, 10 | max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda) 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/deep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/srcs/deep_sort/deep_sort/deep/__init__.py -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/deep/checkpoint/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/srcs/deep_sort/deep_sort/deep/checkpoint/.gitkeep -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/deep/feature_extractor_trt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #!/usr/bin/python3 3 | """ 4 | Created on 2021/5/24 14:05 5 | @Author: Wang Cong 6 | @Email : iwangcong@outlook.com 7 | @Version : 0.1 8 | @File : feature_extractor_trt.py 9 | """ 10 | 
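# Illustrative usage sketch (engine path assumed to be the one produced by the
# README steps): the extractor deserializes the DeepSORT TensorRT engine and
# returns one appearance embedding per image crop passed to track_extractor().
#
#   extractor = TrackerExtractor("../models/engine/deepsort.engine")
#   feats = extractor.track_extractor([crop_a, crop_b])  # -> (2, feature_dim) ndarray
#   extractor.destroy()  # pop the CUDA context when finished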
import cv2 11 | import numpy as np 12 | import tensorrt as trt 13 | import pycuda.driver as cuda 14 | import pycuda.autoinit 15 | INPUT_W = 128 16 | INPUT_H = 64 17 | 18 | 19 | class TrackerExtractor: 20 | 21 | def __init__(self, engine_file_path): 22 | self.img_size = 640 23 | self.threshold = 0.3 24 | self.stride = 1 25 | self.size = (64, 128) 26 | 27 | # Create a Context on this device, 28 | self.cfx = cuda.Device(0).make_context() 29 | stream = cuda.Stream() 30 | TRT_LOGGER = trt.Logger(trt.Logger.ERROR) 31 | runtime = trt.Runtime(TRT_LOGGER) 32 | 33 | # Deserialize the engine from file 34 | print("Engine file path: ", engine_file_path) 35 | with open(engine_file_path, "rb") as f: 36 | engine = runtime.deserialize_cuda_engine(f.read()) 37 | context = engine.create_execution_context() 38 | 39 | host_inputs = [] 40 | cuda_inputs = [] 41 | host_outputs = [] 42 | cuda_outputs = [] 43 | bindings = [] 44 | 45 | for binding in engine: 46 | size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size 47 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 48 | dims = engine.get_binding_shape(binding) 49 | if dims[0] < 0: 50 | size *= -1 51 | # Allocate host and device buffers 52 | host_mem = cuda.pagelocked_empty(size, dtype) 53 | cuda_mem = cuda.mem_alloc(host_mem.nbytes) 54 | # Append the device buffer to device bindings. 55 | bindings.append(int(cuda_mem)) 56 | # Append to the appropriate list. 57 | if engine.binding_is_input(binding): 58 | host_inputs.append(host_mem) 59 | cuda_inputs.append(cuda_mem) 60 | else: 61 | host_outputs.append(host_mem) 62 | cuda_outputs.append(cuda_mem) 63 | 64 | # Store 65 | self.stream = stream 66 | self.context = context 67 | self.engine = engine 68 | self.host_inputs = host_inputs 69 | self.cuda_inputs = cuda_inputs 70 | self.host_outputs = host_outputs 71 | self.cuda_outputs = cuda_outputs 72 | self.bindings = bindings 73 | 74 | def _preprocess(self, im_crops): 75 | """ 76 | 1. to float with scale from 0 to 1 77 | 2. resize to (64, 128) as Market1501 dataset did 78 | 3. concatenate to a numpy array 79 | 3. to torch Tensor 80 | 4. normalize 81 | """ 82 | def _resize(im, size): 83 | return cv2.resize(im.astype(np.float32)/255., size) 84 | def _normalize(im): 85 | mean = [0.485, 0.456, 0.406] 86 | std = [0.229, 0.224, 0.225] 87 | return (im.astype(np.float32) - np.array(mean)) / np.array(std) 88 | imgs = [] 89 | for im in im_crops: 90 | img = _normalize(_resize(im, self.size)) 91 | # img = img.cpu().numpy() 92 | imgs.append(img) 93 | return imgs 94 | 95 | def track_extractor(self, im_crops): 96 | # threading.Thread.__init__(self) 97 | # Make self the active context, pushing it on top of the context stack. 98 | self.cfx.push() 99 | # Restore 100 | stream = self.stream 101 | context = self.context 102 | engine = self.engine 103 | host_inputs = self.host_inputs 104 | cuda_inputs = self.cuda_inputs 105 | host_outputs = self.host_outputs 106 | cuda_outputs = self.cuda_outputs 107 | bindings = self.bindings 108 | # Do image preprocess 109 | im_batchs = self._preprocess(im_crops) 110 | features_trt = [] 111 | for im_batch in im_batchs: 112 | # Copy input image to host buffer 113 | np.copyto(host_inputs[0], im_batch.ravel()) 114 | # Transfer input data to the GPU. 115 | cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream) 116 | # Run inference. 117 | context.set_binding_shape(0, (1, 3, 128, 64)) 118 | context.execute_async(bindings=bindings, stream_handle=stream.handle) 119 | # Transfer predictions back from the GPU. 
120 | cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream) 121 | # Synchronize the stream 122 | stream.synchronize() 123 | # Remove any context from the top of the context stack, deactivating it. 124 | # self.cfx.pop() 125 | # Here we use the first row of output in that batch_size = 1 126 | trt_outputs = host_outputs[0] 127 | # Do postprocess 128 | feature_trt = trt_outputs 129 | features_trt.append(feature_trt) 130 | return np.array(features_trt) 131 | 132 | def destroy(self): 133 | self.cfx.pop() 134 | 135 | 136 | if __name__ == '__main__': 137 | img = cv2.imread("demo.jpg")[:,:,(2,1,0)] 138 | extr = TrackerExtractor("checkpoint/deepsort.engine") 139 | feature = extr.track_extractor(img) 140 | print(feature.shape) 141 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/deep_sort_trt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #!/usr/bin/python3 3 | """ 4 | Created on 2021/5/24 13:46 5 | @Author: Wang Cong 6 | @Email : iwangcong@outlook.com 7 | @Version : 0.1 8 | @File : deep_sort_trt.py 9 | """ 10 | import numpy as np 11 | import time 12 | from .deep.feature_extractor_trt import TrackerExtractor 13 | from .sort.nn_matching import NearestNeighborDistanceMetric 14 | from .sort.preprocessing import non_max_suppression 15 | from .sort.detection import Detection 16 | from .sort.tracker import Tracker 17 | 18 | __all__ = ['DeepSort'] 19 | 20 | 21 | class DeepSort(object): 22 | def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, 23 | max_age=70, n_init=3, nn_budget=100, use_cuda=True): 24 | self.min_confidence = min_confidence 25 | self.nms_max_overlap = nms_max_overlap 26 | model_path = "../models/engine/deepsort.engine" 27 | 28 | self.extractor = TrackerExtractor(model_path) 29 | 30 | max_cosine_distance = max_dist 31 | nn_budget = 100 32 | metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) 33 | self.tracker = Tracker(metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init) 34 | 35 | def clear(self): 36 | self.extractor.context.pop() 37 | 38 | def update(self, bbox_xywh, lbls, confidences, ori_img): 39 | self.height, self.width = ori_img.shape[:2] 40 | # generate detections 41 | features = self._get_features(bbox_xywh, ori_img) 42 | bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) 43 | detections = [Detection(bbox_tlwh[i], lbls[i], conf, features[i]) for i, conf in enumerate(confidences) if 44 | conf > self.min_confidence] 45 | 46 | # run on non-maximum supression 47 | boxes = np.array([d.tlwh for d in detections]) 48 | labels = np.array([d.label for d in detections]) 49 | scores = np.array([d.confidence for d in detections]) 50 | indices = non_max_suppression(boxes, self.nms_max_overlap, scores) 51 | detections = [detections[i] for i in indices] 52 | 53 | # update tracker 54 | self.tracker.predict() 55 | self.tracker.update(detections) 56 | 57 | # output bbox identities 58 | outputs = [] 59 | for track in self.tracker.tracks: 60 | if not track.is_confirmed() or track.time_since_update > 1: 61 | continue 62 | box = track.to_tlwh() 63 | x1, y1, x2, y2 = self._tlwh_to_xyxy(box) 64 | track_label = track.track_label 65 | track_id = track.track_id 66 | outputs.append(np.array([x1, y1, x2, y2, track_label, track_id])) 67 | if len(outputs) > 0: 68 | outputs = np.stack(outputs, axis=0) 69 | return outputs 70 | 71 | """ 72 | TODO: 73 | Convert bbox from xc_yc_w_h to 
xtl_ytl_w_h 74 | Thanks JieChen91@github.com for reporting this bug! 75 | """ 76 | 77 | @staticmethod 78 | def _xywh_to_tlwh(bbox_xywh): 79 | # if isinstance(bbox_xywh, np.ndarray): 80 | # bbox_tlwh = bbox_xywh.copy() 81 | bbox_tlwh = bbox_xywh.copy() 82 | bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2. 83 | bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2. 84 | return bbox_tlwh 85 | 86 | def _xywh_to_xyxy(self, bbox_xywh): 87 | x, y, w, h = bbox_xywh 88 | x1 = max(int(x - w / 2), 0) 89 | x2 = min(int(x + w / 2), self.width - 1) 90 | y1 = max(int(y - h / 2), 0) 91 | y2 = min(int(y + h / 2), self.height - 1) 92 | return x1, y1, x2, y2 93 | 94 | def _tlwh_to_xyxy(self, bbox_tlwh): 95 | """ 96 | Convert bbox from xtl_ytl_w_h to xc_yc_w_h 97 | Thanks JieChen91@github.com for reporting this bug! 98 | """ 99 | x, y, w, h = bbox_tlwh 100 | x1 = max(int(x), 0) 101 | x2 = min(int(x + w), self.width - 1) 102 | y1 = max(int(y), 0) 103 | y2 = min(int(y + h), self.height - 1) 104 | return x1, y1, x2, y2 105 | 106 | def _xyxy_to_tlwh(self, bbox_xyxy): 107 | x1, y1, x2, y2 = bbox_xyxy 108 | 109 | t = x1 110 | l = y1 111 | w = int(x2 - x1) 112 | h = int(y2 - y1) 113 | return t, l, w, h 114 | 115 | def _get_features(self, bbox_xywh, ori_img): 116 | im_crops = [] 117 | for box in bbox_xywh: 118 | x1, y1, x2, y2 = self._xywh_to_xyxy(box) 119 | im = ori_img[y1:y2, x1:x2] 120 | im_crops.append(im) 121 | if im_crops: 122 | features = self.extractor.track_extractor(im_crops) 123 | else: 124 | features = np.array([]) 125 | return features 126 | 127 | 128 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/srcs/deep_sort/deep_sort/sort/__init__.py -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | feature : ndarray | NoneType 25 | A feature vector that describes the object contained in this image. 26 | 27 | """ 28 | 29 | def __init__(self, tlwh, label, confidence, feature): 30 | self.tlwh = np.asarray(tlwh, dtype=np.float) 31 | self.label = label 32 | self.confidence = float(confidence) 33 | self.feature = np.asarray(feature, dtype=np.float32) 34 | 35 | def to_tlbr(self): 36 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 37 | `(top left, bottom right)`. 38 | """ 39 | ret = self.tlwh.copy() 40 | ret[2:] += ret[:2] 41 | return ret 42 | 43 | def to_xyah(self): 44 | """Convert bounding box to format `(center x, center y, aspect ratio, 45 | height)`, where the aspect ratio is `width / height`. 
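The DeepSort wrapper and the `Detection` class above shuttle boxes between several coordinate conventions. The snippet below is a minimal, self-contained NumPy sketch, not part of the repository, of the same conversions that `_xywh_to_tlwh` / `_tlwh_to_xyxy` and `Detection.to_xyah` perform; all numbers are made up.

```python
# Box conventions used by the tracker:
#   detector output : (xc, yc, w, h)   -- box centre plus size
#   DeepSort / tlwh : (x1, y1, w, h)   -- top-left corner plus size
#   drawing / xyxy  : (x1, y1, x2, y2) -- two opposite corners
#   Kalman state    : (xc, yc, a, h)   -- centre, aspect ratio w/h, height
import numpy as np

def xywh_to_tlwh(xywh):
    tlwh = xywh.astype(np.float32).copy()
    tlwh[:, 0] -= tlwh[:, 2] / 2.0
    tlwh[:, 1] -= tlwh[:, 3] / 2.0
    return tlwh

def tlwh_to_xyxy(tlwh, width, height):
    x1 = np.clip(tlwh[:, 0], 0, width - 1)
    y1 = np.clip(tlwh[:, 1], 0, height - 1)
    x2 = np.clip(tlwh[:, 0] + tlwh[:, 2], 0, width - 1)
    y2 = np.clip(tlwh[:, 1] + tlwh[:, 3], 0, height - 1)
    return np.stack([x1, y1, x2, y2], axis=1)

def tlwh_to_xyah(tlwh):
    xyah = tlwh.astype(np.float32).copy()
    xyah[:, :2] += xyah[:, 2:] / 2.0   # top-left corner -> centre
    xyah[:, 2] /= xyah[:, 3]           # width -> aspect ratio
    return xyah

boxes_xywh = np.array([[320.0, 240.0, 100.0, 200.0]])            # one detection
print(xywh_to_tlwh(boxes_xywh))                                  # [[270. 140. 100. 200.]]
print(tlwh_to_xyxy(xywh_to_tlwh(boxes_xywh), 640, 480))          # [[270. 140. 370. 340.]]
print(tlwh_to_xyah(xywh_to_tlwh(boxes_xywh)))                    # [[320. 240. 0.5 200.]]
```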
46 | """ 47 | ret = self.tlwh.copy() 48 | ret[:2] += ret[2:] / 2 49 | ret[2] /= ret[3] 50 | return ret 51 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Computer intersection over union. 9 | 10 | Parameters 11 | ---------- 12 | bbox : ndarray 13 | A bounding box in format `(top left x, top left y, width, height)`. 14 | candidates : ndarray 15 | A matrix of candidate bounding boxes (one per row) in the same format 16 | as `bbox`. 17 | 18 | Returns 19 | ------- 20 | ndarray 21 | The intersection over union in [0, 1] between the `bbox` and each 22 | candidate. A higher score means a larger fraction of the `bbox` is 23 | occluded by the candidate. 24 | 25 | """ 26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 27 | candidates_tl = candidates[:, :2] 28 | candidates_br = candidates[:, :2] + candidates[:, 2:] 29 | 30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 34 | wh = np.maximum(0., br - tl) 35 | 36 | area_intersection = wh.prod(axis=1) 37 | area_bbox = bbox[2:].prod() 38 | area_candidates = candidates[:, 2:].prod(axis=1) 39 | return area_intersection / (area_bbox + area_candidates - area_intersection) 40 | 41 | 42 | def iou_cost(tracks, detections, track_indices=None, 43 | detection_indices=None): 44 | """An intersection over union distance metric. 45 | 46 | Parameters 47 | ---------- 48 | tracks : List[deep_sort.track.Track] 49 | A list of tracks. 50 | detections : List[deep_sort.detection.Detection] 51 | A list of detections. 52 | track_indices : Optional[List[int]] 53 | A list of indices to tracks that should be matched. Defaults to 54 | all `tracks`. 55 | detection_indices : Optional[List[int]] 56 | A list of indices to detections that should be matched. Defaults 57 | to all `detections`. 58 | 59 | Returns 60 | ------- 61 | ndarray 62 | Returns a cost matrix of shape 63 | len(track_indices), len(detection_indices) where entry (i, j) is 64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 65 | 66 | """ 67 | if track_indices is None: 68 | track_indices = np.arange(len(tracks)) 69 | if detection_indices is None: 70 | detection_indices = np.arange(len(detections)) 71 | 72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 73 | for row, track_idx in enumerate(track_indices): 74 | if tracks[track_idx].time_since_update > 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 80 | cost_matrix[row, :] = 1. 
- iou(bbox, candidates) 81 | return cost_matrix 82 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. Unobserved velocities are initialized to 0 mean. 
105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | mean = np.dot(self._motion_mat, mean) 120 | covariance = np.linalg.multi_dot(( 121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 122 | 123 | return mean, covariance 124 | 125 | def project(self, mean, covariance): 126 | """Project state distribution to measurement space. 127 | 128 | Parameters 129 | ---------- 130 | mean : ndarray 131 | The state's mean vector (8 dimensional array). 132 | covariance : ndarray 133 | The state's covariance matrix (8x8 dimensional). 134 | 135 | Returns 136 | ------- 137 | (ndarray, ndarray) 138 | Returns the projected mean and covariance matrix of the given state 139 | estimate. 140 | 141 | """ 142 | std = [ 143 | self._std_weight_position * mean[3], 144 | self._std_weight_position * mean[3], 145 | 1e-1, 146 | self._std_weight_position * mean[3]] 147 | innovation_cov = np.diag(np.square(std)) 148 | 149 | mean = np.dot(self._update_mat, mean) 150 | covariance = np.linalg.multi_dot(( 151 | self._update_mat, covariance, self._update_mat.T)) 152 | return mean, covariance + innovation_cov 153 | 154 | def update(self, mean, covariance, measurement): 155 | """Run Kalman filter correction step. 156 | 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The predicted state's mean vector (8 dimensional). 161 | covariance : ndarray 162 | The state's covariance matrix (8x8 dimensional). 163 | measurement : ndarray 164 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 165 | is the center position, a the aspect ratio, and h the height of the 166 | bounding box. 167 | 168 | Returns 169 | ------- 170 | (ndarray, ndarray) 171 | Returns the measurement-corrected state distribution. 172 | 173 | """ 174 | projected_mean, projected_cov = self.project(mean, covariance) 175 | 176 | chol_factor, lower = scipy.linalg.cho_factor( 177 | projected_cov, lower=True, check_finite=False) 178 | kalman_gain = scipy.linalg.cho_solve( 179 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 180 | check_finite=False).T 181 | innovation = measurement - projected_mean 182 | 183 | new_mean = mean + np.dot(innovation, kalman_gain.T) 184 | new_covariance = covariance - np.linalg.multi_dot(( 185 | kalman_gain, projected_cov, kalman_gain.T)) 186 | return new_mean, new_covariance 187 | 188 | def gating_distance(self, mean, covariance, measurements, 189 | only_position=False): 190 | """Compute gating distance between state distribution and measurements. 191 | 192 | A suitable distance threshold can be obtained from `chi2inv95`. If 193 | `only_position` is False, the chi-square distribution has 4 degrees of 194 | freedom, otherwise 2. 195 | 196 | Parameters 197 | ---------- 198 | mean : ndarray 199 | Mean vector over the state distribution (8 dimensional). 200 | covariance : ndarray 201 | Covariance of the state distribution (8x8 dimensional). 202 | measurements : ndarray 203 | An Nx4 dimensional matrix of N measurements, each in 204 | format (x, y, a, h) where (x, y) is the bounding box center 205 | position, a the aspect ratio, and h the height. 
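The constant-velocity filter above can be exercised on synthetic data without the rest of the tracker. Below is a sketch, not part of the repository, that assumes `kalman_filter.py` is importable from the working directory (adjust the import to your layout) and feeds the filter a box whose centre drifts right at 5 px per frame.

```python
import numpy as np
from kalman_filter import KalmanFilter, chi2inv95  # assumed import path

kf = KalmanFilter()

# Measurements in (centre x, centre y, aspect ratio, height) format:
# the box centre moves 5 px to the right every frame.
measurements = [np.array([100.0 + 5 * t, 200.0, 0.5, 120.0]) for t in range(10)]

mean, covariance = kf.initiate(measurements[0])
for z in measurements[1:]:
    mean, covariance = kf.predict(mean, covariance)    # time update
    mean, covariance = kf.update(mean, covariance, z)  # measurement correction

print(mean[:4])       # filtered (x, y, a, h); x ends up close to 145
print(mean[4:])       # velocities; vx approaches +5 px/frame
print(chi2inv95[4])   # 0.95 gating threshold used together with gating_distance()
```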
206 | only_position : Optional[bool] 207 | If True, distance computation is done with respect to the bounding 208 | box center position only. 209 | 210 | Returns 211 | ------- 212 | ndarray 213 | Returns an array of length N, where the i-th element contains the 214 | squared Mahalanobis distance between (mean, covariance) and 215 | `measurements[i]`. 216 | 217 | """ 218 | mean, covariance = self.project(mean, covariance) 219 | if only_position: 220 | mean, covariance = mean[:2], covariance[:2, :2] 221 | measurements = measurements[:, :2] 222 | 223 | cholesky_factor = np.linalg.cholesky(covariance) 224 | d = measurements - mean 225 | z = scipy.linalg.solve_triangular( 226 | cholesky_factor, d.T, lower=True, check_finite=False, 227 | overwrite_b=True) 228 | squared_maha = np.sum(z * z, axis=0) 229 | return squared_maha 230 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/linear_assignment.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | # from sklearn.utils.linear_assignment_ import linear_assignment 5 | from scipy.optimize import linear_sum_assignment as linear_assignment 6 | from . import kalman_filter 7 | 8 | 9 | INFTY_COST = 1e+5 10 | 11 | 12 | def min_cost_matching( 13 | distance_metric, max_distance, tracks, detections, track_indices=None, 14 | detection_indices=None): 15 | """Solve linear assignment problem. 16 | 17 | Parameters 18 | ---------- 19 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 20 | The distance metric is given a list of tracks and detections as well as 21 | a list of N track indices and M detection indices. The metric should 22 | return the NxM dimensional cost matrix, where element (i, j) is the 23 | association cost between the i-th track in the given track indices and 24 | the j-th detection in the given detection_indices. 25 | max_distance : float 26 | Gating threshold. Associations with cost larger than this value are 27 | disregarded. 28 | tracks : List[track.Track] 29 | A list of predicted tracks at the current time step. 30 | detections : List[detection.Detection] 31 | A list of detections at the current time step. 32 | track_indices : List[int] 33 | List of track indices that maps rows in `cost_matrix` to tracks in 34 | `tracks` (see description above). 35 | detection_indices : List[int] 36 | List of detection indices that maps columns in `cost_matrix` to 37 | detections in `detections` (see description above). 38 | 39 | Returns 40 | ------- 41 | (List[(int, int)], List[int], List[int]) 42 | Returns a tuple with the following three entries: 43 | * A list of matched track and detection indices. 44 | * A list of unmatched track indices. 45 | * A list of unmatched detection indices. 46 | 47 | """ 48 | if track_indices is None: 49 | track_indices = np.arange(len(tracks)) 50 | if detection_indices is None: 51 | detection_indices = np.arange(len(detections)) 52 | 53 | if len(detection_indices) == 0 or len(track_indices) == 0: 54 | return [], track_indices, detection_indices # Nothing to match. 
55 | 56 | cost_matrix = distance_metric( 57 | tracks, detections, track_indices, detection_indices) 58 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 59 | 60 | row_indices, col_indices = linear_assignment(cost_matrix) 61 | 62 | matches, unmatched_tracks, unmatched_detections = [], [], [] 63 | for col, detection_idx in enumerate(detection_indices): 64 | if col not in col_indices: 65 | unmatched_detections.append(detection_idx) 66 | for row, track_idx in enumerate(track_indices): 67 | if row not in row_indices: 68 | unmatched_tracks.append(track_idx) 69 | for row, col in zip(row_indices, col_indices): 70 | track_idx = track_indices[row] 71 | detection_idx = detection_indices[col] 72 | if cost_matrix[row, col] > max_distance: 73 | unmatched_tracks.append(track_idx) 74 | unmatched_detections.append(detection_idx) 75 | else: 76 | matches.append((track_idx, detection_idx)) 77 | return matches, unmatched_tracks, unmatched_detections 78 | 79 | 80 | def matching_cascade( 81 | distance_metric, max_distance, cascade_depth, tracks, detections, 82 | track_indices=None, detection_indices=None): 83 | """Run matching cascade. 84 | 85 | Parameters 86 | ---------- 87 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 88 | The distance metric is given a list of tracks and detections as well as 89 | a list of N track indices and M detection indices. The metric should 90 | return the NxM dimensional cost matrix, where element (i, j) is the 91 | association cost between the i-th track in the given track indices and 92 | the j-th detection in the given detection indices. 93 | max_distance : float 94 | Gating threshold. Associations with cost larger than this value are 95 | disregarded. 96 | cascade_depth: int 97 | The cascade depth, should be se to the maximum track age. 98 | tracks : List[track.Track] 99 | A list of predicted tracks at the current time step. 100 | detections : List[detection.Detection] 101 | A list of detections at the current time step. 102 | track_indices : Optional[List[int]] 103 | List of track indices that maps rows in `cost_matrix` to tracks in 104 | `tracks` (see description above). Defaults to all tracks. 105 | detection_indices : Optional[List[int]] 106 | List of detection indices that maps columns in `cost_matrix` to 107 | detections in `detections` (see description above). Defaults to all 108 | detections. 109 | 110 | Returns 111 | ------- 112 | (List[(int, int)], List[int], List[int]) 113 | Returns a tuple with the following three entries: 114 | * A list of matched track and detection indices. 115 | * A list of unmatched track indices. 116 | * A list of unmatched detection indices. 
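`min_cost_matching` above wraps SciPy's Hungarian solver in a gating step: costs above the threshold are clamped before solving, and any surviving pair whose cost still exceeds the threshold is rejected afterwards. The following standalone sketch, with made-up costs, mirrors that logic outside the tracker.

```python
import numpy as np
from scipy.optimize import linear_sum_assignment

max_distance = 0.7
cost = np.array([
    [0.10, 0.90, 0.95],   # track 0 clearly prefers detection 0
    [0.85, 0.20, 0.80],   # track 1 clearly prefers detection 1
])
cost = np.where(cost > max_distance, max_distance + 1e-5, cost)  # gate

rows, cols = linear_sum_assignment(cost)
matches, unmatched_tracks, unmatched_dets = [], [], []
for r, c in zip(rows, cols):
    if cost[r, c] > max_distance:          # assigned, but still too costly: reject
        unmatched_tracks.append(int(r))
        unmatched_dets.append(int(c))
    else:
        matches.append((int(r), int(c)))
unmatched_dets += [c for c in range(cost.shape[1]) if c not in cols]
unmatched_tracks += [r for r in range(cost.shape[0]) if r not in rows]

print(matches)            # [(0, 0), (1, 1)]
print(unmatched_tracks)   # []
print(unmatched_dets)     # [2] -- the extra detection matched nothing
```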
117 | 118 | """ 119 | if track_indices is None: 120 | track_indices = list(range(len(tracks))) 121 | if detection_indices is None: 122 | detection_indices = list(range(len(detections))) 123 | 124 | unmatched_detections = detection_indices 125 | matches = [] 126 | for level in range(cascade_depth): 127 | if len(unmatched_detections) == 0: # No detections left 128 | break 129 | 130 | track_indices_l = [ 131 | k for k in track_indices 132 | if tracks[k].time_since_update == 1 + level 133 | ] 134 | if len(track_indices_l) == 0: # Nothing to match at this level 135 | continue 136 | 137 | matches_l, _, unmatched_detections = \ 138 | min_cost_matching( 139 | distance_metric, max_distance, tracks, detections, 140 | track_indices_l, unmatched_detections) 141 | matches += matches_l 142 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) 143 | return matches, unmatched_tracks, unmatched_detections 144 | 145 | 146 | def gate_cost_matrix( 147 | kf, cost_matrix, tracks, detections, track_indices, detection_indices, 148 | gated_cost=INFTY_COST, only_position=False): 149 | """Invalidate infeasible entries in cost matrix based on the state 150 | distributions obtained by Kalman filtering. 151 | 152 | Parameters 153 | ---------- 154 | kf : The Kalman filter. 155 | cost_matrix : ndarray 156 | The NxM dimensional cost matrix, where N is the number of track indices 157 | and M is the number of detection indices, such that entry (i, j) is the 158 | association cost between `tracks[track_indices[i]]` and 159 | `detections[detection_indices[j]]`. 160 | tracks : List[track.Track] 161 | A list of predicted tracks at the current time step. 162 | detections : List[detection.Detection] 163 | A list of detections at the current time step. 164 | track_indices : List[int] 165 | List of track indices that maps rows in `cost_matrix` to tracks in 166 | `tracks` (see description above). 167 | detection_indices : List[int] 168 | List of detection indices that maps columns in `cost_matrix` to 169 | detections in `detections` (see description above). 170 | gated_cost : Optional[float] 171 | Entries in the cost matrix corresponding to infeasible associations are 172 | set this value. Defaults to a very large value. 173 | only_position : Optional[bool] 174 | If True, only the x, y position of the state distribution is considered 175 | during gating. Defaults to False. 176 | 177 | Returns 178 | ------- 179 | ndarray 180 | Returns the modified cost matrix. 181 | 182 | """ 183 | gating_dim = 2 if only_position else 4 184 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 185 | measurements = np.asarray( 186 | [detections[i].to_xyah() for i in detection_indices]) 187 | for row, track_idx in enumerate(track_indices): 188 | track = tracks[track_idx] 189 | gating_distance = kf.gating_distance( 190 | track.mean, track.covariance, measurements, only_position) 191 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost 192 | return cost_matrix 193 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/nn_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | def _pdist(a, b): 6 | """Compute pair-wise squared distance between points in `a` and `b`. 7 | 8 | Parameters 9 | ---------- 10 | a : array_like 11 | An NxM matrix of N samples of dimensionality M. 12 | b : array_like 13 | An LxM matrix of L samples of dimensionality M. 
14 | 15 | Returns 16 | ------- 17 | ndarray 18 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 19 | contains the squared distance between `a[i]` and `b[j]`. 20 | 21 | """ 22 | a, b = np.asarray(a), np.asarray(b) 23 | if len(a) == 0 or len(b) == 0: 24 | return np.zeros((len(a), len(b))) 25 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) 26 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 27 | r2 = np.clip(r2, 0., float(np.inf)) 28 | return r2 29 | 30 | 31 | def _cosine_distance(a, b, data_is_normalized=False): 32 | """Compute pair-wise cosine distance between points in `a` and `b`. 33 | 34 | Parameters 35 | ---------- 36 | a : array_like 37 | An NxM matrix of N samples of dimensionality M. 38 | b : array_like 39 | An LxM matrix of L samples of dimensionality M. 40 | data_is_normalized : Optional[bool] 41 | If True, assumes rows in a and b are unit length vectors. 42 | Otherwise, a and b are explicitly normalized to lenght 1. 43 | 44 | Returns 45 | ------- 46 | ndarray 47 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 48 | contains the squared distance between `a[i]` and `b[j]`. 49 | 50 | """ 51 | if not data_is_normalized: 52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 54 | return 1. - np.dot(a, b.T) 55 | 56 | 57 | def _nn_euclidean_distance(x, y): 58 | """ Helper function for nearest neighbor distance metric (Euclidean). 59 | 60 | Parameters 61 | ---------- 62 | x : ndarray 63 | A matrix of N row-vectors (sample points). 64 | y : ndarray 65 | A matrix of M row-vectors (query points). 66 | 67 | Returns 68 | ------- 69 | ndarray 70 | A vector of length M that contains for each entry in `y` the 71 | smallest Euclidean distance to a sample in `x`. 72 | 73 | """ 74 | distances = _pdist(x, y) 75 | return np.maximum(0.0, distances.min(axis=0)) 76 | 77 | 78 | def _nn_cosine_distance(x, y): 79 | """ Helper function for nearest neighbor distance metric (cosine). 80 | 81 | Parameters 82 | ---------- 83 | x : ndarray 84 | A matrix of N row-vectors (sample points). 85 | y : ndarray 86 | A matrix of M row-vectors (query points). 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | A vector of length M that contains for each entry in `y` the 92 | smallest cosine distance to a sample in `x`. 93 | 94 | """ 95 | distances = _cosine_distance(x, y) 96 | return distances.min(axis=0) 97 | 98 | 99 | class NearestNeighborDistanceMetric(object): 100 | """ 101 | A nearest neighbor distance metric that, for each target, returns 102 | the closest distance to any sample that has been observed so far. 103 | 104 | Parameters 105 | ---------- 106 | metric : str 107 | Either "euclidean" or "cosine". 108 | matching_threshold: float 109 | The matching threshold. Samples with larger distance are considered an 110 | invalid match. 111 | budget : Optional[int] 112 | If not None, fix samples per class to at most this number. Removes 113 | the oldest samples when the budget is reached. 114 | 115 | Attributes 116 | ---------- 117 | samples : Dict[int -> List[ndarray]] 118 | A dictionary that maps from target identities to the list of samples 119 | that have been observed so far. 
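Appearance matching ultimately reduces to the nearest-neighbour cosine distance computed by the helpers above: for each new embedding, take the smallest cosine distance to any feature already stored for a track. A short self-contained sketch with random data (real embeddings come from the TensorRT feature extractor):

```python
import numpy as np

rng = np.random.default_rng(0)
gallery = rng.normal(size=(5, 512))   # features already stored for one track
queries = rng.normal(size=(3, 512))   # embeddings of new detections

gallery /= np.linalg.norm(gallery, axis=1, keepdims=True)
queries /= np.linalg.norm(queries, axis=1, keepdims=True)

cosine_dist = 1.0 - gallery @ queries.T   # pair-wise cosine distance, shape (5, 3)
nn_dist = cosine_dist.min(axis=0)         # best match per query, shape (3,)

matching_threshold = 0.2                  # plays the role of `max_dist` in DeepSort
print(nn_dist)
print(nn_dist < matching_threshold)       # random vectors are near-orthogonal, so likely all False
```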
120 | 121 | """ 122 | 123 | def __init__(self, metric, matching_threshold, budget=None): 124 | 125 | 126 | if metric == "euclidean": 127 | self._metric = _nn_euclidean_distance 128 | elif metric == "cosine": 129 | self._metric = _nn_cosine_distance 130 | else: 131 | raise ValueError( 132 | "Invalid metric; must be either 'euclidean' or 'cosine'") 133 | self.matching_threshold = matching_threshold 134 | self.budget = budget 135 | self.samples = {} 136 | 137 | def partial_fit(self, features, targets, active_targets): 138 | """Update the distance metric with new data. 139 | 140 | Parameters 141 | ---------- 142 | features : ndarray 143 | An NxM matrix of N features of dimensionality M. 144 | targets : ndarray 145 | An integer array of associated target identities. 146 | active_targets : List[int] 147 | A list of targets that are currently present in the scene. 148 | 149 | """ 150 | for feature, target in zip(features, targets): 151 | self.samples.setdefault(target, []).append(feature) 152 | if self.budget is not None: 153 | self.samples[target] = self.samples[target][-self.budget:] 154 | self.samples = {k: self.samples[k] for k in active_targets} 155 | 156 | def distance(self, features, targets): 157 | """Compute distance between features and targets. 158 | 159 | Parameters 160 | ---------- 161 | features : ndarray 162 | An NxM matrix of N features of dimensionality M. 163 | targets : List[int] 164 | A list of targets to match the given `features` against. 165 | 166 | Returns 167 | ------- 168 | ndarray 169 | Returns a cost matrix of shape len(targets), len(features), where 170 | element (i, j) contains the closest squared distance between 171 | `targets[i]` and `features[j]`. 172 | 173 | """ 174 | cost_matrix = np.zeros((len(targets), len(features))) 175 | for i, target in enumerate(targets): 176 | cost_matrix[i, :] = self._metric(self.samples[target], features) 177 | return cost_matrix 178 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> scores = [d.confidence for d in detections] 19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 20 | >>> detections = [detections[i] for i in indices] 21 | 22 | Parameters 23 | ---------- 24 | boxes : ndarray 25 | Array of ROIs (x, y, width, height). 26 | max_bbox_overlap : float 27 | ROIs that overlap more than this values are suppressed. 28 | scores : Optional[array_like] 29 | Detector confidence score. 30 | 31 | Returns 32 | ------- 33 | List[int] 34 | Returns indices of detections that have survived non-maxima suppression. 
35 | 36 | """ 37 | if len(boxes) == 0: 38 | return [] 39 | 40 | boxes = boxes.astype(np.float) 41 | pick = [] 42 | 43 | x1 = boxes[:, 0] 44 | y1 = boxes[:, 1] 45 | x2 = boxes[:, 2] + boxes[:, 0] 46 | y2 = boxes[:, 3] + boxes[:, 1] 47 | 48 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | if scores is not None: 50 | idxs = np.argsort(scores) 51 | else: 52 | idxs = np.argsort(y2) 53 | 54 | while len(idxs) > 0: 55 | last = len(idxs) - 1 56 | i = idxs[last] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 60 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 61 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 62 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 63 | 64 | w = np.maximum(0, xx2 - xx1 + 1) 65 | h = np.maximum(0, yy2 - yy1 + 1) 66 | 67 | overlap = (w * h) / area[idxs[:last]] 68 | 69 | idxs = np.delete( 70 | idxs, np.concatenate( 71 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 72 | 73 | return pick 74 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/track.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | 3 | 4 | class TrackState: 5 | """ 6 | Enumeration type for the single target track state. Newly created tracks are 7 | classified as `tentative` until enough evidence has been collected. Then, 8 | the track state is changed to `confirmed`. Tracks that are no longer alive 9 | are classified as `deleted` to mark them for removal from the set of active 10 | tracks. 11 | 12 | """ 13 | 14 | Tentative = 1 15 | Confirmed = 2 16 | Deleted = 3 17 | 18 | 19 | class Track: 20 | """ 21 | A single target track with state space `(x, y, a, h)` and associated 22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 23 | aspect ratio and `h` is the height. 24 | 25 | Parameters 26 | ---------- 27 | mean : ndarray 28 | Mean vector of the initial state distribution. 29 | covariance : ndarray 30 | Covariance matrix of the initial state distribution. 31 | track_id : int 32 | A unique track identifier. 33 | n_init : int 34 | Number of consecutive detections before the track is confirmed. The 35 | track state is set to `Deleted` if a miss occurs within the first 36 | `n_init` frames. 37 | max_age : int 38 | The maximum number of consecutive misses before the track state is 39 | set to `Deleted`. 40 | feature : Optional[ndarray] 41 | Feature vector of the detection this track originates from. If not None, 42 | this feature is added to the `features` cache. 43 | 44 | Attributes 45 | ---------- 46 | mean : ndarray 47 | Mean vector of the initial state distribution. 48 | covariance : ndarray 49 | Covariance matrix of the initial state distribution. 50 | track_id : int 51 | A unique track identifier. 52 | hits : int 53 | Total number of measurement updates. 54 | age : int 55 | Total number of frames since first occurance. 56 | time_since_update : int 57 | Total number of frames since last measurement update. 58 | state : TrackState 59 | The current track state. 60 | features : List[ndarray] 61 | A cache of features. On each measurement update, the associated feature 62 | vector is added to this list. 
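A quick way to exercise `non_max_suppression` above on synthetic boxes is shown below; the sketch is not part of the repository and the import path is an assumption. One caveat worth flagging: the function's internal `boxes.astype(np.float)` relies on an alias that NumPy removed in 1.24, so on a recent NumPy install it must be changed to the builtin `float` (or `np.float64`) before this call will run.

```python
import numpy as np
from preprocessing import non_max_suppression  # adjust the import to your layout

boxes = np.array([
    [100, 100, 50, 80],    # (x, y, w, h)
    [104, 102, 50, 80],    # heavy overlap with the first box
    [300, 200, 60, 60],    # far away, should survive
], dtype=np.float64)
scores = np.array([0.9, 0.6, 0.8])

keep = non_max_suppression(boxes, max_bbox_overlap=0.5, scores=scores)
print(keep)          # [0, 2] -- the lower-scoring duplicate is suppressed
print(boxes[keep])
```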
63 | 64 | """ 65 | 66 | def __init__(self, mean, covariance, track_label, track_id, n_init, max_age, 67 | feature=None): 68 | self.mean = mean 69 | self.covariance = covariance 70 | self.track_label = track_label 71 | self.track_id = track_id 72 | self.hits = 1 73 | self.age = 1 74 | self.time_since_update = 0 75 | 76 | self.state = TrackState.Tentative 77 | self.features = [] 78 | if feature is not None: 79 | self.features.append(feature) 80 | 81 | self._n_init = n_init 82 | self._max_age = max_age 83 | 84 | def to_tlwh(self): 85 | """Get current position in bounding box format `(top left x, top left y, 86 | width, height)`. 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | The bounding box. 92 | 93 | """ 94 | ret = self.mean[:4].copy() 95 | ret[2] *= ret[3] 96 | ret[:2] -= ret[2:] / 2 97 | return ret 98 | 99 | def to_tlbr(self): 100 | """Get current position in bounding box format `(min x, miny, max x, 101 | max y)`. 102 | 103 | Returns 104 | ------- 105 | ndarray 106 | The bounding box. 107 | 108 | """ 109 | ret = self.to_tlwh() 110 | ret[2:] = ret[:2] + ret[2:] 111 | return ret 112 | 113 | def predict(self, kf): 114 | """Propagate the state distribution to the current time step using a 115 | Kalman filter prediction step. 116 | 117 | Parameters 118 | ---------- 119 | kf : kalman_filter.KalmanFilter 120 | The Kalman filter. 121 | 122 | """ 123 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 124 | self.age += 1 125 | self.time_since_update += 1 126 | 127 | def update(self, kf, detection): 128 | """Perform Kalman filter measurement update step and update the feature 129 | cache. 130 | 131 | Parameters 132 | ---------- 133 | kf : kalman_filter.KalmanFilter 134 | The Kalman filter. 135 | detection : Detection 136 | The associated detection. 137 | 138 | """ 139 | self.mean, self.covariance = kf.update( 140 | self.mean, self.covariance, detection.to_xyah()) 141 | self.features.append(detection.feature) 142 | 143 | self.hits += 1 144 | self.time_since_update = 0 145 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 146 | self.state = TrackState.Confirmed 147 | 148 | def mark_missed(self): 149 | """Mark this track as missed (no association at the current time step). 150 | """ 151 | if self.state == TrackState.Tentative: 152 | self.state = TrackState.Deleted 153 | elif self.time_since_update > self._max_age: 154 | self.state = TrackState.Deleted 155 | 156 | def is_tentative(self): 157 | """Returns True if this track is tentative (unconfirmed). 158 | """ 159 | return self.state == TrackState.Tentative 160 | 161 | def is_confirmed(self): 162 | """Returns True if this track is confirmed.""" 163 | return self.state == TrackState.Confirmed 164 | 165 | def is_deleted(self): 166 | """Returns True if this track is dead and should be deleted.""" 167 | return self.state == TrackState.Deleted 168 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/tracker.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import kalman_filter 5 | from . import linear_assignment 6 | from . import iou_matching 7 | from .track import Track 8 | 9 | 10 | class Tracker: 11 | """ 12 | This is the multi-target tracker. 13 | 14 | Parameters 15 | ---------- 16 | metric : nn_matching.NearestNeighborDistanceMetric 17 | A distance metric for measurement-to-track association. 
18 | max_age : int 19 | Maximum number of missed misses before a track is deleted. 20 | n_init : int 21 | Number of consecutive detections before the track is confirmed. The 22 | track state is set to `Deleted` if a miss occurs within the first 23 | `n_init` frames. 24 | 25 | Attributes 26 | ---------- 27 | metric : nn_matching.NearestNeighborDistanceMetric 28 | The distance metric used for measurement to track association. 29 | max_age : int 30 | Maximum number of missed misses before a track is deleted. 31 | n_init : int 32 | Number of frames that a track remains in initialization phase. 33 | kf : kalman_filter.KalmanFilter 34 | A Kalman filter to filter target trajectories in image space. 35 | tracks : List[Track] 36 | The list of active tracks at the current time step. 37 | 38 | """ 39 | 40 | def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3): 41 | self.metric = metric 42 | self.max_iou_distance = max_iou_distance 43 | self.max_age = max_age 44 | self.n_init = n_init 45 | 46 | self.kf = kalman_filter.KalmanFilter() 47 | self.tracks = [] 48 | self._next_id = 1 49 | 50 | def predict(self): 51 | """Propagate track state distributions one time step forward. 52 | 53 | This function should be called once every time step, before `update`. 54 | """ 55 | for track in self.tracks: 56 | track.predict(self.kf) 57 | 58 | def update(self, detections): 59 | """Perform measurement update and track management. 60 | 61 | Parameters 62 | ---------- 63 | detections : List[deep_sort.detection.Detection] 64 | A list of detections at the current time step. 65 | 66 | """ 67 | # Run matching cascade. 68 | matches, unmatched_tracks, unmatched_detections = \ 69 | self._match(detections) 70 | 71 | # Update track set. 72 | for track_idx, detection_idx in matches: 73 | self.tracks[track_idx].update( 74 | self.kf, detections[detection_idx]) 75 | for track_idx in unmatched_tracks: 76 | self.tracks[track_idx].mark_missed() 77 | for detection_idx in unmatched_detections: 78 | self._initiate_track(detections[detection_idx]) 79 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 80 | 81 | # Update distance metric. 82 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 83 | features, targets = [], [] 84 | for track in self.tracks: 85 | if not track.is_confirmed(): 86 | continue 87 | features += track.features 88 | targets += [track.track_id for _ in track.features] 89 | track.features = [] 90 | self.metric.partial_fit( 91 | np.asarray(features), np.asarray(targets), active_targets) 92 | 93 | def _match(self, detections): 94 | 95 | def gated_metric(tracks, dets, track_indices, detection_indices): 96 | features = np.array([dets[i].feature for i in detection_indices]) 97 | targets = np.array([tracks[i].track_id for i in track_indices]) 98 | cost_matrix = self.metric.distance(features, targets) 99 | cost_matrix = linear_assignment.gate_cost_matrix( 100 | self.kf, cost_matrix, tracks, dets, track_indices, 101 | detection_indices) 102 | 103 | return cost_matrix 104 | 105 | # Split track set into confirmed and unconfirmed tracks. 106 | confirmed_tracks = [ 107 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 108 | unconfirmed_tracks = [ 109 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 110 | 111 | # Associate confirmed tracks using appearance features. 
112 | matches_a, unmatched_tracks_a, unmatched_detections = \ 113 | linear_assignment.matching_cascade( 114 | gated_metric, self.metric.matching_threshold, self.max_age, 115 | self.tracks, detections, confirmed_tracks) 116 | 117 | # Associate remaining tracks together with unconfirmed tracks using IOU. 118 | iou_track_candidates = unconfirmed_tracks + [ 119 | k for k in unmatched_tracks_a if 120 | self.tracks[k].time_since_update == 1] 121 | unmatched_tracks_a = [ 122 | k for k in unmatched_tracks_a if 123 | self.tracks[k].time_since_update != 1] 124 | matches_b, unmatched_tracks_b, unmatched_detections = \ 125 | linear_assignment.min_cost_matching( 126 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 127 | detections, iou_track_candidates, unmatched_detections) 128 | 129 | matches = matches_a + matches_b 130 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 131 | return matches, unmatched_tracks, unmatched_detections 132 | 133 | def _initiate_track(self, detection): 134 | mean, covariance = self.kf.initiate(detection.to_xyah()) 135 | self.tracks.append(Track( 136 | mean, covariance, detection.label, self._next_id, self.n_init, self.max_age, 137 | detection.feature)) 138 | self._next_id += 1 139 | -------------------------------------------------------------------------------- /srcs/deep_sort/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/srcs/deep_sort/utils/__init__.py -------------------------------------------------------------------------------- /srcs/deep_sort/utils/asserts.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | 4 | def assert_in(file, files_to_check): 5 | if file not in files_to_check: 6 | raise AssertionError("{} does not exist in the list".format(str(file))) 7 | return True 8 | 9 | 10 | def assert_in_env(check_list: list): 11 | for item in check_list: 12 | assert_in(item, environ.keys()) 13 | return True 14 | -------------------------------------------------------------------------------- /srcs/deep_sort/utils/draw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1) 5 | 6 | 7 | def compute_color_for_labels(label): 8 | """ 9 | Simple function that adds fixed color depending on the class 10 | """ 11 | color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette] 12 | return tuple(color) 13 | 14 | 15 | def draw_boxes(img, bbox, identities=None, offset=(0,0)): 16 | for i,box in enumerate(bbox): 17 | x1,y1,x2,y2 = [int(i) for i in box] 18 | x1 += offset[0] 19 | x2 += offset[0] 20 | y1 += offset[1] 21 | y2 += offset[1] 22 | # box text and bar 23 | id = int(identities[i]) if identities is not None else 0 24 | color = compute_color_for_labels(id) 25 | label = '{}{:d}'.format("", id) 26 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0] 27 | cv2.rectangle(img,(x1, y1),(x2,y2),color,3) 28 | cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 29 | cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2) 30 | return img 31 | 32 | 33 | 34 | if __name__ == '__main__': 35 | for i in range(82): 36 | print(compute_color_for_labels(i)) 37 | -------------------------------------------------------------------------------- 
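A minimal way to try `draw_boxes` above on a synthetic frame; the import path and output file name are assumptions, not part of the repository.

```python
import numpy as np
import cv2
from utils.draw import draw_boxes  # assumes srcs/deep_sort is on sys.path

frame = np.zeros((480, 640, 3), dtype=np.uint8)            # blank BGR frame
tracks_xyxy = [[50, 60, 180, 300], [350, 120, 470, 330]]   # (x1, y1, x2, y2) per track
track_ids = [1, 7]                                         # the identity decides the colour

frame = draw_boxes(frame, tracks_xyxy, identities=track_ids)
cv2.imwrite("draw_boxes_demo.jpg", frame)                  # hypothetical output file
```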
/srcs/deep_sort/utils/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | mm.lap.default_solver = 'lap' 6 | from utils.io import read_results, unzip_objs 7 | 8 | 9 | class Evaluator(object): 10 | 11 | def __init__(self, data_root, seq_name, data_type): 12 | self.data_root = data_root 13 | self.seq_name = seq_name 14 | self.data_type = data_type 15 | 16 | self.load_annotations() 17 | self.reset_accumulator() 18 | 19 | def load_annotations(self): 20 | assert self.data_type == 'mot' 21 | 22 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 23 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 24 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 25 | 26 | def reset_accumulator(self): 27 | self.acc = mm.MOTAccumulator(auto_id=True) 28 | 29 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 30 | # results 31 | trk_tlwhs = np.copy(trk_tlwhs) 32 | trk_ids = np.copy(trk_ids) 33 | 34 | # gts 35 | gt_objs = self.gt_frame_dict.get(frame_id, []) 36 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 37 | 38 | # ignore boxes 39 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 40 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 41 | 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | 57 | # get distance matrix 58 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 59 | 60 | # acc 61 | self.acc.update(gt_ids, trk_ids, iou_distance) 62 | 63 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 64 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 65 | else: 66 | events = None 67 | return events 68 | 69 | def eval_file(self, filename): 70 | self.reset_accumulator() 71 | 72 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 73 | frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 74 | for frame_id in frames: 75 | trk_objs = result_frame_dict.get(frame_id, []) 76 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 77 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 78 | 79 | return self.acc 80 | 81 | @staticmethod 82 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 83 | names = copy.deepcopy(names) 84 | if metrics is None: 85 | metrics = mm.metrics.motchallenge_metrics 86 | metrics = copy.deepcopy(metrics) 87 | 88 | mh = mm.metrics.create() 89 | summary = mh.compute_many( 90 | accs, 91 | metrics=metrics, 92 | names=names, 93 | generate_overall=True 94 | ) 95 | 96 | return summary 97 | 98 | @staticmethod 99 | def save_summary(summary, filename): 100 | import pandas as pd 101 | writer = pd.ExcelWriter(filename) 102 | summary.to_excel(writer) 103 | writer.save() 104 | 
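The `Evaluator` above is a thin wrapper around `motmetrics`. The following toy sketch, independent of the repository, shows the same accumulate-then-summarise flow on two hand-made frames.

```python
import numpy as np
import motmetrics as mm

acc = mm.MOTAccumulator(auto_id=True)

# Frame 1: two ground-truth objects, two hypotheses (tlwh boxes), both matched.
gt_ids = [1, 2]
gt_boxes = np.array([[100, 100, 50, 80], [300, 200, 60, 60]])
trk_boxes = np.array([[102, 101, 50, 80], [305, 198, 60, 60]])
dist = mm.distances.iou_matrix(gt_boxes, trk_boxes, max_iou=0.5)
acc.update(gt_ids, [10, 11], dist)

# Frame 2: both objects are still present but the tracker reports only one,
# so object 2 is counted as a miss.
dist = mm.distances.iou_matrix(gt_boxes, trk_boxes[:1], max_iou=0.5)
acc.update(gt_ids, [10], dist)

mh = mm.metrics.create()
print(mh.compute(acc, metrics=['num_frames', 'mota', 'idf1'], name='toy'))
```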
-------------------------------------------------------------------------------- /srcs/deep_sort/utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | # from utils.log import get_logger 6 | 7 | 8 | def write_results(filename, results, data_type): 9 | if data_type == 'mot': 10 | save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n' 11 | elif data_type == 'kitti': 12 | save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' 13 | else: 14 | raise ValueError(data_type) 15 | 16 | with open(filename, 'w') as f: 17 | for frame_id, tlwhs, track_ids in results: 18 | if data_type == 'kitti': 19 | frame_id -= 1 20 | for tlwh, track_id in zip(tlwhs, track_ids): 21 | if track_id < 0: 22 | continue 23 | x1, y1, w, h = tlwh 24 | x2, y2 = x1 + w, y1 + h 25 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h) 26 | f.write(line) 27 | 28 | 29 | # def write_results(filename, results_dict: Dict, data_type: str): 30 | # if not filename: 31 | # return 32 | # path = os.path.dirname(filename) 33 | # if not os.path.exists(path): 34 | # os.makedirs(path) 35 | 36 | # if data_type in ('mot', 'mcmot', 'lab'): 37 | # save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 38 | # elif data_type == 'kitti': 39 | # save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 40 | # else: 41 | # raise ValueError(data_type) 42 | 43 | # with open(filename, 'w') as f: 44 | # for frame_id, frame_data in results_dict.items(): 45 | # if data_type == 'kitti': 46 | # frame_id -= 1 47 | # for tlwh, track_id in frame_data: 48 | # if track_id < 0: 49 | # continue 50 | # x1, y1, w, h = tlwh 51 | # x2, y2 = x1 + w, y1 + h 52 | # line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 53 | # f.write(line) 54 | # logger.info('Save results to {}'.format(filename)) 55 | 56 | 57 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 58 | if data_type in ('mot', 'lab'): 59 | read_fun = read_mot_results 60 | else: 61 | raise ValueError('Unknown data type: {}'.format(data_type)) 62 | 63 | return read_fun(filename, is_gt, is_ignore) 64 | 65 | 66 | """ 67 | labels={'ped', ... % 1 68 | 'person_on_vhcl', ... % 2 69 | 'car', ... % 3 70 | 'bicycle', ... % 4 71 | 'mbike', ... % 5 72 | 'non_mot_vhcl', ... % 6 73 | 'static_person', ... % 7 74 | 'distractor', ... % 8 75 | 'occluder', ... % 9 76 | 'occluder_on_grnd', ... %10 77 | 'occluder_full', ... % 11 78 | 'reflection', ... % 12 79 | 'crowd' ... 
% 13 80 | }; 81 | """ 82 | 83 | 84 | def read_mot_results(filename, is_gt, is_ignore): 85 | valid_labels = {1} 86 | ignore_labels = {2, 7, 8, 12} 87 | results_dict = dict() 88 | if os.path.isfile(filename): 89 | with open(filename, 'r') as f: 90 | for line in f.readlines(): 91 | linelist = line.split(',') 92 | if len(linelist) < 7: 93 | continue 94 | fid = int(linelist[0]) 95 | if fid < 1: 96 | continue 97 | results_dict.setdefault(fid, list()) 98 | 99 | if is_gt: 100 | if 'MOT16-' in filename or 'MOT17-' in filename: 101 | label = int(float(linelist[7])) 102 | mark = int(float(linelist[6])) 103 | if mark == 0 or label not in valid_labels: 104 | continue 105 | score = 1 106 | elif is_ignore: 107 | if 'MOT16-' in filename or 'MOT17-' in filename: 108 | label = int(float(linelist[7])) 109 | vis_ratio = float(linelist[8]) 110 | if label not in ignore_labels and vis_ratio >= 0: 111 | continue 112 | else: 113 | continue 114 | score = 1 115 | else: 116 | score = float(linelist[6]) 117 | 118 | tlwh = tuple(map(float, linelist[2:6])) 119 | target_id = int(linelist[1]) 120 | 121 | results_dict[fid].append((tlwh, target_id, score)) 122 | 123 | return results_dict 124 | 125 | 126 | def unzip_objs(objs): 127 | if len(objs) > 0: 128 | tlwhs, ids, scores = zip(*objs) 129 | else: 130 | tlwhs, ids, scores = [], [], [] 131 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 132 | 133 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /srcs/deep_sort/utils/json_logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | References: 3 | https://medium.com/analytics-vidhya/creating-a-custom-logging-mechanism-for-real-time-object-detection-using-tdd-4ca2cfcd0a2f 4 | """ 5 | import json 6 | from os import makedirs 7 | from os.path import exists, join 8 | from datetime import datetime 9 | 10 | 11 | class JsonMeta(object): 12 | HOURS = 3 13 | MINUTES = 59 14 | SECONDS = 59 15 | PATH_TO_SAVE = 'LOGS' 16 | DEFAULT_FILE_NAME = 'remaining' 17 | 18 | 19 | class BaseJsonLogger(object): 20 | """ 21 | This is the base class that returns __dict__ of its own 22 | it also returns the dicts of objects in the attributes that are list instances 23 | 24 | """ 25 | 26 | def dic(self): 27 | # returns dicts of objects 28 | out = {} 29 | for k, v in self.__dict__.items(): 30 | if hasattr(v, 'dic'): 31 | out[k] = v.dic() 32 | elif isinstance(v, list): 33 | out[k] = self.list(v) 34 | else: 35 | out[k] = v 36 | return out 37 | 38 | @staticmethod 39 | def list(values): 40 | # applies the dic method on items in the list 41 | return [v.dic() if hasattr(v, 'dic') else v for v in values] 42 | 43 | 44 | class Label(BaseJsonLogger): 45 | """ 46 | For each bounding box there are various categories with confidences. Label class keeps track of that information. 47 | """ 48 | 49 | def __init__(self, category: str, confidence: float): 50 | self.category = category 51 | self.confidence = confidence 52 | 53 | 54 | class Bbox(BaseJsonLogger): 55 | """ 56 | This module stores the information for each frame and use them in JsonParser 57 | Attributes: 58 | labels (list): List of label module. 59 | top (int): 60 | left (int): 61 | width (int): 62 | height (int): 63 | 64 | Args: 65 | bbox_id (float): 66 | top (int): 67 | left (int): 68 | width (int): 69 | height (int): 70 | 71 | References: 72 | Check Label module for better understanding. 
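`write_results` and `read_results` above round-trip the plain MOT text format, one `frame,id,x1,y1,w,h,-1,-1,-1,-1` line per box. A small sketch with toy values and a hypothetical output path, assuming `srcs/deep_sort` is on `sys.path`:

```python
from utils.io import write_results, read_results  # assumed import path

results = [
    # (frame_id, list of tlwh boxes, list of track ids)
    (1, [(100.0, 100.0, 50.0, 80.0)], [1]),
    (2, [(105.0, 102.0, 50.0, 80.0), (300.0, 200.0, 60.0, 60.0)], [1, 2]),
]
write_results("toy_results.txt", results, data_type="mot")   # hypothetical path

frames = read_results("toy_results.txt", "mot", is_gt=False)
print(frames[2])   # [((105.0, 102.0, 50.0, 80.0), 1, -1.0), ((300.0, 200.0, 60.0, 60.0), 2, -1.0)]
```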
73 | 74 | 75 | """ 76 | 77 | def __init__(self, bbox_id, top, left, width, height): 78 | self.labels = [] 79 | self.bbox_id = bbox_id 80 | self.top = top 81 | self.left = left 82 | self.width = width 83 | self.height = height 84 | 85 | def add_label(self, category, confidence): 86 | # adds category and confidence only if top_k is not exceeded. 87 | self.labels.append(Label(category, confidence)) 88 | 89 | def labels_full(self, value): 90 | return len(self.labels) == value 91 | 92 | 93 | class Frame(BaseJsonLogger): 94 | """ 95 | This module stores the information for each frame and use them in JsonParser 96 | Attributes: 97 | timestamp (float): The elapsed time of captured frame 98 | frame_id (int): The frame number of the captured video 99 | bboxes (list of Bbox objects): Stores the list of bbox objects. 100 | 101 | References: 102 | Check Bbox class for better information 103 | 104 | Args: 105 | timestamp (float): 106 | frame_id (int): 107 | 108 | """ 109 | 110 | def __init__(self, frame_id: int, timestamp: float = None): 111 | self.frame_id = frame_id 112 | self.timestamp = timestamp 113 | self.bboxes = [] 114 | 115 | def add_bbox(self, bbox_id: int, top: int, left: int, width: int, height: int): 116 | bboxes_ids = [bbox.bbox_id for bbox in self.bboxes] 117 | if bbox_id not in bboxes_ids: 118 | self.bboxes.append(Bbox(bbox_id, top, left, width, height)) 119 | else: 120 | raise ValueError("Frame with id: {} already has a Bbox with id: {}".format(self.frame_id, bbox_id)) 121 | 122 | def add_label_to_bbox(self, bbox_id: int, category: str, confidence: float): 123 | bboxes = {bbox.id: bbox for bbox in self.bboxes} 124 | if bbox_id in bboxes.keys(): 125 | res = bboxes.get(bbox_id) 126 | res.add_label(category, confidence) 127 | else: 128 | raise ValueError('the bbox with id: {} does not exists!'.format(bbox_id)) 129 | 130 | 131 | class BboxToJsonLogger(BaseJsonLogger): 132 | """ 133 | ُ This module is designed to automate the task of logging jsons. An example json is used 134 | to show the contents of json file shortly 135 | Example: 136 | { 137 | "video_details": { 138 | "frame_width": 1920, 139 | "frame_height": 1080, 140 | "frame_rate": 20, 141 | "video_name": "/home/gpu/codes/MSD/pedestrian_2/project/public/camera1.avi" 142 | }, 143 | "frames": [ 144 | { 145 | "frame_id": 329, 146 | "timestamp": 3365.1254 147 | "bboxes": [ 148 | { 149 | "labels": [ 150 | { 151 | "category": "pedestrian", 152 | "confidence": 0.9 153 | } 154 | ], 155 | "bbox_id": 0, 156 | "top": 1257, 157 | "left": 138, 158 | "width": 68, 159 | "height": 109 160 | } 161 | ] 162 | }], 163 | 164 | Attributes: 165 | frames (dict): It's a dictionary that maps each frame_id to json attributes. 166 | video_details (dict): information about video file. 167 | top_k_labels (int): shows the allowed number of labels 168 | start_time (datetime object): we use it to automate the json output by time. 
169 | 170 | Args: 171 | top_k_labels (int): shows the allowed number of labels 172 | 173 | """ 174 | 175 | def __init__(self, top_k_labels: int = 1): 176 | self.frames = {} 177 | self.video_details = self.video_details = dict(frame_width=None, frame_height=None, frame_rate=None, 178 | video_name=None) 179 | self.top_k_labels = top_k_labels 180 | self.start_time = datetime.now() 181 | 182 | def set_top_k(self, value): 183 | self.top_k_labels = value 184 | 185 | def frame_exists(self, frame_id: int) -> bool: 186 | """ 187 | Args: 188 | frame_id (int): 189 | 190 | Returns: 191 | bool: true if frame_id is recognized 192 | """ 193 | return frame_id in self.frames.keys() 194 | 195 | def add_frame(self, frame_id: int, timestamp: float = None) -> None: 196 | """ 197 | Args: 198 | frame_id (int): 199 | timestamp (float): opencv captured frame time property 200 | 201 | Raises: 202 | ValueError: if frame_id would not exist in class frames attribute 203 | 204 | Returns: 205 | None 206 | 207 | """ 208 | if not self.frame_exists(frame_id): 209 | self.frames[frame_id] = Frame(frame_id, timestamp) 210 | else: 211 | raise ValueError("Frame id: {} already exists".format(frame_id)) 212 | 213 | def bbox_exists(self, frame_id: int, bbox_id: int) -> bool: 214 | """ 215 | Args: 216 | frame_id: 217 | bbox_id: 218 | 219 | Returns: 220 | bool: if bbox exists in frame bboxes list 221 | """ 222 | bboxes = [] 223 | if self.frame_exists(frame_id=frame_id): 224 | bboxes = [bbox.bbox_id for bbox in self.frames[frame_id].bboxes] 225 | return bbox_id in bboxes 226 | 227 | def find_bbox(self, frame_id: int, bbox_id: int): 228 | """ 229 | 230 | Args: 231 | frame_id: 232 | bbox_id: 233 | 234 | Returns: 235 | bbox_id (int): 236 | 237 | Raises: 238 | ValueError: if bbox_id does not exist in the bbox list of specific frame. 239 | """ 240 | if not self.bbox_exists(frame_id, bbox_id): 241 | raise ValueError("frame with id: {} does not contain bbox with id: {}".format(frame_id, bbox_id)) 242 | bboxes = {bbox.bbox_id: bbox for bbox in self.frames[frame_id].bboxes} 243 | return bboxes.get(bbox_id) 244 | 245 | def add_bbox_to_frame(self, frame_id: int, bbox_id: int, top: int, left: int, width: int, height: int) -> None: 246 | """ 247 | 248 | Args: 249 | frame_id (int): 250 | bbox_id (int): 251 | top (int): 252 | left (int): 253 | width (int): 254 | height (int): 255 | 256 | Returns: 257 | None 258 | 259 | Raises: 260 | ValueError: if bbox_id already exist in frame information with frame_id 261 | ValueError: if frame_id does not exist in frames attribute 262 | """ 263 | if self.frame_exists(frame_id): 264 | frame = self.frames[frame_id] 265 | if not self.bbox_exists(frame_id, bbox_id): 266 | frame.add_bbox(bbox_id, top, left, width, height) 267 | else: 268 | raise ValueError( 269 | "frame with frame_id: {} already contains the bbox with id: {} ".format(frame_id, bbox_id)) 270 | else: 271 | raise ValueError("frame with frame_id: {} does not exist".format(frame_id)) 272 | 273 | def add_label_to_bbox(self, frame_id: int, bbox_id: int, category: str, confidence: float): 274 | """ 275 | Args: 276 | frame_id: 277 | bbox_id: 278 | category: 279 | confidence: the confidence value returned from yolo detection 280 | 281 | Returns: 282 | None 283 | 284 | Raises: 285 | ValueError: if labels quota (top_k_labels) exceeds. 
286 | """ 287 | bbox = self.find_bbox(frame_id, bbox_id) 288 | if not bbox.labels_full(self.top_k_labels): 289 | bbox.add_label(category, confidence) 290 | else: 291 | raise ValueError("labels in frame_id: {}, bbox_id: {} is fulled".format(frame_id, bbox_id)) 292 | 293 | def add_video_details(self, frame_width: int = None, frame_height: int = None, frame_rate: int = None, 294 | video_name: str = None): 295 | self.video_details['frame_width'] = frame_width 296 | self.video_details['frame_height'] = frame_height 297 | self.video_details['frame_rate'] = frame_rate 298 | self.video_details['video_name'] = video_name 299 | 300 | def output(self): 301 | output = {'video_details': self.video_details} 302 | result = list(self.frames.values()) 303 | output['frames'] = [item.dic() for item in result] 304 | return output 305 | 306 | def json_output(self, output_name): 307 | """ 308 | Args: 309 | output_name: 310 | 311 | Returns: 312 | None 313 | 314 | Notes: 315 | It creates the json output with `output_name` name. 316 | """ 317 | if not output_name.endswith('.json'): 318 | output_name += '.json' 319 | with open(output_name, 'w') as file: 320 | json.dump(self.output(), file) 321 | file.close() 322 | 323 | def set_start(self): 324 | self.start_time = datetime.now() 325 | 326 | def schedule_output_by_time(self, output_dir=JsonMeta.PATH_TO_SAVE, hours: int = 0, minutes: int = 0, 327 | seconds: int = 60) -> None: 328 | """ 329 | Notes: 330 | Creates folder and then periodically stores the jsons on that address. 331 | 332 | Args: 333 | output_dir (str): the directory where output files will be stored 334 | hours (int): 335 | minutes (int): 336 | seconds (int): 337 | 338 | Returns: 339 | None 340 | 341 | """ 342 | end = datetime.now() 343 | interval = 0 344 | interval += abs(min([hours, JsonMeta.HOURS]) * 3600) 345 | interval += abs(min([minutes, JsonMeta.MINUTES]) * 60) 346 | interval += abs(min([seconds, JsonMeta.SECONDS])) 347 | diff = (end - self.start_time).seconds 348 | 349 | if diff > interval: 350 | output_name = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '.json' 351 | if not exists(output_dir): 352 | makedirs(output_dir) 353 | output = join(output_dir, output_name) 354 | self.json_output(output_name=output) 355 | self.frames = {} 356 | self.start_time = datetime.now() 357 | 358 | def schedule_output_by_frames(self, frames_quota, frame_counter, output_dir=JsonMeta.PATH_TO_SAVE): 359 | """ 360 | saves as the number of frames quota increases higher. 361 | :param frames_quota: 362 | :param frame_counter: 363 | :param output_dir: 364 | :return: 365 | """ 366 | pass 367 | 368 | def flush(self, output_dir): 369 | """ 370 | Notes: 371 | We use this function to output jsons whenever possible. 372 | like the time that we exit the while loop of opencv. 
373 | 374 | Args: 375 | output_dir: 376 | 377 | Returns: 378 | None 379 | 380 | """ 381 | filename = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '-remaining.json' 382 | output = join(output_dir, filename) 383 | self.json_output(output_name=output) 384 | -------------------------------------------------------------------------------- /srcs/deep_sort/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.INFO) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | -------------------------------------------------------------------------------- /srcs/deep_sort/utils/parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | from easydict import EasyDict as edict 4 | 5 | class YamlParser(edict): 6 | """ 7 | This is yaml parser based on EasyDict. 8 | """ 9 | def __init__(self, cfg_dict=None, config_file=None): 10 | if cfg_dict is None: 11 | cfg_dict = {} 12 | 13 | if config_file is not None: 14 | assert(os.path.isfile(config_file)) 15 | with open(config_file, 'r') as fo: 16 | cfg_dict.update(yaml.load(fo.read())) 17 | 18 | super(YamlParser, self).__init__(cfg_dict) 19 | 20 | 21 | def merge_from_file(self, config_file): 22 | with open(config_file, 'r') as fo: 23 | self.update(yaml.load(fo.read(), Loader=yaml.FullLoader)) 24 | 25 | 26 | def merge_from_dict(self, config_dict): 27 | self.update(config_dict) 28 | 29 | 30 | def get_config(config_file=None): 31 | return YamlParser(config_file=config_file) 32 | 33 | 34 | if __name__ == "__main__": 35 | cfg = YamlParser(config_file="../configs/yolov3.yaml") 36 | cfg.merge_from_file("../configs/deep_sort.yaml") 37 | 38 | import ipdb; ipdb.set_trace() -------------------------------------------------------------------------------- /srcs/deep_sort/utils/tools.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from time import time 3 | 4 | 5 | def is_video(ext: str): 6 | """ 7 | Returns true if ext exists in 8 | allowed_exts for video files. 9 | 10 | Args: 11 | ext: 12 | 13 | Returns: 14 | 15 | """ 16 | 17 | allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp') 18 | return any((ext.endswith(x) for x in allowed_exts)) 19 | 20 | 21 | def tik_tok(func): 22 | """ 23 | keep track of time for each process. 
24 | Args: 25 | func: 26 | 27 | Returns: 28 | 29 | """ 30 | @wraps(func) 31 | def _time_it(*args, **kwargs): 32 | start = time() 33 | try: 34 | return func(*args, **kwargs) 35 | finally: 36 | end_ = time() 37 | print("time: {:.03f}s, fps: {:.03f}".format(end_ - start, 1 / (end_ - start))) 38 | 39 | return _time_it 40 | -------------------------------------------------------------------------------- /srcs/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .engine import EngineBuilder, TRTModule, TRTProfilerV0, TRTProfilerV1 # isort:skip # noqa: E501 2 | import warnings 3 | 4 | import torch 5 | 6 | warnings.filterwarnings(action='ignore', category=torch.jit.TracerWarning) 7 | warnings.filterwarnings(action='ignore', category=torch.jit.ScriptWarning) 8 | warnings.filterwarnings(action='ignore', category=UserWarning) 9 | warnings.filterwarnings(action='ignore', category=FutureWarning) 10 | warnings.filterwarnings(action='ignore', category=DeprecationWarning) 11 | __all__ = ['EngineBuilder', 'TRTModule', 'TRTProfilerV0', 'TRTProfilerV1'] 12 | -------------------------------------------------------------------------------- /srcs/models/api.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import List, OrderedDict, Tuple, Union 3 | 4 | import numpy as np 5 | import tensorrt as trt 6 | 7 | warnings.filterwarnings(action='ignore', category=DeprecationWarning) 8 | 9 | 10 | def trtweight(weights: np.ndarray) -> trt.Weights: 11 | weights = weights.astype(weights.dtype.name) 12 | return trt.Weights(weights) 13 | 14 | 15 | def get_width(x: int, gw: float, divisor: int = 8) -> int: 16 | return int(np.ceil(x * gw / divisor) * divisor) 17 | 18 | 19 | def get_depth(x: int, gd: float) -> int: 20 | return max(int(round(x * gd)), 1) 21 | 22 | 23 | def Conv2d(network: trt.INetworkDefinition, weights: OrderedDict, 24 | input: trt.ITensor, out_channel: int, ksize: int, stride: int, 25 | group: int, layer_name: str) -> trt.ILayer: 26 | padding = ksize // 2 27 | conv_w = trtweight(weights[layer_name + '.weight']) 28 | conv_b = trtweight(weights[layer_name + '.bias']) 29 | conv = network.add_convolution_nd(input, 30 | num_output_maps=out_channel, 31 | kernel_shape=trt.DimsHW(ksize, ksize), 32 | kernel=conv_w, 33 | bias=conv_b) 34 | assert conv, 'Add convolution_nd layer failed' 35 | conv.stride_nd = trt.DimsHW(stride, stride) 36 | conv.padding_nd = trt.DimsHW(padding, padding) 37 | conv.num_groups = group 38 | return conv 39 | 40 | 41 | def Conv(network: trt.INetworkDefinition, weights: OrderedDict, 42 | input: trt.ITensor, out_channel: int, ksize: int, stride: int, 43 | group: int, layer_name: str) -> trt.ILayer: 44 | padding = ksize // 2 45 | if ksize > 3: 46 | padding -= 1 47 | conv_w = trtweight(weights[layer_name + '.conv.weight']) 48 | conv_b = trtweight(weights[layer_name + '.conv.bias']) 49 | 50 | conv = network.add_convolution_nd(input, 51 | num_output_maps=out_channel, 52 | kernel_shape=trt.DimsHW(ksize, ksize), 53 | kernel=conv_w, 54 | bias=conv_b) 55 | assert conv, 'Add convolution_nd layer failed' 56 | conv.stride_nd = trt.DimsHW(stride, stride) 57 | conv.padding_nd = trt.DimsHW(padding, padding) 58 | conv.num_groups = group 59 | 60 | sigmoid = network.add_activation(conv.get_output(0), 61 | trt.ActivationType.SIGMOID) 62 | assert sigmoid, 'Add activation layer failed' 63 | dot_product = network.add_elementwise(conv.get_output(0), 64 | sigmoid.get_output(0), 65 | 
trt.ElementWiseOperation.PROD) 66 | assert dot_product, 'Add elementwise layer failed' 67 | return dot_product 68 | 69 | 70 | def Bottleneck(network: trt.INetworkDefinition, weights: OrderedDict, 71 | input: trt.ITensor, c1: int, c2: int, shortcut: bool, 72 | group: int, scale: float, layer_name: str) -> trt.ILayer: 73 | c_ = int(c2 * scale) 74 | conv1 = Conv(network, weights, input, c_, 3, 1, 1, layer_name + '.cv1') 75 | conv2 = Conv(network, weights, conv1.get_output(0), c2, 3, 1, group, 76 | layer_name + '.cv2') 77 | if shortcut and c1 == c2: 78 | ew = network.add_elementwise(input, 79 | conv2.get_output(0), 80 | op=trt.ElementWiseOperation.SUM) 81 | assert ew, 'Add elementwise layer failed' 82 | return ew 83 | return conv2 84 | 85 | 86 | def C2f(network: trt.INetworkDefinition, weights: OrderedDict, 87 | input: trt.ITensor, cout: int, n: int, shortcut: bool, group: int, 88 | scale: float, layer_name: str) -> trt.ILayer: 89 | c_ = int(cout * scale) # e:expand param 90 | conv1 = Conv(network, weights, input, 2 * c_, 1, 1, 1, layer_name + '.cv1') 91 | y1 = conv1.get_output(0) 92 | 93 | b, _, h, w = y1.shape 94 | slice = network.add_slice(y1, (0, c_, 0, 0), (b, c_, h, w), (1, 1, 1, 1)) 95 | assert slice, 'Add slice layer failed' 96 | y2 = slice.get_output(0) 97 | 98 | input_tensors = [y1] 99 | for i in range(n): 100 | b = Bottleneck(network, weights, y2, c_, c_, shortcut, group, 1.0, 101 | layer_name + '.m.' + str(i)) 102 | y2 = b.get_output(0) 103 | input_tensors.append(y2) 104 | 105 | cat = network.add_concatenation(input_tensors) 106 | assert cat, 'Add concatenation layer failed' 107 | 108 | conv2 = Conv(network, weights, cat.get_output(0), cout, 1, 1, 1, 109 | layer_name + '.cv2') 110 | return conv2 111 | 112 | 113 | def SPPF(network: trt.INetworkDefinition, weights: OrderedDict, 114 | input: trt.ITensor, c1: int, c2: int, ksize: int, 115 | layer_name: str) -> trt.ILayer: 116 | c_ = c1 // 2 117 | conv1 = Conv(network, weights, input, c_, 1, 1, 1, layer_name + '.cv1') 118 | 119 | pool1 = network.add_pooling_nd(conv1.get_output(0), trt.PoolingType.MAX, 120 | trt.DimsHW(ksize, ksize)) 121 | assert pool1, 'Add pooling_nd layer failed' 122 | pool1.padding_nd = trt.DimsHW(ksize // 2, ksize // 2) 123 | pool1.stride_nd = trt.DimsHW(1, 1) 124 | 125 | pool2 = network.add_pooling_nd(pool1.get_output(0), trt.PoolingType.MAX, 126 | trt.DimsHW(ksize, ksize)) 127 | assert pool2, 'Add pooling_nd layer failed' 128 | pool2.padding_nd = trt.DimsHW(ksize // 2, ksize // 2) 129 | pool2.stride_nd = trt.DimsHW(1, 1) 130 | 131 | pool3 = network.add_pooling_nd(pool2.get_output(0), trt.PoolingType.MAX, 132 | trt.DimsHW(ksize, ksize)) 133 | assert pool3, 'Add pooling_nd layer failed' 134 | pool3.padding_nd = trt.DimsHW(ksize // 2, ksize // 2) 135 | pool3.stride_nd = trt.DimsHW(1, 1) 136 | 137 | input_tensors = [ 138 | conv1.get_output(0), 139 | pool1.get_output(0), 140 | pool2.get_output(0), 141 | pool3.get_output(0) 142 | ] 143 | cat = network.add_concatenation(input_tensors) 144 | assert cat, 'Add concatenation layer failed' 145 | conv2 = Conv(network, weights, cat.get_output(0), c2, 1, 1, 1, 146 | layer_name + '.cv2') 147 | return conv2 148 | 149 | 150 | def Detect( 151 | network: trt.INetworkDefinition, 152 | weights: OrderedDict, 153 | input: Union[List, Tuple], 154 | s: Union[List, Tuple], 155 | layer_name: str, 156 | reg_max: int = 16, 157 | fp16: bool = True, 158 | iou: float = 0.65, 159 | conf: float = 0.25, 160 | topk: int = 100, 161 | ) -> trt.ILayer: 162 | bboxes_branch = [] 163 | scores_branch = [] 
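# Note (editor comment, derived from the code below): per detection scale this builder
# (a) precomputes anchor centers (grid cells + 0.5 offset) and per-cell strides,
# (b) runs the box branch cv2.{i} -> 4*reg_max DFL logits and the class branch cv3.{i} -> nc logits,
# (c) decodes the DFL distribution via softmax and a matmul with [0..reg_max), turns the
#     decoded distances into x1y1/x2y2 against the anchors and scales by stride, and
# (d) finally feeds boxes and sigmoid scores to the EfficientNMS_TRT plugin, whose outputs
#     are named num_dets / bboxes / scores / labels.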
164 | anchors = [] 165 | strides = [] 166 | for i, (inp, stride) in enumerate(zip(input, s)): 167 | h, w = inp.shape[2:] 168 | sx = np.arange(0, w).astype(np.float16 if fp16 else np.float32) + 0.5 169 | sy = np.arange(0, h).astype(np.float16 if fp16 else np.float32) + 0.5 170 | sy, sx = np.meshgrid(sy, sx) 171 | a = np.ascontiguousarray(np.stack((sy, sx), -1).reshape(-1, 2)) 172 | anchors.append(a) 173 | strides.append( 174 | np.full((1, h * w), 175 | stride, 176 | dtype=np.float16 if fp16 else np.float32)) 177 | c2 = weights[f'{layer_name}.cv2.{i}.0.conv.weight'].shape[0] 178 | c3 = weights[f'{layer_name}.cv3.{i}.0.conv.weight'].shape[0] 179 | nc = weights[f'{layer_name}.cv3.0.2.weight'].shape[0] 180 | reg_max_x4 = weights[layer_name + f'.cv2.{i}.2.weight'].shape[0] 181 | assert reg_max_x4 == reg_max * 4 182 | b_Conv_0 = Conv(network, weights, inp, c2, 3, 1, 1, 183 | layer_name + f'.cv2.{i}.0') 184 | b_Conv_1 = Conv(network, weights, b_Conv_0.get_output(0), c2, 3, 1, 1, 185 | layer_name + f'.cv2.{i}.1') 186 | b_Conv_2 = Conv2d(network, weights, b_Conv_1.get_output(0), reg_max_x4, 187 | 1, 1, 1, layer_name + f'.cv2.{i}.2') 188 | 189 | b_out = b_Conv_2.get_output(0) 190 | b_shape = network.add_constant([ 191 | 4, 192 | ], np.array(b_out.shape[0:1] + (4, reg_max, -1), dtype=np.int32)) 193 | assert b_shape, 'Add constant layer failed' 194 | b_shuffle = network.add_shuffle(b_out) 195 | assert b_shuffle, 'Add shuffle layer failed' 196 | b_shuffle.set_input(1, b_shape.get_output(0)) 197 | b_shuffle.second_transpose = (0, 3, 1, 2) 198 | 199 | bboxes_branch.append(b_shuffle.get_output(0)) 200 | 201 | s_Conv_0 = Conv(network, weights, inp, c3, 3, 1, 1, 202 | layer_name + f'.cv3.{i}.0') 203 | s_Conv_1 = Conv(network, weights, s_Conv_0.get_output(0), c3, 3, 1, 1, 204 | layer_name + f'.cv3.{i}.1') 205 | s_Conv_2 = Conv2d(network, weights, s_Conv_1.get_output(0), nc, 1, 1, 206 | 1, layer_name + f'.cv3.{i}.2') 207 | s_out = s_Conv_2.get_output(0) 208 | s_shape = network.add_constant([ 209 | 3, 210 | ], np.array(s_out.shape[0:2] + (-1, ), dtype=np.int32)) 211 | assert s_shape, 'Add constant layer failed' 212 | s_shuffle = network.add_shuffle(s_out) 213 | assert s_shuffle, 'Add shuffle layer failed' 214 | s_shuffle.set_input(1, s_shape.get_output(0)) 215 | s_shuffle.second_transpose = (0, 2, 1) 216 | 217 | scores_branch.append(s_shuffle.get_output(0)) 218 | 219 | Cat_bboxes = network.add_concatenation(bboxes_branch) 220 | assert Cat_bboxes, 'Add concatenation layer failed' 221 | Cat_scores = network.add_concatenation(scores_branch) 222 | assert Cat_scores, 'Add concatenation layer failed' 223 | Cat_scores.axis = 1 224 | 225 | Softmax = network.add_softmax(Cat_bboxes.get_output(0)) 226 | assert Softmax, 'Add softmax layer failed' 227 | Softmax.axes = 1 << 3 228 | 229 | SCORES = network.add_activation(Cat_scores.get_output(0), 230 | trt.ActivationType.SIGMOID) 231 | assert SCORES, 'Add activation layer failed' 232 | 233 | reg_max = np.arange( 234 | 0, reg_max).astype(np.float16 if fp16 else np.float32).reshape( 235 | (1, 1, -1, 1)) 236 | constant = network.add_constant(reg_max.shape, reg_max) 237 | assert constant, 'Add constant layer failed' 238 | Matmul = network.add_matrix_multiply(Softmax.get_output(0), 239 | trt.MatrixOperation.NONE, 240 | constant.get_output(0), 241 | trt.MatrixOperation.NONE) 242 | assert Matmul, 'Add matrix_multiply layer failed' 243 | pre_bboxes = network.add_gather( 244 | Matmul.get_output(0), 245 | network.add_constant([ 246 | 1, 247 | ], np.array([0], 
dtype=np.int32)).get_output(0), 3) 248 | assert pre_bboxes, 'Add gather layer failed' 249 | pre_bboxes.num_elementwise_dims = 1 250 | 251 | pre_bboxes_tensor = pre_bboxes.get_output(0) 252 | b, c, _ = pre_bboxes_tensor.shape 253 | slice_x1y1 = network.add_slice(pre_bboxes_tensor, (0, 0, 0), (b, c, 2), 254 | (1, 1, 1)) 255 | assert slice_x1y1, 'Add slice layer failed' 256 | slice_x2y2 = network.add_slice(pre_bboxes_tensor, (0, 0, 2), (b, c, 2), 257 | (1, 1, 1)) 258 | assert slice_x2y2, 'Add slice layer failed' 259 | anchors = np.concatenate(anchors, 0)[np.newaxis] 260 | anchors = network.add_constant(anchors.shape, anchors) 261 | assert anchors, 'Add constant layer failed' 262 | strides = np.concatenate(strides, 1)[..., np.newaxis] 263 | strides = network.add_constant(strides.shape, strides) 264 | assert strides, 'Add constant layer failed' 265 | 266 | Sub = network.add_elementwise(anchors.get_output(0), 267 | slice_x1y1.get_output(0), 268 | trt.ElementWiseOperation.SUB) 269 | assert Sub, 'Add elementwise layer failed' 270 | Add = network.add_elementwise(anchors.get_output(0), 271 | slice_x2y2.get_output(0), 272 | trt.ElementWiseOperation.SUM) 273 | assert Add, 'Add elementwise layer failed' 274 | x1y1 = Sub.get_output(0) 275 | x2y2 = Add.get_output(0) 276 | 277 | Cat_bboxes_ = network.add_concatenation([x1y1, x2y2]) 278 | assert Cat_bboxes_, 'Add concatenation layer failed' 279 | Cat_bboxes_.axis = 2 280 | 281 | BBOXES = network.add_elementwise(Cat_bboxes_.get_output(0), 282 | strides.get_output(0), 283 | trt.ElementWiseOperation.PROD) 284 | assert BBOXES, 'Add elementwise layer failed' 285 | plugin_creator = trt.get_plugin_registry().get_plugin_creator( 286 | 'EfficientNMS_TRT', '1') 287 | assert plugin_creator, 'Plugin EfficientNMS_TRT is not registried' 288 | 289 | background_class = trt.PluginField('background_class', 290 | np.array(-1, np.int32), 291 | trt.PluginFieldType.INT32) 292 | box_coding = trt.PluginField('box_coding', np.array(0, np.int32), 293 | trt.PluginFieldType.INT32) 294 | iou_threshold = trt.PluginField('iou_threshold', 295 | np.array(iou, dtype=np.float32), 296 | trt.PluginFieldType.FLOAT32) 297 | max_output_boxes = trt.PluginField('max_output_boxes', 298 | np.array(topk, np.int32), 299 | trt.PluginFieldType.INT32) 300 | plugin_version = trt.PluginField('plugin_version', np.array('1'), 301 | trt.PluginFieldType.CHAR) 302 | score_activation = trt.PluginField('score_activation', 303 | np.array(0, np.int32), 304 | trt.PluginFieldType.INT32) 305 | score_threshold = trt.PluginField('score_threshold', 306 | np.array(conf, dtype=np.float32), 307 | trt.PluginFieldType.FLOAT32) 308 | 309 | batched_nms_op = plugin_creator.create_plugin( 310 | name='batched_nms', 311 | field_collection=trt.PluginFieldCollection([ 312 | background_class, box_coding, iou_threshold, max_output_boxes, 313 | plugin_version, score_activation, score_threshold 314 | ])) 315 | 316 | batched_nms = network.add_plugin_v2( 317 | inputs=[BBOXES.get_output(0), 318 | SCORES.get_output(0)], 319 | plugin=batched_nms_op) 320 | 321 | batched_nms.get_output(0).name = 'num_dets' 322 | batched_nms.get_output(1).name = 'bboxes' 323 | batched_nms.get_output(2).name = 'scores' 324 | batched_nms.get_output(3).name = 'labels' 325 | 326 | return batched_nms 327 | -------------------------------------------------------------------------------- /srcs/models/common.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import torch 4 | import torch.nn as nn 5 | 
from torch import Graph, Tensor, Value 6 | 7 | 8 | def make_anchors(feats: Tensor, 9 | strides: Tensor, 10 | grid_cell_offset: float = 0.5) -> Tuple[Tensor, Tensor]: 11 | anchor_points, stride_tensor = [], [] 12 | assert feats is not None 13 | dtype, device = feats[0].dtype, feats[0].device 14 | for i, stride in enumerate(strides): 15 | _, _, h, w = feats[i].shape 16 | sx = torch.arange(end=w, device=device, 17 | dtype=dtype) + grid_cell_offset # shift x 18 | sy = torch.arange(end=h, device=device, 19 | dtype=dtype) + grid_cell_offset # shift y 20 | sy, sx = torch.meshgrid(sy, sx) 21 | anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2)) 22 | stride_tensor.append( 23 | torch.full((h * w, 1), stride, dtype=dtype, device=device)) 24 | return torch.cat(anchor_points), torch.cat(stride_tensor) 25 | 26 | 27 | class TRT_NMS(torch.autograd.Function): 28 | 29 | @staticmethod 30 | def forward( 31 | ctx: Graph, 32 | boxes: Tensor, 33 | scores: Tensor, 34 | iou_threshold: float = 0.65, 35 | score_threshold: float = 0.25, 36 | max_output_boxes: int = 100, 37 | background_class: int = -1, 38 | box_coding: int = 0, 39 | plugin_version: str = '1', 40 | score_activation: int = 0 41 | ) -> Tuple[Tensor, Tensor, Tensor, Tensor]: 42 | batch_size, num_boxes, num_classes = scores.shape 43 | num_dets = torch.randint(0, 44 | max_output_boxes, (batch_size, 1), 45 | dtype=torch.int32) 46 | boxes = torch.randn(batch_size, max_output_boxes, 4) 47 | scores = torch.randn(batch_size, max_output_boxes) 48 | labels = torch.randint(0, 49 | num_classes, (batch_size, max_output_boxes), 50 | dtype=torch.int32) 51 | 52 | return num_dets, boxes, scores, labels 53 | 54 | @staticmethod 55 | def symbolic( 56 | g, 57 | boxes: Value, 58 | scores: Value, 59 | iou_threshold: float = 0.45, 60 | score_threshold: float = 0.25, 61 | max_output_boxes: int = 100, 62 | background_class: int = -1, 63 | box_coding: int = 0, 64 | score_activation: int = 0, 65 | plugin_version: str = '1') -> Tuple[Value, Value, Value, Value]: 66 | out = g.op('TRT::EfficientNMS_TRT', 67 | boxes, 68 | scores, 69 | iou_threshold_f=iou_threshold, 70 | score_threshold_f=score_threshold, 71 | max_output_boxes_i=max_output_boxes, 72 | background_class_i=background_class, 73 | box_coding_i=box_coding, 74 | plugin_version_s=plugin_version, 75 | score_activation_i=score_activation, 76 | outputs=4) 77 | nums_dets, boxes, scores, classes = out 78 | return nums_dets, boxes, scores, classes 79 | 80 | 81 | class C2f(nn.Module): 82 | 83 | def __init__(self, *args, **kwargs): 84 | super().__init__() 85 | 86 | def forward(self, x): 87 | x = self.cv1(x) 88 | x = [x, x[:, self.c:, ...]] 89 | x.extend(m(x[-1]) for m in self.m) 90 | x.pop(1) 91 | return self.cv2(torch.cat(x, 1)) 92 | 93 | 94 | class PostDetect(nn.Module): 95 | export = True 96 | shape = None 97 | dynamic = False 98 | iou_thres = 0.65 99 | conf_thres = 0.25 100 | topk = 100 101 | 102 | def __init__(self, *args, **kwargs): 103 | super().__init__() 104 | 105 | def forward(self, x): 106 | shape = x[0].shape 107 | b, res, b_reg_num = shape[0], [], self.reg_max * 4 108 | for i in range(self.nl): 109 | res.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)) 110 | if self.dynamic or self.shape != shape: 111 | self.anchors, self.strides = (x.transpose( 112 | 0, 1) for x in make_anchors(x, self.stride, 0.5)) 113 | self.shape = shape 114 | x = [i.view(b, self.no, -1) for i in res] 115 | y = torch.cat(x, 2) 116 | boxes, scores = y[:, :b_reg_num, ...], y[:, b_reg_num:, ...].sigmoid() 117 | boxes = boxes.view(b, 
4, self.reg_max, -1).permute(0, 1, 3, 2) 118 | boxes = boxes.softmax(-1) @ torch.arange(self.reg_max).to(boxes) 119 | boxes0, boxes1 = -boxes[:, :2, ...], boxes[:, 2:, ...] 120 | boxes = self.anchors.repeat(b, 2, 1) + torch.cat([boxes0, boxes1], 1) 121 | boxes = boxes * self.strides 122 | 123 | return TRT_NMS.apply(boxes.transpose(1, 2), scores.transpose(1, 2), 124 | self.iou_thres, self.conf_thres, self.topk) 125 | 126 | 127 | class PostSeg(nn.Module): 128 | export = True 129 | shape = None 130 | dynamic = False 131 | 132 | def __init__(self, *args, **kwargs): 133 | super().__init__() 134 | 135 | def forward(self, x): 136 | p = self.proto(x[0]) # mask protos 137 | bs = p.shape[0] # batch size 138 | mc = torch.cat( 139 | [self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 140 | 2) # mask coefficients 141 | boxes, scores, labels = self.forward_det(x) 142 | out = torch.cat([boxes, scores, labels.float(), mc.transpose(1, 2)], 2) 143 | return out, p.flatten(2) 144 | 145 | def forward_det(self, x): 146 | shape = x[0].shape 147 | b, res, b_reg_num = shape[0], [], self.reg_max * 4 148 | for i in range(self.nl): 149 | res.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)) 150 | if self.dynamic or self.shape != shape: 151 | self.anchors, self.strides = \ 152 | (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5)) 153 | self.shape = shape 154 | x = [i.view(b, self.no, -1) for i in res] 155 | y = torch.cat(x, 2) 156 | boxes, scores = y[:, :b_reg_num, ...], y[:, b_reg_num:, ...].sigmoid() 157 | boxes = boxes.view(b, 4, self.reg_max, -1).permute(0, 1, 3, 2) 158 | boxes = boxes.softmax(-1) @ torch.arange(self.reg_max).to(boxes) 159 | boxes0, boxes1 = -boxes[:, :2, ...], boxes[:, 2:, ...] 160 | boxes = self.anchors.repeat(b, 2, 1) + torch.cat([boxes0, boxes1], 1) 161 | boxes = boxes * self.strides 162 | scores, labels = scores.transpose(1, 2).max(dim=-1, keepdim=True) 163 | return boxes.transpose(1, 2), scores, labels 164 | 165 | 166 | def optim(module: nn.Module): 167 | s = str(type(module))[6:-2].split('.')[-1] 168 | if s == 'Detect': 169 | setattr(module, '__class__', PostDetect) 170 | elif s == 'Segment': 171 | setattr(module, '__class__', PostSeg) 172 | elif s == 'C2f': 173 | setattr(module, '__class__', C2f) 174 | -------------------------------------------------------------------------------- /srcs/models/cudart_api.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | from dataclasses import dataclass 4 | from pathlib import Path 5 | from typing import List, Optional, Tuple, Union 6 | 7 | import numpy as np 8 | import tensorrt as trt 9 | from cuda import cudart 10 | from numpy import ndarray 11 | 12 | os.environ['CUDA_MODULE_LOADING'] = 'LAZY' 13 | warnings.filterwarnings(action='ignore', category=DeprecationWarning) 14 | 15 | 16 | @dataclass 17 | class Tensor: 18 | name: str 19 | dtype: np.dtype 20 | shape: Tuple 21 | cpu: ndarray 22 | gpu: int 23 | 24 | 25 | class TRTEngine: 26 | 27 | def __init__(self, weight: Union[str, Path]) -> None: 28 | self.weight = Path(weight) if isinstance(weight, str) else weight 29 | status, self.stream = cudart.cudaStreamCreate() 30 | assert status.value == 0 31 | self.__init_engine() 32 | self.__init_bindings() 33 | self.__warm_up() 34 | 35 | def __init_engine(self) -> None: 36 | logger = trt.Logger(trt.Logger.WARNING) 37 | trt.init_libnvinfer_plugins(logger, namespace='') 38 | with trt.Runtime(logger) as runtime: 39 | model = 
runtime.deserialize_cuda_engine(self.weight.read_bytes()) 40 | 41 | context = model.create_execution_context() 42 | 43 | names = [model.get_binding_name(i) for i in range(model.num_bindings)] 44 | self.num_bindings = model.num_bindings 45 | self.bindings: List[int] = [0] * self.num_bindings 46 | num_inputs, num_outputs = 0, 0 47 | 48 | for i in range(model.num_bindings): 49 | if model.binding_is_input(i): 50 | num_inputs += 1 51 | else: 52 | num_outputs += 1 53 | 54 | self.num_inputs = num_inputs 55 | self.num_outputs = num_outputs 56 | self.model = model 57 | self.context = context 58 | self.input_names = names[:num_inputs] 59 | self.output_names = names[num_inputs:] 60 | 61 | def __init_bindings(self) -> None: 62 | dynamic = False 63 | inp_info = [] 64 | out_info = [] 65 | out_ptrs = [] 66 | for i, name in enumerate(self.input_names): 67 | assert self.model.get_binding_name(i) == name 68 | dtype = trt.nptype(self.model.get_binding_dtype(i)) 69 | shape = tuple(self.model.get_binding_shape(i)) 70 | if -1 in shape: 71 | dynamic |= True 72 | if not dynamic: 73 | cpu = np.empty(shape, dtype) 74 | status, gpu = cudart.cudaMallocAsync(cpu.nbytes, self.stream) 75 | assert status.value == 0 76 | cudart.cudaMemcpyAsync( 77 | gpu, cpu.ctypes.data, cpu.nbytes, 78 | cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) 79 | else: 80 | cpu, gpu = np.empty(0), 0 81 | inp_info.append(Tensor(name, dtype, shape, cpu, gpu)) 82 | for i, name in enumerate(self.output_names): 83 | i += self.num_inputs 84 | assert self.model.get_binding_name(i) == name 85 | dtype = trt.nptype(self.model.get_binding_dtype(i)) 86 | shape = tuple(self.model.get_binding_shape(i)) 87 | if not dynamic: 88 | cpu = np.empty(shape, dtype=dtype) 89 | status, gpu = cudart.cudaMallocAsync(cpu.nbytes, self.stream) 90 | assert status.value == 0 91 | cudart.cudaMemcpyAsync( 92 | gpu, cpu.ctypes.data, cpu.nbytes, 93 | cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) 94 | out_ptrs.append(gpu) 95 | else: 96 | cpu, gpu = np.empty(0), 0 97 | out_info.append(Tensor(name, dtype, shape, cpu, gpu)) 98 | 99 | self.is_dynamic = dynamic 100 | self.inp_info = inp_info 101 | self.out_info = out_info 102 | self.out_ptrs = out_ptrs 103 | 104 | def __warm_up(self) -> None: 105 | if self.is_dynamic: 106 | print('You engine has dynamic axes, please warm up by yourself !') 107 | return 108 | for _ in range(10): 109 | inputs = [] 110 | for i in self.inp_info: 111 | inputs.append(i.cpu) 112 | self.__call__(inputs) 113 | 114 | def set_profiler(self, profiler: Optional[trt.IProfiler]) -> None: 115 | self.context.profiler = profiler \ 116 | if profiler is not None else trt.Profiler() 117 | 118 | def __call__(self, *inputs) -> Union[Tuple, ndarray]: 119 | 120 | assert len(inputs) == self.num_inputs 121 | contiguous_inputs: List[ndarray] = [ 122 | np.ascontiguousarray(i) for i in inputs 123 | ] 124 | 125 | for i in range(self.num_inputs): 126 | 127 | if self.is_dynamic: 128 | self.context.set_binding_shape( 129 | i, tuple(contiguous_inputs[i].shape)) 130 | status, self.inp_info[i].gpu = cudart.cudaMallocAsync( 131 | contiguous_inputs[i].nbytes, self.stream) 132 | assert status.value == 0 133 | cudart.cudaMemcpyAsync( 134 | self.inp_info[i].gpu, contiguous_inputs[i].ctypes.data, 135 | contiguous_inputs[i].nbytes, 136 | cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) 137 | self.bindings[i] = self.inp_info[i].gpu 138 | 139 | output_gpu_ptrs: List[int] = [] 140 | outputs: List[ndarray] = [] 141 | 142 | for i in range(self.num_outputs): 143 | j = 
i + self.num_inputs 144 | if self.is_dynamic: 145 | shape = tuple(self.context.get_binding_shape(j)) 146 | dtype = self.out_info[i].dtype 147 | cpu = np.empty(shape, dtype=dtype) 148 | status, gpu = cudart.cudaMallocAsync(cpu.nbytes, self.stream) 149 | assert status.value == 0 150 | cudart.cudaMemcpyAsync( 151 | gpu, cpu.ctypes.data, cpu.nbytes, 152 | cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) 153 | else: 154 | cpu = self.out_info[i].cpu 155 | gpu = self.out_info[i].gpu 156 | outputs.append(cpu) 157 | output_gpu_ptrs.append(gpu) 158 | self.bindings[j] = gpu 159 | 160 | self.context.execute_async_v2(self.bindings, self.stream) 161 | cudart.cudaStreamSynchronize(self.stream) 162 | 163 | for i, o in enumerate(output_gpu_ptrs): 164 | cudart.cudaMemcpyAsync( 165 | outputs[i].ctypes.data, o, outputs[i].nbytes, 166 | cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, self.stream) 167 | 168 | return tuple(outputs) if len(outputs) > 1 else outputs[0] 169 | -------------------------------------------------------------------------------- /srcs/models/engine.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from collections import defaultdict, namedtuple 4 | from pathlib import Path 5 | from typing import List, Optional, Tuple, Union 6 | 7 | import onnx 8 | import tensorrt as trt 9 | import torch 10 | 11 | os.environ['CUDA_MODULE_LOADING'] = 'LAZY' 12 | 13 | 14 | class EngineBuilder: 15 | seg = False 16 | 17 | def __init__( 18 | self, 19 | checkpoint: Union[str, Path], 20 | device: Optional[Union[str, int, torch.device]] = None) -> None: 21 | checkpoint = Path(checkpoint) if isinstance(checkpoint, 22 | str) else checkpoint 23 | assert checkpoint.exists() and checkpoint.suffix in ('.onnx', '.pkl') 24 | self.api = checkpoint.suffix == '.pkl' 25 | if isinstance(device, str): 26 | device = torch.device(device) 27 | elif isinstance(device, int): 28 | device = torch.device(f'cuda:{device}') 29 | 30 | self.checkpoint = checkpoint 31 | self.device = device 32 | 33 | def __build_engine(self, 34 | fp16: bool = True, 35 | input_shape: Union[List, Tuple] = (1, 3, 640, 640), 36 | iou_thres: float = 0.65, 37 | conf_thres: float = 0.25, 38 | topk: int = 100, 39 | with_profiling: bool = True) -> None: 40 | logger = trt.Logger(trt.Logger.WARNING) 41 | trt.init_libnvinfer_plugins(logger, namespace='') 42 | builder = trt.Builder(logger) 43 | config = builder.create_builder_config() 44 | config.max_workspace_size = torch.cuda.get_device_properties( 45 | self.device).total_memory 46 | flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) 47 | network = builder.create_network(flag) 48 | 49 | self.logger = logger 50 | self.builder = builder 51 | self.network = network 52 | if self.api: 53 | self.build_from_api(fp16, input_shape, iou_thres, conf_thres, topk) 54 | else: 55 | self.build_from_onnx(iou_thres, conf_thres, topk) 56 | if fp16 and self.builder.platform_has_fast_fp16: 57 | config.set_flag(trt.BuilderFlag.FP16) 58 | self.weight = self.checkpoint.with_suffix('.engine') 59 | 60 | if with_profiling: 61 | config.profiling_verbosity = trt.ProfilingVerbosity.DETAILED 62 | with self.builder.build_engine(self.network, config) as engine: 63 | self.weight.write_bytes(engine.serialize()) 64 | self.logger.log( 65 | trt.Logger.WARNING, f'Build tensorrt engine finish.\n' 66 | f'Save in {str(self.weight.absolute())}') 67 | 68 | def build(self, 69 | fp16: bool = True, 70 | input_shape: Union[List, Tuple] = (1, 3, 640, 640), 71 | iou_thres: 
float = 0.65, 72 | conf_thres: float = 0.25, 73 | topk: int = 100, 74 | with_profiling=True) -> None: 75 | self.__build_engine(fp16, input_shape, iou_thres, conf_thres, topk, 76 | with_profiling) 77 | 78 | def build_from_onnx(self, 79 | iou_thres: float = 0.65, 80 | conf_thres: float = 0.25, 81 | topk: int = 100): 82 | parser = trt.OnnxParser(self.network, self.logger) 83 | onnx_model = onnx.load(str(self.checkpoint)) 84 | if not self.seg: 85 | onnx_model.graph.node[-1].attribute[2].i = topk 86 | onnx_model.graph.node[-1].attribute[3].f = conf_thres 87 | onnx_model.graph.node[-1].attribute[4].f = iou_thres 88 | 89 | if not parser.parse(onnx_model.SerializeToString()): 90 | raise RuntimeError( 91 | f'failed to load ONNX file: {str(self.checkpoint)}') 92 | inputs = [ 93 | self.network.get_input(i) for i in range(self.network.num_inputs) 94 | ] 95 | outputs = [ 96 | self.network.get_output(i) for i in range(self.network.num_outputs) 97 | ] 98 | 99 | for inp in inputs: 100 | self.logger.log( 101 | trt.Logger.WARNING, 102 | f'input "{inp.name}" with shape: {inp.shape} ' 103 | f'dtype: {inp.dtype}') 104 | for out in outputs: 105 | self.logger.log( 106 | trt.Logger.WARNING, 107 | f'output "{out.name}" with shape: {out.shape} ' 108 | f'dtype: {out.dtype}') 109 | 110 | def build_from_api( 111 | self, 112 | fp16: bool = True, 113 | input_shape: Union[List, Tuple] = (1, 3, 640, 640), 114 | iou_thres: float = 0.65, 115 | conf_thres: float = 0.25, 116 | topk: int = 100, 117 | ): 118 | assert not self.seg 119 | from .api import SPPF, C2f, Conv, Detect, get_depth, get_width 120 | 121 | with open(self.checkpoint, 'rb') as f: 122 | state_dict = pickle.load(f) 123 | mapping = {0.25: 1024, 0.5: 1024, 0.75: 768, 1.0: 512, 1.25: 512} 124 | 125 | GW = state_dict['GW'] 126 | GD = state_dict['GD'] 127 | width_64 = get_width(64, GW) 128 | width_128 = get_width(128, GW) 129 | width_256 = get_width(256, GW) 130 | width_512 = get_width(512, GW) 131 | width_1024 = get_width(mapping[GW], GW) 132 | depth_3 = get_depth(3, GD) 133 | depth_6 = get_depth(6, GD) 134 | strides = state_dict['strides'] 135 | reg_max = state_dict['reg_max'] 136 | images = self.network.add_input(name='images', 137 | dtype=trt.float32, 138 | shape=trt.Dims4(input_shape)) 139 | assert images, 'Add input failed' 140 | 141 | Conv_0 = Conv(self.network, state_dict, images, width_64, 3, 2, 1, 142 | 'Conv.0') 143 | Conv_1 = Conv(self.network, state_dict, Conv_0.get_output(0), 144 | width_128, 3, 2, 1, 'Conv.1') 145 | C2f_2 = C2f(self.network, state_dict, Conv_1.get_output(0), width_128, 146 | depth_3, True, 1, 0.5, 'C2f.2') 147 | Conv_3 = Conv(self.network, state_dict, C2f_2.get_output(0), width_256, 148 | 3, 2, 1, 'Conv.3') 149 | C2f_4 = C2f(self.network, state_dict, Conv_3.get_output(0), width_256, 150 | depth_6, True, 1, 0.5, 'C2f.4') 151 | Conv_5 = Conv(self.network, state_dict, C2f_4.get_output(0), width_512, 152 | 3, 2, 1, 'Conv.5') 153 | C2f_6 = C2f(self.network, state_dict, Conv_5.get_output(0), width_512, 154 | depth_6, True, 1, 0.5, 'C2f.6') 155 | Conv_7 = Conv(self.network, state_dict, C2f_6.get_output(0), 156 | width_1024, 3, 2, 1, 'Conv.7') 157 | C2f_8 = C2f(self.network, state_dict, Conv_7.get_output(0), width_1024, 158 | depth_3, True, 1, 0.5, 'C2f.8') 159 | SPPF_9 = SPPF(self.network, state_dict, C2f_8.get_output(0), 160 | width_1024, width_1024, 5, 'SPPF.9') 161 | Upsample_10 = self.network.add_resize(SPPF_9.get_output(0)) 162 | assert Upsample_10, 'Add Upsample_10 failed' 163 | Upsample_10.resize_mode = trt.ResizeMode.NEAREST 164 | 
Upsample_10.shape = Upsample_10.get_output( 165 | 0).shape[:2] + C2f_6.get_output(0).shape[2:] 166 | input_tensors11 = [Upsample_10.get_output(0), C2f_6.get_output(0)] 167 | Cat_11 = self.network.add_concatenation(input_tensors11) 168 | C2f_12 = C2f(self.network, state_dict, Cat_11.get_output(0), width_512, 169 | depth_3, False, 1, 0.5, 'C2f.12') 170 | Upsample13 = self.network.add_resize(C2f_12.get_output(0)) 171 | assert Upsample13, 'Add Upsample13 failed' 172 | Upsample13.resize_mode = trt.ResizeMode.NEAREST 173 | Upsample13.shape = Upsample13.get_output( 174 | 0).shape[:2] + C2f_4.get_output(0).shape[2:] 175 | input_tensors14 = [Upsample13.get_output(0), C2f_4.get_output(0)] 176 | Cat_14 = self.network.add_concatenation(input_tensors14) 177 | C2f_15 = C2f(self.network, state_dict, Cat_14.get_output(0), width_256, 178 | depth_3, False, 1, 0.5, 'C2f.15') 179 | Conv_16 = Conv(self.network, state_dict, C2f_15.get_output(0), 180 | width_256, 3, 2, 1, 'Conv.16') 181 | input_tensors17 = [Conv_16.get_output(0), C2f_12.get_output(0)] 182 | Cat_17 = self.network.add_concatenation(input_tensors17) 183 | C2f_18 = C2f(self.network, state_dict, Cat_17.get_output(0), width_512, 184 | depth_3, False, 1, 0.5, 'C2f.18') 185 | Conv_19 = Conv(self.network, state_dict, C2f_18.get_output(0), 186 | width_512, 3, 2, 1, 'Conv.19') 187 | input_tensors20 = [Conv_19.get_output(0), SPPF_9.get_output(0)] 188 | Cat_20 = self.network.add_concatenation(input_tensors20) 189 | C2f_21 = C2f(self.network, state_dict, Cat_20.get_output(0), 190 | width_1024, depth_3, False, 1, 0.5, 'C2f.21') 191 | input_tensors22 = [ 192 | C2f_15.get_output(0), 193 | C2f_18.get_output(0), 194 | C2f_21.get_output(0) 195 | ] 196 | batched_nms = Detect(self.network, state_dict, input_tensors22, 197 | strides, 'Detect.22', reg_max, fp16, iou_thres, 198 | conf_thres, topk) 199 | for o in range(batched_nms.num_outputs): 200 | self.network.mark_output(batched_nms.get_output(o)) 201 | 202 | 203 | class TRTModule(torch.nn.Module): 204 | dtypeMapping = { 205 | trt.bool: torch.bool, 206 | trt.int8: torch.int8, 207 | trt.int32: torch.int32, 208 | trt.float16: torch.float16, 209 | trt.float32: torch.float32 210 | } 211 | 212 | def __init__(self, weight: Union[str, Path], 213 | device: Optional[torch.device]) -> None: 214 | super(TRTModule, self).__init__() 215 | self.weight = Path(weight) if isinstance(weight, str) else weight 216 | self.device = device if device is not None else torch.device('cuda:0') 217 | self.stream = torch.cuda.Stream(device=device) 218 | self.__init_engine() 219 | self.__init_bindings() 220 | 221 | def __init_engine(self) -> None: 222 | logger = trt.Logger(trt.Logger.WARNING) 223 | trt.init_libnvinfer_plugins(logger, namespace='') 224 | with trt.Runtime(logger) as runtime: 225 | model = runtime.deserialize_cuda_engine(self.weight.read_bytes()) 226 | 227 | context = model.create_execution_context() 228 | num_bindings = model.num_bindings 229 | names = [model.get_binding_name(i) for i in range(num_bindings)] 230 | 231 | self.bindings: List[int] = [0] * num_bindings 232 | num_inputs, num_outputs = 0, 0 233 | 234 | for i in range(num_bindings): 235 | if model.binding_is_input(i): 236 | num_inputs += 1 237 | else: 238 | num_outputs += 1 239 | 240 | self.num_bindings = num_bindings 241 | self.num_inputs = num_inputs 242 | self.num_outputs = num_outputs 243 | self.model = model 244 | self.context = context 245 | self.input_names = names[:num_inputs] 246 | self.output_names = names[num_inputs:] 247 | self.idx = 
list(range(self.num_outputs)) 248 | 249 | def __init_bindings(self) -> None: 250 | idynamic = odynamic = False 251 | Tensor = namedtuple('Tensor', ('name', 'dtype', 'shape')) 252 | inp_info = [] 253 | out_info = [] 254 | for i, name in enumerate(self.input_names): 255 | assert self.model.get_binding_name(i) == name 256 | dtype = self.dtypeMapping[self.model.get_binding_dtype(i)] 257 | shape = tuple(self.model.get_binding_shape(i)) 258 | if -1 in shape: 259 | idynamic |= True 260 | inp_info.append(Tensor(name, dtype, shape)) 261 | for i, name in enumerate(self.output_names): 262 | i += self.num_inputs 263 | assert self.model.get_binding_name(i) == name 264 | dtype = self.dtypeMapping[self.model.get_binding_dtype(i)] 265 | shape = tuple(self.model.get_binding_shape(i)) 266 | if -1 in shape: 267 | odynamic |= True 268 | out_info.append(Tensor(name, dtype, shape)) 269 | 270 | if not odynamic: 271 | self.output_tensor = [ 272 | torch.empty(info.shape, dtype=info.dtype, device=self.device) 273 | for info in out_info 274 | ] 275 | self.idynamic = idynamic 276 | self.odynamic = odynamic 277 | self.inp_info = inp_info 278 | self.out_info = out_info 279 | 280 | def set_profiler(self, profiler: Optional[trt.IProfiler]): 281 | self.context.profiler = profiler \ 282 | if profiler is not None else trt.Profiler() 283 | 284 | def set_desired(self, desired: Optional[Union[List, Tuple]]): 285 | if isinstance(desired, 286 | (list, tuple)) and len(desired) == self.num_outputs: 287 | self.idx = [self.output_names.index(i) for i in desired] 288 | 289 | def forward(self, *inputs) -> Union[Tuple, torch.Tensor]: 290 | 291 | assert len(inputs) == self.num_inputs 292 | contiguous_inputs: List[torch.Tensor] = [ 293 | i.contiguous() for i in inputs 294 | ] 295 | 296 | for i in range(self.num_inputs): 297 | self.bindings[i] = contiguous_inputs[i].data_ptr() 298 | if self.idynamic: 299 | self.context.set_binding_shape( 300 | i, tuple(contiguous_inputs[i].shape)) 301 | 302 | outputs: List[torch.Tensor] = [] 303 | 304 | for i in range(self.num_outputs): 305 | j = i + self.num_inputs 306 | if self.odynamic: 307 | shape = tuple(self.context.get_binding_shape(j)) 308 | output = torch.empty(size=shape, 309 | dtype=self.out_info[i].dtype, 310 | device=self.device) 311 | else: 312 | output = self.output_tensor[i] 313 | self.bindings[j] = output.data_ptr() 314 | outputs.append(output) 315 | 316 | self.context.execute_async_v2(self.bindings, self.stream.cuda_stream) 317 | self.stream.synchronize() 318 | 319 | return tuple(outputs[i] 320 | for i in self.idx) if len(outputs) > 1 else outputs[0] 321 | 322 | 323 | class TRTProfilerV1(trt.IProfiler): 324 | 325 | def __init__(self): 326 | trt.IProfiler.__init__(self) 327 | self.total_runtime = 0.0 328 | self.recorder = defaultdict(float) 329 | 330 | def report_layer_time(self, layer_name: str, ms: float): 331 | self.total_runtime += ms * 1000 332 | self.recorder[layer_name] += ms * 1000 333 | 334 | def report(self): 335 | f = '\t%40s\t\t\t\t%10.4f' 336 | print('\t%40s\t\t\t\t%10s' % ('layername', 'cost(us)')) 337 | for name, cost in sorted(self.recorder.items(), key=lambda x: -x[1]): 338 | print( 339 | f % 340 | (name if len(name) < 40 else name[:35] + ' ' + '*' * 4, cost)) 341 | print(f'\nTotal Inference Time: {self.total_runtime:.4f}(us)') 342 | 343 | 344 | class TRTProfilerV0(trt.IProfiler): 345 | 346 | def __init__(self): 347 | trt.IProfiler.__init__(self) 348 | 349 | def report_layer_time(self, layer_name: str, ms: float): 350 | f = '\t%40s\t\t\t\t%10.4fms' 351 | print(f % 
(layer_name if len(layer_name) < 40 else layer_name[:35] + 352 | ' ' + '*' * 4, ms)) 353 | -------------------------------------------------------------------------------- /srcs/models/pycuda_api.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | from dataclasses import dataclass 4 | from pathlib import Path 5 | from typing import List, Optional, Tuple, Union 6 | 7 | import numpy as np 8 | import pycuda.autoinit # noqa F401 9 | import pycuda.driver as cuda 10 | import tensorrt as trt 11 | from numpy import ndarray 12 | 13 | os.environ['CUDA_MODULE_LOADING'] = 'LAZY' 14 | warnings.filterwarnings(action='ignore', category=DeprecationWarning) 15 | 16 | 17 | @dataclass 18 | class Tensor: 19 | name: str 20 | dtype: np.dtype 21 | shape: Tuple 22 | cpu: ndarray 23 | gpu: int 24 | 25 | 26 | class TRTEngine: 27 | 28 | def __init__(self, weight: Union[str, Path]) -> None: 29 | self.weight = Path(weight) if isinstance(weight, str) else weight 30 | self.stream = cuda.Stream(0) 31 | self.__init_engine() 32 | self.__init_bindings() 33 | self.__warm_up() 34 | 35 | def __init_engine(self) -> None: 36 | logger = trt.Logger(trt.Logger.WARNING) 37 | trt.init_libnvinfer_plugins(logger, namespace='') 38 | with trt.Runtime(logger) as runtime: 39 | model = runtime.deserialize_cuda_engine(self.weight.read_bytes()) 40 | 41 | context = model.create_execution_context() 42 | 43 | names = [model.get_binding_name(i) for i in range(model.num_bindings)] 44 | self.num_bindings = model.num_bindings 45 | self.bindings: List[int] = [0] * self.num_bindings 46 | num_inputs, num_outputs = 0, 0 47 | 48 | for i in range(model.num_bindings): 49 | if model.binding_is_input(i): 50 | num_inputs += 1 51 | else: 52 | num_outputs += 1 53 | 54 | self.num_inputs = num_inputs 55 | self.num_outputs = num_outputs 56 | self.model = model 57 | self.context = context 58 | self.input_names = names[:num_inputs] 59 | self.output_names = names[num_inputs:] 60 | 61 | def __init_bindings(self) -> None: 62 | dynamic = False 63 | inp_info = [] 64 | out_info = [] 65 | out_ptrs = [] 66 | for i, name in enumerate(self.input_names): 67 | assert self.model.get_binding_name(i) == name 68 | dtype = trt.nptype(self.model.get_binding_dtype(i)) 69 | shape = tuple(self.model.get_binding_shape(i)) 70 | if -1 in shape: 71 | dynamic |= True 72 | if not dynamic: 73 | cpu = np.empty(shape, dtype) 74 | gpu = cuda.mem_alloc(cpu.nbytes) 75 | cuda.memcpy_htod_async(gpu, cpu, self.stream) 76 | else: 77 | cpu, gpu = np.empty(0), 0 78 | inp_info.append(Tensor(name, dtype, shape, cpu, gpu)) 79 | for i, name in enumerate(self.output_names): 80 | i += self.num_inputs 81 | assert self.model.get_binding_name(i) == name 82 | dtype = trt.nptype(self.model.get_binding_dtype(i)) 83 | shape = tuple(self.model.get_binding_shape(i)) 84 | if not dynamic: 85 | cpu = np.empty(shape, dtype=dtype) 86 | gpu = cuda.mem_alloc(cpu.nbytes) 87 | cuda.memcpy_htod_async(gpu, cpu, self.stream) 88 | out_ptrs.append(gpu) 89 | else: 90 | cpu, gpu = np.empty(0), 0 91 | out_info.append(Tensor(name, dtype, shape, cpu, gpu)) 92 | 93 | self.is_dynamic = dynamic 94 | self.inp_info = inp_info 95 | self.out_info = out_info 96 | self.out_ptrs = out_ptrs 97 | 98 | def __warm_up(self) -> None: 99 | if self.is_dynamic: 100 | print('You engine has dynamic axes, please warm up by yourself !') 101 | return 102 | for _ in range(10): 103 | inputs = [] 104 | for i in self.inp_info: 105 | inputs.append(i.cpu) 106 | self.__call__(inputs) 107 | 108 | 
def set_profiler(self, profiler: Optional[trt.IProfiler]) -> None: 109 | self.context.profiler = profiler \ 110 | if profiler is not None else trt.Profiler() 111 | 112 | def __call__(self, *inputs) -> Union[Tuple, ndarray]: 113 | 114 | assert len(inputs) == self.num_inputs 115 | contiguous_inputs: List[ndarray] = [ 116 | np.ascontiguousarray(i) for i in inputs 117 | ] 118 | 119 | for i in range(self.num_inputs): 120 | 121 | if self.is_dynamic: 122 | self.context.set_binding_shape( 123 | i, tuple(contiguous_inputs[i].shape)) 124 | self.inp_info[i].gpu = cuda.mem_alloc( 125 | contiguous_inputs[i].nbytes) 126 | 127 | cuda.memcpy_htod_async(self.inp_info[i].gpu, contiguous_inputs[i], 128 | self.stream) 129 | self.bindings[i] = int(self.inp_info[i].gpu) 130 | 131 | output_gpu_ptrs: List[int] = [] 132 | outputs: List[ndarray] = [] 133 | 134 | for i in range(self.num_outputs): 135 | j = i + self.num_inputs 136 | if self.is_dynamic: 137 | shape = tuple(self.context.get_binding_shape(j)) 138 | dtype = self.out_info[i].dtype 139 | cpu = np.empty(shape, dtype=dtype) 140 | gpu = cuda.mem_alloc(cpu.nbytes) 141 | cuda.memcpy_htod_async(gpu, cpu, self.stream) 142 | else: 143 | cpu = self.out_info[i].cpu 144 | gpu = self.out_info[i].gpu 145 | outputs.append(cpu) 146 | output_gpu_ptrs.append(gpu) 147 | self.bindings[j] = int(gpu) 148 | 149 | self.context.execute_async_v2(self.bindings, self.stream.handle) 150 | self.stream.synchronize() 151 | 152 | for i, o in enumerate(output_gpu_ptrs): 153 | cuda.memcpy_dtoh_async(outputs[i], o, self.stream) 154 | 155 | return tuple(outputs) if len(outputs) > 1 else outputs[0] 156 | -------------------------------------------------------------------------------- /srcs/models/torch_utils.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple, Union 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import Tensor 6 | from torchvision.ops import batched_nms, nms 7 | 8 | 9 | def seg_postprocess( 10 | data: Tuple[Tensor], 11 | shape: Union[Tuple, List], 12 | conf_thres: float = 0.25, 13 | iou_thres: float = 0.65) \ 14 | -> Tuple[Tensor, Tensor, Tensor, Tensor]: 15 | assert len(data) == 2 16 | h, w = shape[0] // 4, shape[1] // 4 # 4x downsampling 17 | outputs, proto = data[0][0], data[1][0] 18 | bboxes, scores, labels, maskconf = outputs.split([4, 1, 1, 32], 1) 19 | scores, labels = scores.squeeze(), labels.squeeze() 20 | idx = scores > conf_thres 21 | if not idx.any(): # no bounding boxes or seg were created 22 | return bboxes.new_zeros((0, 4)), scores.new_zeros( 23 | (0, )), labels.new_zeros((0, )), bboxes.new_zeros((0, 0, 0, 0)) 24 | bboxes, scores, labels, maskconf = \ 25 | bboxes[idx], scores[idx], labels[idx], maskconf[idx] 26 | idx = batched_nms(bboxes, scores, labels, iou_thres) 27 | bboxes, scores, labels, maskconf = \ 28 | bboxes[idx], scores[idx], labels[idx].int(), maskconf[idx] 29 | masks = (maskconf @ proto).sigmoid().view(-1, h, w) 30 | masks = crop_mask(masks, bboxes / 4.) 
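# Note (editor comment, derived from the surrounding code): the prototype masks live at 1/4 of
# the network input resolution (h, w above), so the boxes are scaled by 1/4 before cropping;
# the interpolate call below upsamples the cropped masks back to the requested `shape`,
# after which they are binarised at 0.5.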
31 | masks = F.interpolate(masks[None], 32 | shape, 33 | mode='bilinear', 34 | align_corners=False)[0] 35 | masks = masks.gt_(0.5)[..., None] 36 | return bboxes, scores, labels, masks 37 | 38 | 39 | def pose_postprocess( 40 | data: Union[Tuple, Tensor], 41 | conf_thres: float = 0.25, 42 | iou_thres: float = 0.65) \ 43 | -> Tuple[Tensor, Tensor, Tensor]: 44 | if isinstance(data, tuple): 45 | assert len(data) == 1 46 | data = data[0] 47 | outputs = torch.transpose(data[0], 0, 1).contiguous() 48 | bboxes, scores, kpts = outputs.split([4, 1, 51], 1) 49 | scores, kpts = scores.squeeze(), kpts.squeeze() 50 | idx = scores > conf_thres 51 | if not idx.any(): # no bounding boxes or seg were created 52 | return bboxes.new_zeros((0, 4)), scores.new_zeros( 53 | (0, )), bboxes.new_zeros((0, 0, 0)) 54 | bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx] 55 | xycenter, wh = bboxes.chunk(2, -1) 56 | bboxes = torch.cat([xycenter - 0.5 * wh, xycenter + 0.5 * wh], -1) 57 | idx = nms(bboxes, scores, iou_thres) 58 | bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx] 59 | return bboxes, scores, kpts.reshape(idx.shape[0], -1, 3) 60 | 61 | 62 | def det_postprocess(data: Tuple[Tensor, Tensor, Tensor, Tensor]): 63 | assert len(data) == 4 64 | iou_thres: float = 0.65 65 | num_dets, bboxes, scores, labels = data[0][0], data[1][0], data[2][ 66 | 0], data[3][0] 67 | nums = num_dets.item() 68 | if nums == 0: 69 | return bboxes.new_zeros((0, 4)), scores.new_zeros( 70 | (0, )), labels.new_zeros((0, )) 71 | # check score negative 72 | scores[scores < 0] = 1 + scores[scores < 0] 73 | # add nms 74 | idx = nms(bboxes, scores, iou_thres) 75 | bboxes, scores, labels = bboxes[idx], scores[idx], labels[idx] 76 | bboxes = bboxes[:nums] 77 | scores = scores[:nums] 78 | labels = labels[:nums] 79 | 80 | return bboxes, scores, labels 81 | 82 | 83 | def crop_mask(masks: Tensor, bboxes: Tensor) -> Tensor: 84 | n, h, w = masks.shape 85 | x1, y1, x2, y2 = torch.chunk(bboxes[:, :, None], 4, 1) # x1 shape(1,1,n) 86 | r = torch.arange(w, device=masks.device, 87 | dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) 88 | c = torch.arange(h, device=masks.device, 89 | dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) 90 | 91 | return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) 92 | -------------------------------------------------------------------------------- /srcs/models/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List, Tuple, Union 3 | 4 | import cv2 5 | import numpy as np 6 | from numpy import ndarray 7 | 8 | # image suffixs 9 | SUFFIXS = ('.bmp', '.dng', '.jpeg', '.jpg', '.mpo', '.png', '.tif', '.tiff', 10 | '.webp', '.pfm') 11 | 12 | 13 | def letterbox(im: ndarray, 14 | new_shape: Union[Tuple, List] = (640, 640), 15 | color: Union[Tuple, List] = (114, 114, 114)) \ 16 | -> Tuple[ndarray, float, Tuple[float, float]]: 17 | # Resize and pad image while meeting stride-multiple constraints 18 | shape = im.shape[:2] # current shape [height, width] 19 | if isinstance(new_shape, int): 20 | new_shape = (new_shape, new_shape) 21 | # new_shape: [width, height] 22 | 23 | # Scale ratio (new / old) 24 | r = min(new_shape[0] / shape[1], new_shape[1] / shape[0]) 25 | # Compute padding [width, height] 26 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 27 | dw, dh = new_shape[0] - new_unpad[0], new_shape[1] - new_unpad[ 28 | 1] # wh padding 29 | 30 | dw /= 2 # divide padding into 2 sides 31 | dh /= 2 32 | 33 | if 
shape[::-1] != new_unpad: # resize 34 | im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) 35 | top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) 36 | left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) 37 | im = cv2.copyMakeBorder(im, 38 | top, 39 | bottom, 40 | left, 41 | right, 42 | cv2.BORDER_CONSTANT, 43 | value=color) # add border 44 | return im, r, (dw, dh) 45 | 46 | 47 | def blob(im: ndarray, return_seg: bool = False) -> Union[ndarray, Tuple]: 48 | seg = None 49 | if return_seg: 50 | seg = im.astype(np.float32) / 255 51 | im = im.transpose([2, 0, 1]) 52 | im = im[np.newaxis, ...] 53 | im = np.ascontiguousarray(im).astype(np.float32) / 255 54 | if return_seg: 55 | return im, seg 56 | else: 57 | return im 58 | 59 | 60 | def sigmoid(x: ndarray) -> ndarray: 61 | return 1. / (1. + np.exp(-x)) 62 | 63 | 64 | def bbox_iou(boxes1: ndarray, boxes2: ndarray) -> ndarray: 65 | boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * \ 66 | (boxes1[..., 3] - boxes1[..., 1]) 67 | boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * \ 68 | (boxes2[..., 3] - boxes2[..., 1]) 69 | left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) 70 | right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) 71 | inter_section = np.maximum(right_down - left_up, 0.0) 72 | inter_area = inter_section[..., 0] * inter_section[..., 1] 73 | union_area = boxes1_area + boxes2_area - inter_area 74 | ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps) 75 | 76 | return ious 77 | 78 | 79 | def batched_nms(boxes: ndarray, 80 | scores: ndarray, 81 | iou_thres: float = 0.65, 82 | conf_thres: float = 0.25): 83 | labels = np.argmax(scores, axis=-1) 84 | scores = np.max(scores, axis=-1) 85 | 86 | cand = scores > conf_thres 87 | boxes = boxes[cand] 88 | scores = scores[cand] 89 | labels = labels[cand] 90 | 91 | keep_boxes = [] 92 | keep_scores = [] 93 | keep_labels = [] 94 | 95 | for cls in np.unique(labels): 96 | cls_mask = labels == cls 97 | cls_boxes = boxes[cls_mask] 98 | cls_scores = scores[cls_mask] 99 | 100 | while cls_boxes.shape[0] > 0: 101 | max_idx = np.argmax(cls_scores) 102 | max_box = cls_boxes[max_idx:max_idx + 1] 103 | max_score = cls_scores[max_idx:max_idx + 1] 104 | max_label = np.array([cls], dtype=np.int32) 105 | keep_boxes.append(max_box) 106 | keep_scores.append(max_score) 107 | keep_labels.append(max_label) 108 | other_boxes = np.delete(cls_boxes, max_idx, axis=0) 109 | other_scores = np.delete(cls_scores, max_idx, axis=0) 110 | ious = bbox_iou(max_box, other_boxes) 111 | iou_mask = ious < iou_thres 112 | if not iou_mask.any(): 113 | break 114 | cls_boxes = other_boxes[iou_mask] 115 | cls_scores = other_scores[iou_mask] 116 | 117 | if len(keep_boxes) == 0: 118 | keep_boxes = np.empty((0, 4), dtype=np.float32) 119 | keep_scores = np.empty((0, ), dtype=np.float32) 120 | keep_labels = np.empty((0, ), dtype=np.float32) 121 | 122 | else: 123 | keep_boxes = np.concatenate(keep_boxes, axis=0) 124 | keep_scores = np.concatenate(keep_scores, axis=0) 125 | keep_labels = np.concatenate(keep_labels, axis=0) 126 | 127 | return keep_boxes, keep_scores, keep_labels 128 | 129 | 130 | def nms(boxes: ndarray, 131 | scores: ndarray, 132 | iou_thres: float = 0.65, 133 | conf_thres: float = 0.25): 134 | labels = np.argmax(scores, axis=-1) 135 | scores = np.max(scores, axis=-1) 136 | 137 | cand = scores > conf_thres 138 | boxes = boxes[cand] 139 | scores = scores[cand] 140 | labels = labels[cand] 141 | 142 | keep_boxes = [] 143 | keep_scores = [] 144 | keep_labels = [] 145 | 146 | idxs = 
scores.argsort() 147 | while idxs.size > 0: 148 | max_score_index = idxs[-1] 149 | max_box = boxes[max_score_index:max_score_index + 1] 150 | max_score = scores[max_score_index:max_score_index + 1] 151 | max_label = np.array([labels[max_score_index]], dtype=np.int32) 152 | keep_boxes.append(max_box) 153 | keep_scores.append(max_score) 154 | keep_labels.append(max_label) 155 | if idxs.size == 1: 156 | break 157 | idxs = idxs[:-1] 158 | other_boxes = boxes[idxs] 159 | ious = bbox_iou(max_box, other_boxes) 160 | iou_mask = ious < iou_thres 161 | idxs = idxs[iou_mask] 162 | 163 | if len(keep_boxes) == 0: 164 | keep_boxes = np.empty((0, 4), dtype=np.float32) 165 | keep_scores = np.empty((0, ), dtype=np.float32) 166 | keep_labels = np.empty((0, ), dtype=np.float32) 167 | 168 | else: 169 | keep_boxes = np.concatenate(keep_boxes, axis=0) 170 | keep_scores = np.concatenate(keep_scores, axis=0) 171 | keep_labels = np.concatenate(keep_labels, axis=0) 172 | 173 | return keep_boxes, keep_scores, keep_labels 174 | 175 | 176 | def path_to_list(images_path: Union[str, Path]) -> List: 177 | if isinstance(images_path, str): 178 | images_path = Path(images_path) 179 | assert images_path.exists() 180 | if images_path.is_dir(): 181 | images = [ 182 | i.absolute() for i in images_path.iterdir() if i.suffix in SUFFIXS 183 | ] 184 | else: 185 | assert images_path.suffix in SUFFIXS 186 | images = [images_path.absolute()] 187 | return images 188 | 189 | 190 | def crop_mask(masks: ndarray, bboxes: ndarray) -> ndarray: 191 | n, h, w = masks.shape 192 | x1, y1, x2, y2 = np.split(bboxes[:, :, None], [1, 2, 3], 193 | 1) # x1 shape(1,1,n) 194 | r = np.arange(w, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) 195 | c = np.arange(h, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) 196 | 197 | return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) 198 | 199 | 200 | def det_postprocess(data: Tuple[ndarray, ndarray, ndarray, ndarray]): 201 | assert len(data) == 4 202 | iou_thres: float = 0.65 203 | num_dets, bboxes, scores, labels = (i[0] for i in data) 204 | nums = num_dets.item() 205 | if nums == 0: 206 | return np.empty((0, 4), dtype=np.float32), np.empty( 207 | (0, ), dtype=np.float32), np.empty((0, ), dtype=np.int32) 208 | # check score negative 209 | scores[scores < 0] = 1 + scores[scores < 0] 210 | # add nms 211 | idx = nms(bboxes, scores, iou_thres) 212 | bboxes, scores, labels = bboxes[idx], scores[idx], labels[idx] 213 | 214 | bboxes = bboxes[:nums] 215 | scores = scores[:nums] 216 | labels = labels[:nums] 217 | return bboxes, scores, labels 218 | 219 | 220 | def seg_postprocess( 221 | data: Tuple[ndarray], 222 | shape: Union[Tuple, List], 223 | conf_thres: float = 0.25, 224 | iou_thres: float = 0.65) \ 225 | -> Tuple[ndarray, ndarray, ndarray, ndarray]: 226 | assert len(data) == 2 227 | h, w = shape[0] // 4, shape[1] // 4 # 4x downsampling 228 | outputs, proto = (i[0] for i in data) 229 | bboxes, scores, labels, maskconf = np.split(outputs, [4, 5, 6], 1) 230 | scores, labels = scores.squeeze(), labels.squeeze() 231 | idx = scores > conf_thres 232 | if not idx.any(): # no bounding boxes or seg were created 233 | return np.empty((0, 4), dtype=np.float32), \ 234 | np.empty((0,), dtype=np.float32), \ 235 | np.empty((0,), dtype=np.int32), \ 236 | np.empty((0, 0, 0, 0), dtype=np.int32) 237 | 238 | bboxes, scores, labels, maskconf = \ 239 | bboxes[idx], scores[idx], labels[idx], maskconf[idx] 240 | cvbboxes = np.concatenate([bboxes[:, :2], bboxes[:, 2:] - bboxes[:, :2]], 241 | 1) 242 | labels = 
labels.astype(np.int32) 243 | v0, v1 = map(int, (cv2.__version__).split('.')[:2]) 244 | assert v0 == 4, 'OpenCV version is wrong' 245 | if v1 > 6: 246 | idx = cv2.dnn.NMSBoxesBatched(cvbboxes, scores, labels, conf_thres, 247 | iou_thres) 248 | else: 249 | idx = cv2.dnn.NMSBoxes(cvbboxes, scores, conf_thres, iou_thres) 250 | bboxes, scores, labels, maskconf = \ 251 | bboxes[idx], scores[idx], labels[idx], maskconf[idx] 252 | masks = sigmoid(maskconf @ proto).reshape(-1, h, w) 253 | masks = crop_mask(masks, bboxes / 4.) 254 | masks = masks.transpose([1, 2, 0]) 255 | masks = cv2.resize(masks, (shape[1], shape[0]), 256 | interpolation=cv2.INTER_LINEAR) 257 | masks = masks.transpose(2, 0, 1) 258 | masks = np.ascontiguousarray((masks > 0.5)[..., None], dtype=np.float32) 259 | return bboxes, scores, labels, masks 260 | 261 | 262 | def pose_postprocess( 263 | data: Union[Tuple, ndarray], 264 | conf_thres: float = 0.25, 265 | iou_thres: float = 0.65) \ 266 | -> Tuple[ndarray, ndarray, ndarray]: 267 | if isinstance(data, tuple): 268 | assert len(data) == 1 269 | data = data[0] 270 | outputs = np.transpose(data[0], (1, 0)) 271 | bboxes, scores, kpts = np.split(outputs, [4, 5], 1) 272 | scores, kpts = scores.squeeze(), kpts.squeeze() 273 | idx = scores > conf_thres 274 | if not idx.any(): # no bounding boxes or seg were created 275 | return np.empty((0, 4), dtype=np.float32), np.empty( 276 | (0, ), dtype=np.float32), np.empty((0, 0, 0), dtype=np.float32) 277 | bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx] 278 | xycenter, wh = np.split(bboxes, [ 279 | 2, 280 | ], -1) 281 | cvbboxes = np.concatenate([xycenter - 0.5 * wh, wh], -1) 282 | idx = cv2.dnn.NMSBoxes(cvbboxes, scores, conf_thres, iou_thres) 283 | cvbboxes, scores, kpts = cvbboxes[idx], scores[idx], kpts[idx] 284 | cvbboxes[:, 2:] += cvbboxes[:, :2] 285 | return cvbboxes, scores, kpts.reshape(idx.shape[0], -1, 3) 286 | -------------------------------------------------------------------------------- /srcs/tracker_trt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #!/usr/bin/python3 3 | """ 4 | Created on 2021/5/24 13:46 5 | @Author: Wang Cong 6 | @Email : iwangcong@outlook.com 7 | @Version : 0.1 8 | @File : tracker_trt.py 9 | """ 10 | import cv2 11 | import numpy as np 12 | 13 | from deep_sort.utils.parser import get_config 14 | from deep_sort.deep_sort import DeepSort 15 | 16 | cfg = get_config() 17 | cfg.merge_from_file("./deep_sort/configs/deep_sort.yaml") 18 | deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, 19 | max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, 20 | nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, 21 | max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, 22 | use_cuda=True) 23 | 24 | 25 | def draw_bboxes(image, bboxes, line_thickness): 26 | line_thickness = line_thickness or round( 27 | 0.002 * (image.shape[0] + image.shape[1]) / 2) + 1 28 | 29 | list_pts = [] 30 | point_radius = 4 31 | 32 | for (x1, y1, x2, y2, cls_id, pos_id) in bboxes: 33 | color = (0, 255, 0) 34 | 35 | check_point_x = x1 36 | check_point_y = int(y1 + ((y2 - y1) * 0.6)) 37 | 38 | c1, c2 = (x1, y1), (x2, y2) 39 | cv2.rectangle(image, c1, c2, color, thickness=line_thickness, lineType=cv2.LINE_AA) 40 | 41 | font_thickness = max(line_thickness - 1, 1) 42 | t_size = cv2.getTextSize(cls_id, 0, fontScale=line_thickness / 3, thickness=font_thickness)[0] 43 | c2 = c1[0] + 
t_size[0], c1[1] - t_size[1] - 3 44 | cv2.rectangle(image, c1, c2, color, -1, cv2.LINE_AA) # filled 45 | cv2.putText(image, '{} ID-{}'.format(cls_id, pos_id), (c1[0], c1[1] - 2), 0, line_thickness / 3, 46 | [225, 255, 255], thickness=font_thickness, lineType=cv2.LINE_AA) 47 | 48 | list_pts.append([check_point_x-point_radius, check_point_y-point_radius]) 49 | list_pts.append([check_point_x-point_radius, check_point_y+point_radius]) 50 | list_pts.append([check_point_x+point_radius, check_point_y+point_radius]) 51 | list_pts.append([check_point_x+point_radius, check_point_y-point_radius]) 52 | 53 | ndarray_pts = np.array(list_pts, np.int32) 54 | 55 | cv2.fillPoly(image, [ndarray_pts], color=(0, 0, 255)) 56 | 57 | list_pts.clear() 58 | 59 | return image 60 | 61 | def clear(): 62 | deepsort.clear() 63 | def update(bboxes, image): 64 | bbox_xywh = [] 65 | lbls = [] 66 | confs = [] 67 | bboxes2draw = [] 68 | 69 | if len(bboxes) > 0: 70 | for x1, y1, x2, y2, lbl, conf in bboxes: 71 | obj = [ 72 | int((x1 + x2) / 2), int((y1 + y2) / 2), 73 | x2 - x1, y2 - y1 74 | ] 75 | bbox_xywh.append(obj) 76 | lbls.append(lbl) 77 | confs.append(conf) 78 | 79 | xywhs = np.array(bbox_xywh) 80 | confss = np.array(confs) 81 | 82 | outputs = deepsort.update(xywhs, lbls, confss, image) 83 | 84 | for value in list(outputs): 85 | x1, y1, x2, y2, track_label, track_id = value 86 | bboxes2draw.append((int(x1), int(y1), int(x2), int(y2), track_label, int(track_id))) 87 | pass 88 | pass 89 | 90 | return bboxes2draw 91 | -------------------------------------------------------------------------------- /srcs/yolov8_bytetrack_trt.py: -------------------------------------------------------------------------------- 1 | from models import TRTModule 2 | import argparse 3 | from time import time 4 | import cv2 5 | from pathlib import Path 6 | import torch 7 | import ctypes 8 | from bytetrack.byte_tracker import BYTETracker 9 | 10 | from config import CLASSES, COLORS 11 | from models.torch_utils import det_postprocess 12 | from models.utils import blob, letterbox, path_to_list 13 | from datetime import datetime, timedelta 14 | import json 15 | import numpy as np 16 | import random 17 | 18 | 19 | 20 | class ROI: 21 | def __init__(self, x1, y1, x2, y2, roi_id): 22 | self.x1 = x1 23 | self.y1 = y1 24 | self.x2 = x2 25 | self.y2 = y2 26 | self.roi_id = roi_id 27 | self.count = 0 28 | 29 | 30 | DICT_ROIS = {} 31 | DEBOUNCE_PERIOD = timedelta(seconds=2) 32 | person_tracker = {} 33 | debounce_tracker = {} 34 | 35 | color_dict = {} 36 | 37 | def get_random_color(id): 38 | if id not in color_dict: 39 | color_dict[id] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) 40 | return color_dict[id] 41 | 42 | 43 | 44 | def main(args): 45 | args_bytetrack = argparse.Namespace() 46 | args_bytetrack.track_thresh = 0.2 47 | args_bytetrack.track_buffer = 200 48 | args_bytetrack.mot20 = True 49 | args_bytetrack.match_thresh = 0.7 50 | 51 | tracker = BYTETracker(args_bytetrack) 52 | device = torch.device(args.device) 53 | Engine = TRTModule(args.engine, device) 54 | H, W = Engine.inp_info[0].shape[-2:] 55 | 56 | Engine.set_desired(['num_dets', 'bboxes', 'scores', 'labels']) 57 | 58 | fps = 0 59 | # input video 60 | cap = cv2.VideoCapture(args.vid) 61 | # input webcam 62 | # cap = cv2.VideoCapture(0) 63 | 64 | video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 65 | video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 66 | out = cv2.VideoWriter('output.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 30, (video_width,video_height)) 
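    # Overview of the per-frame loop below:
    #   1. letterbox() resizes and pads the BGR frame to the engine input size (W, H),
    #      returning the scale ratio and the (dw, dh) padding offsets.
    #   2. blob() converts the RGB image to a normalized 1x3xHxW float32 tensor.
    #   3. The TensorRT engine outputs (num_dets, bboxes, scores, labels); det_postprocess()
    #      unpacks them, and the padding and scale are undone to map boxes back to the
    #      original frame coordinates.
    #   4. Person detections (label 0) above the score threshold are passed to BYTETracker,
    #      which assigns persistent track IDs that are drawn on the frame and written to video.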
67 |     while True:
68 |         ret, frame = cap.read()
69 | 
70 |         if frame is None:
71 |             print('No image input!')
72 |             break
73 | 
74 |         start = float(time())
75 |         fps_str = "FPS:"
76 |         fps_str += "{:.2f}".format(fps)
77 |         bgr = frame
78 |         bgr, ratio, dwdh = letterbox(bgr, (W, H))
79 |         rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
80 | 
81 |         tensor = blob(rgb, return_seg=False)
82 |         # (dw, dh) is doubled to (dw, dh, dw, dh) so it can be subtracted from xyxy boxes
83 |         dwdh = torch.asarray(dwdh * 2, dtype=torch.float32, device=device)
84 | 
85 |         tensor = torch.asarray(tensor, device=device)
86 | 
87 |         data = Engine(tensor)
88 |         bboxes, scores, labels = det_postprocess(data)
89 |         # print(labels)
90 | 
91 |         if bboxes.numel() == 0:
92 |             continue
93 |         # undo letterbox padding and scaling to get boxes in original-frame coordinates
94 |         bboxes -= dwdh
95 |         bboxes /= ratio
96 |         output = []
97 |         for (bbox, score, label) in zip(bboxes, scores, labels):
98 |             if label == 0 and score.item() > 0.2:
99 |                 bbox = bbox.round().int().tolist()
100 |                 cls_id = int(label)
101 |                 cls = CLASSES[cls_id]
102 |                 # x1, y1, x2, y2, conf
103 |                 output.append([bbox[0], bbox[1], bbox[2], bbox[3], score.item()])
104 |         output = np.array(output)
105 | 
106 |         info_imgs = frame.shape[:2]
107 |         img_size = info_imgs
108 | 
109 |         if output.size > 0:
110 |             online_targets = tracker.update(output, info_imgs, img_size)
111 |             online_tlwhs = []
112 |             online_ids = []
113 |             online_scores = []
114 |             for t in online_targets:
115 |                 tlwh = t.tlwh
116 |                 tid = t.track_id
117 |                 online_tlwhs.append(tlwh)
118 |                 online_ids.append(tid)
119 |                 online_scores.append(t.score)
120 | 
121 |                 if args.show:
122 |                     cv2.rectangle(frame, (int(tlwh[0]), int(tlwh[1])), (int(tlwh[0] + tlwh[2]), int(tlwh[1] + tlwh[3])), get_random_color(tid), 2)
123 |                     cv2.putText(frame, str(tid), (int(tlwh[0]), int(tlwh[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
124 | 
125 |         end = float(time())
126 | 
127 | 
128 | 
129 | 
130 | 
131 |         fps = 1 / (end - start)
132 |         print(fps_str)
133 |         cv2.putText(frame, "YOLOV8-BYTETrack", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
134 |         cv2.putText(frame, fps_str, (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
135 |         if args.show:
136 |             cv2.imshow("output", frame)
137 |             if cv2.waitKey(1) & 0xFF == ord('q'):
138 |                 break
139 |         out.write(frame)
140 |     out.release()
141 |     cap.release()
142 |     cv2.destroyAllWindows()
143 |     # tracker_trt.clear()
144 | 
145 | 
146 | def parse_args():
147 |     parser = argparse.ArgumentParser()
148 |     parser.add_argument('--engine', type=str, help='Engine file', default='../models/engine/yolov8n.engine')
149 |     parser.add_argument('--vid', type=str, help='Video file', default='../sample_video/sample_2.mp4')
150 |     parser.add_argument('--show',
151 |                         action='store_true',
152 |                         help='Show the results')
153 |     parser.add_argument('--device',
154 |                         type=str,
155 |                         default='cuda:0',
156 |                         help='TensorRT infer device')
157 |     args = parser.parse_args()
158 |     return args
159 | 
160 | 
161 | if __name__ == '__main__':
162 |     args = parse_args()
163 |     main(args)
164 | 
165 | 
--------------------------------------------------------------------------------
/srcs/yolov8_deepsort_trt.py:
--------------------------------------------------------------------------------
1 | from models import TRTModule
2 | import argparse
3 | from time import time
4 | import cv2
5 | from pathlib import Path
6 | import torch
7 | import ctypes
8 | import tracker_trt
9 | 
10 | 
11 | from config import CLASSES, COLORS
12 | from models.torch_utils import det_postprocess
13 | from models.utils import blob, letterbox, path_to_list
14 | from datetime import datetime, timedelta
15 | import json
16 | import random
17 | 
18 | 
19 | 
20 | class ROI:
21 | 
def __init__(self, x1, y1, x2, y2, roi_id): 22 | self.x1 = x1 23 | self.y1 = y1 24 | self.x2 = x2 25 | self.y2 = y2 26 | self.roi_id = roi_id 27 | self.count = 0 28 | 29 | 30 | DICT_ROIS = {} 31 | DEBOUNCE_PERIOD = timedelta(seconds=2) 32 | person_tracker = {} 33 | debounce_tracker = {} 34 | 35 | 36 | color_dict = {} 37 | 38 | def get_random_color(id): 39 | if id not in color_dict: 40 | color_dict[id] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) 41 | return color_dict[id] 42 | 43 | 44 | def main(args): 45 | 46 | 47 | device = torch.device(args.device) 48 | Engine = TRTModule(args.engine, device) 49 | H, W = Engine.inp_info[0].shape[-2:] 50 | 51 | Engine.set_desired(['num_dets', 'bboxes', 'scores', 'labels']) 52 | 53 | fps = 0 54 | # input video 55 | cap = cv2.VideoCapture(args.vid) 56 | # input webcam 57 | # cap = cv2.VideoCapture(0) 58 | 59 | video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 60 | video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 61 | out = cv2.VideoWriter('output_ds.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 30, (video_width,video_height)) 62 | while(True): 63 | ret, frame = cap.read() 64 | 65 | if frame is None: 66 | print('No image input!') 67 | break 68 | 69 | start = float(time()) 70 | fps_str = "FPS:" 71 | fps_str += "{:.2f}".format(fps) 72 | bgr = frame 73 | bgr, ratio, dwdh = letterbox(bgr, (W, H)) 74 | rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB) 75 | 76 | tensor = blob(rgb, return_seg=False) 77 | 78 | dwdh = torch.asarray(dwdh * 2, dtype=torch.float32, device=device) 79 | 80 | tensor = torch.asarray(tensor, device=device) 81 | 82 | data = Engine(tensor) 83 | bboxes, scores, labels = det_postprocess(data) 84 | # print(labels) 85 | 86 | if bboxes.numel() == 0: 87 | continue 88 | 89 | bboxes -= dwdh 90 | bboxes /= ratio 91 | detections = [] 92 | for (bbox, score, label) in zip(bboxes, scores, labels): 93 | if label == 0 and score.item() > 0.3: 94 | bbox = bbox.round().int().tolist() 95 | cls_id = int(label) 96 | cls = CLASSES[cls_id] 97 | detections.append((bbox[0], bbox[1], bbox[2] , bbox[3], cls, score.item())) 98 | end = float(time()) 99 | 100 | list_bbox = tracker_trt.update(detections,frame) 101 | for (x1, y1, x2, y2, cls, track_id) in list_bbox: 102 | color = [0, 255, 0] 103 | 104 | if args.show: 105 | # frame = draw_roi(frame) 106 | cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 2) 107 | cv2.putText(frame, f'{cls} {track_id}', (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) 108 | 109 | 110 | 111 | fps = 1/(end - start) 112 | print(fps_str) 113 | 114 | cv2.putText(frame, fps_str, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) 115 | cv2.putText(frame, "YOLOV8-DEEP SORT", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) 116 | if args.show: 117 | cv2.imshow("output", frame) 118 | if cv2.waitKey(1) & 0xFF == ord('q'): 119 | break 120 | out.write(frame) 121 | 122 | out.release() 123 | cap.release() 124 | cv2.destroyAllWindows() 125 | # tracker_trt.clear() 126 | 127 | 128 | def parse_args(): 129 | parser = argparse.ArgumentParser() 130 | parser.add_argument('--engine', type=str, help='Engine file', default='../models/engine/yolov8n.engine') 131 | parser.add_argument('--vid', type=str, help='Video file', default='../sample_video/sample.mp4') 132 | parser.add_argument('--show', 133 | action='store_true', 134 | help='Show the results') 135 | parser.add_argument('--device', 136 | type=str, 137 | default='cuda:0', 138 | help='TensorRT infer device') 139 | args = 
parser.parse_args() 140 | return args 141 | 142 | 143 | if __name__ == '__main__': 144 | args = parse_args() 145 | main(args) 146 | 147 | --------------------------------------------------------------------------------
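Usage sketch: the snippet below illustrates how the NumPy helpers in srcs/models/utils.py fit together (letterbox for resize/pad, blob for tensor conversion, nms for class-aware suppression), assuming it is run from the srcs/ directory like the scripts above. The image path and the random boxes/scores are placeholders for illustration only; the tracking scripts feed the TensorRT engine's decoded output rather than raw class scores.

import cv2
import numpy as np

from models.utils import letterbox, blob, nms

# Placeholder input: any BGR image readable by OpenCV.
frame = cv2.imread('frame.jpg')

# Resize/pad to the 640x640 network input; keep ratio and padding to undo later.
padded, ratio, (dw, dh) = letterbox(frame, (640, 640))
tensor = blob(cv2.cvtColor(padded, cv2.COLOR_BGR2RGB))  # 1x3x640x640 float32 in [0, 1]

# nms() expects xyxy boxes and per-class scores (N x num_classes); random values
# stand in for a decoded YOLO head output purely to show the call signature.
boxes = np.random.rand(10, 4).astype(np.float32) * 320.0
boxes[:, 2:] += boxes[:, :2]          # ensure x2 >= x1 and y2 >= y1
cls_scores = np.random.rand(10, 80).astype(np.float32)
keep_boxes, keep_scores, keep_labels = nms(boxes, cls_scores,
                                           iou_thres=0.65, conf_thres=0.25)

# Map kept boxes from letterboxed coordinates back to the original frame,
# mirroring the `bboxes -= dwdh; bboxes /= ratio` step in the tracking scripts.
keep_boxes[:, [0, 2]] -= dw
keep_boxes[:, [1, 3]] -= dh
keep_boxes /= ratio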