├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── assets └── sample_yolov8_bytetrack.gif ├── build_opencv.sh ├── models ├── engine │ └── .gitkeep ├── onnx │ ├── .gitkeep │ ├── deepsort.onnx │ └── yolov8n.onnx └── to_export │ └── .gitkeep ├── sample_video ├── .gitkeep └── sample_1.webm └── srcs ├── bytetrack ├── basetrack.py ├── byte_tracker.py ├── kalman_filter.py └── matching.py ├── config.py ├── deep_sort ├── __init__.py ├── configs │ └── deep_sort.yaml ├── deep_sort │ ├── __init__.py │ ├── deep │ │ ├── __init__.py │ │ ├── checkpoint │ │ │ └── .gitkeep │ │ └── feature_extractor_trt.py │ ├── deep_sort_trt.py │ └── sort │ │ ├── __init__.py │ │ ├── detection.py │ │ ├── iou_matching.py │ │ ├── kalman_filter.py │ │ ├── linear_assignment.py │ │ ├── nn_matching.py │ │ ├── preprocessing.py │ │ ├── track.py │ │ └── tracker.py └── utils │ ├── __init__.py │ ├── asserts.py │ ├── draw.py │ ├── evaluation.py │ ├── io.py │ ├── json_logger.py │ ├── log.py │ ├── parser.py │ └── tools.py ├── models ├── __init__.py ├── api.py ├── common.py ├── cudart_api.py ├── engine.py ├── pycuda_api.py ├── torch_utils.py └── utils.py ├── tracker_trt.py ├── yolov8_bytetrack_trt.py └── yolov8_deepsort_trt.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | 163 | 164 | *.pt 165 | *.pth 166 | *.engine 167 | *.pkl 168 | *.h5 169 | *.npy 170 | *.npz 171 | opencv_build 172 | *.mp4 173 | *.avi 174 | tracking_bytetrack_output.txt 175 | run_bt.py 176 | *.llc 177 | *.webm 178 | 179 | .idea 180 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "refs/YOLOv8-TensorRT"] 2 | path = refs/YOLOv8-TensorRT 3 | url = https://github.com/triple-Mu/YOLOv8-TensorRT 4 | [submodule "refs/deepsort_tensorrt"] 5 | path = refs/deepsort_tensorrt 6 | url = https://github.com/GesilaA/deepsort_tensorrt 7 | [submodule "refs/opencv"] 8 | path = refs/opencv 9 | url = https://github.com/opencv/opencv.git 10 | [submodule "refs/opencv_contrib"] 11 | path = refs/opencv_contrib 12 | url = https://github.com/opencv/opencv_contrib.git 13 | [submodule "refs/ByteTrack"] 14 | path = refs/ByteTrack 15 | url = https://github.com/ifzhang/ByteTrack.git 16 | [submodule "refs/BoostTrack"] 17 | path = refs/BoostTrack 18 | url = https://github.com/vukasin-stanojevic/BoostTrack 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Bang Nguyen Anh 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 
all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | # YOLO Object Tracking TensorRT 4 | 5 |
6 | 7 | 8 | This project uses OpenCV to capture video from a camera or a video file, **YOLOv8 TensorRT** to detect objects, and **DeepSORT TensorRT** or **BYTETrack** to track them. 9 | 10 | Both **NVIDIA dGPU** and **NVIDIA Jetson** devices are supported. 11 | 12 | ## Demo 13 | 14 | ### OpenCV + YOLOv8 + BYTETrack on NVIDIA GeForce GTX 1660Ti 15 | ![](assets/sample_yolov8_bytetrack.gif) 16 | 17 | 18 | 19 | 20 | ## Performance 21 | 22 | ### Full pipeline: OpenCV + YOLOv8 + DeepSORT TensorRT 23 | End-to-end pipeline: OpenCV captures the video, YOLOv8 TensorRT detects objects, and DeepSORT TensorRT tracks them. 24 | 25 | | Model | Device | FPS | 26 | | --- | --- | --- | 27 | | OpenCV + YOLOv8n + DeepSORT | NVIDIA dGPU GTX 1660Ti 6Gb | ~ | 28 | | OpenCV + YOLOv8n + DeepSORT | NVIDIA Jetson Xavier NX 8Gb | ~ | 29 | | OpenCV + YOLOv8n + DeepSORT | NVIDIA Jetson Orin Nano 8Gb | ~34 | 30 | 31 | ### YOLOv8 TensorRT model 32 | 33 | Speed of the YOLOv8 TensorRT engine measured with `trtexec` from TensorRT 34 | 35 | (`/usr/src/tensorrt/bin/trtexec` on NVIDIA Jetson) 36 | 37 | > batch size = 1 38 | 39 | | Model | Device | Throughput (qps) | Latency (ms) | 40 | | --- | --- | --- | --- | 41 | | `yolov8n.engine` | NVIDIA dGPU GTX 1660Ti 6Gb | ~419.742 | ~2.91736 | 42 | | `yolov8n.engine` | NVIDIA Jetson Xavier NX 8Gb | ~ | ~ | 43 | | `yolov8n.engine` | NVIDIA Jetson Orin Nano 8Gb | ~137.469 | ~137.469 | 44 | 45 | ### DeepSORT TensorRT model 46 | 47 | Speed of the DeepSORT TensorRT engine measured with `trtexec` from TensorRT 48 | 49 | (`/usr/src/tensorrt/bin/trtexec` on NVIDIA Jetson) 50 | 51 | > batch size = 1 52 | 53 | | Model | Device | Throughput (qps) | Latency (ms) | 54 | | --- | --- | --- | --- | 55 | | `deepsort.engine` | NVIDIA dGPU GTX 1660Ti 6Gb | ~614.738 | ~1.52197 | 56 | | `deepsort.engine` | NVIDIA Jetson Xavier NX 8Gb | ~ | ~ | 57 | | `deepsort.engine` | NVIDIA Jetson Orin Nano 8Gb | ~546.135 | ~1.82227 | 58 | 59 | ## For NVIDIA dGPU 60 | 61 | ### Environment 62 | 63 | - NVIDIA CUDA: 11.4 64 | - NVIDIA TensorRT: 8.5.2 65 | 66 | 67 | #### Clone repository 68 | 69 | Clone the repository and its submodules 70 | 71 | ```bash 72 | git clone --recurse-submodules https://github.com/nabang1010/YOLOv8_DeepSORT_TensorRT.git 73 | ``` 74 | 75 | #### Prepare environment 76 | 77 | Create a new environment 78 | 79 | ```bash 80 | conda create -n yolov8_ds python=3.8 81 | ``` 82 | 83 | Activate the environment 84 | 85 | ```bash 86 | conda activate yolov8_ds 87 | ``` 88 | 89 | ### Prepare models 90 | 91 | Go to **`refs/YOLOv8-TensorRT`** and install the requirements for exporting models 92 | 93 | ```bash 94 | cd refs/YOLOv8-TensorRT 95 | pip3 install -r requirements.txt 96 | pip3 install tensorrt easydict pycuda lap cython_bbox 97 | ``` 98 | Install `python3-libnvinfer` 99 | 100 | ```bash 101 | sudo apt-get install python3-libnvinfer 102 | ``` 103 | 104 | Download the YOLOv8 weights from [ultralytics](https://github.com/ultralytics/ultralytics) here: [yolov8n.pt](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n.pt) and save them in the **`models/to_export`** folder 105 | 106 | **Export YOLOv8 ONNX model** 107 | 108 | In **`refs/YOLOv8-TensorRT`** run the following command to export the YOLOv8 ONNX model 109 | 110 | ```bash 111 | python3 export-det.py \ 112 | --weights ../../models/to_export/yolov8n.pt \ 113 | --iou-thres 0.65 \ 114 | --conf-thres 0.25 \ 115 | --topk 100 \ 116 | --opset 11 \ 117 | --sim \ 118 | --input-shape 1 3 640 640 \ 119 | --device cuda:0 120 | ``` 121 |
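Optionally, sanity-check the exported ONNX graph before building the engine. The snippet below is a minimal sketch: it assumes `onnxruntime` is installed (`pip3 install onnxruntime`, which is not part of the steps above) and that the model is still at the path used by the export command; it only prints the detector's input/output signatures (the input should be `1x3x640x640`).

```python
# Minimal ONNX sanity check (assumes `pip3 install onnxruntime`).
import onnxruntime as ort

sess = ort.InferenceSession(
    "../../models/to_export/yolov8n.onnx",
    providers=["CPUExecutionProvider"],
)

# Print the graph signature to confirm the export looks as expected.
for inp in sess.get_inputs():
    print("input :", inp.name, inp.shape, inp.type)
for out in sess.get_outputs():
    print("output:", out.name, out.shape, out.type)
```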
122 | The output `.onnx` model will be saved in the **`models/to_export`** folder; move it to the **`models/onnx`** folder 123 | ```bash 124 | mv ../../models/to_export/yolov8n.onnx ../../models/onnx/yolov8n.onnx 125 | ``` 126 | **Export YOLOv8 TensorRT model** 127 | 128 | In **`refs/YOLOv8-TensorRT`** run the following command to build the YOLOv8 TensorRT engine 129 | 130 | ```bash 131 | python3 build.py \ 132 | --weights ../../models/onnx/yolov8n.onnx \ 133 | --iou-thres 0.65 \ 134 | --conf-thres 0.25 \ 135 | --topk 100 \ 136 | --fp16 \ 137 | --device cuda:0 138 | ``` 139 | The output `.engine` model will be saved in the **`models/onnx`** folder; move it to the **`models/engine`** folder 140 | 141 | ```bash 142 | mv ../../models/onnx/yolov8n.engine ../../models/engine/yolov8n.engine 143 | ``` 144 | 145 | **Build OpenCV** 146 | 147 | ```bash 148 | bash build_opencv.sh 149 | ``` 150 | 151 | **Export DeepSORT TensorRT model *(if you use BYTETrack, skip this step)*** 152 | 153 | 154 | Install `libeigen3-dev` 155 | ```bash 156 | apt-get install libeigen3-dev 157 | ``` 158 | Go to **`refs/deepsort_tensorrt`** and run the following commands to build `onnx2engine` 159 | 160 | ```bash 161 | cd refs/deepsort_tensorrt 162 | mkdir build 163 | cd build 164 | cmake .. 165 | make -j$(nproc) 166 | 167 | ``` 168 | 169 | > If you hit the error `fatal error: Eigen/Core: No such file or directory`, replace `#include <Eigen/Core>` with `#include <eigen3/Eigen/Core>` in the files of that repo that include it (`datatype.h`, `kalmanfilter.cpp`) and rebuild. 170 | 171 | > If you hit the error `error: looser exception specification on overriding virtual function 'virtual void Logger::log(nvinfer1::ILogger::Severity ...'`, add `noexcept` before `override` in `logger.h` line 239 and rebuild. 172 | 173 | Run the following command to export the DeepSORT TensorRT model 174 | 175 | ```bash 176 | ./build/onnx2engine ../../models/onnx/deepsort.onnx ../../models/engine/deepsort.engine 177 | ``` 178 | ### Run script 179 | 180 | **Go to the `srcs` folder** 181 | 182 | ```bash 183 | cd srcs 184 | ``` 185 | 186 | **Run YOLOv8 + DeepSORT** 187 | 188 | ```bash 189 | python3 yolov8_deepsort_trt.py --show 190 | 191 | ``` 192 | **Run YOLOv8 + BYTETrack** 193 | 194 | ```bash 195 | python3 yolov8_bytetrack_trt.py --show 196 | 197 | ``` 198 |
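A note on tracker parameters: the vendored `BYTETracker` in `srcs/bytetrack/byte_tracker.py` reads its thresholds from an `args` object with `track_thresh`, `track_buffer`, `match_thresh` and `mot20` attributes. The sketch below only illustrates that interface; the attribute names come from the vendored code, while the values shown are common ByteTrack defaults and are not necessarily what the repo's scripts pass in.

```python
# Sketch of the argument object BYTETracker expects (attribute names are taken
# from srcs/bytetrack/byte_tracker.py; the values are typical ByteTrack defaults,
# not necessarily what this repo's scripts use). Run from inside srcs/ with the
# environment prepared above (lap, cython_bbox, torch installed).
from types import SimpleNamespace

import numpy as np
from bytetrack.byte_tracker import BYTETracker

args = SimpleNamespace(
    track_thresh=0.5,   # high-confidence detection threshold
    track_buffer=30,    # frames a lost track is kept before removal
    match_thresh=0.8,   # IoU matching threshold for the first association
    mot20=False,        # keep score fusion enabled (MOT20 mode off)
)
tracker = BYTETracker(args, frame_rate=30)

# Per frame: detections as an (N, 5) array of [x1, y1, x2, y2, score];
# passing the frame size for both img_info and img_size leaves boxes unscaled.
dets = np.array([[100, 120, 180, 300, 0.87]], dtype=np.float32)
online_targets = tracker.update(dets, (720, 1280), (720, 1280))
for t in online_targets:
    print(t.track_id, t.tlbr, t.score)
```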
199 | ## For NVIDIA Jetson Device 200 | 201 | ***Coming soon*** 202 | 203 | 204 | --- 205 | 206 | # References 207 | 208 | - [ultralytics](https://github.com/ultralytics/ultralytics) 209 | - [YOLOv8-TensorRT](https://github.com/triple-Mu/YOLOv8-TensorRT) 210 | - [deepsort_tensorrt](https://github.com/GesilaA/deepsort_tensorrt) 211 | - [yolov5_deepsort_tensorrt](https://github.com/cong/yolov5_deepsort_tensorrt) 212 | - [ByteTrack](https://github.com/ifzhang/ByteTrack) 213 | 214 | # Star History 215 | 216 | 217 | 218 | 219 | 220 | Star History Chart 221 | 222 | 223 | -------------------------------------------------------------------------------- /assets/sample_yolov8_bytetrack.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/assets/sample_yolov8_bytetrack.gif -------------------------------------------------------------------------------- /build_opencv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd refs/opencv 4 | sed -i 's|include <Eigen/Core>|include <eigen3/Eigen/Core>|g' modules/core/include/opencv2/core/private.hpp 5 | rm -rf build 6 | mkdir build && cd build 7 | apt install -y build-essential cmake git pkg-config libgtk-3-dev \ 8 | libavcodec-dev libavformat-dev libswscale-dev libv4l-dev \ 9 | libxvidcore-dev libx264-dev libjpeg-dev libpng-dev libtiff-dev \ 10 | gfortran openexr libatlas-base-dev python3-dev python3-numpy \ 11 | libtbb2 libtbb-dev libdc1394-22-dev 12 | cmake -D CMAKE_BUILD_TYPE=RELEASE \ 13 | -D CMAKE_INSTALL_PREFIX=/usr/local \ 14 | -D EIGEN_INCLUDE_PATH=/usr/include/eigen3 \ 15 | -D ENABLE_FAST_MATH=1 \ 16 | -D CUDA_FAST_MATH=1 \ 17 | -D WITH_CUBLAS=1 \ 18 | -D OPENCV_GENERATE_PKGCONFIG=ON \ 19 | -D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules \ 20 | -D WITH_GSTREAMER=ON \ 21 | -D WITH_V4L=ON \ 22 | -D WITH_LIBV4L=ON \ 23 | -D BUILD_opencv_python2=ON \ 24 | -D BUILD_opencv_python3=ON \ 25 | ../ 26 | # -D WITH_CUDA=ON \ 27 | make -j$(nproc) 28 | make install 29 | ldconfig -v 30 | 31 | -------------------------------------------------------------------------------- /models/engine/.gitkeep: -------------------------------------------------------------------------------- 1 | Store NVIDIA TensorRT Engine models -------------------------------------------------------------------------------- /models/onnx/.gitkeep: -------------------------------------------------------------------------------- 1 | Store ONNX models to export -------------------------------------------------------------------------------- /models/onnx/deepsort.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/models/onnx/deepsort.onnx -------------------------------------------------------------------------------- /models/onnx/yolov8n.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/models/onnx/yolov8n.onnx -------------------------------------------------------------------------------- /models/to_export/.gitkeep: -------------------------------------------------------------------------------- 1 | Store models to export -------------------------------------------------------------------------------- /sample_video/.gitkeep: -------------------------------------------------------------------------------- 1 | Store sample video -------------------------------------------------------------------------------- /sample_video/sample_1.webm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/sample_video/sample_1.webm -------------------------------------------------------------------------------- /srcs/bytetrack/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args):
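# Implemented by concrete tracks, e.g. STrack.activate(kalman_filter, frame_id) in byte_tracker.py.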
40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed -------------------------------------------------------------------------------- /srcs/bytetrack/byte_tracker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import deque 3 | import os 4 | import os.path as osp 5 | import copy 6 | import torch 7 | import torch.nn.functional as F 8 | 9 | from .kalman_filter import KalmanFilter 10 | from .matching import iou_distance, fuse_score, linear_assignment 11 | from .basetrack import BaseTrack, TrackState 12 | 13 | class STrack(BaseTrack): 14 | shared_kalman = KalmanFilter() 15 | def __init__(self, tlwh, score): 16 | 17 | # wait activate 18 | self._tlwh = np.asarray(tlwh, dtype=np.float) 19 | self.kalman_filter = None 20 | self.mean, self.covariance = None, None 21 | self.is_activated = False 22 | 23 | self.score = score 24 | self.tracklet_len = 0 25 | 26 | def predict(self): 27 | mean_state = self.mean.copy() 28 | if self.state != TrackState.Tracked: 29 | mean_state[7] = 0 30 | self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) 31 | 32 | @staticmethod 33 | def multi_predict(stracks): 34 | if len(stracks) > 0: 35 | multi_mean = np.asarray([st.mean.copy() for st in stracks]) 36 | multi_covariance = np.asarray([st.covariance for st in stracks]) 37 | for i, st in enumerate(stracks): 38 | if st.state != TrackState.Tracked: 39 | multi_mean[i][7] = 0 40 | multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) 41 | for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): 42 | stracks[i].mean = mean 43 | stracks[i].covariance = cov 44 | 45 | def activate(self, kalman_filter, frame_id): 46 | """Start a new tracklet""" 47 | self.kalman_filter = kalman_filter 48 | self.track_id = self.next_id() 49 | self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) 50 | 51 | self.tracklet_len = 0 52 | self.state = TrackState.Tracked 53 | if frame_id == 1: 54 | self.is_activated = True 55 | # self.is_activated = True 56 | self.frame_id = frame_id 57 | self.start_frame = frame_id 58 | 59 | def re_activate(self, new_track, frame_id, new_id=False): 60 | self.mean, self.covariance = self.kalman_filter.update( 61 | self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) 62 | ) 63 | self.tracklet_len = 0 64 | self.state = TrackState.Tracked 65 | self.is_activated = True 66 | self.frame_id = frame_id 67 | if new_id: 68 | self.track_id = self.next_id() 69 | self.score = new_track.score 70 | 71 | def update(self, new_track, frame_id): 72 | """ 73 | Update a matched track 74 | :type new_track: STrack 75 | :type frame_id: int 76 | :type update_feature: bool 77 | :return: 78 | """ 79 | self.frame_id = frame_id 80 | self.tracklet_len += 1 81 | 82 | new_tlwh = new_track.tlwh 83 | self.mean, self.covariance = self.kalman_filter.update( 84 | self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) 85 | self.state = TrackState.Tracked 86 | self.is_activated = True 87 | 88 | self.score = new_track.score 89 | 90 | @property 91 | # @jit(nopython=True) 92 | def tlwh(self): 93 | """Get current position in bounding box format `(top left x, top left y, 94 | width, height)`. 
95 | """ 96 | if self.mean is None: 97 | return self._tlwh.copy() 98 | ret = self.mean[:4].copy() 99 | ret[2] *= ret[3] 100 | ret[:2] -= ret[2:] / 2 101 | return ret 102 | 103 | @property 104 | # @jit(nopython=True) 105 | def tlbr(self): 106 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 107 | `(top left, bottom right)`. 108 | """ 109 | ret = self.tlwh.copy() 110 | ret[2:] += ret[:2] 111 | return ret 112 | 113 | @staticmethod 114 | # @jit(nopython=True) 115 | def tlwh_to_xyah(tlwh): 116 | """Convert bounding box to format `(center x, center y, aspect ratio, 117 | height)`, where the aspect ratio is `width / height`. 118 | """ 119 | ret = np.asarray(tlwh).copy() 120 | ret[:2] += ret[2:] / 2 121 | ret[2] /= ret[3] 122 | return ret 123 | 124 | def to_xyah(self): 125 | return self.tlwh_to_xyah(self.tlwh) 126 | 127 | @staticmethod 128 | # @jit(nopython=True) 129 | def tlbr_to_tlwh(tlbr): 130 | ret = np.asarray(tlbr).copy() 131 | ret[2:] -= ret[:2] 132 | return ret 133 | 134 | @staticmethod 135 | # @jit(nopython=True) 136 | def tlwh_to_tlbr(tlwh): 137 | ret = np.asarray(tlwh).copy() 138 | ret[2:] += ret[:2] 139 | return ret 140 | 141 | def __repr__(self): 142 | return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) 143 | 144 | 145 | class BYTETracker(object): 146 | def __init__(self, args, frame_rate=30): 147 | self.tracked_stracks = [] # type: list[STrack] 148 | self.lost_stracks = [] # type: list[STrack] 149 | self.removed_stracks = [] # type: list[STrack] 150 | 151 | self.frame_id = 0 152 | self.args = args 153 | #self.det_thresh = args.track_thresh 154 | self.det_thresh = args.track_thresh + 0.1 155 | self.buffer_size = int(frame_rate / 30.0 * args.track_buffer) 156 | self.max_time_lost = self.buffer_size 157 | self.kalman_filter = KalmanFilter() 158 | 159 | def update(self, output_results, img_info, img_size): 160 | self.frame_id += 1 161 | activated_starcks = [] 162 | refind_stracks = [] 163 | lost_stracks = [] 164 | removed_stracks = [] 165 | 166 | if output_results.shape[1] == 5: 167 | scores = output_results[:, 4] 168 | bboxes = output_results[:, :4] 169 | else: 170 | output_results = output_results.cpu().numpy() 171 | scores = output_results[:, 4] * output_results[:, 5] 172 | bboxes = output_results[:, :4] # x1y1x2y2 173 | img_h, img_w = img_info[0], img_info[1] 174 | scale = min(img_size[0] / float(img_h), img_size[1] / float(img_w)) 175 | bboxes /= scale 176 | 177 | remain_inds = scores > self.args.track_thresh 178 | inds_low = scores > 0.1 179 | inds_high = scores < self.args.track_thresh 180 | 181 | inds_second = np.logical_and(inds_low, inds_high) 182 | dets_second = bboxes[inds_second] 183 | dets = bboxes[remain_inds] 184 | scores_keep = scores[remain_inds] 185 | scores_second = scores[inds_second] 186 | 187 | if len(dets) > 0: 188 | '''Detections''' 189 | detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for 190 | (tlbr, s) in zip(dets, scores_keep)] 191 | else: 192 | detections = [] 193 | 194 | ''' Add newly detected tracklets to tracked_stracks''' 195 | unconfirmed = [] 196 | tracked_stracks = [] # type: list[STrack] 197 | for track in self.tracked_stracks: 198 | if not track.is_activated: 199 | unconfirmed.append(track) 200 | else: 201 | tracked_stracks.append(track) 202 | 203 | ''' Step 2: First association, with high score detection boxes''' 204 | strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) 205 | # Predict the current location with KF 206 | STrack.multi_predict(strack_pool) 207 | dists = 
iou_distance(strack_pool, detections) 208 | if not self.args.mot20: 209 | dists = fuse_score(dists, detections) 210 | matches, u_track, u_detection = linear_assignment(dists, thresh=self.args.match_thresh) 211 | 212 | for itracked, idet in matches: 213 | track = strack_pool[itracked] 214 | det = detections[idet] 215 | if track.state == TrackState.Tracked: 216 | track.update(detections[idet], self.frame_id) 217 | activated_starcks.append(track) 218 | else: 219 | track.re_activate(det, self.frame_id, new_id=False) 220 | refind_stracks.append(track) 221 | 222 | ''' Step 3: Second association, with low score detection boxes''' 223 | # association the untrack to the low score detections 224 | if len(dets_second) > 0: 225 | '''Detections''' 226 | detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for 227 | (tlbr, s) in zip(dets_second, scores_second)] 228 | else: 229 | detections_second = [] 230 | r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] 231 | dists = iou_distance(r_tracked_stracks, detections_second) 232 | matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5) 233 | for itracked, idet in matches: 234 | track = r_tracked_stracks[itracked] 235 | det = detections_second[idet] 236 | if track.state == TrackState.Tracked: 237 | track.update(det, self.frame_id) 238 | activated_starcks.append(track) 239 | else: 240 | track.re_activate(det, self.frame_id, new_id=False) 241 | refind_stracks.append(track) 242 | 243 | for it in u_track: 244 | track = r_tracked_stracks[it] 245 | if not track.state == TrackState.Lost: 246 | track.mark_lost() 247 | lost_stracks.append(track) 248 | 249 | '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' 250 | detections = [detections[i] for i in u_detection] 251 | dists = iou_distance(unconfirmed, detections) 252 | if not self.args.mot20: 253 | dists = fuse_score(dists, detections) 254 | matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7) 255 | for itracked, idet in matches: 256 | unconfirmed[itracked].update(detections[idet], self.frame_id) 257 | activated_starcks.append(unconfirmed[itracked]) 258 | for it in u_unconfirmed: 259 | track = unconfirmed[it] 260 | track.mark_removed() 261 | removed_stracks.append(track) 262 | 263 | """ Step 4: Init new stracks""" 264 | for inew in u_detection: 265 | track = detections[inew] 266 | if track.score < self.det_thresh: 267 | continue 268 | track.activate(self.kalman_filter, self.frame_id) 269 | activated_starcks.append(track) 270 | """ Step 5: Update state""" 271 | for track in self.lost_stracks: 272 | if self.frame_id - track.end_frame > self.max_time_lost: 273 | track.mark_removed() 274 | removed_stracks.append(track) 275 | 276 | # print('Ramained match {} s'.format(t4-t3)) 277 | 278 | self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] 279 | self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) 280 | self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) 281 | self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) 282 | self.lost_stracks.extend(lost_stracks) 283 | self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) 284 | self.removed_stracks.extend(removed_stracks) 285 | self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) 286 | # get scores of lost tracks 287 | output_stracks = [track for track in self.tracked_stracks if 
track.is_activated] 288 | 289 | return output_stracks 290 | 291 | 292 | def joint_stracks(tlista, tlistb): 293 | exists = {} 294 | res = [] 295 | for t in tlista: 296 | exists[t.track_id] = 1 297 | res.append(t) 298 | for t in tlistb: 299 | tid = t.track_id 300 | if not exists.get(tid, 0): 301 | exists[tid] = 1 302 | res.append(t) 303 | return res 304 | 305 | 306 | def sub_stracks(tlista, tlistb): 307 | stracks = {} 308 | for t in tlista: 309 | stracks[t.track_id] = t 310 | for t in tlistb: 311 | tid = t.track_id 312 | if stracks.get(tid, 0): 313 | del stracks[tid] 314 | return list(stracks.values()) 315 | 316 | 317 | def remove_duplicate_stracks(stracksa, stracksb): 318 | pdist = iou_distance(stracksa, stracksb) 319 | pairs = np.where(pdist < 0.15) 320 | dupa, dupb = list(), list() 321 | for p, q in zip(*pairs): 322 | timep = stracksa[p].frame_id - stracksa[p].start_frame 323 | timeq = stracksb[q].frame_id - stracksb[q].start_frame 324 | if timep > timeq: 325 | dupb.append(q) 326 | else: 327 | dupa.append(p) 328 | resa = [t for i, t in enumerate(stracksa) if not i in dupa] 329 | resb = [t for i, t in enumerate(stracksb) if not i in dupb] 330 | return resa, resb -------------------------------------------------------------------------------- /srcs/bytetrack/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 
70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. Unobserved velocities are initialized to 0 mean. 105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | #mean = np.dot(self._motion_mat, mean) 120 | mean = np.dot(mean, self._motion_mat.T) 121 | covariance = np.linalg.multi_dot(( 122 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 123 | 124 | return mean, covariance 125 | 126 | def project(self, mean, covariance): 127 | """Project state distribution to measurement space. 128 | 129 | Parameters 130 | ---------- 131 | mean : ndarray 132 | The state's mean vector (8 dimensional array). 133 | covariance : ndarray 134 | The state's covariance matrix (8x8 dimensional). 135 | 136 | Returns 137 | ------- 138 | (ndarray, ndarray) 139 | Returns the projected mean and covariance matrix of the given state 140 | estimate. 141 | 142 | """ 143 | std = [ 144 | self._std_weight_position * mean[3], 145 | self._std_weight_position * mean[3], 146 | 1e-1, 147 | self._std_weight_position * mean[3]] 148 | innovation_cov = np.diag(np.square(std)) 149 | 150 | mean = np.dot(self._update_mat, mean) 151 | covariance = np.linalg.multi_dot(( 152 | self._update_mat, covariance, self._update_mat.T)) 153 | return mean, covariance + innovation_cov 154 | 155 | def multi_predict(self, mean, covariance): 156 | """Run Kalman filter prediction step (Vectorized version). 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The Nx8 dimensional mean matrix of the object states at the previous 161 | time step. 162 | covariance : ndarray 163 | The Nx8x8 dimensional covariance matrics of the object states at the 164 | previous time step. 165 | Returns 166 | ------- 167 | (ndarray, ndarray) 168 | Returns the mean vector and covariance matrix of the predicted 169 | state. Unobserved velocities are initialized to 0 mean. 
170 | """ 171 | std_pos = [ 172 | self._std_weight_position * mean[:, 3], 173 | self._std_weight_position * mean[:, 3], 174 | 1e-2 * np.ones_like(mean[:, 3]), 175 | self._std_weight_position * mean[:, 3]] 176 | std_vel = [ 177 | self._std_weight_velocity * mean[:, 3], 178 | self._std_weight_velocity * mean[:, 3], 179 | 1e-5 * np.ones_like(mean[:, 3]), 180 | self._std_weight_velocity * mean[:, 3]] 181 | sqr = np.square(np.r_[std_pos, std_vel]).T 182 | 183 | motion_cov = [] 184 | for i in range(len(mean)): 185 | motion_cov.append(np.diag(sqr[i])) 186 | motion_cov = np.asarray(motion_cov) 187 | 188 | mean = np.dot(mean, self._motion_mat.T) 189 | left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) 190 | covariance = np.dot(left, self._motion_mat.T) + motion_cov 191 | 192 | return mean, covariance 193 | 194 | def update(self, mean, covariance, measurement): 195 | """Run Kalman filter correction step. 196 | 197 | Parameters 198 | ---------- 199 | mean : ndarray 200 | The predicted state's mean vector (8 dimensional). 201 | covariance : ndarray 202 | The state's covariance matrix (8x8 dimensional). 203 | measurement : ndarray 204 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 205 | is the center position, a the aspect ratio, and h the height of the 206 | bounding box. 207 | 208 | Returns 209 | ------- 210 | (ndarray, ndarray) 211 | Returns the measurement-corrected state distribution. 212 | 213 | """ 214 | projected_mean, projected_cov = self.project(mean, covariance) 215 | 216 | chol_factor, lower = scipy.linalg.cho_factor( 217 | projected_cov, lower=True, check_finite=False) 218 | kalman_gain = scipy.linalg.cho_solve( 219 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 220 | check_finite=False).T 221 | innovation = measurement - projected_mean 222 | 223 | new_mean = mean + np.dot(innovation, kalman_gain.T) 224 | new_covariance = covariance - np.linalg.multi_dot(( 225 | kalman_gain, projected_cov, kalman_gain.T)) 226 | return new_mean, new_covariance 227 | 228 | def gating_distance(self, mean, covariance, measurements, 229 | only_position=False, metric='maha'): 230 | """Compute gating distance between state distribution and measurements. 231 | A suitable distance threshold can be obtained from `chi2inv95`. If 232 | `only_position` is False, the chi-square distribution has 4 degrees of 233 | freedom, otherwise 2. 234 | Parameters 235 | ---------- 236 | mean : ndarray 237 | Mean vector over the state distribution (8 dimensional). 238 | covariance : ndarray 239 | Covariance of the state distribution (8x8 dimensional). 240 | measurements : ndarray 241 | An Nx4 dimensional matrix of N measurements, each in 242 | format (x, y, a, h) where (x, y) is the bounding box center 243 | position, a the aspect ratio, and h the height. 244 | only_position : Optional[bool] 245 | If True, distance computation is done with respect to the bounding 246 | box center position only. 247 | Returns 248 | ------- 249 | ndarray 250 | Returns an array of length N, where the i-th element contains the 251 | squared Mahalanobis distance between (mean, covariance) and 252 | `measurements[i]`. 
253 | """ 254 | mean, covariance = self.project(mean, covariance) 255 | if only_position: 256 | mean, covariance = mean[:2], covariance[:2, :2] 257 | measurements = measurements[:, :2] 258 | 259 | d = measurements - mean 260 | if metric == 'gaussian': 261 | return np.sum(d * d, axis=1) 262 | elif metric == 'maha': 263 | cholesky_factor = np.linalg.cholesky(covariance) 264 | z = scipy.linalg.solve_triangular( 265 | cholesky_factor, d.T, lower=True, check_finite=False, 266 | overwrite_b=True) 267 | squared_maha = np.sum(z * z, axis=0) 268 | return squared_maha 269 | else: 270 | raise ValueError('invalid distance metric') -------------------------------------------------------------------------------- /srcs/bytetrack/matching.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import scipy 4 | import lap 5 | from scipy.spatial.distance import cdist 6 | 7 | from cython_bbox import bbox_overlaps as bbox_ious 8 | from .kalman_filter import chi2inv95 9 | import time 10 | 11 | def merge_matches(m1, m2, shape): 12 | O,P,Q = shape 13 | m1 = np.asarray(m1) 14 | m2 = np.asarray(m2) 15 | 16 | M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) 17 | M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) 18 | 19 | mask = M1*M2 20 | match = mask.nonzero() 21 | match = list(zip(match[0], match[1])) 22 | unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) 23 | unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) 24 | 25 | return match, unmatched_O, unmatched_Q 26 | 27 | 28 | def _indices_to_matches(cost_matrix, indices, thresh): 29 | matched_cost = cost_matrix[tuple(zip(*indices))] 30 | matched_mask = (matched_cost <= thresh) 31 | 32 | matches = indices[matched_mask] 33 | unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) 34 | unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) 35 | 36 | return matches, unmatched_a, unmatched_b 37 | 38 | 39 | def linear_assignment(cost_matrix, thresh): 40 | if cost_matrix.size == 0: 41 | return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) 42 | matches, unmatched_a, unmatched_b = [], [], [] 43 | cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) 44 | for ix, mx in enumerate(x): 45 | if mx >= 0: 46 | matches.append([ix, mx]) 47 | unmatched_a = np.where(x < 0)[0] 48 | unmatched_b = np.where(y < 0)[0] 49 | matches = np.asarray(matches) 50 | return matches, unmatched_a, unmatched_b 51 | 52 | 53 | def ious(atlbrs, btlbrs): 54 | """ 55 | Compute cost based on IoU 56 | :type atlbrs: list[tlbr] | np.ndarray 57 | :type atlbrs: list[tlbr] | np.ndarray 58 | 59 | :rtype ious np.ndarray 60 | """ 61 | ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) 62 | if ious.size == 0: 63 | return ious 64 | 65 | ious = bbox_ious( 66 | np.ascontiguousarray(atlbrs, dtype=np.float), 67 | np.ascontiguousarray(btlbrs, dtype=np.float) 68 | ) 69 | 70 | return ious 71 | 72 | 73 | def iou_distance(atracks, btracks): 74 | """ 75 | Compute cost based on IoU 76 | :type atracks: list[STrack] 77 | :type btracks: list[STrack] 78 | 79 | :rtype cost_matrix np.ndarray 80 | """ 81 | 82 | if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): 83 | atlbrs = atracks 84 | btlbrs = btracks 85 | else: 86 | atlbrs = [track.tlbr for track in atracks] 87 | btlbrs = 
[track.tlbr for track in btracks] 88 | _ious = ious(atlbrs, btlbrs) 89 | cost_matrix = 1 - _ious 90 | 91 | return cost_matrix 92 | 93 | def v_iou_distance(atracks, btracks): 94 | """ 95 | Compute cost based on IoU 96 | :type atracks: list[STrack] 97 | :type btracks: list[STrack] 98 | 99 | :rtype cost_matrix np.ndarray 100 | """ 101 | 102 | if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): 103 | atlbrs = atracks 104 | btlbrs = btracks 105 | else: 106 | atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks] 107 | btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks] 108 | _ious = ious(atlbrs, btlbrs) 109 | cost_matrix = 1 - _ious 110 | 111 | return cost_matrix 112 | 113 | def embedding_distance(tracks, detections, metric='cosine'): 114 | """ 115 | :param tracks: list[STrack] 116 | :param detections: list[BaseTrack] 117 | :param metric: 118 | :return: cost_matrix np.ndarray 119 | """ 120 | 121 | cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) 122 | if cost_matrix.size == 0: 123 | return cost_matrix 124 | det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) 125 | #for i, track in enumerate(tracks): 126 | #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) 127 | track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) 128 | cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features 129 | return cost_matrix 130 | 131 | 132 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): 133 | if cost_matrix.size == 0: 134 | return cost_matrix 135 | gating_dim = 2 if only_position else 4 136 | gating_threshold = chi2inv95[gating_dim] 137 | measurements = np.asarray([det.to_xyah() for det in detections]) 138 | for row, track in enumerate(tracks): 139 | gating_distance = kf.gating_distance( 140 | track.mean, track.covariance, measurements, only_position) 141 | cost_matrix[row, gating_distance > gating_threshold] = np.inf 142 | return cost_matrix 143 | 144 | 145 | def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98): 146 | if cost_matrix.size == 0: 147 | return cost_matrix 148 | gating_dim = 2 if only_position else 4 149 | gating_threshold = chi2inv95[gating_dim] 150 | measurements = np.asarray([det.to_xyah() for det in detections]) 151 | for row, track in enumerate(tracks): 152 | gating_distance = kf.gating_distance( 153 | track.mean, track.covariance, measurements, only_position, metric='maha') 154 | cost_matrix[row, gating_distance > gating_threshold] = np.inf 155 | cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance 156 | return cost_matrix 157 | 158 | 159 | def fuse_iou(cost_matrix, tracks, detections): 160 | if cost_matrix.size == 0: 161 | return cost_matrix 162 | reid_sim = 1 - cost_matrix 163 | iou_dist = iou_distance(tracks, detections) 164 | iou_sim = 1 - iou_dist 165 | fuse_sim = reid_sim * (1 + iou_sim) / 2 166 | det_scores = np.array([det.score for det in detections]) 167 | det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) 168 | #fuse_sim = fuse_sim * (1 + det_scores) / 2 169 | fuse_cost = 1 - fuse_sim 170 | return fuse_cost 171 | 172 | 173 | def fuse_score(cost_matrix, detections): 174 | if cost_matrix.size == 0: 175 | return cost_matrix 176 | iou_sim = 1 - cost_matrix 177 | det_scores = np.array([det.score for 
det in detections]) 178 | det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) 179 | fuse_sim = iou_sim * det_scores 180 | fuse_cost = 1 - fuse_sim 181 | return fuse_cost -------------------------------------------------------------------------------- /srcs/config.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | 5 | random.seed(0) 6 | 7 | # detection model classes 8 | CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 9 | 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 10 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 11 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 12 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 13 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 14 | 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 15 | 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 16 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 17 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 18 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 19 | 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 20 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 21 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush') 22 | 23 | # colors for per classes 24 | COLORS = { 25 | cls: [random.randint(0, 255) for _ in range(3)] 26 | for i, cls in enumerate(CLASSES) 27 | } 28 | 29 | # colors for segment masks 30 | MASK_COLORS = np.array([(255, 56, 56), (255, 157, 151), (255, 112, 31), 31 | (255, 178, 29), (207, 210, 49), (72, 249, 10), 32 | (146, 204, 23), (61, 219, 134), (26, 147, 52), 33 | (0, 212, 187), (44, 153, 168), (0, 194, 255), 34 | (52, 69, 147), (100, 115, 255), (0, 24, 236), 35 | (132, 56, 255), (82, 0, 133), (203, 56, 255), 36 | (255, 149, 200), (255, 55, 199)], 37 | dtype=np.float32) / 255. 
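# Illustrative sketch (assumed usage, not taken from this repo's scripts) of how
# CLASSES and COLORS map a class id to a label and a drawing colour, e.g. with OpenCV:
#
#   import cv2
#   name = CLASSES[int(cls_id)]          # class id -> label string
#   color = COLORS[name]                 # label string -> BGR colour
#   cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
#   cv2.putText(frame, f'{name} {int(track_id)}', (x1, y1 - 5),
#               cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)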
38 | 39 | KPS_COLORS = [[0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], 40 | [255, 128, 0], [255, 128, 0], [255, 128, 0], [255, 128, 0], 41 | [255, 128, 0], [255, 128, 0], [51, 153, 255], [51, 153, 255], 42 | [51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255]] 43 | 44 | SKELETON = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], 45 | [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], 46 | [2, 4], [3, 5], [4, 6], [5, 7]] 47 | 48 | LIMB_COLORS = [[51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255], 49 | [255, 51, 255], [255, 51, 255], [255, 51, 255], [255, 128, 0], 50 | [255, 128, 0], [255, 128, 0], [255, 128, 0], [255, 128, 0], 51 | [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], 52 | [0, 255, 0], [0, 255, 0]] 53 | 54 | # alpha for segment masks 55 | ALPHA = 0.5 56 | -------------------------------------------------------------------------------- /srcs/deep_sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/srcs/deep_sort/__init__.py -------------------------------------------------------------------------------- /srcs/deep_sort/configs/deep_sort.yaml: -------------------------------------------------------------------------------- 1 | DEEPSORT: 2 | REID_CKPT: "/home/sai/yolov8_deepsort_jetson/model/deepsort.engine" 3 | MAX_DIST: 0.3 4 | MIN_CONFIDENCE: 0.3 5 | NMS_MAX_OVERLAP: 0.5 6 | MAX_IOU_DISTANCE: 0.7 7 | MAX_AGE: 70 8 | N_INIT: 3 9 | NN_BUDGET: 100 10 | 11 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/__init__.py: -------------------------------------------------------------------------------- 1 | from .deep_sort_trt import DeepSort 2 | 3 | __all__ = ['DeepSort', 'build_tracker'] 4 | 5 | 6 | def build_tracker(cfg, use_cuda): 7 | return DeepSort(cfg.DEEPSORT.REID_CKPT, 8 | max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, 9 | nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, 10 | max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda) 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/deep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/srcs/deep_sort/deep_sort/deep/__init__.py -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/deep/checkpoint/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/srcs/deep_sort/deep_sort/deep/checkpoint/.gitkeep -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/deep/feature_extractor_trt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #!/usr/bin/python3 3 | """ 4 | Created on 2021/5/24 14:05 5 | @Author: Wang Cong 6 | @Email : iwangcong@outlook.com 7 | @Version : 0.1 8 | @File : feature_extractor_trt.py 9 | """ 10 | 
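# Illustrative usage sketch (engine path assumed to be the one produced by the
# README steps): the extractor deserializes the DeepSORT TensorRT engine and
# returns one appearance embedding per image crop passed to track_extractor().
#
#   extractor = TrackerExtractor("../models/engine/deepsort.engine")
#   feats = extractor.track_extractor([crop_a, crop_b])  # -> (2, feature_dim) ndarray
#   extractor.destroy()  # pop the CUDA context when finished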
import cv2 11 | import numpy as np 12 | import tensorrt as trt 13 | import pycuda.driver as cuda 14 | import pycuda.autoinit 15 | INPUT_W = 128 16 | INPUT_H = 64 17 | 18 | 19 | class TrackerExtractor: 20 | 21 | def __init__(self, engine_file_path): 22 | self.img_size = 640 23 | self.threshold = 0.3 24 | self.stride = 1 25 | self.size = (64, 128) 26 | 27 | # Create a Context on this device, 28 | self.cfx = cuda.Device(0).make_context() 29 | stream = cuda.Stream() 30 | TRT_LOGGER = trt.Logger(trt.Logger.ERROR) 31 | runtime = trt.Runtime(TRT_LOGGER) 32 | 33 | # Deserialize the engine from file 34 | print("Engine file path: ", engine_file_path) 35 | with open(engine_file_path, "rb") as f: 36 | engine = runtime.deserialize_cuda_engine(f.read()) 37 | context = engine.create_execution_context() 38 | 39 | host_inputs = [] 40 | cuda_inputs = [] 41 | host_outputs = [] 42 | cuda_outputs = [] 43 | bindings = [] 44 | 45 | for binding in engine: 46 | size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size 47 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 48 | dims = engine.get_binding_shape(binding) 49 | if dims[0] < 0: 50 | size *= -1 51 | # Allocate host and device buffers 52 | host_mem = cuda.pagelocked_empty(size, dtype) 53 | cuda_mem = cuda.mem_alloc(host_mem.nbytes) 54 | # Append the device buffer to device bindings. 55 | bindings.append(int(cuda_mem)) 56 | # Append to the appropriate list. 57 | if engine.binding_is_input(binding): 58 | host_inputs.append(host_mem) 59 | cuda_inputs.append(cuda_mem) 60 | else: 61 | host_outputs.append(host_mem) 62 | cuda_outputs.append(cuda_mem) 63 | 64 | # Store 65 | self.stream = stream 66 | self.context = context 67 | self.engine = engine 68 | self.host_inputs = host_inputs 69 | self.cuda_inputs = cuda_inputs 70 | self.host_outputs = host_outputs 71 | self.cuda_outputs = cuda_outputs 72 | self.bindings = bindings 73 | 74 | def _preprocess(self, im_crops): 75 | """ 76 | 1. to float with scale from 0 to 1 77 | 2. resize to (64, 128) as Market1501 dataset did 78 | 3. concatenate to a numpy array 79 | 3. to torch Tensor 80 | 4. normalize 81 | """ 82 | def _resize(im, size): 83 | return cv2.resize(im.astype(np.float32)/255., size) 84 | def _normalize(im): 85 | mean = [0.485, 0.456, 0.406] 86 | std = [0.229, 0.224, 0.225] 87 | return (im.astype(np.float32) - np.array(mean)) / np.array(std) 88 | imgs = [] 89 | for im in im_crops: 90 | img = _normalize(_resize(im, self.size)) 91 | # img = img.cpu().numpy() 92 | imgs.append(img) 93 | return imgs 94 | 95 | def track_extractor(self, im_crops): 96 | # threading.Thread.__init__(self) 97 | # Make self the active context, pushing it on top of the context stack. 98 | self.cfx.push() 99 | # Restore 100 | stream = self.stream 101 | context = self.context 102 | engine = self.engine 103 | host_inputs = self.host_inputs 104 | cuda_inputs = self.cuda_inputs 105 | host_outputs = self.host_outputs 106 | cuda_outputs = self.cuda_outputs 107 | bindings = self.bindings 108 | # Do image preprocess 109 | im_batchs = self._preprocess(im_crops) 110 | features_trt = [] 111 | for im_batch in im_batchs: 112 | # Copy input image to host buffer 113 | np.copyto(host_inputs[0], im_batch.ravel()) 114 | # Transfer input data to the GPU. 115 | cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream) 116 | # Run inference. 117 | context.set_binding_shape(0, (1, 3, 128, 64)) 118 | context.execute_async(bindings=bindings, stream_handle=stream.handle) 119 | # Transfer predictions back from the GPU. 
120 | cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream) 121 | # Synchronize the stream 122 | stream.synchronize() 123 | # Remove any context from the top of the context stack, deactivating it. 124 | # self.cfx.pop() 125 | # Here we use the first row of output in that batch_size = 1 126 | trt_outputs = host_outputs[0] 127 | # Do postprocess 128 | feature_trt = trt_outputs 129 | features_trt.append(feature_trt) 130 | return np.array(features_trt) 131 | 132 | def destroy(self): 133 | self.cfx.pop() 134 | 135 | 136 | if __name__ == '__main__': 137 | img = cv2.imread("demo.jpg")[:,:,(2,1,0)] 138 | extr = TrackerExtractor("checkpoint/deepsort.engine") 139 | feature = extr.track_extractor(img) 140 | print(feature.shape) 141 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/deep_sort_trt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #!/usr/bin/python3 3 | """ 4 | Created on 2021/5/24 13:46 5 | @Author: Wang Cong 6 | @Email : iwangcong@outlook.com 7 | @Version : 0.1 8 | @File : deep_sort_trt.py 9 | """ 10 | import numpy as np 11 | import time 12 | from .deep.feature_extractor_trt import TrackerExtractor 13 | from .sort.nn_matching import NearestNeighborDistanceMetric 14 | from .sort.preprocessing import non_max_suppression 15 | from .sort.detection import Detection 16 | from .sort.tracker import Tracker 17 | 18 | __all__ = ['DeepSort'] 19 | 20 | 21 | class DeepSort(object): 22 | def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, 23 | max_age=70, n_init=3, nn_budget=100, use_cuda=True): 24 | self.min_confidence = min_confidence 25 | self.nms_max_overlap = nms_max_overlap 26 | model_path = "../models/engine/deepsort.engine" 27 | 28 | self.extractor = TrackerExtractor(model_path) 29 | 30 | max_cosine_distance = max_dist 31 | nn_budget = 100 32 | metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) 33 | self.tracker = Tracker(metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init) 34 | 35 | def clear(self): 36 | self.extractor.context.pop() 37 | 38 | def update(self, bbox_xywh, lbls, confidences, ori_img): 39 | self.height, self.width = ori_img.shape[:2] 40 | # generate detections 41 | features = self._get_features(bbox_xywh, ori_img) 42 | bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) 43 | detections = [Detection(bbox_tlwh[i], lbls[i], conf, features[i]) for i, conf in enumerate(confidences) if 44 | conf > self.min_confidence] 45 | 46 | # run on non-maximum supression 47 | boxes = np.array([d.tlwh for d in detections]) 48 | labels = np.array([d.label for d in detections]) 49 | scores = np.array([d.confidence for d in detections]) 50 | indices = non_max_suppression(boxes, self.nms_max_overlap, scores) 51 | detections = [detections[i] for i in indices] 52 | 53 | # update tracker 54 | self.tracker.predict() 55 | self.tracker.update(detections) 56 | 57 | # output bbox identities 58 | outputs = [] 59 | for track in self.tracker.tracks: 60 | if not track.is_confirmed() or track.time_since_update > 1: 61 | continue 62 | box = track.to_tlwh() 63 | x1, y1, x2, y2 = self._tlwh_to_xyxy(box) 64 | track_label = track.track_label 65 | track_id = track.track_id 66 | outputs.append(np.array([x1, y1, x2, y2, track_label, track_id])) 67 | if len(outputs) > 0: 68 | outputs = np.stack(outputs, axis=0) 69 | return outputs 70 | 71 | """ 72 | TODO: 73 | Convert bbox from xc_yc_w_h to 
xtl_ytl_w_h 74 | Thanks JieChen91@github.com for reporting this bug! 75 | """ 76 | 77 | @staticmethod 78 | def _xywh_to_tlwh(bbox_xywh): 79 | # if isinstance(bbox_xywh, np.ndarray): 80 | # bbox_tlwh = bbox_xywh.copy() 81 | bbox_tlwh = bbox_xywh.copy() 82 | bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2. 83 | bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2. 84 | return bbox_tlwh 85 | 86 | def _xywh_to_xyxy(self, bbox_xywh): 87 | x, y, w, h = bbox_xywh 88 | x1 = max(int(x - w / 2), 0) 89 | x2 = min(int(x + w / 2), self.width - 1) 90 | y1 = max(int(y - h / 2), 0) 91 | y2 = min(int(y + h / 2), self.height - 1) 92 | return x1, y1, x2, y2 93 | 94 | def _tlwh_to_xyxy(self, bbox_tlwh): 95 | """ 96 | Convert bbox from xtl_ytl_w_h to xc_yc_w_h 97 | Thanks JieChen91@github.com for reporting this bug! 98 | """ 99 | x, y, w, h = bbox_tlwh 100 | x1 = max(int(x), 0) 101 | x2 = min(int(x + w), self.width - 1) 102 | y1 = max(int(y), 0) 103 | y2 = min(int(y + h), self.height - 1) 104 | return x1, y1, x2, y2 105 | 106 | def _xyxy_to_tlwh(self, bbox_xyxy): 107 | x1, y1, x2, y2 = bbox_xyxy 108 | 109 | t = x1 110 | l = y1 111 | w = int(x2 - x1) 112 | h = int(y2 - y1) 113 | return t, l, w, h 114 | 115 | def _get_features(self, bbox_xywh, ori_img): 116 | im_crops = [] 117 | for box in bbox_xywh: 118 | x1, y1, x2, y2 = self._xywh_to_xyxy(box) 119 | im = ori_img[y1:y2, x1:x2] 120 | im_crops.append(im) 121 | if im_crops: 122 | features = self.extractor.track_extractor(im_crops) 123 | else: 124 | features = np.array([]) 125 | return features 126 | 127 | 128 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/srcs/deep_sort/deep_sort/sort/__init__.py -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | feature : ndarray | NoneType 25 | A feature vector that describes the object contained in this image. 26 | 27 | """ 28 | 29 | def __init__(self, tlwh, label, confidence, feature): 30 | self.tlwh = np.asarray(tlwh, dtype=np.float) 31 | self.label = label 32 | self.confidence = float(confidence) 33 | self.feature = np.asarray(feature, dtype=np.float32) 34 | 35 | def to_tlbr(self): 36 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 37 | `(top left, bottom right)`. 38 | """ 39 | ret = self.tlwh.copy() 40 | ret[2:] += ret[:2] 41 | return ret 42 | 43 | def to_xyah(self): 44 | """Convert bounding box to format `(center x, center y, aspect ratio, 45 | height)`, where the aspect ratio is `width / height`. 
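The DeepSort wrapper and the `Detection` class above shuttle boxes between several coordinate conventions. The snippet below is a minimal, self-contained NumPy sketch, not part of the repository, of the same conversions that `_xywh_to_tlwh` / `_tlwh_to_xyxy` and `Detection.to_xyah` perform; all numbers are made up.

```python
# Box conventions used by the tracker:
#   detector output : (xc, yc, w, h)   -- box centre plus size
#   DeepSort / tlwh : (x1, y1, w, h)   -- top-left corner plus size
#   drawing / xyxy  : (x1, y1, x2, y2) -- two opposite corners
#   Kalman state    : (xc, yc, a, h)   -- centre, aspect ratio w/h, height
import numpy as np

def xywh_to_tlwh(xywh):
    tlwh = xywh.astype(np.float32).copy()
    tlwh[:, 0] -= tlwh[:, 2] / 2.0
    tlwh[:, 1] -= tlwh[:, 3] / 2.0
    return tlwh

def tlwh_to_xyxy(tlwh, width, height):
    x1 = np.clip(tlwh[:, 0], 0, width - 1)
    y1 = np.clip(tlwh[:, 1], 0, height - 1)
    x2 = np.clip(tlwh[:, 0] + tlwh[:, 2], 0, width - 1)
    y2 = np.clip(tlwh[:, 1] + tlwh[:, 3], 0, height - 1)
    return np.stack([x1, y1, x2, y2], axis=1)

def tlwh_to_xyah(tlwh):
    xyah = tlwh.astype(np.float32).copy()
    xyah[:, :2] += xyah[:, 2:] / 2.0   # top-left corner -> centre
    xyah[:, 2] /= xyah[:, 3]           # width -> aspect ratio
    return xyah

boxes_xywh = np.array([[320.0, 240.0, 100.0, 200.0]])            # one detection
print(xywh_to_tlwh(boxes_xywh))                                  # [[270. 140. 100. 200.]]
print(tlwh_to_xyxy(xywh_to_tlwh(boxes_xywh), 640, 480))          # [[270. 140. 370. 340.]]
print(tlwh_to_xyah(xywh_to_tlwh(boxes_xywh)))                    # [[320. 240. 0.5 200.]]
```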
46 | """ 47 | ret = self.tlwh.copy() 48 | ret[:2] += ret[2:] / 2 49 | ret[2] /= ret[3] 50 | return ret 51 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Computer intersection over union. 9 | 10 | Parameters 11 | ---------- 12 | bbox : ndarray 13 | A bounding box in format `(top left x, top left y, width, height)`. 14 | candidates : ndarray 15 | A matrix of candidate bounding boxes (one per row) in the same format 16 | as `bbox`. 17 | 18 | Returns 19 | ------- 20 | ndarray 21 | The intersection over union in [0, 1] between the `bbox` and each 22 | candidate. A higher score means a larger fraction of the `bbox` is 23 | occluded by the candidate. 24 | 25 | """ 26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 27 | candidates_tl = candidates[:, :2] 28 | candidates_br = candidates[:, :2] + candidates[:, 2:] 29 | 30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 34 | wh = np.maximum(0., br - tl) 35 | 36 | area_intersection = wh.prod(axis=1) 37 | area_bbox = bbox[2:].prod() 38 | area_candidates = candidates[:, 2:].prod(axis=1) 39 | return area_intersection / (area_bbox + area_candidates - area_intersection) 40 | 41 | 42 | def iou_cost(tracks, detections, track_indices=None, 43 | detection_indices=None): 44 | """An intersection over union distance metric. 45 | 46 | Parameters 47 | ---------- 48 | tracks : List[deep_sort.track.Track] 49 | A list of tracks. 50 | detections : List[deep_sort.detection.Detection] 51 | A list of detections. 52 | track_indices : Optional[List[int]] 53 | A list of indices to tracks that should be matched. Defaults to 54 | all `tracks`. 55 | detection_indices : Optional[List[int]] 56 | A list of indices to detections that should be matched. Defaults 57 | to all `detections`. 58 | 59 | Returns 60 | ------- 61 | ndarray 62 | Returns a cost matrix of shape 63 | len(track_indices), len(detection_indices) where entry (i, j) is 64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 65 | 66 | """ 67 | if track_indices is None: 68 | track_indices = np.arange(len(tracks)) 69 | if detection_indices is None: 70 | detection_indices = np.arange(len(detections)) 71 | 72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 73 | for row, track_idx in enumerate(track_indices): 74 | if tracks[track_idx].time_since_update > 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 80 | cost_matrix[row, :] = 1. 
- iou(bbox, candidates) 81 | return cost_matrix 82 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. Unobserved velocities are initialized to 0 mean. 
105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | mean = np.dot(self._motion_mat, mean) 120 | covariance = np.linalg.multi_dot(( 121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 122 | 123 | return mean, covariance 124 | 125 | def project(self, mean, covariance): 126 | """Project state distribution to measurement space. 127 | 128 | Parameters 129 | ---------- 130 | mean : ndarray 131 | The state's mean vector (8 dimensional array). 132 | covariance : ndarray 133 | The state's covariance matrix (8x8 dimensional). 134 | 135 | Returns 136 | ------- 137 | (ndarray, ndarray) 138 | Returns the projected mean and covariance matrix of the given state 139 | estimate. 140 | 141 | """ 142 | std = [ 143 | self._std_weight_position * mean[3], 144 | self._std_weight_position * mean[3], 145 | 1e-1, 146 | self._std_weight_position * mean[3]] 147 | innovation_cov = np.diag(np.square(std)) 148 | 149 | mean = np.dot(self._update_mat, mean) 150 | covariance = np.linalg.multi_dot(( 151 | self._update_mat, covariance, self._update_mat.T)) 152 | return mean, covariance + innovation_cov 153 | 154 | def update(self, mean, covariance, measurement): 155 | """Run Kalman filter correction step. 156 | 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The predicted state's mean vector (8 dimensional). 161 | covariance : ndarray 162 | The state's covariance matrix (8x8 dimensional). 163 | measurement : ndarray 164 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 165 | is the center position, a the aspect ratio, and h the height of the 166 | bounding box. 167 | 168 | Returns 169 | ------- 170 | (ndarray, ndarray) 171 | Returns the measurement-corrected state distribution. 172 | 173 | """ 174 | projected_mean, projected_cov = self.project(mean, covariance) 175 | 176 | chol_factor, lower = scipy.linalg.cho_factor( 177 | projected_cov, lower=True, check_finite=False) 178 | kalman_gain = scipy.linalg.cho_solve( 179 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 180 | check_finite=False).T 181 | innovation = measurement - projected_mean 182 | 183 | new_mean = mean + np.dot(innovation, kalman_gain.T) 184 | new_covariance = covariance - np.linalg.multi_dot(( 185 | kalman_gain, projected_cov, kalman_gain.T)) 186 | return new_mean, new_covariance 187 | 188 | def gating_distance(self, mean, covariance, measurements, 189 | only_position=False): 190 | """Compute gating distance between state distribution and measurements. 191 | 192 | A suitable distance threshold can be obtained from `chi2inv95`. If 193 | `only_position` is False, the chi-square distribution has 4 degrees of 194 | freedom, otherwise 2. 195 | 196 | Parameters 197 | ---------- 198 | mean : ndarray 199 | Mean vector over the state distribution (8 dimensional). 200 | covariance : ndarray 201 | Covariance of the state distribution (8x8 dimensional). 202 | measurements : ndarray 203 | An Nx4 dimensional matrix of N measurements, each in 204 | format (x, y, a, h) where (x, y) is the bounding box center 205 | position, a the aspect ratio, and h the height. 
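The constant-velocity filter above can be exercised on synthetic data without the rest of the tracker. Below is a sketch, not part of the repository, that assumes `kalman_filter.py` is importable from the working directory (adjust the import to your layout) and feeds the filter a box whose centre drifts right at 5 px per frame.

```python
import numpy as np
from kalman_filter import KalmanFilter, chi2inv95  # assumed import path

kf = KalmanFilter()

# Measurements in (centre x, centre y, aspect ratio, height) format:
# the box centre moves 5 px to the right every frame.
measurements = [np.array([100.0 + 5 * t, 200.0, 0.5, 120.0]) for t in range(10)]

mean, covariance = kf.initiate(measurements[0])
for z in measurements[1:]:
    mean, covariance = kf.predict(mean, covariance)    # time update
    mean, covariance = kf.update(mean, covariance, z)  # measurement correction

print(mean[:4])       # filtered (x, y, a, h); x ends up close to 145
print(mean[4:])       # velocities; vx approaches +5 px/frame
print(chi2inv95[4])   # 0.95 gating threshold used together with gating_distance()
```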
206 | only_position : Optional[bool] 207 | If True, distance computation is done with respect to the bounding 208 | box center position only. 209 | 210 | Returns 211 | ------- 212 | ndarray 213 | Returns an array of length N, where the i-th element contains the 214 | squared Mahalanobis distance between (mean, covariance) and 215 | `measurements[i]`. 216 | 217 | """ 218 | mean, covariance = self.project(mean, covariance) 219 | if only_position: 220 | mean, covariance = mean[:2], covariance[:2, :2] 221 | measurements = measurements[:, :2] 222 | 223 | cholesky_factor = np.linalg.cholesky(covariance) 224 | d = measurements - mean 225 | z = scipy.linalg.solve_triangular( 226 | cholesky_factor, d.T, lower=True, check_finite=False, 227 | overwrite_b=True) 228 | squared_maha = np.sum(z * z, axis=0) 229 | return squared_maha 230 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/linear_assignment.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | # from sklearn.utils.linear_assignment_ import linear_assignment 5 | from scipy.optimize import linear_sum_assignment as linear_assignment 6 | from . import kalman_filter 7 | 8 | 9 | INFTY_COST = 1e+5 10 | 11 | 12 | def min_cost_matching( 13 | distance_metric, max_distance, tracks, detections, track_indices=None, 14 | detection_indices=None): 15 | """Solve linear assignment problem. 16 | 17 | Parameters 18 | ---------- 19 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 20 | The distance metric is given a list of tracks and detections as well as 21 | a list of N track indices and M detection indices. The metric should 22 | return the NxM dimensional cost matrix, where element (i, j) is the 23 | association cost between the i-th track in the given track indices and 24 | the j-th detection in the given detection_indices. 25 | max_distance : float 26 | Gating threshold. Associations with cost larger than this value are 27 | disregarded. 28 | tracks : List[track.Track] 29 | A list of predicted tracks at the current time step. 30 | detections : List[detection.Detection] 31 | A list of detections at the current time step. 32 | track_indices : List[int] 33 | List of track indices that maps rows in `cost_matrix` to tracks in 34 | `tracks` (see description above). 35 | detection_indices : List[int] 36 | List of detection indices that maps columns in `cost_matrix` to 37 | detections in `detections` (see description above). 38 | 39 | Returns 40 | ------- 41 | (List[(int, int)], List[int], List[int]) 42 | Returns a tuple with the following three entries: 43 | * A list of matched track and detection indices. 44 | * A list of unmatched track indices. 45 | * A list of unmatched detection indices. 46 | 47 | """ 48 | if track_indices is None: 49 | track_indices = np.arange(len(tracks)) 50 | if detection_indices is None: 51 | detection_indices = np.arange(len(detections)) 52 | 53 | if len(detection_indices) == 0 or len(track_indices) == 0: 54 | return [], track_indices, detection_indices # Nothing to match. 
55 | 56 | cost_matrix = distance_metric( 57 | tracks, detections, track_indices, detection_indices) 58 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 59 | 60 | row_indices, col_indices = linear_assignment(cost_matrix) 61 | 62 | matches, unmatched_tracks, unmatched_detections = [], [], [] 63 | for col, detection_idx in enumerate(detection_indices): 64 | if col not in col_indices: 65 | unmatched_detections.append(detection_idx) 66 | for row, track_idx in enumerate(track_indices): 67 | if row not in row_indices: 68 | unmatched_tracks.append(track_idx) 69 | for row, col in zip(row_indices, col_indices): 70 | track_idx = track_indices[row] 71 | detection_idx = detection_indices[col] 72 | if cost_matrix[row, col] > max_distance: 73 | unmatched_tracks.append(track_idx) 74 | unmatched_detections.append(detection_idx) 75 | else: 76 | matches.append((track_idx, detection_idx)) 77 | return matches, unmatched_tracks, unmatched_detections 78 | 79 | 80 | def matching_cascade( 81 | distance_metric, max_distance, cascade_depth, tracks, detections, 82 | track_indices=None, detection_indices=None): 83 | """Run matching cascade. 84 | 85 | Parameters 86 | ---------- 87 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 88 | The distance metric is given a list of tracks and detections as well as 89 | a list of N track indices and M detection indices. The metric should 90 | return the NxM dimensional cost matrix, where element (i, j) is the 91 | association cost between the i-th track in the given track indices and 92 | the j-th detection in the given detection indices. 93 | max_distance : float 94 | Gating threshold. Associations with cost larger than this value are 95 | disregarded. 96 | cascade_depth: int 97 | The cascade depth, should be se to the maximum track age. 98 | tracks : List[track.Track] 99 | A list of predicted tracks at the current time step. 100 | detections : List[detection.Detection] 101 | A list of detections at the current time step. 102 | track_indices : Optional[List[int]] 103 | List of track indices that maps rows in `cost_matrix` to tracks in 104 | `tracks` (see description above). Defaults to all tracks. 105 | detection_indices : Optional[List[int]] 106 | List of detection indices that maps columns in `cost_matrix` to 107 | detections in `detections` (see description above). Defaults to all 108 | detections. 109 | 110 | Returns 111 | ------- 112 | (List[(int, int)], List[int], List[int]) 113 | Returns a tuple with the following three entries: 114 | * A list of matched track and detection indices. 115 | * A list of unmatched track indices. 116 | * A list of unmatched detection indices. 
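`min_cost_matching` above wraps SciPy's Hungarian solver in a gating step: costs above the threshold are clamped before solving, and any surviving pair whose cost still exceeds the threshold is rejected afterwards. The following standalone sketch, with made-up costs, mirrors that logic outside the tracker.

```python
import numpy as np
from scipy.optimize import linear_sum_assignment

max_distance = 0.7
cost = np.array([
    [0.10, 0.90, 0.95],   # track 0 clearly prefers detection 0
    [0.85, 0.20, 0.80],   # track 1 clearly prefers detection 1
])
cost = np.where(cost > max_distance, max_distance + 1e-5, cost)  # gate

rows, cols = linear_sum_assignment(cost)
matches, unmatched_tracks, unmatched_dets = [], [], []
for r, c in zip(rows, cols):
    if cost[r, c] > max_distance:          # assigned, but still too costly: reject
        unmatched_tracks.append(int(r))
        unmatched_dets.append(int(c))
    else:
        matches.append((int(r), int(c)))
unmatched_dets += [c for c in range(cost.shape[1]) if c not in cols]
unmatched_tracks += [r for r in range(cost.shape[0]) if r not in rows]

print(matches)            # [(0, 0), (1, 1)]
print(unmatched_tracks)   # []
print(unmatched_dets)     # [2] -- the extra detection matched nothing
```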
117 | 118 | """ 119 | if track_indices is None: 120 | track_indices = list(range(len(tracks))) 121 | if detection_indices is None: 122 | detection_indices = list(range(len(detections))) 123 | 124 | unmatched_detections = detection_indices 125 | matches = [] 126 | for level in range(cascade_depth): 127 | if len(unmatched_detections) == 0: # No detections left 128 | break 129 | 130 | track_indices_l = [ 131 | k for k in track_indices 132 | if tracks[k].time_since_update == 1 + level 133 | ] 134 | if len(track_indices_l) == 0: # Nothing to match at this level 135 | continue 136 | 137 | matches_l, _, unmatched_detections = \ 138 | min_cost_matching( 139 | distance_metric, max_distance, tracks, detections, 140 | track_indices_l, unmatched_detections) 141 | matches += matches_l 142 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) 143 | return matches, unmatched_tracks, unmatched_detections 144 | 145 | 146 | def gate_cost_matrix( 147 | kf, cost_matrix, tracks, detections, track_indices, detection_indices, 148 | gated_cost=INFTY_COST, only_position=False): 149 | """Invalidate infeasible entries in cost matrix based on the state 150 | distributions obtained by Kalman filtering. 151 | 152 | Parameters 153 | ---------- 154 | kf : The Kalman filter. 155 | cost_matrix : ndarray 156 | The NxM dimensional cost matrix, where N is the number of track indices 157 | and M is the number of detection indices, such that entry (i, j) is the 158 | association cost between `tracks[track_indices[i]]` and 159 | `detections[detection_indices[j]]`. 160 | tracks : List[track.Track] 161 | A list of predicted tracks at the current time step. 162 | detections : List[detection.Detection] 163 | A list of detections at the current time step. 164 | track_indices : List[int] 165 | List of track indices that maps rows in `cost_matrix` to tracks in 166 | `tracks` (see description above). 167 | detection_indices : List[int] 168 | List of detection indices that maps columns in `cost_matrix` to 169 | detections in `detections` (see description above). 170 | gated_cost : Optional[float] 171 | Entries in the cost matrix corresponding to infeasible associations are 172 | set this value. Defaults to a very large value. 173 | only_position : Optional[bool] 174 | If True, only the x, y position of the state distribution is considered 175 | during gating. Defaults to False. 176 | 177 | Returns 178 | ------- 179 | ndarray 180 | Returns the modified cost matrix. 181 | 182 | """ 183 | gating_dim = 2 if only_position else 4 184 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 185 | measurements = np.asarray( 186 | [detections[i].to_xyah() for i in detection_indices]) 187 | for row, track_idx in enumerate(track_indices): 188 | track = tracks[track_idx] 189 | gating_distance = kf.gating_distance( 190 | track.mean, track.covariance, measurements, only_position) 191 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost 192 | return cost_matrix 193 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/nn_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | def _pdist(a, b): 6 | """Compute pair-wise squared distance between points in `a` and `b`. 7 | 8 | Parameters 9 | ---------- 10 | a : array_like 11 | An NxM matrix of N samples of dimensionality M. 12 | b : array_like 13 | An LxM matrix of L samples of dimensionality M. 
14 | 15 | Returns 16 | ------- 17 | ndarray 18 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 19 | contains the squared distance between `a[i]` and `b[j]`. 20 | 21 | """ 22 | a, b = np.asarray(a), np.asarray(b) 23 | if len(a) == 0 or len(b) == 0: 24 | return np.zeros((len(a), len(b))) 25 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) 26 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 27 | r2 = np.clip(r2, 0., float(np.inf)) 28 | return r2 29 | 30 | 31 | def _cosine_distance(a, b, data_is_normalized=False): 32 | """Compute pair-wise cosine distance between points in `a` and `b`. 33 | 34 | Parameters 35 | ---------- 36 | a : array_like 37 | An NxM matrix of N samples of dimensionality M. 38 | b : array_like 39 | An LxM matrix of L samples of dimensionality M. 40 | data_is_normalized : Optional[bool] 41 | If True, assumes rows in a and b are unit length vectors. 42 | Otherwise, a and b are explicitly normalized to lenght 1. 43 | 44 | Returns 45 | ------- 46 | ndarray 47 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 48 | contains the squared distance between `a[i]` and `b[j]`. 49 | 50 | """ 51 | if not data_is_normalized: 52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 54 | return 1. - np.dot(a, b.T) 55 | 56 | 57 | def _nn_euclidean_distance(x, y): 58 | """ Helper function for nearest neighbor distance metric (Euclidean). 59 | 60 | Parameters 61 | ---------- 62 | x : ndarray 63 | A matrix of N row-vectors (sample points). 64 | y : ndarray 65 | A matrix of M row-vectors (query points). 66 | 67 | Returns 68 | ------- 69 | ndarray 70 | A vector of length M that contains for each entry in `y` the 71 | smallest Euclidean distance to a sample in `x`. 72 | 73 | """ 74 | distances = _pdist(x, y) 75 | return np.maximum(0.0, distances.min(axis=0)) 76 | 77 | 78 | def _nn_cosine_distance(x, y): 79 | """ Helper function for nearest neighbor distance metric (cosine). 80 | 81 | Parameters 82 | ---------- 83 | x : ndarray 84 | A matrix of N row-vectors (sample points). 85 | y : ndarray 86 | A matrix of M row-vectors (query points). 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | A vector of length M that contains for each entry in `y` the 92 | smallest cosine distance to a sample in `x`. 93 | 94 | """ 95 | distances = _cosine_distance(x, y) 96 | return distances.min(axis=0) 97 | 98 | 99 | class NearestNeighborDistanceMetric(object): 100 | """ 101 | A nearest neighbor distance metric that, for each target, returns 102 | the closest distance to any sample that has been observed so far. 103 | 104 | Parameters 105 | ---------- 106 | metric : str 107 | Either "euclidean" or "cosine". 108 | matching_threshold: float 109 | The matching threshold. Samples with larger distance are considered an 110 | invalid match. 111 | budget : Optional[int] 112 | If not None, fix samples per class to at most this number. Removes 113 | the oldest samples when the budget is reached. 114 | 115 | Attributes 116 | ---------- 117 | samples : Dict[int -> List[ndarray]] 118 | A dictionary that maps from target identities to the list of samples 119 | that have been observed so far. 
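Appearance matching ultimately reduces to the nearest-neighbour cosine distance computed by the helpers above: for each new embedding, take the smallest cosine distance to any feature already stored for a track. A short self-contained sketch with random data (real embeddings come from the TensorRT feature extractor):

```python
import numpy as np

rng = np.random.default_rng(0)
gallery = rng.normal(size=(5, 512))   # features already stored for one track
queries = rng.normal(size=(3, 512))   # embeddings of new detections

gallery /= np.linalg.norm(gallery, axis=1, keepdims=True)
queries /= np.linalg.norm(queries, axis=1, keepdims=True)

cosine_dist = 1.0 - gallery @ queries.T   # pair-wise cosine distance, shape (5, 3)
nn_dist = cosine_dist.min(axis=0)         # best match per query, shape (3,)

matching_threshold = 0.2                  # plays the role of `max_dist` in DeepSort
print(nn_dist)
print(nn_dist < matching_threshold)       # random vectors are near-orthogonal, so likely all False
```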
120 | 121 | """ 122 | 123 | def __init__(self, metric, matching_threshold, budget=None): 124 | 125 | 126 | if metric == "euclidean": 127 | self._metric = _nn_euclidean_distance 128 | elif metric == "cosine": 129 | self._metric = _nn_cosine_distance 130 | else: 131 | raise ValueError( 132 | "Invalid metric; must be either 'euclidean' or 'cosine'") 133 | self.matching_threshold = matching_threshold 134 | self.budget = budget 135 | self.samples = {} 136 | 137 | def partial_fit(self, features, targets, active_targets): 138 | """Update the distance metric with new data. 139 | 140 | Parameters 141 | ---------- 142 | features : ndarray 143 | An NxM matrix of N features of dimensionality M. 144 | targets : ndarray 145 | An integer array of associated target identities. 146 | active_targets : List[int] 147 | A list of targets that are currently present in the scene. 148 | 149 | """ 150 | for feature, target in zip(features, targets): 151 | self.samples.setdefault(target, []).append(feature) 152 | if self.budget is not None: 153 | self.samples[target] = self.samples[target][-self.budget:] 154 | self.samples = {k: self.samples[k] for k in active_targets} 155 | 156 | def distance(self, features, targets): 157 | """Compute distance between features and targets. 158 | 159 | Parameters 160 | ---------- 161 | features : ndarray 162 | An NxM matrix of N features of dimensionality M. 163 | targets : List[int] 164 | A list of targets to match the given `features` against. 165 | 166 | Returns 167 | ------- 168 | ndarray 169 | Returns a cost matrix of shape len(targets), len(features), where 170 | element (i, j) contains the closest squared distance between 171 | `targets[i]` and `features[j]`. 172 | 173 | """ 174 | cost_matrix = np.zeros((len(targets), len(features))) 175 | for i, target in enumerate(targets): 176 | cost_matrix[i, :] = self._metric(self.samples[target], features) 177 | return cost_matrix 178 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> scores = [d.confidence for d in detections] 19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 20 | >>> detections = [detections[i] for i in indices] 21 | 22 | Parameters 23 | ---------- 24 | boxes : ndarray 25 | Array of ROIs (x, y, width, height). 26 | max_bbox_overlap : float 27 | ROIs that overlap more than this values are suppressed. 28 | scores : Optional[array_like] 29 | Detector confidence score. 30 | 31 | Returns 32 | ------- 33 | List[int] 34 | Returns indices of detections that have survived non-maxima suppression. 
35 | 36 | """ 37 | if len(boxes) == 0: 38 | return [] 39 | 40 | boxes = boxes.astype(np.float) 41 | pick = [] 42 | 43 | x1 = boxes[:, 0] 44 | y1 = boxes[:, 1] 45 | x2 = boxes[:, 2] + boxes[:, 0] 46 | y2 = boxes[:, 3] + boxes[:, 1] 47 | 48 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | if scores is not None: 50 | idxs = np.argsort(scores) 51 | else: 52 | idxs = np.argsort(y2) 53 | 54 | while len(idxs) > 0: 55 | last = len(idxs) - 1 56 | i = idxs[last] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 60 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 61 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 62 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 63 | 64 | w = np.maximum(0, xx2 - xx1 + 1) 65 | h = np.maximum(0, yy2 - yy1 + 1) 66 | 67 | overlap = (w * h) / area[idxs[:last]] 68 | 69 | idxs = np.delete( 70 | idxs, np.concatenate( 71 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 72 | 73 | return pick 74 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/track.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | 3 | 4 | class TrackState: 5 | """ 6 | Enumeration type for the single target track state. Newly created tracks are 7 | classified as `tentative` until enough evidence has been collected. Then, 8 | the track state is changed to `confirmed`. Tracks that are no longer alive 9 | are classified as `deleted` to mark them for removal from the set of active 10 | tracks. 11 | 12 | """ 13 | 14 | Tentative = 1 15 | Confirmed = 2 16 | Deleted = 3 17 | 18 | 19 | class Track: 20 | """ 21 | A single target track with state space `(x, y, a, h)` and associated 22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 23 | aspect ratio and `h` is the height. 24 | 25 | Parameters 26 | ---------- 27 | mean : ndarray 28 | Mean vector of the initial state distribution. 29 | covariance : ndarray 30 | Covariance matrix of the initial state distribution. 31 | track_id : int 32 | A unique track identifier. 33 | n_init : int 34 | Number of consecutive detections before the track is confirmed. The 35 | track state is set to `Deleted` if a miss occurs within the first 36 | `n_init` frames. 37 | max_age : int 38 | The maximum number of consecutive misses before the track state is 39 | set to `Deleted`. 40 | feature : Optional[ndarray] 41 | Feature vector of the detection this track originates from. If not None, 42 | this feature is added to the `features` cache. 43 | 44 | Attributes 45 | ---------- 46 | mean : ndarray 47 | Mean vector of the initial state distribution. 48 | covariance : ndarray 49 | Covariance matrix of the initial state distribution. 50 | track_id : int 51 | A unique track identifier. 52 | hits : int 53 | Total number of measurement updates. 54 | age : int 55 | Total number of frames since first occurance. 56 | time_since_update : int 57 | Total number of frames since last measurement update. 58 | state : TrackState 59 | The current track state. 60 | features : List[ndarray] 61 | A cache of features. On each measurement update, the associated feature 62 | vector is added to this list. 
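A quick way to exercise `non_max_suppression` above on synthetic boxes is shown below; the sketch is not part of the repository and the import path is an assumption. One caveat worth flagging: the function's internal `boxes.astype(np.float)` relies on an alias that NumPy removed in 1.24, so on a recent NumPy install it must be changed to the builtin `float` (or `np.float64`) before this call will run.

```python
import numpy as np
from preprocessing import non_max_suppression  # adjust the import to your layout

boxes = np.array([
    [100, 100, 50, 80],    # (x, y, w, h)
    [104, 102, 50, 80],    # heavy overlap with the first box
    [300, 200, 60, 60],    # far away, should survive
], dtype=np.float64)
scores = np.array([0.9, 0.6, 0.8])

keep = non_max_suppression(boxes, max_bbox_overlap=0.5, scores=scores)
print(keep)          # [0, 2] -- the lower-scoring duplicate is suppressed
print(boxes[keep])
```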
63 | 64 | """ 65 | 66 | def __init__(self, mean, covariance, track_label, track_id, n_init, max_age, 67 | feature=None): 68 | self.mean = mean 69 | self.covariance = covariance 70 | self.track_label = track_label 71 | self.track_id = track_id 72 | self.hits = 1 73 | self.age = 1 74 | self.time_since_update = 0 75 | 76 | self.state = TrackState.Tentative 77 | self.features = [] 78 | if feature is not None: 79 | self.features.append(feature) 80 | 81 | self._n_init = n_init 82 | self._max_age = max_age 83 | 84 | def to_tlwh(self): 85 | """Get current position in bounding box format `(top left x, top left y, 86 | width, height)`. 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | The bounding box. 92 | 93 | """ 94 | ret = self.mean[:4].copy() 95 | ret[2] *= ret[3] 96 | ret[:2] -= ret[2:] / 2 97 | return ret 98 | 99 | def to_tlbr(self): 100 | """Get current position in bounding box format `(min x, miny, max x, 101 | max y)`. 102 | 103 | Returns 104 | ------- 105 | ndarray 106 | The bounding box. 107 | 108 | """ 109 | ret = self.to_tlwh() 110 | ret[2:] = ret[:2] + ret[2:] 111 | return ret 112 | 113 | def predict(self, kf): 114 | """Propagate the state distribution to the current time step using a 115 | Kalman filter prediction step. 116 | 117 | Parameters 118 | ---------- 119 | kf : kalman_filter.KalmanFilter 120 | The Kalman filter. 121 | 122 | """ 123 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 124 | self.age += 1 125 | self.time_since_update += 1 126 | 127 | def update(self, kf, detection): 128 | """Perform Kalman filter measurement update step and update the feature 129 | cache. 130 | 131 | Parameters 132 | ---------- 133 | kf : kalman_filter.KalmanFilter 134 | The Kalman filter. 135 | detection : Detection 136 | The associated detection. 137 | 138 | """ 139 | self.mean, self.covariance = kf.update( 140 | self.mean, self.covariance, detection.to_xyah()) 141 | self.features.append(detection.feature) 142 | 143 | self.hits += 1 144 | self.time_since_update = 0 145 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 146 | self.state = TrackState.Confirmed 147 | 148 | def mark_missed(self): 149 | """Mark this track as missed (no association at the current time step). 150 | """ 151 | if self.state == TrackState.Tentative: 152 | self.state = TrackState.Deleted 153 | elif self.time_since_update > self._max_age: 154 | self.state = TrackState.Deleted 155 | 156 | def is_tentative(self): 157 | """Returns True if this track is tentative (unconfirmed). 158 | """ 159 | return self.state == TrackState.Tentative 160 | 161 | def is_confirmed(self): 162 | """Returns True if this track is confirmed.""" 163 | return self.state == TrackState.Confirmed 164 | 165 | def is_deleted(self): 166 | """Returns True if this track is dead and should be deleted.""" 167 | return self.state == TrackState.Deleted 168 | -------------------------------------------------------------------------------- /srcs/deep_sort/deep_sort/sort/tracker.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import kalman_filter 5 | from . import linear_assignment 6 | from . import iou_matching 7 | from .track import Track 8 | 9 | 10 | class Tracker: 11 | """ 12 | This is the multi-target tracker. 13 | 14 | Parameters 15 | ---------- 16 | metric : nn_matching.NearestNeighborDistanceMetric 17 | A distance metric for measurement-to-track association. 
18 | max_age : int 19 | Maximum number of missed misses before a track is deleted. 20 | n_init : int 21 | Number of consecutive detections before the track is confirmed. The 22 | track state is set to `Deleted` if a miss occurs within the first 23 | `n_init` frames. 24 | 25 | Attributes 26 | ---------- 27 | metric : nn_matching.NearestNeighborDistanceMetric 28 | The distance metric used for measurement to track association. 29 | max_age : int 30 | Maximum number of missed misses before a track is deleted. 31 | n_init : int 32 | Number of frames that a track remains in initialization phase. 33 | kf : kalman_filter.KalmanFilter 34 | A Kalman filter to filter target trajectories in image space. 35 | tracks : List[Track] 36 | The list of active tracks at the current time step. 37 | 38 | """ 39 | 40 | def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3): 41 | self.metric = metric 42 | self.max_iou_distance = max_iou_distance 43 | self.max_age = max_age 44 | self.n_init = n_init 45 | 46 | self.kf = kalman_filter.KalmanFilter() 47 | self.tracks = [] 48 | self._next_id = 1 49 | 50 | def predict(self): 51 | """Propagate track state distributions one time step forward. 52 | 53 | This function should be called once every time step, before `update`. 54 | """ 55 | for track in self.tracks: 56 | track.predict(self.kf) 57 | 58 | def update(self, detections): 59 | """Perform measurement update and track management. 60 | 61 | Parameters 62 | ---------- 63 | detections : List[deep_sort.detection.Detection] 64 | A list of detections at the current time step. 65 | 66 | """ 67 | # Run matching cascade. 68 | matches, unmatched_tracks, unmatched_detections = \ 69 | self._match(detections) 70 | 71 | # Update track set. 72 | for track_idx, detection_idx in matches: 73 | self.tracks[track_idx].update( 74 | self.kf, detections[detection_idx]) 75 | for track_idx in unmatched_tracks: 76 | self.tracks[track_idx].mark_missed() 77 | for detection_idx in unmatched_detections: 78 | self._initiate_track(detections[detection_idx]) 79 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 80 | 81 | # Update distance metric. 82 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 83 | features, targets = [], [] 84 | for track in self.tracks: 85 | if not track.is_confirmed(): 86 | continue 87 | features += track.features 88 | targets += [track.track_id for _ in track.features] 89 | track.features = [] 90 | self.metric.partial_fit( 91 | np.asarray(features), np.asarray(targets), active_targets) 92 | 93 | def _match(self, detections): 94 | 95 | def gated_metric(tracks, dets, track_indices, detection_indices): 96 | features = np.array([dets[i].feature for i in detection_indices]) 97 | targets = np.array([tracks[i].track_id for i in track_indices]) 98 | cost_matrix = self.metric.distance(features, targets) 99 | cost_matrix = linear_assignment.gate_cost_matrix( 100 | self.kf, cost_matrix, tracks, dets, track_indices, 101 | detection_indices) 102 | 103 | return cost_matrix 104 | 105 | # Split track set into confirmed and unconfirmed tracks. 106 | confirmed_tracks = [ 107 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 108 | unconfirmed_tracks = [ 109 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 110 | 111 | # Associate confirmed tracks using appearance features. 
112 | matches_a, unmatched_tracks_a, unmatched_detections = \ 113 | linear_assignment.matching_cascade( 114 | gated_metric, self.metric.matching_threshold, self.max_age, 115 | self.tracks, detections, confirmed_tracks) 116 | 117 | # Associate remaining tracks together with unconfirmed tracks using IOU. 118 | iou_track_candidates = unconfirmed_tracks + [ 119 | k for k in unmatched_tracks_a if 120 | self.tracks[k].time_since_update == 1] 121 | unmatched_tracks_a = [ 122 | k for k in unmatched_tracks_a if 123 | self.tracks[k].time_since_update != 1] 124 | matches_b, unmatched_tracks_b, unmatched_detections = \ 125 | linear_assignment.min_cost_matching( 126 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 127 | detections, iou_track_candidates, unmatched_detections) 128 | 129 | matches = matches_a + matches_b 130 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 131 | return matches, unmatched_tracks, unmatched_detections 132 | 133 | def _initiate_track(self, detection): 134 | mean, covariance = self.kf.initiate(detection.to_xyah()) 135 | self.tracks.append(Track( 136 | mean, covariance, detection.label, self._next_id, self.n_init, self.max_age, 137 | detection.feature)) 138 | self._next_id += 1 139 | -------------------------------------------------------------------------------- /srcs/deep_sort/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/srcs/deep_sort/utils/__init__.py -------------------------------------------------------------------------------- /srcs/deep_sort/utils/asserts.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | 4 | def assert_in(file, files_to_check): 5 | if file not in files_to_check: 6 | raise AssertionError("{} does not exist in the list".format(str(file))) 7 | return True 8 | 9 | 10 | def assert_in_env(check_list: list): 11 | for item in check_list: 12 | assert_in(item, environ.keys()) 13 | return True 14 | -------------------------------------------------------------------------------- /srcs/deep_sort/utils/draw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1) 5 | 6 | 7 | def compute_color_for_labels(label): 8 | """ 9 | Simple function that adds fixed color depending on the class 10 | """ 11 | color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette] 12 | return tuple(color) 13 | 14 | 15 | def draw_boxes(img, bbox, identities=None, offset=(0,0)): 16 | for i,box in enumerate(bbox): 17 | x1,y1,x2,y2 = [int(i) for i in box] 18 | x1 += offset[0] 19 | x2 += offset[0] 20 | y1 += offset[1] 21 | y2 += offset[1] 22 | # box text and bar 23 | id = int(identities[i]) if identities is not None else 0 24 | color = compute_color_for_labels(id) 25 | label = '{}{:d}'.format("", id) 26 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0] 27 | cv2.rectangle(img,(x1, y1),(x2,y2),color,3) 28 | cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 29 | cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2) 30 | return img 31 | 32 | 33 | 34 | if __name__ == '__main__': 35 | for i in range(82): 36 | print(compute_color_for_labels(i)) 37 | -------------------------------------------------------------------------------- 
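A minimal way to try `draw_boxes` above on a synthetic frame; the import path and output file name are assumptions, not part of the repository.

```python
import numpy as np
import cv2
from utils.draw import draw_boxes  # assumes srcs/deep_sort is on sys.path

frame = np.zeros((480, 640, 3), dtype=np.uint8)            # blank BGR frame
tracks_xyxy = [[50, 60, 180, 300], [350, 120, 470, 330]]   # (x1, y1, x2, y2) per track
track_ids = [1, 7]                                         # the identity decides the colour

frame = draw_boxes(frame, tracks_xyxy, identities=track_ids)
cv2.imwrite("draw_boxes_demo.jpg", frame)                  # hypothetical output file
```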
/srcs/deep_sort/utils/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | mm.lap.default_solver = 'lap' 6 | from utils.io import read_results, unzip_objs 7 | 8 | 9 | class Evaluator(object): 10 | 11 | def __init__(self, data_root, seq_name, data_type): 12 | self.data_root = data_root 13 | self.seq_name = seq_name 14 | self.data_type = data_type 15 | 16 | self.load_annotations() 17 | self.reset_accumulator() 18 | 19 | def load_annotations(self): 20 | assert self.data_type == 'mot' 21 | 22 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 23 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 24 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 25 | 26 | def reset_accumulator(self): 27 | self.acc = mm.MOTAccumulator(auto_id=True) 28 | 29 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 30 | # results 31 | trk_tlwhs = np.copy(trk_tlwhs) 32 | trk_ids = np.copy(trk_ids) 33 | 34 | # gts 35 | gt_objs = self.gt_frame_dict.get(frame_id, []) 36 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 37 | 38 | # ignore boxes 39 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 40 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 41 | 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | 57 | # get distance matrix 58 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 59 | 60 | # acc 61 | self.acc.update(gt_ids, trk_ids, iou_distance) 62 | 63 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 64 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 65 | else: 66 | events = None 67 | return events 68 | 69 | def eval_file(self, filename): 70 | self.reset_accumulator() 71 | 72 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 73 | frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 74 | for frame_id in frames: 75 | trk_objs = result_frame_dict.get(frame_id, []) 76 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 77 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 78 | 79 | return self.acc 80 | 81 | @staticmethod 82 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 83 | names = copy.deepcopy(names) 84 | if metrics is None: 85 | metrics = mm.metrics.motchallenge_metrics 86 | metrics = copy.deepcopy(metrics) 87 | 88 | mh = mm.metrics.create() 89 | summary = mh.compute_many( 90 | accs, 91 | metrics=metrics, 92 | names=names, 93 | generate_overall=True 94 | ) 95 | 96 | return summary 97 | 98 | @staticmethod 99 | def save_summary(summary, filename): 100 | import pandas as pd 101 | writer = pd.ExcelWriter(filename) 102 | summary.to_excel(writer) 103 | writer.save() 104 | 
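The `Evaluator` above is a thin wrapper around `motmetrics`. The following toy sketch, independent of the repository, shows the same accumulate-then-summarise flow on two hand-made frames.

```python
import numpy as np
import motmetrics as mm

acc = mm.MOTAccumulator(auto_id=True)

# Frame 1: two ground-truth objects, two hypotheses (tlwh boxes), both matched.
gt_ids = [1, 2]
gt_boxes = np.array([[100, 100, 50, 80], [300, 200, 60, 60]])
trk_boxes = np.array([[102, 101, 50, 80], [305, 198, 60, 60]])
dist = mm.distances.iou_matrix(gt_boxes, trk_boxes, max_iou=0.5)
acc.update(gt_ids, [10, 11], dist)

# Frame 2: both objects are still present but the tracker reports only one,
# so object 2 is counted as a miss.
dist = mm.distances.iou_matrix(gt_boxes, trk_boxes[:1], max_iou=0.5)
acc.update(gt_ids, [10], dist)

mh = mm.metrics.create()
print(mh.compute(acc, metrics=['num_frames', 'mota', 'idf1'], name='toy'))
```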
-------------------------------------------------------------------------------- /srcs/deep_sort/utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | # from utils.log import get_logger 6 | 7 | 8 | def write_results(filename, results, data_type): 9 | if data_type == 'mot': 10 | save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n' 11 | elif data_type == 'kitti': 12 | save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' 13 | else: 14 | raise ValueError(data_type) 15 | 16 | with open(filename, 'w') as f: 17 | for frame_id, tlwhs, track_ids in results: 18 | if data_type == 'kitti': 19 | frame_id -= 1 20 | for tlwh, track_id in zip(tlwhs, track_ids): 21 | if track_id < 0: 22 | continue 23 | x1, y1, w, h = tlwh 24 | x2, y2 = x1 + w, y1 + h 25 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h) 26 | f.write(line) 27 | 28 | 29 | # def write_results(filename, results_dict: Dict, data_type: str): 30 | # if not filename: 31 | # return 32 | # path = os.path.dirname(filename) 33 | # if not os.path.exists(path): 34 | # os.makedirs(path) 35 | 36 | # if data_type in ('mot', 'mcmot', 'lab'): 37 | # save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 38 | # elif data_type == 'kitti': 39 | # save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 40 | # else: 41 | # raise ValueError(data_type) 42 | 43 | # with open(filename, 'w') as f: 44 | # for frame_id, frame_data in results_dict.items(): 45 | # if data_type == 'kitti': 46 | # frame_id -= 1 47 | # for tlwh, track_id in frame_data: 48 | # if track_id < 0: 49 | # continue 50 | # x1, y1, w, h = tlwh 51 | # x2, y2 = x1 + w, y1 + h 52 | # line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 53 | # f.write(line) 54 | # logger.info('Save results to {}'.format(filename)) 55 | 56 | 57 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 58 | if data_type in ('mot', 'lab'): 59 | read_fun = read_mot_results 60 | else: 61 | raise ValueError('Unknown data type: {}'.format(data_type)) 62 | 63 | return read_fun(filename, is_gt, is_ignore) 64 | 65 | 66 | """ 67 | labels={'ped', ... % 1 68 | 'person_on_vhcl', ... % 2 69 | 'car', ... % 3 70 | 'bicycle', ... % 4 71 | 'mbike', ... % 5 72 | 'non_mot_vhcl', ... % 6 73 | 'static_person', ... % 7 74 | 'distractor', ... % 8 75 | 'occluder', ... % 9 76 | 'occluder_on_grnd', ... %10 77 | 'occluder_full', ... % 11 78 | 'reflection', ... % 12 79 | 'crowd' ... 
% 13 80 | }; 81 | """ 82 | 83 | 84 | def read_mot_results(filename, is_gt, is_ignore): 85 | valid_labels = {1} 86 | ignore_labels = {2, 7, 8, 12} 87 | results_dict = dict() 88 | if os.path.isfile(filename): 89 | with open(filename, 'r') as f: 90 | for line in f.readlines(): 91 | linelist = line.split(',') 92 | if len(linelist) < 7: 93 | continue 94 | fid = int(linelist[0]) 95 | if fid < 1: 96 | continue 97 | results_dict.setdefault(fid, list()) 98 | 99 | if is_gt: 100 | if 'MOT16-' in filename or 'MOT17-' in filename: 101 | label = int(float(linelist[7])) 102 | mark = int(float(linelist[6])) 103 | if mark == 0 or label not in valid_labels: 104 | continue 105 | score = 1 106 | elif is_ignore: 107 | if 'MOT16-' in filename or 'MOT17-' in filename: 108 | label = int(float(linelist[7])) 109 | vis_ratio = float(linelist[8]) 110 | if label not in ignore_labels and vis_ratio >= 0: 111 | continue 112 | else: 113 | continue 114 | score = 1 115 | else: 116 | score = float(linelist[6]) 117 | 118 | tlwh = tuple(map(float, linelist[2:6])) 119 | target_id = int(linelist[1]) 120 | 121 | results_dict[fid].append((tlwh, target_id, score)) 122 | 123 | return results_dict 124 | 125 | 126 | def unzip_objs(objs): 127 | if len(objs) > 0: 128 | tlwhs, ids, scores = zip(*objs) 129 | else: 130 | tlwhs, ids, scores = [], [], [] 131 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 132 | 133 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /srcs/deep_sort/utils/json_logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | References: 3 | https://medium.com/analytics-vidhya/creating-a-custom-logging-mechanism-for-real-time-object-detection-using-tdd-4ca2cfcd0a2f 4 | """ 5 | import json 6 | from os import makedirs 7 | from os.path import exists, join 8 | from datetime import datetime 9 | 10 | 11 | class JsonMeta(object): 12 | HOURS = 3 13 | MINUTES = 59 14 | SECONDS = 59 15 | PATH_TO_SAVE = 'LOGS' 16 | DEFAULT_FILE_NAME = 'remaining' 17 | 18 | 19 | class BaseJsonLogger(object): 20 | """ 21 | This is the base class that returns __dict__ of its own 22 | it also returns the dicts of objects in the attributes that are list instances 23 | 24 | """ 25 | 26 | def dic(self): 27 | # returns dicts of objects 28 | out = {} 29 | for k, v in self.__dict__.items(): 30 | if hasattr(v, 'dic'): 31 | out[k] = v.dic() 32 | elif isinstance(v, list): 33 | out[k] = self.list(v) 34 | else: 35 | out[k] = v 36 | return out 37 | 38 | @staticmethod 39 | def list(values): 40 | # applies the dic method on items in the list 41 | return [v.dic() if hasattr(v, 'dic') else v for v in values] 42 | 43 | 44 | class Label(BaseJsonLogger): 45 | """ 46 | For each bounding box there are various categories with confidences. Label class keeps track of that information. 47 | """ 48 | 49 | def __init__(self, category: str, confidence: float): 50 | self.category = category 51 | self.confidence = confidence 52 | 53 | 54 | class Bbox(BaseJsonLogger): 55 | """ 56 | This module stores the information for each frame and use them in JsonParser 57 | Attributes: 58 | labels (list): List of label module. 59 | top (int): 60 | left (int): 61 | width (int): 62 | height (int): 63 | 64 | Args: 65 | bbox_id (float): 66 | top (int): 67 | left (int): 68 | width (int): 69 | height (int): 70 | 71 | References: 72 | Check Label module for better understanding. 
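`write_results` and `read_results` above round-trip the plain MOT text format, one `frame,id,x1,y1,w,h,-1,-1,-1,-1` line per box. A small sketch with toy values and a hypothetical output path, assuming `srcs/deep_sort` is on `sys.path`:

```python
from utils.io import write_results, read_results  # assumed import path

results = [
    # (frame_id, list of tlwh boxes, list of track ids)
    (1, [(100.0, 100.0, 50.0, 80.0)], [1]),
    (2, [(105.0, 102.0, 50.0, 80.0), (300.0, 200.0, 60.0, 60.0)], [1, 2]),
]
write_results("toy_results.txt", results, data_type="mot")   # hypothetical path

frames = read_results("toy_results.txt", "mot", is_gt=False)
print(frames[2])   # [((105.0, 102.0, 50.0, 80.0), 1, -1.0), ((300.0, 200.0, 60.0, 60.0), 2, -1.0)]
```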
73 | 74 | 75 | """ 76 | 77 | def __init__(self, bbox_id, top, left, width, height): 78 | self.labels = [] 79 | self.bbox_id = bbox_id 80 | self.top = top 81 | self.left = left 82 | self.width = width 83 | self.height = height 84 | 85 | def add_label(self, category, confidence): 86 | # adds category and confidence only if top_k is not exceeded. 87 | self.labels.append(Label(category, confidence)) 88 | 89 | def labels_full(self, value): 90 | return len(self.labels) == value 91 | 92 | 93 | class Frame(BaseJsonLogger): 94 | """ 95 | This module stores the information for each frame and use them in JsonParser 96 | Attributes: 97 | timestamp (float): The elapsed time of captured frame 98 | frame_id (int): The frame number of the captured video 99 | bboxes (list of Bbox objects): Stores the list of bbox objects. 100 | 101 | References: 102 | Check Bbox class for better information 103 | 104 | Args: 105 | timestamp (float): 106 | frame_id (int): 107 | 108 | """ 109 | 110 | def __init__(self, frame_id: int, timestamp: float = None): 111 | self.frame_id = frame_id 112 | self.timestamp = timestamp 113 | self.bboxes = [] 114 | 115 | def add_bbox(self, bbox_id: int, top: int, left: int, width: int, height: int): 116 | bboxes_ids = [bbox.bbox_id for bbox in self.bboxes] 117 | if bbox_id not in bboxes_ids: 118 | self.bboxes.append(Bbox(bbox_id, top, left, width, height)) 119 | else: 120 | raise ValueError("Frame with id: {} already has a Bbox with id: {}".format(self.frame_id, bbox_id)) 121 | 122 | def add_label_to_bbox(self, bbox_id: int, category: str, confidence: float): 123 | bboxes = {bbox.id: bbox for bbox in self.bboxes} 124 | if bbox_id in bboxes.keys(): 125 | res = bboxes.get(bbox_id) 126 | res.add_label(category, confidence) 127 | else: 128 | raise ValueError('the bbox with id: {} does not exists!'.format(bbox_id)) 129 | 130 | 131 | class BboxToJsonLogger(BaseJsonLogger): 132 | """ 133 | ُ This module is designed to automate the task of logging jsons. An example json is used 134 | to show the contents of json file shortly 135 | Example: 136 | { 137 | "video_details": { 138 | "frame_width": 1920, 139 | "frame_height": 1080, 140 | "frame_rate": 20, 141 | "video_name": "/home/gpu/codes/MSD/pedestrian_2/project/public/camera1.avi" 142 | }, 143 | "frames": [ 144 | { 145 | "frame_id": 329, 146 | "timestamp": 3365.1254 147 | "bboxes": [ 148 | { 149 | "labels": [ 150 | { 151 | "category": "pedestrian", 152 | "confidence": 0.9 153 | } 154 | ], 155 | "bbox_id": 0, 156 | "top": 1257, 157 | "left": 138, 158 | "width": 68, 159 | "height": 109 160 | } 161 | ] 162 | }], 163 | 164 | Attributes: 165 | frames (dict): It's a dictionary that maps each frame_id to json attributes. 166 | video_details (dict): information about video file. 167 | top_k_labels (int): shows the allowed number of labels 168 | start_time (datetime object): we use it to automate the json output by time. 
169 | 170 | Args: 171 | top_k_labels (int): shows the allowed number of labels 172 | 173 | """ 174 | 175 | def __init__(self, top_k_labels: int = 1): 176 | self.frames = {} 177 | self.video_details = self.video_details = dict(frame_width=None, frame_height=None, frame_rate=None, 178 | video_name=None) 179 | self.top_k_labels = top_k_labels 180 | self.start_time = datetime.now() 181 | 182 | def set_top_k(self, value): 183 | self.top_k_labels = value 184 | 185 | def frame_exists(self, frame_id: int) -> bool: 186 | """ 187 | Args: 188 | frame_id (int): 189 | 190 | Returns: 191 | bool: true if frame_id is recognized 192 | """ 193 | return frame_id in self.frames.keys() 194 | 195 | def add_frame(self, frame_id: int, timestamp: float = None) -> None: 196 | """ 197 | Args: 198 | frame_id (int): 199 | timestamp (float): opencv captured frame time property 200 | 201 | Raises: 202 | ValueError: if frame_id would not exist in class frames attribute 203 | 204 | Returns: 205 | None 206 | 207 | """ 208 | if not self.frame_exists(frame_id): 209 | self.frames[frame_id] = Frame(frame_id, timestamp) 210 | else: 211 | raise ValueError("Frame id: {} already exists".format(frame_id)) 212 | 213 | def bbox_exists(self, frame_id: int, bbox_id: int) -> bool: 214 | """ 215 | Args: 216 | frame_id: 217 | bbox_id: 218 | 219 | Returns: 220 | bool: if bbox exists in frame bboxes list 221 | """ 222 | bboxes = [] 223 | if self.frame_exists(frame_id=frame_id): 224 | bboxes = [bbox.bbox_id for bbox in self.frames[frame_id].bboxes] 225 | return bbox_id in bboxes 226 | 227 | def find_bbox(self, frame_id: int, bbox_id: int): 228 | """ 229 | 230 | Args: 231 | frame_id: 232 | bbox_id: 233 | 234 | Returns: 235 | bbox_id (int): 236 | 237 | Raises: 238 | ValueError: if bbox_id does not exist in the bbox list of specific frame. 239 | """ 240 | if not self.bbox_exists(frame_id, bbox_id): 241 | raise ValueError("frame with id: {} does not contain bbox with id: {}".format(frame_id, bbox_id)) 242 | bboxes = {bbox.bbox_id: bbox for bbox in self.frames[frame_id].bboxes} 243 | return bboxes.get(bbox_id) 244 | 245 | def add_bbox_to_frame(self, frame_id: int, bbox_id: int, top: int, left: int, width: int, height: int) -> None: 246 | """ 247 | 248 | Args: 249 | frame_id (int): 250 | bbox_id (int): 251 | top (int): 252 | left (int): 253 | width (int): 254 | height (int): 255 | 256 | Returns: 257 | None 258 | 259 | Raises: 260 | ValueError: if bbox_id already exist in frame information with frame_id 261 | ValueError: if frame_id does not exist in frames attribute 262 | """ 263 | if self.frame_exists(frame_id): 264 | frame = self.frames[frame_id] 265 | if not self.bbox_exists(frame_id, bbox_id): 266 | frame.add_bbox(bbox_id, top, left, width, height) 267 | else: 268 | raise ValueError( 269 | "frame with frame_id: {} already contains the bbox with id: {} ".format(frame_id, bbox_id)) 270 | else: 271 | raise ValueError("frame with frame_id: {} does not exist".format(frame_id)) 272 | 273 | def add_label_to_bbox(self, frame_id: int, bbox_id: int, category: str, confidence: float): 274 | """ 275 | Args: 276 | frame_id: 277 | bbox_id: 278 | category: 279 | confidence: the confidence value returned from yolo detection 280 | 281 | Returns: 282 | None 283 | 284 | Raises: 285 | ValueError: if labels quota (top_k_labels) exceeds. 
286 | """ 287 | bbox = self.find_bbox(frame_id, bbox_id) 288 | if not bbox.labels_full(self.top_k_labels): 289 | bbox.add_label(category, confidence) 290 | else: 291 | raise ValueError("labels in frame_id: {}, bbox_id: {} is fulled".format(frame_id, bbox_id)) 292 | 293 | def add_video_details(self, frame_width: int = None, frame_height: int = None, frame_rate: int = None, 294 | video_name: str = None): 295 | self.video_details['frame_width'] = frame_width 296 | self.video_details['frame_height'] = frame_height 297 | self.video_details['frame_rate'] = frame_rate 298 | self.video_details['video_name'] = video_name 299 | 300 | def output(self): 301 | output = {'video_details': self.video_details} 302 | result = list(self.frames.values()) 303 | output['frames'] = [item.dic() for item in result] 304 | return output 305 | 306 | def json_output(self, output_name): 307 | """ 308 | Args: 309 | output_name: 310 | 311 | Returns: 312 | None 313 | 314 | Notes: 315 | It creates the json output with `output_name` name. 316 | """ 317 | if not output_name.endswith('.json'): 318 | output_name += '.json' 319 | with open(output_name, 'w') as file: 320 | json.dump(self.output(), file) 321 | file.close() 322 | 323 | def set_start(self): 324 | self.start_time = datetime.now() 325 | 326 | def schedule_output_by_time(self, output_dir=JsonMeta.PATH_TO_SAVE, hours: int = 0, minutes: int = 0, 327 | seconds: int = 60) -> None: 328 | """ 329 | Notes: 330 | Creates folder and then periodically stores the jsons on that address. 331 | 332 | Args: 333 | output_dir (str): the directory where output files will be stored 334 | hours (int): 335 | minutes (int): 336 | seconds (int): 337 | 338 | Returns: 339 | None 340 | 341 | """ 342 | end = datetime.now() 343 | interval = 0 344 | interval += abs(min([hours, JsonMeta.HOURS]) * 3600) 345 | interval += abs(min([minutes, JsonMeta.MINUTES]) * 60) 346 | interval += abs(min([seconds, JsonMeta.SECONDS])) 347 | diff = (end - self.start_time).seconds 348 | 349 | if diff > interval: 350 | output_name = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '.json' 351 | if not exists(output_dir): 352 | makedirs(output_dir) 353 | output = join(output_dir, output_name) 354 | self.json_output(output_name=output) 355 | self.frames = {} 356 | self.start_time = datetime.now() 357 | 358 | def schedule_output_by_frames(self, frames_quota, frame_counter, output_dir=JsonMeta.PATH_TO_SAVE): 359 | """ 360 | saves as the number of frames quota increases higher. 361 | :param frames_quota: 362 | :param frame_counter: 363 | :param output_dir: 364 | :return: 365 | """ 366 | pass 367 | 368 | def flush(self, output_dir): 369 | """ 370 | Notes: 371 | We use this function to output jsons whenever possible. 372 | like the time that we exit the while loop of opencv. 
373 | 374 | Args: 375 | output_dir: 376 | 377 | Returns: 378 | None 379 | 380 | """ 381 | filename = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '-remaining.json' 382 | output = join(output_dir, filename) 383 | self.json_output(output_name=output) 384 | -------------------------------------------------------------------------------- /srcs/deep_sort/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.INFO) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | -------------------------------------------------------------------------------- /srcs/deep_sort/utils/parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | from easydict import EasyDict as edict 4 | 5 | class YamlParser(edict): 6 | """ 7 | This is yaml parser based on EasyDict. 8 | """ 9 | def __init__(self, cfg_dict=None, config_file=None): 10 | if cfg_dict is None: 11 | cfg_dict = {} 12 | 13 | if config_file is not None: 14 | assert(os.path.isfile(config_file)) 15 | with open(config_file, 'r') as fo: 16 | cfg_dict.update(yaml.load(fo.read())) 17 | 18 | super(YamlParser, self).__init__(cfg_dict) 19 | 20 | 21 | def merge_from_file(self, config_file): 22 | with open(config_file, 'r') as fo: 23 | self.update(yaml.load(fo.read(), Loader=yaml.FullLoader)) 24 | 25 | 26 | def merge_from_dict(self, config_dict): 27 | self.update(config_dict) 28 | 29 | 30 | def get_config(config_file=None): 31 | return YamlParser(config_file=config_file) 32 | 33 | 34 | if __name__ == "__main__": 35 | cfg = YamlParser(config_file="../configs/yolov3.yaml") 36 | cfg.merge_from_file("../configs/deep_sort.yaml") 37 | 38 | import ipdb; ipdb.set_trace() -------------------------------------------------------------------------------- /srcs/deep_sort/utils/tools.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from time import time 3 | 4 | 5 | def is_video(ext: str): 6 | """ 7 | Returns true if ext exists in 8 | allowed_exts for video files. 9 | 10 | Args: 11 | ext: 12 | 13 | Returns: 14 | 15 | """ 16 | 17 | allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp') 18 | return any((ext.endswith(x) for x in allowed_exts)) 19 | 20 | 21 | def tik_tok(func): 22 | """ 23 | keep track of time for each process. 
24 | Args: 25 | func: 26 | 27 | Returns: 28 | 29 | """ 30 | @wraps(func) 31 | def _time_it(*args, **kwargs): 32 | start = time() 33 | try: 34 | return func(*args, **kwargs) 35 | finally: 36 | end_ = time() 37 | print("time: {:.03f}s, fps: {:.03f}".format(end_ - start, 1 / (end_ - start))) 38 | 39 | return _time_it 40 | -------------------------------------------------------------------------------- /srcs/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .engine import EngineBuilder, TRTModule, TRTProfilerV0, TRTProfilerV1 # isort:skip # noqa: E501 2 | import warnings 3 | 4 | import torch 5 | 6 | warnings.filterwarnings(action='ignore', category=torch.jit.TracerWarning) 7 | warnings.filterwarnings(action='ignore', category=torch.jit.ScriptWarning) 8 | warnings.filterwarnings(action='ignore', category=UserWarning) 9 | warnings.filterwarnings(action='ignore', category=FutureWarning) 10 | warnings.filterwarnings(action='ignore', category=DeprecationWarning) 11 | __all__ = ['EngineBuilder', 'TRTModule', 'TRTProfilerV0', 'TRTProfilerV1'] 12 | -------------------------------------------------------------------------------- /srcs/models/api.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import List, OrderedDict, Tuple, Union 3 | 4 | import numpy as np 5 | import tensorrt as trt 6 | 7 | warnings.filterwarnings(action='ignore', category=DeprecationWarning) 8 | 9 | 10 | def trtweight(weights: np.ndarray) -> trt.Weights: 11 | weights = weights.astype(weights.dtype.name) 12 | return trt.Weights(weights) 13 | 14 | 15 | def get_width(x: int, gw: float, divisor: int = 8) -> int: 16 | return int(np.ceil(x * gw / divisor) * divisor) 17 | 18 | 19 | def get_depth(x: int, gd: float) -> int: 20 | return max(int(round(x * gd)), 1) 21 | 22 | 23 | def Conv2d(network: trt.INetworkDefinition, weights: OrderedDict, 24 | input: trt.ITensor, out_channel: int, ksize: int, stride: int, 25 | group: int, layer_name: str) -> trt.ILayer: 26 | padding = ksize // 2 27 | conv_w = trtweight(weights[layer_name + '.weight']) 28 | conv_b = trtweight(weights[layer_name + '.bias']) 29 | conv = network.add_convolution_nd(input, 30 | num_output_maps=out_channel, 31 | kernel_shape=trt.DimsHW(ksize, ksize), 32 | kernel=conv_w, 33 | bias=conv_b) 34 | assert conv, 'Add convolution_nd layer failed' 35 | conv.stride_nd = trt.DimsHW(stride, stride) 36 | conv.padding_nd = trt.DimsHW(padding, padding) 37 | conv.num_groups = group 38 | return conv 39 | 40 | 41 | def Conv(network: trt.INetworkDefinition, weights: OrderedDict, 42 | input: trt.ITensor, out_channel: int, ksize: int, stride: int, 43 | group: int, layer_name: str) -> trt.ILayer: 44 | padding = ksize // 2 45 | if ksize > 3: 46 | padding -= 1 47 | conv_w = trtweight(weights[layer_name + '.conv.weight']) 48 | conv_b = trtweight(weights[layer_name + '.conv.bias']) 49 | 50 | conv = network.add_convolution_nd(input, 51 | num_output_maps=out_channel, 52 | kernel_shape=trt.DimsHW(ksize, ksize), 53 | kernel=conv_w, 54 | bias=conv_b) 55 | assert conv, 'Add convolution_nd layer failed' 56 | conv.stride_nd = trt.DimsHW(stride, stride) 57 | conv.padding_nd = trt.DimsHW(padding, padding) 58 | conv.num_groups = group 59 | 60 | sigmoid = network.add_activation(conv.get_output(0), 61 | trt.ActivationType.SIGMOID) 62 | assert sigmoid, 'Add activation layer failed' 63 | dot_product = network.add_elementwise(conv.get_output(0), 64 | sigmoid.get_output(0), 65 | 
trt.ElementWiseOperation.PROD) 66 | assert dot_product, 'Add elementwise layer failed' 67 | return dot_product 68 | 69 | 70 | def Bottleneck(network: trt.INetworkDefinition, weights: OrderedDict, 71 | input: trt.ITensor, c1: int, c2: int, shortcut: bool, 72 | group: int, scale: float, layer_name: str) -> trt.ILayer: 73 | c_ = int(c2 * scale) 74 | conv1 = Conv(network, weights, input, c_, 3, 1, 1, layer_name + '.cv1') 75 | conv2 = Conv(network, weights, conv1.get_output(0), c2, 3, 1, group, 76 | layer_name + '.cv2') 77 | if shortcut and c1 == c2: 78 | ew = network.add_elementwise(input, 79 | conv2.get_output(0), 80 | op=trt.ElementWiseOperation.SUM) 81 | assert ew, 'Add elementwise layer failed' 82 | return ew 83 | return conv2 84 | 85 | 86 | def C2f(network: trt.INetworkDefinition, weights: OrderedDict, 87 | input: trt.ITensor, cout: int, n: int, shortcut: bool, group: int, 88 | scale: float, layer_name: str) -> trt.ILayer: 89 | c_ = int(cout * scale) # e:expand param 90 | conv1 = Conv(network, weights, input, 2 * c_, 1, 1, 1, layer_name + '.cv1') 91 | y1 = conv1.get_output(0) 92 | 93 | b, _, h, w = y1.shape 94 | slice = network.add_slice(y1, (0, c_, 0, 0), (b, c_, h, w), (1, 1, 1, 1)) 95 | assert slice, 'Add slice layer failed' 96 | y2 = slice.get_output(0) 97 | 98 | input_tensors = [y1] 99 | for i in range(n): 100 | b = Bottleneck(network, weights, y2, c_, c_, shortcut, group, 1.0, 101 | layer_name + '.m.' + str(i)) 102 | y2 = b.get_output(0) 103 | input_tensors.append(y2) 104 | 105 | cat = network.add_concatenation(input_tensors) 106 | assert cat, 'Add concatenation layer failed' 107 | 108 | conv2 = Conv(network, weights, cat.get_output(0), cout, 1, 1, 1, 109 | layer_name + '.cv2') 110 | return conv2 111 | 112 | 113 | def SPPF(network: trt.INetworkDefinition, weights: OrderedDict, 114 | input: trt.ITensor, c1: int, c2: int, ksize: int, 115 | layer_name: str) -> trt.ILayer: 116 | c_ = c1 // 2 117 | conv1 = Conv(network, weights, input, c_, 1, 1, 1, layer_name + '.cv1') 118 | 119 | pool1 = network.add_pooling_nd(conv1.get_output(0), trt.PoolingType.MAX, 120 | trt.DimsHW(ksize, ksize)) 121 | assert pool1, 'Add pooling_nd layer failed' 122 | pool1.padding_nd = trt.DimsHW(ksize // 2, ksize // 2) 123 | pool1.stride_nd = trt.DimsHW(1, 1) 124 | 125 | pool2 = network.add_pooling_nd(pool1.get_output(0), trt.PoolingType.MAX, 126 | trt.DimsHW(ksize, ksize)) 127 | assert pool2, 'Add pooling_nd layer failed' 128 | pool2.padding_nd = trt.DimsHW(ksize // 2, ksize // 2) 129 | pool2.stride_nd = trt.DimsHW(1, 1) 130 | 131 | pool3 = network.add_pooling_nd(pool2.get_output(0), trt.PoolingType.MAX, 132 | trt.DimsHW(ksize, ksize)) 133 | assert pool3, 'Add pooling_nd layer failed' 134 | pool3.padding_nd = trt.DimsHW(ksize // 2, ksize // 2) 135 | pool3.stride_nd = trt.DimsHW(1, 1) 136 | 137 | input_tensors = [ 138 | conv1.get_output(0), 139 | pool1.get_output(0), 140 | pool2.get_output(0), 141 | pool3.get_output(0) 142 | ] 143 | cat = network.add_concatenation(input_tensors) 144 | assert cat, 'Add concatenation layer failed' 145 | conv2 = Conv(network, weights, cat.get_output(0), c2, 1, 1, 1, 146 | layer_name + '.cv2') 147 | return conv2 148 | 149 | 150 | def Detect( 151 | network: trt.INetworkDefinition, 152 | weights: OrderedDict, 153 | input: Union[List, Tuple], 154 | s: Union[List, Tuple], 155 | layer_name: str, 156 | reg_max: int = 16, 157 | fp16: bool = True, 158 | iou: float = 0.65, 159 | conf: float = 0.25, 160 | topk: int = 100, 161 | ) -> trt.ILayer: 162 | bboxes_branch = [] 163 | scores_branch = [] 
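# Note (editor comment, derived from the code below): per detection scale this builder
# (a) precomputes anchor centers (grid cells + 0.5 offset) and per-cell strides,
# (b) runs the box branch cv2.{i} -> 4*reg_max DFL logits and the class branch cv3.{i} -> nc logits,
# (c) decodes the DFL distribution via softmax and a matmul with [0..reg_max), turns the
#     decoded distances into x1y1/x2y2 against the anchors and scales by stride, and
# (d) finally feeds boxes and sigmoid scores to the EfficientNMS_TRT plugin, whose outputs
#     are named num_dets / bboxes / scores / labels.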
164 | anchors = [] 165 | strides = [] 166 | for i, (inp, stride) in enumerate(zip(input, s)): 167 | h, w = inp.shape[2:] 168 | sx = np.arange(0, w).astype(np.float16 if fp16 else np.float32) + 0.5 169 | sy = np.arange(0, h).astype(np.float16 if fp16 else np.float32) + 0.5 170 | sy, sx = np.meshgrid(sy, sx) 171 | a = np.ascontiguousarray(np.stack((sy, sx), -1).reshape(-1, 2)) 172 | anchors.append(a) 173 | strides.append( 174 | np.full((1, h * w), 175 | stride, 176 | dtype=np.float16 if fp16 else np.float32)) 177 | c2 = weights[f'{layer_name}.cv2.{i}.0.conv.weight'].shape[0] 178 | c3 = weights[f'{layer_name}.cv3.{i}.0.conv.weight'].shape[0] 179 | nc = weights[f'{layer_name}.cv3.0.2.weight'].shape[0] 180 | reg_max_x4 = weights[layer_name + f'.cv2.{i}.2.weight'].shape[0] 181 | assert reg_max_x4 == reg_max * 4 182 | b_Conv_0 = Conv(network, weights, inp, c2, 3, 1, 1, 183 | layer_name + f'.cv2.{i}.0') 184 | b_Conv_1 = Conv(network, weights, b_Conv_0.get_output(0), c2, 3, 1, 1, 185 | layer_name + f'.cv2.{i}.1') 186 | b_Conv_2 = Conv2d(network, weights, b_Conv_1.get_output(0), reg_max_x4, 187 | 1, 1, 1, layer_name + f'.cv2.{i}.2') 188 | 189 | b_out = b_Conv_2.get_output(0) 190 | b_shape = network.add_constant([ 191 | 4, 192 | ], np.array(b_out.shape[0:1] + (4, reg_max, -1), dtype=np.int32)) 193 | assert b_shape, 'Add constant layer failed' 194 | b_shuffle = network.add_shuffle(b_out) 195 | assert b_shuffle, 'Add shuffle layer failed' 196 | b_shuffle.set_input(1, b_shape.get_output(0)) 197 | b_shuffle.second_transpose = (0, 3, 1, 2) 198 | 199 | bboxes_branch.append(b_shuffle.get_output(0)) 200 | 201 | s_Conv_0 = Conv(network, weights, inp, c3, 3, 1, 1, 202 | layer_name + f'.cv3.{i}.0') 203 | s_Conv_1 = Conv(network, weights, s_Conv_0.get_output(0), c3, 3, 1, 1, 204 | layer_name + f'.cv3.{i}.1') 205 | s_Conv_2 = Conv2d(network, weights, s_Conv_1.get_output(0), nc, 1, 1, 206 | 1, layer_name + f'.cv3.{i}.2') 207 | s_out = s_Conv_2.get_output(0) 208 | s_shape = network.add_constant([ 209 | 3, 210 | ], np.array(s_out.shape[0:2] + (-1, ), dtype=np.int32)) 211 | assert s_shape, 'Add constant layer failed' 212 | s_shuffle = network.add_shuffle(s_out) 213 | assert s_shuffle, 'Add shuffle layer failed' 214 | s_shuffle.set_input(1, s_shape.get_output(0)) 215 | s_shuffle.second_transpose = (0, 2, 1) 216 | 217 | scores_branch.append(s_shuffle.get_output(0)) 218 | 219 | Cat_bboxes = network.add_concatenation(bboxes_branch) 220 | assert Cat_bboxes, 'Add concatenation layer failed' 221 | Cat_scores = network.add_concatenation(scores_branch) 222 | assert Cat_scores, 'Add concatenation layer failed' 223 | Cat_scores.axis = 1 224 | 225 | Softmax = network.add_softmax(Cat_bboxes.get_output(0)) 226 | assert Softmax, 'Add softmax layer failed' 227 | Softmax.axes = 1 << 3 228 | 229 | SCORES = network.add_activation(Cat_scores.get_output(0), 230 | trt.ActivationType.SIGMOID) 231 | assert SCORES, 'Add activation layer failed' 232 | 233 | reg_max = np.arange( 234 | 0, reg_max).astype(np.float16 if fp16 else np.float32).reshape( 235 | (1, 1, -1, 1)) 236 | constant = network.add_constant(reg_max.shape, reg_max) 237 | assert constant, 'Add constant layer failed' 238 | Matmul = network.add_matrix_multiply(Softmax.get_output(0), 239 | trt.MatrixOperation.NONE, 240 | constant.get_output(0), 241 | trt.MatrixOperation.NONE) 242 | assert Matmul, 'Add matrix_multiply layer failed' 243 | pre_bboxes = network.add_gather( 244 | Matmul.get_output(0), 245 | network.add_constant([ 246 | 1, 247 | ], np.array([0], 
dtype=np.int32)).get_output(0), 3) 248 | assert pre_bboxes, 'Add gather layer failed' 249 | pre_bboxes.num_elementwise_dims = 1 250 | 251 | pre_bboxes_tensor = pre_bboxes.get_output(0) 252 | b, c, _ = pre_bboxes_tensor.shape 253 | slice_x1y1 = network.add_slice(pre_bboxes_tensor, (0, 0, 0), (b, c, 2), 254 | (1, 1, 1)) 255 | assert slice_x1y1, 'Add slice layer failed' 256 | slice_x2y2 = network.add_slice(pre_bboxes_tensor, (0, 0, 2), (b, c, 2), 257 | (1, 1, 1)) 258 | assert slice_x2y2, 'Add slice layer failed' 259 | anchors = np.concatenate(anchors, 0)[np.newaxis] 260 | anchors = network.add_constant(anchors.shape, anchors) 261 | assert anchors, 'Add constant layer failed' 262 | strides = np.concatenate(strides, 1)[..., np.newaxis] 263 | strides = network.add_constant(strides.shape, strides) 264 | assert strides, 'Add constant layer failed' 265 | 266 | Sub = network.add_elementwise(anchors.get_output(0), 267 | slice_x1y1.get_output(0), 268 | trt.ElementWiseOperation.SUB) 269 | assert Sub, 'Add elementwise layer failed' 270 | Add = network.add_elementwise(anchors.get_output(0), 271 | slice_x2y2.get_output(0), 272 | trt.ElementWiseOperation.SUM) 273 | assert Add, 'Add elementwise layer failed' 274 | x1y1 = Sub.get_output(0) 275 | x2y2 = Add.get_output(0) 276 | 277 | Cat_bboxes_ = network.add_concatenation([x1y1, x2y2]) 278 | assert Cat_bboxes_, 'Add concatenation layer failed' 279 | Cat_bboxes_.axis = 2 280 | 281 | BBOXES = network.add_elementwise(Cat_bboxes_.get_output(0), 282 | strides.get_output(0), 283 | trt.ElementWiseOperation.PROD) 284 | assert BBOXES, 'Add elementwise layer failed' 285 | plugin_creator = trt.get_plugin_registry().get_plugin_creator( 286 | 'EfficientNMS_TRT', '1') 287 | assert plugin_creator, 'Plugin EfficientNMS_TRT is not registried' 288 | 289 | background_class = trt.PluginField('background_class', 290 | np.array(-1, np.int32), 291 | trt.PluginFieldType.INT32) 292 | box_coding = trt.PluginField('box_coding', np.array(0, np.int32), 293 | trt.PluginFieldType.INT32) 294 | iou_threshold = trt.PluginField('iou_threshold', 295 | np.array(iou, dtype=np.float32), 296 | trt.PluginFieldType.FLOAT32) 297 | max_output_boxes = trt.PluginField('max_output_boxes', 298 | np.array(topk, np.int32), 299 | trt.PluginFieldType.INT32) 300 | plugin_version = trt.PluginField('plugin_version', np.array('1'), 301 | trt.PluginFieldType.CHAR) 302 | score_activation = trt.PluginField('score_activation', 303 | np.array(0, np.int32), 304 | trt.PluginFieldType.INT32) 305 | score_threshold = trt.PluginField('score_threshold', 306 | np.array(conf, dtype=np.float32), 307 | trt.PluginFieldType.FLOAT32) 308 | 309 | batched_nms_op = plugin_creator.create_plugin( 310 | name='batched_nms', 311 | field_collection=trt.PluginFieldCollection([ 312 | background_class, box_coding, iou_threshold, max_output_boxes, 313 | plugin_version, score_activation, score_threshold 314 | ])) 315 | 316 | batched_nms = network.add_plugin_v2( 317 | inputs=[BBOXES.get_output(0), 318 | SCORES.get_output(0)], 319 | plugin=batched_nms_op) 320 | 321 | batched_nms.get_output(0).name = 'num_dets' 322 | batched_nms.get_output(1).name = 'bboxes' 323 | batched_nms.get_output(2).name = 'scores' 324 | batched_nms.get_output(3).name = 'labels' 325 | 326 | return batched_nms 327 | -------------------------------------------------------------------------------- /srcs/models/common.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import torch 4 | import torch.nn as nn 5 | 
from torch import Graph, Tensor, Value 6 | 7 | 8 | def make_anchors(feats: Tensor, 9 | strides: Tensor, 10 | grid_cell_offset: float = 0.5) -> Tuple[Tensor, Tensor]: 11 | anchor_points, stride_tensor = [], [] 12 | assert feats is not None 13 | dtype, device = feats[0].dtype, feats[0].device 14 | for i, stride in enumerate(strides): 15 | _, _, h, w = feats[i].shape 16 | sx = torch.arange(end=w, device=device, 17 | dtype=dtype) + grid_cell_offset # shift x 18 | sy = torch.arange(end=h, device=device, 19 | dtype=dtype) + grid_cell_offset # shift y 20 | sy, sx = torch.meshgrid(sy, sx) 21 | anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2)) 22 | stride_tensor.append( 23 | torch.full((h * w, 1), stride, dtype=dtype, device=device)) 24 | return torch.cat(anchor_points), torch.cat(stride_tensor) 25 | 26 | 27 | class TRT_NMS(torch.autograd.Function): 28 | 29 | @staticmethod 30 | def forward( 31 | ctx: Graph, 32 | boxes: Tensor, 33 | scores: Tensor, 34 | iou_threshold: float = 0.65, 35 | score_threshold: float = 0.25, 36 | max_output_boxes: int = 100, 37 | background_class: int = -1, 38 | box_coding: int = 0, 39 | plugin_version: str = '1', 40 | score_activation: int = 0 41 | ) -> Tuple[Tensor, Tensor, Tensor, Tensor]: 42 | batch_size, num_boxes, num_classes = scores.shape 43 | num_dets = torch.randint(0, 44 | max_output_boxes, (batch_size, 1), 45 | dtype=torch.int32) 46 | boxes = torch.randn(batch_size, max_output_boxes, 4) 47 | scores = torch.randn(batch_size, max_output_boxes) 48 | labels = torch.randint(0, 49 | num_classes, (batch_size, max_output_boxes), 50 | dtype=torch.int32) 51 | 52 | return num_dets, boxes, scores, labels 53 | 54 | @staticmethod 55 | def symbolic( 56 | g, 57 | boxes: Value, 58 | scores: Value, 59 | iou_threshold: float = 0.45, 60 | score_threshold: float = 0.25, 61 | max_output_boxes: int = 100, 62 | background_class: int = -1, 63 | box_coding: int = 0, 64 | score_activation: int = 0, 65 | plugin_version: str = '1') -> Tuple[Value, Value, Value, Value]: 66 | out = g.op('TRT::EfficientNMS_TRT', 67 | boxes, 68 | scores, 69 | iou_threshold_f=iou_threshold, 70 | score_threshold_f=score_threshold, 71 | max_output_boxes_i=max_output_boxes, 72 | background_class_i=background_class, 73 | box_coding_i=box_coding, 74 | plugin_version_s=plugin_version, 75 | score_activation_i=score_activation, 76 | outputs=4) 77 | nums_dets, boxes, scores, classes = out 78 | return nums_dets, boxes, scores, classes 79 | 80 | 81 | class C2f(nn.Module): 82 | 83 | def __init__(self, *args, **kwargs): 84 | super().__init__() 85 | 86 | def forward(self, x): 87 | x = self.cv1(x) 88 | x = [x, x[:, self.c:, ...]] 89 | x.extend(m(x[-1]) for m in self.m) 90 | x.pop(1) 91 | return self.cv2(torch.cat(x, 1)) 92 | 93 | 94 | class PostDetect(nn.Module): 95 | export = True 96 | shape = None 97 | dynamic = False 98 | iou_thres = 0.65 99 | conf_thres = 0.25 100 | topk = 100 101 | 102 | def __init__(self, *args, **kwargs): 103 | super().__init__() 104 | 105 | def forward(self, x): 106 | shape = x[0].shape 107 | b, res, b_reg_num = shape[0], [], self.reg_max * 4 108 | for i in range(self.nl): 109 | res.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)) 110 | if self.dynamic or self.shape != shape: 111 | self.anchors, self.strides = (x.transpose( 112 | 0, 1) for x in make_anchors(x, self.stride, 0.5)) 113 | self.shape = shape 114 | x = [i.view(b, self.no, -1) for i in res] 115 | y = torch.cat(x, 2) 116 | boxes, scores = y[:, :b_reg_num, ...], y[:, b_reg_num:, ...].sigmoid() 117 | boxes = boxes.view(b, 
4, self.reg_max, -1).permute(0, 1, 3, 2) 118 | boxes = boxes.softmax(-1) @ torch.arange(self.reg_max).to(boxes) 119 | boxes0, boxes1 = -boxes[:, :2, ...], boxes[:, 2:, ...] 120 | boxes = self.anchors.repeat(b, 2, 1) + torch.cat([boxes0, boxes1], 1) 121 | boxes = boxes * self.strides 122 | 123 | return TRT_NMS.apply(boxes.transpose(1, 2), scores.transpose(1, 2), 124 | self.iou_thres, self.conf_thres, self.topk) 125 | 126 | 127 | class PostSeg(nn.Module): 128 | export = True 129 | shape = None 130 | dynamic = False 131 | 132 | def __init__(self, *args, **kwargs): 133 | super().__init__() 134 | 135 | def forward(self, x): 136 | p = self.proto(x[0]) # mask protos 137 | bs = p.shape[0] # batch size 138 | mc = torch.cat( 139 | [self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 140 | 2) # mask coefficients 141 | boxes, scores, labels = self.forward_det(x) 142 | out = torch.cat([boxes, scores, labels.float(), mc.transpose(1, 2)], 2) 143 | return out, p.flatten(2) 144 | 145 | def forward_det(self, x): 146 | shape = x[0].shape 147 | b, res, b_reg_num = shape[0], [], self.reg_max * 4 148 | for i in range(self.nl): 149 | res.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)) 150 | if self.dynamic or self.shape != shape: 151 | self.anchors, self.strides = \ 152 | (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5)) 153 | self.shape = shape 154 | x = [i.view(b, self.no, -1) for i in res] 155 | y = torch.cat(x, 2) 156 | boxes, scores = y[:, :b_reg_num, ...], y[:, b_reg_num:, ...].sigmoid() 157 | boxes = boxes.view(b, 4, self.reg_max, -1).permute(0, 1, 3, 2) 158 | boxes = boxes.softmax(-1) @ torch.arange(self.reg_max).to(boxes) 159 | boxes0, boxes1 = -boxes[:, :2, ...], boxes[:, 2:, ...] 160 | boxes = self.anchors.repeat(b, 2, 1) + torch.cat([boxes0, boxes1], 1) 161 | boxes = boxes * self.strides 162 | scores, labels = scores.transpose(1, 2).max(dim=-1, keepdim=True) 163 | return boxes.transpose(1, 2), scores, labels 164 | 165 | 166 | def optim(module: nn.Module): 167 | s = str(type(module))[6:-2].split('.')[-1] 168 | if s == 'Detect': 169 | setattr(module, '__class__', PostDetect) 170 | elif s == 'Segment': 171 | setattr(module, '__class__', PostSeg) 172 | elif s == 'C2f': 173 | setattr(module, '__class__', C2f) 174 | -------------------------------------------------------------------------------- /srcs/models/cudart_api.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | from dataclasses import dataclass 4 | from pathlib import Path 5 | from typing import List, Optional, Tuple, Union 6 | 7 | import numpy as np 8 | import tensorrt as trt 9 | from cuda import cudart 10 | from numpy import ndarray 11 | 12 | os.environ['CUDA_MODULE_LOADING'] = 'LAZY' 13 | warnings.filterwarnings(action='ignore', category=DeprecationWarning) 14 | 15 | 16 | @dataclass 17 | class Tensor: 18 | name: str 19 | dtype: np.dtype 20 | shape: Tuple 21 | cpu: ndarray 22 | gpu: int 23 | 24 | 25 | class TRTEngine: 26 | 27 | def __init__(self, weight: Union[str, Path]) -> None: 28 | self.weight = Path(weight) if isinstance(weight, str) else weight 29 | status, self.stream = cudart.cudaStreamCreate() 30 | assert status.value == 0 31 | self.__init_engine() 32 | self.__init_bindings() 33 | self.__warm_up() 34 | 35 | def __init_engine(self) -> None: 36 | logger = trt.Logger(trt.Logger.WARNING) 37 | trt.init_libnvinfer_plugins(logger, namespace='') 38 | with trt.Runtime(logger) as runtime: 39 | model = 
runtime.deserialize_cuda_engine(self.weight.read_bytes()) 40 | 41 | context = model.create_execution_context() 42 | 43 | names = [model.get_binding_name(i) for i in range(model.num_bindings)] 44 | self.num_bindings = model.num_bindings 45 | self.bindings: List[int] = [0] * self.num_bindings 46 | num_inputs, num_outputs = 0, 0 47 | 48 | for i in range(model.num_bindings): 49 | if model.binding_is_input(i): 50 | num_inputs += 1 51 | else: 52 | num_outputs += 1 53 | 54 | self.num_inputs = num_inputs 55 | self.num_outputs = num_outputs 56 | self.model = model 57 | self.context = context 58 | self.input_names = names[:num_inputs] 59 | self.output_names = names[num_inputs:] 60 | 61 | def __init_bindings(self) -> None: 62 | dynamic = False 63 | inp_info = [] 64 | out_info = [] 65 | out_ptrs = [] 66 | for i, name in enumerate(self.input_names): 67 | assert self.model.get_binding_name(i) == name 68 | dtype = trt.nptype(self.model.get_binding_dtype(i)) 69 | shape = tuple(self.model.get_binding_shape(i)) 70 | if -1 in shape: 71 | dynamic |= True 72 | if not dynamic: 73 | cpu = np.empty(shape, dtype) 74 | status, gpu = cudart.cudaMallocAsync(cpu.nbytes, self.stream) 75 | assert status.value == 0 76 | cudart.cudaMemcpyAsync( 77 | gpu, cpu.ctypes.data, cpu.nbytes, 78 | cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) 79 | else: 80 | cpu, gpu = np.empty(0), 0 81 | inp_info.append(Tensor(name, dtype, shape, cpu, gpu)) 82 | for i, name in enumerate(self.output_names): 83 | i += self.num_inputs 84 | assert self.model.get_binding_name(i) == name 85 | dtype = trt.nptype(self.model.get_binding_dtype(i)) 86 | shape = tuple(self.model.get_binding_shape(i)) 87 | if not dynamic: 88 | cpu = np.empty(shape, dtype=dtype) 89 | status, gpu = cudart.cudaMallocAsync(cpu.nbytes, self.stream) 90 | assert status.value == 0 91 | cudart.cudaMemcpyAsync( 92 | gpu, cpu.ctypes.data, cpu.nbytes, 93 | cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) 94 | out_ptrs.append(gpu) 95 | else: 96 | cpu, gpu = np.empty(0), 0 97 | out_info.append(Tensor(name, dtype, shape, cpu, gpu)) 98 | 99 | self.is_dynamic = dynamic 100 | self.inp_info = inp_info 101 | self.out_info = out_info 102 | self.out_ptrs = out_ptrs 103 | 104 | def __warm_up(self) -> None: 105 | if self.is_dynamic: 106 | print('You engine has dynamic axes, please warm up by yourself !') 107 | return 108 | for _ in range(10): 109 | inputs = [] 110 | for i in self.inp_info: 111 | inputs.append(i.cpu) 112 | self.__call__(inputs) 113 | 114 | def set_profiler(self, profiler: Optional[trt.IProfiler]) -> None: 115 | self.context.profiler = profiler \ 116 | if profiler is not None else trt.Profiler() 117 | 118 | def __call__(self, *inputs) -> Union[Tuple, ndarray]: 119 | 120 | assert len(inputs) == self.num_inputs 121 | contiguous_inputs: List[ndarray] = [ 122 | np.ascontiguousarray(i) for i in inputs 123 | ] 124 | 125 | for i in range(self.num_inputs): 126 | 127 | if self.is_dynamic: 128 | self.context.set_binding_shape( 129 | i, tuple(contiguous_inputs[i].shape)) 130 | status, self.inp_info[i].gpu = cudart.cudaMallocAsync( 131 | contiguous_inputs[i].nbytes, self.stream) 132 | assert status.value == 0 133 | cudart.cudaMemcpyAsync( 134 | self.inp_info[i].gpu, contiguous_inputs[i].ctypes.data, 135 | contiguous_inputs[i].nbytes, 136 | cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) 137 | self.bindings[i] = self.inp_info[i].gpu 138 | 139 | output_gpu_ptrs: List[int] = [] 140 | outputs: List[ndarray] = [] 141 | 142 | for i in range(self.num_outputs): 143 | j = 
i + self.num_inputs 144 | if self.is_dynamic: 145 | shape = tuple(self.context.get_binding_shape(j)) 146 | dtype = self.out_info[i].dtype 147 | cpu = np.empty(shape, dtype=dtype) 148 | status, gpu = cudart.cudaMallocAsync(cpu.nbytes, self.stream) 149 | assert status.value == 0 150 | cudart.cudaMemcpyAsync( 151 | gpu, cpu.ctypes.data, cpu.nbytes, 152 | cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) 153 | else: 154 | cpu = self.out_info[i].cpu 155 | gpu = self.out_info[i].gpu 156 | outputs.append(cpu) 157 | output_gpu_ptrs.append(gpu) 158 | self.bindings[j] = gpu 159 | 160 | self.context.execute_async_v2(self.bindings, self.stream) 161 | cudart.cudaStreamSynchronize(self.stream) 162 | 163 | for i, o in enumerate(output_gpu_ptrs): 164 | cudart.cudaMemcpyAsync( 165 | outputs[i].ctypes.data, o, outputs[i].nbytes, 166 | cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, self.stream) 167 | 168 | return tuple(outputs) if len(outputs) > 1 else outputs[0] 169 | -------------------------------------------------------------------------------- /srcs/models/engine.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from collections import defaultdict, namedtuple 4 | from pathlib import Path 5 | from typing import List, Optional, Tuple, Union 6 | 7 | import onnx 8 | import tensorrt as trt 9 | import torch 10 | 11 | os.environ['CUDA_MODULE_LOADING'] = 'LAZY' 12 | 13 | 14 | class EngineBuilder: 15 | seg = False 16 | 17 | def __init__( 18 | self, 19 | checkpoint: Union[str, Path], 20 | device: Optional[Union[str, int, torch.device]] = None) -> None: 21 | checkpoint = Path(checkpoint) if isinstance(checkpoint, 22 | str) else checkpoint 23 | assert checkpoint.exists() and checkpoint.suffix in ('.onnx', '.pkl') 24 | self.api = checkpoint.suffix == '.pkl' 25 | if isinstance(device, str): 26 | device = torch.device(device) 27 | elif isinstance(device, int): 28 | device = torch.device(f'cuda:{device}') 29 | 30 | self.checkpoint = checkpoint 31 | self.device = device 32 | 33 | def __build_engine(self, 34 | fp16: bool = True, 35 | input_shape: Union[List, Tuple] = (1, 3, 640, 640), 36 | iou_thres: float = 0.65, 37 | conf_thres: float = 0.25, 38 | topk: int = 100, 39 | with_profiling: bool = True) -> None: 40 | logger = trt.Logger(trt.Logger.WARNING) 41 | trt.init_libnvinfer_plugins(logger, namespace='') 42 | builder = trt.Builder(logger) 43 | config = builder.create_builder_config() 44 | config.max_workspace_size = torch.cuda.get_device_properties( 45 | self.device).total_memory 46 | flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) 47 | network = builder.create_network(flag) 48 | 49 | self.logger = logger 50 | self.builder = builder 51 | self.network = network 52 | if self.api: 53 | self.build_from_api(fp16, input_shape, iou_thres, conf_thres, topk) 54 | else: 55 | self.build_from_onnx(iou_thres, conf_thres, topk) 56 | if fp16 and self.builder.platform_has_fast_fp16: 57 | config.set_flag(trt.BuilderFlag.FP16) 58 | self.weight = self.checkpoint.with_suffix('.engine') 59 | 60 | if with_profiling: 61 | config.profiling_verbosity = trt.ProfilingVerbosity.DETAILED 62 | with self.builder.build_engine(self.network, config) as engine: 63 | self.weight.write_bytes(engine.serialize()) 64 | self.logger.log( 65 | trt.Logger.WARNING, f'Build tensorrt engine finish.\n' 66 | f'Save in {str(self.weight.absolute())}') 67 | 68 | def build(self, 69 | fp16: bool = True, 70 | input_shape: Union[List, Tuple] = (1, 3, 640, 640), 71 | iou_thres: 
float = 0.65, 72 | conf_thres: float = 0.25, 73 | topk: int = 100, 74 | with_profiling=True) -> None: 75 | self.__build_engine(fp16, input_shape, iou_thres, conf_thres, topk, 76 | with_profiling) 77 | 78 | def build_from_onnx(self, 79 | iou_thres: float = 0.65, 80 | conf_thres: float = 0.25, 81 | topk: int = 100): 82 | parser = trt.OnnxParser(self.network, self.logger) 83 | onnx_model = onnx.load(str(self.checkpoint)) 84 | if not self.seg: 85 | onnx_model.graph.node[-1].attribute[2].i = topk 86 | onnx_model.graph.node[-1].attribute[3].f = conf_thres 87 | onnx_model.graph.node[-1].attribute[4].f = iou_thres 88 | 89 | if not parser.parse(onnx_model.SerializeToString()): 90 | raise RuntimeError( 91 | f'failed to load ONNX file: {str(self.checkpoint)}') 92 | inputs = [ 93 | self.network.get_input(i) for i in range(self.network.num_inputs) 94 | ] 95 | outputs = [ 96 | self.network.get_output(i) for i in range(self.network.num_outputs) 97 | ] 98 | 99 | for inp in inputs: 100 | self.logger.log( 101 | trt.Logger.WARNING, 102 | f'input "{inp.name}" with shape: {inp.shape} ' 103 | f'dtype: {inp.dtype}') 104 | for out in outputs: 105 | self.logger.log( 106 | trt.Logger.WARNING, 107 | f'output "{out.name}" with shape: {out.shape} ' 108 | f'dtype: {out.dtype}') 109 | 110 | def build_from_api( 111 | self, 112 | fp16: bool = True, 113 | input_shape: Union[List, Tuple] = (1, 3, 640, 640), 114 | iou_thres: float = 0.65, 115 | conf_thres: float = 0.25, 116 | topk: int = 100, 117 | ): 118 | assert not self.seg 119 | from .api import SPPF, C2f, Conv, Detect, get_depth, get_width 120 | 121 | with open(self.checkpoint, 'rb') as f: 122 | state_dict = pickle.load(f) 123 | mapping = {0.25: 1024, 0.5: 1024, 0.75: 768, 1.0: 512, 1.25: 512} 124 | 125 | GW = state_dict['GW'] 126 | GD = state_dict['GD'] 127 | width_64 = get_width(64, GW) 128 | width_128 = get_width(128, GW) 129 | width_256 = get_width(256, GW) 130 | width_512 = get_width(512, GW) 131 | width_1024 = get_width(mapping[GW], GW) 132 | depth_3 = get_depth(3, GD) 133 | depth_6 = get_depth(6, GD) 134 | strides = state_dict['strides'] 135 | reg_max = state_dict['reg_max'] 136 | images = self.network.add_input(name='images', 137 | dtype=trt.float32, 138 | shape=trt.Dims4(input_shape)) 139 | assert images, 'Add input failed' 140 | 141 | Conv_0 = Conv(self.network, state_dict, images, width_64, 3, 2, 1, 142 | 'Conv.0') 143 | Conv_1 = Conv(self.network, state_dict, Conv_0.get_output(0), 144 | width_128, 3, 2, 1, 'Conv.1') 145 | C2f_2 = C2f(self.network, state_dict, Conv_1.get_output(0), width_128, 146 | depth_3, True, 1, 0.5, 'C2f.2') 147 | Conv_3 = Conv(self.network, state_dict, C2f_2.get_output(0), width_256, 148 | 3, 2, 1, 'Conv.3') 149 | C2f_4 = C2f(self.network, state_dict, Conv_3.get_output(0), width_256, 150 | depth_6, True, 1, 0.5, 'C2f.4') 151 | Conv_5 = Conv(self.network, state_dict, C2f_4.get_output(0), width_512, 152 | 3, 2, 1, 'Conv.5') 153 | C2f_6 = C2f(self.network, state_dict, Conv_5.get_output(0), width_512, 154 | depth_6, True, 1, 0.5, 'C2f.6') 155 | Conv_7 = Conv(self.network, state_dict, C2f_6.get_output(0), 156 | width_1024, 3, 2, 1, 'Conv.7') 157 | C2f_8 = C2f(self.network, state_dict, Conv_7.get_output(0), width_1024, 158 | depth_3, True, 1, 0.5, 'C2f.8') 159 | SPPF_9 = SPPF(self.network, state_dict, C2f_8.get_output(0), 160 | width_1024, width_1024, 5, 'SPPF.9') 161 | Upsample_10 = self.network.add_resize(SPPF_9.get_output(0)) 162 | assert Upsample_10, 'Add Upsample_10 failed' 163 | Upsample_10.resize_mode = trt.ResizeMode.NEAREST 164 | 
Upsample_10.shape = Upsample_10.get_output( 165 | 0).shape[:2] + C2f_6.get_output(0).shape[2:] 166 | input_tensors11 = [Upsample_10.get_output(0), C2f_6.get_output(0)] 167 | Cat_11 = self.network.add_concatenation(input_tensors11) 168 | C2f_12 = C2f(self.network, state_dict, Cat_11.get_output(0), width_512, 169 | depth_3, False, 1, 0.5, 'C2f.12') 170 | Upsample13 = self.network.add_resize(C2f_12.get_output(0)) 171 | assert Upsample13, 'Add Upsample13 failed' 172 | Upsample13.resize_mode = trt.ResizeMode.NEAREST 173 | Upsample13.shape = Upsample13.get_output( 174 | 0).shape[:2] + C2f_4.get_output(0).shape[2:] 175 | input_tensors14 = [Upsample13.get_output(0), C2f_4.get_output(0)] 176 | Cat_14 = self.network.add_concatenation(input_tensors14) 177 | C2f_15 = C2f(self.network, state_dict, Cat_14.get_output(0), width_256, 178 | depth_3, False, 1, 0.5, 'C2f.15') 179 | Conv_16 = Conv(self.network, state_dict, C2f_15.get_output(0), 180 | width_256, 3, 2, 1, 'Conv.16') 181 | input_tensors17 = [Conv_16.get_output(0), C2f_12.get_output(0)] 182 | Cat_17 = self.network.add_concatenation(input_tensors17) 183 | C2f_18 = C2f(self.network, state_dict, Cat_17.get_output(0), width_512, 184 | depth_3, False, 1, 0.5, 'C2f.18') 185 | Conv_19 = Conv(self.network, state_dict, C2f_18.get_output(0), 186 | width_512, 3, 2, 1, 'Conv.19') 187 | input_tensors20 = [Conv_19.get_output(0), SPPF_9.get_output(0)] 188 | Cat_20 = self.network.add_concatenation(input_tensors20) 189 | C2f_21 = C2f(self.network, state_dict, Cat_20.get_output(0), 190 | width_1024, depth_3, False, 1, 0.5, 'C2f.21') 191 | input_tensors22 = [ 192 | C2f_15.get_output(0), 193 | C2f_18.get_output(0), 194 | C2f_21.get_output(0) 195 | ] 196 | batched_nms = Detect(self.network, state_dict, input_tensors22, 197 | strides, 'Detect.22', reg_max, fp16, iou_thres, 198 | conf_thres, topk) 199 | for o in range(batched_nms.num_outputs): 200 | self.network.mark_output(batched_nms.get_output(o)) 201 | 202 | 203 | class TRTModule(torch.nn.Module): 204 | dtypeMapping = { 205 | trt.bool: torch.bool, 206 | trt.int8: torch.int8, 207 | trt.int32: torch.int32, 208 | trt.float16: torch.float16, 209 | trt.float32: torch.float32 210 | } 211 | 212 | def __init__(self, weight: Union[str, Path], 213 | device: Optional[torch.device]) -> None: 214 | super(TRTModule, self).__init__() 215 | self.weight = Path(weight) if isinstance(weight, str) else weight 216 | self.device = device if device is not None else torch.device('cuda:0') 217 | self.stream = torch.cuda.Stream(device=device) 218 | self.__init_engine() 219 | self.__init_bindings() 220 | 221 | def __init_engine(self) -> None: 222 | logger = trt.Logger(trt.Logger.WARNING) 223 | trt.init_libnvinfer_plugins(logger, namespace='') 224 | with trt.Runtime(logger) as runtime: 225 | model = runtime.deserialize_cuda_engine(self.weight.read_bytes()) 226 | 227 | context = model.create_execution_context() 228 | num_bindings = model.num_bindings 229 | names = [model.get_binding_name(i) for i in range(num_bindings)] 230 | 231 | self.bindings: List[int] = [0] * num_bindings 232 | num_inputs, num_outputs = 0, 0 233 | 234 | for i in range(num_bindings): 235 | if model.binding_is_input(i): 236 | num_inputs += 1 237 | else: 238 | num_outputs += 1 239 | 240 | self.num_bindings = num_bindings 241 | self.num_inputs = num_inputs 242 | self.num_outputs = num_outputs 243 | self.model = model 244 | self.context = context 245 | self.input_names = names[:num_inputs] 246 | self.output_names = names[num_inputs:] 247 | self.idx = 
list(range(self.num_outputs)) 248 | 249 | def __init_bindings(self) -> None: 250 | idynamic = odynamic = False 251 | Tensor = namedtuple('Tensor', ('name', 'dtype', 'shape')) 252 | inp_info = [] 253 | out_info = [] 254 | for i, name in enumerate(self.input_names): 255 | assert self.model.get_binding_name(i) == name 256 | dtype = self.dtypeMapping[self.model.get_binding_dtype(i)] 257 | shape = tuple(self.model.get_binding_shape(i)) 258 | if -1 in shape: 259 | idynamic |= True 260 | inp_info.append(Tensor(name, dtype, shape)) 261 | for i, name in enumerate(self.output_names): 262 | i += self.num_inputs 263 | assert self.model.get_binding_name(i) == name 264 | dtype = self.dtypeMapping[self.model.get_binding_dtype(i)] 265 | shape = tuple(self.model.get_binding_shape(i)) 266 | if -1 in shape: 267 | odynamic |= True 268 | out_info.append(Tensor(name, dtype, shape)) 269 | 270 | if not odynamic: 271 | self.output_tensor = [ 272 | torch.empty(info.shape, dtype=info.dtype, device=self.device) 273 | for info in out_info 274 | ] 275 | self.idynamic = idynamic 276 | self.odynamic = odynamic 277 | self.inp_info = inp_info 278 | self.out_info = out_info 279 | 280 | def set_profiler(self, profiler: Optional[trt.IProfiler]): 281 | self.context.profiler = profiler \ 282 | if profiler is not None else trt.Profiler() 283 | 284 | def set_desired(self, desired: Optional[Union[List, Tuple]]): 285 | if isinstance(desired, 286 | (list, tuple)) and len(desired) == self.num_outputs: 287 | self.idx = [self.output_names.index(i) for i in desired] 288 | 289 | def forward(self, *inputs) -> Union[Tuple, torch.Tensor]: 290 | 291 | assert len(inputs) == self.num_inputs 292 | contiguous_inputs: List[torch.Tensor] = [ 293 | i.contiguous() for i in inputs 294 | ] 295 | 296 | for i in range(self.num_inputs): 297 | self.bindings[i] = contiguous_inputs[i].data_ptr() 298 | if self.idynamic: 299 | self.context.set_binding_shape( 300 | i, tuple(contiguous_inputs[i].shape)) 301 | 302 | outputs: List[torch.Tensor] = [] 303 | 304 | for i in range(self.num_outputs): 305 | j = i + self.num_inputs 306 | if self.odynamic: 307 | shape = tuple(self.context.get_binding_shape(j)) 308 | output = torch.empty(size=shape, 309 | dtype=self.out_info[i].dtype, 310 | device=self.device) 311 | else: 312 | output = self.output_tensor[i] 313 | self.bindings[j] = output.data_ptr() 314 | outputs.append(output) 315 | 316 | self.context.execute_async_v2(self.bindings, self.stream.cuda_stream) 317 | self.stream.synchronize() 318 | 319 | return tuple(outputs[i] 320 | for i in self.idx) if len(outputs) > 1 else outputs[0] 321 | 322 | 323 | class TRTProfilerV1(trt.IProfiler): 324 | 325 | def __init__(self): 326 | trt.IProfiler.__init__(self) 327 | self.total_runtime = 0.0 328 | self.recorder = defaultdict(float) 329 | 330 | def report_layer_time(self, layer_name: str, ms: float): 331 | self.total_runtime += ms * 1000 332 | self.recorder[layer_name] += ms * 1000 333 | 334 | def report(self): 335 | f = '\t%40s\t\t\t\t%10.4f' 336 | print('\t%40s\t\t\t\t%10s' % ('layername', 'cost(us)')) 337 | for name, cost in sorted(self.recorder.items(), key=lambda x: -x[1]): 338 | print( 339 | f % 340 | (name if len(name) < 40 else name[:35] + ' ' + '*' * 4, cost)) 341 | print(f'\nTotal Inference Time: {self.total_runtime:.4f}(us)') 342 | 343 | 344 | class TRTProfilerV0(trt.IProfiler): 345 | 346 | def __init__(self): 347 | trt.IProfiler.__init__(self) 348 | 349 | def report_layer_time(self, layer_name: str, ms: float): 350 | f = '\t%40s\t\t\t\t%10.4fms' 351 | print(f % 
(layer_name if len(layer_name) < 40 else layer_name[:35] + 352 | ' ' + '*' * 4, ms)) 353 | -------------------------------------------------------------------------------- /srcs/models/pycuda_api.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | from dataclasses import dataclass 4 | from pathlib import Path 5 | from typing import List, Optional, Tuple, Union 6 | 7 | import numpy as np 8 | import pycuda.autoinit # noqa F401 9 | import pycuda.driver as cuda 10 | import tensorrt as trt 11 | from numpy import ndarray 12 | 13 | os.environ['CUDA_MODULE_LOADING'] = 'LAZY' 14 | warnings.filterwarnings(action='ignore', category=DeprecationWarning) 15 | 16 | 17 | @dataclass 18 | class Tensor: 19 | name: str 20 | dtype: np.dtype 21 | shape: Tuple 22 | cpu: ndarray 23 | gpu: int 24 | 25 | 26 | class TRTEngine: 27 | 28 | def __init__(self, weight: Union[str, Path]) -> None: 29 | self.weight = Path(weight) if isinstance(weight, str) else weight 30 | self.stream = cuda.Stream(0) 31 | self.__init_engine() 32 | self.__init_bindings() 33 | self.__warm_up() 34 | 35 | def __init_engine(self) -> None: 36 | logger = trt.Logger(trt.Logger.WARNING) 37 | trt.init_libnvinfer_plugins(logger, namespace='') 38 | with trt.Runtime(logger) as runtime: 39 | model = runtime.deserialize_cuda_engine(self.weight.read_bytes()) 40 | 41 | context = model.create_execution_context() 42 | 43 | names = [model.get_binding_name(i) for i in range(model.num_bindings)] 44 | self.num_bindings = model.num_bindings 45 | self.bindings: List[int] = [0] * self.num_bindings 46 | num_inputs, num_outputs = 0, 0 47 | 48 | for i in range(model.num_bindings): 49 | if model.binding_is_input(i): 50 | num_inputs += 1 51 | else: 52 | num_outputs += 1 53 | 54 | self.num_inputs = num_inputs 55 | self.num_outputs = num_outputs 56 | self.model = model 57 | self.context = context 58 | self.input_names = names[:num_inputs] 59 | self.output_names = names[num_inputs:] 60 | 61 | def __init_bindings(self) -> None: 62 | dynamic = False 63 | inp_info = [] 64 | out_info = [] 65 | out_ptrs = [] 66 | for i, name in enumerate(self.input_names): 67 | assert self.model.get_binding_name(i) == name 68 | dtype = trt.nptype(self.model.get_binding_dtype(i)) 69 | shape = tuple(self.model.get_binding_shape(i)) 70 | if -1 in shape: 71 | dynamic |= True 72 | if not dynamic: 73 | cpu = np.empty(shape, dtype) 74 | gpu = cuda.mem_alloc(cpu.nbytes) 75 | cuda.memcpy_htod_async(gpu, cpu, self.stream) 76 | else: 77 | cpu, gpu = np.empty(0), 0 78 | inp_info.append(Tensor(name, dtype, shape, cpu, gpu)) 79 | for i, name in enumerate(self.output_names): 80 | i += self.num_inputs 81 | assert self.model.get_binding_name(i) == name 82 | dtype = trt.nptype(self.model.get_binding_dtype(i)) 83 | shape = tuple(self.model.get_binding_shape(i)) 84 | if not dynamic: 85 | cpu = np.empty(shape, dtype=dtype) 86 | gpu = cuda.mem_alloc(cpu.nbytes) 87 | cuda.memcpy_htod_async(gpu, cpu, self.stream) 88 | out_ptrs.append(gpu) 89 | else: 90 | cpu, gpu = np.empty(0), 0 91 | out_info.append(Tensor(name, dtype, shape, cpu, gpu)) 92 | 93 | self.is_dynamic = dynamic 94 | self.inp_info = inp_info 95 | self.out_info = out_info 96 | self.out_ptrs = out_ptrs 97 | 98 | def __warm_up(self) -> None: 99 | if self.is_dynamic: 100 | print('You engine has dynamic axes, please warm up by yourself !') 101 | return 102 | for _ in range(10): 103 | inputs = [] 104 | for i in self.inp_info: 105 | inputs.append(i.cpu) 106 | self.__call__(inputs) 107 | 108 | 
def set_profiler(self, profiler: Optional[trt.IProfiler]) -> None: 109 | self.context.profiler = profiler \ 110 | if profiler is not None else trt.Profiler() 111 | 112 | def __call__(self, *inputs) -> Union[Tuple, ndarray]: 113 | 114 | assert len(inputs) == self.num_inputs 115 | contiguous_inputs: List[ndarray] = [ 116 | np.ascontiguousarray(i) for i in inputs 117 | ] 118 | 119 | for i in range(self.num_inputs): 120 | 121 | if self.is_dynamic: 122 | self.context.set_binding_shape( 123 | i, tuple(contiguous_inputs[i].shape)) 124 | self.inp_info[i].gpu = cuda.mem_alloc( 125 | contiguous_inputs[i].nbytes) 126 | 127 | cuda.memcpy_htod_async(self.inp_info[i].gpu, contiguous_inputs[i], 128 | self.stream) 129 | self.bindings[i] = int(self.inp_info[i].gpu) 130 | 131 | output_gpu_ptrs: List[int] = [] 132 | outputs: List[ndarray] = [] 133 | 134 | for i in range(self.num_outputs): 135 | j = i + self.num_inputs 136 | if self.is_dynamic: 137 | shape = tuple(self.context.get_binding_shape(j)) 138 | dtype = self.out_info[i].dtype 139 | cpu = np.empty(shape, dtype=dtype) 140 | gpu = cuda.mem_alloc(cpu.nbytes) 141 | cuda.memcpy_htod_async(gpu, cpu, self.stream) 142 | else: 143 | cpu = self.out_info[i].cpu 144 | gpu = self.out_info[i].gpu 145 | outputs.append(cpu) 146 | output_gpu_ptrs.append(gpu) 147 | self.bindings[j] = int(gpu) 148 | 149 | self.context.execute_async_v2(self.bindings, self.stream.handle) 150 | self.stream.synchronize() 151 | 152 | for i, o in enumerate(output_gpu_ptrs): 153 | cuda.memcpy_dtoh_async(outputs[i], o, self.stream) 154 | 155 | return tuple(outputs) if len(outputs) > 1 else outputs[0] 156 | -------------------------------------------------------------------------------- /srcs/models/torch_utils.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple, Union 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import Tensor 6 | from torchvision.ops import batched_nms, nms 7 | 8 | 9 | def seg_postprocess( 10 | data: Tuple[Tensor], 11 | shape: Union[Tuple, List], 12 | conf_thres: float = 0.25, 13 | iou_thres: float = 0.65) \ 14 | -> Tuple[Tensor, Tensor, Tensor, Tensor]: 15 | assert len(data) == 2 16 | h, w = shape[0] // 4, shape[1] // 4 # 4x downsampling 17 | outputs, proto = data[0][0], data[1][0] 18 | bboxes, scores, labels, maskconf = outputs.split([4, 1, 1, 32], 1) 19 | scores, labels = scores.squeeze(), labels.squeeze() 20 | idx = scores > conf_thres 21 | if not idx.any(): # no bounding boxes or seg were created 22 | return bboxes.new_zeros((0, 4)), scores.new_zeros( 23 | (0, )), labels.new_zeros((0, )), bboxes.new_zeros((0, 0, 0, 0)) 24 | bboxes, scores, labels, maskconf = \ 25 | bboxes[idx], scores[idx], labels[idx], maskconf[idx] 26 | idx = batched_nms(bboxes, scores, labels, iou_thres) 27 | bboxes, scores, labels, maskconf = \ 28 | bboxes[idx], scores[idx], labels[idx].int(), maskconf[idx] 29 | masks = (maskconf @ proto).sigmoid().view(-1, h, w) 30 | masks = crop_mask(masks, bboxes / 4.) 
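# Note (editor comment, derived from the surrounding code): the prototype masks live at 1/4 of
# the network input resolution (h, w above), so the boxes are scaled by 1/4 before cropping;
# the interpolate call below upsamples the cropped masks back to the requested `shape`,
# after which they are binarised at 0.5.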
31 | masks = F.interpolate(masks[None], 32 | shape, 33 | mode='bilinear', 34 | align_corners=False)[0] 35 | masks = masks.gt_(0.5)[..., None] 36 | return bboxes, scores, labels, masks 37 | 38 | 39 | def pose_postprocess( 40 | data: Union[Tuple, Tensor], 41 | conf_thres: float = 0.25, 42 | iou_thres: float = 0.65) \ 43 | -> Tuple[Tensor, Tensor, Tensor]: 44 | if isinstance(data, tuple): 45 | assert len(data) == 1 46 | data = data[0] 47 | outputs = torch.transpose(data[0], 0, 1).contiguous() 48 | bboxes, scores, kpts = outputs.split([4, 1, 51], 1) 49 | scores, kpts = scores.squeeze(), kpts.squeeze() 50 | idx = scores > conf_thres 51 | if not idx.any(): # no bounding boxes or seg were created 52 | return bboxes.new_zeros((0, 4)), scores.new_zeros( 53 | (0, )), bboxes.new_zeros((0, 0, 0)) 54 | bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx] 55 | xycenter, wh = bboxes.chunk(2, -1) 56 | bboxes = torch.cat([xycenter - 0.5 * wh, xycenter + 0.5 * wh], -1) 57 | idx = nms(bboxes, scores, iou_thres) 58 | bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx] 59 | return bboxes, scores, kpts.reshape(idx.shape[0], -1, 3) 60 | 61 | 62 | def det_postprocess(data: Tuple[Tensor, Tensor, Tensor, Tensor]): 63 | assert len(data) == 4 64 | iou_thres: float = 0.65 65 | num_dets, bboxes, scores, labels = data[0][0], data[1][0], data[2][ 66 | 0], data[3][0] 67 | nums = num_dets.item() 68 | if nums == 0: 69 | return bboxes.new_zeros((0, 4)), scores.new_zeros( 70 | (0, )), labels.new_zeros((0, )) 71 | # check score negative 72 | scores[scores < 0] = 1 + scores[scores < 0] 73 | # add nms 74 | idx = nms(bboxes, scores, iou_thres) 75 | bboxes, scores, labels = bboxes[idx], scores[idx], labels[idx] 76 | bboxes = bboxes[:nums] 77 | scores = scores[:nums] 78 | labels = labels[:nums] 79 | 80 | return bboxes, scores, labels 81 | 82 | 83 | def crop_mask(masks: Tensor, bboxes: Tensor) -> Tensor: 84 | n, h, w = masks.shape 85 | x1, y1, x2, y2 = torch.chunk(bboxes[:, :, None], 4, 1) # x1 shape(1,1,n) 86 | r = torch.arange(w, device=masks.device, 87 | dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) 88 | c = torch.arange(h, device=masks.device, 89 | dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) 90 | 91 | return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) 92 | -------------------------------------------------------------------------------- /srcs/models/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List, Tuple, Union 3 | 4 | import cv2 5 | import numpy as np 6 | from numpy import ndarray 7 | 8 | # image suffixs 9 | SUFFIXS = ('.bmp', '.dng', '.jpeg', '.jpg', '.mpo', '.png', '.tif', '.tiff', 10 | '.webp', '.pfm') 11 | 12 | 13 | def letterbox(im: ndarray, 14 | new_shape: Union[Tuple, List] = (640, 640), 15 | color: Union[Tuple, List] = (114, 114, 114)) \ 16 | -> Tuple[ndarray, float, Tuple[float, float]]: 17 | # Resize and pad image while meeting stride-multiple constraints 18 | shape = im.shape[:2] # current shape [height, width] 19 | if isinstance(new_shape, int): 20 | new_shape = (new_shape, new_shape) 21 | # new_shape: [width, height] 22 | 23 | # Scale ratio (new / old) 24 | r = min(new_shape[0] / shape[1], new_shape[1] / shape[0]) 25 | # Compute padding [width, height] 26 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 27 | dw, dh = new_shape[0] - new_unpad[0], new_shape[1] - new_unpad[ 28 | 1] # wh padding 29 | 30 | dw /= 2 # divide padding into 2 sides 31 | dh /= 2 32 | 33 | if 
shape[::-1] != new_unpad: # resize 34 | im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) 35 | top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) 36 | left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) 37 | im = cv2.copyMakeBorder(im, 38 | top, 39 | bottom, 40 | left, 41 | right, 42 | cv2.BORDER_CONSTANT, 43 | value=color) # add border 44 | return im, r, (dw, dh) 45 | 46 | 47 | def blob(im: ndarray, return_seg: bool = False) -> Union[ndarray, Tuple]: 48 | seg = None 49 | if return_seg: 50 | seg = im.astype(np.float32) / 255 51 | im = im.transpose([2, 0, 1]) 52 | im = im[np.newaxis, ...] 53 | im = np.ascontiguousarray(im).astype(np.float32) / 255 54 | if return_seg: 55 | return im, seg 56 | else: 57 | return im 58 | 59 | 60 | def sigmoid(x: ndarray) -> ndarray: 61 | return 1. / (1. + np.exp(-x)) 62 | 63 | 64 | def bbox_iou(boxes1: ndarray, boxes2: ndarray) -> ndarray: 65 | boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * \ 66 | (boxes1[..., 3] - boxes1[..., 1]) 67 | boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * \ 68 | (boxes2[..., 3] - boxes2[..., 1]) 69 | left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) 70 | right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) 71 | inter_section = np.maximum(right_down - left_up, 0.0) 72 | inter_area = inter_section[..., 0] * inter_section[..., 1] 73 | union_area = boxes1_area + boxes2_area - inter_area 74 | ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps) 75 | 76 | return ious 77 | 78 | 79 | def batched_nms(boxes: ndarray, 80 | scores: ndarray, 81 | iou_thres: float = 0.65, 82 | conf_thres: float = 0.25): 83 | labels = np.argmax(scores, axis=-1) 84 | scores = np.max(scores, axis=-1) 85 | 86 | cand = scores > conf_thres 87 | boxes = boxes[cand] 88 | scores = scores[cand] 89 | labels = labels[cand] 90 | 91 | keep_boxes = [] 92 | keep_scores = [] 93 | keep_labels = [] 94 | 95 | for cls in np.unique(labels): 96 | cls_mask = labels == cls 97 | cls_boxes = boxes[cls_mask] 98 | cls_scores = scores[cls_mask] 99 | 100 | while cls_boxes.shape[0] > 0: 101 | max_idx = np.argmax(cls_scores) 102 | max_box = cls_boxes[max_idx:max_idx + 1] 103 | max_score = cls_scores[max_idx:max_idx + 1] 104 | max_label = np.array([cls], dtype=np.int32) 105 | keep_boxes.append(max_box) 106 | keep_scores.append(max_score) 107 | keep_labels.append(max_label) 108 | other_boxes = np.delete(cls_boxes, max_idx, axis=0) 109 | other_scores = np.delete(cls_scores, max_idx, axis=0) 110 | ious = bbox_iou(max_box, other_boxes) 111 | iou_mask = ious < iou_thres 112 | if not iou_mask.any(): 113 | break 114 | cls_boxes = other_boxes[iou_mask] 115 | cls_scores = other_scores[iou_mask] 116 | 117 | if len(keep_boxes) == 0: 118 | keep_boxes = np.empty((0, 4), dtype=np.float32) 119 | keep_scores = np.empty((0, ), dtype=np.float32) 120 | keep_labels = np.empty((0, ), dtype=np.float32) 121 | 122 | else: 123 | keep_boxes = np.concatenate(keep_boxes, axis=0) 124 | keep_scores = np.concatenate(keep_scores, axis=0) 125 | keep_labels = np.concatenate(keep_labels, axis=0) 126 | 127 | return keep_boxes, keep_scores, keep_labels 128 | 129 | 130 | def nms(boxes: ndarray, 131 | scores: ndarray, 132 | iou_thres: float = 0.65, 133 | conf_thres: float = 0.25): 134 | labels = np.argmax(scores, axis=-1) 135 | scores = np.max(scores, axis=-1) 136 | 137 | cand = scores > conf_thres 138 | boxes = boxes[cand] 139 | scores = scores[cand] 140 | labels = labels[cand] 141 | 142 | keep_boxes = [] 143 | keep_scores = [] 144 | keep_labels = [] 145 | 146 | idxs = 
scores.argsort() 147 | while idxs.size > 0: 148 | max_score_index = idxs[-1] 149 | max_box = boxes[max_score_index:max_score_index + 1] 150 | max_score = scores[max_score_index:max_score_index + 1] 151 | max_label = np.array([labels[max_score_index]], dtype=np.int32) 152 | keep_boxes.append(max_box) 153 | keep_scores.append(max_score) 154 | keep_labels.append(max_label) 155 | if idxs.size == 1: 156 | break 157 | idxs = idxs[:-1] 158 | other_boxes = boxes[idxs] 159 | ious = bbox_iou(max_box, other_boxes) 160 | iou_mask = ious < iou_thres 161 | idxs = idxs[iou_mask] 162 | 163 | if len(keep_boxes) == 0: 164 | keep_boxes = np.empty((0, 4), dtype=np.float32) 165 | keep_scores = np.empty((0, ), dtype=np.float32) 166 | keep_labels = np.empty((0, ), dtype=np.float32) 167 | 168 | else: 169 | keep_boxes = np.concatenate(keep_boxes, axis=0) 170 | keep_scores = np.concatenate(keep_scores, axis=0) 171 | keep_labels = np.concatenate(keep_labels, axis=0) 172 | 173 | return keep_boxes, keep_scores, keep_labels 174 | 175 | 176 | def path_to_list(images_path: Union[str, Path]) -> List: 177 | if isinstance(images_path, str): 178 | images_path = Path(images_path) 179 | assert images_path.exists() 180 | if images_path.is_dir(): 181 | images = [ 182 | i.absolute() for i in images_path.iterdir() if i.suffix in SUFFIXS 183 | ] 184 | else: 185 | assert images_path.suffix in SUFFIXS 186 | images = [images_path.absolute()] 187 | return images 188 | 189 | 190 | def crop_mask(masks: ndarray, bboxes: ndarray) -> ndarray: 191 | n, h, w = masks.shape 192 | x1, y1, x2, y2 = np.split(bboxes[:, :, None], [1, 2, 3], 193 | 1) # x1 shape(1,1,n) 194 | r = np.arange(w, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) 195 | c = np.arange(h, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) 196 | 197 | return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) 198 | 199 | 200 | def det_postprocess(data: Tuple[ndarray, ndarray, ndarray, ndarray]): 201 | assert len(data) == 4 202 | iou_thres: float = 0.65 203 | num_dets, bboxes, scores, labels = (i[0] for i in data) 204 | nums = num_dets.item() 205 | if nums == 0: 206 | return np.empty((0, 4), dtype=np.float32), np.empty( 207 | (0, ), dtype=np.float32), np.empty((0, ), dtype=np.int32) 208 | # check score negative 209 | scores[scores < 0] = 1 + scores[scores < 0] 210 | # add nms 211 | idx = nms(bboxes, scores, iou_thres) 212 | bboxes, scores, labels = bboxes[idx], scores[idx], labels[idx] 213 | 214 | bboxes = bboxes[:nums] 215 | scores = scores[:nums] 216 | labels = labels[:nums] 217 | return bboxes, scores, labels 218 | 219 | 220 | def seg_postprocess( 221 | data: Tuple[ndarray], 222 | shape: Union[Tuple, List], 223 | conf_thres: float = 0.25, 224 | iou_thres: float = 0.65) \ 225 | -> Tuple[ndarray, ndarray, ndarray, ndarray]: 226 | assert len(data) == 2 227 | h, w = shape[0] // 4, shape[1] // 4 # 4x downsampling 228 | outputs, proto = (i[0] for i in data) 229 | bboxes, scores, labels, maskconf = np.split(outputs, [4, 5, 6], 1) 230 | scores, labels = scores.squeeze(), labels.squeeze() 231 | idx = scores > conf_thres 232 | if not idx.any(): # no bounding boxes or seg were created 233 | return np.empty((0, 4), dtype=np.float32), \ 234 | np.empty((0,), dtype=np.float32), \ 235 | np.empty((0,), dtype=np.int32), \ 236 | np.empty((0, 0, 0, 0), dtype=np.int32) 237 | 238 | bboxes, scores, labels, maskconf = \ 239 | bboxes[idx], scores[idx], labels[idx], maskconf[idx] 240 | cvbboxes = np.concatenate([bboxes[:, :2], bboxes[:, 2:] - bboxes[:, :2]], 241 | 1) 242 | labels = 
labels.astype(np.int32) 243 | v0, v1 = map(int, (cv2.__version__).split('.')[:2]) 244 | assert v0 == 4, 'OpenCV version is wrong' 245 | if v1 > 6: 246 | idx = cv2.dnn.NMSBoxesBatched(cvbboxes, scores, labels, conf_thres, 247 | iou_thres) 248 | else: 249 | idx = cv2.dnn.NMSBoxes(cvbboxes, scores, conf_thres, iou_thres) 250 | bboxes, scores, labels, maskconf = \ 251 | bboxes[idx], scores[idx], labels[idx], maskconf[idx] 252 | masks = sigmoid(maskconf @ proto).reshape(-1, h, w) 253 | masks = crop_mask(masks, bboxes / 4.) 254 | masks = masks.transpose([1, 2, 0]) 255 | masks = cv2.resize(masks, (shape[1], shape[0]), 256 | interpolation=cv2.INTER_LINEAR) 257 | masks = masks.transpose(2, 0, 1) 258 | masks = np.ascontiguousarray((masks > 0.5)[..., None], dtype=np.float32) 259 | return bboxes, scores, labels, masks 260 | 261 | 262 | def pose_postprocess( 263 | data: Union[Tuple, ndarray], 264 | conf_thres: float = 0.25, 265 | iou_thres: float = 0.65) \ 266 | -> Tuple[ndarray, ndarray, ndarray]: 267 | if isinstance(data, tuple): 268 | assert len(data) == 1 269 | data = data[0] 270 | outputs = np.transpose(data[0], (1, 0)) 271 | bboxes, scores, kpts = np.split(outputs, [4, 5], 1) 272 | scores, kpts = scores.squeeze(), kpts.squeeze() 273 | idx = scores > conf_thres 274 | if not idx.any(): # no bounding boxes or seg were created 275 | return np.empty((0, 4), dtype=np.float32), np.empty( 276 | (0, ), dtype=np.float32), np.empty((0, 0, 0), dtype=np.float32) 277 | bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx] 278 | xycenter, wh = np.split(bboxes, [ 279 | 2, 280 | ], -1) 281 | cvbboxes = np.concatenate([xycenter - 0.5 * wh, wh], -1) 282 | idx = cv2.dnn.NMSBoxes(cvbboxes, scores, conf_thres, iou_thres) 283 | cvbboxes, scores, kpts = cvbboxes[idx], scores[idx], kpts[idx] 284 | cvbboxes[:, 2:] += cvbboxes[:, :2] 285 | return cvbboxes, scores, kpts.reshape(idx.shape[0], -1, 3) 286 | -------------------------------------------------------------------------------- /srcs/tracker_trt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #!/usr/bin/python3 3 | """ 4 | Created on 2021/5/24 13:46 5 | @Author: Wang Cong 6 | @Email : iwangcong@outlook.com 7 | @Version : 0.1 8 | @File : tracker_trt.py 9 | """ 10 | import cv2 11 | import numpy as np 12 | 13 | from deep_sort.utils.parser import get_config 14 | from deep_sort.deep_sort import DeepSort 15 | 16 | cfg = get_config() 17 | cfg.merge_from_file("./deep_sort/configs/deep_sort.yaml") 18 | deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, 19 | max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, 20 | nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, 21 | max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, 22 | use_cuda=True) 23 | 24 | 25 | def draw_bboxes(image, bboxes, line_thickness): 26 | line_thickness = line_thickness or round( 27 | 0.002 * (image.shape[0] + image.shape[1]) / 2) + 1 28 | 29 | list_pts = [] 30 | point_radius = 4 31 | 32 | for (x1, y1, x2, y2, cls_id, pos_id) in bboxes: 33 | color = (0, 255, 0) 34 | 35 | check_point_x = x1 36 | check_point_y = int(y1 + ((y2 - y1) * 0.6)) 37 | 38 | c1, c2 = (x1, y1), (x2, y2) 39 | cv2.rectangle(image, c1, c2, color, thickness=line_thickness, lineType=cv2.LINE_AA) 40 | 41 | font_thickness = max(line_thickness - 1, 1) 42 | t_size = cv2.getTextSize(cls_id, 0, fontScale=line_thickness / 3, thickness=font_thickness)[0] 43 | c2 = c1[0] + 
t_size[0], c1[1] - t_size[1] - 3 44 | cv2.rectangle(image, c1, c2, color, -1, cv2.LINE_AA) # filled 45 | cv2.putText(image, '{} ID-{}'.format(cls_id, pos_id), (c1[0], c1[1] - 2), 0, line_thickness / 3, 46 | [225, 255, 255], thickness=font_thickness, lineType=cv2.LINE_AA) 47 | 48 | list_pts.append([check_point_x-point_radius, check_point_y-point_radius]) 49 | list_pts.append([check_point_x-point_radius, check_point_y+point_radius]) 50 | list_pts.append([check_point_x+point_radius, check_point_y+point_radius]) 51 | list_pts.append([check_point_x+point_radius, check_point_y-point_radius]) 52 | 53 | ndarray_pts = np.array(list_pts, np.int32) 54 | 55 | cv2.fillPoly(image, [ndarray_pts], color=(0, 0, 255)) 56 | 57 | list_pts.clear() 58 | 59 | return image 60 | 61 | def clear(): 62 | deepsort.clear() 63 | def update(bboxes, image): 64 | bbox_xywh = [] 65 | lbls = [] 66 | confs = [] 67 | bboxes2draw = [] 68 | 69 | if len(bboxes) > 0: 70 | for x1, y1, x2, y2, lbl, conf in bboxes: 71 | obj = [ 72 | int((x1 + x2) / 2), int((y1 + y2) / 2), 73 | x2 - x1, y2 - y1 74 | ] 75 | bbox_xywh.append(obj) 76 | lbls.append(lbl) 77 | confs.append(conf) 78 | 79 | xywhs = np.array(bbox_xywh) 80 | confss = np.array(confs) 81 | 82 | outputs = deepsort.update(xywhs, lbls, confss, image) 83 | 84 | for value in list(outputs): 85 | x1, y1, x2, y2, track_label, track_id = value 86 | bboxes2draw.append((int(x1), int(y1), int(x2), int(y2), track_label, int(track_id))) 87 | pass 88 | pass 89 | 90 | return bboxes2draw 91 | -------------------------------------------------------------------------------- /srcs/yolov8_bytetrack_trt.py: -------------------------------------------------------------------------------- 1 | from models import TRTModule 2 | import argparse 3 | from time import time 4 | import cv2 5 | from pathlib import Path 6 | import torch 7 | import ctypes 8 | from bytetrack.byte_tracker import BYTETracker 9 | 10 | from config import CLASSES, COLORS 11 | from models.torch_utils import det_postprocess 12 | from models.utils import blob, letterbox, path_to_list 13 | from datetime import datetime, timedelta 14 | import json 15 | import numpy as np 16 | import random 17 | 18 | 19 | 20 | class ROI: 21 | def __init__(self, x1, y1, x2, y2, roi_id): 22 | self.x1 = x1 23 | self.y1 = y1 24 | self.x2 = x2 25 | self.y2 = y2 26 | self.roi_id = roi_id 27 | self.count = 0 28 | 29 | 30 | DICT_ROIS = {} 31 | DEBOUNCE_PERIOD = timedelta(seconds=2) 32 | person_tracker = {} 33 | debounce_tracker = {} 34 | 35 | color_dict = {} 36 | 37 | def get_random_color(id): 38 | if id not in color_dict: 39 | color_dict[id] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) 40 | return color_dict[id] 41 | 42 | 43 | 44 | def main(args): 45 | args_bytetrack = argparse.Namespace() 46 | args_bytetrack.track_thresh = 0.2 47 | args_bytetrack.track_buffer = 200 48 | args_bytetrack.mot20 = True 49 | args_bytetrack.match_thresh = 0.7 50 | 51 | tracker = BYTETracker(args_bytetrack) 52 | device = torch.device(args.device) 53 | Engine = TRTModule(args.engine, device) 54 | H, W = Engine.inp_info[0].shape[-2:] 55 | 56 | Engine.set_desired(['num_dets', 'bboxes', 'scores', 'labels']) 57 | 58 | fps = 0 59 | # input video 60 | cap = cv2.VideoCapture(args.vid) 61 | # input webcam 62 | # cap = cv2.VideoCapture(0) 63 | 64 | video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 65 | video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 66 | out = cv2.VideoWriter('output.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 30, (video_width,video_height)) 
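    # Overview of the per-frame loop below:
    #   1. letterbox() resizes and pads the BGR frame to the engine input size (W, H),
    #      returning the scale ratio and the (dw, dh) padding offsets.
    #   2. blob() converts the RGB image to a normalized 1x3xHxW float32 tensor.
    #   3. The TensorRT engine outputs (num_dets, bboxes, scores, labels); det_postprocess()
    #      unpacks them, and the padding and scale are undone to map boxes back to the
    #      original frame coordinates.
    #   4. Person detections (label 0) above the score threshold are passed to BYTETracker,
    #      which assigns persistent track IDs that are drawn on the frame and written to video.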
67 |     while True:
68 |         ret, frame = cap.read()
69 | 
70 |         if frame is None:
71 |             print('No image input!')
72 |             break
73 | 
74 |         start = float(time())
75 |         fps_str = "FPS:"
76 |         fps_str += "{:.2f}".format(fps)
77 |         bgr = frame
78 |         bgr, ratio, dwdh = letterbox(bgr, (W, H))
79 |         rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
80 | 
81 |         tensor = blob(rgb, return_seg=False)
82 |         # (dw, dh) is doubled to (dw, dh, dw, dh) so it can be subtracted from xyxy boxes
83 |         dwdh = torch.asarray(dwdh * 2, dtype=torch.float32, device=device)
84 | 
85 |         tensor = torch.asarray(tensor, device=device)
86 | 
87 |         data = Engine(tensor)
88 |         bboxes, scores, labels = det_postprocess(data)
89 |         # print(labels)
90 | 
91 |         if bboxes.numel() == 0:
92 |             continue
93 |         # undo letterbox padding and scaling to get boxes in original-frame coordinates
94 |         bboxes -= dwdh
95 |         bboxes /= ratio
96 |         output = []
97 |         for (bbox, score, label) in zip(bboxes, scores, labels):
98 |             if label == 0 and score.item() > 0.2:
99 |                 bbox = bbox.round().int().tolist()
100 |                 cls_id = int(label)
101 |                 cls = CLASSES[cls_id]
102 |                 # x1, y1, x2, y2, conf
103 |                 output.append([bbox[0], bbox[1], bbox[2], bbox[3], score.item()])
104 |         output = np.array(output)
105 | 
106 |         info_imgs = frame.shape[:2]
107 |         img_size = info_imgs
108 | 
109 |         if output.size > 0:
110 |             online_targets = tracker.update(output, info_imgs, img_size)
111 |             online_tlwhs = []
112 |             online_ids = []
113 |             online_scores = []
114 |             for t in online_targets:
115 |                 tlwh = t.tlwh
116 |                 tid = t.track_id
117 |                 online_tlwhs.append(tlwh)
118 |                 online_ids.append(tid)
119 |                 online_scores.append(t.score)
120 | 
121 |                 if args.show:
122 |                     cv2.rectangle(frame, (int(tlwh[0]), int(tlwh[1])), (int(tlwh[0] + tlwh[2]), int(tlwh[1] + tlwh[3])), get_random_color(tid), 2)
123 |                     cv2.putText(frame, str(tid), (int(tlwh[0]), int(tlwh[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
124 | 
125 |         end = float(time())
126 | 
127 | 
128 | 
129 | 
130 | 
131 |         fps = 1 / (end - start)
132 |         print(fps_str)
133 |         cv2.putText(frame, "YOLOV8-BYTETrack", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
134 |         cv2.putText(frame, fps_str, (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
135 |         if args.show:
136 |             cv2.imshow("output", frame)
137 |             if cv2.waitKey(1) & 0xFF == ord('q'):
138 |                 break
139 |         out.write(frame)
140 |     out.release()
141 |     cap.release()
142 |     cv2.destroyAllWindows()
143 |     # tracker_trt.clear()
144 | 
145 | 
146 | def parse_args():
147 |     parser = argparse.ArgumentParser()
148 |     parser.add_argument('--engine', type=str, help='Engine file', default='../models/engine/yolov8n.engine')
149 |     parser.add_argument('--vid', type=str, help='Video file', default='../sample_video/sample_2.mp4')
150 |     parser.add_argument('--show',
151 |                         action='store_true',
152 |                         help='Show the results')
153 |     parser.add_argument('--device',
154 |                         type=str,
155 |                         default='cuda:0',
156 |                         help='TensorRT infer device')
157 |     args = parser.parse_args()
158 |     return args
159 | 
160 | 
161 | if __name__ == '__main__':
162 |     args = parse_args()
163 |     main(args)
164 | 
165 | 
--------------------------------------------------------------------------------
/srcs/yolov8_deepsort_trt.py:
--------------------------------------------------------------------------------
1 | from models import TRTModule
2 | import argparse
3 | from time import time
4 | import cv2
5 | from pathlib import Path
6 | import torch
7 | import ctypes
8 | import tracker_trt
9 | 
10 | 
11 | from config import CLASSES, COLORS
12 | from models.torch_utils import det_postprocess
13 | from models.utils import blob, letterbox, path_to_list
14 | from datetime import datetime, timedelta
15 | import json
16 | import random
17 | 
18 | 
19 | 
20 | class ROI:
21 | 
def __init__(self, x1, y1, x2, y2, roi_id): 22 | self.x1 = x1 23 | self.y1 = y1 24 | self.x2 = x2 25 | self.y2 = y2 26 | self.roi_id = roi_id 27 | self.count = 0 28 | 29 | 30 | DICT_ROIS = {} 31 | DEBOUNCE_PERIOD = timedelta(seconds=2) 32 | person_tracker = {} 33 | debounce_tracker = {} 34 | 35 | 36 | color_dict = {} 37 | 38 | def get_random_color(id): 39 | if id not in color_dict: 40 | color_dict[id] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) 41 | return color_dict[id] 42 | 43 | 44 | def main(args): 45 | 46 | 47 | device = torch.device(args.device) 48 | Engine = TRTModule(args.engine, device) 49 | H, W = Engine.inp_info[0].shape[-2:] 50 | 51 | Engine.set_desired(['num_dets', 'bboxes', 'scores', 'labels']) 52 | 53 | fps = 0 54 | # input video 55 | cap = cv2.VideoCapture(args.vid) 56 | # input webcam 57 | # cap = cv2.VideoCapture(0) 58 | 59 | video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 60 | video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 61 | out = cv2.VideoWriter('output_ds.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 30, (video_width,video_height)) 62 | while(True): 63 | ret, frame = cap.read() 64 | 65 | if frame is None: 66 | print('No image input!') 67 | break 68 | 69 | start = float(time()) 70 | fps_str = "FPS:" 71 | fps_str += "{:.2f}".format(fps) 72 | bgr = frame 73 | bgr, ratio, dwdh = letterbox(bgr, (W, H)) 74 | rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB) 75 | 76 | tensor = blob(rgb, return_seg=False) 77 | 78 | dwdh = torch.asarray(dwdh * 2, dtype=torch.float32, device=device) 79 | 80 | tensor = torch.asarray(tensor, device=device) 81 | 82 | data = Engine(tensor) 83 | bboxes, scores, labels = det_postprocess(data) 84 | # print(labels) 85 | 86 | if bboxes.numel() == 0: 87 | continue 88 | 89 | bboxes -= dwdh 90 | bboxes /= ratio 91 | detections = [] 92 | for (bbox, score, label) in zip(bboxes, scores, labels): 93 | if label == 0 and score.item() > 0.3: 94 | bbox = bbox.round().int().tolist() 95 | cls_id = int(label) 96 | cls = CLASSES[cls_id] 97 | detections.append((bbox[0], bbox[1], bbox[2] , bbox[3], cls, score.item())) 98 | end = float(time()) 99 | 100 | list_bbox = tracker_trt.update(detections,frame) 101 | for (x1, y1, x2, y2, cls, track_id) in list_bbox: 102 | color = [0, 255, 0] 103 | 104 | if args.show: 105 | # frame = draw_roi(frame) 106 | cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 2) 107 | cv2.putText(frame, f'{cls} {track_id}', (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) 108 | 109 | 110 | 111 | fps = 1/(end - start) 112 | print(fps_str) 113 | 114 | cv2.putText(frame, fps_str, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) 115 | cv2.putText(frame, "YOLOV8-DEEP SORT", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) 116 | if args.show: 117 | cv2.imshow("output", frame) 118 | if cv2.waitKey(1) & 0xFF == ord('q'): 119 | break 120 | out.write(frame) 121 | 122 | out.release() 123 | cap.release() 124 | cv2.destroyAllWindows() 125 | # tracker_trt.clear() 126 | 127 | 128 | def parse_args(): 129 | parser = argparse.ArgumentParser() 130 | parser.add_argument('--engine', type=str, help='Engine file', default='../models/engine/yolov8n.engine') 131 | parser.add_argument('--vid', type=str, help='Video file', default='../sample_video/sample.mp4') 132 | parser.add_argument('--show', 133 | action='store_true', 134 | help='Show the results') 135 | parser.add_argument('--device', 136 | type=str, 137 | default='cuda:0', 138 | help='TensorRT infer device') 139 | args = 
parser.parse_args() 140 | return args 141 | 142 | 143 | if __name__ == '__main__': 144 | args = parse_args() 145 | main(args) 146 | 147 | --------------------------------------------------------------------------------
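Usage sketch: the snippet below illustrates how the NumPy helpers in srcs/models/utils.py fit together (letterbox for resize/pad, blob for tensor conversion, nms for class-aware suppression), assuming it is run from the srcs/ directory like the scripts above. The image path and the random boxes/scores are placeholders for illustration only; the tracking scripts feed the TensorRT engine's decoded output rather than raw class scores.

import cv2
import numpy as np

from models.utils import letterbox, blob, nms

# Placeholder input: any BGR image readable by OpenCV.
frame = cv2.imread('frame.jpg')

# Resize/pad to the 640x640 network input; keep ratio and padding to undo later.
padded, ratio, (dw, dh) = letterbox(frame, (640, 640))
tensor = blob(cv2.cvtColor(padded, cv2.COLOR_BGR2RGB))  # 1x3x640x640 float32 in [0, 1]

# nms() expects xyxy boxes and per-class scores (N x num_classes); random values
# stand in for a decoded YOLO head output purely to show the call signature.
boxes = np.random.rand(10, 4).astype(np.float32) * 320.0
boxes[:, 2:] += boxes[:, :2]          # ensure x2 >= x1 and y2 >= y1
cls_scores = np.random.rand(10, 80).astype(np.float32)
keep_boxes, keep_scores, keep_labels = nms(boxes, cls_scores,
                                           iou_thres=0.65, conf_thres=0.25)

# Map kept boxes from letterboxed coordinates back to the original frame,
# mirroring the `bboxes -= dwdh; bboxes /= ratio` step in the tracking scripts.
keep_boxes[:, [0, 2]] -= dw
keep_boxes[:, [1, 3]] -= dh
keep_boxes /= ratio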