├── .gitignore
├── .gitmodules
├── LICENSE
├── README.md
├── assets
│   └── sample_yolov8_bytetrack.gif
├── build_opencv.sh
├── models
│   ├── engine
│   │   └── .gitkeep
│   ├── onnx
│   │   ├── .gitkeep
│   │   ├── deepsort.onnx
│   │   └── yolov8n.onnx
│   └── to_export
│       └── .gitkeep
├── sample_video
│   ├── .gitkeep
│   └── sample_1.webm
└── srcs
    ├── bytetrack
    │   ├── basetrack.py
    │   ├── byte_tracker.py
    │   ├── kalman_filter.py
    │   └── matching.py
    ├── config.py
    ├── deep_sort
    │   ├── __init__.py
    │   ├── configs
    │   │   └── deep_sort.yaml
    │   ├── deep_sort
    │   │   ├── __init__.py
    │   │   ├── deep
    │   │   │   ├── __init__.py
    │   │   │   ├── checkpoint
    │   │   │   │   └── .gitkeep
    │   │   │   └── feature_extractor_trt.py
    │   │   ├── deep_sort_trt.py
    │   │   └── sort
    │   │       ├── __init__.py
    │   │       ├── detection.py
    │   │       ├── iou_matching.py
    │   │       ├── kalman_filter.py
    │   │       ├── linear_assignment.py
    │   │       ├── nn_matching.py
    │   │       ├── preprocessing.py
    │   │       ├── track.py
    │   │       └── tracker.py
    │   └── utils
    │       ├── __init__.py
    │       ├── asserts.py
    │       ├── draw.py
    │       ├── evaluation.py
    │       ├── io.py
    │       ├── json_logger.py
    │       ├── log.py
    │       ├── parser.py
    │       └── tools.py
    ├── models
    │   ├── __init__.py
    │   ├── api.py
    │   ├── common.py
    │   ├── cudart_api.py
    │   ├── engine.py
    │   ├── pycuda_api.py
    │   ├── torch_utils.py
    │   └── utils.py
    ├── tracker_trt.py
    ├── yolov8_bytetrack_trt.py
    └── yolov8_deepsort_trt.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
162 |
163 |
164 | *.pt
165 | *.pth
166 | *.engine
167 | *.pkl
168 | *.h5
169 | *.npy
170 | *.npz
171 | opencv_build
172 | *.mp4
173 | *.avi
174 | tracking_bytetrack_output.txt
175 | run_bt.py
176 | *.llc
177 | *.webm
178 |
179 | .idea
180 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "refs/YOLOv8-TensorRT"]
2 | path = refs/YOLOv8-TensorRT
3 | url = https://github.com/triple-Mu/YOLOv8-TensorRT
4 | [submodule "refs/deepsort_tensorrt"]
5 | path = refs/deepsort_tensorrt
6 | url = https://github.com/GesilaA/deepsort_tensorrt
7 | [submodule "refs/opencv"]
8 | path = refs/opencv
9 | url = https://github.com/opencv/opencv.git
10 | [submodule "refs/opencv_contrib"]
11 | path = refs/opencv_contrib
12 | url = https://github.com/opencv/opencv_contrib.git
13 | [submodule "refs/ByteTrack"]
14 | path = refs/ByteTrack
15 | url = https://github.com/ifzhang/ByteTrack.git
16 | [submodule "refs/BoostTrack"]
17 | path = refs/BoostTrack
18 | url = https://github.com/vukasin-stanojevic/BoostTrack
19 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Bang Nguyen Anh
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # YOLO Object Tracking TensorRT
4 |
5 |
6 |
7 |
8 | Uses OpenCV to capture video from a camera or a video file, then runs **YOLOv8 TensorRT** to detect objects and **DeepSORT TensorRT** or **BYTETrack** to track them.
9 |
10 | Support for both **NVIDIA dGPU** and **Jetson** devices.
11 |
12 | ## Demo
13 |
14 | ### OpenCV + YOLOv8 + BYTETrack on NVIDIA GeForce GTX 1660Ti
15 | ![](assets/sample_yolov8_bytetrack.gif)
16 |
17 |
18 |
19 |
20 | ## Performance
21 |
22 | ### Both OpenCV YOLOv8 and DeepSORT TensorRT
23 | Uses OpenCV to capture video from a camera or a video file, then runs YOLOv8 TensorRT to detect objects and DeepSORT TensorRT to track them.
24 |
25 | | Model | Device | FPS |
26 | | --- | --- | --- |
27 | | OpenCV + YOLOv8n + DeepSORT | NVIDIA dGPU GTX 1660Ti 6GB | ~ |
28 | | OpenCV + YOLOv8n + DeepSORT | NVIDIA Jetson Xavier NX 8GB | ~ |
29 | | OpenCV + YOLOv8n + DeepSORT | NVIDIA Jetson Orin Nano 8GB | ~34 |
30 |
31 | ### YOLOv8 TensorRT model
32 |
33 | Test speed of the YOLOv8 TensorRT model using `trtexec` from TensorRT
34 |
35 | `/usr/src/tensorrt/bin/trtexec` on NVIDIA Jetson
36 |
37 | > batch size = 1
38 |
39 | | Model | Device | Throughput (qps) | Latency(ms) |
40 | | --- | --- | --- | --- |
41 | | `yolov8n.engine` | NVIDIA dGPU GTX 1660Ti 6GB | ~419.742 | ~2.91736 |
42 | | `yolov8n.engine` | NVIDIA Jetson Xavier NX 8GB | ~ | ~ |
43 | | `yolov8n.engine` | NVIDIA Jetson Orin Nano 8GB | ~137.469 | ~137.469 |
44 |
45 | ### DeepSORT TensorRT model
46 |
47 | Test speed of the DeepSORT TensorRT model using `trtexec` from TensorRT
48 |
49 | `/usr/src/tensorrt/bin/trtexec` on NVIDIA Jetson
50 |
51 | > batch size = 1
52 |
53 | | Model | Device | Throughput (qps) | Latency(ms) |
54 | | --- | --- | --- | --- |
55 | | `deepsort.engine` | NVIDIA dGPU GTX 1660Ti 6GB | ~614.738 | ~1.52197 |
56 | | `deepsort.engine` | NVIDIA Jetson Xavier NX 8GB | ~ | ~ |
57 | | `deepsort.engine` | NVIDIA Jetson Orin Nano 8GB | ~546.135 | ~1.82227 |
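The numbers above come from `trtexec` runs. As a rough sketch (engine paths assumed relative to the repository root; available flags vary between TensorRT versions), a benchmark run looks like:

```bash
# benchmark the exported engines with trtexec (/usr/src/tensorrt/bin/trtexec on Jetson)
/usr/src/tensorrt/bin/trtexec --loadEngine=models/engine/yolov8n.engine
/usr/src/tensorrt/bin/trtexec --loadEngine=models/engine/deepsort.engine
```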
58 |
59 | ## For NVIDIA dGPU
60 |
61 | ### Environment
62 |
63 | - NVIDIA CUDA: 11.4
64 | - NVIDIA TensorRT: 8.5.2
65 |
66 |
67 | #### Clone repository
68 |
69 | Clone repository and submodules
70 |
71 | ```bash
72 | git clone --recurse-submodules https://github.com/nabang1010/YOLOv8_DeepSORT_TensorRT.git
73 | ```
74 |
75 | #### Prepare environment
76 |
77 | Create a new environment
78 |
79 | ```bash
80 | conda create -n yolov8_ds python=3.8
81 | ```
82 |
83 | Activate the environment
84 |
85 | ```bash
86 | conda activate yolov8_ds
87 | ```
88 |
89 | ### Prepare models
90 |
91 | Go to **`refs/YOLOv8-TensorRT`** and install requirements for exporting models
92 |
93 | ```bash
94 | cd refs/YOLOv8-TensorRT
95 | pip3 install -r requirements.txt
96 | pip3 install tensorrt easydict pycuda lap cython_bbox
97 | ```
98 | Install `python3-libnvinfer`
99 |
100 | ```bash
101 | sudo apt-get install python3-libnvinfer
102 | ```
103 |
104 | Download the YOLOv8 weights from [ultralytics](https://github.com/ultralytics/ultralytics) here: [yolov8n.pt](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n.pt) and save them in the **`models/to_export`** folder.
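For example, the weights can be fetched straight into that folder with `wget` (run from the repository root):

```bash
wget -P models/to_export https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n.pt
```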
105 |
106 | **Export YOLOv8 ONNX model**
107 |
108 | In **`refs/YOLOv8-TensorRT`** run the following command to export YOLOv8 ONNX model
109 |
110 | ```bash
111 | python3 export-det.py \
112 | --weights ../../models/to_export/yolov8n.pt \
113 | --iou-thres 0.65 \
114 | --conf-thres 0.25 \
115 | --topk 100 \
116 | --opset 11 \
117 | --sim \
118 | --input-shape 1 3 640 640 \
119 | --device cuda:0
120 | ```
121 |
122 | The exported `.onnx` model is saved in the **`models/to_export`** folder; move it to the **`models/onnx`** folder:
123 | ```bash
124 | mv ../../models/to_export/yolov8n.onnx ../../models/onnx/yolov8n.onnx
125 | ```
126 | **Export YOLOv8 TensorRT model**
127 |
128 | In **`refs/YOLOv8-TensorRT`** run the following command to export YOLOv8 TensorRT model
129 |
130 | ```bash
131 | python3 build.py \
132 | --weights ../../models/onnx/yolov8n.onnx \
133 | --iou-thres 0.65 \
134 | --conf-thres 0.25 \
135 | --topk 100 \
136 | --fp16 \
137 | --device cuda:0
138 | ```
139 | The exported `.engine` model is saved in the **`models/onnx`** folder; move it to the **`models/engine`** folder:
140 |
141 | ```bash
142 | mv ../../models/onnx/yolov8n.engine ../../models/engine/yolov8n.engine
143 | ```
144 |
145 | **Build OpenCV**
146 |
147 | ```bash
148 | bash build_opencv.sh
149 | ```
150 |
151 | **Export DeepSORT TensorRT model *(skip this step if you use BYTETrack)***
152 |
153 |
154 | Install `libeigen3-dev`
155 | ```bash
156 | apt-get install libeigen3-dev
157 | ```
158 | Go to **`refs/deepsort_tensorrt`** and run the following command to build `onnx2engine`
159 |
160 | ```bash
161 | cd refs/deepsort_tensorrt
162 | mkdir build
163 | cd build
164 | cmake ..
165 | make -j$(nproc)
166 |
167 | ```
168 |
169 | > If you get the error `fatal error: Eigen/Core: No such file or directory`, prefix the Eigen includes with `eigen3/` (e.g. replace `#include <Eigen/Core>` with `#include <eigen3/Eigen/Core>`) in the files of this repo that include Eigen (`datatype.h`, `kalmanfilter.cpp`) and rebuild.
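A one-liner like the following can apply that change in bulk (a sketch, run from `refs/deepsort_tensorrt`; it assumes the affected files include Eigen headers in the `#include <Eigen/...>` form):

```bash
# prefix every direct Eigen include with eigen3/
grep -rl '#include <Eigen/' . | xargs sed -i 's|#include <Eigen/|#include <eigen3/Eigen/|g'
```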
170 |
171 | > If you get the error `error: looser exception specification on overriding virtual function 'virtual void Logger::log(nvinfer1::ILogger::Severity`, add `noexcept` before `override` in `logger.h` line 239 and rebuild.
172 |
173 | Run the following command to export the DeepSORT TensorRT model
174 |
175 | ```bash
176 | ./build/onnx2engine ../../models/onnx/deepsort.onnx ../../models/engine/deepsort.engine
177 | ```
178 | ### Run script
179 |
180 | **Go to the `srcs` folder**
181 |
182 | ```bash
183 | cd srcs
184 | ```
185 |
186 | **Run YOLOv8 + DeepSORT**
187 |
188 | ```bash
189 | python3 yolov8_deepsort_trt.py --show
190 |
191 | ```
192 | **Run YOLOv8 + BYTETrack**
193 |
194 | ```bash
195 | python3 yolov8_bytetrack_trt.py --show
196 |
197 | ```
198 |
199 | ## For NVIDIA Jetson Device
200 |
201 | ***Coming soon***
202 |
203 |
204 | ---
205 |
206 | # References
207 |
208 | - [ultralytics](https://github.com/ultralytics/ultralytics)
209 | - [YOLOv8-TensorRT](https://github.com/triple-Mu/YOLOv8-TensorRT)
210 | - [deepsort_tensorrt](https://github.com/GesilaA/deepsort_tensorrt)
211 | - [yolov5_deepsort_tensorrt](https://github.com/cong/yolov5_deepsort_tensorrt)
212 | - [ByteTrack](https://github.com/ifzhang/ByteTrack)
213 |
214 | # Star History
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
--------------------------------------------------------------------------------
/assets/sample_yolov8_bytetrack.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/assets/sample_yolov8_bytetrack.gif
--------------------------------------------------------------------------------
/build_opencv.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd refs/opencv
4 | sed -i 's/include <Eigen\/Core>/include <eigen3\/Eigen\/Core>/g' modules/core/include/opencv2/core/private.hpp
5 | rm -rf build
6 | mkdir build && cd build
7 | apt install -y build-essential cmake git pkg-config libgtk-3-dev \
8 | libavcodec-dev libavformat-dev libswscale-dev libv4l-dev \
9 | libxvidcore-dev libx264-dev libjpeg-dev libpng-dev libtiff-dev \
10 | gfortran openexr libatlas-base-dev python3-dev python3-numpy \
11 | libtbb2 libtbb-dev libdc1394-22-dev
12 | cmake -D CMAKE_BUILD_TYPE=RELEASE \
13 | -D CMAKE_INSTALL_PREFIX=/usr/local \
14 | -D EIGEN_INCLUDE_PATH=/usr/include/eigen3 \
15 | -D ENABLE_FAST_MATH=1 \
16 | -D CUDA_FAST_MATH=1 \
17 | -D WITH_CUBLAS=1 \
18 | -D OPENCV_GENERATE_PKGCONFIG=ON \
19 | -D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules \
20 | -D WITH_GSTREAMER=ON \
21 | -D WITH_V4L=ON \
22 | -D WITH_LIBV4L=ON \
23 | -D BUILD_opencv_python2=ON \
24 | -D BUILD_opencv_python3=ON \
25 | ../
26 | # -D WITH_CUDA=ON \
27 | make -j$(nproc)
28 | make install
29 | ldconfig -v
30 |
31 |
--------------------------------------------------------------------------------
/models/engine/.gitkeep:
--------------------------------------------------------------------------------
1 | Store NVIDIA TensorRT Engine models
--------------------------------------------------------------------------------
/models/onnx/.gitkeep:
--------------------------------------------------------------------------------
1 | Store ONNX models to export
--------------------------------------------------------------------------------
/models/onnx/deepsort.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/models/onnx/deepsort.onnx
--------------------------------------------------------------------------------
/models/onnx/yolov8n.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/models/onnx/yolov8n.onnx
--------------------------------------------------------------------------------
/models/to_export/.gitkeep:
--------------------------------------------------------------------------------
1 | Store models to export
--------------------------------------------------------------------------------
/sample_video/.gitkeep:
--------------------------------------------------------------------------------
1 | Store sample videos
--------------------------------------------------------------------------------
/sample_video/sample_1.webm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/sample_video/sample_1.webm
--------------------------------------------------------------------------------
/srcs/bytetrack/basetrack.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from collections import OrderedDict
3 |
4 |
5 | class TrackState(object):
6 | New = 0
7 | Tracked = 1
8 | Lost = 2
9 | Removed = 3
10 |
11 |
12 | class BaseTrack(object):
13 | _count = 0
14 |
15 | track_id = 0
16 | is_activated = False
17 | state = TrackState.New
18 |
19 | history = OrderedDict()
20 | features = []
21 | curr_feature = None
22 | score = 0
23 | start_frame = 0
24 | frame_id = 0
25 | time_since_update = 0
26 |
27 | # multi-camera
28 | location = (np.inf, np.inf)
29 |
30 | @property
31 | def end_frame(self):
32 | return self.frame_id
33 |
34 | @staticmethod
35 | def next_id():
36 | BaseTrack._count += 1
37 | return BaseTrack._count
38 |
39 | def activate(self, *args):
40 | raise NotImplementedError
41 |
42 | def predict(self):
43 | raise NotImplementedError
44 |
45 | def update(self, *args, **kwargs):
46 | raise NotImplementedError
47 |
48 | def mark_lost(self):
49 | self.state = TrackState.Lost
50 |
51 | def mark_removed(self):
52 | self.state = TrackState.Removed
--------------------------------------------------------------------------------
/srcs/bytetrack/byte_tracker.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from collections import deque
3 | import os
4 | import os.path as osp
5 | import copy
6 | import torch
7 | import torch.nn.functional as F
8 |
9 | from .kalman_filter import KalmanFilter
10 | from .matching import iou_distance, fuse_score, linear_assignment
11 | from .basetrack import BaseTrack, TrackState
12 |
13 | class STrack(BaseTrack):
14 | shared_kalman = KalmanFilter()
15 | def __init__(self, tlwh, score):
16 |
17 | # wait activate
18 |         self._tlwh = np.asarray(tlwh, dtype=float)  # np.float was removed in NumPy >= 1.24
19 | self.kalman_filter = None
20 | self.mean, self.covariance = None, None
21 | self.is_activated = False
22 |
23 | self.score = score
24 | self.tracklet_len = 0
25 |
26 | def predict(self):
27 | mean_state = self.mean.copy()
28 | if self.state != TrackState.Tracked:
29 | mean_state[7] = 0
30 | self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
31 |
32 | @staticmethod
33 | def multi_predict(stracks):
34 | if len(stracks) > 0:
35 | multi_mean = np.asarray([st.mean.copy() for st in stracks])
36 | multi_covariance = np.asarray([st.covariance for st in stracks])
37 | for i, st in enumerate(stracks):
38 | if st.state != TrackState.Tracked:
39 | multi_mean[i][7] = 0
40 | multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
41 | for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
42 | stracks[i].mean = mean
43 | stracks[i].covariance = cov
44 |
45 | def activate(self, kalman_filter, frame_id):
46 | """Start a new tracklet"""
47 | self.kalman_filter = kalman_filter
48 | self.track_id = self.next_id()
49 | self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh))
50 |
51 | self.tracklet_len = 0
52 | self.state = TrackState.Tracked
53 | if frame_id == 1:
54 | self.is_activated = True
55 | # self.is_activated = True
56 | self.frame_id = frame_id
57 | self.start_frame = frame_id
58 |
59 | def re_activate(self, new_track, frame_id, new_id=False):
60 | self.mean, self.covariance = self.kalman_filter.update(
61 | self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh)
62 | )
63 | self.tracklet_len = 0
64 | self.state = TrackState.Tracked
65 | self.is_activated = True
66 | self.frame_id = frame_id
67 | if new_id:
68 | self.track_id = self.next_id()
69 | self.score = new_track.score
70 |
71 | def update(self, new_track, frame_id):
72 | """
73 | Update a matched track
74 | :type new_track: STrack
75 | :type frame_id: int
76 | :type update_feature: bool
77 | :return:
78 | """
79 | self.frame_id = frame_id
80 | self.tracklet_len += 1
81 |
82 | new_tlwh = new_track.tlwh
83 | self.mean, self.covariance = self.kalman_filter.update(
84 | self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh))
85 | self.state = TrackState.Tracked
86 | self.is_activated = True
87 |
88 | self.score = new_track.score
89 |
90 | @property
91 | # @jit(nopython=True)
92 | def tlwh(self):
93 | """Get current position in bounding box format `(top left x, top left y,
94 | width, height)`.
95 | """
96 | if self.mean is None:
97 | return self._tlwh.copy()
98 | ret = self.mean[:4].copy()
99 | ret[2] *= ret[3]
100 | ret[:2] -= ret[2:] / 2
101 | return ret
102 |
103 | @property
104 | # @jit(nopython=True)
105 | def tlbr(self):
106 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
107 | `(top left, bottom right)`.
108 | """
109 | ret = self.tlwh.copy()
110 | ret[2:] += ret[:2]
111 | return ret
112 |
113 | @staticmethod
114 | # @jit(nopython=True)
115 | def tlwh_to_xyah(tlwh):
116 | """Convert bounding box to format `(center x, center y, aspect ratio,
117 | height)`, where the aspect ratio is `width / height`.
118 | """
119 | ret = np.asarray(tlwh).copy()
120 | ret[:2] += ret[2:] / 2
121 | ret[2] /= ret[3]
122 | return ret
123 |
124 | def to_xyah(self):
125 | return self.tlwh_to_xyah(self.tlwh)
126 |
127 | @staticmethod
128 | # @jit(nopython=True)
129 | def tlbr_to_tlwh(tlbr):
130 | ret = np.asarray(tlbr).copy()
131 | ret[2:] -= ret[:2]
132 | return ret
133 |
134 | @staticmethod
135 | # @jit(nopython=True)
136 | def tlwh_to_tlbr(tlwh):
137 | ret = np.asarray(tlwh).copy()
138 | ret[2:] += ret[:2]
139 | return ret
140 |
141 | def __repr__(self):
142 | return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
143 |
144 |
145 | class BYTETracker(object):
146 | def __init__(self, args, frame_rate=30):
147 | self.tracked_stracks = [] # type: list[STrack]
148 | self.lost_stracks = [] # type: list[STrack]
149 | self.removed_stracks = [] # type: list[STrack]
150 |
151 | self.frame_id = 0
152 | self.args = args
153 | #self.det_thresh = args.track_thresh
154 | self.det_thresh = args.track_thresh + 0.1
155 | self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
156 | self.max_time_lost = self.buffer_size
157 | self.kalman_filter = KalmanFilter()
158 |
159 | def update(self, output_results, img_info, img_size):
160 | self.frame_id += 1
161 | activated_starcks = []
162 | refind_stracks = []
163 | lost_stracks = []
164 | removed_stracks = []
165 |
166 | if output_results.shape[1] == 5:
167 | scores = output_results[:, 4]
168 | bboxes = output_results[:, :4]
169 | else:
170 | output_results = output_results.cpu().numpy()
171 | scores = output_results[:, 4] * output_results[:, 5]
172 | bboxes = output_results[:, :4] # x1y1x2y2
173 | img_h, img_w = img_info[0], img_info[1]
174 | scale = min(img_size[0] / float(img_h), img_size[1] / float(img_w))
175 | bboxes /= scale
176 |
177 | remain_inds = scores > self.args.track_thresh
178 | inds_low = scores > 0.1
179 | inds_high = scores < self.args.track_thresh
180 |
181 | inds_second = np.logical_and(inds_low, inds_high)
182 | dets_second = bboxes[inds_second]
183 | dets = bboxes[remain_inds]
184 | scores_keep = scores[remain_inds]
185 | scores_second = scores[inds_second]
186 |
187 | if len(dets) > 0:
188 | '''Detections'''
189 | detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for
190 | (tlbr, s) in zip(dets, scores_keep)]
191 | else:
192 | detections = []
193 |
194 | ''' Add newly detected tracklets to tracked_stracks'''
195 | unconfirmed = []
196 | tracked_stracks = [] # type: list[STrack]
197 | for track in self.tracked_stracks:
198 | if not track.is_activated:
199 | unconfirmed.append(track)
200 | else:
201 | tracked_stracks.append(track)
202 |
203 | ''' Step 2: First association, with high score detection boxes'''
204 | strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
205 | # Predict the current location with KF
206 | STrack.multi_predict(strack_pool)
207 | dists = iou_distance(strack_pool, detections)
208 | if not self.args.mot20:
209 | dists = fuse_score(dists, detections)
210 | matches, u_track, u_detection = linear_assignment(dists, thresh=self.args.match_thresh)
211 |
212 | for itracked, idet in matches:
213 | track = strack_pool[itracked]
214 | det = detections[idet]
215 | if track.state == TrackState.Tracked:
216 | track.update(detections[idet], self.frame_id)
217 | activated_starcks.append(track)
218 | else:
219 | track.re_activate(det, self.frame_id, new_id=False)
220 | refind_stracks.append(track)
221 |
222 | ''' Step 3: Second association, with low score detection boxes'''
223 |         # associate the remaining unmatched tracks to the low score detections
224 | if len(dets_second) > 0:
225 | '''Detections'''
226 | detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for
227 | (tlbr, s) in zip(dets_second, scores_second)]
228 | else:
229 | detections_second = []
230 | r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
231 | dists = iou_distance(r_tracked_stracks, detections_second)
232 | matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
233 | for itracked, idet in matches:
234 | track = r_tracked_stracks[itracked]
235 | det = detections_second[idet]
236 | if track.state == TrackState.Tracked:
237 | track.update(det, self.frame_id)
238 | activated_starcks.append(track)
239 | else:
240 | track.re_activate(det, self.frame_id, new_id=False)
241 | refind_stracks.append(track)
242 |
243 | for it in u_track:
244 | track = r_tracked_stracks[it]
245 | if not track.state == TrackState.Lost:
246 | track.mark_lost()
247 | lost_stracks.append(track)
248 |
249 | '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
250 | detections = [detections[i] for i in u_detection]
251 | dists = iou_distance(unconfirmed, detections)
252 | if not self.args.mot20:
253 | dists = fuse_score(dists, detections)
254 | matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
255 | for itracked, idet in matches:
256 | unconfirmed[itracked].update(detections[idet], self.frame_id)
257 | activated_starcks.append(unconfirmed[itracked])
258 | for it in u_unconfirmed:
259 | track = unconfirmed[it]
260 | track.mark_removed()
261 | removed_stracks.append(track)
262 |
263 | """ Step 4: Init new stracks"""
264 | for inew in u_detection:
265 | track = detections[inew]
266 | if track.score < self.det_thresh:
267 | continue
268 | track.activate(self.kalman_filter, self.frame_id)
269 | activated_starcks.append(track)
270 | """ Step 5: Update state"""
271 | for track in self.lost_stracks:
272 | if self.frame_id - track.end_frame > self.max_time_lost:
273 | track.mark_removed()
274 | removed_stracks.append(track)
275 |
276 | # print('Ramained match {} s'.format(t4-t3))
277 |
278 | self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
279 | self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
280 | self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
281 | self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
282 | self.lost_stracks.extend(lost_stracks)
283 | self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
284 | self.removed_stracks.extend(removed_stracks)
285 | self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
286 | # get scores of lost tracks
287 | output_stracks = [track for track in self.tracked_stracks if track.is_activated]
288 |
289 | return output_stracks
290 |
291 |
292 | def joint_stracks(tlista, tlistb):
293 | exists = {}
294 | res = []
295 | for t in tlista:
296 | exists[t.track_id] = 1
297 | res.append(t)
298 | for t in tlistb:
299 | tid = t.track_id
300 | if not exists.get(tid, 0):
301 | exists[tid] = 1
302 | res.append(t)
303 | return res
304 |
305 |
306 | def sub_stracks(tlista, tlistb):
307 | stracks = {}
308 | for t in tlista:
309 | stracks[t.track_id] = t
310 | for t in tlistb:
311 | tid = t.track_id
312 | if stracks.get(tid, 0):
313 | del stracks[tid]
314 | return list(stracks.values())
315 |
316 |
317 | def remove_duplicate_stracks(stracksa, stracksb):
318 | pdist = iou_distance(stracksa, stracksb)
319 | pairs = np.where(pdist < 0.15)
320 | dupa, dupb = list(), list()
321 | for p, q in zip(*pairs):
322 | timep = stracksa[p].frame_id - stracksa[p].start_frame
323 | timeq = stracksb[q].frame_id - stracksb[q].start_frame
324 | if timep > timeq:
325 | dupb.append(q)
326 | else:
327 | dupa.append(p)
328 | resa = [t for i, t in enumerate(stracksa) if not i in dupa]
329 | resb = [t for i, t in enumerate(stracksb) if not i in dupb]
330 | return resa, resb
--------------------------------------------------------------------------------
/srcs/bytetrack/kalman_filter.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 | import scipy.linalg
4 |
5 |
6 | """
7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of
8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
9 | function and used as Mahalanobis gating threshold.
10 | """
11 | chi2inv95 = {
12 | 1: 3.8415,
13 | 2: 5.9915,
14 | 3: 7.8147,
15 | 4: 9.4877,
16 | 5: 11.070,
17 | 6: 12.592,
18 | 7: 14.067,
19 | 8: 15.507,
20 | 9: 16.919}
21 |
22 |
23 | class KalmanFilter(object):
24 | """
25 | A simple Kalman filter for tracking bounding boxes in image space.
26 |
27 | The 8-dimensional state space
28 |
29 | x, y, a, h, vx, vy, va, vh
30 |
31 | contains the bounding box center position (x, y), aspect ratio a, height h,
32 | and their respective velocities.
33 |
34 | Object motion follows a constant velocity model. The bounding box location
35 | (x, y, a, h) is taken as direct observation of the state space (linear
36 | observation model).
37 |
38 | """
39 |
40 | def __init__(self):
41 | ndim, dt = 4, 1.
42 |
43 | # Create Kalman filter model matrices.
44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim)
45 | for i in range(ndim):
46 | self._motion_mat[i, ndim + i] = dt
47 | self._update_mat = np.eye(ndim, 2 * ndim)
48 |
49 | # Motion and observation uncertainty are chosen relative to the current
50 | # state estimate. These weights control the amount of uncertainty in
51 | # the model. This is a bit hacky.
52 | self._std_weight_position = 1. / 20
53 | self._std_weight_velocity = 1. / 160
54 |
55 | def initiate(self, measurement):
56 | """Create track from unassociated measurement.
57 |
58 | Parameters
59 | ----------
60 | measurement : ndarray
61 | Bounding box coordinates (x, y, a, h) with center position (x, y),
62 | aspect ratio a, and height h.
63 |
64 | Returns
65 | -------
66 | (ndarray, ndarray)
67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8
68 | dimensional) of the new track. Unobserved velocities are initialized
69 | to 0 mean.
70 |
71 | """
72 | mean_pos = measurement
73 | mean_vel = np.zeros_like(mean_pos)
74 | mean = np.r_[mean_pos, mean_vel]
75 |
76 | std = [
77 | 2 * self._std_weight_position * measurement[3],
78 | 2 * self._std_weight_position * measurement[3],
79 | 1e-2,
80 | 2 * self._std_weight_position * measurement[3],
81 | 10 * self._std_weight_velocity * measurement[3],
82 | 10 * self._std_weight_velocity * measurement[3],
83 | 1e-5,
84 | 10 * self._std_weight_velocity * measurement[3]]
85 | covariance = np.diag(np.square(std))
86 | return mean, covariance
87 |
88 | def predict(self, mean, covariance):
89 | """Run Kalman filter prediction step.
90 |
91 | Parameters
92 | ----------
93 | mean : ndarray
94 | The 8 dimensional mean vector of the object state at the previous
95 | time step.
96 | covariance : ndarray
97 | The 8x8 dimensional covariance matrix of the object state at the
98 | previous time step.
99 |
100 | Returns
101 | -------
102 | (ndarray, ndarray)
103 | Returns the mean vector and covariance matrix of the predicted
104 | state. Unobserved velocities are initialized to 0 mean.
105 |
106 | """
107 | std_pos = [
108 | self._std_weight_position * mean[3],
109 | self._std_weight_position * mean[3],
110 | 1e-2,
111 | self._std_weight_position * mean[3]]
112 | std_vel = [
113 | self._std_weight_velocity * mean[3],
114 | self._std_weight_velocity * mean[3],
115 | 1e-5,
116 | self._std_weight_velocity * mean[3]]
117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
118 |
119 | #mean = np.dot(self._motion_mat, mean)
120 | mean = np.dot(mean, self._motion_mat.T)
121 | covariance = np.linalg.multi_dot((
122 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
123 |
124 | return mean, covariance
125 |
126 | def project(self, mean, covariance):
127 | """Project state distribution to measurement space.
128 |
129 | Parameters
130 | ----------
131 | mean : ndarray
132 | The state's mean vector (8 dimensional array).
133 | covariance : ndarray
134 | The state's covariance matrix (8x8 dimensional).
135 |
136 | Returns
137 | -------
138 | (ndarray, ndarray)
139 | Returns the projected mean and covariance matrix of the given state
140 | estimate.
141 |
142 | """
143 | std = [
144 | self._std_weight_position * mean[3],
145 | self._std_weight_position * mean[3],
146 | 1e-1,
147 | self._std_weight_position * mean[3]]
148 | innovation_cov = np.diag(np.square(std))
149 |
150 | mean = np.dot(self._update_mat, mean)
151 | covariance = np.linalg.multi_dot((
152 | self._update_mat, covariance, self._update_mat.T))
153 | return mean, covariance + innovation_cov
154 |
155 | def multi_predict(self, mean, covariance):
156 | """Run Kalman filter prediction step (Vectorized version).
157 | Parameters
158 | ----------
159 | mean : ndarray
160 | The Nx8 dimensional mean matrix of the object states at the previous
161 | time step.
162 | covariance : ndarray
163 |             The Nx8x8 dimensional covariance matrices of the object states at the
164 | previous time step.
165 | Returns
166 | -------
167 | (ndarray, ndarray)
168 | Returns the mean vector and covariance matrix of the predicted
169 | state. Unobserved velocities are initialized to 0 mean.
170 | """
171 | std_pos = [
172 | self._std_weight_position * mean[:, 3],
173 | self._std_weight_position * mean[:, 3],
174 | 1e-2 * np.ones_like(mean[:, 3]),
175 | self._std_weight_position * mean[:, 3]]
176 | std_vel = [
177 | self._std_weight_velocity * mean[:, 3],
178 | self._std_weight_velocity * mean[:, 3],
179 | 1e-5 * np.ones_like(mean[:, 3]),
180 | self._std_weight_velocity * mean[:, 3]]
181 | sqr = np.square(np.r_[std_pos, std_vel]).T
182 |
183 | motion_cov = []
184 | for i in range(len(mean)):
185 | motion_cov.append(np.diag(sqr[i]))
186 | motion_cov = np.asarray(motion_cov)
187 |
188 | mean = np.dot(mean, self._motion_mat.T)
189 | left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
190 | covariance = np.dot(left, self._motion_mat.T) + motion_cov
191 |
192 | return mean, covariance
193 |
194 | def update(self, mean, covariance, measurement):
195 | """Run Kalman filter correction step.
196 |
197 | Parameters
198 | ----------
199 | mean : ndarray
200 | The predicted state's mean vector (8 dimensional).
201 | covariance : ndarray
202 | The state's covariance matrix (8x8 dimensional).
203 | measurement : ndarray
204 | The 4 dimensional measurement vector (x, y, a, h), where (x, y)
205 | is the center position, a the aspect ratio, and h the height of the
206 | bounding box.
207 |
208 | Returns
209 | -------
210 | (ndarray, ndarray)
211 | Returns the measurement-corrected state distribution.
212 |
213 | """
214 | projected_mean, projected_cov = self.project(mean, covariance)
215 |
216 | chol_factor, lower = scipy.linalg.cho_factor(
217 | projected_cov, lower=True, check_finite=False)
218 | kalman_gain = scipy.linalg.cho_solve(
219 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
220 | check_finite=False).T
221 | innovation = measurement - projected_mean
222 |
223 | new_mean = mean + np.dot(innovation, kalman_gain.T)
224 | new_covariance = covariance - np.linalg.multi_dot((
225 | kalman_gain, projected_cov, kalman_gain.T))
226 | return new_mean, new_covariance
227 |
228 | def gating_distance(self, mean, covariance, measurements,
229 | only_position=False, metric='maha'):
230 | """Compute gating distance between state distribution and measurements.
231 | A suitable distance threshold can be obtained from `chi2inv95`. If
232 | `only_position` is False, the chi-square distribution has 4 degrees of
233 | freedom, otherwise 2.
234 | Parameters
235 | ----------
236 | mean : ndarray
237 | Mean vector over the state distribution (8 dimensional).
238 | covariance : ndarray
239 | Covariance of the state distribution (8x8 dimensional).
240 | measurements : ndarray
241 | An Nx4 dimensional matrix of N measurements, each in
242 | format (x, y, a, h) where (x, y) is the bounding box center
243 | position, a the aspect ratio, and h the height.
244 | only_position : Optional[bool]
245 | If True, distance computation is done with respect to the bounding
246 | box center position only.
247 | Returns
248 | -------
249 | ndarray
250 | Returns an array of length N, where the i-th element contains the
251 | squared Mahalanobis distance between (mean, covariance) and
252 | `measurements[i]`.
253 | """
254 | mean, covariance = self.project(mean, covariance)
255 | if only_position:
256 | mean, covariance = mean[:2], covariance[:2, :2]
257 | measurements = measurements[:, :2]
258 |
259 | d = measurements - mean
260 | if metric == 'gaussian':
261 | return np.sum(d * d, axis=1)
262 | elif metric == 'maha':
263 | cholesky_factor = np.linalg.cholesky(covariance)
264 | z = scipy.linalg.solve_triangular(
265 | cholesky_factor, d.T, lower=True, check_finite=False,
266 | overwrite_b=True)
267 | squared_maha = np.sum(z * z, axis=0)
268 | return squared_maha
269 | else:
270 | raise ValueError('invalid distance metric')
--------------------------------------------------------------------------------
/srcs/bytetrack/matching.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import scipy
4 | import lap
5 | from scipy.spatial.distance import cdist
6 |
7 | from cython_bbox import bbox_overlaps as bbox_ious
8 | from .kalman_filter import chi2inv95
9 | import time
10 |
11 | def merge_matches(m1, m2, shape):
12 | O,P,Q = shape
13 | m1 = np.asarray(m1)
14 | m2 = np.asarray(m2)
15 |
16 | M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
17 | M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
18 |
19 | mask = M1*M2
20 | match = mask.nonzero()
21 | match = list(zip(match[0], match[1]))
22 | unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
23 | unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))
24 |
25 | return match, unmatched_O, unmatched_Q
26 |
27 |
28 | def _indices_to_matches(cost_matrix, indices, thresh):
29 | matched_cost = cost_matrix[tuple(zip(*indices))]
30 | matched_mask = (matched_cost <= thresh)
31 |
32 | matches = indices[matched_mask]
33 | unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
34 | unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
35 |
36 | return matches, unmatched_a, unmatched_b
37 |
38 |
39 | def linear_assignment(cost_matrix, thresh):
40 | if cost_matrix.size == 0:
41 | return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
42 | matches, unmatched_a, unmatched_b = [], [], []
43 | cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
44 | for ix, mx in enumerate(x):
45 | if mx >= 0:
46 | matches.append([ix, mx])
47 | unmatched_a = np.where(x < 0)[0]
48 | unmatched_b = np.where(y < 0)[0]
49 | matches = np.asarray(matches)
50 | return matches, unmatched_a, unmatched_b
51 |
52 |
53 | def ious(atlbrs, btlbrs):
54 | """
55 | Compute cost based on IoU
56 | :type atlbrs: list[tlbr] | np.ndarray
57 | :type atlbrs: list[tlbr] | np.ndarray
58 |
59 | :rtype ious np.ndarray
60 | """
61 |     ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=float)
62 | if ious.size == 0:
63 | return ious
64 |
65 | ious = bbox_ious(
66 |         np.ascontiguousarray(atlbrs, dtype=float),
67 |         np.ascontiguousarray(btlbrs, dtype=float)
68 | )
69 |
70 | return ious
71 |
72 |
73 | def iou_distance(atracks, btracks):
74 | """
75 | Compute cost based on IoU
76 | :type atracks: list[STrack]
77 | :type btracks: list[STrack]
78 |
79 | :rtype cost_matrix np.ndarray
80 | """
81 |
82 | if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
83 | atlbrs = atracks
84 | btlbrs = btracks
85 | else:
86 | atlbrs = [track.tlbr for track in atracks]
87 | btlbrs = [track.tlbr for track in btracks]
88 | _ious = ious(atlbrs, btlbrs)
89 | cost_matrix = 1 - _ious
90 |
91 | return cost_matrix
92 |
93 | def v_iou_distance(atracks, btracks):
94 | """
95 | Compute cost based on IoU
96 | :type atracks: list[STrack]
97 | :type btracks: list[STrack]
98 |
99 | :rtype cost_matrix np.ndarray
100 | """
101 |
102 | if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
103 | atlbrs = atracks
104 | btlbrs = btracks
105 | else:
106 | atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks]
107 | btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks]
108 | _ious = ious(atlbrs, btlbrs)
109 | cost_matrix = 1 - _ious
110 |
111 | return cost_matrix
112 |
113 | def embedding_distance(tracks, detections, metric='cosine'):
114 | """
115 | :param tracks: list[STrack]
116 | :param detections: list[BaseTrack]
117 | :param metric:
118 | :return: cost_matrix np.ndarray
119 | """
120 |
121 |     cost_matrix = np.zeros((len(tracks), len(detections)), dtype=float)
122 | if cost_matrix.size == 0:
123 | return cost_matrix
124 |     det_features = np.asarray([track.curr_feat for track in detections], dtype=float)
125 | #for i, track in enumerate(tracks):
126 | #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
127 |     track_features = np.asarray([track.smooth_feat for track in tracks], dtype=float)
128 |     cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))  # normalized features
129 | return cost_matrix
130 |
131 |
132 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
133 | if cost_matrix.size == 0:
134 | return cost_matrix
135 | gating_dim = 2 if only_position else 4
136 | gating_threshold = chi2inv95[gating_dim]
137 | measurements = np.asarray([det.to_xyah() for det in detections])
138 | for row, track in enumerate(tracks):
139 | gating_distance = kf.gating_distance(
140 | track.mean, track.covariance, measurements, only_position)
141 | cost_matrix[row, gating_distance > gating_threshold] = np.inf
142 | return cost_matrix
143 |
144 |
145 | def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
146 | if cost_matrix.size == 0:
147 | return cost_matrix
148 | gating_dim = 2 if only_position else 4
149 | gating_threshold = chi2inv95[gating_dim]
150 | measurements = np.asarray([det.to_xyah() for det in detections])
151 | for row, track in enumerate(tracks):
152 | gating_distance = kf.gating_distance(
153 | track.mean, track.covariance, measurements, only_position, metric='maha')
154 | cost_matrix[row, gating_distance > gating_threshold] = np.inf
155 | cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
156 | return cost_matrix
157 |
158 |
159 | def fuse_iou(cost_matrix, tracks, detections):
160 | if cost_matrix.size == 0:
161 | return cost_matrix
162 | reid_sim = 1 - cost_matrix
163 | iou_dist = iou_distance(tracks, detections)
164 | iou_sim = 1 - iou_dist
165 | fuse_sim = reid_sim * (1 + iou_sim) / 2
166 | det_scores = np.array([det.score for det in detections])
167 | det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
168 | #fuse_sim = fuse_sim * (1 + det_scores) / 2
169 | fuse_cost = 1 - fuse_sim
170 | return fuse_cost
171 |
172 |
173 | def fuse_score(cost_matrix, detections):
174 | if cost_matrix.size == 0:
175 | return cost_matrix
176 | iou_sim = 1 - cost_matrix
177 | det_scores = np.array([det.score for det in detections])
178 | det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
179 | fuse_sim = iou_sim * det_scores
180 | fuse_cost = 1 - fuse_sim
181 | return fuse_cost
--------------------------------------------------------------------------------
/srcs/config.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | import numpy as np
4 |
5 | random.seed(0)
6 |
7 | # detection model classes
8 | CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
9 | 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
10 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
11 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
12 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
13 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
14 | 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
15 | 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
16 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
17 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
18 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
19 | 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
20 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
21 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush')
22 |
23 | # colors per class
24 | COLORS = {
25 | cls: [random.randint(0, 255) for _ in range(3)]
26 | for i, cls in enumerate(CLASSES)
27 | }
28 |
29 | # colors for segment masks
30 | MASK_COLORS = np.array([(255, 56, 56), (255, 157, 151), (255, 112, 31),
31 | (255, 178, 29), (207, 210, 49), (72, 249, 10),
32 | (146, 204, 23), (61, 219, 134), (26, 147, 52),
33 | (0, 212, 187), (44, 153, 168), (0, 194, 255),
34 | (52, 69, 147), (100, 115, 255), (0, 24, 236),
35 | (132, 56, 255), (82, 0, 133), (203, 56, 255),
36 | (255, 149, 200), (255, 55, 199)],
37 | dtype=np.float32) / 255.
38 |
39 | KPS_COLORS = [[0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0],
40 | [255, 128, 0], [255, 128, 0], [255, 128, 0], [255, 128, 0],
41 | [255, 128, 0], [255, 128, 0], [51, 153, 255], [51, 153, 255],
42 | [51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255]]
43 |
44 | SKELETON = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13],
45 | [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3],
46 | [2, 4], [3, 5], [4, 6], [5, 7]]
47 |
48 | LIMB_COLORS = [[51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255],
49 | [255, 51, 255], [255, 51, 255], [255, 51, 255], [255, 128, 0],
50 | [255, 128, 0], [255, 128, 0], [255, 128, 0], [255, 128, 0],
51 | [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0],
52 | [0, 255, 0], [0, 255, 0]]
53 |
54 | # alpha for segment masks
55 | ALPHA = 0.5
56 |
--------------------------------------------------------------------------------
/srcs/deep_sort/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/srcs/deep_sort/__init__.py
--------------------------------------------------------------------------------
/srcs/deep_sort/configs/deep_sort.yaml:
--------------------------------------------------------------------------------
1 | DEEPSORT:
2 | REID_CKPT: "/home/sai/yolov8_deepsort_jetson/model/deepsort.engine"
3 | MAX_DIST: 0.3
4 | MIN_CONFIDENCE: 0.3
5 | NMS_MAX_OVERLAP: 0.5
6 | MAX_IOU_DISTANCE: 0.7
7 | MAX_AGE: 70
8 | N_INIT: 3
9 | NN_BUDGET: 100
10 |
11 |
--------------------------------------------------------------------------------
/srcs/deep_sort/deep_sort/__init__.py:
--------------------------------------------------------------------------------
1 | from .deep_sort_trt import DeepSort
2 |
3 | __all__ = ['DeepSort', 'build_tracker']
4 |
5 |
6 | def build_tracker(cfg, use_cuda):
7 | return DeepSort(cfg.DEEPSORT.REID_CKPT,
8 | max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
9 | nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
10 | max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda)
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/srcs/deep_sort/deep_sort/deep/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/srcs/deep_sort/deep_sort/deep/__init__.py
--------------------------------------------------------------------------------
/srcs/deep_sort/deep_sort/deep/checkpoint/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/srcs/deep_sort/deep_sort/deep/checkpoint/.gitkeep
--------------------------------------------------------------------------------
/srcs/deep_sort/deep_sort/deep/feature_extractor_trt.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #!/usr/bin/python3
3 | """
4 | Created on 2021/5/24 14:05
5 | @Author: Wang Cong
6 | @Email : iwangcong@outlook.com
7 | @Version : 0.1
8 | @File : feature_extractor_trt.py
9 | """
10 | import cv2
11 | import numpy as np
12 | import tensorrt as trt
13 | import pycuda.driver as cuda
14 | import pycuda.autoinit
15 | INPUT_W = 128
16 | INPUT_H = 64
17 |
18 |
19 | class TrackerExtractor:
20 |
21 | def __init__(self, engine_file_path):
22 | self.img_size = 640
23 | self.threshold = 0.3
24 | self.stride = 1
25 | self.size = (64, 128)
26 |
27 | # Create a Context on this device,
28 | self.cfx = cuda.Device(0).make_context()
29 | stream = cuda.Stream()
30 | TRT_LOGGER = trt.Logger(trt.Logger.ERROR)
31 | runtime = trt.Runtime(TRT_LOGGER)
32 |
33 | # Deserialize the engine from file
34 | print("Engine file path: ", engine_file_path)
35 | with open(engine_file_path, "rb") as f:
36 | engine = runtime.deserialize_cuda_engine(f.read())
37 | context = engine.create_execution_context()
38 |
39 | host_inputs = []
40 | cuda_inputs = []
41 | host_outputs = []
42 | cuda_outputs = []
43 | bindings = []
44 |
45 | for binding in engine:
46 | size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
47 | dtype = trt.nptype(engine.get_binding_dtype(binding))
48 | dims = engine.get_binding_shape(binding)
49 | if dims[0] < 0:
50 | size *= -1
51 | # Allocate host and device buffers
52 | host_mem = cuda.pagelocked_empty(size, dtype)
53 | cuda_mem = cuda.mem_alloc(host_mem.nbytes)
54 | # Append the device buffer to device bindings.
55 | bindings.append(int(cuda_mem))
56 | # Append to the appropriate list.
57 | if engine.binding_is_input(binding):
58 | host_inputs.append(host_mem)
59 | cuda_inputs.append(cuda_mem)
60 | else:
61 | host_outputs.append(host_mem)
62 | cuda_outputs.append(cuda_mem)
63 |
64 | # Store
65 | self.stream = stream
66 | self.context = context
67 | self.engine = engine
68 | self.host_inputs = host_inputs
69 | self.cuda_inputs = cuda_inputs
70 | self.host_outputs = host_outputs
71 | self.cuda_outputs = cuda_outputs
72 | self.bindings = bindings
73 |
74 | def _preprocess(self, im_crops):
75 | """
76 | 1. to float with scale from 0 to 1
77 | 2. resize to (64, 128) as Market1501 dataset did
78 |         3. normalize with ImageNet mean and std
79 |         4. collect the crops into a list of numpy arrays
80 |            (no torch tensors are used in this TensorRT version)
81 | """
82 | def _resize(im, size):
83 | return cv2.resize(im.astype(np.float32)/255., size)
84 | def _normalize(im):
85 | mean = [0.485, 0.456, 0.406]
86 | std = [0.229, 0.224, 0.225]
87 | return (im.astype(np.float32) - np.array(mean)) / np.array(std)
88 | imgs = []
89 | for im in im_crops:
90 | img = _normalize(_resize(im, self.size))
91 | # img = img.cpu().numpy()
92 | imgs.append(img)
93 | return imgs
94 |
95 | def track_extractor(self, im_crops):
96 | # threading.Thread.__init__(self)
97 | # Make self the active context, pushing it on top of the context stack.
98 | self.cfx.push()
99 | # Restore
100 | stream = self.stream
101 | context = self.context
102 | engine = self.engine
103 | host_inputs = self.host_inputs
104 | cuda_inputs = self.cuda_inputs
105 | host_outputs = self.host_outputs
106 | cuda_outputs = self.cuda_outputs
107 | bindings = self.bindings
108 | # Do image preprocess
109 | im_batchs = self._preprocess(im_crops)
110 | features_trt = []
111 | for im_batch in im_batchs:
112 | # Copy input image to host buffer
113 | np.copyto(host_inputs[0], im_batch.ravel())
114 | # Transfer input data to the GPU.
115 | cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
116 | # Run inference.
117 | context.set_binding_shape(0, (1, 3, 128, 64))
118 | context.execute_async(bindings=bindings, stream_handle=stream.handle)
119 | # Transfer predictions back from the GPU.
120 | cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
121 | # Synchronize the stream
122 | stream.synchronize()
123 | # Remove any context from the top of the context stack, deactivating it.
124 | # self.cfx.pop()
125 | # Here we use the first row of output in that batch_size = 1
126 | trt_outputs = host_outputs[0]
127 | # Do postprocess
128 | feature_trt = trt_outputs
129 | features_trt.append(feature_trt)
130 | return np.array(features_trt)
131 |
132 | def destroy(self):
133 | self.cfx.pop()
134 |
135 |
136 | if __name__ == '__main__':
137 | img = cv2.imread("demo.jpg")[:,:,(2,1,0)]
138 | extr = TrackerExtractor("checkpoint/deepsort.engine")
139 |     feature = extr.track_extractor([img])  # track_extractor expects a list of crops
140 | print(feature.shape)
141 |
--------------------------------------------------------------------------------
/srcs/deep_sort/deep_sort/deep_sort_trt.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #!/usr/bin/python3
3 | """
4 | Created on 2021/5/24 13:46
5 | @Author: Wang Cong
6 | @Email : iwangcong@outlook.com
7 | @Version : 0.1
8 | @File : deep_sort_trt.py
9 | """
10 | import numpy as np
11 | import time
12 | from .deep.feature_extractor_trt import TrackerExtractor
13 | from .sort.nn_matching import NearestNeighborDistanceMetric
14 | from .sort.preprocessing import non_max_suppression
15 | from .sort.detection import Detection
16 | from .sort.tracker import Tracker
17 |
18 | __all__ = ['DeepSort']
19 |
20 |
21 | class DeepSort(object):
22 | def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7,
23 | max_age=70, n_init=3, nn_budget=100, use_cuda=True):
24 | self.min_confidence = min_confidence
25 | self.nms_max_overlap = nms_max_overlap
26 | model_path = "../models/engine/deepsort.engine"
27 |
28 | self.extractor = TrackerExtractor(model_path)
29 |
30 | max_cosine_distance = max_dist
31 | nn_budget = 100
32 | metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
33 | self.tracker = Tracker(metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init)
34 |
35 | def clear(self):
36 |         self.extractor.cfx.pop()  # pop the CUDA context pushed by the extractor (the TRT execution context has no pop())
37 |
38 | def update(self, bbox_xywh, lbls, confidences, ori_img):
39 | self.height, self.width = ori_img.shape[:2]
40 | # generate detections
41 | features = self._get_features(bbox_xywh, ori_img)
42 | bbox_tlwh = self._xywh_to_tlwh(bbox_xywh)
43 | detections = [Detection(bbox_tlwh[i], lbls[i], conf, features[i]) for i, conf in enumerate(confidences) if
44 | conf > self.min_confidence]
45 |
46 | # run on non-maximum supression
47 | boxes = np.array([d.tlwh for d in detections])
48 | labels = np.array([d.label for d in detections])
49 | scores = np.array([d.confidence for d in detections])
50 | indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
51 | detections = [detections[i] for i in indices]
52 |
53 | # update tracker
54 | self.tracker.predict()
55 | self.tracker.update(detections)
56 |
57 | # output bbox identities
58 | outputs = []
59 | for track in self.tracker.tracks:
60 | if not track.is_confirmed() or track.time_since_update > 1:
61 | continue
62 | box = track.to_tlwh()
63 | x1, y1, x2, y2 = self._tlwh_to_xyxy(box)
64 | track_label = track.track_label
65 | track_id = track.track_id
66 | outputs.append(np.array([x1, y1, x2, y2, track_label, track_id]))
67 | if len(outputs) > 0:
68 | outputs = np.stack(outputs, axis=0)
69 | return outputs
70 |
71 | """
72 | TODO:
73 | Convert bbox from xc_yc_w_h to xtl_ytl_w_h
74 | Thanks JieChen91@github.com for reporting this bug!
75 | """
76 |
77 | @staticmethod
78 | def _xywh_to_tlwh(bbox_xywh):
79 | # if isinstance(bbox_xywh, np.ndarray):
80 | # bbox_tlwh = bbox_xywh.copy()
81 | bbox_tlwh = bbox_xywh.copy()
82 | bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2.
83 | bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2.
84 | return bbox_tlwh
85 |
86 | def _xywh_to_xyxy(self, bbox_xywh):
87 | x, y, w, h = bbox_xywh
88 | x1 = max(int(x - w / 2), 0)
89 | x2 = min(int(x + w / 2), self.width - 1)
90 | y1 = max(int(y - h / 2), 0)
91 | y2 = min(int(y + h / 2), self.height - 1)
92 | return x1, y1, x2, y2
93 |
94 | def _tlwh_to_xyxy(self, bbox_tlwh):
95 | """
96 |         Convert bbox from xtl_ytl_w_h to x1_y1_x2_y2 (top-left and bottom-right corners)
97 | Thanks JieChen91@github.com for reporting this bug!
98 | """
99 | x, y, w, h = bbox_tlwh
100 | x1 = max(int(x), 0)
101 | x2 = min(int(x + w), self.width - 1)
102 | y1 = max(int(y), 0)
103 | y2 = min(int(y + h), self.height - 1)
104 | return x1, y1, x2, y2
105 |
106 | def _xyxy_to_tlwh(self, bbox_xyxy):
107 | x1, y1, x2, y2 = bbox_xyxy
108 |
109 |         t = x1  # top-left x
110 |         l = y1  # top-left y
111 | w = int(x2 - x1)
112 | h = int(y2 - y1)
113 | return t, l, w, h
114 |
115 | def _get_features(self, bbox_xywh, ori_img):
116 | im_crops = []
117 | for box in bbox_xywh:
118 | x1, y1, x2, y2 = self._xywh_to_xyxy(box)
119 | im = ori_img[y1:y2, x1:x2]
120 | im_crops.append(im)
121 | if im_crops:
122 | features = self.extractor.track_extractor(im_crops)
123 | else:
124 | features = np.array([])
125 | return features
126 |
127 |
128 |
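129 | 
130 | if __name__ == '__main__':
131 |     # Minimal usage sketch. Assumes the TensorRT re-ID engine has already been
132 |     # built at ../models/engine/deepsort.engine and that this module is run as
133 |     # part of the package (e.g. with `python -m`), so the relative imports resolve.
134 |     tracker = DeepSort(model_path="../models/engine/deepsort.engine")
135 |     frame = np.zeros((480, 640, 3), dtype=np.uint8)          # dummy image
136 |     # One dummy detection in (center x, center y, width, height) format, label 0.
137 |     bbox_xywh = np.array([[320., 240., 80., 160.]])
138 |     labels = np.array([0])
139 |     confidences = np.array([0.9])
140 |     tracks = tracker.update(bbox_xywh, labels, confidences, frame)
141 |     print(tracks)   # rows of [x1, y1, x2, y2, label, track_id] once tracks are confirmed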
--------------------------------------------------------------------------------
/srcs/deep_sort/deep_sort/sort/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/srcs/deep_sort/deep_sort/sort/__init__.py
--------------------------------------------------------------------------------
/srcs/deep_sort/deep_sort/sort/detection.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 |
4 |
5 | class Detection(object):
6 | """
7 | This class represents a bounding box detection in a single image.
8 |
9 | Parameters
10 | ----------
11 | tlwh : array_like
12 | Bounding box in format `(x, y, w, h)`.
13 | confidence : float
14 | Detector confidence score.
15 | feature : array_like
16 | A feature vector that describes the object contained in this image.
17 |
18 | Attributes
19 | ----------
20 | tlwh : ndarray
21 | Bounding box in format `(top left x, top left y, width, height)`.
22 | confidence : ndarray
23 | Detector confidence score.
24 | feature : ndarray | NoneType
25 | A feature vector that describes the object contained in this image.
26 |
27 | """
28 |
29 | def __init__(self, tlwh, label, confidence, feature):
30 |         self.tlwh = np.asarray(tlwh, dtype=float)  # np.float was removed in NumPy 1.24
31 | self.label = label
32 | self.confidence = float(confidence)
33 | self.feature = np.asarray(feature, dtype=np.float32)
34 |
35 | def to_tlbr(self):
36 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
37 | `(top left, bottom right)`.
38 | """
39 | ret = self.tlwh.copy()
40 | ret[2:] += ret[:2]
41 | return ret
42 |
43 | def to_xyah(self):
44 | """Convert bounding box to format `(center x, center y, aspect ratio,
45 | height)`, where the aspect ratio is `width / height`.
46 | """
47 | ret = self.tlwh.copy()
48 | ret[:2] += ret[2:] / 2
49 | ret[2] /= ret[3]
50 | return ret
51 |
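52 | 
53 | if __name__ == '__main__':
54 |     # Illustrative sanity check of the box conversions; the feature vector here
55 |     # is just a dummy placeholder (its dimensionality is arbitrary).
56 |     det = Detection(tlwh=[10, 20, 30, 60], label=0, confidence=0.9,
57 |                     feature=np.zeros(128, dtype=np.float32))
58 |     print(det.to_tlbr())   # [10. 20. 40. 80.]  -> (min x, min y, max x, max y)
59 |     print(det.to_xyah())   # [25. 50.  0.5 60.] -> (center x, center y, w/h, h)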
--------------------------------------------------------------------------------
/srcs/deep_sort/deep_sort/sort/iou_matching.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from __future__ import absolute_import
3 | import numpy as np
4 | from . import linear_assignment
5 |
6 |
7 | def iou(bbox, candidates):
8 |     """Compute intersection over union.
9 |
10 | Parameters
11 | ----------
12 | bbox : ndarray
13 | A bounding box in format `(top left x, top left y, width, height)`.
14 | candidates : ndarray
15 | A matrix of candidate bounding boxes (one per row) in the same format
16 | as `bbox`.
17 |
18 | Returns
19 | -------
20 | ndarray
21 | The intersection over union in [0, 1] between the `bbox` and each
22 | candidate. A higher score means a larger fraction of the `bbox` is
23 | occluded by the candidate.
24 |
25 | """
26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
27 | candidates_tl = candidates[:, :2]
28 | candidates_br = candidates[:, :2] + candidates[:, 2:]
29 |
30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
34 | wh = np.maximum(0., br - tl)
35 |
36 | area_intersection = wh.prod(axis=1)
37 | area_bbox = bbox[2:].prod()
38 | area_candidates = candidates[:, 2:].prod(axis=1)
39 | return area_intersection / (area_bbox + area_candidates - area_intersection)
40 |
41 |
42 | def iou_cost(tracks, detections, track_indices=None,
43 | detection_indices=None):
44 | """An intersection over union distance metric.
45 |
46 | Parameters
47 | ----------
48 | tracks : List[deep_sort.track.Track]
49 | A list of tracks.
50 | detections : List[deep_sort.detection.Detection]
51 | A list of detections.
52 | track_indices : Optional[List[int]]
53 | A list of indices to tracks that should be matched. Defaults to
54 | all `tracks`.
55 | detection_indices : Optional[List[int]]
56 | A list of indices to detections that should be matched. Defaults
57 | to all `detections`.
58 |
59 | Returns
60 | -------
61 | ndarray
62 | Returns a cost matrix of shape
63 | len(track_indices), len(detection_indices) where entry (i, j) is
64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
65 |
66 | """
67 | if track_indices is None:
68 | track_indices = np.arange(len(tracks))
69 | if detection_indices is None:
70 | detection_indices = np.arange(len(detections))
71 |
72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
73 | for row, track_idx in enumerate(track_indices):
74 | if tracks[track_idx].time_since_update > 1:
75 | cost_matrix[row, :] = linear_assignment.INFTY_COST
76 | continue
77 |
78 | bbox = tracks[track_idx].to_tlwh()
79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices])
80 | cost_matrix[row, :] = 1. - iou(bbox, candidates)
81 | return cost_matrix
82 |
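83 | 
84 | if __name__ == '__main__':
85 |     # Illustrative check of iou(); run this module with `python -m` so the
86 |     # relative import at the top resolves.
87 |     bbox = np.array([0., 0., 10., 10.])            # (top left x, top left y, w, h)
88 |     candidates = np.array([[0., 0., 10., 10.],     # identical box
89 |                            [5., 5., 10., 10.],     # partial overlap
90 |                            [20., 20., 10., 10.]])  # no overlap
91 |     print(iou(bbox, candidates))                   # approx. [1.0, 0.143, 0.0]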
--------------------------------------------------------------------------------
/srcs/deep_sort/deep_sort/sort/kalman_filter.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 | import scipy.linalg
4 |
5 |
6 | """
7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of
8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
9 | function and used as Mahalanobis gating threshold.
10 | """
11 | chi2inv95 = {
12 | 1: 3.8415,
13 | 2: 5.9915,
14 | 3: 7.8147,
15 | 4: 9.4877,
16 | 5: 11.070,
17 | 6: 12.592,
18 | 7: 14.067,
19 | 8: 15.507,
20 | 9: 16.919}
21 |
22 |
23 | class KalmanFilter(object):
24 | """
25 | A simple Kalman filter for tracking bounding boxes in image space.
26 |
27 | The 8-dimensional state space
28 |
29 | x, y, a, h, vx, vy, va, vh
30 |
31 | contains the bounding box center position (x, y), aspect ratio a, height h,
32 | and their respective velocities.
33 |
34 | Object motion follows a constant velocity model. The bounding box location
35 | (x, y, a, h) is taken as direct observation of the state space (linear
36 | observation model).
37 |
38 | """
39 |
40 | def __init__(self):
41 | ndim, dt = 4, 1.
42 |
43 | # Create Kalman filter model matrices.
44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim)
45 | for i in range(ndim):
46 | self._motion_mat[i, ndim + i] = dt
47 | self._update_mat = np.eye(ndim, 2 * ndim)
48 |
49 | # Motion and observation uncertainty are chosen relative to the current
50 | # state estimate. These weights control the amount of uncertainty in
51 | # the model. This is a bit hacky.
52 | self._std_weight_position = 1. / 20
53 | self._std_weight_velocity = 1. / 160
54 |
55 | def initiate(self, measurement):
56 | """Create track from unassociated measurement.
57 |
58 | Parameters
59 | ----------
60 | measurement : ndarray
61 | Bounding box coordinates (x, y, a, h) with center position (x, y),
62 | aspect ratio a, and height h.
63 |
64 | Returns
65 | -------
66 | (ndarray, ndarray)
67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8
68 | dimensional) of the new track. Unobserved velocities are initialized
69 | to 0 mean.
70 |
71 | """
72 | mean_pos = measurement
73 | mean_vel = np.zeros_like(mean_pos)
74 | mean = np.r_[mean_pos, mean_vel]
75 |
76 | std = [
77 | 2 * self._std_weight_position * measurement[3],
78 | 2 * self._std_weight_position * measurement[3],
79 | 1e-2,
80 | 2 * self._std_weight_position * measurement[3],
81 | 10 * self._std_weight_velocity * measurement[3],
82 | 10 * self._std_weight_velocity * measurement[3],
83 | 1e-5,
84 | 10 * self._std_weight_velocity * measurement[3]]
85 | covariance = np.diag(np.square(std))
86 | return mean, covariance
87 |
88 | def predict(self, mean, covariance):
89 | """Run Kalman filter prediction step.
90 |
91 | Parameters
92 | ----------
93 | mean : ndarray
94 | The 8 dimensional mean vector of the object state at the previous
95 | time step.
96 | covariance : ndarray
97 | The 8x8 dimensional covariance matrix of the object state at the
98 | previous time step.
99 |
100 | Returns
101 | -------
102 | (ndarray, ndarray)
103 | Returns the mean vector and covariance matrix of the predicted
104 | state. Unobserved velocities are initialized to 0 mean.
105 |
106 | """
107 | std_pos = [
108 | self._std_weight_position * mean[3],
109 | self._std_weight_position * mean[3],
110 | 1e-2,
111 | self._std_weight_position * mean[3]]
112 | std_vel = [
113 | self._std_weight_velocity * mean[3],
114 | self._std_weight_velocity * mean[3],
115 | 1e-5,
116 | self._std_weight_velocity * mean[3]]
117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
118 |
119 | mean = np.dot(self._motion_mat, mean)
120 | covariance = np.linalg.multi_dot((
121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
122 |
123 | return mean, covariance
124 |
125 | def project(self, mean, covariance):
126 | """Project state distribution to measurement space.
127 |
128 | Parameters
129 | ----------
130 | mean : ndarray
131 | The state's mean vector (8 dimensional array).
132 | covariance : ndarray
133 | The state's covariance matrix (8x8 dimensional).
134 |
135 | Returns
136 | -------
137 | (ndarray, ndarray)
138 | Returns the projected mean and covariance matrix of the given state
139 | estimate.
140 |
141 | """
142 | std = [
143 | self._std_weight_position * mean[3],
144 | self._std_weight_position * mean[3],
145 | 1e-1,
146 | self._std_weight_position * mean[3]]
147 | innovation_cov = np.diag(np.square(std))
148 |
149 | mean = np.dot(self._update_mat, mean)
150 | covariance = np.linalg.multi_dot((
151 | self._update_mat, covariance, self._update_mat.T))
152 | return mean, covariance + innovation_cov
153 |
154 | def update(self, mean, covariance, measurement):
155 | """Run Kalman filter correction step.
156 |
157 | Parameters
158 | ----------
159 | mean : ndarray
160 | The predicted state's mean vector (8 dimensional).
161 | covariance : ndarray
162 | The state's covariance matrix (8x8 dimensional).
163 | measurement : ndarray
164 | The 4 dimensional measurement vector (x, y, a, h), where (x, y)
165 | is the center position, a the aspect ratio, and h the height of the
166 | bounding box.
167 |
168 | Returns
169 | -------
170 | (ndarray, ndarray)
171 | Returns the measurement-corrected state distribution.
172 |
173 | """
174 | projected_mean, projected_cov = self.project(mean, covariance)
175 |
176 | chol_factor, lower = scipy.linalg.cho_factor(
177 | projected_cov, lower=True, check_finite=False)
178 | kalman_gain = scipy.linalg.cho_solve(
179 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
180 | check_finite=False).T
181 | innovation = measurement - projected_mean
182 |
183 | new_mean = mean + np.dot(innovation, kalman_gain.T)
184 | new_covariance = covariance - np.linalg.multi_dot((
185 | kalman_gain, projected_cov, kalman_gain.T))
186 | return new_mean, new_covariance
187 |
188 | def gating_distance(self, mean, covariance, measurements,
189 | only_position=False):
190 | """Compute gating distance between state distribution and measurements.
191 |
192 | A suitable distance threshold can be obtained from `chi2inv95`. If
193 | `only_position` is False, the chi-square distribution has 4 degrees of
194 | freedom, otherwise 2.
195 |
196 | Parameters
197 | ----------
198 | mean : ndarray
199 | Mean vector over the state distribution (8 dimensional).
200 | covariance : ndarray
201 | Covariance of the state distribution (8x8 dimensional).
202 | measurements : ndarray
203 | An Nx4 dimensional matrix of N measurements, each in
204 | format (x, y, a, h) where (x, y) is the bounding box center
205 | position, a the aspect ratio, and h the height.
206 | only_position : Optional[bool]
207 | If True, distance computation is done with respect to the bounding
208 | box center position only.
209 |
210 | Returns
211 | -------
212 | ndarray
213 | Returns an array of length N, where the i-th element contains the
214 | squared Mahalanobis distance between (mean, covariance) and
215 | `measurements[i]`.
216 |
217 | """
218 | mean, covariance = self.project(mean, covariance)
219 | if only_position:
220 | mean, covariance = mean[:2], covariance[:2, :2]
221 | measurements = measurements[:, :2]
222 |
223 | cholesky_factor = np.linalg.cholesky(covariance)
224 | d = measurements - mean
225 | z = scipy.linalg.solve_triangular(
226 | cholesky_factor, d.T, lower=True, check_finite=False,
227 | overwrite_b=True)
228 | squared_maha = np.sum(z * z, axis=0)
229 | return squared_maha
230 |
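231 | 
232 | if __name__ == '__main__':
233 |     # Illustrative predict/update round trip on a single measurement in
234 |     # (center x, center y, aspect ratio, height) format.
235 |     kf = KalmanFilter()
236 |     mean, covariance = kf.initiate(np.array([100., 200., 0.5, 80.]))
237 |     mean, covariance = kf.predict(mean, covariance)
238 |     mean, covariance = kf.update(mean, covariance, np.array([102., 203., 0.5, 81.]))
239 |     print(mean[:4])   # corrected box stays close to the new measurement
240 |     print(kf.gating_distance(mean, covariance, np.array([[102., 203., 0.5, 81.]])))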
--------------------------------------------------------------------------------
/srcs/deep_sort/deep_sort/sort/linear_assignment.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from __future__ import absolute_import
3 | import numpy as np
4 | # from sklearn.utils.linear_assignment_ import linear_assignment
5 | from scipy.optimize import linear_sum_assignment as linear_assignment
6 | from . import kalman_filter
7 |
8 |
9 | INFTY_COST = 1e+5
10 |
11 |
12 | def min_cost_matching(
13 | distance_metric, max_distance, tracks, detections, track_indices=None,
14 | detection_indices=None):
15 | """Solve linear assignment problem.
16 |
17 | Parameters
18 | ----------
19 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
20 | The distance metric is given a list of tracks and detections as well as
21 | a list of N track indices and M detection indices. The metric should
22 | return the NxM dimensional cost matrix, where element (i, j) is the
23 | association cost between the i-th track in the given track indices and
24 | the j-th detection in the given detection_indices.
25 | max_distance : float
26 | Gating threshold. Associations with cost larger than this value are
27 | disregarded.
28 | tracks : List[track.Track]
29 | A list of predicted tracks at the current time step.
30 | detections : List[detection.Detection]
31 | A list of detections at the current time step.
32 | track_indices : List[int]
33 | List of track indices that maps rows in `cost_matrix` to tracks in
34 | `tracks` (see description above).
35 | detection_indices : List[int]
36 | List of detection indices that maps columns in `cost_matrix` to
37 | detections in `detections` (see description above).
38 |
39 | Returns
40 | -------
41 | (List[(int, int)], List[int], List[int])
42 | Returns a tuple with the following three entries:
43 | * A list of matched track and detection indices.
44 | * A list of unmatched track indices.
45 | * A list of unmatched detection indices.
46 |
47 | """
48 | if track_indices is None:
49 | track_indices = np.arange(len(tracks))
50 | if detection_indices is None:
51 | detection_indices = np.arange(len(detections))
52 |
53 | if len(detection_indices) == 0 or len(track_indices) == 0:
54 | return [], track_indices, detection_indices # Nothing to match.
55 |
56 | cost_matrix = distance_metric(
57 | tracks, detections, track_indices, detection_indices)
58 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
59 |
60 | row_indices, col_indices = linear_assignment(cost_matrix)
61 |
62 | matches, unmatched_tracks, unmatched_detections = [], [], []
63 | for col, detection_idx in enumerate(detection_indices):
64 | if col not in col_indices:
65 | unmatched_detections.append(detection_idx)
66 | for row, track_idx in enumerate(track_indices):
67 | if row not in row_indices:
68 | unmatched_tracks.append(track_idx)
69 | for row, col in zip(row_indices, col_indices):
70 | track_idx = track_indices[row]
71 | detection_idx = detection_indices[col]
72 | if cost_matrix[row, col] > max_distance:
73 | unmatched_tracks.append(track_idx)
74 | unmatched_detections.append(detection_idx)
75 | else:
76 | matches.append((track_idx, detection_idx))
77 | return matches, unmatched_tracks, unmatched_detections
78 |
79 |
80 | def matching_cascade(
81 | distance_metric, max_distance, cascade_depth, tracks, detections,
82 | track_indices=None, detection_indices=None):
83 | """Run matching cascade.
84 |
85 | Parameters
86 | ----------
87 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
88 | The distance metric is given a list of tracks and detections as well as
89 | a list of N track indices and M detection indices. The metric should
90 | return the NxM dimensional cost matrix, where element (i, j) is the
91 | association cost between the i-th track in the given track indices and
92 | the j-th detection in the given detection indices.
93 | max_distance : float
94 | Gating threshold. Associations with cost larger than this value are
95 | disregarded.
96 | cascade_depth: int
97 |         The cascade depth; should be set to the maximum track age.
98 | tracks : List[track.Track]
99 | A list of predicted tracks at the current time step.
100 | detections : List[detection.Detection]
101 | A list of detections at the current time step.
102 | track_indices : Optional[List[int]]
103 | List of track indices that maps rows in `cost_matrix` to tracks in
104 | `tracks` (see description above). Defaults to all tracks.
105 | detection_indices : Optional[List[int]]
106 | List of detection indices that maps columns in `cost_matrix` to
107 | detections in `detections` (see description above). Defaults to all
108 | detections.
109 |
110 | Returns
111 | -------
112 | (List[(int, int)], List[int], List[int])
113 | Returns a tuple with the following three entries:
114 | * A list of matched track and detection indices.
115 | * A list of unmatched track indices.
116 | * A list of unmatched detection indices.
117 |
118 | """
119 | if track_indices is None:
120 | track_indices = list(range(len(tracks)))
121 | if detection_indices is None:
122 | detection_indices = list(range(len(detections)))
123 |
124 | unmatched_detections = detection_indices
125 | matches = []
126 | for level in range(cascade_depth):
127 | if len(unmatched_detections) == 0: # No detections left
128 | break
129 |
130 | track_indices_l = [
131 | k for k in track_indices
132 | if tracks[k].time_since_update == 1 + level
133 | ]
134 | if len(track_indices_l) == 0: # Nothing to match at this level
135 | continue
136 |
137 | matches_l, _, unmatched_detections = \
138 | min_cost_matching(
139 | distance_metric, max_distance, tracks, detections,
140 | track_indices_l, unmatched_detections)
141 | matches += matches_l
142 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
143 | return matches, unmatched_tracks, unmatched_detections
144 |
145 |
146 | def gate_cost_matrix(
147 | kf, cost_matrix, tracks, detections, track_indices, detection_indices,
148 | gated_cost=INFTY_COST, only_position=False):
149 | """Invalidate infeasible entries in cost matrix based on the state
150 | distributions obtained by Kalman filtering.
151 |
152 | Parameters
153 | ----------
154 | kf : The Kalman filter.
155 | cost_matrix : ndarray
156 | The NxM dimensional cost matrix, where N is the number of track indices
157 | and M is the number of detection indices, such that entry (i, j) is the
158 | association cost between `tracks[track_indices[i]]` and
159 | `detections[detection_indices[j]]`.
160 | tracks : List[track.Track]
161 | A list of predicted tracks at the current time step.
162 | detections : List[detection.Detection]
163 | A list of detections at the current time step.
164 | track_indices : List[int]
165 | List of track indices that maps rows in `cost_matrix` to tracks in
166 | `tracks` (see description above).
167 | detection_indices : List[int]
168 | List of detection indices that maps columns in `cost_matrix` to
169 | detections in `detections` (see description above).
170 | gated_cost : Optional[float]
171 | Entries in the cost matrix corresponding to infeasible associations are
172 |         set to this value. Defaults to a very large value.
173 | only_position : Optional[bool]
174 | If True, only the x, y position of the state distribution is considered
175 | during gating. Defaults to False.
176 |
177 | Returns
178 | -------
179 | ndarray
180 | Returns the modified cost matrix.
181 |
182 | """
183 | gating_dim = 2 if only_position else 4
184 | gating_threshold = kalman_filter.chi2inv95[gating_dim]
185 | measurements = np.asarray(
186 | [detections[i].to_xyah() for i in detection_indices])
187 | for row, track_idx in enumerate(track_indices):
188 | track = tracks[track_idx]
189 | gating_distance = kf.gating_distance(
190 | track.mean, track.covariance, measurements, only_position)
191 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost
192 | return cost_matrix
193 |
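194 | 
195 | if __name__ == '__main__':
196 |     # Illustrative call to min_cost_matching with a hand-written cost matrix.
197 |     # The track/detection objects are placeholders because this toy metric
198 |     # ignores them; run with `python -m` so the relative import resolves.
199 |     def toy_metric(tracks, dets, track_indices, detection_indices):
200 |         return np.array([[0.1, 0.9],
201 |                          [0.8, 0.2]])
202 |     matches, unmatched_tracks, unmatched_dets = min_cost_matching(
203 |         toy_metric, max_distance=0.5, tracks=[None, None], detections=[None, None])
204 |     print(matches)                           # track 0 -> detection 0, track 1 -> detection 1
205 |     print(unmatched_tracks, unmatched_dets)  # both empty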
--------------------------------------------------------------------------------
/srcs/deep_sort/deep_sort/sort/nn_matching.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 |
4 |
5 | def _pdist(a, b):
6 | """Compute pair-wise squared distance between points in `a` and `b`.
7 |
8 | Parameters
9 | ----------
10 | a : array_like
11 | An NxM matrix of N samples of dimensionality M.
12 | b : array_like
13 | An LxM matrix of L samples of dimensionality M.
14 |
15 | Returns
16 | -------
17 | ndarray
18 |         Returns a matrix of size len(a), len(b) such that element (i, j)
19 | contains the squared distance between `a[i]` and `b[j]`.
20 |
21 | """
22 | a, b = np.asarray(a), np.asarray(b)
23 | if len(a) == 0 or len(b) == 0:
24 | return np.zeros((len(a), len(b)))
25 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
26 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
27 | r2 = np.clip(r2, 0., float(np.inf))
28 | return r2
29 |
30 |
31 | def _cosine_distance(a, b, data_is_normalized=False):
32 | """Compute pair-wise cosine distance between points in `a` and `b`.
33 |
34 | Parameters
35 | ----------
36 | a : array_like
37 | An NxM matrix of N samples of dimensionality M.
38 | b : array_like
39 | An LxM matrix of L samples of dimensionality M.
40 | data_is_normalized : Optional[bool]
41 | If True, assumes rows in a and b are unit length vectors.
42 |         Otherwise, a and b are explicitly normalized to length 1.
43 |
44 | Returns
45 | -------
46 | ndarray
47 |         Returns a matrix of size len(a), len(b) such that element (i, j)
48 |         contains the cosine distance between `a[i]` and `b[j]`.
49 |
50 | """
51 | if not data_is_normalized:
52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
54 | return 1. - np.dot(a, b.T)
55 |
56 |
57 | def _nn_euclidean_distance(x, y):
58 | """ Helper function for nearest neighbor distance metric (Euclidean).
59 |
60 | Parameters
61 | ----------
62 | x : ndarray
63 | A matrix of N row-vectors (sample points).
64 | y : ndarray
65 | A matrix of M row-vectors (query points).
66 |
67 | Returns
68 | -------
69 | ndarray
70 | A vector of length M that contains for each entry in `y` the
71 | smallest Euclidean distance to a sample in `x`.
72 |
73 | """
74 | distances = _pdist(x, y)
75 | return np.maximum(0.0, distances.min(axis=0))
76 |
77 |
78 | def _nn_cosine_distance(x, y):
79 | """ Helper function for nearest neighbor distance metric (cosine).
80 |
81 | Parameters
82 | ----------
83 | x : ndarray
84 | A matrix of N row-vectors (sample points).
85 | y : ndarray
86 | A matrix of M row-vectors (query points).
87 |
88 | Returns
89 | -------
90 | ndarray
91 | A vector of length M that contains for each entry in `y` the
92 | smallest cosine distance to a sample in `x`.
93 |
94 | """
95 | distances = _cosine_distance(x, y)
96 | return distances.min(axis=0)
97 |
98 |
99 | class NearestNeighborDistanceMetric(object):
100 | """
101 | A nearest neighbor distance metric that, for each target, returns
102 | the closest distance to any sample that has been observed so far.
103 |
104 | Parameters
105 | ----------
106 | metric : str
107 | Either "euclidean" or "cosine".
108 | matching_threshold: float
109 | The matching threshold. Samples with larger distance are considered an
110 | invalid match.
111 | budget : Optional[int]
112 | If not None, fix samples per class to at most this number. Removes
113 | the oldest samples when the budget is reached.
114 |
115 | Attributes
116 | ----------
117 | samples : Dict[int -> List[ndarray]]
118 | A dictionary that maps from target identities to the list of samples
119 | that have been observed so far.
120 |
121 | """
122 |
123 | def __init__(self, metric, matching_threshold, budget=None):
124 |
125 |
126 | if metric == "euclidean":
127 | self._metric = _nn_euclidean_distance
128 | elif metric == "cosine":
129 | self._metric = _nn_cosine_distance
130 | else:
131 | raise ValueError(
132 | "Invalid metric; must be either 'euclidean' or 'cosine'")
133 | self.matching_threshold = matching_threshold
134 | self.budget = budget
135 | self.samples = {}
136 |
137 | def partial_fit(self, features, targets, active_targets):
138 | """Update the distance metric with new data.
139 |
140 | Parameters
141 | ----------
142 | features : ndarray
143 | An NxM matrix of N features of dimensionality M.
144 | targets : ndarray
145 | An integer array of associated target identities.
146 | active_targets : List[int]
147 | A list of targets that are currently present in the scene.
148 |
149 | """
150 | for feature, target in zip(features, targets):
151 | self.samples.setdefault(target, []).append(feature)
152 | if self.budget is not None:
153 | self.samples[target] = self.samples[target][-self.budget:]
154 | self.samples = {k: self.samples[k] for k in active_targets}
155 |
156 | def distance(self, features, targets):
157 | """Compute distance between features and targets.
158 |
159 | Parameters
160 | ----------
161 | features : ndarray
162 | An NxM matrix of N features of dimensionality M.
163 | targets : List[int]
164 | A list of targets to match the given `features` against.
165 |
166 | Returns
167 | -------
168 | ndarray
169 | Returns a cost matrix of shape len(targets), len(features), where
170 | element (i, j) contains the closest squared distance between
171 | `targets[i]` and `features[j]`.
172 |
173 | """
174 | cost_matrix = np.zeros((len(targets), len(features)))
175 | for i, target in enumerate(targets):
176 | cost_matrix[i, :] = self._metric(self.samples[target], features)
177 | return cost_matrix
178 |
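179 | 
180 | if __name__ == '__main__':
181 |     # Illustrative use of the metric: store random features for two targets,
182 |     # then score a new batch of detection features against them (the feature
183 |     # dimensionality of 128 is arbitrary here).
184 |     rng = np.random.RandomState(0)
185 |     metric = NearestNeighborDistanceMetric("cosine", matching_threshold=0.2, budget=5)
186 |     gallery = rng.rand(4, 128)
187 |     metric.partial_fit(gallery, targets=np.array([1, 1, 2, 2]), active_targets=[1, 2])
188 |     queries = rng.rand(3, 128)
189 |     cost = metric.distance(queries, targets=[1, 2])
190 |     print(cost.shape)   # (2, 3): one row per target, one column per query feature
191 |     print(cost)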
--------------------------------------------------------------------------------
/srcs/deep_sort/deep_sort/sort/preprocessing.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 | import cv2
4 |
5 |
6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None):
7 | """Suppress overlapping detections.
8 |
9 | Original code from [1]_ has been adapted to include confidence score.
10 |
11 | .. [1] http://www.pyimagesearch.com/2015/02/16/
12 | faster-non-maximum-suppression-python/
13 |
14 | Examples
15 | --------
16 |
17 | >>> boxes = [d.roi for d in detections]
18 | >>> scores = [d.confidence for d in detections]
19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
20 | >>> detections = [detections[i] for i in indices]
21 |
22 | Parameters
23 | ----------
24 | boxes : ndarray
25 | Array of ROIs (x, y, width, height).
26 | max_bbox_overlap : float
27 | ROIs that overlap more than this values are suppressed.
28 | scores : Optional[array_like]
29 | Detector confidence score.
30 |
31 | Returns
32 | -------
33 | List[int]
34 | Returns indices of detections that have survived non-maxima suppression.
35 |
36 | """
37 | if len(boxes) == 0:
38 | return []
39 |
40 |     boxes = boxes.astype(float)  # np.float was removed in NumPy 1.24
41 | pick = []
42 |
43 | x1 = boxes[:, 0]
44 | y1 = boxes[:, 1]
45 | x2 = boxes[:, 2] + boxes[:, 0]
46 | y2 = boxes[:, 3] + boxes[:, 1]
47 |
48 | area = (x2 - x1 + 1) * (y2 - y1 + 1)
49 | if scores is not None:
50 | idxs = np.argsort(scores)
51 | else:
52 | idxs = np.argsort(y2)
53 |
54 | while len(idxs) > 0:
55 | last = len(idxs) - 1
56 | i = idxs[last]
57 | pick.append(i)
58 |
59 | xx1 = np.maximum(x1[i], x1[idxs[:last]])
60 | yy1 = np.maximum(y1[i], y1[idxs[:last]])
61 | xx2 = np.minimum(x2[i], x2[idxs[:last]])
62 | yy2 = np.minimum(y2[i], y2[idxs[:last]])
63 |
64 | w = np.maximum(0, xx2 - xx1 + 1)
65 | h = np.maximum(0, yy2 - yy1 + 1)
66 |
67 | overlap = (w * h) / area[idxs[:last]]
68 |
69 | idxs = np.delete(
70 | idxs, np.concatenate(
71 | ([last], np.where(overlap > max_bbox_overlap)[0])))
72 |
73 | return pick
74 |
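75 | 
76 | if __name__ == '__main__':
77 |     # Illustrative NMS call: two heavily overlapping boxes and one separate box.
78 |     boxes = np.array([[0, 0, 10, 10],
79 |                       [1, 1, 10, 10],
80 |                       [50, 50, 10, 10]], dtype=float)   # (x, y, w, h)
81 |     scores = np.array([0.9, 0.8, 0.7])
82 |     keep = non_max_suppression(boxes, max_bbox_overlap=0.5, scores=scores)
83 |     print(keep)   # [0, 2]: the lower-scored overlapping box is suppressed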
--------------------------------------------------------------------------------
/srcs/deep_sort/deep_sort/sort/track.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 |
3 |
4 | class TrackState:
5 | """
6 | Enumeration type for the single target track state. Newly created tracks are
7 | classified as `tentative` until enough evidence has been collected. Then,
8 | the track state is changed to `confirmed`. Tracks that are no longer alive
9 | are classified as `deleted` to mark them for removal from the set of active
10 | tracks.
11 |
12 | """
13 |
14 | Tentative = 1
15 | Confirmed = 2
16 | Deleted = 3
17 |
18 |
19 | class Track:
20 | """
21 | A single target track with state space `(x, y, a, h)` and associated
22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the
23 | aspect ratio and `h` is the height.
24 |
25 | Parameters
26 | ----------
27 | mean : ndarray
28 | Mean vector of the initial state distribution.
29 | covariance : ndarray
30 | Covariance matrix of the initial state distribution.
31 | track_id : int
32 | A unique track identifier.
33 | n_init : int
34 | Number of consecutive detections before the track is confirmed. The
35 | track state is set to `Deleted` if a miss occurs within the first
36 | `n_init` frames.
37 | max_age : int
38 | The maximum number of consecutive misses before the track state is
39 | set to `Deleted`.
40 | feature : Optional[ndarray]
41 | Feature vector of the detection this track originates from. If not None,
42 | this feature is added to the `features` cache.
43 |
44 | Attributes
45 | ----------
46 | mean : ndarray
47 | Mean vector of the initial state distribution.
48 | covariance : ndarray
49 | Covariance matrix of the initial state distribution.
50 | track_id : int
51 | A unique track identifier.
52 | hits : int
53 | Total number of measurement updates.
54 | age : int
55 |         Total number of frames since first occurrence.
56 | time_since_update : int
57 | Total number of frames since last measurement update.
58 | state : TrackState
59 | The current track state.
60 | features : List[ndarray]
61 | A cache of features. On each measurement update, the associated feature
62 | vector is added to this list.
63 |
64 | """
65 |
66 | def __init__(self, mean, covariance, track_label, track_id, n_init, max_age,
67 | feature=None):
68 | self.mean = mean
69 | self.covariance = covariance
70 | self.track_label = track_label
71 | self.track_id = track_id
72 | self.hits = 1
73 | self.age = 1
74 | self.time_since_update = 0
75 |
76 | self.state = TrackState.Tentative
77 | self.features = []
78 | if feature is not None:
79 | self.features.append(feature)
80 |
81 | self._n_init = n_init
82 | self._max_age = max_age
83 |
84 | def to_tlwh(self):
85 | """Get current position in bounding box format `(top left x, top left y,
86 | width, height)`.
87 |
88 | Returns
89 | -------
90 | ndarray
91 | The bounding box.
92 |
93 | """
94 | ret = self.mean[:4].copy()
95 | ret[2] *= ret[3]
96 | ret[:2] -= ret[2:] / 2
97 | return ret
98 |
99 | def to_tlbr(self):
100 |         """Get current position in bounding box format `(min x, min y, max x,
101 | max y)`.
102 |
103 | Returns
104 | -------
105 | ndarray
106 | The bounding box.
107 |
108 | """
109 | ret = self.to_tlwh()
110 | ret[2:] = ret[:2] + ret[2:]
111 | return ret
112 |
113 | def predict(self, kf):
114 | """Propagate the state distribution to the current time step using a
115 | Kalman filter prediction step.
116 |
117 | Parameters
118 | ----------
119 | kf : kalman_filter.KalmanFilter
120 | The Kalman filter.
121 |
122 | """
123 | self.mean, self.covariance = kf.predict(self.mean, self.covariance)
124 | self.age += 1
125 | self.time_since_update += 1
126 |
127 | def update(self, kf, detection):
128 | """Perform Kalman filter measurement update step and update the feature
129 | cache.
130 |
131 | Parameters
132 | ----------
133 | kf : kalman_filter.KalmanFilter
134 | The Kalman filter.
135 | detection : Detection
136 | The associated detection.
137 |
138 | """
139 | self.mean, self.covariance = kf.update(
140 | self.mean, self.covariance, detection.to_xyah())
141 | self.features.append(detection.feature)
142 |
143 | self.hits += 1
144 | self.time_since_update = 0
145 | if self.state == TrackState.Tentative and self.hits >= self._n_init:
146 | self.state = TrackState.Confirmed
147 |
148 | def mark_missed(self):
149 | """Mark this track as missed (no association at the current time step).
150 | """
151 | if self.state == TrackState.Tentative:
152 | self.state = TrackState.Deleted
153 | elif self.time_since_update > self._max_age:
154 | self.state = TrackState.Deleted
155 |
156 | def is_tentative(self):
157 | """Returns True if this track is tentative (unconfirmed).
158 | """
159 | return self.state == TrackState.Tentative
160 |
161 | def is_confirmed(self):
162 | """Returns True if this track is confirmed."""
163 | return self.state == TrackState.Confirmed
164 |
165 | def is_deleted(self):
166 | """Returns True if this track is dead and should be deleted."""
167 | return self.state == TrackState.Deleted
168 |
--------------------------------------------------------------------------------
/srcs/deep_sort/deep_sort/sort/tracker.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from __future__ import absolute_import
3 | import numpy as np
4 | from . import kalman_filter
5 | from . import linear_assignment
6 | from . import iou_matching
7 | from .track import Track
8 |
9 |
10 | class Tracker:
11 | """
12 | This is the multi-target tracker.
13 |
14 | Parameters
15 | ----------
16 | metric : nn_matching.NearestNeighborDistanceMetric
17 | A distance metric for measurement-to-track association.
18 | max_age : int
19 |         Maximum number of consecutive misses before a track is deleted.
20 | n_init : int
21 | Number of consecutive detections before the track is confirmed. The
22 | track state is set to `Deleted` if a miss occurs within the first
23 | `n_init` frames.
24 |
25 | Attributes
26 | ----------
27 | metric : nn_matching.NearestNeighborDistanceMetric
28 | The distance metric used for measurement to track association.
29 | max_age : int
30 |         Maximum number of consecutive misses before a track is deleted.
31 | n_init : int
32 | Number of frames that a track remains in initialization phase.
33 | kf : kalman_filter.KalmanFilter
34 | A Kalman filter to filter target trajectories in image space.
35 | tracks : List[Track]
36 | The list of active tracks at the current time step.
37 |
38 | """
39 |
40 | def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3):
41 | self.metric = metric
42 | self.max_iou_distance = max_iou_distance
43 | self.max_age = max_age
44 | self.n_init = n_init
45 |
46 | self.kf = kalman_filter.KalmanFilter()
47 | self.tracks = []
48 | self._next_id = 1
49 |
50 | def predict(self):
51 | """Propagate track state distributions one time step forward.
52 |
53 | This function should be called once every time step, before `update`.
54 | """
55 | for track in self.tracks:
56 | track.predict(self.kf)
57 |
58 | def update(self, detections):
59 | """Perform measurement update and track management.
60 |
61 | Parameters
62 | ----------
63 | detections : List[deep_sort.detection.Detection]
64 | A list of detections at the current time step.
65 |
66 | """
67 | # Run matching cascade.
68 | matches, unmatched_tracks, unmatched_detections = \
69 | self._match(detections)
70 |
71 | # Update track set.
72 | for track_idx, detection_idx in matches:
73 | self.tracks[track_idx].update(
74 | self.kf, detections[detection_idx])
75 | for track_idx in unmatched_tracks:
76 | self.tracks[track_idx].mark_missed()
77 | for detection_idx in unmatched_detections:
78 | self._initiate_track(detections[detection_idx])
79 | self.tracks = [t for t in self.tracks if not t.is_deleted()]
80 |
81 | # Update distance metric.
82 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
83 | features, targets = [], []
84 | for track in self.tracks:
85 | if not track.is_confirmed():
86 | continue
87 | features += track.features
88 | targets += [track.track_id for _ in track.features]
89 | track.features = []
90 | self.metric.partial_fit(
91 | np.asarray(features), np.asarray(targets), active_targets)
92 |
93 | def _match(self, detections):
94 |
95 | def gated_metric(tracks, dets, track_indices, detection_indices):
96 | features = np.array([dets[i].feature for i in detection_indices])
97 | targets = np.array([tracks[i].track_id for i in track_indices])
98 | cost_matrix = self.metric.distance(features, targets)
99 | cost_matrix = linear_assignment.gate_cost_matrix(
100 | self.kf, cost_matrix, tracks, dets, track_indices,
101 | detection_indices)
102 |
103 | return cost_matrix
104 |
105 | # Split track set into confirmed and unconfirmed tracks.
106 | confirmed_tracks = [
107 | i for i, t in enumerate(self.tracks) if t.is_confirmed()]
108 | unconfirmed_tracks = [
109 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()]
110 |
111 | # Associate confirmed tracks using appearance features.
112 | matches_a, unmatched_tracks_a, unmatched_detections = \
113 | linear_assignment.matching_cascade(
114 | gated_metric, self.metric.matching_threshold, self.max_age,
115 | self.tracks, detections, confirmed_tracks)
116 |
117 | # Associate remaining tracks together with unconfirmed tracks using IOU.
118 | iou_track_candidates = unconfirmed_tracks + [
119 | k for k in unmatched_tracks_a if
120 | self.tracks[k].time_since_update == 1]
121 | unmatched_tracks_a = [
122 | k for k in unmatched_tracks_a if
123 | self.tracks[k].time_since_update != 1]
124 | matches_b, unmatched_tracks_b, unmatched_detections = \
125 | linear_assignment.min_cost_matching(
126 | iou_matching.iou_cost, self.max_iou_distance, self.tracks,
127 | detections, iou_track_candidates, unmatched_detections)
128 |
129 | matches = matches_a + matches_b
130 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
131 | return matches, unmatched_tracks, unmatched_detections
132 |
133 | def _initiate_track(self, detection):
134 | mean, covariance = self.kf.initiate(detection.to_xyah())
135 | self.tracks.append(Track(
136 | mean, covariance, detection.label, self._next_id, self.n_init, self.max_age,
137 | detection.feature))
138 | self._next_id += 1
139 |
--------------------------------------------------------------------------------
/srcs/deep_sort/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nabang1010/YOLO_Object_Tracking_TensorRT/49a6758e4c2e8f3eaa428c08caeaa0768d2c3736/srcs/deep_sort/utils/__init__.py
--------------------------------------------------------------------------------
/srcs/deep_sort/utils/asserts.py:
--------------------------------------------------------------------------------
1 | from os import environ
2 |
3 |
4 | def assert_in(file, files_to_check):
5 | if file not in files_to_check:
6 | raise AssertionError("{} does not exist in the list".format(str(file)))
7 | return True
8 |
9 |
10 | def assert_in_env(check_list: list):
11 | for item in check_list:
12 | assert_in(item, environ.keys())
13 | return True
14 |
--------------------------------------------------------------------------------
/srcs/deep_sort/utils/draw.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 | palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)
5 |
6 |
7 | def compute_color_for_labels(label):
8 | """
9 | Simple function that adds fixed color depending on the class
10 | """
11 | color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette]
12 | return tuple(color)
13 |
14 |
15 | def draw_boxes(img, bbox, identities=None, offset=(0,0)):
16 |     for i, box in enumerate(bbox):
17 |         x1, y1, x2, y2 = [int(coord) for coord in box]
18 |         x1 += offset[0]
19 |         x2 += offset[0]
20 |         y1 += offset[1]
21 |         y2 += offset[1]
22 |         # box text and bar
23 |         obj_id = int(identities[i]) if identities is not None else 0
24 |         color = compute_color_for_labels(obj_id)
25 |         label = '{}{:d}'.format("", obj_id)
26 |         t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
27 |         cv2.rectangle(img, (x1, y1), (x2, y2), color, 3)
28 |         cv2.rectangle(img, (x1, y1), (x1 + t_size[0] + 3, y1 + t_size[1] + 4), color, -1)
29 |         cv2.putText(img, label, (x1, y1 + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)
30 | return img
31 |
32 |
33 |
34 | if __name__ == '__main__':
35 | for i in range(82):
36 | print(compute_color_for_labels(i))
37 |
--------------------------------------------------------------------------------
/srcs/deep_sort/utils/evaluation.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import copy
4 | import motmetrics as mm
5 | mm.lap.default_solver = 'lap'
6 | from utils.io import read_results, unzip_objs
7 |
8 |
9 | class Evaluator(object):
10 |
11 | def __init__(self, data_root, seq_name, data_type):
12 | self.data_root = data_root
13 | self.seq_name = seq_name
14 | self.data_type = data_type
15 |
16 | self.load_annotations()
17 | self.reset_accumulator()
18 |
19 | def load_annotations(self):
20 | assert self.data_type == 'mot'
21 |
22 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt')
23 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True)
24 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True)
25 |
26 | def reset_accumulator(self):
27 | self.acc = mm.MOTAccumulator(auto_id=True)
28 |
29 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
30 | # results
31 | trk_tlwhs = np.copy(trk_tlwhs)
32 | trk_ids = np.copy(trk_ids)
33 |
34 | # gts
35 | gt_objs = self.gt_frame_dict.get(frame_id, [])
36 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]
37 |
38 | # ignore boxes
39 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
40 | ignore_tlwhs = unzip_objs(ignore_objs)[0]
41 |
42 |
43 | # remove ignored results
44 | keep = np.ones(len(trk_tlwhs), dtype=bool)
45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5)
46 | if len(iou_distance) > 0:
47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
49 | match_ious = iou_distance[match_is, match_js]
50 |
51 | match_js = np.asarray(match_js, dtype=int)
52 | match_js = match_js[np.logical_not(np.isnan(match_ious))]
53 | keep[match_js] = False
54 | trk_tlwhs = trk_tlwhs[keep]
55 | trk_ids = trk_ids[keep]
56 |
57 | # get distance matrix
58 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)
59 |
60 | # acc
61 | self.acc.update(gt_ids, trk_ids, iou_distance)
62 |
63 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'):
64 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics
65 | else:
66 | events = None
67 | return events
68 |
69 | def eval_file(self, filename):
70 | self.reset_accumulator()
71 |
72 | result_frame_dict = read_results(filename, self.data_type, is_gt=False)
73 | frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys())))
74 | for frame_id in frames:
75 | trk_objs = result_frame_dict.get(frame_id, [])
76 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
77 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False)
78 |
79 | return self.acc
80 |
81 | @staticmethod
82 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')):
83 | names = copy.deepcopy(names)
84 | if metrics is None:
85 | metrics = mm.metrics.motchallenge_metrics
86 | metrics = copy.deepcopy(metrics)
87 |
88 | mh = mm.metrics.create()
89 | summary = mh.compute_many(
90 | accs,
91 | metrics=metrics,
92 | names=names,
93 | generate_overall=True
94 | )
95 |
96 | return summary
97 |
98 | @staticmethod
99 | def save_summary(summary, filename):
100 | import pandas as pd
101 | writer = pd.ExcelWriter(filename)
102 | summary.to_excel(writer)
103 |         writer.close()  # close() writes the file; ExcelWriter.save() was removed in pandas 2.0
104 |
--------------------------------------------------------------------------------
/srcs/deep_sort/utils/io.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import Dict
3 | import numpy as np
4 |
5 | # from utils.log import get_logger
6 |
7 |
8 | def write_results(filename, results, data_type):
9 | if data_type == 'mot':
10 | save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n'
11 | elif data_type == 'kitti':
12 | save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
13 | else:
14 | raise ValueError(data_type)
15 |
16 | with open(filename, 'w') as f:
17 | for frame_id, tlwhs, track_ids in results:
18 | if data_type == 'kitti':
19 | frame_id -= 1
20 | for tlwh, track_id in zip(tlwhs, track_ids):
21 | if track_id < 0:
22 | continue
23 | x1, y1, w, h = tlwh
24 | x2, y2 = x1 + w, y1 + h
25 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h)
26 | f.write(line)
27 |
28 |
29 | # def write_results(filename, results_dict: Dict, data_type: str):
30 | # if not filename:
31 | # return
32 | # path = os.path.dirname(filename)
33 | # if not os.path.exists(path):
34 | # os.makedirs(path)
35 |
36 | # if data_type in ('mot', 'mcmot', 'lab'):
37 | # save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n'
38 | # elif data_type == 'kitti':
39 | # save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n'
40 | # else:
41 | # raise ValueError(data_type)
42 |
43 | # with open(filename, 'w') as f:
44 | # for frame_id, frame_data in results_dict.items():
45 | # if data_type == 'kitti':
46 | # frame_id -= 1
47 | # for tlwh, track_id in frame_data:
48 | # if track_id < 0:
49 | # continue
50 | # x1, y1, w, h = tlwh
51 | # x2, y2 = x1 + w, y1 + h
52 | # line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0)
53 | # f.write(line)
54 | # logger.info('Save results to {}'.format(filename))
55 |
56 |
57 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False):
58 | if data_type in ('mot', 'lab'):
59 | read_fun = read_mot_results
60 | else:
61 | raise ValueError('Unknown data type: {}'.format(data_type))
62 |
63 | return read_fun(filename, is_gt, is_ignore)
64 |
65 |
66 | """
67 | labels={'ped', ... % 1
68 | 'person_on_vhcl', ... % 2
69 | 'car', ... % 3
70 | 'bicycle', ... % 4
71 | 'mbike', ... % 5
72 | 'non_mot_vhcl', ... % 6
73 | 'static_person', ... % 7
74 | 'distractor', ... % 8
75 | 'occluder', ... % 9
76 | 'occluder_on_grnd', ... %10
77 | 'occluder_full', ... % 11
78 | 'reflection', ... % 12
79 | 'crowd' ... % 13
80 | };
81 | """
82 |
83 |
84 | def read_mot_results(filename, is_gt, is_ignore):
85 | valid_labels = {1}
86 | ignore_labels = {2, 7, 8, 12}
87 | results_dict = dict()
88 | if os.path.isfile(filename):
89 | with open(filename, 'r') as f:
90 | for line in f.readlines():
91 | linelist = line.split(',')
92 | if len(linelist) < 7:
93 | continue
94 | fid = int(linelist[0])
95 | if fid < 1:
96 | continue
97 | results_dict.setdefault(fid, list())
98 |
99 | if is_gt:
100 | if 'MOT16-' in filename or 'MOT17-' in filename:
101 | label = int(float(linelist[7]))
102 | mark = int(float(linelist[6]))
103 | if mark == 0 or label not in valid_labels:
104 | continue
105 | score = 1
106 | elif is_ignore:
107 | if 'MOT16-' in filename or 'MOT17-' in filename:
108 | label = int(float(linelist[7]))
109 | vis_ratio = float(linelist[8])
110 | if label not in ignore_labels and vis_ratio >= 0:
111 | continue
112 | else:
113 | continue
114 | score = 1
115 | else:
116 | score = float(linelist[6])
117 |
118 | tlwh = tuple(map(float, linelist[2:6]))
119 | target_id = int(linelist[1])
120 |
121 | results_dict[fid].append((tlwh, target_id, score))
122 |
123 | return results_dict
124 |
125 |
126 | def unzip_objs(objs):
127 | if len(objs) > 0:
128 | tlwhs, ids, scores = zip(*objs)
129 | else:
130 | tlwhs, ids, scores = [], [], []
131 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
132 |
133 | return tlwhs, ids, scores
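134 | 
135 | 
136 | if __name__ == '__main__':
137 |     # Illustrative round trip: write one frame of MOT-format results to a
138 |     # scratch file and read it back.
139 |     results = [(1, [(10.0, 20.0, 30.0, 60.0)], [7])]   # (frame_id, [tlwh], [track_id])
140 |     write_results('/tmp/example_results.txt', results, data_type='mot')
141 |     print(read_results('/tmp/example_results.txt', 'mot'))
142 |     # -> {1: [((10.0, 20.0, 30.0, 60.0), 7, -1.0)]}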
--------------------------------------------------------------------------------
/srcs/deep_sort/utils/json_logger.py:
--------------------------------------------------------------------------------
1 | """
2 | References:
3 | https://medium.com/analytics-vidhya/creating-a-custom-logging-mechanism-for-real-time-object-detection-using-tdd-4ca2cfcd0a2f
4 | """
5 | import json
6 | from os import makedirs
7 | from os.path import exists, join
8 | from datetime import datetime
9 |
10 |
11 | class JsonMeta(object):
12 | HOURS = 3
13 | MINUTES = 59
14 | SECONDS = 59
15 | PATH_TO_SAVE = 'LOGS'
16 | DEFAULT_FILE_NAME = 'remaining'
17 |
18 |
19 | class BaseJsonLogger(object):
20 | """
21 |     Base class that serializes its own __dict__;
22 |     it also serializes the objects held in attributes that are list instances.
23 |
24 | """
25 |
26 | def dic(self):
27 | # returns dicts of objects
28 | out = {}
29 | for k, v in self.__dict__.items():
30 | if hasattr(v, 'dic'):
31 | out[k] = v.dic()
32 | elif isinstance(v, list):
33 | out[k] = self.list(v)
34 | else:
35 | out[k] = v
36 | return out
37 |
38 | @staticmethod
39 | def list(values):
40 | # applies the dic method on items in the list
41 | return [v.dic() if hasattr(v, 'dic') else v for v in values]
42 |
43 |
44 | class Label(BaseJsonLogger):
45 | """
46 | For each bounding box there are various categories with confidences. Label class keeps track of that information.
47 | """
48 |
49 | def __init__(self, category: str, confidence: float):
50 | self.category = category
51 | self.confidence = confidence
52 |
53 |
54 | class Bbox(BaseJsonLogger):
55 | """
56 |     This class stores a single bounding box and its candidate labels, and is used by JsonParser
57 | Attributes:
58 | labels (list): List of label module.
59 | top (int):
60 | left (int):
61 | width (int):
62 | height (int):
63 |
64 | Args:
65 | bbox_id (float):
66 | top (int):
67 | left (int):
68 | width (int):
69 | height (int):
70 |
71 | References:
72 | Check Label module for better understanding.
73 |
74 |
75 | """
76 |
77 | def __init__(self, bbox_id, top, left, width, height):
78 | self.labels = []
79 | self.bbox_id = bbox_id
80 | self.top = top
81 | self.left = left
82 | self.width = width
83 | self.height = height
84 |
85 | def add_label(self, category, confidence):
86 |         # records a (category, confidence) label for this bbox (no top_k check is done here)
87 | self.labels.append(Label(category, confidence))
88 |
89 | def labels_full(self, value):
90 | return len(self.labels) == value
91 |
92 |
93 | class Frame(BaseJsonLogger):
94 | """
95 |     This class stores the information for each frame and is used by JsonParser
96 | Attributes:
97 | timestamp (float): The elapsed time of captured frame
98 | frame_id (int): The frame number of the captured video
99 | bboxes (list of Bbox objects): Stores the list of bbox objects.
100 |
101 | References:
102 | Check Bbox class for better information
103 |
104 | Args:
105 | timestamp (float):
106 | frame_id (int):
107 |
108 | """
109 |
110 | def __init__(self, frame_id: int, timestamp: float = None):
111 | self.frame_id = frame_id
112 | self.timestamp = timestamp
113 | self.bboxes = []
114 |
115 | def add_bbox(self, bbox_id: int, top: int, left: int, width: int, height: int):
116 | bboxes_ids = [bbox.bbox_id for bbox in self.bboxes]
117 | if bbox_id not in bboxes_ids:
118 | self.bboxes.append(Bbox(bbox_id, top, left, width, height))
119 | else:
120 | raise ValueError("Frame with id: {} already has a Bbox with id: {}".format(self.frame_id, bbox_id))
121 |
122 | def add_label_to_bbox(self, bbox_id: int, category: str, confidence: float):
123 |         bboxes = {bbox.bbox_id: bbox for bbox in self.bboxes}
124 | if bbox_id in bboxes.keys():
125 | res = bboxes.get(bbox_id)
126 | res.add_label(category, confidence)
127 | else:
128 |             raise ValueError('the bbox with id: {} does not exist!'.format(bbox_id))
129 |
130 |
131 | class BboxToJsonLogger(BaseJsonLogger):
132 | """
133 |     This module is designed to automate the task of logging JSON files. The example
134 |     below shows the structure of the output file.
135 | Example:
136 | {
137 | "video_details": {
138 | "frame_width": 1920,
139 | "frame_height": 1080,
140 | "frame_rate": 20,
141 | "video_name": "/home/gpu/codes/MSD/pedestrian_2/project/public/camera1.avi"
142 | },
143 | "frames": [
144 | {
145 | "frame_id": 329,
146 |                 "timestamp": 3365.1254,
147 | "bboxes": [
148 | {
149 | "labels": [
150 | {
151 | "category": "pedestrian",
152 | "confidence": 0.9
153 | }
154 | ],
155 | "bbox_id": 0,
156 | "top": 1257,
157 | "left": 138,
158 | "width": 68,
159 | "height": 109
160 | }
161 | ]
162 | }]
163 | }
164 | Attributes:
165 | frames (dict): It's a dictionary that maps each frame_id to json attributes.
166 | video_details (dict): information about video file.
167 | top_k_labels (int): shows the allowed number of labels
168 | start_time (datetime object): we use it to automate the json output by time.
169 |
170 | Args:
171 | top_k_labels (int): shows the allowed number of labels
172 |
173 | """
174 |
175 | def __init__(self, top_k_labels: int = 1):
176 | self.frames = {}
177 | self.video_details = dict(frame_width=None, frame_height=None, frame_rate=None,
178 | video_name=None)
179 | self.top_k_labels = top_k_labels
180 | self.start_time = datetime.now()
181 |
182 | def set_top_k(self, value):
183 | self.top_k_labels = value
184 |
185 | def frame_exists(self, frame_id: int) -> bool:
186 | """
187 | Args:
188 | frame_id (int):
189 |
190 | Returns:
191 | bool: true if frame_id is recognized
192 | """
193 | return frame_id in self.frames.keys()
194 |
195 | def add_frame(self, frame_id: int, timestamp: float = None) -> None:
196 | """
197 | Args:
198 | frame_id (int):
199 | timestamp (float): opencv captured frame time property
200 |
201 | Raises:
202 | ValueError: if frame_id already exists in the frames attribute
203 |
204 | Returns:
205 | None
206 |
207 | """
208 | if not self.frame_exists(frame_id):
209 | self.frames[frame_id] = Frame(frame_id, timestamp)
210 | else:
211 | raise ValueError("Frame id: {} already exists".format(frame_id))
212 |
213 | def bbox_exists(self, frame_id: int, bbox_id: int) -> bool:
214 | """
215 | Args:
216 | frame_id:
217 | bbox_id:
218 |
219 | Returns:
220 | bool: if bbox exists in frame bboxes list
221 | """
222 | bboxes = []
223 | if self.frame_exists(frame_id=frame_id):
224 | bboxes = [bbox.bbox_id for bbox in self.frames[frame_id].bboxes]
225 | return bbox_id in bboxes
226 |
227 | def find_bbox(self, frame_id: int, bbox_id: int):
228 | """
229 |
230 | Args:
231 | frame_id:
232 | bbox_id:
233 |
234 | Returns:
235 | bbox_id (int):
236 |
237 | Raises:
238 | ValueError: if bbox_id does not exist in the bbox list of specific frame.
239 | """
240 | if not self.bbox_exists(frame_id, bbox_id):
241 | raise ValueError("frame with id: {} does not contain bbox with id: {}".format(frame_id, bbox_id))
242 | bboxes = {bbox.bbox_id: bbox for bbox in self.frames[frame_id].bboxes}
243 | return bboxes.get(bbox_id)
244 |
245 | def add_bbox_to_frame(self, frame_id: int, bbox_id: int, top: int, left: int, width: int, height: int) -> None:
246 | """
247 |
248 | Args:
249 | frame_id (int):
250 | bbox_id (int):
251 | top (int):
252 | left (int):
253 | width (int):
254 | height (int):
255 |
256 | Returns:
257 | None
258 |
259 | Raises:
260 | ValueError: if bbox_id already exists in the frame information for frame_id
261 | ValueError: if frame_id does not exist in frames attribute
262 | """
263 | if self.frame_exists(frame_id):
264 | frame = self.frames[frame_id]
265 | if not self.bbox_exists(frame_id, bbox_id):
266 | frame.add_bbox(bbox_id, top, left, width, height)
267 | else:
268 | raise ValueError(
269 | "frame with frame_id: {} already contains the bbox with id: {} ".format(frame_id, bbox_id))
270 | else:
271 | raise ValueError("frame with frame_id: {} does not exist".format(frame_id))
272 |
273 | def add_label_to_bbox(self, frame_id: int, bbox_id: int, category: str, confidence: float):
274 | """
275 | Args:
276 | frame_id:
277 | bbox_id:
278 | category:
279 | confidence: the confidence value returned from yolo detection
280 |
281 | Returns:
282 | None
283 |
284 | Raises:
285 | ValueError: if the labels quota (top_k_labels) is exceeded.
286 | """
287 | bbox = self.find_bbox(frame_id, bbox_id)
288 | if not bbox.labels_full(self.top_k_labels):
289 | bbox.add_label(category, confidence)
290 | else:
291 | raise ValueError("labels in frame_id: {}, bbox_id: {} is fulled".format(frame_id, bbox_id))
292 |
293 | def add_video_details(self, frame_width: int = None, frame_height: int = None, frame_rate: int = None,
294 | video_name: str = None):
295 | self.video_details['frame_width'] = frame_width
296 | self.video_details['frame_height'] = frame_height
297 | self.video_details['frame_rate'] = frame_rate
298 | self.video_details['video_name'] = video_name
299 |
300 | def output(self):
301 | output = {'video_details': self.video_details}
302 | result = list(self.frames.values())
303 | output['frames'] = [item.dic() for item in result]
304 | return output
305 |
306 | def json_output(self, output_name):
307 | """
308 | Args:
309 | output_name:
310 |
311 | Returns:
312 | None
313 |
314 | Notes:
315 | Writes the JSON output to a file named `output_name` (appending a .json suffix if missing).
316 | """
317 | if not output_name.endswith('.json'):
318 | output_name += '.json'
319 | with open(output_name, 'w') as file:
320 | json.dump(self.output(), file)
321 | file.close()
322 |
323 | def set_start(self):
324 | self.start_time = datetime.now()
325 |
326 | def schedule_output_by_time(self, output_dir=JsonMeta.PATH_TO_SAVE, hours: int = 0, minutes: int = 0,
327 | seconds: int = 60) -> None:
328 | """
329 | Notes:
330 | Creates the output folder if needed and periodically stores the JSON files there.
331 |
332 | Args:
333 | output_dir (str): the directory where output files will be stored
334 | hours (int):
335 | minutes (int):
336 | seconds (int):
337 |
338 | Returns:
339 | None
340 |
341 | """
342 | end = datetime.now()
343 | interval = 0
344 | interval += abs(min([hours, JsonMeta.HOURS]) * 3600)
345 | interval += abs(min([minutes, JsonMeta.MINUTES]) * 60)
346 | interval += abs(min([seconds, JsonMeta.SECONDS]))
347 | diff = (end - self.start_time).seconds
348 |
349 | if diff > interval:
350 | output_name = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '.json'
351 | if not exists(output_dir):
352 | makedirs(output_dir)
353 | output = join(output_dir, output_name)
354 | self.json_output(output_name=output)
355 | self.frames = {}
356 | self.start_time = datetime.now()
357 |
358 | def schedule_output_by_frames(self, frames_quota, frame_counter, output_dir=JsonMeta.PATH_TO_SAVE):
359 | """
360 | Intended to save the output whenever frame_counter reaches frames_quota; not implemented yet.
361 | :param frames_quota:
362 | :param frame_counter:
363 | :param output_dir:
364 | :return:
365 | """
366 | pass
367 |
368 | def flush(self, output_dir):
369 | """
370 | Notes:
371 | Used to flush the remaining JSON output on demand,
372 | e.g. when exiting the OpenCV capture loop.
373 |
374 | Args:
375 | output_dir:
376 |
377 | Returns:
378 | None
379 |
380 | """
381 | filename = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '-remaining.json'
382 | output = join(output_dir, filename)
383 | self.json_output(output_name=output)
384 |
--------------------------------------------------------------------------------
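
A minimal usage sketch for BboxToJsonLogger above (the import assumes srcs/ is on PYTHONPATH; all values are illustrative):

from deep_sort.utils.json_logger import BboxToJsonLogger

logger = BboxToJsonLogger(top_k_labels=1)
logger.add_video_details(frame_width=1920, frame_height=1080, frame_rate=20,
                         video_name='sample_video/sample_1.webm')
logger.add_frame(frame_id=0, timestamp=0.0)
logger.add_bbox_to_frame(frame_id=0, bbox_id=1, top=50, left=100, width=68, height=109)
logger.add_label_to_bbox(frame_id=0, bbox_id=1, category='pedestrian', confidence=0.9)
logger.json_output('tracking_result')  # writes tracking_result.json
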
/srcs/deep_sort/utils/log.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 |
4 | def get_logger(name='root'):
5 | formatter = logging.Formatter(
6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s')
7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
8 |
9 | handler = logging.StreamHandler()
10 | handler.setFormatter(formatter)
11 |
12 | logger = logging.getLogger(name)
13 | logger.setLevel(logging.INFO)
14 | logger.addHandler(handler)
15 | return logger
16 |
17 |
18 |
--------------------------------------------------------------------------------
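
A quick sketch for get_logger above (import assumes srcs/ is on PYTHONPATH). Note that every call adds another StreamHandler to the named logger, so fetch it once and reuse it:

from deep_sort.utils.log import get_logger

logger = get_logger('deep_sort')
logger.info('tracker initialised')  # e.g. 2024-01-01 12:00:00 [INFO]: tracker initialised
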
/srcs/deep_sort/utils/parser.py:
--------------------------------------------------------------------------------
1 | import os
2 | import yaml
3 | from easydict import EasyDict as edict
4 |
5 | class YamlParser(edict):
6 | """
7 | This is yaml parser based on EasyDict.
8 | """
9 | def __init__(self, cfg_dict=None, config_file=None):
10 | if cfg_dict is None:
11 | cfg_dict = {}
12 |
13 | if config_file is not None:
14 | assert(os.path.isfile(config_file))
15 | with open(config_file, 'r') as fo:
16 | cfg_dict.update(yaml.load(fo.read(), Loader=yaml.FullLoader))
17 |
18 | super(YamlParser, self).__init__(cfg_dict)
19 |
20 |
21 | def merge_from_file(self, config_file):
22 | with open(config_file, 'r') as fo:
23 | self.update(yaml.load(fo.read(), Loader=yaml.FullLoader))
24 |
25 |
26 | def merge_from_dict(self, config_dict):
27 | self.update(config_dict)
28 |
29 |
30 | def get_config(config_file=None):
31 | return YamlParser(config_file=config_file)
32 |
33 |
34 | if __name__ == "__main__":
35 | cfg = YamlParser(config_file="../configs/yolov3.yaml")
36 | cfg.merge_from_file("../configs/deep_sort.yaml")
37 |
38 | import ipdb; ipdb.set_trace()
--------------------------------------------------------------------------------
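
A usage sketch for YamlParser/get_config above, assuming the DEEPSORT section of the bundled srcs/deep_sort/configs/deep_sort.yaml (import assumes srcs/ is on PYTHONPATH):

from deep_sort.utils.parser import get_config

cfg = get_config('srcs/deep_sort/configs/deep_sort.yaml')
print(cfg.DEEPSORT)                      # EasyDict gives attribute-style access
cfg.merge_from_dict({'USE_CUDA': True})  # runtime overrides
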
/srcs/deep_sort/utils/tools.py:
--------------------------------------------------------------------------------
1 | from functools import wraps
2 | from time import time
3 |
4 |
5 | def is_video(ext: str):
6 | """
7 | Returns true if ext exists in
8 | allowed_exts for video files.
9 |
10 | Args:
11 | ext:
12 |
13 | Returns:
14 |
15 | """
16 |
17 | allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp')
18 | return any((ext.endswith(x) for x in allowed_exts))
19 |
20 |
21 | def tik_tok(func):
22 | """
23 | Decorator that reports the wall-clock time and FPS of each call to the wrapped function.
24 | Args:
25 | func:
26 |
27 | Returns:
28 |
29 | """
30 | @wraps(func)
31 | def _time_it(*args, **kwargs):
32 | start = time()
33 | try:
34 | return func(*args, **kwargs)
35 | finally:
36 | end_ = time()
37 | print("time: {:.03f}s, fps: {:.03f}".format(end_ - start, 1 / (end_ - start)))
38 |
39 | return _time_it
40 |
--------------------------------------------------------------------------------
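
A quick sketch of the two helpers above (import assumes srcs/ is on PYTHONPATH):

from deep_sort.utils.tools import is_video, tik_tok

print(is_video('sample_video/sample_1.webm'))  # True

@tik_tok
def process_frame(frame):
    # stand-in for per-frame work; elapsed time and FPS are printed on return
    return frame

process_frame(None)
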
/srcs/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .engine import EngineBuilder, TRTModule, TRTProfilerV0, TRTProfilerV1 # isort:skip # noqa: E501
2 | import warnings
3 |
4 | import torch
5 |
6 | warnings.filterwarnings(action='ignore', category=torch.jit.TracerWarning)
7 | warnings.filterwarnings(action='ignore', category=torch.jit.ScriptWarning)
8 | warnings.filterwarnings(action='ignore', category=UserWarning)
9 | warnings.filterwarnings(action='ignore', category=FutureWarning)
10 | warnings.filterwarnings(action='ignore', category=DeprecationWarning)
11 | __all__ = ['EngineBuilder', 'TRTModule', 'TRTProfilerV0', 'TRTProfilerV1']
12 |
--------------------------------------------------------------------------------
/srcs/models/api.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from typing import List, OrderedDict, Tuple, Union
3 |
4 | import numpy as np
5 | import tensorrt as trt
6 |
7 | warnings.filterwarnings(action='ignore', category=DeprecationWarning)
8 |
9 |
10 | def trtweight(weights: np.ndarray) -> trt.Weights:
11 | weights = weights.astype(weights.dtype.name)
12 | return trt.Weights(weights)
13 |
14 |
15 | def get_width(x: int, gw: float, divisor: int = 8) -> int:
16 | return int(np.ceil(x * gw / divisor) * divisor)
17 |
18 |
19 | def get_depth(x: int, gd: float) -> int:
20 | return max(int(round(x * gd)), 1)
21 |
22 |
23 | def Conv2d(network: trt.INetworkDefinition, weights: OrderedDict,
24 | input: trt.ITensor, out_channel: int, ksize: int, stride: int,
25 | group: int, layer_name: str) -> trt.ILayer:
26 | padding = ksize // 2
27 | conv_w = trtweight(weights[layer_name + '.weight'])
28 | conv_b = trtweight(weights[layer_name + '.bias'])
29 | conv = network.add_convolution_nd(input,
30 | num_output_maps=out_channel,
31 | kernel_shape=trt.DimsHW(ksize, ksize),
32 | kernel=conv_w,
33 | bias=conv_b)
34 | assert conv, 'Add convolution_nd layer failed'
35 | conv.stride_nd = trt.DimsHW(stride, stride)
36 | conv.padding_nd = trt.DimsHW(padding, padding)
37 | conv.num_groups = group
38 | return conv
39 |
40 |
41 | def Conv(network: trt.INetworkDefinition, weights: OrderedDict,
42 | input: trt.ITensor, out_channel: int, ksize: int, stride: int,
43 | group: int, layer_name: str) -> trt.ILayer:
44 | padding = ksize // 2
45 | if ksize > 3:
46 | padding -= 1
47 | conv_w = trtweight(weights[layer_name + '.conv.weight'])
48 | conv_b = trtweight(weights[layer_name + '.conv.bias'])
49 |
50 | conv = network.add_convolution_nd(input,
51 | num_output_maps=out_channel,
52 | kernel_shape=trt.DimsHW(ksize, ksize),
53 | kernel=conv_w,
54 | bias=conv_b)
55 | assert conv, 'Add convolution_nd layer failed'
56 | conv.stride_nd = trt.DimsHW(stride, stride)
57 | conv.padding_nd = trt.DimsHW(padding, padding)
58 | conv.num_groups = group
59 |
60 | sigmoid = network.add_activation(conv.get_output(0),
61 | trt.ActivationType.SIGMOID)
62 | assert sigmoid, 'Add activation layer failed'
63 | dot_product = network.add_elementwise(conv.get_output(0),
64 | sigmoid.get_output(0),
65 | trt.ElementWiseOperation.PROD)
66 | assert dot_product, 'Add elementwise layer failed'
67 | return dot_product
68 |
69 |
70 | def Bottleneck(network: trt.INetworkDefinition, weights: OrderedDict,
71 | input: trt.ITensor, c1: int, c2: int, shortcut: bool,
72 | group: int, scale: float, layer_name: str) -> trt.ILayer:
73 | c_ = int(c2 * scale)
74 | conv1 = Conv(network, weights, input, c_, 3, 1, 1, layer_name + '.cv1')
75 | conv2 = Conv(network, weights, conv1.get_output(0), c2, 3, 1, group,
76 | layer_name + '.cv2')
77 | if shortcut and c1 == c2:
78 | ew = network.add_elementwise(input,
79 | conv2.get_output(0),
80 | op=trt.ElementWiseOperation.SUM)
81 | assert ew, 'Add elementwise layer failed'
82 | return ew
83 | return conv2
84 |
85 |
86 | def C2f(network: trt.INetworkDefinition, weights: OrderedDict,
87 | input: trt.ITensor, cout: int, n: int, shortcut: bool, group: int,
88 | scale: float, layer_name: str) -> trt.ILayer:
89 | c_ = int(cout * scale)  # hidden channels (scale is the expansion ratio)
90 | conv1 = Conv(network, weights, input, 2 * c_, 1, 1, 1, layer_name + '.cv1')
91 | y1 = conv1.get_output(0)
92 |
93 | b, _, h, w = y1.shape
94 | slice = network.add_slice(y1, (0, c_, 0, 0), (b, c_, h, w), (1, 1, 1, 1))
95 | assert slice, 'Add slice layer failed'
96 | y2 = slice.get_output(0)
97 |
98 | input_tensors = [y1]
99 | for i in range(n):
100 | b = Bottleneck(network, weights, y2, c_, c_, shortcut, group, 1.0,
101 | layer_name + '.m.' + str(i))
102 | y2 = b.get_output(0)
103 | input_tensors.append(y2)
104 |
105 | cat = network.add_concatenation(input_tensors)
106 | assert cat, 'Add concatenation layer failed'
107 |
108 | conv2 = Conv(network, weights, cat.get_output(0), cout, 1, 1, 1,
109 | layer_name + '.cv2')
110 | return conv2
111 |
112 |
113 | def SPPF(network: trt.INetworkDefinition, weights: OrderedDict,
114 | input: trt.ITensor, c1: int, c2: int, ksize: int,
115 | layer_name: str) -> trt.ILayer:
116 | c_ = c1 // 2
117 | conv1 = Conv(network, weights, input, c_, 1, 1, 1, layer_name + '.cv1')
118 |
119 | pool1 = network.add_pooling_nd(conv1.get_output(0), trt.PoolingType.MAX,
120 | trt.DimsHW(ksize, ksize))
121 | assert pool1, 'Add pooling_nd layer failed'
122 | pool1.padding_nd = trt.DimsHW(ksize // 2, ksize // 2)
123 | pool1.stride_nd = trt.DimsHW(1, 1)
124 |
125 | pool2 = network.add_pooling_nd(pool1.get_output(0), trt.PoolingType.MAX,
126 | trt.DimsHW(ksize, ksize))
127 | assert pool2, 'Add pooling_nd layer failed'
128 | pool2.padding_nd = trt.DimsHW(ksize // 2, ksize // 2)
129 | pool2.stride_nd = trt.DimsHW(1, 1)
130 |
131 | pool3 = network.add_pooling_nd(pool2.get_output(0), trt.PoolingType.MAX,
132 | trt.DimsHW(ksize, ksize))
133 | assert pool3, 'Add pooling_nd layer failed'
134 | pool3.padding_nd = trt.DimsHW(ksize // 2, ksize // 2)
135 | pool3.stride_nd = trt.DimsHW(1, 1)
136 |
137 | input_tensors = [
138 | conv1.get_output(0),
139 | pool1.get_output(0),
140 | pool2.get_output(0),
141 | pool3.get_output(0)
142 | ]
143 | cat = network.add_concatenation(input_tensors)
144 | assert cat, 'Add concatenation layer failed'
145 | conv2 = Conv(network, weights, cat.get_output(0), c2, 1, 1, 1,
146 | layer_name + '.cv2')
147 | return conv2
148 |
149 |
150 | def Detect(
151 | network: trt.INetworkDefinition,
152 | weights: OrderedDict,
153 | input: Union[List, Tuple],
154 | s: Union[List, Tuple],
155 | layer_name: str,
156 | reg_max: int = 16,
157 | fp16: bool = True,
158 | iou: float = 0.65,
159 | conf: float = 0.25,
160 | topk: int = 100,
161 | ) -> trt.ILayer:
162 | bboxes_branch = []
163 | scores_branch = []
164 | anchors = []
165 | strides = []
166 | for i, (inp, stride) in enumerate(zip(input, s)):
167 | h, w = inp.shape[2:]
168 | sx = np.arange(0, w).astype(np.float16 if fp16 else np.float32) + 0.5
169 | sy = np.arange(0, h).astype(np.float16 if fp16 else np.float32) + 0.5
170 | sy, sx = np.meshgrid(sy, sx)
171 | a = np.ascontiguousarray(np.stack((sy, sx), -1).reshape(-1, 2))
172 | anchors.append(a)
173 | strides.append(
174 | np.full((1, h * w),
175 | stride,
176 | dtype=np.float16 if fp16 else np.float32))
177 | c2 = weights[f'{layer_name}.cv2.{i}.0.conv.weight'].shape[0]
178 | c3 = weights[f'{layer_name}.cv3.{i}.0.conv.weight'].shape[0]
179 | nc = weights[f'{layer_name}.cv3.0.2.weight'].shape[0]
180 | reg_max_x4 = weights[layer_name + f'.cv2.{i}.2.weight'].shape[0]
181 | assert reg_max_x4 == reg_max * 4
182 | b_Conv_0 = Conv(network, weights, inp, c2, 3, 1, 1,
183 | layer_name + f'.cv2.{i}.0')
184 | b_Conv_1 = Conv(network, weights, b_Conv_0.get_output(0), c2, 3, 1, 1,
185 | layer_name + f'.cv2.{i}.1')
186 | b_Conv_2 = Conv2d(network, weights, b_Conv_1.get_output(0), reg_max_x4,
187 | 1, 1, 1, layer_name + f'.cv2.{i}.2')
188 |
189 | b_out = b_Conv_2.get_output(0)
190 | b_shape = network.add_constant([
191 | 4,
192 | ], np.array(b_out.shape[0:1] + (4, reg_max, -1), dtype=np.int32))
193 | assert b_shape, 'Add constant layer failed'
194 | b_shuffle = network.add_shuffle(b_out)
195 | assert b_shuffle, 'Add shuffle layer failed'
196 | b_shuffle.set_input(1, b_shape.get_output(0))
197 | b_shuffle.second_transpose = (0, 3, 1, 2)
198 |
199 | bboxes_branch.append(b_shuffle.get_output(0))
200 |
201 | s_Conv_0 = Conv(network, weights, inp, c3, 3, 1, 1,
202 | layer_name + f'.cv3.{i}.0')
203 | s_Conv_1 = Conv(network, weights, s_Conv_0.get_output(0), c3, 3, 1, 1,
204 | layer_name + f'.cv3.{i}.1')
205 | s_Conv_2 = Conv2d(network, weights, s_Conv_1.get_output(0), nc, 1, 1,
206 | 1, layer_name + f'.cv3.{i}.2')
207 | s_out = s_Conv_2.get_output(0)
208 | s_shape = network.add_constant([
209 | 3,
210 | ], np.array(s_out.shape[0:2] + (-1, ), dtype=np.int32))
211 | assert s_shape, 'Add constant layer failed'
212 | s_shuffle = network.add_shuffle(s_out)
213 | assert s_shuffle, 'Add shuffle layer failed'
214 | s_shuffle.set_input(1, s_shape.get_output(0))
215 | s_shuffle.second_transpose = (0, 2, 1)
216 |
217 | scores_branch.append(s_shuffle.get_output(0))
218 |
219 | Cat_bboxes = network.add_concatenation(bboxes_branch)
220 | assert Cat_bboxes, 'Add concatenation layer failed'
221 | Cat_scores = network.add_concatenation(scores_branch)
222 | assert Cat_scores, 'Add concatenation layer failed'
223 | Cat_scores.axis = 1
224 |
225 | Softmax = network.add_softmax(Cat_bboxes.get_output(0))
226 | assert Softmax, 'Add softmax layer failed'
227 | Softmax.axes = 1 << 3
228 |
229 | SCORES = network.add_activation(Cat_scores.get_output(0),
230 | trt.ActivationType.SIGMOID)
231 | assert SCORES, 'Add activation layer failed'
232 |
233 | reg_max = np.arange(
234 | 0, reg_max).astype(np.float16 if fp16 else np.float32).reshape(
235 | (1, 1, -1, 1))
236 | constant = network.add_constant(reg_max.shape, reg_max)
237 | assert constant, 'Add constant layer failed'
238 | Matmul = network.add_matrix_multiply(Softmax.get_output(0),
239 | trt.MatrixOperation.NONE,
240 | constant.get_output(0),
241 | trt.MatrixOperation.NONE)
242 | assert Matmul, 'Add matrix_multiply layer failed'
243 | pre_bboxes = network.add_gather(
244 | Matmul.get_output(0),
245 | network.add_constant([
246 | 1,
247 | ], np.array([0], dtype=np.int32)).get_output(0), 3)
248 | assert pre_bboxes, 'Add gather layer failed'
249 | pre_bboxes.num_elementwise_dims = 1
250 |
251 | pre_bboxes_tensor = pre_bboxes.get_output(0)
252 | b, c, _ = pre_bboxes_tensor.shape
253 | slice_x1y1 = network.add_slice(pre_bboxes_tensor, (0, 0, 0), (b, c, 2),
254 | (1, 1, 1))
255 | assert slice_x1y1, 'Add slice layer failed'
256 | slice_x2y2 = network.add_slice(pre_bboxes_tensor, (0, 0, 2), (b, c, 2),
257 | (1, 1, 1))
258 | assert slice_x2y2, 'Add slice layer failed'
259 | anchors = np.concatenate(anchors, 0)[np.newaxis]
260 | anchors = network.add_constant(anchors.shape, anchors)
261 | assert anchors, 'Add constant layer failed'
262 | strides = np.concatenate(strides, 1)[..., np.newaxis]
263 | strides = network.add_constant(strides.shape, strides)
264 | assert strides, 'Add constant layer failed'
265 |
266 | Sub = network.add_elementwise(anchors.get_output(0),
267 | slice_x1y1.get_output(0),
268 | trt.ElementWiseOperation.SUB)
269 | assert Sub, 'Add elementwise layer failed'
270 | Add = network.add_elementwise(anchors.get_output(0),
271 | slice_x2y2.get_output(0),
272 | trt.ElementWiseOperation.SUM)
273 | assert Add, 'Add elementwise layer failed'
274 | x1y1 = Sub.get_output(0)
275 | x2y2 = Add.get_output(0)
276 |
277 | Cat_bboxes_ = network.add_concatenation([x1y1, x2y2])
278 | assert Cat_bboxes_, 'Add concatenation layer failed'
279 | Cat_bboxes_.axis = 2
280 |
281 | BBOXES = network.add_elementwise(Cat_bboxes_.get_output(0),
282 | strides.get_output(0),
283 | trt.ElementWiseOperation.PROD)
284 | assert BBOXES, 'Add elementwise layer failed'
285 | plugin_creator = trt.get_plugin_registry().get_plugin_creator(
286 | 'EfficientNMS_TRT', '1')
287 | assert plugin_creator, 'Plugin EfficientNMS_TRT is not registered'
288 |
289 | background_class = trt.PluginField('background_class',
290 | np.array(-1, np.int32),
291 | trt.PluginFieldType.INT32)
292 | box_coding = trt.PluginField('box_coding', np.array(0, np.int32),
293 | trt.PluginFieldType.INT32)
294 | iou_threshold = trt.PluginField('iou_threshold',
295 | np.array(iou, dtype=np.float32),
296 | trt.PluginFieldType.FLOAT32)
297 | max_output_boxes = trt.PluginField('max_output_boxes',
298 | np.array(topk, np.int32),
299 | trt.PluginFieldType.INT32)
300 | plugin_version = trt.PluginField('plugin_version', np.array('1'),
301 | trt.PluginFieldType.CHAR)
302 | score_activation = trt.PluginField('score_activation',
303 | np.array(0, np.int32),
304 | trt.PluginFieldType.INT32)
305 | score_threshold = trt.PluginField('score_threshold',
306 | np.array(conf, dtype=np.float32),
307 | trt.PluginFieldType.FLOAT32)
308 |
309 | batched_nms_op = plugin_creator.create_plugin(
310 | name='batched_nms',
311 | field_collection=trt.PluginFieldCollection([
312 | background_class, box_coding, iou_threshold, max_output_boxes,
313 | plugin_version, score_activation, score_threshold
314 | ]))
315 |
316 | batched_nms = network.add_plugin_v2(
317 | inputs=[BBOXES.get_output(0),
318 | SCORES.get_output(0)],
319 | plugin=batched_nms_op)
320 |
321 | batched_nms.get_output(0).name = 'num_dets'
322 | batched_nms.get_output(1).name = 'bboxes'
323 | batched_nms.get_output(2).name = 'scores'
324 | batched_nms.get_output(3).name = 'labels'
325 |
326 | return batched_nms
327 |
--------------------------------------------------------------------------------
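
For reference, the Conv helper above reproduces YOLOv8's Conv block (convolution followed by SiLU, with batch norm presumably already fused into the convolution weights) out of raw TensorRT layers: the sigmoid activation plus the element-wise product is exactly x * sigmoid(x). A small PyTorch check of that identity:

import torch
import torch.nn as nn

x = torch.randn(1, 16, 8, 8)
assert torch.allclose(nn.SiLU()(x), x * torch.sigmoid(x))  # SiLU(x) == x * sigmoid(x)
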
/srcs/models/common.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 |
3 | import torch
4 | import torch.nn as nn
5 | from torch import Graph, Tensor, Value
6 |
7 |
8 | def make_anchors(feats: Tensor,
9 | strides: Tensor,
10 | grid_cell_offset: float = 0.5) -> Tuple[Tensor, Tensor]:
11 | anchor_points, stride_tensor = [], []
12 | assert feats is not None
13 | dtype, device = feats[0].dtype, feats[0].device
14 | for i, stride in enumerate(strides):
15 | _, _, h, w = feats[i].shape
16 | sx = torch.arange(end=w, device=device,
17 | dtype=dtype) + grid_cell_offset # shift x
18 | sy = torch.arange(end=h, device=device,
19 | dtype=dtype) + grid_cell_offset # shift y
20 | sy, sx = torch.meshgrid(sy, sx)
21 | anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2))
22 | stride_tensor.append(
23 | torch.full((h * w, 1), stride, dtype=dtype, device=device))
24 | return torch.cat(anchor_points), torch.cat(stride_tensor)
25 |
26 |
27 | class TRT_NMS(torch.autograd.Function):
28 |
29 | @staticmethod
30 | def forward(
31 | ctx: Graph,
32 | boxes: Tensor,
33 | scores: Tensor,
34 | iou_threshold: float = 0.65,
35 | score_threshold: float = 0.25,
36 | max_output_boxes: int = 100,
37 | background_class: int = -1,
38 | box_coding: int = 0,
39 | plugin_version: str = '1',
40 | score_activation: int = 0
41 | ) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
42 | batch_size, num_boxes, num_classes = scores.shape
43 | num_dets = torch.randint(0,
44 | max_output_boxes, (batch_size, 1),
45 | dtype=torch.int32)
46 | boxes = torch.randn(batch_size, max_output_boxes, 4)
47 | scores = torch.randn(batch_size, max_output_boxes)
48 | labels = torch.randint(0,
49 | num_classes, (batch_size, max_output_boxes),
50 | dtype=torch.int32)
51 |
52 | return num_dets, boxes, scores, labels
53 |
54 | @staticmethod
55 | def symbolic(
56 | g,
57 | boxes: Value,
58 | scores: Value,
59 | iou_threshold: float = 0.45,
60 | score_threshold: float = 0.25,
61 | max_output_boxes: int = 100,
62 | background_class: int = -1,
63 | box_coding: int = 0,
64 | score_activation: int = 0,
65 | plugin_version: str = '1') -> Tuple[Value, Value, Value, Value]:
66 | out = g.op('TRT::EfficientNMS_TRT',
67 | boxes,
68 | scores,
69 | iou_threshold_f=iou_threshold,
70 | score_threshold_f=score_threshold,
71 | max_output_boxes_i=max_output_boxes,
72 | background_class_i=background_class,
73 | box_coding_i=box_coding,
74 | plugin_version_s=plugin_version,
75 | score_activation_i=score_activation,
76 | outputs=4)
77 | nums_dets, boxes, scores, classes = out
78 | return nums_dets, boxes, scores, classes
79 |
80 |
81 | class C2f(nn.Module):
82 |
83 | def __init__(self, *args, **kwargs):
84 | super().__init__()
85 |
86 | def forward(self, x):
87 | x = self.cv1(x)
88 | x = [x, x[:, self.c:, ...]]
89 | x.extend(m(x[-1]) for m in self.m)
90 | x.pop(1)
91 | return self.cv2(torch.cat(x, 1))
92 |
93 |
94 | class PostDetect(nn.Module):
95 | export = True
96 | shape = None
97 | dynamic = False
98 | iou_thres = 0.65
99 | conf_thres = 0.25
100 | topk = 100
101 |
102 | def __init__(self, *args, **kwargs):
103 | super().__init__()
104 |
105 | def forward(self, x):
106 | shape = x[0].shape
107 | b, res, b_reg_num = shape[0], [], self.reg_max * 4
108 | for i in range(self.nl):
109 | res.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1))
110 | if self.dynamic or self.shape != shape:
111 | self.anchors, self.strides = (x.transpose(
112 | 0, 1) for x in make_anchors(x, self.stride, 0.5))
113 | self.shape = shape
114 | x = [i.view(b, self.no, -1) for i in res]
115 | y = torch.cat(x, 2)
116 | boxes, scores = y[:, :b_reg_num, ...], y[:, b_reg_num:, ...].sigmoid()
117 | boxes = boxes.view(b, 4, self.reg_max, -1).permute(0, 1, 3, 2)
118 | boxes = boxes.softmax(-1) @ torch.arange(self.reg_max).to(boxes)
119 | boxes0, boxes1 = -boxes[:, :2, ...], boxes[:, 2:, ...]
120 | boxes = self.anchors.repeat(b, 2, 1) + torch.cat([boxes0, boxes1], 1)
121 | boxes = boxes * self.strides
122 |
123 | return TRT_NMS.apply(boxes.transpose(1, 2), scores.transpose(1, 2),
124 | self.iou_thres, self.conf_thres, self.topk)
125 |
126 |
127 | class PostSeg(nn.Module):
128 | export = True
129 | shape = None
130 | dynamic = False
131 |
132 | def __init__(self, *args, **kwargs):
133 | super().__init__()
134 |
135 | def forward(self, x):
136 | p = self.proto(x[0]) # mask protos
137 | bs = p.shape[0] # batch size
138 | mc = torch.cat(
139 | [self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)],
140 | 2) # mask coefficients
141 | boxes, scores, labels = self.forward_det(x)
142 | out = torch.cat([boxes, scores, labels.float(), mc.transpose(1, 2)], 2)
143 | return out, p.flatten(2)
144 |
145 | def forward_det(self, x):
146 | shape = x[0].shape
147 | b, res, b_reg_num = shape[0], [], self.reg_max * 4
148 | for i in range(self.nl):
149 | res.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1))
150 | if self.dynamic or self.shape != shape:
151 | self.anchors, self.strides = \
152 | (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
153 | self.shape = shape
154 | x = [i.view(b, self.no, -1) for i in res]
155 | y = torch.cat(x, 2)
156 | boxes, scores = y[:, :b_reg_num, ...], y[:, b_reg_num:, ...].sigmoid()
157 | boxes = boxes.view(b, 4, self.reg_max, -1).permute(0, 1, 3, 2)
158 | boxes = boxes.softmax(-1) @ torch.arange(self.reg_max).to(boxes)
159 | boxes0, boxes1 = -boxes[:, :2, ...], boxes[:, 2:, ...]
160 | boxes = self.anchors.repeat(b, 2, 1) + torch.cat([boxes0, boxes1], 1)
161 | boxes = boxes * self.strides
162 | scores, labels = scores.transpose(1, 2).max(dim=-1, keepdim=True)
163 | return boxes.transpose(1, 2), scores, labels
164 |
165 |
166 | def optim(module: nn.Module):
167 | s = str(type(module))[6:-2].split('.')[-1]
168 | if s == 'Detect':
169 | setattr(module, '__class__', PostDetect)
170 | elif s == 'Segment':
171 | setattr(module, '__class__', PostSeg)
172 | elif s == 'C2f':
173 | setattr(module, '__class__', C2f)
174 |
--------------------------------------------------------------------------------
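
optim above swaps the classes of a loaded model's Detect/Segment/C2f modules so that ONNX export emits the TRT-friendly heads defined in this file (PostDetect ends in the EfficientNMS_TRT node). A hedged export sketch; the ultralytics package, the weight path and the opset are assumptions here, and the repository's own export flow may differ (imports assume srcs/ is on PYTHONPATH):

import torch
from ultralytics import YOLO
from models.common import PostDetect, optim

model = YOLO('yolov8n.pt').model.fuse().eval()  # assumed source checkpoint
for m in model.modules():
    optim(m)                                    # re-class Detect/Segment/C2f in place
PostDetect.conf_thres, PostDetect.iou_thres, PostDetect.topk = 0.25, 0.65, 100

dummy = torch.zeros(1, 3, 640, 640)
torch.onnx.export(model, dummy, 'models/onnx/yolov8n.onnx', opset_version=11,
                  input_names=['images'],
                  output_names=['num_dets', 'bboxes', 'scores', 'labels'])
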
/srcs/models/cudart_api.py:
--------------------------------------------------------------------------------
1 | import os
2 | import warnings
3 | from dataclasses import dataclass
4 | from pathlib import Path
5 | from typing import List, Optional, Tuple, Union
6 |
7 | import numpy as np
8 | import tensorrt as trt
9 | from cuda import cudart
10 | from numpy import ndarray
11 |
12 | os.environ['CUDA_MODULE_LOADING'] = 'LAZY'
13 | warnings.filterwarnings(action='ignore', category=DeprecationWarning)
14 |
15 |
16 | @dataclass
17 | class Tensor:
18 | name: str
19 | dtype: np.dtype
20 | shape: Tuple
21 | cpu: ndarray
22 | gpu: int
23 |
24 |
25 | class TRTEngine:
26 |
27 | def __init__(self, weight: Union[str, Path]) -> None:
28 | self.weight = Path(weight) if isinstance(weight, str) else weight
29 | status, self.stream = cudart.cudaStreamCreate()
30 | assert status.value == 0
31 | self.__init_engine()
32 | self.__init_bindings()
33 | self.__warm_up()
34 |
35 | def __init_engine(self) -> None:
36 | logger = trt.Logger(trt.Logger.WARNING)
37 | trt.init_libnvinfer_plugins(logger, namespace='')
38 | with trt.Runtime(logger) as runtime:
39 | model = runtime.deserialize_cuda_engine(self.weight.read_bytes())
40 |
41 | context = model.create_execution_context()
42 |
43 | names = [model.get_binding_name(i) for i in range(model.num_bindings)]
44 | self.num_bindings = model.num_bindings
45 | self.bindings: List[int] = [0] * self.num_bindings
46 | num_inputs, num_outputs = 0, 0
47 |
48 | for i in range(model.num_bindings):
49 | if model.binding_is_input(i):
50 | num_inputs += 1
51 | else:
52 | num_outputs += 1
53 |
54 | self.num_inputs = num_inputs
55 | self.num_outputs = num_outputs
56 | self.model = model
57 | self.context = context
58 | self.input_names = names[:num_inputs]
59 | self.output_names = names[num_inputs:]
60 |
61 | def __init_bindings(self) -> None:
62 | dynamic = False
63 | inp_info = []
64 | out_info = []
65 | out_ptrs = []
66 | for i, name in enumerate(self.input_names):
67 | assert self.model.get_binding_name(i) == name
68 | dtype = trt.nptype(self.model.get_binding_dtype(i))
69 | shape = tuple(self.model.get_binding_shape(i))
70 | if -1 in shape:
71 | dynamic |= True
72 | if not dynamic:
73 | cpu = np.empty(shape, dtype)
74 | status, gpu = cudart.cudaMallocAsync(cpu.nbytes, self.stream)
75 | assert status.value == 0
76 | cudart.cudaMemcpyAsync(
77 | gpu, cpu.ctypes.data, cpu.nbytes,
78 | cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream)
79 | else:
80 | cpu, gpu = np.empty(0), 0
81 | inp_info.append(Tensor(name, dtype, shape, cpu, gpu))
82 | for i, name in enumerate(self.output_names):
83 | i += self.num_inputs
84 | assert self.model.get_binding_name(i) == name
85 | dtype = trt.nptype(self.model.get_binding_dtype(i))
86 | shape = tuple(self.model.get_binding_shape(i))
87 | if not dynamic:
88 | cpu = np.empty(shape, dtype=dtype)
89 | status, gpu = cudart.cudaMallocAsync(cpu.nbytes, self.stream)
90 | assert status.value == 0
91 | cudart.cudaMemcpyAsync(
92 | gpu, cpu.ctypes.data, cpu.nbytes,
93 | cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream)
94 | out_ptrs.append(gpu)
95 | else:
96 | cpu, gpu = np.empty(0), 0
97 | out_info.append(Tensor(name, dtype, shape, cpu, gpu))
98 |
99 | self.is_dynamic = dynamic
100 | self.inp_info = inp_info
101 | self.out_info = out_info
102 | self.out_ptrs = out_ptrs
103 |
104 | def __warm_up(self) -> None:
105 | if self.is_dynamic:
106 | print('Your engine has dynamic axes, please warm it up yourself!')
107 | return
108 | for _ in range(10):
109 | inputs = []
110 | for i in self.inp_info:
111 | inputs.append(i.cpu)
112 | self.__call__(inputs)
113 |
114 | def set_profiler(self, profiler: Optional[trt.IProfiler]) -> None:
115 | self.context.profiler = profiler \
116 | if profiler is not None else trt.Profiler()
117 |
118 | def __call__(self, *inputs) -> Union[Tuple, ndarray]:
119 |
120 | assert len(inputs) == self.num_inputs
121 | contiguous_inputs: List[ndarray] = [
122 | np.ascontiguousarray(i) for i in inputs
123 | ]
124 |
125 | for i in range(self.num_inputs):
126 |
127 | if self.is_dynamic:
128 | self.context.set_binding_shape(
129 | i, tuple(contiguous_inputs[i].shape))
130 | status, self.inp_info[i].gpu = cudart.cudaMallocAsync(
131 | contiguous_inputs[i].nbytes, self.stream)
132 | assert status.value == 0
133 | cudart.cudaMemcpyAsync(
134 | self.inp_info[i].gpu, contiguous_inputs[i].ctypes.data,
135 | contiguous_inputs[i].nbytes,
136 | cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream)
137 | self.bindings[i] = self.inp_info[i].gpu
138 |
139 | output_gpu_ptrs: List[int] = []
140 | outputs: List[ndarray] = []
141 |
142 | for i in range(self.num_outputs):
143 | j = i + self.num_inputs
144 | if self.is_dynamic:
145 | shape = tuple(self.context.get_binding_shape(j))
146 | dtype = self.out_info[i].dtype
147 | cpu = np.empty(shape, dtype=dtype)
148 | status, gpu = cudart.cudaMallocAsync(cpu.nbytes, self.stream)
149 | assert status.value == 0
150 | cudart.cudaMemcpyAsync(
151 | gpu, cpu.ctypes.data, cpu.nbytes,
152 | cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream)
153 | else:
154 | cpu = self.out_info[i].cpu
155 | gpu = self.out_info[i].gpu
156 | outputs.append(cpu)
157 | output_gpu_ptrs.append(gpu)
158 | self.bindings[j] = gpu
159 |
160 | self.context.execute_async_v2(self.bindings, self.stream)
161 | cudart.cudaStreamSynchronize(self.stream)
162 |
163 | for i, o in enumerate(output_gpu_ptrs):
164 | cudart.cudaMemcpyAsync(
165 | outputs[i].ctypes.data, o, outputs[i].nbytes,
166 | cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, self.stream)
167 |
168 | return tuple(outputs) if len(outputs) > 1 else outputs[0]
169 |
--------------------------------------------------------------------------------
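
A minimal inference sketch with the cudart-based TRTEngine above, reusing the letterbox/blob helpers from srcs/models/utils.py (imports assume srcs/ is on PYTHONPATH; the engine path and input image are illustrative):

import cv2
from models.cudart_api import TRTEngine
from models.utils import blob, letterbox

engine = TRTEngine('models/engine/yolov8n.engine')
bgr = cv2.imread('sample_frame.jpg')
img, ratio, dwdh = letterbox(bgr, (640, 640))
tensor = blob(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # 1x3x640x640 float32 in [0, 1]
num_dets, bboxes, scores, labels = engine(tensor)    # detection engine outputs
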
/srcs/models/engine.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | from collections import defaultdict, namedtuple
4 | from pathlib import Path
5 | from typing import List, Optional, Tuple, Union
6 |
7 | import onnx
8 | import tensorrt as trt
9 | import torch
10 |
11 | os.environ['CUDA_MODULE_LOADING'] = 'LAZY'
12 |
13 |
14 | class EngineBuilder:
15 | seg = False
16 |
17 | def __init__(
18 | self,
19 | checkpoint: Union[str, Path],
20 | device: Optional[Union[str, int, torch.device]] = None) -> None:
21 | checkpoint = Path(checkpoint) if isinstance(checkpoint,
22 | str) else checkpoint
23 | assert checkpoint.exists() and checkpoint.suffix in ('.onnx', '.pkl')
24 | self.api = checkpoint.suffix == '.pkl'
25 | if isinstance(device, str):
26 | device = torch.device(device)
27 | elif isinstance(device, int):
28 | device = torch.device(f'cuda:{device}')
29 |
30 | self.checkpoint = checkpoint
31 | self.device = device
32 |
33 | def __build_engine(self,
34 | fp16: bool = True,
35 | input_shape: Union[List, Tuple] = (1, 3, 640, 640),
36 | iou_thres: float = 0.65,
37 | conf_thres: float = 0.25,
38 | topk: int = 100,
39 | with_profiling: bool = True) -> None:
40 | logger = trt.Logger(trt.Logger.WARNING)
41 | trt.init_libnvinfer_plugins(logger, namespace='')
42 | builder = trt.Builder(logger)
43 | config = builder.create_builder_config()
44 | config.max_workspace_size = torch.cuda.get_device_properties(
45 | self.device).total_memory
46 | flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
47 | network = builder.create_network(flag)
48 |
49 | self.logger = logger
50 | self.builder = builder
51 | self.network = network
52 | if self.api:
53 | self.build_from_api(fp16, input_shape, iou_thres, conf_thres, topk)
54 | else:
55 | self.build_from_onnx(iou_thres, conf_thres, topk)
56 | if fp16 and self.builder.platform_has_fast_fp16:
57 | config.set_flag(trt.BuilderFlag.FP16)
58 | self.weight = self.checkpoint.with_suffix('.engine')
59 |
60 | if with_profiling:
61 | config.profiling_verbosity = trt.ProfilingVerbosity.DETAILED
62 | with self.builder.build_engine(self.network, config) as engine:
63 | self.weight.write_bytes(engine.serialize())
64 | self.logger.log(
65 | trt.Logger.WARNING, f'Building TensorRT engine finished.\n'
66 | f'Saved in {str(self.weight.absolute())}')
67 |
68 | def build(self,
69 | fp16: bool = True,
70 | input_shape: Union[List, Tuple] = (1, 3, 640, 640),
71 | iou_thres: float = 0.65,
72 | conf_thres: float = 0.25,
73 | topk: int = 100,
74 | with_profiling=True) -> None:
75 | self.__build_engine(fp16, input_shape, iou_thres, conf_thres, topk,
76 | with_profiling)
77 |
78 | def build_from_onnx(self,
79 | iou_thres: float = 0.65,
80 | conf_thres: float = 0.25,
81 | topk: int = 100):
82 | parser = trt.OnnxParser(self.network, self.logger)
83 | onnx_model = onnx.load(str(self.checkpoint))
84 | if not self.seg:
85 | onnx_model.graph.node[-1].attribute[2].i = topk
86 | onnx_model.graph.node[-1].attribute[3].f = conf_thres
87 | onnx_model.graph.node[-1].attribute[4].f = iou_thres
88 |
89 | if not parser.parse(onnx_model.SerializeToString()):
90 | raise RuntimeError(
91 | f'failed to load ONNX file: {str(self.checkpoint)}')
92 | inputs = [
93 | self.network.get_input(i) for i in range(self.network.num_inputs)
94 | ]
95 | outputs = [
96 | self.network.get_output(i) for i in range(self.network.num_outputs)
97 | ]
98 |
99 | for inp in inputs:
100 | self.logger.log(
101 | trt.Logger.WARNING,
102 | f'input "{inp.name}" with shape: {inp.shape} '
103 | f'dtype: {inp.dtype}')
104 | for out in outputs:
105 | self.logger.log(
106 | trt.Logger.WARNING,
107 | f'output "{out.name}" with shape: {out.shape} '
108 | f'dtype: {out.dtype}')
109 |
110 | def build_from_api(
111 | self,
112 | fp16: bool = True,
113 | input_shape: Union[List, Tuple] = (1, 3, 640, 640),
114 | iou_thres: float = 0.65,
115 | conf_thres: float = 0.25,
116 | topk: int = 100,
117 | ):
118 | assert not self.seg
119 | from .api import SPPF, C2f, Conv, Detect, get_depth, get_width
120 |
121 | with open(self.checkpoint, 'rb') as f:
122 | state_dict = pickle.load(f)
123 | mapping = {0.25: 1024, 0.5: 1024, 0.75: 768, 1.0: 512, 1.25: 512}
124 |
125 | GW = state_dict['GW']
126 | GD = state_dict['GD']
127 | width_64 = get_width(64, GW)
128 | width_128 = get_width(128, GW)
129 | width_256 = get_width(256, GW)
130 | width_512 = get_width(512, GW)
131 | width_1024 = get_width(mapping[GW], GW)
132 | depth_3 = get_depth(3, GD)
133 | depth_6 = get_depth(6, GD)
134 | strides = state_dict['strides']
135 | reg_max = state_dict['reg_max']
136 | images = self.network.add_input(name='images',
137 | dtype=trt.float32,
138 | shape=trt.Dims4(input_shape))
139 | assert images, 'Add input failed'
140 |
141 | Conv_0 = Conv(self.network, state_dict, images, width_64, 3, 2, 1,
142 | 'Conv.0')
143 | Conv_1 = Conv(self.network, state_dict, Conv_0.get_output(0),
144 | width_128, 3, 2, 1, 'Conv.1')
145 | C2f_2 = C2f(self.network, state_dict, Conv_1.get_output(0), width_128,
146 | depth_3, True, 1, 0.5, 'C2f.2')
147 | Conv_3 = Conv(self.network, state_dict, C2f_2.get_output(0), width_256,
148 | 3, 2, 1, 'Conv.3')
149 | C2f_4 = C2f(self.network, state_dict, Conv_3.get_output(0), width_256,
150 | depth_6, True, 1, 0.5, 'C2f.4')
151 | Conv_5 = Conv(self.network, state_dict, C2f_4.get_output(0), width_512,
152 | 3, 2, 1, 'Conv.5')
153 | C2f_6 = C2f(self.network, state_dict, Conv_5.get_output(0), width_512,
154 | depth_6, True, 1, 0.5, 'C2f.6')
155 | Conv_7 = Conv(self.network, state_dict, C2f_6.get_output(0),
156 | width_1024, 3, 2, 1, 'Conv.7')
157 | C2f_8 = C2f(self.network, state_dict, Conv_7.get_output(0), width_1024,
158 | depth_3, True, 1, 0.5, 'C2f.8')
159 | SPPF_9 = SPPF(self.network, state_dict, C2f_8.get_output(0),
160 | width_1024, width_1024, 5, 'SPPF.9')
161 | Upsample_10 = self.network.add_resize(SPPF_9.get_output(0))
162 | assert Upsample_10, 'Add Upsample_10 failed'
163 | Upsample_10.resize_mode = trt.ResizeMode.NEAREST
164 | Upsample_10.shape = Upsample_10.get_output(
165 | 0).shape[:2] + C2f_6.get_output(0).shape[2:]
166 | input_tensors11 = [Upsample_10.get_output(0), C2f_6.get_output(0)]
167 | Cat_11 = self.network.add_concatenation(input_tensors11)
168 | C2f_12 = C2f(self.network, state_dict, Cat_11.get_output(0), width_512,
169 | depth_3, False, 1, 0.5, 'C2f.12')
170 | Upsample13 = self.network.add_resize(C2f_12.get_output(0))
171 | assert Upsample13, 'Add Upsample13 failed'
172 | Upsample13.resize_mode = trt.ResizeMode.NEAREST
173 | Upsample13.shape = Upsample13.get_output(
174 | 0).shape[:2] + C2f_4.get_output(0).shape[2:]
175 | input_tensors14 = [Upsample13.get_output(0), C2f_4.get_output(0)]
176 | Cat_14 = self.network.add_concatenation(input_tensors14)
177 | C2f_15 = C2f(self.network, state_dict, Cat_14.get_output(0), width_256,
178 | depth_3, False, 1, 0.5, 'C2f.15')
179 | Conv_16 = Conv(self.network, state_dict, C2f_15.get_output(0),
180 | width_256, 3, 2, 1, 'Conv.16')
181 | input_tensors17 = [Conv_16.get_output(0), C2f_12.get_output(0)]
182 | Cat_17 = self.network.add_concatenation(input_tensors17)
183 | C2f_18 = C2f(self.network, state_dict, Cat_17.get_output(0), width_512,
184 | depth_3, False, 1, 0.5, 'C2f.18')
185 | Conv_19 = Conv(self.network, state_dict, C2f_18.get_output(0),
186 | width_512, 3, 2, 1, 'Conv.19')
187 | input_tensors20 = [Conv_19.get_output(0), SPPF_9.get_output(0)]
188 | Cat_20 = self.network.add_concatenation(input_tensors20)
189 | C2f_21 = C2f(self.network, state_dict, Cat_20.get_output(0),
190 | width_1024, depth_3, False, 1, 0.5, 'C2f.21')
191 | input_tensors22 = [
192 | C2f_15.get_output(0),
193 | C2f_18.get_output(0),
194 | C2f_21.get_output(0)
195 | ]
196 | batched_nms = Detect(self.network, state_dict, input_tensors22,
197 | strides, 'Detect.22', reg_max, fp16, iou_thres,
198 | conf_thres, topk)
199 | for o in range(batched_nms.num_outputs):
200 | self.network.mark_output(batched_nms.get_output(o))
201 |
202 |
203 | class TRTModule(torch.nn.Module):
204 | dtypeMapping = {
205 | trt.bool: torch.bool,
206 | trt.int8: torch.int8,
207 | trt.int32: torch.int32,
208 | trt.float16: torch.float16,
209 | trt.float32: torch.float32
210 | }
211 |
212 | def __init__(self, weight: Union[str, Path],
213 | device: Optional[torch.device]) -> None:
214 | super(TRTModule, self).__init__()
215 | self.weight = Path(weight) if isinstance(weight, str) else weight
216 | self.device = device if device is not None else torch.device('cuda:0')
217 | self.stream = torch.cuda.Stream(device=device)
218 | self.__init_engine()
219 | self.__init_bindings()
220 |
221 | def __init_engine(self) -> None:
222 | logger = trt.Logger(trt.Logger.WARNING)
223 | trt.init_libnvinfer_plugins(logger, namespace='')
224 | with trt.Runtime(logger) as runtime:
225 | model = runtime.deserialize_cuda_engine(self.weight.read_bytes())
226 |
227 | context = model.create_execution_context()
228 | num_bindings = model.num_bindings
229 | names = [model.get_binding_name(i) for i in range(num_bindings)]
230 |
231 | self.bindings: List[int] = [0] * num_bindings
232 | num_inputs, num_outputs = 0, 0
233 |
234 | for i in range(num_bindings):
235 | if model.binding_is_input(i):
236 | num_inputs += 1
237 | else:
238 | num_outputs += 1
239 |
240 | self.num_bindings = num_bindings
241 | self.num_inputs = num_inputs
242 | self.num_outputs = num_outputs
243 | self.model = model
244 | self.context = context
245 | self.input_names = names[:num_inputs]
246 | self.output_names = names[num_inputs:]
247 | self.idx = list(range(self.num_outputs))
248 |
249 | def __init_bindings(self) -> None:
250 | idynamic = odynamic = False
251 | Tensor = namedtuple('Tensor', ('name', 'dtype', 'shape'))
252 | inp_info = []
253 | out_info = []
254 | for i, name in enumerate(self.input_names):
255 | assert self.model.get_binding_name(i) == name
256 | dtype = self.dtypeMapping[self.model.get_binding_dtype(i)]
257 | shape = tuple(self.model.get_binding_shape(i))
258 | if -1 in shape:
259 | idynamic |= True
260 | inp_info.append(Tensor(name, dtype, shape))
261 | for i, name in enumerate(self.output_names):
262 | i += self.num_inputs
263 | assert self.model.get_binding_name(i) == name
264 | dtype = self.dtypeMapping[self.model.get_binding_dtype(i)]
265 | shape = tuple(self.model.get_binding_shape(i))
266 | if -1 in shape:
267 | odynamic |= True
268 | out_info.append(Tensor(name, dtype, shape))
269 |
270 | if not odynamic:
271 | self.output_tensor = [
272 | torch.empty(info.shape, dtype=info.dtype, device=self.device)
273 | for info in out_info
274 | ]
275 | self.idynamic = idynamic
276 | self.odynamic = odynamic
277 | self.inp_info = inp_info
278 | self.out_info = out_info
279 |
280 | def set_profiler(self, profiler: Optional[trt.IProfiler]):
281 | self.context.profiler = profiler \
282 | if profiler is not None else trt.Profiler()
283 |
284 | def set_desired(self, desired: Optional[Union[List, Tuple]]):
285 | if isinstance(desired,
286 | (list, tuple)) and len(desired) == self.num_outputs:
287 | self.idx = [self.output_names.index(i) for i in desired]
288 |
289 | def forward(self, *inputs) -> Union[Tuple, torch.Tensor]:
290 |
291 | assert len(inputs) == self.num_inputs
292 | contiguous_inputs: List[torch.Tensor] = [
293 | i.contiguous() for i in inputs
294 | ]
295 |
296 | for i in range(self.num_inputs):
297 | self.bindings[i] = contiguous_inputs[i].data_ptr()
298 | if self.idynamic:
299 | self.context.set_binding_shape(
300 | i, tuple(contiguous_inputs[i].shape))
301 |
302 | outputs: List[torch.Tensor] = []
303 |
304 | for i in range(self.num_outputs):
305 | j = i + self.num_inputs
306 | if self.odynamic:
307 | shape = tuple(self.context.get_binding_shape(j))
308 | output = torch.empty(size=shape,
309 | dtype=self.out_info[i].dtype,
310 | device=self.device)
311 | else:
312 | output = self.output_tensor[i]
313 | self.bindings[j] = output.data_ptr()
314 | outputs.append(output)
315 |
316 | self.context.execute_async_v2(self.bindings, self.stream.cuda_stream)
317 | self.stream.synchronize()
318 |
319 | return tuple(outputs[i]
320 | for i in self.idx) if len(outputs) > 1 else outputs[0]
321 |
322 |
323 | class TRTProfilerV1(trt.IProfiler):
324 |
325 | def __init__(self):
326 | trt.IProfiler.__init__(self)
327 | self.total_runtime = 0.0
328 | self.recorder = defaultdict(float)
329 |
330 | def report_layer_time(self, layer_name: str, ms: float):
331 | self.total_runtime += ms * 1000
332 | self.recorder[layer_name] += ms * 1000
333 |
334 | def report(self):
335 | f = '\t%40s\t\t\t\t%10.4f'
336 | print('\t%40s\t\t\t\t%10s' % ('layername', 'cost(us)'))
337 | for name, cost in sorted(self.recorder.items(), key=lambda x: -x[1]):
338 | print(
339 | f %
340 | (name if len(name) < 40 else name[:35] + ' ' + '*' * 4, cost))
341 | print(f'\nTotal Inference Time: {self.total_runtime:.4f}(us)')
342 |
343 |
344 | class TRTProfilerV0(trt.IProfiler):
345 |
346 | def __init__(self):
347 | trt.IProfiler.__init__(self)
348 |
349 | def report_layer_time(self, layer_name: str, ms: float):
350 | f = '\t%40s\t\t\t\t%10.4fms'
351 | print(f % (layer_name if len(layer_name) < 40 else layer_name[:35] +
352 | ' ' + '*' * 4, ms))
353 |
--------------------------------------------------------------------------------
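
A sketch of the two entry points above: EngineBuilder serializes the exported ONNX into a TensorRT engine (written next to the ONNX file with an .engine suffix), and TRTModule runs it with torch tensors (imports assume srcs/ is on PYTHONPATH):

import torch
from models import EngineBuilder, TRTModule

builder = EngineBuilder('models/onnx/yolov8n.onnx', device=0)
builder.build(fp16=True, input_shape=(1, 3, 640, 640))

device = torch.device('cuda:0')
model = TRTModule('models/onnx/yolov8n.engine', device)
model.set_desired(['num_dets', 'bboxes', 'scores', 'labels'])
outputs = model(torch.zeros(1, 3, 640, 640, device=device))
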
/srcs/models/pycuda_api.py:
--------------------------------------------------------------------------------
1 | import os
2 | import warnings
3 | from dataclasses import dataclass
4 | from pathlib import Path
5 | from typing import List, Optional, Tuple, Union
6 |
7 | import numpy as np
8 | import pycuda.autoinit # noqa F401
9 | import pycuda.driver as cuda
10 | import tensorrt as trt
11 | from numpy import ndarray
12 |
13 | os.environ['CUDA_MODULE_LOADING'] = 'LAZY'
14 | warnings.filterwarnings(action='ignore', category=DeprecationWarning)
15 |
16 |
17 | @dataclass
18 | class Tensor:
19 | name: str
20 | dtype: np.dtype
21 | shape: Tuple
22 | cpu: ndarray
23 | gpu: int
24 |
25 |
26 | class TRTEngine:
27 |
28 | def __init__(self, weight: Union[str, Path]) -> None:
29 | self.weight = Path(weight) if isinstance(weight, str) else weight
30 | self.stream = cuda.Stream(0)
31 | self.__init_engine()
32 | self.__init_bindings()
33 | self.__warm_up()
34 |
35 | def __init_engine(self) -> None:
36 | logger = trt.Logger(trt.Logger.WARNING)
37 | trt.init_libnvinfer_plugins(logger, namespace='')
38 | with trt.Runtime(logger) as runtime:
39 | model = runtime.deserialize_cuda_engine(self.weight.read_bytes())
40 |
41 | context = model.create_execution_context()
42 |
43 | names = [model.get_binding_name(i) for i in range(model.num_bindings)]
44 | self.num_bindings = model.num_bindings
45 | self.bindings: List[int] = [0] * self.num_bindings
46 | num_inputs, num_outputs = 0, 0
47 |
48 | for i in range(model.num_bindings):
49 | if model.binding_is_input(i):
50 | num_inputs += 1
51 | else:
52 | num_outputs += 1
53 |
54 | self.num_inputs = num_inputs
55 | self.num_outputs = num_outputs
56 | self.model = model
57 | self.context = context
58 | self.input_names = names[:num_inputs]
59 | self.output_names = names[num_inputs:]
60 |
61 | def __init_bindings(self) -> None:
62 | dynamic = False
63 | inp_info = []
64 | out_info = []
65 | out_ptrs = []
66 | for i, name in enumerate(self.input_names):
67 | assert self.model.get_binding_name(i) == name
68 | dtype = trt.nptype(self.model.get_binding_dtype(i))
69 | shape = tuple(self.model.get_binding_shape(i))
70 | if -1 in shape:
71 | dynamic |= True
72 | if not dynamic:
73 | cpu = np.empty(shape, dtype)
74 | gpu = cuda.mem_alloc(cpu.nbytes)
75 | cuda.memcpy_htod_async(gpu, cpu, self.stream)
76 | else:
77 | cpu, gpu = np.empty(0), 0
78 | inp_info.append(Tensor(name, dtype, shape, cpu, gpu))
79 | for i, name in enumerate(self.output_names):
80 | i += self.num_inputs
81 | assert self.model.get_binding_name(i) == name
82 | dtype = trt.nptype(self.model.get_binding_dtype(i))
83 | shape = tuple(self.model.get_binding_shape(i))
84 | if not dynamic:
85 | cpu = np.empty(shape, dtype=dtype)
86 | gpu = cuda.mem_alloc(cpu.nbytes)
87 | cuda.memcpy_htod_async(gpu, cpu, self.stream)
88 | out_ptrs.append(gpu)
89 | else:
90 | cpu, gpu = np.empty(0), 0
91 | out_info.append(Tensor(name, dtype, shape, cpu, gpu))
92 |
93 | self.is_dynamic = dynamic
94 | self.inp_info = inp_info
95 | self.out_info = out_info
96 | self.out_ptrs = out_ptrs
97 |
98 | def __warm_up(self) -> None:
99 | if self.is_dynamic:
100 | print('Your engine has dynamic axes, please warm it up yourself!')
101 | return
102 | for _ in range(10):
103 | inputs = []
104 | for i in self.inp_info:
105 | inputs.append(i.cpu)
106 | self.__call__(inputs)
107 |
108 | def set_profiler(self, profiler: Optional[trt.IProfiler]) -> None:
109 | self.context.profiler = profiler \
110 | if profiler is not None else trt.Profiler()
111 |
112 | def __call__(self, *inputs) -> Union[Tuple, ndarray]:
113 |
114 | assert len(inputs) == self.num_inputs
115 | contiguous_inputs: List[ndarray] = [
116 | np.ascontiguousarray(i) for i in inputs
117 | ]
118 |
119 | for i in range(self.num_inputs):
120 |
121 | if self.is_dynamic:
122 | self.context.set_binding_shape(
123 | i, tuple(contiguous_inputs[i].shape))
124 | self.inp_info[i].gpu = cuda.mem_alloc(
125 | contiguous_inputs[i].nbytes)
126 |
127 | cuda.memcpy_htod_async(self.inp_info[i].gpu, contiguous_inputs[i],
128 | self.stream)
129 | self.bindings[i] = int(self.inp_info[i].gpu)
130 |
131 | output_gpu_ptrs: List[int] = []
132 | outputs: List[ndarray] = []
133 |
134 | for i in range(self.num_outputs):
135 | j = i + self.num_inputs
136 | if self.is_dynamic:
137 | shape = tuple(self.context.get_binding_shape(j))
138 | dtype = self.out_info[i].dtype
139 | cpu = np.empty(shape, dtype=dtype)
140 | gpu = cuda.mem_alloc(cpu.nbytes)
141 | cuda.memcpy_htod_async(gpu, cpu, self.stream)
142 | else:
143 | cpu = self.out_info[i].cpu
144 | gpu = self.out_info[i].gpu
145 | outputs.append(cpu)
146 | output_gpu_ptrs.append(gpu)
147 | self.bindings[j] = int(gpu)
148 |
149 | self.context.execute_async_v2(self.bindings, self.stream.handle)
150 | self.stream.synchronize()
151 |
152 | for i, o in enumerate(output_gpu_ptrs):
153 | cuda.memcpy_dtoh_async(outputs[i], o, self.stream)
154 |
155 | return tuple(outputs) if len(outputs) > 1 else outputs[0]
156 |
--------------------------------------------------------------------------------
/srcs/models/torch_utils.py:
--------------------------------------------------------------------------------
1 | from typing import List, Tuple, Union
2 |
3 | import torch
4 | import torch.nn.functional as F
5 | from torch import Tensor
6 | from torchvision.ops import batched_nms, nms
7 |
8 |
9 | def seg_postprocess(
10 | data: Tuple[Tensor],
11 | shape: Union[Tuple, List],
12 | conf_thres: float = 0.25,
13 | iou_thres: float = 0.65) \
14 | -> Tuple[Tensor, Tensor, Tensor, Tensor]:
15 | assert len(data) == 2
16 | h, w = shape[0] // 4, shape[1] // 4 # 4x downsampling
17 | outputs, proto = data[0][0], data[1][0]
18 | bboxes, scores, labels, maskconf = outputs.split([4, 1, 1, 32], 1)
19 | scores, labels = scores.squeeze(), labels.squeeze()
20 | idx = scores > conf_thres
21 | if not idx.any(): # no bounding boxes or seg were created
22 | return bboxes.new_zeros((0, 4)), scores.new_zeros(
23 | (0, )), labels.new_zeros((0, )), bboxes.new_zeros((0, 0, 0, 0))
24 | bboxes, scores, labels, maskconf = \
25 | bboxes[idx], scores[idx], labels[idx], maskconf[idx]
26 | idx = batched_nms(bboxes, scores, labels, iou_thres)
27 | bboxes, scores, labels, maskconf = \
28 | bboxes[idx], scores[idx], labels[idx].int(), maskconf[idx]
29 | masks = (maskconf @ proto).sigmoid().view(-1, h, w)
30 | masks = crop_mask(masks, bboxes / 4.)
31 | masks = F.interpolate(masks[None],
32 | shape,
33 | mode='bilinear',
34 | align_corners=False)[0]
35 | masks = masks.gt_(0.5)[..., None]
36 | return bboxes, scores, labels, masks
37 |
38 |
39 | def pose_postprocess(
40 | data: Union[Tuple, Tensor],
41 | conf_thres: float = 0.25,
42 | iou_thres: float = 0.65) \
43 | -> Tuple[Tensor, Tensor, Tensor]:
44 | if isinstance(data, tuple):
45 | assert len(data) == 1
46 | data = data[0]
47 | outputs = torch.transpose(data[0], 0, 1).contiguous()
48 | bboxes, scores, kpts = outputs.split([4, 1, 51], 1)
49 | scores, kpts = scores.squeeze(), kpts.squeeze()
50 | idx = scores > conf_thres
51 | if not idx.any(): # no bounding boxes or seg were created
52 | return bboxes.new_zeros((0, 4)), scores.new_zeros(
53 | (0, )), bboxes.new_zeros((0, 0, 0))
54 | bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx]
55 | xycenter, wh = bboxes.chunk(2, -1)
56 | bboxes = torch.cat([xycenter - 0.5 * wh, xycenter + 0.5 * wh], -1)
57 | idx = nms(bboxes, scores, iou_thres)
58 | bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx]
59 | return bboxes, scores, kpts.reshape(idx.shape[0], -1, 3)
60 |
61 |
62 | def det_postprocess(data: Tuple[Tensor, Tensor, Tensor, Tensor]):
63 | assert len(data) == 4
64 | iou_thres: float = 0.65
65 | num_dets, bboxes, scores, labels = data[0][0], data[1][0], data[2][
66 | 0], data[3][0]
67 | nums = num_dets.item()
68 | if nums == 0:
69 | return bboxes.new_zeros((0, 4)), scores.new_zeros(
70 | (0, )), labels.new_zeros((0, ))
71 | # check score negative
72 | scores[scores < 0] = 1 + scores[scores < 0]
73 | # add nms
74 | idx = nms(bboxes, scores, iou_thres)
75 | bboxes, scores, labels = bboxes[idx], scores[idx], labels[idx]
76 | bboxes = bboxes[:nums]
77 | scores = scores[:nums]
78 | labels = labels[:nums]
79 |
80 | return bboxes, scores, labels
81 |
82 |
83 | def crop_mask(masks: Tensor, bboxes: Tensor) -> Tensor:
84 | n, h, w = masks.shape
85 |     x1, y1, x2, y2 = torch.chunk(bboxes[:, :, None], 4, 1)  # each shape(n,1,1)
86 |     r = torch.arange(w, device=masks.device,
87 |                      dtype=x1.dtype)[None, None, :]  # column index, shape(1,1,w)
88 |     c = torch.arange(h, device=masks.device,
89 |                      dtype=x1.dtype)[None, :, None]  # row index, shape(1,h,1)
90 |
91 | return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
92 |
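A minimal sketch of how the post-processing above is consumed end to end (it mirrors the driver scripts later in this repo; the engine and image paths below are placeholders):

    import cv2
    import torch

    from models import TRTModule
    from models.torch_utils import det_postprocess
    from models.utils import blob, letterbox

    device = torch.device('cuda:0')
    engine = TRTModule('../models/engine/yolov8n.engine', device)  # placeholder engine path
    H, W = engine.inp_info[0].shape[-2:]
    engine.set_desired(['num_dets', 'bboxes', 'scores', 'labels'])

    frame = cv2.imread('frame.jpg')                                # placeholder image
    bgr, ratio, dwdh = letterbox(frame, (W, H))
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    tensor = torch.asarray(blob(rgb), device=device)
    dwdh = torch.asarray(dwdh * 2, dtype=torch.float32, device=device)

    bboxes, scores, labels = det_postprocess(engine(tensor))
    if bboxes.numel():
        bboxes = (bboxes - dwdh) / ratio   # map back to original-image coordinates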
--------------------------------------------------------------------------------
/srcs/models/utils.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import List, Tuple, Union
3 |
4 | import cv2
5 | import numpy as np
6 | from numpy import ndarray
7 |
8 | # image suffixes
9 | SUFFIXS = ('.bmp', '.dng', '.jpeg', '.jpg', '.mpo', '.png', '.tif', '.tiff',
10 | '.webp', '.pfm')
11 |
12 |
13 | def letterbox(im: ndarray,
14 | new_shape: Union[Tuple, List] = (640, 640),
15 | color: Union[Tuple, List] = (114, 114, 114)) \
16 | -> Tuple[ndarray, float, Tuple[float, float]]:
17 | # Resize and pad image while meeting stride-multiple constraints
18 | shape = im.shape[:2] # current shape [height, width]
19 | if isinstance(new_shape, int):
20 | new_shape = (new_shape, new_shape)
21 | # new_shape: [width, height]
22 |
23 | # Scale ratio (new / old)
24 | r = min(new_shape[0] / shape[1], new_shape[1] / shape[0])
25 | # Compute padding [width, height]
26 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
27 | dw, dh = new_shape[0] - new_unpad[0], new_shape[1] - new_unpad[
28 | 1] # wh padding
29 |
30 | dw /= 2 # divide padding into 2 sides
31 | dh /= 2
32 |
33 | if shape[::-1] != new_unpad: # resize
34 | im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
35 | top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
36 | left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
37 | im = cv2.copyMakeBorder(im,
38 | top,
39 | bottom,
40 | left,
41 | right,
42 | cv2.BORDER_CONSTANT,
43 | value=color) # add border
44 | return im, r, (dw, dh)
45 |
46 |
47 | def blob(im: ndarray, return_seg: bool = False) -> Union[ndarray, Tuple]:
48 | seg = None
49 | if return_seg:
50 | seg = im.astype(np.float32) / 255
51 | im = im.transpose([2, 0, 1])
52 | im = im[np.newaxis, ...]
53 | im = np.ascontiguousarray(im).astype(np.float32) / 255
54 | if return_seg:
55 | return im, seg
56 | else:
57 | return im
58 |
59 |
60 | def sigmoid(x: ndarray) -> ndarray:
61 | return 1. / (1. + np.exp(-x))
62 |
63 |
64 | def bbox_iou(boxes1: ndarray, boxes2: ndarray) -> ndarray:
65 | boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * \
66 | (boxes1[..., 3] - boxes1[..., 1])
67 | boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * \
68 | (boxes2[..., 3] - boxes2[..., 1])
69 | left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
70 | right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
71 | inter_section = np.maximum(right_down - left_up, 0.0)
72 | inter_area = inter_section[..., 0] * inter_section[..., 1]
73 | union_area = boxes1_area + boxes2_area - inter_area
74 | ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)
75 |
76 | return ious
77 |
78 |
79 | def batched_nms(boxes: ndarray,
80 | scores: ndarray,
81 | iou_thres: float = 0.65,
82 | conf_thres: float = 0.25):
83 | labels = np.argmax(scores, axis=-1)
84 | scores = np.max(scores, axis=-1)
85 |
86 | cand = scores > conf_thres
87 | boxes = boxes[cand]
88 | scores = scores[cand]
89 | labels = labels[cand]
90 |
91 | keep_boxes = []
92 | keep_scores = []
93 | keep_labels = []
94 |
95 | for cls in np.unique(labels):
96 | cls_mask = labels == cls
97 | cls_boxes = boxes[cls_mask]
98 | cls_scores = scores[cls_mask]
99 |
100 | while cls_boxes.shape[0] > 0:
101 | max_idx = np.argmax(cls_scores)
102 | max_box = cls_boxes[max_idx:max_idx + 1]
103 | max_score = cls_scores[max_idx:max_idx + 1]
104 | max_label = np.array([cls], dtype=np.int32)
105 | keep_boxes.append(max_box)
106 | keep_scores.append(max_score)
107 | keep_labels.append(max_label)
108 | other_boxes = np.delete(cls_boxes, max_idx, axis=0)
109 | other_scores = np.delete(cls_scores, max_idx, axis=0)
110 | ious = bbox_iou(max_box, other_boxes)
111 | iou_mask = ious < iou_thres
112 | if not iou_mask.any():
113 | break
114 | cls_boxes = other_boxes[iou_mask]
115 | cls_scores = other_scores[iou_mask]
116 |
117 | if len(keep_boxes) == 0:
118 | keep_boxes = np.empty((0, 4), dtype=np.float32)
119 | keep_scores = np.empty((0, ), dtype=np.float32)
120 |         keep_labels = np.empty((0, ), dtype=np.int32)
121 |
122 | else:
123 | keep_boxes = np.concatenate(keep_boxes, axis=0)
124 | keep_scores = np.concatenate(keep_scores, axis=0)
125 | keep_labels = np.concatenate(keep_labels, axis=0)
126 |
127 | return keep_boxes, keep_scores, keep_labels
128 |
129 |
130 | def nms(boxes: ndarray,
131 | scores: ndarray,
132 | iou_thres: float = 0.65,
133 | conf_thres: float = 0.25):
134 | labels = np.argmax(scores, axis=-1)
135 | scores = np.max(scores, axis=-1)
136 |
137 | cand = scores > conf_thres
138 | boxes = boxes[cand]
139 | scores = scores[cand]
140 | labels = labels[cand]
141 |
142 | keep_boxes = []
143 | keep_scores = []
144 | keep_labels = []
145 |
146 | idxs = scores.argsort()
147 | while idxs.size > 0:
148 | max_score_index = idxs[-1]
149 | max_box = boxes[max_score_index:max_score_index + 1]
150 | max_score = scores[max_score_index:max_score_index + 1]
151 | max_label = np.array([labels[max_score_index]], dtype=np.int32)
152 | keep_boxes.append(max_box)
153 | keep_scores.append(max_score)
154 | keep_labels.append(max_label)
155 | if idxs.size == 1:
156 | break
157 | idxs = idxs[:-1]
158 | other_boxes = boxes[idxs]
159 | ious = bbox_iou(max_box, other_boxes)
160 | iou_mask = ious < iou_thres
161 | idxs = idxs[iou_mask]
162 |
163 | if len(keep_boxes) == 0:
164 | keep_boxes = np.empty((0, 4), dtype=np.float32)
165 | keep_scores = np.empty((0, ), dtype=np.float32)
166 |         keep_labels = np.empty((0, ), dtype=np.int32)
167 |
168 | else:
169 | keep_boxes = np.concatenate(keep_boxes, axis=0)
170 | keep_scores = np.concatenate(keep_scores, axis=0)
171 | keep_labels = np.concatenate(keep_labels, axis=0)
172 |
173 | return keep_boxes, keep_scores, keep_labels
174 |
175 |
176 | def path_to_list(images_path: Union[str, Path]) -> List:
177 | if isinstance(images_path, str):
178 | images_path = Path(images_path)
179 | assert images_path.exists()
180 | if images_path.is_dir():
181 | images = [
182 | i.absolute() for i in images_path.iterdir() if i.suffix in SUFFIXS
183 | ]
184 | else:
185 | assert images_path.suffix in SUFFIXS
186 | images = [images_path.absolute()]
187 | return images
188 |
189 |
190 | def crop_mask(masks: ndarray, bboxes: ndarray) -> ndarray:
191 | n, h, w = masks.shape
192 |     x1, y1, x2, y2 = np.split(bboxes[:, :, None], [1, 2, 3],
193 |                               1)  # each shape(n,1,1)
194 |     r = np.arange(w, dtype=x1.dtype)[None, None, :]  # column index, shape(1,1,w)
195 |     c = np.arange(h, dtype=x1.dtype)[None, :, None]  # row index, shape(1,h,1)
196 |
197 | return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
198 |
199 |
200 | def det_postprocess(data: Tuple[ndarray, ndarray, ndarray, ndarray]):
201 |     assert len(data) == 4
202 |     iou_thres: float = 0.65
203 |     num_dets, bboxes, scores, labels = (i[0] for i in data)
204 |     nums = num_dets.item()
205 |     if nums == 0:
206 |         return np.empty((0, 4), dtype=np.float32), np.empty(
207 |             (0, ), dtype=np.float32), np.empty((0, ), dtype=np.int32)
208 |     # the engine only fills the first `nums` slots; drop the padding
209 |     bboxes, scores, labels = bboxes[:nums], scores[:nums], labels[:nums]
210 |     scores[scores < 0] += 1  # wrap negative scores back into [0, 1]
211 |     # greedy NMS by index (the module-level `nms` returns boxes, not indices)
212 |     keep, order = [], scores.argsort()[::-1]
213 |     while order.size > 0:
214 |         keep.append(order[0])
215 |         ious = bbox_iou(bboxes[order[0]:order[0] + 1], bboxes[order[1:]])
216 |         order = order[1:][ious < iou_thres]
217 |     return bboxes[keep], scores[keep], labels[keep]
218 |
219 |
220 | def seg_postprocess(
221 | data: Tuple[ndarray],
222 | shape: Union[Tuple, List],
223 | conf_thres: float = 0.25,
224 | iou_thres: float = 0.65) \
225 | -> Tuple[ndarray, ndarray, ndarray, ndarray]:
226 | assert len(data) == 2
227 | h, w = shape[0] // 4, shape[1] // 4 # 4x downsampling
228 | outputs, proto = (i[0] for i in data)
229 | bboxes, scores, labels, maskconf = np.split(outputs, [4, 5, 6], 1)
230 | scores, labels = scores.squeeze(), labels.squeeze()
231 | idx = scores > conf_thres
232 |     if not idx.any():  # no detections above the confidence threshold
233 | return np.empty((0, 4), dtype=np.float32), \
234 | np.empty((0,), dtype=np.float32), \
235 | np.empty((0,), dtype=np.int32), \
236 | np.empty((0, 0, 0, 0), dtype=np.int32)
237 |
238 | bboxes, scores, labels, maskconf = \
239 | bboxes[idx], scores[idx], labels[idx], maskconf[idx]
240 | cvbboxes = np.concatenate([bboxes[:, :2], bboxes[:, 2:] - bboxes[:, :2]],
241 | 1)
242 | labels = labels.astype(np.int32)
243 | v0, v1 = map(int, (cv2.__version__).split('.')[:2])
244 |     assert v0 == 4, f'OpenCV 4.x is required, got {cv2.__version__}'
245 | if v1 > 6:
246 | idx = cv2.dnn.NMSBoxesBatched(cvbboxes, scores, labels, conf_thres,
247 | iou_thres)
248 | else:
249 | idx = cv2.dnn.NMSBoxes(cvbboxes, scores, conf_thres, iou_thres)
250 | bboxes, scores, labels, maskconf = \
251 | bboxes[idx], scores[idx], labels[idx], maskconf[idx]
252 | masks = sigmoid(maskconf @ proto).reshape(-1, h, w)
253 | masks = crop_mask(masks, bboxes / 4.)
254 | masks = masks.transpose([1, 2, 0])
255 | masks = cv2.resize(masks, (shape[1], shape[0]),
256 | interpolation=cv2.INTER_LINEAR)
257 | masks = masks.transpose(2, 0, 1)
258 | masks = np.ascontiguousarray((masks > 0.5)[..., None], dtype=np.float32)
259 | return bboxes, scores, labels, masks
260 |
261 |
262 | def pose_postprocess(
263 | data: Union[Tuple, ndarray],
264 | conf_thres: float = 0.25,
265 | iou_thres: float = 0.65) \
266 | -> Tuple[ndarray, ndarray, ndarray]:
267 | if isinstance(data, tuple):
268 | assert len(data) == 1
269 | data = data[0]
270 | outputs = np.transpose(data[0], (1, 0))
271 | bboxes, scores, kpts = np.split(outputs, [4, 5], 1)
272 | scores, kpts = scores.squeeze(), kpts.squeeze()
273 | idx = scores > conf_thres
274 |     if not idx.any():  # no detections above the confidence threshold
275 | return np.empty((0, 4), dtype=np.float32), np.empty(
276 | (0, ), dtype=np.float32), np.empty((0, 0, 0), dtype=np.float32)
277 | bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx]
278 | xycenter, wh = np.split(bboxes, [
279 | 2,
280 | ], -1)
281 | cvbboxes = np.concatenate([xycenter - 0.5 * wh, wh], -1)
282 | idx = cv2.dnn.NMSBoxes(cvbboxes, scores, conf_thres, iou_thres)
283 | cvbboxes, scores, kpts = cvbboxes[idx], scores[idx], kpts[idx]
284 | cvbboxes[:, 2:] += cvbboxes[:, :2]
285 | return cvbboxes, scores, kpts.reshape(idx.shape[0], -1, 3)
286 |
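Unlike torchvision's nms, the nms/batched_nms helpers above take an (N, num_classes) score matrix and return the kept boxes, scores and labels directly instead of indices. A tiny self-contained sketch with made-up numbers:

    import numpy as np

    from models.utils import nms

    boxes = np.array([[0, 0, 10, 10],       # overlaps the next box (IoU ~ 0.68)
                      [1, 1, 11, 11],
                      [50, 50, 60, 60]], dtype=np.float32)
    scores = np.array([[0.9, 0.1],          # per-class scores, 2 classes
                       [0.8, 0.2],
                       [0.3, 0.6]], dtype=np.float32)

    keep_boxes, keep_scores, keep_labels = nms(boxes, scores)
    # the second box is suppressed by the first; boxes 0 and 2 survive
    # with labels [0, 1] and scores [0.9, 0.6]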
--------------------------------------------------------------------------------
/srcs/tracker_trt.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on 2021/5/24 13:46
5 | @Author: Wang Cong
6 | @Email : iwangcong@outlook.com
7 | @Version : 0.1
8 | @File : tracker_trt.py
9 | """
10 | import cv2
11 | import numpy as np
12 |
13 | from deep_sort.utils.parser import get_config
14 | from deep_sort.deep_sort import DeepSort
15 |
16 | cfg = get_config()
17 | cfg.merge_from_file("./deep_sort/configs/deep_sort.yaml")
18 | deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
19 | max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
20 | nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
21 | max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
22 | use_cuda=True)
23 |
24 |
25 | def draw_bboxes(image, bboxes, line_thickness):
26 | line_thickness = line_thickness or round(
27 | 0.002 * (image.shape[0] + image.shape[1]) / 2) + 1
28 |
29 | list_pts = []
30 | point_radius = 4
31 |
32 | for (x1, y1, x2, y2, cls_id, pos_id) in bboxes:
33 | color = (0, 255, 0)
34 |
35 | check_point_x = x1
36 | check_point_y = int(y1 + ((y2 - y1) * 0.6))
37 |
38 | c1, c2 = (x1, y1), (x2, y2)
39 | cv2.rectangle(image, c1, c2, color, thickness=line_thickness, lineType=cv2.LINE_AA)
40 |
41 | font_thickness = max(line_thickness - 1, 1)
42 | t_size = cv2.getTextSize(cls_id, 0, fontScale=line_thickness / 3, thickness=font_thickness)[0]
43 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
44 | cv2.rectangle(image, c1, c2, color, -1, cv2.LINE_AA) # filled
45 | cv2.putText(image, '{} ID-{}'.format(cls_id, pos_id), (c1[0], c1[1] - 2), 0, line_thickness / 3,
46 | [225, 255, 255], thickness=font_thickness, lineType=cv2.LINE_AA)
47 |
48 | list_pts.append([check_point_x-point_radius, check_point_y-point_radius])
49 | list_pts.append([check_point_x-point_radius, check_point_y+point_radius])
50 | list_pts.append([check_point_x+point_radius, check_point_y+point_radius])
51 | list_pts.append([check_point_x+point_radius, check_point_y-point_radius])
52 |
53 | ndarray_pts = np.array(list_pts, np.int32)
54 |
55 | cv2.fillPoly(image, [ndarray_pts], color=(0, 0, 255))
56 |
57 | list_pts.clear()
58 |
59 | return image
60 |
61 | def clear():
62 | deepsort.clear()
63 | def update(bboxes, image):
64 | bbox_xywh = []
65 | lbls = []
66 | confs = []
67 | bboxes2draw = []
68 |
69 | if len(bboxes) > 0:
70 | for x1, y1, x2, y2, lbl, conf in bboxes:
71 | obj = [
72 | int((x1 + x2) / 2), int((y1 + y2) / 2),
73 | x2 - x1, y2 - y1
74 | ]
75 | bbox_xywh.append(obj)
76 | lbls.append(lbl)
77 | confs.append(conf)
78 |
79 | xywhs = np.array(bbox_xywh)
80 | confss = np.array(confs)
81 |
82 | outputs = deepsort.update(xywhs, lbls, confss, image)
83 |
84 | for value in list(outputs):
85 | x1, y1, x2, y2, track_label, track_id = value
86 | bboxes2draw.append((int(x1), int(y1), int(x2), int(y2), track_label, int(track_id)))
87 | pass
88 | pass
89 |
90 | return bboxes2draw
91 |
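Importing this module builds the DeepSort instance immediately, so the deep_sort.yaml config (and the re-ID engine it points to) must be reachable from the current working directory, i.e. run from srcs/. A rough sketch of the update/draw contract, with a made-up frame and detection:

    import cv2

    import tracker_trt

    frame = cv2.imread('frame.jpg')                       # placeholder frame
    # detections: (x1, y1, x2, y2, class_name, confidence)
    detections = [(100, 80, 220, 300, 'person', 0.91)]

    tracks = tracker_trt.update(detections, frame)        # -> (x1, y1, x2, y2, class_name, track_id)
    frame = tracker_trt.draw_bboxes(frame, tracks, line_thickness=2)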
--------------------------------------------------------------------------------
/srcs/yolov8_bytetrack_trt.py:
--------------------------------------------------------------------------------
1 | from models import TRTModule
2 | import argparse
3 | from time import time
4 | import cv2
5 | from pathlib import Path
6 | import torch
7 | import ctypes
8 | from bytetrack.byte_tracker import BYTETracker
9 |
10 | from config import CLASSES, COLORS
11 | from models.torch_utils import det_postprocess
12 | from models.utils import blob, letterbox, path_to_list
13 | from datetime import datetime, timedelta
14 | import json
15 | import numpy as np
16 | import random
17 |
18 |
19 |
20 | class ROI:
21 | def __init__(self, x1, y1, x2, y2, roi_id):
22 | self.x1 = x1
23 | self.y1 = y1
24 | self.x2 = x2
25 | self.y2 = y2
26 | self.roi_id = roi_id
27 | self.count = 0
28 |
29 |
30 | DICT_ROIS = {}
31 | DEBOUNCE_PERIOD = timedelta(seconds=2)
32 | person_tracker = {}
33 | debounce_tracker = {}
34 |
35 | color_dict = {}
36 |
37 | def get_random_color(id):
38 | if id not in color_dict:
39 | color_dict[id] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
40 | return color_dict[id]
41 |
42 |
43 |
44 | def main(args):
45 | args_bytetrack = argparse.Namespace()
46 | args_bytetrack.track_thresh = 0.2
47 | args_bytetrack.track_buffer = 200
48 | args_bytetrack.mot20 = True
49 | args_bytetrack.match_thresh = 0.7
50 |
51 | tracker = BYTETracker(args_bytetrack)
52 | device = torch.device(args.device)
53 | Engine = TRTModule(args.engine, device)
54 | H, W = Engine.inp_info[0].shape[-2:]
55 |
56 | Engine.set_desired(['num_dets', 'bboxes', 'scores', 'labels'])
57 |
58 | fps = 0
59 | # input video
60 | cap = cv2.VideoCapture(args.vid)
61 | # input webcam
62 | # cap = cv2.VideoCapture(0)
63 |
64 | video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
65 | video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
66 | out = cv2.VideoWriter('output.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 30, (video_width,video_height))
67 |     while True:
68 | ret, frame = cap.read()
69 |
70 | if frame is None:
71 | print('No image input!')
72 |             break
73 |
74 | start = float(time())
75 | fps_str = "FPS:"
76 | fps_str += "{:.2f}".format(fps)
77 | bgr = frame
78 | bgr, ratio, dwdh = letterbox(bgr, (W, H))
79 | rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
80 |
81 | tensor = blob(rgb, return_seg=False)
82 |
83 | dwdh = torch.asarray(dwdh * 2, dtype=torch.float32, device=device)
84 |
85 | tensor = torch.asarray(tensor, device=device)
86 |
87 | data = Engine(tensor)
88 | bboxes, scores, labels = det_postprocess(data)
89 | # print(labels)
90 |
91 | if bboxes.numel() == 0:
92 | continue
93 |
94 | bboxes -= dwdh
95 | bboxes /= ratio
96 | output = []
97 | for (bbox, score, label) in zip(bboxes, scores, labels):
98 | if label == 0 and score.item() > 0.2:
99 | bbox = bbox.round().int().tolist()
100 | cls_id = int(label)
101 | cls = CLASSES[cls_id]
102 | # x1, y1, x2, y2, conf
103 | output.append([bbox[0], bbox[1], bbox[2], bbox[3], score.item()])
104 | output = np.array(output)
105 |
106 | info_imgs = frame.shape[:2]
107 | img_size = info_imgs
108 |
109 |         if len(output) > 0:
110 | online_targets = tracker.update(output, info_imgs, img_size)
111 | online_tlwhs = []
112 | online_ids = []
113 | online_scores = []
114 | for t in online_targets:
115 | tlwh = t.tlwh
116 | tid = t.track_id
117 | online_tlwhs.append(tlwh)
118 | online_ids.append(tid)
119 | online_scores.append(t.score)
120 |
121 | if args.show:
122 | cv2.rectangle(frame, (int(tlwh[0]), int(tlwh[1])), (int(tlwh[0] + tlwh[2]), int(tlwh[1] + tlwh[3])), get_random_color(tid), 2)
123 | cv2.putText(frame, str(tid), (int(tlwh[0]), int(tlwh[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
124 |
125 |         end = float(time())
126 |
127 |
128 |
129 |
130 |
131 | fps = 1/(end - start)
132 | print(fps_str)
133 | cv2.putText(frame, "YOLOV8-BYTETrack", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
134 | cv2.putText(frame, fps_str, (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
135 | if args.show:
136 | cv2.imshow("output", frame)
137 | if cv2.waitKey(1) & 0xFF == ord('q'):
138 | break
139 | out.write(frame)
140 |     out.release()
141 |     cap.release()
142 | cv2.destroyAllWindows()
143 | # tracker_trt.clear()
144 |
145 |
146 | def parse_args():
147 | parser = argparse.ArgumentParser()
148 | parser.add_argument('--engine', type=str, help='Engine file', default='../models/engine/yolov8n.engine')
149 | parser.add_argument('--vid', type=str, help='Video file', default='../sample_video/sample_2.mp4')
150 | parser.add_argument('--show',
151 | action='store_true',
152 | help='Show the results')
153 | parser.add_argument('--device',
154 | type=str,
155 | default='cuda:0',
156 | help='TensorRT infer device')
157 | args = parser.parse_args()
158 | return args
159 |
160 |
161 | if __name__ == '__main__':
162 | args = parse_args()
163 | main(args)
164 |
165 |
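Assuming a yolov8n engine has already been built to the default path, the ByteTrack demo can be launched from srcs/ roughly like this (sample_1.webm is the clip shipped in sample_video/; --vid accepts any video OpenCV can read):

    python3 yolov8_bytetrack_trt.py --engine ../models/engine/yolov8n.engine --vid ../sample_video/sample_1.webm --show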
--------------------------------------------------------------------------------
/srcs/yolov8_deepsort_trt.py:
--------------------------------------------------------------------------------
1 | from models import TRTModule
2 | import argparse
3 | from time import time
4 | import cv2
5 | from pathlib import Path
6 | import torch
7 | import ctypes
8 | import tracker_trt
9 |
10 |
11 | from config import CLASSES, COLORS
12 | from models.torch_utils import det_postprocess
13 | from models.utils import blob, letterbox, path_to_list
14 | from datetime import datetime, timedelta
15 | import json
16 | import random
17 |
18 |
19 |
20 | class ROI:
21 | def __init__(self, x1, y1, x2, y2, roi_id):
22 | self.x1 = x1
23 | self.y1 = y1
24 | self.x2 = x2
25 | self.y2 = y2
26 | self.roi_id = roi_id
27 | self.count = 0
28 |
29 |
30 | DICT_ROIS = {}
31 | DEBOUNCE_PERIOD = timedelta(seconds=2)
32 | person_tracker = {}
33 | debounce_tracker = {}
34 |
35 |
36 | color_dict = {}
37 |
38 | def get_random_color(id):
39 | if id not in color_dict:
40 | color_dict[id] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
41 | return color_dict[id]
42 |
43 |
44 | def main(args):
45 |
46 |
47 | device = torch.device(args.device)
48 | Engine = TRTModule(args.engine, device)
49 | H, W = Engine.inp_info[0].shape[-2:]
50 |
51 | Engine.set_desired(['num_dets', 'bboxes', 'scores', 'labels'])
52 |
53 | fps = 0
54 | # input video
55 | cap = cv2.VideoCapture(args.vid)
56 | # input webcam
57 | # cap = cv2.VideoCapture(0)
58 |
59 | video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
60 | video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
61 | out = cv2.VideoWriter('output_ds.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 30, (video_width,video_height))
62 |     while True:
63 | ret, frame = cap.read()
64 |
65 | if frame is None:
66 | print('No image input!')
67 | break
68 |
69 | start = float(time())
70 | fps_str = "FPS:"
71 | fps_str += "{:.2f}".format(fps)
72 | bgr = frame
73 | bgr, ratio, dwdh = letterbox(bgr, (W, H))
74 | rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
75 |
76 | tensor = blob(rgb, return_seg=False)
77 |
78 | dwdh = torch.asarray(dwdh * 2, dtype=torch.float32, device=device)
79 |
80 | tensor = torch.asarray(tensor, device=device)
81 |
82 | data = Engine(tensor)
83 | bboxes, scores, labels = det_postprocess(data)
84 | # print(labels)
85 |
86 | if bboxes.numel() == 0:
87 | continue
88 |
89 | bboxes -= dwdh
90 | bboxes /= ratio
91 | detections = []
92 | for (bbox, score, label) in zip(bboxes, scores, labels):
93 | if label == 0 and score.item() > 0.3:
94 | bbox = bbox.round().int().tolist()
95 | cls_id = int(label)
96 | cls = CLASSES[cls_id]
97 | detections.append((bbox[0], bbox[1], bbox[2] , bbox[3], cls, score.item()))
98 | end = float(time())
99 |
100 | list_bbox = tracker_trt.update(detections,frame)
101 | for (x1, y1, x2, y2, cls, track_id) in list_bbox:
102 | color = [0, 255, 0]
103 |
104 | if args.show:
105 | # frame = draw_roi(frame)
106 | cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
107 | cv2.putText(frame, f'{cls} {track_id}', (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
108 |
109 |
110 |
111 | fps = 1/(end - start)
112 | print(fps_str)
113 |
114 | cv2.putText(frame, fps_str, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
115 | cv2.putText(frame, "YOLOV8-DEEP SORT", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
116 | if args.show:
117 | cv2.imshow("output", frame)
118 | if cv2.waitKey(1) & 0xFF == ord('q'):
119 | break
120 | out.write(frame)
121 |
122 | out.release()
123 | cap.release()
124 | cv2.destroyAllWindows()
125 | # tracker_trt.clear()
126 |
127 |
128 | def parse_args():
129 | parser = argparse.ArgumentParser()
130 | parser.add_argument('--engine', type=str, help='Engine file', default='../models/engine/yolov8n.engine')
131 | parser.add_argument('--vid', type=str, help='Video file', default='../sample_video/sample.mp4')
132 | parser.add_argument('--show',
133 | action='store_true',
134 | help='Show the results')
135 | parser.add_argument('--device',
136 | type=str,
137 | default='cuda:0',
138 | help='TensorRT infer device')
139 | args = parser.parse_args()
140 | return args
141 |
142 |
143 | if __name__ == '__main__':
144 | args = parse_args()
145 | main(args)
146 |
147 |
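The DeepSort variant is launched the same way; because tracker_trt.py loads ./deep_sort/configs/deep_sort.yaml via a relative path, it also has to be started from inside srcs/:

    python3 yolov8_deepsort_trt.py --engine ../models/engine/yolov8n.engine --vid ../sample_video/sample_1.webm --show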
--------------------------------------------------------------------------------