├── .dockerignore
├── .github
│   ├── FUNDING.yml
│   └── ISSUE_TEMPLATE
│       ├── config.yml
│       └── unexpected-behaviors.md
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── app.py
├── assets
│   ├── aerial_demo.gif
│   └── dense_demo.gif
├── cfg
│   └── mot.json
├── eval
│   ├── results
│   │   ├── MOT20-01.txt
│   │   ├── MOT20-02.txt
│   │   ├── MOT20-03.txt
│   │   └── MOT20-05.txt
│   └── seqmap.txt
├── fastmot
│   ├── __init__.py
│   ├── detector.py
│   ├── feature_extractor.py
│   ├── flow.py
│   ├── kalman_filter.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── calibrator.py
│   │   ├── label.py
│   │   ├── reid.py
│   │   ├── ssd.py
│   │   └── yolo.py
│   ├── mot.py
│   ├── plugins
│   │   ├── Makefile
│   │   ├── README.md
│   │   ├── get_compute.py
│   │   ├── yolo_layer.cu
│   │   └── yolo_layer.h
│   ├── track.py
│   ├── tracker.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── decoder.py
│   │   ├── distance.py
│   │   ├── inference.py
│   │   ├── matching.py
│   │   ├── numba.py
│   │   ├── profiler.py
│   │   ├── rect.py
│   │   └── visualization.py
│   └── videoio.py
├── requirements.txt
└── scripts
    ├── download_data.sh
    ├── download_models.sh
    ├── install_jetson.sh
    └── yolo2onnx.py
/.dockerignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | **/__pycache__/
3 | **/*.py[cod]
4 | **/*$py.class
5 | **/*.so
6 | **/*.o
7 |
8 | # temporary files
9 | .DS_Store
10 | .cache
11 |
12 | # model files
13 | **/*.pb
14 | **/*.onnx
15 |
16 | # TensorRT stuff
17 | **/*.trt
18 | **/*calib_cache
19 |
20 | # video and image files
21 | *.jpg
22 | *.jpeg
23 | *.png
24 | *.bmp
25 | *.tif
26 | *.tiff
27 | *.heic
28 | *.JPG
29 | *.JPEG
30 | *.PNG
31 | *.BMP
32 | *.TIF
33 | *.TIFF
34 | *.HEIC
35 | *.mp4
36 | *.mov
37 | *.MOV
38 | *.avi
39 |
40 | # dataset
41 | **/VOCdevkit/
42 |
43 | # vscode stuff
44 | .vscode/
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [GeekAlexis]
2 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 | contact_links:
3 | - name: GitHub Community Support
4 | url: https://github.community/
5 | about: Please ask and answer questions here.
6 | - name: GitHub Security Bug Bounty
7 | url: https://bounty.github.com/
8 | about: Please report security vulnerabilities here.
9 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/unexpected-behaviors.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Unexpected behaviors
3 | about: Report unexpected behaviors/bugs or ask questions
4 | title: Please read & provide the following
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | Your issue may already be reported!
11 | Please search the issues before creating one.
12 |
13 | ## Current Behavior
14 |
15 |
16 |
17 | ## How to Reproduce
18 |
19 |
20 |
21 | ## Describe what you want to do
22 | 1. What input videos you will provide, if any:
23 | 2. What outputs you are expecting:
24 | 3. Ask your questions here, if any:
25 |
26 |
27 | ## Your Environment
28 |
29 |
30 | * Desktop
31 | * Operating System and version:
32 | * NVIDIA Driver version:
33 | * Used the docker image?
34 | * NVIDIA Jetson
35 | * Which Jetson?
36 | * Jetpack version:
37 | * Ran install_jetson.sh?
38 | * Reinstalled OpenCV from Jetpack?
39 |
40 | ## Common issues
41 | 1. GStreamer warnings are normal
42 | 2. If you have issues with GStreamer on desktop, disable GStreamer and build OpenCV with FFMPEG instead in the Dockerfile
43 | 3. TensorRT plugin and engine files have to be built on the target platform and cannot be copied from a different architecture
44 | 4. Reinstalled OpenCV is usually not as optimized as the one shipped with Jetpack
45 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | *.so
6 | *.o
7 | .idea/
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # Mac temporary files
132 | .DS_Store
133 | ._*
134 |
135 | # model files
136 | *.pb
137 | *.onnx
138 |
139 | # TensorRT stuff
140 | *.trt
141 | *calib_cache
142 |
143 | # video and image files
144 | *.jpg
145 | *.jpeg
146 | *.png
147 | *.bmp
148 | *.tif
149 | *.tiff
150 | *.heic
151 | *.JPG
152 | *.JPEG
153 | *.PNG
154 | *.BMP
155 | *.TIF
156 | *.TIFF
157 | *.HEIC
158 | *.mp4
159 | *.mov
160 | *.MOV
161 | *.avi
162 |
163 | # dataset
164 | VOCdevkit/
165 |
166 | # vscode stuff
167 | .vscode/
168 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG TRT_IMAGE_VERSION=20.09
2 | FROM nvcr.io/nvidia/tensorrt:${TRT_IMAGE_VERSION}-py3
3 |
4 | ARG TRT_IMAGE_VERSION
5 | ARG OPENCV_VERSION=4.1.1
6 | ARG APP_DIR=/usr/src/app
7 | ARG SCRIPT_DIR=/opt/tensorrt/python
8 | ARG DEBIAN_FRONTEND=noninteractive
9 |
10 | ENV HOME=${APP_DIR}
11 | ENV TZ=America/Los_Angeles
12 |
13 | ENV OPENBLAS_MAIN_FREE=1
14 | ENV OPENBLAS_NUM_THREADS=1
15 | ENV NO_AT_BRIDGE=1
16 |
17 | # Install OpenCV and FastMOT dependencies
18 | RUN apt-get -y update && \
19 | apt-get install -y --no-install-recommends \
20 | wget unzip tzdata \
21 | build-essential cmake pkg-config \
22 | libgtk-3-dev libcanberra-gtk3-module \
23 | libjpeg-dev libpng-dev libtiff-dev \
24 | libavcodec-dev libavformat-dev libswscale-dev \
25 | libv4l-dev libxvidcore-dev libx264-dev \
26 | gfortran libatlas-base-dev \
27 | python3-dev \
28 | gstreamer1.0-tools \
29 | libgstreamer1.0-dev \
30 | libgstreamer-plugins-base1.0-dev \
31 | gstreamer1.0-libav \
32 | gstreamer1.0-plugins-good \
33 | gstreamer1.0-plugins-bad \
34 | gstreamer1.0-plugins-ugly \
35 | libtbb2 libtbb-dev libdc1394-22-dev && \
36 | pip install -U --no-cache-dir setuptools pip && \
37 | pip install --no-cache-dir numpy==1.18.0
38 |
39 | # Build OpenCV
40 | WORKDIR ${HOME}
41 | RUN wget https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip && \
42 | unzip ${OPENCV_VERSION}.zip && rm ${OPENCV_VERSION}.zip && \
43 | mv opencv-${OPENCV_VERSION} OpenCV && \
44 | wget https://github.com/opencv/opencv_contrib/archive/${OPENCV_VERSION}.zip && \
45 | unzip ${OPENCV_VERSION}.zip && rm ${OPENCV_VERSION}.zip && \
46 | mv opencv_contrib-${OPENCV_VERSION} OpenCV/opencv_contrib
47 |
48 | # If you have issues with GStreamer, set -DWITH_GSTREAMER=OFF and -DWITH_FFMPEG=ON
49 | WORKDIR ${HOME}/OpenCV/build
50 | RUN cmake \
51 | -DCMAKE_BUILD_TYPE=RELEASE \
52 | -DCMAKE_INSTALL_PREFIX=/usr/local \
53 | -DOPENCV_EXTRA_MODULES_PATH=${HOME}/OpenCV/opencv_contrib/modules \
54 | -DINSTALL_PYTHON_EXAMPLES=ON \
55 | -DINSTALL_C_EXAMPLES=OFF \
56 | -DBUILD_opencv_python2=OFF \
57 | -DBUILD_TESTS=OFF \
58 | -DBUILD_PERF_TESTS=OFF \
59 | -DBUILD_EXAMPLES=ON \
60 | -DBUILD_PROTOBUF=OFF \
61 | -DENABLE_FAST_MATH=ON \
62 | -DWITH_TBB=ON \
63 | -DWITH_LIBV4L=ON \
64 | -DWITH_CUDA=OFF \
65 | -DWITH_GSTREAMER=ON \
66 | -DWITH_GSTREAMER_0_10=OFF \
67 | -DWITH_FFMPEG=OFF .. && \
68 | make -j$(nproc) && \
69 | make install && \
70 | ldconfig && \
71 | rm -rf ${HOME}/OpenCV && \
72 | rm -rf /var/lib/apt/lists/* && \
73 | apt-get autoremove
74 |
75 | # Install Python dependencies
76 | WORKDIR ${APP_DIR}/FastMOT
77 | COPY requirements.txt .
78 |
79 | # Specify your GPU compute capability with --build-arg for CuPy (e.g. "arch=compute_75,code=sm_75")
80 | ARG CUPY_NVCC_GENERATE_CODE
81 |
82 | # TensorFlow < 2 is not supported on Ubuntu 20.04
83 | RUN if [[ -z ${CUPY_NVCC_GENERATE_CODE} ]]; then \
84 | echo "CUPY_NVCC_GENERATE_CODE not set, building CuPy for all architectures (slower)"; \
85 | fi && \
86 | if dpkg --compare-versions ${TRT_IMAGE_VERSION} ge 20.12; then \
87 | CUPY_NUM_BUILD_JOBS=$(nproc) pip install --no-cache-dir -r <(grep -ivE "tensorflow" requirements.txt); \
88 | else \
89 | dpkg -i ${SCRIPT_DIR}/*-tf_*.deb && \
90 | CUPY_NUM_BUILD_JOBS=$(nproc) pip install --no-cache-dir -r requirements.txt; \
91 | fi
92 |
93 | # ------------------------------------ Extras Below ------------------------------------
94 |
95 | # Stop the container (changes are kept)
96 | # docker stop $(docker ps -ql)
97 |
98 | # Start the container
99 | # docker start -ai $(docker ps -ql)
100 |
101 | # Delete the container
102 | # docker rm $(docker ps -ql)
103 |
104 | # Save changes before deleting the container
105 | # docker commit $(docker ps -ql) fastmot:latest
106 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Yukai Yang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # FastMOT
2 |
3 | [](https://hits.seeyoufarm.com) [](LICENSE) [](https://zenodo.org/badge/latestdoi/237143671)
4 |
5 |
6 |
7 | ## News
8 | - (2021.8.17) Support multi-class tracking
9 | - (2021.7.4) Support yolov4-p5 and yolov4-p6
10 | - (2021.2.13) Support Scaled-YOLOv4 (i.e. yolov4-csp/yolov4x-mish/yolov4-csp-swish)
11 | - (2021.1.3) Add DIoU-NMS for postprocessing
12 | - (2020.11.28) Docker container provided for x86 Ubuntu
13 |
14 | ## Description
15 | FastMOT is a custom multiple object tracker that implements:
16 | - YOLO detector
17 | - SSD detector
18 | - Deep SORT + OSNet ReID
19 | - KLT tracker
20 | - Camera motion compensation
21 |
22 | Two-stage trackers like Deep SORT run detection and feature extraction sequentially, which often becomes a bottleneck. FastMOT significantly speeds up the entire system to run in **real-time** even on Jetson. Motion compensation improves tracking for scenes with a moving camera, where Deep SORT and FairMOT fail.
23 |
24 | To achieve faster processing, FastMOT only runs the detector and feature extractor every N frames, while KLT fills in the gaps efficiently. FastMOT also re-identifies objects that moved out of frame to keep the same IDs.
25 |
26 | YOLOv4 was trained on CrowdHuman (82% mAP@0.5), while the SSD models are pretrained COCO models from TensorFlow. Both detection and feature extraction use the **TensorRT** backend and perform asynchronous inference. In addition, most algorithms, including the KLT tracker, Kalman filter, and data association, are optimized using Numba.
27 |
28 | ## Performance
29 | ### Results on MOT20 train set
30 | | Detector Skip | MOTA | IDF1 | HOTA | MOTP | MT | ML |
31 | |:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|
32 | | N = 1 | 66.8% | 56.4% | 45.0% | 79.3% | 912 | 274 |
33 | | N = 5 | 65.1% | 57.1% | 44.3% | 77.9% | 860 | 317 |
34 |
35 | ### FPS on MOT17 sequences
36 | | Sequence | Density | FPS |
37 | |:-------|:-------:|:-------:|
38 | | MOT17-13 | 5 - 30 | 42 |
39 | | MOT17-04 | 30 - 50 | 26 |
40 | | MOT17-03 | 50 - 80 | 18 |
41 |
42 | Performance is evaluated with YOLOv4 using [TrackEval](https://github.com/JonathonLuiten/TrackEval). Note that neither YOLOv4 nor OSNet was trained or finetuned on the MOT20 dataset, so train set results should generalize well. FPS results are obtained on Jetson Xavier NX (20W 2core mode).
43 |
44 | FastMOT has MOTA scores close to **state-of-the-art** trackers from the MOT Challenge. Increasing N has only a small impact on MOTA. Tracking speed can reach up to **42 FPS** depending on the number of objects. Lighter models (e.g. YOLOv4-tiny) are recommended for more constrained devices like the Jetson Nano. FPS is expected to be in the range of **50 - 150** on a desktop CPU/GPU.
45 |
46 | ## Requirements
47 | - CUDA >= 10
48 | - cuDNN >= 7
49 | - TensorRT >= 7
50 | - OpenCV >= 3.3
51 | - Numpy >= 1.17
52 | - Scipy >= 1.5
53 | - Numba == 0.48
54 | - CuPy == 9.2
55 | - TensorFlow < 2.0 (for SSD support)
56 |
57 | ### Install for x86 Ubuntu
58 | Make sure to have [nvidia-docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker) installed. The image requires NVIDIA Driver version >= 450 for Ubuntu 18.04 and >= 465.19.01 for Ubuntu 20.04. Build and run the docker image:
59 | ```bash
60 | # Add --build-arg TRT_IMAGE_VERSION=21.05 for Ubuntu 20.04
61 | # Add --build-arg CUPY_NVCC_GENERATE_CODE=... to speed up build for your GPU, e.g. "arch=compute_75,code=sm_75"
62 | docker build -t fastmot:latest .
63 |
64 | # Run xhost local:root first if you cannot visualize inside the container
65 | docker run --gpus all --rm -it -v $(pwd):/usr/src/app/FastMOT -v /tmp/.X11-unix:/tmp/.X11-unix -e DISPLAY=unix$DISPLAY -e TZ=$(cat /etc/timezone) fastmot:latest
66 | ```
67 | ### Install for Jetson Nano/TX2/Xavier NX/Xavier
68 | Make sure to have [JetPack >= 4.4](https://developer.nvidia.com/embedded/jetpack) installed and run the script:
69 | ```bash
70 | ./scripts/install_jetson.sh
71 | ```
72 | ### Download models
73 | Pretrained OSNet, SSD, and my YOLOv4 ONNX models are included.
74 | ```bash
75 | ./scripts/download_models.sh
76 | ```
77 | ### Build YOLOv4 TensorRT plugin
78 | ```bash
79 | cd fastmot/plugins
80 | make
81 | ```
82 | ### Download VOC dataset for INT8 calibration
83 | Only required for SSD (not supported on Ubuntu 20.04)
84 | ```bash
85 | ./scripts/download_data.sh
86 | ```
87 |
88 | ## Usage
89 | ```bash
90 | python3 app.py --input-uri ... --mot
91 | ```
92 | - Image sequence: `--input-uri %06d.jpg`
93 | - Video file: `--input-uri file.mp4`
94 | - USB webcam: `--input-uri /dev/video0`
95 | - MIPI CSI camera: `--input-uri csi://0`
96 | - RTSP stream: `--input-uri rtsp://:@:/`
97 | - HTTP stream: `--input-uri http://:@:/`
98 |
99 | Use `--show` to visualize, `--output-uri` to save output, and `--txt` for MOT compliant results.
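
For example, a hypothetical run that tracks a video file, visualizes it, saves an annotated video, and writes MOT Challenge results (the file names below are placeholders):
```bash
python3 app.py --input-uri video.mp4 --mot --show --output-uri out.mp4 --txt MOT20-01.txt
```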
100 |
101 | Show help message for all options:
102 | ```bash
103 | python3 app.py -h
104 | ```
105 | Note that the first run will be slow due to Numba compilation. To use the FFMPEG backend on x86, set `WITH_GSTREAMER = False` [here](https://github.com/GeekAlexis/FastMOT/blob/3a4cad87743c226cf603a70b3f15961b9baf6873/fastmot/videoio.py#L11).
106 |
107 | More options can be configured in `cfg/mot.json`.
108 |
109 | - Set `resolution` and `frame_rate` to match the source data or camera configuration (optional). They are required for image sequences, camera sources, and saving txt results. List all supported configurations for a USB/CSI camera:
110 | ```bash
111 | v4l2-ctl -d /dev/video0 --list-formats-ext
112 | ```
113 | - To swap network, modify `model` under a detector. For example, you can choose from `SSDInceptionV2`, `SSDMobileNetV1`, or `SSDMobileNetV2` for SSD.
114 | - If more accuracy is desired and FPS is not an issue, lower `detector_frame_skip`. Similarly, raise `detector_frame_skip` to speed up tracking at the cost of accuracy. You may also want to change `max_age` such that `max_age` × `detector_frame_skip` ≈ 30 (see the snippet after this list).
115 | - Modify `visualizer_cfg` to toggle drawing options.
116 | - All parameters are documented in the API.
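
As an illustration, here is a minimal excerpt of `cfg/mot.json` showing only the keys discussed above (the rest of the file stays as shipped, and the values below are placeholders rather than recommendations):
```json
{
    "mot_cfg": {
        "detector_frame_skip": 3,
        "yolo_detector_cfg": {
            "model": "YOLOv4",
            "conf_thresh": 0.25
        },
        "tracker_cfg": {
            "max_age": 10
        }
    }
}
```
With `detector_frame_skip` set to 3, a `max_age` of 10 keeps `max_age` × `detector_frame_skip` ≈ 30 as suggested above.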
117 |
118 |
119 |
120 | ## Track custom classes
121 | FastMOT can be easily extended to a custom class (e.g. vehicle). You need to train both YOLO and a ReID network on your object class. Check [Darknet](https://github.com/AlexeyAB/darknet) for training YOLO and [fast-reid](https://github.com/JDAI-CV/fast-reid) for training ReID. After training, convert weights to ONNX format. The TensorRT plugin adapted from [tensorrt_demos](https://github.com/jkjung-avt/tensorrt_demos/) is only compatible with Darknet.
122 |
123 | FastMOT also supports multi-class tracking. It is recommended to train a ReID network for each class to extract features separately.
124 | ### Convert YOLO to ONNX
125 | 1. Install ONNX version 1.4.1 (not the latest version)
126 | ```bash
127 | pip3 install onnx==1.4.1
128 | ```
129 | 2. Convert using your custom cfg and weights
130 | ```bash
131 | ./scripts/yolo2onnx.py --config yolov4.cfg --weights yolov4.weights
132 | ```
133 | ### Add custom YOLOv3/v4
134 | 1. Subclass `fastmot.models.YOLO` as shown here: https://github.com/GeekAlexis/FastMOT/blob/32c217a7d289f15a3bb0c1820982df947c82a650/fastmot/models/yolo.py#L100-L109
135 | ```
136 | ENGINE_PATH : Path
137 | Path to TensorRT engine.
138 | If not found, TensorRT engine will be converted from the ONNX model
139 | at runtime and cached for later use.
140 | MODEL_PATH : Path
141 | Path to ONNX model.
142 | NUM_CLASSES : int
143 | Total number of trained classes.
144 | LETTERBOX : bool
145 | Keep aspect ratio when resizing.
146 | NEW_COORDS : bool
147 | new_coords Darknet parameter for each yolo layer.
148 | INPUT_SHAPE : tuple
149 | Input size in the format `(channel, height, width)`.
150 | LAYER_FACTORS : List[int]
151 | Scale factors with respect to the input size for each yolo layer.
152 | SCALES : List[float]
153 | scale_x_y Darknet parameter for each yolo layer.
154 | ANCHORS : List[List[int]]
155 | Anchors grouped by each yolo layer.
156 | ```
157 | Note that the anchors may not follow the same order as in the Darknet cfg file. You need to mask out the anchors for each yolo layer using the indices in `mask` from the Darknet cfg.
158 | Unlike YOLOv4, the anchors are usually listed in reverse order for YOLOv3 and YOLOv3/v4-tiny. A hypothetical subclass sketch is shown after these steps.
159 | 2. Set class labels to your object classes with `fastmot.models.set_label_map`
160 | 3. Modify cfg/mot.json: set `model` in `yolo_detector_cfg` to the added Python class name and set `class_ids` of interest. You may want to play with `conf_thresh` based on model performance
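
As a rough sketch of step 1, a hypothetical single-class vehicle detector might look like the following (shown standalone for clarity; in practice you would add it alongside the linked example in `fastmot/models/yolo.py`). The paths, input size, scales, and anchors are placeholders that must come from your own Darknet cfg and converted ONNX model:
```python
from pathlib import Path

import fastmot.models


class YOLOv4Vehicle(fastmot.models.YOLO):
    """Hypothetical single-class vehicle detector (all values are placeholders)."""
    ENGINE_PATH = Path(__file__).parent / 'yolov4_vehicle.trt'    # built from the ONNX model and cached at runtime
    MODEL_PATH = Path(__file__).parent / 'yolov4_vehicle.onnx'    # output of scripts/yolo2onnx.py
    NUM_CLASSES = 1
    LETTERBOX = False
    NEW_COORDS = False
    INPUT_SHAPE = (3, 512, 512)
    LAYER_FACTORS = [8, 16, 32]
    SCALES = [1.2, 1.1, 1.05]
    # three (w, h) anchor pairs per yolo layer, selected with the `mask` indices
    ANCHORS = [[11, 22, 24, 60, 37, 116],
               [54, 186, 69, 268, 89, 369],
               [126, 491, 194, 314, 278, 520]]
```
Then set `model` in `yolo_detector_cfg` to `"YOLOv4Vehicle"` and `class_ids` to `[0]` for this single-class example (class IDs start at zero), per step 3.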
161 | ### Add custom ReID
162 | 1. Subclass `fastmot.models.ReID` as shown here: https://github.com/GeekAlexis/FastMOT/blob/32c217a7d289f15a3bb0c1820982df947c82a650/fastmot/models/reid.py#L50-L55 (see the sketch after these steps)
163 | ```
164 | ENGINE_PATH : Path
165 | Path to TensorRT engine.
166 | If not found, TensorRT engine will be converted from the ONNX model
167 | at runtime and cached for later use.
168 | MODEL_PATH : Path
169 | Path to ONNX model.
170 | INPUT_SHAPE : tuple
171 | Input size in the format `(channel, height, width)`.
172 | OUTPUT_LAYOUT : int
173 | Feature dimension output by the model.
174 | METRIC : {'euclidean', 'cosine'}
175 | Distance metric used to match features.
176 | ```
177 | 2. Modify cfg/mot.json: set `model` in `feature_extractor_cfgs` to the added Python class name. For more than one class, add more feature extractor configurations to the list `feature_extractor_cfgs`. You may want to play with `max_assoc_cost` and `max_reid_cost` based on model performance
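
Similarly, a hypothetical ReID subclass for step 1 might look like this (paths, input size, and feature dimension are placeholders for your own converted model):
```python
from pathlib import Path

import fastmot.models


class OSNetVehicle(fastmot.models.ReID):
    """Hypothetical vehicle ReID model (all values are placeholders)."""
    ENGINE_PATH = Path(__file__).parent / 'osnet_vehicle.trt'    # built from the ONNX model and cached at runtime
    MODEL_PATH = Path(__file__).parent / 'osnet_vehicle.onnx'
    INPUT_SHAPE = (3, 256, 128)
    OUTPUT_LAYOUT = 512          # feature dimension
    METRIC = 'euclidean'         # or 'cosine'
```
Then set `"model": "OSNetVehicle"` in the corresponding entry of `feature_extractor_cfgs` (step 2).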
178 |
179 | ## Citation
180 | If you find this repo useful in your project or research, please star and consider citing it:
181 | ```bibtex
182 | @software{yukai_yang_2020_4294717,
183 | author = {Yukai Yang},
184 | title = {{FastMOT: High-Performance Multiple Object Tracking Based on Deep SORT and KLT}},
185 | month = nov,
186 | year = 2020,
187 | publisher = {Zenodo},
188 | version = {v1.0.0},
189 | doi = {10.5281/zenodo.4294717},
190 | url = {https://doi.org/10.5281/zenodo.4294717}
191 | }
192 | ```
193 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | from pathlib import Path
4 | from types import SimpleNamespace
5 | import argparse
6 | import logging
7 | import json
8 | import cv2
9 |
10 | import fastmot
11 | import fastmot.models
12 | from fastmot.utils import ConfigDecoder, Profiler
13 |
14 |
15 | def main():
16 | parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
17 | optional = parser._action_groups.pop()
18 | required = parser.add_argument_group('required arguments')
19 | group = parser.add_mutually_exclusive_group()
20 | required.add_argument('-i', '--input-uri', metavar="URI", required=True, help=
21 | 'URI to input stream\n'
22 | '1) image sequence (e.g. %%06d.jpg)\n'
23 | '2) video file (e.g. file.mp4)\n'
24 | '3) MIPI CSI camera (e.g. csi://0)\n'
25 | '4) USB camera (e.g. /dev/video0)\n'
26 | '5) RTSP stream (e.g. rtsp://:@:/)\n'
27 | '6) HTTP stream (e.g. http://:@:/)\n')
28 | optional.add_argument('-c', '--config', metavar="FILE",
29 | default=Path(__file__).parent / 'cfg' / 'mot.json',
30 | help='path to JSON configuration file')
31 | optional.add_argument('-l', '--labels', metavar="FILE",
32 | help='path to label names (e.g. coco.names)')
33 | optional.add_argument('-o', '--output-uri', metavar="URI",
34 | help='URI to output video file')
35 | optional.add_argument('-t', '--txt', metavar="FILE",
36 | help='path to output MOT Challenge format results (e.g. MOT20-01.txt)')
37 | optional.add_argument('-m', '--mot', action='store_true', help='run multiple object tracker')
38 | optional.add_argument('-s', '--show', action='store_true', help='show visualizations')
39 | group.add_argument('-q', '--quiet', action='store_true', help='reduce output verbosity')
40 | group.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity')
41 | parser._action_groups.append(optional)
42 | args = parser.parse_args()
43 | if args.txt is not None and not args.mot:
44 | raise parser.error('argument -t/--txt: not allowed without argument -m/--mot')
45 |
46 | # set up logging
47 | logging.basicConfig(format='%(asctime)s [%(levelname)8s] %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
48 | logger = logging.getLogger(fastmot.__name__)
49 | if args.quiet:
50 | logger.setLevel(logging.WARNING)
51 | elif args.verbose:
52 | logger.setLevel(logging.DEBUG)
53 | else:
54 | logger.setLevel(logging.INFO)
55 |
56 | # load config file
57 | with open(args.config) as cfg_file:
58 | config = json.load(cfg_file, cls=ConfigDecoder, object_hook=lambda d: SimpleNamespace(**d))
59 |
60 | # load labels if given
61 | if args.labels is not None:
62 | with open(args.labels) as label_file:
63 | label_map = label_file.read().splitlines()
64 | fastmot.models.set_label_map(label_map)
65 |
66 | stream = fastmot.VideoIO(config.resize_to, args.input_uri, args.output_uri, **vars(config.stream_cfg))
67 |
68 | mot = None
69 | txt = None
70 | if args.mot:
71 | draw = args.show or args.output_uri is not None
72 | mot = fastmot.MOT(config.resize_to, **vars(config.mot_cfg), draw=draw)
73 | mot.reset(stream.cap_dt)
74 | if args.txt is not None:
75 | Path(args.txt).parent.mkdir(parents=True, exist_ok=True)
76 | txt = open(args.txt, 'w')
77 | if args.show:
78 | cv2.namedWindow('Video', cv2.WINDOW_AUTOSIZE)
79 |
80 | logger.info('Starting video capture...')
81 | stream.start_capture()
82 | try:
83 | with Profiler('app') as prof:
84 | while not args.show or cv2.getWindowProperty('Video', 0) >= 0:
85 | frame = stream.read()
86 | if frame is None:
87 | break
88 |
89 | if args.mot:
90 | mot.step(frame)
91 | if txt is not None:
92 | for track in mot.visible_tracks():
93 | tl = track.tlbr[:2] / config.resize_to * stream.resolution
94 | br = track.tlbr[2:] / config.resize_to * stream.resolution
95 | w, h = br - tl + 1
96 | txt.write(f'{mot.frame_count},{track.trk_id},{tl[0]:.6f},{tl[1]:.6f},'
97 | f'{w:.6f},{h:.6f},-1,-1,-1\n')
98 |
99 | if args.show:
100 | cv2.imshow('Video', frame)
101 | if cv2.waitKey(1) & 0xFF == 27:
102 | break
103 | if args.output_uri is not None:
104 | stream.write(frame)
105 | finally:
106 | # clean up resources
107 | if txt is not None:
108 | txt.close()
109 | stream.release()
110 | cv2.destroyAllWindows()
111 |
112 | # timing statistics
113 | if args.mot:
114 | avg_fps = round(mot.frame_count / prof.duration)
115 | logger.info('Average FPS: %d', avg_fps)
116 | mot.print_timing_info()
117 |
118 |
119 | if __name__ == '__main__':
120 | main()
121 |
--------------------------------------------------------------------------------
/assets/aerial_demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GeekAlexis/FastMOT/e607640f80ebdd9a6686db50faf3bd2a7ab43cfc/assets/aerial_demo.gif
--------------------------------------------------------------------------------
/assets/dense_demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GeekAlexis/FastMOT/e607640f80ebdd9a6686db50faf3bd2a7ab43cfc/assets/dense_demo.gif
--------------------------------------------------------------------------------
/cfg/mot.json:
--------------------------------------------------------------------------------
1 | {
2 | "resize_to": [1280, 720],
3 |
4 | "stream_cfg": {
5 | "resolution": [1920, 1080],
6 | "frame_rate": 30,
7 | "buffer_size": 10
8 | },
9 |
10 | "mot_cfg": {
11 | "detector_type": "YOLO",
12 | "detector_frame_skip": 5,
13 | "class_ids": [1],
14 |
15 | "ssd_detector_cfg": {
16 | "model": "SSDInceptionV2",
17 | "tile_overlap": 0.25,
18 | "tiling_grid": [4, 2],
19 | "conf_thresh": 0.5,
20 | "merge_thresh": 0.6,
21 | "max_area": 120000
22 | },
23 | "yolo_detector_cfg": {
24 | "model": "YOLOv4",
25 | "conf_thresh": 0.25,
26 | "nms_thresh": 0.5,
27 | "max_area": 800000,
28 | "min_aspect_ratio": 1.2
29 | },
30 | "public_detector_cfg": {
31 | "sequence_path": "MOT20/train/MOT20-01",
32 | "conf_thresh": 0.5,
33 | "max_area": 800000
34 | },
35 |
36 | "feature_extractor_cfgs": [
37 | {
38 | "model": "OSNet025",
39 | "batch_size": 16
40 | }
41 | ],
42 |
43 | "tracker_cfg": {
44 | "max_age": 6,
45 | "age_penalty": 2,
46 | "motion_weight": 0.2,
47 | "max_assoc_cost": 0.8,
48 | "max_reid_cost": 0.6,
49 | "iou_thresh": 0.4,
50 | "duplicate_thresh": 0.8,
51 | "occlusion_thresh": 0.7,
52 | "conf_thresh": 0.5,
53 | "confirm_hits": 1,
54 | "history_size": 50,
55 |
56 | "kalman_filter_cfg": {
57 | "std_factor_acc": 2.25,
58 | "std_offset_acc": 78.5,
59 | "std_factor_det": [0.08, 0.08],
60 | "std_factor_klt": [0.14, 0.14],
61 | "min_std_det": [4.0, 4.0],
62 | "min_std_klt": [5.0, 5.0],
63 | "init_pos_weight": 5,
64 | "init_vel_weight": 12,
65 | "vel_coupling": 0.6,
66 | "vel_half_life": 2
67 | },
68 |
69 | "flow_cfg": {
70 | "bg_feat_scale_factor": [0.1, 0.1],
71 | "opt_flow_scale_factor": [0.5, 0.5],
72 | "feat_density": 0.005,
73 | "feat_dist_factor": 0.06,
74 | "ransac_max_iter": 500,
75 | "ransac_conf": 0.99,
76 | "max_error": 100,
77 | "inlier_thresh": 4,
78 | "bg_feat_thresh": 10,
79 | "obj_feat_params": {
80 | "maxCorners": 1000,
81 | "qualityLevel": 0.06,
82 | "blockSize": 3
83 | },
84 | "opt_flow_params": {
85 | "winSize": [5, 5],
86 | "maxLevel": 5,
87 | "criteria": [3, 10, 0.03]
88 | }
89 | }
90 | },
91 |
92 | "visualizer_cfg": {
93 | "draw_detections": false,
94 | "draw_confidence": false,
95 | "draw_covariance": false,
96 | "draw_klt": false,
97 | "draw_obj_flow": false,
98 | "draw_bg_flow": false,
99 | "draw_trajectory": false
100 |
101 | }
102 | }
103 | }
104 |
--------------------------------------------------------------------------------
/eval/seqmap.txt:
--------------------------------------------------------------------------------
1 | name
2 | MOT20-01
3 | MOT20-02
4 | MOT20-03
5 | MOT20-05
--------------------------------------------------------------------------------
/fastmot/__init__.py:
--------------------------------------------------------------------------------
1 | from .videoio import VideoIO
2 | from .mot import MOT
3 | from .feature_extractor import FeatureExtractor
4 | from .tracker import MultiTracker
5 | from .kalman_filter import KalmanFilter
6 | from .flow import Flow
7 | from .track import Track
--------------------------------------------------------------------------------
/fastmot/detector.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from pathlib import Path
3 | import configparser
4 | import abc
5 | import numpy as np
6 | import numba as nb
7 | import cupy as cp
8 | import cupyx.scipy.ndimage
9 | import cv2
10 |
11 | from . import models
12 | from .utils import TRTInference
13 | from .utils.rect import as_tlbr, aspect_ratio, to_tlbr, get_size, area
14 | from .utils.rect import enclosing, multi_crop, iom, diou_nms
15 | from .utils.numba import find_split_indices
16 |
17 |
18 | DET_DTYPE = np.dtype(
19 | [('tlbr', float, 4),
20 | ('label', int),
21 | ('conf', float)],
22 | align=True
23 | )
24 |
25 |
26 | class Detector(abc.ABC):
27 | @abc.abstractmethod
28 | def __init__(self, size):
29 | self.size = size
30 |
31 | def __call__(self, frame):
32 | """Detect objects synchronously."""
33 | self.detect_async(frame)
34 | return self.postprocess()
35 |
36 | @abc.abstractmethod
37 | def detect_async(self, frame):
38 | raise NotImplementedError
39 |
40 | @abc.abstractmethod
41 | def postprocess(self):
42 | raise NotImplementedError
43 |
44 |
45 | class SSDDetector(Detector):
46 | def __init__(self, size,
47 | class_ids,
48 | model='SSDInceptionV2',
49 | tile_overlap=0.25,
50 | tiling_grid=(4, 2),
51 | conf_thresh=0.5,
52 | merge_thresh=0.6,
53 | max_area=120000):
54 | """An object detector for SSD models.
55 |
56 | Parameters
57 | ----------
58 | size : tuple
59 | Width and height of each frame.
60 | class_ids : sequence
61 | Class IDs to detect. Note class ID starts at zero.
62 | model : str, optional
63 | SSD model to use.
64 | Must be the name of a class that inherits `models.SSD`.
65 | tile_overlap : float, optional
66 | Ratio of overlap to width and height of each tile.
67 | tiling_grid : tuple, optional
68 | Width and height of tile layout to split each frame for batch inference.
69 | conf_thresh : float, optional
70 | Detection confidence threshold.
71 | merge_thresh : float, optional
72 | Overlap threshold to merge bounding boxes across tiles.
73 | max_area : int, optional
74 | Max area of bounding boxes to detect.
75 | """
76 | super().__init__(size)
77 | self.model = models.SSD.get_model(model)
78 | assert 0 <= tile_overlap <= 1
79 | self.tile_overlap = tile_overlap
80 | assert tiling_grid[0] >= 1 and tiling_grid[1] >= 1
81 | self.tiling_grid = tiling_grid
82 | assert 0 <= conf_thresh <= 1
83 | self.conf_thresh = conf_thresh
84 | assert 0 <= merge_thresh <= 1
85 | self.merge_thresh = merge_thresh
86 | assert max_area >= 0
87 | self.max_area = max_area
88 |
89 | self.label_mask = np.zeros(self.model.NUM_CLASSES, dtype=np.bool_)
90 | try:
91 | self.label_mask[tuple(class_ids),] = True
92 | except IndexError as err:
93 | raise ValueError('Unsupported class IDs') from err
94 |
95 | self.batch_size = int(np.prod(self.tiling_grid))
96 | self.tiles, self.tiling_region_sz = self._generate_tiles()
97 | self.scale_factor = tuple(np.array(self.size) / self.tiling_region_sz)
98 | self.backend = TRTInference(self.model, self.batch_size)
99 | self.inp_handle = self.backend.input.host.reshape(self.batch_size, *self.model.INPUT_SHAPE)
100 |
101 | def detect_async(self, frame):
102 | """Detects objects asynchronously."""
103 | self._preprocess(frame)
104 | self.backend.infer_async()
105 |
106 | def postprocess(self):
107 | """Synchronizes, applies postprocessing, and returns a record array
108 | of detections (DET_DTYPE).
109 | This API should be called after `detect_async`.
110 | Detections are sorted in ascending order by class ID.
111 | """
112 | det_out = self.backend.synchronize()[0]
113 | detections, tile_ids = self._filter_dets(det_out, self.tiles, self.model.TOPK,
114 | self.label_mask, self.max_area,
115 | self.conf_thresh, self.scale_factor)
116 | detections = self._merge_dets(detections, tile_ids)
117 | return detections
118 |
119 | def _preprocess(self, frame):
120 | frame = cv2.resize(frame, self.tiling_region_sz)
121 | self._normalize(frame, self.tiles, self.inp_handle)
122 |
123 | def _generate_tiles(self):
124 | tile_size = np.array(self.model.INPUT_SHAPE[:0:-1])
125 | tiling_grid = np.array(self.tiling_grid)
126 | step_size = (1 - self.tile_overlap) * tile_size
127 | total_size = (tiling_grid - 1) * step_size + tile_size
128 | total_size = np.rint(total_size).astype(int)
129 | tiles = np.array([to_tlbr((c * step_size[0], r * step_size[1], *tile_size))
130 | for r in range(tiling_grid[1]) for c in range(tiling_grid[0])])
131 | return tiles, tuple(total_size)
132 |
133 | def _merge_dets(self, detections, tile_ids):
134 | detections = np.fromiter(detections, DET_DTYPE, len(detections)).view(np.recarray)
135 | tile_ids = np.fromiter(tile_ids, int, len(tile_ids))
136 | if len(detections) == 0:
137 | return detections
138 | detections = self._merge(detections, tile_ids, self.batch_size, self.merge_thresh)
139 | return detections.view(np.recarray)
140 |
141 | @staticmethod
142 | @nb.njit(parallel=True, fastmath=True, cache=True)
143 | def _normalize(frame, tiles, out):
144 | imgs = multi_crop(frame, tiles)
145 | for i in nb.prange(len(imgs)):
146 | bgr = imgs[i]
147 | # BGR to RGB
148 | rgb = bgr[..., ::-1]
149 | # HWC -> CHW
150 | chw = rgb.transpose(2, 0, 1)
151 | # Normalize to [-1.0, 1.0] interval
152 | out[i] = chw * (2 / 255.) - 1.
153 |
154 | @staticmethod
155 | @nb.njit(fastmath=True, cache=True)
156 | def _filter_dets(det_out, tiles, topk, label_mask, max_area, thresh, scale_factor):
157 | detections = []
158 | tile_ids = []
159 | for tile_idx in range(len(tiles)):
160 | tile = tiles[tile_idx]
161 | w, h = get_size(tile)
162 | tile_offset = tile_idx * topk
163 | for det_idx in range(topk):
164 | offset = (tile_offset + det_idx) * 7
165 | label = int(det_out[offset + 1])
166 | conf = det_out[offset + 2]
167 | if conf < thresh:
168 | break
169 | if label_mask[label]:
170 | xmin = (det_out[offset + 3] * w + tile[0]) * scale_factor[0]
171 | ymin = (det_out[offset + 4] * h + tile[1]) * scale_factor[1]
172 | xmax = (det_out[offset + 5] * w + tile[0]) * scale_factor[0]
173 | ymax = (det_out[offset + 6] * h + tile[1]) * scale_factor[1]
174 | tlbr = as_tlbr((xmin, ymin, xmax, ymax))
175 | if 0 < area(tlbr) <= max_area:
176 | detections.append((tlbr, label, conf))
177 | tile_ids.append(tile_idx)
178 | return detections, tile_ids
179 |
180 | @staticmethod
181 | @nb.njit(fastmath=True, cache=True)
182 | def _merge(dets, tile_ids, num_tile, thresh):
183 | # find duplicate neighbors across tiles
184 | neighbors = [[0 for _ in range(0)] for _ in range(len(dets))]
185 | for i, det in enumerate(dets):
186 | max_ioms = np.zeros(num_tile)
187 | for j, other in enumerate(dets):
188 | if tile_ids[i] != tile_ids[j] and det.label == other.label:
189 | overlap = iom(det.tlbr, other.tlbr)
190 | # use the detection with the greatest IoM from each tile
191 | if overlap >= thresh and overlap > max_ioms[tile_ids[j]]:
192 | max_ioms[tile_ids[j]] = overlap
193 | neighbors[i].append(j)
194 |
195 | # merge neighbors using depth-first search
196 | keep = set(range(len(dets)))
197 | stack = []
198 | for i in range(len(dets)):
199 | if len(neighbors[i]) > 0 and tile_ids[i] != -1:
200 | tile_ids[i] = -1
201 | stack.append(i)
202 | candidates = []
203 | while len(stack) > 0:
204 | for j in neighbors[stack.pop()]:
205 | if tile_ids[j] != -1:
206 | candidates.append(j)
207 | tile_ids[j] = -1
208 | stack.append(j)
209 | for k in candidates:
210 | dets[i].tlbr[:] = enclosing(dets[i].tlbr, dets[k].tlbr)
211 | dets[i].conf = max(dets[i].conf, dets[k].conf)
212 | keep.discard(k)
213 | dets = dets[np.array(list(keep))]
214 |
215 | # sort detections by class
216 | dets = dets[np.argsort(dets.label)]
217 | return dets
218 |
219 |
220 | class YOLODetector(Detector):
221 | def __init__(self, size,
222 | class_ids,
223 | model='YOLOv4',
224 | conf_thresh=0.25,
225 | nms_thresh=0.5,
226 | max_area=800000,
227 | min_aspect_ratio=1.2):
228 | """An object detector for YOLO models.
229 |
230 | Parameters
231 | ----------
232 | size : tuple
233 | Width and height of each frame.
234 | class_ids : sequence
235 | Class IDs to detect. Note class ID starts at zero.
236 | model : str, optional
237 | YOLO model to use.
238 | Must be the name of a class that inherits `models.YOLO`.
239 | conf_thresh : float, optional
240 | Detection confidence threshold.
241 | nms_thresh : float, optional
242 | Nonmaximum suppression overlap threshold.
243 | Set higher to detect crowded objects.
244 | max_area : int, optional
245 | Max area of bounding boxes to detect.
246 | min_aspect_ratio : float, optional
247 | Min aspect ratio (height over width) of bounding boxes to detect.
248 | Set to 0.1 for square shaped objects.
249 | """
250 | super().__init__(size)
251 | self.model = models.YOLO.get_model(model)
252 | assert 0 <= conf_thresh <= 1
253 | self.conf_thresh = conf_thresh
254 | assert 0 <= nms_thresh <= 1
255 | self.nms_thresh = nms_thresh
256 | assert max_area >= 0
257 | self.max_area = max_area
258 | assert min_aspect_ratio >= 0
259 | self.min_aspect_ratio = min_aspect_ratio
260 |
261 | self.label_mask = np.zeros(self.model.NUM_CLASSES, dtype=np.bool_)
262 | try:
263 | self.label_mask[tuple(class_ids),] = True
264 | except IndexError as err:
265 | raise ValueError('Unsupported class IDs') from err
266 |
267 | self.backend = TRTInference(self.model, 1)
268 | self.inp_handle, self.upscaled_sz, self.bbox_offset = self._create_letterbox()
269 |
270 | def detect_async(self, frame):
271 | """Detects objects asynchronously."""
272 | self._preprocess(frame)
273 | self.backend.infer_async(from_device=True)
274 |
275 | def postprocess(self):
276 | """Synchronizes, applies postprocessing, and returns a record array
277 | of detections (DET_DTYPE).
278 | This API should be called after `detect_async`.
279 | Detections are sorted in ascending order by class ID.
280 | """
281 | det_out = self.backend.synchronize()
282 | det_out = np.concatenate(det_out).reshape(-1, 7)
283 | detections = self._filter_dets(det_out, self.upscaled_sz, self.bbox_offset,
284 | self.label_mask, self.conf_thresh, self.nms_thresh,
285 | self.max_area, self.min_aspect_ratio)
286 | detections = np.fromiter(detections, DET_DTYPE, len(detections)).view(np.recarray)
287 | return detections
288 |
289 | def _preprocess(self, frame):
290 | zoom = np.roll(self.inp_handle.shape, -1) / frame.shape
291 | with self.backend.stream:
292 | frame_dev = cp.asarray(frame)
293 | # resize
294 | small_dev = cupyx.scipy.ndimage.zoom(frame_dev, zoom, order=1, mode='opencv', grid_mode=True)
295 | # BGR to RGB
296 | rgb_dev = small_dev[..., ::-1]
297 | # HWC -> CHW
298 | chw_dev = rgb_dev.transpose(2, 0, 1)
299 | # normalize to [0, 1] interval
300 | cp.multiply(chw_dev, 1 / 255., out=self.inp_handle)
301 |
302 | def _create_letterbox(self):
303 | src_size = np.array(self.size)
304 | dst_size = np.array(self.model.INPUT_SHAPE[:0:-1])
305 | if self.model.LETTERBOX:
306 | scale_factor = min(dst_size / src_size)
307 | scaled_size = np.rint(src_size * scale_factor).astype(int)
308 | img_offset = (dst_size - scaled_size) / 2
309 | roi = np.s_[:, img_offset[1]:img_offset[1] + scaled_size[1],
310 | img_offset[0]:img_offset[0] + scaled_size[0]]
311 | upscaled_sz = np.rint(dst_size / scale_factor).astype(int)
312 | bbox_offset = (upscaled_sz - src_size) / 2
313 | else:
314 | roi = np.s_[:]
315 | upscaled_sz = src_size
316 | bbox_offset = np.zeros(2)
317 | inp_reshaped = self.backend.input.device.reshape(self.model.INPUT_SHAPE)
318 | inp_reshaped[:] = 0.5 # initial value for letterbox
319 | inp_handle = inp_reshaped[roi]
320 | return inp_handle, upscaled_sz, bbox_offset
321 |
322 | @staticmethod
323 | @nb.njit(fastmath=True, cache=True)
324 | def _filter_dets(det_out, size, offset, label_mask, conf_thresh, nms_thresh, max_area, min_ar):
325 | """
326 | det_out: a list of 3 tensors, where each tensor
327 | contains a multiple of 7 float32 numbers in
328 | the order of [x, y, w, h, box_confidence, class_id, class_prob]
329 | """
330 | # filter by class and score
331 | keep = []
332 | for i in range(len(det_out)):
333 | if label_mask[int(det_out[i, 5])]:
334 | score = det_out[i, 4] * det_out[i, 6]
335 | if score >= conf_thresh:
336 | keep.append(i)
337 | det_out = det_out[np.array(keep)]
338 |
339 | # scale to pixel values
340 | det_out[:, :4] *= np.append(size, size)
341 | det_out[:, :2] -= offset
342 |
343 | # per-class NMS
344 | det_out = det_out[np.argsort(det_out[:, 5])]
345 | split_indices = find_split_indices(det_out[:, 5])
346 | all_indices = np.arange(len(det_out))
347 |
348 | keep = []
349 | for i in range(len(split_indices) + 1):
350 | begin = 0 if i == 0 else split_indices[i - 1]
351 | end = len(det_out) if i == len(split_indices) else split_indices[i]
352 | cls_dets = det_out[begin:end]
353 | cls_keep = diou_nms(cls_dets[:, :4], cls_dets[:, 4], nms_thresh)
354 | keep.extend(all_indices[begin:end][cls_keep])
355 | nms_dets = det_out[np.array(keep)]
356 |
357 | # create detections
358 | detections = []
359 | for i in range(len(nms_dets)):
360 | tlbr = to_tlbr(nms_dets[i, :4])
361 | label = int(nms_dets[i, 5])
362 | conf = nms_dets[i, 4] * nms_dets[i, 6]
363 | if 0 < area(tlbr) <= max_area and aspect_ratio(tlbr) >= min_ar:
364 | detections.append((tlbr, label, conf))
365 | return detections
366 |
367 |
368 | class PublicDetector(Detector):
369 | def __init__(self, size,
370 | class_ids,
371 | frame_skip,
372 | sequence_path=None,
373 | conf_thresh=0.5,
374 | max_area=800000):
375 | """Class to use MOT Challenge's public detections.
376 |
377 | Parameters
378 | ----------
379 | size : tuple
380 | Width and height of each frame.
381 | class_ids : sequence
382 | Class IDs to detect. Only 1 (i.e. person) is supported.
383 | frame_skip : int
384 | Detector frame skip.
385 | sequence_path : str, optional
386 | Relative path to MOT Challenge's sequence directory.
387 | conf_thresh : float, optional
388 | Detection confidence threshold.
389 | max_area : int, optional
390 | Max area of bounding boxes to detect.
391 | """
392 | super().__init__(size)
393 | assert tuple(class_ids) == (1,)
394 | self.frame_skip = frame_skip
395 | assert sequence_path is not None
396 | self.seq_root = Path(__file__).parents[1] / sequence_path
397 | assert 0 <= conf_thresh <= 1
398 | self.conf_thresh = conf_thresh
399 | assert max_area >= 0
400 | self.max_area = max_area
401 |
402 | assert self.seq_root.exists()
403 | seqinfo = configparser.ConfigParser()
404 | seqinfo.read(self.seq_root / 'seqinfo.ini')
405 | self.seq_size = (int(seqinfo['Sequence']['imWidth']), int(seqinfo['Sequence']['imHeight']))
406 |
407 | self.detections = defaultdict(list)
408 | self.frame_id = 0
409 |
410 | det_txt = self.seq_root / 'det' / 'det.txt'
411 | for mot_challenge_det in np.loadtxt(det_txt, delimiter=','):
412 | frame_id = int(mot_challenge_det[0]) - 1
413 | tlbr = to_tlbr(mot_challenge_det[2:6])
414 | # mot_challenge_det[6]
415 | conf = 1.0
416 | # mot_challenge_det[7]
417 | label = 1 # person
418 | # scale inside frame
419 | tlbr[:2] = tlbr[:2] / self.seq_size * self.size
420 | tlbr[2:] = tlbr[2:] / self.seq_size * self.size
421 | tlbr = np.rint(tlbr)
422 | if conf >= self.conf_thresh and area(tlbr) <= self.max_area:
423 | self.detections[frame_id].append((tlbr, label, conf))
424 |
425 | def detect_async(self, frame):
426 | pass
427 |
428 | def postprocess(self):
429 | detections = np.array(self.detections[self.frame_id], DET_DTYPE).view(np.recarray)
430 | self.frame_id += self.frame_skip
431 | return detections
432 |
--------------------------------------------------------------------------------
/fastmot/feature_extractor.py:
--------------------------------------------------------------------------------
1 | from multiprocessing.pool import ThreadPool
2 | import numpy as np
3 | import numba as nb
4 | import cv2
5 |
6 | from . import models
7 | from .utils import TRTInference
8 | from .utils.rect import multi_crop
9 |
10 |
11 | class FeatureExtractor:
12 | def __init__(self, model='OSNet025', batch_size=16):
13 | """A feature extractor for ReID embeddings.
14 |
15 | Parameters
16 | ----------
17 | model : str, optional
18 | ReID model to use.
19 | Must be the name of a class that inherits `models.ReID`.
20 | batch_size : int, optional
21 | Batch size for inference.
22 | """
23 | self.model = models.ReID.get_model(model)
24 | assert batch_size >= 1
25 | self.batch_size = batch_size
26 |
27 | self.feature_dim = self.model.OUTPUT_LAYOUT
28 | self.backend = TRTInference(self.model, self.batch_size)
29 | self.inp_handle = self.backend.input.host.reshape(self.batch_size, *self.model.INPUT_SHAPE)
30 | self.pool = ThreadPool()
31 |
32 | self.embeddings = []
33 | self.last_num_features = 0
34 |
35 | def __del__(self):
36 | self.pool.close()
37 | self.pool.join()
38 |
39 | def __call__(self, frame, tlbrs):
40 | """Extract feature embeddings from bounding boxes synchronously."""
41 | self.extract_async(frame, tlbrs)
42 | return self.postprocess()
43 |
44 | @property
45 | def metric(self):
46 | return self.model.METRIC
47 |
48 | def extract_async(self, frame, tlbrs):
49 | """Extract feature embeddings from bounding boxes asynchronously."""
50 | imgs = multi_crop(frame, tlbrs)
51 | self.embeddings, cur_imgs = [], []
52 | # pipeline inference and preprocessing the next batch in parallel
53 | for offset in range(0, len(imgs), self.batch_size):
54 | cur_imgs = imgs[offset:offset + self.batch_size]
55 | self.pool.starmap(self._preprocess, enumerate(cur_imgs))
56 | if offset > 0:
57 | embedding_out = self.backend.synchronize()[0]
58 | self.embeddings.append(embedding_out)
59 | self.backend.infer_async()
60 | self.last_num_features = len(cur_imgs)
61 |
62 | def postprocess(self):
63 | """Synchronizes, applies postprocessing, and returns a NxM matrix of N
64 | extracted embeddings with dimension M.
65 | This API should be called after `extract_async`.
66 | """
67 | if self.last_num_features == 0:
68 | return np.empty((0, self.feature_dim))
69 |
70 | embedding_out = self.backend.synchronize()[0][:self.last_num_features * self.feature_dim]
71 | self.embeddings.append(embedding_out)
72 | embeddings = np.concatenate(self.embeddings).reshape(-1, self.feature_dim)
73 | embeddings /= np.linalg.norm(embeddings, axis=1, keepdims=True)
74 | return embeddings
75 |
76 | def null_embeddings(self, detections):
77 | """Returns a NxM matrix of N identical embeddings with dimension M.
78 | This API effectively disables feature extraction.
79 | """
80 | embeddings = np.ones((len(detections), self.feature_dim))
81 | embeddings /= np.linalg.norm(embeddings, axis=1, keepdims=True)
82 | return embeddings
83 |
84 | def _preprocess(self, idx, img):
85 | img = cv2.resize(img, self.model.INPUT_SHAPE[:0:-1])
86 | self._normalize(img, self.inp_handle[idx])
87 |
88 | @staticmethod
89 | @nb.njit(fastmath=True, nogil=True, cache=True)
90 | def _normalize(img, out):
91 | # BGR to RGB
92 | rgb = img[..., ::-1]
93 | # HWC -> CHW
94 | chw = rgb.transpose(2, 0, 1)
95 | # Normalize using ImageNet's mean and std
96 | out[0, ...] = (chw[0, ...] / 255. - 0.485) / 0.229
97 | out[1, ...] = (chw[1, ...] / 255. - 0.456) / 0.224
98 | out[2, ...] = (chw[2, ...] / 255. - 0.406) / 0.225
99 |
--------------------------------------------------------------------------------
/fastmot/flow.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import itertools
3 | import numpy as np
4 | import numba as nb
5 | import cupyx
6 | import cv2
7 |
8 | from .utils.rect import to_tlbr, get_size, get_center
9 | from .utils.rect import intersection, crop
10 | from .utils.numba import mask_area, transform
11 |
12 |
13 | LOGGER = logging.getLogger(__name__)
14 |
15 |
16 | class Flow:
17 | def __init__(self, size,
18 | bg_feat_scale_factor=(0.1, 0.1),
19 | opt_flow_scale_factor=(0.5, 0.5),
20 | feat_density=0.005,
21 | feat_dist_factor=0.06,
22 | ransac_max_iter=500,
23 | ransac_conf=0.99,
24 | max_error=100,
25 | inlier_thresh=4,
26 | bg_feat_thresh=10,
27 | obj_feat_params=None,
28 | opt_flow_params=None):
29 | """A KLT tracker based on optical flow feature point matching.
30 | Camera motion is simultaneously estimated by tracking feature points
31 | on the background.
32 |
33 | Parameters
34 | ----------
35 | size : tuple
36 | Width and height of each frame.
37 | bg_feat_scale_factor : tuple, optional
38 | Width and height scale factors to resize frame for background feature detection.
39 | opt_flow_scale_factor : tuple, optional
40 | Width and height scale factors to resize frame for optical flow.
41 | feat_density : float, optional
42 | Min feature point density to keep inside the bounding box.
43 | feat_dist_factor : float, optional
44 | Target size scale factor to estimate min feature point distance.
45 | ransac_max_iter : int, optional
46 | Max RANSAC iterations to filter matched outliers.
47 | ransac_conf : float, optional
48 | RANSAC confidence threshold to filter matched outliers.
49 | max_error : int, optional
50 | Max optical flow error.
51 | inlier_thresh : int, optional
52 | Min number of inliers for valid matching.
53 | bg_feat_thresh : int, optional
54 | FAST threshold for background feature detection.
55 | obj_feat_params : SimpleNamespace, optional
56 | GFTT parameters for object feature detection, see `cv2.goodFeaturesToTrack`.
57 | opt_flow_params : SimpleNamespace, optional
58 | Optical flow parameters, see `cv2.calcOpticalFlowPyrLK`.
59 | """
60 | self.size = size
61 | assert 0 < bg_feat_scale_factor[0] <= 1 and 0 < bg_feat_scale_factor[1] <= 1
62 | self.bg_feat_scale_factor = bg_feat_scale_factor
63 | assert 0 < opt_flow_scale_factor[0] <= 1 and 0 < opt_flow_scale_factor[1] <= 1
64 | self.opt_flow_scale_factor = opt_flow_scale_factor
65 | assert 0 <= feat_density <= 1
66 | self.feat_density = feat_density
67 | assert feat_dist_factor >= 0
68 | self.feat_dist_factor = feat_dist_factor
69 | assert ransac_max_iter >= 0
70 | self.ransac_max_iter = ransac_max_iter
71 | assert 0 <= ransac_conf <= 1
72 | self.ransac_conf = ransac_conf
73 | assert 0 <= max_error <= 255
74 | self.max_error = max_error
75 | assert inlier_thresh >= 1
76 | self.inlier_thresh = inlier_thresh
77 | assert bg_feat_thresh >= 0
78 | self.bg_feat_thresh = bg_feat_thresh
79 |
80 | self.obj_feat_params = {
81 | "maxCorners": 1000,
82 | "qualityLevel": 0.06,
83 | "blockSize": 3
84 | }
85 | self.opt_flow_params = {
86 | "winSize": (5, 5),
87 | "maxLevel": 5,
88 | "criteria": (3, 10, 0.03)
89 | }
90 | if obj_feat_params is not None:
91 | self.obj_feat_params.update(vars(obj_feat_params))
92 |         if opt_flow_params is not None:
93 |             self.opt_flow_params.update(vars(opt_flow_params))
94 |
95 | self.bg_feat_detector = cv2.FastFeatureDetector_create(threshold=self.bg_feat_thresh)
96 |
97 | # background feature points for visualization
98 | self.bg_keypoints = None
99 | self.prev_bg_keypoints = None
100 |
101 | # preallocate frame buffers
102 | opt_flow_sz = (
103 | round(self.opt_flow_scale_factor[0] * self.size[0]),
104 | round(self.opt_flow_scale_factor[1] * self.size[1])
105 | )
106 | self.frame_gray = cupyx.empty_pinned(self.size[::-1], np.uint8)
107 | self.frame_small = cupyx.empty_pinned(opt_flow_sz[::-1], np.uint8)
108 | self.prev_frame_gray = cupyx.empty_like_pinned(self.frame_gray)
109 | self.prev_frame_small = cupyx.empty_like_pinned(self.frame_small)
110 |
111 | bg_feat_sz = (
112 | round(self.bg_feat_scale_factor[0] * self.size[0]),
113 | round(self.bg_feat_scale_factor[1] * self.size[1])
114 | )
115 | self.prev_frame_bg = cupyx.empty_pinned(bg_feat_sz[::-1], np.uint8)
116 | self.bg_mask_small = cupyx.empty_like_pinned(self.prev_frame_bg)
117 |
118 | self.fg_mask = cupyx.empty_like_pinned(self.frame_gray)
119 | self.frame_rect = to_tlbr((0, 0, *self.size))
120 |
121 | def init(self, frame):
122 | """Preprocesses the first frame to prepare for subsequent `predict`.
123 |
124 | Parameters
125 | ----------
126 | frame : ndarray
127 | Initial frame.
128 | """
129 | cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY, dst=self.prev_frame_gray)
130 | cv2.resize(self.prev_frame_gray, self.prev_frame_small.shape[::-1],
131 | dst=self.prev_frame_small)
132 | self.bg_keypoints = np.empty((0, 2), np.float32)
133 | self.prev_bg_keypoints = np.empty((0, 2), np.float32)
134 |
135 | def predict(self, frame, tracks):
136 | """Predicts tracklet positions in the next frame and estimates camera motion.
137 |
138 | Parameters
139 | ----------
140 | frame : ndarray
141 | The next frame.
142 | tracks : List[Track]
143 | List of tracks to predict.
144 | Feature points of each track are updated in place.
145 |
146 | Returns
147 | -------
148 | Dict[int, ndarray], ndarray
149 | Returns a dictionary with track IDs as keys and predicted bounding
150 |             boxes of [x1, y1, x2, y2] as values, and a 3x3 homography matrix.
151 | """
152 | # preprocess frame
153 | cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY, dst=self.frame_gray)
154 | cv2.resize(self.frame_gray, self.frame_small.shape[::-1], dst=self.frame_small)
155 |
156 | # order tracks from closest to farthest
157 | tracks.sort(reverse=True)
158 |
159 | # detect target feature points
160 | all_prev_pts = []
161 | self.fg_mask[:] = 255
162 | for track in tracks:
163 | inside_tlbr = intersection(track.tlbr, self.frame_rect)
164 | target_mask = crop(self.fg_mask, inside_tlbr)
165 | target_area = mask_area(target_mask)
166 | keypoints = self._rect_filter(track.keypoints, inside_tlbr, self.fg_mask)
167 | # only detect new keypoints when too few are propagated
168 | if len(keypoints) < self.feat_density * target_area:
169 | img = crop(self.prev_frame_gray, inside_tlbr)
170 | feature_dist = self._estimate_feature_dist(target_area, self.feat_dist_factor)
171 | keypoints = cv2.goodFeaturesToTrack(img, mask=target_mask,
172 | minDistance=feature_dist,
173 | **self.obj_feat_params)
174 | if keypoints is None:
175 | keypoints = np.empty((0, 2), np.float32)
176 | else:
177 | keypoints = self._ellipse_filter(keypoints, track.tlbr, inside_tlbr[:2])
178 | # batch keypoints
179 | all_prev_pts.append(keypoints)
180 | # zero out target in foreground mask
181 | target_mask[:] = 0
182 | target_ends = list(itertools.accumulate(len(pts) for pts in
183 | all_prev_pts)) if all_prev_pts else [0]
184 | target_begins = itertools.chain([0], target_ends[:-1])
185 |
186 | # detect background feature points
187 | cv2.resize(self.prev_frame_gray, self.prev_frame_bg.shape[::-1], dst=self.prev_frame_bg)
188 | cv2.resize(self.fg_mask, self.bg_mask_small.shape[::-1], dst=self.bg_mask_small,
189 | interpolation=cv2.INTER_NEAREST)
190 | keypoints = self.bg_feat_detector.detect(self.prev_frame_bg, mask=self.bg_mask_small)
191 | if len(keypoints) == 0:
192 | self.bg_keypoints = np.empty((0, 2), np.float32)
193 | self.prev_frame_gray, self.frame_gray = self.frame_gray, self.prev_frame_gray
194 | self.prev_frame_small, self.frame_small = self.frame_small, self.prev_frame_small
195 | LOGGER.warning('Camera motion estimation failed')
196 | return {}, None
197 | keypoints = np.float32([kp.pt for kp in keypoints])
198 | keypoints = self._unscale_pts(keypoints, self.bg_feat_scale_factor)
199 | bg_begin = target_ends[-1]
200 | all_prev_pts.append(keypoints)
201 |
202 | # match features using optical flow
203 | all_prev_pts = np.concatenate(all_prev_pts)
204 | scaled_prev_pts = self._scale_pts(all_prev_pts, self.opt_flow_scale_factor)
205 | all_cur_pts, status, err = cv2.calcOpticalFlowPyrLK(self.prev_frame_small, self.frame_small,
206 | scaled_prev_pts, None,
207 | **self.opt_flow_params)
208 | status = self._get_status(status, err, self.max_error)
209 | all_cur_pts = self._unscale_pts(all_cur_pts, self.opt_flow_scale_factor, status)
210 |
211 | # save preprocessed frame buffers for next prediction
212 | self.prev_frame_gray, self.frame_gray = self.frame_gray, self.prev_frame_gray
213 | self.prev_frame_small, self.frame_small = self.frame_small, self.prev_frame_small
214 |
215 | # estimate camera motion
216 | homography = None
217 | prev_bg_pts, matched_bg_pts = self._get_good_match(all_prev_pts, all_cur_pts,
218 | status, bg_begin, -1)
219 | if len(matched_bg_pts) < 4:
220 | self.bg_keypoints = np.empty((0, 2), np.float32)
221 | LOGGER.warning('Camera motion estimation failed')
222 | return {}, None
223 | homography, inlier_mask = cv2.findHomography(prev_bg_pts, matched_bg_pts,
224 | method=cv2.RANSAC,
225 | maxIters=self.ransac_max_iter,
226 | confidence=self.ransac_conf)
227 | self.prev_bg_keypoints, self.bg_keypoints = self._get_inliers(prev_bg_pts, matched_bg_pts,
228 | inlier_mask)
229 | if homography is None or len(self.bg_keypoints) < self.inlier_thresh:
230 | self.bg_keypoints = np.empty((0, 2), np.float32)
231 | LOGGER.warning('Camera motion estimation failed')
232 | return {}, None
233 |
234 | # estimate target bounding boxes
235 | next_bboxes = {}
236 | self.fg_mask[:] = 255
237 | for begin, end, track in zip(target_begins, target_ends, tracks):
238 | prev_pts, matched_pts = self._get_good_match(all_prev_pts, all_cur_pts,
239 | status, begin, end)
240 | prev_pts, matched_pts = self._fg_filter(prev_pts, matched_pts, self.fg_mask, self.size)
241 | if len(matched_pts) < 3:
242 | track.keypoints = np.empty((0, 2), np.float32)
243 | continue
244 | # model motion as partial affine
245 | affine_mat, inlier_mask = cv2.estimateAffinePartial2D(prev_pts, matched_pts,
246 | method=cv2.RANSAC,
247 | maxIters=self.ransac_max_iter,
248 | confidence=self.ransac_conf)
249 | if affine_mat is None:
250 | track.keypoints = np.empty((0, 2), np.float32)
251 | continue
252 | est_tlbr = self._estimate_bbox(track.tlbr, affine_mat)
253 | track.prev_keypoints, track.keypoints = self._get_inliers(prev_pts, matched_pts,
254 | inlier_mask)
255 | if (intersection(est_tlbr, self.frame_rect) is None or
256 | len(track.keypoints) < self.inlier_thresh):
257 | track.keypoints = np.empty((0, 2), np.float32)
258 | continue
259 | next_bboxes[track.trk_id] = est_tlbr
260 | track.inlier_ratio = len(track.keypoints) / len(matched_pts)
261 | # zero out predicted target in foreground mask
262 | target_mask = crop(self.fg_mask, est_tlbr)
263 | target_mask[:] = 0
264 | return next_bboxes, homography
265 |
266 | @staticmethod
267 | @nb.njit(fastmath=True, cache=True)
268 | def _estimate_feature_dist(target_area, feat_dist_factor):
269 | est_feat_dist = round(np.sqrt(target_area) * feat_dist_factor)
270 | return max(est_feat_dist, 1)
271 |
272 | @staticmethod
273 | @nb.njit(fastmath=True, cache=True)
274 | def _estimate_bbox(tlbr, affine_mat):
275 | tl = transform(tlbr[:2], affine_mat).ravel()
276 | scale = np.linalg.norm(affine_mat[:2, 0])
277 | scale = 1. if scale < 0.9 or scale > 1.1 else scale
278 | w, h = get_size(tlbr)
279 | return to_tlbr((tl[0], tl[1], w * scale, h * scale))
280 |
281 | @staticmethod
282 | @nb.njit(fastmath=True, cache=True)
283 | def _rect_filter(pts, tlbr, fg_mask):
284 | if len(pts) == 0:
285 | return np.empty((0, 2), np.float32)
286 | pts2i = np.rint(pts).astype(np.int32)
287 | # filter out points outside the rectangle
288 | ge_le = (pts2i >= tlbr[:2]) & (pts2i <= tlbr[2:])
289 | inside = np.where(ge_le[:, 0] & ge_le[:, 1])
290 | pts, pts2i = pts[inside], pts2i[inside]
291 | # keep points inside the foreground area
292 | keep = np.array([i for i in range(len(pts2i)) if
293 | fg_mask[pts2i[i][1], pts2i[i][0]] == 255])
294 | return pts[keep]
295 |
296 | @staticmethod
297 | @nb.njit(fastmath=True, cache=True)
298 | def _ellipse_filter(pts, tlbr, offset):
299 | offset = np.asarray(offset, np.float32)
300 | center = np.array(get_center(tlbr))
301 | semi_axes = np.array(get_size(tlbr)) * 0.5
302 | pts = pts.reshape(-1, 2)
303 | pts = pts + offset
304 | # filter out points outside the ellipse
305 | keep = np.sum(((pts - center) / semi_axes)**2, axis=1) <= 1.
306 | return pts[keep]
307 |
308 | @staticmethod
309 | @nb.njit(fastmath=True, cache=True)
310 | def _fg_filter(prev_pts, cur_pts, fg_mask, frame_sz):
311 | if len(cur_pts) == 0:
312 | return prev_pts, cur_pts
313 | size = np.array(frame_sz)
314 | pts2i = np.rint(cur_pts).astype(np.int32)
315 | # filter out points outside the frame
316 | ge_lt = (pts2i >= 0) & (pts2i < size)
317 | inside = ge_lt[:, 0] & ge_lt[:, 1]
318 | prev_pts, cur_pts = prev_pts[inside], cur_pts[inside]
319 | pts2i = pts2i[inside]
320 | # keep points inside the foreground area
321 | keep = np.array([i for i in range(len(pts2i)) if
322 | fg_mask[pts2i[i][1], pts2i[i][0]] == 255])
323 | return prev_pts[keep], cur_pts[keep]
324 |
325 | @staticmethod
326 | @nb.njit(fastmath=True, cache=True)
327 | def _scale_pts(pts, scale_factor):
328 | scale_factor = np.array(scale_factor, np.float32)
329 | pts = pts * scale_factor
330 | pts = pts.reshape(-1, 1, 2)
331 | return pts
332 |
333 | @staticmethod
334 | @nb.njit(fastmath=True, cache=True)
335 | def _unscale_pts(pts, scale_factor, mask=None):
336 | scale_factor = np.array(scale_factor, np.float32)
337 | unscale_factor = 1 / scale_factor
338 | pts = pts.reshape(-1, 2)
339 | if mask is None:
340 | pts = pts * unscale_factor
341 | else:
342 | idx = np.where(mask)
343 | pts[idx] = pts[idx] * unscale_factor
344 | return pts
345 |
346 | @staticmethod
347 | @nb.njit(fastmath=True, cache=True)
348 | def _get_status(status, err, max_err):
349 | return status.ravel().astype(np.bool_) & (err.ravel() < max_err)
350 |
351 | @staticmethod
352 | @nb.njit(fastmath=True, cache=True)
353 | def _get_good_match(prev_pts, cur_pts, status, begin, end):
354 | keep = np.where(status[begin:end])
355 | prev_pts = prev_pts[begin:end][keep]
356 | cur_pts = cur_pts[begin:end][keep]
357 | return prev_pts, cur_pts
358 |
359 | @staticmethod
360 | @nb.njit(fastmath=True, cache=True)
361 | def _get_inliers(prev_pts, cur_pts, inlier_mask):
362 | keep = np.where(inlier_mask.ravel())
363 | return prev_pts[keep], cur_pts[keep]
364 |
--------------------------------------------------------------------------------
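For reference, the 3x3 homography returned by `predict` above maps background points from the previous frame into the current frame (it is estimated with `cv2.findHomography` on the matched background keypoints). A minimal standalone sketch of applying such a matrix to a point, assuming `homography` is the value returned by `predict`:

    import numpy as np
    import cv2

    # warp an (x, y) point from the previous frame into the current frame
    pt_prev = np.array([[[320., 240.]]])                   # shape (1, 1, 2)
    pt_cur = cv2.perspectiveTransform(pt_prev, homography)
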
/fastmot/kalman_filter.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | import numpy as np
3 | import numba as nb
4 |
5 | from .utils.rect import get_size
6 |
7 |
8 | class MeasType(Enum):
9 | FLOW = 0
10 | DETECTOR = 1
11 |
12 |
13 | class KalmanFilter:
14 | def __init__(self,
15 | std_factor_acc=2.25,
16 | std_offset_acc=78.5,
17 | std_factor_det=(0.08, 0.08),
18 | std_factor_klt=(0.14, 0.14),
19 | min_std_det=(4.0, 4.0),
20 | min_std_klt=(5.0, 5.0),
21 | init_pos_weight=5,
22 | init_vel_weight=12,
23 | vel_coupling=0.6,
24 | vel_half_life=2):
25 | """A simple Kalman filter for tracking bounding boxes in image space.
26 | The 8-dimensional state space
27 | x1, y1, x2, y2, v_x1, v_y1, v_x2, v_y2
28 | contains the bounding box top left corner, bottom right corner,
29 | and their respective velocities.
30 | Object motion follows a modified constant velocity model.
31 | Velocity will decay over time without measurement and bounding box
32 | corners are coupled together to minimize drifting.
33 |
34 | Parameters
35 | ----------
36 | std_factor_acc : float, optional
37 | Object size scale factor to calculate acceleration standard deviation
38 | for process noise.
39 | std_offset_acc : float, optional
40 | Object size offset to calculate acceleration standard deviation
41 | for process noise. Set larger for fast moving objects.
42 | std_factor_det : tuple, optional
43 | Object width and height scale factors to calculate detector measurement
44 | noise standard deviation.
45 | std_factor_klt : tuple, optional
46 | Object width and height scale factors to calculate KLT measurement
47 | noise standard deviation.
48 | min_std_det : tuple, optional
49 | Min detector measurement noise standard deviations.
50 | min_std_klt : tuple, optional
51 | Min KLT measurement noise standard deviations.
52 | init_pos_weight : int, optional
53 | Scale factor to initialize position state standard deviation.
54 | init_vel_weight : int, optional
55 | Scale factor to initialize velocity state standard deviation.
56 | Set larger for fast moving objects.
57 | vel_coupling : float, optional
58 | Factor to couple bounding box corners.
59 | Set 0.5 for max coupling and 1.0 to disable coupling.
60 | vel_half_life : int, optional
61 | Half life in seconds to decay velocity state.
62 | """
63 | assert std_factor_acc >= 0
64 | self.std_factor_acc = std_factor_acc
65 | self.std_offset_acc = std_offset_acc
66 | assert std_factor_det[0] >= 0 and std_factor_det[1] >= 0
67 | self.std_factor_det = std_factor_det
68 | assert std_factor_klt[0] >= 0 and std_factor_klt[1] >= 0
69 | self.std_factor_klt = std_factor_klt
70 | assert min_std_det[0] >= 0 and min_std_det[1] >= 0
71 | self.min_std_det = min_std_det
72 | assert min_std_klt[0] >= 0 and min_std_klt[1] >= 0
73 | self.min_std_klt = min_std_klt
74 | assert init_pos_weight >= 0
75 | self.init_pos_weight = init_pos_weight
76 | assert init_vel_weight >= 0
77 | self.init_vel_weight = init_vel_weight
78 | assert 0 <= vel_coupling <= 1
79 | self.vel_coupling = vel_coupling
80 | assert vel_half_life > 0
81 | self.vel_half_life = vel_half_life
82 |
83 | dt = 1 / 30.
84 | self.acc_cov, self.meas_mat, self.trans_mat = self._init_mat(dt)
85 |
86 | def reset_dt(self, dt):
87 | """Resets process noise, measurement and transition matrices from dt.
88 |
89 | Parameters
90 | ----------
91 | dt : float
92 | Time interval in seconds between each frame.
93 | """
94 | self.acc_cov, self.meas_mat, self.trans_mat = self._init_mat(dt)
95 |
96 | def create(self, det_meas):
97 | """Creates Kalman filter state from unassociated measurement.
98 |
99 | Parameters
100 | ----------
101 | det_meas : ndarray
102 | Detected bounding box of [x1, y1, x2, y2].
103 |
104 | Returns
105 | -------
106 | ndarray, ndarray
107 | Returns the mean vector (8 dimensional) and covariance matrix (8x8
108 | dimensional) of the new track.
109 | """
110 | mean_pos = det_meas
111 | mean_vel = np.zeros_like(mean_pos)
112 | mean = np.r_[mean_pos, mean_vel]
113 |
114 | w, h = get_size(det_meas)
115 | std = np.array([
116 | max(self.init_pos_weight * self.std_factor_det[0] * w, self.min_std_det[0]),
117 | max(self.init_pos_weight * self.std_factor_det[1] * h, self.min_std_det[1]),
118 | max(self.init_pos_weight * self.std_factor_det[0] * w, self.min_std_det[0]),
119 | max(self.init_pos_weight * self.std_factor_det[1] * h, self.min_std_det[1]),
120 | max(self.init_vel_weight * self.std_factor_det[0] * w, self.min_std_det[0]),
121 | max(self.init_vel_weight * self.std_factor_det[1] * h, self.min_std_det[1]),
122 | max(self.init_vel_weight * self.std_factor_det[0] * w, self.min_std_det[0]),
123 | max(self.init_vel_weight * self.std_factor_det[1] * h, self.min_std_det[1])
124 | ], dtype=np.float64)
125 | covariance = np.diag(np.square(std))
126 | return mean, covariance
127 |
128 | def predict(self, mean, covariance):
129 | """Runs Kalman filter prediction step.
130 |
131 | Parameters
132 | ----------
133 | mean : ndarray
134 | The 8 dimensional mean vector of the object state at the previous
135 | time step.
136 | covariance : ndarray
137 | The 8x8 dimensional covariance matrix of the object state at the
138 | previous time step.
139 |
140 | Returns
141 | -------
142 | ndarray, ndarray
143 | Returns the mean vector and covariance matrix of the predicted
144 | state.
145 | """
146 | return self._predict(mean, covariance, self.trans_mat, self.acc_cov,
147 | self.std_factor_acc, self.std_offset_acc)
148 |
149 | def project(self, mean, covariance, meas_type, multiplier=1.):
150 | """Projects state distribution to measurement space.
151 |
152 | Parameters
153 | ----------
154 | mean : ndarray
155 | The state's mean vector (8 dimensional array).
156 | covariance : ndarray
157 | The state's covariance matrix (8x8 dimensional).
158 | meas_type : MeasType
159 | Measurement type indicating where the measurement comes from.
160 | multiplier : float
161 | Multiplier used to adjust the measurement std.
162 |
163 | Returns
164 | -------
165 | ndarray, ndarray
166 | Returns the projected mean and covariance matrix of the given state
167 | estimate.
168 | """
169 | if meas_type == MeasType.FLOW:
170 | std_factor = self.std_factor_klt
171 | min_std = self.min_std_klt
172 | elif meas_type == MeasType.DETECTOR:
173 | std_factor = self.std_factor_det
174 | min_std = self.min_std_det
175 | else:
176 | raise ValueError('Invalid measurement type')
177 |
178 | return self._project(mean, covariance, self.meas_mat, std_factor, min_std, multiplier)
179 |
180 | def update(self, mean, covariance, measurement, meas_type, multiplier=1.):
181 | """Runs Kalman filter correction step.
182 |
183 | Parameters
184 | ----------
185 | mean : ndarray
186 | The predicted state's mean vector (8 dimensional).
187 | covariance : ndarray
188 | The state's covariance matrix (8x8 dimensional).
189 | measurement : ndarray
190 | Bounding box of [x1, y1, x2, y2].
191 | meas_type : MeasType
192 | Measurement type indicating where the measurement comes from.
193 | multiplier : float
194 | Multiplier used to adjust the measurement std.
195 |
196 | Returns
197 | -------
198 | ndarray, ndarray
199 | Returns the measurement-corrected state distribution.
200 | """
201 | projected_mean, projected_cov = self.project(mean, covariance, meas_type, multiplier)
202 |
203 | return self._update(mean, covariance, projected_mean,
204 | projected_cov, measurement, self.meas_mat)
205 |
206 | def motion_distance(self, mean, covariance, measurements):
207 | """Computes squared Mahalanobis distance between `measurements` and state distribution.
208 |
209 | Parameters
210 | ----------
211 | mean : ndarray
212 | The predicted state's mean vector (8 dimensional).
213 | covariance : ndarray
214 | The state's covariance matrix (8x8 dimensional).
215 | measurements : array_like
216 | An Nx4 matrix of N samples of [x1, y1, x2, y2].
217 |
218 | Returns
219 | -------
220 | ndarray
221 | Returns an array of size N such that element i
222 | contains the squared Mahalanobis distance for `measurements[i]`.
223 | """
224 | projected_mean, projected_cov = self.project(mean, covariance, MeasType.DETECTOR)
225 | return self._maha_distance(projected_mean, projected_cov, measurements)
226 |
227 | @staticmethod
228 | @nb.njit(fastmath=True, cache=True)
229 | def warp(mean, covariance, H):
230 | """Warps Kalman filter state using a homography transformation.
231 | https://scholarsarchive.byu.edu/cgi/viewcontent.cgi?article=1301&context=studentpub
232 |
233 | Parameters
234 | ----------
235 | mean : ndarray
236 | The predicted state's mean vector (8 dimensional).
237 | covariance : ndarray
238 | The state's covariance matrix (8x8 dimensional).
239 | H : ndarray
240 | A 3x3 homography matrix.
241 |
242 | Returns
243 | -------
244 | ndarray, ndarray
245 | Returns the mean vector and covariance matrix of the transformed
246 | state.
247 | """
248 | H1 = np.ascontiguousarray(H[:2, :2])
249 | h2 = np.ascontiguousarray(H[:2, 2])
250 | h3 = np.ascontiguousarray(H[2, :2])
251 | h4 = 1.
252 |
253 | E1 = np.eye(8, 2)
254 | E3 = np.eye(8, 2, -4)
255 | M = E1 @ H1 @ E1.T + E3 @ H1 @ E3.T
256 | M31 = E3 @ H1 @ E1.T
257 | w12 = E1 @ h2
258 | w13 = E1 @ h3
259 | w33 = E3 @ h3
260 | u = M @ mean + w12
261 | v = M31 @ mean + E3 @ h2
262 | a = np.dot(w13, mean) + h4
263 | b = np.dot(w33, mean)
264 | # transform top left mean
265 | mean_tl = u / a - b * v / a**2
266 | # compute top left Jacobian
267 | F_tl = M / a - (np.outer(u, w13) + b * M31 + np.outer(v, w33)) / a**2 + \
268 | (2 * b * np.outer(v, w13)) / a**3
269 |
270 | E2 = np.eye(8, 2, -2)
271 | E4 = np.eye(8, 2, -6)
272 | M = E2 @ H1 @ E2.T + E4 @ H1 @ E4.T
273 | M42 = E4 @ H1 @ E2.T
274 | w22 = E2 @ h2
275 | w23 = E2 @ h3
276 | w43 = E4 @ h3
277 | u = M @ mean + w22
278 | v = M42 @ mean + E4 @ h2
279 | a = np.dot(w23, mean) + h4
280 | b = np.dot(w43, mean)
281 | # transform bottom right mean
282 | mean_br = u / a - b * v / a**2
283 | # compute bottom right Jacobian
284 | F_br = M / a - (np.outer(u, w23) + b * M42 + np.outer(v, w43)) / a**2 + \
285 | (2 * b * np.outer(v, w23)) / a**3
286 |
287 | # add them together
288 | mean = mean_tl + mean_br
289 | F = F_tl + F_br
290 | # transform covariance with Jacobian
291 | covariance = F @ covariance @ F.T
292 | return mean, covariance
293 |
294 | def _init_mat(self, dt):
295 | # acceleration-based process noise
296 | acc_cov = np.diag([0.25 * dt**4] * 4 + [dt**2] * 4)
297 | acc_cov[4:, :4] = np.eye(4) * (0.5 * dt**3)
298 | acc_cov[:4, 4:] = np.eye(4) * (0.5 * dt**3)
299 |
300 | meas_mat = np.eye(4, 8)
301 | trans_mat = np.eye(8)
302 | for i in range(4):
303 | trans_mat[i, i + 4] = self.vel_coupling * dt
304 | trans_mat[i, (i + 2) % 4 + 4] = (1. - self.vel_coupling) * dt
305 | trans_mat[i + 4, i + 4] = 0.5**(dt / self.vel_half_life)
306 | return acc_cov, meas_mat, trans_mat
307 |
308 | @staticmethod
309 | @nb.njit(fastmath=True, cache=True)
310 | def _predict(mean, covariance, trans_mat, acc_cov, std_factor_acc, std_offset_acc):
311 | size = max(get_size(mean[:4])) # max(w, h)
312 | std = std_factor_acc * size + std_offset_acc
313 | motion_cov = acc_cov * std**2
314 |
315 | mean = trans_mat @ mean
316 | covariance = trans_mat @ covariance @ trans_mat.T + motion_cov
317 | # ensure positive definiteness
318 | covariance = 0.5 * (covariance + covariance.T)
319 | return mean, covariance
320 |
321 | @staticmethod
322 | @nb.njit(fastmath=True, cache=True)
323 | def _project(mean, covariance, meas_mat, std_factor, min_std, multiplier):
324 | w, h = get_size(mean[:4])
325 | std = np.array([
326 | max(std_factor[0] * w, min_std[0]),
327 | max(std_factor[1] * h, min_std[1]),
328 | max(std_factor[0] * w, min_std[0]),
329 | max(std_factor[1] * h, min_std[1])
330 | ], dtype=np.float64)
331 | meas_cov = np.diag(np.square(std * multiplier))
332 |
333 | mean = meas_mat @ mean
334 | covariance = meas_mat @ covariance @ meas_mat.T
335 | innovation_cov = covariance + meas_cov
336 | return mean, innovation_cov
337 |
338 | @staticmethod
339 | @nb.njit(fastmath=True, cache=True)
340 | def _update(mean, covariance, proj_mean, proj_cov, measurement, meas_mat):
341 | kalman_gain = np.linalg.solve(proj_cov, (covariance @ meas_mat.T).T).T
342 | innovation = measurement - proj_mean
343 | mean = mean + innovation @ kalman_gain.T
344 | covariance = covariance - kalman_gain @ proj_cov @ kalman_gain.T
345 | return mean, covariance
346 |
347 | @staticmethod
348 | @nb.njit(fastmath=True, cache=True)
349 | def _maha_distance(mean, covariance, measurements):
350 | diff = measurements - mean
351 | L = np.linalg.cholesky(covariance)
352 | y = np.linalg.solve(L, diff.T)
353 | return np.sum(y**2, axis=0)
354 |
--------------------------------------------------------------------------------
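A minimal usage sketch of the filter above; the bounding box values are made up for illustration, and in FastMOT the filter is normally driven by the tracker:

    import numpy as np
    from fastmot.kalman_filter import KalmanFilter, MeasType

    kf = KalmanFilter()
    kf.reset_dt(1 / 30.)                            # match the capture frame interval

    det = np.array([100., 50., 180., 230.])         # detected box [x1, y1, x2, y2]
    mean, cov = kf.create(det)                      # initialize state from a detection
    mean, cov = kf.predict(mean, cov)               # propagate one frame
    mean, cov = kf.update(mean, cov, det, MeasType.DETECTOR)   # correct with a measurement
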
/fastmot/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .ssd import SSD
2 | from .yolo import YOLO
3 | from .reid import ReID
4 | from .label import get_label_name, set_label_map
--------------------------------------------------------------------------------
/fastmot/models/calibrator.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import cupy as cp
4 | import tensorrt as trt
5 | import cv2
6 |
7 |
8 | class SSDEntropyCalibrator(trt.IInt8EntropyCalibrator2):
9 | def __init__(self, model_shape, data_dir, cache_file):
10 | # Whenever you specify a custom constructor for a TensorRT class,
11 | # you MUST call the constructor of the parent explicitly.
12 | trt.IInt8EntropyCalibrator2.__init__(self)
13 |
14 | self.model_shape = model_shape
15 | self.num_calib_imgs = 100 # the number of images from the dataset to use for calibration
16 | self.batch_size = 10
17 | self.batch_shape = (self.batch_size, *self.model_shape)
18 | self.cache_file = cache_file
19 |
20 | calib_imgs = [os.path.join(data_dir, f) for f in os.listdir(data_dir)]
21 | self.calib_imgs = np.random.choice(calib_imgs, self.num_calib_imgs)
22 | self.counter = 0 # for keeping track of how many files we have read
23 |
24 | self.input_dev = cp.empty(self.batch_shape, dtype=np.float32)
25 |
26 | def get_batch_size(self):
27 | return self.batch_size
28 |
29 | # TensorRT passes along the names of the engine bindings to the get_batch function.
30 | # You don't necessarily have to use them, but they can be useful to understand the order of
31 | # the inputs. The bindings list is expected to have the same ordering as 'names'.
32 | def get_batch(self, names):
33 |
34 | # if there are not enough calibration images to form a batch,
35 | # we have reached the end of our data set
36 | if self.counter == self.num_calib_imgs:
37 | return None
38 |
39 | batch_imgs = np.zeros((self.batch_size, trt.volume(self.model_shape)))
40 | for i in range(self.batch_size):
41 | img = cv2.imread(self.calib_imgs[self.counter + i])
42 | img = cv2.resize(img, (self.model_shape[2], self.model_shape[1]))
43 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
44 | # HWC -> CHW
45 | img = img.transpose((2, 0, 1))
46 | # Normalize to [-1.0, 1.0] interval (expected by model)
47 | img = (2.0 / 255.0) * img - 1.0
48 | # add this image to the batch array
49 | batch_imgs[i, :] = img.ravel()
50 |
51 | # increase the counter for this batch
52 | self.counter += self.batch_size
53 |
54 | # Copy to device, then return a list containing pointers to input device buffers.
55 | batch_imgs = batch_imgs.astype(np.float32)
56 | self.input_dev.data.copy_from_host(batch_imgs.ctypes.data, batch_imgs.nbytes)
57 | return [self.input_dev.data.ptr]
58 |
59 | def read_calibration_cache(self):
60 | # If there is a cache, use it instead of calibrating again.
61 | if os.path.exists(self.cache_file):
62 | with open(self.cache_file, "rb") as f:
63 | return f.read()
64 |
65 | def write_calibration_cache(self, cache):
66 | with open(self.cache_file, "wb") as f:
67 | f.write(cache)
68 |
--------------------------------------------------------------------------------
/fastmot/models/label.py:
--------------------------------------------------------------------------------
1 | from collections.abc import Sequence
2 |
3 |
4 | """
5 | 91-class COCO labels
6 | `unlabeled` (id = 0) is replaced with `head` for CrowdHuman
7 | These are different from the default 80-class COCO labels used by YOLO
8 | """
9 | _label_map = (
10 | 'head',
11 | 'person',
12 | 'bicycle',
13 | 'car',
14 | 'motorcycle',
15 | 'airplane',
16 | 'bus',
17 | 'train',
18 | 'truck',
19 | 'boat',
20 | 'traffic light',
21 | 'fire hydrant',
22 | 'street sign',
23 | 'stop sign',
24 | 'parking meter',
25 | 'bench',
26 | 'bird',
27 | 'cat',
28 | 'dog',
29 | 'horse',
30 | 'sheep',
31 | 'cow',
32 | 'elephant',
33 | 'bear',
34 | 'zebra',
35 | 'giraffe',
36 | 'hat',
37 | 'backpack',
38 | 'umbrella',
39 | 'shoe',
40 | 'eye glasses',
41 | 'handbag',
42 | 'tie',
43 | 'suitcase',
44 | 'frisbee',
45 | 'skis',
46 | 'snowboard',
47 | 'sports ball',
48 | 'kite',
49 | 'baseball bat',
50 | 'baseball glove',
51 | 'skateboard',
52 | 'surfboard',
53 | 'tennis racket',
54 | 'bottle',
55 | 'plate',
56 | 'wine glass',
57 | 'cup',
58 | 'fork',
59 | 'knife',
60 | 'spoon',
61 | 'bowl',
62 | 'banana',
63 | 'apple',
64 | 'sandwich',
65 | 'orange',
66 | 'broccoli',
67 | 'carrot',
68 | 'hot dog',
69 | 'pizza',
70 | 'donut',
71 | 'cake',
72 | 'chair',
73 | 'couch',
74 | 'potted plant',
75 | 'bed',
76 | 'mirror',
77 | 'dining table',
78 | 'window',
79 | 'desk',
80 | 'toilet',
81 | 'door',
82 | 'tv',
83 | 'laptop',
84 | 'mouse',
85 | 'remote',
86 | 'keyboard',
87 | 'cell phone',
88 | 'microwave',
89 | 'oven',
90 | 'toaster',
91 | 'sink',
92 | 'refrigerator',
93 | 'blender',
94 | 'book',
95 | 'clock',
96 | 'vase',
97 | 'scissors',
98 | 'teddy bear',
99 | 'hair drier',
100 | 'toothbrush',
101 | )
102 |
103 |
104 | def get_label_name(class_id):
105 | """Look up label name given a class ID."""
106 | return _label_map[class_id]
107 |
108 |
109 | def set_label_map(label_map):
110 | """Set label name mapping from class IDs.
111 |
112 | Parameters
113 | ----------
114 | label_map : sequence
115 | A sequence of label names.
116 | The index of each label determines its class ID.
117 | """
118 | assert isinstance(label_map, Sequence)
119 | assert len(label_map) > 0
120 | global _label_map
121 | _label_map = tuple(label_map)
122 |
--------------------------------------------------------------------------------
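A short sketch of how the two helpers above fit together when tracking custom classes; the label names here are only an example:

    from fastmot.models import get_label_name, set_label_map

    set_label_map(('person', 'car', 'truck'))   # index in the sequence becomes the class ID
    print(get_label_name(0))                    # -> 'person'
    print(get_label_name(2))                    # -> 'truck'
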
/fastmot/models/reid.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import logging
3 | import tensorrt as trt
4 |
5 |
6 | EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
7 | LOGGER = logging.getLogger(__name__)
8 |
9 |
10 | class ReID:
11 | """Base class for ReID models.
12 |
13 | Attributes
14 | ----------
15 | PLUGIN_PATH : Path, optional
16 | Path to TensorRT plugin.
17 | ENGINE_PATH : Path
18 | Path to TensorRT engine.
19 | If not found, TensorRT engine will be converted from the ONNX model
20 | at runtime and cached for later use.
21 | MODEL_PATH : Path
22 | Path to ONNX model.
23 | INPUT_SHAPE : tuple
24 | Input size in the format `(channel, height, width)`.
25 | OUTPUT_LAYOUT : int
26 | Feature dimension output by the model.
27 | METRIC : {'euclidean', 'cosine'}
28 | Distance metric used to match features.
29 | """
30 | __registry = {}
31 |
32 | PLUGIN_PATH = None
33 | ENGINE_PATH = None
34 | MODEL_PATH = None
35 | INPUT_SHAPE = None
36 | OUTPUT_LAYOUT = None
37 | METRIC = None
38 |
39 | def __init_subclass__(cls, **kwargs):
40 | super().__init_subclass__(**kwargs)
41 | cls.__registry[cls.__name__] = cls
42 |
43 | @classmethod
44 | def get_model(cls, name):
45 | return cls.__registry[name]
46 |
47 | @classmethod
48 | def build_engine(cls, trt_logger, batch_size):
49 | with trt.Builder(trt_logger) as builder, builder.create_network(EXPLICIT_BATCH) as network, \
50 | trt.OnnxParser(network, trt_logger) as parser:
51 |
52 | builder.max_batch_size = batch_size
53 | LOGGER.info('Building engine with batch size: %d', batch_size)
54 | LOGGER.info('This may take a while...')
55 |
56 | # parse model file
57 | with open(cls.MODEL_PATH, 'rb') as model_file:
58 | if not parser.parse(model_file.read()):
59 | LOGGER.critical('Failed to parse the ONNX file')
60 | for err in range(parser.num_errors):
61 | LOGGER.error(parser.get_error(err))
62 | return None
63 |
64 | # reshape input to the right batch size
65 | net_input = network.get_input(0)
66 | assert cls.INPUT_SHAPE == net_input.shape[1:]
67 | net_input.shape = (batch_size, *cls.INPUT_SHAPE)
68 |
69 | config = builder.create_builder_config()
70 | config.max_workspace_size = 1 << 30
71 | if builder.platform_has_fast_fp16:
72 | config.set_flag(trt.BuilderFlag.FP16)
73 |
74 | profile = builder.create_optimization_profile()
75 | profile.set_shape(
76 | net_input.name, # input tensor name
77 | (batch_size, *cls.INPUT_SHAPE), # min shape
78 | (batch_size, *cls.INPUT_SHAPE), # opt shape
79 | (batch_size, *cls.INPUT_SHAPE) # max shape
80 | )
81 | config.add_optimization_profile(profile)
82 |
83 | # engine = builder.build_cuda_engine(network)
84 | engine = builder.build_engine(network, config)
85 | if engine is None:
86 | LOGGER.critical('Failed to build engine')
87 | return None
88 |
89 | LOGGER.info("Completed creating engine")
90 | with open(cls.ENGINE_PATH, 'wb') as engine_file:
91 | engine_file.write(engine.serialize())
92 | return engine
93 |
94 |
95 | class OSNet025(ReID):
96 | ENGINE_PATH = Path(__file__).parent / 'osnet_x0_25_msmt17.trt'
97 | MODEL_PATH = Path(__file__).parent / 'osnet_x0_25_msmt17.onnx'
98 | INPUT_SHAPE = (3, 256, 128)
99 | OUTPUT_LAYOUT = 512
100 | METRIC = 'euclidean'
101 |
102 |
103 | class OSNet10(ReID):
104 | """Multi-source model trained on MSMT17, DukeMTMC, and CUHK03, not provided."""
105 | ENGINE_PATH = Path(__file__).parent / 'osnet_x1_0_msdc.trt'
106 | MODEL_PATH = Path(__file__).parent / 'osnet_x1_0_msdc.onnx'
107 | INPUT_SHAPE = (3, 256, 128)
108 | OUTPUT_LAYOUT = 512
109 | METRIC = 'cosine'
110 |
--------------------------------------------------------------------------------
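The `__init_subclass__` hook above registers every ReID subclass under its class name, so a model can be looked up and its TensorRT engine built by name. A minimal sketch, assuming the corresponding ONNX file has been downloaded next to the module:

    import tensorrt as trt
    from fastmot.models import ReID

    trt_logger = trt.Logger(trt.Logger.INFO)
    model_cls = ReID.get_model('OSNet025')                       # look up a registered model
    engine = model_cls.build_engine(trt_logger, batch_size=16)   # serialized engine cached to ENGINE_PATH
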
/fastmot/models/ssd.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import logging
3 | import tensorrt as trt
4 |
5 |
6 | LOGGER = logging.getLogger(__name__)
7 |
8 |
9 | class SSD:
10 | """Base class for SSD models.
11 |
12 | Attributes
13 | ----------
14 | PLUGIN_PATH : Path, optional
15 | Path to TensorRT plugin.
16 | ENGINE_PATH : Path
17 | Path to TensorRT engine.
18 | If not found, TensorRT engine will be converted from TensorFlow model
19 | at runtime and cached for later use.
20 | MODEL_PATH : Path
21 | Path to TensorFlow model.
22 | NUM_CLASSES : int
23 | Total number of trained classes.
24 | INPUT_SHAPE : tuple
25 | Input size in the format `(channel, height, width)`.
26 | OUTPUT_NAME : str
27 | Output tensor name.
28 | NMS_THRESH : float
29 | Non-maximum suppression threshold.
30 | TOPK : int
31 | Max number of detections to output.
32 | """
33 | __registry = {}
34 |
35 | PLUGIN_PATH = None
36 | ENGINE_PATH = None
37 | MODEL_PATH = None
38 | NUM_CLASSES = None
39 | INPUT_SHAPE = None
40 | OUTPUT_NAME = None
41 | NMS_THRESH = None
42 | TOPK = None
43 |
44 | def __init_subclass__(cls, **kwargs):
45 | super().__init_subclass__(**kwargs)
46 | cls.__registry[cls.__name__] = cls
47 |
48 | @classmethod
49 | def get_model(cls, name):
50 | return cls.__registry[name]
51 |
52 | @classmethod
53 | def add_plugin(cls, graph):
54 | raise NotImplementedError
55 |
56 | @classmethod
57 | def build_engine(cls, trt_logger, batch_size, calib_dataset=Path.home() / 'VOCdevkit' / 'VOC2007' / 'JPEGImages'):
58 | import graphsurgeon as gs
59 | import uff
60 | from . import calibrator
61 |
62 | if int(trt.__version__.split('.')[0]) >= 8:
63 | raise RuntimeError('SSD requires TensorRT version < 8')
64 |
65 | # convert TensorFlow graph into UFF
66 | dynamic_graph = gs.DynamicGraph(str(cls.MODEL_PATH))
67 | dynamic_graph = cls.add_plugin(dynamic_graph)
68 | uff_model = uff.from_tensorflow(dynamic_graph.as_graph_def(), [cls.OUTPUT_NAME], quiet=True)
69 |
70 | with trt.Builder(trt_logger) as builder, builder.create_network() as network, trt.UffParser() as parser:
71 | builder.max_workspace_size = 1 << 30
72 | builder.max_batch_size = batch_size
73 | LOGGER.info('Building engine with batch size: %d', batch_size)
74 | LOGGER.info('This may take a while...')
75 |
76 | if builder.platform_has_fast_fp16:
77 | builder.fp16_mode = True
78 | if builder.platform_has_fast_int8:
79 | builder.int8_mode = True
80 | builder.int8_calibrator = calibrator.SSDEntropyCalibrator(cls.INPUT_SHAPE,
81 | data_dir=calib_dataset,
82 | cache_file=Path(__file__).parent /
83 | f'{cls.__name__}_calib_cache')
84 |
85 | parser.register_input('Input', cls.INPUT_SHAPE)
86 | parser.register_output('MarkOutput_0')
87 | parser.parse_buffer(uff_model, network)
88 | engine = builder.build_cuda_engine(network)
89 | if engine is None:
90 | LOGGER.critical('Failed to build engine')
91 | return None
92 |
93 | LOGGER.info("Completed creating engine")
94 | with open(cls.ENGINE_PATH, 'wb') as engine_file:
95 | engine_file.write(engine.serialize())
96 | return engine
97 |
98 |
99 | class SSDMobileNetV1(SSD):
100 | ENGINE_PATH = Path(__file__).parent / 'ssd_mobilenet_v1_coco.trt'
101 | MODEL_PATH = Path(__file__).parent / 'ssd_mobilenet_v1_coco.pb'
102 | NUM_CLASSES = 91
103 | INPUT_SHAPE = (3, 300, 300)
104 | OUTPUT_NAME = 'NMS'
105 | NMS_THRESH = 0.5
106 | TOPK = 100
107 |
108 | @classmethod
109 | def add_plugin(cls, graph):
110 | import tensorflow as tf
111 | import graphsurgeon as gs
112 |
113 | all_assert_nodes = graph.find_nodes_by_op("Assert")
114 | graph.remove(all_assert_nodes, remove_exclusive_dependencies=True)
115 | all_identity_nodes = graph.find_nodes_by_op("Identity")
116 | graph.forward_inputs(all_identity_nodes)
117 |
118 | Input = gs.create_plugin_node(
119 | name="Input",
120 | op="Placeholder",
121 | dtype=tf.float32,
122 | shape=[1, *cls.INPUT_SHAPE]
123 | )
124 |
125 | PriorBox = gs.create_plugin_node(
126 | name="MultipleGridAnchorGenerator",
127 | op="GridAnchor_TRT",
128 | minSize=0.2,
129 | maxSize=0.95,
130 | aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
131 | variance=[0.1, 0.1, 0.2, 0.2],
132 | featureMapShapes=[19, 10, 5, 3, 2, 1],
133 | numLayers=6
134 | )
135 |
136 | NMS = gs.create_plugin_node(
137 | name="NMS",
138 | op="NMS_TRT",
139 | shareLocation=1,
140 | varianceEncodedInTarget=0,
141 | backgroundLabelId=0,
142 | confidenceThreshold=1e-8,
143 | nmsThreshold=cls.NMS_THRESH,
144 | topK=100,
145 | keepTopK=100,
146 | numClasses=91,
147 | inputOrder=[0, 2, 1],
148 | confSigmoid=1,
149 | isNormalized=1
150 | )
151 |
152 | concat_priorbox = gs.create_node(
153 | "concat_priorbox",
154 | op="ConcatV2",
155 | dtype=tf.float32,
156 | axis=2
157 | )
158 |
159 | concat_box_loc = gs.create_plugin_node(
160 | "concat_box_loc",
161 | op="FlattenConcat_TRT",
162 | dtype=tf.float32,
163 | axis=1,
164 | ignoreBatch=0
165 | )
166 |
167 | concat_box_conf = gs.create_plugin_node(
168 | "concat_box_conf",
169 | op="FlattenConcat_TRT",
170 | dtype=tf.float32,
171 | axis=1,
172 | ignoreBatch=0
173 | )
174 |
175 | namespace_plugin_map = {
176 | "MultipleGridAnchorGenerator": PriorBox,
177 | "Postprocessor": NMS,
178 | "Preprocessor": Input,
179 | "ToFloat": Input,
180 | "image_tensor": Input,
181 | "MultipleGridAnchorGenerator/Concatenate": concat_priorbox,
182 | "concat": concat_box_loc,
183 | "concat_1": concat_box_conf
184 | }
185 |
186 | # Create a new graph by collapsing namespaces
187 | graph.collapse_namespaces(namespace_plugin_map)
188 | # Remove the outputs, so we just have a single output node (NMS).
189 | # If remove_exclusive_dependencies is True, the whole graph will be removed!
190 | graph.remove(graph.graph_outputs, remove_exclusive_dependencies=False)
191 | graph.find_nodes_by_op("NMS_TRT")[0].input.remove("Input")
192 | graph.find_nodes_by_name("Input")[0].input.remove("image_tensor:0")
193 |
194 | return graph
195 |
196 |
197 | class SSDMobileNetV2(SSD):
198 | ENGINE_PATH = Path(__file__).parent / 'ssd_mobilenet_v2_coco.trt'
199 | MODEL_PATH = Path(__file__).parent / 'ssd_mobilenet_v2_coco.pb'
200 | NUM_CLASSES = 91
201 | INPUT_SHAPE = (3, 300, 300)
202 | OUTPUT_NAME = 'NMS'
203 | NMS_THRESH = 0.5
204 | TOPK = 100
205 |
206 | @classmethod
207 | def add_plugin(cls, graph):
208 | import tensorflow as tf
209 | import graphsurgeon as gs
210 |
211 | all_assert_nodes = graph.find_nodes_by_op("Assert")
212 | graph.remove(all_assert_nodes, remove_exclusive_dependencies=True)
213 | all_identity_nodes = graph.find_nodes_by_op("Identity")
214 | graph.forward_inputs(all_identity_nodes)
215 |
216 | Input = gs.create_plugin_node(
217 | name="Input",
218 | op="Placeholder",
219 | dtype=tf.float32,
220 | shape=[1, *cls.INPUT_SHAPE]
221 | )
222 |
223 | PriorBox = gs.create_plugin_node(
224 | name="GridAnchor",
225 | op="GridAnchor_TRT",
226 | minSize=0.2,
227 | maxSize=0.95,
228 | aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
229 | variance=[0.1, 0.1, 0.2, 0.2],
230 | featureMapShapes=[19, 10, 5, 3, 2, 1],
231 | numLayers=6
232 | )
233 |
234 | NMS = gs.create_plugin_node(
235 | name="NMS",
236 | op="NMS_TRT",
237 | shareLocation=1,
238 | varianceEncodedInTarget=0,
239 | backgroundLabelId=0,
240 | confidenceThreshold=1e-8,
241 | nmsThreshold=cls.NMS_THRESH,
242 | topK=100,
243 | keepTopK=100,
244 | numClasses=91,
245 | inputOrder=[1, 0, 2],
246 | confSigmoid=1,
247 | isNormalized=1
248 | )
249 |
250 | concat_priorbox = gs.create_node(
251 | "concat_priorbox",
252 | op="ConcatV2",
253 | dtype=tf.float32,
254 | axis=2
255 | )
256 |
257 | concat_box_loc = gs.create_plugin_node(
258 | "concat_box_loc",
259 | op="FlattenConcat_TRT",
260 | dtype=tf.float32,
261 | axis=1,
262 | ignoreBatch=0
263 | )
264 |
265 | concat_box_conf = gs.create_plugin_node(
266 | "concat_box_conf",
267 | op="FlattenConcat_TRT",
268 | dtype=tf.float32,
269 | axis=1,
270 | ignoreBatch=0
271 | )
272 |
273 | namespace_plugin_map = {
274 | "MultipleGridAnchorGenerator": PriorBox,
275 | "Postprocessor": NMS,
276 | "Preprocessor": Input,
277 | "ToFloat": Input,
278 | "image_tensor": Input,
279 | "Concatenate": concat_priorbox,
280 | "concat": concat_box_loc,
281 | "concat_1": concat_box_conf
282 | }
283 |
284 | # Create a new graph by collapsing namespaces
285 | graph.collapse_namespaces(namespace_plugin_map)
286 | # Remove the outputs, so we just have a single output node (NMS).
287 | # If remove_exclusive_dependencies is True, the whole graph will be removed!
288 | graph.remove(graph.graph_outputs, remove_exclusive_dependencies=False)
289 | graph.find_nodes_by_op("NMS_TRT")[0].input.remove("Input")
290 |
291 | return graph
292 |
293 |
294 | class SSDInceptionV2(SSD):
295 | ENGINE_PATH = Path(__file__).parent / 'ssd_inception_v2_coco.trt'
296 | MODEL_PATH = Path(__file__).parent / 'ssd_inception_v2_coco.pb'
297 | NUM_CLASSES = 91
298 | INPUT_SHAPE = (3, 300, 300)
299 | OUTPUT_NAME = 'NMS'
300 | NMS_THRESH = 0.5
301 | TOPK = 100
302 |
303 | @classmethod
304 | def add_plugin(cls, graph):
305 | import tensorflow as tf
306 | import graphsurgeon as gs
307 |
308 | all_assert_nodes = graph.find_nodes_by_op("Assert")
309 | graph.remove(all_assert_nodes, remove_exclusive_dependencies=True)
310 | all_identity_nodes = graph.find_nodes_by_op("Identity")
311 | graph.forward_inputs(all_identity_nodes)
312 |
313 | # Create TRT plugin nodes to replace unsupported ops in Tensorflow graph
314 | Input = gs.create_plugin_node(
315 | name="Input",
316 | op="Placeholder",
317 | dtype=tf.float32,
318 | shape=[1, *cls.INPUT_SHAPE]
319 | )
320 |
321 | PriorBox = gs.create_plugin_node(
322 | name="GridAnchor",
323 | op="GridAnchor_TRT",
324 | minSize=0.2,
325 | maxSize=0.95,
326 | aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
327 | variance=[0.1, 0.1, 0.2, 0.2],
328 | featureMapShapes=[19, 10, 5, 3, 2, 1],
329 | numLayers=6
330 | )
331 |
332 | NMS = gs.create_plugin_node(
333 | name="NMS",
334 | op="NMS_TRT",
335 | shareLocation=1,
336 | varianceEncodedInTarget=0,
337 | backgroundLabelId=0,
338 | confidenceThreshold=1e-8,
339 | nmsThreshold=cls.NMS_THRESH,
340 | topK=100,
341 | keepTopK=100,
342 | numClasses=91,
343 | inputOrder=[0, 2, 1],
344 | confSigmoid=1,
345 | isNormalized=1
346 | )
347 |
348 | concat_priorbox = gs.create_node(
349 | "concat_priorbox",
350 | op="ConcatV2",
351 | dtype=tf.float32,
352 | axis=2
353 | )
354 |
355 | concat_box_loc = gs.create_plugin_node(
356 | "concat_box_loc",
357 | op="FlattenConcat_TRT",
358 | dtype=tf.float32,
359 | axis=1,
360 | ignoreBatch=0
361 | )
362 |
363 | concat_box_conf = gs.create_plugin_node(
364 | "concat_box_conf",
365 | op="FlattenConcat_TRT",
366 | dtype=tf.float32,
367 | axis=1,
368 | ignoreBatch=0
369 | )
370 |
371 | # Create a mapping of namespace names -> plugin nodes.
372 | namespace_plugin_map = {
373 | "MultipleGridAnchorGenerator": PriorBox,
374 | "Postprocessor": NMS,
375 | "Preprocessor": Input,
376 | "ToFloat": Input,
377 | "image_tensor": Input,
378 | "MultipleGridAnchorGenerator/Concatenate": concat_priorbox,
379 | "concat": concat_box_loc,
380 | "concat_1": concat_box_conf
381 | }
382 |
383 | # Create a new graph by collapsing namespaces
384 | graph.collapse_namespaces(namespace_plugin_map)
385 | # Remove the outputs, so we just have a single output node (NMS).
386 | # If remove_exclusive_dependencies is True, the whole graph will be removed!
387 | graph.remove(graph.graph_outputs, remove_exclusive_dependencies=False)
388 | return graph
389 |
--------------------------------------------------------------------------------
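As the version check above enforces, the UFF-based SSD path only works with TensorRT 7 or older. A minimal sketch of building an SSD engine, assuming the TensorFlow .pb model is present and a VOC-style image folder is available for INT8 calibration:

    from pathlib import Path
    import tensorrt as trt
    from fastmot.models import SSD

    trt_logger = trt.Logger(trt.Logger.INFO)
    model_cls = SSD.get_model('SSDInceptionV2')
    engine = model_cls.build_engine(
        trt_logger, batch_size=1,
        calib_dataset=Path.home() / 'VOCdevkit' / 'VOC2007' / 'JPEGImages')
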
/fastmot/models/yolo.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import logging
3 | import numpy as np
4 | import tensorrt as trt
5 |
6 |
7 | EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
8 | LOGGER = logging.getLogger(__name__)
9 |
10 |
11 | class YOLO:
12 | """Base class for YOLO models.
13 |
14 | Attributes
15 | ----------
16 | PLUGIN_PATH : Path, optional
17 | Path to TensorRT plugin.
18 | ENGINE_PATH : Path
19 | Path to TensorRT engine.
20 | If not found, TensorRT engine will be converted from the ONNX model
21 | at runtime and cached for later use.
22 | MODEL_PATH : Path
23 | Path to ONNX model.
24 | NUM_CLASSES : int
25 | Total number of trained classes.
26 | LETTERBOX : bool
27 | Keep aspect ratio when resizing.
28 | NEW_COORDS : bool
29 | new_coords Darknet parameter for each yolo layer.
30 | INPUT_SHAPE : tuple
31 | Input size in the format `(channel, height, width)`.
32 | LAYER_FACTORS : List[int]
33 | Scale factors with respect to the input size for each yolo layer.
34 | SCALES : List[float]
35 | scale_x_y Darknet parameter for each yolo layer.
36 | ANCHORS : List[List[int]]
37 | Anchors grouped by each yolo layer.
38 | """
39 | __registry = {}
40 |
41 | PLUGIN_PATH = Path(__file__).parents[1] / 'plugins' / 'libyolo_layer.so'
42 | ENGINE_PATH = None
43 | MODEL_PATH = None
44 | NUM_CLASSES = None
45 | LETTERBOX = False
46 | NEW_COORDS = False
47 | INPUT_SHAPE = None
48 | LAYER_FACTORS = None
49 | SCALES = None
50 | ANCHORS = None
51 |
52 | def __init_subclass__(cls, **kwargs):
53 | super().__init_subclass__(**kwargs)
54 | cls.__registry[cls.__name__] = cls
55 |
56 | @classmethod
57 | def get_model(cls, name):
58 | return cls.__registry[name]
59 |
60 | @classmethod
61 | def add_plugin(cls, network):
62 | def get_plugin_creator(plugin_name):
63 | plugin_creators = trt.get_plugin_registry().plugin_creator_list
64 | for plugin_creator in plugin_creators:
65 | if plugin_creator.name == plugin_name:
66 | return plugin_creator
67 | return None
68 |
69 | assert len(cls.LAYER_FACTORS) == network.num_outputs
70 | assert len(cls.SCALES) == network.num_outputs
71 | assert len(cls.ANCHORS) == network.num_outputs
72 | assert all(s >= 1.0 for s in cls.SCALES)
73 |
74 | plugin_creator = get_plugin_creator('YoloLayer_TRT')
75 | if not plugin_creator:
76 | raise RuntimeError('Failed to get YoloLayer_TRT plugin creator')
77 |
78 | old_tensors = [network.get_output(i) for i in range(network.num_outputs)]
79 | new_tensors = []
80 | for i, old_tensor in enumerate(old_tensors):
81 | yolo_width = cls.INPUT_SHAPE[2] // cls.LAYER_FACTORS[i]
82 | yolo_height = cls.INPUT_SHAPE[1] // cls.LAYER_FACTORS[i]
83 | num_anchors = len(cls.ANCHORS[i]) // 2
84 | plugin = network.add_plugin_v2(
85 | [old_tensor],
86 | plugin_creator.create_plugin('YoloLayer_TRT', trt.PluginFieldCollection([
87 | trt.PluginField("yoloWidth", np.array(yolo_width, dtype=np.int32), trt.PluginFieldType.INT32),
88 | trt.PluginField("yoloHeight", np.array(yolo_height, dtype=np.int32), trt.PluginFieldType.INT32),
89 | trt.PluginField("inputMultiplier", np.array(cls.LAYER_FACTORS[i], dtype=np.int32), trt.PluginFieldType.INT32),
90 | trt.PluginField("newCoords", np.array(cls.NEW_COORDS, dtype=np.int32), trt.PluginFieldType.INT32),
91 | trt.PluginField("numClasses", np.array(cls.NUM_CLASSES, dtype=np.int32), trt.PluginFieldType.INT32),
92 | trt.PluginField("numAnchors", np.array(num_anchors, dtype=np.int32), trt.PluginFieldType.INT32),
93 | trt.PluginField("anchors", np.array(cls.ANCHORS[i], dtype=np.float32), trt.PluginFieldType.FLOAT32),
94 | trt.PluginField("scaleXY", np.array(cls.SCALES[i], dtype=np.float32), trt.PluginFieldType.FLOAT32),
95 | ]))
96 | )
97 | new_tensors.append(plugin.get_output(0))
98 |
99 | for new_tensor in new_tensors:
100 | network.mark_output(new_tensor)
101 | for old_tensor in old_tensors:
102 | network.unmark_output(old_tensor)
103 | return network
104 |
105 | @classmethod
106 | def build_engine(cls, trt_logger, batch_size):
107 | with trt.Builder(trt_logger) as builder, builder.create_network(EXPLICIT_BATCH) as network, \
108 | trt.OnnxParser(network, trt_logger) as parser:
109 |
110 | builder.max_batch_size = batch_size
111 | LOGGER.info('Building engine with batch size: %d', batch_size)
112 | LOGGER.info('This may take a while...')
113 |
114 | # parse model file
115 | with open(cls.MODEL_PATH, 'rb') as model_file:
116 | if not parser.parse(model_file.read()):
117 | LOGGER.critical('Failed to parse the ONNX file')
118 | for err in range(parser.num_errors):
119 | LOGGER.error(parser.get_error(err))
120 | return None
121 |
122 | # yolo*.onnx is generated with batch size 64
123 | # reshape input to the right batch size
124 | net_input = network.get_input(0)
125 | assert cls.INPUT_SHAPE == net_input.shape[1:]
126 | net_input.shape = (batch_size, *cls.INPUT_SHAPE)
127 |
128 | config = builder.create_builder_config()
129 | config.max_workspace_size = 1 << 30
130 | if builder.platform_has_fast_fp16:
131 | config.set_flag(trt.BuilderFlag.FP16)
132 |
133 | profile = builder.create_optimization_profile()
134 | profile.set_shape(
135 | net_input.name, # input tensor name
136 | (batch_size, *cls.INPUT_SHAPE), # min shape
137 | (batch_size, *cls.INPUT_SHAPE), # opt shape
138 | (batch_size, *cls.INPUT_SHAPE) # max shape
139 | )
140 | config.add_optimization_profile(profile)
141 |
142 | network = cls.add_plugin(network)
143 | engine = builder.build_engine(network, config)
144 | if engine is None:
145 | LOGGER.critical('Failed to build engine')
146 | return None
147 |
148 | LOGGER.info("Completed creating engine")
149 | with open(cls.ENGINE_PATH, 'wb') as engine_file:
150 | engine_file.write(engine.serialize())
151 | return engine
152 |
153 |
154 | class YOLOv4(YOLO):
155 | ENGINE_PATH = Path(__file__).parent / 'yolov4_crowdhuman.trt'
156 | MODEL_PATH = Path(__file__).parent / 'yolov4_crowdhuman.onnx'
157 | NUM_CLASSES = 2
158 | INPUT_SHAPE = (3, 512, 512)
159 | LAYER_FACTORS = [8, 16, 32]
160 | SCALES = [1.2, 1.1, 1.05]
161 | ANCHORS = [[11,22, 24,60, 37,116],
162 | [54,186, 69,268, 89,369],
163 | [126,491, 194,314, 278,520]]
164 |
165 |
166 | """
167 | The following models are supported but not provided.
168 | Modify the paths, number of classes, input shape, and anchors according to your Darknet cfg for a custom model.
169 | """
170 |
171 | class YOLOv4CSP(YOLO):
172 | ENGINE_PATH = Path(__file__).parent / 'yolov4-csp.trt'
173 | MODEL_PATH = Path(__file__).parent / 'yolov4-csp.onnx'
174 | NUM_CLASSES = 1
175 | LETTERBOX = True
176 | NEW_COORDS = True
177 | INPUT_SHAPE = (3, 640, 640)
178 | LAYER_FACTORS = [8, 16, 32]
179 | SCALES = [2.0, 2.0, 2.0]
180 | ANCHORS = [[12,16, 19,36, 40,28],
181 | [36,75, 76,55, 72,146],
182 | [142,110, 192,243, 459,401]]
183 |
184 |
185 | class YOLOv4xMish(YOLO):
186 | ENGINE_PATH = Path(__file__).parent / 'yolov4x-mish.trt'
187 | MODEL_PATH = Path(__file__).parent / 'yolov4x-mish.onnx'
188 | NUM_CLASSES = 1
189 | LETTERBOX = True
190 | NEW_COORDS = True
191 | INPUT_SHAPE = (3, 640, 640)
192 | LAYER_FACTORS = [8, 16, 32]
193 | SCALES = [2.0, 2.0, 2.0]
194 | ANCHORS = [[12,16, 19,36, 40,28],
195 | [36,75, 76,55, 72,146],
196 | [142,110, 192,243, 459,401]]
197 |
198 |
199 | class YOLOv4CSPSwish(YOLO):
200 | ENGINE_PATH = Path(__file__).parent / 'yolov4-csp-swish.trt'
201 | MODEL_PATH = Path(__file__).parent / 'yolov4-csp-swish.onnx'
202 | NUM_CLASSES = 1
203 | LETTERBOX = True
204 | NEW_COORDS = True
205 | INPUT_SHAPE = (3, 640, 640)
206 | LAYER_FACTORS = [8, 16, 32]
207 | SCALES = [2.0, 2.0, 2.0]
208 | ANCHORS = [[12,16, 19,36, 40,28],
209 | [36,75, 76,55, 72,146],
210 | [142,110, 192,243, 459,401]]
211 |
212 |
213 | class YOLOv4CSPxSwish(YOLO):
214 | ENGINE_PATH = Path(__file__).parent / 'yolov4-csp-x-swish.trt'
215 | MODEL_PATH = Path(__file__).parent / 'yolov4-csp-x-swish.onnx'
216 | NUM_CLASSES = 1
217 | LETTERBOX = True
218 | NEW_COORDS = True
219 | INPUT_SHAPE = (3, 640, 640)
220 | LAYER_FACTORS = [8, 16, 32]
221 | SCALES = [2.0, 2.0, 2.0]
222 | ANCHORS = [[12,16, 19,36, 40,28],
223 | [36,75, 76,55, 72,146],
224 | [142,110, 192,243, 459,401]]
225 |
226 |
227 | class YOLOv4P5(YOLO):
228 | ENGINE_PATH = Path(__file__).parent / 'yolov4-p5.trt'
229 | MODEL_PATH = Path(__file__).parent / 'yolov4-p5.onnx'
230 | NUM_CLASSES = 1
231 | LETTERBOX = True
232 | NEW_COORDS = True
233 | INPUT_SHAPE = (3, 896, 896)
234 | LAYER_FACTORS = [8, 16, 32]
235 | SCALES = [2.0, 2.0, 2.0]
236 | ANCHORS = [[13,17, 31,25, 24,51, 61,45],
237 | [48,102, 119,96, 97,189, 217,184],
238 | [171,384, 324,451, 616,618, 800,800]]
239 |
240 |
241 | class YOLOv4P6(YOLO):
242 | ENGINE_PATH = Path(__file__).parent / 'yolov4-p6.trt'
243 | MODEL_PATH = Path(__file__).parent / 'yolov4-p6.onnx'
244 | NUM_CLASSES = 1
245 | LETTERBOX = True
246 | NEW_COORDS = True
247 | INPUT_SHAPE = (3, 1280, 1280)
248 | LAYER_FACTORS = [8, 16, 32, 64]
249 | SCALES = [2.0, 2.0, 2.0, 2.0]
250 | ANCHORS = [[13,17, 31,25, 24,51, 61,45],
251 | [61,45, 48,102, 119,96, 97,189],
252 | [97,189, 217,184, 171,384, 324,451],
253 | [324,451, 545,357, 616,618, 1024,1024]]
254 |
255 |
256 | class YOLOv4Tiny(YOLO):
257 | ENGINE_PATH = Path(__file__).parent / 'yolov4-tiny.trt'
258 | MODEL_PATH = Path(__file__).parent / 'yolov4-tiny.onnx'
259 | NUM_CLASSES = 1
260 | INPUT_SHAPE = (3, 416, 416)
261 | LAYER_FACTORS = [32, 16]
262 | SCALES = [1.05, 1.05]
263 | ANCHORS = [[81,82, 135,169, 344,319],
264 | [23,27, 37,58, 81,82]]
265 |
266 |
267 | class YOLOv3(YOLO):
268 | ENGINE_PATH = Path(__file__).parent / 'yolov3.trt'
269 | MODEL_PATH = Path(__file__).parent / 'yolov3.onnx'
270 | NUM_CLASSES = 1
271 | INPUT_SHAPE = (3, 416, 416)
272 | LAYER_FACTORS = [32, 16, 8]
273 | SCALES = [1., 1., 1.]
274 | ANCHORS = [[116,90, 156,198, 373,326],
275 | [30,61, 62,45, 59,119],
276 | [10,13, 16,30, 33,23]]
277 |
278 |
279 | class YOLOv3SPP(YOLO):
280 | ENGINE_PATH = Path(__file__).parent / 'yolov3-spp.trt'
281 | MODEL_PATH = Path(__file__).parent / 'yolov3-spp.onnx'
282 | NUM_CLASSES = 1
283 | INPUT_SHAPE = (3, 608, 608)
284 | LAYER_FACTORS = [32, 16, 8]
285 | SCALES = [1., 1., 1.]
286 | ANCHORS = [[116,90, 156,198, 373,326],
287 | [30,61, 62,45, 59,119],
288 | [10,13, 16,30, 33,23]]
289 |
290 |
291 | class YOLOv3Tiny(YOLO):
292 | ENGINE_PATH = Path(__file__).parent / 'yolov3-tiny.trt'
293 | MODEL_PATH = Path(__file__).parent / 'yolov3-tiny.onnx'
294 | NUM_CLASSES = 1
295 | INPUT_SHAPE = (3, 416, 416)
296 | LAYER_FACTORS = [32, 16]
297 | SCALES = [1., 1.]
298 | ANCHORS = [[81,82, 135,169, 344,319],
299 | [10,14, 23,27, 37,58]]
300 |
--------------------------------------------------------------------------------
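Per the note above about custom models, a new detector is added by subclassing `YOLO`; the subclass registers itself automatically via `__init_subclass__` and can then be retrieved with `YOLO.get_model`. A hypothetical sketch with made-up paths, class count, and anchors:

    from pathlib import Path
    from fastmot.models import YOLO

    class YOLOv4Custom(YOLO):
        """Hypothetical custom model; fill in values from your Darknet cfg."""
        ENGINE_PATH = Path(__file__).parent / 'yolov4_custom.trt'
        MODEL_PATH = Path(__file__).parent / 'yolov4_custom.onnx'
        NUM_CLASSES = 3
        INPUT_SHAPE = (3, 512, 512)
        LAYER_FACTORS = [8, 16, 32]
        SCALES = [1.2, 1.1, 1.05]
        ANCHORS = [[11,22, 24,60, 37,116],
                   [54,186, 69,268, 89,369],
                   [126,491, 194,314, 278,520]]

    assert YOLO.get_model('YOLOv4Custom') is YOLOv4Custom   # auto-registered by name
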
/fastmot/mot.py:
--------------------------------------------------------------------------------
1 | from types import SimpleNamespace
2 | from enum import Enum
3 | import logging
4 | import numpy as np
5 | import numba as nb
6 | import cv2
7 |
8 | from .detector import SSDDetector, YOLODetector, PublicDetector
9 | from .feature_extractor import FeatureExtractor
10 | from .tracker import MultiTracker
11 | from .utils import Profiler
12 | from .utils.visualization import Visualizer
13 | from .utils.numba import bisect_right
14 |
15 |
16 | LOGGER = logging.getLogger(__name__)
17 |
18 |
19 | class DetectorType(Enum):
20 | SSD = 0
21 | YOLO = 1
22 | PUBLIC = 2
23 |
24 |
25 | class MOT:
26 | def __init__(self, size,
27 | detector_type='YOLO',
28 | detector_frame_skip=5,
29 | class_ids=(1,),
30 | ssd_detector_cfg=None,
31 | yolo_detector_cfg=None,
32 | public_detector_cfg=None,
33 | feature_extractor_cfgs=None,
34 | tracker_cfg=None,
35 | visualizer_cfg=None,
36 | draw=False):
37 | """Top level module that integrates detection, feature extraction,
38 | and tracking together.
39 |
40 | Parameters
41 | ----------
42 | size : tuple
43 | Width and height of each frame.
44 | detector_type : {'SSD', 'YOLO', 'public'}, optional
45 | Type of detector to use.
46 | detector_frame_skip : int, optional
47 | Number of frames to skip for the detector.
48 | class_ids : sequence, optional
49 | Class IDs to track. Note class ID starts at zero.
50 | ssd_detector_cfg : SimpleNamespace, optional
51 | SSD detector configuration.
52 | yolo_detector_cfg : SimpleNamespace, optional
53 | YOLO detector configuration.
54 | public_detector_cfg : SimpleNamespace, optional
55 | Public detector configuration.
56 | feature_extractor_cfgs : List[SimpleNamespace], optional
57 | Feature extractor configurations for all classes.
58 | Each configuration corresponds to the class at the same index in sorted `class_ids`.
59 | tracker_cfg : SimpleNamespace, optional
60 | Tracker configuration.
61 | visualizer_cfg : SimpleNamespace, optional
62 | Visualization configuration.
63 | draw : bool, optional
64 | Draw visualizations.
65 | """
66 | self.size = size
67 | self.detector_type = DetectorType[detector_type.upper()]
68 | assert detector_frame_skip >= 1
69 | self.detector_frame_skip = detector_frame_skip
70 | self.class_ids = tuple(np.unique(class_ids))
71 | self.draw = draw
72 |
73 | if ssd_detector_cfg is None:
74 | ssd_detector_cfg = SimpleNamespace()
75 | if yolo_detector_cfg is None:
76 | yolo_detector_cfg = SimpleNamespace()
77 | if public_detector_cfg is None:
78 | public_detector_cfg = SimpleNamespace()
79 | if feature_extractor_cfgs is None:
80 | feature_extractor_cfgs = (SimpleNamespace(),)
81 | if tracker_cfg is None:
82 | tracker_cfg = SimpleNamespace()
83 | if visualizer_cfg is None:
84 | visualizer_cfg = SimpleNamespace()
85 | if len(feature_extractor_cfgs) != len(class_ids):
86 | raise ValueError('Number of feature extractors must match length of class IDs')
87 |
88 | LOGGER.info('Loading detector model...')
89 | if self.detector_type == DetectorType.SSD:
90 | self.detector = SSDDetector(self.size, self.class_ids, **vars(ssd_detector_cfg))
91 | elif self.detector_type == DetectorType.YOLO:
92 | self.detector = YOLODetector(self.size, self.class_ids, **vars(yolo_detector_cfg))
93 | elif self.detector_type == DetectorType.PUBLIC:
94 | self.detector = PublicDetector(self.size, self.class_ids, self.detector_frame_skip,
95 | **vars(public_detector_cfg))
96 |
97 | LOGGER.info('Loading feature extractor models...')
98 | self.extractors = [FeatureExtractor(**vars(cfg)) for cfg in feature_extractor_cfgs]
99 | self.tracker = MultiTracker(self.size, self.extractors[0].metric, **vars(tracker_cfg))
100 | self.visualizer = Visualizer(**vars(visualizer_cfg))
101 | self.frame_count = 0
102 |
103 | def visible_tracks(self):
104 | """Retrieve visible tracks from the tracker.
105 |
106 | Returns
107 | -------
108 | Iterator[Track]
109 | Confirmed and active tracks from the tracker.
110 | """
111 | return (track for track in self.tracker.tracks.values()
112 | if track.confirmed and track.active)
113 |
114 | def reset(self, cap_dt):
115 | """Resets multiple object tracker. Must be called before `step`.
116 |
117 | Parameters
118 | ----------
119 | cap_dt : float
120 | Time interval in seconds between each frame.
121 | """
122 | self.frame_count = 0
123 | self.tracker.reset(cap_dt)
124 |
125 | def step(self, frame):
126 | """Runs multiple object tracker on the next frame.
127 |
128 | Parameters
129 | ----------
130 | frame : ndarray
131 | The next frame.
132 | """
133 | detections = []
134 | if self.frame_count == 0:
135 | detections = self.detector(frame)
136 | self.tracker.init(frame, detections)
137 | elif self.frame_count % self.detector_frame_skip == 0:
138 | with Profiler('preproc'):
139 | self.detector.detect_async(frame)
140 |
141 | with Profiler('detect'):
142 | with Profiler('track'):
143 | self.tracker.compute_flow(frame)
144 | detections = self.detector.postprocess()
145 |
146 | with Profiler('extract'):
147 | cls_bboxes = self._split_bboxes_by_cls(detections.tlbr, detections.label,
148 | self.class_ids)
149 | for extractor, bboxes in zip(self.extractors, cls_bboxes):
150 | extractor.extract_async(frame, bboxes)
151 |
152 | with Profiler('track', aggregate=True):
153 | self.tracker.apply_kalman()
154 |
155 | embeddings = []
156 | for extractor in self.extractors:
157 | embeddings.append(extractor.postprocess())
158 | embeddings = np.concatenate(embeddings) if len(embeddings) > 1 else embeddings[0]
159 |
160 | with Profiler('assoc'):
161 | self.tracker.update(self.frame_count, detections, embeddings)
162 | else:
163 | with Profiler('track'):
164 | self.tracker.track(frame)
165 |
166 | if self.draw:
167 | self._draw(frame, detections)
168 | self.frame_count += 1
169 |
170 | @staticmethod
171 | def print_timing_info():
172 | LOGGER.debug('=================Timing Stats=================')
173 | LOGGER.debug(f"{'track time:':<37}{Profiler.get_avg_millis('track'):>6.3f} ms")
174 | LOGGER.debug(f"{'preprocess time:':<37}{Profiler.get_avg_millis('preproc'):>6.3f} ms")
175 | LOGGER.debug(f"{'detect/flow time:':<37}{Profiler.get_avg_millis('detect'):>6.3f} ms")
176 | LOGGER.debug(f"{'feature extract/kalman filter time:':<37}"
177 | f"{Profiler.get_avg_millis('extract'):>6.3f} ms")
178 | LOGGER.debug(f"{'association time:':<37}{Profiler.get_avg_millis('assoc'):>6.3f} ms")
179 |
180 | @staticmethod
181 | @nb.njit(cache=True)
182 | def _split_bboxes_by_cls(bboxes, labels, class_ids):
183 | cls_bboxes = []
184 | begin = 0
185 | for cls_id in class_ids:
186 | end = bisect_right(labels, cls_id, begin)
187 | cls_bboxes.append(bboxes[begin:end])
188 | begin = end
189 | return cls_bboxes
190 |
191 | def _draw(self, frame, detections):
192 | visible_tracks = list(self.visible_tracks())
193 | self.visualizer.render(frame, visible_tracks, detections, self.tracker.klt_bboxes.values(),
194 | self.tracker.flow.prev_bg_keypoints, self.tracker.flow.bg_keypoints)
195 | cv2.putText(frame, f'visible: {len(visible_tracks)}', (30, 30),
196 | cv2.FONT_HERSHEY_SIMPLEX, 1, 0, 2, cv2.LINE_AA)
197 |
--------------------------------------------------------------------------------
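
A minimal driver loop for the MOT class above might look like the following sketch. It is not the repository's entry point (app.py handles argument parsing, config loading, and video I/O); the frame size and video path are placeholders, and the default configs are assumed to be usable as-is.

```python
# Hypothetical usage of fastmot.mot.MOT -- a sketch, not the repo's app.py.
import cv2
from fastmot.mot import MOT

size = (1280, 720)                           # frame width and height fed to the tracker
mot = MOT(size, detector_type='YOLO')        # omitted cfgs fall back to SimpleNamespace defaults

cap = cv2.VideoCapture('video.mp4')          # placeholder video source
mot.reset(1. / cap.get(cv2.CAP_PROP_FPS))    # reset() must be called before step()

while True:
    ok, frame = cap.read()
    if not ok:
        break
    mot.step(frame)                          # detector runs every detector_frame_skip frames
    for track in mot.visible_tracks():       # confirmed and active tracks only
        print(track)
```
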
/fastmot/plugins/Makefile:
--------------------------------------------------------------------------------
1 | CC=g++
2 | LD=ld
3 | CXXFLAGS=-Wall -std=c++11 -g -O
4 |
5 | NVCC=nvcc
6 |
7 | # Space separated compute capabilities, e.g. computes=70 75. If left empty, the device's compute capability is fetched automatically
8 | computes=
9 |
10 | ifeq ($(computes), )
11 | computes=$(shell python get_compute.py || exit 1)
12 | $(info computes: $(computes))
13 | endif
14 |
15 | NVCCFLAGS=$(foreach compute, $(computes),-gencode arch=compute_$(compute),code=[sm_$(compute),compute_$(compute)])
16 | $(info NVCCFLAGS: $(NVCCFLAGS))
17 |
18 | # These are the directories where TensorRT is installed
19 | TENSORRT_INCS=-I"/usr/include/x86_64-linux-gnu"
20 | TENSORRT_LIBS=-L"/usr/lib/x86_64-linux-gnu"
21 |
22 | # INCS and LIBS
23 | INCS=-I"/usr/local/cuda/include" $(TENSORRT_INCS) -I"/usr/local/include" -I"plugin"
24 | LIBS=-L"/usr/local/cuda/lib64" $(TENSORRT_LIBS) -L"/usr/local/lib" -Wl,--start-group -lnvinfer -lnvparsers -lnvinfer_plugin -lcudnn -lcublas -lnvToolsExt -lcudart -lrt -ldl -lpthread -Wl,--end-group
25 |
26 | .PHONY: all clean
27 |
28 | all: libyolo_layer.so
29 |
30 | clean:
31 | rm -f *.so *.o
32 |
33 | libyolo_layer.so: yolo_layer.o
34 | $(CC) -shared -o $@ $< $(LIBS)
35 |
36 | yolo_layer.o: yolo_layer.cu yolo_layer.h
37 | $(NVCC) -ccbin $(CC) $(INCS) $(NVCCFLAGS) -Xcompiler -fPIC -c -o $@ $<
38 |
--------------------------------------------------------------------------------
/fastmot/plugins/README.md:
--------------------------------------------------------------------------------
1 | YOLO layer plugins are adapted from [tensorrt_demos](https://github.com/jkjung-avt/tensorrt_demos). The original code is under [MIT License](https://github.com/jkjung-avt/tensorrt_demos/blob/master/LICENSE).
2 |
--------------------------------------------------------------------------------
/fastmot/plugins/get_compute.py:
--------------------------------------------------------------------------------
1 | """
2 | Outputs some information on CUDA-enabled devices on your computer,
3 | including current memory usage.
4 |
5 | It's a port of https://gist.github.com/f0k/0d6431e3faa60bffc788f8b4daa029b1
6 | from C to Python with ctypes, so it can run without compiling anything. Note
7 | that this is a direct translation with no attempt to make the code Pythonic.
8 | It's meant as a general demonstration on how to obtain CUDA device information
9 | from Python without resorting to nvidia-smi or a compiled Python extension.
10 | """
11 |
12 | import sys
13 | import ctypes
14 |
15 |
16 | CUDA_SUCCESS = 0
17 |
18 |
19 | def main():
20 | libnames = ('libcuda.so', 'libcuda.dylib', 'cuda.dll')
21 | for libname in libnames:
22 | try:
23 | cuda = ctypes.CDLL(libname)
24 | except OSError:
25 | continue
26 | else:
27 | break
28 | else:
29 | return
30 |
31 | gpu_archs = set()
32 |
33 | n_gpus = ctypes.c_int()
34 | cc_major = ctypes.c_int()
35 | cc_minor = ctypes.c_int()
36 |
37 | result = ctypes.c_int()
38 | device = ctypes.c_int()
39 | error_str = ctypes.c_char_p()
40 |
41 | result = cuda.cuInit(0)
42 | if result != CUDA_SUCCESS:
43 | cuda.cuGetErrorString(result, ctypes.byref(error_str))
44 | print('cuInit failed with error code %d: %s' % (result, error_str.value.decode()))
45 | return 1
46 |
47 | result = cuda.cuDeviceGetCount(ctypes.byref(n_gpus))
48 | if result != CUDA_SUCCESS:
49 | cuda.cuGetErrorString(result, ctypes.byref(error_str))
50 | print('cuDeviceGetCount failed with error code %d: %s' % (result, error_str.value.decode()))
51 | return 1
52 |
53 |     for i in range(n_gpus.value):
54 |         cuda.cuDeviceGet(ctypes.byref(device), i)  # fetch the handle for device ordinal i
55 |         if cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device) == CUDA_SUCCESS:
56 |             gpu_archs.add(str(cc_major.value) + str(cc_minor.value))
57 |     print(' '.join(gpu_archs))
58 | 
59 |     return 0
60 | 
61 | 
62 | if __name__ == '__main__':
63 |     sys.exit(main())
64 | 
--------------------------------------------------------------------------------
/fastmot/plugins/yolo_layer.cu:
--------------------------------------------------------------------------------
1 | #include "yolo_layer.h"
2 |
3 | using namespace Yolo;
4 |
5 | namespace
6 | {
7 | // Write values into buffer
8 |     template <typename T>
9 |     void write(char*& buffer, const T& val)
10 |     {
11 |         *reinterpret_cast<T*>(buffer) = val;
12 |         buffer += sizeof(T);
13 |     }
14 | 
15 |     // Read values from buffer
16 |     template <typename T>
17 |     void read(const char*& buffer, T& val)
18 |     {
19 |         val = *reinterpret_cast<const T*>(buffer);
20 |         buffer += sizeof(T);
21 |     }
22 | } // namespace
23 |
24 | namespace nvinfer1
25 | {
26 | YoloLayerPlugin::YoloLayerPlugin(int yolo_width, int yolo_height, int num_anchors, float* anchors, int num_classes, int input_width, int input_height, float scale_x_y, int new_coords)
27 | {
28 | mYoloWidth = yolo_width;
29 | mYoloHeight = yolo_height;
30 | mNumAnchors = num_anchors;
31 | memcpy(mAnchorsHost, anchors, num_anchors * 2 * sizeof(float));
32 | mNumClasses = num_classes;
33 | mInputWidth = input_width;
34 | mInputHeight = input_height;
35 | mScaleXY = scale_x_y;
36 | mNewCoords = new_coords;
37 |
38 | CHECK(cudaMalloc(&mAnchors, MAX_ANCHORS * 2 * sizeof(float)));
39 | CHECK(cudaMemcpy(mAnchors, mAnchorsHost, mNumAnchors * 2 * sizeof(float), cudaMemcpyHostToDevice));
40 | }
41 |
42 | YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length)
43 | {
44 |         const char *d = reinterpret_cast<const char*>(data), *a = d;
45 | read(d, mThreadCount);
46 | read(d, mYoloWidth);
47 | read(d, mYoloHeight);
48 | read(d, mNumAnchors);
49 | memcpy(mAnchorsHost, d, MAX_ANCHORS * 2 * sizeof(float));
50 | d += MAX_ANCHORS * 2 * sizeof(float);
51 | read(d, mNumClasses);
52 | read(d, mInputWidth);
53 | read(d, mInputHeight);
54 | read(d, mScaleXY);
55 | read(d, mNewCoords);
56 |
57 | CHECK(cudaMalloc(&mAnchors, MAX_ANCHORS * 2 * sizeof(float)));
58 | CHECK(cudaMemcpy(mAnchors, mAnchorsHost, mNumAnchors * 2 * sizeof(float), cudaMemcpyHostToDevice));
59 |
60 | assert(d == a + length);
61 | }
62 |
63 | IPluginV2IOExt* YoloLayerPlugin::clone() const NOEXCEPT
64 | {
65 | YoloLayerPlugin *p = new YoloLayerPlugin(mYoloWidth, mYoloHeight, mNumAnchors, (float*) mAnchorsHost, mNumClasses, mInputWidth, mInputHeight, mScaleXY, mNewCoords);
66 | p->setPluginNamespace(mPluginNamespace);
67 | return p;
68 | }
69 |
70 | void YoloLayerPlugin::terminate() NOEXCEPT
71 | {
72 | CHECK(cudaFree(mAnchors));
73 | }
74 |
75 | size_t YoloLayerPlugin::getSerializationSize() const NOEXCEPT
76 | {
77 | return sizeof(mThreadCount) + \
78 | sizeof(mYoloWidth) + sizeof(mYoloHeight) + \
79 | sizeof(mNumAnchors) + MAX_ANCHORS * 2 * sizeof(float) + \
80 | sizeof(mNumClasses) + \
81 | sizeof(mInputWidth) + sizeof(mInputHeight) + \
82 | sizeof(mScaleXY) + sizeof(mNewCoords);
83 | }
84 |
85 | void YoloLayerPlugin::serialize(void* buffer) const NOEXCEPT
86 | {
87 |         char* d = static_cast<char*>(buffer), *a = d;
88 | write(d, mThreadCount);
89 | write(d, mYoloWidth);
90 | write(d, mYoloHeight);
91 | write(d, mNumAnchors);
92 | memcpy(d, mAnchorsHost, MAX_ANCHORS * 2 * sizeof(float));
93 | d += MAX_ANCHORS * 2 * sizeof(float);
94 | write(d, mNumClasses);
95 | write(d, mInputWidth);
96 | write(d, mInputHeight);
97 | write(d, mScaleXY);
98 | write(d, mNewCoords);
99 |
100 | assert(d == a + getSerializationSize());
101 | }
102 |
103 | Dims YoloLayerPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) NOEXCEPT
104 | {
105 | assert(index == 0);
106 | assert(nbInputDims == 1);
107 | assert(inputs[0].d[0] == (mNumClasses + 5) * mNumAnchors);
108 | assert(inputs[0].d[1] == mYoloHeight);
109 | assert(inputs[0].d[2] == mYoloWidth);
110 | // output detection results to the channel dimension
111 | int totalsize = mYoloWidth * mYoloHeight * mNumAnchors * sizeof(Detection) / sizeof(float);
112 | return Dims3(totalsize, 1, 1);
113 | }
114 |
115 | inline __device__ float sigmoidGPU(float x) { return 1.0f / (1.0f + __expf(-x)); }
116 |
117 | inline __device__ float scale_sigmoidGPU(float x, float s)
118 | {
119 | return s * sigmoidGPU(x) - (s - 1.0f) * 0.5f;
120 | }
121 |
122 | // CalDetection(): This kernel processes 1 yolo layer calculation. It
123 | // distributes calculations so that 1 GPU thread would be responsible
124 | // for each grid/anchor combination.
125 | // NOTE: The output (x, y, w, h) are between 0.0 and 1.0
126 |     //      (relative to original image width and height).
127 | __global__ void CalDetection(const float *input, float *output,
128 | int batch_size,
129 | int yolo_width, int yolo_height,
130 | int num_anchors, const float *anchors,
131 | int num_classes, int input_w, int input_h,
132 | float scale_x_y)
133 | {
134 | int idx = threadIdx.x + blockDim.x * blockIdx.x;
135 | Detection* det = ((Detection*) output) + idx;
136 | int total_grids = yolo_width * yolo_height;
137 | if (idx >= batch_size * total_grids * num_anchors) return;
138 |
139 | int info_len = 5 + num_classes;
140 | //int batch_idx = idx / (total_grids * num_anchors);
141 | int group_idx = idx / total_grids;
142 | int anchor_idx = group_idx % num_anchors;
143 | const float* cur_input = input + group_idx * (info_len * total_grids) + (idx % total_grids);
144 |
145 | int class_id;
146 | float max_cls_logit = -CUDART_INF_F; // minus infinity
147 | for (int i = 5; i < info_len; ++i) {
148 | float l = *(cur_input + i * total_grids);
149 | if (l > max_cls_logit) {
150 | max_cls_logit = l;
151 | class_id = i - 5;
152 | }
153 | }
154 | float max_cls_prob = sigmoidGPU(max_cls_logit);
155 | float box_prob = sigmoidGPU(*(cur_input + 4 * total_grids));
156 | //if (max_cls_prob < IGNORE_THRESH || box_prob < IGNORE_THRESH)
157 | // return;
158 |
159 | int row = (idx % total_grids) / yolo_width;
160 | int col = (idx % total_grids) % yolo_width;
161 |
162 | det->bbox[0] = (col + scale_sigmoidGPU(*(cur_input + 0 * total_grids), scale_x_y)) / yolo_width; // [0, 1]
163 | det->bbox[1] = (row + scale_sigmoidGPU(*(cur_input + 1 * total_grids), scale_x_y)) / yolo_height; // [0, 1]
164 | det->bbox[2] = __expf(*(cur_input + 2 * total_grids)) * *(anchors + 2 * anchor_idx + 0) / input_w; // [0, 1]
165 | det->bbox[3] = __expf(*(cur_input + 3 * total_grids)) * *(anchors + 2 * anchor_idx + 1) / input_h; // [0, 1]
166 |
167 | det->bbox[0] -= det->bbox[2] / 2; // shift from center to top-left
168 | det->bbox[1] -= det->bbox[3] / 2;
169 |
170 | det->det_confidence = box_prob;
171 | det->class_id = class_id;
172 | det->class_confidence = max_cls_prob;
173 | }
174 |
175 | inline __device__ float scale(float x, float s)
176 | {
177 | return s * x - (s - 1.0f) * 0.5f;
178 | }
179 |
180 | inline __device__ float square(float x)
181 | {
182 | return x * x;
183 | }
184 |
185 | __global__ void CalDetection_NewCoords(const float *input, float *output,
186 | int batch_size,
187 | int yolo_width, int yolo_height,
188 | int num_anchors, const float *anchors,
189 | int num_classes, int input_w, int input_h,
190 | float scale_x_y)
191 | {
192 | int idx = threadIdx.x + blockDim.x * blockIdx.x;
193 | Detection* det = ((Detection*) output) + idx;
194 | int total_grids = yolo_width * yolo_height;
195 | if (idx >= batch_size * total_grids * num_anchors) return;
196 |
197 | int info_len = 5 + num_classes;
198 | //int batch_idx = idx / (total_grids * num_anchors);
199 | int group_idx = idx / total_grids;
200 | int anchor_idx = group_idx % num_anchors;
201 | const float* cur_input = input + group_idx * (info_len * total_grids) + (idx % total_grids);
202 |
203 | int class_id;
204 | float max_cls_prob = -CUDART_INF_F; // minus infinity
205 | for (int i = 5; i < info_len; ++i) {
206 | float l = *(cur_input + i * total_grids);
207 | if (l > max_cls_prob) {
208 | max_cls_prob = l;
209 | class_id = i - 5;
210 | }
211 | }
212 | float box_prob = *(cur_input + 4 * total_grids);
213 | //if (max_cls_prob < IGNORE_THRESH || box_prob < IGNORE_THRESH)
214 | // return;
215 |
216 | int row = (idx % total_grids) / yolo_width;
217 | int col = (idx % total_grids) % yolo_width;
218 |
219 | det->bbox[0] = (col + scale(*(cur_input + 0 * total_grids), scale_x_y)) / yolo_width; // [0, 1]
220 | det->bbox[1] = (row + scale(*(cur_input + 1 * total_grids), scale_x_y)) / yolo_height; // [0, 1]
221 | det->bbox[2] = square(*(cur_input + 2 * total_grids)) * 4 * *(anchors + 2 * anchor_idx + 0) / input_w; // [0, 1]
222 | det->bbox[3] = square(*(cur_input + 3 * total_grids)) * 4 * *(anchors + 2 * anchor_idx + 1) / input_h; // [0, 1]
223 |
224 | det->bbox[0] -= det->bbox[2] / 2; // shift from center to top-left
225 | det->bbox[1] -= det->bbox[3] / 2;
226 |
227 | det->det_confidence = box_prob;
228 | det->class_id = class_id;
229 | det->class_confidence = max_cls_prob;
230 | }
231 |
232 | void YoloLayerPlugin::forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int batchSize)
233 | {
234 | int num_elements = batchSize * mNumAnchors * mYoloWidth * mYoloHeight;
235 |
236 | //CHECK(cudaMemset(output, 0, num_elements * sizeof(Detection)));
237 |
238 | if (mNewCoords) {
239 | CalDetection_NewCoords<<<(num_elements + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>>
240 | (inputs[0], output, batchSize, mYoloWidth, mYoloHeight, mNumAnchors, (const float*) mAnchors, mNumClasses, mInputWidth, mInputHeight, mScaleXY);
241 | } else {
242 | CalDetection<<<(num_elements + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>>
243 | (inputs[0], output, batchSize, mYoloWidth, mYoloHeight, mNumAnchors, (const float*) mAnchors, mNumClasses, mInputWidth, mInputHeight, mScaleXY);
244 | }
245 | }
246 |
247 | #if NV_TENSORRT_MAJOR == 8
248 | int32_t YoloLayerPlugin::enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) NOEXCEPT
249 | #else
250 | int YoloLayerPlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream)
251 | #endif
252 | {
253 | forwardGpu((const float* const*)inputs, (float*)outputs[0], stream, batchSize);
254 | return 0;
255 | }
256 |
257 | YoloPluginCreator::YoloPluginCreator()
258 | {
259 | mPluginAttributes.clear();
260 |
261 | mFC.nbFields = mPluginAttributes.size();
262 | mFC.fields = mPluginAttributes.data();
263 | }
264 |
265 | const char* YoloPluginCreator::getPluginName() const NOEXCEPT
266 | {
267 | return "YoloLayer_TRT";
268 | }
269 |
270 | const char* YoloPluginCreator::getPluginVersion() const NOEXCEPT
271 | {
272 | return "1";
273 | }
274 |
275 | const PluginFieldCollection* YoloPluginCreator::getFieldNames() NOEXCEPT
276 | {
277 | return &mFC;
278 | }
279 |
280 | IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) NOEXCEPT
281 | {
282 | assert(!strcmp(name, getPluginName()));
283 | const PluginField* fields = fc->fields;
284 | int yolo_width, yolo_height, num_anchors = 0;
285 | float anchors[MAX_ANCHORS * 2];
286 | int num_classes, input_multiplier, new_coords = 0;
287 | float scale_x_y = 1.0;
288 |
289 | for (int i = 0; i < fc->nbFields; ++i)
290 | {
291 | const char* attrName = fields[i].name;
292 | if (!strcmp(attrName, "yoloWidth"))
293 | {
294 | assert(fields[i].type == PluginFieldType::kINT32);
295 |             yolo_width = *(static_cast<const int*>(fields[i].data));
296 | }
297 | else if (!strcmp(attrName, "yoloHeight"))
298 | {
299 | assert(fields[i].type == PluginFieldType::kINT32);
300 |             yolo_height = *(static_cast<const int*>(fields[i].data));
301 | }
302 | else if (!strcmp(attrName, "numAnchors"))
303 | {
304 | assert(fields[i].type == PluginFieldType::kINT32);
305 |             num_anchors = *(static_cast<const int*>(fields[i].data));
306 | }
307 | else if (!strcmp(attrName, "numClasses"))
308 | {
309 | assert(fields[i].type == PluginFieldType::kINT32);
310 |             num_classes = *(static_cast<const int*>(fields[i].data));
311 | }
312 | else if (!strcmp(attrName, "inputMultiplier"))
313 | {
314 | assert(fields[i].type == PluginFieldType::kINT32);
315 |             input_multiplier = *(static_cast<const int*>(fields[i].data));
316 | }
317 | else if (!strcmp(attrName, "anchors")){
318 | assert(num_anchors > 0 && num_anchors <= MAX_ANCHORS);
319 | assert(fields[i].type == PluginFieldType::kFLOAT32);
320 |             memcpy(anchors, static_cast<const float*>(fields[i].data), num_anchors * 2 * sizeof(float));
321 | }
322 | else if (!strcmp(attrName, "scaleXY"))
323 | {
324 | assert(fields[i].type == PluginFieldType::kFLOAT32);
325 |             scale_x_y = *(static_cast<const float*>(fields[i].data));
326 | }
327 | else if (!strcmp(attrName, "newCoords"))
328 | {
329 | assert(fields[i].type == PluginFieldType::kINT32);
330 |             new_coords = *(static_cast<const int*>(fields[i].data));
331 | }
332 | else
333 | {
334 | std::cerr << "Unknown attribute: " << attrName << std::endl;
335 | assert(0);
336 | }
337 | }
338 | assert(yolo_width > 0 && yolo_height > 0);
339 | assert(anchors[0] > 0.0f && anchors[1] > 0.0f);
340 | assert(num_classes > 0);
341 | assert(input_multiplier == 128 || input_multiplier == 64 ||
342 | input_multiplier == 32 || input_multiplier == 16 ||
343 | input_multiplier == 8);
344 | assert(scale_x_y >= 1.0);
345 |
346 | YoloLayerPlugin* obj = new YoloLayerPlugin(yolo_width, yolo_height, num_anchors, anchors, num_classes, yolo_width * input_multiplier, yolo_height * input_multiplier, scale_x_y, new_coords);
347 | obj->setPluginNamespace(mNamespace.c_str());
348 | return obj;
349 | }
350 |
351 | IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) NOEXCEPT
352 | {
353 | YoloLayerPlugin* obj = new YoloLayerPlugin(serialData, serialLength);
354 | obj->setPluginNamespace(mNamespace.c_str());
355 | return obj;
356 | }
357 |
358 | PluginFieldCollection YoloPluginCreator::mFC{};
359 | std::vector<PluginField> YoloPluginCreator::mPluginAttributes;
360 | } // namespace nvinfer1
361 |
--------------------------------------------------------------------------------
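
The box decode in CalDetection above is the standard YOLO formulation. For reference, here is the same arithmetic for a single grid cell and anchor written as a plain NumPy sketch (for readability only; it is not used by the plugin):

```python
# Plain NumPy mirror of the CalDetection box decode (single cell/anchor), for reference only.
import numpy as np

def sigmoid(x):
    return 1. / (1. + np.exp(-x))

def decode_box(tx, ty, tw, th, col, row, anchor_w, anchor_h,
               yolo_w, yolo_h, input_w, input_h, scale_x_y=1.0):
    # center coordinates in [0, 1], relative to the input image
    cx = (col + scale_x_y * sigmoid(tx) - (scale_x_y - 1.) * 0.5) / yolo_w
    cy = (row + scale_x_y * sigmoid(ty) - (scale_x_y - 1.) * 0.5) / yolo_h
    # width/height in [0, 1], scaled by the anchor priors
    w = np.exp(tw) * anchor_w / input_w
    h = np.exp(th) * anchor_h / input_h
    # shift from center to top-left, matching the kernel
    return cx - w / 2., cy - h / 2., w, h
```
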
/fastmot/plugins/yolo_layer.h:
--------------------------------------------------------------------------------
1 | #ifndef _YOLO_LAYER_H
2 | #define _YOLO_LAYER_H
3 |
4 | #include <cassert>
5 | #include <cstring>
6 | #include <iostream>
7 | #include <vector>
8 | #include "math_constants.h"
9 | #include "NvInfer.h"
10 |
11 | #define MAX_ANCHORS 6
12 |
13 | #if NV_TENSORRT_MAJOR >= 8
14 | #define NOEXCEPT noexcept
15 | #else
16 | #define NOEXCEPT
17 | #endif
18 |
19 | #define CHECK(status) \
20 | do { \
21 | auto ret = status; \
22 | if (ret != 0) { \
23 | std::cerr << "Cuda failure in file '" << __FILE__ \
24 | << "' line " << __LINE__ \
25 | << ": " << ret << std::endl; \
26 | abort(); \
27 | } \
28 | } while (0)
29 |
30 | namespace Yolo
31 | {
32 | static constexpr float IGNORE_THRESH = 0.01f;
33 |
34 | struct alignas(float) Detection {
35 | float bbox[4]; // x, y, w, h
36 | float det_confidence;
37 | float class_id;
38 | float class_confidence;
39 | };
40 | }
41 |
42 | namespace nvinfer1
43 | {
44 | class YoloLayerPlugin: public IPluginV2IOExt
45 | {
46 | public:
47 | YoloLayerPlugin(int yolo_width, int yolo_height, int num_anchors, float* anchors, int num_classes, int input_width, int input_height, float scale_x_y, int new_coords);
48 | YoloLayerPlugin(const void* data, size_t length);
49 |
50 | ~YoloLayerPlugin() override = default;
51 |
52 | IPluginV2IOExt* clone() const NOEXCEPT override;
53 |
54 | int initialize() NOEXCEPT override { return 0; }
55 |
56 | void terminate() NOEXCEPT override;
57 |
58 | void destroy() NOEXCEPT override { delete this; }
59 |
60 | size_t getSerializationSize() const NOEXCEPT override;
61 |
62 | void serialize(void* buffer) const NOEXCEPT override;
63 |
64 | int getNbOutputs() const NOEXCEPT override { return 1; }
65 |
66 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) NOEXCEPT override;
67 |
68 | size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override { return 0; }
69 |
70 | bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const NOEXCEPT override { return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; }
71 |
72 | const char* getPluginType() const NOEXCEPT override { return "YoloLayer_TRT"; }
73 |
74 | const char* getPluginVersion() const NOEXCEPT override { return "1"; }
75 |
76 | void setPluginNamespace(const char* pluginNamespace) NOEXCEPT override { mPluginNamespace = pluginNamespace; }
77 |
78 | const char* getPluginNamespace() const NOEXCEPT override { return mPluginNamespace; }
79 |
80 | DataType getOutputDataType(int index, const DataType* inputTypes, int nbInputs) const NOEXCEPT override { return DataType::kFLOAT; }
81 |
82 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const NOEXCEPT override { return false; }
83 |
84 | bool canBroadcastInputAcrossBatch(int inputIndex) const NOEXCEPT override { return false; }
85 |
86 | void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) NOEXCEPT override {}
87 |
88 | //using IPluginV2IOExt::configurePlugin;
89 | void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) NOEXCEPT override {}
90 |
91 | void detachFromContext() NOEXCEPT override {}
92 |
93 | #if NV_TENSORRT_MAJOR >= 8
94 | int32_t enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) NOEXCEPT override;
95 | #else
96 | int enqueue(int batchSize, const void* const * inputs, void** outputs, void* workspace, cudaStream_t stream) NOEXCEPT override;
97 | #endif
98 |
99 | private:
100 | void forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int batchSize = 1);
101 |
102 | int mThreadCount = 64;
103 | int mYoloWidth, mYoloHeight, mNumAnchors;
104 | float mAnchorsHost[MAX_ANCHORS * 2];
105 | float *mAnchors; // allocated on GPU
106 | int mNumClasses;
107 | int mInputWidth, mInputHeight;
108 | float mScaleXY;
109 | int mNewCoords = 0;
110 |
111 | const char* mPluginNamespace;
112 | };
113 |
114 | class YoloPluginCreator : public IPluginCreator
115 | {
116 | public:
117 | YoloPluginCreator();
118 |
119 | ~YoloPluginCreator() override = default;
120 |
121 | const char* getPluginName() const NOEXCEPT override;
122 |
123 | const char* getPluginVersion() const NOEXCEPT override;
124 |
125 | const PluginFieldCollection* getFieldNames() NOEXCEPT override;
126 |
127 | IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) NOEXCEPT override;
128 |
129 | IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) NOEXCEPT override;
130 |
131 | void setPluginNamespace(const char* libNamespace) NOEXCEPT override
132 | {
133 | mNamespace = libNamespace;
134 | }
135 |
136 | const char* getPluginNamespace() const NOEXCEPT override
137 | {
138 | return mNamespace.c_str();
139 | }
140 |
141 | private:
142 | static PluginFieldCollection mFC;
143 |     static std::vector<PluginField> mPluginAttributes;
144 | std::string mNamespace;
145 | };
146 |
147 | REGISTER_TENSORRT_PLUGIN(YoloPluginCreator);
148 | };
149 |
150 | #endif
151 |
--------------------------------------------------------------------------------
/fastmot/track.py:
--------------------------------------------------------------------------------
1 | from collections import deque
2 | import numpy as np
3 | import numba as nb
4 |
5 | from .models import get_label_name
6 | from .utils.distance import cdist, cosine
7 | from .utils.numba import apply_along_axis, normalize_vec
8 | from .utils.rect import get_center
9 |
10 |
11 | class ClusterFeature:
12 | def __init__(self, num_clusters, metric):
13 | self.num_clusters = num_clusters
14 | self.metric = metric
15 | self.clusters = None
16 | self.cluster_sizes = None
17 | self._next_idx = 0
18 |
19 | def __len__(self):
20 | return self._next_idx
21 |
22 | def __call__(self):
23 | return self.clusters[:self._next_idx]
24 |
25 | def update(self, embedding):
26 | if self._next_idx < self.num_clusters:
27 | if self.clusters is None:
28 | self.clusters = np.empty((self.num_clusters, len(embedding)), embedding.dtype)
29 | self.cluster_sizes = np.zeros(self.num_clusters, int)
30 | self.clusters[self._next_idx] = embedding
31 | self.cluster_sizes[self._next_idx] += 1
32 | self._next_idx += 1
33 | else:
34 | nearest_idx = self._get_nearest_cluster(self.clusters, embedding)
35 | self.cluster_sizes[nearest_idx] += 1
36 | self._seq_kmeans(self.clusters, self.cluster_sizes, embedding, nearest_idx)
37 |
38 | def distance(self, embeddings):
39 | if self.clusters is None:
40 | return np.ones(len(embeddings))
41 | clusters = normalize_vec(self.clusters[:self._next_idx])
42 | return apply_along_axis(np.min, cdist(clusters, embeddings, self.metric), axis=0)
43 |
44 | def merge(self, features, other, other_features):
45 | if len(features) > len(other_features):
46 | for feature in other_features:
47 | if feature is not None:
48 | self.update(feature)
49 | else:
50 | for feature in features:
51 | if feature is not None:
52 | other.update(feature)
53 | self.clusters = other.clusters.copy()
54 |             self.cluster_sizes = other.cluster_sizes.copy()
55 | self._next_idx = other._next_idx
56 |
57 | @staticmethod
58 | @nb.njit(fastmath=True, cache=True)
59 | def _get_nearest_cluster(clusters, embedding):
60 | return np.argmin(cosine(np.atleast_2d(embedding), clusters))
61 |
62 | @staticmethod
63 | @nb.njit(fastmath=True, cache=True)
64 | def _seq_kmeans(clusters, cluster_sizes, embedding, idx):
65 | div_size = 1. / cluster_sizes[idx]
66 | clusters[idx] += (embedding - clusters[idx]) * div_size
67 |
68 |
69 | class SmoothFeature:
70 | def __init__(self, learning_rate):
71 | self.lr = learning_rate
72 | self.smooth = None
73 |
74 | def __call__(self):
75 | return self.smooth
76 |
77 | def update(self, embedding):
78 | if self.smooth is None:
79 | self.smooth = embedding.copy()
80 | else:
81 | self._rolling(self.smooth, embedding, self.lr)
82 |
83 | @staticmethod
84 | @nb.njit(fastmath=True, cache=True)
85 | def _rolling(smooth, embedding, lr):
86 | smooth[:] = (1. - lr) * smooth + lr * embedding
87 | norm_factor = 1. / np.linalg.norm(smooth)
88 | smooth *= norm_factor
89 |
90 |
91 | class AverageFeature:
92 | def __init__(self):
93 | self.sum = None
94 | self.avg = None
95 | self.count = 0
96 |
97 | def __call__(self):
98 | return self.avg
99 |
100 | def is_valid(self):
101 | return self.count > 0
102 |
103 | def update(self, embedding):
104 | self.count += 1
105 | if self.sum is None:
106 | self.sum = embedding.copy()
107 | self.avg = embedding.copy()
108 | else:
109 | self._average(self.sum, self.avg, embedding, self.count)
110 |
111 | def merge(self, other):
112 | self.count += other.count
113 | if self.sum is None:
114 | self.sum = other.sum
115 | self.avg = other.avg
116 | elif other.sum is not None:
117 | self._average(self.sum, self.avg, other.sum, self.count)
118 |
119 | @staticmethod
120 | @nb.njit(fastmath=True, cache=True)
121 | def _average(sum, avg, vec, count):
122 | sum += vec
123 | div_cnt = 1. / count
124 | avg[:] = sum * div_cnt
125 | norm_factor = 1. / np.linalg.norm(avg)
126 | avg *= norm_factor
127 |
128 |
129 | class Track:
130 | _count = 0
131 |
132 | def __init__(self, frame_id, tlbr, state, label, confirm_hits=1, buffer_size=30):
133 | self.trk_id = self.next_id()
134 | self.start_frame = frame_id
135 | self.frame_ids = deque([frame_id], maxlen=buffer_size)
136 | self.bboxes = deque([tlbr], maxlen=buffer_size)
137 | self.confirm_hits = confirm_hits
138 | self.state = state
139 | self.label = label
140 |
141 | self.age = 0
142 | self.hits = 0
143 | self.avg_feat = AverageFeature()
144 | self.last_feat = None
145 |
146 | self.inlier_ratio = 1.
147 | self.keypoints = np.empty((0, 2), np.float32)
148 | self.prev_keypoints = np.empty((0, 2), np.float32)
149 |
150 | def __str__(self):
151 | x, y = get_center(self.tlbr)
152 | return f'{get_label_name(self.label):<10} {self.trk_id:>3} at ({int(x):>4}, {int(y):>4})'
153 |
154 | def __repr__(self):
155 | return self.__str__()
156 |
157 | def __len__(self):
158 | return self.end_frame - self.start_frame
159 |
160 | def __lt__(self, other):
161 | # ordered by approximate distance to the image plane, closer is greater
162 | return (self.tlbr[-1], -self.age) < (other.tlbr[-1], -other.age)
163 |
164 | @property
165 | def tlbr(self):
166 | return self.bboxes[-1]
167 |
168 | @property
169 | def end_frame(self):
170 | return self.frame_ids[-1]
171 |
172 | @property
173 | def active(self):
174 | return self.age < 2
175 |
176 | @property
177 | def confirmed(self):
178 | return self.hits >= self.confirm_hits
179 |
180 | def update(self, tlbr, state):
181 | self.bboxes.append(tlbr)
182 | self.state = state
183 |
184 | def add_detection(self, frame_id, tlbr, state, embedding, is_valid=True):
185 | self.frame_ids.append(frame_id)
186 | self.bboxes.append(tlbr)
187 | self.state = state
188 | if is_valid:
189 | self.last_feat = embedding
190 | self.avg_feat.update(embedding)
191 | self.age = 0
192 | self.hits += 1
193 |
194 | def reinstate(self, frame_id, tlbr, state, embedding):
195 | self.start_frame = frame_id
196 | self.frame_ids.append(frame_id)
197 | self.bboxes.append(tlbr)
198 | self.state = state
199 | self.last_feat = embedding
200 | self.avg_feat.update(embedding)
201 | self.age = 0
202 | self.keypoints = np.empty((0, 2), np.float32)
203 | self.prev_keypoints = np.empty((0, 2), np.float32)
204 |
205 | def mark_missed(self):
206 | self.age += 1
207 |
208 | def merge_continuation(self, other):
209 | self.frame_ids.extend(other.frame_ids)
210 | self.bboxes.extend(other.bboxes)
211 | self.state = other.state
212 | self.age = other.age
213 | self.hits += other.hits
214 |
215 | self.keypoints = other.keypoints
216 | self.prev_keypoints = other.prev_keypoints
217 |
218 | if other.last_feat is not None:
219 | self.last_feat = other.last_feat
220 | self.avg_feat.merge(other.avg_feat)
221 |
222 | @staticmethod
223 | def next_id():
224 | Track._count += 1
225 | return Track._count
226 |
--------------------------------------------------------------------------------
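
ClusterFeature above maintains a small set of centroids over a track's ReID embeddings and folds each new sample into the nearest centroid with a running mean (sequential k-means). A toy NumPy mirror of that update, with made-up sizes:

```python
# Toy mirror of ClusterFeature._seq_kmeans: move the nearest centroid toward a new sample.
import numpy as np

clusters = np.random.rand(4, 128).astype(np.float32)         # 4 centroids of 128-D embeddings
clusters /= np.linalg.norm(clusters, axis=1, keepdims=True)  # embeddings are L2-normalized
cluster_sizes = np.ones(4, np.int64)

def seq_kmeans_update(embedding):
    # pick the nearest centroid by cosine similarity (everything L2-normalized)
    idx = int(np.argmax(clusters @ embedding))
    cluster_sizes[idx] += 1
    # running mean, the same formula as _seq_kmeans: centroid += (sample - centroid) / count
    clusters[idx] += (embedding - clusters[idx]) / cluster_sizes[idx]

emb = np.random.rand(128).astype(np.float32)
emb /= np.linalg.norm(emb)
seq_kmeans_update(emb)
```
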
/fastmot/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .inference import TRTInference
2 | from .decoder import ConfigDecoder
3 | from .profiler import Profiler
--------------------------------------------------------------------------------
/fastmot/utils/decoder.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 |
4 | class ConfigDecoder(json.JSONDecoder):
5 | def __init__(self, **kwargs):
6 | json.JSONDecoder.__init__(self, **kwargs)
7 | # Use the custom JSONArray
8 | self.parse_array = self.JSONArray
9 |         # Use the pure-Python implementation of the scanner
10 | self.scan_once = json.scanner.py_make_scanner(self)
11 |
12 | def JSONArray(self, s_and_end, scan_once, **kwargs):
13 | values, end = json.decoder.JSONArray(s_and_end, scan_once, **kwargs)
14 | return tuple(values), end
15 |
--------------------------------------------------------------------------------
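
ConfigDecoder only swaps in a custom array parser so that JSON arrays decode as tuples, keeping nested config values immutable. A small sketch of how it combines with an object_hook, much as the repository's app.py does when reading cfg/mot.json; the config snippet is made up and the example assumes the package imports cleanly:

```python
import json
from types import SimpleNamespace
from fastmot.utils.decoder import ConfigDecoder

text = '{"resize_to": [1280, 720], "conf_thresh": 0.25}'   # made-up config snippet
cfg = json.loads(text, cls=ConfigDecoder, object_hook=lambda d: SimpleNamespace(**d))
print(cfg.resize_to)    # (1280, 720) -- decoded as a tuple rather than a list
```
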
/fastmot/utils/distance.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | import numpy as np
3 | import numba as nb
4 |
5 | from .rect import area, get_center
6 |
7 |
8 | INF_DIST = 1e5
9 |
10 |
11 | class Metric(Enum):
12 | EUCLIDEAN = 0
13 | COSINE = 1
14 |
15 |
16 | @nb.njit(parallel=True, fastmath=True, cache=True)
17 | def cdist(XA, XB, metric, empty_mask=None, fill_val=None):
18 | """Numba implementation of Scipy's cdist"""
19 | assert XA.ndim == XB.ndim == 2
20 | assert XA.shape[1] == XB.shape[1]
21 | if empty_mask is not None:
22 | assert empty_mask.ndim == 2
23 | assert empty_mask.shape[0] == XA.shape[0]
24 | assert empty_mask.shape[1] == XB.shape[0]
25 | filler = 1. if fill_val is None else fill_val
26 |
27 | if metric == Metric.EUCLIDEAN:
28 | return euclidean(XA, XB, empty_mask, filler)
29 | elif metric == Metric.COSINE:
30 | return cosine(XA, XB, empty_mask, filler)
31 | else:
32 | raise ValueError('Unsupported distance metric')
33 |
34 |
35 | @nb.njit(parallel=True, fastmath=True, cache=True)
36 | def pdist(X, metric):
37 | """Numba implementation of Scipy's pdist"""
38 | assert X.ndim == 2
39 |
40 | if metric == Metric.EUCLIDEAN:
41 | return euclidean(X, X, symmetric=True)
42 | elif metric == Metric.COSINE:
43 | return cosine(X, X, symmetric=True)
44 | else:
45 | raise ValueError('Unsupported distance metric')
46 |
47 |
48 | @nb.njit(parallel=True, fastmath=True, cache=True, inline='always')
49 | def euclidean(XA, XB, empty_mask=None, filler=1., symmetric=False):
50 | """Numba implementation of Scipy's euclidean"""
51 | Y = np.empty((XA.shape[0], XB.shape[0]))
52 | for i in nb.prange(XA.shape[0]):
53 | for j in range(XB.shape[0]):
54 | if symmetric and i >= j:
55 | Y[i, j] = INF_DIST
56 | elif empty_mask is not None and empty_mask[i, j]:
57 | Y[i, j] = filler
58 | else:
59 | norm = 0.
60 | for k in range(XA.shape[1]):
61 | norm += (XA[i, k] - XB[j, k])**2
62 | Y[i, j] = np.sqrt(norm)
63 | return Y
64 |
65 |
66 | @nb.njit(parallel=True, fastmath=True, cache=True, inline='always')
67 | def cosine(XA, XB, empty_mask=None, filler=1., symmetric=False):
68 | """Numba implementation of Scipy's cosine"""
69 | Y = np.empty((XA.shape[0], XB.shape[0]))
70 | for i in nb.prange(XA.shape[0]):
71 | for j in range(XB.shape[0]):
72 | if symmetric and i >= j:
73 | Y[i, j] = INF_DIST
74 | elif empty_mask is not None and empty_mask[i, j]:
75 | Y[i, j] = filler
76 | else:
77 | dot = 0.
78 | a_norm = 0.
79 | b_norm = 0.
80 | for k in range(XA.shape[1]):
81 | dot += XA[i, k] * XB[j, k]
82 | a_norm += XA[i, k] * XA[i, k]
83 | b_norm += XB[j, k] * XB[j, k]
84 | a_norm = np.sqrt(a_norm)
85 | b_norm = np.sqrt(b_norm)
86 | Y[i, j] = 1. - dot / (a_norm * b_norm)
87 | return Y
88 |
89 |
90 | @nb.njit(parallel=False, fastmath=True, cache=True)
91 | def iou_dist(tlbrs1, tlbrs2):
92 | """Computes pairwise IoU distance."""
93 | assert tlbrs1.ndim == tlbrs2.ndim == 2
94 | assert tlbrs1.shape[1] == tlbrs2.shape[1] == 4
95 |
96 | Y = np.empty((tlbrs1.shape[0], tlbrs2.shape[0]))
97 | for i in nb.prange(tlbrs1.shape[0]):
98 | area1 = area(tlbrs1[i, :])
99 | for j in range(tlbrs2.shape[0]):
100 | iw = min(tlbrs1[i, 2], tlbrs2[j, 2]) - max(tlbrs1[i, 0], tlbrs2[j, 0]) + 1
101 | ih = min(tlbrs1[i, 3], tlbrs2[j, 3]) - max(tlbrs1[i, 1], tlbrs2[j, 1]) + 1
102 | if iw > 0 and ih > 0:
103 | area_inter = iw * ih
104 | area_union = area1 + area(tlbrs2[j, :]) - area_inter
105 | Y[i, j] = 1. - area_inter / area_union
106 | else:
107 | Y[i, j] = 1.
108 | return Y
109 |
110 |
111 | @nb.njit(parallel=False, fastmath=True, cache=True)
112 | def giou_dist(tlbrs1, tlbrs2):
113 | """Computes pairwise GIoU distance."""
114 | assert tlbrs1.ndim == tlbrs2.ndim == 2
115 | assert tlbrs1.shape[1] == tlbrs2.shape[1] == 4
116 |
117 | Y = np.empty((tlbrs1.shape[0], tlbrs2.shape[0]))
118 | for i in nb.prange(tlbrs1.shape[0]):
119 | area1 = area(tlbrs1[i, :])
120 | for j in range(tlbrs2.shape[0]):
121 | iou = 0.
122 | area_union = area1 + area(tlbrs2[j, :])
123 | iw = min(tlbrs1[i, 2], tlbrs2[j, 2]) - max(tlbrs1[i, 0], tlbrs2[j, 0]) + 1
124 | ih = min(tlbrs1[i, 3], tlbrs2[j, 3]) - max(tlbrs1[i, 1], tlbrs2[j, 1]) + 1
125 | if iw > 0 and ih > 0:
126 | area_inter = iw * ih
127 | area_union -= area_inter
128 | iou = area_inter / area_union
129 | ew = max(tlbrs1[i, 2], tlbrs2[j, 2]) - min(tlbrs1[i, 0], tlbrs2[j, 0]) + 1
130 | eh = max(tlbrs1[i, 3], tlbrs2[j, 3]) - min(tlbrs1[i, 1], tlbrs2[j, 1]) + 1
131 | area_encls = ew * eh
132 | giou = iou - (area_encls - area_union) / area_encls
133 | Y[i, j] = (1. - giou) * 0.5
134 | return Y
135 |
136 |
137 | @nb.njit(parallel=True, fastmath=True, cache=True)
138 | def diou_dist(tlbrs1, tlbrs2):
139 | """Computes pairwise DIoU distance."""
140 | assert tlbrs1.ndim == tlbrs2.ndim == 2
141 | assert tlbrs1.shape[1] == tlbrs2.shape[1] == 4
142 |
143 | Y = np.empty((tlbrs1.shape[0], tlbrs2.shape[0]))
144 | for i in nb.prange(tlbrs1.shape[0]):
145 | area1 = area(tlbrs1[i, :])
146 | x1, y1 = get_center(tlbrs1[i, :])
147 | for j in range(tlbrs2.shape[0]):
148 | iou = 0.
149 | iw = min(tlbrs1[i, 2], tlbrs2[j, 2]) - max(tlbrs1[i, 0], tlbrs2[j, 0]) + 1
150 | ih = min(tlbrs1[i, 3], tlbrs2[j, 3]) - max(tlbrs1[i, 1], tlbrs2[j, 1]) + 1
151 | if iw > 0 and ih > 0:
152 | area_inter = iw * ih
153 | area_union = area1 + area(tlbrs2[j, :]) - area_inter
154 | iou = area_inter / area_union
155 | ew = max(tlbrs1[i, 2], tlbrs2[j, 2]) - min(tlbrs1[i, 0], tlbrs2[j, 0]) + 1
156 | eh = max(tlbrs1[i, 3], tlbrs2[j, 3]) - min(tlbrs1[i, 1], tlbrs2[j, 1]) + 1
157 | c = ew**2 + eh**2
158 | x2, y2 = get_center(tlbrs2[j, :])
159 | d = (x2 - x1)**2 + (y2 - y1)**2
160 | diou = iou - (d / c)**0.6
161 | Y[i, j] = (1. - diou) * 0.5
162 | return Y
163 |
--------------------------------------------------------------------------------
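
A quick sketch of how these Numba kernels are called (the first call triggers JIT compilation, and the package must be importable with its dependencies); the array sizes are arbitrary:

```python
import numpy as np
from fastmot.utils.distance import Metric, cdist, iou_dist

embs_a = np.random.rand(3, 128)               # e.g. 3 track embeddings
embs_b = np.random.rand(5, 128)               # e.g. 5 detection embeddings
cost = cdist(embs_a, embs_b, Metric.COSINE)   # (3, 5) pairwise cosine distances

boxes1 = np.array([[0., 0., 99., 99.]])       # tlbr boxes with inclusive corners
boxes2 = np.array([[50., 50., 149., 149.]])
print(iou_dist(boxes1, boxes2))               # ~0.86, i.e. 1 - IoU
```
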
/fastmot/utils/inference.py:
--------------------------------------------------------------------------------
1 | import ctypes
2 | import cupy as cp
3 | import cupyx
4 | import tensorrt as trt
5 |
6 |
7 | class HostDeviceMem:
8 | def __init__(self, size, dtype):
9 | self.size = size
10 | self.dtype = dtype
11 | self.host = cupyx.empty_pinned(size, dtype)
12 | self.device = cp.empty(size, dtype)
13 |
14 | def __str__(self):
15 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
16 |
17 | def __repr__(self):
18 | return self.__str__()
19 |
20 | @property
21 | def nbytes(self):
22 | return self.host.nbytes
23 |
24 | @property
25 | def hostptr(self):
26 | return self.host.ctypes.data
27 |
28 | @property
29 | def devptr(self):
30 | return self.device.data.ptr
31 |
32 | def copy_htod_async(self, stream):
33 | self.device.data.copy_from_host_async(self.hostptr, self.nbytes, stream)
34 |
35 | def copy_dtoh_async(self, stream):
36 | self.device.data.copy_to_host_async(self.hostptr, self.nbytes, stream)
37 |
38 |
39 | class TRTInference:
40 | # initialize TensorRT
41 | TRT_LOGGER = trt.Logger(trt.Logger.ERROR)
42 | trt.init_libnvinfer_plugins(TRT_LOGGER, '')
43 |
44 | def __init__(self, model, batch_size):
45 | self.model = model
46 | self.batch_size = batch_size
47 |
48 | # load plugin if the model requires one
49 | if self.model.PLUGIN_PATH is not None:
50 | try:
51 | ctypes.cdll.LoadLibrary(self.model.PLUGIN_PATH)
52 | except OSError as err:
53 | raise RuntimeError('Plugin not found') from err
54 |
55 | # load trt engine or build one if not found
56 | if not self.model.ENGINE_PATH.exists():
57 | self.engine = self.model.build_engine(TRTInference.TRT_LOGGER, self.batch_size)
58 | else:
59 | runtime = trt.Runtime(TRTInference.TRT_LOGGER)
60 | with open(self.model.ENGINE_PATH, 'rb') as engine_file:
61 | self.engine = runtime.deserialize_cuda_engine(engine_file.read())
62 | if self.engine is None:
63 | raise RuntimeError('Unable to load the engine file')
64 | if self.engine.has_implicit_batch_dimension:
65 | assert self.batch_size <= self.engine.max_batch_size
66 | self.context = self.engine.create_execution_context()
67 | self.stream = cp.cuda.Stream()
68 |
69 | # allocate buffers
70 | self.bindings = []
71 | self.outputs = []
72 | self.input = None
73 | for binding in self.engine:
74 | shape = self.engine.get_binding_shape(binding)
75 | size = trt.volume(shape)
76 | if self.engine.has_implicit_batch_dimension:
77 | size *= self.batch_size
78 | dtype = trt.nptype(self.engine.get_binding_dtype(binding))
79 | # allocate host and device buffers
80 | buffer = HostDeviceMem(size, dtype)
81 | # append the device buffer to device bindings
82 | self.bindings.append(buffer.devptr)
83 | if self.engine.binding_is_input(binding):
84 | if not self.engine.has_implicit_batch_dimension:
85 | assert self.batch_size == shape[0]
86 | # expect one input
87 | self.input = buffer
88 | else:
89 | self.outputs.append(buffer)
90 | assert self.input is not None
91 |
92 | # timing events
93 | self.start = cp.cuda.Event()
94 | self.end = cp.cuda.Event()
95 |
96 | def __del__(self):
97 | if hasattr(self, 'context'):
98 | self.context.__del__()
99 | if hasattr(self, 'engine'):
100 | self.engine.__del__()
101 |
102 | def infer(self):
103 | self.infer_async()
104 | return self.synchronize()
105 |
106 | def infer_async(self, from_device=False):
107 | self.start.record(self.stream)
108 | if not from_device:
109 | self.input.copy_htod_async(self.stream)
110 | if self.engine.has_implicit_batch_dimension:
111 | self.context.execute_async(batch_size=self.batch_size, bindings=self.bindings,
112 | stream_handle=self.stream.ptr)
113 | else:
114 | self.context.execute_async_v2(bindings=self.bindings, stream_handle=self.stream.ptr)
115 | for out in self.outputs:
116 | out.copy_dtoh_async(self.stream)
117 | self.end.record(self.stream)
118 |
119 | def synchronize(self):
120 | self.stream.synchronize()
121 | return [out.host for out in self.outputs]
122 |
123 | def get_infer_time(self):
124 | self.end.synchronize()
125 | return cp.cuda.get_elapsed_time(self.start, self.end)
126 |
--------------------------------------------------------------------------------
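
TRTInference is generic over a "model" object, and everything it needs is referenced above: PLUGIN_PATH, ENGINE_PATH, build_engine, and a batch size. In the repository these are the classes under fastmot/models; the sketch below only spells out that minimal interface and is not part of the code base:

```python
# Minimal interface a model class needs for TRTInference (names inferred from usage above).
from pathlib import Path

class DummyModel:
    PLUGIN_PATH = None                    # path to a custom TensorRT plugin .so, or None
    ENGINE_PATH = Path('dummy.trt')       # serialized engine, built on first run if missing

    @classmethod
    def build_engine(cls, trt_logger, batch_size):
        """Build and return a tensorrt.ICudaEngine, e.g. by parsing an ONNX file."""
        raise NotImplementedError

# backend = TRTInference(DummyModel, batch_size=1)
# backend.input.host[:] = ...             # fill the pinned input buffer
# outputs = backend.infer()               # copy input, run the engine, return host outputs
```
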
/fastmot/utils/matching.py:
--------------------------------------------------------------------------------
1 | from scipy.optimize import linear_sum_assignment
2 | import numpy as np
3 | import numba as nb
4 |
5 |
6 | CHI_SQ_INV_95 = 9.4877  # 0.95 quantile of the chi-square distribution with 4 degrees of freedom
7 | INF_COST = 1e5
8 |
9 |
10 | def linear_assignment(cost, row_ids, col_ids):
11 | """Solves the linear assignment problem.
12 |
13 | Parameters
14 | ----------
15 | cost : ndarray
16 | The cost matrix.
17 | row_ids : List[int]
18 | IDs that correspond to each row in the cost matrix.
19 | col_ids : List[int]
20 | IDs that correspond to each column in the cost matrix.
21 |
22 | Returns
23 | -------
24 | List[tuple], List[int], List[int]
25 | Matched row and column IDs, unmatched row IDs, and unmatched column IDs.
26 | """
27 | m_rows, m_cols = linear_sum_assignment(cost)
28 | row_ids = np.fromiter(row_ids, int, len(row_ids))
29 | col_ids = np.fromiter(col_ids, int, len(col_ids))
30 | return _get_assignment_matches(cost, row_ids, col_ids, m_rows, m_cols)
31 |
32 |
33 | def greedy_match(cost, row_ids, col_ids, max_cost):
34 | """Performs greedy matching until the cost exceeds `max_cost`.
35 |
36 | Parameters
37 | ----------
38 | cost : ndarray
39 | The cost matrix.
40 | row_ids : List[int]
41 | IDs that correspond to each row in the cost matrix.
42 | col_ids : List[int]
43 | IDs that correspond to each column in the cost matrix.
44 | max_cost : float
45 | Maximum cost allowed to match a row with a column.
46 |
47 | Returns
48 | -------
49 | List[tuple], List[int], List[int]
50 | Matched row and column IDs, unmatched row IDs, and unmatched column IDs.
51 | """
52 | row_ids = np.fromiter(row_ids, int, len(row_ids))
53 | col_ids = np.fromiter(col_ids, int, len(col_ids))
54 | return _greedy_match(cost, row_ids, col_ids, max_cost)
55 |
56 |
57 | @nb.njit(fastmath=True, cache=True)
58 | def _get_assignment_matches(cost, row_ids, col_ids, m_rows, m_cols):
59 | unmatched_rows = list(set(range(cost.shape[0])) - set(m_rows))
60 | unmatched_cols = list(set(range(cost.shape[1])) - set(m_cols))
61 | unmatched_row_ids = [row_ids[row] for row in unmatched_rows]
62 | unmatched_col_ids = [col_ids[col] for col in unmatched_cols]
63 | matches = []
64 | for row, col in zip(m_rows, m_cols):
65 | if cost[row, col] < INF_COST:
66 | matches.append((row_ids[row], col_ids[col]))
67 | else:
68 | unmatched_row_ids.append(row_ids[row])
69 | unmatched_col_ids.append(col_ids[col])
70 | return matches, unmatched_row_ids, unmatched_col_ids
71 |
72 |
73 | @nb.njit(fastmath=True, cache=True)
74 | def _greedy_match(cost, row_ids, col_ids, max_cost):
75 | indices_rows = np.arange(cost.shape[0])
76 | indices_cols = np.arange(cost.shape[1])
77 |
78 | matches = []
79 | while cost.shape[0] > 0 and cost.shape[1] > 0:
80 | idx = np.argmin(cost)
81 | i, j = idx // cost.shape[1], idx % cost.shape[1]
82 | if cost[i, j] <= max_cost:
83 | matches.append((row_ids[indices_rows[i]], col_ids[indices_cols[j]]))
84 | row_mask = np.ones(cost.shape[0], np.bool_)
85 | col_mask = np.ones(cost.shape[1], np.bool_)
86 | row_mask[i] = False
87 | col_mask[j] = False
88 |
89 | indices_rows = indices_rows[row_mask]
90 | indices_cols = indices_cols[col_mask]
91 | cost = cost[row_mask, :][:, col_mask]
92 | else:
93 | break
94 |
95 | unmatched_row_ids = [row_ids[row] for row in indices_rows]
96 | unmatched_col_ids = [col_ids[col] for col in indices_cols]
97 | return matches, unmatched_row_ids, unmatched_col_ids
98 |
99 |
100 | @nb.njit(fastmath=True, cache=True)
101 | def fuse_motion(cost, m_dist, m_weight):
102 | """Fuse each row of cost matrix with motion information."""
103 | norm_factor = 1. / CHI_SQ_INV_95
104 | f_weight = 1. - m_weight
105 | cost[:] = f_weight * cost + m_weight * norm_factor * m_dist
106 | cost[m_dist > CHI_SQ_INV_95] = INF_COST
107 |
108 |
109 | @nb.njit(parallel=False, fastmath=True, cache=True)
110 | def gate_cost(cost, row_labels, col_labels, max_cost=None):
111 | """Gate cost matrix if cost exceeds the maximum."""
112 | for i in nb.prange(cost.shape[0]):
113 | for j in range(cost.shape[1]):
114 | if (row_labels[i] != col_labels[j] or
115 | max_cost is not None and cost[i, j] > max_cost):
116 | cost[i, j] = INF_COST
117 |
--------------------------------------------------------------------------------
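
Both matchers take a cost matrix plus the row and column IDs it was built from, and return matched ID pairs along with unmatched row and column IDs. A toy sketch with made-up track IDs and detection indices, assuming the package imports cleanly:

```python
import numpy as np
from fastmot.utils.matching import linear_assignment, greedy_match

# rows are track IDs (7, 9), columns are detection indices (0, 1, 2)
cost = np.array([[0.2, 0.9, 0.5],
                 [0.8, 0.1, 0.6]])

matches, unmatched_rows, unmatched_cols = linear_assignment(cost, [7, 9], [0, 1, 2])
# matches -> [(7, 0), (9, 1)], unmatched_cols -> [2]

matches, _, _ = greedy_match(cost, [7, 9], [0, 1, 2], max_cost=0.3)
# greedily picks (9, 1) then (7, 0); pairs costlier than max_cost stay unmatched
```
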
/fastmot/utils/numba.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import numba as nb
3 |
4 |
5 | @nb.njit(fastmath=True, cache=True)
6 | def apply_along_axis(func1d, mat, axis):
7 | """Numba utility to apply reduction to a given axis."""
8 | assert mat.ndim == 2
9 | assert axis in [0, 1]
10 |     if axis == 0:
11 |         result = np.empty(mat.shape[1], mat.dtype)
12 |         for i in range(len(result)):
13 |             result[i] = func1d(mat[:, i])
14 |     else:
15 |         result = np.empty(mat.shape[0], mat.dtype)
16 |         for i in range(len(result)):
17 |             result[i] = func1d(mat[i, :])
18 |     return result
19 |
20 |
21 | @nb.njit(parallel=True, fastmath=True, cache=True)
22 | def normalize_vec(vectors):
23 | """Numba utility to normalize an array of vectors."""
24 | assert vectors.ndim == 2
25 | out = np.empty_like(vectors)
26 | for i in nb.prange(vectors.shape[0]):
27 | norm_factor = 1. / np.linalg.norm(vectors[i, :])
28 | out[i, :] = norm_factor * vectors[i, :]
29 | return out
30 |
31 |
32 | @nb.njit(fastmath=True, cache=True)
33 | def mask_area(mask):
34 | """Utility to calculate the area of a mask."""
35 | count = 0
36 | for val in mask.ravel():
37 | if val != 0:
38 | count += 1
39 | return count
40 |
41 |
42 | @nb.njit(fastmath=True, cache=True)
43 | def bisect_right(arr, val, left=0):
44 | """Utility to search a value in a sorted array."""
45 | right = len(arr)
46 | while left < right:
47 | mid = left + (right - left) // 2
48 |         if arr[mid] <= val:
49 | left = mid + 1
50 | else:
51 | right = mid
52 | return left
53 |
54 |
55 | @nb.njit(fastmath=True, cache=True)
56 | def find_split_indices(arr):
57 | """Utility to find indices of unique elements in sorted array."""
58 | prev = arr[0]
59 | split_indices = []
60 | for i, val in enumerate(arr[1:], 1):
61 | if val != prev:
62 | split_indices.append(i)
63 | prev = val
64 | return np.array(split_indices)
65 |
66 |
67 | @nb.njit(fastmath=True, cache=True, inline='always')
68 | def transform(pts, m):
69 | """Numba implementation of OpenCV's transform."""
70 | pts = np.asarray(pts, dtype=np.float64)
71 | pts = np.atleast_2d(pts)
72 |
73 | augment = np.ones((len(pts), 1))
74 | pts = np.concatenate((pts, augment), axis=1)
75 | return pts @ m.T
76 |
77 |
78 | @nb.njit(fastmath=True, cache=True, inline='always')
79 | def perspective_transform(pts, m):
80 | """Numba implementation of OpenCV's perspectiveTransform."""
81 | pts = np.asarray(pts, dtype=np.float64)
82 | pts = np.atleast_2d(pts)
83 |
84 | augment = np.ones((len(pts), 1))
85 | pts = np.concatenate((pts, augment), axis=1).T
86 | pts = m @ pts
87 | pts = pts / pts[-1]
88 | return pts[:2].T
89 |
--------------------------------------------------------------------------------
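
bisect_right and find_split_indices are what _split_bboxes_by_cls in mot.py relies on to carve sorted detection labels into per-class slices. A small sketch, assuming the package imports cleanly:

```python
import numpy as np
from fastmot.utils.numba import bisect_right, find_split_indices

labels = np.array([0, 0, 0, 1, 1, 2])   # detection labels sorted by class ID
print(bisect_right(labels, 0))          # 3, one past the last occurrence of class 0
print(find_split_indices(labels))       # [3 5], boundaries between the class groups
```
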
/fastmot/utils/profiler.py:
--------------------------------------------------------------------------------
1 | import time
2 | from collections import Counter
3 |
4 |
5 | class Profiler:
6 | __call_count = Counter()
7 | __time_elapsed = Counter()
8 |
9 | def __init__(self, name, aggregate=False):
10 | self.name = name
11 | if not aggregate:
12 | Profiler.__call_count[self.name] += 1
13 |
14 | def __enter__(self):
15 | self.start = time.perf_counter()
16 | return self
17 |
18 | def __exit__(self, type, value, traceback):
19 | self.end = time.perf_counter()
20 | self.duration = self.end - self.start
21 | Profiler.__time_elapsed[self.name] += self.duration
22 |
23 | @classmethod
24 | def reset(cls):
25 | cls.__call_count.clear()
26 | cls.__time_elapsed.clear()
27 |
28 | @classmethod
29 | def get_avg_millis(cls, name):
30 | call_count = cls.__call_count[name]
31 | if call_count == 0:
32 | return 0.
33 | return cls.__time_elapsed[name] * 1000 / call_count
34 |
--------------------------------------------------------------------------------
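
Profiler is a small context manager keyed by name: each `with Profiler(name)` block counts one call and adds its elapsed time, unless aggregate=True, so get_avg_millis reports the average per call. A usage sketch:

```python
import time
from fastmot.utils import Profiler

for _ in range(3):
    with Profiler('detect'):                  # counts one call and accumulates its duration
        time.sleep(0.01)

print(Profiler.get_avg_millis('detect'))      # roughly 10.0 (milliseconds per call)
Profiler.reset()                              # clear counters between runs
```
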
/fastmot/utils/rect.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import numba as nb
3 |
4 |
5 | @nb.njit(cache=True, inline='always')
6 | def as_tlbr(tlbr):
7 | """Construct a rectangle from a tuple or np.ndarray."""
8 | _tlbr = np.empty(4)
9 | _tlbr[0] = round(float(tlbr[0]), 0)
10 | _tlbr[1] = round(float(tlbr[1]), 0)
11 | _tlbr[2] = round(float(tlbr[2]), 0)
12 | _tlbr[3] = round(float(tlbr[3]), 0)
13 | return _tlbr
14 |
15 |
16 | @nb.njit(cache=True, inline='always')
17 | def get_size(tlbr):
18 | return tlbr[2] - tlbr[0] + 1, tlbr[3] - tlbr[1] + 1
19 |
20 |
21 | @nb.njit(cache=True, inline='always')
22 | def aspect_ratio(tlbr):
23 | w, h = get_size(tlbr)
24 | return h / w if w > 0 else 0.
25 |
26 |
27 | @nb.njit(cache=True, inline='always')
28 | def area(tlbr):
29 | w, h = get_size(tlbr)
30 | if w <= 0 or h <= 0:
31 | return 0.
32 | return w * h
33 |
34 |
35 | @nb.njit(cache=True, inline='always')
36 | def get_center(tlbr):
37 | return (tlbr[0] + tlbr[2]) / 2, (tlbr[1] + tlbr[3]) / 2
38 |
39 |
40 | @nb.njit(cache=True, inline='always')
41 | def to_tlwh(tlbr):
42 | tlwh = np.empty(4)
43 | tlwh[:2] = tlbr[:2]
44 | tlwh[2:] = get_size(tlbr)
45 | return tlwh
46 |
47 |
48 | @nb.njit(cache=True, inline='always')
49 | def to_tlbr(tlwh):
50 | tlbr = np.empty(4)
51 | xmin = float(tlwh[0])
52 | ymin = float(tlwh[1])
53 | tlbr[0] = round(xmin, 0)
54 | tlbr[1] = round(ymin, 0)
55 | tlbr[2] = round(xmin + float(tlwh[2]) - 1., 0)
56 | tlbr[3] = round(ymin + float(tlwh[3]) - 1., 0)
57 | return tlbr
58 |
59 |
60 | @nb.njit(cache=True, inline='always')
61 | def intersection(tlbr1, tlbr2):
62 | tlbr = np.empty(4)
63 | tlbr[0] = max(tlbr1[0], tlbr2[0])
64 | tlbr[1] = max(tlbr1[1], tlbr2[1])
65 | tlbr[2] = min(tlbr1[2], tlbr2[2])
66 | tlbr[3] = min(tlbr1[3], tlbr2[3])
67 | if tlbr[2] < tlbr[0] or tlbr[3] < tlbr[1]:
68 | return None
69 | return tlbr
70 |
71 |
72 | @nb.njit(cache=True, inline='always')
73 | def enclosing(tlbr1, tlbr2):
74 | tlbr = np.empty(4)
75 | tlbr[0] = min(tlbr1[0], tlbr2[0])
76 | tlbr[1] = min(tlbr1[1], tlbr2[1])
77 | tlbr[2] = max(tlbr1[2], tlbr2[2])
78 | tlbr[3] = max(tlbr1[3], tlbr2[3])
79 | return tlbr
80 |
81 |
82 | @nb.njit(cache=True, inline='always')
83 | def crop(img, tlbr):
84 | assert tlbr is not None
85 | xmin = max(int(tlbr[0]), 0)
86 | ymin = max(int(tlbr[1]), 0)
87 | xmax = max(int(tlbr[2]), 0)
88 | ymax = max(int(tlbr[3]), 0)
89 | return img[ymin:ymax + 1, xmin:xmax + 1]
90 |
91 |
92 | @nb.njit(cache=True, inline='always')
93 | def multi_crop(img, tlbrs):
94 | _tlbrs = tlbrs.astype(np.int_)
95 | _tlbrs = np.maximum(_tlbrs, 0)
96 | return [img[_tlbrs[i, 1]:_tlbrs[i, 3] + 1, _tlbrs[i, 0]:_tlbrs[i, 2] + 1]
97 | for i in range(len(_tlbrs))]
98 |
99 |
100 | @nb.njit(fastmath=True, cache=True, inline='always')
101 | def ios(tlbr1, tlbr2):
102 | """Computes intersection over self."""
103 | iw = min(tlbr1[2], tlbr2[2]) - max(tlbr1[0], tlbr2[0]) + 1
104 | ih = min(tlbr1[3], tlbr2[3]) - max(tlbr1[1], tlbr2[1]) + 1
105 | if iw <= 0 or ih <= 0:
106 | return 0.
107 | area_inter = iw * ih
108 | area_self = area(tlbr1)
109 | return area_inter / area_self
110 |
111 |
112 | @nb.njit(fastmath=True, cache=True, inline='always')
113 | def iom(tlbr1, tlbr2):
114 | """Computes intersection over minimum."""
115 | iw = min(tlbr1[2], tlbr2[2]) - max(tlbr1[0], tlbr2[0]) + 1
116 | ih = min(tlbr1[3], tlbr2[3]) - max(tlbr1[1], tlbr2[1]) + 1
117 | if iw <= 0 or ih <= 0:
118 | return 0.
119 | area_inter = iw * ih
120 | area_min = min(area(tlbr1), area(tlbr2))
121 | return area_inter / area_min
122 |
123 |
124 | @nb.njit(parallel=False, fastmath=True, cache=True)
125 | def bbox_ious(tlbrs1, tlbrs2):
126 | """Computes pairwise bounding box overlaps using IoU."""
127 | ious = np.empty((tlbrs1.shape[0], tlbrs2.shape[0]))
128 | for i in nb.prange(tlbrs1.shape[0]):
129 | area1 = area(tlbrs1[i, :])
130 | for j in range(tlbrs2.shape[0]):
131 | iw = min(tlbrs1[i, 2], tlbrs2[j, 2]) - max(tlbrs1[i, 0], tlbrs2[j, 0]) + 1
132 | ih = min(tlbrs1[i, 3], tlbrs2[j, 3]) - max(tlbrs1[i, 1], tlbrs2[j, 1]) + 1
133 | if iw > 0 and ih > 0:
134 | area_inter = iw * ih
135 | area_union = area1 + area(tlbrs2[j, :]) - area_inter
136 | ious[i, j] = area_inter / area_union
137 | else:
138 | ious[i, j] = 0.
139 | return ious
140 |
141 |
142 | @nb.njit(parallel=False, fastmath=True, cache=True)
143 | def find_occluded(tlbrs, occlusion_thresh):
144 | """Computes a mask of occluded bounding boxes."""
145 | occluded_mask = np.zeros(tlbrs.shape[0], dtype=np.bool_)
146 | for i in nb.prange(tlbrs.shape[0]):
147 | area_self = area(tlbrs[i, :])
148 | for j in range(tlbrs.shape[0]):
149 | if i != j:
150 | iw = min(tlbrs[i, 2], tlbrs[j, 2]) - max(tlbrs[i, 0], tlbrs[j, 0]) + 1
151 | ih = min(tlbrs[i, 3], tlbrs[j, 3]) - max(tlbrs[i, 1], tlbrs[j, 1]) + 1
152 | if iw > 0 and ih > 0:
153 | ios = iw * ih / area_self
154 | if ios >= occlusion_thresh:
155 | occluded_mask[i] = True
156 | break
157 | return occluded_mask
158 |
159 |
160 | @nb.njit(fastmath=True, cache=True)
161 | def nms(tlwhs, scores, nms_thresh):
162 | """Applies Non-Maximum Suppression on the bounding boxes [x, y, w, h].
163 | Returns an array with the indexes of the bounding boxes we want to keep.
164 | """
165 | areas = tlwhs[:, 2] * tlwhs[:, 3]
166 | ordered = scores.argsort()[::-1]
167 |
168 | tls = tlwhs[:, :2]
169 | brs = tlwhs[:, :2] + tlwhs[:, 2:] - 1
170 |
171 | keep = []
172 | while ordered.size > 0:
173 | # index of the current element
174 | i = ordered[0]
175 | keep.append(i)
176 |
177 | other_tls = tls[ordered[1:]]
178 | other_brs = brs[ordered[1:]]
179 |
180 | # compute IoU
181 | inter_xmin = np.maximum(tls[i, 0], other_tls[:, 0])
182 | inter_ymin = np.maximum(tls[i, 1], other_tls[:, 1])
183 | inter_xmax = np.minimum(brs[i, 0], other_brs[:, 0])
184 | inter_ymax = np.minimum(brs[i, 1], other_brs[:, 1])
185 |
186 | inter_w = np.maximum(0, inter_xmax - inter_xmin + 1)
187 | inter_h = np.maximum(0, inter_ymax - inter_ymin + 1)
188 | inter_area = inter_w * inter_h
189 | union_area = areas[i] + areas[ordered[1:]] - inter_area
190 | iou = inter_area / union_area
191 |
192 | idx = np.where(iou <= nms_thresh)[0]
193 | ordered = ordered[idx + 1]
194 | keep = np.array(keep)
195 | return keep
196 |
197 |
198 | @nb.njit(fastmath=True, cache=True)
199 | def diou_nms(tlwhs, scores, nms_thresh, beta=0.6):
200 | """Applies Non-Maximum Suppression using the DIoU metric."""
201 | areas = tlwhs[:, 2] * tlwhs[:, 3]
202 | ordered = scores.argsort()[::-1]
203 |
204 | tls = tlwhs[:, :2]
205 | brs = tlwhs[:, :2] + tlwhs[:, 2:] - 1
206 | centers = (tls + brs) / 2
207 |
208 | keep = []
209 | while ordered.size > 0:
210 | # index of the current element
211 | i = ordered[0]
212 | keep.append(i)
213 |
214 | other_tls = tls[ordered[1:]]
215 | other_brs = brs[ordered[1:]]
216 |
217 | # compute IoU
218 | inter_xmin = np.maximum(tls[i, 0], other_tls[:, 0])
219 | inter_ymin = np.maximum(tls[i, 1], other_tls[:, 1])
220 | inter_xmax = np.minimum(brs[i, 0], other_brs[:, 0])
221 | inter_ymax = np.minimum(brs[i, 1], other_brs[:, 1])
222 |
223 | inter_w = np.maximum(0, inter_xmax - inter_xmin + 1)
224 | inter_h = np.maximum(0, inter_ymax - inter_ymin + 1)
225 | inter_area = inter_w * inter_h
226 | union_area = areas[i] + areas[ordered[1:]] - inter_area
227 | iou = inter_area / union_area
228 |
229 | # compute DIoU
230 | encls_xmin = np.minimum(tls[i, 0], other_tls[:, 0])
231 | encls_ymin = np.minimum(tls[i, 1], other_tls[:, 1])
232 | encls_xmax = np.maximum(brs[i, 0], other_brs[:, 0])
233 | encls_ymax = np.maximum(brs[i, 1], other_brs[:, 1])
234 |
235 | encls_w = encls_xmax - encls_xmin + 1
236 | encls_h = encls_ymax - encls_ymin + 1
237 | c = encls_w**2 + encls_h**2
238 | d = np.sum((centers[i] - centers[ordered[1:]])**2, axis=1)
239 | diou = iou - (d / c)**beta
240 |
241 | idx = np.where(diou <= nms_thresh)[0]
242 | ordered = ordered[idx + 1]
243 | keep = np.array(keep)
244 | return keep
245 |
--------------------------------------------------------------------------------
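
The suppression helpers above take boxes in [x, y, w, h] (top-left plus size) format, while the pairwise IoU and occlusion helpers take [x1, y1, x2, y2]. A minimal usage sketch, assuming NumPy and Numba are installed and the module imports as fastmot.utils.rect (the box values are made up for illustration):

    import numpy as np
    from fastmot.utils.rect import nms, diou_nms, bbox_ious

    # two heavily overlapping boxes and one separate box in [x, y, w, h]
    tlwhs = np.array([[10., 10., 50., 80.],
                      [12., 12., 50., 80.],
                      [200., 40., 60., 90.]])
    scores = np.array([0.9, 0.6, 0.8])

    keep = nms(tlwhs, scores, 0.5)           # boxes 0 and 2 survive, the near-duplicate is suppressed
    keep_diou = diou_nms(tlwhs, scores, 0.5)

    # the pairwise helpers take [x1, y1, x2, y2] boxes instead
    tlbrs = np.hstack((tlwhs[:, :2], tlwhs[:, :2] + tlwhs[:, 2:] - 1))
    print(keep, keep_diou)
    print(bbox_ious(tlbrs, tlbrs).round(2))
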
/fastmot/utils/visualization.py:
--------------------------------------------------------------------------------
1 | import colorsys
2 | import numpy as np
3 | import cv2
4 | from .rect import get_center
5 |
6 |
7 | GOLDEN_RATIO = 0.618033988749895
8 |
9 |
10 | def draw_tracks(frame, tracks, show_flow=False, show_cov=False,
11 | show_traj=False):
12 | for track in tracks:
13 | draw_bbox(frame, track.tlbr, get_color(track.trk_id), 2, str(track.trk_id))
14 | if show_traj:
15 | draw_trajectory(frame, track.bboxes, track.trk_id)
16 | if show_flow:
17 | draw_feature_match(frame, track.prev_keypoints, track.keypoints, (0, 255, 255))
18 | if show_cov:
19 | draw_covariance(frame, track.tlbr, track.state[1])
20 |
21 |
22 | def draw_detections(frame, detections, color=(255, 255, 255), show_conf=False):
23 | for det in detections:
24 | text = f'{det.label}: {det.conf:.2f}' if show_conf else None
25 | draw_bbox(frame, det.tlbr, color, 1, text)
26 |
27 |
28 | def draw_trajectory(frame, bboxes, trk_id):
29 | tlbrs = np.reshape(list(bboxes), (len(bboxes), 4))
30 | centers = tuple(map(lambda box: get_center(box), tlbrs[::4]))
31 | color = get_color(trk_id)
32 | pts = np.array(centers, dtype=np.int32)
33 | cv2.polylines(frame, [pts], False, color, thickness=1)
34 |
35 |
36 | def draw_klt_bboxes(frame, klt_bboxes, color=(0, 0, 0)):
37 | for tlbr in klt_bboxes:
38 | draw_bbox(frame, tlbr, color, 1)
39 |
40 |
41 | def draw_tiles(frame, tiles, scale_factor, color=(0, 0, 0)):
42 | for tile in tiles:
43 | tlbr = np.rint(tile * np.tile(scale_factor, 2))
44 | draw_bbox(frame, tlbr, color, 1)
45 |
46 |
47 | def draw_background_flow(frame, prev_bg_keypoints, bg_keypoints, color=(0, 0, 255)):
48 | draw_feature_match(frame, prev_bg_keypoints, bg_keypoints, color)
49 |
50 |
51 | def get_color(idx, s=0.8, vmin=0.7):
52 | h = np.fmod(idx * GOLDEN_RATIO, 1.)
53 | v = 1. - np.fmod(idx * GOLDEN_RATIO, 1. - vmin)
54 | r, g, b = colorsys.hsv_to_rgb(h, s, v)
55 | return int(255 * b), int(255 * g), int(255 * r)
56 |
57 |
58 | def draw_bbox(frame, tlbr, color, thickness, text=None):
59 | tlbr = tlbr.astype(int)
60 | tl, br = tuple(tlbr[:2]), tuple(tlbr[2:])
61 | cv2.rectangle(frame, tl, br, color, thickness)
62 | if text is not None:
63 | (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_DUPLEX, 0.5, 1)
64 | cv2.rectangle(frame, tl, (tl[0] + text_width - 1, tl[1] + text_height - 1),
65 | color, cv2.FILLED)
66 | cv2.putText(frame, text, (tl[0], tl[1] + text_height - 1), cv2.FONT_HERSHEY_DUPLEX,
67 | 0.5, 0, 1, cv2.LINE_AA)
68 |
69 |
70 | def draw_feature_match(frame, prev_pts, cur_pts, color):
71 | if len(cur_pts) > 0:
72 | cur_pts = np.rint(cur_pts).astype(np.int32)
73 | for pt in cur_pts:
74 | cv2.circle(frame, tuple(pt), 1, color, cv2.FILLED)
75 | if len(prev_pts) > 0:
76 | prev_pts = np.rint(prev_pts).astype(np.int32)
77 | for pt1, pt2 in zip(prev_pts, cur_pts):
78 | cv2.line(frame, tuple(pt1), tuple(pt2), color, 1, cv2.LINE_AA)
79 |
80 |
81 | def draw_covariance(frame, tlbr, covariance):
82 | tlbr = tlbr.astype(int)
83 | tl, br = tuple(tlbr[:2]), tuple(tlbr[2:])
84 |
85 | def ellipse(cov):
86 | vals, vecs = np.linalg.eigh(cov)
87 | order = vals.argsort()[::-1]
88 | # 95% confidence ellipse
89 | vals, vecs = np.sqrt(vals[order] * 5.9915), vecs[:, order]
90 | axes = int(vals[0] + 0.5), int(vals[1] + 0.5)
91 | angle = np.degrees(np.arctan2(vecs[1, 0], vecs[0, 0]))
92 | return axes, angle
93 |
94 | axes, angle = ellipse(covariance[:2, :2])
95 | cv2.ellipse(frame, tl, axes, angle, 0, 360, (255, 255, 255), 1, cv2.LINE_AA)
96 | axes, angle = ellipse(covariance[2:4, 2:4])
97 | cv2.ellipse(frame, br, axes, angle, 0, 360, (255, 255, 255), 1, cv2.LINE_AA)
98 |
99 |
100 | class Visualizer:
101 | def __init__(self,
102 | draw_detections=False,
103 | draw_confidence=False,
104 | draw_covariance=False,
105 | draw_klt=False,
106 | draw_obj_flow=False,
107 | draw_bg_flow=False,
108 | draw_trajectory=False):
109 | """Class for visualization.
110 |
111 | Parameters
112 | ----------
113 | draw_detections : bool, optional
114 | Enable drawing detections.
115 | draw_confidence : bool, optional
116 | Enable drawing detection confidence, ignored if `draw_detections` is disabled.
117 | draw_covariance : bool, optional
118 | Enable drawing Kalman filter position covariance.
119 | draw_klt : bool, optional
120 | Enable drawing KLT bounding boxes.
121 | draw_obj_flow : bool, optional
122 | Enable drawing object flow matches.
123 | draw_bg_flow : bool, optional
124 | Enable drawing background flow matches.
125 | draw_trajectory: bool, optional
126 | Enable drawing trajectories of boxes
127 | """
128 | self.draw_detections = draw_detections
129 | self.draw_confidence = draw_confidence
130 | self.draw_covariance = draw_covariance
131 | self.draw_klt = draw_klt
132 | self.draw_obj_flow = draw_obj_flow
133 | self.draw_bg_flow = draw_bg_flow
134 | self.draw_trajectory = draw_trajectory
135 |
136 | def render(self, frame, tracks, detections, klt_bboxes, prev_bg_keypoints, bg_keypoints):
137 | """Render visualizations onto the frame."""
138 | draw_tracks(frame, tracks, show_flow=self.draw_obj_flow,
139 | show_cov=self.draw_covariance,
140 | show_traj=self.draw_trajectory)
141 | if self.draw_detections:
142 | draw_detections(frame, detections, show_conf=self.draw_confidence)
143 | if self.draw_klt:
144 | draw_klt_bboxes(frame, klt_bboxes)
145 | if self.draw_bg_flow:
146 | draw_background_flow(frame, prev_bg_keypoints, bg_keypoints)
147 |
--------------------------------------------------------------------------------
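
get_color above hashes a track ID to a stable BGR color via the golden ratio, and draw_bbox renders a labeled rectangle with it. A minimal sketch on a blank frame, assuming OpenCV and NumPy are available and the module imports as fastmot.utils.visualization (the boxes and output path are made up for illustration):

    import numpy as np
    import cv2
    from fastmot.utils.visualization import draw_bbox, get_color

    frame = np.zeros((480, 640, 3), dtype=np.uint8)    # blank canvas
    boxes = np.array([[50., 60., 200., 300.],           # [x1, y1, x2, y2]
                      [300., 120., 450., 400.]])

    for trk_id, tlbr in enumerate(boxes, start=1):
        # each track ID maps to a distinct, repeatable color
        draw_bbox(frame, tlbr, get_color(trk_id), 2, str(trk_id))

    cv2.imwrite('tracks_preview.png', frame)
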
/fastmot/videoio.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from enum import Enum
3 | from collections import deque
4 | from urllib.parse import urlparse
5 | import subprocess
6 | import threading
7 | import logging
8 | import cv2
9 |
10 |
11 | LOGGER = logging.getLogger(__name__)
12 | WITH_GSTREAMER = True
13 |
14 |
15 | class Protocol(Enum):
16 | IMAGE = 0
17 | VIDEO = 1
18 | CSI = 2
19 | V4L2 = 3
20 | RTSP = 4
21 | HTTP = 5
22 |
23 |
24 | class VideoIO:
25 | def __init__(self, size, input_uri,
26 | output_uri=None,
27 | resolution=(1920, 1080),
28 | frame_rate=30,
29 | buffer_size=10,
30 | proc_fps=30):
31 | """Class for video capturing and output saving.
32 | Encoding, decoding, and scaling can be accelerated using the GStreamer backend.
33 |
34 | Parameters
35 | ----------
36 | size : tuple
37 | Width and height of each frame to output.
38 | input_uri : str
39 | URI to input stream. It could be image sequence (e.g. '%06d.jpg'), video file (e.g. 'file.mp4'),
40 | MIPI CSI camera (e.g. 'csi://0'), USB/V4L2 camera (e.g. '/dev/video0'),
41 |             RTSP stream (e.g. 'rtsp://<user>:<password>@<ip>:<port>/<path>'),
42 |             or HTTP live stream (e.g. 'http://<user>:<password>@<ip>:<port>/<path>')
43 |         output_uri : str, optional
44 | URI to an output video file.
45 | resolution : tuple, optional
46 | Original resolution of the input source.
47 | Useful to set a certain capture mode of a USB/CSI camera.
48 | frame_rate : int, optional
49 | Frame rate of the input source.
50 | Required if frame rate cannot be deduced, e.g. image sequence and/or RTSP.
51 | Useful to set a certain capture mode of a USB/CSI camera.
52 | buffer_size : int, optional
53 | Number of frames to buffer.
54 |             For live sources, a larger buffer drops fewer frames but increases latency.
55 | proc_fps : int, optional
56 | Estimated processing speed that may limit the capture interval `cap_dt`.
57 | This depends on hardware and processing complexity.
58 | """
59 | self.size = size
60 | self.input_uri = input_uri
61 | self.output_uri = output_uri
62 | self.resolution = resolution
63 | assert frame_rate > 0
64 | self.frame_rate = frame_rate
65 | assert buffer_size >= 1
66 | self.buffer_size = buffer_size
67 | assert proc_fps > 0
68 | self.proc_fps = proc_fps
69 |
70 | self.protocol = self._parse_uri(self.input_uri)
71 | self.is_live = self.protocol != Protocol.IMAGE and self.protocol != Protocol.VIDEO
72 | if WITH_GSTREAMER:
73 | self.source = cv2.VideoCapture(self._gst_cap_pipeline(), cv2.CAP_GSTREAMER)
74 | else:
75 | self.source = cv2.VideoCapture(self.input_uri)
76 |
77 | self.frame_queue = deque([], maxlen=self.buffer_size)
78 | self.cond = threading.Condition()
79 | self.exit_event = threading.Event()
80 | self.cap_thread = threading.Thread(target=self._capture_frames)
81 |
82 | ret, frame = self.source.read()
83 | if not ret:
84 | raise RuntimeError('Unable to read video stream')
85 | self.frame_queue.append(frame)
86 |
87 | width = self.source.get(cv2.CAP_PROP_FRAME_WIDTH)
88 | height = self.source.get(cv2.CAP_PROP_FRAME_HEIGHT)
89 | self.cap_fps = self.source.get(cv2.CAP_PROP_FPS)
90 | self.do_resize = (width, height) != self.size
91 | if self.cap_fps == 0:
92 | self.cap_fps = self.frame_rate # fallback to config if unknown
93 | LOGGER.info('%dx%d stream @ %d FPS', width, height, self.cap_fps)
94 |
95 | if self.output_uri is not None:
96 | Path(self.output_uri).parent.mkdir(parents=True, exist_ok=True)
97 | output_fps = 1 / self.cap_dt
98 | if WITH_GSTREAMER:
99 | self.writer = cv2.VideoWriter(self._gst_write_pipeline(), cv2.CAP_GSTREAMER, 0,
100 | output_fps, self.size, True)
101 | else:
102 | fourcc = cv2.VideoWriter_fourcc(*'avc1')
103 | self.writer = cv2.VideoWriter(self.output_uri, fourcc, output_fps, self.size, True)
104 |
105 | @property
106 | def cap_dt(self):
107 |         # for live sources, cap the capture rate at the estimated processing speed
108 | return 1 / min(self.cap_fps, self.proc_fps) if self.is_live else 1 / self.cap_fps
109 |
110 | def start_capture(self):
111 | """Start capturing from file or device."""
112 | if not self.source.isOpened():
113 | self.source.open(self._gst_cap_pipeline(), cv2.CAP_GSTREAMER)
114 | if not self.cap_thread.is_alive():
115 | self.cap_thread.start()
116 |
117 | def stop_capture(self):
118 | """Stop capturing from file or device."""
119 | with self.cond:
120 | self.exit_event.set()
121 | self.cond.notify()
122 | self.frame_queue.clear()
123 | self.cap_thread.join()
124 |
125 | def read(self):
126 | """Reads the next video frame.
127 |
128 | Returns
129 | -------
130 | ndarray
131 | Returns None if there are no more frames.
132 | """
133 | with self.cond:
134 | while len(self.frame_queue) == 0 and not self.exit_event.is_set():
135 | self.cond.wait()
136 | if len(self.frame_queue) == 0 and self.exit_event.is_set():
137 | return None
138 | frame = self.frame_queue.popleft()
139 | self.cond.notify()
140 | if self.do_resize:
141 | frame = cv2.resize(frame, self.size)
142 | return frame
143 |
144 | def write(self, frame):
145 | """Writes the next video frame."""
146 | assert hasattr(self, 'writer')
147 | self.writer.write(frame)
148 |
149 | def release(self):
150 | """Cleans up input and output sources."""
151 | self.stop_capture()
152 | if hasattr(self, 'writer'):
153 | self.writer.release()
154 | self.source.release()
155 |
156 | def _gst_cap_pipeline(self):
157 | gst_elements = str(subprocess.check_output('gst-inspect-1.0'))
158 | if 'nvvidconv' in gst_elements and self.protocol != Protocol.V4L2:
159 | # format conversion for hardware decoder
160 | cvt_pipeline = (
161 | 'nvvidconv interpolation-method=5 ! '
162 | 'video/x-raw, width=%d, height=%d, format=BGRx !'
163 | 'videoconvert ! appsink sync=false'
164 | % self.size
165 | )
166 | else:
167 | cvt_pipeline = (
168 | 'videoscale ! '
169 | 'video/x-raw, width=%d, height=%d !'
170 | 'videoconvert ! appsink sync=false'
171 | % self.size
172 | )
173 |
174 | if self.protocol == Protocol.IMAGE:
175 | pipeline = (
176 | 'multifilesrc location=%s index=1 caps="image/%s,framerate=%d/1" ! decodebin ! '
177 | % (
178 | self.input_uri,
179 | self._img_format(self.input_uri),
180 | self.frame_rate
181 | )
182 | )
183 | elif self.protocol == Protocol.VIDEO:
184 | pipeline = 'filesrc location=%s ! decodebin ! ' % self.input_uri
185 | elif self.protocol == Protocol.CSI:
186 | if 'nvarguscamerasrc' in gst_elements:
187 | pipeline = (
188 | 'nvarguscamerasrc sensor_id=%s ! '
189 | 'video/x-raw(memory:NVMM), width=%d, height=%d, '
190 | 'format=NV12, framerate=%d/1 ! '
191 | % (
192 | self.input_uri[6:],
193 | *self.resolution,
194 | self.frame_rate
195 | )
196 | )
197 | else:
198 | raise RuntimeError('GStreamer CSI plugin not found')
199 | elif self.protocol == Protocol.V4L2:
200 | if 'v4l2src' in gst_elements:
201 | pipeline = (
202 | 'v4l2src device=%s ! '
203 | 'video/x-raw, width=%d, height=%d, '
204 | 'format=YUY2, framerate=%d/1 ! '
205 | % (
206 | self.input_uri,
207 | *self.resolution,
208 | self.frame_rate
209 | )
210 | )
211 | else:
212 | raise RuntimeError('GStreamer V4L2 plugin not found')
213 | elif self.protocol == Protocol.RTSP:
214 | pipeline = (
215 | 'rtspsrc location=%s latency=0 ! '
216 | 'capsfilter caps=application/x-rtp,media=video ! decodebin ! ' % self.input_uri
217 | )
218 | elif self.protocol == Protocol.HTTP:
219 | pipeline = 'souphttpsrc location=%s is-live=true ! decodebin ! ' % self.input_uri
220 | return pipeline + cvt_pipeline
221 |
222 | def _gst_write_pipeline(self):
223 | gst_elements = str(subprocess.check_output('gst-inspect-1.0'))
224 | # use hardware encoder if found
225 | if 'omxh264enc' in gst_elements:
226 | h264_encoder = 'omxh264enc preset-level=2'
227 | elif 'x264enc' in gst_elements:
228 | h264_encoder = 'x264enc pass=4'
229 | else:
230 | raise RuntimeError('GStreamer H.264 encoder not found')
231 | pipeline = (
232 | 'appsrc ! autovideoconvert ! %s ! qtmux ! filesink location=%s '
233 | % (
234 | h264_encoder,
235 | self.output_uri
236 | )
237 | )
238 | return pipeline
239 |
240 | def _capture_frames(self):
241 | while not self.exit_event.is_set():
242 | ret, frame = self.source.read()
243 | with self.cond:
244 | if not ret:
245 | self.exit_event.set()
246 | self.cond.notify()
247 | break
248 | # keep unprocessed frames in the buffer for file
249 | if not self.is_live:
250 | while (len(self.frame_queue) == self.buffer_size and
251 | not self.exit_event.is_set()):
252 | self.cond.wait()
253 | self.frame_queue.append(frame)
254 | self.cond.notify()
255 |
256 | @staticmethod
257 | def _parse_uri(uri):
258 | result = urlparse(uri)
259 | if result.scheme == 'csi':
260 | protocol = Protocol.CSI
261 | elif result.scheme == 'rtsp':
262 | protocol = Protocol.RTSP
263 | elif result.scheme == 'http':
264 | protocol = Protocol.HTTP
265 | else:
266 | if '/dev/video' in result.path:
267 | protocol = Protocol.V4L2
268 | elif '%' in result.path:
269 | protocol = Protocol.IMAGE
270 | else:
271 | protocol = Protocol.VIDEO
272 | return protocol
273 |
274 | @staticmethod
275 | def _img_format(uri):
276 | img_format = Path(uri).suffix[1:]
277 | return 'jpeg' if img_format == 'jpg' else img_format
278 |
--------------------------------------------------------------------------------
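
VideoIO above captures frames on a background thread and hands them out through read() until the source is exhausted. A minimal read/process/write loop, assuming an OpenCV build with GStreamer support (otherwise set WITH_GSTREAMER to False) and hypothetical file names:

    from fastmot.videoio import VideoIO

    # size is the (width, height) every frame is resized to; paths are placeholders
    stream = VideoIO((1280, 720), 'input.mp4', output_uri='output.mp4', frame_rate=30)
    stream.start_capture()
    try:
        while True:
            frame = stream.read()
            if frame is None:        # None once the file ends or capture stops
                break
            # ... run detection / tracking on `frame` here ...
            stream.write(frame)
    finally:
        stream.release()
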
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy >= 1.17
2 | scipy >= 1.5
3 | numba == 0.48
4 | tensorflow < 2.0
5 | cupy == 9.2
--------------------------------------------------------------------------------
/scripts/download_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | DIR=$HOME
4 |
5 | set -e
6 |
7 | PASCAL_VOC2007_DATASET=$DIR/VOCtrainval_06-Nov-2007.tar
8 | wget -O $PASCAL_VOC2007_DATASET http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
9 | tar -xf $PASCAL_VOC2007_DATASET -C $DIR
10 | rm $PASCAL_VOC2007_DATASET
--------------------------------------------------------------------------------
/scripts/download_models.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | BASEDIR=$(dirname "$0")
4 | DIR=$BASEDIR/../fastmot/models
5 |
6 | set -e
7 |
8 | pip3 install gdown
9 |
10 | gdown https://drive.google.com/uc?id=1MLC2lKnQvAQgBKZP1EXB6UdmqujY9qVd -O $DIR/osnet_x0_25_msmt17.onnx
11 | gdown https://drive.google.com/uc?id=1-Cqk2P72P4feYLJGtJFPcCxN5JttzTfX -O $DIR/ssd_inception_v2_coco.pb
12 | gdown https://drive.google.com/uc?id=1IfSveiXaub-L6PO9mqne5pk2EByzb25z -O $DIR/ssd_mobilenet_v1_coco.pb
13 | gdown https://drive.google.com/uc?id=1ste0fQevAjF4UqD3JsCtu1rUAwCTmETN -O $DIR/ssd_mobilenet_v2_coco.pb
14 | gdown https://drive.google.com/uc?id=1-kXZpA6y8pNbDMMD7N--IWIjwqqnAIGZ -O $DIR/yolov4_crowdhuman.onnx
--------------------------------------------------------------------------------
/scripts/install_jetson.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | L4T_VERSION=$(dpkg-query --showformat='${Version}' --show nvidia-l4t-core | cut -f1 -d'-')
6 |
7 | # Jetpack>=4.4 (OpenCV, CUDA, TensorRT) is required
8 | if dpkg --compare-versions $L4T_VERSION ge 32.6; then
9 | TF_VERSION=1.15.5
10 | NV_VERSION=21.6
11 | JP_VERSION=46
12 | elif dpkg --compare-versions $L4T_VERSION ge 32.5; then
13 | TF_VERSION=1.15.4
14 | NV_VERSION=20.12
15 | JP_VERSION=45
16 | elif dpkg --compare-versions $L4T_VERSION ge 32.4; then
17 | TF_VERSION=1.15.2
18 | NV_VERSION=20.4
19 | JP_VERSION=44
20 | else
21 | echo "Error: unsupported L4T version $L4T_VERSION"
22 | exit 1
23 | fi
24 |
25 | # Set up CUDA environment
26 | if [ ! -x "$(command -v nvcc)" ]; then
27 | echo "export PATH=/usr/local/cuda/bin\${PATH:+:\${PATH}}" >> ~/.bashrc
28 | echo "export LD_LIBRARY_PATH=/usr/local/cuda/lib64\${LD_LIBRARY_PATH:+:\${LD_LIBRARY_PATH}}" >> ~/.bashrc
29 | source ~/.bashrc
30 | fi
31 |
32 | # NumPy and TensorFlow
33 | sudo apt-get update
34 | sudo apt-get install -y python3-pip libhdf5-serial-dev hdf5-tools libcanberra-gtk-module
35 | sudo -H pip3 install numpy
36 | sudo ln -s /usr/include/locale.h /usr/include/xlocale.h
37 | sudo -H pip3 install --no-cache-dir --extra-index-url https://developer.download.nvidia.com/compute/redist/jp/v$JP_VERSION tensorflow==$TF_VERSION+nv$NV_VERSION
38 |
39 | # SciPy
40 | sudo apt-get install -y libatlas-base-dev gfortran
41 | sudo -H pip3 install scipy==1.5
42 |
43 | # Numba
44 | sudo apt-get install -y llvm-8 llvm-8-dev
45 | sudo -H LLVM_CONFIG=/usr/bin/llvm-config-8 pip3 install numba==0.48
46 |
47 | # CuPy
48 | echo "Installing CuPy, this may take a while..."
49 | sudo -H CUPY_NVCC_GENERATE_CODE="current" CUPY_NUM_BUILD_JOBS=$(nproc) pip3 install cupy==9.2
50 |
--------------------------------------------------------------------------------
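
After the script finishes, the pinned stack (SciPy 1.5, Numba 0.48, CuPy 9.2) can be sanity-checked from Python. The snippet below is only a hypothetical verification step, not part of the repository:

    # hypothetical post-install check for the pinned dependencies
    import numpy, scipy, numba, cupy

    print('numpy', numpy.__version__)
    print('scipy', scipy.__version__)    # expect 1.5.x
    print('numba', numba.__version__)    # expect 0.48
    print('cupy ', cupy.__version__)     # expect 9.2.x
    print('CUDA devices visible:', cupy.cuda.runtime.getDeviceCount())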