├── .dockerignore ├── .github ├── FUNDING.yml └── ISSUE_TEMPLATE │ ├── config.yml │ └── unexpected-behaviors.md ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── app.py ├── assets ├── aerial_demo.gif └── dense_demo.gif ├── cfg └── mot.json ├── eval ├── results │ ├── MOT20-01.txt │ ├── MOT20-02.txt │ ├── MOT20-03.txt │ └── MOT20-05.txt └── seqmap.txt ├── fastmot ├── __init__.py ├── detector.py ├── feature_extractor.py ├── flow.py ├── kalman_filter.py ├── models │ ├── __init__.py │ ├── calibrator.py │ ├── label.py │ ├── reid.py │ ├── ssd.py │ └── yolo.py ├── mot.py ├── plugins │ ├── Makefile │ ├── README.md │ ├── get_compute.py │ ├── yolo_layer.cu │ └── yolo_layer.h ├── track.py ├── tracker.py ├── utils │ ├── __init__.py │ ├── decoder.py │ ├── distance.py │ ├── inference.py │ ├── matching.py │ ├── numba.py │ ├── profiler.py │ ├── rect.py │ └── visualization.py └── videoio.py ├── requirements.txt └── scripts ├── download_data.sh ├── download_models.sh ├── install_jetson.sh └── yolo2onnx.py /.dockerignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | **/__pycache__/ 3 | **/*.py[cod] 4 | **/*$py.class 5 | **/*.so 6 | **/*.o 7 | 8 | # temporary files 9 | .DS_Store 10 | .cache 11 | 12 | # model files 13 | **/*.pb 14 | **/*.onnx 15 | 16 | # TensorRT stuff 17 | **/*.trt 18 | **/*calib_cache 19 | 20 | # video and image files 21 | *.jpg 22 | *.jpeg 23 | *.png 24 | *.bmp 25 | *.tif 26 | *.tiff 27 | *.heic 28 | *.JPG 29 | *.JPEG 30 | *.PNG 31 | *.BMP 32 | *.TIF 33 | *.TIFF 34 | *.HEIC 35 | *.mp4 36 | *.mov 37 | *.MOV 38 | *.avi 39 | 40 | # dataset 41 | **/VOCdevkit/ 42 | 43 | # vscode stuff 44 | .vscode/ -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [GeekAlexis] 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: GitHub Community Support 4 | url: https://github.community/ 5 | about: Please ask and answer questions here. 6 | - name: GitHub Security Bug Bounty 7 | url: https://bounty.github.com/ 8 | about: Please report security vulnerabilities here. 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/unexpected-behaviors.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Unexpected behaviors 3 | about: Report unexpected behaviors/bugs or ask questions 4 | title: Please read & provide the following 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Your issue may already be reported! 11 | Please search the issues before creating one. 12 | 13 | ## Current Behavior 14 | 15 | 16 | 17 | ## How to Reproduce 18 | 19 | 20 | 21 | ## Describe what you want to do 22 | 1. What input videos you will provide, if any: 23 | 2. What outputs you are expecting: 24 | 3. Ask your questions here, if any: 25 | 26 | 27 | ## Your Environment 28 | 29 | 30 | * Desktop 31 | * Operating System and version: 32 | * NVIDIA Driver version: 33 | * Used the docker image? 34 | * NVIDIA Jetson 35 | * Which Jetson? 36 | * Jetpack version: 37 | * Ran install_jetson.sh? 38 | * Reinstalled OpenCV from Jetpack? 39 | 40 | ## Common issues 41 | 1. GStreamer warnings are normal 42 | 2. 
If you have issues with GStreamer on Desktop, disable GStreamer and build FFMPEG instead in Dockerfile 43 | 2. TensorRT plugin and engine files have to be built on the target platform and cannot be copied from a different architecture 44 | 3. Reinstalled OpenCV is usually not as optimized as the one shipped in Jetpack 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | *.o 7 | .idea/ 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Mac temporary files 132 | .DS_Store 133 | ._* 134 | 135 | # model files 136 | *.pb 137 | *.onnx 138 | 139 | # TensorRT stuff 140 | *.trt 141 | *calib_cache 142 | 143 | # video and image files 144 | *.jpg 145 | *.jpeg 146 | *.png 147 | *.bmp 148 | *.tif 149 | *.tiff 150 | *.heic 151 | *.JPG 152 | *.JPEG 153 | *.PNG 154 | *.BMP 155 | *.TIF 156 | *.TIFF 157 | *.HEIC 158 | *.mp4 159 | *.mov 160 | *.MOV 161 | *.avi 162 | 163 | # dataset 164 | VOCdevkit/ 165 | 166 | # vscode stuff 167 | .vscode/ 168 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG TRT_IMAGE_VERSION=20.09 2 | FROM nvcr.io/nvidia/tensorrt:${TRT_IMAGE_VERSION}-py3 3 | 4 | ARG TRT_IMAGE_VERSION 5 | ARG OPENCV_VERSION=4.1.1 6 | ARG APP_DIR=/usr/src/app 7 | ARG SCRIPT_DIR=/opt/tensorrt/python 8 | ARG DEBIAN_FRONTEND=noninteractive 9 | 10 | ENV HOME=${APP_DIR} 11 | ENV TZ=America/Los_Angeles 12 | 13 | ENV OPENBLAS_MAIN_FREE=1 14 | ENV OPENBLAS_NUM_THREADS=1 15 | ENV NO_AT_BRIDGE=1 16 | 17 | # Install OpenCV and FastMOT dependencies 18 | RUN apt-get -y update && \ 19 | apt-get install -y --no-install-recommends \ 20 | wget unzip tzdata \ 21 | build-essential cmake pkg-config \ 22 | libgtk-3-dev libcanberra-gtk3-module \ 23 | libjpeg-dev libpng-dev libtiff-dev \ 24 | libavcodec-dev libavformat-dev libswscale-dev \ 25 | libv4l-dev libxvidcore-dev libx264-dev \ 26 | gfortran libatlas-base-dev \ 27 | python3-dev \ 28 | gstreamer1.0-tools \ 29 | libgstreamer1.0-dev \ 30 | libgstreamer-plugins-base1.0-dev \ 31 | gstreamer1.0-libav \ 32 | gstreamer1.0-plugins-good \ 33 | gstreamer1.0-plugins-bad \ 34 | gstreamer1.0-plugins-ugly \ 35 | libtbb2 libtbb-dev libdc1394-22-dev && \ 36 | pip install -U --no-cache-dir setuptools pip && \ 37 | pip install --no-cache-dir numpy==1.18.0 38 | 39 | # Build OpenCV 40 | WORKDIR ${HOME} 41 | RUN wget https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip && \ 42 | unzip ${OPENCV_VERSION}.zip && rm ${OPENCV_VERSION}.zip && \ 43 | mv opencv-${OPENCV_VERSION} OpenCV && \ 44 | wget https://github.com/opencv/opencv_contrib/archive/${OPENCV_VERSION}.zip && \ 45 | unzip ${OPENCV_VERSION}.zip && rm ${OPENCV_VERSION}.zip && \ 46 | mv opencv_contrib-${OPENCV_VERSION} OpenCV/opencv_contrib 47 | 48 | # If you have issues with GStreamer, set -DWITH_GSTREAMER=OFF and -DWITH_FFMPEG=ON 49 | WORKDIR ${HOME}/OpenCV/build 50 | RUN cmake \ 51 | -DCMAKE_BUILD_TYPE=RELEASE \ 52 | -DCMAKE_INSTALL_PREFIX=/usr/local \ 53 | -DOPENCV_EXTRA_MODULES_PATH=${HOME}/OpenCV/opencv_contrib/modules \ 54 | -DINSTALL_PYTHON_EXAMPLES=ON \ 55 | -DINSTALL_C_EXAMPLES=OFF \ 56 | -DBUILD_opencv_python2=OFF \ 57 | -DBUILD_TESTS=OFF \ 58 | -DBUILD_PERF_TESTS=OFF \ 59 | -DBUILD_EXAMPLES=ON \ 60 | -DBUILD_PROTOBUF=OFF \ 61 | -DENABLE_FAST_MATH=ON \ 62 | -DWITH_TBB=ON \ 63 | -DWITH_LIBV4L=ON \ 64 | -DWITH_CUDA=OFF \ 65 | 
-DWITH_GSTREAMER=ON \ 66 | -DWITH_GSTREAMER_0_10=OFF \ 67 | -DWITH_FFMPEG=OFF .. && \ 68 | make -j$(nproc) && \ 69 | make install && \ 70 | ldconfig && \ 71 | rm -rf ${HOME}/OpenCV && \ 72 | rm -rf /var/lib/apt/lists/* && \ 73 | apt-get autoremove 74 | 75 | # Install Python dependencies 76 | WORKDIR ${APP_DIR}/FastMOT 77 | COPY requirements.txt . 78 | 79 | # Specify your GPU compute with --build-arg for CuPy (e.g. "arch=compute_75,code=sm_75") 80 | ARG CUPY_NVCC_GENERATE_CODE 81 | 82 | # TensorFlow < 2 is not supported in ubuntu 20.04 83 | RUN if [[ -z ${CUPY_NVCC_GENERATE_CODE} ]]; then \ 84 | echo "CUPY_NVCC_GENERATE_CODE not set, building CuPy for all architectures (slower)"; \ 85 | fi && \ 86 | if dpkg --compare-versions ${TRT_IMAGE_VERSION} ge 20.12; then \ 87 | CUPY_NUM_BUILD_JOBS=$(nproc) pip install --no-cache-dir -r <(grep -ivE "tensorflow" requirements.txt); \ 88 | else \ 89 | dpkg -i ${SCRIPT_DIR}/*-tf_*.deb && \ 90 | CUPY_NUM_BUILD_JOBS=$(nproc) pip install --no-cache-dir -r requirements.txt; \ 91 | fi 92 | 93 | # ------------------------------------ Extras Below ------------------------------------ 94 | 95 | # Stop the container (changes are kept) 96 | # docker stop $(docker ps -ql) 97 | 98 | # Start the container 99 | # docker start -ai $(docker ps -ql) 100 | 101 | # Delete the container 102 | # docker rm $(docker ps -ql) 103 | 104 | # Save changes before deleting the container 105 | # docker commit $(docker ps -ql) fastmot:latest 106 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Yukai Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FastMOT 2 | 3 | [![Hits](https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2FGeekAlexis%2FFastMOT&count_bg=%2379C83D&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=hits&edge_flat=false)](https://hits.seeyoufarm.com) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) [![DOI](https://zenodo.org/badge/237143671.svg)](https://zenodo.org/badge/latestdoi/237143671) 4 | 5 | 6 | 7 | ## News 8 | - (2021.8.17) Support multi-class tracking 9 | - (2021.7.4) Support yolov4-p5 and yolov4-p6 10 | - (2021.2.13) Support Scaled-YOLOv4 (i.e. yolov4-csp/yolov4x-mish/yolov4-csp-swish) 11 | - (2021.1.3) Add DIoU-NMS for postprocessing 12 | - (2020.11.28) Docker container provided for x86 Ubuntu 13 | 14 | ## Description 15 | FastMOT is a custom multiple object tracker that implements: 16 | - YOLO detector 17 | - SSD detector 18 | - Deep SORT + OSNet ReID 19 | - KLT tracker 20 | - Camera motion compensation 21 | 22 | Two-stage trackers like Deep SORT run detection and feature extraction sequentially, which often becomes a bottleneck. FastMOT significantly speeds up the entire system to run in **real-time** even on Jetson. Motion compensation improves tracking for scenes with moving camera, where Deep SORT and FairMOT fail. 23 | 24 | To achieve faster processing, FastMOT only runs the detector and feature extractor every N frames, while KLT fills in the gaps efficiently. FastMOT also re-identifies objects that moved out of frame to keep the same IDs. 25 | 26 | YOLOv4 was trained on CrowdHuman (82% mAP@0.5) and SSD's are pretrained COCO models from TensorFlow. Both detection and feature extraction use the **TensorRT** backend and perform asynchronous inference. In addition, most algorithms, including KLT, Kalman filter, and data association, are optimized using Numba. 27 | 28 | ## Performance 29 | ### Results on MOT20 train set 30 | | Detector Skip | MOTA | IDF1 | HOTA | MOTP | MT | ML | 31 | |:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:| 32 | | N = 1 | 66.8% | 56.4% | 45.0% | 79.3% | 912 | 274 | 33 | | N = 5 | 65.1% | 57.1% | 44.3% | 77.9% | 860 | 317 | 34 | 35 | ### FPS on MOT17 sequences 36 | | Sequence | Density | FPS | 37 | |:-------|:-------:|:-------:| 38 | | MOT17-13 | 5 - 30 | 42 | 39 | | MOT17-04 | 30 - 50 | 26 | 40 | | MOT17-03 | 50 - 80 | 18 | 41 | 42 | Performance is evaluated with YOLOv4 using [TrackEval](https://github.com/JonathonLuiten/TrackEval). Note that neither YOLOv4 nor OSNet was trained or finetuned on the MOT20 dataset, so train set results should generalize well. FPS results are obtained on Jetson Xavier NX (20W 2core mode). 43 | 44 | FastMOT has MOTA scores close to **state-of-the-art** trackers from the MOT Challenge. Increasing N shows small impact on MOTA. Tracking speed can reach up to **42 FPS** depending on the number of objects. Lighter models (e.g. YOLOv4-tiny) are recommended for a more constrained device like Jetson Nano. FPS is expected to be in the range of **50 - 150** on desktop CPU/GPU. 
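The scheduling idea behind these numbers (described in the Description section above) can be sketched in a few lines. This is only an illustration of the frame-skipping scheme, not the actual `fastmot.MOT` implementation, and both helper functions are hypothetical stand-ins:
```python
def propagate_with_klt(frame, tracks):
    """Hypothetical stand-in for the cheap per-frame step: KLT optical flow + Kalman prediction."""
    ...

def run_detector_and_reid(frame, tracks):
    """Hypothetical stand-in for the expensive step: TensorRT detection, ReID features, association."""
    ...

def process(frames, detector_frame_skip=5):
    # the detector and feature extractor only run every `detector_frame_skip` frames,
    # while KLT propagates tracks on every frame in between
    tracks = {}
    for frame_id, frame in enumerate(frames):
        propagate_with_klt(frame, tracks)
        if frame_id % detector_frame_skip == 0:
            run_detector_and_reid(frame, tracks)
    return tracks
```
Stretching the expensive path over more frames is why increasing N in the table above costs only a small drop in MOTA.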
45 | 46 | ## Requirements 47 | - CUDA >= 10 48 | - cuDNN >= 7 49 | - TensorRT >= 7 50 | - OpenCV >= 3.3 51 | - Numpy >= 1.17 52 | - Scipy >= 1.5 53 | - Numba == 0.48 54 | - CuPy == 9.2 55 | - TensorFlow < 2.0 (for SSD support) 56 | 57 | ### Install for x86 Ubuntu 58 | Make sure to have [nvidia-docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker) installed. The image requires NVIDIA Driver version >= 450 for Ubuntu 18.04 and >= 465.19.01 for Ubuntu 20.04. Build and run the docker image: 59 | ```bash 60 | # Add --build-arg TRT_IMAGE_VERSION=21.05 for Ubuntu 20.04 61 | # Add --build-arg CUPY_NVCC_GENERATE_CODE=... to speed up build for your GPU, e.g. "arch=compute_75,code=sm_75" 62 | docker build -t fastmot:latest . 63 | 64 | # Run xhost local:root first if you cannot visualize inside the container 65 | docker run --gpus all --rm -it -v $(pwd):/usr/src/app/FastMOT -v /tmp/.X11-unix:/tmp/.X11-unix -e DISPLAY=unix$DISPLAY -e TZ=$(cat /etc/timezone) fastmot:latest 66 | ``` 67 | ### Install for Jetson Nano/TX2/Xavier NX/Xavier 68 | Make sure to have [JetPack >= 4.4](https://developer.nvidia.com/embedded/jetpack) installed and run the script: 69 | ```bash 70 | ./scripts/install_jetson.sh 71 | ``` 72 | ### Download models 73 | Pretrained OSNet, SSD, and my YOLOv4 ONNX model are included. 74 | ```bash 75 | ./scripts/download_models.sh 76 | ``` 77 | ### Build YOLOv4 TensorRT plugin 78 | ```bash 79 | cd fastmot/plugins 80 | make 81 | ``` 82 | ### Download VOC dataset for INT8 calibration 83 | Only required for SSD (not supported on Ubuntu 20.04) 84 | ```bash 85 | ./scripts/download_data.sh 86 | ``` 87 | 88 | ## Usage 89 | ```bash 90 | python3 app.py --input-uri ... --mot 91 | ``` 92 | - Image sequence: `--input-uri %06d.jpg` 93 | - Video file: `--input-uri file.mp4` 94 | - USB webcam: `--input-uri /dev/video0` 95 | - MIPI CSI camera: `--input-uri csi://0` 96 | - RTSP stream: `--input-uri rtsp://:@:/` 97 | - HTTP stream: `--input-uri http://:@:/` 98 | 99 | Use `--show` to visualize, `--output-uri` to save output, and `--txt` for MOT compliant results. 100 | 101 | Show help message for all options: 102 | ```bash 103 | python3 app.py -h 104 | ``` 105 | Note that the first run will be slow due to Numba compilation. To use the FFMPEG backend on x86, set `WITH_GSTREAMER = False` [here](https://github.com/GeekAlexis/FastMOT/blob/3a4cad87743c226cf603a70b3f15961b9baf6873/fastmot/videoio.py#L11) 106 |
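The tracker can also be driven from Python instead of the `app.py` command line. Below is a minimal loop condensed from `app.py`; visualization and output writing are omitted and the input URI is a placeholder:
```python
import json
from types import SimpleNamespace

import fastmot
from fastmot.utils import ConfigDecoder

# load the JSON config the same way app.py does
with open('cfg/mot.json') as cfg_file:
    config = json.load(cfg_file, cls=ConfigDecoder,
                       object_hook=lambda d: SimpleNamespace(**d))

stream = fastmot.VideoIO(config.resize_to, 'file.mp4', None, **vars(config.stream_cfg))
mot = fastmot.MOT(config.resize_to, **vars(config.mot_cfg), draw=False)
mot.reset(stream.cap_dt)

stream.start_capture()
try:
    while True:
        frame = stream.read()
        if frame is None:
            break
        mot.step(frame)
        for track in mot.visible_tracks():
            print(track.trk_id, track.tlbr)
finally:
    stream.release()
```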
107 | More options can be configured in cfg/mot.json 108 | 109 | - Set `resolution` and `frame_rate` to match the source data or camera configuration (optional). They are required for image sequence, camera sources, and saving txt results. List all configurations for a USB/CSI camera: 110 | ```bash 111 | v4l2-ctl -d /dev/video0 --list-formats-ext 112 | ``` 113 | - To swap network, modify `model` under a detector. For example, you can choose from `SSDInceptionV2`, `SSDMobileNetV1`, or `SSDMobileNetV2` for SSD. 114 | - If more accuracy is desired and FPS is not an issue, lower `detector_frame_skip`. Similarly, raise `detector_frame_skip` to speed up tracking at the cost of accuracy. You may also want to change `max_age` such that `max_age` × `detector_frame_skip` ≈ 30 (see the example after this list) 115 | - Modify `visualizer_cfg` to toggle drawing options. 116 | - All parameters are documented in the API. 117 | 118 |
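As a concrete example of the `detector_frame_skip` and `max_age` tip above, the snippet below rewrites `cfg/mot.json` so their product stays around 30. This is a small convenience script for illustration, not part of the repo:
```python
import json

# trade a little accuracy for speed: raise detector_frame_skip, lower max_age
with open('cfg/mot.json') as f:
    cfg = json.load(f)

cfg['mot_cfg']['detector_frame_skip'] = 10    # default is 5
cfg['mot_cfg']['tracker_cfg']['max_age'] = 3  # keep 10 * 3 = 30

with open('cfg/mot.json', 'w') as f:
    json.dump(cfg, f, indent=4)
```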
119 | 120 | ## Track custom classes 121 | FastMOT can be easily extended to a custom class (e.g. vehicle). You need to train both YOLO and a ReID network on your object class. Check [Darknet](https://github.com/AlexeyAB/darknet) for training YOLO and [fast-reid](https://github.com/JDAI-CV/fast-reid) for training ReID. After training, convert weights to ONNX format. The TensorRT plugin adapted from [tensorrt_demos](https://github.com/jkjung-avt/tensorrt_demos/) is only compatible with Darknet. 122 | 123 | FastMOT also supports multi-class tracking. It is recommended to train a ReID network for each class to extract features separately. 124 | ### Convert YOLO to ONNX 125 | 1. Install ONNX version 1.4.1 (not the latest version) 126 | ```bash 127 | pip3 install onnx==1.4.1 128 | ``` 129 | 2. Convert using your custom cfg and weights 130 | ```bash 131 | ./scripts/yolo2onnx.py --config yolov4.cfg --weights yolov4.weights 132 | ``` 133 | ### Add custom YOLOv3/v4 134 | 1. Subclass `fastmot.models.YOLO` like here: https://github.com/GeekAlexis/FastMOT/blob/32c217a7d289f15a3bb0c1820982df947c82a650/fastmot/models/yolo.py#L100-L109 135 | ``` 136 | ENGINE_PATH : Path 137 | Path to TensorRT engine. 138 | If not found, TensorRT engine will be converted from the ONNX model 139 | at runtime and cached for later use. 140 | MODEL_PATH : Path 141 | Path to ONNX model. 142 | NUM_CLASSES : int 143 | Total number of trained classes. 144 | LETTERBOX : bool 145 | Keep aspect ratio when resizing. 146 | NEW_COORDS : bool 147 | new_coords Darknet parameter for each yolo layer. 148 | INPUT_SHAPE : tuple 149 | Input size in the format `(channel, height, width)`. 150 | LAYER_FACTORS : List[int] 151 | Scale factors with respect to the input size for each yolo layer. 152 | SCALES : List[float] 153 | scale_x_y Darknet parameter for each yolo layer. 154 | ANCHORS : List[List[int]] 155 | Anchors grouped by each yolo layer. 156 | ``` 157 | Note anchors may not follow the same order in the Darknet cfg file. You need to mask out the anchors for each yolo layer using the indices in `mask` in Darknet cfg. 158 | Unlike YOLOv4, the anchors are usually in reverse for YOLOv3 and YOLOv3/v4-tiny 159 | 2. Set class labels to your object classes with `fastmot.models.set_label_map` 160 | 3. Modify cfg/mot.json: set `model` in `yolo_detector_cfg` to the added Python class name and set `class_ids` of interest. You may want to play with `conf_thresh` based on model performance 161 | ### Add custom ReID 162 | 1. Subclass `fastmot.models.ReID` like here: https://github.com/GeekAlexis/FastMOT/blob/32c217a7d289f15a3bb0c1820982df947c82a650/fastmot/models/reid.py#L50-L55 163 | ``` 164 | ENGINE_PATH : Path 165 | Path to TensorRT engine. 166 | If not found, TensorRT engine will be converted from the ONNX model 167 | at runtime and cached for later use. 168 | MODEL_PATH : Path 169 | Path to ONNX model. 170 | INPUT_SHAPE : tuple 171 | Input size in the format `(channel, height, width)`. 172 | OUTPUT_LAYOUT : int 173 | Feature dimension output by the model. 174 | METRIC : {'euclidean', 'cosine'} 175 | Distance metric used to match features. 176 | ``` 177 | 2. Modify cfg/mot.json: set `model` in `feature_extractor_cfgs` to the added Python class name. For more than one class, add more feature extractor configurations to the list `feature_extractor_cfgs`. 
You may want to play with `max_assoc_cost` and `max_reid_cost` based on model performance 178 | 179 | ## Citation 180 | If you find this repo useful in your project or research, please star and consider citing it: 181 | ```bibtex 182 | @software{yukai_yang_2020_4294717, 183 | author = {Yukai Yang}, 184 | title = {{FastMOT: High-Performance Multiple Object Tracking Based on Deep SORT and KLT}}, 185 | month = nov, 186 | year = 2020, 187 | publisher = {Zenodo}, 188 | version = {v1.0.0}, 189 | doi = {10.5281/zenodo.4294717}, 190 | url = {https://doi.org/10.5281/zenodo.4294717} 191 | } 192 | ``` 193 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from pathlib import Path 4 | from types import SimpleNamespace 5 | import argparse 6 | import logging 7 | import json 8 | import cv2 9 | 10 | import fastmot 11 | import fastmot.models 12 | from fastmot.utils import ConfigDecoder, Profiler 13 | 14 | 15 | def main(): 16 | parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) 17 | optional = parser._action_groups.pop() 18 | required = parser.add_argument_group('required arguments') 19 | group = parser.add_mutually_exclusive_group() 20 | required.add_argument('-i', '--input-uri', metavar="URI", required=True, help= 21 | 'URI to input stream\n' 22 | '1) image sequence (e.g. %%06d.jpg)\n' 23 | '2) video file (e.g. file.mp4)\n' 24 | '3) MIPI CSI camera (e.g. csi://0)\n' 25 | '4) USB camera (e.g. /dev/video0)\n' 26 | '5) RTSP stream (e.g. rtsp://:@:/)\n' 27 | '6) HTTP stream (e.g. http://:@:/)\n') 28 | optional.add_argument('-c', '--config', metavar="FILE", 29 | default=Path(__file__).parent / 'cfg' / 'mot.json', 30 | help='path to JSON configuration file') 31 | optional.add_argument('-l', '--labels', metavar="FILE", 32 | help='path to label names (e.g. coco.names)') 33 | optional.add_argument('-o', '--output-uri', metavar="URI", 34 | help='URI to output video file') 35 | optional.add_argument('-t', '--txt', metavar="FILE", 36 | help='path to output MOT Challenge format results (e.g. 
MOT20-01.txt)') 37 | optional.add_argument('-m', '--mot', action='store_true', help='run multiple object tracker') 38 | optional.add_argument('-s', '--show', action='store_true', help='show visualizations') 39 | group.add_argument('-q', '--quiet', action='store_true', help='reduce output verbosity') 40 | group.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity') 41 | parser._action_groups.append(optional) 42 | args = parser.parse_args() 43 | if args.txt is not None and not args.mot: 44 | raise parser.error('argument -t/--txt: not allowed without argument -m/--mot') 45 | 46 | # set up logging 47 | logging.basicConfig(format='%(asctime)s [%(levelname)8s] %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 48 | logger = logging.getLogger(fastmot.__name__) 49 | if args.quiet: 50 | logger.setLevel(logging.WARNING) 51 | elif args.verbose: 52 | logger.setLevel(logging.DEBUG) 53 | else: 54 | logger.setLevel(logging.INFO) 55 | 56 | # load config file 57 | with open(args.config) as cfg_file: 58 | config = json.load(cfg_file, cls=ConfigDecoder, object_hook=lambda d: SimpleNamespace(**d)) 59 | 60 | # load labels if given 61 | if args.labels is not None: 62 | with open(args.labels) as label_file: 63 | label_map = label_file.read().splitlines() 64 | fastmot.models.set_label_map(label_map) 65 | 66 | stream = fastmot.VideoIO(config.resize_to, args.input_uri, args.output_uri, **vars(config.stream_cfg)) 67 | 68 | mot = None 69 | txt = None 70 | if args.mot: 71 | draw = args.show or args.output_uri is not None 72 | mot = fastmot.MOT(config.resize_to, **vars(config.mot_cfg), draw=draw) 73 | mot.reset(stream.cap_dt) 74 | if args.txt is not None: 75 | Path(args.txt).parent.mkdir(parents=True, exist_ok=True) 76 | txt = open(args.txt, 'w') 77 | if args.show: 78 | cv2.namedWindow('Video', cv2.WINDOW_AUTOSIZE) 79 | 80 | logger.info('Starting video capture...') 81 | stream.start_capture() 82 | try: 83 | with Profiler('app') as prof: 84 | while not args.show or cv2.getWindowProperty('Video', 0) >= 0: 85 | frame = stream.read() 86 | if frame is None: 87 | break 88 | 89 | if args.mot: 90 | mot.step(frame) 91 | if txt is not None: 92 | for track in mot.visible_tracks(): 93 | tl = track.tlbr[:2] / config.resize_to * stream.resolution 94 | br = track.tlbr[2:] / config.resize_to * stream.resolution 95 | w, h = br - tl + 1 96 | txt.write(f'{mot.frame_count},{track.trk_id},{tl[0]:.6f},{tl[1]:.6f},' 97 | f'{w:.6f},{h:.6f},-1,-1,-1\n') 98 | 99 | if args.show: 100 | cv2.imshow('Video', frame) 101 | if cv2.waitKey(1) & 0xFF == 27: 102 | break 103 | if args.output_uri is not None: 104 | stream.write(frame) 105 | finally: 106 | # clean up resources 107 | if txt is not None: 108 | txt.close() 109 | stream.release() 110 | cv2.destroyAllWindows() 111 | 112 | # timing statistics 113 | if args.mot: 114 | avg_fps = round(mot.frame_count / prof.duration) 115 | logger.info('Average FPS: %d', avg_fps) 116 | mot.print_timing_info() 117 | 118 | 119 | if __name__ == '__main__': 120 | main() 121 | -------------------------------------------------------------------------------- /assets/aerial_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeekAlexis/FastMOT/e607640f80ebdd9a6686db50faf3bd2a7ab43cfc/assets/aerial_demo.gif -------------------------------------------------------------------------------- /assets/dense_demo.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GeekAlexis/FastMOT/e607640f80ebdd9a6686db50faf3bd2a7ab43cfc/assets/dense_demo.gif -------------------------------------------------------------------------------- /cfg/mot.json: -------------------------------------------------------------------------------- 1 | { 2 | "resize_to": [1280, 720], 3 | 4 | "stream_cfg": { 5 | "resolution": [1920, 1080], 6 | "frame_rate": 30, 7 | "buffer_size": 10 8 | }, 9 | 10 | "mot_cfg": { 11 | "detector_type": "YOLO", 12 | "detector_frame_skip": 5, 13 | "class_ids": [1], 14 | 15 | "ssd_detector_cfg": { 16 | "model": "SSDInceptionV2", 17 | "tile_overlap": 0.25, 18 | "tiling_grid": [4, 2], 19 | "conf_thresh": 0.5, 20 | "merge_thresh": 0.6, 21 | "max_area": 120000 22 | }, 23 | "yolo_detector_cfg": { 24 | "model": "YOLOv4", 25 | "conf_thresh": 0.25, 26 | "nms_thresh": 0.5, 27 | "max_area": 800000, 28 | "min_aspect_ratio": 1.2 29 | }, 30 | "public_detector_cfg": { 31 | "sequence_path": "MOT20/train/MOT20-01", 32 | "conf_thresh": 0.5, 33 | "max_area": 800000 34 | }, 35 | 36 | "feature_extractor_cfgs": [ 37 | { 38 | "model": "OSNet025", 39 | "batch_size": 16 40 | } 41 | ], 42 | 43 | "tracker_cfg": { 44 | "max_age": 6, 45 | "age_penalty": 2, 46 | "motion_weight": 0.2, 47 | "max_assoc_cost": 0.8, 48 | "max_reid_cost": 0.6, 49 | "iou_thresh": 0.4, 50 | "duplicate_thresh": 0.8, 51 | "occlusion_thresh": 0.7, 52 | "conf_thresh": 0.5, 53 | "confirm_hits": 1, 54 | "history_size": 50, 55 | 56 | "kalman_filter_cfg": { 57 | "std_factor_acc": 2.25, 58 | "std_offset_acc": 78.5, 59 | "std_factor_det": [0.08, 0.08], 60 | "std_factor_klt": [0.14, 0.14], 61 | "min_std_det": [4.0, 4.0], 62 | "min_std_klt": [5.0, 5.0], 63 | "init_pos_weight": 5, 64 | "init_vel_weight": 12, 65 | "vel_coupling": 0.6, 66 | "vel_half_life": 2 67 | }, 68 | 69 | "flow_cfg": { 70 | "bg_feat_scale_factor": [0.1, 0.1], 71 | "opt_flow_scale_factor": [0.5, 0.5], 72 | "feat_density": 0.005, 73 | "feat_dist_factor": 0.06, 74 | "ransac_max_iter": 500, 75 | "ransac_conf": 0.99, 76 | "max_error": 100, 77 | "inlier_thresh": 4, 78 | "bg_feat_thresh": 10, 79 | "obj_feat_params": { 80 | "maxCorners": 1000, 81 | "qualityLevel": 0.06, 82 | "blockSize": 3 83 | }, 84 | "opt_flow_params": { 85 | "winSize": [5, 5], 86 | "maxLevel": 5, 87 | "criteria": [3, 10, 0.03] 88 | } 89 | } 90 | }, 91 | 92 | "visualizer_cfg": { 93 | "draw_detections": false, 94 | "draw_confidence": false, 95 | "draw_covariance": false, 96 | "draw_klt": false, 97 | "draw_obj_flow": false, 98 | "draw_bg_flow": false, 99 | "draw_trajectory": false 100 | 101 | } 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /eval/seqmap.txt: -------------------------------------------------------------------------------- 1 | name 2 | MOT20-01 3 | MOT20-02 4 | MOT20-03 5 | MOT20-05 -------------------------------------------------------------------------------- /fastmot/__init__.py: -------------------------------------------------------------------------------- 1 | from .videoio import VideoIO 2 | from .mot import MOT 3 | from .feature_extractor import FeatureExtractor 4 | from .tracker import MultiTracker 5 | from .kalman_filter import KalmanFilter 6 | from .flow import Flow 7 | from .track import Track -------------------------------------------------------------------------------- /fastmot/detector.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from pathlib import Path 3 | import configparser 4 | 
import abc 5 | import numpy as np 6 | import numba as nb 7 | import cupy as cp 8 | import cupyx.scipy.ndimage 9 | import cv2 10 | 11 | from . import models 12 | from .utils import TRTInference 13 | from .utils.rect import as_tlbr, aspect_ratio, to_tlbr, get_size, area 14 | from .utils.rect import enclosing, multi_crop, iom, diou_nms 15 | from .utils.numba import find_split_indices 16 | 17 | 18 | DET_DTYPE = np.dtype( 19 | [('tlbr', float, 4), 20 | ('label', int), 21 | ('conf', float)], 22 | align=True 23 | ) 24 | 25 | 26 | class Detector(abc.ABC): 27 | @abc.abstractmethod 28 | def __init__(self, size): 29 | self.size = size 30 | 31 | def __call__(self, frame): 32 | """Detect objects synchronously.""" 33 | self.detect_async(frame) 34 | return self.postprocess() 35 | 36 | @abc.abstractmethod 37 | def detect_async(self, frame): 38 | raise NotImplementedError 39 | 40 | @abc.abstractmethod 41 | def postprocess(self): 42 | raise NotImplementedError 43 | 44 | 45 | class SSDDetector(Detector): 46 | def __init__(self, size, 47 | class_ids, 48 | model='SSDInceptionV2', 49 | tile_overlap=0.25, 50 | tiling_grid=(4, 2), 51 | conf_thresh=0.5, 52 | merge_thresh=0.6, 53 | max_area=120000): 54 | """An object detector for SSD models. 55 | 56 | Parameters 57 | ---------- 58 | size : tuple 59 | Width and height of each frame. 60 | class_ids : sequence 61 | Class IDs to detect. Note class ID starts at zero. 62 | model : str, optional 63 | SSD model to use. 64 | Must be the name of a class that inherits `models.SSD`. 65 | tile_overlap : float, optional 66 | Ratio of overlap to width and height of each tile. 67 | tiling_grid : tuple, optional 68 | Width and height of tile layout to split each frame for batch inference. 69 | conf_thresh : float, optional 70 | Detection confidence threshold. 71 | merge_thresh : float, optional 72 | Overlap threshold to merge bounding boxes across tiles. 73 | max_area : int, optional 74 | Max area of bounding boxes to detect. 75 | """ 76 | super().__init__(size) 77 | self.model = models.SSD.get_model(model) 78 | assert 0 <= tile_overlap <= 1 79 | self.tile_overlap = tile_overlap 80 | assert tiling_grid[0] >= 1 and tiling_grid[1] >= 1 81 | self.tiling_grid = tiling_grid 82 | assert 0 <= conf_thresh <= 1 83 | self.conf_thresh = conf_thresh 84 | assert 0 <= merge_thresh <= 1 85 | self.merge_thresh = merge_thresh 86 | assert max_area >= 0 87 | self.max_area = max_area 88 | 89 | self.label_mask = np.zeros(self.model.NUM_CLASSES, dtype=np.bool_) 90 | try: 91 | self.label_mask[tuple(class_ids),] = True 92 | except IndexError as err: 93 | raise ValueError('Unsupported class IDs') from err 94 | 95 | self.batch_size = int(np.prod(self.tiling_grid)) 96 | self.tiles, self.tiling_region_sz = self._generate_tiles() 97 | self.scale_factor = tuple(np.array(self.size) / self.tiling_region_sz) 98 | self.backend = TRTInference(self.model, self.batch_size) 99 | self.inp_handle = self.backend.input.host.reshape(self.batch_size, *self.model.INPUT_SHAPE) 100 | 101 | def detect_async(self, frame): 102 | """Detects objects asynchronously.""" 103 | self._preprocess(frame) 104 | self.backend.infer_async() 105 | 106 | def postprocess(self): 107 | """Synchronizes, applies postprocessing, and returns a record array 108 | of detections (DET_DTYPE). 109 | This API should be called after `detect_async`. 110 | Detections are sorted in ascending order by class ID. 
111 | """ 112 | det_out = self.backend.synchronize()[0] 113 | detections, tile_ids = self._filter_dets(det_out, self.tiles, self.model.TOPK, 114 | self.label_mask, self.max_area, 115 | self.conf_thresh, self.scale_factor) 116 | detections = self._merge_dets(detections, tile_ids) 117 | return detections 118 | 119 | def _preprocess(self, frame): 120 | frame = cv2.resize(frame, self.tiling_region_sz) 121 | self._normalize(frame, self.tiles, self.inp_handle) 122 | 123 | def _generate_tiles(self): 124 | tile_size = np.array(self.model.INPUT_SHAPE[:0:-1]) 125 | tiling_grid = np.array(self.tiling_grid) 126 | step_size = (1 - self.tile_overlap) * tile_size 127 | total_size = (tiling_grid - 1) * step_size + tile_size 128 | total_size = np.rint(total_size).astype(int) 129 | tiles = np.array([to_tlbr((c * step_size[0], r * step_size[1], *tile_size)) 130 | for r in range(tiling_grid[1]) for c in range(tiling_grid[0])]) 131 | return tiles, tuple(total_size) 132 | 133 | def _merge_dets(self, detections, tile_ids): 134 | detections = np.fromiter(detections, DET_DTYPE, len(detections)).view(np.recarray) 135 | tile_ids = np.fromiter(tile_ids, int, len(tile_ids)) 136 | if len(detections) == 0: 137 | return detections 138 | detections = self._merge(detections, tile_ids, self.batch_size, self.merge_thresh) 139 | return detections.view(np.recarray) 140 | 141 | @staticmethod 142 | @nb.njit(parallel=True, fastmath=True, cache=True) 143 | def _normalize(frame, tiles, out): 144 | imgs = multi_crop(frame, tiles) 145 | for i in nb.prange(len(imgs)): 146 | bgr = imgs[i] 147 | # BGR to RGB 148 | rgb = bgr[..., ::-1] 149 | # HWC -> CHW 150 | chw = rgb.transpose(2, 0, 1) 151 | # Normalize to [-1.0, 1.0] interval 152 | out[i] = chw * (2 / 255.) - 1. 153 | 154 | @staticmethod 155 | @nb.njit(fastmath=True, cache=True) 156 | def _filter_dets(det_out, tiles, topk, label_mask, max_area, thresh, scale_factor): 157 | detections = [] 158 | tile_ids = [] 159 | for tile_idx in range(len(tiles)): 160 | tile = tiles[tile_idx] 161 | w, h = get_size(tile) 162 | tile_offset = tile_idx * topk 163 | for det_idx in range(topk): 164 | offset = (tile_offset + det_idx) * 7 165 | label = int(det_out[offset + 1]) 166 | conf = det_out[offset + 2] 167 | if conf < thresh: 168 | break 169 | if label_mask[label]: 170 | xmin = (det_out[offset + 3] * w + tile[0]) * scale_factor[0] 171 | ymin = (det_out[offset + 4] * h + tile[1]) * scale_factor[1] 172 | xmax = (det_out[offset + 5] * w + tile[0]) * scale_factor[0] 173 | ymax = (det_out[offset + 6] * h + tile[1]) * scale_factor[1] 174 | tlbr = as_tlbr((xmin, ymin, xmax, ymax)) 175 | if 0 < area(tlbr) <= max_area: 176 | detections.append((tlbr, label, conf)) 177 | tile_ids.append(tile_idx) 178 | return detections, tile_ids 179 | 180 | @staticmethod 181 | @nb.njit(fastmath=True, cache=True) 182 | def _merge(dets, tile_ids, num_tile, thresh): 183 | # find duplicate neighbors across tiles 184 | neighbors = [[0 for _ in range(0)] for _ in range(len(dets))] 185 | for i, det in enumerate(dets): 186 | max_ioms = np.zeros(num_tile) 187 | for j, other in enumerate(dets): 188 | if tile_ids[i] != tile_ids[j] and det.label == other.label: 189 | overlap = iom(det.tlbr, other.tlbr) 190 | # use the detection with the greatest IoM from each tile 191 | if overlap >= thresh and overlap > max_ioms[tile_ids[j]]: 192 | max_ioms[tile_ids[j]] = overlap 193 | neighbors[i].append(j) 194 | 195 | # merge neighbors using depth-first search 196 | keep = set(range(len(dets))) 197 | stack = [] 198 | for i in range(len(dets)): 199 | if 
len(neighbors[i]) > 0 and tile_ids[i] != -1: 200 | tile_ids[i] = -1 201 | stack.append(i) 202 | candidates = [] 203 | while len(stack) > 0: 204 | for j in neighbors[stack.pop()]: 205 | if tile_ids[j] != -1: 206 | candidates.append(j) 207 | tile_ids[j] = -1 208 | stack.append(j) 209 | for k in candidates: 210 | dets[i].tlbr[:] = enclosing(dets[i].tlbr, dets[k].tlbr) 211 | dets[i].conf = max(dets[i].conf, dets[k].conf) 212 | keep.discard(k) 213 | dets = dets[np.array(list(keep))] 214 | 215 | # sort detections by class 216 | dets = dets[np.argsort(dets.label)] 217 | return dets 218 | 219 | 220 | class YOLODetector(Detector): 221 | def __init__(self, size, 222 | class_ids, 223 | model='YOLOv4', 224 | conf_thresh=0.25, 225 | nms_thresh=0.5, 226 | max_area=800000, 227 | min_aspect_ratio=1.2): 228 | """An object detector for YOLO models. 229 | 230 | Parameters 231 | ---------- 232 | size : tuple 233 | Width and height of each frame. 234 | class_ids : sequence 235 | Class IDs to detect. Note class ID starts at zero. 236 | model : str, optional 237 | YOLO model to use. 238 | Must be the name of a class that inherits `models.YOLO`. 239 | conf_thresh : float, optional 240 | Detection confidence threshold. 241 | nms_thresh : float, optional 242 | Nonmaximum suppression overlap threshold. 243 | Set higher to detect crowded objects. 244 | max_area : int, optional 245 | Max area of bounding boxes to detect. 246 | min_aspect_ratio : float, optional 247 | Min aspect ratio (height over width) of bounding boxes to detect. 248 | Set to 0.1 for square shaped objects. 249 | """ 250 | super().__init__(size) 251 | self.model = models.YOLO.get_model(model) 252 | assert 0 <= conf_thresh <= 1 253 | self.conf_thresh = conf_thresh 254 | assert 0 <= nms_thresh <= 1 255 | self.nms_thresh = nms_thresh 256 | assert max_area >= 0 257 | self.max_area = max_area 258 | assert min_aspect_ratio >= 0 259 | self.min_aspect_ratio = min_aspect_ratio 260 | 261 | self.label_mask = np.zeros(self.model.NUM_CLASSES, dtype=np.bool_) 262 | try: 263 | self.label_mask[tuple(class_ids),] = True 264 | except IndexError as err: 265 | raise ValueError('Unsupported class IDs') from err 266 | 267 | self.backend = TRTInference(self.model, 1) 268 | self.inp_handle, self.upscaled_sz, self.bbox_offset = self._create_letterbox() 269 | 270 | def detect_async(self, frame): 271 | """Detects objects asynchronously.""" 272 | self._preprocess(frame) 273 | self.backend.infer_async(from_device=True) 274 | 275 | def postprocess(self): 276 | """Synchronizes, applies postprocessing, and returns a record array 277 | of detections (DET_DTYPE). 278 | This API should be called after `detect_async`. 279 | Detections are sorted in ascending order by class ID. 
280 | """ 281 | det_out = self.backend.synchronize() 282 | det_out = np.concatenate(det_out).reshape(-1, 7) 283 | detections = self._filter_dets(det_out, self.upscaled_sz, self.bbox_offset, 284 | self.label_mask, self.conf_thresh, self.nms_thresh, 285 | self.max_area, self.min_aspect_ratio) 286 | detections = np.fromiter(detections, DET_DTYPE, len(detections)).view(np.recarray) 287 | return detections 288 | 289 | def _preprocess(self, frame): 290 | zoom = np.roll(self.inp_handle.shape, -1) / frame.shape 291 | with self.backend.stream: 292 | frame_dev = cp.asarray(frame) 293 | # resize 294 | small_dev = cupyx.scipy.ndimage.zoom(frame_dev, zoom, order=1, mode='opencv', grid_mode=True) 295 | # BGR to RGB 296 | rgb_dev = small_dev[..., ::-1] 297 | # HWC -> CHW 298 | chw_dev = rgb_dev.transpose(2, 0, 1) 299 | # normalize to [0, 1] interval 300 | cp.multiply(chw_dev, 1 / 255., out=self.inp_handle) 301 | 302 | def _create_letterbox(self): 303 | src_size = np.array(self.size) 304 | dst_size = np.array(self.model.INPUT_SHAPE[:0:-1]) 305 | if self.model.LETTERBOX: 306 | scale_factor = min(dst_size / src_size) 307 | scaled_size = np.rint(src_size * scale_factor).astype(int) 308 | img_offset = (dst_size - scaled_size) / 2 309 | roi = np.s_[:, img_offset[1]:img_offset[1] + scaled_size[1], 310 | img_offset[0]:img_offset[0] + scaled_size[0]] 311 | upscaled_sz = np.rint(dst_size / scale_factor).astype(int) 312 | bbox_offset = (upscaled_sz - src_size) / 2 313 | else: 314 | roi = np.s_[:] 315 | upscaled_sz = src_size 316 | bbox_offset = np.zeros(2) 317 | inp_reshaped = self.backend.input.device.reshape(self.model.INPUT_SHAPE) 318 | inp_reshaped[:] = 0.5 # initial value for letterbox 319 | inp_handle = inp_reshaped[roi] 320 | return inp_handle, upscaled_sz, bbox_offset 321 | 322 | @staticmethod 323 | @nb.njit(fastmath=True, cache=True) 324 | def _filter_dets(det_out, size, offset, label_mask, conf_thresh, nms_thresh, max_area, min_ar): 325 | """ 326 | det_out: a list of 3 tensors, where each tensor 327 | contains a multiple of 7 float32 numbers in 328 | the order of [x, y, w, h, box_confidence, class_id, class_prob] 329 | """ 330 | # filter by class and score 331 | keep = [] 332 | for i in range(len(det_out)): 333 | if label_mask[int(det_out[i, 5])]: 334 | score = det_out[i, 4] * det_out[i, 6] 335 | if score >= conf_thresh: 336 | keep.append(i) 337 | det_out = det_out[np.array(keep)] 338 | 339 | # scale to pixel values 340 | det_out[:, :4] *= np.append(size, size) 341 | det_out[:, :2] -= offset 342 | 343 | # per-class NMS 344 | det_out = det_out[np.argsort(det_out[:, 5])] 345 | split_indices = find_split_indices(det_out[:, 5]) 346 | all_indices = np.arange(len(det_out)) 347 | 348 | keep = [] 349 | for i in range(len(split_indices) + 1): 350 | begin = 0 if i == 0 else split_indices[i - 1] 351 | end = len(det_out) if i == len(split_indices) else split_indices[i] 352 | cls_dets = det_out[begin:end] 353 | cls_keep = diou_nms(cls_dets[:, :4], cls_dets[:, 4], nms_thresh) 354 | keep.extend(all_indices[begin:end][cls_keep]) 355 | nms_dets = det_out[np.array(keep)] 356 | 357 | # create detections 358 | detections = [] 359 | for i in range(len(nms_dets)): 360 | tlbr = to_tlbr(nms_dets[i, :4]) 361 | label = int(nms_dets[i, 5]) 362 | conf = nms_dets[i, 4] * nms_dets[i, 6] 363 | if 0 < area(tlbr) <= max_area and aspect_ratio(tlbr) >= min_ar: 364 | detections.append((tlbr, label, conf)) 365 | return detections 366 | 367 | 368 | class PublicDetector(Detector): 369 | def __init__(self, size, 370 | class_ids, 371 | 
frame_skip, 372 | sequence_path=None, 373 | conf_thresh=0.5, 374 | max_area=800000): 375 | """Class to use MOT Challenge's public detections. 376 | 377 | Parameters 378 | ---------- 379 | size : tuple 380 | Width and height of each frame. 381 | class_ids : sequence 382 | Class IDs to detect. Only 1 (i.e. person) is supported. 383 | frame_skip : int 384 | Detector frame skip. 385 | sequence_path : str, optional 386 | Relative path to MOT Challenge's sequence directory. 387 | conf_thresh : float, optional 388 | Detection confidence threshold. 389 | max_area : int, optional 390 | Max area of bounding boxes to detect. 391 | """ 392 | super().__init__(size) 393 | assert tuple(class_ids) == (1,) 394 | self.frame_skip = frame_skip 395 | assert sequence_path is not None 396 | self.seq_root = Path(__file__).parents[1] / sequence_path 397 | assert 0 <= conf_thresh <= 1 398 | self.conf_thresh = conf_thresh 399 | assert max_area >= 0 400 | self.max_area = max_area 401 | 402 | assert self.seq_root.exists() 403 | seqinfo = configparser.ConfigParser() 404 | seqinfo.read(self.seq_root / 'seqinfo.ini') 405 | self.seq_size = (int(seqinfo['Sequence']['imWidth']), int(seqinfo['Sequence']['imHeight'])) 406 | 407 | self.detections = defaultdict(list) 408 | self.frame_id = 0 409 | 410 | det_txt = self.seq_root / 'det' / 'det.txt' 411 | for mot_challenge_det in np.loadtxt(det_txt, delimiter=','): 412 | frame_id = int(mot_challenge_det[0]) - 1 413 | tlbr = to_tlbr(mot_challenge_det[2:6]) 414 | # mot_challenge_det[6] 415 | conf = 1.0 416 | # mot_challenge_det[7] 417 | label = 1 # person 418 | # scale inside frame 419 | tlbr[:2] = tlbr[:2] / self.seq_size * self.size 420 | tlbr[2:] = tlbr[2:] / self.seq_size * self.size 421 | tlbr = np.rint(tlbr) 422 | if conf >= self.conf_thresh and area(tlbr) <= self.max_area: 423 | self.detections[frame_id].append((tlbr, label, conf)) 424 | 425 | def detect_async(self, frame): 426 | pass 427 | 428 | def postprocess(self): 429 | detections = np.array(self.detections[self.frame_id], DET_DTYPE).view(np.recarray) 430 | self.frame_id += self.frame_skip 431 | return detections 432 | -------------------------------------------------------------------------------- /fastmot/feature_extractor.py: -------------------------------------------------------------------------------- 1 | from multiprocessing.pool import ThreadPool 2 | import numpy as np 3 | import numba as nb 4 | import cv2 5 | 6 | from . import models 7 | from .utils import TRTInference 8 | from .utils.rect import multi_crop 9 | 10 | 11 | class FeatureExtractor: 12 | def __init__(self, model='OSNet025', batch_size=16): 13 | """A feature extractor for ReID embeddings. 14 | 15 | Parameters 16 | ---------- 17 | model : str, optional 18 | ReID model to use. 19 | Must be the name of a class that inherits `models.ReID`. 20 | batch_size : int, optional 21 | Batch size for inference. 
22 | """ 23 | self.model = models.ReID.get_model(model) 24 | assert batch_size >= 1 25 | self.batch_size = batch_size 26 | 27 | self.feature_dim = self.model.OUTPUT_LAYOUT 28 | self.backend = TRTInference(self.model, self.batch_size) 29 | self.inp_handle = self.backend.input.host.reshape(self.batch_size, *self.model.INPUT_SHAPE) 30 | self.pool = ThreadPool() 31 | 32 | self.embeddings = [] 33 | self.last_num_features = 0 34 | 35 | def __del__(self): 36 | self.pool.close() 37 | self.pool.join() 38 | 39 | def __call__(self, frame, tlbrs): 40 | """Extract feature embeddings from bounding boxes synchronously.""" 41 | self.extract_async(frame, tlbrs) 42 | return self.postprocess() 43 | 44 | @property 45 | def metric(self): 46 | return self.model.METRIC 47 | 48 | def extract_async(self, frame, tlbrs): 49 | """Extract feature embeddings from bounding boxes asynchronously.""" 50 | imgs = multi_crop(frame, tlbrs) 51 | self.embeddings, cur_imgs = [], [] 52 | # pipeline inference and preprocessing the next batch in parallel 53 | for offset in range(0, len(imgs), self.batch_size): 54 | cur_imgs = imgs[offset:offset + self.batch_size] 55 | self.pool.starmap(self._preprocess, enumerate(cur_imgs)) 56 | if offset > 0: 57 | embedding_out = self.backend.synchronize()[0] 58 | self.embeddings.append(embedding_out) 59 | self.backend.infer_async() 60 | self.last_num_features = len(cur_imgs) 61 | 62 | def postprocess(self): 63 | """Synchronizes, applies postprocessing, and returns a NxM matrix of N 64 | extracted embeddings with dimension M. 65 | This API should be called after `extract_async`. 66 | """ 67 | if self.last_num_features == 0: 68 | return np.empty((0, self.feature_dim)) 69 | 70 | embedding_out = self.backend.synchronize()[0][:self.last_num_features * self.feature_dim] 71 | self.embeddings.append(embedding_out) 72 | embeddings = np.concatenate(self.embeddings).reshape(-1, self.feature_dim) 73 | embeddings /= np.linalg.norm(embeddings, axis=1, keepdims=True) 74 | return embeddings 75 | 76 | def null_embeddings(self, detections): 77 | """Returns a NxM matrix of N identical embeddings with dimension M. 78 | This API effectively disables feature extraction. 79 | """ 80 | embeddings = np.ones((len(detections), self.feature_dim)) 81 | embeddings /= np.linalg.norm(embeddings, axis=1, keepdims=True) 82 | return embeddings 83 | 84 | def _preprocess(self, idx, img): 85 | img = cv2.resize(img, self.model.INPUT_SHAPE[:0:-1]) 86 | self._normalize(img, self.inp_handle[idx]) 87 | 88 | @staticmethod 89 | @nb.njit(fastmath=True, nogil=True, cache=True) 90 | def _normalize(img, out): 91 | # BGR to RGB 92 | rgb = img[..., ::-1] 93 | # HWC -> CHW 94 | chw = rgb.transpose(2, 0, 1) 95 | # Normalize using ImageNet's mean and std 96 | out[0, ...] = (chw[0, ...] / 255. - 0.485) / 0.229 97 | out[1, ...] = (chw[1, ...] / 255. - 0.456) / 0.224 98 | out[2, ...] = (chw[2, ...] / 255. 
- 0.406) / 0.225 99 | -------------------------------------------------------------------------------- /fastmot/flow.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import itertools 3 | import numpy as np 4 | import numba as nb 5 | import cupyx 6 | import cv2 7 | 8 | from .utils.rect import to_tlbr, get_size, get_center 9 | from .utils.rect import intersection, crop 10 | from .utils.numba import mask_area, transform 11 | 12 | 13 | LOGGER = logging.getLogger(__name__) 14 | 15 | 16 | class Flow: 17 | def __init__(self, size, 18 | bg_feat_scale_factor=(0.1, 0.1), 19 | opt_flow_scale_factor=(0.5, 0.5), 20 | feat_density=0.005, 21 | feat_dist_factor=0.06, 22 | ransac_max_iter=500, 23 | ransac_conf=0.99, 24 | max_error=100, 25 | inlier_thresh=4, 26 | bg_feat_thresh=10, 27 | obj_feat_params=None, 28 | opt_flow_params=None): 29 | """A KLT tracker based on optical flow feature point matching. 30 | Camera motion is simultaneously estimated by tracking feature points 31 | on the background. 32 | 33 | Parameters 34 | ---------- 35 | size : tuple 36 | Width and height of each frame. 37 | bg_feat_scale_factor : tuple, optional 38 | Width and height scale factors to resize frame for background feature detection. 39 | opt_flow_scale_factor : tuple, optional 40 | Width and height scale factors to resize frame for optical flow. 41 | feat_density : float, optional 42 | Min feature point density to keep inside the bounding box. 43 | feat_dist_factor : float, optional 44 | Target size scale factor to estimate min feature point distance. 45 | ransac_max_iter : int, optional 46 | Max RANSAC iterations to filter matched outliers. 47 | ransac_conf : float, optional 48 | RANSAC confidence threshold to filter matched outliers. 49 | max_error : int, optional 50 | Max optical flow error. 51 | inlier_thresh : int, optional 52 | Min number of inliers for valid matching. 53 | bg_feat_thresh : int, optional 54 | FAST threshold for background feature detection. 55 | obj_feat_params : SimpleNamespace, optional 56 | GFTT parameters for object feature detection, see `cv2.goodFeaturesToTrack`. 57 | opt_flow_params : SimpleNamespace, optional 58 | Optical flow parameters, see `cv2.calcOpticalFlowPyrLK`. 
59 | """ 60 | self.size = size 61 | assert 0 < bg_feat_scale_factor[0] <= 1 and 0 < bg_feat_scale_factor[1] <= 1 62 | self.bg_feat_scale_factor = bg_feat_scale_factor 63 | assert 0 < opt_flow_scale_factor[0] <= 1 and 0 < opt_flow_scale_factor[1] <= 1 64 | self.opt_flow_scale_factor = opt_flow_scale_factor 65 | assert 0 <= feat_density <= 1 66 | self.feat_density = feat_density 67 | assert feat_dist_factor >= 0 68 | self.feat_dist_factor = feat_dist_factor 69 | assert ransac_max_iter >= 0 70 | self.ransac_max_iter = ransac_max_iter 71 | assert 0 <= ransac_conf <= 1 72 | self.ransac_conf = ransac_conf 73 | assert 0 <= max_error <= 255 74 | self.max_error = max_error 75 | assert inlier_thresh >= 1 76 | self.inlier_thresh = inlier_thresh 77 | assert bg_feat_thresh >= 0 78 | self.bg_feat_thresh = bg_feat_thresh 79 | 80 | self.obj_feat_params = { 81 | "maxCorners": 1000, 82 | "qualityLevel": 0.06, 83 | "blockSize": 3 84 | } 85 | self.opt_flow_params = { 86 | "winSize": (5, 5), 87 | "maxLevel": 5, 88 | "criteria": (3, 10, 0.03) 89 | } 90 | if obj_feat_params is not None: 91 | self.obj_feat_params.update(vars(obj_feat_params)) 92 | if opt_flow_params is None: 93 | self.opt_flow_params.update(vars(opt_flow_params)) 94 | 95 | self.bg_feat_detector = cv2.FastFeatureDetector_create(threshold=self.bg_feat_thresh) 96 | 97 | # background feature points for visualization 98 | self.bg_keypoints = None 99 | self.prev_bg_keypoints = None 100 | 101 | # preallocate frame buffers 102 | opt_flow_sz = ( 103 | round(self.opt_flow_scale_factor[0] * self.size[0]), 104 | round(self.opt_flow_scale_factor[1] * self.size[1]) 105 | ) 106 | self.frame_gray = cupyx.empty_pinned(self.size[::-1], np.uint8) 107 | self.frame_small = cupyx.empty_pinned(opt_flow_sz[::-1], np.uint8) 108 | self.prev_frame_gray = cupyx.empty_like_pinned(self.frame_gray) 109 | self.prev_frame_small = cupyx.empty_like_pinned(self.frame_small) 110 | 111 | bg_feat_sz = ( 112 | round(self.bg_feat_scale_factor[0] * self.size[0]), 113 | round(self.bg_feat_scale_factor[1] * self.size[1]) 114 | ) 115 | self.prev_frame_bg = cupyx.empty_pinned(bg_feat_sz[::-1], np.uint8) 116 | self.bg_mask_small = cupyx.empty_like_pinned(self.prev_frame_bg) 117 | 118 | self.fg_mask = cupyx.empty_like_pinned(self.frame_gray) 119 | self.frame_rect = to_tlbr((0, 0, *self.size)) 120 | 121 | def init(self, frame): 122 | """Preprocesses the first frame to prepare for subsequent `predict`. 123 | 124 | Parameters 125 | ---------- 126 | frame : ndarray 127 | Initial frame. 128 | """ 129 | cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY, dst=self.prev_frame_gray) 130 | cv2.resize(self.prev_frame_gray, self.prev_frame_small.shape[::-1], 131 | dst=self.prev_frame_small) 132 | self.bg_keypoints = np.empty((0, 2), np.float32) 133 | self.prev_bg_keypoints = np.empty((0, 2), np.float32) 134 | 135 | def predict(self, frame, tracks): 136 | """Predicts tracklet positions in the next frame and estimates camera motion. 137 | 138 | Parameters 139 | ---------- 140 | frame : ndarray 141 | The next frame. 142 | tracks : List[Track] 143 | List of tracks to predict. 144 | Feature points of each track are updated in place. 145 | 146 | Returns 147 | ------- 148 | Dict[int, ndarray], ndarray 149 | Returns a dictionary with track IDs as keys and predicted bounding 150 | boxes of [x1, x2, y1, y2] as values, and a 3x3 homography matrix. 
151 | """ 152 | # preprocess frame 153 | cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY, dst=self.frame_gray) 154 | cv2.resize(self.frame_gray, self.frame_small.shape[::-1], dst=self.frame_small) 155 | 156 | # order tracks from closest to farthest 157 | tracks.sort(reverse=True) 158 | 159 | # detect target feature points 160 | all_prev_pts = [] 161 | self.fg_mask[:] = 255 162 | for track in tracks: 163 | inside_tlbr = intersection(track.tlbr, self.frame_rect) 164 | target_mask = crop(self.fg_mask, inside_tlbr) 165 | target_area = mask_area(target_mask) 166 | keypoints = self._rect_filter(track.keypoints, inside_tlbr, self.fg_mask) 167 | # only detect new keypoints when too few are propagated 168 | if len(keypoints) < self.feat_density * target_area: 169 | img = crop(self.prev_frame_gray, inside_tlbr) 170 | feature_dist = self._estimate_feature_dist(target_area, self.feat_dist_factor) 171 | keypoints = cv2.goodFeaturesToTrack(img, mask=target_mask, 172 | minDistance=feature_dist, 173 | **self.obj_feat_params) 174 | if keypoints is None: 175 | keypoints = np.empty((0, 2), np.float32) 176 | else: 177 | keypoints = self._ellipse_filter(keypoints, track.tlbr, inside_tlbr[:2]) 178 | # batch keypoints 179 | all_prev_pts.append(keypoints) 180 | # zero out target in foreground mask 181 | target_mask[:] = 0 182 | target_ends = list(itertools.accumulate(len(pts) for pts in 183 | all_prev_pts)) if all_prev_pts else [0] 184 | target_begins = itertools.chain([0], target_ends[:-1]) 185 | 186 | # detect background feature points 187 | cv2.resize(self.prev_frame_gray, self.prev_frame_bg.shape[::-1], dst=self.prev_frame_bg) 188 | cv2.resize(self.fg_mask, self.bg_mask_small.shape[::-1], dst=self.bg_mask_small, 189 | interpolation=cv2.INTER_NEAREST) 190 | keypoints = self.bg_feat_detector.detect(self.prev_frame_bg, mask=self.bg_mask_small) 191 | if len(keypoints) == 0: 192 | self.bg_keypoints = np.empty((0, 2), np.float32) 193 | self.prev_frame_gray, self.frame_gray = self.frame_gray, self.prev_frame_gray 194 | self.prev_frame_small, self.frame_small = self.frame_small, self.prev_frame_small 195 | LOGGER.warning('Camera motion estimation failed') 196 | return {}, None 197 | keypoints = np.float32([kp.pt for kp in keypoints]) 198 | keypoints = self._unscale_pts(keypoints, self.bg_feat_scale_factor) 199 | bg_begin = target_ends[-1] 200 | all_prev_pts.append(keypoints) 201 | 202 | # match features using optical flow 203 | all_prev_pts = np.concatenate(all_prev_pts) 204 | scaled_prev_pts = self._scale_pts(all_prev_pts, self.opt_flow_scale_factor) 205 | all_cur_pts, status, err = cv2.calcOpticalFlowPyrLK(self.prev_frame_small, self.frame_small, 206 | scaled_prev_pts, None, 207 | **self.opt_flow_params) 208 | status = self._get_status(status, err, self.max_error) 209 | all_cur_pts = self._unscale_pts(all_cur_pts, self.opt_flow_scale_factor, status) 210 | 211 | # save preprocessed frame buffers for next prediction 212 | self.prev_frame_gray, self.frame_gray = self.frame_gray, self.prev_frame_gray 213 | self.prev_frame_small, self.frame_small = self.frame_small, self.prev_frame_small 214 | 215 | # estimate camera motion 216 | homography = None 217 | prev_bg_pts, matched_bg_pts = self._get_good_match(all_prev_pts, all_cur_pts, 218 | status, bg_begin, -1) 219 | if len(matched_bg_pts) < 4: 220 | self.bg_keypoints = np.empty((0, 2), np.float32) 221 | LOGGER.warning('Camera motion estimation failed') 222 | return {}, None 223 | homography, inlier_mask = cv2.findHomography(prev_bg_pts, matched_bg_pts, 224 | method=cv2.RANSAC, 225 
| maxIters=self.ransac_max_iter, 226 | confidence=self.ransac_conf) 227 | self.prev_bg_keypoints, self.bg_keypoints = self._get_inliers(prev_bg_pts, matched_bg_pts, 228 | inlier_mask) 229 | if homography is None or len(self.bg_keypoints) < self.inlier_thresh: 230 | self.bg_keypoints = np.empty((0, 2), np.float32) 231 | LOGGER.warning('Camera motion estimation failed') 232 | return {}, None 233 | 234 | # estimate target bounding boxes 235 | next_bboxes = {} 236 | self.fg_mask[:] = 255 237 | for begin, end, track in zip(target_begins, target_ends, tracks): 238 | prev_pts, matched_pts = self._get_good_match(all_prev_pts, all_cur_pts, 239 | status, begin, end) 240 | prev_pts, matched_pts = self._fg_filter(prev_pts, matched_pts, self.fg_mask, self.size) 241 | if len(matched_pts) < 3: 242 | track.keypoints = np.empty((0, 2), np.float32) 243 | continue 244 | # model motion as partial affine 245 | affine_mat, inlier_mask = cv2.estimateAffinePartial2D(prev_pts, matched_pts, 246 | method=cv2.RANSAC, 247 | maxIters=self.ransac_max_iter, 248 | confidence=self.ransac_conf) 249 | if affine_mat is None: 250 | track.keypoints = np.empty((0, 2), np.float32) 251 | continue 252 | est_tlbr = self._estimate_bbox(track.tlbr, affine_mat) 253 | track.prev_keypoints, track.keypoints = self._get_inliers(prev_pts, matched_pts, 254 | inlier_mask) 255 | if (intersection(est_tlbr, self.frame_rect) is None or 256 | len(track.keypoints) < self.inlier_thresh): 257 | track.keypoints = np.empty((0, 2), np.float32) 258 | continue 259 | next_bboxes[track.trk_id] = est_tlbr 260 | track.inlier_ratio = len(track.keypoints) / len(matched_pts) 261 | # zero out predicted target in foreground mask 262 | target_mask = crop(self.fg_mask, est_tlbr) 263 | target_mask[:] = 0 264 | return next_bboxes, homography 265 | 266 | @staticmethod 267 | @nb.njit(fastmath=True, cache=True) 268 | def _estimate_feature_dist(target_area, feat_dist_factor): 269 | est_feat_dist = round(np.sqrt(target_area) * feat_dist_factor) 270 | return max(est_feat_dist, 1) 271 | 272 | @staticmethod 273 | @nb.njit(fastmath=True, cache=True) 274 | def _estimate_bbox(tlbr, affine_mat): 275 | tl = transform(tlbr[:2], affine_mat).ravel() 276 | scale = np.linalg.norm(affine_mat[:2, 0]) 277 | scale = 1. if scale < 0.9 or scale > 1.1 else scale 278 | w, h = get_size(tlbr) 279 | return to_tlbr((tl[0], tl[1], w * scale, h * scale)) 280 | 281 | @staticmethod 282 | @nb.njit(fastmath=True, cache=True) 283 | def _rect_filter(pts, tlbr, fg_mask): 284 | if len(pts) == 0: 285 | return np.empty((0, 2), np.float32) 286 | pts2i = np.rint(pts).astype(np.int32) 287 | # filter out points outside the rectangle 288 | ge_le = (pts2i >= tlbr[:2]) & (pts2i <= tlbr[2:]) 289 | inside = np.where(ge_le[:, 0] & ge_le[:, 1]) 290 | pts, pts2i = pts[inside], pts2i[inside] 291 | # keep points inside the foreground area 292 | keep = np.array([i for i in range(len(pts2i)) if 293 | fg_mask[pts2i[i][1], pts2i[i][0]] == 255]) 294 | return pts[keep] 295 | 296 | @staticmethod 297 | @nb.njit(fastmath=True, cache=True) 298 | def _ellipse_filter(pts, tlbr, offset): 299 | offset = np.asarray(offset, np.float32) 300 | center = np.array(get_center(tlbr)) 301 | semi_axes = np.array(get_size(tlbr)) * 0.5 302 | pts = pts.reshape(-1, 2) 303 | pts = pts + offset 304 | # filter out points outside the ellipse 305 | keep = np.sum(((pts - center) / semi_axes)**2, axis=1) <= 1. 
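# points are kept when ((px - cx) / a)^2 + ((py - cy) / b)^2 <= 1, i.e. when they
# fall inside the ellipse inscribed in the bounding box; this favors keypoints
# near the target center over box corners, which tend to contain background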
306 | return pts[keep] 307 | 308 | @staticmethod 309 | @nb.njit(fastmath=True, cache=True) 310 | def _fg_filter(prev_pts, cur_pts, fg_mask, frame_sz): 311 | if len(cur_pts) == 0: 312 | return prev_pts, cur_pts 313 | size = np.array(frame_sz) 314 | pts2i = np.rint(cur_pts).astype(np.int32) 315 | # filter out points outside the frame 316 | ge_lt = (pts2i >= 0) & (pts2i < size) 317 | inside = ge_lt[:, 0] & ge_lt[:, 1] 318 | prev_pts, cur_pts = prev_pts[inside], cur_pts[inside] 319 | pts2i = pts2i[inside] 320 | # keep points inside the foreground area 321 | keep = np.array([i for i in range(len(pts2i)) if 322 | fg_mask[pts2i[i][1], pts2i[i][0]] == 255]) 323 | return prev_pts[keep], cur_pts[keep] 324 | 325 | @staticmethod 326 | @nb.njit(fastmath=True, cache=True) 327 | def _scale_pts(pts, scale_factor): 328 | scale_factor = np.array(scale_factor, np.float32) 329 | pts = pts * scale_factor 330 | pts = pts.reshape(-1, 1, 2) 331 | return pts 332 | 333 | @staticmethod 334 | @nb.njit(fastmath=True, cache=True) 335 | def _unscale_pts(pts, scale_factor, mask=None): 336 | scale_factor = np.array(scale_factor, np.float32) 337 | unscale_factor = 1 / scale_factor 338 | pts = pts.reshape(-1, 2) 339 | if mask is None: 340 | pts = pts * unscale_factor 341 | else: 342 | idx = np.where(mask) 343 | pts[idx] = pts[idx] * unscale_factor 344 | return pts 345 | 346 | @staticmethod 347 | @nb.njit(fastmath=True, cache=True) 348 | def _get_status(status, err, max_err): 349 | return status.ravel().astype(np.bool_) & (err.ravel() < max_err) 350 | 351 | @staticmethod 352 | @nb.njit(fastmath=True, cache=True) 353 | def _get_good_match(prev_pts, cur_pts, status, begin, end): 354 | keep = np.where(status[begin:end]) 355 | prev_pts = prev_pts[begin:end][keep] 356 | cur_pts = cur_pts[begin:end][keep] 357 | return prev_pts, cur_pts 358 | 359 | @staticmethod 360 | @nb.njit(fastmath=True, cache=True) 361 | def _get_inliers(prev_pts, cur_pts, inlier_mask): 362 | keep = np.where(inlier_mask.ravel()) 363 | return prev_pts[keep], cur_pts[keep] 364 | -------------------------------------------------------------------------------- /fastmot/kalman_filter.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | import numpy as np 3 | import numba as nb 4 | 5 | from .utils.rect import get_size 6 | 7 | 8 | class MeasType(Enum): 9 | FLOW = 0 10 | DETECTOR = 1 11 | 12 | 13 | class KalmanFilter: 14 | def __init__(self, 15 | std_factor_acc=2.25, 16 | std_offset_acc=78.5, 17 | std_factor_det=(0.08, 0.08), 18 | std_factor_klt=(0.14, 0.14), 19 | min_std_det=(4.0, 4.0), 20 | min_std_klt=(5.0, 5.0), 21 | init_pos_weight=5, 22 | init_vel_weight=12, 23 | vel_coupling=0.6, 24 | vel_half_life=2): 25 | """A simple Kalman filter for tracking bounding boxes in image space. 26 | The 8-dimensional state space 27 | x1, y1, x2, y2, v_x1, v_y1, v_x2, v_y2 28 | contains the bounding box top left corner, bottom right corner, 29 | and their respective velocities. 30 | Object motion follows a modified constant velocity model. 31 | Velocity will decay over time without measurement and bounding box 32 | corners are coupled together to minimize drifting. 33 | 34 | Parameters 35 | ---------- 36 | std_factor_acc : float, optional 37 | Object size scale factor to calculate acceleration standard deviation 38 | for process noise. 39 | std_offset_acc : float, optional 40 | Object size offset to calculate acceleration standard deviation 41 | for process noise. Set larger for fast moving objects. 
42 | std_factor_det : tuple, optional 43 | Object width and height scale factors to calculate detector measurement 44 | noise standard deviation. 45 | std_factor_klt : tuple, optional 46 | Object wdith and height scale factors to calculate KLT measurement 47 | noise standard deviation. 48 | min_std_det : tuple, optional 49 | Min detector measurement noise standard deviations. 50 | min_std_klt : tuple, optional 51 | Min KLT measurement noise standard deviations. 52 | init_pos_weight : int, optional 53 | Scale factor to initialize position state standard deviation. 54 | init_vel_weight : int, optional 55 | Scale factor to initialize velocity state standard deviation. 56 | Set larger for fast moving objects. 57 | vel_coupling : float, optional 58 | Factor to couple bounding box corners. 59 | Set 0.5 for max coupling and 1.0 to disable coupling. 60 | vel_half_life : int, optional 61 | Half life in seconds to decay velocity state. 62 | """ 63 | assert std_factor_acc >= 0 64 | self.std_factor_acc = std_factor_acc 65 | self.std_offset_acc = std_offset_acc 66 | assert std_factor_det[0] >= 0 and std_factor_det[1] >= 0 67 | self.std_factor_det = std_factor_det 68 | assert std_factor_klt[0] >= 0 and std_factor_klt[1] >= 0 69 | self.std_factor_klt = std_factor_klt 70 | assert min_std_det[0] >= 0 and min_std_det[1] >= 0 71 | self.min_std_det = min_std_det 72 | assert min_std_klt[0] >= 0 and min_std_klt[1] >= 0 73 | self.min_std_klt = min_std_klt 74 | assert init_pos_weight >= 0 75 | self.init_pos_weight = init_pos_weight 76 | assert init_vel_weight >= 0 77 | self.init_vel_weight = init_vel_weight 78 | assert 0 <= vel_coupling <= 1 79 | self.vel_coupling = vel_coupling 80 | assert vel_half_life > 0 81 | self.vel_half_life = vel_half_life 82 | 83 | dt = 1 / 30. 84 | self.acc_cov, self.meas_mat, self.trans_mat = self._init_mat(dt) 85 | 86 | def reset_dt(self, dt): 87 | """Resets process noise, measurement and transition matrices from dt. 88 | 89 | Parameters 90 | ---------- 91 | dt : float 92 | Time interval in seconds between each frame. 93 | """ 94 | self.acc_cov, self.meas_mat, self.trans_mat = self._init_mat(dt) 95 | 96 | def create(self, det_meas): 97 | """Creates Kalman filter state from unassociated measurement. 98 | 99 | Parameters 100 | ---------- 101 | det_meas : ndarray 102 | Detected bounding box of [x1, x2, y1, y2]. 103 | 104 | Returns 105 | ------- 106 | ndarray, ndarray 107 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 108 | dimensional) of the new track. 
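        As a rough numeric illustration only (default parameters, an 80 x 160 px
        detection), the initial position and velocity standard deviations scale with
        box size and are floored at `min_std_det`:

            w, h = 80., 160.
            pos_std_x = max(5 * 0.08 * w, 4.0)   # init_pos_weight * std_factor_det[0] * w = 32.0
            vel_std_y = max(12 * 0.08 * h, 4.0)  # init_vel_weight * std_factor_det[1] * h = 153.6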
109 | """ 110 | mean_pos = det_meas 111 | mean_vel = np.zeros_like(mean_pos) 112 | mean = np.r_[mean_pos, mean_vel] 113 | 114 | w, h = get_size(det_meas) 115 | std = np.array([ 116 | max(self.init_pos_weight * self.std_factor_det[0] * w, self.min_std_det[0]), 117 | max(self.init_pos_weight * self.std_factor_det[1] * h, self.min_std_det[1]), 118 | max(self.init_pos_weight * self.std_factor_det[0] * w, self.min_std_det[0]), 119 | max(self.init_pos_weight * self.std_factor_det[1] * h, self.min_std_det[1]), 120 | max(self.init_vel_weight * self.std_factor_det[0] * w, self.min_std_det[0]), 121 | max(self.init_vel_weight * self.std_factor_det[1] * h, self.min_std_det[1]), 122 | max(self.init_vel_weight * self.std_factor_det[0] * w, self.min_std_det[0]), 123 | max(self.init_vel_weight * self.std_factor_det[1] * h, self.min_std_det[1]) 124 | ], dtype=np.float64) 125 | covariance = np.diag(np.square(std)) 126 | return mean, covariance 127 | 128 | def predict(self, mean, covariance): 129 | """Runs Kalman filter prediction step. 130 | 131 | Parameters 132 | ---------- 133 | mean : ndarray 134 | The 8 dimensional mean vector of the object state at the previous 135 | time step. 136 | covariance : ndarray 137 | The 8x8 dimensional covariance matrix of the object state at the 138 | previous time step. 139 | 140 | Returns 141 | ------- 142 | ndarray, ndarray 143 | Returns the mean vector and covariance matrix of the predicted 144 | state. 145 | """ 146 | return self._predict(mean, covariance, self.trans_mat, self.acc_cov, 147 | self.std_factor_acc, self.std_offset_acc) 148 | 149 | def project(self, mean, covariance, meas_type, multiplier=1.): 150 | """Projects state distribution to measurement space. 151 | 152 | Parameters 153 | ---------- 154 | mean : ndarray 155 | The state's mean vector (8 dimensional array). 156 | covariance : ndarray 157 | The state's covariance matrix (8x8 dimensional). 158 | meas_type : MeasType 159 | Measurement type indicating where the measurement comes from. 160 | multiplier : float 161 | Multiplier used to adjust the measurement std. 162 | 163 | Returns 164 | ------- 165 | ndarray, ndarray 166 | Returns the projected mean and covariance matrix of the given state 167 | estimate. 168 | """ 169 | if meas_type == MeasType.FLOW: 170 | std_factor = self.std_factor_klt 171 | min_std = self.min_std_klt 172 | elif meas_type == MeasType.DETECTOR: 173 | std_factor = self.std_factor_det 174 | min_std = self.min_std_det 175 | else: 176 | raise ValueError('Invalid measurement type') 177 | 178 | return self._project(mean, covariance, self.meas_mat, std_factor, min_std, multiplier) 179 | 180 | def update(self, mean, covariance, measurement, meas_type, multiplier=1.): 181 | """Runs Kalman filter correction step. 182 | 183 | Parameters 184 | ---------- 185 | mean : ndarray 186 | The predicted state's mean vector (8 dimensional). 187 | covariance : ndarray 188 | The state's covariance matrix (8x8 dimensional). 189 | measurement : ndarray 190 | Bounding box of [x1, x2, y1, y2]. 191 | meas_type : MeasType 192 | Measurement type indicating where the measurement comes from. 193 | multiplier : float 194 | Multiplier used to adjust the measurement std. 195 | 196 | Returns 197 | ------- 198 | ndarray, ndarray 199 | Returns the measurement-corrected state distribution. 
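        For example, one predict/correct cycle could look like this (illustrative
        values; boxes are in tlbr format and `np` is NumPy):

            kf = KalmanFilter()
            kf.reset_dt(1 / 30.)
            mean, cov = kf.create(np.float64([100., 50., 180., 260.]))
            mean, cov = kf.predict(mean, cov)
            det = np.float64([102., 51., 183., 265.])
            mean, cov = kf.update(mean, cov, det, MeasType.DETECTOR)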
200 | """ 201 | projected_mean, projected_cov = self.project(mean, covariance, meas_type, multiplier) 202 | 203 | return self._update(mean, covariance, projected_mean, 204 | projected_cov, measurement, self.meas_mat) 205 | 206 | def motion_distance(self, mean, covariance, measurements): 207 | """Computes mahalanobis distance between `measurements` and state distribution. 208 | 209 | Parameters 210 | ---------- 211 | mean : ndarray 212 | The predicted state's mean vector (8 dimensional). 213 | covariance : ndarray 214 | The state's covariance matrix (8x8 dimensional). 215 | measurements : array_like 216 | An Nx4 matrix of N samples of [x1, x2, y1, y2]. 217 | 218 | Returns 219 | ------- 220 | ndarray 221 | Returns a array of size N such that element i 222 | contains the squared mahalanobis distance for `measurements[i]`. 223 | """ 224 | projected_mean, projected_cov = self.project(mean, covariance, MeasType.DETECTOR) 225 | return self._maha_distance(projected_mean, projected_cov, measurements) 226 | 227 | @staticmethod 228 | @nb.njit(fastmath=True, cache=True) 229 | def warp(mean, covariance, H): 230 | """Warps kalman filter state using a homography transformation. 231 | https://scholarsarchive.byu.edu/cgi/viewcontent.cgi?article=1301&context=studentpub 232 | 233 | Parameters 234 | ---------- 235 | mean : ndarray 236 | The predicted state's mean vector (8 dimensional). 237 | covariance : ndarray 238 | The state's covariance matrix (8x8 dimensional). 239 | H : ndarray 240 | A 3x3 homography matrix. 241 | 242 | Returns 243 | ------- 244 | ndarray, ndarray 245 | Returns the mean vector and covariance matrix of the transformed 246 | state. 247 | """ 248 | H1 = np.ascontiguousarray(H[:2, :2]) 249 | h2 = np.ascontiguousarray(H[:2, 2]) 250 | h3 = np.ascontiguousarray(H[2, :2]) 251 | h4 = 1. 252 | 253 | E1 = np.eye(8, 2) 254 | E3 = np.eye(8, 2, -4) 255 | M = E1 @ H1 @ E1.T + E3 @ H1 @ E3.T 256 | M31 = E3 @ H1 @ E1.T 257 | w12 = E1 @ h2 258 | w13 = E1 @ h3 259 | w33 = E3 @ h3 260 | u = M @ mean + w12 261 | v = M31 @ mean + E3 @ h2 262 | a = np.dot(w13, mean) + h4 263 | b = np.dot(w33, mean) 264 | # transform top left mean 265 | mean_tl = u / a - b * v / a**2 266 | # compute top left Jacobian 267 | F_tl = M / a - (np.outer(u, w13) + b * M31 + np.outer(v, w33)) / a**2 + \ 268 | (2 * b * np.outer(v, w13)) / a**3 269 | 270 | E2 = np.eye(8, 2, -2) 271 | E4 = np.eye(8, 2, -6) 272 | M = E2 @ H1 @ E2.T + E4 @ H1 @ E4.T 273 | M42 = E4 @ H1 @ E2.T 274 | w22 = E2 @ h2 275 | w23 = E2 @ h3 276 | w43 = E4 @ h3 277 | u = M @ mean + w22 278 | v = M42 @ mean + E4 @ h2 279 | a = np.dot(w23, mean) + h4 280 | b = np.dot(w43, mean) 281 | # transform bottom right mean 282 | mean_br = u / a - b * v / a**2 283 | # compute bottom right Jacobian 284 | F_br = M / a - (np.outer(u, w23) + b * M42 + np.outer(v, w43)) / a**2 + \ 285 | (2 * b * np.outer(v, w23)) / a**3 286 | 287 | # add them together 288 | mean = mean_tl + mean_br 289 | F = F_tl + F_br 290 | # tranform covariance with Jacobian 291 | covariance = F @ covariance @ F.T 292 | return mean, covariance 293 | 294 | def _init_mat(self, dt): 295 | # acceleration-based process noise 296 | acc_cov = np.diag([0.25 * dt**4] * 4 + [dt**2] * 4) 297 | acc_cov[4:, :4] = np.eye(4) * (0.5 * dt**3) 298 | acc_cov[:4, 4:] = np.eye(4) * (0.5 * dt**3) 299 | 300 | meas_mat = np.eye(4, 8) 301 | trans_mat = np.eye(8) 302 | for i in range(4): 303 | trans_mat[i, i + 4] = self.vel_coupling * dt 304 | trans_mat[i, (i + 2) % 4 + 4] = (1. 
- self.vel_coupling) * dt 305 | trans_mat[i + 4, i + 4] = 0.5**(dt / self.vel_half_life) 306 | return acc_cov, meas_mat, trans_mat 307 | 308 | @staticmethod 309 | @nb.njit(fastmath=True, cache=True) 310 | def _predict(mean, covariance, trans_mat, acc_cov, std_factor_acc, std_offset_acc): 311 | size = max(get_size(mean[:4])) # max(w, h) 312 | std = std_factor_acc * size + std_offset_acc 313 | motion_cov = acc_cov * std**2 314 | 315 | mean = trans_mat @ mean 316 | covariance = trans_mat @ covariance @ trans_mat.T + motion_cov 317 | # ensure positive definiteness 318 | covariance = 0.5 * (covariance + covariance.T) 319 | return mean, covariance 320 | 321 | @staticmethod 322 | @nb.njit(fastmath=True, cache=True) 323 | def _project(mean, covariance, meas_mat, std_factor, min_std, multiplier): 324 | w, h = get_size(mean[:4]) 325 | std = np.array([ 326 | max(std_factor[0] * w, min_std[0]), 327 | max(std_factor[1] * h, min_std[1]), 328 | max(std_factor[0] * w, min_std[0]), 329 | max(std_factor[1] * h, min_std[1]) 330 | ], dtype=np.float64) 331 | meas_cov = np.diag(np.square(std * multiplier)) 332 | 333 | mean = meas_mat @ mean 334 | covariance = meas_mat @ covariance @ meas_mat.T 335 | innovation_cov = covariance + meas_cov 336 | return mean, innovation_cov 337 | 338 | @staticmethod 339 | @nb.njit(fastmath=True, cache=True) 340 | def _update(mean, covariance, proj_mean, proj_cov, measurement, meas_mat): 341 | kalman_gain = np.linalg.solve(proj_cov, (covariance @ meas_mat.T).T).T 342 | innovation = measurement - proj_mean 343 | mean = mean + innovation @ kalman_gain.T 344 | covariance = covariance - kalman_gain @ proj_cov @ kalman_gain.T 345 | return mean, covariance 346 | 347 | @staticmethod 348 | @nb.njit(fastmath=True, cache=True) 349 | def _maha_distance(mean, covariance, measurements): 350 | diff = measurements - mean 351 | L = np.linalg.cholesky(covariance) 352 | y = np.linalg.solve(L, diff.T) 353 | return np.sum(y**2, axis=0) 354 | -------------------------------------------------------------------------------- /fastmot/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .ssd import SSD 2 | from .yolo import YOLO 3 | from .reid import ReID 4 | from .label import get_label_name, set_label_map -------------------------------------------------------------------------------- /fastmot/models/calibrator.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import cupy as cp 4 | import tensorrt as trt 5 | import cv2 6 | 7 | 8 | class SSDEntropyCalibrator(trt.IInt8EntropyCalibrator2): 9 | def __init__(self, model_shape, data_dir, cache_file): 10 | # Whenever you specify a custom constructor for a TensorRT class, 11 | # you MUST call the constructor of the parent explicitly. 
12 | trt.IInt8EntropyCalibrator2.__init__(self) 13 | 14 | self.model_shape = model_shape 15 | self.num_calib_imgs = 100 # the number of images from the dataset to use for calibration 16 | self.batch_size = 10 17 | self.batch_shape = (self.batch_size, *self.model_shape) 18 | self.cache_file = cache_file 19 | 20 | calib_imgs = [os.path.join(data_dir, f) for f in os.listdir(data_dir)] 21 | self.calib_imgs = np.random.choice(calib_imgs, self.num_calib_imgs) 22 | self.counter = 0 # for keeping track of how many files we have read 23 | 24 | self.input_dev = cp.empty(self.batch_shape, dtype=np.float32) 25 | 26 | def get_batch_size(self): 27 | return self.batch_size 28 | 29 | # TensorRT passes along the names of the engine bindings to the get_batch function. 30 | # You don't necessarily have to use them, but they can be useful to understand the order of 31 | # the inputs. The bindings list is expected to have the same ordering as 'names'. 32 | def get_batch(self, names): 33 | 34 | # if there are not enough calibration images to form a batch, 35 | # we have reached the end of our data set 36 | if self.counter == self.num_calib_imgs: 37 | return None 38 | 39 | batch_imgs = np.zeros((self.batch_size, trt.volume(self.model_shape))) 40 | for i in range(self.batch_size): 41 | img = cv2.imread(self.calib_imgs[self.counter + i]) 42 | img = cv2.resize(img, (self.model_shape[2], self.model_shape[1])) 43 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 44 | # HWC -> CHW 45 | img = img.transpose((2, 0, 1)) 46 | # Normalize to [-1.0, 1.0] interval (expected by model) 47 | img = (2.0 / 255.0) * img - 1.0 48 | # add this image to the batch array 49 | batch_imgs[i, :] = img.ravel() 50 | 51 | # increase the counter for this batch 52 | self.counter += self.batch_size 53 | 54 | # Copy to device, then return a list containing pointers to input device buffers. 55 | batch_imgs = batch_imgs.astype(np.float32) 56 | self.input_dev.data.copy_from_host(batch_imgs.ctypes.data, batch_imgs.nbytes) 57 | return [self.input_dev.data.ptr] 58 | 59 | def read_calibration_cache(self): 60 | # If there is a cache, use it instead of calibrating again. 
61 | if os.path.exists(self.cache_file): 62 | with open(self.cache_file, "rb") as f: 63 | return f.read() 64 | 65 | def write_calibration_cache(self, cache): 66 | with open(self.cache_file, "wb") as f: 67 | f.write(cache) 68 | -------------------------------------------------------------------------------- /fastmot/models/label.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | 3 | 4 | """ 5 | 91-class COCO labels 6 | `unlabled` (id = 0) is replaced with `head` for CrowdHuman 7 | These are different from the default 80-class COCO labels used by YOLO 8 | """ 9 | _label_map = ( 10 | 'head', 11 | 'person', 12 | 'bicycle', 13 | 'car', 14 | 'motorcycle', 15 | 'airplane', 16 | 'bus', 17 | 'train', 18 | 'truck', 19 | 'boat', 20 | 'traffic light', 21 | 'fire hydrant', 22 | 'street sign', 23 | 'stop sign', 24 | 'parking meter', 25 | 'bench', 26 | 'bird', 27 | 'cat', 28 | 'dog', 29 | 'horse', 30 | 'sheep', 31 | 'cow', 32 | 'elephant', 33 | 'bear', 34 | 'zebra', 35 | 'giraffe', 36 | 'hat', 37 | 'backpack', 38 | 'umbrella', 39 | 'shoe', 40 | 'eye glasses', 41 | 'handbag', 42 | 'tie', 43 | 'suitcase', 44 | 'frisbee', 45 | 'skis', 46 | 'snowboard', 47 | 'sports ball', 48 | 'kite', 49 | 'baseball bat', 50 | 'baseball glove', 51 | 'skateboard', 52 | 'surfboard', 53 | 'tennis racket', 54 | 'bottle', 55 | 'plate', 56 | 'wine glass', 57 | 'cup', 58 | 'fork', 59 | 'knife', 60 | 'spoon', 61 | 'bowl', 62 | 'banana', 63 | 'apple', 64 | 'sandwich', 65 | 'orange', 66 | 'broccoli', 67 | 'carrot', 68 | 'hot dog', 69 | 'pizza', 70 | 'donut', 71 | 'cake', 72 | 'chair', 73 | 'couch', 74 | 'potted plant', 75 | 'bed', 76 | 'mirror', 77 | 'dining table', 78 | 'window', 79 | 'desk', 80 | 'toilet', 81 | 'door', 82 | 'tv', 83 | 'laptop', 84 | 'mouse', 85 | 'remote', 86 | 'keyboard', 87 | 'cell phone', 88 | 'microwave', 89 | 'oven', 90 | 'toaster', 91 | 'sink', 92 | 'refrigerator', 93 | 'blender', 94 | 'book', 95 | 'clock', 96 | 'vase', 97 | 'scissors', 98 | 'teddy bear', 99 | 'hair drier', 100 | 'toothbrush', 101 | ) 102 | 103 | 104 | def get_label_name(class_id): 105 | """Look up label name given a class ID.""" 106 | return _label_map[class_id] 107 | 108 | 109 | def set_label_map(label_map): 110 | """Set label name mapping from class IDs. 111 | 112 | Parameters 113 | ---------- 114 | label_map : sequence 115 | A sequence of label names. 116 | The index of each label determines its class ID. 117 | """ 118 | assert isinstance(label_map, Sequence) 119 | assert len(label_map) > 0 120 | global _label_map 121 | _label_map = tuple(label_map) 122 | -------------------------------------------------------------------------------- /fastmot/models/reid.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import logging 3 | import tensorrt as trt 4 | 5 | 6 | EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) 7 | LOGGER = logging.getLogger(__name__) 8 | 9 | 10 | class ReID: 11 | """Base class for ReID models. 12 | 13 | Attributes 14 | ---------- 15 | PLUGIN_PATH : Path, optional 16 | Path to TensorRT plugin. 17 | ENGINE_PATH : Path 18 | Path to TensorRT engine. 19 | If not found, TensorRT engine will be converted from the ONNX model 20 | at runtime and cached for later use. 21 | MODEL_PATH : Path 22 | Path to ONNX model. 23 | INPUT_SHAPE : tuple 24 | Input size in the format `(channel, height, width)`. 25 | OUTPUT_LAYOUT : int 26 | Feature dimension output by the model. 
27 | METRIC : {'euclidean', 'cosine'} 28 | Distance metric used to match features. 29 | """ 30 | __registry = {} 31 | 32 | PLUGIN_PATH = None 33 | ENGINE_PATH = None 34 | MODEL_PATH = None 35 | INPUT_SHAPE = None 36 | OUTPUT_LAYOUT = None 37 | METRIC = None 38 | 39 | def __init_subclass__(cls, **kwargs): 40 | super().__init_subclass__(**kwargs) 41 | cls.__registry[cls.__name__] = cls 42 | 43 | @classmethod 44 | def get_model(cls, name): 45 | return cls.__registry[name] 46 | 47 | @classmethod 48 | def build_engine(cls, trt_logger, batch_size): 49 | with trt.Builder(trt_logger) as builder, builder.create_network(EXPLICIT_BATCH) as network, \ 50 | trt.OnnxParser(network, trt_logger) as parser: 51 | 52 | builder.max_batch_size = batch_size 53 | LOGGER.info('Building engine with batch size: %d', batch_size) 54 | LOGGER.info('This may take a while...') 55 | 56 | # parse model file 57 | with open(cls.MODEL_PATH, 'rb') as model_file: 58 | if not parser.parse(model_file.read()): 59 | LOGGER.critical('Failed to parse the ONNX file') 60 | for err in range(parser.num_errors): 61 | LOGGER.error(parser.get_error(err)) 62 | return None 63 | 64 | # reshape input to the right batch size 65 | net_input = network.get_input(0) 66 | assert cls.INPUT_SHAPE == net_input.shape[1:] 67 | net_input.shape = (batch_size, *cls.INPUT_SHAPE) 68 | 69 | config = builder.create_builder_config() 70 | config.max_workspace_size = 1 << 30 71 | if builder.platform_has_fast_fp16: 72 | config.set_flag(trt.BuilderFlag.FP16) 73 | 74 | profile = builder.create_optimization_profile() 75 | profile.set_shape( 76 | net_input.name, # input tensor name 77 | (batch_size, *cls.INPUT_SHAPE), # min shape 78 | (batch_size, *cls.INPUT_SHAPE), # opt shape 79 | (batch_size, *cls.INPUT_SHAPE) # max shape 80 | ) 81 | config.add_optimization_profile(profile) 82 | 83 | # engine = builder.build_cuda_engine(network) 84 | engine = builder.build_engine(network, config) 85 | if engine is None: 86 | LOGGER.critical('Failed to build engine') 87 | return None 88 | 89 | LOGGER.info("Completed creating engine") 90 | with open(cls.ENGINE_PATH, 'wb') as engine_file: 91 | engine_file.write(engine.serialize()) 92 | return engine 93 | 94 | 95 | class OSNet025(ReID): 96 | ENGINE_PATH = Path(__file__).parent / 'osnet_x0_25_msmt17.trt' 97 | MODEL_PATH = Path(__file__).parent / 'osnet_x0_25_msmt17.onnx' 98 | INPUT_SHAPE = (3, 256, 128) 99 | OUTPUT_LAYOUT = 512 100 | METRIC = 'euclidean' 101 | 102 | 103 | class OSNet10(ReID): 104 | """Multi-source model trained on MSMT17, DukeMTMC, and CUHK03, not provided.""" 105 | ENGINE_PATH = Path(__file__).parent / 'osnet_x1_0_msdc.trt' 106 | MODEL_PATH = Path(__file__).parent / 'osnet_x1_0_msdc.onnx' 107 | INPUT_SHAPE = (3, 256, 128) 108 | OUTPUT_LAYOUT = 512 109 | METRIC = 'cosine' 110 | -------------------------------------------------------------------------------- /fastmot/models/ssd.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import logging 3 | import tensorrt as trt 4 | 5 | 6 | LOGGER = logging.getLogger(__name__) 7 | 8 | 9 | class SSD: 10 | """Base class for SSD models. 11 | 12 | Attributes 13 | ---------- 14 | PLUGIN_PATH : Path, optional 15 | Path to TensorRT plugin. 16 | ENGINE_PATH : Path 17 | Path to TensorRT engine. 18 | If not found, TensorRT engine will be converted from TensorFlow model 19 | at runtime and cached for later use. 20 | MODEL_PATH : Path 21 | Path to TensorFlow model. 
22 | NUM_CLASSES : int 23 | Total number of trained classes.s 24 | INPUT_SHAPE : tuple 25 | Input size in the format `(channel, height, width)`. 26 | OUTPUT_NAME : str 27 | Output tensor name. 28 | NMS_THRESH : float 29 | Nonmaximum suppression threshold. 30 | TOPK : int 31 | Max number of detections to output. 32 | """ 33 | __registry = {} 34 | 35 | PLUGIN_PATH = None 36 | ENGINE_PATH = None 37 | MODEL_PATH = None 38 | NUM_CLASSES = None 39 | INPUT_SHAPE = None 40 | OUTPUT_NAME = None 41 | NMS_THRESH = None 42 | TOPK = None 43 | 44 | def __init_subclass__(cls, **kwargs): 45 | super().__init_subclass__(**kwargs) 46 | cls.__registry[cls.__name__] = cls 47 | 48 | @classmethod 49 | def get_model(cls, name): 50 | return cls.__registry[name] 51 | 52 | @classmethod 53 | def add_plugin(cls, graph): 54 | raise NotImplementedError 55 | 56 | @classmethod 57 | def build_engine(cls, trt_logger, batch_size, calib_dataset=Path.home() / 'VOCdevkit' / 'VOC2007' / 'JPEGImages'): 58 | import graphsurgeon as gs 59 | import uff 60 | from . import calibrator 61 | 62 | if trt.__version__[0] >= '8': 63 | raise RuntimeError('SSD requires TensorRT version < 8') 64 | 65 | # convert TensorFlow graph into UFF 66 | dynamic_graph = gs.DynamicGraph(str(cls.MODEL_PATH)) 67 | dynamic_graph = cls.add_plugin(dynamic_graph) 68 | uff_model = uff.from_tensorflow(dynamic_graph.as_graph_def(), [cls.OUTPUT_NAME], quiet=True) 69 | 70 | with trt.Builder(trt_logger) as builder, builder.create_network() as network, trt.UffParser() as parser: 71 | builder.max_workspace_size = 1 << 30 72 | builder.max_batch_size = batch_size 73 | LOGGER.info('Building engine with batch size: %d', batch_size) 74 | LOGGER.info('This may take a while...') 75 | 76 | if builder.platform_has_fast_fp16: 77 | builder.fp16_mode = True 78 | if builder.platform_has_fast_int8: 79 | builder.int8_mode = True 80 | builder.int8_calibrator = calibrator.SSDEntropyCalibrator(cls.INPUT_SHAPE, 81 | data_dir=calib_dataset, 82 | cache_file=Path(__file__).parent / 83 | f'{cls.__name__}_calib_cache') 84 | 85 | parser.register_input('Input', cls.INPUT_SHAPE) 86 | parser.register_output('MarkOutput_0') 87 | parser.parse_buffer(uff_model, network) 88 | engine = builder.build_cuda_engine(network) 89 | if engine is None: 90 | LOGGER.critical('Failed to build engine') 91 | return None 92 | 93 | LOGGER.info("Completed creating engine") 94 | with open(cls.ENGINE_PATH, 'wb') as engine_file: 95 | engine_file.write(engine.serialize()) 96 | return engine 97 | 98 | 99 | class SSDMobileNetV1(SSD): 100 | ENGINE_PATH = Path(__file__).parent / 'ssd_mobilenet_v1_coco.trt' 101 | MODEL_PATH = Path(__file__).parent / 'ssd_mobilenet_v1_coco.pb' 102 | NUM_CLASSES = 91 103 | INPUT_SHAPE = (3, 300, 300) 104 | OUTPUT_NAME = 'NMS' 105 | NMS_THRESH = 0.5 106 | TOPK = 100 107 | 108 | @classmethod 109 | def add_plugin(cls, graph): 110 | import tensorflow as tf 111 | import graphsurgeon as gs 112 | 113 | all_assert_nodes = graph.find_nodes_by_op("Assert") 114 | graph.remove(all_assert_nodes, remove_exclusive_dependencies=True) 115 | all_identity_nodes = graph.find_nodes_by_op("Identity") 116 | graph.forward_inputs(all_identity_nodes) 117 | 118 | Input = gs.create_plugin_node( 119 | name="Input", 120 | op="Placeholder", 121 | dtype=tf.float32, 122 | shape=[1, *cls.INPUT_SHAPE] 123 | ) 124 | 125 | PriorBox = gs.create_plugin_node( 126 | name="MultipleGridAnchorGenerator", 127 | op="GridAnchor_TRT", 128 | minSize=0.2, 129 | maxSize=0.95, 130 | aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33], 131 | variance=[0.1, 0.1, 
0.2, 0.2], 132 | featureMapShapes=[19, 10, 5, 3, 2, 1], 133 | numLayers=6 134 | ) 135 | 136 | NMS = gs.create_plugin_node( 137 | name="NMS", 138 | op="NMS_TRT", 139 | shareLocation=1, 140 | varianceEncodedInTarget=0, 141 | backgroundLabelId=0, 142 | confidenceThreshold=1e-8, 143 | nmsThreshold=cls.NMS_THRESH, 144 | topK=100, 145 | keepTopK=100, 146 | numClasses=91, 147 | inputOrder=[0, 2, 1], 148 | confSigmoid=1, 149 | isNormalized=1 150 | ) 151 | 152 | concat_priorbox = gs.create_node( 153 | "concat_priorbox", 154 | op="ConcatV2", 155 | dtype=tf.float32, 156 | axis=2 157 | ) 158 | 159 | concat_box_loc = gs.create_plugin_node( 160 | "concat_box_loc", 161 | op="FlattenConcat_TRT", 162 | dtype=tf.float32, 163 | axis=1, 164 | ignoreBatch=0 165 | ) 166 | 167 | concat_box_conf = gs.create_plugin_node( 168 | "concat_box_conf", 169 | op="FlattenConcat_TRT", 170 | dtype=tf.float32, 171 | axis=1, 172 | ignoreBatch=0 173 | ) 174 | 175 | namespace_plugin_map = { 176 | "MultipleGridAnchorGenerator": PriorBox, 177 | "Postprocessor": NMS, 178 | "Preprocessor": Input, 179 | "ToFloat": Input, 180 | "image_tensor": Input, 181 | "MultipleGridAnchorGenerator/Concatenate": concat_priorbox, 182 | "concat": concat_box_loc, 183 | "concat_1": concat_box_conf 184 | } 185 | 186 | # Create a new graph by collapsing namespaces 187 | graph.collapse_namespaces(namespace_plugin_map) 188 | # Remove the outputs, so we just have a single output node (NMS). 189 | # If remove_exclusive_dependencies is True, the whole graph will be removed! 190 | graph.remove(graph.graph_outputs, remove_exclusive_dependencies=False) 191 | graph.find_nodes_by_op("NMS_TRT")[0].input.remove("Input") 192 | graph.find_nodes_by_name("Input")[0].input.remove("image_tensor:0") 193 | 194 | return graph 195 | 196 | 197 | class SSDMobileNetV2(SSD): 198 | ENGINE_PATH = Path(__file__).parent / 'ssd_mobilenet_v2_coco.trt' 199 | MODEL_PATH = Path(__file__).parent / 'ssd_mobilenet_v2_coco.pb' 200 | NUM_CLASSES = 91 201 | INPUT_SHAPE = (3, 300, 300) 202 | OUTPUT_NAME = 'NMS' 203 | NMS_THRESH = 0.5 204 | TOPK = 100 205 | 206 | @classmethod 207 | def add_plugin(cls, graph): 208 | import tensorflow as tf 209 | import graphsurgeon as gs 210 | 211 | all_assert_nodes = graph.find_nodes_by_op("Assert") 212 | graph.remove(all_assert_nodes, remove_exclusive_dependencies=True) 213 | all_identity_nodes = graph.find_nodes_by_op("Identity") 214 | graph.forward_inputs(all_identity_nodes) 215 | 216 | Input = gs.create_plugin_node( 217 | name="Input", 218 | op="Placeholder", 219 | dtype=tf.float32, 220 | shape=[1, *cls.INPUT_SHAPE] 221 | ) 222 | 223 | PriorBox = gs.create_plugin_node( 224 | name="GridAnchor", 225 | op="GridAnchor_TRT", 226 | minSize=0.2, 227 | maxSize=0.95, 228 | aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33], 229 | variance=[0.1, 0.1, 0.2, 0.2], 230 | featureMapShapes=[19, 10, 5, 3, 2, 1], 231 | numLayers=6 232 | ) 233 | 234 | NMS = gs.create_plugin_node( 235 | name="NMS", 236 | op="NMS_TRT", 237 | shareLocation=1, 238 | varianceEncodedInTarget=0, 239 | backgroundLabelId=0, 240 | confidenceThreshold=1e-8, 241 | nmsThreshold=cls.NMS_THRESH, 242 | topK=100, 243 | keepTopK=100, 244 | numClasses=91, 245 | inputOrder=[1, 0, 2], 246 | confSigmoid=1, 247 | isNormalized=1 248 | ) 249 | 250 | concat_priorbox = gs.create_node( 251 | "concat_priorbox", 252 | op="ConcatV2", 253 | dtype=tf.float32, 254 | axis=2 255 | ) 256 | 257 | concat_box_loc = gs.create_plugin_node( 258 | "concat_box_loc", 259 | op="FlattenConcat_TRT", 260 | dtype=tf.float32, 261 | axis=1, 262 | 
ignoreBatch=0 263 | ) 264 | 265 | concat_box_conf = gs.create_plugin_node( 266 | "concat_box_conf", 267 | op="FlattenConcat_TRT", 268 | dtype=tf.float32, 269 | axis=1, 270 | ignoreBatch=0 271 | ) 272 | 273 | namespace_plugin_map = { 274 | "MultipleGridAnchorGenerator": PriorBox, 275 | "Postprocessor": NMS, 276 | "Preprocessor": Input, 277 | "ToFloat": Input, 278 | "image_tensor": Input, 279 | "Concatenate": concat_priorbox, 280 | "concat": concat_box_loc, 281 | "concat_1": concat_box_conf 282 | } 283 | 284 | # Create a new graph by collapsing namespaces 285 | graph.collapse_namespaces(namespace_plugin_map) 286 | # Remove the outputs, so we just have a single output node (NMS). 287 | # If remove_exclusive_dependencies is True, the whole graph will be removed! 288 | graph.remove(graph.graph_outputs, remove_exclusive_dependencies=False) 289 | graph.find_nodes_by_op("NMS_TRT")[0].input.remove("Input") 290 | 291 | return graph 292 | 293 | 294 | class SSDInceptionV2(SSD): 295 | ENGINE_PATH = Path(__file__).parent / 'ssd_inception_v2_coco.trt' 296 | MODEL_PATH = Path(__file__).parent / 'ssd_inception_v2_coco.pb' 297 | NUM_CLASSES = 91 298 | INPUT_SHAPE = (3, 300, 300) 299 | OUTPUT_NAME = 'NMS' 300 | NMS_THRESH = 0.5 301 | TOPK = 100 302 | 303 | @classmethod 304 | def add_plugin(cls, graph): 305 | import tensorflow as tf 306 | import graphsurgeon as gs 307 | 308 | all_assert_nodes = graph.find_nodes_by_op("Assert") 309 | graph.remove(all_assert_nodes, remove_exclusive_dependencies=True) 310 | all_identity_nodes = graph.find_nodes_by_op("Identity") 311 | graph.forward_inputs(all_identity_nodes) 312 | 313 | # Create TRT plugin nodes to replace unsupported ops in Tensorflow graph 314 | Input = gs.create_plugin_node( 315 | name="Input", 316 | op="Placeholder", 317 | dtype=tf.float32, 318 | shape=[1, *cls.INPUT_SHAPE] 319 | ) 320 | 321 | PriorBox = gs.create_plugin_node( 322 | name="GridAnchor", 323 | op="GridAnchor_TRT", 324 | minSize=0.2, 325 | maxSize=0.95, 326 | aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33], 327 | variance=[0.1, 0.1, 0.2, 0.2], 328 | featureMapShapes=[19, 10, 5, 3, 2, 1], 329 | numLayers=6 330 | ) 331 | 332 | NMS = gs.create_plugin_node( 333 | name="NMS", 334 | op="NMS_TRT", 335 | shareLocation=1, 336 | varianceEncodedInTarget=0, 337 | backgroundLabelId=0, 338 | confidenceThreshold=1e-8, 339 | nmsThreshold=cls.NMS_THRESH, 340 | topK=100, 341 | keepTopK=100, 342 | numClasses=91, 343 | inputOrder=[0, 2, 1], 344 | confSigmoid=1, 345 | isNormalized=1 346 | ) 347 | 348 | concat_priorbox = gs.create_node( 349 | "concat_priorbox", 350 | op="ConcatV2", 351 | dtype=tf.float32, 352 | axis=2 353 | ) 354 | 355 | concat_box_loc = gs.create_plugin_node( 356 | "concat_box_loc", 357 | op="FlattenConcat_TRT", 358 | dtype=tf.float32, 359 | axis=1, 360 | ignoreBatch=0 361 | ) 362 | 363 | concat_box_conf = gs.create_plugin_node( 364 | "concat_box_conf", 365 | op="FlattenConcat_TRT", 366 | dtype=tf.float32, 367 | axis=1, 368 | ignoreBatch=0 369 | ) 370 | 371 | # Create a mapping of namespace names -> plugin nodes. 
372 | namespace_plugin_map = { 373 | "MultipleGridAnchorGenerator": PriorBox, 374 | "Postprocessor": NMS, 375 | "Preprocessor": Input, 376 | "ToFloat": Input, 377 | "image_tensor": Input, 378 | "MultipleGridAnchorGenerator/Concatenate": concat_priorbox, 379 | "concat": concat_box_loc, 380 | "concat_1": concat_box_conf 381 | } 382 | 383 | # Create a new graph by collapsing namespaces 384 | graph.collapse_namespaces(namespace_plugin_map) 385 | # Remove the outputs, so we just have a single output node (NMS). 386 | # If remove_exclusive_dependencies is True, the whole graph will be removed! 387 | graph.remove(graph.graph_outputs, remove_exclusive_dependencies=False) 388 | return graph 389 | -------------------------------------------------------------------------------- /fastmot/models/yolo.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import logging 3 | import numpy as np 4 | import tensorrt as trt 5 | 6 | 7 | EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) 8 | LOGGER = logging.getLogger(__name__) 9 | 10 | 11 | class YOLO: 12 | """Base class for YOLO models. 13 | 14 | Attributes 15 | ---------- 16 | PLUGIN_PATH : Path, optional 17 | Path to TensorRT plugin. 18 | ENGINE_PATH : Path 19 | Path to TensorRT engine. 20 | If not found, TensorRT engine will be converted from the ONNX model 21 | at runtime and cached for later use. 22 | MODEL_PATH : Path 23 | Path to ONNX model. 24 | NUM_CLASSES : int 25 | Total number of trained classes. 26 | LETTERBOX : bool 27 | Keep aspect ratio when resizing. 28 | NEW_COORDS : bool 29 | new_coords Darknet parameter for each yolo layer. 30 | INPUT_SHAPE : tuple 31 | Input size in the format `(channel, height, width)`. 32 | LAYER_FACTORS : List[int] 33 | Scale factors with respect to the input size for each yolo layer. 34 | SCALES : List[float] 35 | scale_x_y Darknet parameter for each yolo layer. 36 | ANCHORS : List[List[int]] 37 | Anchors grouped by each yolo layer. 
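    A custom Darknet model can be added by subclassing: `__init_subclass__` registers
    the subclass under its class name so it can later be selected with
    `YOLO.get_model('YOLOv4Custom')`. The class name, paths, and values below are
    placeholders for illustration only:

        class YOLOv4Custom(YOLO):
            ENGINE_PATH = Path(__file__).parent / 'yolov4_custom.trt'
            MODEL_PATH = Path(__file__).parent / 'yolov4_custom.onnx'
            NUM_CLASSES = 1
            INPUT_SHAPE = (3, 512, 512)
            LAYER_FACTORS = [8, 16, 32]
            SCALES = [1.2, 1.1, 1.05]
            ANCHORS = [[11,22, 24,60, 37,116],
                       [54,186, 69,268, 89,369],
                       [126,491, 194,314, 278,520]]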
38 | """ 39 | __registry = {} 40 | 41 | PLUGIN_PATH = Path(__file__).parents[1] / 'plugins' / 'libyolo_layer.so' 42 | ENGINE_PATH = None 43 | MODEL_PATH = None 44 | NUM_CLASSES = None 45 | LETTERBOX = False 46 | NEW_COORDS = False 47 | INPUT_SHAPE = None 48 | LAYER_FACTORS = None 49 | SCALES = None 50 | ANCHORS = None 51 | 52 | def __init_subclass__(cls, **kwargs): 53 | super().__init_subclass__(**kwargs) 54 | cls.__registry[cls.__name__] = cls 55 | 56 | @classmethod 57 | def get_model(cls, name): 58 | return cls.__registry[name] 59 | 60 | @classmethod 61 | def add_plugin(cls, network): 62 | def get_plugin_creator(plugin_name): 63 | plugin_creators = trt.get_plugin_registry().plugin_creator_list 64 | for plugin_creator in plugin_creators: 65 | if plugin_creator.name == plugin_name: 66 | return plugin_creator 67 | return None 68 | 69 | assert len(cls.LAYER_FACTORS) == network.num_outputs 70 | assert len(cls.SCALES) == network.num_outputs 71 | assert len(cls.ANCHORS) == network.num_outputs 72 | assert all(s >= 1.0 for s in cls.SCALES) 73 | 74 | plugin_creator = get_plugin_creator('YoloLayer_TRT') 75 | if not plugin_creator: 76 | raise RuntimeError('Failed to get YoloLayer_TRT plugin creator') 77 | 78 | old_tensors = [network.get_output(i) for i in range(network.num_outputs)] 79 | new_tensors = [] 80 | for i, old_tensor in enumerate(old_tensors): 81 | yolo_width = cls.INPUT_SHAPE[2] // cls.LAYER_FACTORS[i] 82 | yolo_height = cls.INPUT_SHAPE[1] // cls.LAYER_FACTORS[i] 83 | num_anchors = len(cls.ANCHORS[i]) // 2 84 | plugin = network.add_plugin_v2( 85 | [old_tensor], 86 | plugin_creator.create_plugin('YoloLayer_TRT', trt.PluginFieldCollection([ 87 | trt.PluginField("yoloWidth", np.array(yolo_width, dtype=np.int32), trt.PluginFieldType.INT32), 88 | trt.PluginField("yoloHeight", np.array(yolo_height, dtype=np.int32), trt.PluginFieldType.INT32), 89 | trt.PluginField("inputMultiplier", np.array(cls.LAYER_FACTORS[i], dtype=np.int32), trt.PluginFieldType.INT32), 90 | trt.PluginField("newCoords", np.array(cls.NEW_COORDS, dtype=np.int32), trt.PluginFieldType.INT32), 91 | trt.PluginField("numClasses", np.array(cls.NUM_CLASSES, dtype=np.int32), trt.PluginFieldType.INT32), 92 | trt.PluginField("numAnchors", np.array(num_anchors, dtype=np.int32), trt.PluginFieldType.INT32), 93 | trt.PluginField("anchors", np.array(cls.ANCHORS[i], dtype=np.float32), trt.PluginFieldType.FLOAT32), 94 | trt.PluginField("scaleXY", np.array(cls.SCALES[i], dtype=np.float32), trt.PluginFieldType.FLOAT32), 95 | ])) 96 | ) 97 | new_tensors.append(plugin.get_output(0)) 98 | 99 | for new_tensor in new_tensors: 100 | network.mark_output(new_tensor) 101 | for old_tensor in old_tensors: 102 | network.unmark_output(old_tensor) 103 | return network 104 | 105 | @classmethod 106 | def build_engine(cls, trt_logger, batch_size): 107 | with trt.Builder(trt_logger) as builder, builder.create_network(EXPLICIT_BATCH) as network, \ 108 | trt.OnnxParser(network, trt_logger) as parser: 109 | 110 | builder.max_batch_size = batch_size 111 | LOGGER.info('Building engine with batch size: %d', batch_size) 112 | LOGGER.info('This may take a while...') 113 | 114 | # parse model file 115 | with open(cls.MODEL_PATH, 'rb') as model_file: 116 | if not parser.parse(model_file.read()): 117 | LOGGER.critical('Failed to parse the ONNX file') 118 | for err in range(parser.num_errors): 119 | LOGGER.error(parser.get_error(err)) 120 | return None 121 | 122 | # yolo*.onnx is generated with batch size 64 123 | # reshape input to the right batch size 124 | net_input = 
network.get_input(0) 125 | assert cls.INPUT_SHAPE == net_input.shape[1:] 126 | net_input.shape = (batch_size, *cls.INPUT_SHAPE) 127 | 128 | config = builder.create_builder_config() 129 | config.max_workspace_size = 1 << 30 130 | if builder.platform_has_fast_fp16: 131 | config.set_flag(trt.BuilderFlag.FP16) 132 | 133 | profile = builder.create_optimization_profile() 134 | profile.set_shape( 135 | net_input.name, # input tensor name 136 | (batch_size, *cls.INPUT_SHAPE), # min shape 137 | (batch_size, *cls.INPUT_SHAPE), # opt shape 138 | (batch_size, *cls.INPUT_SHAPE) # max shape 139 | ) 140 | config.add_optimization_profile(profile) 141 | 142 | network = cls.add_plugin(network) 143 | engine = builder.build_engine(network, config) 144 | if engine is None: 145 | LOGGER.critical('Failed to build engine') 146 | return None 147 | 148 | LOGGER.info("Completed creating engine") 149 | with open(cls.ENGINE_PATH, 'wb') as engine_file: 150 | engine_file.write(engine.serialize()) 151 | return engine 152 | 153 | 154 | class YOLOv4(YOLO): 155 | ENGINE_PATH = Path(__file__).parent / 'yolov4_crowdhuman.trt' 156 | MODEL_PATH = Path(__file__).parent / 'yolov4_crowdhuman.onnx' 157 | NUM_CLASSES = 2 158 | INPUT_SHAPE = (3, 512, 512) 159 | LAYER_FACTORS = [8, 16, 32] 160 | SCALES = [1.2, 1.1, 1.05] 161 | ANCHORS = [[11,22, 24,60, 37,116], 162 | [54,186, 69,268, 89,369], 163 | [126,491, 194,314, 278,520]] 164 | 165 | 166 | """ 167 | The following models are supported but not provided. 168 | Modify paths, # classes, input shape, and anchors according to your Darknet cfg for custom model. 169 | """ 170 | 171 | class YOLOv4CSP(YOLO): 172 | ENGINE_PATH = Path(__file__).parent / 'yolov4-csp.trt' 173 | MODEL_PATH = Path(__file__).parent / 'yolov4-csp.onnx' 174 | NUM_CLASSES = 1 175 | LETTERBOX = True 176 | NEW_COORDS = True 177 | INPUT_SHAPE = (3, 640, 640) 178 | LAYER_FACTORS = [8, 16, 32] 179 | SCALES = [2.0, 2.0, 2.0] 180 | ANCHORS = [[12,16, 19,36, 40,28], 181 | [36,75, 76,55, 72,146], 182 | [142,110, 192,243, 459,401]] 183 | 184 | 185 | class YOLOv4xMish(YOLO): 186 | ENGINE_PATH = Path(__file__).parent / 'yolov4x-mish.trt' 187 | MODEL_PATH = Path(__file__).parent / 'yolov4x-mish.onnx' 188 | NUM_CLASSES = 1 189 | LETTERBOX = True 190 | NEW_COORDS = True 191 | INPUT_SHAPE = (3, 640, 640) 192 | LAYER_FACTORS = [8, 16, 32] 193 | SCALES = [2.0, 2.0, 2.0] 194 | ANCHORS = [[12,16, 19,36, 40,28], 195 | [36,75, 76,55, 72,146], 196 | [142,110, 192,243, 459,401]] 197 | 198 | 199 | class YOLOv4CSPSwish(YOLO): 200 | ENGINE_PATH = Path(__file__).parent / 'yolov4-csp-swish.trt' 201 | MODEL_PATH = Path(__file__).parent / 'yolov4-csp-swish.onnx' 202 | NUM_CLASSES = 1 203 | LETTERBOX = True 204 | NEW_COORDS = True 205 | INPUT_SHAPE = (3, 640, 640) 206 | LAYER_FACTORS = [8, 16, 32] 207 | SCALES = [2.0, 2.0, 2.0] 208 | ANCHORS = [[12,16, 19,36, 40,28], 209 | [36,75, 76,55, 72,146], 210 | [142,110, 192,243, 459,401]] 211 | 212 | 213 | class YOLOv4CSPxSwish(YOLO): 214 | ENGINE_PATH = Path(__file__).parent / 'yolov4-csp-x-swish.trt' 215 | MODEL_PATH = Path(__file__).parent / 'yolov4-csp-x-swish.onnx' 216 | NUM_CLASSES = 1 217 | LETTERBOX = True 218 | NEW_COORDS = True 219 | INPUT_SHAPE = (3, 640, 640) 220 | LAYER_FACTORS = [8, 16, 32] 221 | SCALES = [2.0, 2.0, 2.0] 222 | ANCHORS = [[12,16, 19,36, 40,28], 223 | [36,75, 76,55, 72,146], 224 | [142,110, 192,243, 459,401]] 225 | 226 | 227 | class YOLOv4P5(YOLO): 228 | ENGINE_PATH = Path(__file__).parent / 'yolov4-p5.trt' 229 | MODEL_PATH = Path(__file__).parent / 'yolov4-p5.onnx' 230 | 
NUM_CLASSES = 1 231 | LETTERBOX = True 232 | NEW_COORDS = True 233 | INPUT_SHAPE = (3, 896, 896) 234 | LAYER_FACTORS = [8, 16, 32] 235 | SCALES = [2.0, 2.0, 2.0] 236 | ANCHORS = [[13,17, 31,25, 24,51, 61,45], 237 | [48,102, 119,96, 97,189, 217,184], 238 | [171,384, 324,451, 616,618, 800,800]] 239 | 240 | 241 | class YOLOv4P6(YOLO): 242 | ENGINE_PATH = Path(__file__).parent / 'yolov4-p6.trt' 243 | MODEL_PATH = Path(__file__).parent / 'yolov4-p6.onnx' 244 | NUM_CLASSES = 1 245 | LETTERBOX = True 246 | NEW_COORDS = True 247 | INPUT_SHAPE = (3, 1280, 1280) 248 | LAYER_FACTORS = [8, 16, 32, 64] 249 | SCALES = [2.0, 2.0, 2.0, 2.0] 250 | ANCHORS = [[13,17, 31,25, 24,51, 61,45], 251 | [61,45, 48,102, 119,96, 97,189], 252 | [97,189, 217,184, 171,384, 324,451], 253 | [324,451, 545,357, 616,618, 1024,1024]] 254 | 255 | 256 | class YOLOv4Tiny(YOLO): 257 | ENGINE_PATH = Path(__file__).parent / 'yolov4-tiny.trt' 258 | MODEL_PATH = Path(__file__).parent / 'yolov4-tiny.onnx' 259 | NUM_CLASSES = 1 260 | INPUT_SHAPE = (3, 416, 416) 261 | LAYER_FACTORS = [32, 16] 262 | SCALES = [1.05, 1.05] 263 | ANCHORS = [[81,82, 135,169, 344,319], 264 | [23,27, 37,58, 81,82]] 265 | 266 | 267 | class YOLOv3(YOLO): 268 | ENGINE_PATH = Path(__file__).parent / 'yolov3.trt' 269 | MODEL_PATH = Path(__file__).parent / 'yolov3.onnx' 270 | NUM_CLASSES = 1 271 | INPUT_SHAPE = (3, 416, 416) 272 | LAYER_FACTORS = [32, 16, 8] 273 | SCALES = [1., 1.] 274 | ANCHORS = [[116,90, 156,198, 373,326], 275 | [30,61, 62,45, 59,119], 276 | [10,13, 16,30, 33,23]] 277 | 278 | 279 | class YOLOv3SPP(YOLO): 280 | ENGINE_PATH = Path(__file__).parent / 'yolov3-spp.trt' 281 | MODEL_PATH = Path(__file__).parent / 'yolov3-spp.onnx' 282 | NUM_CLASSES = 1 283 | INPUT_SHAPE = (3, 608, 608) 284 | LAYER_FACTORS = [32, 16, 8] 285 | SCALES = [1., 1.] 286 | ANCHORS = [[116,90, 156,198, 373,326], 287 | [30,61, 62,45, 59,119], 288 | [10,13, 16,30, 33,23]] 289 | 290 | 291 | class YOLOv3Tiny(YOLO): 292 | ENGINE_PATH = Path(__file__).parent / 'yolov3-tiny.trt' 293 | MODEL_PATH = Path(__file__).parent / 'yolov3-tiny.onnx' 294 | NUM_CLASSES = 1 295 | INPUT_SHAPE = (3, 416, 416) 296 | LAYER_FACTORS = [32, 16] 297 | SCALES = [1., 1.] 298 | ANCHORS = [[81,82, 135,169, 344,319], 299 | [10,14, 23,27, 37,58]] 300 | -------------------------------------------------------------------------------- /fastmot/mot.py: -------------------------------------------------------------------------------- 1 | from types import SimpleNamespace 2 | from enum import Enum 3 | import logging 4 | import numpy as np 5 | import numba as nb 6 | import cv2 7 | 8 | from .detector import SSDDetector, YOLODetector, PublicDetector 9 | from .feature_extractor import FeatureExtractor 10 | from .tracker import MultiTracker 11 | from .utils import Profiler 12 | from .utils.visualization import Visualizer 13 | from .utils.numba import bisect_right 14 | 15 | 16 | LOGGER = logging.getLogger(__name__) 17 | 18 | 19 | class DetectorType(Enum): 20 | SSD = 0 21 | YOLO = 1 22 | PUBLIC = 2 23 | 24 | 25 | class MOT: 26 | def __init__(self, size, 27 | detector_type='YOLO', 28 | detector_frame_skip=5, 29 | class_ids=(1,), 30 | ssd_detector_cfg=None, 31 | yolo_detector_cfg=None, 32 | public_detector_cfg=None, 33 | feature_extractor_cfgs=None, 34 | tracker_cfg=None, 35 | visualizer_cfg=None, 36 | draw=False): 37 | """Top level module that integrates detection, feature extraction, 38 | and tracking together. 39 | 40 | Parameters 41 | ---------- 42 | size : tuple 43 | Width and height of each frame. 
44 | detector_type : {'SSD', 'YOLO', 'public'}, optional 45 | Type of detector to use. 46 | detector_frame_skip : int, optional 47 | Number of frames to skip for the detector. 48 | class_ids : sequence, optional 49 | Class IDs to track. Note class ID starts at zero. 50 | ssd_detector_cfg : SimpleNamespace, optional 51 | SSD detector configuration. 52 | yolo_detector_cfg : SimpleNamespace, optional 53 | YOLO detector configuration. 54 | public_detector_cfg : SimpleNamespace, optional 55 | Public detector configuration. 56 | feature_extractor_cfgs : List[SimpleNamespace], optional 57 | Feature extractor configurations for all classes. 58 | Each configuration corresponds to the class at the same index in sorted `class_ids`. 59 | tracker_cfg : SimpleNamespace, optional 60 | Tracker configuration. 61 | visualizer_cfg : SimpleNamespace, optional 62 | Visualization configuration. 63 | draw : bool, optional 64 | Draw visualizations. 65 | """ 66 | self.size = size 67 | self.detector_type = DetectorType[detector_type.upper()] 68 | assert detector_frame_skip >= 1 69 | self.detector_frame_skip = detector_frame_skip 70 | self.class_ids = tuple(np.unique(class_ids)) 71 | self.draw = draw 72 | 73 | if ssd_detector_cfg is None: 74 | ssd_detector_cfg = SimpleNamespace() 75 | if yolo_detector_cfg is None: 76 | yolo_detector_cfg = SimpleNamespace() 77 | if public_detector_cfg is None: 78 | public_detector_cfg = SimpleNamespace() 79 | if feature_extractor_cfgs is None: 80 | feature_extractor_cfgs = (SimpleNamespace(),) 81 | if tracker_cfg is None: 82 | tracker_cfg = SimpleNamespace() 83 | if visualizer_cfg is None: 84 | visualizer_cfg = SimpleNamespace() 85 | if len(feature_extractor_cfgs) != len(class_ids): 86 | raise ValueError('Number of feature extractors must match length of class IDs') 87 | 88 | LOGGER.info('Loading detector model...') 89 | if self.detector_type == DetectorType.SSD: 90 | self.detector = SSDDetector(self.size, self.class_ids, **vars(ssd_detector_cfg)) 91 | elif self.detector_type == DetectorType.YOLO: 92 | self.detector = YOLODetector(self.size, self.class_ids, **vars(yolo_detector_cfg)) 93 | elif self.detector_type == DetectorType.PUBLIC: 94 | self.detector = PublicDetector(self.size, self.class_ids, self.detector_frame_skip, 95 | **vars(public_detector_cfg)) 96 | 97 | LOGGER.info('Loading feature extractor models...') 98 | self.extractors = [FeatureExtractor(**vars(cfg)) for cfg in feature_extractor_cfgs] 99 | self.tracker = MultiTracker(self.size, self.extractors[0].metric, **vars(tracker_cfg)) 100 | self.visualizer = Visualizer(**vars(visualizer_cfg)) 101 | self.frame_count = 0 102 | 103 | def visible_tracks(self): 104 | """Retrieve visible tracks from the tracker 105 | 106 | Returns 107 | ------- 108 | Iterator[Track] 109 | Confirmed and active tracks from the tracker. 110 | """ 111 | return (track for track in self.tracker.tracks.values() 112 | if track.confirmed and track.active) 113 | 114 | def reset(self, cap_dt): 115 | """Resets multiple object tracker. Must be called before `step`. 116 | 117 | Parameters 118 | ---------- 119 | cap_dt : float 120 | Time interval in seconds between each frame. 121 | """ 122 | self.frame_count = 0 123 | self.tracker.reset(cap_dt) 124 | 125 | def step(self, frame): 126 | """Runs multiple object tracker on the next frame. 127 | 128 | Parameters 129 | ---------- 130 | frame : ndarray 131 | The next frame. 
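        For example, a minimal processing loop might look like the following
        (assuming `cap` is an OpenCV-style capture running at 30 FPS; names are
        illustrative):

            mot = MOT((1280, 720), draw=False)
            mot.reset(cap_dt=1 / 30.)
            while True:
                ok, frame = cap.read()
                if not ok:
                    break
                mot.step(frame)
                for track in mot.visible_tracks():
                    print(track.trk_id, track.tlbr)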
132 | """ 133 | detections = [] 134 | if self.frame_count == 0: 135 | detections = self.detector(frame) 136 | self.tracker.init(frame, detections) 137 | elif self.frame_count % self.detector_frame_skip == 0: 138 | with Profiler('preproc'): 139 | self.detector.detect_async(frame) 140 | 141 | with Profiler('detect'): 142 | with Profiler('track'): 143 | self.tracker.compute_flow(frame) 144 | detections = self.detector.postprocess() 145 | 146 | with Profiler('extract'): 147 | cls_bboxes = self._split_bboxes_by_cls(detections.tlbr, detections.label, 148 | self.class_ids) 149 | for extractor, bboxes in zip(self.extractors, cls_bboxes): 150 | extractor.extract_async(frame, bboxes) 151 | 152 | with Profiler('track', aggregate=True): 153 | self.tracker.apply_kalman() 154 | 155 | embeddings = [] 156 | for extractor in self.extractors: 157 | embeddings.append(extractor.postprocess()) 158 | embeddings = np.concatenate(embeddings) if len(embeddings) > 1 else embeddings[0] 159 | 160 | with Profiler('assoc'): 161 | self.tracker.update(self.frame_count, detections, embeddings) 162 | else: 163 | with Profiler('track'): 164 | self.tracker.track(frame) 165 | 166 | if self.draw: 167 | self._draw(frame, detections) 168 | self.frame_count += 1 169 | 170 | @staticmethod 171 | def print_timing_info(): 172 | LOGGER.debug('=================Timing Stats=================') 173 | LOGGER.debug(f"{'track time:':<37}{Profiler.get_avg_millis('track'):>6.3f} ms") 174 | LOGGER.debug(f"{'preprocess time:':<37}{Profiler.get_avg_millis('preproc'):>6.3f} ms") 175 | LOGGER.debug(f"{'detect/flow time:':<37}{Profiler.get_avg_millis('detect'):>6.3f} ms") 176 | LOGGER.debug(f"{'feature extract/kalman filter time:':<37}" 177 | f"{Profiler.get_avg_millis('extract'):>6.3f} ms") 178 | LOGGER.debug(f"{'association time:':<37}{Profiler.get_avg_millis('assoc'):>6.3f} ms") 179 | 180 | @staticmethod 181 | @nb.njit(cache=True) 182 | def _split_bboxes_by_cls(bboxes, labels, class_ids): 183 | cls_bboxes = [] 184 | begin = 0 185 | for cls_id in class_ids: 186 | end = bisect_right(labels, cls_id, begin) 187 | cls_bboxes.append(bboxes[begin:end]) 188 | begin = end 189 | return cls_bboxes 190 | 191 | def _draw(self, frame, detections): 192 | visible_tracks = list(self.visible_tracks()) 193 | self.visualizer.render(frame, visible_tracks, detections, self.tracker.klt_bboxes.values(), 194 | self.tracker.flow.prev_bg_keypoints, self.tracker.flow.bg_keypoints) 195 | cv2.putText(frame, f'visible: {len(visible_tracks)}', (30, 30), 196 | cv2.FONT_HERSHEY_SIMPLEX, 1, 0, 2, cv2.LINE_AA) 197 | -------------------------------------------------------------------------------- /fastmot/plugins/Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | LD=ld 3 | CXXFLAGS=-Wall -std=c++11 -g -O 4 | 5 | NVCC=nvcc 6 | 7 | # Space separated compute capabilities e.g. computes=70 75. 
If not set, the device's compute capability is fetched automatically via get_compute.py. 8 | computes= 9 | 10 | ifeq ($(computes), ) 11 | computes=$(shell python get_compute.py || exit 1) 12 | $(info computes: $(computes)) 13 | endif 14 | 15 | NVCCFLAGS=$(foreach compute, $(computes),-gencode arch=compute_$(compute),code=[sm_$(compute),compute_$(compute)]) 16 | $(info NVCCFLAGS: $(NVCCFLAGS)) 17 | 18 | # These are the directories where TensorRT is installed 19 | TENSORRT_INCS=-I"/usr/include/x86_64-linux-gnu" 20 | TENSORRT_LIBS=-L"/usr/lib/x86_64-linux-gnu" 21 | 22 | # INCS and LIBS 23 | INCS=-I"/usr/local/cuda/include" $(TENSORRT_INCS) -I"/usr/local/include" -I"plugin" 24 | LIBS=-L"/usr/local/cuda/lib64" $(TENSORRT_LIBS) -L"/usr/local/lib" -Wl,--start-group -lnvinfer -lnvparsers -lnvinfer_plugin -lcudnn -lcublas -lnvToolsExt -lcudart -lrt -ldl -lpthread -Wl,--end-group 25 | 26 | .PHONY: all clean 27 | 28 | all: libyolo_layer.so 29 | 30 | clean: 31 | rm -f *.so *.o 32 | 33 | libyolo_layer.so: yolo_layer.o 34 | $(CC) -shared -o $@ $< $(LIBS) 35 | 36 | yolo_layer.o: yolo_layer.cu yolo_layer.h 37 | $(NVCC) -ccbin $(CC) $(INCS) $(NVCCFLAGS) -Xcompiler -fPIC -c -o $@ $< 38 | -------------------------------------------------------------------------------- /fastmot/plugins/README.md: -------------------------------------------------------------------------------- 1 | YOLO layer plugins are adapted from [tensorrt_demos](https://github.com/jkjung-avt/tensorrt_demos). The original code is under the [MIT License](https://github.com/jkjung-avt/tensorrt_demos/blob/master/LICENSE). 2 | -------------------------------------------------------------------------------- /fastmot/plugins/get_compute.py: -------------------------------------------------------------------------------- 1 | """ 2 | Outputs the compute capabilities of the CUDA-enabled devices on your computer, 3 | printed as space-separated values (e.g. '75' for compute capability 7.5). 4 | 5 | It's a port of https://gist.github.com/f0k/0d6431e3faa60bffc788f8b4daa029b1 6 | from C to Python with ctypes, so it can run without compiling anything. Note 7 | that this is a direct translation with no attempt to make the code Pythonic. 8 | It's meant as a general demonstration of how to obtain CUDA device information 9 | from Python without resorting to nvidia-smi or a compiled Python extension. 
10 | """ 11 | 12 | import sys 13 | import ctypes 14 | 15 | 16 | CUDA_SUCCESS = 0 17 | 18 | 19 | def main(): 20 | libnames = ('libcuda.so', 'libcuda.dylib', 'cuda.dll') 21 | for libname in libnames: 22 | try: 23 | cuda = ctypes.CDLL(libname) 24 | except OSError: 25 | continue 26 | else: 27 | break 28 | else: 29 | return 30 | 31 | gpu_archs = set() 32 | 33 | n_gpus = ctypes.c_int() 34 | cc_major = ctypes.c_int() 35 | cc_minor = ctypes.c_int() 36 | 37 | result = ctypes.c_int() 38 | device = ctypes.c_int() 39 | error_str = ctypes.c_char_p() 40 | 41 | result = cuda.cuInit(0) 42 | if result != CUDA_SUCCESS: 43 | cuda.cuGetErrorString(result, ctypes.byref(error_str)) 44 | print('cuInit failed with error code %d: %s' % (result, error_str.value.decode())) 45 | return 1 46 | 47 | result = cuda.cuDeviceGetCount(ctypes.byref(n_gpus)) 48 | if result != CUDA_SUCCESS: 49 | cuda.cuGetErrorString(result, ctypes.byref(error_str)) 50 | print('cuDeviceGetCount failed with error code %d: %s' % (result, error_str.value.decode())) 51 | return 1 52 | 53 | for i in range(n_gpus.value): 54 | if cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device) == CUDA_SUCCESS: 55 | gpu_archs.add(str(cc_major.value) + str(cc_minor.value)) 56 | print(' '.join(gpu_archs)) 57 | 58 | return 0 59 | 60 | 61 | if __name__ == '__main__': 62 | sys.exit(main()) 63 | -------------------------------------------------------------------------------- /fastmot/plugins/yolo_layer.cu: -------------------------------------------------------------------------------- 1 | #include "yolo_layer.h" 2 | 3 | using namespace Yolo; 4 | 5 | namespace 6 | { 7 | // Write values into buffer 8 | template 9 | void write(char*& buffer, const T& val) 10 | { 11 | *reinterpret_cast(buffer) = val; 12 | buffer += sizeof(T); 13 | } 14 | 15 | // Read values from buffer 16 | template 17 | void read(const char*& buffer, T& val) 18 | { 19 | val = *reinterpret_cast(buffer); 20 | buffer += sizeof(T); 21 | } 22 | } // namespace 23 | 24 | namespace nvinfer1 25 | { 26 | YoloLayerPlugin::YoloLayerPlugin(int yolo_width, int yolo_height, int num_anchors, float* anchors, int num_classes, int input_width, int input_height, float scale_x_y, int new_coords) 27 | { 28 | mYoloWidth = yolo_width; 29 | mYoloHeight = yolo_height; 30 | mNumAnchors = num_anchors; 31 | memcpy(mAnchorsHost, anchors, num_anchors * 2 * sizeof(float)); 32 | mNumClasses = num_classes; 33 | mInputWidth = input_width; 34 | mInputHeight = input_height; 35 | mScaleXY = scale_x_y; 36 | mNewCoords = new_coords; 37 | 38 | CHECK(cudaMalloc(&mAnchors, MAX_ANCHORS * 2 * sizeof(float))); 39 | CHECK(cudaMemcpy(mAnchors, mAnchorsHost, mNumAnchors * 2 * sizeof(float), cudaMemcpyHostToDevice)); 40 | } 41 | 42 | YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length) 43 | { 44 | const char *d = reinterpret_cast(data), *a = d; 45 | read(d, mThreadCount); 46 | read(d, mYoloWidth); 47 | read(d, mYoloHeight); 48 | read(d, mNumAnchors); 49 | memcpy(mAnchorsHost, d, MAX_ANCHORS * 2 * sizeof(float)); 50 | d += MAX_ANCHORS * 2 * sizeof(float); 51 | read(d, mNumClasses); 52 | read(d, mInputWidth); 53 | read(d, mInputHeight); 54 | read(d, mScaleXY); 55 | read(d, mNewCoords); 56 | 57 | CHECK(cudaMalloc(&mAnchors, MAX_ANCHORS * 2 * sizeof(float))); 58 | CHECK(cudaMemcpy(mAnchors, mAnchorsHost, mNumAnchors * 2 * sizeof(float), cudaMemcpyHostToDevice)); 59 | 60 | assert(d == a + length); 61 | } 62 | 63 | IPluginV2IOExt* YoloLayerPlugin::clone() const NOEXCEPT 64 | { 65 | YoloLayerPlugin *p = new 
YoloLayerPlugin(mYoloWidth, mYoloHeight, mNumAnchors, (float*) mAnchorsHost, mNumClasses, mInputWidth, mInputHeight, mScaleXY, mNewCoords); 66 | p->setPluginNamespace(mPluginNamespace); 67 | return p; 68 | } 69 | 70 | void YoloLayerPlugin::terminate() NOEXCEPT 71 | { 72 | CHECK(cudaFree(mAnchors)); 73 | } 74 | 75 | size_t YoloLayerPlugin::getSerializationSize() const NOEXCEPT 76 | { 77 | return sizeof(mThreadCount) + \ 78 | sizeof(mYoloWidth) + sizeof(mYoloHeight) + \ 79 | sizeof(mNumAnchors) + MAX_ANCHORS * 2 * sizeof(float) + \ 80 | sizeof(mNumClasses) + \ 81 | sizeof(mInputWidth) + sizeof(mInputHeight) + \ 82 | sizeof(mScaleXY) + sizeof(mNewCoords); 83 | } 84 | 85 | void YoloLayerPlugin::serialize(void* buffer) const NOEXCEPT 86 | { 87 | char* d = static_cast(buffer), *a = d; 88 | write(d, mThreadCount); 89 | write(d, mYoloWidth); 90 | write(d, mYoloHeight); 91 | write(d, mNumAnchors); 92 | memcpy(d, mAnchorsHost, MAX_ANCHORS * 2 * sizeof(float)); 93 | d += MAX_ANCHORS * 2 * sizeof(float); 94 | write(d, mNumClasses); 95 | write(d, mInputWidth); 96 | write(d, mInputHeight); 97 | write(d, mScaleXY); 98 | write(d, mNewCoords); 99 | 100 | assert(d == a + getSerializationSize()); 101 | } 102 | 103 | Dims YoloLayerPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) NOEXCEPT 104 | { 105 | assert(index == 0); 106 | assert(nbInputDims == 1); 107 | assert(inputs[0].d[0] == (mNumClasses + 5) * mNumAnchors); 108 | assert(inputs[0].d[1] == mYoloHeight); 109 | assert(inputs[0].d[2] == mYoloWidth); 110 | // output detection results to the channel dimension 111 | int totalsize = mYoloWidth * mYoloHeight * mNumAnchors * sizeof(Detection) / sizeof(float); 112 | return Dims3(totalsize, 1, 1); 113 | } 114 | 115 | inline __device__ float sigmoidGPU(float x) { return 1.0f / (1.0f + __expf(-x)); } 116 | 117 | inline __device__ float scale_sigmoidGPU(float x, float s) 118 | { 119 | return s * sigmoidGPU(x) - (s - 1.0f) * 0.5f; 120 | } 121 | 122 | // CalDetection(): This kernel processes 1 yolo layer calculation. It 123 | // distributes calculations so that 1 GPU thread would be responsible 124 | // for each grid/anchor combination. 125 | // NOTE: The output (x, y, w, h) are between 0.0 and 1.0 126 | // (relative to orginal image width and height). 
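// Index decomposition used by both detection kernels below: with total_grids = yolo_width * yolo_height,
//   group_idx = idx / total_grids, anchor_idx = group_idx % num_anchors,
//   row = (idx % total_grids) / yolo_width, col = (idx % total_grids) % yolo_width.
// For example, a 19x19 grid with 3 anchors gives total_grids = 361, so idx = 400
// maps to anchor 1, row 2, col 1 (400 / 361 = 1, 400 % 361 = 39 = 2 * 19 + 1).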
127 | __global__ void CalDetection(const float *input, float *output, 128 | int batch_size, 129 | int yolo_width, int yolo_height, 130 | int num_anchors, const float *anchors, 131 | int num_classes, int input_w, int input_h, 132 | float scale_x_y) 133 | { 134 | int idx = threadIdx.x + blockDim.x * blockIdx.x; 135 | Detection* det = ((Detection*) output) + idx; 136 | int total_grids = yolo_width * yolo_height; 137 | if (idx >= batch_size * total_grids * num_anchors) return; 138 | 139 | int info_len = 5 + num_classes; 140 | //int batch_idx = idx / (total_grids * num_anchors); 141 | int group_idx = idx / total_grids; 142 | int anchor_idx = group_idx % num_anchors; 143 | const float* cur_input = input + group_idx * (info_len * total_grids) + (idx % total_grids); 144 | 145 | int class_id; 146 | float max_cls_logit = -CUDART_INF_F; // minus infinity 147 | for (int i = 5; i < info_len; ++i) { 148 | float l = *(cur_input + i * total_grids); 149 | if (l > max_cls_logit) { 150 | max_cls_logit = l; 151 | class_id = i - 5; 152 | } 153 | } 154 | float max_cls_prob = sigmoidGPU(max_cls_logit); 155 | float box_prob = sigmoidGPU(*(cur_input + 4 * total_grids)); 156 | //if (max_cls_prob < IGNORE_THRESH || box_prob < IGNORE_THRESH) 157 | // return; 158 | 159 | int row = (idx % total_grids) / yolo_width; 160 | int col = (idx % total_grids) % yolo_width; 161 | 162 | det->bbox[0] = (col + scale_sigmoidGPU(*(cur_input + 0 * total_grids), scale_x_y)) / yolo_width; // [0, 1] 163 | det->bbox[1] = (row + scale_sigmoidGPU(*(cur_input + 1 * total_grids), scale_x_y)) / yolo_height; // [0, 1] 164 | det->bbox[2] = __expf(*(cur_input + 2 * total_grids)) * *(anchors + 2 * anchor_idx + 0) / input_w; // [0, 1] 165 | det->bbox[3] = __expf(*(cur_input + 3 * total_grids)) * *(anchors + 2 * anchor_idx + 1) / input_h; // [0, 1] 166 | 167 | det->bbox[0] -= det->bbox[2] / 2; // shift from center to top-left 168 | det->bbox[1] -= det->bbox[3] / 2; 169 | 170 | det->det_confidence = box_prob; 171 | det->class_id = class_id; 172 | det->class_confidence = max_cls_prob; 173 | } 174 | 175 | inline __device__ float scale(float x, float s) 176 | { 177 | return s * x - (s - 1.0f) * 0.5f; 178 | } 179 | 180 | inline __device__ float square(float x) 181 | { 182 | return x * x; 183 | } 184 | 185 | __global__ void CalDetection_NewCoords(const float *input, float *output, 186 | int batch_size, 187 | int yolo_width, int yolo_height, 188 | int num_anchors, const float *anchors, 189 | int num_classes, int input_w, int input_h, 190 | float scale_x_y) 191 | { 192 | int idx = threadIdx.x + blockDim.x * blockIdx.x; 193 | Detection* det = ((Detection*) output) + idx; 194 | int total_grids = yolo_width * yolo_height; 195 | if (idx >= batch_size * total_grids * num_anchors) return; 196 | 197 | int info_len = 5 + num_classes; 198 | //int batch_idx = idx / (total_grids * num_anchors); 199 | int group_idx = idx / total_grids; 200 | int anchor_idx = group_idx % num_anchors; 201 | const float* cur_input = input + group_idx * (info_len * total_grids) + (idx % total_grids); 202 | 203 | int class_id; 204 | float max_cls_prob = -CUDART_INF_F; // minus infinity 205 | for (int i = 5; i < info_len; ++i) { 206 | float l = *(cur_input + i * total_grids); 207 | if (l > max_cls_prob) { 208 | max_cls_prob = l; 209 | class_id = i - 5; 210 | } 211 | } 212 | float box_prob = *(cur_input + 4 * total_grids); 213 | //if (max_cls_prob < IGNORE_THRESH || box_prob < IGNORE_THRESH) 214 | // return; 215 | 216 | int row = (idx % total_grids) / yolo_width; 217 | int col = (idx % 
total_grids) % yolo_width; 218 | 219 | det->bbox[0] = (col + scale(*(cur_input + 0 * total_grids), scale_x_y)) / yolo_width; // [0, 1] 220 | det->bbox[1] = (row + scale(*(cur_input + 1 * total_grids), scale_x_y)) / yolo_height; // [0, 1] 221 | det->bbox[2] = square(*(cur_input + 2 * total_grids)) * 4 * *(anchors + 2 * anchor_idx + 0) / input_w; // [0, 1] 222 | det->bbox[3] = square(*(cur_input + 3 * total_grids)) * 4 * *(anchors + 2 * anchor_idx + 1) / input_h; // [0, 1] 223 | 224 | det->bbox[0] -= det->bbox[2] / 2; // shift from center to top-left 225 | det->bbox[1] -= det->bbox[3] / 2; 226 | 227 | det->det_confidence = box_prob; 228 | det->class_id = class_id; 229 | det->class_confidence = max_cls_prob; 230 | } 231 | 232 | void YoloLayerPlugin::forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int batchSize) 233 | { 234 | int num_elements = batchSize * mNumAnchors * mYoloWidth * mYoloHeight; 235 | 236 | //CHECK(cudaMemset(output, 0, num_elements * sizeof(Detection))); 237 | 238 | if (mNewCoords) { 239 | CalDetection_NewCoords<<<(num_elements + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>> 240 | (inputs[0], output, batchSize, mYoloWidth, mYoloHeight, mNumAnchors, (const float*) mAnchors, mNumClasses, mInputWidth, mInputHeight, mScaleXY); 241 | } else { 242 | CalDetection<<<(num_elements + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>> 243 | (inputs[0], output, batchSize, mYoloWidth, mYoloHeight, mNumAnchors, (const float*) mAnchors, mNumClasses, mInputWidth, mInputHeight, mScaleXY); 244 | } 245 | } 246 | 247 | #if NV_TENSORRT_MAJOR == 8 248 | int32_t YoloLayerPlugin::enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) NOEXCEPT 249 | #else 250 | int YoloLayerPlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) 251 | #endif 252 | { 253 | forwardGpu((const float* const*)inputs, (float*)outputs[0], stream, batchSize); 254 | return 0; 255 | } 256 | 257 | YoloPluginCreator::YoloPluginCreator() 258 | { 259 | mPluginAttributes.clear(); 260 | 261 | mFC.nbFields = mPluginAttributes.size(); 262 | mFC.fields = mPluginAttributes.data(); 263 | } 264 | 265 | const char* YoloPluginCreator::getPluginName() const NOEXCEPT 266 | { 267 | return "YoloLayer_TRT"; 268 | } 269 | 270 | const char* YoloPluginCreator::getPluginVersion() const NOEXCEPT 271 | { 272 | return "1"; 273 | } 274 | 275 | const PluginFieldCollection* YoloPluginCreator::getFieldNames() NOEXCEPT 276 | { 277 | return &mFC; 278 | } 279 | 280 | IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) NOEXCEPT 281 | { 282 | assert(!strcmp(name, getPluginName())); 283 | const PluginField* fields = fc->fields; 284 | int yolo_width, yolo_height, num_anchors = 0; 285 | float anchors[MAX_ANCHORS * 2]; 286 | int num_classes, input_multiplier, new_coords = 0; 287 | float scale_x_y = 1.0; 288 | 289 | for (int i = 0; i < fc->nbFields; ++i) 290 | { 291 | const char* attrName = fields[i].name; 292 | if (!strcmp(attrName, "yoloWidth")) 293 | { 294 | assert(fields[i].type == PluginFieldType::kINT32); 295 | yolo_width = *(static_cast(fields[i].data)); 296 | } 297 | else if (!strcmp(attrName, "yoloHeight")) 298 | { 299 | assert(fields[i].type == PluginFieldType::kINT32); 300 | yolo_height = *(static_cast(fields[i].data)); 301 | } 302 | else if (!strcmp(attrName, "numAnchors")) 303 | { 304 | assert(fields[i].type == PluginFieldType::kINT32); 305 | 
num_anchors = *(static_cast(fields[i].data)); 306 | } 307 | else if (!strcmp(attrName, "numClasses")) 308 | { 309 | assert(fields[i].type == PluginFieldType::kINT32); 310 | num_classes = *(static_cast(fields[i].data)); 311 | } 312 | else if (!strcmp(attrName, "inputMultiplier")) 313 | { 314 | assert(fields[i].type == PluginFieldType::kINT32); 315 | input_multiplier = *(static_cast(fields[i].data)); 316 | } 317 | else if (!strcmp(attrName, "anchors")){ 318 | assert(num_anchors > 0 && num_anchors <= MAX_ANCHORS); 319 | assert(fields[i].type == PluginFieldType::kFLOAT32); 320 | memcpy(anchors, static_cast(fields[i].data), num_anchors * 2 * sizeof(float)); 321 | } 322 | else if (!strcmp(attrName, "scaleXY")) 323 | { 324 | assert(fields[i].type == PluginFieldType::kFLOAT32); 325 | scale_x_y = *(static_cast(fields[i].data)); 326 | } 327 | else if (!strcmp(attrName, "newCoords")) 328 | { 329 | assert(fields[i].type == PluginFieldType::kINT32); 330 | new_coords = *(static_cast(fields[i].data)); 331 | } 332 | else 333 | { 334 | std::cerr << "Unknown attribute: " << attrName << std::endl; 335 | assert(0); 336 | } 337 | } 338 | assert(yolo_width > 0 && yolo_height > 0); 339 | assert(anchors[0] > 0.0f && anchors[1] > 0.0f); 340 | assert(num_classes > 0); 341 | assert(input_multiplier == 128 || input_multiplier == 64 || 342 | input_multiplier == 32 || input_multiplier == 16 || 343 | input_multiplier == 8); 344 | assert(scale_x_y >= 1.0); 345 | 346 | YoloLayerPlugin* obj = new YoloLayerPlugin(yolo_width, yolo_height, num_anchors, anchors, num_classes, yolo_width * input_multiplier, yolo_height * input_multiplier, scale_x_y, new_coords); 347 | obj->setPluginNamespace(mNamespace.c_str()); 348 | return obj; 349 | } 350 | 351 | IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) NOEXCEPT 352 | { 353 | YoloLayerPlugin* obj = new YoloLayerPlugin(serialData, serialLength); 354 | obj->setPluginNamespace(mNamespace.c_str()); 355 | return obj; 356 | } 357 | 358 | PluginFieldCollection YoloPluginCreator::mFC{}; 359 | std::vector YoloPluginCreator::mPluginAttributes; 360 | } // namespace nvinfer1 361 | -------------------------------------------------------------------------------- /fastmot/plugins/yolo_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef _YOLO_LAYER_H 2 | #define _YOLO_LAYER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "math_constants.h" 9 | #include "NvInfer.h" 10 | 11 | #define MAX_ANCHORS 6 12 | 13 | #if NV_TENSORRT_MAJOR >= 8 14 | #define NOEXCEPT noexcept 15 | #else 16 | #define NOEXCEPT 17 | #endif 18 | 19 | #define CHECK(status) \ 20 | do { \ 21 | auto ret = status; \ 22 | if (ret != 0) { \ 23 | std::cerr << "Cuda failure in file '" << __FILE__ \ 24 | << "' line " << __LINE__ \ 25 | << ": " << ret << std::endl; \ 26 | abort(); \ 27 | } \ 28 | } while (0) 29 | 30 | namespace Yolo 31 | { 32 | static constexpr float IGNORE_THRESH = 0.01f; 33 | 34 | struct alignas(float) Detection { 35 | float bbox[4]; // x, y, w, h 36 | float det_confidence; 37 | float class_id; 38 | float class_confidence; 39 | }; 40 | } 41 | 42 | namespace nvinfer1 43 | { 44 | class YoloLayerPlugin: public IPluginV2IOExt 45 | { 46 | public: 47 | YoloLayerPlugin(int yolo_width, int yolo_height, int num_anchors, float* anchors, int num_classes, int input_width, int input_height, float scale_x_y, int new_coords); 48 | YoloLayerPlugin(const void* data, size_t length); 49 | 50 | 
~YoloLayerPlugin() override = default; 51 | 52 | IPluginV2IOExt* clone() const NOEXCEPT override; 53 | 54 | int initialize() NOEXCEPT override { return 0; } 55 | 56 | void terminate() NOEXCEPT override; 57 | 58 | void destroy() NOEXCEPT override { delete this; } 59 | 60 | size_t getSerializationSize() const NOEXCEPT override; 61 | 62 | void serialize(void* buffer) const NOEXCEPT override; 63 | 64 | int getNbOutputs() const NOEXCEPT override { return 1; } 65 | 66 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) NOEXCEPT override; 67 | 68 | size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override { return 0; } 69 | 70 | bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const NOEXCEPT override { return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; } 71 | 72 | const char* getPluginType() const NOEXCEPT override { return "YoloLayer_TRT"; } 73 | 74 | const char* getPluginVersion() const NOEXCEPT override { return "1"; } 75 | 76 | void setPluginNamespace(const char* pluginNamespace) NOEXCEPT override { mPluginNamespace = pluginNamespace; } 77 | 78 | const char* getPluginNamespace() const NOEXCEPT override { return mPluginNamespace; } 79 | 80 | DataType getOutputDataType(int index, const DataType* inputTypes, int nbInputs) const NOEXCEPT override { return DataType::kFLOAT; } 81 | 82 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const NOEXCEPT override { return false; } 83 | 84 | bool canBroadcastInputAcrossBatch(int inputIndex) const NOEXCEPT override { return false; } 85 | 86 | void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) NOEXCEPT override {} 87 | 88 | //using IPluginV2IOExt::configurePlugin; 89 | void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) NOEXCEPT override {} 90 | 91 | void detachFromContext() NOEXCEPT override {} 92 | 93 | #if NV_TENSORRT_MAJOR >= 8 94 | int32_t enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) NOEXCEPT override; 95 | #else 96 | int enqueue(int batchSize, const void* const * inputs, void** outputs, void* workspace, cudaStream_t stream) NOEXCEPT override; 97 | #endif 98 | 99 | private: 100 | void forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int batchSize = 1); 101 | 102 | int mThreadCount = 64; 103 | int mYoloWidth, mYoloHeight, mNumAnchors; 104 | float mAnchorsHost[MAX_ANCHORS * 2]; 105 | float *mAnchors; // allocated on GPU 106 | int mNumClasses; 107 | int mInputWidth, mInputHeight; 108 | float mScaleXY; 109 | int mNewCoords = 0; 110 | 111 | const char* mPluginNamespace; 112 | }; 113 | 114 | class YoloPluginCreator : public IPluginCreator 115 | { 116 | public: 117 | YoloPluginCreator(); 118 | 119 | ~YoloPluginCreator() override = default; 120 | 121 | const char* getPluginName() const NOEXCEPT override; 122 | 123 | const char* getPluginVersion() const NOEXCEPT override; 124 | 125 | const PluginFieldCollection* getFieldNames() NOEXCEPT override; 126 | 127 | IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) NOEXCEPT override; 128 | 129 | IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) NOEXCEPT override; 130 | 131 | void setPluginNamespace(const char* libNamespace) NOEXCEPT override 132 | { 133 | mNamespace = 
libNamespace; 134 | } 135 | 136 | const char* getPluginNamespace() const NOEXCEPT override 137 | { 138 | return mNamespace.c_str(); 139 | } 140 | 141 | private: 142 | static PluginFieldCollection mFC; 143 | static std::vector<PluginField> mPluginAttributes; 144 | std::string mNamespace; 145 | }; 146 | 147 | REGISTER_TENSORRT_PLUGIN(YoloPluginCreator); 148 | }; 149 | 150 | #endif 151 | -------------------------------------------------------------------------------- /fastmot/track.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | import numpy as np 3 | import numba as nb 4 | 5 | from .models import get_label_name 6 | from .utils.distance import cdist, cosine 7 | from .utils.numba import apply_along_axis, normalize_vec 8 | from .utils.rect import get_center 9 | 10 | 11 | class ClusterFeature: 12 | def __init__(self, num_clusters, metric): 13 | self.num_clusters = num_clusters 14 | self.metric = metric 15 | self.clusters = None 16 | self.cluster_sizes = None 17 | self._next_idx = 0 18 | 19 | def __len__(self): 20 | return self._next_idx 21 | 22 | def __call__(self): 23 | return self.clusters[:self._next_idx] 24 | 25 | def update(self, embedding): 26 | if self._next_idx < self.num_clusters: 27 | if self.clusters is None: 28 | self.clusters = np.empty((self.num_clusters, len(embedding)), embedding.dtype) 29 | self.cluster_sizes = np.zeros(self.num_clusters, int) 30 | self.clusters[self._next_idx] = embedding 31 | self.cluster_sizes[self._next_idx] += 1 32 | self._next_idx += 1 33 | else: 34 | nearest_idx = self._get_nearest_cluster(self.clusters, embedding) 35 | self.cluster_sizes[nearest_idx] += 1 36 | self._seq_kmeans(self.clusters, self.cluster_sizes, embedding, nearest_idx) 37 | 38 | def distance(self, embeddings): 39 | if self.clusters is None: 40 | return np.ones(len(embeddings)) 41 | clusters = normalize_vec(self.clusters[:self._next_idx]) 42 | return apply_along_axis(np.min, cdist(clusters, embeddings, self.metric), axis=0) 43 | 44 | def merge(self, features, other, other_features): 45 | if len(features) > len(other_features): 46 | for feature in other_features: 47 | if feature is not None: 48 | self.update(feature) 49 | else: 50 | for feature in features: 51 | if feature is not None: 52 | other.update(feature) 53 | self.clusters = other.clusters.copy() 54 | self.cluster_sizes = other.cluster_sizes.copy() 55 | self._next_idx = other._next_idx 56 | 57 | @staticmethod 58 | @nb.njit(fastmath=True, cache=True) 59 | def _get_nearest_cluster(clusters, embedding): 60 | return np.argmin(cosine(np.atleast_2d(embedding), clusters)) 61 | 62 | @staticmethod 63 | @nb.njit(fastmath=True, cache=True) 64 | def _seq_kmeans(clusters, cluster_sizes, embedding, idx): 65 | div_size = 1. / cluster_sizes[idx] 66 | clusters[idx] += (embedding - clusters[idx]) * div_size 67 | 68 | 69 | class SmoothFeature: 70 | def __init__(self, learning_rate): 71 | self.lr = learning_rate 72 | self.smooth = None 73 | 74 | def __call__(self): 75 | return self.smooth 76 | 77 | def update(self, embedding): 78 | if self.smooth is None: 79 | self.smooth = embedding.copy() 80 | else: 81 | self._rolling(self.smooth, embedding, self.lr) 82 | 83 | @staticmethod 84 | @nb.njit(fastmath=True, cache=True) 85 | def _rolling(smooth, embedding, lr): 86 | smooth[:] = (1. - lr) * smooth + lr * embedding 87 | norm_factor = 1. 
/ np.linalg.norm(smooth) 88 | smooth *= norm_factor 89 | 90 | 91 | class AverageFeature: 92 | def __init__(self): 93 | self.sum = None 94 | self.avg = None 95 | self.count = 0 96 | 97 | def __call__(self): 98 | return self.avg 99 | 100 | def is_valid(self): 101 | return self.count > 0 102 | 103 | def update(self, embedding): 104 | self.count += 1 105 | if self.sum is None: 106 | self.sum = embedding.copy() 107 | self.avg = embedding.copy() 108 | else: 109 | self._average(self.sum, self.avg, embedding, self.count) 110 | 111 | def merge(self, other): 112 | self.count += other.count 113 | if self.sum is None: 114 | self.sum = other.sum 115 | self.avg = other.avg 116 | elif other.sum is not None: 117 | self._average(self.sum, self.avg, other.sum, self.count) 118 | 119 | @staticmethod 120 | @nb.njit(fastmath=True, cache=True) 121 | def _average(sum, avg, vec, count): 122 | sum += vec 123 | div_cnt = 1. / count 124 | avg[:] = sum * div_cnt 125 | norm_factor = 1. / np.linalg.norm(avg) 126 | avg *= norm_factor 127 | 128 | 129 | class Track: 130 | _count = 0 131 | 132 | def __init__(self, frame_id, tlbr, state, label, confirm_hits=1, buffer_size=30): 133 | self.trk_id = self.next_id() 134 | self.start_frame = frame_id 135 | self.frame_ids = deque([frame_id], maxlen=buffer_size) 136 | self.bboxes = deque([tlbr], maxlen=buffer_size) 137 | self.confirm_hits = confirm_hits 138 | self.state = state 139 | self.label = label 140 | 141 | self.age = 0 142 | self.hits = 0 143 | self.avg_feat = AverageFeature() 144 | self.last_feat = None 145 | 146 | self.inlier_ratio = 1. 147 | self.keypoints = np.empty((0, 2), np.float32) 148 | self.prev_keypoints = np.empty((0, 2), np.float32) 149 | 150 | def __str__(self): 151 | x, y = get_center(self.tlbr) 152 | return f'{get_label_name(self.label):<10} {self.trk_id:>3} at ({int(x):>4}, {int(y):>4})' 153 | 154 | def __repr__(self): 155 | return self.__str__() 156 | 157 | def __len__(self): 158 | return self.end_frame - self.start_frame 159 | 160 | def __lt__(self, other): 161 | # ordered by approximate distance to the image plane, closer is greater 162 | return (self.tlbr[-1], -self.age) < (other.tlbr[-1], -other.age) 163 | 164 | @property 165 | def tlbr(self): 166 | return self.bboxes[-1] 167 | 168 | @property 169 | def end_frame(self): 170 | return self.frame_ids[-1] 171 | 172 | @property 173 | def active(self): 174 | return self.age < 2 175 | 176 | @property 177 | def confirmed(self): 178 | return self.hits >= self.confirm_hits 179 | 180 | def update(self, tlbr, state): 181 | self.bboxes.append(tlbr) 182 | self.state = state 183 | 184 | def add_detection(self, frame_id, tlbr, state, embedding, is_valid=True): 185 | self.frame_ids.append(frame_id) 186 | self.bboxes.append(tlbr) 187 | self.state = state 188 | if is_valid: 189 | self.last_feat = embedding 190 | self.avg_feat.update(embedding) 191 | self.age = 0 192 | self.hits += 1 193 | 194 | def reinstate(self, frame_id, tlbr, state, embedding): 195 | self.start_frame = frame_id 196 | self.frame_ids.append(frame_id) 197 | self.bboxes.append(tlbr) 198 | self.state = state 199 | self.last_feat = embedding 200 | self.avg_feat.update(embedding) 201 | self.age = 0 202 | self.keypoints = np.empty((0, 2), np.float32) 203 | self.prev_keypoints = np.empty((0, 2), np.float32) 204 | 205 | def mark_missed(self): 206 | self.age += 1 207 | 208 | def merge_continuation(self, other): 209 | self.frame_ids.extend(other.frame_ids) 210 | self.bboxes.extend(other.bboxes) 211 | self.state = other.state 212 | self.age = other.age 213 | 
self.hits += other.hits 214 | 215 | self.keypoints = other.keypoints 216 | self.prev_keypoints = other.prev_keypoints 217 | 218 | if other.last_feat is not None: 219 | self.last_feat = other.last_feat 220 | self.avg_feat.merge(other.avg_feat) 221 | 222 | @staticmethod 223 | def next_id(): 224 | Track._count += 1 225 | return Track._count 226 | -------------------------------------------------------------------------------- /fastmot/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference import TRTInference 2 | from .decoder import ConfigDecoder 3 | from .profiler import Profiler -------------------------------------------------------------------------------- /fastmot/utils/decoder.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | class ConfigDecoder(json.JSONDecoder): 5 | def __init__(self, **kwargs): 6 | json.JSONDecoder.__init__(self, **kwargs) 7 | # Use the custom JSONArray 8 | self.parse_array = self.JSONArray 9 | # Use the python implemenation of the scanner 10 | self.scan_once = json.scanner.py_make_scanner(self) 11 | 12 | def JSONArray(self, s_and_end, scan_once, **kwargs): 13 | values, end = json.decoder.JSONArray(s_and_end, scan_once, **kwargs) 14 | return tuple(values), end 15 | -------------------------------------------------------------------------------- /fastmot/utils/distance.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | import numpy as np 3 | import numba as nb 4 | 5 | from .rect import area, get_center 6 | 7 | 8 | INF_DIST = 1e5 9 | 10 | 11 | class Metric(Enum): 12 | EUCLIDEAN = 0 13 | COSINE = 1 14 | 15 | 16 | @nb.njit(parallel=True, fastmath=True, cache=True) 17 | def cdist(XA, XB, metric, empty_mask=None, fill_val=None): 18 | """Numba implementation of Scipy's cdist""" 19 | assert XA.ndim == XB.ndim == 2 20 | assert XA.shape[1] == XB.shape[1] 21 | if empty_mask is not None: 22 | assert empty_mask.ndim == 2 23 | assert empty_mask.shape[0] == XA.shape[0] 24 | assert empty_mask.shape[1] == XB.shape[0] 25 | filler = 1. if fill_val is None else fill_val 26 | 27 | if metric == Metric.EUCLIDEAN: 28 | return euclidean(XA, XB, empty_mask, filler) 29 | elif metric == Metric.COSINE: 30 | return cosine(XA, XB, empty_mask, filler) 31 | else: 32 | raise ValueError('Unsupported distance metric') 33 | 34 | 35 | @nb.njit(parallel=True, fastmath=True, cache=True) 36 | def pdist(X, metric): 37 | """Numba implementation of Scipy's pdist""" 38 | assert X.ndim == 2 39 | 40 | if metric == Metric.EUCLIDEAN: 41 | return euclidean(X, X, symmetric=True) 42 | elif metric == Metric.COSINE: 43 | return cosine(X, X, symmetric=True) 44 | else: 45 | raise ValueError('Unsupported distance metric') 46 | 47 | 48 | @nb.njit(parallel=True, fastmath=True, cache=True, inline='always') 49 | def euclidean(XA, XB, empty_mask=None, filler=1., symmetric=False): 50 | """Numba implementation of Scipy's euclidean""" 51 | Y = np.empty((XA.shape[0], XB.shape[0])) 52 | for i in nb.prange(XA.shape[0]): 53 | for j in range(XB.shape[0]): 54 | if symmetric and i >= j: 55 | Y[i, j] = INF_DIST 56 | elif empty_mask is not None and empty_mask[i, j]: 57 | Y[i, j] = filler 58 | else: 59 | norm = 0. 
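# accumulate the squared differences over the feature dimension, then take the square root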
60 | for k in range(XA.shape[1]): 61 | norm += (XA[i, k] - XB[j, k])**2 62 | Y[i, j] = np.sqrt(norm) 63 | return Y 64 | 65 | 66 | @nb.njit(parallel=True, fastmath=True, cache=True, inline='always') 67 | def cosine(XA, XB, empty_mask=None, filler=1., symmetric=False): 68 | """Numba implementation of Scipy's cosine""" 69 | Y = np.empty((XA.shape[0], XB.shape[0])) 70 | for i in nb.prange(XA.shape[0]): 71 | for j in range(XB.shape[0]): 72 | if symmetric and i >= j: 73 | Y[i, j] = INF_DIST 74 | elif empty_mask is not None and empty_mask[i, j]: 75 | Y[i, j] = filler 76 | else: 77 | dot = 0. 78 | a_norm = 0. 79 | b_norm = 0. 80 | for k in range(XA.shape[1]): 81 | dot += XA[i, k] * XB[j, k] 82 | a_norm += XA[i, k] * XA[i, k] 83 | b_norm += XB[j, k] * XB[j, k] 84 | a_norm = np.sqrt(a_norm) 85 | b_norm = np.sqrt(b_norm) 86 | Y[i, j] = 1. - dot / (a_norm * b_norm) 87 | return Y 88 | 89 | 90 | @nb.njit(parallel=False, fastmath=True, cache=True) 91 | def iou_dist(tlbrs1, tlbrs2): 92 | """Computes pairwise IoU distance.""" 93 | assert tlbrs1.ndim == tlbrs2.ndim == 2 94 | assert tlbrs1.shape[1] == tlbrs2.shape[1] == 4 95 | 96 | Y = np.empty((tlbrs1.shape[0], tlbrs2.shape[0])) 97 | for i in nb.prange(tlbrs1.shape[0]): 98 | area1 = area(tlbrs1[i, :]) 99 | for j in range(tlbrs2.shape[0]): 100 | iw = min(tlbrs1[i, 2], tlbrs2[j, 2]) - max(tlbrs1[i, 0], tlbrs2[j, 0]) + 1 101 | ih = min(tlbrs1[i, 3], tlbrs2[j, 3]) - max(tlbrs1[i, 1], tlbrs2[j, 1]) + 1 102 | if iw > 0 and ih > 0: 103 | area_inter = iw * ih 104 | area_union = area1 + area(tlbrs2[j, :]) - area_inter 105 | Y[i, j] = 1. - area_inter / area_union 106 | else: 107 | Y[i, j] = 1. 108 | return Y 109 | 110 | 111 | @nb.njit(parallel=False, fastmath=True, cache=True) 112 | def giou_dist(tlbrs1, tlbrs2): 113 | """Computes pairwise GIoU distance.""" 114 | assert tlbrs1.ndim == tlbrs2.ndim == 2 115 | assert tlbrs1.shape[1] == tlbrs2.shape[1] == 4 116 | 117 | Y = np.empty((tlbrs1.shape[0], tlbrs2.shape[0])) 118 | for i in nb.prange(tlbrs1.shape[0]): 119 | area1 = area(tlbrs1[i, :]) 120 | for j in range(tlbrs2.shape[0]): 121 | iou = 0. 122 | area_union = area1 + area(tlbrs2[j, :]) 123 | iw = min(tlbrs1[i, 2], tlbrs2[j, 2]) - max(tlbrs1[i, 0], tlbrs2[j, 0]) + 1 124 | ih = min(tlbrs1[i, 3], tlbrs2[j, 3]) - max(tlbrs1[i, 1], tlbrs2[j, 1]) + 1 125 | if iw > 0 and ih > 0: 126 | area_inter = iw * ih 127 | area_union -= area_inter 128 | iou = area_inter / area_union 129 | ew = max(tlbrs1[i, 2], tlbrs2[j, 2]) - min(tlbrs1[i, 0], tlbrs2[j, 0]) + 1 130 | eh = max(tlbrs1[i, 3], tlbrs2[j, 3]) - min(tlbrs1[i, 1], tlbrs2[j, 1]) + 1 131 | area_encls = ew * eh 132 | giou = iou - (area_encls - area_union) / area_encls 133 | Y[i, j] = (1. - giou) * 0.5 134 | return Y 135 | 136 | 137 | @nb.njit(parallel=True, fastmath=True, cache=True) 138 | def diou_dist(tlbrs1, tlbrs2): 139 | """Computes pairwise DIoU distance.""" 140 | assert tlbrs1.ndim == tlbrs2.ndim == 2 141 | assert tlbrs1.shape[1] == tlbrs2.shape[1] == 4 142 | 143 | Y = np.empty((tlbrs1.shape[0], tlbrs2.shape[0])) 144 | for i in nb.prange(tlbrs1.shape[0]): 145 | area1 = area(tlbrs1[i, :]) 146 | x1, y1 = get_center(tlbrs1[i, :]) 147 | for j in range(tlbrs2.shape[0]): 148 | iou = 0. 
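# intersection width/height; the +1 accounts for inclusive tlbr pixel coordinates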
149 | iw = min(tlbrs1[i, 2], tlbrs2[j, 2]) - max(tlbrs1[i, 0], tlbrs2[j, 0]) + 1 150 | ih = min(tlbrs1[i, 3], tlbrs2[j, 3]) - max(tlbrs1[i, 1], tlbrs2[j, 1]) + 1 151 | if iw > 0 and ih > 0: 152 | area_inter = iw * ih 153 | area_union = area1 + area(tlbrs2[j, :]) - area_inter 154 | iou = area_inter / area_union 155 | ew = max(tlbrs1[i, 2], tlbrs2[j, 2]) - min(tlbrs1[i, 0], tlbrs2[j, 0]) + 1 156 | eh = max(tlbrs1[i, 3], tlbrs2[j, 3]) - min(tlbrs1[i, 1], tlbrs2[j, 1]) + 1 157 | c = ew**2 + eh**2 158 | x2, y2 = get_center(tlbrs2[j, :]) 159 | d = (x2 - x1)**2 + (y2 - y1)**2 160 | diou = iou - (d / c)**0.6 161 | Y[i, j] = (1. - diou) * 0.5 162 | return Y 163 | -------------------------------------------------------------------------------- /fastmot/utils/inference.py: -------------------------------------------------------------------------------- 1 | import ctypes 2 | import cupy as cp 3 | import cupyx 4 | import tensorrt as trt 5 | 6 | 7 | class HostDeviceMem: 8 | def __init__(self, size, dtype): 9 | self.size = size 10 | self.dtype = dtype 11 | self.host = cupyx.empty_pinned(size, dtype) 12 | self.device = cp.empty(size, dtype) 13 | 14 | def __str__(self): 15 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) 16 | 17 | def __repr__(self): 18 | return self.__str__() 19 | 20 | @property 21 | def nbytes(self): 22 | return self.host.nbytes 23 | 24 | @property 25 | def hostptr(self): 26 | return self.host.ctypes.data 27 | 28 | @property 29 | def devptr(self): 30 | return self.device.data.ptr 31 | 32 | def copy_htod_async(self, stream): 33 | self.device.data.copy_from_host_async(self.hostptr, self.nbytes, stream) 34 | 35 | def copy_dtoh_async(self, stream): 36 | self.device.data.copy_to_host_async(self.hostptr, self.nbytes, stream) 37 | 38 | 39 | class TRTInference: 40 | # initialize TensorRT 41 | TRT_LOGGER = trt.Logger(trt.Logger.ERROR) 42 | trt.init_libnvinfer_plugins(TRT_LOGGER, '') 43 | 44 | def __init__(self, model, batch_size): 45 | self.model = model 46 | self.batch_size = batch_size 47 | 48 | # load plugin if the model requires one 49 | if self.model.PLUGIN_PATH is not None: 50 | try: 51 | ctypes.cdll.LoadLibrary(self.model.PLUGIN_PATH) 52 | except OSError as err: 53 | raise RuntimeError('Plugin not found') from err 54 | 55 | # load trt engine or build one if not found 56 | if not self.model.ENGINE_PATH.exists(): 57 | self.engine = self.model.build_engine(TRTInference.TRT_LOGGER, self.batch_size) 58 | else: 59 | runtime = trt.Runtime(TRTInference.TRT_LOGGER) 60 | with open(self.model.ENGINE_PATH, 'rb') as engine_file: 61 | self.engine = runtime.deserialize_cuda_engine(engine_file.read()) 62 | if self.engine is None: 63 | raise RuntimeError('Unable to load the engine file') 64 | if self.engine.has_implicit_batch_dimension: 65 | assert self.batch_size <= self.engine.max_batch_size 66 | self.context = self.engine.create_execution_context() 67 | self.stream = cp.cuda.Stream() 68 | 69 | # allocate buffers 70 | self.bindings = [] 71 | self.outputs = [] 72 | self.input = None 73 | for binding in self.engine: 74 | shape = self.engine.get_binding_shape(binding) 75 | size = trt.volume(shape) 76 | if self.engine.has_implicit_batch_dimension: 77 | size *= self.batch_size 78 | dtype = trt.nptype(self.engine.get_binding_dtype(binding)) 79 | # allocate host and device buffers 80 | buffer = HostDeviceMem(size, dtype) 81 | # append the device buffer to device bindings 82 | self.bindings.append(buffer.devptr) 83 | if self.engine.binding_is_input(binding): 84 | if not 
self.engine.has_implicit_batch_dimension: 85 | assert self.batch_size == shape[0] 86 | # expect one input 87 | self.input = buffer 88 | else: 89 | self.outputs.append(buffer) 90 | assert self.input is not None 91 | 92 | # timing events 93 | self.start = cp.cuda.Event() 94 | self.end = cp.cuda.Event() 95 | 96 | def __del__(self): 97 | if hasattr(self, 'context'): 98 | self.context.__del__() 99 | if hasattr(self, 'engine'): 100 | self.engine.__del__() 101 | 102 | def infer(self): 103 | self.infer_async() 104 | return self.synchronize() 105 | 106 | def infer_async(self, from_device=False): 107 | self.start.record(self.stream) 108 | if not from_device: 109 | self.input.copy_htod_async(self.stream) 110 | if self.engine.has_implicit_batch_dimension: 111 | self.context.execute_async(batch_size=self.batch_size, bindings=self.bindings, 112 | stream_handle=self.stream.ptr) 113 | else: 114 | self.context.execute_async_v2(bindings=self.bindings, stream_handle=self.stream.ptr) 115 | for out in self.outputs: 116 | out.copy_dtoh_async(self.stream) 117 | self.end.record(self.stream) 118 | 119 | def synchronize(self): 120 | self.stream.synchronize() 121 | return [out.host for out in self.outputs] 122 | 123 | def get_infer_time(self): 124 | self.end.synchronize() 125 | return cp.cuda.get_elapsed_time(self.start, self.end) 126 | -------------------------------------------------------------------------------- /fastmot/utils/matching.py: -------------------------------------------------------------------------------- 1 | from scipy.optimize import linear_sum_assignment 2 | import numpy as np 3 | import numba as nb 4 | 5 | 6 | CHI_SQ_INV_95 = 9.4877 # 0.95 quantile of chi-square distribution 7 | INF_COST = 1e5 8 | 9 | 10 | def linear_assignment(cost, row_ids, col_ids): 11 | """Solves the linear assignment problem. 12 | 13 | Parameters 14 | ---------- 15 | cost : ndarray 16 | The cost matrix. 17 | row_ids : List[int] 18 | IDs that correspond to each row in the cost matrix. 19 | col_ids : List[int] 20 | IDs that correspond to each column in the cost matrix. 21 | 22 | Returns 23 | ------- 24 | List[tuple], List[int], List[int] 25 | Matched row and column IDs, unmatched row IDs, and unmatched column IDs. 26 | """ 27 | m_rows, m_cols = linear_sum_assignment(cost) 28 | row_ids = np.fromiter(row_ids, int, len(row_ids)) 29 | col_ids = np.fromiter(col_ids, int, len(col_ids)) 30 | return _get_assignment_matches(cost, row_ids, col_ids, m_rows, m_cols) 31 | 32 | 33 | def greedy_match(cost, row_ids, col_ids, max_cost): 34 | """Performs greedy matching until the cost exceeds `max_cost`. 35 | 36 | Parameters 37 | ---------- 38 | cost : ndarray 39 | The cost matrix. 40 | row_ids : List[int] 41 | IDs that correspond to each row in the cost matrix. 42 | col_ids : List[int] 43 | IDs that correspond to each column in the cost matrix. 44 | max_cost : float 45 | Maximum cost allowed to match a row with a column. 46 | 47 | Returns 48 | ------- 49 | List[tuple], List[int], List[int] 50 | Matched row and column IDs, unmatched row IDs, and unmatched column IDs. 
51 | """ 52 | row_ids = np.fromiter(row_ids, int, len(row_ids)) 53 | col_ids = np.fromiter(col_ids, int, len(col_ids)) 54 | return _greedy_match(cost, row_ids, col_ids, max_cost) 55 | 56 | 57 | @nb.njit(fastmath=True, cache=True) 58 | def _get_assignment_matches(cost, row_ids, col_ids, m_rows, m_cols): 59 | unmatched_rows = list(set(range(cost.shape[0])) - set(m_rows)) 60 | unmatched_cols = list(set(range(cost.shape[1])) - set(m_cols)) 61 | unmatched_row_ids = [row_ids[row] for row in unmatched_rows] 62 | unmatched_col_ids = [col_ids[col] for col in unmatched_cols] 63 | matches = [] 64 | for row, col in zip(m_rows, m_cols): 65 | if cost[row, col] < INF_COST: 66 | matches.append((row_ids[row], col_ids[col])) 67 | else: 68 | unmatched_row_ids.append(row_ids[row]) 69 | unmatched_col_ids.append(col_ids[col]) 70 | return matches, unmatched_row_ids, unmatched_col_ids 71 | 72 | 73 | @nb.njit(fastmath=True, cache=True) 74 | def _greedy_match(cost, row_ids, col_ids, max_cost): 75 | indices_rows = np.arange(cost.shape[0]) 76 | indices_cols = np.arange(cost.shape[1]) 77 | 78 | matches = [] 79 | while cost.shape[0] > 0 and cost.shape[1] > 0: 80 | idx = np.argmin(cost) 81 | i, j = idx // cost.shape[1], idx % cost.shape[1] 82 | if cost[i, j] <= max_cost: 83 | matches.append((row_ids[indices_rows[i]], col_ids[indices_cols[j]])) 84 | row_mask = np.ones(cost.shape[0], np.bool_) 85 | col_mask = np.ones(cost.shape[1], np.bool_) 86 | row_mask[i] = False 87 | col_mask[j] = False 88 | 89 | indices_rows = indices_rows[row_mask] 90 | indices_cols = indices_cols[col_mask] 91 | cost = cost[row_mask, :][:, col_mask] 92 | else: 93 | break 94 | 95 | unmatched_row_ids = [row_ids[row] for row in indices_rows] 96 | unmatched_col_ids = [col_ids[col] for col in indices_cols] 97 | return matches, unmatched_row_ids, unmatched_col_ids 98 | 99 | 100 | @nb.njit(fastmath=True, cache=True) 101 | def fuse_motion(cost, m_dist, m_weight): 102 | """Fuse each row of cost matrix with motion information.""" 103 | norm_factor = 1. / CHI_SQ_INV_95 104 | f_weight = 1. 
- m_weight 105 | cost[:] = f_weight * cost + m_weight * norm_factor * m_dist 106 | cost[m_dist > CHI_SQ_INV_95] = INF_COST 107 | 108 | 109 | @nb.njit(parallel=False, fastmath=True, cache=True) 110 | def gate_cost(cost, row_labels, col_labels, max_cost=None): 111 | """Gate cost matrix if cost exceeds the maximum.""" 112 | for i in nb.prange(cost.shape[0]): 113 | for j in range(cost.shape[1]): 114 | if (row_labels[i] != col_labels[j] or 115 | max_cost is not None and cost[i, j] > max_cost): 116 | cost[i, j] = INF_COST 117 | -------------------------------------------------------------------------------- /fastmot/utils/numba.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numba as nb 3 | 4 | 5 | @nb.njit(fastmath=True, cache=True) 6 | def apply_along_axis(func1d, mat, axis): 7 | """Numba utility to apply a reduction along a given axis.""" 8 | assert mat.ndim == 2 9 | assert axis in [0, 1] 10 | if axis == 0: 11 | result = np.empty(mat.shape[1], mat.dtype) 12 | for i in range(len(result)): 13 | result[i] = func1d(mat[:, i]) 14 | else: 15 | result = np.empty(mat.shape[0], mat.dtype) 16 | for i in range(len(result)): 17 | result[i] = func1d(mat[i, :]) 18 | return result 19 | 20 | 21 | @nb.njit(parallel=True, fastmath=True, cache=True) 22 | def normalize_vec(vectors): 23 | """Numba utility to normalize an array of vectors.""" 24 | assert vectors.ndim == 2 25 | out = np.empty_like(vectors) 26 | for i in nb.prange(vectors.shape[0]): 27 | norm_factor = 1. / np.linalg.norm(vectors[i, :]) 28 | out[i, :] = norm_factor * vectors[i, :] 29 | return out 30 | 31 | 32 | @nb.njit(fastmath=True, cache=True) 33 | def mask_area(mask): 34 | """Utility to calculate the area of a mask.""" 35 | count = 0 36 | for val in mask.ravel(): 37 | if val != 0: 38 | count += 1 39 | return count 40 | 41 | 42 | @nb.njit(fastmath=True, cache=True) 43 | def bisect_right(arr, val, left=0): 44 | """Utility to find the rightmost insertion index of a value in a sorted array.""" 45 | right = len(arr) 46 | while left < right: 47 | mid = left + (right - left) // 2 48 | if val >= arr[mid]: 49 | left = mid + 1 50 | else: 51 | right = mid 52 | return left 53 | 54 | 55 | @nb.njit(fastmath=True, cache=True) 56 | def find_split_indices(arr): 57 | """Utility to find indices of unique elements in a sorted array.""" 58 | prev = arr[0] 59 | split_indices = [] 60 | for i, val in enumerate(arr[1:], 1): 61 | if val != prev: 62 | split_indices.append(i) 63 | prev = val 64 | return np.array(split_indices) 65 | 66 | 67 | @nb.njit(fastmath=True, cache=True, inline='always') 68 | def transform(pts, m): 69 | """Numba implementation of OpenCV's transform.""" 70 | pts = np.asarray(pts, dtype=np.float64) 71 | pts = np.atleast_2d(pts) 72 | 73 | augment = np.ones((len(pts), 1)) 74 | pts = np.concatenate((pts, augment), axis=1) 75 | return pts @ m.T 76 | 77 | 78 | @nb.njit(fastmath=True, cache=True, inline='always') 79 | def perspective_transform(pts, m): 80 | """Numba implementation of OpenCV's perspectiveTransform.""" 81 | pts = np.asarray(pts, dtype=np.float64) 82 | pts = np.atleast_2d(pts) 83 | 84 | augment = np.ones((len(pts), 1)) 85 | pts = np.concatenate((pts, augment), axis=1).T 86 | pts = m @ pts 87 | pts = pts / pts[-1] 88 | return pts[:2].T 89 | -------------------------------------------------------------------------------- /fastmot/utils/profiler.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import Counter 3 | 4 | 5 | class Profiler: 6 
| __call_count = Counter() 7 | __time_elapsed = Counter() 8 | 9 | def __init__(self, name, aggregate=False): 10 | self.name = name 11 | if not aggregate: 12 | Profiler.__call_count[self.name] += 1 13 | 14 | def __enter__(self): 15 | self.start = time.perf_counter() 16 | return self 17 | 18 | def __exit__(self, type, value, traceback): 19 | self.end = time.perf_counter() 20 | self.duration = self.end - self.start 21 | Profiler.__time_elapsed[self.name] += self.duration 22 | 23 | @classmethod 24 | def reset(cls): 25 | cls.__call_count.clear() 26 | cls.__time_elapsed.clear() 27 | 28 | @classmethod 29 | def get_avg_millis(cls, name): 30 | call_count = cls.__call_count[name] 31 | if call_count == 0: 32 | return 0. 33 | return cls.__time_elapsed[name] * 1000 / call_count 34 | -------------------------------------------------------------------------------- /fastmot/utils/rect.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numba as nb 3 | 4 | 5 | @nb.njit(cache=True, inline='always') 6 | def as_tlbr(tlbr): 7 | """Construct a rectangle from a tuple or np.ndarray.""" 8 | _tlbr = np.empty(4) 9 | _tlbr[0] = round(float(tlbr[0]), 0) 10 | _tlbr[1] = round(float(tlbr[1]), 0) 11 | _tlbr[2] = round(float(tlbr[2]), 0) 12 | _tlbr[3] = round(float(tlbr[3]), 0) 13 | return _tlbr 14 | 15 | 16 | @nb.njit(cache=True, inline='always') 17 | def get_size(tlbr): 18 | return tlbr[2] - tlbr[0] + 1, tlbr[3] - tlbr[1] + 1 19 | 20 | 21 | @nb.njit(cache=True, inline='always') 22 | def aspect_ratio(tlbr): 23 | w, h = get_size(tlbr) 24 | return h / w if w > 0 else 0. 25 | 26 | 27 | @nb.njit(cache=True, inline='always') 28 | def area(tlbr): 29 | w, h = get_size(tlbr) 30 | if w <= 0 or h <= 0: 31 | return 0. 32 | return w * h 33 | 34 | 35 | @nb.njit(cache=True, inline='always') 36 | def get_center(tlbr): 37 | return (tlbr[0] + tlbr[2]) / 2, (tlbr[1] + tlbr[3]) / 2 38 | 39 | 40 | @nb.njit(cache=True, inline='always') 41 | def to_tlwh(tlbr): 42 | tlwh = np.empty(4) 43 | tlwh[:2] = tlbr[:2] 44 | tlwh[2:] = get_size(tlbr) 45 | return tlwh 46 | 47 | 48 | @nb.njit(cache=True, inline='always') 49 | def to_tlbr(tlwh): 50 | tlbr = np.empty(4) 51 | xmin = float(tlwh[0]) 52 | ymin = float(tlwh[1]) 53 | tlbr[0] = round(xmin, 0) 54 | tlbr[1] = round(ymin, 0) 55 | tlbr[2] = round(xmin + float(tlwh[2]) - 1., 0) 56 | tlbr[3] = round(ymin + float(tlwh[3]) - 1., 0) 57 | return tlbr 58 | 59 | 60 | @nb.njit(cache=True, inline='always') 61 | def intersection(tlbr1, tlbr2): 62 | tlbr = np.empty(4) 63 | tlbr[0] = max(tlbr1[0], tlbr2[0]) 64 | tlbr[1] = max(tlbr1[1], tlbr2[1]) 65 | tlbr[2] = min(tlbr1[2], tlbr2[2]) 66 | tlbr[3] = min(tlbr1[3], tlbr2[3]) 67 | if tlbr[2] < tlbr[0] or tlbr[3] < tlbr[1]: 68 | return None 69 | return tlbr 70 | 71 | 72 | @nb.njit(cache=True, inline='always') 73 | def enclosing(tlbr1, tlbr2): 74 | tlbr = np.empty(4) 75 | tlbr[0] = min(tlbr1[0], tlbr2[0]) 76 | tlbr[1] = min(tlbr1[1], tlbr2[1]) 77 | tlbr[2] = max(tlbr1[2], tlbr2[2]) 78 | tlbr[3] = max(tlbr1[3], tlbr2[3]) 79 | return tlbr 80 | 81 | 82 | @nb.njit(cache=True, inline='always') 83 | def crop(img, tlbr): 84 | assert tlbr is not None 85 | xmin = max(int(tlbr[0]), 0) 86 | ymin = max(int(tlbr[1]), 0) 87 | xmax = max(int(tlbr[2]), 0) 88 | ymax = max(int(tlbr[3]), 0) 89 | return img[ymin:ymax + 1, xmin:xmax + 1] 90 | 91 | 92 | @nb.njit(cache=True, inline='always') 93 | def multi_crop(img, tlbrs): 94 | _tlbrs = tlbrs.astype(np.int_) 95 | _tlbrs = np.maximum(_tlbrs, 0) 96 | return [img[_tlbrs[i, 
1]:_tlbrs[i, 3] + 1, _tlbrs[i, 0]:_tlbrs[i, 2] + 1] 97 | for i in range(len(_tlbrs))] 98 | 99 | 100 | @nb.njit(fastmath=True, cache=True, inline='always') 101 | def ios(tlbr1, tlbr2): 102 | """Computes intersection over self.""" 103 | iw = min(tlbr1[2], tlbr2[2]) - max(tlbr1[0], tlbr2[0]) + 1 104 | ih = min(tlbr1[3], tlbr2[3]) - max(tlbr1[1], tlbr2[1]) + 1 105 | if iw <= 0 or ih <= 0: 106 | return 0. 107 | area_inter = iw * ih 108 | area_self = area(tlbr1) 109 | return area_inter / area_self 110 | 111 | 112 | @nb.njit(fastmath=True, cache=True, inline='always') 113 | def iom(tlbr1, tlbr2): 114 | """Computes intersection over minimum.""" 115 | iw = min(tlbr1[2], tlbr2[2]) - max(tlbr1[0], tlbr2[0]) + 1 116 | ih = min(tlbr1[3], tlbr2[3]) - max(tlbr1[1], tlbr2[1]) + 1 117 | if iw <= 0 or ih <= 0: 118 | return 0. 119 | area_inter = iw * ih 120 | area_min = min(area(tlbr1), area(tlbr2)) 121 | return area_inter / area_min 122 | 123 | 124 | @nb.njit(parallel=False, fastmath=True, cache=True) 125 | def bbox_ious(tlbrs1, tlbrs2): 126 | """Computes pairwise bounding box overlaps using IoU.""" 127 | ious = np.empty((tlbrs1.shape[0], tlbrs2.shape[0])) 128 | for i in nb.prange(tlbrs1.shape[0]): 129 | area1 = area(tlbrs1[i, :]) 130 | for j in range(tlbrs2.shape[0]): 131 | iw = min(tlbrs1[i, 2], tlbrs2[j, 2]) - max(tlbrs1[i, 0], tlbrs2[j, 0]) + 1 132 | ih = min(tlbrs1[i, 3], tlbrs2[j, 3]) - max(tlbrs1[i, 1], tlbrs2[j, 1]) + 1 133 | if iw > 0 and ih > 0: 134 | area_inter = iw * ih 135 | area_union = area1 + area(tlbrs2[j, :]) - area_inter 136 | ious[i, j] = area_inter / area_union 137 | else: 138 | ious[i, j] = 0. 139 | return ious 140 | 141 | 142 | @nb.njit(parallel=False, fastmath=True, cache=True) 143 | def find_occluded(tlbrs, occlusion_thresh): 144 | """Computes a mask of occluded bounding boxes.""" 145 | occluded_mask = np.zeros(tlbrs.shape[0], dtype=np.bool_) 146 | for i in nb.prange(tlbrs.shape[0]): 147 | area_self = area(tlbrs[i, :]) 148 | for j in range(tlbrs.shape[0]): 149 | if i != j: 150 | iw = min(tlbrs[i, 2], tlbrs[j, 2]) - max(tlbrs[i, 0], tlbrs[j, 0]) + 1 151 | ih = min(tlbrs[i, 3], tlbrs[j, 3]) - max(tlbrs[i, 1], tlbrs[j, 1]) + 1 152 | if iw > 0 and ih > 0: 153 | ios = iw * ih / area_self 154 | if ios >= occlusion_thresh: 155 | occluded_mask[i] = True 156 | break 157 | return occluded_mask 158 | 159 | 160 | @nb.njit(fastmath=True, cache=True) 161 | def nms(tlwhs, scores, nms_thresh): 162 | """Applies Non-Maximum Suppression on the bounding boxes [x, y, w, h]. 163 | Returns an array with the indexes of the bounding boxes we want to keep. 
164 | """ 165 | areas = tlwhs[:, 2] * tlwhs[:, 3] 166 | ordered = scores.argsort()[::-1] 167 | 168 | tls = tlwhs[:, :2] 169 | brs = tlwhs[:, :2] + tlwhs[:, 2:] - 1 170 | 171 | keep = [] 172 | while ordered.size > 0: 173 | # index of the current element 174 | i = ordered[0] 175 | keep.append(i) 176 | 177 | other_tls = tls[ordered[1:]] 178 | other_brs = brs[ordered[1:]] 179 | 180 | # compute IoU 181 | inter_xmin = np.maximum(tls[i, 0], other_tls[:, 0]) 182 | inter_ymin = np.maximum(tls[i, 1], other_tls[:, 1]) 183 | inter_xmax = np.minimum(brs[i, 0], other_brs[:, 0]) 184 | inter_ymax = np.minimum(brs[i, 1], other_brs[:, 1]) 185 | 186 | inter_w = np.maximum(0, inter_xmax - inter_xmin + 1) 187 | inter_h = np.maximum(0, inter_ymax - inter_ymin + 1) 188 | inter_area = inter_w * inter_h 189 | union_area = areas[i] + areas[ordered[1:]] - inter_area 190 | iou = inter_area / union_area 191 | 192 | idx = np.where(iou <= nms_thresh)[0] 193 | ordered = ordered[idx + 1] 194 | keep = np.array(keep) 195 | return keep 196 | 197 | 198 | @nb.njit(fastmath=True, cache=True) 199 | def diou_nms(tlwhs, scores, nms_thresh, beta=0.6): 200 | """Applies Non-Maximum Suppression using the DIoU metric.""" 201 | areas = tlwhs[:, 2] * tlwhs[:, 3] 202 | ordered = scores.argsort()[::-1] 203 | 204 | tls = tlwhs[:, :2] 205 | brs = tlwhs[:, :2] + tlwhs[:, 2:] - 1 206 | centers = (tls + brs) / 2 207 | 208 | keep = [] 209 | while ordered.size > 0: 210 | # index of the current element 211 | i = ordered[0] 212 | keep.append(i) 213 | 214 | other_tls = tls[ordered[1:]] 215 | other_brs = brs[ordered[1:]] 216 | 217 | # compute IoU 218 | inter_xmin = np.maximum(tls[i, 0], other_tls[:, 0]) 219 | inter_ymin = np.maximum(tls[i, 1], other_tls[:, 1]) 220 | inter_xmax = np.minimum(brs[i, 0], other_brs[:, 0]) 221 | inter_ymax = np.minimum(brs[i, 1], other_brs[:, 1]) 222 | 223 | inter_w = np.maximum(0, inter_xmax - inter_xmin + 1) 224 | inter_h = np.maximum(0, inter_ymax - inter_ymin + 1) 225 | inter_area = inter_w * inter_h 226 | union_area = areas[i] + areas[ordered[1:]] - inter_area 227 | iou = inter_area / union_area 228 | 229 | # compute DIoU 230 | encls_xmin = np.minimum(tls[i, 0], other_tls[:, 0]) 231 | encls_ymin = np.minimum(tls[i, 1], other_tls[:, 1]) 232 | encls_xmax = np.maximum(brs[i, 0], other_brs[:, 0]) 233 | encls_ymax = np.maximum(brs[i, 1], other_brs[:, 1]) 234 | 235 | encls_w = encls_xmax - encls_xmin + 1 236 | encls_h = encls_ymax - encls_ymin + 1 237 | c = encls_w**2 + encls_h**2 238 | d = np.sum((centers[i] - centers[ordered[1:]])**2, axis=1) 239 | diou = iou - (d / c)**beta 240 | 241 | idx = np.where(diou <= nms_thresh)[0] 242 | ordered = ordered[idx + 1] 243 | keep = np.array(keep) 244 | return keep 245 | -------------------------------------------------------------------------------- /fastmot/utils/visualization.py: -------------------------------------------------------------------------------- 1 | import colorsys 2 | import numpy as np 3 | import cv2 4 | from .rect import get_center 5 | 6 | 7 | GOLDEN_RATIO = 0.618033988749895 8 | 9 | 10 | def draw_tracks(frame, tracks, show_flow=False, show_cov=False, 11 | show_traj=False): 12 | for track in tracks: 13 | draw_bbox(frame, track.tlbr, get_color(track.trk_id), 2, str(track.trk_id)) 14 | if show_traj: 15 | draw_trajectory(frame, track.bboxes, track.trk_id) 16 | if show_flow: 17 | draw_feature_match(frame, track.prev_keypoints, track.keypoints, (0, 255, 255)) 18 | if show_cov: 19 | draw_covariance(frame, track.tlbr, track.state[1]) 20 | 21 | 22 | def 
draw_detections(frame, detections, color=(255, 255, 255), show_conf=False): 23 | for det in detections: 24 | text = f'{det.label}: {det.conf:.2f}' if show_conf else None 25 | draw_bbox(frame, det.tlbr, color, 1, text) 26 | 27 | 28 | def draw_trajectory(frame, bboxes, trk_id): 29 | tlbrs = np.reshape(list(bboxes), (len(bboxes), 4)) 30 | centers = tuple(map(lambda box: get_center(box), tlbrs[::4])) 31 | color = get_color(trk_id) 32 | pts = np.array(centers, dtype=np.int32) 33 | cv2.polylines(frame, [pts], False, color, thickness=1) 34 | 35 | 36 | def draw_klt_bboxes(frame, klt_bboxes, color=(0, 0, 0)): 37 | for tlbr in klt_bboxes: 38 | draw_bbox(frame, tlbr, color, 1) 39 | 40 | 41 | def draw_tiles(frame, tiles, scale_factor, color=(0, 0, 0)): 42 | for tile in tiles: 43 | tlbr = np.rint(tile * np.tile(scale_factor, 2)) 44 | draw_bbox(frame, tlbr, color, 1) 45 | 46 | 47 | def draw_background_flow(frame, prev_bg_keypoints, bg_keypoints, color=(0, 0, 255)): 48 | draw_feature_match(frame, prev_bg_keypoints, bg_keypoints, color) 49 | 50 | 51 | def get_color(idx, s=0.8, vmin=0.7): 52 | h = np.fmod(idx * GOLDEN_RATIO, 1.) 53 | v = 1. - np.fmod(idx * GOLDEN_RATIO, 1. - vmin) 54 | r, g, b = colorsys.hsv_to_rgb(h, s, v) 55 | return int(255 * b), int(255 * g), int(255 * r) 56 | 57 | 58 | def draw_bbox(frame, tlbr, color, thickness, text=None): 59 | tlbr = tlbr.astype(int) 60 | tl, br = tuple(tlbr[:2]), tuple(tlbr[2:]) 61 | cv2.rectangle(frame, tl, br, color, thickness) 62 | if text is not None: 63 | (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_DUPLEX, 0.5, 1) 64 | cv2.rectangle(frame, tl, (tl[0] + text_width - 1, tl[1] + text_height - 1), 65 | color, cv2.FILLED) 66 | cv2.putText(frame, text, (tl[0], tl[1] + text_height - 1), cv2.FONT_HERSHEY_DUPLEX, 67 | 0.5, 0, 1, cv2.LINE_AA) 68 | 69 | 70 | def draw_feature_match(frame, prev_pts, cur_pts, color): 71 | if len(cur_pts) > 0: 72 | cur_pts = np.rint(cur_pts).astype(np.int32) 73 | for pt in cur_pts: 74 | cv2.circle(frame, tuple(pt), 1, color, cv2.FILLED) 75 | if len(prev_pts) > 0: 76 | prev_pts = np.rint(prev_pts).astype(np.int32) 77 | for pt1, pt2 in zip(prev_pts, cur_pts): 78 | cv2.line(frame, tuple(pt1), tuple(pt2), color, 1, cv2.LINE_AA) 79 | 80 | 81 | def draw_covariance(frame, tlbr, covariance): 82 | tlbr = tlbr.astype(int) 83 | tl, br = tuple(tlbr[:2]), tuple(tlbr[2:]) 84 | 85 | def ellipse(cov): 86 | vals, vecs = np.linalg.eigh(cov) 87 | order = vals.argsort()[::-1] 88 | # 95% confidence ellipse 89 | vals, vecs = np.sqrt(vals[order] * 5.9915), vecs[:, order] 90 | axes = int(vals[0] + 0.5), int(vals[1] + 0.5) 91 | angle = np.degrees(np.arctan2(vecs[1, 0], vecs[0, 0])) 92 | return axes, angle 93 | 94 | axes, angle = ellipse(covariance[:2, :2]) 95 | cv2.ellipse(frame, tl, axes, angle, 0, 360, (255, 255, 255), 1, cv2.LINE_AA) 96 | axes, angle = ellipse(covariance[2:4, 2:4]) 97 | cv2.ellipse(frame, br, axes, angle, 0, 360, (255, 255, 255), 1, cv2.LINE_AA) 98 | 99 | 100 | class Visualizer: 101 | def __init__(self, 102 | draw_detections=False, 103 | draw_confidence=False, 104 | draw_covariance=False, 105 | draw_klt=False, 106 | draw_obj_flow=False, 107 | draw_bg_flow=False, 108 | draw_trajectory=False): 109 | """Class for visualization. 110 | 111 | Parameters 112 | ---------- 113 | draw_detections : bool, optional 114 | Enable drawing detections. 115 | draw_confidence : bool, optional 116 | Enable drawing detection confidence, ignored if `draw_detections` is disabled. 
117 | draw_covariance : bool, optional 118 | Enable drawing Kalman filter position covariance. 119 | draw_klt : bool, optional 120 | Enable drawing KLT bounding boxes. 121 | draw_obj_flow : bool, optional 122 | Enable drawing object flow matches. 123 | draw_bg_flow : bool, optional 124 | Enable drawing background flow matches. 125 | draw_trajectory : bool, optional 126 | Enable drawing trajectories of tracked boxes. 127 | """ 128 | self.draw_detections = draw_detections 129 | self.draw_confidence = draw_confidence 130 | self.draw_covariance = draw_covariance 131 | self.draw_klt = draw_klt 132 | self.draw_obj_flow = draw_obj_flow 133 | self.draw_bg_flow = draw_bg_flow 134 | self.draw_trajectory = draw_trajectory 135 | 136 | def render(self, frame, tracks, detections, klt_bboxes, prev_bg_keypoints, bg_keypoints): 137 | """Render visualizations onto the frame.""" 138 | draw_tracks(frame, tracks, show_flow=self.draw_obj_flow, 139 | show_cov=self.draw_covariance, 140 | show_traj=self.draw_trajectory) 141 | if self.draw_detections: 142 | draw_detections(frame, detections, show_conf=self.draw_confidence) 143 | if self.draw_klt: 144 | draw_klt_bboxes(frame, klt_bboxes) 145 | if self.draw_bg_flow: 146 | draw_background_flow(frame, prev_bg_keypoints, bg_keypoints) 147 | -------------------------------------------------------------------------------- /fastmot/videoio.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from enum import Enum 3 | from collections import deque 4 | from urllib.parse import urlparse 5 | import subprocess 6 | import threading 7 | import logging 8 | import cv2 9 | 10 | 11 | LOGGER = logging.getLogger(__name__) 12 | WITH_GSTREAMER = True 13 | 14 | 15 | class Protocol(Enum): 16 | IMAGE = 0 17 | VIDEO = 1 18 | CSI = 2 19 | V4L2 = 3 20 | RTSP = 4 21 | HTTP = 5 22 | 23 | 24 | class VideoIO: 25 | def __init__(self, size, input_uri, 26 | output_uri=None, 27 | resolution=(1920, 1080), 28 | frame_rate=30, 29 | buffer_size=10, 30 | proc_fps=30): 31 | """Class for video capturing and output saving. 32 | Encoding, decoding, and scaling can be accelerated using the GStreamer backend. 33 | 34 | Parameters 35 | ---------- 36 | size : tuple 37 | Width and height of each frame to output. 38 | input_uri : str 39 | URI to the input stream. It can be an image sequence (e.g. '%06d.jpg'), a video file (e.g. 'file.mp4'), 40 | a MIPI CSI camera (e.g. 'csi://0'), a USB/V4L2 camera (e.g. '/dev/video0'), 41 | an RTSP stream (e.g. 'rtsp://<user>:<password>@<ip>:<port>/<path>'), 42 | or an HTTP live stream (e.g. 'http://<user>:<password>@<ip>:<port>/<path>'). 43 | output_uri : str, optional 44 | URI to an output video file. 45 | resolution : tuple, optional 46 | Original resolution of the input source. 47 | Useful to set a certain capture mode of a USB/CSI camera. 48 | frame_rate : int, optional 49 | Frame rate of the input source. 50 | Required if the frame rate cannot be deduced, e.g. image sequence and/or RTSP. 51 | Useful to set a certain capture mode of a USB/CSI camera. 52 | buffer_size : int, optional 53 | Number of frames to buffer. 54 | For live sources, a larger buffer drops fewer frames but increases latency. 55 | proc_fps : int, optional 56 | Estimated processing speed that may limit the capture interval `cap_dt`. 57 | This depends on hardware and processing complexity. 
58 | """ 59 | self.size = size 60 | self.input_uri = input_uri 61 | self.output_uri = output_uri 62 | self.resolution = resolution 63 | assert frame_rate > 0 64 | self.frame_rate = frame_rate 65 | assert buffer_size >= 1 66 | self.buffer_size = buffer_size 67 | assert proc_fps > 0 68 | self.proc_fps = proc_fps 69 | 70 | self.protocol = self._parse_uri(self.input_uri) 71 | self.is_live = self.protocol != Protocol.IMAGE and self.protocol != Protocol.VIDEO 72 | if WITH_GSTREAMER: 73 | self.source = cv2.VideoCapture(self._gst_cap_pipeline(), cv2.CAP_GSTREAMER) 74 | else: 75 | self.source = cv2.VideoCapture(self.input_uri) 76 | 77 | self.frame_queue = deque([], maxlen=self.buffer_size) 78 | self.cond = threading.Condition() 79 | self.exit_event = threading.Event() 80 | self.cap_thread = threading.Thread(target=self._capture_frames) 81 | 82 | ret, frame = self.source.read() 83 | if not ret: 84 | raise RuntimeError('Unable to read video stream') 85 | self.frame_queue.append(frame) 86 | 87 | width = self.source.get(cv2.CAP_PROP_FRAME_WIDTH) 88 | height = self.source.get(cv2.CAP_PROP_FRAME_HEIGHT) 89 | self.cap_fps = self.source.get(cv2.CAP_PROP_FPS) 90 | self.do_resize = (width, height) != self.size 91 | if self.cap_fps == 0: 92 | self.cap_fps = self.frame_rate # fallback to config if unknown 93 | LOGGER.info('%dx%d stream @ %d FPS', width, height, self.cap_fps) 94 | 95 | if self.output_uri is not None: 96 | Path(self.output_uri).parent.mkdir(parents=True, exist_ok=True) 97 | output_fps = 1 / self.cap_dt 98 | if WITH_GSTREAMER: 99 | self.writer = cv2.VideoWriter(self._gst_write_pipeline(), cv2.CAP_GSTREAMER, 0, 100 | output_fps, self.size, True) 101 | else: 102 | fourcc = cv2.VideoWriter_fourcc(*'avc1') 103 | self.writer = cv2.VideoWriter(self.output_uri, fourcc, output_fps, self.size, True) 104 | 105 | @property 106 | def cap_dt(self): 107 | # limit capture interval at processing latency for live sources 108 | return 1 / min(self.cap_fps, self.proc_fps) if self.is_live else 1 / self.cap_fps 109 | 110 | def start_capture(self): 111 | """Start capturing from file or device.""" 112 | if not self.source.isOpened(): 113 | self.source.open(self._gst_cap_pipeline(), cv2.CAP_GSTREAMER) 114 | if not self.cap_thread.is_alive(): 115 | self.cap_thread.start() 116 | 117 | def stop_capture(self): 118 | """Stop capturing from file or device.""" 119 | with self.cond: 120 | self.exit_event.set() 121 | self.cond.notify() 122 | self.frame_queue.clear() 123 | self.cap_thread.join() 124 | 125 | def read(self): 126 | """Reads the next video frame. 127 | 128 | Returns 129 | ------- 130 | ndarray 131 | Returns None if there are no more frames. 
132 | """ 133 | with self.cond: 134 | while len(self.frame_queue) == 0 and not self.exit_event.is_set(): 135 | self.cond.wait() 136 | if len(self.frame_queue) == 0 and self.exit_event.is_set(): 137 | return None 138 | frame = self.frame_queue.popleft() 139 | self.cond.notify() 140 | if self.do_resize: 141 | frame = cv2.resize(frame, self.size) 142 | return frame 143 | 144 | def write(self, frame): 145 | """Writes the next video frame.""" 146 | assert hasattr(self, 'writer') 147 | self.writer.write(frame) 148 | 149 | def release(self): 150 | """Cleans up input and output sources.""" 151 | self.stop_capture() 152 | if hasattr(self, 'writer'): 153 | self.writer.release() 154 | self.source.release() 155 | 156 | def _gst_cap_pipeline(self): 157 | gst_elements = str(subprocess.check_output('gst-inspect-1.0')) 158 | if 'nvvidconv' in gst_elements and self.protocol != Protocol.V4L2: 159 | # format conversion for hardware decoder 160 | cvt_pipeline = ( 161 | 'nvvidconv interpolation-method=5 ! ' 162 | 'video/x-raw, width=%d, height=%d, format=BGRx !' 163 | 'videoconvert ! appsink sync=false' 164 | % self.size 165 | ) 166 | else: 167 | cvt_pipeline = ( 168 | 'videoscale ! ' 169 | 'video/x-raw, width=%d, height=%d !' 170 | 'videoconvert ! appsink sync=false' 171 | % self.size 172 | ) 173 | 174 | if self.protocol == Protocol.IMAGE: 175 | pipeline = ( 176 | 'multifilesrc location=%s index=1 caps="image/%s,framerate=%d/1" ! decodebin ! ' 177 | % ( 178 | self.input_uri, 179 | self._img_format(self.input_uri), 180 | self.frame_rate 181 | ) 182 | ) 183 | elif self.protocol == Protocol.VIDEO: 184 | pipeline = 'filesrc location=%s ! decodebin ! ' % self.input_uri 185 | elif self.protocol == Protocol.CSI: 186 | if 'nvarguscamerasrc' in gst_elements: 187 | pipeline = ( 188 | 'nvarguscamerasrc sensor_id=%s ! ' 189 | 'video/x-raw(memory:NVMM), width=%d, height=%d, ' 190 | 'format=NV12, framerate=%d/1 ! ' 191 | % ( 192 | self.input_uri[6:], 193 | *self.resolution, 194 | self.frame_rate 195 | ) 196 | ) 197 | else: 198 | raise RuntimeError('GStreamer CSI plugin not found') 199 | elif self.protocol == Protocol.V4L2: 200 | if 'v4l2src' in gst_elements: 201 | pipeline = ( 202 | 'v4l2src device=%s ! ' 203 | 'video/x-raw, width=%d, height=%d, ' 204 | 'format=YUY2, framerate=%d/1 ! ' 205 | % ( 206 | self.input_uri, 207 | *self.resolution, 208 | self.frame_rate 209 | ) 210 | ) 211 | else: 212 | raise RuntimeError('GStreamer V4L2 plugin not found') 213 | elif self.protocol == Protocol.RTSP: 214 | pipeline = ( 215 | 'rtspsrc location=%s latency=0 ! ' 216 | 'capsfilter caps=application/x-rtp,media=video ! decodebin ! ' % self.input_uri 217 | ) 218 | elif self.protocol == Protocol.HTTP: 219 | pipeline = 'souphttpsrc location=%s is-live=true ! decodebin ! ' % self.input_uri 220 | return pipeline + cvt_pipeline 221 | 222 | def _gst_write_pipeline(self): 223 | gst_elements = str(subprocess.check_output('gst-inspect-1.0')) 224 | # use hardware encoder if found 225 | if 'omxh264enc' in gst_elements: 226 | h264_encoder = 'omxh264enc preset-level=2' 227 | elif 'x264enc' in gst_elements: 228 | h264_encoder = 'x264enc pass=4' 229 | else: 230 | raise RuntimeError('GStreamer H.264 encoder not found') 231 | pipeline = ( 232 | 'appsrc ! autovideoconvert ! %s ! qtmux ! 
filesink location=%s ' 233 | % ( 234 | h264_encoder, 235 | self.output_uri 236 | ) 237 | ) 238 | return pipeline 239 | 240 | def _capture_frames(self): 241 | while not self.exit_event.is_set(): 242 | ret, frame = self.source.read() 243 | with self.cond: 244 | if not ret: 245 | self.exit_event.set() 246 | self.cond.notify() 247 | break 248 | # keep unprocessed frames in the buffer for file 249 | if not self.is_live: 250 | while (len(self.frame_queue) == self.buffer_size and 251 | not self.exit_event.is_set()): 252 | self.cond.wait() 253 | self.frame_queue.append(frame) 254 | self.cond.notify() 255 | 256 | @staticmethod 257 | def _parse_uri(uri): 258 | result = urlparse(uri) 259 | if result.scheme == 'csi': 260 | protocol = Protocol.CSI 261 | elif result.scheme == 'rtsp': 262 | protocol = Protocol.RTSP 263 | elif result.scheme == 'http': 264 | protocol = Protocol.HTTP 265 | else: 266 | if '/dev/video' in result.path: 267 | protocol = Protocol.V4L2 268 | elif '%' in result.path: 269 | protocol = Protocol.IMAGE 270 | else: 271 | protocol = Protocol.VIDEO 272 | return protocol 273 | 274 | @staticmethod 275 | def _img_format(uri): 276 | img_format = Path(uri).suffix[1:] 277 | return 'jpeg' if img_format == 'jpg' else img_format 278 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy >= 1.17 2 | scipy >= 1.5 3 | numba == 0.48 4 | tensorflow < 2.0 5 | cupy == 9.2 -------------------------------------------------------------------------------- /scripts/download_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR=$HOME 4 | 5 | set -e 6 | 7 | PASCAL_VOC2007_DATASET=$DIR/VOCtrainval_06-Nov-2007.tar 8 | wget -O $PASCAL_VOC2007_DATASET http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 9 | tar -xf $PASCAL_VOC2007_DATASET -C $DIR 10 | rm $PASCAL_VOC2007_DATASET -------------------------------------------------------------------------------- /scripts/download_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BASEDIR=$(dirname "$0") 4 | DIR=$BASEDIR/../fastmot/models 5 | 6 | set -e 7 | 8 | pip3 install gdown 9 | 10 | gdown https://drive.google.com/uc?id=1MLC2lKnQvAQgBKZP1EXB6UdmqujY9qVd -O $DIR/osnet_x0_25_msmt17.onnx 11 | gdown https://drive.google.com/uc?id=1-Cqk2P72P4feYLJGtJFPcCxN5JttzTfX -O $DIR/ssd_inception_v2_coco.pb 12 | gdown https://drive.google.com/uc?id=1IfSveiXaub-L6PO9mqne5pk2EByzb25z -O $DIR/ssd_mobilenet_v1_coco.pb 13 | gdown https://drive.google.com/uc?id=1ste0fQevAjF4UqD3JsCtu1rUAwCTmETN -O $DIR/ssd_mobilenet_v2_coco.pb 14 | gdown https://drive.google.com/uc?id=1-kXZpA6y8pNbDMMD7N--IWIjwqqnAIGZ -O $DIR/yolov4_crowdhuman.onnx -------------------------------------------------------------------------------- /scripts/install_jetson.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | L4T_VERSION=$(dpkg-query --showformat='${Version}' --show nvidia-l4t-core | cut -f1 -d'-') 6 | 7 | # Jetpack>=4.4 (OpenCV, CUDA, TensorRT) is required 8 | if dpkg --compare-versions $L4T_VERSION ge 32.6; then 9 | TF_VERSION=1.15.5 10 | NV_VERSION=21.6 11 | JP_VERSION=46 12 | elif dpkg --compare-versions $L4T_VERSION ge 32.5; then 13 | TF_VERSION=1.15.4 14 | NV_VERSION=20.12 15 | JP_VERSION=45 16 | elif dpkg --compare-versions $L4T_VERSION ge 32.4; 
then 17 | TF_VERSION=1.15.2 18 | NV_VERSION=20.4 19 | JP_VERSION=44 20 | else 21 | echo "Error: unsupported L4T version $L4T_VERSION" 22 | exit 1 23 | fi 24 | 25 | # Set up CUDA environment 26 | if [ ! -x "$(command -v nvcc)" ]; then 27 | echo "export PATH=/usr/local/cuda/bin\${PATH:+:\${PATH}}" >> ~/.bashrc 28 | echo "export LD_LIBRARY_PATH=/usr/local/cuda/lib64\${LD_LIBRARY_PATH:+:\${LD_LIBRARY_PATH}}" >> ~/.bashrc 29 | source ~/.bashrc 30 | fi 31 | 32 | # NumPy and TensorFlow 33 | sudo apt-get update 34 | sudo apt-get install -y python3-pip libhdf5-serial-dev hdf5-tools libcanberra-gtk-module 35 | sudo -H pip3 install numpy 36 | sudo ln -s /usr/include/locale.h /usr/include/xlocale.h 37 | sudo -H pip3 install --no-cache-dir --extra-index-url https://developer.download.nvidia.com/compute/redist/jp/v$JP_VERSION tensorflow==$TF_VERSION+nv$NV_VERSION 38 | 39 | # SciPy 40 | sudo apt-get install -y libatlas-base-dev gfortran 41 | sudo -H pip3 install scipy==1.5 42 | 43 | # Numba 44 | sudo apt-get install -y llvm-8 llvm-8-dev 45 | sudo -H LLVM_CONFIG=/usr/bin/llvm-config-8 pip3 install numba==0.48 46 | 47 | # CuPy 48 | echo "Installing CuPy, this may take a while..." 49 | sudo -H CUPY_NVCC_GENERATE_CODE="current" CUPY_NUM_BUILD_JOBS=$(nproc) pip3 install cupy==9.2 50 | --------------------------------------------------------------------------------
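The nms() helper in fastmot/utils/rect.py above suppresses overlapping [x, y, w, h] boxes by IoU, keeping the highest-scoring box in each cluster. Below is a minimal usage sketch, assuming the helper is importable as fastmot.utils.rect.nms (as the file layout suggests); the boxes, scores, and 0.5 threshold are illustrative values, not taken from the repository:

import numpy as np
from fastmot.utils.rect import nms

# three candidate boxes in [x, y, w, h] format with confidence scores (made-up values)
tlwhs = np.array([[10., 10., 50., 80.],
                  [12., 14., 50., 80.],
                  [200., 40., 60., 90.]])
scores = np.array([0.9, 0.6, 0.8])

keep = nms(tlwhs, scores, 0.5)  # indices of the boxes to keep, highest score first
print(tlwhs[keep])              # the second box overlaps the first heavily and is dropped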
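VideoIO in fastmot/videoio.py hides capture and encoding behind start_capture()/read()/write()/release(). A minimal usage sketch of that loop follows, assuming the package is installed and OpenCV was built with GStreamer (WITH_GSTREAMER is hard-coded to True above); the frame size, 'input.mp4', and 'out/result.mp4' are placeholder values, not paths from the repository:

from fastmot.videoio import VideoIO

# read a local video file at 1280x720 and re-encode it to out/result.mp4
stream = VideoIO((1280, 720), 'input.mp4', output_uri='out/result.mp4')
stream.start_capture()
try:
    while True:
        frame = stream.read()   # blocks on the internal buffer; returns None at end of stream
        if frame is None:
            break
        stream.write(frame)     # only valid because output_uri was given
finally:
    stream.release()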