├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── assets ├── MOT17-01-SDP.gif ├── MOT17-07-SDP.gif ├── MOT20-07.gif ├── MOT20-08.gif ├── palace_demo.gif ├── sota.png └── teasing.png ├── datasets └── data_path │ ├── citypersons.train │ └── eth.train ├── deploy ├── ONNXRuntime │ ├── README.md │ └── onnx_inference.py ├── TensorRT │ ├── cpp │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── include │ │ │ ├── BYTETracker.h │ │ │ ├── STrack.h │ │ │ ├── dataType.h │ │ │ ├── kalmanFilter.h │ │ │ ├── lapjv.h │ │ │ └── logging.h │ │ └── src │ │ │ ├── BYTETracker.cpp │ │ │ ├── STrack.cpp │ │ │ ├── bytetrack.cpp │ │ │ ├── kalmanFilter.cpp │ │ │ ├── lapjv.cpp │ │ │ └── utils.cpp │ └── python │ │ └── README.md └── ncnn │ └── cpp │ ├── CMakeLists.txt │ ├── README.md │ ├── include │ ├── BYTETracker.h │ ├── STrack.h │ ├── dataType.h │ ├── kalmanFilter.h │ └── lapjv.h │ └── src │ ├── BYTETracker.cpp │ ├── STrack.cpp │ ├── bytetrack.cpp │ ├── kalmanFilter.cpp │ ├── lapjv.cpp │ └── utils.cpp ├── exps ├── default │ ├── nano.py │ ├── yolov3.py │ ├── yolox_l.py │ ├── yolox_m.py │ ├── yolox_s.py │ ├── yolox_tiny.py │ └── yolox_x.py └── example │ └── mot │ ├── yolov5_s_mix_det.py │ ├── yolox_l_mix_det.py │ ├── yolox_m_mix_det.py │ ├── yolox_nano_mix_det.py │ ├── yolox_s_mix_det.py │ ├── yolox_tiny_mix_det.py │ ├── yolox_x_ablation.py │ ├── yolox_x_ch.py │ ├── yolox_x_mix_det.py │ ├── yolox_x_mix_mot20_ch.py │ └── yolox_x_mot17_half.py ├── requirements.txt ├── setup.cfg ├── setup.py ├── tools ├── convert_cityperson_to_coco.py ├── convert_crowdhuman_to_coco.py ├── convert_ethz_to_coco.py ├── convert_mot17_to_coco.py ├── convert_mot20_to_coco.py ├── convert_video.py ├── demo_track.py ├── export_onnx.py ├── interpolation.py ├── mix_data_ablation.py ├── mix_data_test_mot17.py ├── mix_data_test_mot20.py ├── mota.py ├── track.py ├── track_deepsort.py ├── track_motdt.py ├── track_sort.py ├── train.py ├── trt.py └── txt2video.py ├── tools_yolov5 ├── demo_track_yolov5.py ├── models │ ├── __init__.py │ ├── common.py │ ├── experimental.py │ ├── export_master.py │ ├── yolo.py │ └── yolov5s.yaml ├── utils │ ├── __init__.py │ ├── activations.py │ ├── autoanchor.py │ ├── datasets.py │ ├── general.py │ ├── general_v4.py │ ├── google_utils.py │ ├── loss.py │ ├── metrics.py │ ├── plots.py │ └── torch_utils.py └── yolov5_v4 │ ├── .dockerignore │ ├── .gitattributes │ ├── .github │ ├── ISSUE_TEMPLATE │ │ ├── bug-report.md │ │ ├── feature-request.md │ │ └── question.md │ ├── dependabot.yml │ └── workflows │ │ ├── ci-testing.yml │ │ ├── codeql-analysis.yml │ │ ├── greetings.yml │ │ ├── rebase.yml │ │ └── stale.yml │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── hubconf.py │ ├── models │ ├── __init__.py │ ├── common.py │ ├── experimental.py │ ├── export_master.py │ ├── yolo.py │ └── yolov5s.yaml │ ├── requirements.txt │ └── utils │ ├── __init__.py │ ├── activations.py │ ├── autoanchor.py │ ├── datasets.py │ ├── general.py │ ├── general_v4.py │ ├── google_utils.py │ ├── loss.py │ ├── metrics.py │ ├── plots.py │ └── torch_utils.py ├── tutorials ├── centertrack │ ├── README.md │ ├── byte_tracker.py │ ├── mot_online │ │ ├── basetrack.py │ │ ├── kalman_filter.py │ │ └── matching.py │ ├── opts.py │ └── tracker.py ├── cstrack │ ├── README.md │ ├── byte_tracker.py │ └── tracker.py ├── ctracker │ ├── README.md │ ├── byte_tracker.py │ ├── eval_motchallenge.py │ ├── generate_half_csv.py │ ├── mot_online │ │ ├── basetrack.py │ │ ├── kalman_filter.py │ │ └── matching.py │ ├── test.py │ └── test_byte.py ├── fairmot │ ├── 
README.md │ ├── byte_tracker.py │ └── tracker.py ├── jde │ ├── README.md │ ├── byte_tracker.py │ ├── evaluation.py │ ├── track_half.py │ └── tracker.py ├── motr │ ├── README.md │ ├── byte_tracker.py │ ├── eval.py │ ├── evaluation.py │ ├── joint.py │ ├── mot_online │ │ ├── basetrack.py │ │ ├── kalman_filter.py │ │ └── matching.py │ ├── motr.py │ ├── motr_det.py │ └── transforms.py ├── qdtrack │ ├── README.md │ ├── byte_tracker.py │ ├── mot_online │ │ ├── basetrack.py │ │ ├── kalman_filter.py │ │ └── matching.py │ ├── qdtrack.py │ └── tracker_reid_motion.py ├── trades │ ├── README.md │ ├── byte_tracker.py │ ├── mot_online │ │ ├── basetrack.py │ │ ├── kalman_filter.py │ │ └── matching.py │ ├── opts.py │ └── tracker.py └── transtrack │ ├── README.md │ ├── engine_track.py │ ├── main_track.py │ ├── mot_online │ ├── basetrack.py │ ├── byte_tracker.py │ ├── kalman_filter.py │ └── matching.py │ ├── save_track.py │ └── tracker.py ├── videos └── palace.mp4 └── yolox ├── __init__.py ├── core ├── __init__.py ├── launch.py └── trainer.py ├── data ├── __init__.py ├── data_augment.py ├── data_prefetcher.py ├── dataloading.py ├── datasets │ ├── __init__.py │ ├── datasets_wrapper.py │ ├── mosaicdetection.py │ └── mot.py └── samplers.py ├── deepsort_tracker ├── deepsort.py ├── detection.py ├── iou_matching.py ├── kalman_filter.py ├── linear_assignment.py ├── reid_model.py └── track.py ├── evaluators ├── __init__.py ├── coco_evaluator.py ├── evaluation.py └── mot_evaluator.py ├── exp ├── __init__.py ├── base_exp.py ├── build.py └── yolox_base.py ├── layers ├── __init__.py ├── csrc │ ├── cocoeval │ │ ├── cocoeval.cpp │ │ └── cocoeval.h │ └── vision.cpp └── fast_coco_eval_api.py ├── models ├── __init__.py ├── darknet.py ├── losses.py ├── network_blocks.py ├── yolo_fpn.py ├── yolo_head.py ├── yolo_pafpn.py └── yolox.py ├── motdt_tracker ├── basetrack.py ├── kalman_filter.py ├── matching.py ├── motdt_tracker.py └── reid_model.py ├── sort_tracker └── sort.py ├── tracker ├── basetrack.py ├── byte_tracker.py ├── kalman_filter.py └── matching.py ├── tracking_utils ├── evaluation.py ├── io.py └── timer.py └── utils ├── __init__.py ├── allreduce_norm.py ├── boxes.py ├── checkpoint.py ├── demo_utils.py ├── dist.py ├── ema.py ├── logger.py ├── lr_scheduler.py ├── metric.py ├── model_utils.py ├── setup_env.py └── visualize.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # output 132 | docs/api 133 | .code-workspace.code-workspace 134 | *.pkl 135 | *.npy 136 | *.pth 137 | *.onnx 138 | *.engine 139 | events.out.tfevents* 140 | pretrained 141 | YOLOX_outputs -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/tensorrt:21.09-py3 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | ARG USERNAME=user 5 | ARG WORKDIR=/workspace/ByteTrack 6 | 7 | RUN apt-get update && apt-get install -y \ 8 | automake autoconf libpng-dev nano python3-pip \ 9 | curl zip unzip libtool swig zlib1g-dev pkg-config \ 10 | python3-mock libpython3-dev libpython3-all-dev \ 11 | g++ gcc cmake make pciutils cpio gosu wget \ 12 | libgtk-3-dev libxtst-dev sudo apt-transport-https \ 13 | build-essential gnupg git xz-utils vim \ 14 | libva-drm2 libva-x11-2 vainfo libva-wayland2 libva-glx2 \ 15 | libva-dev libdrm-dev xorg xorg-dev protobuf-compiler \ 16 | openbox libx11-dev libgl1-mesa-glx libgl1-mesa-dev \ 17 | libtbb2 libtbb-dev libopenblas-dev libopenmpi-dev \ 18 | && sed -i 's/# set linenumbers/set linenumbers/g' /etc/nanorc \ 19 | && apt clean \ 20 | && rm -rf /var/lib/apt/lists/* 21 | 22 | RUN git clone https://github.com/ifzhang/ByteTrack \ 23 | && cd ByteTrack \ 24 | && git checkout 3434c5e8bc6a5ae8ad530528ba8d9a431967f237 \ 25 | && mkdir -p YOLOX_outputs/yolox_x_mix_det/track_vis \ 26 | && sed -i 's/torch>=1.7/torch==1.9.1+cu111/g' requirements.txt \ 27 | && sed -i 's/torchvision==0.10.0/torchvision==0.10.1+cu111/g' requirements.txt \ 28 | && sed -i "s/'cuda'/0/g" tools/demo_track.py \ 29 | && pip3 install pip --upgrade \ 30 | && pip3 install -r 
requirements.txt -f https://download.pytorch.org/whl/torch_stable.html \ 31 | && python3 setup.py develop \ 32 | && pip3 install cython \ 33 | && pip3 install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' \ 34 | && pip3 install cython_bbox gdown \ 35 | && ldconfig \ 36 | && pip cache purge 37 | 38 | RUN git clone https://github.com/NVIDIA-AI-IOT/torch2trt \ 39 | && cd torch2trt \ 40 | && git checkout 0400b38123d01cc845364870bdf0a0044ea2b3b2 \ 41 | # https://github.com/NVIDIA-AI-IOT/torch2trt/issues/619 42 | && wget https://github.com/NVIDIA-AI-IOT/torch2trt/commit/8b9fb46ddbe99c2ddf3f1ed148c97435cbeb8fd3.patch \ 43 | && git apply 8b9fb46ddbe99c2ddf3f1ed148c97435cbeb8fd3.patch \ 44 | && python3 setup.py install 45 | 46 | RUN echo "root:root" | chpasswd \ 47 | && adduser --disabled-password --gecos "" "${USERNAME}" \ 48 | && echo "${USERNAME}:${USERNAME}" | chpasswd \ 49 | && echo "%${USERNAME} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers.d/${USERNAME} \ 50 | && chmod 0440 /etc/sudoers.d/${USERNAME} 51 | USER ${USERNAME} 52 | RUN sudo chown -R ${USERNAME}:${USERNAME} ${WORKDIR} 53 | WORKDIR ${WORKDIR} -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Yifu Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /assets/MOT17-01-SDP.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/assets/MOT17-01-SDP.gif -------------------------------------------------------------------------------- /assets/MOT17-07-SDP.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/assets/MOT17-07-SDP.gif -------------------------------------------------------------------------------- /assets/MOT20-07.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/assets/MOT20-07.gif -------------------------------------------------------------------------------- /assets/MOT20-08.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/assets/MOT20-08.gif -------------------------------------------------------------------------------- /assets/palace_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/assets/palace_demo.gif -------------------------------------------------------------------------------- /assets/sota.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/assets/sota.png -------------------------------------------------------------------------------- /assets/teasing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/assets/teasing.png -------------------------------------------------------------------------------- /deploy/ONNXRuntime/README.md: -------------------------------------------------------------------------------- 1 | ## ByteTrack-ONNXRuntime in Python 2 | 3 | This doc introduces how to convert your pytorch model into onnx, and how to run an onnxruntime demo to verify your convertion. 
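As a rough shape-level sanity check, the snippet below shows the bare ONNXRuntime session that `onnx_inference.py` builds on. Treat it as a sketch only: the input node name `images` matches the export default, the 1088 x 608 resolution is assumed from the bytetrack_s settings, and the real demo additionally letterboxes, normalizes and post-processes the predictions.

```python
# Minimal shape-level check of an exported ByteTrack model with ONNXRuntime.
# Assumptions: input node "images", 1088 x 608 input, a single raw YOLOX output.
import cv2
import numpy as np
import onnxruntime

session = onnxruntime.InferenceSession("bytetrack_s.onnx")

img = cv2.imread("demo.jpg")                        # any test frame
blob = cv2.resize(img, (1088, 608)).astype(np.float32)
blob = blob.transpose(2, 0, 1)[None]                # HWC -> 1 x 3 x 608 x 1088

(preds,) = session.run(None, {"images": blob})
print(preds.shape)                                  # raw YOLOX predictions
```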
4 | 5 | ### Convert Your Model to ONNX 6 | 7 | ```shell 8 | cd 9 | python3 tools/export_onnx.py --output-name bytetrack_s.onnx -f exps/example/mot/yolox_s_mix_det.py -c pretrained/bytetrack_s_mot17.pth.tar 10 | ``` 11 | 12 | ### ONNXRuntime Demo 13 | 14 | You can run onnx demo with **16 FPS** (96-core Intel(R) Xeon(R) Platinum 8163 CPU @ 2.50GHz): 15 | 16 | ```shell 17 | cd /deploy/ONNXRuntime 18 | python3 onnx_inference.py 19 | ``` 20 | -------------------------------------------------------------------------------- /deploy/TensorRT/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | 3 | project(bytetrack) 4 | 5 | add_definitions(-std=c++11) 6 | 7 | option(CUDA_USE_STATIC_CUDA_RUNTIME OFF) 8 | set(CMAKE_CXX_STANDARD 11) 9 | set(CMAKE_BUILD_TYPE Debug) 10 | 11 | find_package(CUDA REQUIRED) 12 | 13 | include_directories(${PROJECT_SOURCE_DIR}/include) 14 | include_directories(/usr/local/include/eigen3) 15 | link_directories(${PROJECT_SOURCE_DIR}/include) 16 | # include and link dirs of cuda and tensorrt, you need adapt them if yours are different 17 | # cuda 18 | include_directories(/usr/local/cuda/include) 19 | link_directories(/usr/local/cuda/lib64) 20 | # cudnn 21 | include_directories(/data/cuda/cuda-10.2/cudnn/v8.0.4/include) 22 | link_directories(/data/cuda/cuda-10.2/cudnn/v8.0.4/lib64) 23 | # tensorrt 24 | include_directories(/opt/tiger/demo/TensorRT-7.2.3.4/include) 25 | link_directories(/opt/tiger/demo/TensorRT-7.2.3.4/lib) 26 | 27 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED") 28 | 29 | find_package(OpenCV) 30 | include_directories(${OpenCV_INCLUDE_DIRS}) 31 | 32 | file(GLOB My_Source_Files ${PROJECT_SOURCE_DIR}/src/*.cpp) 33 | add_executable(bytetrack ${My_Source_Files}) 34 | target_link_libraries(bytetrack nvinfer) 35 | target_link_libraries(bytetrack cudart) 36 | target_link_libraries(bytetrack ${OpenCV_LIBS}) 37 | 38 | add_definitions(-O2 -pthread) 39 | 40 | -------------------------------------------------------------------------------- /deploy/TensorRT/cpp/README.md: -------------------------------------------------------------------------------- 1 | # ByteTrack-TensorRT in C++ 2 | 3 | ## Installation 4 | 5 | Install opencv with ```sudo apt-get install libopencv-dev``` (we don't need a higher version of opencv like v3.3+). 6 | 7 | Install eigen-3.3.9 [[google]](https://drive.google.com/file/d/1rqO74CYCNrmRAg8Rra0JP3yZtJ-rfket/view?usp=sharing), [[baidu(code:ueq4)]](https://pan.baidu.com/s/15kEfCxpy-T7tz60msxxExg). 8 | 9 | ```shell 10 | unzip eigen-3.3.9.zip 11 | cd eigen-3.3.9 12 | mkdir build 13 | cd build 14 | cmake .. 15 | sudo make install 16 | ``` 17 | 18 | ## Prepare serialized engine file 19 | 20 | Follow the TensorRT Python demo to convert and save the serialized engine file. 21 | 22 | Check the 'model_trt.engine' file, which will be automatically saved at the YOLOX_output dir. 23 | 24 | ## Build the demo 25 | 26 | You should set the TensorRT path and CUDA path in CMakeLists.txt. 27 | 28 | For bytetrack_s model, we set the input frame size 1088 x 608. For bytetrack_m, bytetrack_l, bytetrack_x models, we set the input frame size 1440 x 800. 
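These sizes mirror the `test_size` of the corresponding exp files, so if you are unsure which resolution your engine expects, a quick check from the repo root is the sketch below (assuming the exp layout shown in the tree above; sizes are reported as height x width):

```python
# Print the evaluation input size defined by an exp file.
from yolox.exp import get_exp

exp = get_exp("exps/example/mot/yolox_s_mix_det.py", None)
print(exp.test_size)   # expected (608, 1088) for the bytetrack_s config
```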
You can modify the INPUT_W and INPUT_H in src/bytetrack.cpp 29 | 30 | ```c++ 31 | static const int INPUT_W = 1088; 32 | static const int INPUT_H = 608; 33 | ``` 34 | 35 | You can first build the demo: 36 | 37 | ```shell 38 | cd /demo/TensorRT/cpp 39 | mkdir build 40 | cd build 41 | cmake .. 42 | make 43 | ``` 44 | 45 | Then you can run the demo with **200 FPS**: 46 | 47 | ```shell 48 | ./bytetrack ../../../../YOLOX_outputs/yolox_s_mix_det/model_trt.engine -i ../../../../videos/palace.mp4 49 | ``` 50 | 51 | (If you find the output video lose some frames, you can convert the input video by running: 52 | 53 | ```shell 54 | cd 55 | python3 tools/convert_video.py 56 | ``` 57 | to generate an appropriate input video for TensorRT C++ demo. ) 58 | 59 | -------------------------------------------------------------------------------- /deploy/TensorRT/cpp/include/BYTETracker.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "STrack.h" 4 | 5 | struct Object 6 | { 7 | cv::Rect_ rect; 8 | int label; 9 | float prob; 10 | }; 11 | 12 | class BYTETracker 13 | { 14 | public: 15 | BYTETracker(int frame_rate = 30, int track_buffer = 30); 16 | ~BYTETracker(); 17 | 18 | vector update(const vector& objects); 19 | Scalar get_color(int idx); 20 | 21 | private: 22 | vector joint_stracks(vector &tlista, vector &tlistb); 23 | vector joint_stracks(vector &tlista, vector &tlistb); 24 | 25 | vector sub_stracks(vector &tlista, vector &tlistb); 26 | void remove_duplicate_stracks(vector &resa, vector &resb, vector &stracksa, vector &stracksb); 27 | 28 | void linear_assignment(vector > &cost_matrix, int cost_matrix_size, int cost_matrix_size_size, float thresh, 29 | vector > &matches, vector &unmatched_a, vector &unmatched_b); 30 | vector > iou_distance(vector &atracks, vector &btracks, int &dist_size, int &dist_size_size); 31 | vector > iou_distance(vector &atracks, vector &btracks); 32 | vector > ious(vector > &atlbrs, vector > &btlbrs); 33 | 34 | double lapjv(const vector > &cost, vector &rowsol, vector &colsol, 35 | bool extend_cost = false, float cost_limit = LONG_MAX, bool return_cost = true); 36 | 37 | private: 38 | 39 | float track_thresh; 40 | float high_thresh; 41 | float match_thresh; 42 | int frame_id; 43 | int max_time_lost; 44 | 45 | vector tracked_stracks; 46 | vector lost_stracks; 47 | vector removed_stracks; 48 | byte_kalman::KalmanFilter kalman_filter; 49 | }; -------------------------------------------------------------------------------- /deploy/TensorRT/cpp/include/STrack.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "kalmanFilter.h" 5 | 6 | using namespace cv; 7 | using namespace std; 8 | 9 | enum TrackState { New = 0, Tracked, Lost, Removed }; 10 | 11 | class STrack 12 | { 13 | public: 14 | STrack(vector tlwh_, float score); 15 | ~STrack(); 16 | 17 | vector static tlbr_to_tlwh(vector &tlbr); 18 | void static multi_predict(vector &stracks, byte_kalman::KalmanFilter &kalman_filter); 19 | void static_tlwh(); 20 | void static_tlbr(); 21 | vector tlwh_to_xyah(vector tlwh_tmp); 22 | vector to_xyah(); 23 | void mark_lost(); 24 | void mark_removed(); 25 | int next_id(); 26 | int end_frame(); 27 | 28 | void activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id); 29 | void re_activate(STrack &new_track, int frame_id, bool new_id = false); 30 | void update(STrack &new_track, int frame_id); 31 | 32 | public: 33 | bool is_activated; 34 | int track_id; 35 
| int state; 36 | 37 | vector _tlwh; 38 | vector tlwh; 39 | vector tlbr; 40 | int frame_id; 41 | int tracklet_len; 42 | int start_frame; 43 | 44 | KAL_MEAN mean; 45 | KAL_COVA covariance; 46 | float score; 47 | 48 | private: 49 | byte_kalman::KalmanFilter kalman_filter; 50 | }; -------------------------------------------------------------------------------- /deploy/TensorRT/cpp/include/dataType.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | typedef Eigen::Matrix DETECTBOX; 9 | typedef Eigen::Matrix DETECTBOXSS; 10 | typedef Eigen::Matrix FEATURE; 11 | typedef Eigen::Matrix FEATURESS; 12 | //typedef std::vector FEATURESS; 13 | 14 | //Kalmanfilter 15 | //typedef Eigen::Matrix KAL_FILTER; 16 | typedef Eigen::Matrix KAL_MEAN; 17 | typedef Eigen::Matrix KAL_COVA; 18 | typedef Eigen::Matrix KAL_HMEAN; 19 | typedef Eigen::Matrix KAL_HCOVA; 20 | using KAL_DATA = std::pair; 21 | using KAL_HDATA = std::pair; 22 | 23 | //main 24 | using RESULT_DATA = std::pair; 25 | 26 | //tracker: 27 | using TRACKER_DATA = std::pair; 28 | using MATCH_DATA = std::pair; 29 | typedef struct t { 30 | std::vector matches; 31 | std::vector unmatched_tracks; 32 | std::vector unmatched_detections; 33 | }TRACHER_MATCHD; 34 | 35 | //linear_assignment: 36 | typedef Eigen::Matrix DYNAMICM; -------------------------------------------------------------------------------- /deploy/TensorRT/cpp/include/kalmanFilter.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "dataType.h" 4 | 5 | namespace byte_kalman 6 | { 7 | class KalmanFilter 8 | { 9 | public: 10 | static const double chi2inv95[10]; 11 | KalmanFilter(); 12 | KAL_DATA initiate(const DETECTBOX& measurement); 13 | void predict(KAL_MEAN& mean, KAL_COVA& covariance); 14 | KAL_HDATA project(const KAL_MEAN& mean, const KAL_COVA& covariance); 15 | KAL_DATA update(const KAL_MEAN& mean, 16 | const KAL_COVA& covariance, 17 | const DETECTBOX& measurement); 18 | 19 | Eigen::Matrix gating_distance( 20 | const KAL_MEAN& mean, 21 | const KAL_COVA& covariance, 22 | const std::vector& measurements, 23 | bool only_position = false); 24 | 25 | private: 26 | Eigen::Matrix _motion_mat; 27 | Eigen::Matrix _update_mat; 28 | float _std_weight_position; 29 | float _std_weight_velocity; 30 | }; 31 | } -------------------------------------------------------------------------------- /deploy/TensorRT/cpp/include/lapjv.h: -------------------------------------------------------------------------------- 1 | #ifndef LAPJV_H 2 | #define LAPJV_H 3 | 4 | #define LARGE 1000000 5 | 6 | #if !defined TRUE 7 | #define TRUE 1 8 | #endif 9 | #if !defined FALSE 10 | #define FALSE 0 11 | #endif 12 | 13 | #define NEW(x, t, n) if ((x = (t *)malloc(sizeof(t) * (n))) == 0) { return -1; } 14 | #define FREE(x) if (x != 0) { free(x); x = 0; } 15 | #define SWAP_INDICES(a, b) { int_t _temp_index = a; a = b; b = _temp_index; } 16 | 17 | #if 0 18 | #include 19 | #define ASSERT(cond) assert(cond) 20 | #define PRINTF(fmt, ...) 
printf(fmt, ##__VA_ARGS__) 21 | #define PRINT_COST_ARRAY(a, n) \ 22 | while (1) { \ 23 | printf(#a" = ["); \ 24 | if ((n) > 0) { \ 25 | printf("%f", (a)[0]); \ 26 | for (uint_t j = 1; j < n; j++) { \ 27 | printf(", %f", (a)[j]); \ 28 | } \ 29 | } \ 30 | printf("]\n"); \ 31 | break; \ 32 | } 33 | #define PRINT_INDEX_ARRAY(a, n) \ 34 | while (1) { \ 35 | printf(#a" = ["); \ 36 | if ((n) > 0) { \ 37 | printf("%d", (a)[0]); \ 38 | for (uint_t j = 1; j < n; j++) { \ 39 | printf(", %d", (a)[j]); \ 40 | } \ 41 | } \ 42 | printf("]\n"); \ 43 | break; \ 44 | } 45 | #else 46 | #define ASSERT(cond) 47 | #define PRINTF(fmt, ...) 48 | #define PRINT_COST_ARRAY(a, n) 49 | #define PRINT_INDEX_ARRAY(a, n) 50 | #endif 51 | 52 | 53 | typedef signed int int_t; 54 | typedef unsigned int uint_t; 55 | typedef double cost_t; 56 | typedef char boolean; 57 | typedef enum fp_t { FP_1 = 1, FP_2 = 2, FP_DYNAMIC = 3 } fp_t; 58 | 59 | extern int_t lapjv_internal( 60 | const uint_t n, cost_t *cost[], 61 | int_t *x, int_t *y); 62 | 63 | #endif // LAPJV_H -------------------------------------------------------------------------------- /deploy/TensorRT/python/README.md: -------------------------------------------------------------------------------- 1 | # ByteTrack-TensorRT in Python 2 | 3 | ## Install TensorRT Toolkit 4 | Please follow the [TensorRT Installation Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html) and [torch2trt gitrepo](https://github.com/NVIDIA-AI-IOT/torch2trt) to install TensorRT (Version 7 recommended) and torch2trt. 5 | 6 | ## Convert model 7 | 8 | You can convert the Pytorch model “bytetrack_s_mot17” to TensorRT model by running: 9 | 10 | ```shell 11 | cd 12 | python3 tools/trt.py -f exps/example/mot/yolox_s_mix_det.py -c pretrained/bytetrack_s_mot17.pth.tar 13 | ``` 14 | 15 | ## Run TensorRT demo 16 | 17 | You can use the converted model_trt.pth to run TensorRT demo with **130 FPS**: 18 | 19 | ```shell 20 | cd 21 | python3 tools/demo_track.py video -f exps/example/mot/yolox_s_mix_det.py --trt --save_result 22 | ``` 23 | -------------------------------------------------------------------------------- /deploy/ncnn/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | macro(ncnn_add_example name) 2 | add_executable(${name} ${name}.cpp) 3 | if(OpenCV_FOUND) 4 | target_include_directories(${name} PRIVATE ${OpenCV_INCLUDE_DIRS}) 5 | target_link_libraries(${name} PRIVATE ncnn ${OpenCV_LIBS}) 6 | elseif(NCNN_SIMPLEOCV) 7 | target_compile_definitions(${name} PUBLIC USE_NCNN_SIMPLEOCV) 8 | target_link_libraries(${name} PRIVATE ncnn) 9 | endif() 10 | 11 | # add test to a virtual project group 12 | set_property(TARGET ${name} PROPERTY FOLDER "examples") 13 | endmacro() 14 | 15 | if(NCNN_PIXEL) 16 | find_package(OpenCV QUIET COMPONENTS opencv_world) 17 | # for opencv 2.4 on ubuntu 16.04, there is no opencv_world but OpenCV_FOUND will be TRUE 18 | if("${OpenCV_LIBS}" STREQUAL "") 19 | set(OpenCV_FOUND FALSE) 20 | endif() 21 | if(NOT OpenCV_FOUND) 22 | find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs videoio) 23 | endif() 24 | if(NOT OpenCV_FOUND) 25 | find_package(OpenCV QUIET COMPONENTS core highgui imgproc) 26 | endif() 27 | 28 | if(OpenCV_FOUND OR NCNN_SIMPLEOCV) 29 | if(OpenCV_FOUND) 30 | message(STATUS "OpenCV library: ${OpenCV_INSTALL_PATH}") 31 | message(STATUS " version: ${OpenCV_VERSION}") 32 | message(STATUS " libraries: ${OpenCV_LIBS}") 33 | message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}") 34 | 35 | 
if(${OpenCV_VERSION_MAJOR} GREATER 3) 36 | set(CMAKE_CXX_STANDARD 11) 37 | endif() 38 | endif() 39 | 40 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../src) 41 | include_directories(${CMAKE_CURRENT_BINARY_DIR}/../src) 42 | include_directories(include) 43 | include_directories(/usr/local/include/eigen3) 44 | 45 | ncnn_add_example(squeezenet) 46 | ncnn_add_example(squeezenet_c_api) 47 | ncnn_add_example(fasterrcnn) 48 | ncnn_add_example(rfcn) 49 | ncnn_add_example(yolov2) 50 | ncnn_add_example(yolov3) 51 | if(OpenCV_FOUND) 52 | ncnn_add_example(yolov4) 53 | endif() 54 | ncnn_add_example(yolov5) 55 | ncnn_add_example(yolox) 56 | ncnn_add_example(mobilenetv2ssdlite) 57 | ncnn_add_example(mobilenetssd) 58 | ncnn_add_example(squeezenetssd) 59 | ncnn_add_example(shufflenetv2) 60 | ncnn_add_example(peleenetssd_seg) 61 | ncnn_add_example(simplepose) 62 | ncnn_add_example(retinaface) 63 | ncnn_add_example(yolact) 64 | ncnn_add_example(nanodet) 65 | ncnn_add_example(scrfd) 66 | ncnn_add_example(scrfd_crowdhuman) 67 | ncnn_add_example(rvm) 68 | file(GLOB My_Source_Files src/*.cpp) 69 | add_executable(bytetrack ${My_Source_Files}) 70 | if(OpenCV_FOUND) 71 | target_include_directories(bytetrack PRIVATE ${OpenCV_INCLUDE_DIRS}) 72 | target_link_libraries(bytetrack PRIVATE ncnn ${OpenCV_LIBS}) 73 | elseif(NCNN_SIMPLEOCV) 74 | target_compile_definitions(bytetrack PUBLIC USE_NCNN_SIMPLEOCV) 75 | target_link_libraries(bytetrack PRIVATE ncnn) 76 | endif() 77 | # add test to a virtual project group 78 | set_property(TARGET bytetrack PROPERTY FOLDER "examples") 79 | else() 80 | message(WARNING "OpenCV not found and NCNN_SIMPLEOCV disabled, examples won't be built") 81 | endif() 82 | else() 83 | message(WARNING "NCNN_PIXEL not enabled, examples won't be built") 84 | endif() 85 | -------------------------------------------------------------------------------- /deploy/ncnn/cpp/README.md: -------------------------------------------------------------------------------- 1 | # ByteTrack-CPP-ncnn 2 | 3 | ## Installation 4 | 5 | Clone [ncnn](https://github.com/Tencent/ncnn) first, then please following [build tutorial of ncnn](https://github.com/Tencent/ncnn/wiki/how-to-build) to build on your own device. 6 | 7 | Install eigen-3.3.9 [[google]](https://drive.google.com/file/d/1rqO74CYCNrmRAg8Rra0JP3yZtJ-rfket/view?usp=sharing), [[baidu(code:ueq4)]](https://pan.baidu.com/s/15kEfCxpy-T7tz60msxxExg). 8 | 9 | ```shell 10 | unzip eigen-3.3.9.zip 11 | cd eigen-3.3.9 12 | mkdir build 13 | cd build 14 | cmake .. 15 | sudo make install 16 | ``` 17 | 18 | ## Generate onnx file 19 | Use provided tools to generate onnx file. 20 | For example, if you want to generate onnx file of bytetrack_s_mot17.pth, please run the following command: 21 | ```shell 22 | cd 23 | python3 tools/export_onnx.py -f exps/example/mot/yolox_s_mix_det.py -c pretrained/bytetrack_s_mot17.pth.tar 24 | ``` 25 | Then, a bytetrack_s.onnx file is generated under . 26 | 27 | ## Generate ncnn param and bin file 28 | Put bytetrack_s.onnx under ncnn/build/tools/onnx and then run: 29 | 30 | ```shell 31 | cd ncnn/build/tools/onnx 32 | ./onnx2ncnn bytetrack_s.onnx bytetrack_s.param bytetrack_s.bin 33 | ``` 34 | 35 | Since Focus module is not supported in ncnn. Warnings like: 36 | ```shell 37 | Unsupported slice step ! 38 | ``` 39 | will be printed. However, don't worry! C++ version of Focus layer is already implemented in src/bytetrack.cpp. 40 | 41 | ## Modify param file 42 | Open **bytetrack_s.param**, and modify it. 
43 | Before (just an example): 44 | ``` 45 | 235 268 46 | Input images 0 1 images 47 | Split splitncnn_input0 1 4 images images_splitncnn_0 images_splitncnn_1 images_splitncnn_2 images_splitncnn_3 48 | Crop Slice_4 1 1 images_splitncnn_3 467 -23309=1,0 -23310=1,2147483647 -23311=1,1 49 | Crop Slice_9 1 1 467 472 -23309=1,0 -23310=1,2147483647 -23311=1,2 50 | Crop Slice_14 1 1 images_splitncnn_2 477 -23309=1,0 -23310=1,2147483647 -23311=1,1 51 | Crop Slice_19 1 1 477 482 -23309=1,1 -23310=1,2147483647 -23311=1,2 52 | Crop Slice_24 1 1 images_splitncnn_1 487 -23309=1,1 -23310=1,2147483647 -23311=1,1 53 | Crop Slice_29 1 1 487 492 -23309=1,0 -23310=1,2147483647 -23311=1,2 54 | Crop Slice_34 1 1 images_splitncnn_0 497 -23309=1,1 -23310=1,2147483647 -23311=1,1 55 | Crop Slice_39 1 1 497 502 -23309=1,1 -23310=1,2147483647 -23311=1,2 56 | Concat Concat_40 4 1 472 492 482 502 503 0=0 57 | ... 58 | ``` 59 | * Change first number for 235 to 235 - 9 = 226(since we will remove 10 layers and add 1 layers, total layers number should minus 9). 60 | * Then remove 10 lines of code from Split to Concat, but remember the last but 2nd number: 503. 61 | * Add YoloV5Focus layer After Input (using previous number 503): 62 | ``` 63 | YoloV5Focus focus 1 1 images 503 64 | ``` 65 | After(just an exmaple): 66 | ``` 67 | 226 328 68 | Input images 0 1 images 69 | YoloV5Focus focus 1 1 images 503 70 | ... 71 | ``` 72 | 73 | ## Use ncnn_optimize to generate new param and bin 74 | ```shell 75 | # suppose you are still under ncnn/build/tools/onnx dir. 76 | ../ncnnoptimize bytetrack_s.param bytetrack_s.bin bytetrack_s_op.param bytetrack_s_op.bin 65536 77 | ``` 78 | 79 | ## Copy files and build ByteTrack 80 | Copy or move 'src', 'include' folders and 'CMakeLists.txt' file into ncnn/examples. Copy bytetrack_s_op.param, bytetrack_s_op.bin and /videos/palace.mp4 into ncnn/build/examples. Then, build ByteTrack: 81 | 82 | ```shell 83 | cd ncnn/build/examples 84 | cmake .. 
85 | make 86 | ``` 87 | 88 | ## Run the demo 89 | You can run the ncnn demo with **5 FPS** (96-core Intel(R) Xeon(R) Platinum 8163 CPU @ 2.50GHz): 90 | ```shell 91 | ./bytetrack palace.mp4 92 | ``` 93 | 94 | You can modify 'num_threads' to optimize the running speed in [bytetrack.cpp](https://github.com/ifzhang/ByteTrack/blob/2e9a67895da6b47b948015f6861bba0bacd4e72f/deploy/ncnn/cpp/src/bytetrack.cpp#L309) according to the number of your CPU cores: 95 | 96 | ``` 97 | yolox.opt.num_threads = 20; 98 | ``` 99 | 100 | 101 | ## Acknowledgement 102 | 103 | * [ncnn](https://github.com/Tencent/ncnn) 104 | -------------------------------------------------------------------------------- /deploy/ncnn/cpp/include/BYTETracker.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "STrack.h" 4 | 5 | struct Object 6 | { 7 | cv::Rect_ rect; 8 | int label; 9 | float prob; 10 | }; 11 | 12 | class BYTETracker 13 | { 14 | public: 15 | BYTETracker(int frame_rate = 30, int track_buffer = 30); 16 | ~BYTETracker(); 17 | 18 | vector update(const vector& objects); 19 | Scalar get_color(int idx); 20 | 21 | private: 22 | vector joint_stracks(vector &tlista, vector &tlistb); 23 | vector joint_stracks(vector &tlista, vector &tlistb); 24 | 25 | vector sub_stracks(vector &tlista, vector &tlistb); 26 | void remove_duplicate_stracks(vector &resa, vector &resb, vector &stracksa, vector &stracksb); 27 | 28 | void linear_assignment(vector > &cost_matrix, int cost_matrix_size, int cost_matrix_size_size, float thresh, 29 | vector > &matches, vector &unmatched_a, vector &unmatched_b); 30 | vector > iou_distance(vector &atracks, vector &btracks, int &dist_size, int &dist_size_size); 31 | vector > iou_distance(vector &atracks, vector &btracks); 32 | vector > ious(vector > &atlbrs, vector > &btlbrs); 33 | 34 | double lapjv(const vector > &cost, vector &rowsol, vector &colsol, 35 | bool extend_cost = false, float cost_limit = LONG_MAX, bool return_cost = true); 36 | 37 | private: 38 | 39 | float track_thresh; 40 | float high_thresh; 41 | float match_thresh; 42 | int frame_id; 43 | int max_time_lost; 44 | 45 | vector tracked_stracks; 46 | vector lost_stracks; 47 | vector removed_stracks; 48 | byte_kalman::KalmanFilter kalman_filter; 49 | }; -------------------------------------------------------------------------------- /deploy/ncnn/cpp/include/STrack.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "kalmanFilter.h" 5 | 6 | using namespace cv; 7 | using namespace std; 8 | 9 | enum TrackState { New = 0, Tracked, Lost, Removed }; 10 | 11 | class STrack 12 | { 13 | public: 14 | STrack(vector tlwh_, float score); 15 | ~STrack(); 16 | 17 | vector static tlbr_to_tlwh(vector &tlbr); 18 | void static multi_predict(vector &stracks, byte_kalman::KalmanFilter &kalman_filter); 19 | void static_tlwh(); 20 | void static_tlbr(); 21 | vector tlwh_to_xyah(vector tlwh_tmp); 22 | vector to_xyah(); 23 | void mark_lost(); 24 | void mark_removed(); 25 | int next_id(); 26 | int end_frame(); 27 | 28 | void activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id); 29 | void re_activate(STrack &new_track, int frame_id, bool new_id = false); 30 | void update(STrack &new_track, int frame_id); 31 | 32 | public: 33 | bool is_activated; 34 | int track_id; 35 | int state; 36 | 37 | vector _tlwh; 38 | vector tlwh; 39 | vector tlbr; 40 | int frame_id; 41 | int tracklet_len; 42 | int start_frame; 43 | 44 | KAL_MEAN 
mean; 45 | KAL_COVA covariance; 46 | float score; 47 | 48 | private: 49 | byte_kalman::KalmanFilter kalman_filter; 50 | }; -------------------------------------------------------------------------------- /deploy/ncnn/cpp/include/dataType.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | typedef Eigen::Matrix DETECTBOX; 9 | typedef Eigen::Matrix DETECTBOXSS; 10 | typedef Eigen::Matrix FEATURE; 11 | typedef Eigen::Matrix FEATURESS; 12 | //typedef std::vector FEATURESS; 13 | 14 | //Kalmanfilter 15 | //typedef Eigen::Matrix KAL_FILTER; 16 | typedef Eigen::Matrix KAL_MEAN; 17 | typedef Eigen::Matrix KAL_COVA; 18 | typedef Eigen::Matrix KAL_HMEAN; 19 | typedef Eigen::Matrix KAL_HCOVA; 20 | using KAL_DATA = std::pair; 21 | using KAL_HDATA = std::pair; 22 | 23 | //main 24 | using RESULT_DATA = std::pair; 25 | 26 | //tracker: 27 | using TRACKER_DATA = std::pair; 28 | using MATCH_DATA = std::pair; 29 | typedef struct t { 30 | std::vector matches; 31 | std::vector unmatched_tracks; 32 | std::vector unmatched_detections; 33 | }TRACHER_MATCHD; 34 | 35 | //linear_assignment: 36 | typedef Eigen::Matrix DYNAMICM; -------------------------------------------------------------------------------- /deploy/ncnn/cpp/include/kalmanFilter.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "dataType.h" 4 | 5 | namespace byte_kalman 6 | { 7 | class KalmanFilter 8 | { 9 | public: 10 | static const double chi2inv95[10]; 11 | KalmanFilter(); 12 | KAL_DATA initiate(const DETECTBOX& measurement); 13 | void predict(KAL_MEAN& mean, KAL_COVA& covariance); 14 | KAL_HDATA project(const KAL_MEAN& mean, const KAL_COVA& covariance); 15 | KAL_DATA update(const KAL_MEAN& mean, 16 | const KAL_COVA& covariance, 17 | const DETECTBOX& measurement); 18 | 19 | Eigen::Matrix gating_distance( 20 | const KAL_MEAN& mean, 21 | const KAL_COVA& covariance, 22 | const std::vector& measurements, 23 | bool only_position = false); 24 | 25 | private: 26 | Eigen::Matrix _motion_mat; 27 | Eigen::Matrix _update_mat; 28 | float _std_weight_position; 29 | float _std_weight_velocity; 30 | }; 31 | } -------------------------------------------------------------------------------- /deploy/ncnn/cpp/include/lapjv.h: -------------------------------------------------------------------------------- 1 | #ifndef LAPJV_H 2 | #define LAPJV_H 3 | 4 | #define LARGE 1000000 5 | 6 | #if !defined TRUE 7 | #define TRUE 1 8 | #endif 9 | #if !defined FALSE 10 | #define FALSE 0 11 | #endif 12 | 13 | #define NEW(x, t, n) if ((x = (t *)malloc(sizeof(t) * (n))) == 0) { return -1; } 14 | #define FREE(x) if (x != 0) { free(x); x = 0; } 15 | #define SWAP_INDICES(a, b) { int_t _temp_index = a; a = b; b = _temp_index; } 16 | 17 | #if 0 18 | #include 19 | #define ASSERT(cond) assert(cond) 20 | #define PRINTF(fmt, ...) 
printf(fmt, ##__VA_ARGS__) 21 | #define PRINT_COST_ARRAY(a, n) \ 22 | while (1) { \ 23 | printf(#a" = ["); \ 24 | if ((n) > 0) { \ 25 | printf("%f", (a)[0]); \ 26 | for (uint_t j = 1; j < n; j++) { \ 27 | printf(", %f", (a)[j]); \ 28 | } \ 29 | } \ 30 | printf("]\n"); \ 31 | break; \ 32 | } 33 | #define PRINT_INDEX_ARRAY(a, n) \ 34 | while (1) { \ 35 | printf(#a" = ["); \ 36 | if ((n) > 0) { \ 37 | printf("%d", (a)[0]); \ 38 | for (uint_t j = 1; j < n; j++) { \ 39 | printf(", %d", (a)[j]); \ 40 | } \ 41 | } \ 42 | printf("]\n"); \ 43 | break; \ 44 | } 45 | #else 46 | #define ASSERT(cond) 47 | #define PRINTF(fmt, ...) 48 | #define PRINT_COST_ARRAY(a, n) 49 | #define PRINT_INDEX_ARRAY(a, n) 50 | #endif 51 | 52 | 53 | typedef signed int int_t; 54 | typedef unsigned int uint_t; 55 | typedef double cost_t; 56 | typedef char boolean; 57 | typedef enum fp_t { FP_1 = 1, FP_2 = 2, FP_DYNAMIC = 3 } fp_t; 58 | 59 | extern int_t lapjv_internal( 60 | const uint_t n, cost_t *cost[], 61 | int_t *x, int_t *y); 62 | 63 | #endif // LAPJV_H -------------------------------------------------------------------------------- /deploy/ncnn/cpp/src/STrack.cpp: -------------------------------------------------------------------------------- 1 | #include "STrack.h" 2 | 3 | STrack::STrack(vector tlwh_, float score) 4 | { 5 | _tlwh.resize(4); 6 | _tlwh.assign(tlwh_.begin(), tlwh_.end()); 7 | 8 | is_activated = false; 9 | track_id = 0; 10 | state = TrackState::New; 11 | 12 | tlwh.resize(4); 13 | tlbr.resize(4); 14 | 15 | static_tlwh(); 16 | static_tlbr(); 17 | frame_id = 0; 18 | tracklet_len = 0; 19 | this->score = score; 20 | start_frame = 0; 21 | } 22 | 23 | STrack::~STrack() 24 | { 25 | } 26 | 27 | void STrack::activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id) 28 | { 29 | this->kalman_filter = kalman_filter; 30 | this->track_id = this->next_id(); 31 | 32 | vector _tlwh_tmp(4); 33 | _tlwh_tmp[0] = this->_tlwh[0]; 34 | _tlwh_tmp[1] = this->_tlwh[1]; 35 | _tlwh_tmp[2] = this->_tlwh[2]; 36 | _tlwh_tmp[3] = this->_tlwh[3]; 37 | vector xyah = tlwh_to_xyah(_tlwh_tmp); 38 | DETECTBOX xyah_box; 39 | xyah_box[0] = xyah[0]; 40 | xyah_box[1] = xyah[1]; 41 | xyah_box[2] = xyah[2]; 42 | xyah_box[3] = xyah[3]; 43 | auto mc = this->kalman_filter.initiate(xyah_box); 44 | this->mean = mc.first; 45 | this->covariance = mc.second; 46 | 47 | static_tlwh(); 48 | static_tlbr(); 49 | 50 | this->tracklet_len = 0; 51 | this->state = TrackState::Tracked; 52 | if (frame_id == 1) 53 | { 54 | this->is_activated = true; 55 | } 56 | //this->is_activated = true; 57 | this->frame_id = frame_id; 58 | this->start_frame = frame_id; 59 | } 60 | 61 | void STrack::re_activate(STrack &new_track, int frame_id, bool new_id) 62 | { 63 | vector xyah = tlwh_to_xyah(new_track.tlwh); 64 | DETECTBOX xyah_box; 65 | xyah_box[0] = xyah[0]; 66 | xyah_box[1] = xyah[1]; 67 | xyah_box[2] = xyah[2]; 68 | xyah_box[3] = xyah[3]; 69 | auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box); 70 | this->mean = mc.first; 71 | this->covariance = mc.second; 72 | 73 | static_tlwh(); 74 | static_tlbr(); 75 | 76 | this->tracklet_len = 0; 77 | this->state = TrackState::Tracked; 78 | this->is_activated = true; 79 | this->frame_id = frame_id; 80 | this->score = new_track.score; 81 | if (new_id) 82 | this->track_id = next_id(); 83 | } 84 | 85 | void STrack::update(STrack &new_track, int frame_id) 86 | { 87 | this->frame_id = frame_id; 88 | this->tracklet_len++; 89 | 90 | vector xyah = tlwh_to_xyah(new_track.tlwh); 91 | DETECTBOX xyah_box; 92 | 
xyah_box[0] = xyah[0]; 93 | xyah_box[1] = xyah[1]; 94 | xyah_box[2] = xyah[2]; 95 | xyah_box[3] = xyah[3]; 96 | 97 | auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box); 98 | this->mean = mc.first; 99 | this->covariance = mc.second; 100 | 101 | static_tlwh(); 102 | static_tlbr(); 103 | 104 | this->state = TrackState::Tracked; 105 | this->is_activated = true; 106 | 107 | this->score = new_track.score; 108 | } 109 | 110 | void STrack::static_tlwh() 111 | { 112 | if (this->state == TrackState::New) 113 | { 114 | tlwh[0] = _tlwh[0]; 115 | tlwh[1] = _tlwh[1]; 116 | tlwh[2] = _tlwh[2]; 117 | tlwh[3] = _tlwh[3]; 118 | return; 119 | } 120 | 121 | tlwh[0] = mean[0]; 122 | tlwh[1] = mean[1]; 123 | tlwh[2] = mean[2]; 124 | tlwh[3] = mean[3]; 125 | 126 | tlwh[2] *= tlwh[3]; 127 | tlwh[0] -= tlwh[2] / 2; 128 | tlwh[1] -= tlwh[3] / 2; 129 | } 130 | 131 | void STrack::static_tlbr() 132 | { 133 | tlbr.clear(); 134 | tlbr.assign(tlwh.begin(), tlwh.end()); 135 | tlbr[2] += tlbr[0]; 136 | tlbr[3] += tlbr[1]; 137 | } 138 | 139 | vector STrack::tlwh_to_xyah(vector tlwh_tmp) 140 | { 141 | vector tlwh_output = tlwh_tmp; 142 | tlwh_output[0] += tlwh_output[2] / 2; 143 | tlwh_output[1] += tlwh_output[3] / 2; 144 | tlwh_output[2] /= tlwh_output[3]; 145 | return tlwh_output; 146 | } 147 | 148 | vector STrack::to_xyah() 149 | { 150 | return tlwh_to_xyah(tlwh); 151 | } 152 | 153 | vector STrack::tlbr_to_tlwh(vector &tlbr) 154 | { 155 | tlbr[2] -= tlbr[0]; 156 | tlbr[3] -= tlbr[1]; 157 | return tlbr; 158 | } 159 | 160 | void STrack::mark_lost() 161 | { 162 | state = TrackState::Lost; 163 | } 164 | 165 | void STrack::mark_removed() 166 | { 167 | state = TrackState::Removed; 168 | } 169 | 170 | int STrack::next_id() 171 | { 172 | static int _count = 0; 173 | _count++; 174 | return _count; 175 | } 176 | 177 | int STrack::end_frame() 178 | { 179 | return this->frame_id; 180 | } 181 | 182 | void STrack::multi_predict(vector &stracks, byte_kalman::KalmanFilter &kalman_filter) 183 | { 184 | for (int i = 0; i < stracks.size(); i++) 185 | { 186 | if (stracks[i]->state != TrackState::Tracked) 187 | { 188 | stracks[i]->mean[7] = 0; 189 | } 190 | kalman_filter.predict(stracks[i]->mean, stracks[i]->covariance); 191 | } 192 | } -------------------------------------------------------------------------------- /exps/default/nano.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | import torch.nn as nn 7 | 8 | from yolox.exp import Exp as MyExp 9 | 10 | 11 | class Exp(MyExp): 12 | def __init__(self): 13 | super(Exp, self).__init__() 14 | self.depth = 0.33 15 | self.width = 0.25 16 | self.scale = (0.5, 1.5) 17 | self.random_size = (10, 20) 18 | self.test_size = (416, 416) 19 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 20 | self.enable_mixup = False 21 | 22 | def get_model(self, sublinear=False): 23 | 24 | def init_yolo(M): 25 | for m in M.modules(): 26 | if isinstance(m, nn.BatchNorm2d): 27 | m.eps = 1e-3 28 | m.momentum = 0.03 29 | if "model" not in self.__dict__: 30 | from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead 31 | in_channels = [256, 512, 1024] 32 | # NANO model use depthwise = True, which is main difference. 
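# (depthwise-separable convs: a per-channel 3x3 conv followed by a 1x1 pointwise
# conv, which is what cuts the nano model's parameters and FLOPs)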
33 | backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) 34 | head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) 35 | self.model = YOLOX(backbone, head) 36 | 37 | self.model.apply(init_yolo) 38 | self.model.head.initialize_biases(1e-2) 39 | return self.model 40 | -------------------------------------------------------------------------------- /exps/default/yolov3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | import torch 7 | import torch.nn as nn 8 | 9 | from yolox.exp import Exp as MyExp 10 | 11 | 12 | class Exp(MyExp): 13 | def __init__(self): 14 | super(Exp, self).__init__() 15 | self.depth = 1.0 16 | self.width = 1.0 17 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 18 | 19 | def get_model(self, sublinear=False): 20 | def init_yolo(M): 21 | for m in M.modules(): 22 | if isinstance(m, nn.BatchNorm2d): 23 | m.eps = 1e-3 24 | m.momentum = 0.03 25 | if "model" not in self.__dict__: 26 | from yolox.models import YOLOX, YOLOFPN, YOLOXHead 27 | backbone = YOLOFPN() 28 | head = YOLOXHead(self.num_classes, self.width, in_channels=[128, 256, 512], act="lrelu") 29 | self.model = YOLOX(backbone, head) 30 | self.model.apply(init_yolo) 31 | self.model.head.initialize_biases(1e-2) 32 | 33 | return self.model 34 | 35 | def get_data_loader(self, batch_size, is_distributed, no_aug=False): 36 | from data.datasets.cocodataset import COCODataset 37 | from data.datasets.mosaicdetection import MosaicDetection 38 | from data.datasets.data_augment import TrainTransform 39 | from data.datasets.dataloading import YoloBatchSampler, DataLoader, InfiniteSampler 40 | import torch.distributed as dist 41 | 42 | dataset = COCODataset( 43 | data_dir='data/COCO/', 44 | json_file=self.train_ann, 45 | img_size=self.input_size, 46 | preproc=TrainTransform( 47 | rgb_means=(0.485, 0.456, 0.406), 48 | std=(0.229, 0.224, 0.225), 49 | max_labels=50 50 | ), 51 | ) 52 | 53 | dataset = MosaicDetection( 54 | dataset, 55 | mosaic=not no_aug, 56 | img_size=self.input_size, 57 | preproc=TrainTransform( 58 | rgb_means=(0.485, 0.456, 0.406), 59 | std=(0.229, 0.224, 0.225), 60 | max_labels=120 61 | ), 62 | degrees=self.degrees, 63 | translate=self.translate, 64 | scale=self.scale, 65 | shear=self.shear, 66 | perspective=self.perspective, 67 | ) 68 | 69 | self.dataset = dataset 70 | 71 | if is_distributed: 72 | batch_size = batch_size // dist.get_world_size() 73 | sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0) 74 | else: 75 | sampler = torch.utils.data.RandomSampler(self.dataset) 76 | 77 | batch_sampler = YoloBatchSampler( 78 | sampler=sampler, 79 | batch_size=batch_size, 80 | drop_last=False, 81 | input_dimension=self.input_size, 82 | mosaic=not no_aug 83 | ) 84 | 85 | dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} 86 | dataloader_kwargs["batch_sampler"] = batch_sampler 87 | train_loader = DataLoader(self.dataset, **dataloader_kwargs) 88 | 89 | return train_loader 90 | -------------------------------------------------------------------------------- /exps/default/yolox_l.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 1.0 14 | self.width = 1.0 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /exps/default/yolox_m.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.67 14 | self.width = 0.75 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /exps/default/yolox_s.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.33 14 | self.width = 0.50 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /exps/default/yolox_tiny.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.33 14 | self.width = 0.375 15 | self.scale = (0.5, 1.5) 16 | self.random_size = (10, 20) 17 | self.test_size = (416, 416) 18 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 19 | self.enable_mixup = False 20 | -------------------------------------------------------------------------------- /exps/default/yolox_x.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 1.33 14 | self.width = 1.25 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # TODO: Update with exact module version 2 | numpy 3 | torch>=1.7 4 | opencv_python 5 | loguru 6 | scikit-image 7 | tqdm 8 | torchvision>=0.10.0 9 | Pillow 10 | thop 11 | ninja 12 | tabulate 13 | tensorboard 14 | lap 15 | motmetrics 16 | filterpy 17 | h5py 18 | 19 | # verified versions 20 | onnx==1.8.1 21 | onnxruntime==1.8.0 22 | onnx-simplifier==0.3.5 23 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 100 3 | multi_line_output = 3 4 | balanced_wrapping = True 5 | known_standard_library = setuptools 6 | known_third_party = tqdm,loguru 7 | known_data_processing = cv2,numpy,scipy,PIL,matplotlib,scikit_image 8 | known_datasets = pycocotools 9 | known_deeplearning = torch,torchvision,caffe2,onnx,apex,timm,thop,torch2trt,tensorrt,openvino,onnxruntime 10 | known_myself = yolox 11 | sections = FUTURE,STDLIB,THIRDPARTY,data_processing,datasets,deeplearning,myself,FIRSTPARTY,LOCALFOLDER 12 | no_lines_before=STDLIB,THIRDPARTY,datasets 13 | default_section = FIRSTPARTY 14 | 15 | [flake8] 16 | max-line-length = 100 17 | max-complexity = 18 18 | exclude = __init__.py 19 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Megvii, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import re 5 | import setuptools 6 | import glob 7 | from os import path 8 | import torch 9 | from torch.utils.cpp_extension import CppExtension 10 | 11 | torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] 12 | assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3" 13 | 14 | 15 | def get_extensions(): 16 | this_dir = path.dirname(path.abspath(__file__)) 17 | extensions_dir = path.join(this_dir, "yolox", "layers", "csrc") 18 | 19 | main_source = path.join(extensions_dir, "vision.cpp") 20 | sources = glob.glob(path.join(extensions_dir, "**", "*.cpp")) 21 | 22 | sources = [main_source] + sources 23 | extension = CppExtension 24 | 25 | extra_compile_args = {"cxx": ["-O3"]} 26 | define_macros = [] 27 | 28 | include_dirs = [extensions_dir] 29 | 30 | ext_modules = [ 31 | extension( 32 | "yolox._C", 33 | sources, 34 | include_dirs=include_dirs, 35 | define_macros=define_macros, 36 | extra_compile_args=extra_compile_args, 37 | ) 38 | ] 39 | 40 | return ext_modules 41 | 42 | 43 | with open("yolox/__init__.py", "r") as f: 44 | version = re.search( 45 | r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', 46 | f.read(), re.MULTILINE 47 | ).group(1) 48 | 49 | 50 | with open("README.md", "r") as f: 51 | long_description = f.read() 52 | 53 | 54 | setuptools.setup( 55 | name="yolox", 56 | version=version, 57 | author="basedet team", 58 | python_requires=">=3.6", 59 | long_description=long_description, 60 | ext_modules=get_extensions(), 61 | classifiers=["Programming Language :: Python :: 3", "Operating System :: OS Independent"], 62 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 63 | packages=setuptools.find_namespace_packages(), 64 | ) 65 | -------------------------------------------------------------------------------- /tools/convert_cityperson_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | from PIL import Image 5 | 6 | DATA_PATH = 'datasets/Cityscapes/' 7 | DATA_FILE_PATH = 'datasets/data_path/citypersons.train' 8 | OUT_PATH = DATA_PATH + 'annotations/' 9 | 10 | def load_paths(data_path): 11 | with open(data_path, 'r') as file: 12 | img_files = file.readlines() 13 | img_files = [x.replace('\n', '') for x in img_files] 14 | img_files = list(filter(lambda x: len(x) > 0, img_files)) 15 | label_files = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt') for x in img_files] 16 | return img_files, label_files 17 | 18 | if __name__ == '__main__': 19 | if not os.path.exists(OUT_PATH): 20 | os.mkdir(OUT_PATH) 21 | 22 | out_path = OUT_PATH + 'train.json' 23 | out = {'images': [], 'annotations': [], 'categories': [{'id': 1, 'name': 'person'}]} 24 | img_paths, label_paths = load_paths(DATA_FILE_PATH) 25 | image_cnt = 0 26 | ann_cnt = 0 27 | video_cnt = 0 28 | for img_path, label_path in zip(img_paths, label_paths): 29 | image_cnt += 1 30 | im = Image.open(os.path.join("datasets", img_path)) 31 | image_info = {'file_name': img_path, 32 | 'id': image_cnt, 33 | 'height': im.size[1], 34 | 'width': im.size[0]} 35 | out['images'].append(image_info) 36 | # Load labels 37 | if os.path.isfile(os.path.join("datasets", label_path)): 38 | labels0 = np.loadtxt(os.path.join("datasets", label_path), dtype=np.float32).reshape(-1, 6) 39 | # Normalized xywh to pixel xyxy format 40 | labels = labels0.copy() 41 | labels[:, 2] = image_info['width'] * (labels0[:, 2] - labels0[:, 4] / 2) 42 | labels[:, 3] = image_info['height'] * (labels0[:, 3] - 
labels0[:, 5] / 2) 43 | labels[:, 4] = image_info['width'] * labels0[:, 4] 44 | labels[:, 5] = image_info['height'] * labels0[:, 5] 45 | else: 46 | labels = np.array([]) 47 | for i in range(len(labels)): 48 | ann_cnt += 1 49 | fbox = labels[i, 2:6].tolist() 50 | ann = {'id': ann_cnt, 51 | 'category_id': 1, 52 | 'image_id': image_cnt, 53 | 'track_id': -1, 54 | 'bbox': fbox, 55 | 'area': fbox[2] * fbox[3], 56 | 'iscrowd': 0} 57 | out['annotations'].append(ann) 58 | print('loaded train for {} images and {} samples'.format(len(out['images']), len(out['annotations']))) 59 | json.dump(out, open(out_path, 'w')) 60 | -------------------------------------------------------------------------------- /tools/convert_crowdhuman_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | from PIL import Image 5 | 6 | DATA_PATH = 'datasets/crowdhuman/' 7 | OUT_PATH = DATA_PATH + 'annotations/' 8 | SPLITS = ['val', 'train'] 9 | DEBUG = False 10 | 11 | def load_func(fpath): 12 | print('fpath', fpath) 13 | assert os.path.exists(fpath) 14 | with open(fpath,'r') as fid: 15 | lines = fid.readlines() 16 | records =[json.loads(line.strip('\n')) for line in lines] 17 | return records 18 | 19 | if __name__ == '__main__': 20 | if not os.path.exists(OUT_PATH): 21 | os.mkdir(OUT_PATH) 22 | for split in SPLITS: 23 | data_path = DATA_PATH + split 24 | out_path = OUT_PATH + '{}.json'.format(split) 25 | out = {'images': [], 'annotations': [], 'categories': [{'id': 1, 'name': 'person'}]} 26 | ann_path = DATA_PATH + 'annotation_{}.odgt'.format(split) 27 | anns_data = load_func(ann_path) 28 | image_cnt = 0 29 | ann_cnt = 0 30 | video_cnt = 0 31 | for ann_data in anns_data: 32 | image_cnt += 1 33 | file_path = DATA_PATH + 'CrowdHuman_{}/'.format(split) + '{}.jpg'.format(ann_data['ID']) 34 | im = Image.open(file_path) 35 | image_info = {'file_name': '{}.jpg'.format(ann_data['ID']), 36 | 'id': image_cnt, 37 | 'height': im.size[1], 38 | 'width': im.size[0]} 39 | out['images'].append(image_info) 40 | if split != 'test': 41 | anns = ann_data['gtboxes'] 42 | for i in range(len(anns)): 43 | ann_cnt += 1 44 | fbox = anns[i]['fbox'] 45 | ann = {'id': ann_cnt, 46 | 'category_id': 1, 47 | 'image_id': image_cnt, 48 | 'track_id': -1, 49 | 'bbox_vis': anns[i]['vbox'], 50 | 'bbox': fbox, 51 | 'area': fbox[2] * fbox[3], 52 | 'iscrowd': 1 if 'extra' in anns[i] and \ 53 | 'ignore' in anns[i]['extra'] and \ 54 | anns[i]['extra']['ignore'] == 1 else 0} 55 | out['annotations'].append(ann) 56 | print('loaded {} for {} images and {} samples'.format(split, len(out['images']), len(out['annotations']))) 57 | json.dump(out, open(out_path, 'w')) -------------------------------------------------------------------------------- /tools/convert_ethz_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | from PIL import Image 5 | 6 | DATA_PATH = 'datasets/ETHZ/' 7 | DATA_FILE_PATH = 'datasets/data_path/eth.train' 8 | OUT_PATH = DATA_PATH + 'annotations/' 9 | 10 | def load_paths(data_path): 11 | with open(data_path, 'r') as file: 12 | img_files = file.readlines() 13 | img_files = [x.replace('\n', '') for x in img_files] 14 | img_files = list(filter(lambda x: len(x) > 0, img_files)) 15 | label_files = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt') for x in img_files] 16 | return img_files, label_files 17 | 18 | if __name__ == '__main__': 
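# Each row of the labels_with_ids/*.txt files holds six numbers; columns 2-5 are the
# normalized box center (cx, cy) and size (w, h). The conversion further down rescales
# them to the pixel top-left (x, y, width, height) boxes that COCO expects, even though
# the inline comment there says "pixel xyxy".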
19 | if not os.path.exists(OUT_PATH): 20 | os.mkdir(OUT_PATH) 21 | 22 | out_path = OUT_PATH + 'train.json' 23 | out = {'images': [], 'annotations': [], 'categories': [{'id': 1, 'name': 'person'}]} 24 | img_paths, label_paths = load_paths(DATA_FILE_PATH) 25 | image_cnt = 0 26 | ann_cnt = 0 27 | video_cnt = 0 28 | for img_path, label_path in zip(img_paths, label_paths): 29 | image_cnt += 1 30 | im = Image.open(os.path.join("datasets", img_path)) 31 | image_info = {'file_name': img_path, 32 | 'id': image_cnt, 33 | 'height': im.size[1], 34 | 'width': im.size[0]} 35 | out['images'].append(image_info) 36 | # Load labels 37 | if os.path.isfile(os.path.join("datasets", label_path)): 38 | labels0 = np.loadtxt(os.path.join("datasets", label_path), dtype=np.float32).reshape(-1, 6) 39 | # Normalized xywh to pixel xyxy format 40 | labels = labels0.copy() 41 | labels[:, 2] = image_info['width'] * (labels0[:, 2] - labels0[:, 4] / 2) 42 | labels[:, 3] = image_info['height'] * (labels0[:, 3] - labels0[:, 5] / 2) 43 | labels[:, 4] = image_info['width'] * labels0[:, 4] 44 | labels[:, 5] = image_info['height'] * labels0[:, 5] 45 | else: 46 | labels = np.array([]) 47 | for i in range(len(labels)): 48 | ann_cnt += 1 49 | fbox = labels[i, 2:6].tolist() 50 | ann = {'id': ann_cnt, 51 | 'category_id': 1, 52 | 'image_id': image_cnt, 53 | 'track_id': -1, 54 | 'bbox': fbox, 55 | 'area': fbox[2] * fbox[3], 56 | 'iscrowd': 0} 57 | out['annotations'].append(ann) 58 | print('loaded train for {} images and {} samples'.format(len(out['images']), len(out['annotations']))) 59 | json.dump(out, open(out_path, 'w')) 60 | -------------------------------------------------------------------------------- /tools/convert_video.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | def convert_video(video_path): 4 | cap = cv2.VideoCapture(video_path) 5 | width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float 6 | height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float 7 | fps = cap.get(cv2.CAP_PROP_FPS) 8 | video_name = video_path.split('/')[-1].split('.')[0] 9 | save_name = video_name + '_converted' 10 | save_path = video_path.replace(video_name, save_name) 11 | vid_writer = cv2.VideoWriter( 12 | save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)) 13 | ) 14 | while True: 15 | ret_val, frame = cap.read() 16 | if ret_val: 17 | vid_writer.write(frame) 18 | ch = cv2.waitKey(1) 19 | if ch == 27 or ch == ord("q") or ch == ord("Q"): 20 | break 21 | else: 22 | break 23 | 24 | if __name__ == "__main__": 25 | video_path = 'videos/palace.mp4' 26 | convert_video(video_path) -------------------------------------------------------------------------------- /tools/export_onnx.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | 3 | import torch 4 | from torch import nn 5 | 6 | from yolox.exp import get_exp 7 | from yolox.models.network_blocks import SiLU 8 | from yolox.utils import replace_module 9 | 10 | import argparse 11 | import os 12 | 13 | 14 | def make_parser(): 15 | parser = argparse.ArgumentParser("YOLOX onnx deploy") 16 | parser.add_argument( 17 | "--output-name", type=str, default="bytetrack_s.onnx", help="output name of models" 18 | ) 19 | parser.add_argument( 20 | "--input", default="images", type=str, help="input node name of onnx model" 21 | ) 22 | parser.add_argument( 23 | "--output", default="output", type=str, help="output node name of onnx model" 24 | ) 25 | parser.add_argument( 26 | "-o", "--opset", 
default=11, type=int, help="onnx opset version" 27 | ) 28 | parser.add_argument("--no-onnxsim", action="store_true", help="use onnxsim or not") 29 | parser.add_argument( 30 | "-f", 31 | "--exp_file", 32 | default=None, 33 | type=str, 34 | help="expriment description file", 35 | ) 36 | parser.add_argument("-expn", "--experiment-name", type=str, default=None) 37 | parser.add_argument("-n", "--name", type=str, default=None, help="model name") 38 | parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt path") 39 | parser.add_argument( 40 | "opts", 41 | help="Modify config options using the command-line", 42 | default=None, 43 | nargs=argparse.REMAINDER, 44 | ) 45 | 46 | return parser 47 | 48 | 49 | @logger.catch 50 | def main(): 51 | args = make_parser().parse_args() 52 | logger.info("args value: {}".format(args)) 53 | exp = get_exp(args.exp_file, args.name) 54 | exp.merge(args.opts) 55 | 56 | if not args.experiment_name: 57 | args.experiment_name = exp.exp_name 58 | 59 | model = exp.get_model() 60 | if args.ckpt is None: 61 | file_name = os.path.join(exp.output_dir, args.experiment_name) 62 | ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar") 63 | else: 64 | ckpt_file = args.ckpt 65 | 66 | # load the model state dict 67 | ckpt = torch.load(ckpt_file, map_location="cpu") 68 | 69 | model.eval() 70 | if "model" in ckpt: 71 | ckpt = ckpt["model"] 72 | model.load_state_dict(ckpt) 73 | model = replace_module(model, nn.SiLU, SiLU) 74 | model.head.decode_in_inference = False 75 | 76 | logger.info("loading checkpoint done.") 77 | dummy_input = torch.randn(1, 3, exp.test_size[0], exp.test_size[1]) 78 | torch.onnx._export( 79 | model, 80 | dummy_input, 81 | args.output_name, 82 | input_names=[args.input], 83 | output_names=[args.output], 84 | opset_version=args.opset, 85 | ) 86 | logger.info("generated onnx model named {}".format(args.output_name)) 87 | 88 | if not args.no_onnxsim: 89 | import onnx 90 | 91 | from onnxsim import simplify 92 | 93 | # use onnxsimplify to reduce reduent model. 94 | onnx_model = onnx.load(args.output_name) 95 | model_simp, check = simplify(onnx_model) 96 | assert check, "Simplified ONNX model could not be validated" 97 | onnx.save(model_simp, args.output_name) 98 | logger.info("generated simplified onnx model named {}".format(args.output_name)) 99 | 100 | 101 | if __name__ == "__main__": 102 | main() 103 | -------------------------------------------------------------------------------- /tools/mix_data_ablation.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | """ 6 | cd datasets 7 | mkdir -p mix_mot_ch/annotations 8 | cp mot/annotations/val_half.json mix_mot_ch/annotations/val_half.json 9 | cp mot/annotations/test.json mix_mot_ch/annotations/test.json 10 | cd mix_mot_ch 11 | ln -s ../mot/train mot_train 12 | ln -s ../crowdhuman/CrowdHuman_train crowdhuman_train 13 | ln -s ../crowdhuman/CrowdHuman_val crowdhuman_val 14 | cd .. 
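# note: crowdhuman/annotations/{train,val}.json come from tools/convert_crowdhuman_to_coco.py;
# mot/annotations/train_half.json and val_half.json are the half splits expected from tools/convert_mot17_to_coco.py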
15 | """ 16 | 17 | mot_json = json.load(open('datasets/mot/annotations/train_half.json','r')) 18 | 19 | img_list = list() 20 | for img in mot_json['images']: 21 | img['file_name'] = 'mot_train/' + img['file_name'] 22 | img_list.append(img) 23 | 24 | ann_list = list() 25 | for ann in mot_json['annotations']: 26 | ann_list.append(ann) 27 | 28 | video_list = mot_json['videos'] 29 | category_list = mot_json['categories'] 30 | 31 | print('mot17') 32 | 33 | max_img = 10000 34 | max_ann = 2000000 35 | max_video = 10 36 | 37 | crowdhuman_json = json.load(open('datasets/crowdhuman/annotations/train.json','r')) 38 | img_id_count = 0 39 | for img in crowdhuman_json['images']: 40 | img_id_count += 1 41 | img['file_name'] = 'crowdhuman_train/' + img['file_name'] 42 | img['frame_id'] = img_id_count 43 | img['prev_image_id'] = img['id'] + max_img 44 | img['next_image_id'] = img['id'] + max_img 45 | img['id'] = img['id'] + max_img 46 | img['video_id'] = max_video 47 | img_list.append(img) 48 | 49 | for ann in crowdhuman_json['annotations']: 50 | ann['id'] = ann['id'] + max_ann 51 | ann['image_id'] = ann['image_id'] + max_img 52 | ann_list.append(ann) 53 | 54 | video_list.append({ 55 | 'id': max_video, 56 | 'file_name': 'crowdhuman_train' 57 | }) 58 | 59 | print('crowdhuman_train') 60 | 61 | max_img = 30000 62 | max_ann = 10000000 63 | 64 | crowdhuman_val_json = json.load(open('datasets/crowdhuman/annotations/val.json','r')) 65 | img_id_count = 0 66 | for img in crowdhuman_val_json['images']: 67 | img_id_count += 1 68 | img['file_name'] = 'crowdhuman_val/' + img['file_name'] 69 | img['frame_id'] = img_id_count 70 | img['prev_image_id'] = img['id'] + max_img 71 | img['next_image_id'] = img['id'] + max_img 72 | img['id'] = img['id'] + max_img 73 | img['video_id'] = max_video 74 | img_list.append(img) 75 | 76 | for ann in crowdhuman_val_json['annotations']: 77 | ann['id'] = ann['id'] + max_ann 78 | ann['image_id'] = ann['image_id'] + max_img 79 | ann_list.append(ann) 80 | 81 | video_list.append({ 82 | 'id': max_video, 83 | 'file_name': 'crowdhuman_val' 84 | }) 85 | 86 | print('crowdhuman_val') 87 | 88 | mix_json = dict() 89 | mix_json['images'] = img_list 90 | mix_json['annotations'] = ann_list 91 | mix_json['videos'] = video_list 92 | mix_json['categories'] = category_list 93 | json.dump(mix_json, open('datasets/mix_mot_ch/annotations/train.json','w')) -------------------------------------------------------------------------------- /tools/mix_data_test_mot17.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | """ 6 | cd datasets 7 | mkdir -p mix_det/annotations 8 | cp mot/annotations/val_half.json mix_det/annotations/val_half.json 9 | cp mot/annotations/test.json mix_det/annotations/test.json 10 | cd mix_det 11 | ln -s ../mot/train mot_train 12 | ln -s ../crowdhuman/CrowdHuman_train crowdhuman_train 13 | ln -s ../crowdhuman/CrowdHuman_val crowdhuman_val 14 | ln -s ../Cityscapes cp_train 15 | ln -s ../ETHZ ethz_train 16 | cd .. 
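# note: the ETHZ and Cityscapes jsons loaded below are the ones written by
# tools/convert_ethz_to_coco.py and tools/convert_cityperson_to_coco.py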
17 | """ 18 | 19 | mot_json = json.load(open('datasets/mot/annotations/train.json','r')) 20 | 21 | img_list = list() 22 | for img in mot_json['images']: 23 | img['file_name'] = 'mot_train/' + img['file_name'] 24 | img_list.append(img) 25 | 26 | ann_list = list() 27 | for ann in mot_json['annotations']: 28 | ann_list.append(ann) 29 | 30 | video_list = mot_json['videos'] 31 | category_list = mot_json['categories'] 32 | 33 | 34 | print('mot17') 35 | 36 | max_img = 10000 37 | max_ann = 2000000 38 | max_video = 10 39 | 40 | crowdhuman_json = json.load(open('datasets/crowdhuman/annotations/train.json','r')) 41 | img_id_count = 0 42 | for img in crowdhuman_json['images']: 43 | img_id_count += 1 44 | img['file_name'] = 'crowdhuman_train/' + img['file_name'] 45 | img['frame_id'] = img_id_count 46 | img['prev_image_id'] = img['id'] + max_img 47 | img['next_image_id'] = img['id'] + max_img 48 | img['id'] = img['id'] + max_img 49 | img['video_id'] = max_video 50 | img_list.append(img) 51 | 52 | for ann in crowdhuman_json['annotations']: 53 | ann['id'] = ann['id'] + max_ann 54 | ann['image_id'] = ann['image_id'] + max_img 55 | ann_list.append(ann) 56 | 57 | print('crowdhuman_train') 58 | 59 | video_list.append({ 60 | 'id': max_video, 61 | 'file_name': 'crowdhuman_train' 62 | }) 63 | 64 | 65 | max_img = 30000 66 | max_ann = 10000000 67 | 68 | crowdhuman_val_json = json.load(open('datasets/crowdhuman/annotations/val.json','r')) 69 | img_id_count = 0 70 | for img in crowdhuman_val_json['images']: 71 | img_id_count += 1 72 | img['file_name'] = 'crowdhuman_val/' + img['file_name'] 73 | img['frame_id'] = img_id_count 74 | img['prev_image_id'] = img['id'] + max_img 75 | img['next_image_id'] = img['id'] + max_img 76 | img['id'] = img['id'] + max_img 77 | img['video_id'] = max_video 78 | img_list.append(img) 79 | 80 | for ann in crowdhuman_val_json['annotations']: 81 | ann['id'] = ann['id'] + max_ann 82 | ann['image_id'] = ann['image_id'] + max_img 83 | ann_list.append(ann) 84 | 85 | print('crowdhuman_val') 86 | 87 | video_list.append({ 88 | 'id': max_video, 89 | 'file_name': 'crowdhuman_val' 90 | }) 91 | 92 | max_img = 40000 93 | max_ann = 20000000 94 | 95 | ethz_json = json.load(open('datasets/ETHZ/annotations/train.json','r')) 96 | img_id_count = 0 97 | for img in ethz_json['images']: 98 | img_id_count += 1 99 | img['file_name'] = 'ethz_train/' + img['file_name'][5:] 100 | img['frame_id'] = img_id_count 101 | img['prev_image_id'] = img['id'] + max_img 102 | img['next_image_id'] = img['id'] + max_img 103 | img['id'] = img['id'] + max_img 104 | img['video_id'] = max_video 105 | img_list.append(img) 106 | 107 | for ann in ethz_json['annotations']: 108 | ann['id'] = ann['id'] + max_ann 109 | ann['image_id'] = ann['image_id'] + max_img 110 | ann_list.append(ann) 111 | 112 | print('ETHZ') 113 | 114 | video_list.append({ 115 | 'id': max_video, 116 | 'file_name': 'ethz' 117 | }) 118 | 119 | max_img = 50000 120 | max_ann = 25000000 121 | 122 | cp_json = json.load(open('datasets/Cityscapes/annotations/train.json','r')) 123 | img_id_count = 0 124 | for img in cp_json['images']: 125 | img_id_count += 1 126 | img['file_name'] = 'cp_train/' + img['file_name'][11:] 127 | img['frame_id'] = img_id_count 128 | img['prev_image_id'] = img['id'] + max_img 129 | img['next_image_id'] = img['id'] + max_img 130 | img['id'] = img['id'] + max_img 131 | img['video_id'] = max_video 132 | img_list.append(img) 133 | 134 | for ann in cp_json['annotations']: 135 | ann['id'] = ann['id'] + max_ann 136 | ann['image_id'] = ann['image_id'] + 
max_img 137 | ann_list.append(ann) 138 | 139 | print('Cityscapes') 140 | 141 | video_list.append({ 142 | 'id': max_video, 143 | 'file_name': 'cityperson' 144 | }) 145 | 146 | mix_json = dict() 147 | mix_json['images'] = img_list 148 | mix_json['annotations'] = ann_list 149 | mix_json['videos'] = video_list 150 | mix_json['categories'] = category_list 151 | json.dump(mix_json, open('datasets/mix_det/annotations/train.json','w')) 152 | -------------------------------------------------------------------------------- /tools/mix_data_test_mot20.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | """ 6 | cd datasets 7 | mkdir -p mix_mot20_ch/annotations 8 | cp MOT20/annotations/val_half.json mix_mot20_ch/annotations/val_half.json 9 | cp MOT20/annotations/test.json mix_mot20_ch/annotations/test.json 10 | cd mix_mot20_ch 11 | ln -s ../MOT20/train mot20_train 12 | ln -s ../crowdhuman/CrowdHuman_train crowdhuman_train 13 | ln -s ../crowdhuman/CrowdHuman_val crowdhuman_val 14 | cd .. 15 | """ 16 | 17 | mot_json = json.load(open('datasets/MOT20/annotations/train.json','r')) 18 | 19 | img_list = list() 20 | for img in mot_json['images']: 21 | img['file_name'] = 'mot20_train/' + img['file_name'] 22 | img_list.append(img) 23 | 24 | ann_list = list() 25 | for ann in mot_json['annotations']: 26 | ann_list.append(ann) 27 | 28 | video_list = mot_json['videos'] 29 | category_list = mot_json['categories'] 30 | 31 | 32 | 33 | 34 | max_img = 10000 35 | max_ann = 2000000 36 | max_video = 10 37 | 38 | crowdhuman_json = json.load(open('datasets/crowdhuman/annotations/train.json','r')) 39 | img_id_count = 0 40 | for img in crowdhuman_json['images']: 41 | img_id_count += 1 42 | img['file_name'] = 'crowdhuman_train/' + img['file_name'] 43 | img['frame_id'] = img_id_count 44 | img['prev_image_id'] = img['id'] + max_img 45 | img['next_image_id'] = img['id'] + max_img 46 | img['id'] = img['id'] + max_img 47 | img['video_id'] = max_video 48 | img_list.append(img) 49 | 50 | for ann in crowdhuman_json['annotations']: 51 | ann['id'] = ann['id'] + max_ann 52 | ann['image_id'] = ann['image_id'] + max_img 53 | ann_list.append(ann) 54 | 55 | video_list.append({ 56 | 'id': max_video, 57 | 'file_name': 'crowdhuman_train' 58 | }) 59 | 60 | 61 | max_img = 30000 62 | max_ann = 10000000 63 | 64 | crowdhuman_val_json = json.load(open('datasets/crowdhuman/annotations/val.json','r')) 65 | img_id_count = 0 66 | for img in crowdhuman_val_json['images']: 67 | img_id_count += 1 68 | img['file_name'] = 'crowdhuman_val/' + img['file_name'] 69 | img['frame_id'] = img_id_count 70 | img['prev_image_id'] = img['id'] + max_img 71 | img['next_image_id'] = img['id'] + max_img 72 | img['id'] = img['id'] + max_img 73 | img['video_id'] = max_video 74 | img_list.append(img) 75 | 76 | for ann in crowdhuman_val_json['annotations']: 77 | ann['id'] = ann['id'] + max_ann 78 | ann['image_id'] = ann['image_id'] + max_img 79 | ann_list.append(ann) 80 | 81 | video_list.append({ 82 | 'id': max_video, 83 | 'file_name': 'crowdhuman_val' 84 | }) 85 | 86 | mix_json = dict() 87 | mix_json['images'] = img_list 88 | mix_json['annotations'] = ann_list 89 | mix_json['videos'] = video_list 90 | mix_json['categories'] = category_list 91 | json.dump(mix_json, open('datasets/mix_mot20_ch/annotations/train.json','w')) -------------------------------------------------------------------------------- /tools/mota.py: -------------------------------------------------------------------------------- 1 | from 
loguru import logger 2 | 3 | import torch 4 | import torch.backends.cudnn as cudnn 5 | from torch.nn.parallel import DistributedDataParallel as DDP 6 | 7 | from yolox.core import launch 8 | from yolox.exp import get_exp 9 | from yolox.utils import configure_nccl, fuse_model, get_local_rank, get_model_info, setup_logger 10 | from yolox.evaluators import MOTEvaluator 11 | 12 | import argparse 13 | import os 14 | import random 15 | import warnings 16 | import glob 17 | import motmetrics as mm 18 | from collections import OrderedDict 19 | from pathlib import Path 20 | 21 | 22 | def compare_dataframes(gts, ts): 23 | accs = [] 24 | names = [] 25 | for k, tsacc in ts.items(): 26 | if k in gts: 27 | logger.info('Comparing {}...'.format(k)) 28 | accs.append(mm.utils.compare_to_groundtruth(gts[k], tsacc, 'iou', distth=0.5)) 29 | names.append(k) 30 | else: 31 | logger.warning('No ground truth for {}, skipping.'.format(k)) 32 | 33 | return accs, names 34 | 35 | 36 | # evaluate MOTA 37 | results_folder = 'YOLOX_outputs/yolox_x_ablation/track_results' 38 | mm.lap.default_solver = 'lap' 39 | 40 | gt_type = '_val_half' 41 | #gt_type = '' 42 | print('gt_type', gt_type) 43 | gtfiles = glob.glob( 44 | os.path.join('datasets/mot/train', '*/gt/gt{}.txt'.format(gt_type))) 45 | print('gt_files', gtfiles) 46 | tsfiles = [f for f in glob.glob(os.path.join(results_folder, '*.txt')) if not os.path.basename(f).startswith('eval')] 47 | 48 | logger.info('Found {} groundtruths and {} test files.'.format(len(gtfiles), len(tsfiles))) 49 | logger.info('Available LAP solvers {}'.format(mm.lap.available_solvers)) 50 | logger.info('Default LAP solver \'{}\''.format(mm.lap.default_solver)) 51 | logger.info('Loading files.') 52 | 53 | gt = OrderedDict([(Path(f).parts[-3], mm.io.loadtxt(f, fmt='mot15-2D', min_confidence=1)) for f in gtfiles]) 54 | ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], mm.io.loadtxt(f, fmt='mot15-2D', min_confidence=0.6)) for f in tsfiles]) 55 | 56 | mh = mm.metrics.create() 57 | accs, names = compare_dataframes(gt, ts) 58 | 59 | logger.info('Running metrics') 60 | metrics = ['recall', 'precision', 'num_unique_objects', 'mostly_tracked', 61 | 'partially_tracked', 'mostly_lost', 'num_false_positives', 'num_misses', 62 | 'num_switches', 'num_fragmentations', 'mota', 'motp', 'num_objects'] 63 | summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) 64 | # summary = mh.compute_many(accs, names=names, metrics=mm.metrics.motchallenge_metrics, generate_overall=True) 65 | # print(mm.io.render_summary( 66 | # summary, formatters=mh.formatters, 67 | # namemap=mm.io.motchallenge_metric_names)) 68 | div_dict = { 69 | 'num_objects': ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations'], 70 | 'num_unique_objects': ['mostly_tracked', 'partially_tracked', 'mostly_lost']} 71 | for divisor in div_dict: 72 | for divided in div_dict[divisor]: 73 | summary[divided] = (summary[divided] / summary[divisor]) 74 | fmt = mh.formatters 75 | change_fmt_list = ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations', 'mostly_tracked', 76 | 'partially_tracked', 'mostly_lost'] 77 | for k in change_fmt_list: 78 | fmt[k] = fmt['mota'] 79 | print(mm.io.render_summary(summary, formatters=fmt, namemap=mm.io.motchallenge_metric_names)) 80 | 81 | metrics = mm.metrics.motchallenge_metrics + ['num_objects'] 82 | summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) 83 | print(mm.io.render_summary(summary, formatters=mh.formatters, 
namemap=mm.io.motchallenge_metric_names)) 84 | logger.info('Completed') -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | 3 | import torch 4 | import torch.backends.cudnn as cudnn 5 | 6 | from yolox.core import Trainer, launch 7 | from yolox.exp import get_exp 8 | 9 | import argparse 10 | import random 11 | import warnings 12 | 13 | 14 | def make_parser(): 15 | parser = argparse.ArgumentParser("YOLOX train parser") 16 | parser.add_argument("-expn", "--experiment-name", type=str, default=None) 17 | parser.add_argument("-n", "--name", type=str, default=None, help="model name") 18 | 19 | # distributed 20 | parser.add_argument( 21 | "--dist-backend", default="nccl", type=str, help="distributed backend" 22 | ) 23 | parser.add_argument( 24 | "--dist-url", 25 | default=None, 26 | type=str, 27 | help="url used to set up distributed training", 28 | ) 29 | parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size") 30 | parser.add_argument( 31 | "-d", "--devices", default=None, type=int, help="device for training" 32 | ) 33 | parser.add_argument( 34 | "--local_rank", default=0, type=int, help="local rank for dist training" 35 | ) 36 | parser.add_argument( 37 | "-f", 38 | "--exp_file", 39 | default=None, 40 | type=str, 41 | help="plz input your expriment description file", 42 | ) 43 | parser.add_argument( 44 | "--resume", default=False, action="store_true", help="resume training" 45 | ) 46 | parser.add_argument("-c", "--ckpt", default=None, type=str, help="checkpoint file") 47 | parser.add_argument( 48 | "-e", 49 | "--start_epoch", 50 | default=None, 51 | type=int, 52 | help="resume training start epoch", 53 | ) 54 | parser.add_argument( 55 | "--num_machines", default=1, type=int, help="num of node for training" 56 | ) 57 | parser.add_argument( 58 | "--machine_rank", default=0, type=int, help="node rank for multi-node training" 59 | ) 60 | parser.add_argument( 61 | "--fp16", 62 | dest="fp16", 63 | default=True, 64 | action="store_true", 65 | help="Adopting mix precision training.", 66 | ) 67 | parser.add_argument( 68 | "-o", 69 | "--occupy", 70 | dest="occupy", 71 | default=False, 72 | action="store_true", 73 | help="occupy GPU memory first for training.", 74 | ) 75 | parser.add_argument( 76 | "opts", 77 | help="Modify config options using the command-line", 78 | default=None, 79 | nargs=argparse.REMAINDER, 80 | ) 81 | return parser 82 | 83 | 84 | @logger.catch 85 | def main(exp, args): 86 | if exp.seed is not None: 87 | random.seed(exp.seed) 88 | torch.manual_seed(exp.seed) 89 | cudnn.deterministic = True 90 | warnings.warn( 91 | "You have chosen to seed training. This will turn on the CUDNN deterministic setting, " 92 | "which can slow down your training considerably! You may see unexpected behavior " 93 | "when restarting from checkpoints." 
94 | ) 95 | 96 | # set environment variables for distributed training 97 | cudnn.benchmark = True 98 | 99 | trainer = Trainer(exp, args) 100 | trainer.train() 101 | 102 | 103 | if __name__ == "__main__": 104 | args = make_parser().parse_args() 105 | exp = get_exp(args.exp_file, args.name) 106 | exp.merge(args.opts) 107 | 108 | if not args.experiment_name: 109 | args.experiment_name = exp.exp_name 110 | 111 | num_gpu = torch.cuda.device_count() if args.devices is None else args.devices 112 | assert num_gpu <= torch.cuda.device_count() 113 | 114 | launch( 115 | main, 116 | num_gpu, 117 | args.num_machines, 118 | args.machine_rank, 119 | backend=args.dist_backend, 120 | dist_url=args.dist_url, 121 | args=(exp, args), 122 | ) 123 | -------------------------------------------------------------------------------- /tools/trt.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | 3 | import tensorrt as trt 4 | import torch 5 | from torch2trt import torch2trt 6 | 7 | from yolox.exp import get_exp 8 | 9 | import argparse 10 | import os 11 | import shutil 12 | 13 | 14 | def make_parser(): 15 | parser = argparse.ArgumentParser("YOLOX ncnn deploy") 16 | parser.add_argument("-expn", "--experiment-name", type=str, default=None) 17 | parser.add_argument("-n", "--name", type=str, default=None, help="model name") 18 | 19 | parser.add_argument( 20 | "-f", 21 | "--exp_file", 22 | default=None, 23 | type=str, 24 | help="pls input your expriment description file", 25 | ) 26 | parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt path") 27 | return parser 28 | 29 | 30 | @logger.catch 31 | def main(): 32 | args = make_parser().parse_args() 33 | exp = get_exp(args.exp_file, args.name) 34 | if not args.experiment_name: 35 | args.experiment_name = exp.exp_name 36 | 37 | model = exp.get_model() 38 | file_name = os.path.join(exp.output_dir, args.experiment_name) 39 | os.makedirs(file_name, exist_ok=True) 40 | if args.ckpt is None: 41 | ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar") 42 | else: 43 | ckpt_file = args.ckpt 44 | 45 | ckpt = torch.load(ckpt_file, map_location="cpu") 46 | # load the model state dict 47 | 48 | model.load_state_dict(ckpt["model"]) 49 | logger.info("loaded checkpoint done.") 50 | model.eval() 51 | model.cuda() 52 | model.head.decode_in_inference = False 53 | x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda() 54 | model_trt = torch2trt( 55 | model, 56 | [x], 57 | fp16_mode=True, 58 | log_level=trt.Logger.INFO, 59 | max_workspace_size=(1 << 32), 60 | ) 61 | torch.save(model_trt.state_dict(), os.path.join(file_name, "model_trt.pth")) 62 | logger.info("Converted TensorRT model done.") 63 | engine_file = os.path.join(file_name, "model_trt.engine") 64 | engine_file_demo = os.path.join("deploy", "TensorRT", "cpp", "model_trt.engine") 65 | with open(engine_file, "wb") as f: 66 | f.write(model_trt.engine.serialize()) 67 | 68 | shutil.copyfile(engine_file, engine_file_demo) 69 | 70 | logger.info("Converted TensorRT model engine file is saved for C++ inference.") 71 | 72 | 73 | if __name__ == "__main__": 74 | main() 75 | -------------------------------------------------------------------------------- /tools_yolov5/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/tools_yolov5/models/__init__.py 
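The sketch below is not part of this repo; it shows one way the `model_trt.pth` saved by `tools/trt.py` above could be reloaded for inference with torch2trt. The output path and the 608x1088 input size are assumptions and must match the experiment file used during conversion.

```python
# Hypothetical reload sketch, assuming torch2trt is installed and the engine was
# built on this same GPU; path and input size must match the conversion run.
import torch
from torch2trt import TRTModule

model_trt = TRTModule()
# state dict produced by tools/trt.py via torch.save(model_trt.state_dict(), ...)
model_trt.load_state_dict(torch.load("YOLOX_outputs/yolox_s_mix_det/model_trt.pth"))

x = torch.ones(1, 3, 608, 1088).cuda()  # (batch, channels, exp.test_size[0], exp.test_size[1])
with torch.no_grad():
    outputs = model_trt(x)  # raw head outputs: decode_in_inference was disabled before conversion
```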
-------------------------------------------------------------------------------- /tools_yolov5/models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /tools_yolov5/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/tools_yolov5/utils/__init__.py -------------------------------------------------------------------------------- /tools_yolov5/utils/activations.py: -------------------------------------------------------------------------------- 1 | # Activation functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | # SiLU https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------- 9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 10 | @staticmethod 11 | def forward(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 16 | @staticmethod 17 | def forward(x): 18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 19 | return x * F.hardtanh(x + 3, 0., 6.) / 6. 
# for torchscript, CoreML and ONNX 20 | 21 | 22 | class MemoryEfficientSwish(nn.Module): 23 | class F(torch.autograd.Function): 24 | @staticmethod 25 | def forward(ctx, x): 26 | ctx.save_for_backward(x) 27 | return x * torch.sigmoid(x) 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | x = ctx.saved_tensors[0] 32 | sx = torch.sigmoid(x) 33 | return grad_output * (sx * (1 + x * (1 - sx))) 34 | 35 | def forward(self, x): 36 | return self.F.apply(x) 37 | 38 | 39 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 40 | class Mish(nn.Module): 41 | @staticmethod 42 | def forward(x): 43 | return x * F.softplus(x).tanh() 44 | 45 | 46 | class MemoryEfficientMish(nn.Module): 47 | class F(torch.autograd.Function): 48 | @staticmethod 49 | def forward(ctx, x): 50 | ctx.save_for_backward(x) 51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 52 | 53 | @staticmethod 54 | def backward(ctx, grad_output): 55 | x = ctx.saved_tensors[0] 56 | sx = torch.sigmoid(x) 57 | fx = F.softplus(x).tanh() 58 | return grad_output * (fx + x * sx * (1 - fx * fx)) 59 | 60 | def forward(self, x): 61 | return self.F.apply(x) 62 | 63 | 64 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 65 | class FReLU(nn.Module): 66 | def __init__(self, c1, k=3): # ch_in, kernel 67 | super().__init__() 68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 69 | self.bn = nn.BatchNorm2d(c1) 70 | 71 | def forward(self, x): 72 | return torch.max(x, self.bn(self.conv(x))) 73 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.dockerignore: -------------------------------------------------------------------------------- 1 | # Repo-specific DockerIgnore ------------------------------------------------------------------------------------------- 2 | #.git 3 | .cache 4 | .idea 5 | runs 6 | output 7 | coco 8 | storage.googleapis.com 9 | 10 | data/samples/* 11 | **/results*.txt 12 | *.jpg 13 | 14 | # Neural Network weights ----------------------------------------------------------------------------------------------- 15 | **/*.weights 16 | **/*.pt 17 | **/*.pth 18 | **/*.onnx 19 | **/*.mlmodel 20 | **/*.torchscript 21 | 22 | 23 | # Below Copied From .gitignore ----------------------------------------------------------------------------------------- 24 | # Below Copied From .gitignore ----------------------------------------------------------------------------------------- 25 | 26 | 27 | # GitHub Python GitIgnore ---------------------------------------------------------------------------------------------- 28 | # Byte-compiled / optimized / DLL files 29 | __pycache__/ 30 | *.py[cod] 31 | *$py.class 32 | 33 | # C extensions 34 | *.so 35 | 36 | # Distribution / packaging 37 | .Python 38 | env/ 39 | build/ 40 | develop-eggs/ 41 | dist/ 42 | downloads/ 43 | eggs/ 44 | .eggs/ 45 | lib/ 46 | lib64/ 47 | parts/ 48 | sdist/ 49 | var/ 50 | wheels/ 51 | *.egg-info/ 52 | wandb/ 53 | .installed.cfg 54 | *.egg 55 | 56 | # PyInstaller 57 | # Usually these files are written by a python script from a template 58 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
59 | *.manifest 60 | *.spec 61 | 62 | # Installer logs 63 | pip-log.txt 64 | pip-delete-this-directory.txt 65 | 66 | # Unit test / coverage reports 67 | htmlcov/ 68 | .tox/ 69 | .coverage 70 | .coverage.* 71 | .cache 72 | nosetests.xml 73 | coverage.xml 74 | *.cover 75 | .hypothesis/ 76 | 77 | # Translations 78 | *.mo 79 | *.pot 80 | 81 | # Django stuff: 82 | *.log 83 | local_settings.py 84 | 85 | # Flask stuff: 86 | instance/ 87 | .webassets-cache 88 | 89 | # Scrapy stuff: 90 | .scrapy 91 | 92 | # Sphinx documentation 93 | docs/_build/ 94 | 95 | # PyBuilder 96 | target/ 97 | 98 | # Jupyter Notebook 99 | .ipynb_checkpoints 100 | 101 | # pyenv 102 | .python-version 103 | 104 | # celery beat schedule file 105 | celerybeat-schedule 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # dotenv 111 | .env 112 | 113 | # virtualenv 114 | .venv* 115 | venv*/ 116 | ENV*/ 117 | 118 | # Spyder project settings 119 | .spyderproject 120 | .spyproject 121 | 122 | # Rope project settings 123 | .ropeproject 124 | 125 | # mkdocs documentation 126 | /site 127 | 128 | # mypy 129 | .mypy_cache/ 130 | 131 | 132 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore ----------------------------------------------- 133 | 134 | # General 135 | .DS_Store 136 | .AppleDouble 137 | .LSOverride 138 | 139 | # Icon must end with two \r 140 | Icon 141 | Icon? 142 | 143 | # Thumbnails 144 | ._* 145 | 146 | # Files that might appear in the root of a volume 147 | .DocumentRevisions-V100 148 | .fseventsd 149 | .Spotlight-V100 150 | .TemporaryItems 151 | .Trashes 152 | .VolumeIcon.icns 153 | .com.apple.timemachine.donotpresent 154 | 155 | # Directories potentially created on remote AFP share 156 | .AppleDB 157 | .AppleDesktop 158 | Network Trash Folder 159 | Temporary Items 160 | .apdisk 161 | 162 | 163 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore 164 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 165 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 166 | 167 | # User-specific stuff: 168 | .idea/* 169 | .idea/**/workspace.xml 170 | .idea/**/tasks.xml 171 | .idea/dictionaries 172 | .html # Bokeh Plots 173 | .pg # TensorFlow Frozen Graphs 174 | .avi # videos 175 | 176 | # Sensitive or high-churn files: 177 | .idea/**/dataSources/ 178 | .idea/**/dataSources.ids 179 | .idea/**/dataSources.local.xml 180 | .idea/**/sqlDataSources.xml 181 | .idea/**/dynamic.xml 182 | .idea/**/uiDesigner.xml 183 | 184 | # Gradle: 185 | .idea/**/gradle.xml 186 | .idea/**/libraries 187 | 188 | # CMake 189 | cmake-build-debug/ 190 | cmake-build-release/ 191 | 192 | # Mongo Explorer plugin: 193 | .idea/**/mongoSettings.xml 194 | 195 | ## File-based project format: 196 | *.iws 197 | 198 | ## Plugin-specific files: 199 | 200 | # IntelliJ 201 | out/ 202 | 203 | # mpeltonen/sbt-idea plugin 204 | .idea_modules/ 205 | 206 | # JIRA plugin 207 | atlassian-ide-plugin.xml 208 | 209 | # Cursive Clojure plugin 210 | .idea/replstate.xml 211 | 212 | # Crashlytics plugin (for Android Studio and IntelliJ) 213 | com_crashlytics_export_strings.xml 214 | crashlytics.properties 215 | crashlytics-build.properties 216 | fabric.properties 217 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.gitattributes: -------------------------------------------------------------------------------- 1 | # this drop notebooks from GitHub language stats 2 | *.ipynb 
linguist-vendored 3 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "🐛 Bug report" 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | Before submitting a bug report, please be aware that your issue **must be reproducible** with all of the following, otherwise it is non-actionable, and we can not help you: 11 | - **Current repo**: run `git fetch && git status -uno` to check and `git pull` to update repo 12 | - **Common dataset**: coco.yaml or coco128.yaml 13 | - **Common environment**: Colab, Google Cloud, or Docker image. See https://github.com/ultralytics/yolov5#environments 14 | 15 | If this is a custom dataset/training question you **must include** your `train*.jpg`, `test*.jpg` and `results.png` figures, or we can not help you. You can generate these with `utils.plot_results()`. 16 | 17 | 18 | ## 🐛 Bug 19 | A clear and concise description of what the bug is. 20 | 21 | 22 | ## To Reproduce (REQUIRED) 23 | 24 | Input: 25 | ``` 26 | import torch 27 | 28 | a = torch.tensor([5]) 29 | c = a / 0 30 | ``` 31 | 32 | Output: 33 | ``` 34 | Traceback (most recent call last): 35 | File "/Users/glennjocher/opt/anaconda3/envs/env1/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code 36 | exec(code_obj, self.user_global_ns, self.user_ns) 37 | File "", line 5, in 38 | c = a / 0 39 | RuntimeError: ZeroDivisionError 40 | ``` 41 | 42 | 43 | ## Expected behavior 44 | A clear and concise description of what you expected to happen. 45 | 46 | 47 | ## Environment 48 | If applicable, add screenshots to help explain your problem. 49 | 50 | - OS: [e.g. Ubuntu] 51 | - GPU [e.g. 2080 Ti] 52 | 53 | 54 | ## Additional context 55 | Add any other context about the problem here. 
56 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "🚀 Feature request" 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## 🚀 Feature 11 | 12 | 13 | ## Motivation 14 | 15 | 16 | 17 | ## Pitch 18 | 19 | 20 | 21 | ## Alternatives 22 | 23 | 24 | 25 | ## Additional context 26 | 27 | 28 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "❓Question" 3 | about: Ask a general question 4 | title: '' 5 | labels: question 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## ❔Question 11 | 12 | 13 | ## Additional context 14 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | time: "04:00" 8 | open-pull-requests-limit: 10 9 | reviewers: 10 | - glenn-jocher 11 | labels: 12 | - dependencies 13 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/workflows/ci-testing.yml: -------------------------------------------------------------------------------- 1 | name: CI CPU testing 2 | 3 | on: # https://help.github.com/en/actions/reference/events-that-trigger-workflows 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | # The branches below must be a subset of the branches above 8 | branches: [ master ] 9 | schedule: 10 | - cron: '0 0 * * *' # Runs at 00:00 UTC every day 11 | 12 | jobs: 13 | cpu-tests: 14 | 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | os: [ubuntu-latest, macos-latest, windows-latest] 20 | python-version: [3.8] 21 | model: ['yolov5s'] # models to test 22 | 23 | # Timeout: https://stackoverflow.com/a/59076067/4521646 24 | timeout-minutes: 50 25 | steps: 26 | - uses: actions/checkout@v2 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v2 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | 32 | # Note: This uses an internal pip API and may not always work 33 | # https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow 34 | - name: Get pip cache 35 | id: pip-cache 36 | run: | 37 | python -c "from pip._internal.locations import USER_CACHE_DIR; print('::set-output name=dir::' + USER_CACHE_DIR)" 38 | 39 | - name: Cache pip 40 | uses: actions/cache@v1 41 | with: 42 | path: ${{ steps.pip-cache.outputs.dir }} 43 | key: ${{ runner.os }}-${{ matrix.python-version }}-pip-${{ hashFiles('requirements.txt') }} 44 | restore-keys: | 45 | ${{ runner.os }}-${{ matrix.python-version }}-pip- 46 | 47 | - name: Install dependencies 48 | run: | 49 | python -m pip install --upgrade pip 50 | pip install -qr requirements.txt -f https://download.pytorch.org/whl/cpu/torch_stable.html 51 | pip install -q onnx 52 | python --version 53 | pip --version 54 | pip list 55 | shell: bash 56 | 57 | - name: Download data 58 | run: | 59 | # curl -L -o tmp.zip https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip 60 | # 
unzip -q tmp.zip -d ../ 61 | # rm tmp.zip 62 | 63 | - name: Tests workflow 64 | run: | 65 | # export PYTHONPATH="$PWD" # to run '$ python *.py' files in subdirectories 66 | di=cpu # inference devices # define device 67 | 68 | # train 69 | python train.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --cfg models/${{ matrix.model }}.yaml --epochs 1 --device $di 70 | # detect 71 | python detect.py --weights weights/${{ matrix.model }}.pt --device $di 72 | python detect.py --weights runs/train/exp/weights/last.pt --device $di 73 | # test 74 | python test.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --device $di 75 | python test.py --img 256 --batch 8 --weights runs/train/exp/weights/last.pt --device $di 76 | 77 | python models/yolo.py --cfg models/${{ matrix.model }}.yaml # inspect 78 | python models/export.py --img 256 --batch 1 --weights weights/${{ matrix.model }}.pt # export 79 | shell: bash 80 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # This action runs GitHub's industry-leading static analysis engine, CodeQL, against a repository's source code to find security vulnerabilities. 2 | # https://github.com/github/codeql-action 3 | 4 | name: "CodeQL" 5 | 6 | on: 7 | schedule: 8 | - cron: '0 0 1 * *' # Runs at 00:00 UTC on the 1st of every month 9 | 10 | jobs: 11 | analyze: 12 | name: Analyze 13 | runs-on: ubuntu-latest 14 | 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | language: [ 'python' ] 19 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 20 | # Learn more: 21 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 22 | 23 | steps: 24 | - name: Checkout repository 25 | uses: actions/checkout@v2 26 | 27 | # Initializes the CodeQL tools for scanning. 28 | - name: Initialize CodeQL 29 | uses: github/codeql-action/init@v1 30 | with: 31 | languages: ${{ matrix.language }} 32 | # If you wish to specify custom queries, you can do so here or in a config file. 33 | # By default, queries listed here will override any specified in a config file. 34 | # Prefix the list here with "+" to use these queries and those in the config file. 35 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 36 | 37 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 38 | # If this step fails, then you should remove it and run the build manually (see below) 39 | - name: Autobuild 40 | uses: github/codeql-action/autobuild@v1 41 | 42 | # ℹ️ Command-line programs to run using the OS shell. 
43 | # 📚 https://git.io/JvXDl 44 | 45 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 46 | # and modify them (or add more) to build your code if your project 47 | # uses a compiled language 48 | 49 | #- run: | 50 | # make bootstrap 51 | # make release 52 | 53 | - name: Perform CodeQL Analysis 54 | uses: github/codeql-action/analyze@v1 55 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/workflows/greetings.yml: -------------------------------------------------------------------------------- 1 | name: Greetings 2 | 3 | on: [pull_request_target, issues] 4 | 5 | jobs: 6 | greeting: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/first-interaction@v1 10 | with: 11 | repo-token: ${{ secrets.GITHUB_TOKEN }} 12 | pr-message: | 13 | 👋 Hello @${{ github.actor }}, thank you for submitting a 🚀 PR! To allow your work to be integrated as seamlessly as possible, we advise you to: 14 | - ✅ Verify your PR is **up-to-date with origin/master.** If your PR is behind origin/master update by running the following, replacing 'feature' with the name of your local branch: 15 | ```bash 16 | git remote add upstream https://github.com/ultralytics/yolov5.git 17 | git fetch upstream 18 | git checkout feature # <----- replace 'feature' with local branch name 19 | git rebase upstream/master 20 | git push -u origin -f 21 | ``` 22 | - ✅ Verify all Continuous Integration (CI) **checks are passing**. 23 | - ✅ Reduce changes to the absolute **minimum** required for your bug fix or feature addition. _"It is not daily increase but daily decrease, hack away the unessential. The closer to the source, the less wastage there is."_ -Bruce Lee 24 | 25 | issue-message: | 26 | 👋 Hello @${{ github.actor }}, thank you for your interest in 🚀 YOLOv5! Please visit our ⭐️ [Tutorials](https://github.com/ultralytics/yolov5/wiki#tutorials) to get started, where you can find quickstart guides for simple tasks like [Custom Data Training](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data) all the way to advanced concepts like [Hyperparameter Evolution](https://github.com/ultralytics/yolov5/issues/607). 27 | 28 | If this is a 🐛 Bug Report, please provide screenshots and **minimum viable code to reproduce your issue**, otherwise we can not help you. 29 | 30 | If this is a custom training ❓ Question, please provide as much information as possible, including dataset images, training logs, screenshots, and a public link to online [W&B logging](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data#visualize) if available. 31 | 32 | For business inquiries or professional support requests please visit https://www.ultralytics.com or email Glenn Jocher at glenn.jocher@ultralytics.com. 33 | 34 | ## Requirements 35 | 36 | Python 3.8 or later with all [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) dependencies installed, including `torch>=1.7`. 
To install run: 37 | ```bash 38 | $ pip install -r requirements.txt 39 | ``` 40 | 41 | ## Environments 42 | 43 | YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): 44 | 45 | - **Google Colab Notebook** with free GPU: Open In Colab 46 | - **Kaggle Notebook** with free GPU: [https://www.kaggle.com/ultralytics/yolov5](https://www.kaggle.com/ultralytics/yolov5) 47 | - **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart) 48 | - **Docker Image** https://hub.docker.com/r/ultralytics/yolov5. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) ![Docker Pulls](https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker) 49 | 50 | ## Status 51 | 52 | ![CI CPU testing](https://github.com/ultralytics/yolov5/workflows/CI%20CPU%20testing/badge.svg) 53 | 54 | If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 training ([train.py](https://github.com/ultralytics/yolov5/blob/master/train.py)), testing ([test.py](https://github.com/ultralytics/yolov5/blob/master/test.py)), inference ([detect.py](https://github.com/ultralytics/yolov5/blob/master/detect.py)) and export ([export.py](https://github.com/ultralytics/yolov5/blob/master/models/export.py)) on MacOS, Windows, and Ubuntu every 24 hours and on every commit. 55 | 56 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/workflows/rebase.yml: -------------------------------------------------------------------------------- 1 | name: Automatic Rebase 2 | # https://github.com/marketplace/actions/automatic-rebase 3 | 4 | on: 5 | issue_comment: 6 | types: [created] 7 | 8 | jobs: 9 | rebase: 10 | name: Rebase 11 | if: github.event.issue.pull_request != '' && contains(github.event.comment.body, '/rebase') 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout the latest code 15 | uses: actions/checkout@v2 16 | with: 17 | fetch-depth: 0 18 | - name: Automatic Rebase 19 | uses: cirrus-actions/rebase@1.3.1 20 | env: 21 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 22 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Close stale issues 2 | on: 3 | schedule: 4 | - cron: "0 0 * * *" 5 | 6 | jobs: 7 | stale: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/stale@v1 11 | with: 12 | repo-token: ${{ secrets.GITHUB_TOKEN }} 13 | stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.' 14 | stale-pr-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.' 15 | days-before-stale: 30 16 | days-before-close: 5 17 | exempt-issue-labels: 'documentation,tutorial' 18 | operations-per-run: 100 # The maximum number of operations per run, used to control rate limiting. 
19 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | ''' 3 | @author: mengting gu 4 | @contact: 1065504814@qq.com 5 | @time: 2021/11/11 下午5:52 6 | @file: __init__.py.py 7 | @desc:  8 | ''' 9 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/tools_yolov5/yolov5_v4/models/__init__.py -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/requirements.txt: -------------------------------------------------------------------------------- 1 | # pip install -r requirements.txt 2 | 3 | # base ---------------------------------------- 4 | Cython 5 | matplotlib>=3.2.2 6 | numpy>=1.18.5 7 | opencv-python>=4.1.2 8 | Pillow 9 | PyYAML>=5.3 10 | scipy>=1.4.1 11 | tensorboard>=2.2 12 | torch>=1.7.0 13 | torchvision>=0.8.1 14 | tqdm>=4.41.0 15 | 16 | # logging ------------------------------------- 17 | # wandb 18 | 19 | # plotting ------------------------------------ 20 | seaborn>=0.11.0 21 | pandas 22 | 23 | # export -------------------------------------- 24 | # coremltools==4.0 25 | # onnx>=1.8.0 26 | # scikit-learn==0.19.2 # for coreml quantization 27 | 28 | # extras -------------------------------------- 29 | thop # FLOPS computation 30 | pycocotools>=2.0 # COCO mAP 31 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/utils/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/tools_yolov5/yolov5_v4/utils/__init__.py -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/utils/activations.py: -------------------------------------------------------------------------------- 1 | # Activation functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | # SiLU https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------- 9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 10 | @staticmethod 11 | def forward(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 16 | @staticmethod 17 | def forward(x): 18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 19 | return x * F.hardtanh(x + 3, 0., 6.) / 6. # for torchscript, CoreML and ONNX 20 | 21 | 22 | class MemoryEfficientSwish(nn.Module): 23 | class F(torch.autograd.Function): 24 | @staticmethod 25 | def forward(ctx, x): 26 | ctx.save_for_backward(x) 27 | return x * torch.sigmoid(x) 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | x = ctx.saved_tensors[0] 32 | sx = torch.sigmoid(x) 33 | return grad_output * (sx * (1 + x * (1 - sx))) 34 | 35 | def forward(self, x): 36 | return self.F.apply(x) 37 | 38 | 39 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 40 | class Mish(nn.Module): 41 | @staticmethod 42 | def forward(x): 43 | return x * F.softplus(x).tanh() 44 | 45 | 46 | class MemoryEfficientMish(nn.Module): 47 | class F(torch.autograd.Function): 48 | @staticmethod 49 | def forward(ctx, x): 50 | ctx.save_for_backward(x) 51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 52 | 53 | @staticmethod 54 | def backward(ctx, grad_output): 55 | x = ctx.saved_tensors[0] 56 | sx = torch.sigmoid(x) 57 | fx = F.softplus(x).tanh() 58 | return grad_output * (fx + x * sx * (1 - fx * fx)) 59 | 60 | def forward(self, x): 61 | return self.F.apply(x) 62 | 63 | 64 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 65 | class FReLU(nn.Module): 66 | def __init__(self, c1, k=3): # ch_in, kernel 67 | super().__init__() 68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 69 | self.bn = nn.BatchNorm2d(c1) 70 | 71 | def forward(self, x): 72 | return torch.max(x, self.bn(self.conv(x))) 73 | -------------------------------------------------------------------------------- /tutorials/centertrack/README.md: -------------------------------------------------------------------------------- 1 | # CenterTrack 2 | 3 | Step1. git clone https://github.com/xingyizhou/CenterTrack.git 4 | 5 | 6 | Step2. 7 | 8 | replace https://github.com/xingyizhou/CenterTrack/blob/master/src/lib/utils/tracker.py 9 | 10 | replace https://github.com/xingyizhou/CenterTrack/blob/master/src/lib/opts.py 11 | 12 | 13 | Step3. run 14 | ``` 15 | python3 test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --load_model ../models/mot17_half.pth --track_thresh 0.4 --new_thresh 0.5 --out_thresh 0.2 --pre_thresh 0.5 16 | ``` 17 | 18 | 19 | # CenterTrack_BYTE 20 | 21 | Step1. 
git clone https://github.com/xingyizhou/CenterTrack.git 22 | 23 | 24 | Step2. 25 | 26 | replace https://github.com/xingyizhou/CenterTrack/blob/master/src/lib/utils/tracker.py by byte_tracker.py 27 | 28 | replace https://github.com/xingyizhou/CenterTrack/blob/master/src/lib/opts.py 29 | 30 | add mot_online to https://github.com/xingyizhou/CenterTrack/blob/master/src/lib/utils 31 | 32 | Step3. run 33 | ``` 34 | python3 test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --load_model ../models/mot17_half.pth --track_thresh 0.4 --new_thresh 0.5 --out_thresh 0.2 --pre_thresh 0.5 35 | ``` 36 | 37 | 38 | ## Notes 39 | tracker.py: only motion 40 | 41 | byte_tracker.py: motion with kalman filter 42 | 43 | -------------------------------------------------------------------------------- /tutorials/centertrack/mot_online/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | -------------------------------------------------------------------------------- /tutorials/cstrack/README.md: -------------------------------------------------------------------------------- 1 | # CSTrack 2 | 3 | Step1. git clone -b MOT https://github.com/JudasDie/SOTS.git 4 | 5 | 6 | Step2. replace https://github.com/JudasDie/SOTS/blob/master/lib/tracker/cstrack.py 7 | 8 | 9 | Step3. download cstrack model trained on MIX and MOT17_half (mix_mot17_half_cstrack.pt): [google](https://drive.google.com/file/d/1OG5PDj_CYmMiw3dN6pZ0FsgqY__CIDx1/view?usp=sharing), [baidu(code:0bsu)](https://pan.baidu.com/s/1Z2VnE-OhZIPmgX6-4r9Z1Q) and put it under SOTS/weights 10 | 11 | 12 | Step4. 
run BYTE tracker example: 13 | ``` 14 | cd tracking 15 | python3 test_cstrack.py --val_mot17 True --val_hf 2 --weights ../weights/mix_mot17_half_cstrack.pt --conf_thres 0.6 --data_cfg ../src/lib/cfg/mot17_hf.json --data_dir your/data/path 16 | ``` 17 | 18 | 19 | ## Notes 20 | byte_tracker: only motion 21 | 22 | tracker: motion + reid 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /tutorials/ctracker/README.md: -------------------------------------------------------------------------------- 1 | # CTracker 2 | 3 | #### Step1 4 | git clone https://github.com/pjl1995/CTracker.git and preapare dataset 5 | 6 | 7 | #### Step2 8 | 9 | add generate_half_csv.py to https://github.com/pjl1995/CTracker 10 | 11 | run generate_half_csv.py and put train_half_annots.csv in MOT17 12 | 13 | run 14 | ``` 15 | python3 train.py --root_path MOT17 --csv_train train_half_annots.csv --model_dir ctracker/ --depth 50 --epochs 50 16 | ``` 17 | You can also download the CTracker model trained by us: [google](https://drive.google.com/file/d/1TwBDomJx8pxD-e96mGIiTduLenUvmf1t/view?usp=sharing), [baidu(code:6p3w)](https://pan.baidu.com/s/1MaCvnHynX2Wzg81hWkqzeg) 18 | 19 | #### Step3 20 | 21 | replace https://github.com/pjl1995/CTracker/blob/master/test.py 22 | 23 | run 24 | ``` 25 | python3 test.py --dataset_path MOT17 --model_dir ctracker --model_path ctracker/mot17_half_ctracker.pt 26 | ``` 27 | 28 | #### Step4 29 | 30 | add eval_motchallenge.py to https://github.com/pjl1995/CTracker 31 | 32 | prepare gt_half_val.txt as CenterTrack [DATA.md](https://github.com/xingyizhou/CenterTrack/blob/master/readme/DATA.md) 33 | 34 | 35 | #### Step5 36 | 37 | run 38 | ``` 39 | python3 eval_motchallenge.py --groundtruths MOT17/train --tests ctracker/results --gt_type half_val --eval_official --score_threshold -1 40 | ``` 41 | 42 | 43 | 44 | # CTracker_BYTE 45 | 46 | #### Step3 47 | 48 | add mot_online to https://github.com/pjl1995/CTracker 49 | 50 | add byte_tracker.py to https://github.com/pjl1995/CTracker 51 | 52 | add test_byte.py to https://github.com/pjl1995/CTracker 53 | 54 | run 55 | ``` 56 | python3 test_byte.py --dataset_path MOT17 --model_dir ctracker --model_path ctracker/mot17_half_ctracker.pt 57 | ``` 58 | 59 | 60 | #### Step5 61 | 62 | run 63 | ``` 64 | python3 eval_motchallenge.py --groundtruths MOT17/train --tests ctracker/results --gt_type half_val --eval_official --score_threshold -1 65 | ``` 66 | -------------------------------------------------------------------------------- /tutorials/ctracker/generate_half_csv.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | prefix_dir = 'MOT17/' 4 | root_dir = 'train/' 5 | result_csv = 'train_half_annots.csv' 6 | train_half_set = {2: 301, 4: 526, 5:419, 9:263, 10:328, 11:451, 13:376} 7 | fout = open(result_csv, 'w') 8 | 9 | for data_name in sorted(os.listdir(prefix_dir + root_dir)): 10 | print(data_name) 11 | gt_path = os.path.join(prefix_dir, root_dir, data_name, 'gt', 'gt.txt') 12 | # print(gt_path) 13 | data_raw = np.loadtxt(gt_path, delimiter=',', dtype='float', usecols=(0,1,2,3,4,5,6,7,8)) 14 | 15 | data_sort = data_raw[np.lexsort(data_raw[:,::-1].T)] 16 | visible_raw = data_sort[:,8] 17 | # print(data_sort) 18 | # print(data_sort[-1, 0]) 19 | img_num = data_sort[-1, 0] 20 | 21 | # print(data_sort.shape[0]) 22 | box_num = data_sort.shape[0] 23 | 24 | person_box_num = np.sum(data_sort[:,6] == 1) 25 | # print(person_box_num) 26 
| # import ipdb; ipdb.set_trace() 27 | for i in range(box_num): 28 | c = int(data_sort[i, 6]) 29 | v = visible_raw[i] 30 | img_index = int(data_sort[i, 0]) 31 | if c == 1 and v > 0.1 and img_index < train_half_set[int(data_name[-2:])]: 32 | img_index = int(data_sort[i, 0]) 33 | img_name = data_name + '/img1/' + str(img_index).zfill(6) + '.jpg' 34 | print(root_dir + img_name + ', ' + str(int(data_sort[i, 1])) + ', ' + str(data_sort[i, 2]) + ', ' + str(data_sort[i, 3]) + ', ' + str(data_sort[i, 2] + data_sort[i, 4]) + ', ' + str(data_sort[i, 3] + data_sort[i, 5]) + ', person\n') 35 | fout.write(root_dir + img_name + ', ' + str(int(data_sort[i, 1])) + ', ' + str(data_sort[i, 2]) + ', ' + str(data_sort[i, 3]) + ', ' + str(data_sort[i, 2] + data_sort[i, 4]) + ', ' + str(data_sort[i, 3] + data_sort[i, 5]) + ', person\n') 36 | 37 | fout.close() 38 | -------------------------------------------------------------------------------- /tutorials/ctracker/mot_online/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | -------------------------------------------------------------------------------- /tutorials/fairmot/README.md: -------------------------------------------------------------------------------- 1 | # FairMOT 2 | 3 | Step1. git clone https://github.com/ifzhang/FairMOT.git 4 | 5 | 6 | Step2. replace https://github.com/ifzhang/FairMOT/blob/master/src/lib/tracker/multitracker.py 7 | 8 | 9 | Step3. run motion + reid tracker using tracker.py (set --match_thres 0.4), run BYTE tracker using byte_tracker.py (set --match_thres 0.8) 10 | 11 | run BYTE tracker example: 12 | ``` 13 | python3 track_half.py mot --load_model ../exp/mot/mot17_half_dla34/model_last.pth --match_thres 0.8 14 | ``` 15 | 16 | 17 | ## Notes 18 | byte_tracker: only motion 19 | 20 | tracker: motion + reid 21 | -------------------------------------------------------------------------------- /tutorials/jde/README.md: -------------------------------------------------------------------------------- 1 | # JDE 2 | 3 | Step1. git clone https://github.com/Zhongdao/Towards-Realtime-MOT.git 4 | 5 | 6 | Step2. replace https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py and https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/utils/evaluation.py 7 | 8 | Step3. 
download JDE model trained on MIX and MOT17_half (mix_mot17_half_jde.pt): [google](https://drive.google.com/file/d/1jUiIbaHFf75Jq6thOGI3CPygMMBy6850/view?usp=sharing), [baidu(code:ccdd)](https://pan.baidu.com/s/10se81ZktkUDUWn2dZzkk_Q) 9 | 10 | Step4. put track_half.py under https://github.com/Zhongdao/Towards-Realtime-MOT and run: 11 | ``` 12 | python3 track_half.py --cfg ./cfg/yolov3_1088x608.cfg --weights weights/mix_mot17_half_jde.pt 13 | ``` 14 | 15 | 16 | ## Notes 17 | byte_tracker: only motion 18 | 19 | tracker: motion + reid 20 | -------------------------------------------------------------------------------- /tutorials/jde/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | mm.lap.default_solver = 'lap' 6 | from utils.io import read_results, unzip_objs 7 | 8 | 9 | class Evaluator(object): 10 | 11 | def __init__(self, data_root, seq_name, data_type): 12 | self.data_root = data_root 13 | self.seq_name = seq_name 14 | self.data_type = data_type 15 | 16 | self.load_annotations() 17 | self.reset_accumulator() 18 | 19 | def load_annotations(self): 20 | assert self.data_type == 'mot' 21 | 22 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 23 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 24 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 25 | 26 | def reset_accumulator(self): 27 | self.acc = mm.MOTAccumulator(auto_id=True) 28 | 29 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 30 | # results 31 | trk_tlwhs = np.copy(trk_tlwhs) 32 | trk_ids = np.copy(trk_ids) 33 | 34 | # gts 35 | gt_objs = self.gt_frame_dict.get(frame_id, []) 36 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 37 | 38 | # ignore boxes 39 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 40 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 41 | 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | 57 | # get distance matrix 58 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 59 | 60 | # acc 61 | self.acc.update(gt_ids, trk_ids, iou_distance) 62 | 63 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 64 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 65 | else: 66 | events = None 67 | return events 68 | 69 | def eval_file(self, filename): 70 | self.reset_accumulator() 71 | 72 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 73 | #frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 74 | frames = sorted(list(set(result_frame_dict.keys()))) 75 | for frame_id in frames: 76 | trk_objs = result_frame_dict.get(frame_id, []) 77 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 78 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 79 | 80 | return self.acc 81 
| 82 | @staticmethod 83 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 84 | names = copy.deepcopy(names) 85 | if metrics is None: 86 | metrics = mm.metrics.motchallenge_metrics 87 | metrics = copy.deepcopy(metrics) 88 | 89 | mh = mm.metrics.create() 90 | summary = mh.compute_many( 91 | accs, 92 | metrics=metrics, 93 | names=names, 94 | generate_overall=True 95 | ) 96 | 97 | return summary 98 | 99 | @staticmethod 100 | def save_summary(summary, filename): 101 | import pandas as pd 102 | writer = pd.ExcelWriter(filename) 103 | summary.to_excel(writer) 104 | writer.save() 105 | -------------------------------------------------------------------------------- /tutorials/motr/README.md: -------------------------------------------------------------------------------- 1 | # MOTR 2 | 3 | Step1. 4 | 5 | git clone https://github.com/megvii-model/MOTR.git and install 6 | 7 | replace https://github.com/megvii-model/MOTR/blob/main/datasets/joint.py 8 | 9 | replace https://github.com/megvii-model/MOTR/blob/main/datasets/transforms.py 10 | 11 | 12 | train 13 | 14 | ``` 15 | python3 -m torch.distributed.launch --nproc_per_node=8 \ 16 | --use_env main.py \ 17 | --meta_arch motr \ 18 | --dataset_file e2e_joint \ 19 | --epoch 50 \ 20 | --with_box_refine \ 21 | --lr_drop 40 \ 22 | --lr 2e-4 \ 23 | --lr_backbone 2e-5 \ 24 | --pretrained coco_model_final.pth \ 25 | --output_dir exps/e2e_motr_r50_mot17trainhalf \ 26 | --batch_size 1 \ 27 | --sample_mode 'random_interval' \ 28 | --sample_interval 10 \ 29 | --sampler_steps 10 20 30 \ 30 | --sampler_lengths 2 3 4 5 \ 31 | --update_query_pos \ 32 | --merger_dropout 0 \ 33 | --dropout 0 \ 34 | --random_drop 0.1 \ 35 | --fp_ratio 0.3 \ 36 | --query_interaction_layer 'QIM' \ 37 | --extra_track_attn \ 38 | --mot_path . 39 | --data_txt_path_train ./datasets/data_path/mot17.half \ 40 | --data_txt_path_val ./datasets/data_path/mot17.val \ 41 | ``` 42 | mot17.half and mot17.val are from https://github.com/ifzhang/FairMOT/tree/master/src/data 43 | 44 | You can also download the MOTR model trained by us: [google](https://drive.google.com/file/d/1pzGi53VooppQqhKf3TSxLK99LERsVyTw/view?usp=sharing), [baidu(code:t87h)](https://pan.baidu.com/s/1OrcR3L9Bf2xXIo8RQl3zyA) 45 | 46 | 47 | Step2. 48 | 49 | replace https://github.com/megvii-model/MOTR/blob/main/util/evaluation.py 50 | 51 | replace https://github.com/megvii-model/MOTR/blob/main/eval.py 52 | 53 | replace https://github.com/megvii-model/MOTR/blob/main/models/motr.py 54 | 55 | add byte_tracker.py to https://github.com/megvii-model/MOTR 56 | 57 | add mot_online to https://github.com/megvii-model/MOTR 58 | 59 | 60 | Step3. 
61 | 62 | 63 | val 64 | 65 | ``` 66 | python3 eval.py \ 67 | --meta_arch motr \ 68 | --dataset_file e2e_joint \ 69 | --epoch 200 \ 70 | --with_box_refine \ 71 | --lr_drop 100 \ 72 | --lr 2e-4 \ 73 | --lr_backbone 2e-5 \ 74 | --pretrained exps/e2e_motr_r50_mot17val/motr_final.pth \ 75 | --output_dir exps/e2e_motr_r50_mot17val \ 76 | --batch_size 1 \ 77 | --sample_mode 'random_interval' \ 78 | --sample_interval 10 \ 79 | --sampler_steps 50 90 120 \ 80 | --sampler_lengths 2 3 4 5 \ 81 | --update_query_pos \ 82 | --merger_dropout 0 \ 83 | --dropout 0 \ 84 | --random_drop 0.1 \ 85 | --fp_ratio 0.3 \ 86 | --query_interaction_layer 'QIM' \ 87 | --extra_track_attn \ 88 | --mot_path ./MOT17/images/train 89 | --data_txt_path_train ./datasets/data_path/mot17.half \ 90 | --data_txt_path_val ./datasets/data_path/mot17.val \ 91 | --resume model_final.pth \ 92 | ``` 93 | 94 | 95 | 96 | # MOTR det 97 | 98 | in Step2, replace https://github.com/megvii-model/MOTR/blob/main/models/motr.py by motr_det.py 99 | 100 | others are the same as MOTR 101 | -------------------------------------------------------------------------------- /tutorials/motr/mot_online/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | -------------------------------------------------------------------------------- /tutorials/qdtrack/README.md: -------------------------------------------------------------------------------- 1 | # QDTrack_reid_motion 2 | 3 | Step1. git clone https://github.com/SysCV/qdtrack.git and train 4 | 5 | 6 | Step2. 7 | 8 | replace https://github.com/SysCV/qdtrack/blob/master/qdtrack/models/mot/qdtrack.py 9 | 10 | add mot_online to https://github.com/SysCV/qdtrack 11 | 12 | add tracker_reid_motion.py to https://github.com/SysCV/qdtrack and rename to tracker.py 13 | 14 | Step3. download qdtrack model trained on mot17 half training set: [google](https://drive.google.com/file/d/1IfM8i0R0lF_4NOgeloMPFo5d52dqhaHW/view?usp=sharing), [baidu(code:whcc)](https://pan.baidu.com/s/1IYRD3V2YOa6-YNFgMQyv7w) 15 | 16 | Step4. run 17 | ``` 18 | python3 -m torch.distributed.launch --nproc_per_node=8 --master_port=29501 tools/test.py configs/mot17/qdtrack-frcnn_r50_fpn_4e_mot17.py work_dirs/mot17_half_qdtrack.pth --launcher pytorch --eval track --eval-options resfile_path=output 19 | ``` 20 | 21 | 22 | # QDTrack_BYTE 23 | 24 | Step1. git clone https://github.com/SysCV/qdtrack.git and train 25 | 26 | 27 | Step2. 
28 | 29 | replace https://github.com/SysCV/qdtrack/blob/master/qdtrack/models/mot/qdtrack.py 30 | 31 | add mot_online to https://github.com/SysCV/qdtrack 32 | 33 | add byte_tracker.py to https://github.com/SysCV/qdtrack 34 | 35 | 36 | Step3. run 37 | ``` 38 | python3 -m torch.distributed.launch --nproc_per_node=8 --master_port=29501 tools/test.py configs/mot17/qdtrack-frcnn_r50_fpn_4e_mot17.py work_dirs/mot17_half_qdtrack.pth --launcher pytorch --eval track --eval-options resfile_path=output 39 | ``` 40 | -------------------------------------------------------------------------------- /tutorials/qdtrack/mot_online/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | -------------------------------------------------------------------------------- /tutorials/trades/README.md: -------------------------------------------------------------------------------- 1 | # TraDeS 2 | 3 | Step1. git clone https://github.com/JialianW/TraDeS.git 4 | 5 | 6 | Step2. 7 | 8 | replace https://github.com/JialianW/TraDeS/blob/master/src/lib/utils/tracker.py 9 | 10 | replace https://github.com/JialianW/TraDeS/blob/master/src/lib/opts.py 11 | 12 | 13 | Step3. run 14 | ``` 15 | python3 test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --inference --load_model ../models/mot_half.pth --gpus 0 --clip_len 3 --trades --track_thresh 0.4 --new_thresh 0.4 --out_thresh 0.2 --pre_thresh 0.5 16 | ``` 17 | 18 | 19 | # TraDeS_BYTE 20 | 21 | Step1. git clone https://github.com/JialianW/TraDeS.git 22 | 23 | 24 | Step2. 25 | 26 | replace https://github.com/JialianW/TraDeS/blob/master/src/lib/utils/tracker.py by byte_tracker.py 27 | 28 | replace https://github.com/JialianW/TraDeS/blob/master/src/lib/opts.py 29 | 30 | add mot_online to https://github.com/JialianW/TraDeS/blob/master/src/lib/utils 31 | 32 | Step3. 
run 33 | ``` 34 | python3 test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --inference --load_model ../models/mot_half.pth --gpus 0 --clip_len 3 --trades --track_thresh 0.4 --new_thresh 0.5 --out_thresh 0.1 --pre_thresh 0.5 35 | ``` 36 | 37 | 38 | ## Notes 39 | tracker.py: motion + reid 40 | 41 | byte_tracker.py: motion with kalman filter 42 | -------------------------------------------------------------------------------- /tutorials/trades/mot_online/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | -------------------------------------------------------------------------------- /tutorials/transtrack/README.md: -------------------------------------------------------------------------------- 1 | # TransTrack 2 | 3 | Step1. git clone https://github.com/PeizeSun/TransTrack.git 4 | 5 | 6 | Step2. 7 | 8 | replace https://github.com/PeizeSun/TransTrack/blob/main/models/tracker.py 9 | 10 | Step3. 11 | 12 | Download TransTrack pretrained model: [671mot17_crowdhuman_mot17.pth](https://drive.google.com/drive/folders/1DjPL8xWoXDASrxgsA3O06EspJRdUXFQ-?usp=sharing) 13 | 14 | 15 | Step3. run 16 | ``` 17 | python3 main_track.py --output_dir . --dataset_file mot --coco_path mot --batch_size 1 --resume pretrained/671mot17_crowdhuman_mot17.pth --eval --with_box_refine --num_queries 500 18 | ``` 19 | 20 | 21 | # TransTrack_BYTE 22 | 23 | Step1. git clone https://github.com/PeizeSun/TransTrack.git 24 | 25 | Step2. 26 | 27 | replace https://github.com/PeizeSun/TransTrack/blob/main/models/save_track.py 28 | 29 | replace https://github.com/PeizeSun/TransTrack/blob/main/engine_track.py 30 | 31 | replace https://github.com/PeizeSun/TransTrack/blob/main/main_track.py 32 | 33 | add mot_online to https://github.com/PeizeSun/TransTrack 34 | 35 | Step3. run 36 | ``` 37 | python3 main_track.py --output_dir . 
--dataset_file mot --coco_path mot --batch_size 1 --resume pretrained/671mot17_crowdhuman_mot17.pth --eval --with_box_refine --num_queries 500 38 | ``` 39 | 40 | 41 | ## Notes 42 | tracker.py: only motion 43 | 44 | mot_online/byte_tracker.py: motion with kalman filter 45 | 46 | -------------------------------------------------------------------------------- /tutorials/transtrack/mot_online/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed -------------------------------------------------------------------------------- /tutorials/transtrack/save_track.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) https://github.com/xingyizhou/CenterTrack 3 | Modified by Peize Sun, Rufeng Zhang 4 | """ 5 | # coding: utf-8 6 | import os 7 | import json 8 | import logging 9 | from collections import defaultdict 10 | 11 | 12 | def save_track(results, out_root, video_to_images, video_names, data_split='val'): 13 | assert out_root is not None 14 | out_dir = os.path.join(out_root, data_split) 15 | if not os.path.exists(out_dir): 16 | os.mkdir(out_dir) 17 | 18 | # save json. 19 | # json_path = os.path.join(out_dir, "track_results.json") 20 | # with open(json_path, "w") as f: 21 | # f.write(json.dumps(results)) 22 | # f.flush() 23 | 24 | # save it in standard format. 
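    # The "standard format" below is the MOTChallenge txt layout, one row per box:
    #   frame, track_id, bb_left, bb_top, bb_width, bb_height, conf, x, y, z
    # The last four fields are written as -1, and a box is kept only while its track is active.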
25 | track_dir = os.path.join(out_dir, "tracks") 26 | if not os.path.exists(track_dir): 27 | os.mkdir(track_dir) 28 | for video_id in video_to_images.keys(): 29 | video_infos = video_to_images[video_id] 30 | video_name = video_names[video_id] 31 | file_path = os.path.join(track_dir, "{}.txt".format(video_name)) 32 | f = open(file_path, "w") 33 | tracks = defaultdict(list) 34 | for video_info in video_infos: 35 | image_id, frame_id = video_info["image_id"], video_info["frame_id"] 36 | result = results[image_id] 37 | for item in result: 38 | if not ("tracking_id" in item): 39 | raise NotImplementedError 40 | tracking_id = item["tracking_id"] 41 | bbox = item["bbox"] 42 | bbox = [bbox[0], bbox[1], bbox[2], bbox[3], item['score'], item['active']] 43 | tracks[tracking_id].append([frame_id] + bbox) 44 | 45 | rename_track_id = 0 46 | for track_id in sorted(tracks): 47 | rename_track_id += 1 48 | for t in tracks[track_id]: 49 | if t[6] > 0: 50 | f.write("{},{},{:.2f},{:.2f},{:.2f},{:.2f},-1,-1,-1,-1\n".format( 51 | t[0], rename_track_id, t[1], t[2], t[3] - t[1], t[4] - t[2])) 52 | f.close() 53 | -------------------------------------------------------------------------------- /videos/palace.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/videos/palace.mp4 -------------------------------------------------------------------------------- /yolox/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .utils import configure_module 5 | 6 | configure_module() 7 | 8 | __version__ = "0.1.0" 9 | -------------------------------------------------------------------------------- /yolox/core/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | from .launch import launch 6 | from .trainer import Trainer 7 | -------------------------------------------------------------------------------- /yolox/data/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | from .data_augment import TrainTransform, ValTransform 6 | from .data_prefetcher import DataPrefetcher 7 | from .dataloading import DataLoader, get_yolox_datadir 8 | from .datasets import * 9 | from .samplers import InfiniteSampler, YoloBatchSampler 10 | -------------------------------------------------------------------------------- /yolox/data/data_prefetcher.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import torch 6 | import torch.distributed as dist 7 | 8 | from yolox.utils import synchronize 9 | 10 | import random 11 | 12 | 13 | class DataPrefetcher: 14 | """ 15 | DataPrefetcher is inspired by code of following file: 16 | https://github.com/NVIDIA/apex/blob/master/examples/imagenet/main_amp.py 17 | It could speedup your pytorch dataloader. For more information, please check 18 | https://github.com/NVIDIA/apex/issues/304#issuecomment-493562789. 
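    Rough usage sketch (illustrative variable names): wrap an existing DataLoader and keep
    calling next() until the returned batch is None at the end of the epoch, e.g.
        prefetcher = DataPrefetcher(train_loader)
        inps, targets = prefetcher.next()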
19 | """ 20 | 21 | def __init__(self, loader): 22 | self.loader = iter(loader) 23 | self.stream = torch.cuda.Stream() 24 | self.input_cuda = self._input_cuda_for_image 25 | self.record_stream = DataPrefetcher._record_stream_for_image 26 | self.preload() 27 | 28 | def preload(self): 29 | try: 30 | self.next_input, self.next_target, _, _ = next(self.loader) 31 | except StopIteration: 32 | self.next_input = None 33 | self.next_target = None 34 | return 35 | 36 | with torch.cuda.stream(self.stream): 37 | self.input_cuda() 38 | self.next_target = self.next_target.cuda(non_blocking=True) 39 | 40 | def next(self): 41 | torch.cuda.current_stream().wait_stream(self.stream) 42 | input = self.next_input 43 | target = self.next_target 44 | if input is not None: 45 | self.record_stream(input) 46 | if target is not None: 47 | target.record_stream(torch.cuda.current_stream()) 48 | self.preload() 49 | return input, target 50 | 51 | def _input_cuda_for_image(self): 52 | self.next_input = self.next_input.cuda(non_blocking=True) 53 | 54 | @staticmethod 55 | def _record_stream_for_image(input): 56 | input.record_stream(torch.cuda.current_stream()) 57 | 58 | 59 | def random_resize(data_loader, exp, epoch, rank, is_distributed): 60 | tensor = torch.LongTensor(1).cuda() 61 | if is_distributed: 62 | synchronize() 63 | 64 | if rank == 0: 65 | if epoch > exp.max_epoch - 10: 66 | size = exp.input_size 67 | else: 68 | size = random.randint(*exp.random_size) 69 | size = int(32 * size) 70 | tensor.fill_(size) 71 | 72 | if is_distributed: 73 | synchronize() 74 | dist.broadcast(tensor, 0) 75 | 76 | input_size = data_loader.change_input_dim(multiple=tensor.item(), random_range=None) 77 | return input_size 78 | -------------------------------------------------------------------------------- /yolox/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | from .datasets_wrapper import ConcatDataset, Dataset, MixConcatDataset 6 | from .mosaicdetection import MosaicDetection 7 | from .mot import MOTDataset 8 | -------------------------------------------------------------------------------- /yolox/data/datasets/datasets_wrapper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | 5 | from torch.utils.data.dataset import ConcatDataset as torchConcatDataset 6 | from torch.utils.data.dataset import Dataset as torchDataset 7 | 8 | import bisect 9 | from functools import wraps 10 | 11 | 12 | class ConcatDataset(torchConcatDataset): 13 | def __init__(self, datasets): 14 | super(ConcatDataset, self).__init__(datasets) 15 | if hasattr(self.datasets[0], "input_dim"): 16 | self._input_dim = self.datasets[0].input_dim 17 | self.input_dim = self.datasets[0].input_dim 18 | 19 | def pull_item(self, idx): 20 | if idx < 0: 21 | if -idx > len(self): 22 | raise ValueError( 23 | "absolute value of index should not exceed dataset length" 24 | ) 25 | idx = len(self) + idx 26 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 27 | if dataset_idx == 0: 28 | sample_idx = idx 29 | else: 30 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 31 | return self.datasets[dataset_idx].pull_item(sample_idx) 32 | 33 | 34 | class MixConcatDataset(torchConcatDataset): 35 | def __init__(self, datasets): 36 | super(MixConcatDataset, self).__init__(datasets) 37 | if hasattr(self.datasets[0], "input_dim"): 38 | self._input_dim = self.datasets[0].input_dim 39 | self.input_dim = self.datasets[0].input_dim 40 | 41 | def __getitem__(self, index): 42 | 43 | if not isinstance(index, int): 44 | idx = index[1] 45 | if idx < 0: 46 | if -idx > len(self): 47 | raise ValueError( 48 | "absolute value of index should not exceed dataset length" 49 | ) 50 | idx = len(self) + idx 51 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 52 | if dataset_idx == 0: 53 | sample_idx = idx 54 | else: 55 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 56 | if not isinstance(index, int): 57 | index = (index[0], sample_idx, index[2]) 58 | 59 | return self.datasets[dataset_idx][index] 60 | 61 | 62 | class Dataset(torchDataset): 63 | """ This class is a subclass of the base :class:`torch.utils.data.Dataset`, 64 | that enables on the fly resizing of the ``input_dim``. 65 | 66 | Args: 67 | input_dimension (tuple): (width,height) tuple with default dimensions of the network 68 | """ 69 | 70 | def __init__(self, input_dimension, mosaic=True): 71 | super().__init__() 72 | self.__input_dim = input_dimension[:2] 73 | self.enable_mosaic = mosaic 74 | 75 | @property 76 | def input_dim(self): 77 | """ 78 | Dimension that can be used by transforms to set the correct image size, etc. 79 | This allows transforms to have a single source of truth 80 | for the input dimension of the network. 81 | 82 | Return: 83 | list: Tuple containing the current width,height 84 | """ 85 | if hasattr(self, "_input_dim"): 86 | return self._input_dim 87 | return self.__input_dim 88 | 89 | @staticmethod 90 | def resize_getitem(getitem_fn): 91 | """ 92 | Decorator method that needs to be used around the ``__getitem__`` method. |br| 93 | This decorator enables the on the fly resizing of 94 | the ``input_dim`` with our :class:`~lightnet.data.DataLoader` class. 95 | 96 | Example: 97 | >>> class CustomSet(ln.data.Dataset): 98 | ... def __len__(self): 99 | ... return 10 100 | ... @ln.data.Dataset.resize_getitem 101 | ... def __getitem__(self, index): 102 | ... # Should return (image, anno) but here we return input_dim 103 | ... 
return self.input_dim 104 | >>> data = CustomSet((200,200)) 105 | >>> data[0] 106 | (200, 200) 107 | >>> data[(480,320), 0] 108 | (480, 320) 109 | """ 110 | 111 | @wraps(getitem_fn) 112 | def wrapper(self, index): 113 | if not isinstance(index, int): 114 | has_dim = True 115 | self._input_dim = index[0] 116 | self.enable_mosaic = index[2] 117 | index = index[1] 118 | else: 119 | has_dim = False 120 | 121 | ret_val = getitem_fn(self, index) 122 | 123 | if has_dim: 124 | del self._input_dim 125 | 126 | return ret_val 127 | 128 | return wrapper 129 | -------------------------------------------------------------------------------- /yolox/data/samplers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import BatchSampler as torchBatchSampler 8 | from torch.utils.data.sampler import Sampler 9 | 10 | import itertools 11 | from typing import Optional 12 | 13 | 14 | class YoloBatchSampler(torchBatchSampler): 15 | """ 16 | This batch sampler will generate mini-batches of (dim, index) tuples from another sampler. 17 | It works just like the :class:`torch.utils.data.sampler.BatchSampler`, 18 | but it will prepend a dimension, whilst ensuring it stays the same across one mini-batch. 19 | """ 20 | 21 | def __init__(self, *args, input_dimension=None, mosaic=True, **kwargs): 22 | super().__init__(*args, **kwargs) 23 | self.input_dim = input_dimension 24 | self.new_input_dim = None 25 | self.mosaic = mosaic 26 | 27 | def __iter__(self): 28 | self.__set_input_dim() 29 | for batch in super().__iter__(): 30 | yield [(self.input_dim, idx, self.mosaic) for idx in batch] 31 | self.__set_input_dim() 32 | 33 | def __set_input_dim(self): 34 | """ This function randomly changes the the input dimension of the dataset. """ 35 | if self.new_input_dim is not None: 36 | self.input_dim = (self.new_input_dim[0], self.new_input_dim[1]) 37 | self.new_input_dim = None 38 | 39 | 40 | class InfiniteSampler(Sampler): 41 | """ 42 | In training, we only care about the "infinite stream" of training data. 43 | So this sampler produces an infinite stream of indices and 44 | all workers cooperate to correctly shuffle the indices and sample different indices. 45 | The samplers in each worker effectively produces `indices[worker_id::num_workers]` 46 | where `indices` is an infinite stream of indices consisting of 47 | `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True) 48 | or `range(size) + range(size) + ...` (if shuffle is False) 49 | """ 50 | 51 | def __init__( 52 | self, 53 | size: int, 54 | shuffle: bool = True, 55 | seed: Optional[int] = 0, 56 | rank=0, 57 | world_size=1, 58 | ): 59 | """ 60 | Args: 61 | size (int): the total number of data of the underlying dataset to sample from 62 | shuffle (bool): whether to shuffle the indices or not 63 | seed (int): the initial seed of the shuffle. Must be the same 64 | across all workers. If None, will use a random seed shared 65 | among workers (require synchronization among all workers). 
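            rank (int): rank of the current process; used only as a fallback when
                torch.distributed is not available or not initialized.
            world_size (int): total number of processes; same fallback behaviour as `rank`.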
66 | """ 67 | self._size = size 68 | assert size > 0 69 | self._shuffle = shuffle 70 | self._seed = int(seed) 71 | 72 | if dist.is_available() and dist.is_initialized(): 73 | self._rank = dist.get_rank() 74 | self._world_size = dist.get_world_size() 75 | else: 76 | self._rank = rank 77 | self._world_size = world_size 78 | 79 | def __iter__(self): 80 | start = self._rank 81 | yield from itertools.islice( 82 | self._infinite_indices(), start, None, self._world_size 83 | ) 84 | 85 | def _infinite_indices(self): 86 | g = torch.Generator() 87 | g.manual_seed(self._seed) 88 | while True: 89 | if self._shuffle: 90 | yield from torch.randperm(self._size, generator=g) 91 | else: 92 | yield from torch.arange(self._size) 93 | 94 | def __len__(self): 95 | return self._size // self._world_size 96 | -------------------------------------------------------------------------------- /yolox/deepsort_tracker/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | Parameters 9 | ---------- 10 | tlwh : array_like 11 | Bounding box in format `(x, y, w, h)`. 12 | confidence : float 13 | Detector confidence score. 14 | feature : array_like 15 | A feature vector that describes the object contained in this image. 16 | Attributes 17 | ---------- 18 | tlwh : ndarray 19 | Bounding box in format `(top left x, top left y, width, height)`. 20 | confidence : ndarray 21 | Detector confidence score. 22 | feature : ndarray | NoneType 23 | A feature vector that describes the object contained in this image. 24 | """ 25 | 26 | def __init__(self, tlwh, confidence, feature): 27 | self.tlwh = np.asarray(tlwh, dtype=np.float) 28 | self.confidence = float(confidence) 29 | self.feature = np.asarray(feature, dtype=np.float32) 30 | 31 | def to_tlbr(self): 32 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 33 | `(top left, bottom right)`. 34 | """ 35 | ret = self.tlwh.copy() 36 | ret[2:] += ret[:2] 37 | return ret 38 | 39 | def to_xyah(self): 40 | """Convert bounding box to format `(center x, center y, aspect ratio, 41 | height)`, where the aspect ratio is `width / height`. 42 | """ 43 | ret = self.tlwh.copy() 44 | ret[:2] += ret[2:] / 2 45 | ret[2] /= ret[3] 46 | return ret -------------------------------------------------------------------------------- /yolox/deepsort_tracker/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from yolox.deepsort_tracker import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Computer intersection over union. 9 | Parameters 10 | ---------- 11 | bbox : ndarray 12 | A bounding box in format `(top left x, top left y, width, height)`. 13 | candidates : ndarray 14 | A matrix of candidate bounding boxes (one per row) in the same format 15 | as `bbox`. 16 | Returns 17 | ------- 18 | ndarray 19 | The intersection over union in [0, 1] between the `bbox` and each 20 | candidate. A higher score means a larger fraction of the `bbox` is 21 | occluded by the candidate. 
22 | """ 23 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 24 | candidates_tl = candidates[:, :2] 25 | candidates_br = candidates[:, :2] + candidates[:, 2:] 26 | 27 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 28 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 29 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 30 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 31 | wh = np.maximum(0., br - tl) 32 | 33 | area_intersection = wh.prod(axis=1) 34 | area_bbox = bbox[2:].prod() 35 | area_candidates = candidates[:, 2:].prod(axis=1) 36 | return area_intersection / (area_bbox + area_candidates - area_intersection) 37 | 38 | 39 | def iou_cost(tracks, detections, track_indices=None, 40 | detection_indices=None): 41 | """An intersection over union distance metric. 42 | Parameters 43 | ---------- 44 | tracks : List[deep_sort.track.Track] 45 | A list of tracks. 46 | detections : List[deep_sort.detection.Detection] 47 | A list of detections. 48 | track_indices : Optional[List[int]] 49 | A list of indices to tracks that should be matched. Defaults to 50 | all `tracks`. 51 | detection_indices : Optional[List[int]] 52 | A list of indices to detections that should be matched. Defaults 53 | to all `detections`. 54 | Returns 55 | ------- 56 | ndarray 57 | Returns a cost matrix of shape 58 | len(track_indices), len(detection_indices) where entry (i, j) is 59 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 60 | """ 61 | if track_indices is None: 62 | track_indices = np.arange(len(tracks)) 63 | if detection_indices is None: 64 | detection_indices = np.arange(len(detections)) 65 | 66 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 67 | for row, track_idx in enumerate(track_indices): 68 | if tracks[track_idx].time_since_update > 1: 69 | cost_matrix[row, :] = linear_assignment.INFTY_COST 70 | continue 71 | 72 | bbox = tracks[track_idx].to_tlwh() 73 | candidates = np.asarray( 74 | [detections[i].tlwh for i in detection_indices]) 75 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 76 | return cost_matrix -------------------------------------------------------------------------------- /yolox/evaluators/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | from .coco_evaluator import COCOEvaluator 6 | from .mot_evaluator import MOTEvaluator 7 | -------------------------------------------------------------------------------- /yolox/exp/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | from .base_exp import BaseExp 6 | from .build import get_exp 7 | from .yolox_base import Exp 8 | -------------------------------------------------------------------------------- /yolox/exp/base_exp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
4 | 5 | import torch 6 | from torch.nn import Module 7 | 8 | from yolox.utils import LRScheduler 9 | 10 | import ast 11 | import pprint 12 | from abc import ABCMeta, abstractmethod 13 | from tabulate import tabulate 14 | from typing import Dict 15 | 16 | 17 | class BaseExp(metaclass=ABCMeta): 18 | """Basic class for any experiment.""" 19 | 20 | def __init__(self): 21 | self.seed = None 22 | self.output_dir = "./YOLOX_outputs" 23 | self.print_interval = 100 24 | self.eval_interval = 10 25 | 26 | @abstractmethod 27 | def get_model(self) -> Module: 28 | pass 29 | 30 | @abstractmethod 31 | def get_data_loader( 32 | self, batch_size: int, is_distributed: bool 33 | ) -> Dict[str, torch.utils.data.DataLoader]: 34 | pass 35 | 36 | @abstractmethod 37 | def get_optimizer(self, batch_size: int) -> torch.optim.Optimizer: 38 | pass 39 | 40 | @abstractmethod 41 | def get_lr_scheduler( 42 | self, lr: float, iters_per_epoch: int, **kwargs 43 | ) -> LRScheduler: 44 | pass 45 | 46 | @abstractmethod 47 | def get_evaluator(self): 48 | pass 49 | 50 | @abstractmethod 51 | def eval(self, model, evaluator, weights): 52 | pass 53 | 54 | def __repr__(self): 55 | table_header = ["keys", "values"] 56 | exp_table = [ 57 | (str(k), pprint.pformat(v)) 58 | for k, v in vars(self).items() 59 | if not k.startswith("_") 60 | ] 61 | return tabulate(exp_table, headers=table_header, tablefmt="fancy_grid") 62 | 63 | def merge(self, cfg_list): 64 | assert len(cfg_list) % 2 == 0 65 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]): 66 | # only update value with same key 67 | if hasattr(self, k): 68 | src_value = getattr(self, k) 69 | src_type = type(src_value) 70 | if src_value is not None and src_type != type(v): 71 | try: 72 | v = src_type(v) 73 | except Exception: 74 | v = ast.literal_eval(v) 75 | setattr(self, k, v) 76 | -------------------------------------------------------------------------------- /yolox/exp/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import importlib 6 | import os 7 | import sys 8 | 9 | 10 | def get_exp_by_file(exp_file): 11 | try: 12 | sys.path.append(os.path.dirname(exp_file)) 13 | current_exp = importlib.import_module(os.path.basename(exp_file).split(".")[0]) 14 | exp = current_exp.Exp() 15 | except Exception: 16 | raise ImportError("{} doesn't contains class named 'Exp'".format(exp_file)) 17 | return exp 18 | 19 | 20 | def get_exp_by_name(exp_name): 21 | import yolox 22 | 23 | yolox_path = os.path.dirname(os.path.dirname(yolox.__file__)) 24 | filedict = { 25 | "yolox-s": "yolox_s.py", 26 | "yolox-m": "yolox_m.py", 27 | "yolox-l": "yolox_l.py", 28 | "yolox-x": "yolox_x.py", 29 | "yolox-tiny": "yolox_tiny.py", 30 | "yolox-nano": "nano.py", 31 | "yolov3": "yolov3.py", 32 | } 33 | filename = filedict[exp_name] 34 | exp_path = os.path.join(yolox_path, "exps", "default", filename) 35 | return get_exp_by_file(exp_path) 36 | 37 | 38 | def get_exp(exp_file, exp_name): 39 | """ 40 | get Exp object by file or name. If exp_file and exp_name 41 | are both provided, get Exp by exp_file. 42 | 43 | Args: 44 | exp_file (str): file path of experiment. 45 | exp_name (str): name of experiment. "yolo-s", 46 | """ 47 | assert ( 48 | exp_file is not None or exp_name is not None 49 | ), "plz provide exp file or exp name." 
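    # e.g. get_exp("exps/default/yolox_s.py", None) and get_exp(None, "yolox-s") resolve to the
    # same Exp class; the file path takes priority when both arguments are given.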
50 | if exp_file is not None: 51 | return get_exp_by_file(exp_file) 52 | else: 53 | return get_exp_by_name(exp_name) 54 | -------------------------------------------------------------------------------- /yolox/layers/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | from .fast_coco_eval_api import COCOeval_opt 6 | -------------------------------------------------------------------------------- /yolox/layers/csrc/cocoeval/cocoeval.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace py = pybind11; 11 | 12 | namespace COCOeval { 13 | 14 | // Annotation data for a single object instance in an image 15 | struct InstanceAnnotation { 16 | InstanceAnnotation( 17 | uint64_t id, 18 | double score, 19 | double area, 20 | bool is_crowd, 21 | bool ignore) 22 | : id{id}, score{score}, area{area}, is_crowd{is_crowd}, ignore{ignore} {} 23 | uint64_t id; 24 | double score = 0.; 25 | double area = 0.; 26 | bool is_crowd = false; 27 | bool ignore = false; 28 | }; 29 | 30 | // Stores intermediate results for evaluating detection results for a single 31 | // image that has D detected instances and G ground truth instances. This stores 32 | // matches between detected and ground truth instances 33 | struct ImageEvaluation { 34 | // For each of the D detected instances, the id of the matched ground truth 35 | // instance, or 0 if unmatched 36 | std::vector detection_matches; 37 | 38 | // The detection score of each of the D detected instances 39 | std::vector detection_scores; 40 | 41 | // Marks whether or not each of G instances was ignored from evaluation (e.g., 42 | // because it's outside area_range) 43 | std::vector ground_truth_ignores; 44 | 45 | // Marks whether or not each of D instances was ignored from evaluation (e.g., 46 | // because it's outside aRng) 47 | std::vector detection_ignores; 48 | }; 49 | 50 | template 51 | using ImageCategoryInstances = std::vector>>; 52 | 53 | // C++ implementation of COCO API cocoeval.py::COCOeval.evaluateImg(). For each 54 | // combination of image, category, area range settings, and IOU thresholds to 55 | // evaluate, it matches detected instances to ground truth instances and stores 56 | // the results into a vector of ImageEvaluation results, which will be 57 | // interpreted by the COCOeval::Accumulate() function to produce precion-recall 58 | // curves. 
The parameters of nested vectors have the following semantics: 59 | // image_category_ious[i][c][d][g] is the intersection over union of the d'th 60 | // detected instance and g'th ground truth instance of 61 | // category category_ids[c] in image image_ids[i] 62 | // image_category_ground_truth_instances[i][c] is a vector of ground truth 63 | // instances in image image_ids[i] of category category_ids[c] 64 | // image_category_detection_instances[i][c] is a vector of detected 65 | // instances in image image_ids[i] of category category_ids[c] 66 | std::vector EvaluateImages( 67 | const std::vector>& area_ranges, // vector of 2-tuples 68 | int max_detections, 69 | const std::vector& iou_thresholds, 70 | const ImageCategoryInstances>& image_category_ious, 71 | const ImageCategoryInstances& 72 | image_category_ground_truth_instances, 73 | const ImageCategoryInstances& 74 | image_category_detection_instances); 75 | 76 | // C++ implementation of COCOeval.accumulate(), which generates precision 77 | // recall curves for each set of category, IOU threshold, detection area range, 78 | // and max number of detections parameters. It is assumed that the parameter 79 | // evaluations is the return value of the functon COCOeval::EvaluateImages(), 80 | // which was called with the same parameter settings params 81 | py::dict Accumulate( 82 | const py::object& params, 83 | const std::vector& evalutations); 84 | 85 | } // namespace COCOeval 86 | -------------------------------------------------------------------------------- /yolox/layers/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | #include "cocoeval/cocoeval.h" 2 | 3 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 4 | m.def("COCOevalAccumulate", &COCOeval::Accumulate, "COCOeval::Accumulate"); 5 | m.def( 6 | "COCOevalEvaluateImages", 7 | &COCOeval::EvaluateImages, 8 | "COCOeval::EvaluateImages"); 9 | pybind11::class_(m, "InstanceAnnotation") 10 | .def(pybind11::init()); 11 | pybind11::class_(m, "ImageEvaluation") 12 | .def(pybind11::init<>()); 13 | } 14 | -------------------------------------------------------------------------------- /yolox/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | from .darknet import CSPDarknet, Darknet 6 | from .losses import IOUloss 7 | from .yolo_fpn import YOLOFPN 8 | from .yolo_head import YOLOXHead 9 | from .yolo_pafpn import YOLOPAFPN 10 | from .yolox import YOLOX 11 | -------------------------------------------------------------------------------- /yolox/models/losses.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class IOUloss(nn.Module): 11 | def __init__(self, reduction="none", loss_type="iou"): 12 | super(IOUloss, self).__init__() 13 | self.reduction = reduction 14 | self.loss_type = loss_type 15 | 16 | def forward(self, pred, target): 17 | assert pred.shape[0] == target.shape[0] 18 | 19 | pred = pred.view(-1, 4) 20 | target = target.view(-1, 4) 21 | tl = torch.max( 22 | (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) 23 | ) 24 | br = torch.min( 25 | (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) 26 | ) 27 | 28 | area_p = torch.prod(pred[:, 2:], 1) 29 | area_g = torch.prod(target[:, 2:], 1) 30 | 31 | en = (tl < br).type(tl.type()).prod(dim=1) 32 | area_i = torch.prod(br - tl, 1) * en 33 | iou = (area_i) / (area_p + area_g - area_i + 1e-16) 34 | 35 | if self.loss_type == "iou": 36 | loss = 1 - iou ** 2 37 | elif self.loss_type == "giou": 38 | c_tl = torch.min( 39 | (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) 40 | ) 41 | c_br = torch.max( 42 | (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) 43 | ) 44 | area_c = torch.prod(c_br - c_tl, 1) 45 | giou = iou - (area_c - area_i) / area_c.clamp(1e-16) 46 | loss = 1 - giou.clamp(min=-1.0, max=1.0) 47 | 48 | if self.reduction == "mean": 49 | loss = loss.mean() 50 | elif self.reduction == "sum": 51 | loss = loss.sum() 52 | 53 | return loss 54 | 55 | 56 | def sigmoid_focal_loss(inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2): 57 | """ 58 | Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. 59 | Args: 60 | inputs: A float tensor of arbitrary shape. 61 | The predictions for each example. 62 | targets: A float tensor with the same shape as inputs. Stores the binary 63 | classification label for each element in inputs 64 | (0 for the negative class and 1 for the positive class). 65 | alpha: (optional) Weighting factor in range (0,1) to balance 66 | positive vs negative examples. Default = -1 (no weighting). 67 | gamma: Exponent of the modulating factor (1 - p_t) to 68 | balance easy vs hard examples. 69 | Returns: 70 | Loss tensor 71 | """ 72 | prob = inputs.sigmoid() 73 | ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none") 74 | p_t = prob * targets + (1 - prob) * (1 - targets) 75 | loss = ce_loss * ((1 - p_t) ** gamma) 76 | 77 | if alpha >= 0: 78 | alpha_t = alpha * targets + (1 - alpha) * (1 - targets) 79 | loss = alpha_t * loss 80 | #return loss.mean(0).sum() / num_boxes 81 | return loss.sum() / num_boxes -------------------------------------------------------------------------------- /yolox/models/yolo_fpn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .darknet import Darknet 9 | from .network_blocks import BaseConv 10 | 11 | 12 | class YOLOFPN(nn.Module): 13 | """ 14 | YOLOFPN module. Darknet 53 is the default backbone of this model. 
15 | """ 16 | 17 | def __init__( 18 | self, 19 | depth=53, 20 | in_features=["dark3", "dark4", "dark5"], 21 | ): 22 | super().__init__() 23 | 24 | self.backbone = Darknet(depth) 25 | self.in_features = in_features 26 | 27 | # out 1 28 | self.out1_cbl = self._make_cbl(512, 256, 1) 29 | self.out1 = self._make_embedding([256, 512], 512 + 256) 30 | 31 | # out 2 32 | self.out2_cbl = self._make_cbl(256, 128, 1) 33 | self.out2 = self._make_embedding([128, 256], 256 + 128) 34 | 35 | # upsample 36 | self.upsample = nn.Upsample(scale_factor=2, mode="nearest") 37 | 38 | def _make_cbl(self, _in, _out, ks): 39 | return BaseConv(_in, _out, ks, stride=1, act="lrelu") 40 | 41 | def _make_embedding(self, filters_list, in_filters): 42 | m = nn.Sequential( 43 | *[ 44 | self._make_cbl(in_filters, filters_list[0], 1), 45 | self._make_cbl(filters_list[0], filters_list[1], 3), 46 | self._make_cbl(filters_list[1], filters_list[0], 1), 47 | self._make_cbl(filters_list[0], filters_list[1], 3), 48 | self._make_cbl(filters_list[1], filters_list[0], 1), 49 | ] 50 | ) 51 | return m 52 | 53 | def load_pretrained_model(self, filename="./weights/darknet53.mix.pth"): 54 | with open(filename, "rb") as f: 55 | state_dict = torch.load(f, map_location="cpu") 56 | print("loading pretrained weights...") 57 | self.backbone.load_state_dict(state_dict) 58 | 59 | def forward(self, inputs): 60 | """ 61 | Args: 62 | inputs (Tensor): input image. 63 | 64 | Returns: 65 | Tuple[Tensor]: FPN output features.. 66 | """ 67 | # backbone 68 | out_features = self.backbone(inputs) 69 | x2, x1, x0 = [out_features[f] for f in self.in_features] 70 | 71 | # yolo branch 1 72 | x1_in = self.out1_cbl(x0) 73 | x1_in = self.upsample(x1_in) 74 | x1_in = torch.cat([x1_in, x1], 1) 75 | out_dark4 = self.out1(x1_in) 76 | 77 | # yolo branch 2 78 | x2_in = self.out2_cbl(out_dark4) 79 | x2_in = self.upsample(x2_in) 80 | x2_in = torch.cat([x2_in, x2], 1) 81 | out_dark3 = self.out2(x2_in) 82 | 83 | outputs = (out_dark3, out_dark4, x0) 84 | return outputs 85 | -------------------------------------------------------------------------------- /yolox/models/yolo_pafpn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .darknet import CSPDarknet 9 | from .network_blocks import BaseConv, CSPLayer, DWConv 10 | 11 | 12 | class YOLOPAFPN(nn.Module): 13 | """ 14 | YOLOv3 model. Darknet 53 is the default backbone of this model. 
15 | """ 16 | 17 | def __init__( 18 | self, 19 | depth=1.0, 20 | width=1.0, 21 | in_features=("dark3", "dark4", "dark5"), 22 | in_channels=[256, 512, 1024], 23 | depthwise=False, 24 | act="silu", 25 | ): 26 | super().__init__() 27 | self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act) 28 | self.in_features = in_features 29 | self.in_channels = in_channels 30 | Conv = DWConv if depthwise else BaseConv 31 | 32 | self.upsample = nn.Upsample(scale_factor=2, mode="nearest") 33 | self.lateral_conv0 = BaseConv( 34 | int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act 35 | ) 36 | self.C3_p4 = CSPLayer( 37 | int(2 * in_channels[1] * width), 38 | int(in_channels[1] * width), 39 | round(3 * depth), 40 | False, 41 | depthwise=depthwise, 42 | act=act, 43 | ) # cat 44 | 45 | self.reduce_conv1 = BaseConv( 46 | int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act 47 | ) 48 | self.C3_p3 = CSPLayer( 49 | int(2 * in_channels[0] * width), 50 | int(in_channels[0] * width), 51 | round(3 * depth), 52 | False, 53 | depthwise=depthwise, 54 | act=act, 55 | ) 56 | 57 | # bottom-up conv 58 | self.bu_conv2 = Conv( 59 | int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act 60 | ) 61 | self.C3_n3 = CSPLayer( 62 | int(2 * in_channels[0] * width), 63 | int(in_channels[1] * width), 64 | round(3 * depth), 65 | False, 66 | depthwise=depthwise, 67 | act=act, 68 | ) 69 | 70 | # bottom-up conv 71 | self.bu_conv1 = Conv( 72 | int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act 73 | ) 74 | self.C3_n4 = CSPLayer( 75 | int(2 * in_channels[1] * width), 76 | int(in_channels[2] * width), 77 | round(3 * depth), 78 | False, 79 | depthwise=depthwise, 80 | act=act, 81 | ) 82 | 83 | def forward(self, input): 84 | """ 85 | Args: 86 | inputs: input images. 87 | 88 | Returns: 89 | Tuple[Tensor]: FPN feature. 90 | """ 91 | 92 | # backbone 93 | out_features = self.backbone(input) 94 | features = [out_features[f] for f in self.in_features] 95 | [x2, x1, x0] = features 96 | 97 | fpn_out0 = self.lateral_conv0(x0) # 1024->512/32 98 | f_out0 = self.upsample(fpn_out0) # 512/16 99 | f_out0 = torch.cat([f_out0, x1], 1) # 512->1024/16 100 | f_out0 = self.C3_p4(f_out0) # 1024->512/16 101 | 102 | fpn_out1 = self.reduce_conv1(f_out0) # 512->256/16 103 | f_out1 = self.upsample(fpn_out1) # 256/8 104 | f_out1 = torch.cat([f_out1, x2], 1) # 256->512/8 105 | pan_out2 = self.C3_p3(f_out1) # 512->256/8 106 | 107 | p_out1 = self.bu_conv2(pan_out2) # 256->256/16 108 | p_out1 = torch.cat([p_out1, fpn_out1], 1) # 256->512/16 109 | pan_out1 = self.C3_n3(p_out1) # 512->512/16 110 | 111 | p_out0 = self.bu_conv1(pan_out1) # 512->512/32 112 | p_out0 = torch.cat([p_out0, fpn_out0], 1) # 512->1024/32 113 | pan_out0 = self.C3_n4(p_out0) # 1024->1024/32 114 | 115 | outputs = (pan_out2, pan_out1, pan_out0) 116 | return outputs 117 | -------------------------------------------------------------------------------- /yolox/models/yolox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import torch.nn as nn 6 | 7 | from .yolo_head import YOLOXHead 8 | from .yolo_pafpn import YOLOPAFPN 9 | 10 | 11 | class YOLOX(nn.Module): 12 | """ 13 | YOLOX model module. The module list is defined by create_yolov3_modules function. 14 | The network returns loss values from three YOLO layers during training 15 | and detection results during test. 
16 | """ 17 | 18 | def __init__(self, backbone=None, head=None): 19 | super().__init__() 20 | if backbone is None: 21 | backbone = YOLOPAFPN() 22 | if head is None: 23 | head = YOLOXHead(80) 24 | 25 | self.backbone = backbone 26 | self.head = head 27 | 28 | def forward(self, x, targets=None): 29 | # fpn output content features of [dark3, dark4, dark5] 30 | fpn_outs = self.backbone(x) 31 | 32 | if self.training: 33 | assert targets is not None 34 | loss, iou_loss, conf_loss, cls_loss, l1_loss, num_fg = self.head( 35 | fpn_outs, targets, x 36 | ) 37 | outputs = { 38 | "total_loss": loss, 39 | "iou_loss": iou_loss, 40 | "l1_loss": l1_loss, 41 | "conf_loss": conf_loss, 42 | "cls_loss": cls_loss, 43 | "num_fg": num_fg, 44 | } 45 | else: 46 | outputs = self.head(fpn_outs) 47 | 48 | return outputs 49 | -------------------------------------------------------------------------------- /yolox/motdt_tracker/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | Replaced = 4 11 | 12 | 13 | class BaseTrack(object): 14 | _count = 0 15 | 16 | track_id = 0 17 | is_activated = False 18 | state = TrackState.New 19 | 20 | history = OrderedDict() 21 | features = [] 22 | curr_feature = None 23 | score = 0 24 | start_frame = 0 25 | frame_id = 0 26 | time_since_update = 0 27 | 28 | # multi-camera 29 | location = (np.inf, np.inf) 30 | 31 | @property 32 | def end_frame(self): 33 | return self.frame_id 34 | 35 | @staticmethod 36 | def next_id(): 37 | BaseTrack._count += 1 38 | return BaseTrack._count 39 | 40 | def activate(self, *args): 41 | raise NotImplementedError 42 | 43 | def predict(self): 44 | raise NotImplementedError 45 | 46 | def update(self, *args, **kwargs): 47 | raise NotImplementedError 48 | 49 | def mark_lost(self): 50 | self.state = TrackState.Lost 51 | 52 | def mark_removed(self): 53 | self.state = TrackState.Removed 54 | 55 | def mark_replaced(self): 56 | self.state = TrackState.Replaced 57 | -------------------------------------------------------------------------------- /yolox/motdt_tracker/matching.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import lap 4 | from scipy.spatial.distance import cdist 5 | 6 | from cython_bbox import bbox_overlaps as bbox_ious 7 | from yolox.motdt_tracker import kalman_filter 8 | 9 | 10 | def _indices_to_matches(cost_matrix, indices, thresh): 11 | matched_cost = cost_matrix[tuple(zip(*indices))] 12 | matched_mask = (matched_cost <= thresh) 13 | 14 | matches = indices[matched_mask] 15 | unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) 16 | unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) 17 | 18 | return matches, unmatched_a, unmatched_b 19 | 20 | 21 | def linear_assignment(cost_matrix, thresh): 22 | if cost_matrix.size == 0: 23 | return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) 24 | matches, unmatched_a, unmatched_b = [], [], [] 25 | cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) 26 | for ix, mx in enumerate(x): 27 | if mx >= 0: 28 | matches.append([ix, mx]) 29 | unmatched_a = np.where(x < 0)[0] 30 | unmatched_b = np.where(y < 0)[0] 31 | matches = np.asarray(matches) 32 | return matches, unmatched_a, unmatched_b 33 | 34 | 35 | def ious(atlbrs, btlbrs): 
36 | """ 37 | Compute cost based on IoU 38 | :type atlbrs: list[tlbr] | np.ndarray 39 | :type atlbrs: list[tlbr] | np.ndarray 40 | :rtype ious np.ndarray 41 | """ 42 | ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) 43 | if ious.size == 0: 44 | return ious 45 | 46 | ious = bbox_ious( 47 | np.ascontiguousarray(atlbrs, dtype=np.float), 48 | np.ascontiguousarray(btlbrs, dtype=np.float) 49 | ) 50 | 51 | return ious 52 | 53 | 54 | def iou_distance(atracks, btracks): 55 | """ 56 | Compute cost based on IoU 57 | :type atracks: list[STrack] 58 | :type btracks: list[STrack] 59 | :rtype cost_matrix np.ndarray 60 | """ 61 | atlbrs = [track.tlbr for track in atracks] 62 | btlbrs = [track.tlbr for track in btracks] 63 | _ious = ious(atlbrs, btlbrs) 64 | cost_matrix = 1 - _ious 65 | 66 | return cost_matrix 67 | 68 | 69 | def nearest_reid_distance(tracks, detections, metric='cosine'): 70 | """ 71 | Compute cost based on ReID features 72 | :type tracks: list[STrack] 73 | :type detections: list[BaseTrack] 74 | :rtype cost_matrix np.ndarray 75 | """ 76 | cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) 77 | if cost_matrix.size == 0: 78 | return cost_matrix 79 | 80 | det_features = np.asarray([track.curr_feature for track in detections], dtype=np.float32) 81 | for i, track in enumerate(tracks): 82 | cost_matrix[i, :] = np.maximum(0.0, cdist(track.features, det_features, metric).min(axis=0)) 83 | 84 | return cost_matrix 85 | 86 | 87 | def mean_reid_distance(tracks, detections, metric='cosine'): 88 | """ 89 | Compute cost based on ReID features 90 | :type tracks: list[STrack] 91 | :type detections: list[BaseTrack] 92 | :type metric: str 93 | :rtype cost_matrix np.ndarray 94 | """ 95 | cost_matrix = np.empty((len(tracks), len(detections)), dtype=np.float) 96 | if cost_matrix.size == 0: 97 | return cost_matrix 98 | 99 | track_features = np.asarray([track.curr_feature for track in tracks], dtype=np.float32) 100 | det_features = np.asarray([track.curr_feature for track in detections], dtype=np.float32) 101 | cost_matrix = cdist(track_features, det_features, metric) 102 | 103 | return cost_matrix 104 | 105 | 106 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): 107 | if cost_matrix.size == 0: 108 | return cost_matrix 109 | gating_dim = 2 if only_position else 4 110 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 111 | measurements = np.asarray([det.to_xyah() for det in detections]) 112 | for row, track in enumerate(tracks): 113 | gating_distance = kf.gating_distance( 114 | track.mean, track.covariance, measurements, only_position) 115 | cost_matrix[row, gating_distance > gating_threshold] = np.inf 116 | return cost_matrix -------------------------------------------------------------------------------- /yolox/tracker/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | 
BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed -------------------------------------------------------------------------------- /yolox/tracking_utils/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | mm.lap.default_solver = 'lap' 6 | 7 | from yolox.tracking_utils.io import read_results, unzip_objs 8 | 9 | 10 | class Evaluator(object): 11 | 12 | def __init__(self, data_root, seq_name, data_type): 13 | self.data_root = data_root 14 | self.seq_name = seq_name 15 | self.data_type = data_type 16 | 17 | self.load_annotations() 18 | self.reset_accumulator() 19 | 20 | def load_annotations(self): 21 | assert self.data_type == 'mot' 22 | 23 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 24 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 25 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 26 | 27 | def reset_accumulator(self): 28 | self.acc = mm.MOTAccumulator(auto_id=True) 29 | 30 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 31 | # results 32 | trk_tlwhs = np.copy(trk_tlwhs) 33 | trk_ids = np.copy(trk_ids) 34 | 35 | # gts 36 | gt_objs = self.gt_frame_dict.get(frame_id, []) 37 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 38 | 39 | # ignore boxes 40 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 41 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | #match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 57 | #match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 58 | #match_ious = iou_distance[match_is, match_js] 59 | 60 | #match_js = np.asarray(match_js, dtype=int) 61 | #match_js = match_js[np.logical_not(np.isnan(match_ious))] 62 | #keep[match_js] = False 63 | #trk_tlwhs = trk_tlwhs[keep] 64 | #trk_ids = trk_ids[keep] 65 | 66 | # get distance matrix 67 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 68 | 69 | # acc 70 | self.acc.update(gt_ids, trk_ids, iou_distance) 71 | 72 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 73 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 74 | else: 75 | events = None 76 | return events 77 | 78 | def eval_file(self, filename): 79 | self.reset_accumulator() 80 | 81 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 82 | #frames = sorted(list(set(self.gt_frame_dict.keys()) | 
set(result_frame_dict.keys()))) 83 | frames = sorted(list(set(result_frame_dict.keys()))) 84 | for frame_id in frames: 85 | trk_objs = result_frame_dict.get(frame_id, []) 86 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 87 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 88 | 89 | return self.acc 90 | 91 | @staticmethod 92 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 93 | names = copy.deepcopy(names) 94 | if metrics is None: 95 | metrics = mm.metrics.motchallenge_metrics 96 | metrics = copy.deepcopy(metrics) 97 | 98 | mh = mm.metrics.create() 99 | summary = mh.compute_many( 100 | accs, 101 | metrics=metrics, 102 | names=names, 103 | generate_overall=True 104 | ) 105 | 106 | return summary 107 | 108 | @staticmethod 109 | def save_summary(summary, filename): 110 | import pandas as pd 111 | writer = pd.ExcelWriter(filename) 112 | summary.to_excel(writer) 113 | writer.save() -------------------------------------------------------------------------------- /yolox/tracking_utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | 6 | def write_results(filename, results_dict: Dict, data_type: str): 7 | if not filename: 8 | return 9 | path = os.path.dirname(filename) 10 | if not os.path.exists(path): 11 | os.makedirs(path) 12 | 13 | if data_type in ('mot', 'mcmot', 'lab'): 14 | save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 15 | elif data_type == 'kitti': 16 | save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 17 | else: 18 | raise ValueError(data_type) 19 | 20 | with open(filename, 'w') as f: 21 | for frame_id, frame_data in results_dict.items(): 22 | if data_type == 'kitti': 23 | frame_id -= 1 24 | for tlwh, track_id in frame_data: 25 | if track_id < 0: 26 | continue 27 | x1, y1, w, h = tlwh 28 | x2, y2 = x1 + w, y1 + h 29 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 30 | f.write(line) 31 | 32 | 33 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 34 | if data_type in ('mot', 'lab'): 35 | read_fun = read_mot_results 36 | else: 37 | raise ValueError('Unknown data type: {}'.format(data_type)) 38 | 39 | return read_fun(filename, is_gt, is_ignore) 40 | 41 | 42 | """ 43 | labels={'ped', ... % 1 44 | 'person_on_vhcl', ... % 2 45 | 'car', ... % 3 46 | 'bicycle', ... % 4 47 | 'mbike', ... % 5 48 | 'non_mot_vhcl', ... % 6 49 | 'static_person', ... % 7 50 | 'distractor', ... % 8 51 | 'occluder', ... % 9 52 | 'occluder_on_grnd', ... %10 53 | 'occluder_full', ... % 11 54 | 'reflection', ... % 12 55 | 'crowd' ... 
% 13 56 | }; 57 | """ 58 | 59 | 60 | def read_mot_results(filename, is_gt, is_ignore): 61 | valid_labels = {1} 62 | ignore_labels = {2, 7, 8, 12} 63 | results_dict = dict() 64 | if os.path.isfile(filename): 65 | with open(filename, 'r') as f: 66 | for line in f.readlines(): 67 | linelist = line.split(',') 68 | if len(linelist) < 7: 69 | continue 70 | fid = int(linelist[0]) 71 | if fid < 1: 72 | continue 73 | results_dict.setdefault(fid, list()) 74 | 75 | box_size = float(linelist[4]) * float(linelist[5]) 76 | 77 | if is_gt: 78 | if 'MOT16-' in filename or 'MOT17-' in filename: 79 | label = int(float(linelist[7])) 80 | mark = int(float(linelist[6])) 81 | if mark == 0 or label not in valid_labels: 82 | continue 83 | score = 1 84 | elif is_ignore: 85 | if 'MOT16-' in filename or 'MOT17-' in filename: 86 | label = int(float(linelist[7])) 87 | vis_ratio = float(linelist[8]) 88 | if label not in ignore_labels and vis_ratio >= 0: 89 | continue 90 | else: 91 | continue 92 | score = 1 93 | else: 94 | score = float(linelist[6]) 95 | 96 | #if box_size > 7000: 97 | #if box_size <= 7000 or box_size >= 15000: 98 | #if box_size < 15000: 99 | #continue 100 | 101 | tlwh = tuple(map(float, linelist[2:6])) 102 | target_id = int(linelist[1]) 103 | 104 | results_dict[fid].append((tlwh, target_id, score)) 105 | 106 | return results_dict 107 | 108 | 109 | def unzip_objs(objs): 110 | if len(objs) > 0: 111 | tlwhs, ids, scores = zip(*objs) 112 | else: 113 | tlwhs, ids, scores = [], [], [] 114 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 115 | 116 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /yolox/tracking_utils/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | class Timer(object): 5 | """A simple timer.""" 6 | def __init__(self): 7 | self.total_time = 0. 8 | self.calls = 0 9 | self.start_time = 0. 10 | self.diff = 0. 11 | self.average_time = 0. 12 | 13 | self.duration = 0. 14 | 15 | def tic(self): 16 | # using time.time instead of time.clock because time time.clock 17 | # does not normalize for multithreading 18 | self.start_time = time.time() 19 | 20 | def toc(self, average=True): 21 | self.diff = time.time() - self.start_time 22 | self.total_time += self.diff 23 | self.calls += 1 24 | self.average_time = self.total_time / self.calls 25 | if average: 26 | self.duration = self.average_time 27 | else: 28 | self.duration = self.diff 29 | return self.duration 30 | 31 | def clear(self): 32 | self.total_time = 0. 33 | self.calls = 0 34 | self.start_time = 0. 35 | self.diff = 0. 36 | self.average_time = 0. 37 | self.duration = 0. -------------------------------------------------------------------------------- /yolox/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
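# [Editor's note] This package re-exports the training/inference helpers from
# the modules imported below, so callers can pull them straight from
# yolox.utils. A small illustrative sketch (the save directory is a
# placeholder):
#
#   from yolox.utils import setup_logger, fuse_model, get_model_info, MeterBuffer
#
#   setup_logger("YOLOX_outputs/example_exp", distributed_rank=0, filename="train_log.txt")
#   meters = MeterBuffer(window_size=20)  # smoothed loss/time meters (see metric.py)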
4 | 5 | from .allreduce_norm import * 6 | from .boxes import * 7 | from .checkpoint import load_ckpt, save_checkpoint 8 | from .demo_utils import * 9 | from .dist import * 10 | from .ema import ModelEMA 11 | from .logger import setup_logger 12 | from .lr_scheduler import LRScheduler 13 | from .metric import * 14 | from .model_utils import * 15 | from .setup_env import * 16 | from .visualize import * 17 | -------------------------------------------------------------------------------- /yolox/utils/allreduce_norm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import torch 6 | from torch import distributed as dist 7 | from torch import nn 8 | 9 | import pickle 10 | from collections import OrderedDict 11 | 12 | from .dist import _get_global_gloo_group, get_world_size 13 | 14 | ASYNC_NORM = ( 15 | nn.BatchNorm1d, 16 | nn.BatchNorm2d, 17 | nn.BatchNorm3d, 18 | nn.InstanceNorm1d, 19 | nn.InstanceNorm2d, 20 | nn.InstanceNorm3d, 21 | ) 22 | 23 | __all__ = [ 24 | "get_async_norm_states", 25 | "pyobj2tensor", 26 | "tensor2pyobj", 27 | "all_reduce", 28 | "all_reduce_norm", 29 | ] 30 | 31 | 32 | def get_async_norm_states(module): 33 | async_norm_states = OrderedDict() 34 | for name, child in module.named_modules(): 35 | if isinstance(child, ASYNC_NORM): 36 | for k, v in child.state_dict().items(): 37 | async_norm_states[".".join([name, k])] = v 38 | return async_norm_states 39 | 40 | 41 | def pyobj2tensor(pyobj, device="cuda"): 42 | """serialize picklable python object to tensor""" 43 | storage = torch.ByteStorage.from_buffer(pickle.dumps(pyobj)) 44 | return torch.ByteTensor(storage).to(device=device) 45 | 46 | 47 | def tensor2pyobj(tensor): 48 | """deserialize tensor to picklable python object""" 49 | return pickle.loads(tensor.cpu().numpy().tobytes()) 50 | 51 | 52 | def _get_reduce_op(op_name): 53 | return { 54 | "sum": dist.ReduceOp.SUM, 55 | "mean": dist.ReduceOp.SUM, 56 | }[op_name.lower()] 57 | 58 | 59 | def all_reduce(py_dict, op="sum", group=None): 60 | """ 61 | Apply all reduce function for python dict object. 62 | NOTE: make sure that every py_dict has the same keys and values are in the same shape. 63 | 64 | Args: 65 | py_dict (dict): dict to apply all reduce op. 66 | op (str): operator, could be "sum" or "mean". 67 | """ 68 | world_size = get_world_size() 69 | if world_size == 1: 70 | return py_dict 71 | if group is None: 72 | group = _get_global_gloo_group() 73 | if dist.get_world_size(group) == 1: 74 | return py_dict 75 | 76 | # all reduce logic across different devices. 77 | py_key = list(py_dict.keys()) 78 | py_key_tensor = pyobj2tensor(py_key) 79 | dist.broadcast(py_key_tensor, src=0) 80 | py_key = tensor2pyobj(py_key_tensor) 81 | 82 | tensor_shapes = [py_dict[k].shape for k in py_key] 83 | tensor_numels = [py_dict[k].numel() for k in py_key] 84 | 85 | flatten_tensor = torch.cat([py_dict[k].flatten() for k in py_key]) 86 | dist.all_reduce(flatten_tensor, op=_get_reduce_op(op)) 87 | if op == "mean": 88 | flatten_tensor /= world_size 89 | 90 | split_tensors = [ 91 | x.reshape(shape) 92 | for x, shape in zip(torch.split(flatten_tensor, tensor_numels), tensor_shapes) 93 | ] 94 | return OrderedDict({k: v for k, v in zip(py_key, split_tensors)}) 95 | 96 | 97 | def all_reduce_norm(module): 98 | """ 99 | All reduce norm statistics in different devices. 
100 | """ 101 | states = get_async_norm_states(module) 102 | states = all_reduce(states, op="mean") 103 | module.load_state_dict(states, strict=False) 104 | -------------------------------------------------------------------------------- /yolox/utils/checkpoint.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | from loguru import logger 5 | 6 | import torch 7 | 8 | import os 9 | import shutil 10 | 11 | 12 | def load_ckpt(model, ckpt): 13 | model_state_dict = model.state_dict() 14 | load_dict = {} 15 | for key_model, v in model_state_dict.items(): 16 | if key_model not in ckpt: 17 | logger.warning( 18 | "{} is not in the ckpt. Please double check and see if this is desired.".format( 19 | key_model 20 | ) 21 | ) 22 | continue 23 | v_ckpt = ckpt[key_model] 24 | if v.shape != v_ckpt.shape: 25 | logger.warning( 26 | "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format( 27 | key_model, v_ckpt.shape, key_model, v.shape 28 | ) 29 | ) 30 | continue 31 | load_dict[key_model] = v_ckpt 32 | 33 | model.load_state_dict(load_dict, strict=False) 34 | return model 35 | 36 | 37 | def save_checkpoint(state, is_best, save_dir, model_name=""): 38 | if not os.path.exists(save_dir): 39 | os.makedirs(save_dir) 40 | filename = os.path.join(save_dir, model_name + "_ckpt.pth.tar") 41 | torch.save(state, filename) 42 | if is_best: 43 | best_filename = os.path.join(save_dir, "best_ckpt.pth.tar") 44 | shutil.copyfile(filename, best_filename) 45 | -------------------------------------------------------------------------------- /yolox/utils/demo_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
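# [Editor's sketch] Typical post-processing flow for raw demo output (e.g. from
# the ONNXRuntime demo). Shapes, thresholds, and variable names below are
# illustrative rather than taken from the original scripts:
#
#   preds = demo_postprocess(raw_output, img_size=(640, 640))[0]  # (n_anchors, 5 + num_classes)
#   boxes = preds[:, :4]                   # (cx, cy, w, h) in input-image scale
#   scores = preds[:, 4:5] * preds[:, 5:]  # objectness * class probability
#   boxes_xyxy = boxes.copy()
#   boxes_xyxy[:, :2] = boxes[:, :2] - boxes[:, 2:] / 2.0
#   boxes_xyxy[:, 2:] = boxes[:, :2] + boxes[:, 2:] / 2.0
#   dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1)
#   # dets: (N, 6) array of [x1, y1, x2, y2, score, class_id], or None if empty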
4 | 5 | import numpy as np 6 | 7 | import os 8 | 9 | __all__ = ["mkdir", "nms", "multiclass_nms", "demo_postprocess"] 10 | 11 | 12 | def mkdir(path): 13 | if not os.path.exists(path): 14 | os.makedirs(path) 15 | 16 | 17 | def nms(boxes, scores, nms_thr): 18 | """Single class NMS implemented in Numpy.""" 19 | x1 = boxes[:, 0] 20 | y1 = boxes[:, 1] 21 | x2 = boxes[:, 2] 22 | y2 = boxes[:, 3] 23 | 24 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | order = scores.argsort()[::-1] 26 | 27 | keep = [] 28 | while order.size > 0: 29 | i = order[0] 30 | keep.append(i) 31 | xx1 = np.maximum(x1[i], x1[order[1:]]) 32 | yy1 = np.maximum(y1[i], y1[order[1:]]) 33 | xx2 = np.minimum(x2[i], x2[order[1:]]) 34 | yy2 = np.minimum(y2[i], y2[order[1:]]) 35 | 36 | w = np.maximum(0.0, xx2 - xx1 + 1) 37 | h = np.maximum(0.0, yy2 - yy1 + 1) 38 | inter = w * h 39 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 40 | 41 | inds = np.where(ovr <= nms_thr)[0] 42 | order = order[inds + 1] 43 | 44 | return keep 45 | 46 | 47 | def multiclass_nms(boxes, scores, nms_thr, score_thr): 48 | """Multiclass NMS implemented in Numpy""" 49 | final_dets = [] 50 | num_classes = scores.shape[1] 51 | for cls_ind in range(num_classes): 52 | cls_scores = scores[:, cls_ind] 53 | valid_score_mask = cls_scores > score_thr 54 | if valid_score_mask.sum() == 0: 55 | continue 56 | else: 57 | valid_scores = cls_scores[valid_score_mask] 58 | valid_boxes = boxes[valid_score_mask] 59 | keep = nms(valid_boxes, valid_scores, nms_thr) 60 | if len(keep) > 0: 61 | cls_inds = np.ones((len(keep), 1)) * cls_ind 62 | dets = np.concatenate( 63 | [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1 64 | ) 65 | final_dets.append(dets) 66 | if len(final_dets) == 0: 67 | return None 68 | return np.concatenate(final_dets, 0) 69 | 70 | 71 | def demo_postprocess(outputs, img_size, p6=False): 72 | 73 | grids = [] 74 | expanded_strides = [] 75 | 76 | if not p6: 77 | strides = [8, 16, 32] 78 | else: 79 | strides = [8, 16, 32, 64] 80 | 81 | hsizes = [img_size[0] // stride for stride in strides] 82 | wsizes = [img_size[1] // stride for stride in strides] 83 | 84 | for hsize, wsize, stride in zip(hsizes, wsizes, strides): 85 | xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize)) 86 | grid = np.stack((xv, yv), 2).reshape(1, -1, 2) 87 | grids.append(grid) 88 | shape = grid.shape[:2] 89 | expanded_strides.append(np.full((*shape, 1), stride)) 90 | 91 | grids = np.concatenate(grids, 1) 92 | expanded_strides = np.concatenate(expanded_strides, 1) 93 | outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides 94 | outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides 95 | 96 | return outputs 97 | -------------------------------------------------------------------------------- /yolox/utils/ema.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | import torch 5 | import torch.nn as nn 6 | 7 | import math 8 | from copy import deepcopy 9 | 10 | 11 | def is_parallel(model): 12 | """check if model is in parallel mode.""" 13 | 14 | parallel_type = ( 15 | nn.parallel.DataParallel, 16 | nn.parallel.DistributedDataParallel, 17 | ) 18 | return isinstance(model, parallel_type) 19 | 20 | 21 | def copy_attr(a, b, include=(), exclude=()): 22 | # Copy attributes from b to a, options to only include [...] and to exclude [...] 
23 | for k, v in b.__dict__.items(): 24 | if (len(include) and k not in include) or k.startswith("_") or k in exclude: 25 | continue 26 | else: 27 | setattr(a, k, v) 28 | 29 | 30 | class ModelEMA: 31 | """ 32 | Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models 33 | Keep a moving average of everything in the model state_dict (parameters and buffers). 34 | This is intended to allow functionality like 35 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 36 | A smoothed version of the weights is necessary for some training schemes to perform well. 37 | This class is sensitive where it is initialized in the sequence of model init, 38 | GPU assignment and distributed training wrappers. 39 | """ 40 | 41 | def __init__(self, model, decay=0.9999, updates=0): 42 | """ 43 | Args: 44 | model (nn.Module): model to apply EMA. 45 | decay (float): ema decay reate. 46 | updates (int): counter of EMA updates. 47 | """ 48 | # Create EMA(FP32) 49 | self.ema = deepcopy(model.module if is_parallel(model) else model).eval() 50 | self.updates = updates 51 | # decay exponential ramp (to help early epochs) 52 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) 53 | for p in self.ema.parameters(): 54 | p.requires_grad_(False) 55 | 56 | def update(self, model): 57 | # Update EMA parameters 58 | with torch.no_grad(): 59 | self.updates += 1 60 | d = self.decay(self.updates) 61 | 62 | msd = ( 63 | model.module.state_dict() if is_parallel(model) else model.state_dict() 64 | ) # model state_dict 65 | for k, v in self.ema.state_dict().items(): 66 | if v.dtype.is_floating_point: 67 | v *= d 68 | v += (1.0 - d) * msd[k].detach() 69 | 70 | def update_attr(self, model, include=(), exclude=("process_group", "reducer")): 71 | # Update EMA attributes 72 | copy_attr(self.ema, model, include, exclude) 73 | -------------------------------------------------------------------------------- /yolox/utils/logger.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | from loguru import logger 6 | 7 | import inspect 8 | import os 9 | import sys 10 | 11 | 12 | def get_caller_name(depth=0): 13 | """ 14 | Args: 15 | depth (int): Depth of caller conext, use 0 for caller depth. Default value: 0. 16 | 17 | Returns: 18 | str: module name of the caller 19 | """ 20 | # the following logic is a little bit faster than inspect.stack() logic 21 | frame = inspect.currentframe().f_back 22 | for _ in range(depth): 23 | frame = frame.f_back 24 | 25 | return frame.f_globals["__name__"] 26 | 27 | 28 | class StreamToLoguru: 29 | """ 30 | stream object that redirects writes to a logger instance. 31 | """ 32 | 33 | def __init__(self, level="INFO", caller_names=("apex", "pycocotools")): 34 | """ 35 | Args: 36 | level(str): log level string of loguru. Default value: "INFO". 37 | caller_names(tuple): caller names of redirected module. 38 | Default value: (apex, pycocotools). 
39 | """ 40 | self.level = level 41 | self.linebuf = "" 42 | self.caller_names = caller_names 43 | 44 | def write(self, buf): 45 | full_name = get_caller_name(depth=1) 46 | module_name = full_name.rsplit(".", maxsplit=-1)[0] 47 | if module_name in self.caller_names: 48 | for line in buf.rstrip().splitlines(): 49 | # use caller level log 50 | logger.opt(depth=2).log(self.level, line.rstrip()) 51 | else: 52 | sys.__stdout__.write(buf) 53 | 54 | def flush(self): 55 | pass 56 | 57 | 58 | def redirect_sys_output(log_level="INFO"): 59 | redirect_logger = StreamToLoguru(log_level) 60 | sys.stderr = redirect_logger 61 | sys.stdout = redirect_logger 62 | 63 | 64 | def setup_logger(save_dir, distributed_rank=0, filename="log.txt", mode="a"): 65 | """setup logger for training and testing. 66 | Args: 67 | save_dir(str): location to save log file 68 | distributed_rank(int): device rank when multi-gpu environment 69 | filename (string): log save name. 70 | mode(str): log file write mode, `append` or `override`. default is `a`. 71 | 72 | Return: 73 | logger instance. 74 | """ 75 | loguru_format = ( 76 | "{time:YYYY-MM-DD HH:mm:ss} | " 77 | "{level: <8} | " 78 | "{name}:{line} - {message}" 79 | ) 80 | 81 | logger.remove() 82 | save_file = os.path.join(save_dir, filename) 83 | if mode == "o" and os.path.exists(save_file): 84 | os.remove(save_file) 85 | # only keep logger in rank0 process 86 | if distributed_rank == 0: 87 | logger.add( 88 | sys.stderr, 89 | format=loguru_format, 90 | level="INFO", 91 | enqueue=True, 92 | ) 93 | logger.add(save_file) 94 | 95 | # redirect stdout/stderr to loguru 96 | redirect_sys_output("INFO") 97 | -------------------------------------------------------------------------------- /yolox/utils/metric.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | import numpy as np 5 | 6 | import torch 7 | 8 | import functools 9 | import os 10 | import time 11 | from collections import defaultdict, deque 12 | 13 | __all__ = [ 14 | "AverageMeter", 15 | "MeterBuffer", 16 | "get_total_and_free_memory_in_Mb", 17 | "occupy_mem", 18 | "gpu_mem_usage", 19 | ] 20 | 21 | 22 | def get_total_and_free_memory_in_Mb(cuda_device): 23 | devices_info_str = os.popen( 24 | "nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader" 25 | ) 26 | devices_info = devices_info_str.read().strip().split("\n") 27 | total, used = devices_info[int(cuda_device)].split(",") 28 | return int(total), int(used) 29 | 30 | 31 | def occupy_mem(cuda_device, mem_ratio=0.95): 32 | """ 33 | pre-allocate gpu memory for training to avoid memory Fragmentation. 34 | """ 35 | total, used = get_total_and_free_memory_in_Mb(cuda_device) 36 | max_mem = int(total * mem_ratio) 37 | block_mem = max_mem - used 38 | x = torch.cuda.FloatTensor(256, 1024, block_mem) 39 | del x 40 | time.sleep(5) 41 | 42 | 43 | def gpu_mem_usage(): 44 | """ 45 | Compute the GPU memory usage for the current device (MB). 46 | """ 47 | mem_usage_bytes = torch.cuda.max_memory_allocated() 48 | return mem_usage_bytes / (1024 * 1024) 49 | 50 | 51 | class AverageMeter: 52 | """Track a series of values and provide access to smoothed values over a 53 | window or the global series average. 
54 | """ 55 | 56 | def __init__(self, window_size=50): 57 | self._deque = deque(maxlen=window_size) 58 | self._total = 0.0 59 | self._count = 0 60 | 61 | def update(self, value): 62 | self._deque.append(value) 63 | self._count += 1 64 | self._total += value 65 | 66 | @property 67 | def median(self): 68 | d = np.array(list(self._deque)) 69 | return np.median(d) 70 | 71 | @property 72 | def avg(self): 73 | # if deque is empty, nan will be returned. 74 | d = np.array(list(self._deque)) 75 | return d.mean() 76 | 77 | @property 78 | def global_avg(self): 79 | return self._total / max(self._count, 1e-5) 80 | 81 | @property 82 | def latest(self): 83 | return self._deque[-1] if len(self._deque) > 0 else None 84 | 85 | @property 86 | def total(self): 87 | return self._total 88 | 89 | def reset(self): 90 | self._deque.clear() 91 | self._total = 0.0 92 | self._count = 0 93 | 94 | def clear(self): 95 | self._deque.clear() 96 | 97 | 98 | class MeterBuffer(defaultdict): 99 | """Computes and stores the average and current value""" 100 | 101 | def __init__(self, window_size=20): 102 | factory = functools.partial(AverageMeter, window_size=window_size) 103 | super().__init__(factory) 104 | 105 | def reset(self): 106 | for v in self.values(): 107 | v.reset() 108 | 109 | def get_filtered_meter(self, filter_key="time"): 110 | return {k: v for k, v in self.items() if filter_key in k} 111 | 112 | def update(self, values=None, **kwargs): 113 | if values is None: 114 | values = {} 115 | values.update(kwargs) 116 | for k, v in values.items(): 117 | if isinstance(v, torch.Tensor): 118 | v = v.detach() 119 | self[k].update(v) 120 | 121 | def clear_meters(self): 122 | for v in self.values(): 123 | v.clear() 124 | -------------------------------------------------------------------------------- /yolox/utils/model_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
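# [Editor's sketch] How these helpers are typically used before inference
# (the model/exp objects here are placeholders):
#
#   logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size)))
#   model = model.eval()
#   model = fuse_model(model)  # fold each BatchNorm into its preceding conv
#
# Fusion relies on the BatchNorm running statistics, so it is only valid for a
# model in eval() mode; the fused model is what gets exported or benchmarked.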
4 | 5 | import torch 6 | import torch.nn as nn 7 | from thop import profile 8 | 9 | from copy import deepcopy 10 | 11 | __all__ = [ 12 | "fuse_conv_and_bn", 13 | "fuse_model", 14 | "get_model_info", 15 | "replace_module", 16 | ] 17 | 18 | 19 | def get_model_info(model, tsize): 20 | 21 | stride = 64 22 | img = torch.zeros((1, 3, stride, stride), device=next(model.parameters()).device) 23 | flops, params = profile(deepcopy(model), inputs=(img,), verbose=False) 24 | params /= 1e6 25 | flops /= 1e9 26 | flops *= tsize[0] * tsize[1] / stride / stride * 2 # Gflops 27 | info = "Params: {:.2f}M, Gflops: {:.2f}".format(params, flops) 28 | return info 29 | 30 | 31 | def fuse_conv_and_bn(conv, bn): 32 | # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 33 | fusedconv = ( 34 | nn.Conv2d( 35 | conv.in_channels, 36 | conv.out_channels, 37 | kernel_size=conv.kernel_size, 38 | stride=conv.stride, 39 | padding=conv.padding, 40 | groups=conv.groups, 41 | bias=True, 42 | ) 43 | .requires_grad_(False) 44 | .to(conv.weight.device) 45 | ) 46 | 47 | # prepare filters 48 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 49 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 50 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape)) 51 | 52 | # prepare spatial bias 53 | b_conv = ( 54 | torch.zeros(conv.weight.size(0), device=conv.weight.device) 55 | if conv.bias is None 56 | else conv.bias 57 | ) 58 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div( 59 | torch.sqrt(bn.running_var + bn.eps) 60 | ) 61 | fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) 62 | 63 | return fusedconv 64 | 65 | 66 | def fuse_model(model): 67 | from yolox.models.network_blocks import BaseConv 68 | 69 | for m in model.modules(): 70 | if type(m) is BaseConv and hasattr(m, "bn"): 71 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 72 | delattr(m, "bn") # remove batchnorm 73 | m.forward = m.fuseforward # update forward 74 | return model 75 | 76 | 77 | def replace_module(module, replaced_module_type, new_module_type, replace_func=None): 78 | """ 79 | Replace given type in module to a new type. mostly used in deploy. 80 | 81 | Args: 82 | module (nn.Module): model to apply replace operation. 83 | replaced_module_type (Type): module type to be replaced. 84 | new_module_type (Type) 85 | replace_func (function): python function to describe replace logic. Defalut value None. 86 | 87 | Returns: 88 | model (nn.Module): module that already been replaced. 89 | """ 90 | 91 | def default_replace_func(replaced_module_type, new_module_type): 92 | return new_module_type() 93 | 94 | if replace_func is None: 95 | replace_func = default_replace_func 96 | 97 | model = module 98 | if isinstance(module, replaced_module_type): 99 | model = replace_func(replaced_module_type, new_module_type) 100 | else: # recurrsively replace 101 | for name, child in module.named_children(): 102 | new_child = replace_module(child, replaced_module_type, new_module_type) 103 | if new_child is not child: # child is already replaced 104 | model.add_module(name, new_child) 105 | 106 | return model 107 | -------------------------------------------------------------------------------- /yolox/utils/setup_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
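# [Editor's sketch] These helpers are meant to run once at process start-up,
# before data loaders or distributed training are set up (exact call sites vary
# across the training and demo scripts):
#
#   from yolox.utils import configure_nccl, configure_module
#
#   configure_module()  # raise the open-file limit, disable cv2 OpenCL/threads
#   configure_nccl()    # export NCCL env vars for multi-machine InfiniBand runs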
4 | 5 | import cv2 6 | 7 | import os 8 | import subprocess 9 | 10 | __all__ = ["configure_nccl", "configure_module"] 11 | 12 | 13 | def configure_nccl(): 14 | """Configure multi-machine environment variables of NCCL.""" 15 | os.environ["NCCL_LAUNCH_MODE"] = "PARALLEL" 16 | os.environ["NCCL_IB_HCA"] = subprocess.getoutput( 17 | "pushd /sys/class/infiniband/ > /dev/null; for i in mlx5_*; " 18 | "do cat $i/ports/1/gid_attrs/types/* 2>/dev/null " 19 | "| grep v >/dev/null && echo $i ; done; popd > /dev/null" 20 | ) 21 | os.environ["NCCL_IB_GID_INDEX"] = "3" 22 | os.environ["NCCL_IB_TC"] = "106" 23 | 24 | 25 | def configure_module(ulimit_value=8192): 26 | """ 27 | Configure pytorch module environment. setting of ulimit and cv2 will be set. 28 | 29 | Args: 30 | ulimit_value(int): default open file number on linux. Default value: 8192. 31 | """ 32 | # system setting 33 | try: 34 | import resource 35 | 36 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 37 | resource.setrlimit(resource.RLIMIT_NOFILE, (ulimit_value, rlimit[1])) 38 | except Exception: 39 | # Exception might be raised in Windows OS or rlimit reaches max limit number. 40 | # However, set rlimit value might not be necessary. 41 | pass 42 | 43 | # cv2 44 | # multiprocess might be harmful on performance of torch dataloader 45 | os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled" 46 | try: 47 | cv2.setNumThreads(0) 48 | cv2.ocl.setUseOpenCL(False) 49 | except Exception: 50 | # cv2 version mismatch might rasie exceptions. 51 | pass 52 | --------------------------------------------------------------------------------
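The model files above only define building blocks; nothing in this dump shows them wired together. The sketch below is an editor's addition, not a file from the repository: it assembles a detector in the same way the experiment files under exps/example/mot do. The depth, width, num_classes, and test-size values mirror the YOLOX-s MOT configuration and should be treated as assumptions, as should the dummy input.

import torch

from yolox.models import YOLOPAFPN, YOLOXHead, YOLOX
from yolox.utils import fuse_model, get_model_info

# YOLOX-s sized backbone and head for single-class (pedestrian) tracking.
depth, width, num_classes = 0.33, 0.50, 1
in_channels = [256, 512, 1024]
backbone = YOLOPAFPN(depth, width, in_channels=in_channels)
head = YOLOXHead(num_classes, width, in_channels=in_channels)
model = YOLOX(backbone, head).eval()

# Report parameter count and GFLOPs at the MOT test resolution.
print(get_model_info(model, (608, 1088)))

# Fold BatchNorm layers into the convolutions for faster inference.
model = fuse_model(model)

with torch.no_grad():
    dummy = torch.zeros(1, 3, 608, 1088)  # NCHW input at the MOT test size
    outputs = model(dummy)                # eval mode: decoded per-anchor predictions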