├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── assets ├── MOT17-01-SDP.gif ├── MOT17-07-SDP.gif ├── MOT20-07.gif ├── MOT20-08.gif ├── palace_demo.gif ├── sota.png └── teasing.png ├── datasets └── data_path │ ├── citypersons.train │ └── eth.train ├── deploy ├── ONNXRuntime │ ├── README.md │ └── onnx_inference.py ├── TensorRT │ ├── cpp │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── include │ │ │ ├── BYTETracker.h │ │ │ ├── STrack.h │ │ │ ├── dataType.h │ │ │ ├── kalmanFilter.h │ │ │ ├── lapjv.h │ │ │ └── logging.h │ │ └── src │ │ │ ├── BYTETracker.cpp │ │ │ ├── STrack.cpp │ │ │ ├── bytetrack.cpp │ │ │ ├── kalmanFilter.cpp │ │ │ ├── lapjv.cpp │ │ │ └── utils.cpp │ └── python │ │ └── README.md └── ncnn │ └── cpp │ ├── CMakeLists.txt │ ├── README.md │ ├── include │ ├── BYTETracker.h │ ├── STrack.h │ ├── dataType.h │ ├── kalmanFilter.h │ └── lapjv.h │ └── src │ ├── BYTETracker.cpp │ ├── STrack.cpp │ ├── bytetrack.cpp │ ├── kalmanFilter.cpp │ ├── lapjv.cpp │ └── utils.cpp ├── exps ├── default │ ├── nano.py │ ├── yolov3.py │ ├── yolox_l.py │ ├── yolox_m.py │ ├── yolox_s.py │ ├── yolox_tiny.py │ └── yolox_x.py └── example │ └── mot │ ├── yolov5_s_mix_det.py │ ├── yolox_l_mix_det.py │ ├── yolox_m_mix_det.py │ ├── yolox_nano_mix_det.py │ ├── yolox_s_mix_det.py │ ├── yolox_tiny_mix_det.py │ ├── yolox_x_ablation.py │ ├── yolox_x_ch.py │ ├── yolox_x_mix_det.py │ ├── yolox_x_mix_mot20_ch.py │ └── yolox_x_mot17_half.py ├── requirements.txt ├── setup.cfg ├── setup.py ├── tools ├── convert_cityperson_to_coco.py ├── convert_crowdhuman_to_coco.py ├── convert_ethz_to_coco.py ├── convert_mot17_to_coco.py ├── convert_mot20_to_coco.py ├── convert_video.py ├── demo_track.py ├── export_onnx.py ├── interpolation.py ├── mix_data_ablation.py ├── mix_data_test_mot17.py ├── mix_data_test_mot20.py ├── mota.py ├── track.py ├── track_deepsort.py ├── track_motdt.py ├── track_sort.py ├── train.py ├── trt.py └── txt2video.py ├── tools_yolov5 ├── demo_track_yolov5.py ├── models │ ├── __init__.py │ ├── common.py │ ├── experimental.py │ ├── export_master.py │ ├── yolo.py │ └── yolov5s.yaml ├── utils │ ├── __init__.py │ ├── activations.py │ ├── autoanchor.py │ ├── datasets.py │ ├── general.py │ ├── general_v4.py │ ├── google_utils.py │ ├── loss.py │ ├── metrics.py │ ├── plots.py │ └── torch_utils.py └── yolov5_v4 │ ├── .dockerignore │ ├── .gitattributes │ ├── .github │ ├── ISSUE_TEMPLATE │ │ ├── bug-report.md │ │ ├── feature-request.md │ │ └── question.md │ ├── dependabot.yml │ └── workflows │ │ ├── ci-testing.yml │ │ ├── codeql-analysis.yml │ │ ├── greetings.yml │ │ ├── rebase.yml │ │ └── stale.yml │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── hubconf.py │ ├── models │ ├── __init__.py │ ├── common.py │ ├── experimental.py │ ├── export_master.py │ ├── yolo.py │ └── yolov5s.yaml │ ├── requirements.txt │ └── utils │ ├── __init__.py │ ├── activations.py │ ├── autoanchor.py │ ├── datasets.py │ ├── general.py │ ├── general_v4.py │ ├── google_utils.py │ ├── loss.py │ ├── metrics.py │ ├── plots.py │ └── torch_utils.py ├── tutorials ├── centertrack │ ├── README.md │ ├── byte_tracker.py │ ├── mot_online │ │ ├── basetrack.py │ │ ├── kalman_filter.py │ │ └── matching.py │ ├── opts.py │ └── tracker.py ├── cstrack │ ├── README.md │ ├── byte_tracker.py │ └── tracker.py ├── ctracker │ ├── README.md │ ├── byte_tracker.py │ ├── eval_motchallenge.py │ ├── generate_half_csv.py │ ├── mot_online │ │ ├── basetrack.py │ │ ├── kalman_filter.py │ │ └── matching.py │ ├── test.py │ └── test_byte.py ├── fairmot │ ├── 
README.md │ ├── byte_tracker.py │ └── tracker.py ├── jde │ ├── README.md │ ├── byte_tracker.py │ ├── evaluation.py │ ├── track_half.py │ └── tracker.py ├── motr │ ├── README.md │ ├── byte_tracker.py │ ├── eval.py │ ├── evaluation.py │ ├── joint.py │ ├── mot_online │ │ ├── basetrack.py │ │ ├── kalman_filter.py │ │ └── matching.py │ ├── motr.py │ ├── motr_det.py │ └── transforms.py ├── qdtrack │ ├── README.md │ ├── byte_tracker.py │ ├── mot_online │ │ ├── basetrack.py │ │ ├── kalman_filter.py │ │ └── matching.py │ ├── qdtrack.py │ └── tracker_reid_motion.py ├── trades │ ├── README.md │ ├── byte_tracker.py │ ├── mot_online │ │ ├── basetrack.py │ │ ├── kalman_filter.py │ │ └── matching.py │ ├── opts.py │ └── tracker.py └── transtrack │ ├── README.md │ ├── engine_track.py │ ├── main_track.py │ ├── mot_online │ ├── basetrack.py │ ├── byte_tracker.py │ ├── kalman_filter.py │ └── matching.py │ ├── save_track.py │ └── tracker.py ├── videos └── palace.mp4 └── yolox ├── __init__.py ├── core ├── __init__.py ├── launch.py └── trainer.py ├── data ├── __init__.py ├── data_augment.py ├── data_prefetcher.py ├── dataloading.py ├── datasets │ ├── __init__.py │ ├── datasets_wrapper.py │ ├── mosaicdetection.py │ └── mot.py └── samplers.py ├── deepsort_tracker ├── deepsort.py ├── detection.py ├── iou_matching.py ├── kalman_filter.py ├── linear_assignment.py ├── reid_model.py └── track.py ├── evaluators ├── __init__.py ├── coco_evaluator.py ├── evaluation.py └── mot_evaluator.py ├── exp ├── __init__.py ├── base_exp.py ├── build.py └── yolox_base.py ├── layers ├── __init__.py ├── csrc │ ├── cocoeval │ │ ├── cocoeval.cpp │ │ └── cocoeval.h │ └── vision.cpp └── fast_coco_eval_api.py ├── models ├── __init__.py ├── darknet.py ├── losses.py ├── network_blocks.py ├── yolo_fpn.py ├── yolo_head.py ├── yolo_pafpn.py └── yolox.py ├── motdt_tracker ├── basetrack.py ├── kalman_filter.py ├── matching.py ├── motdt_tracker.py └── reid_model.py ├── sort_tracker └── sort.py ├── tracker ├── basetrack.py ├── byte_tracker.py ├── kalman_filter.py └── matching.py ├── tracking_utils ├── evaluation.py ├── io.py └── timer.py └── utils ├── __init__.py ├── allreduce_norm.py ├── boxes.py ├── checkpoint.py ├── demo_utils.py ├── dist.py ├── ema.py ├── logger.py ├── lr_scheduler.py ├── metric.py ├── model_utils.py ├── setup_env.py └── visualize.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # output 132 | docs/api 133 | .code-workspace.code-workspace 134 | *.pkl 135 | *.npy 136 | *.pth 137 | *.onnx 138 | *.engine 139 | events.out.tfevents* 140 | pretrained 141 | YOLOX_outputs -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/tensorrt:21.09-py3 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | ARG USERNAME=user 5 | ARG WORKDIR=/workspace/ByteTrack 6 | 7 | RUN apt-get update && apt-get install -y \ 8 | automake autoconf libpng-dev nano python3-pip \ 9 | curl zip unzip libtool swig zlib1g-dev pkg-config \ 10 | python3-mock libpython3-dev libpython3-all-dev \ 11 | g++ gcc cmake make pciutils cpio gosu wget \ 12 | libgtk-3-dev libxtst-dev sudo apt-transport-https \ 13 | build-essential gnupg git xz-utils vim \ 14 | libva-drm2 libva-x11-2 vainfo libva-wayland2 libva-glx2 \ 15 | libva-dev libdrm-dev xorg xorg-dev protobuf-compiler \ 16 | openbox libx11-dev libgl1-mesa-glx libgl1-mesa-dev \ 17 | libtbb2 libtbb-dev libopenblas-dev libopenmpi-dev \ 18 | && sed -i 's/# set linenumbers/set linenumbers/g' /etc/nanorc \ 19 | && apt clean \ 20 | && rm -rf /var/lib/apt/lists/* 21 | 22 | RUN git clone https://github.com/ifzhang/ByteTrack \ 23 | && cd ByteTrack \ 24 | && git checkout 3434c5e8bc6a5ae8ad530528ba8d9a431967f237 \ 25 | && mkdir -p YOLOX_outputs/yolox_x_mix_det/track_vis \ 26 | && sed -i 's/torch>=1.7/torch==1.9.1+cu111/g' requirements.txt \ 27 | && sed -i 's/torchvision==0.10.0/torchvision==0.10.1+cu111/g' requirements.txt \ 28 | && sed -i "s/'cuda'/0/g" tools/demo_track.py \ 29 | && pip3 install pip --upgrade \ 30 | && pip3 install -r 
requirements.txt -f https://download.pytorch.org/whl/torch_stable.html \ 31 | && python3 setup.py develop \ 32 | && pip3 install cython \ 33 | && pip3 install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' \ 34 | && pip3 install cython_bbox gdown \ 35 | && ldconfig \ 36 | && pip cache purge 37 | 38 | RUN git clone https://github.com/NVIDIA-AI-IOT/torch2trt \ 39 | && cd torch2trt \ 40 | && git checkout 0400b38123d01cc845364870bdf0a0044ea2b3b2 \ 41 | # https://github.com/NVIDIA-AI-IOT/torch2trt/issues/619 42 | && wget https://github.com/NVIDIA-AI-IOT/torch2trt/commit/8b9fb46ddbe99c2ddf3f1ed148c97435cbeb8fd3.patch \ 43 | && git apply 8b9fb46ddbe99c2ddf3f1ed148c97435cbeb8fd3.patch \ 44 | && python3 setup.py install 45 | 46 | RUN echo "root:root" | chpasswd \ 47 | && adduser --disabled-password --gecos "" "${USERNAME}" \ 48 | && echo "${USERNAME}:${USERNAME}" | chpasswd \ 49 | && echo "%${USERNAME} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers.d/${USERNAME} \ 50 | && chmod 0440 /etc/sudoers.d/${USERNAME} 51 | USER ${USERNAME} 52 | RUN sudo chown -R ${USERNAME}:${USERNAME} ${WORKDIR} 53 | WORKDIR ${WORKDIR} -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Yifu Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /assets/MOT17-01-SDP.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/assets/MOT17-01-SDP.gif -------------------------------------------------------------------------------- /assets/MOT17-07-SDP.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/assets/MOT17-07-SDP.gif -------------------------------------------------------------------------------- /assets/MOT20-07.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/assets/MOT20-07.gif -------------------------------------------------------------------------------- /assets/MOT20-08.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/assets/MOT20-08.gif -------------------------------------------------------------------------------- /assets/palace_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/assets/palace_demo.gif -------------------------------------------------------------------------------- /assets/sota.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/assets/sota.png -------------------------------------------------------------------------------- /assets/teasing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/assets/teasing.png -------------------------------------------------------------------------------- /deploy/ONNXRuntime/README.md: -------------------------------------------------------------------------------- 1 | ## ByteTrack-ONNXRuntime in Python 2 | 3 | This doc introduces how to convert your pytorch model into onnx, and how to run an onnxruntime demo to verify your convertion. 
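As a rough shape-level sanity check, the snippet below shows the bare ONNXRuntime session that `onnx_inference.py` builds on. Treat it as a sketch only: the input node name `images` matches the export default, the 1088 x 608 resolution is assumed from the bytetrack_s settings, and the real demo additionally letterboxes, normalizes and post-processes the predictions.

```python
# Minimal shape-level check of an exported ByteTrack model with ONNXRuntime.
# Assumptions: input node "images", 1088 x 608 input, a single raw YOLOX output.
import cv2
import numpy as np
import onnxruntime

session = onnxruntime.InferenceSession("bytetrack_s.onnx")

img = cv2.imread("demo.jpg")                        # any test frame
blob = cv2.resize(img, (1088, 608)).astype(np.float32)
blob = blob.transpose(2, 0, 1)[None]                # HWC -> 1 x 3 x 608 x 1088

(preds,) = session.run(None, {"images": blob})
print(preds.shape)                                  # raw YOLOX predictions
```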
4 | 5 | ### Convert Your Model to ONNX 6 | 7 | ```shell 8 | cd 9 | python3 tools/export_onnx.py --output-name bytetrack_s.onnx -f exps/example/mot/yolox_s_mix_det.py -c pretrained/bytetrack_s_mot17.pth.tar 10 | ``` 11 | 12 | ### ONNXRuntime Demo 13 | 14 | You can run onnx demo with **16 FPS** (96-core Intel(R) Xeon(R) Platinum 8163 CPU @ 2.50GHz): 15 | 16 | ```shell 17 | cd /deploy/ONNXRuntime 18 | python3 onnx_inference.py 19 | ``` 20 | -------------------------------------------------------------------------------- /deploy/TensorRT/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | 3 | project(bytetrack) 4 | 5 | add_definitions(-std=c++11) 6 | 7 | option(CUDA_USE_STATIC_CUDA_RUNTIME OFF) 8 | set(CMAKE_CXX_STANDARD 11) 9 | set(CMAKE_BUILD_TYPE Debug) 10 | 11 | find_package(CUDA REQUIRED) 12 | 13 | include_directories(${PROJECT_SOURCE_DIR}/include) 14 | include_directories(/usr/local/include/eigen3) 15 | link_directories(${PROJECT_SOURCE_DIR}/include) 16 | # include and link dirs of cuda and tensorrt, you need adapt them if yours are different 17 | # cuda 18 | include_directories(/usr/local/cuda/include) 19 | link_directories(/usr/local/cuda/lib64) 20 | # cudnn 21 | include_directories(/data/cuda/cuda-10.2/cudnn/v8.0.4/include) 22 | link_directories(/data/cuda/cuda-10.2/cudnn/v8.0.4/lib64) 23 | # tensorrt 24 | include_directories(/opt/tiger/demo/TensorRT-7.2.3.4/include) 25 | link_directories(/opt/tiger/demo/TensorRT-7.2.3.4/lib) 26 | 27 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED") 28 | 29 | find_package(OpenCV) 30 | include_directories(${OpenCV_INCLUDE_DIRS}) 31 | 32 | file(GLOB My_Source_Files ${PROJECT_SOURCE_DIR}/src/*.cpp) 33 | add_executable(bytetrack ${My_Source_Files}) 34 | target_link_libraries(bytetrack nvinfer) 35 | target_link_libraries(bytetrack cudart) 36 | target_link_libraries(bytetrack ${OpenCV_LIBS}) 37 | 38 | add_definitions(-O2 -pthread) 39 | 40 | -------------------------------------------------------------------------------- /deploy/TensorRT/cpp/README.md: -------------------------------------------------------------------------------- 1 | # ByteTrack-TensorRT in C++ 2 | 3 | ## Installation 4 | 5 | Install opencv with ```sudo apt-get install libopencv-dev``` (we don't need a higher version of opencv like v3.3+). 6 | 7 | Install eigen-3.3.9 [[google]](https://drive.google.com/file/d/1rqO74CYCNrmRAg8Rra0JP3yZtJ-rfket/view?usp=sharing), [[baidu(code:ueq4)]](https://pan.baidu.com/s/15kEfCxpy-T7tz60msxxExg). 8 | 9 | ```shell 10 | unzip eigen-3.3.9.zip 11 | cd eigen-3.3.9 12 | mkdir build 13 | cd build 14 | cmake .. 15 | sudo make install 16 | ``` 17 | 18 | ## Prepare serialized engine file 19 | 20 | Follow the TensorRT Python demo to convert and save the serialized engine file. 21 | 22 | Check the 'model_trt.engine' file, which will be automatically saved at the YOLOX_output dir. 23 | 24 | ## Build the demo 25 | 26 | You should set the TensorRT path and CUDA path in CMakeLists.txt. 27 | 28 | For bytetrack_s model, we set the input frame size 1088 x 608. For bytetrack_m, bytetrack_l, bytetrack_x models, we set the input frame size 1440 x 800. 
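These sizes mirror the `test_size` of the corresponding exp files, so if you are unsure which resolution your engine expects, a quick check from the repo root is the sketch below (assuming the exp layout shown in the tree above; sizes are reported as height x width):

```python
# Print the evaluation input size defined by an exp file.
from yolox.exp import get_exp

exp = get_exp("exps/example/mot/yolox_s_mix_det.py", None)
print(exp.test_size)   # expected (608, 1088) for the bytetrack_s config
```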
You can modify the INPUT_W and INPUT_H in src/bytetrack.cpp 29 | 30 | ```c++ 31 | static const int INPUT_W = 1088; 32 | static const int INPUT_H = 608; 33 | ``` 34 | 35 | You can first build the demo: 36 | 37 | ```shell 38 | cd /demo/TensorRT/cpp 39 | mkdir build 40 | cd build 41 | cmake .. 42 | make 43 | ``` 44 | 45 | Then you can run the demo with **200 FPS**: 46 | 47 | ```shell 48 | ./bytetrack ../../../../YOLOX_outputs/yolox_s_mix_det/model_trt.engine -i ../../../../videos/palace.mp4 49 | ``` 50 | 51 | (If you find the output video lose some frames, you can convert the input video by running: 52 | 53 | ```shell 54 | cd 55 | python3 tools/convert_video.py 56 | ``` 57 | to generate an appropriate input video for TensorRT C++ demo. ) 58 | 59 | -------------------------------------------------------------------------------- /deploy/TensorRT/cpp/include/BYTETracker.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "STrack.h" 4 | 5 | struct Object 6 | { 7 | cv::Rect_ rect; 8 | int label; 9 | float prob; 10 | }; 11 | 12 | class BYTETracker 13 | { 14 | public: 15 | BYTETracker(int frame_rate = 30, int track_buffer = 30); 16 | ~BYTETracker(); 17 | 18 | vector update(const vector& objects); 19 | Scalar get_color(int idx); 20 | 21 | private: 22 | vector joint_stracks(vector &tlista, vector &tlistb); 23 | vector joint_stracks(vector &tlista, vector &tlistb); 24 | 25 | vector sub_stracks(vector &tlista, vector &tlistb); 26 | void remove_duplicate_stracks(vector &resa, vector &resb, vector &stracksa, vector &stracksb); 27 | 28 | void linear_assignment(vector > &cost_matrix, int cost_matrix_size, int cost_matrix_size_size, float thresh, 29 | vector > &matches, vector &unmatched_a, vector &unmatched_b); 30 | vector > iou_distance(vector &atracks, vector &btracks, int &dist_size, int &dist_size_size); 31 | vector > iou_distance(vector &atracks, vector &btracks); 32 | vector > ious(vector > &atlbrs, vector > &btlbrs); 33 | 34 | double lapjv(const vector > &cost, vector &rowsol, vector &colsol, 35 | bool extend_cost = false, float cost_limit = LONG_MAX, bool return_cost = true); 36 | 37 | private: 38 | 39 | float track_thresh; 40 | float high_thresh; 41 | float match_thresh; 42 | int frame_id; 43 | int max_time_lost; 44 | 45 | vector tracked_stracks; 46 | vector lost_stracks; 47 | vector removed_stracks; 48 | byte_kalman::KalmanFilter kalman_filter; 49 | }; -------------------------------------------------------------------------------- /deploy/TensorRT/cpp/include/STrack.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "kalmanFilter.h" 5 | 6 | using namespace cv; 7 | using namespace std; 8 | 9 | enum TrackState { New = 0, Tracked, Lost, Removed }; 10 | 11 | class STrack 12 | { 13 | public: 14 | STrack(vector tlwh_, float score); 15 | ~STrack(); 16 | 17 | vector static tlbr_to_tlwh(vector &tlbr); 18 | void static multi_predict(vector &stracks, byte_kalman::KalmanFilter &kalman_filter); 19 | void static_tlwh(); 20 | void static_tlbr(); 21 | vector tlwh_to_xyah(vector tlwh_tmp); 22 | vector to_xyah(); 23 | void mark_lost(); 24 | void mark_removed(); 25 | int next_id(); 26 | int end_frame(); 27 | 28 | void activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id); 29 | void re_activate(STrack &new_track, int frame_id, bool new_id = false); 30 | void update(STrack &new_track, int frame_id); 31 | 32 | public: 33 | bool is_activated; 34 | int track_id; 35 
| int state; 36 | 37 | vector _tlwh; 38 | vector tlwh; 39 | vector tlbr; 40 | int frame_id; 41 | int tracklet_len; 42 | int start_frame; 43 | 44 | KAL_MEAN mean; 45 | KAL_COVA covariance; 46 | float score; 47 | 48 | private: 49 | byte_kalman::KalmanFilter kalman_filter; 50 | }; -------------------------------------------------------------------------------- /deploy/TensorRT/cpp/include/dataType.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | typedef Eigen::Matrix DETECTBOX; 9 | typedef Eigen::Matrix DETECTBOXSS; 10 | typedef Eigen::Matrix FEATURE; 11 | typedef Eigen::Matrix FEATURESS; 12 | //typedef std::vector FEATURESS; 13 | 14 | //Kalmanfilter 15 | //typedef Eigen::Matrix KAL_FILTER; 16 | typedef Eigen::Matrix KAL_MEAN; 17 | typedef Eigen::Matrix KAL_COVA; 18 | typedef Eigen::Matrix KAL_HMEAN; 19 | typedef Eigen::Matrix KAL_HCOVA; 20 | using KAL_DATA = std::pair; 21 | using KAL_HDATA = std::pair; 22 | 23 | //main 24 | using RESULT_DATA = std::pair; 25 | 26 | //tracker: 27 | using TRACKER_DATA = std::pair; 28 | using MATCH_DATA = std::pair; 29 | typedef struct t { 30 | std::vector matches; 31 | std::vector unmatched_tracks; 32 | std::vector unmatched_detections; 33 | }TRACHER_MATCHD; 34 | 35 | //linear_assignment: 36 | typedef Eigen::Matrix DYNAMICM; -------------------------------------------------------------------------------- /deploy/TensorRT/cpp/include/kalmanFilter.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "dataType.h" 4 | 5 | namespace byte_kalman 6 | { 7 | class KalmanFilter 8 | { 9 | public: 10 | static const double chi2inv95[10]; 11 | KalmanFilter(); 12 | KAL_DATA initiate(const DETECTBOX& measurement); 13 | void predict(KAL_MEAN& mean, KAL_COVA& covariance); 14 | KAL_HDATA project(const KAL_MEAN& mean, const KAL_COVA& covariance); 15 | KAL_DATA update(const KAL_MEAN& mean, 16 | const KAL_COVA& covariance, 17 | const DETECTBOX& measurement); 18 | 19 | Eigen::Matrix gating_distance( 20 | const KAL_MEAN& mean, 21 | const KAL_COVA& covariance, 22 | const std::vector& measurements, 23 | bool only_position = false); 24 | 25 | private: 26 | Eigen::Matrix _motion_mat; 27 | Eigen::Matrix _update_mat; 28 | float _std_weight_position; 29 | float _std_weight_velocity; 30 | }; 31 | } -------------------------------------------------------------------------------- /deploy/TensorRT/cpp/include/lapjv.h: -------------------------------------------------------------------------------- 1 | #ifndef LAPJV_H 2 | #define LAPJV_H 3 | 4 | #define LARGE 1000000 5 | 6 | #if !defined TRUE 7 | #define TRUE 1 8 | #endif 9 | #if !defined FALSE 10 | #define FALSE 0 11 | #endif 12 | 13 | #define NEW(x, t, n) if ((x = (t *)malloc(sizeof(t) * (n))) == 0) { return -1; } 14 | #define FREE(x) if (x != 0) { free(x); x = 0; } 15 | #define SWAP_INDICES(a, b) { int_t _temp_index = a; a = b; b = _temp_index; } 16 | 17 | #if 0 18 | #include 19 | #define ASSERT(cond) assert(cond) 20 | #define PRINTF(fmt, ...) 
printf(fmt, ##__VA_ARGS__) 21 | #define PRINT_COST_ARRAY(a, n) \ 22 | while (1) { \ 23 | printf(#a" = ["); \ 24 | if ((n) > 0) { \ 25 | printf("%f", (a)[0]); \ 26 | for (uint_t j = 1; j < n; j++) { \ 27 | printf(", %f", (a)[j]); \ 28 | } \ 29 | } \ 30 | printf("]\n"); \ 31 | break; \ 32 | } 33 | #define PRINT_INDEX_ARRAY(a, n) \ 34 | while (1) { \ 35 | printf(#a" = ["); \ 36 | if ((n) > 0) { \ 37 | printf("%d", (a)[0]); \ 38 | for (uint_t j = 1; j < n; j++) { \ 39 | printf(", %d", (a)[j]); \ 40 | } \ 41 | } \ 42 | printf("]\n"); \ 43 | break; \ 44 | } 45 | #else 46 | #define ASSERT(cond) 47 | #define PRINTF(fmt, ...) 48 | #define PRINT_COST_ARRAY(a, n) 49 | #define PRINT_INDEX_ARRAY(a, n) 50 | #endif 51 | 52 | 53 | typedef signed int int_t; 54 | typedef unsigned int uint_t; 55 | typedef double cost_t; 56 | typedef char boolean; 57 | typedef enum fp_t { FP_1 = 1, FP_2 = 2, FP_DYNAMIC = 3 } fp_t; 58 | 59 | extern int_t lapjv_internal( 60 | const uint_t n, cost_t *cost[], 61 | int_t *x, int_t *y); 62 | 63 | #endif // LAPJV_H -------------------------------------------------------------------------------- /deploy/TensorRT/python/README.md: -------------------------------------------------------------------------------- 1 | # ByteTrack-TensorRT in Python 2 | 3 | ## Install TensorRT Toolkit 4 | Please follow the [TensorRT Installation Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html) and [torch2trt gitrepo](https://github.com/NVIDIA-AI-IOT/torch2trt) to install TensorRT (Version 7 recommended) and torch2trt. 5 | 6 | ## Convert model 7 | 8 | You can convert the Pytorch model “bytetrack_s_mot17” to TensorRT model by running: 9 | 10 | ```shell 11 | cd 12 | python3 tools/trt.py -f exps/example/mot/yolox_s_mix_det.py -c pretrained/bytetrack_s_mot17.pth.tar 13 | ``` 14 | 15 | ## Run TensorRT demo 16 | 17 | You can use the converted model_trt.pth to run TensorRT demo with **130 FPS**: 18 | 19 | ```shell 20 | cd 21 | python3 tools/demo_track.py video -f exps/example/mot/yolox_s_mix_det.py --trt --save_result 22 | ``` 23 | -------------------------------------------------------------------------------- /deploy/ncnn/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | macro(ncnn_add_example name) 2 | add_executable(${name} ${name}.cpp) 3 | if(OpenCV_FOUND) 4 | target_include_directories(${name} PRIVATE ${OpenCV_INCLUDE_DIRS}) 5 | target_link_libraries(${name} PRIVATE ncnn ${OpenCV_LIBS}) 6 | elseif(NCNN_SIMPLEOCV) 7 | target_compile_definitions(${name} PUBLIC USE_NCNN_SIMPLEOCV) 8 | target_link_libraries(${name} PRIVATE ncnn) 9 | endif() 10 | 11 | # add test to a virtual project group 12 | set_property(TARGET ${name} PROPERTY FOLDER "examples") 13 | endmacro() 14 | 15 | if(NCNN_PIXEL) 16 | find_package(OpenCV QUIET COMPONENTS opencv_world) 17 | # for opencv 2.4 on ubuntu 16.04, there is no opencv_world but OpenCV_FOUND will be TRUE 18 | if("${OpenCV_LIBS}" STREQUAL "") 19 | set(OpenCV_FOUND FALSE) 20 | endif() 21 | if(NOT OpenCV_FOUND) 22 | find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs videoio) 23 | endif() 24 | if(NOT OpenCV_FOUND) 25 | find_package(OpenCV QUIET COMPONENTS core highgui imgproc) 26 | endif() 27 | 28 | if(OpenCV_FOUND OR NCNN_SIMPLEOCV) 29 | if(OpenCV_FOUND) 30 | message(STATUS "OpenCV library: ${OpenCV_INSTALL_PATH}") 31 | message(STATUS " version: ${OpenCV_VERSION}") 32 | message(STATUS " libraries: ${OpenCV_LIBS}") 33 | message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}") 34 | 35 | 
if(${OpenCV_VERSION_MAJOR} GREATER 3) 36 | set(CMAKE_CXX_STANDARD 11) 37 | endif() 38 | endif() 39 | 40 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../src) 41 | include_directories(${CMAKE_CURRENT_BINARY_DIR}/../src) 42 | include_directories(include) 43 | include_directories(/usr/local/include/eigen3) 44 | 45 | ncnn_add_example(squeezenet) 46 | ncnn_add_example(squeezenet_c_api) 47 | ncnn_add_example(fasterrcnn) 48 | ncnn_add_example(rfcn) 49 | ncnn_add_example(yolov2) 50 | ncnn_add_example(yolov3) 51 | if(OpenCV_FOUND) 52 | ncnn_add_example(yolov4) 53 | endif() 54 | ncnn_add_example(yolov5) 55 | ncnn_add_example(yolox) 56 | ncnn_add_example(mobilenetv2ssdlite) 57 | ncnn_add_example(mobilenetssd) 58 | ncnn_add_example(squeezenetssd) 59 | ncnn_add_example(shufflenetv2) 60 | ncnn_add_example(peleenetssd_seg) 61 | ncnn_add_example(simplepose) 62 | ncnn_add_example(retinaface) 63 | ncnn_add_example(yolact) 64 | ncnn_add_example(nanodet) 65 | ncnn_add_example(scrfd) 66 | ncnn_add_example(scrfd_crowdhuman) 67 | ncnn_add_example(rvm) 68 | file(GLOB My_Source_Files src/*.cpp) 69 | add_executable(bytetrack ${My_Source_Files}) 70 | if(OpenCV_FOUND) 71 | target_include_directories(bytetrack PRIVATE ${OpenCV_INCLUDE_DIRS}) 72 | target_link_libraries(bytetrack PRIVATE ncnn ${OpenCV_LIBS}) 73 | elseif(NCNN_SIMPLEOCV) 74 | target_compile_definitions(bytetrack PUBLIC USE_NCNN_SIMPLEOCV) 75 | target_link_libraries(bytetrack PRIVATE ncnn) 76 | endif() 77 | # add test to a virtual project group 78 | set_property(TARGET bytetrack PROPERTY FOLDER "examples") 79 | else() 80 | message(WARNING "OpenCV not found and NCNN_SIMPLEOCV disabled, examples won't be built") 81 | endif() 82 | else() 83 | message(WARNING "NCNN_PIXEL not enabled, examples won't be built") 84 | endif() 85 | -------------------------------------------------------------------------------- /deploy/ncnn/cpp/README.md: -------------------------------------------------------------------------------- 1 | # ByteTrack-CPP-ncnn 2 | 3 | ## Installation 4 | 5 | Clone [ncnn](https://github.com/Tencent/ncnn) first, then please following [build tutorial of ncnn](https://github.com/Tencent/ncnn/wiki/how-to-build) to build on your own device. 6 | 7 | Install eigen-3.3.9 [[google]](https://drive.google.com/file/d/1rqO74CYCNrmRAg8Rra0JP3yZtJ-rfket/view?usp=sharing), [[baidu(code:ueq4)]](https://pan.baidu.com/s/15kEfCxpy-T7tz60msxxExg). 8 | 9 | ```shell 10 | unzip eigen-3.3.9.zip 11 | cd eigen-3.3.9 12 | mkdir build 13 | cd build 14 | cmake .. 15 | sudo make install 16 | ``` 17 | 18 | ## Generate onnx file 19 | Use provided tools to generate onnx file. 20 | For example, if you want to generate onnx file of bytetrack_s_mot17.pth, please run the following command: 21 | ```shell 22 | cd 23 | python3 tools/export_onnx.py -f exps/example/mot/yolox_s_mix_det.py -c pretrained/bytetrack_s_mot17.pth.tar 24 | ``` 25 | Then, a bytetrack_s.onnx file is generated under . 26 | 27 | ## Generate ncnn param and bin file 28 | Put bytetrack_s.onnx under ncnn/build/tools/onnx and then run: 29 | 30 | ```shell 31 | cd ncnn/build/tools/onnx 32 | ./onnx2ncnn bytetrack_s.onnx bytetrack_s.param bytetrack_s.bin 33 | ``` 34 | 35 | Since Focus module is not supported in ncnn. Warnings like: 36 | ```shell 37 | Unsupported slice step ! 38 | ``` 39 | will be printed. However, don't worry! C++ version of Focus layer is already implemented in src/bytetrack.cpp. 40 | 41 | ## Modify param file 42 | Open **bytetrack_s.param**, and modify it. 
43 | Before (just an example): 44 | ``` 45 | 235 268 46 | Input images 0 1 images 47 | Split splitncnn_input0 1 4 images images_splitncnn_0 images_splitncnn_1 images_splitncnn_2 images_splitncnn_3 48 | Crop Slice_4 1 1 images_splitncnn_3 467 -23309=1,0 -23310=1,2147483647 -23311=1,1 49 | Crop Slice_9 1 1 467 472 -23309=1,0 -23310=1,2147483647 -23311=1,2 50 | Crop Slice_14 1 1 images_splitncnn_2 477 -23309=1,0 -23310=1,2147483647 -23311=1,1 51 | Crop Slice_19 1 1 477 482 -23309=1,1 -23310=1,2147483647 -23311=1,2 52 | Crop Slice_24 1 1 images_splitncnn_1 487 -23309=1,1 -23310=1,2147483647 -23311=1,1 53 | Crop Slice_29 1 1 487 492 -23309=1,0 -23310=1,2147483647 -23311=1,2 54 | Crop Slice_34 1 1 images_splitncnn_0 497 -23309=1,1 -23310=1,2147483647 -23311=1,1 55 | Crop Slice_39 1 1 497 502 -23309=1,1 -23310=1,2147483647 -23311=1,2 56 | Concat Concat_40 4 1 472 492 482 502 503 0=0 57 | ... 58 | ``` 59 | * Change first number for 235 to 235 - 9 = 226(since we will remove 10 layers and add 1 layers, total layers number should minus 9). 60 | * Then remove 10 lines of code from Split to Concat, but remember the last but 2nd number: 503. 61 | * Add YoloV5Focus layer After Input (using previous number 503): 62 | ``` 63 | YoloV5Focus focus 1 1 images 503 64 | ``` 65 | After(just an exmaple): 66 | ``` 67 | 226 328 68 | Input images 0 1 images 69 | YoloV5Focus focus 1 1 images 503 70 | ... 71 | ``` 72 | 73 | ## Use ncnn_optimize to generate new param and bin 74 | ```shell 75 | # suppose you are still under ncnn/build/tools/onnx dir. 76 | ../ncnnoptimize bytetrack_s.param bytetrack_s.bin bytetrack_s_op.param bytetrack_s_op.bin 65536 77 | ``` 78 | 79 | ## Copy files and build ByteTrack 80 | Copy or move 'src', 'include' folders and 'CMakeLists.txt' file into ncnn/examples. Copy bytetrack_s_op.param, bytetrack_s_op.bin and /videos/palace.mp4 into ncnn/build/examples. Then, build ByteTrack: 81 | 82 | ```shell 83 | cd ncnn/build/examples 84 | cmake .. 
85 | make 86 | ``` 87 | 88 | ## Run the demo 89 | You can run the ncnn demo with **5 FPS** (96-core Intel(R) Xeon(R) Platinum 8163 CPU @ 2.50GHz): 90 | ```shell 91 | ./bytetrack palace.mp4 92 | ``` 93 | 94 | You can modify 'num_threads' to optimize the running speed in [bytetrack.cpp](https://github.com/ifzhang/ByteTrack/blob/2e9a67895da6b47b948015f6861bba0bacd4e72f/deploy/ncnn/cpp/src/bytetrack.cpp#L309) according to the number of your CPU cores: 95 | 96 | ``` 97 | yolox.opt.num_threads = 20; 98 | ``` 99 | 100 | 101 | ## Acknowledgement 102 | 103 | * [ncnn](https://github.com/Tencent/ncnn) 104 | -------------------------------------------------------------------------------- /deploy/ncnn/cpp/include/BYTETracker.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "STrack.h" 4 | 5 | struct Object 6 | { 7 | cv::Rect_ rect; 8 | int label; 9 | float prob; 10 | }; 11 | 12 | class BYTETracker 13 | { 14 | public: 15 | BYTETracker(int frame_rate = 30, int track_buffer = 30); 16 | ~BYTETracker(); 17 | 18 | vector update(const vector& objects); 19 | Scalar get_color(int idx); 20 | 21 | private: 22 | vector joint_stracks(vector &tlista, vector &tlistb); 23 | vector joint_stracks(vector &tlista, vector &tlistb); 24 | 25 | vector sub_stracks(vector &tlista, vector &tlistb); 26 | void remove_duplicate_stracks(vector &resa, vector &resb, vector &stracksa, vector &stracksb); 27 | 28 | void linear_assignment(vector > &cost_matrix, int cost_matrix_size, int cost_matrix_size_size, float thresh, 29 | vector > &matches, vector &unmatched_a, vector &unmatched_b); 30 | vector > iou_distance(vector &atracks, vector &btracks, int &dist_size, int &dist_size_size); 31 | vector > iou_distance(vector &atracks, vector &btracks); 32 | vector > ious(vector > &atlbrs, vector > &btlbrs); 33 | 34 | double lapjv(const vector > &cost, vector &rowsol, vector &colsol, 35 | bool extend_cost = false, float cost_limit = LONG_MAX, bool return_cost = true); 36 | 37 | private: 38 | 39 | float track_thresh; 40 | float high_thresh; 41 | float match_thresh; 42 | int frame_id; 43 | int max_time_lost; 44 | 45 | vector tracked_stracks; 46 | vector lost_stracks; 47 | vector removed_stracks; 48 | byte_kalman::KalmanFilter kalman_filter; 49 | }; -------------------------------------------------------------------------------- /deploy/ncnn/cpp/include/STrack.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "kalmanFilter.h" 5 | 6 | using namespace cv; 7 | using namespace std; 8 | 9 | enum TrackState { New = 0, Tracked, Lost, Removed }; 10 | 11 | class STrack 12 | { 13 | public: 14 | STrack(vector tlwh_, float score); 15 | ~STrack(); 16 | 17 | vector static tlbr_to_tlwh(vector &tlbr); 18 | void static multi_predict(vector &stracks, byte_kalman::KalmanFilter &kalman_filter); 19 | void static_tlwh(); 20 | void static_tlbr(); 21 | vector tlwh_to_xyah(vector tlwh_tmp); 22 | vector to_xyah(); 23 | void mark_lost(); 24 | void mark_removed(); 25 | int next_id(); 26 | int end_frame(); 27 | 28 | void activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id); 29 | void re_activate(STrack &new_track, int frame_id, bool new_id = false); 30 | void update(STrack &new_track, int frame_id); 31 | 32 | public: 33 | bool is_activated; 34 | int track_id; 35 | int state; 36 | 37 | vector _tlwh; 38 | vector tlwh; 39 | vector tlbr; 40 | int frame_id; 41 | int tracklet_len; 42 | int start_frame; 43 | 44 | KAL_MEAN 
mean; 45 | KAL_COVA covariance; 46 | float score; 47 | 48 | private: 49 | byte_kalman::KalmanFilter kalman_filter; 50 | }; -------------------------------------------------------------------------------- /deploy/ncnn/cpp/include/dataType.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | typedef Eigen::Matrix DETECTBOX; 9 | typedef Eigen::Matrix DETECTBOXSS; 10 | typedef Eigen::Matrix FEATURE; 11 | typedef Eigen::Matrix FEATURESS; 12 | //typedef std::vector FEATURESS; 13 | 14 | //Kalmanfilter 15 | //typedef Eigen::Matrix KAL_FILTER; 16 | typedef Eigen::Matrix KAL_MEAN; 17 | typedef Eigen::Matrix KAL_COVA; 18 | typedef Eigen::Matrix KAL_HMEAN; 19 | typedef Eigen::Matrix KAL_HCOVA; 20 | using KAL_DATA = std::pair; 21 | using KAL_HDATA = std::pair; 22 | 23 | //main 24 | using RESULT_DATA = std::pair; 25 | 26 | //tracker: 27 | using TRACKER_DATA = std::pair; 28 | using MATCH_DATA = std::pair; 29 | typedef struct t { 30 | std::vector matches; 31 | std::vector unmatched_tracks; 32 | std::vector unmatched_detections; 33 | }TRACHER_MATCHD; 34 | 35 | //linear_assignment: 36 | typedef Eigen::Matrix DYNAMICM; -------------------------------------------------------------------------------- /deploy/ncnn/cpp/include/kalmanFilter.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "dataType.h" 4 | 5 | namespace byte_kalman 6 | { 7 | class KalmanFilter 8 | { 9 | public: 10 | static const double chi2inv95[10]; 11 | KalmanFilter(); 12 | KAL_DATA initiate(const DETECTBOX& measurement); 13 | void predict(KAL_MEAN& mean, KAL_COVA& covariance); 14 | KAL_HDATA project(const KAL_MEAN& mean, const KAL_COVA& covariance); 15 | KAL_DATA update(const KAL_MEAN& mean, 16 | const KAL_COVA& covariance, 17 | const DETECTBOX& measurement); 18 | 19 | Eigen::Matrix gating_distance( 20 | const KAL_MEAN& mean, 21 | const KAL_COVA& covariance, 22 | const std::vector& measurements, 23 | bool only_position = false); 24 | 25 | private: 26 | Eigen::Matrix _motion_mat; 27 | Eigen::Matrix _update_mat; 28 | float _std_weight_position; 29 | float _std_weight_velocity; 30 | }; 31 | } -------------------------------------------------------------------------------- /deploy/ncnn/cpp/include/lapjv.h: -------------------------------------------------------------------------------- 1 | #ifndef LAPJV_H 2 | #define LAPJV_H 3 | 4 | #define LARGE 1000000 5 | 6 | #if !defined TRUE 7 | #define TRUE 1 8 | #endif 9 | #if !defined FALSE 10 | #define FALSE 0 11 | #endif 12 | 13 | #define NEW(x, t, n) if ((x = (t *)malloc(sizeof(t) * (n))) == 0) { return -1; } 14 | #define FREE(x) if (x != 0) { free(x); x = 0; } 15 | #define SWAP_INDICES(a, b) { int_t _temp_index = a; a = b; b = _temp_index; } 16 | 17 | #if 0 18 | #include 19 | #define ASSERT(cond) assert(cond) 20 | #define PRINTF(fmt, ...) 
printf(fmt, ##__VA_ARGS__) 21 | #define PRINT_COST_ARRAY(a, n) \ 22 | while (1) { \ 23 | printf(#a" = ["); \ 24 | if ((n) > 0) { \ 25 | printf("%f", (a)[0]); \ 26 | for (uint_t j = 1; j < n; j++) { \ 27 | printf(", %f", (a)[j]); \ 28 | } \ 29 | } \ 30 | printf("]\n"); \ 31 | break; \ 32 | } 33 | #define PRINT_INDEX_ARRAY(a, n) \ 34 | while (1) { \ 35 | printf(#a" = ["); \ 36 | if ((n) > 0) { \ 37 | printf("%d", (a)[0]); \ 38 | for (uint_t j = 1; j < n; j++) { \ 39 | printf(", %d", (a)[j]); \ 40 | } \ 41 | } \ 42 | printf("]\n"); \ 43 | break; \ 44 | } 45 | #else 46 | #define ASSERT(cond) 47 | #define PRINTF(fmt, ...) 48 | #define PRINT_COST_ARRAY(a, n) 49 | #define PRINT_INDEX_ARRAY(a, n) 50 | #endif 51 | 52 | 53 | typedef signed int int_t; 54 | typedef unsigned int uint_t; 55 | typedef double cost_t; 56 | typedef char boolean; 57 | typedef enum fp_t { FP_1 = 1, FP_2 = 2, FP_DYNAMIC = 3 } fp_t; 58 | 59 | extern int_t lapjv_internal( 60 | const uint_t n, cost_t *cost[], 61 | int_t *x, int_t *y); 62 | 63 | #endif // LAPJV_H -------------------------------------------------------------------------------- /deploy/ncnn/cpp/src/STrack.cpp: -------------------------------------------------------------------------------- 1 | #include "STrack.h" 2 | 3 | STrack::STrack(vector tlwh_, float score) 4 | { 5 | _tlwh.resize(4); 6 | _tlwh.assign(tlwh_.begin(), tlwh_.end()); 7 | 8 | is_activated = false; 9 | track_id = 0; 10 | state = TrackState::New; 11 | 12 | tlwh.resize(4); 13 | tlbr.resize(4); 14 | 15 | static_tlwh(); 16 | static_tlbr(); 17 | frame_id = 0; 18 | tracklet_len = 0; 19 | this->score = score; 20 | start_frame = 0; 21 | } 22 | 23 | STrack::~STrack() 24 | { 25 | } 26 | 27 | void STrack::activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id) 28 | { 29 | this->kalman_filter = kalman_filter; 30 | this->track_id = this->next_id(); 31 | 32 | vector _tlwh_tmp(4); 33 | _tlwh_tmp[0] = this->_tlwh[0]; 34 | _tlwh_tmp[1] = this->_tlwh[1]; 35 | _tlwh_tmp[2] = this->_tlwh[2]; 36 | _tlwh_tmp[3] = this->_tlwh[3]; 37 | vector xyah = tlwh_to_xyah(_tlwh_tmp); 38 | DETECTBOX xyah_box; 39 | xyah_box[0] = xyah[0]; 40 | xyah_box[1] = xyah[1]; 41 | xyah_box[2] = xyah[2]; 42 | xyah_box[3] = xyah[3]; 43 | auto mc = this->kalman_filter.initiate(xyah_box); 44 | this->mean = mc.first; 45 | this->covariance = mc.second; 46 | 47 | static_tlwh(); 48 | static_tlbr(); 49 | 50 | this->tracklet_len = 0; 51 | this->state = TrackState::Tracked; 52 | if (frame_id == 1) 53 | { 54 | this->is_activated = true; 55 | } 56 | //this->is_activated = true; 57 | this->frame_id = frame_id; 58 | this->start_frame = frame_id; 59 | } 60 | 61 | void STrack::re_activate(STrack &new_track, int frame_id, bool new_id) 62 | { 63 | vector xyah = tlwh_to_xyah(new_track.tlwh); 64 | DETECTBOX xyah_box; 65 | xyah_box[0] = xyah[0]; 66 | xyah_box[1] = xyah[1]; 67 | xyah_box[2] = xyah[2]; 68 | xyah_box[3] = xyah[3]; 69 | auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box); 70 | this->mean = mc.first; 71 | this->covariance = mc.second; 72 | 73 | static_tlwh(); 74 | static_tlbr(); 75 | 76 | this->tracklet_len = 0; 77 | this->state = TrackState::Tracked; 78 | this->is_activated = true; 79 | this->frame_id = frame_id; 80 | this->score = new_track.score; 81 | if (new_id) 82 | this->track_id = next_id(); 83 | } 84 | 85 | void STrack::update(STrack &new_track, int frame_id) 86 | { 87 | this->frame_id = frame_id; 88 | this->tracklet_len++; 89 | 90 | vector xyah = tlwh_to_xyah(new_track.tlwh); 91 | DETECTBOX xyah_box; 92 | 
xyah_box[0] = xyah[0]; 93 | xyah_box[1] = xyah[1]; 94 | xyah_box[2] = xyah[2]; 95 | xyah_box[3] = xyah[3]; 96 | 97 | auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box); 98 | this->mean = mc.first; 99 | this->covariance = mc.second; 100 | 101 | static_tlwh(); 102 | static_tlbr(); 103 | 104 | this->state = TrackState::Tracked; 105 | this->is_activated = true; 106 | 107 | this->score = new_track.score; 108 | } 109 | 110 | void STrack::static_tlwh() 111 | { 112 | if (this->state == TrackState::New) 113 | { 114 | tlwh[0] = _tlwh[0]; 115 | tlwh[1] = _tlwh[1]; 116 | tlwh[2] = _tlwh[2]; 117 | tlwh[3] = _tlwh[3]; 118 | return; 119 | } 120 | 121 | tlwh[0] = mean[0]; 122 | tlwh[1] = mean[1]; 123 | tlwh[2] = mean[2]; 124 | tlwh[3] = mean[3]; 125 | 126 | tlwh[2] *= tlwh[3]; 127 | tlwh[0] -= tlwh[2] / 2; 128 | tlwh[1] -= tlwh[3] / 2; 129 | } 130 | 131 | void STrack::static_tlbr() 132 | { 133 | tlbr.clear(); 134 | tlbr.assign(tlwh.begin(), tlwh.end()); 135 | tlbr[2] += tlbr[0]; 136 | tlbr[3] += tlbr[1]; 137 | } 138 | 139 | vector STrack::tlwh_to_xyah(vector tlwh_tmp) 140 | { 141 | vector tlwh_output = tlwh_tmp; 142 | tlwh_output[0] += tlwh_output[2] / 2; 143 | tlwh_output[1] += tlwh_output[3] / 2; 144 | tlwh_output[2] /= tlwh_output[3]; 145 | return tlwh_output; 146 | } 147 | 148 | vector STrack::to_xyah() 149 | { 150 | return tlwh_to_xyah(tlwh); 151 | } 152 | 153 | vector STrack::tlbr_to_tlwh(vector &tlbr) 154 | { 155 | tlbr[2] -= tlbr[0]; 156 | tlbr[3] -= tlbr[1]; 157 | return tlbr; 158 | } 159 | 160 | void STrack::mark_lost() 161 | { 162 | state = TrackState::Lost; 163 | } 164 | 165 | void STrack::mark_removed() 166 | { 167 | state = TrackState::Removed; 168 | } 169 | 170 | int STrack::next_id() 171 | { 172 | static int _count = 0; 173 | _count++; 174 | return _count; 175 | } 176 | 177 | int STrack::end_frame() 178 | { 179 | return this->frame_id; 180 | } 181 | 182 | void STrack::multi_predict(vector &stracks, byte_kalman::KalmanFilter &kalman_filter) 183 | { 184 | for (int i = 0; i < stracks.size(); i++) 185 | { 186 | if (stracks[i]->state != TrackState::Tracked) 187 | { 188 | stracks[i]->mean[7] = 0; 189 | } 190 | kalman_filter.predict(stracks[i]->mean, stracks[i]->covariance); 191 | } 192 | } -------------------------------------------------------------------------------- /exps/default/nano.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | import torch.nn as nn 7 | 8 | from yolox.exp import Exp as MyExp 9 | 10 | 11 | class Exp(MyExp): 12 | def __init__(self): 13 | super(Exp, self).__init__() 14 | self.depth = 0.33 15 | self.width = 0.25 16 | self.scale = (0.5, 1.5) 17 | self.random_size = (10, 20) 18 | self.test_size = (416, 416) 19 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 20 | self.enable_mixup = False 21 | 22 | def get_model(self, sublinear=False): 23 | 24 | def init_yolo(M): 25 | for m in M.modules(): 26 | if isinstance(m, nn.BatchNorm2d): 27 | m.eps = 1e-3 28 | m.momentum = 0.03 29 | if "model" not in self.__dict__: 30 | from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead 31 | in_channels = [256, 512, 1024] 32 | # NANO model use depthwise = True, which is main difference. 
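# (depthwise-separable convs: a per-channel 3x3 conv followed by a 1x1 pointwise
# conv, which is what cuts the nano model's parameters and FLOPs)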
33 | backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) 34 | head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) 35 | self.model = YOLOX(backbone, head) 36 | 37 | self.model.apply(init_yolo) 38 | self.model.head.initialize_biases(1e-2) 39 | return self.model 40 | -------------------------------------------------------------------------------- /exps/default/yolov3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | import torch 7 | import torch.nn as nn 8 | 9 | from yolox.exp import Exp as MyExp 10 | 11 | 12 | class Exp(MyExp): 13 | def __init__(self): 14 | super(Exp, self).__init__() 15 | self.depth = 1.0 16 | self.width = 1.0 17 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 18 | 19 | def get_model(self, sublinear=False): 20 | def init_yolo(M): 21 | for m in M.modules(): 22 | if isinstance(m, nn.BatchNorm2d): 23 | m.eps = 1e-3 24 | m.momentum = 0.03 25 | if "model" not in self.__dict__: 26 | from yolox.models import YOLOX, YOLOFPN, YOLOXHead 27 | backbone = YOLOFPN() 28 | head = YOLOXHead(self.num_classes, self.width, in_channels=[128, 256, 512], act="lrelu") 29 | self.model = YOLOX(backbone, head) 30 | self.model.apply(init_yolo) 31 | self.model.head.initialize_biases(1e-2) 32 | 33 | return self.model 34 | 35 | def get_data_loader(self, batch_size, is_distributed, no_aug=False): 36 | from data.datasets.cocodataset import COCODataset 37 | from data.datasets.mosaicdetection import MosaicDetection 38 | from data.datasets.data_augment import TrainTransform 39 | from data.datasets.dataloading import YoloBatchSampler, DataLoader, InfiniteSampler 40 | import torch.distributed as dist 41 | 42 | dataset = COCODataset( 43 | data_dir='data/COCO/', 44 | json_file=self.train_ann, 45 | img_size=self.input_size, 46 | preproc=TrainTransform( 47 | rgb_means=(0.485, 0.456, 0.406), 48 | std=(0.229, 0.224, 0.225), 49 | max_labels=50 50 | ), 51 | ) 52 | 53 | dataset = MosaicDetection( 54 | dataset, 55 | mosaic=not no_aug, 56 | img_size=self.input_size, 57 | preproc=TrainTransform( 58 | rgb_means=(0.485, 0.456, 0.406), 59 | std=(0.229, 0.224, 0.225), 60 | max_labels=120 61 | ), 62 | degrees=self.degrees, 63 | translate=self.translate, 64 | scale=self.scale, 65 | shear=self.shear, 66 | perspective=self.perspective, 67 | ) 68 | 69 | self.dataset = dataset 70 | 71 | if is_distributed: 72 | batch_size = batch_size // dist.get_world_size() 73 | sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0) 74 | else: 75 | sampler = torch.utils.data.RandomSampler(self.dataset) 76 | 77 | batch_sampler = YoloBatchSampler( 78 | sampler=sampler, 79 | batch_size=batch_size, 80 | drop_last=False, 81 | input_dimension=self.input_size, 82 | mosaic=not no_aug 83 | ) 84 | 85 | dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} 86 | dataloader_kwargs["batch_sampler"] = batch_sampler 87 | train_loader = DataLoader(self.dataset, **dataloader_kwargs) 88 | 89 | return train_loader 90 | -------------------------------------------------------------------------------- /exps/default/yolox_l.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 1.0 14 | self.width = 1.0 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /exps/default/yolox_m.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.67 14 | self.width = 0.75 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /exps/default/yolox_s.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.33 14 | self.width = 0.50 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /exps/default/yolox_tiny.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.33 14 | self.width = 0.375 15 | self.scale = (0.5, 1.5) 16 | self.random_size = (10, 20) 17 | self.test_size = (416, 416) 18 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 19 | self.enable_mixup = False 20 | -------------------------------------------------------------------------------- /exps/default/yolox_x.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 1.33 14 | self.width = 1.25 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # TODO: Update with exact module version 2 | numpy 3 | torch>=1.7 4 | opencv_python 5 | loguru 6 | scikit-image 7 | tqdm 8 | torchvision>=0.10.0 9 | Pillow 10 | thop 11 | ninja 12 | tabulate 13 | tensorboard 14 | lap 15 | motmetrics 16 | filterpy 17 | h5py 18 | 19 | # verified versions 20 | onnx==1.8.1 21 | onnxruntime==1.8.0 22 | onnx-simplifier==0.3.5 23 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 100 3 | multi_line_output = 3 4 | balanced_wrapping = True 5 | known_standard_library = setuptools 6 | known_third_party = tqdm,loguru 7 | known_data_processing = cv2,numpy,scipy,PIL,matplotlib,scikit_image 8 | known_datasets = pycocotools 9 | known_deeplearning = torch,torchvision,caffe2,onnx,apex,timm,thop,torch2trt,tensorrt,openvino,onnxruntime 10 | known_myself = yolox 11 | sections = FUTURE,STDLIB,THIRDPARTY,data_processing,datasets,deeplearning,myself,FIRSTPARTY,LOCALFOLDER 12 | no_lines_before=STDLIB,THIRDPARTY,datasets 13 | default_section = FIRSTPARTY 14 | 15 | [flake8] 16 | max-line-length = 100 17 | max-complexity = 18 18 | exclude = __init__.py 19 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Megvii, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import re 5 | import setuptools 6 | import glob 7 | from os import path 8 | import torch 9 | from torch.utils.cpp_extension import CppExtension 10 | 11 | torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] 12 | assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3" 13 | 14 | 15 | def get_extensions(): 16 | this_dir = path.dirname(path.abspath(__file__)) 17 | extensions_dir = path.join(this_dir, "yolox", "layers", "csrc") 18 | 19 | main_source = path.join(extensions_dir, "vision.cpp") 20 | sources = glob.glob(path.join(extensions_dir, "**", "*.cpp")) 21 | 22 | sources = [main_source] + sources 23 | extension = CppExtension 24 | 25 | extra_compile_args = {"cxx": ["-O3"]} 26 | define_macros = [] 27 | 28 | include_dirs = [extensions_dir] 29 | 30 | ext_modules = [ 31 | extension( 32 | "yolox._C", 33 | sources, 34 | include_dirs=include_dirs, 35 | define_macros=define_macros, 36 | extra_compile_args=extra_compile_args, 37 | ) 38 | ] 39 | 40 | return ext_modules 41 | 42 | 43 | with open("yolox/__init__.py", "r") as f: 44 | version = re.search( 45 | r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', 46 | f.read(), re.MULTILINE 47 | ).group(1) 48 | 49 | 50 | with open("README.md", "r") as f: 51 | long_description = f.read() 52 | 53 | 54 | setuptools.setup( 55 | name="yolox", 56 | version=version, 57 | author="basedet team", 58 | python_requires=">=3.6", 59 | long_description=long_description, 60 | ext_modules=get_extensions(), 61 | classifiers=["Programming Language :: Python :: 3", "Operating System :: OS Independent"], 62 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 63 | packages=setuptools.find_namespace_packages(), 64 | ) 65 | -------------------------------------------------------------------------------- /tools/convert_cityperson_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | from PIL import Image 5 | 6 | DATA_PATH = 'datasets/Cityscapes/' 7 | DATA_FILE_PATH = 'datasets/data_path/citypersons.train' 8 | OUT_PATH = DATA_PATH + 'annotations/' 9 | 10 | def load_paths(data_path): 11 | with open(data_path, 'r') as file: 12 | img_files = file.readlines() 13 | img_files = [x.replace('\n', '') for x in img_files] 14 | img_files = list(filter(lambda x: len(x) > 0, img_files)) 15 | label_files = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt') for x in img_files] 16 | return img_files, label_files 17 | 18 | if __name__ == '__main__': 19 | if not os.path.exists(OUT_PATH): 20 | os.mkdir(OUT_PATH) 21 | 22 | out_path = OUT_PATH + 'train.json' 23 | out = {'images': [], 'annotations': [], 'categories': [{'id': 1, 'name': 'person'}]} 24 | img_paths, label_paths = load_paths(DATA_FILE_PATH) 25 | image_cnt = 0 26 | ann_cnt = 0 27 | video_cnt = 0 28 | for img_path, label_path in zip(img_paths, label_paths): 29 | image_cnt += 1 30 | im = Image.open(os.path.join("datasets", img_path)) 31 | image_info = {'file_name': img_path, 32 | 'id': image_cnt, 33 | 'height': im.size[1], 34 | 'width': im.size[0]} 35 | out['images'].append(image_info) 36 | # Load labels 37 | if os.path.isfile(os.path.join("datasets", label_path)): 38 | labels0 = np.loadtxt(os.path.join("datasets", label_path), dtype=np.float32).reshape(-1, 6) 39 | # Normalized xywh to pixel xyxy format 40 | labels = labels0.copy() 41 | labels[:, 2] = image_info['width'] * (labels0[:, 2] - labels0[:, 4] / 2) 42 | labels[:, 3] = image_info['height'] * (labels0[:, 3] - 
labels0[:, 5] / 2) 43 | labels[:, 4] = image_info['width'] * labels0[:, 4] 44 | labels[:, 5] = image_info['height'] * labels0[:, 5] 45 | else: 46 | labels = np.array([]) 47 | for i in range(len(labels)): 48 | ann_cnt += 1 49 | fbox = labels[i, 2:6].tolist() 50 | ann = {'id': ann_cnt, 51 | 'category_id': 1, 52 | 'image_id': image_cnt, 53 | 'track_id': -1, 54 | 'bbox': fbox, 55 | 'area': fbox[2] * fbox[3], 56 | 'iscrowd': 0} 57 | out['annotations'].append(ann) 58 | print('loaded train for {} images and {} samples'.format(len(out['images']), len(out['annotations']))) 59 | json.dump(out, open(out_path, 'w')) 60 | -------------------------------------------------------------------------------- /tools/convert_crowdhuman_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | from PIL import Image 5 | 6 | DATA_PATH = 'datasets/crowdhuman/' 7 | OUT_PATH = DATA_PATH + 'annotations/' 8 | SPLITS = ['val', 'train'] 9 | DEBUG = False 10 | 11 | def load_func(fpath): 12 | print('fpath', fpath) 13 | assert os.path.exists(fpath) 14 | with open(fpath,'r') as fid: 15 | lines = fid.readlines() 16 | records =[json.loads(line.strip('\n')) for line in lines] 17 | return records 18 | 19 | if __name__ == '__main__': 20 | if not os.path.exists(OUT_PATH): 21 | os.mkdir(OUT_PATH) 22 | for split in SPLITS: 23 | data_path = DATA_PATH + split 24 | out_path = OUT_PATH + '{}.json'.format(split) 25 | out = {'images': [], 'annotations': [], 'categories': [{'id': 1, 'name': 'person'}]} 26 | ann_path = DATA_PATH + 'annotation_{}.odgt'.format(split) 27 | anns_data = load_func(ann_path) 28 | image_cnt = 0 29 | ann_cnt = 0 30 | video_cnt = 0 31 | for ann_data in anns_data: 32 | image_cnt += 1 33 | file_path = DATA_PATH + 'CrowdHuman_{}/'.format(split) + '{}.jpg'.format(ann_data['ID']) 34 | im = Image.open(file_path) 35 | image_info = {'file_name': '{}.jpg'.format(ann_data['ID']), 36 | 'id': image_cnt, 37 | 'height': im.size[1], 38 | 'width': im.size[0]} 39 | out['images'].append(image_info) 40 | if split != 'test': 41 | anns = ann_data['gtboxes'] 42 | for i in range(len(anns)): 43 | ann_cnt += 1 44 | fbox = anns[i]['fbox'] 45 | ann = {'id': ann_cnt, 46 | 'category_id': 1, 47 | 'image_id': image_cnt, 48 | 'track_id': -1, 49 | 'bbox_vis': anns[i]['vbox'], 50 | 'bbox': fbox, 51 | 'area': fbox[2] * fbox[3], 52 | 'iscrowd': 1 if 'extra' in anns[i] and \ 53 | 'ignore' in anns[i]['extra'] and \ 54 | anns[i]['extra']['ignore'] == 1 else 0} 55 | out['annotations'].append(ann) 56 | print('loaded {} for {} images and {} samples'.format(split, len(out['images']), len(out['annotations']))) 57 | json.dump(out, open(out_path, 'w')) -------------------------------------------------------------------------------- /tools/convert_ethz_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | from PIL import Image 5 | 6 | DATA_PATH = 'datasets/ETHZ/' 7 | DATA_FILE_PATH = 'datasets/data_path/eth.train' 8 | OUT_PATH = DATA_PATH + 'annotations/' 9 | 10 | def load_paths(data_path): 11 | with open(data_path, 'r') as file: 12 | img_files = file.readlines() 13 | img_files = [x.replace('\n', '') for x in img_files] 14 | img_files = list(filter(lambda x: len(x) > 0, img_files)) 15 | label_files = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt') for x in img_files] 16 | return img_files, label_files 17 | 18 | if __name__ == '__main__': 
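# Each row of the labels_with_ids/*.txt files holds six numbers; columns 2-5 are the
# normalized box center (cx, cy) and size (w, h). The conversion further down rescales
# them to the pixel top-left (x, y, width, height) boxes that COCO expects, even though
# the inline comment there says "pixel xyxy".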
19 | if not os.path.exists(OUT_PATH): 20 | os.mkdir(OUT_PATH) 21 | 22 | out_path = OUT_PATH + 'train.json' 23 | out = {'images': [], 'annotations': [], 'categories': [{'id': 1, 'name': 'person'}]} 24 | img_paths, label_paths = load_paths(DATA_FILE_PATH) 25 | image_cnt = 0 26 | ann_cnt = 0 27 | video_cnt = 0 28 | for img_path, label_path in zip(img_paths, label_paths): 29 | image_cnt += 1 30 | im = Image.open(os.path.join("datasets", img_path)) 31 | image_info = {'file_name': img_path, 32 | 'id': image_cnt, 33 | 'height': im.size[1], 34 | 'width': im.size[0]} 35 | out['images'].append(image_info) 36 | # Load labels 37 | if os.path.isfile(os.path.join("datasets", label_path)): 38 | labels0 = np.loadtxt(os.path.join("datasets", label_path), dtype=np.float32).reshape(-1, 6) 39 | # Normalized xywh to pixel xyxy format 40 | labels = labels0.copy() 41 | labels[:, 2] = image_info['width'] * (labels0[:, 2] - labels0[:, 4] / 2) 42 | labels[:, 3] = image_info['height'] * (labels0[:, 3] - labels0[:, 5] / 2) 43 | labels[:, 4] = image_info['width'] * labels0[:, 4] 44 | labels[:, 5] = image_info['height'] * labels0[:, 5] 45 | else: 46 | labels = np.array([]) 47 | for i in range(len(labels)): 48 | ann_cnt += 1 49 | fbox = labels[i, 2:6].tolist() 50 | ann = {'id': ann_cnt, 51 | 'category_id': 1, 52 | 'image_id': image_cnt, 53 | 'track_id': -1, 54 | 'bbox': fbox, 55 | 'area': fbox[2] * fbox[3], 56 | 'iscrowd': 0} 57 | out['annotations'].append(ann) 58 | print('loaded train for {} images and {} samples'.format(len(out['images']), len(out['annotations']))) 59 | json.dump(out, open(out_path, 'w')) 60 | -------------------------------------------------------------------------------- /tools/convert_video.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | def convert_video(video_path): 4 | cap = cv2.VideoCapture(video_path) 5 | width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float 6 | height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float 7 | fps = cap.get(cv2.CAP_PROP_FPS) 8 | video_name = video_path.split('/')[-1].split('.')[0] 9 | save_name = video_name + '_converted' 10 | save_path = video_path.replace(video_name, save_name) 11 | vid_writer = cv2.VideoWriter( 12 | save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)) 13 | ) 14 | while True: 15 | ret_val, frame = cap.read() 16 | if ret_val: 17 | vid_writer.write(frame) 18 | ch = cv2.waitKey(1) 19 | if ch == 27 or ch == ord("q") or ch == ord("Q"): 20 | break 21 | else: 22 | break 23 | 24 | if __name__ == "__main__": 25 | video_path = 'videos/palace.mp4' 26 | convert_video(video_path) -------------------------------------------------------------------------------- /tools/export_onnx.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | 3 | import torch 4 | from torch import nn 5 | 6 | from yolox.exp import get_exp 7 | from yolox.models.network_blocks import SiLU 8 | from yolox.utils import replace_module 9 | 10 | import argparse 11 | import os 12 | 13 | 14 | def make_parser(): 15 | parser = argparse.ArgumentParser("YOLOX onnx deploy") 16 | parser.add_argument( 17 | "--output-name", type=str, default="bytetrack_s.onnx", help="output name of models" 18 | ) 19 | parser.add_argument( 20 | "--input", default="images", type=str, help="input node name of onnx model" 21 | ) 22 | parser.add_argument( 23 | "--output", default="output", type=str, help="output node name of onnx model" 24 | ) 25 | parser.add_argument( 26 | "-o", "--opset", 
default=11, type=int, help="onnx opset version" 27 | ) 28 | parser.add_argument("--no-onnxsim", action="store_true", help="use onnxsim or not") 29 | parser.add_argument( 30 | "-f", 31 | "--exp_file", 32 | default=None, 33 | type=str, 34 | help="expriment description file", 35 | ) 36 | parser.add_argument("-expn", "--experiment-name", type=str, default=None) 37 | parser.add_argument("-n", "--name", type=str, default=None, help="model name") 38 | parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt path") 39 | parser.add_argument( 40 | "opts", 41 | help="Modify config options using the command-line", 42 | default=None, 43 | nargs=argparse.REMAINDER, 44 | ) 45 | 46 | return parser 47 | 48 | 49 | @logger.catch 50 | def main(): 51 | args = make_parser().parse_args() 52 | logger.info("args value: {}".format(args)) 53 | exp = get_exp(args.exp_file, args.name) 54 | exp.merge(args.opts) 55 | 56 | if not args.experiment_name: 57 | args.experiment_name = exp.exp_name 58 | 59 | model = exp.get_model() 60 | if args.ckpt is None: 61 | file_name = os.path.join(exp.output_dir, args.experiment_name) 62 | ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar") 63 | else: 64 | ckpt_file = args.ckpt 65 | 66 | # load the model state dict 67 | ckpt = torch.load(ckpt_file, map_location="cpu") 68 | 69 | model.eval() 70 | if "model" in ckpt: 71 | ckpt = ckpt["model"] 72 | model.load_state_dict(ckpt) 73 | model = replace_module(model, nn.SiLU, SiLU) 74 | model.head.decode_in_inference = False 75 | 76 | logger.info("loading checkpoint done.") 77 | dummy_input = torch.randn(1, 3, exp.test_size[0], exp.test_size[1]) 78 | torch.onnx._export( 79 | model, 80 | dummy_input, 81 | args.output_name, 82 | input_names=[args.input], 83 | output_names=[args.output], 84 | opset_version=args.opset, 85 | ) 86 | logger.info("generated onnx model named {}".format(args.output_name)) 87 | 88 | if not args.no_onnxsim: 89 | import onnx 90 | 91 | from onnxsim import simplify 92 | 93 | # use onnxsimplify to reduce reduent model. 94 | onnx_model = onnx.load(args.output_name) 95 | model_simp, check = simplify(onnx_model) 96 | assert check, "Simplified ONNX model could not be validated" 97 | onnx.save(model_simp, args.output_name) 98 | logger.info("generated simplified onnx model named {}".format(args.output_name)) 99 | 100 | 101 | if __name__ == "__main__": 102 | main() 103 | -------------------------------------------------------------------------------- /tools/mix_data_ablation.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | """ 6 | cd datasets 7 | mkdir -p mix_mot_ch/annotations 8 | cp mot/annotations/val_half.json mix_mot_ch/annotations/val_half.json 9 | cp mot/annotations/test.json mix_mot_ch/annotations/test.json 10 | cd mix_mot_ch 11 | ln -s ../mot/train mot_train 12 | ln -s ../crowdhuman/CrowdHuman_train crowdhuman_train 13 | ln -s ../crowdhuman/CrowdHuman_val crowdhuman_val 14 | cd .. 
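# note: crowdhuman/annotations/{train,val}.json come from tools/convert_crowdhuman_to_coco.py;
# mot/annotations/train_half.json and val_half.json are the half splits expected from tools/convert_mot17_to_coco.py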
15 | """ 16 | 17 | mot_json = json.load(open('datasets/mot/annotations/train_half.json','r')) 18 | 19 | img_list = list() 20 | for img in mot_json['images']: 21 | img['file_name'] = 'mot_train/' + img['file_name'] 22 | img_list.append(img) 23 | 24 | ann_list = list() 25 | for ann in mot_json['annotations']: 26 | ann_list.append(ann) 27 | 28 | video_list = mot_json['videos'] 29 | category_list = mot_json['categories'] 30 | 31 | print('mot17') 32 | 33 | max_img = 10000 34 | max_ann = 2000000 35 | max_video = 10 36 | 37 | crowdhuman_json = json.load(open('datasets/crowdhuman/annotations/train.json','r')) 38 | img_id_count = 0 39 | for img in crowdhuman_json['images']: 40 | img_id_count += 1 41 | img['file_name'] = 'crowdhuman_train/' + img['file_name'] 42 | img['frame_id'] = img_id_count 43 | img['prev_image_id'] = img['id'] + max_img 44 | img['next_image_id'] = img['id'] + max_img 45 | img['id'] = img['id'] + max_img 46 | img['video_id'] = max_video 47 | img_list.append(img) 48 | 49 | for ann in crowdhuman_json['annotations']: 50 | ann['id'] = ann['id'] + max_ann 51 | ann['image_id'] = ann['image_id'] + max_img 52 | ann_list.append(ann) 53 | 54 | video_list.append({ 55 | 'id': max_video, 56 | 'file_name': 'crowdhuman_train' 57 | }) 58 | 59 | print('crowdhuman_train') 60 | 61 | max_img = 30000 62 | max_ann = 10000000 63 | 64 | crowdhuman_val_json = json.load(open('datasets/crowdhuman/annotations/val.json','r')) 65 | img_id_count = 0 66 | for img in crowdhuman_val_json['images']: 67 | img_id_count += 1 68 | img['file_name'] = 'crowdhuman_val/' + img['file_name'] 69 | img['frame_id'] = img_id_count 70 | img['prev_image_id'] = img['id'] + max_img 71 | img['next_image_id'] = img['id'] + max_img 72 | img['id'] = img['id'] + max_img 73 | img['video_id'] = max_video 74 | img_list.append(img) 75 | 76 | for ann in crowdhuman_val_json['annotations']: 77 | ann['id'] = ann['id'] + max_ann 78 | ann['image_id'] = ann['image_id'] + max_img 79 | ann_list.append(ann) 80 | 81 | video_list.append({ 82 | 'id': max_video, 83 | 'file_name': 'crowdhuman_val' 84 | }) 85 | 86 | print('crowdhuman_val') 87 | 88 | mix_json = dict() 89 | mix_json['images'] = img_list 90 | mix_json['annotations'] = ann_list 91 | mix_json['videos'] = video_list 92 | mix_json['categories'] = category_list 93 | json.dump(mix_json, open('datasets/mix_mot_ch/annotations/train.json','w')) -------------------------------------------------------------------------------- /tools/mix_data_test_mot17.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | """ 6 | cd datasets 7 | mkdir -p mix_det/annotations 8 | cp mot/annotations/val_half.json mix_det/annotations/val_half.json 9 | cp mot/annotations/test.json mix_det/annotations/test.json 10 | cd mix_det 11 | ln -s ../mot/train mot_train 12 | ln -s ../crowdhuman/CrowdHuman_train crowdhuman_train 13 | ln -s ../crowdhuman/CrowdHuman_val crowdhuman_val 14 | ln -s ../Cityscapes cp_train 15 | ln -s ../ETHZ ethz_train 16 | cd .. 
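# note: the ETHZ and Cityscapes jsons loaded below are the ones written by
# tools/convert_ethz_to_coco.py and tools/convert_cityperson_to_coco.py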
17 | """ 18 | 19 | mot_json = json.load(open('datasets/mot/annotations/train.json','r')) 20 | 21 | img_list = list() 22 | for img in mot_json['images']: 23 | img['file_name'] = 'mot_train/' + img['file_name'] 24 | img_list.append(img) 25 | 26 | ann_list = list() 27 | for ann in mot_json['annotations']: 28 | ann_list.append(ann) 29 | 30 | video_list = mot_json['videos'] 31 | category_list = mot_json['categories'] 32 | 33 | 34 | print('mot17') 35 | 36 | max_img = 10000 37 | max_ann = 2000000 38 | max_video = 10 39 | 40 | crowdhuman_json = json.load(open('datasets/crowdhuman/annotations/train.json','r')) 41 | img_id_count = 0 42 | for img in crowdhuman_json['images']: 43 | img_id_count += 1 44 | img['file_name'] = 'crowdhuman_train/' + img['file_name'] 45 | img['frame_id'] = img_id_count 46 | img['prev_image_id'] = img['id'] + max_img 47 | img['next_image_id'] = img['id'] + max_img 48 | img['id'] = img['id'] + max_img 49 | img['video_id'] = max_video 50 | img_list.append(img) 51 | 52 | for ann in crowdhuman_json['annotations']: 53 | ann['id'] = ann['id'] + max_ann 54 | ann['image_id'] = ann['image_id'] + max_img 55 | ann_list.append(ann) 56 | 57 | print('crowdhuman_train') 58 | 59 | video_list.append({ 60 | 'id': max_video, 61 | 'file_name': 'crowdhuman_train' 62 | }) 63 | 64 | 65 | max_img = 30000 66 | max_ann = 10000000 67 | 68 | crowdhuman_val_json = json.load(open('datasets/crowdhuman/annotations/val.json','r')) 69 | img_id_count = 0 70 | for img in crowdhuman_val_json['images']: 71 | img_id_count += 1 72 | img['file_name'] = 'crowdhuman_val/' + img['file_name'] 73 | img['frame_id'] = img_id_count 74 | img['prev_image_id'] = img['id'] + max_img 75 | img['next_image_id'] = img['id'] + max_img 76 | img['id'] = img['id'] + max_img 77 | img['video_id'] = max_video 78 | img_list.append(img) 79 | 80 | for ann in crowdhuman_val_json['annotations']: 81 | ann['id'] = ann['id'] + max_ann 82 | ann['image_id'] = ann['image_id'] + max_img 83 | ann_list.append(ann) 84 | 85 | print('crowdhuman_val') 86 | 87 | video_list.append({ 88 | 'id': max_video, 89 | 'file_name': 'crowdhuman_val' 90 | }) 91 | 92 | max_img = 40000 93 | max_ann = 20000000 94 | 95 | ethz_json = json.load(open('datasets/ETHZ/annotations/train.json','r')) 96 | img_id_count = 0 97 | for img in ethz_json['images']: 98 | img_id_count += 1 99 | img['file_name'] = 'ethz_train/' + img['file_name'][5:] 100 | img['frame_id'] = img_id_count 101 | img['prev_image_id'] = img['id'] + max_img 102 | img['next_image_id'] = img['id'] + max_img 103 | img['id'] = img['id'] + max_img 104 | img['video_id'] = max_video 105 | img_list.append(img) 106 | 107 | for ann in ethz_json['annotations']: 108 | ann['id'] = ann['id'] + max_ann 109 | ann['image_id'] = ann['image_id'] + max_img 110 | ann_list.append(ann) 111 | 112 | print('ETHZ') 113 | 114 | video_list.append({ 115 | 'id': max_video, 116 | 'file_name': 'ethz' 117 | }) 118 | 119 | max_img = 50000 120 | max_ann = 25000000 121 | 122 | cp_json = json.load(open('datasets/Cityscapes/annotations/train.json','r')) 123 | img_id_count = 0 124 | for img in cp_json['images']: 125 | img_id_count += 1 126 | img['file_name'] = 'cp_train/' + img['file_name'][11:] 127 | img['frame_id'] = img_id_count 128 | img['prev_image_id'] = img['id'] + max_img 129 | img['next_image_id'] = img['id'] + max_img 130 | img['id'] = img['id'] + max_img 131 | img['video_id'] = max_video 132 | img_list.append(img) 133 | 134 | for ann in cp_json['annotations']: 135 | ann['id'] = ann['id'] + max_ann 136 | ann['image_id'] = ann['image_id'] + 
max_img 137 | ann_list.append(ann) 138 | 139 | print('Cityscapes') 140 | 141 | video_list.append({ 142 | 'id': max_video, 143 | 'file_name': 'cityperson' 144 | }) 145 | 146 | mix_json = dict() 147 | mix_json['images'] = img_list 148 | mix_json['annotations'] = ann_list 149 | mix_json['videos'] = video_list 150 | mix_json['categories'] = category_list 151 | json.dump(mix_json, open('datasets/mix_det/annotations/train.json','w')) 152 | -------------------------------------------------------------------------------- /tools/mix_data_test_mot20.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | """ 6 | cd datasets 7 | mkdir -p mix_mot20_ch/annotations 8 | cp MOT20/annotations/val_half.json mix_mot20_ch/annotations/val_half.json 9 | cp MOT20/annotations/test.json mix_mot20_ch/annotations/test.json 10 | cd mix_mot20_ch 11 | ln -s ../MOT20/train mot20_train 12 | ln -s ../crowdhuman/CrowdHuman_train crowdhuman_train 13 | ln -s ../crowdhuman/CrowdHuman_val crowdhuman_val 14 | cd .. 15 | """ 16 | 17 | mot_json = json.load(open('datasets/MOT20/annotations/train.json','r')) 18 | 19 | img_list = list() 20 | for img in mot_json['images']: 21 | img['file_name'] = 'mot20_train/' + img['file_name'] 22 | img_list.append(img) 23 | 24 | ann_list = list() 25 | for ann in mot_json['annotations']: 26 | ann_list.append(ann) 27 | 28 | video_list = mot_json['videos'] 29 | category_list = mot_json['categories'] 30 | 31 | 32 | 33 | 34 | max_img = 10000 35 | max_ann = 2000000 36 | max_video = 10 37 | 38 | crowdhuman_json = json.load(open('datasets/crowdhuman/annotations/train.json','r')) 39 | img_id_count = 0 40 | for img in crowdhuman_json['images']: 41 | img_id_count += 1 42 | img['file_name'] = 'crowdhuman_train/' + img['file_name'] 43 | img['frame_id'] = img_id_count 44 | img['prev_image_id'] = img['id'] + max_img 45 | img['next_image_id'] = img['id'] + max_img 46 | img['id'] = img['id'] + max_img 47 | img['video_id'] = max_video 48 | img_list.append(img) 49 | 50 | for ann in crowdhuman_json['annotations']: 51 | ann['id'] = ann['id'] + max_ann 52 | ann['image_id'] = ann['image_id'] + max_img 53 | ann_list.append(ann) 54 | 55 | video_list.append({ 56 | 'id': max_video, 57 | 'file_name': 'crowdhuman_train' 58 | }) 59 | 60 | 61 | max_img = 30000 62 | max_ann = 10000000 63 | 64 | crowdhuman_val_json = json.load(open('datasets/crowdhuman/annotations/val.json','r')) 65 | img_id_count = 0 66 | for img in crowdhuman_val_json['images']: 67 | img_id_count += 1 68 | img['file_name'] = 'crowdhuman_val/' + img['file_name'] 69 | img['frame_id'] = img_id_count 70 | img['prev_image_id'] = img['id'] + max_img 71 | img['next_image_id'] = img['id'] + max_img 72 | img['id'] = img['id'] + max_img 73 | img['video_id'] = max_video 74 | img_list.append(img) 75 | 76 | for ann in crowdhuman_val_json['annotations']: 77 | ann['id'] = ann['id'] + max_ann 78 | ann['image_id'] = ann['image_id'] + max_img 79 | ann_list.append(ann) 80 | 81 | video_list.append({ 82 | 'id': max_video, 83 | 'file_name': 'crowdhuman_val' 84 | }) 85 | 86 | mix_json = dict() 87 | mix_json['images'] = img_list 88 | mix_json['annotations'] = ann_list 89 | mix_json['videos'] = video_list 90 | mix_json['categories'] = category_list 91 | json.dump(mix_json, open('datasets/mix_mot20_ch/annotations/train.json','w')) -------------------------------------------------------------------------------- /tools/mota.py: -------------------------------------------------------------------------------- 1 | from 
loguru import logger 2 | 3 | import torch 4 | import torch.backends.cudnn as cudnn 5 | from torch.nn.parallel import DistributedDataParallel as DDP 6 | 7 | from yolox.core import launch 8 | from yolox.exp import get_exp 9 | from yolox.utils import configure_nccl, fuse_model, get_local_rank, get_model_info, setup_logger 10 | from yolox.evaluators import MOTEvaluator 11 | 12 | import argparse 13 | import os 14 | import random 15 | import warnings 16 | import glob 17 | import motmetrics as mm 18 | from collections import OrderedDict 19 | from pathlib import Path 20 | 21 | 22 | def compare_dataframes(gts, ts): 23 | accs = [] 24 | names = [] 25 | for k, tsacc in ts.items(): 26 | if k in gts: 27 | logger.info('Comparing {}...'.format(k)) 28 | accs.append(mm.utils.compare_to_groundtruth(gts[k], tsacc, 'iou', distth=0.5)) 29 | names.append(k) 30 | else: 31 | logger.warning('No ground truth for {}, skipping.'.format(k)) 32 | 33 | return accs, names 34 | 35 | 36 | # evaluate MOTA 37 | results_folder = 'YOLOX_outputs/yolox_x_ablation/track_results' 38 | mm.lap.default_solver = 'lap' 39 | 40 | gt_type = '_val_half' 41 | #gt_type = '' 42 | print('gt_type', gt_type) 43 | gtfiles = glob.glob( 44 | os.path.join('datasets/mot/train', '*/gt/gt{}.txt'.format(gt_type))) 45 | print('gt_files', gtfiles) 46 | tsfiles = [f for f in glob.glob(os.path.join(results_folder, '*.txt')) if not os.path.basename(f).startswith('eval')] 47 | 48 | logger.info('Found {} groundtruths and {} test files.'.format(len(gtfiles), len(tsfiles))) 49 | logger.info('Available LAP solvers {}'.format(mm.lap.available_solvers)) 50 | logger.info('Default LAP solver \'{}\''.format(mm.lap.default_solver)) 51 | logger.info('Loading files.') 52 | 53 | gt = OrderedDict([(Path(f).parts[-3], mm.io.loadtxt(f, fmt='mot15-2D', min_confidence=1)) for f in gtfiles]) 54 | ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], mm.io.loadtxt(f, fmt='mot15-2D', min_confidence=0.6)) for f in tsfiles]) 55 | 56 | mh = mm.metrics.create() 57 | accs, names = compare_dataframes(gt, ts) 58 | 59 | logger.info('Running metrics') 60 | metrics = ['recall', 'precision', 'num_unique_objects', 'mostly_tracked', 61 | 'partially_tracked', 'mostly_lost', 'num_false_positives', 'num_misses', 62 | 'num_switches', 'num_fragmentations', 'mota', 'motp', 'num_objects'] 63 | summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) 64 | # summary = mh.compute_many(accs, names=names, metrics=mm.metrics.motchallenge_metrics, generate_overall=True) 65 | # print(mm.io.render_summary( 66 | # summary, formatters=mh.formatters, 67 | # namemap=mm.io.motchallenge_metric_names)) 68 | div_dict = { 69 | 'num_objects': ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations'], 70 | 'num_unique_objects': ['mostly_tracked', 'partially_tracked', 'mostly_lost']} 71 | for divisor in div_dict: 72 | for divided in div_dict[divisor]: 73 | summary[divided] = (summary[divided] / summary[divisor]) 74 | fmt = mh.formatters 75 | change_fmt_list = ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations', 'mostly_tracked', 76 | 'partially_tracked', 'mostly_lost'] 77 | for k in change_fmt_list: 78 | fmt[k] = fmt['mota'] 79 | print(mm.io.render_summary(summary, formatters=fmt, namemap=mm.io.motchallenge_metric_names)) 80 | 81 | metrics = mm.metrics.motchallenge_metrics + ['num_objects'] 82 | summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) 83 | print(mm.io.render_summary(summary, formatters=mh.formatters, 
namemap=mm.io.motchallenge_metric_names)) 84 | logger.info('Completed') -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | 3 | import torch 4 | import torch.backends.cudnn as cudnn 5 | 6 | from yolox.core import Trainer, launch 7 | from yolox.exp import get_exp 8 | 9 | import argparse 10 | import random 11 | import warnings 12 | 13 | 14 | def make_parser(): 15 | parser = argparse.ArgumentParser("YOLOX train parser") 16 | parser.add_argument("-expn", "--experiment-name", type=str, default=None) 17 | parser.add_argument("-n", "--name", type=str, default=None, help="model name") 18 | 19 | # distributed 20 | parser.add_argument( 21 | "--dist-backend", default="nccl", type=str, help="distributed backend" 22 | ) 23 | parser.add_argument( 24 | "--dist-url", 25 | default=None, 26 | type=str, 27 | help="url used to set up distributed training", 28 | ) 29 | parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size") 30 | parser.add_argument( 31 | "-d", "--devices", default=None, type=int, help="device for training" 32 | ) 33 | parser.add_argument( 34 | "--local_rank", default=0, type=int, help="local rank for dist training" 35 | ) 36 | parser.add_argument( 37 | "-f", 38 | "--exp_file", 39 | default=None, 40 | type=str, 41 | help="plz input your expriment description file", 42 | ) 43 | parser.add_argument( 44 | "--resume", default=False, action="store_true", help="resume training" 45 | ) 46 | parser.add_argument("-c", "--ckpt", default=None, type=str, help="checkpoint file") 47 | parser.add_argument( 48 | "-e", 49 | "--start_epoch", 50 | default=None, 51 | type=int, 52 | help="resume training start epoch", 53 | ) 54 | parser.add_argument( 55 | "--num_machines", default=1, type=int, help="num of node for training" 56 | ) 57 | parser.add_argument( 58 | "--machine_rank", default=0, type=int, help="node rank for multi-node training" 59 | ) 60 | parser.add_argument( 61 | "--fp16", 62 | dest="fp16", 63 | default=True, 64 | action="store_true", 65 | help="Adopting mix precision training.", 66 | ) 67 | parser.add_argument( 68 | "-o", 69 | "--occupy", 70 | dest="occupy", 71 | default=False, 72 | action="store_true", 73 | help="occupy GPU memory first for training.", 74 | ) 75 | parser.add_argument( 76 | "opts", 77 | help="Modify config options using the command-line", 78 | default=None, 79 | nargs=argparse.REMAINDER, 80 | ) 81 | return parser 82 | 83 | 84 | @logger.catch 85 | def main(exp, args): 86 | if exp.seed is not None: 87 | random.seed(exp.seed) 88 | torch.manual_seed(exp.seed) 89 | cudnn.deterministic = True 90 | warnings.warn( 91 | "You have chosen to seed training. This will turn on the CUDNN deterministic setting, " 92 | "which can slow down your training considerably! You may see unexpected behavior " 93 | "when restarting from checkpoints." 
94 | ) 95 | 96 | # set environment variables for distributed training 97 | cudnn.benchmark = True 98 | 99 | trainer = Trainer(exp, args) 100 | trainer.train() 101 | 102 | 103 | if __name__ == "__main__": 104 | args = make_parser().parse_args() 105 | exp = get_exp(args.exp_file, args.name) 106 | exp.merge(args.opts) 107 | 108 | if not args.experiment_name: 109 | args.experiment_name = exp.exp_name 110 | 111 | num_gpu = torch.cuda.device_count() if args.devices is None else args.devices 112 | assert num_gpu <= torch.cuda.device_count() 113 | 114 | launch( 115 | main, 116 | num_gpu, 117 | args.num_machines, 118 | args.machine_rank, 119 | backend=args.dist_backend, 120 | dist_url=args.dist_url, 121 | args=(exp, args), 122 | ) 123 | -------------------------------------------------------------------------------- /tools/trt.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | 3 | import tensorrt as trt 4 | import torch 5 | from torch2trt import torch2trt 6 | 7 | from yolox.exp import get_exp 8 | 9 | import argparse 10 | import os 11 | import shutil 12 | 13 | 14 | def make_parser(): 15 | parser = argparse.ArgumentParser("YOLOX ncnn deploy") 16 | parser.add_argument("-expn", "--experiment-name", type=str, default=None) 17 | parser.add_argument("-n", "--name", type=str, default=None, help="model name") 18 | 19 | parser.add_argument( 20 | "-f", 21 | "--exp_file", 22 | default=None, 23 | type=str, 24 | help="pls input your expriment description file", 25 | ) 26 | parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt path") 27 | return parser 28 | 29 | 30 | @logger.catch 31 | def main(): 32 | args = make_parser().parse_args() 33 | exp = get_exp(args.exp_file, args.name) 34 | if not args.experiment_name: 35 | args.experiment_name = exp.exp_name 36 | 37 | model = exp.get_model() 38 | file_name = os.path.join(exp.output_dir, args.experiment_name) 39 | os.makedirs(file_name, exist_ok=True) 40 | if args.ckpt is None: 41 | ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar") 42 | else: 43 | ckpt_file = args.ckpt 44 | 45 | ckpt = torch.load(ckpt_file, map_location="cpu") 46 | # load the model state dict 47 | 48 | model.load_state_dict(ckpt["model"]) 49 | logger.info("loaded checkpoint done.") 50 | model.eval() 51 | model.cuda() 52 | model.head.decode_in_inference = False 53 | x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda() 54 | model_trt = torch2trt( 55 | model, 56 | [x], 57 | fp16_mode=True, 58 | log_level=trt.Logger.INFO, 59 | max_workspace_size=(1 << 32), 60 | ) 61 | torch.save(model_trt.state_dict(), os.path.join(file_name, "model_trt.pth")) 62 | logger.info("Converted TensorRT model done.") 63 | engine_file = os.path.join(file_name, "model_trt.engine") 64 | engine_file_demo = os.path.join("deploy", "TensorRT", "cpp", "model_trt.engine") 65 | with open(engine_file, "wb") as f: 66 | f.write(model_trt.engine.serialize()) 67 | 68 | shutil.copyfile(engine_file, engine_file_demo) 69 | 70 | logger.info("Converted TensorRT model engine file is saved for C++ inference.") 71 | 72 | 73 | if __name__ == "__main__": 74 | main() 75 | -------------------------------------------------------------------------------- /tools_yolov5/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/tools_yolov5/models/__init__.py 
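The sketch below is not part of this repo; it shows one way the `model_trt.pth` saved by `tools/trt.py` above could be reloaded for inference with torch2trt. The output path and the 608x1088 input size are assumptions and must match the experiment file used during conversion.

```python
# Hypothetical reload sketch, assuming torch2trt is installed and the engine was
# built on this same GPU; path and input size must match the conversion run.
import torch
from torch2trt import TRTModule

model_trt = TRTModule()
# state dict produced by tools/trt.py via torch.save(model_trt.state_dict(), ...)
model_trt.load_state_dict(torch.load("YOLOX_outputs/yolox_s_mix_det/model_trt.pth"))

x = torch.ones(1, 3, 608, 1088).cuda()  # (batch, channels, exp.test_size[0], exp.test_size[1])
with torch.no_grad():
    outputs = model_trt(x)  # raw head outputs: decode_in_inference was disabled before conversion
```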
-------------------------------------------------------------------------------- /tools_yolov5/models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /tools_yolov5/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/tools_yolov5/utils/__init__.py -------------------------------------------------------------------------------- /tools_yolov5/utils/activations.py: -------------------------------------------------------------------------------- 1 | # Activation functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | # SiLU https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------- 9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 10 | @staticmethod 11 | def forward(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 16 | @staticmethod 17 | def forward(x): 18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 19 | return x * F.hardtanh(x + 3, 0., 6.) / 6. 
# for torchscript, CoreML and ONNX 20 | 21 | 22 | class MemoryEfficientSwish(nn.Module): 23 | class F(torch.autograd.Function): 24 | @staticmethod 25 | def forward(ctx, x): 26 | ctx.save_for_backward(x) 27 | return x * torch.sigmoid(x) 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | x = ctx.saved_tensors[0] 32 | sx = torch.sigmoid(x) 33 | return grad_output * (sx * (1 + x * (1 - sx))) 34 | 35 | def forward(self, x): 36 | return self.F.apply(x) 37 | 38 | 39 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 40 | class Mish(nn.Module): 41 | @staticmethod 42 | def forward(x): 43 | return x * F.softplus(x).tanh() 44 | 45 | 46 | class MemoryEfficientMish(nn.Module): 47 | class F(torch.autograd.Function): 48 | @staticmethod 49 | def forward(ctx, x): 50 | ctx.save_for_backward(x) 51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 52 | 53 | @staticmethod 54 | def backward(ctx, grad_output): 55 | x = ctx.saved_tensors[0] 56 | sx = torch.sigmoid(x) 57 | fx = F.softplus(x).tanh() 58 | return grad_output * (fx + x * sx * (1 - fx * fx)) 59 | 60 | def forward(self, x): 61 | return self.F.apply(x) 62 | 63 | 64 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 65 | class FReLU(nn.Module): 66 | def __init__(self, c1, k=3): # ch_in, kernel 67 | super().__init__() 68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 69 | self.bn = nn.BatchNorm2d(c1) 70 | 71 | def forward(self, x): 72 | return torch.max(x, self.bn(self.conv(x))) 73 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.dockerignore: -------------------------------------------------------------------------------- 1 | # Repo-specific DockerIgnore ------------------------------------------------------------------------------------------- 2 | #.git 3 | .cache 4 | .idea 5 | runs 6 | output 7 | coco 8 | storage.googleapis.com 9 | 10 | data/samples/* 11 | **/results*.txt 12 | *.jpg 13 | 14 | # Neural Network weights ----------------------------------------------------------------------------------------------- 15 | **/*.weights 16 | **/*.pt 17 | **/*.pth 18 | **/*.onnx 19 | **/*.mlmodel 20 | **/*.torchscript 21 | 22 | 23 | # Below Copied From .gitignore ----------------------------------------------------------------------------------------- 24 | # Below Copied From .gitignore ----------------------------------------------------------------------------------------- 25 | 26 | 27 | # GitHub Python GitIgnore ---------------------------------------------------------------------------------------------- 28 | # Byte-compiled / optimized / DLL files 29 | __pycache__/ 30 | *.py[cod] 31 | *$py.class 32 | 33 | # C extensions 34 | *.so 35 | 36 | # Distribution / packaging 37 | .Python 38 | env/ 39 | build/ 40 | develop-eggs/ 41 | dist/ 42 | downloads/ 43 | eggs/ 44 | .eggs/ 45 | lib/ 46 | lib64/ 47 | parts/ 48 | sdist/ 49 | var/ 50 | wheels/ 51 | *.egg-info/ 52 | wandb/ 53 | .installed.cfg 54 | *.egg 55 | 56 | # PyInstaller 57 | # Usually these files are written by a python script from a template 58 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
59 | *.manifest 60 | *.spec 61 | 62 | # Installer logs 63 | pip-log.txt 64 | pip-delete-this-directory.txt 65 | 66 | # Unit test / coverage reports 67 | htmlcov/ 68 | .tox/ 69 | .coverage 70 | .coverage.* 71 | .cache 72 | nosetests.xml 73 | coverage.xml 74 | *.cover 75 | .hypothesis/ 76 | 77 | # Translations 78 | *.mo 79 | *.pot 80 | 81 | # Django stuff: 82 | *.log 83 | local_settings.py 84 | 85 | # Flask stuff: 86 | instance/ 87 | .webassets-cache 88 | 89 | # Scrapy stuff: 90 | .scrapy 91 | 92 | # Sphinx documentation 93 | docs/_build/ 94 | 95 | # PyBuilder 96 | target/ 97 | 98 | # Jupyter Notebook 99 | .ipynb_checkpoints 100 | 101 | # pyenv 102 | .python-version 103 | 104 | # celery beat schedule file 105 | celerybeat-schedule 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # dotenv 111 | .env 112 | 113 | # virtualenv 114 | .venv* 115 | venv*/ 116 | ENV*/ 117 | 118 | # Spyder project settings 119 | .spyderproject 120 | .spyproject 121 | 122 | # Rope project settings 123 | .ropeproject 124 | 125 | # mkdocs documentation 126 | /site 127 | 128 | # mypy 129 | .mypy_cache/ 130 | 131 | 132 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore ----------------------------------------------- 133 | 134 | # General 135 | .DS_Store 136 | .AppleDouble 137 | .LSOverride 138 | 139 | # Icon must end with two \r 140 | Icon 141 | Icon? 142 | 143 | # Thumbnails 144 | ._* 145 | 146 | # Files that might appear in the root of a volume 147 | .DocumentRevisions-V100 148 | .fseventsd 149 | .Spotlight-V100 150 | .TemporaryItems 151 | .Trashes 152 | .VolumeIcon.icns 153 | .com.apple.timemachine.donotpresent 154 | 155 | # Directories potentially created on remote AFP share 156 | .AppleDB 157 | .AppleDesktop 158 | Network Trash Folder 159 | Temporary Items 160 | .apdisk 161 | 162 | 163 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore 164 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 165 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 166 | 167 | # User-specific stuff: 168 | .idea/* 169 | .idea/**/workspace.xml 170 | .idea/**/tasks.xml 171 | .idea/dictionaries 172 | .html # Bokeh Plots 173 | .pg # TensorFlow Frozen Graphs 174 | .avi # videos 175 | 176 | # Sensitive or high-churn files: 177 | .idea/**/dataSources/ 178 | .idea/**/dataSources.ids 179 | .idea/**/dataSources.local.xml 180 | .idea/**/sqlDataSources.xml 181 | .idea/**/dynamic.xml 182 | .idea/**/uiDesigner.xml 183 | 184 | # Gradle: 185 | .idea/**/gradle.xml 186 | .idea/**/libraries 187 | 188 | # CMake 189 | cmake-build-debug/ 190 | cmake-build-release/ 191 | 192 | # Mongo Explorer plugin: 193 | .idea/**/mongoSettings.xml 194 | 195 | ## File-based project format: 196 | *.iws 197 | 198 | ## Plugin-specific files: 199 | 200 | # IntelliJ 201 | out/ 202 | 203 | # mpeltonen/sbt-idea plugin 204 | .idea_modules/ 205 | 206 | # JIRA plugin 207 | atlassian-ide-plugin.xml 208 | 209 | # Cursive Clojure plugin 210 | .idea/replstate.xml 211 | 212 | # Crashlytics plugin (for Android Studio and IntelliJ) 213 | com_crashlytics_export_strings.xml 214 | crashlytics.properties 215 | crashlytics-build.properties 216 | fabric.properties 217 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.gitattributes: -------------------------------------------------------------------------------- 1 | # this drop notebooks from GitHub language stats 2 | *.ipynb 
linguist-vendored 3 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "🐛 Bug report" 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | Before submitting a bug report, please be aware that your issue **must be reproducible** with all of the following, otherwise it is non-actionable, and we can not help you: 11 | - **Current repo**: run `git fetch && git status -uno` to check and `git pull` to update repo 12 | - **Common dataset**: coco.yaml or coco128.yaml 13 | - **Common environment**: Colab, Google Cloud, or Docker image. See https://github.com/ultralytics/yolov5#environments 14 | 15 | If this is a custom dataset/training question you **must include** your `train*.jpg`, `test*.jpg` and `results.png` figures, or we can not help you. You can generate these with `utils.plot_results()`. 16 | 17 | 18 | ## 🐛 Bug 19 | A clear and concise description of what the bug is. 20 | 21 | 22 | ## To Reproduce (REQUIRED) 23 | 24 | Input: 25 | ``` 26 | import torch 27 | 28 | a = torch.tensor([5]) 29 | c = a / 0 30 | ``` 31 | 32 | Output: 33 | ``` 34 | Traceback (most recent call last): 35 | File "/Users/glennjocher/opt/anaconda3/envs/env1/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code 36 | exec(code_obj, self.user_global_ns, self.user_ns) 37 | File "", line 5, in 38 | c = a / 0 39 | RuntimeError: ZeroDivisionError 40 | ``` 41 | 42 | 43 | ## Expected behavior 44 | A clear and concise description of what you expected to happen. 45 | 46 | 47 | ## Environment 48 | If applicable, add screenshots to help explain your problem. 49 | 50 | - OS: [e.g. Ubuntu] 51 | - GPU [e.g. 2080 Ti] 52 | 53 | 54 | ## Additional context 55 | Add any other context about the problem here. 
56 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "🚀 Feature request" 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## 🚀 Feature 11 | 12 | 13 | ## Motivation 14 | 15 | 16 | 17 | ## Pitch 18 | 19 | 20 | 21 | ## Alternatives 22 | 23 | 24 | 25 | ## Additional context 26 | 27 | 28 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "❓Question" 3 | about: Ask a general question 4 | title: '' 5 | labels: question 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## ❔Question 11 | 12 | 13 | ## Additional context 14 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | time: "04:00" 8 | open-pull-requests-limit: 10 9 | reviewers: 10 | - glenn-jocher 11 | labels: 12 | - dependencies 13 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/workflows/ci-testing.yml: -------------------------------------------------------------------------------- 1 | name: CI CPU testing 2 | 3 | on: # https://help.github.com/en/actions/reference/events-that-trigger-workflows 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | # The branches below must be a subset of the branches above 8 | branches: [ master ] 9 | schedule: 10 | - cron: '0 0 * * *' # Runs at 00:00 UTC every day 11 | 12 | jobs: 13 | cpu-tests: 14 | 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | os: [ubuntu-latest, macos-latest, windows-latest] 20 | python-version: [3.8] 21 | model: ['yolov5s'] # models to test 22 | 23 | # Timeout: https://stackoverflow.com/a/59076067/4521646 24 | timeout-minutes: 50 25 | steps: 26 | - uses: actions/checkout@v2 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v2 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | 32 | # Note: This uses an internal pip API and may not always work 33 | # https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow 34 | - name: Get pip cache 35 | id: pip-cache 36 | run: | 37 | python -c "from pip._internal.locations import USER_CACHE_DIR; print('::set-output name=dir::' + USER_CACHE_DIR)" 38 | 39 | - name: Cache pip 40 | uses: actions/cache@v1 41 | with: 42 | path: ${{ steps.pip-cache.outputs.dir }} 43 | key: ${{ runner.os }}-${{ matrix.python-version }}-pip-${{ hashFiles('requirements.txt') }} 44 | restore-keys: | 45 | ${{ runner.os }}-${{ matrix.python-version }}-pip- 46 | 47 | - name: Install dependencies 48 | run: | 49 | python -m pip install --upgrade pip 50 | pip install -qr requirements.txt -f https://download.pytorch.org/whl/cpu/torch_stable.html 51 | pip install -q onnx 52 | python --version 53 | pip --version 54 | pip list 55 | shell: bash 56 | 57 | - name: Download data 58 | run: | 59 | # curl -L -o tmp.zip https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip 60 | # 
unzip -q tmp.zip -d ../ 61 | # rm tmp.zip 62 | 63 | - name: Tests workflow 64 | run: | 65 | # export PYTHONPATH="$PWD" # to run '$ python *.py' files in subdirectories 66 | di=cpu # inference devices # define device 67 | 68 | # train 69 | python train.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --cfg models/${{ matrix.model }}.yaml --epochs 1 --device $di 70 | # detect 71 | python detect.py --weights weights/${{ matrix.model }}.pt --device $di 72 | python detect.py --weights runs/train/exp/weights/last.pt --device $di 73 | # test 74 | python test.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --device $di 75 | python test.py --img 256 --batch 8 --weights runs/train/exp/weights/last.pt --device $di 76 | 77 | python models/yolo.py --cfg models/${{ matrix.model }}.yaml # inspect 78 | python models/export.py --img 256 --batch 1 --weights weights/${{ matrix.model }}.pt # export 79 | shell: bash 80 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # This action runs GitHub's industry-leading static analysis engine, CodeQL, against a repository's source code to find security vulnerabilities. 2 | # https://github.com/github/codeql-action 3 | 4 | name: "CodeQL" 5 | 6 | on: 7 | schedule: 8 | - cron: '0 0 1 * *' # Runs at 00:00 UTC on the 1st of every month 9 | 10 | jobs: 11 | analyze: 12 | name: Analyze 13 | runs-on: ubuntu-latest 14 | 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | language: [ 'python' ] 19 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 20 | # Learn more: 21 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 22 | 23 | steps: 24 | - name: Checkout repository 25 | uses: actions/checkout@v2 26 | 27 | # Initializes the CodeQL tools for scanning. 28 | - name: Initialize CodeQL 29 | uses: github/codeql-action/init@v1 30 | with: 31 | languages: ${{ matrix.language }} 32 | # If you wish to specify custom queries, you can do so here or in a config file. 33 | # By default, queries listed here will override any specified in a config file. 34 | # Prefix the list here with "+" to use these queries and those in the config file. 35 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 36 | 37 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 38 | # If this step fails, then you should remove it and run the build manually (see below) 39 | - name: Autobuild 40 | uses: github/codeql-action/autobuild@v1 41 | 42 | # ℹ️ Command-line programs to run using the OS shell. 
43 | # 📚 https://git.io/JvXDl 44 | 45 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 46 | # and modify them (or add more) to build your code if your project 47 | # uses a compiled language 48 | 49 | #- run: | 50 | # make bootstrap 51 | # make release 52 | 53 | - name: Perform CodeQL Analysis 54 | uses: github/codeql-action/analyze@v1 55 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/workflows/greetings.yml: -------------------------------------------------------------------------------- 1 | name: Greetings 2 | 3 | on: [pull_request_target, issues] 4 | 5 | jobs: 6 | greeting: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/first-interaction@v1 10 | with: 11 | repo-token: ${{ secrets.GITHUB_TOKEN }} 12 | pr-message: | 13 | 👋 Hello @${{ github.actor }}, thank you for submitting a 🚀 PR! To allow your work to be integrated as seamlessly as possible, we advise you to: 14 | - ✅ Verify your PR is **up-to-date with origin/master.** If your PR is behind origin/master update by running the following, replacing 'feature' with the name of your local branch: 15 | ```bash 16 | git remote add upstream https://github.com/ultralytics/yolov5.git 17 | git fetch upstream 18 | git checkout feature # <----- replace 'feature' with local branch name 19 | git rebase upstream/master 20 | git push -u origin -f 21 | ``` 22 | - ✅ Verify all Continuous Integration (CI) **checks are passing**. 23 | - ✅ Reduce changes to the absolute **minimum** required for your bug fix or feature addition. _"It is not daily increase but daily decrease, hack away the unessential. The closer to the source, the less wastage there is."_ -Bruce Lee 24 | 25 | issue-message: | 26 | 👋 Hello @${{ github.actor }}, thank you for your interest in 🚀 YOLOv5! Please visit our ⭐️ [Tutorials](https://github.com/ultralytics/yolov5/wiki#tutorials) to get started, where you can find quickstart guides for simple tasks like [Custom Data Training](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data) all the way to advanced concepts like [Hyperparameter Evolution](https://github.com/ultralytics/yolov5/issues/607). 27 | 28 | If this is a 🐛 Bug Report, please provide screenshots and **minimum viable code to reproduce your issue**, otherwise we can not help you. 29 | 30 | If this is a custom training ❓ Question, please provide as much information as possible, including dataset images, training logs, screenshots, and a public link to online [W&B logging](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data#visualize) if available. 31 | 32 | For business inquiries or professional support requests please visit https://www.ultralytics.com or email Glenn Jocher at glenn.jocher@ultralytics.com. 33 | 34 | ## Requirements 35 | 36 | Python 3.8 or later with all [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) dependencies installed, including `torch>=1.7`. 
To install run: 37 | ```bash 38 | $ pip install -r requirements.txt 39 | ``` 40 | 41 | ## Environments 42 | 43 | YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): 44 | 45 | - **Google Colab Notebook** with free GPU: Open In Colab 46 | - **Kaggle Notebook** with free GPU: [https://www.kaggle.com/ultralytics/yolov5](https://www.kaggle.com/ultralytics/yolov5) 47 | - **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart) 48 | - **Docker Image** https://hub.docker.com/r/ultralytics/yolov5. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) ![Docker Pulls](https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker) 49 | 50 | ## Status 51 | 52 | ![CI CPU testing](https://github.com/ultralytics/yolov5/workflows/CI%20CPU%20testing/badge.svg) 53 | 54 | If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 training ([train.py](https://github.com/ultralytics/yolov5/blob/master/train.py)), testing ([test.py](https://github.com/ultralytics/yolov5/blob/master/test.py)), inference ([detect.py](https://github.com/ultralytics/yolov5/blob/master/detect.py)) and export ([export.py](https://github.com/ultralytics/yolov5/blob/master/models/export.py)) on MacOS, Windows, and Ubuntu every 24 hours and on every commit. 55 | 56 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/workflows/rebase.yml: -------------------------------------------------------------------------------- 1 | name: Automatic Rebase 2 | # https://github.com/marketplace/actions/automatic-rebase 3 | 4 | on: 5 | issue_comment: 6 | types: [created] 7 | 8 | jobs: 9 | rebase: 10 | name: Rebase 11 | if: github.event.issue.pull_request != '' && contains(github.event.comment.body, '/rebase') 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout the latest code 15 | uses: actions/checkout@v2 16 | with: 17 | fetch-depth: 0 18 | - name: Automatic Rebase 19 | uses: cirrus-actions/rebase@1.3.1 20 | env: 21 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 22 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Close stale issues 2 | on: 3 | schedule: 4 | - cron: "0 0 * * *" 5 | 6 | jobs: 7 | stale: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/stale@v1 11 | with: 12 | repo-token: ${{ secrets.GITHUB_TOKEN }} 13 | stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.' 14 | stale-pr-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.' 15 | days-before-stale: 30 16 | days-before-close: 5 17 | exempt-issue-labels: 'documentation,tutorial' 18 | operations-per-run: 100 # The maximum number of operations per run, used to control rate limiting. 
19 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | ''' 3 | @author: mengting gu 4 | @contact: 1065504814@qq.com 5 | @time: 2021/11/11 下午5:52 6 | @file: __init__.py.py 7 | @desc:  8 | ''' 9 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/tools_yolov5/yolov5_v4/models/__init__.py -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/requirements.txt: -------------------------------------------------------------------------------- 1 | # pip install -r requirements.txt 2 | 3 | # base ---------------------------------------- 4 | Cython 5 | matplotlib>=3.2.2 6 | numpy>=1.18.5 7 | opencv-python>=4.1.2 8 | Pillow 9 | PyYAML>=5.3 10 | scipy>=1.4.1 11 | tensorboard>=2.2 12 | torch>=1.7.0 13 | torchvision>=0.8.1 14 | tqdm>=4.41.0 15 | 16 | # logging ------------------------------------- 17 | # wandb 18 | 19 | # plotting ------------------------------------ 20 | seaborn>=0.11.0 21 | pandas 22 | 23 | # export -------------------------------------- 24 | # coremltools==4.0 25 | # onnx>=1.8.0 26 | # scikit-learn==0.19.2 # for coreml quantization 27 | 28 | # extras -------------------------------------- 29 | thop # FLOPS computation 30 | pycocotools>=2.0 # COCO mAP 31 | -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/utils/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/tools_yolov5/yolov5_v4/utils/__init__.py -------------------------------------------------------------------------------- /tools_yolov5/yolov5_v4/utils/activations.py: -------------------------------------------------------------------------------- 1 | # Activation functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | # SiLU https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------- 9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 10 | @staticmethod 11 | def forward(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 16 | @staticmethod 17 | def forward(x): 18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 19 | return x * F.hardtanh(x + 3, 0., 6.) / 6. # for torchscript, CoreML and ONNX 20 | 21 | 22 | class MemoryEfficientSwish(nn.Module): 23 | class F(torch.autograd.Function): 24 | @staticmethod 25 | def forward(ctx, x): 26 | ctx.save_for_backward(x) 27 | return x * torch.sigmoid(x) 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | x = ctx.saved_tensors[0] 32 | sx = torch.sigmoid(x) 33 | return grad_output * (sx * (1 + x * (1 - sx))) 34 | 35 | def forward(self, x): 36 | return self.F.apply(x) 37 | 38 | 39 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 40 | class Mish(nn.Module): 41 | @staticmethod 42 | def forward(x): 43 | return x * F.softplus(x).tanh() 44 | 45 | 46 | class MemoryEfficientMish(nn.Module): 47 | class F(torch.autograd.Function): 48 | @staticmethod 49 | def forward(ctx, x): 50 | ctx.save_for_backward(x) 51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 52 | 53 | @staticmethod 54 | def backward(ctx, grad_output): 55 | x = ctx.saved_tensors[0] 56 | sx = torch.sigmoid(x) 57 | fx = F.softplus(x).tanh() 58 | return grad_output * (fx + x * sx * (1 - fx * fx)) 59 | 60 | def forward(self, x): 61 | return self.F.apply(x) 62 | 63 | 64 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 65 | class FReLU(nn.Module): 66 | def __init__(self, c1, k=3): # ch_in, kernel 67 | super().__init__() 68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 69 | self.bn = nn.BatchNorm2d(c1) 70 | 71 | def forward(self, x): 72 | return torch.max(x, self.bn(self.conv(x))) 73 | -------------------------------------------------------------------------------- /tutorials/centertrack/README.md: -------------------------------------------------------------------------------- 1 | # CenterTrack 2 | 3 | Step1. git clone https://github.com/xingyizhou/CenterTrack.git 4 | 5 | 6 | Step2. 7 | 8 | replace https://github.com/xingyizhou/CenterTrack/blob/master/src/lib/utils/tracker.py 9 | 10 | replace https://github.com/xingyizhou/CenterTrack/blob/master/src/lib/opts.py 11 | 12 | 13 | Step3. run 14 | ``` 15 | python3 test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --load_model ../models/mot17_half.pth --track_thresh 0.4 --new_thresh 0.5 --out_thresh 0.2 --pre_thresh 0.5 16 | ``` 17 | 18 | 19 | # CenterTrack_BYTE 20 | 21 | Step1. 
git clone https://github.com/xingyizhou/CenterTrack.git 22 | 23 | 24 | Step2. 25 | 26 | replace https://github.com/xingyizhou/CenterTrack/blob/master/src/lib/utils/tracker.py by byte_tracker.py 27 | 28 | replace https://github.com/xingyizhou/CenterTrack/blob/master/src/lib/opts.py 29 | 30 | add mot_online to https://github.com/xingyizhou/CenterTrack/blob/master/src/lib/utils 31 | 32 | Step3. run 33 | ``` 34 | python3 test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --load_model ../models/mot17_half.pth --track_thresh 0.4 --new_thresh 0.5 --out_thresh 0.2 --pre_thresh 0.5 35 | ``` 36 | 37 | 38 | ## Notes 39 | tracker.py: only motion 40 | 41 | byte_tracker.py: motion with kalman filter 42 | 43 | -------------------------------------------------------------------------------- /tutorials/centertrack/mot_online/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | -------------------------------------------------------------------------------- /tutorials/cstrack/README.md: -------------------------------------------------------------------------------- 1 | # CSTrack 2 | 3 | Step1. git clone -b MOT https://github.com/JudasDie/SOTS.git 4 | 5 | 6 | Step2. replace https://github.com/JudasDie/SOTS/blob/master/lib/tracker/cstrack.py 7 | 8 | 9 | Step3. download cstrack model trained on MIX and MOT17_half (mix_mot17_half_cstrack.pt): [google](https://drive.google.com/file/d/1OG5PDj_CYmMiw3dN6pZ0FsgqY__CIDx1/view?usp=sharing), [baidu(code:0bsu)](https://pan.baidu.com/s/1Z2VnE-OhZIPmgX6-4r9Z1Q) and put it under SOTS/weights 10 | 11 | 12 | Step4. 
run BYTE tracker example: 13 | ``` 14 | cd tracking 15 | python3 test_cstrack.py --val_mot17 True --val_hf 2 --weights ../weights/mix_mot17_half_cstrack.pt --conf_thres 0.6 --data_cfg ../src/lib/cfg/mot17_hf.json --data_dir your/data/path 16 | ``` 17 | 18 | 19 | ## Notes 20 | byte_tracker: only motion 21 | 22 | tracker: motion + reid 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /tutorials/ctracker/README.md: -------------------------------------------------------------------------------- 1 | # CTracker 2 | 3 | #### Step1 4 | git clone https://github.com/pjl1995/CTracker.git and preapare dataset 5 | 6 | 7 | #### Step2 8 | 9 | add generate_half_csv.py to https://github.com/pjl1995/CTracker 10 | 11 | run generate_half_csv.py and put train_half_annots.csv in MOT17 12 | 13 | run 14 | ``` 15 | python3 train.py --root_path MOT17 --csv_train train_half_annots.csv --model_dir ctracker/ --depth 50 --epochs 50 16 | ``` 17 | You can also download the CTracker model trained by us: [google](https://drive.google.com/file/d/1TwBDomJx8pxD-e96mGIiTduLenUvmf1t/view?usp=sharing), [baidu(code:6p3w)](https://pan.baidu.com/s/1MaCvnHynX2Wzg81hWkqzeg) 18 | 19 | #### Step3 20 | 21 | replace https://github.com/pjl1995/CTracker/blob/master/test.py 22 | 23 | run 24 | ``` 25 | python3 test.py --dataset_path MOT17 --model_dir ctracker --model_path ctracker/mot17_half_ctracker.pt 26 | ``` 27 | 28 | #### Step4 29 | 30 | add eval_motchallenge.py to https://github.com/pjl1995/CTracker 31 | 32 | prepare gt_half_val.txt as CenterTrack [DATA.md](https://github.com/xingyizhou/CenterTrack/blob/master/readme/DATA.md) 33 | 34 | 35 | #### Step5 36 | 37 | run 38 | ``` 39 | python3 eval_motchallenge.py --groundtruths MOT17/train --tests ctracker/results --gt_type half_val --eval_official --score_threshold -1 40 | ``` 41 | 42 | 43 | 44 | # CTracker_BYTE 45 | 46 | #### Step3 47 | 48 | add mot_online to https://github.com/pjl1995/CTracker 49 | 50 | add byte_tracker.py to https://github.com/pjl1995/CTracker 51 | 52 | add test_byte.py to https://github.com/pjl1995/CTracker 53 | 54 | run 55 | ``` 56 | python3 test_byte.py --dataset_path MOT17 --model_dir ctracker --model_path ctracker/mot17_half_ctracker.pt 57 | ``` 58 | 59 | 60 | #### Step5 61 | 62 | run 63 | ``` 64 | python3 eval_motchallenge.py --groundtruths MOT17/train --tests ctracker/results --gt_type half_val --eval_official --score_threshold -1 65 | ``` 66 | -------------------------------------------------------------------------------- /tutorials/ctracker/generate_half_csv.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | prefix_dir = 'MOT17/' 4 | root_dir = 'train/' 5 | result_csv = 'train_half_annots.csv' 6 | train_half_set = {2: 301, 4: 526, 5:419, 9:263, 10:328, 11:451, 13:376} 7 | fout = open(result_csv, 'w') 8 | 9 | for data_name in sorted(os.listdir(prefix_dir + root_dir)): 10 | print(data_name) 11 | gt_path = os.path.join(prefix_dir, root_dir, data_name, 'gt', 'gt.txt') 12 | # print(gt_path) 13 | data_raw = np.loadtxt(gt_path, delimiter=',', dtype='float', usecols=(0,1,2,3,4,5,6,7,8)) 14 | 15 | data_sort = data_raw[np.lexsort(data_raw[:,::-1].T)] 16 | visible_raw = data_sort[:,8] 17 | # print(data_sort) 18 | # print(data_sort[-1, 0]) 19 | img_num = data_sort[-1, 0] 20 | 21 | # print(data_sort.shape[0]) 22 | box_num = data_sort.shape[0] 23 | 24 | person_box_num = np.sum(data_sort[:,6] == 1) 25 | # print(person_box_num) 26 
| # import ipdb; ipdb.set_trace() 27 | for i in range(box_num): 28 | c = int(data_sort[i, 6]) 29 | v = visible_raw[i] 30 | img_index = int(data_sort[i, 0]) 31 | if c == 1 and v > 0.1 and img_index < train_half_set[int(data_name[-2:])]: 32 | img_index = int(data_sort[i, 0]) 33 | img_name = data_name + '/img1/' + str(img_index).zfill(6) + '.jpg' 34 | print(root_dir + img_name + ', ' + str(int(data_sort[i, 1])) + ', ' + str(data_sort[i, 2]) + ', ' + str(data_sort[i, 3]) + ', ' + str(data_sort[i, 2] + data_sort[i, 4]) + ', ' + str(data_sort[i, 3] + data_sort[i, 5]) + ', person\n') 35 | fout.write(root_dir + img_name + ', ' + str(int(data_sort[i, 1])) + ', ' + str(data_sort[i, 2]) + ', ' + str(data_sort[i, 3]) + ', ' + str(data_sort[i, 2] + data_sort[i, 4]) + ', ' + str(data_sort[i, 3] + data_sort[i, 5]) + ', person\n') 36 | 37 | fout.close() 38 | -------------------------------------------------------------------------------- /tutorials/ctracker/mot_online/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | -------------------------------------------------------------------------------- /tutorials/fairmot/README.md: -------------------------------------------------------------------------------- 1 | # FairMOT 2 | 3 | Step1. git clone https://github.com/ifzhang/FairMOT.git 4 | 5 | 6 | Step2. replace https://github.com/ifzhang/FairMOT/blob/master/src/lib/tracker/multitracker.py 7 | 8 | 9 | Step3. run motion + reid tracker using tracker.py (set --match_thres 0.4), run BYTE tracker using byte_tracker.py (set --match_thres 0.8) 10 | 11 | run BYTE tracker example: 12 | ``` 13 | python3 track_half.py mot --load_model ../exp/mot/mot17_half_dla34/model_last.pth --match_thres 0.8 14 | ``` 15 | 16 | 17 | ## Notes 18 | byte_tracker: only motion 19 | 20 | tracker: motion + reid 21 | -------------------------------------------------------------------------------- /tutorials/jde/README.md: -------------------------------------------------------------------------------- 1 | # JDE 2 | 3 | Step1. git clone https://github.com/Zhongdao/Towards-Realtime-MOT.git 4 | 5 | 6 | Step2. replace https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py and https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/utils/evaluation.py 7 | 8 | Step3. 
download JDE model trained on MIX and MOT17_half (mix_mot17_half_jde.pt): [google](https://drive.google.com/file/d/1jUiIbaHFf75Jq6thOGI3CPygMMBy6850/view?usp=sharing), [baidu(code:ccdd)](https://pan.baidu.com/s/10se81ZktkUDUWn2dZzkk_Q) 9 | 10 | Step4. put track_half.py under https://github.com/Zhongdao/Towards-Realtime-MOT and run: 11 | ``` 12 | python3 track_half.py --cfg ./cfg/yolov3_1088x608.cfg --weights weights/mix_mot17_half_jde.pt 13 | ``` 14 | 15 | 16 | ## Notes 17 | byte_tracker: only motion 18 | 19 | tracker: motion + reid 20 | -------------------------------------------------------------------------------- /tutorials/jde/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | mm.lap.default_solver = 'lap' 6 | from utils.io import read_results, unzip_objs 7 | 8 | 9 | class Evaluator(object): 10 | 11 | def __init__(self, data_root, seq_name, data_type): 12 | self.data_root = data_root 13 | self.seq_name = seq_name 14 | self.data_type = data_type 15 | 16 | self.load_annotations() 17 | self.reset_accumulator() 18 | 19 | def load_annotations(self): 20 | assert self.data_type == 'mot' 21 | 22 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 23 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 24 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 25 | 26 | def reset_accumulator(self): 27 | self.acc = mm.MOTAccumulator(auto_id=True) 28 | 29 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 30 | # results 31 | trk_tlwhs = np.copy(trk_tlwhs) 32 | trk_ids = np.copy(trk_ids) 33 | 34 | # gts 35 | gt_objs = self.gt_frame_dict.get(frame_id, []) 36 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 37 | 38 | # ignore boxes 39 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 40 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 41 | 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | 57 | # get distance matrix 58 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 59 | 60 | # acc 61 | self.acc.update(gt_ids, trk_ids, iou_distance) 62 | 63 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 64 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 65 | else: 66 | events = None 67 | return events 68 | 69 | def eval_file(self, filename): 70 | self.reset_accumulator() 71 | 72 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 73 | #frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 74 | frames = sorted(list(set(result_frame_dict.keys()))) 75 | for frame_id in frames: 76 | trk_objs = result_frame_dict.get(frame_id, []) 77 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 78 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 79 | 80 | return self.acc 81 
| 82 | @staticmethod 83 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 84 | names = copy.deepcopy(names) 85 | if metrics is None: 86 | metrics = mm.metrics.motchallenge_metrics 87 | metrics = copy.deepcopy(metrics) 88 | 89 | mh = mm.metrics.create() 90 | summary = mh.compute_many( 91 | accs, 92 | metrics=metrics, 93 | names=names, 94 | generate_overall=True 95 | ) 96 | 97 | return summary 98 | 99 | @staticmethod 100 | def save_summary(summary, filename): 101 | import pandas as pd 102 | writer = pd.ExcelWriter(filename) 103 | summary.to_excel(writer) 104 | writer.save() 105 | -------------------------------------------------------------------------------- /tutorials/motr/README.md: -------------------------------------------------------------------------------- 1 | # MOTR 2 | 3 | Step1. 4 | 5 | git clone https://github.com/megvii-model/MOTR.git and install 6 | 7 | replace https://github.com/megvii-model/MOTR/blob/main/datasets/joint.py 8 | 9 | replace https://github.com/megvii-model/MOTR/blob/main/datasets/transforms.py 10 | 11 | 12 | train 13 | 14 | ``` 15 | python3 -m torch.distributed.launch --nproc_per_node=8 \ 16 | --use_env main.py \ 17 | --meta_arch motr \ 18 | --dataset_file e2e_joint \ 19 | --epoch 50 \ 20 | --with_box_refine \ 21 | --lr_drop 40 \ 22 | --lr 2e-4 \ 23 | --lr_backbone 2e-5 \ 24 | --pretrained coco_model_final.pth \ 25 | --output_dir exps/e2e_motr_r50_mot17trainhalf \ 26 | --batch_size 1 \ 27 | --sample_mode 'random_interval' \ 28 | --sample_interval 10 \ 29 | --sampler_steps 10 20 30 \ 30 | --sampler_lengths 2 3 4 5 \ 31 | --update_query_pos \ 32 | --merger_dropout 0 \ 33 | --dropout 0 \ 34 | --random_drop 0.1 \ 35 | --fp_ratio 0.3 \ 36 | --query_interaction_layer 'QIM' \ 37 | --extra_track_attn \ 38 | --mot_path . 39 | --data_txt_path_train ./datasets/data_path/mot17.half \ 40 | --data_txt_path_val ./datasets/data_path/mot17.val \ 41 | ``` 42 | mot17.half and mot17.val are from https://github.com/ifzhang/FairMOT/tree/master/src/data 43 | 44 | You can also download the MOTR model trained by us: [google](https://drive.google.com/file/d/1pzGi53VooppQqhKf3TSxLK99LERsVyTw/view?usp=sharing), [baidu(code:t87h)](https://pan.baidu.com/s/1OrcR3L9Bf2xXIo8RQl3zyA) 45 | 46 | 47 | Step2. 48 | 49 | replace https://github.com/megvii-model/MOTR/blob/main/util/evaluation.py 50 | 51 | replace https://github.com/megvii-model/MOTR/blob/main/eval.py 52 | 53 | replace https://github.com/megvii-model/MOTR/blob/main/models/motr.py 54 | 55 | add byte_tracker.py to https://github.com/megvii-model/MOTR 56 | 57 | add mot_online to https://github.com/megvii-model/MOTR 58 | 59 | 60 | Step3. 
61 | 62 | 63 | val 64 | 65 | ``` 66 | python3 eval.py \ 67 | --meta_arch motr \ 68 | --dataset_file e2e_joint \ 69 | --epoch 200 \ 70 | --with_box_refine \ 71 | --lr_drop 100 \ 72 | --lr 2e-4 \ 73 | --lr_backbone 2e-5 \ 74 | --pretrained exps/e2e_motr_r50_mot17val/motr_final.pth \ 75 | --output_dir exps/e2e_motr_r50_mot17val \ 76 | --batch_size 1 \ 77 | --sample_mode 'random_interval' \ 78 | --sample_interval 10 \ 79 | --sampler_steps 50 90 120 \ 80 | --sampler_lengths 2 3 4 5 \ 81 | --update_query_pos \ 82 | --merger_dropout 0 \ 83 | --dropout 0 \ 84 | --random_drop 0.1 \ 85 | --fp_ratio 0.3 \ 86 | --query_interaction_layer 'QIM' \ 87 | --extra_track_attn \ 88 | --mot_path ./MOT17/images/train 89 | --data_txt_path_train ./datasets/data_path/mot17.half \ 90 | --data_txt_path_val ./datasets/data_path/mot17.val \ 91 | --resume model_final.pth \ 92 | ``` 93 | 94 | 95 | 96 | # MOTR det 97 | 98 | in Step2, replace https://github.com/megvii-model/MOTR/blob/main/models/motr.py by motr_det.py 99 | 100 | others are the same as MOTR 101 | -------------------------------------------------------------------------------- /tutorials/motr/mot_online/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | -------------------------------------------------------------------------------- /tutorials/qdtrack/README.md: -------------------------------------------------------------------------------- 1 | # QDTrack_reid_motion 2 | 3 | Step1. git clone https://github.com/SysCV/qdtrack.git and train 4 | 5 | 6 | Step2. 7 | 8 | replace https://github.com/SysCV/qdtrack/blob/master/qdtrack/models/mot/qdtrack.py 9 | 10 | add mot_online to https://github.com/SysCV/qdtrack 11 | 12 | add tracker_reid_motion.py to https://github.com/SysCV/qdtrack and rename to tracker.py 13 | 14 | Step3. download qdtrack model trained on mot17 half training set: [google](https://drive.google.com/file/d/1IfM8i0R0lF_4NOgeloMPFo5d52dqhaHW/view?usp=sharing), [baidu(code:whcc)](https://pan.baidu.com/s/1IYRD3V2YOa6-YNFgMQyv7w) 15 | 16 | Step4. run 17 | ``` 18 | python3 -m torch.distributed.launch --nproc_per_node=8 --master_port=29501 tools/test.py configs/mot17/qdtrack-frcnn_r50_fpn_4e_mot17.py work_dirs/mot17_half_qdtrack.pth --launcher pytorch --eval track --eval-options resfile_path=output 19 | ``` 20 | 21 | 22 | # QDTrack_BYTE 23 | 24 | Step1. git clone https://github.com/SysCV/qdtrack.git and train 25 | 26 | 27 | Step2. 
28 | 29 | replace https://github.com/SysCV/qdtrack/blob/master/qdtrack/models/mot/qdtrack.py 30 | 31 | add mot_online to https://github.com/SysCV/qdtrack 32 | 33 | add byte_tracker.py to https://github.com/SysCV/qdtrack 34 | 35 | 36 | Step3. run 37 | ``` 38 | python3 -m torch.distributed.launch --nproc_per_node=8 --master_port=29501 tools/test.py configs/mot17/qdtrack-frcnn_r50_fpn_4e_mot17.py work_dirs/mot17_half_qdtrack.pth --launcher pytorch --eval track --eval-options resfile_path=output 39 | ``` 40 | -------------------------------------------------------------------------------- /tutorials/qdtrack/mot_online/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | -------------------------------------------------------------------------------- /tutorials/trades/README.md: -------------------------------------------------------------------------------- 1 | # TraDeS 2 | 3 | Step1. git clone https://github.com/JialianW/TraDeS.git 4 | 5 | 6 | Step2. 7 | 8 | replace https://github.com/JialianW/TraDeS/blob/master/src/lib/utils/tracker.py 9 | 10 | replace https://github.com/JialianW/TraDeS/blob/master/src/lib/opts.py 11 | 12 | 13 | Step3. run 14 | ``` 15 | python3 test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --inference --load_model ../models/mot_half.pth --gpus 0 --clip_len 3 --trades --track_thresh 0.4 --new_thresh 0.4 --out_thresh 0.2 --pre_thresh 0.5 16 | ``` 17 | 18 | 19 | # TraDeS_BYTE 20 | 21 | Step1. git clone https://github.com/JialianW/TraDeS.git 22 | 23 | 24 | Step2. 25 | 26 | replace https://github.com/JialianW/TraDeS/blob/master/src/lib/utils/tracker.py by byte_tracker.py 27 | 28 | replace https://github.com/JialianW/TraDeS/blob/master/src/lib/opts.py 29 | 30 | add mot_online to https://github.com/JialianW/TraDeS/blob/master/src/lib/utils 31 | 32 | Step3. 
run 33 | ``` 34 | python3 test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --inference --load_model ../models/mot_half.pth --gpus 0 --clip_len 3 --trades --track_thresh 0.4 --new_thresh 0.5 --out_thresh 0.1 --pre_thresh 0.5 35 | ``` 36 | 37 | 38 | ## Notes 39 | tracker.py: motion + reid 40 | 41 | byte_tracker.py: motion with kalman filter 42 | -------------------------------------------------------------------------------- /tutorials/trades/mot_online/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | -------------------------------------------------------------------------------- /tutorials/transtrack/README.md: -------------------------------------------------------------------------------- 1 | # TransTrack 2 | 3 | Step1. git clone https://github.com/PeizeSun/TransTrack.git 4 | 5 | 6 | Step2. 7 | 8 | replace https://github.com/PeizeSun/TransTrack/blob/main/models/tracker.py 9 | 10 | Step3. 11 | 12 | Download TransTrack pretrained model: [671mot17_crowdhuman_mot17.pth](https://drive.google.com/drive/folders/1DjPL8xWoXDASrxgsA3O06EspJRdUXFQ-?usp=sharing) 13 | 14 | 15 | Step3. run 16 | ``` 17 | python3 main_track.py --output_dir . --dataset_file mot --coco_path mot --batch_size 1 --resume pretrained/671mot17_crowdhuman_mot17.pth --eval --with_box_refine --num_queries 500 18 | ``` 19 | 20 | 21 | # TransTrack_BYTE 22 | 23 | Step1. git clone https://github.com/PeizeSun/TransTrack.git 24 | 25 | Step2. 26 | 27 | replace https://github.com/PeizeSun/TransTrack/blob/main/models/save_track.py 28 | 29 | replace https://github.com/PeizeSun/TransTrack/blob/main/engine_track.py 30 | 31 | replace https://github.com/PeizeSun/TransTrack/blob/main/main_track.py 32 | 33 | add mot_online to https://github.com/PeizeSun/TransTrack 34 | 35 | Step3. run 36 | ``` 37 | python3 main_track.py --output_dir . 
--dataset_file mot --coco_path mot --batch_size 1 --resume pretrained/671mot17_crowdhuman_mot17.pth --eval --with_box_refine --num_queries 500 38 | ``` 39 | 40 | 41 | ## Notes 42 | tracker.py: only motion 43 | 44 | mot_online/byte_tracker.py: motion with kalman filter 45 | 46 | -------------------------------------------------------------------------------- /tutorials/transtrack/mot_online/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed -------------------------------------------------------------------------------- /tutorials/transtrack/save_track.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) https://github.com/xingyizhou/CenterTrack 3 | Modified by Peize Sun, Rufeng Zhang 4 | """ 5 | # coding: utf-8 6 | import os 7 | import json 8 | import logging 9 | from collections import defaultdict 10 | 11 | 12 | def save_track(results, out_root, video_to_images, video_names, data_split='val'): 13 | assert out_root is not None 14 | out_dir = os.path.join(out_root, data_split) 15 | if not os.path.exists(out_dir): 16 | os.mkdir(out_dir) 17 | 18 | # save json. 19 | # json_path = os.path.join(out_dir, "track_results.json") 20 | # with open(json_path, "w") as f: 21 | # f.write(json.dumps(results)) 22 | # f.flush() 23 | 24 | # save it in standard format. 
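    # The "standard format" below is the MOTChallenge txt layout, one row per box:
    #   frame, track_id, bb_left, bb_top, bb_width, bb_height, conf, x, y, z
    # The last four fields are written as -1, and a box is kept only while its track is active.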
25 | track_dir = os.path.join(out_dir, "tracks") 26 | if not os.path.exists(track_dir): 27 | os.mkdir(track_dir) 28 | for video_id in video_to_images.keys(): 29 | video_infos = video_to_images[video_id] 30 | video_name = video_names[video_id] 31 | file_path = os.path.join(track_dir, "{}.txt".format(video_name)) 32 | f = open(file_path, "w") 33 | tracks = defaultdict(list) 34 | for video_info in video_infos: 35 | image_id, frame_id = video_info["image_id"], video_info["frame_id"] 36 | result = results[image_id] 37 | for item in result: 38 | if not ("tracking_id" in item): 39 | raise NotImplementedError 40 | tracking_id = item["tracking_id"] 41 | bbox = item["bbox"] 42 | bbox = [bbox[0], bbox[1], bbox[2], bbox[3], item['score'], item['active']] 43 | tracks[tracking_id].append([frame_id] + bbox) 44 | 45 | rename_track_id = 0 46 | for track_id in sorted(tracks): 47 | rename_track_id += 1 48 | for t in tracks[track_id]: 49 | if t[6] > 0: 50 | f.write("{},{},{:.2f},{:.2f},{:.2f},{:.2f},-1,-1,-1,-1\n".format( 51 | t[0], rename_track_id, t[1], t[2], t[3] - t[1], t[4] - t[2])) 52 | f.close() 53 | -------------------------------------------------------------------------------- /videos/palace.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmt710/yolov5ByteTrack/b24749e3d88ae15510ccc8ad39bfc24fdd466922/videos/palace.mp4 -------------------------------------------------------------------------------- /yolox/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .utils import configure_module 5 | 6 | configure_module() 7 | 8 | __version__ = "0.1.0" 9 | -------------------------------------------------------------------------------- /yolox/core/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | from .launch import launch 6 | from .trainer import Trainer 7 | -------------------------------------------------------------------------------- /yolox/data/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | from .data_augment import TrainTransform, ValTransform 6 | from .data_prefetcher import DataPrefetcher 7 | from .dataloading import DataLoader, get_yolox_datadir 8 | from .datasets import * 9 | from .samplers import InfiniteSampler, YoloBatchSampler 10 | -------------------------------------------------------------------------------- /yolox/data/data_prefetcher.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import torch 6 | import torch.distributed as dist 7 | 8 | from yolox.utils import synchronize 9 | 10 | import random 11 | 12 | 13 | class DataPrefetcher: 14 | """ 15 | DataPrefetcher is inspired by code of following file: 16 | https://github.com/NVIDIA/apex/blob/master/examples/imagenet/main_amp.py 17 | It could speedup your pytorch dataloader. For more information, please check 18 | https://github.com/NVIDIA/apex/issues/304#issuecomment-493562789. 
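    Rough usage sketch (illustrative variable names): wrap an existing DataLoader and keep
    calling next() until the returned batch is None at the end of the epoch, e.g.
        prefetcher = DataPrefetcher(train_loader)
        inps, targets = prefetcher.next()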
19 | """ 20 | 21 | def __init__(self, loader): 22 | self.loader = iter(loader) 23 | self.stream = torch.cuda.Stream() 24 | self.input_cuda = self._input_cuda_for_image 25 | self.record_stream = DataPrefetcher._record_stream_for_image 26 | self.preload() 27 | 28 | def preload(self): 29 | try: 30 | self.next_input, self.next_target, _, _ = next(self.loader) 31 | except StopIteration: 32 | self.next_input = None 33 | self.next_target = None 34 | return 35 | 36 | with torch.cuda.stream(self.stream): 37 | self.input_cuda() 38 | self.next_target = self.next_target.cuda(non_blocking=True) 39 | 40 | def next(self): 41 | torch.cuda.current_stream().wait_stream(self.stream) 42 | input = self.next_input 43 | target = self.next_target 44 | if input is not None: 45 | self.record_stream(input) 46 | if target is not None: 47 | target.record_stream(torch.cuda.current_stream()) 48 | self.preload() 49 | return input, target 50 | 51 | def _input_cuda_for_image(self): 52 | self.next_input = self.next_input.cuda(non_blocking=True) 53 | 54 | @staticmethod 55 | def _record_stream_for_image(input): 56 | input.record_stream(torch.cuda.current_stream()) 57 | 58 | 59 | def random_resize(data_loader, exp, epoch, rank, is_distributed): 60 | tensor = torch.LongTensor(1).cuda() 61 | if is_distributed: 62 | synchronize() 63 | 64 | if rank == 0: 65 | if epoch > exp.max_epoch - 10: 66 | size = exp.input_size 67 | else: 68 | size = random.randint(*exp.random_size) 69 | size = int(32 * size) 70 | tensor.fill_(size) 71 | 72 | if is_distributed: 73 | synchronize() 74 | dist.broadcast(tensor, 0) 75 | 76 | input_size = data_loader.change_input_dim(multiple=tensor.item(), random_range=None) 77 | return input_size 78 | -------------------------------------------------------------------------------- /yolox/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | from .datasets_wrapper import ConcatDataset, Dataset, MixConcatDataset 6 | from .mosaicdetection import MosaicDetection 7 | from .mot import MOTDataset 8 | -------------------------------------------------------------------------------- /yolox/data/datasets/datasets_wrapper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | 5 | from torch.utils.data.dataset import ConcatDataset as torchConcatDataset 6 | from torch.utils.data.dataset import Dataset as torchDataset 7 | 8 | import bisect 9 | from functools import wraps 10 | 11 | 12 | class ConcatDataset(torchConcatDataset): 13 | def __init__(self, datasets): 14 | super(ConcatDataset, self).__init__(datasets) 15 | if hasattr(self.datasets[0], "input_dim"): 16 | self._input_dim = self.datasets[0].input_dim 17 | self.input_dim = self.datasets[0].input_dim 18 | 19 | def pull_item(self, idx): 20 | if idx < 0: 21 | if -idx > len(self): 22 | raise ValueError( 23 | "absolute value of index should not exceed dataset length" 24 | ) 25 | idx = len(self) + idx 26 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 27 | if dataset_idx == 0: 28 | sample_idx = idx 29 | else: 30 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 31 | return self.datasets[dataset_idx].pull_item(sample_idx) 32 | 33 | 34 | class MixConcatDataset(torchConcatDataset): 35 | def __init__(self, datasets): 36 | super(MixConcatDataset, self).__init__(datasets) 37 | if hasattr(self.datasets[0], "input_dim"): 38 | self._input_dim = self.datasets[0].input_dim 39 | self.input_dim = self.datasets[0].input_dim 40 | 41 | def __getitem__(self, index): 42 | 43 | if not isinstance(index, int): 44 | idx = index[1] 45 | if idx < 0: 46 | if -idx > len(self): 47 | raise ValueError( 48 | "absolute value of index should not exceed dataset length" 49 | ) 50 | idx = len(self) + idx 51 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 52 | if dataset_idx == 0: 53 | sample_idx = idx 54 | else: 55 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 56 | if not isinstance(index, int): 57 | index = (index[0], sample_idx, index[2]) 58 | 59 | return self.datasets[dataset_idx][index] 60 | 61 | 62 | class Dataset(torchDataset): 63 | """ This class is a subclass of the base :class:`torch.utils.data.Dataset`, 64 | that enables on the fly resizing of the ``input_dim``. 65 | 66 | Args: 67 | input_dimension (tuple): (width,height) tuple with default dimensions of the network 68 | """ 69 | 70 | def __init__(self, input_dimension, mosaic=True): 71 | super().__init__() 72 | self.__input_dim = input_dimension[:2] 73 | self.enable_mosaic = mosaic 74 | 75 | @property 76 | def input_dim(self): 77 | """ 78 | Dimension that can be used by transforms to set the correct image size, etc. 79 | This allows transforms to have a single source of truth 80 | for the input dimension of the network. 81 | 82 | Return: 83 | list: Tuple containing the current width,height 84 | """ 85 | if hasattr(self, "_input_dim"): 86 | return self._input_dim 87 | return self.__input_dim 88 | 89 | @staticmethod 90 | def resize_getitem(getitem_fn): 91 | """ 92 | Decorator method that needs to be used around the ``__getitem__`` method. |br| 93 | This decorator enables the on the fly resizing of 94 | the ``input_dim`` with our :class:`~lightnet.data.DataLoader` class. 95 | 96 | Example: 97 | >>> class CustomSet(ln.data.Dataset): 98 | ... def __len__(self): 99 | ... return 10 100 | ... @ln.data.Dataset.resize_getitem 101 | ... def __getitem__(self, index): 102 | ... # Should return (image, anno) but here we return input_dim 103 | ... 
return self.input_dim 104 | >>> data = CustomSet((200,200)) 105 | >>> data[0] 106 | (200, 200) 107 | >>> data[(480,320), 0] 108 | (480, 320) 109 | """ 110 | 111 | @wraps(getitem_fn) 112 | def wrapper(self, index): 113 | if not isinstance(index, int): 114 | has_dim = True 115 | self._input_dim = index[0] 116 | self.enable_mosaic = index[2] 117 | index = index[1] 118 | else: 119 | has_dim = False 120 | 121 | ret_val = getitem_fn(self, index) 122 | 123 | if has_dim: 124 | del self._input_dim 125 | 126 | return ret_val 127 | 128 | return wrapper 129 | -------------------------------------------------------------------------------- /yolox/data/samplers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import BatchSampler as torchBatchSampler 8 | from torch.utils.data.sampler import Sampler 9 | 10 | import itertools 11 | from typing import Optional 12 | 13 | 14 | class YoloBatchSampler(torchBatchSampler): 15 | """ 16 | This batch sampler will generate mini-batches of (dim, index) tuples from another sampler. 17 | It works just like the :class:`torch.utils.data.sampler.BatchSampler`, 18 | but it will prepend a dimension, whilst ensuring it stays the same across one mini-batch. 19 | """ 20 | 21 | def __init__(self, *args, input_dimension=None, mosaic=True, **kwargs): 22 | super().__init__(*args, **kwargs) 23 | self.input_dim = input_dimension 24 | self.new_input_dim = None 25 | self.mosaic = mosaic 26 | 27 | def __iter__(self): 28 | self.__set_input_dim() 29 | for batch in super().__iter__(): 30 | yield [(self.input_dim, idx, self.mosaic) for idx in batch] 31 | self.__set_input_dim() 32 | 33 | def __set_input_dim(self): 34 | """ This function randomly changes the the input dimension of the dataset. """ 35 | if self.new_input_dim is not None: 36 | self.input_dim = (self.new_input_dim[0], self.new_input_dim[1]) 37 | self.new_input_dim = None 38 | 39 | 40 | class InfiniteSampler(Sampler): 41 | """ 42 | In training, we only care about the "infinite stream" of training data. 43 | So this sampler produces an infinite stream of indices and 44 | all workers cooperate to correctly shuffle the indices and sample different indices. 45 | The samplers in each worker effectively produces `indices[worker_id::num_workers]` 46 | where `indices` is an infinite stream of indices consisting of 47 | `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True) 48 | or `range(size) + range(size) + ...` (if shuffle is False) 49 | """ 50 | 51 | def __init__( 52 | self, 53 | size: int, 54 | shuffle: bool = True, 55 | seed: Optional[int] = 0, 56 | rank=0, 57 | world_size=1, 58 | ): 59 | """ 60 | Args: 61 | size (int): the total number of data of the underlying dataset to sample from 62 | shuffle (bool): whether to shuffle the indices or not 63 | seed (int): the initial seed of the shuffle. Must be the same 64 | across all workers. If None, will use a random seed shared 65 | among workers (require synchronization among all workers). 
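            rank (int): rank of the current process; used only as a fallback when
                torch.distributed is not available or not initialized.
            world_size (int): total number of processes; same fallback behaviour as `rank`.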
66 | """ 67 | self._size = size 68 | assert size > 0 69 | self._shuffle = shuffle 70 | self._seed = int(seed) 71 | 72 | if dist.is_available() and dist.is_initialized(): 73 | self._rank = dist.get_rank() 74 | self._world_size = dist.get_world_size() 75 | else: 76 | self._rank = rank 77 | self._world_size = world_size 78 | 79 | def __iter__(self): 80 | start = self._rank 81 | yield from itertools.islice( 82 | self._infinite_indices(), start, None, self._world_size 83 | ) 84 | 85 | def _infinite_indices(self): 86 | g = torch.Generator() 87 | g.manual_seed(self._seed) 88 | while True: 89 | if self._shuffle: 90 | yield from torch.randperm(self._size, generator=g) 91 | else: 92 | yield from torch.arange(self._size) 93 | 94 | def __len__(self): 95 | return self._size // self._world_size 96 | -------------------------------------------------------------------------------- /yolox/deepsort_tracker/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | Parameters 9 | ---------- 10 | tlwh : array_like 11 | Bounding box in format `(x, y, w, h)`. 12 | confidence : float 13 | Detector confidence score. 14 | feature : array_like 15 | A feature vector that describes the object contained in this image. 16 | Attributes 17 | ---------- 18 | tlwh : ndarray 19 | Bounding box in format `(top left x, top left y, width, height)`. 20 | confidence : ndarray 21 | Detector confidence score. 22 | feature : ndarray | NoneType 23 | A feature vector that describes the object contained in this image. 24 | """ 25 | 26 | def __init__(self, tlwh, confidence, feature): 27 | self.tlwh = np.asarray(tlwh, dtype=np.float) 28 | self.confidence = float(confidence) 29 | self.feature = np.asarray(feature, dtype=np.float32) 30 | 31 | def to_tlbr(self): 32 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 33 | `(top left, bottom right)`. 34 | """ 35 | ret = self.tlwh.copy() 36 | ret[2:] += ret[:2] 37 | return ret 38 | 39 | def to_xyah(self): 40 | """Convert bounding box to format `(center x, center y, aspect ratio, 41 | height)`, where the aspect ratio is `width / height`. 42 | """ 43 | ret = self.tlwh.copy() 44 | ret[:2] += ret[2:] / 2 45 | ret[2] /= ret[3] 46 | return ret -------------------------------------------------------------------------------- /yolox/deepsort_tracker/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from yolox.deepsort_tracker import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Computer intersection over union. 9 | Parameters 10 | ---------- 11 | bbox : ndarray 12 | A bounding box in format `(top left x, top left y, width, height)`. 13 | candidates : ndarray 14 | A matrix of candidate bounding boxes (one per row) in the same format 15 | as `bbox`. 16 | Returns 17 | ------- 18 | ndarray 19 | The intersection over union in [0, 1] between the `bbox` and each 20 | candidate. A higher score means a larger fraction of the `bbox` is 21 | occluded by the candidate. 
22 | """ 23 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 24 | candidates_tl = candidates[:, :2] 25 | candidates_br = candidates[:, :2] + candidates[:, 2:] 26 | 27 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 28 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 29 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 30 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 31 | wh = np.maximum(0., br - tl) 32 | 33 | area_intersection = wh.prod(axis=1) 34 | area_bbox = bbox[2:].prod() 35 | area_candidates = candidates[:, 2:].prod(axis=1) 36 | return area_intersection / (area_bbox + area_candidates - area_intersection) 37 | 38 | 39 | def iou_cost(tracks, detections, track_indices=None, 40 | detection_indices=None): 41 | """An intersection over union distance metric. 42 | Parameters 43 | ---------- 44 | tracks : List[deep_sort.track.Track] 45 | A list of tracks. 46 | detections : List[deep_sort.detection.Detection] 47 | A list of detections. 48 | track_indices : Optional[List[int]] 49 | A list of indices to tracks that should be matched. Defaults to 50 | all `tracks`. 51 | detection_indices : Optional[List[int]] 52 | A list of indices to detections that should be matched. Defaults 53 | to all `detections`. 54 | Returns 55 | ------- 56 | ndarray 57 | Returns a cost matrix of shape 58 | len(track_indices), len(detection_indices) where entry (i, j) is 59 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 60 | """ 61 | if track_indices is None: 62 | track_indices = np.arange(len(tracks)) 63 | if detection_indices is None: 64 | detection_indices = np.arange(len(detections)) 65 | 66 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 67 | for row, track_idx in enumerate(track_indices): 68 | if tracks[track_idx].time_since_update > 1: 69 | cost_matrix[row, :] = linear_assignment.INFTY_COST 70 | continue 71 | 72 | bbox = tracks[track_idx].to_tlwh() 73 | candidates = np.asarray( 74 | [detections[i].tlwh for i in detection_indices]) 75 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 76 | return cost_matrix -------------------------------------------------------------------------------- /yolox/evaluators/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | from .coco_evaluator import COCOEvaluator 6 | from .mot_evaluator import MOTEvaluator 7 | -------------------------------------------------------------------------------- /yolox/exp/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | from .base_exp import BaseExp 6 | from .build import get_exp 7 | from .yolox_base import Exp 8 | -------------------------------------------------------------------------------- /yolox/exp/base_exp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
4 | 5 | import torch 6 | from torch.nn import Module 7 | 8 | from yolox.utils import LRScheduler 9 | 10 | import ast 11 | import pprint 12 | from abc import ABCMeta, abstractmethod 13 | from tabulate import tabulate 14 | from typing import Dict 15 | 16 | 17 | class BaseExp(metaclass=ABCMeta): 18 | """Basic class for any experiment.""" 19 | 20 | def __init__(self): 21 | self.seed = None 22 | self.output_dir = "./YOLOX_outputs" 23 | self.print_interval = 100 24 | self.eval_interval = 10 25 | 26 | @abstractmethod 27 | def get_model(self) -> Module: 28 | pass 29 | 30 | @abstractmethod 31 | def get_data_loader( 32 | self, batch_size: int, is_distributed: bool 33 | ) -> Dict[str, torch.utils.data.DataLoader]: 34 | pass 35 | 36 | @abstractmethod 37 | def get_optimizer(self, batch_size: int) -> torch.optim.Optimizer: 38 | pass 39 | 40 | @abstractmethod 41 | def get_lr_scheduler( 42 | self, lr: float, iters_per_epoch: int, **kwargs 43 | ) -> LRScheduler: 44 | pass 45 | 46 | @abstractmethod 47 | def get_evaluator(self): 48 | pass 49 | 50 | @abstractmethod 51 | def eval(self, model, evaluator, weights): 52 | pass 53 | 54 | def __repr__(self): 55 | table_header = ["keys", "values"] 56 | exp_table = [ 57 | (str(k), pprint.pformat(v)) 58 | for k, v in vars(self).items() 59 | if not k.startswith("_") 60 | ] 61 | return tabulate(exp_table, headers=table_header, tablefmt="fancy_grid") 62 | 63 | def merge(self, cfg_list): 64 | assert len(cfg_list) % 2 == 0 65 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]): 66 | # only update value with same key 67 | if hasattr(self, k): 68 | src_value = getattr(self, k) 69 | src_type = type(src_value) 70 | if src_value is not None and src_type != type(v): 71 | try: 72 | v = src_type(v) 73 | except Exception: 74 | v = ast.literal_eval(v) 75 | setattr(self, k, v) 76 | -------------------------------------------------------------------------------- /yolox/exp/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import importlib 6 | import os 7 | import sys 8 | 9 | 10 | def get_exp_by_file(exp_file): 11 | try: 12 | sys.path.append(os.path.dirname(exp_file)) 13 | current_exp = importlib.import_module(os.path.basename(exp_file).split(".")[0]) 14 | exp = current_exp.Exp() 15 | except Exception: 16 | raise ImportError("{} doesn't contains class named 'Exp'".format(exp_file)) 17 | return exp 18 | 19 | 20 | def get_exp_by_name(exp_name): 21 | import yolox 22 | 23 | yolox_path = os.path.dirname(os.path.dirname(yolox.__file__)) 24 | filedict = { 25 | "yolox-s": "yolox_s.py", 26 | "yolox-m": "yolox_m.py", 27 | "yolox-l": "yolox_l.py", 28 | "yolox-x": "yolox_x.py", 29 | "yolox-tiny": "yolox_tiny.py", 30 | "yolox-nano": "nano.py", 31 | "yolov3": "yolov3.py", 32 | } 33 | filename = filedict[exp_name] 34 | exp_path = os.path.join(yolox_path, "exps", "default", filename) 35 | return get_exp_by_file(exp_path) 36 | 37 | 38 | def get_exp(exp_file, exp_name): 39 | """ 40 | get Exp object by file or name. If exp_file and exp_name 41 | are both provided, get Exp by exp_file. 42 | 43 | Args: 44 | exp_file (str): file path of experiment. 45 | exp_name (str): name of experiment. "yolo-s", 46 | """ 47 | assert ( 48 | exp_file is not None or exp_name is not None 49 | ), "plz provide exp file or exp name." 
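    # e.g. get_exp("exps/default/yolox_s.py", None) and get_exp(None, "yolox-s") resolve to the
    # same Exp class; the file path takes priority when both arguments are given.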
50 | if exp_file is not None: 51 | return get_exp_by_file(exp_file) 52 | else: 53 | return get_exp_by_name(exp_name) 54 | -------------------------------------------------------------------------------- /yolox/layers/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | from .fast_coco_eval_api import COCOeval_opt 6 | -------------------------------------------------------------------------------- /yolox/layers/csrc/cocoeval/cocoeval.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace py = pybind11; 11 | 12 | namespace COCOeval { 13 | 14 | // Annotation data for a single object instance in an image 15 | struct InstanceAnnotation { 16 | InstanceAnnotation( 17 | uint64_t id, 18 | double score, 19 | double area, 20 | bool is_crowd, 21 | bool ignore) 22 | : id{id}, score{score}, area{area}, is_crowd{is_crowd}, ignore{ignore} {} 23 | uint64_t id; 24 | double score = 0.; 25 | double area = 0.; 26 | bool is_crowd = false; 27 | bool ignore = false; 28 | }; 29 | 30 | // Stores intermediate results for evaluating detection results for a single 31 | // image that has D detected instances and G ground truth instances. This stores 32 | // matches between detected and ground truth instances 33 | struct ImageEvaluation { 34 | // For each of the D detected instances, the id of the matched ground truth 35 | // instance, or 0 if unmatched 36 | std::vector detection_matches; 37 | 38 | // The detection score of each of the D detected instances 39 | std::vector detection_scores; 40 | 41 | // Marks whether or not each of G instances was ignored from evaluation (e.g., 42 | // because it's outside area_range) 43 | std::vector ground_truth_ignores; 44 | 45 | // Marks whether or not each of D instances was ignored from evaluation (e.g., 46 | // because it's outside aRng) 47 | std::vector detection_ignores; 48 | }; 49 | 50 | template 51 | using ImageCategoryInstances = std::vector>>; 52 | 53 | // C++ implementation of COCO API cocoeval.py::COCOeval.evaluateImg(). For each 54 | // combination of image, category, area range settings, and IOU thresholds to 55 | // evaluate, it matches detected instances to ground truth instances and stores 56 | // the results into a vector of ImageEvaluation results, which will be 57 | // interpreted by the COCOeval::Accumulate() function to produce precion-recall 58 | // curves. 
The parameters of nested vectors have the following semantics: 59 | // image_category_ious[i][c][d][g] is the intersection over union of the d'th 60 | // detected instance and g'th ground truth instance of 61 | // category category_ids[c] in image image_ids[i] 62 | // image_category_ground_truth_instances[i][c] is a vector of ground truth 63 | // instances in image image_ids[i] of category category_ids[c] 64 | // image_category_detection_instances[i][c] is a vector of detected 65 | // instances in image image_ids[i] of category category_ids[c] 66 | std::vector EvaluateImages( 67 | const std::vector>& area_ranges, // vector of 2-tuples 68 | int max_detections, 69 | const std::vector& iou_thresholds, 70 | const ImageCategoryInstances>& image_category_ious, 71 | const ImageCategoryInstances& 72 | image_category_ground_truth_instances, 73 | const ImageCategoryInstances& 74 | image_category_detection_instances); 75 | 76 | // C++ implementation of COCOeval.accumulate(), which generates precision 77 | // recall curves for each set of category, IOU threshold, detection area range, 78 | // and max number of detections parameters. It is assumed that the parameter 79 | // evaluations is the return value of the functon COCOeval::EvaluateImages(), 80 | // which was called with the same parameter settings params 81 | py::dict Accumulate( 82 | const py::object& params, 83 | const std::vector& evalutations); 84 | 85 | } // namespace COCOeval 86 | -------------------------------------------------------------------------------- /yolox/layers/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | #include "cocoeval/cocoeval.h" 2 | 3 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 4 | m.def("COCOevalAccumulate", &COCOeval::Accumulate, "COCOeval::Accumulate"); 5 | m.def( 6 | "COCOevalEvaluateImages", 7 | &COCOeval::EvaluateImages, 8 | "COCOeval::EvaluateImages"); 9 | pybind11::class_(m, "InstanceAnnotation") 10 | .def(pybind11::init()); 11 | pybind11::class_(m, "ImageEvaluation") 12 | .def(pybind11::init<>()); 13 | } 14 | -------------------------------------------------------------------------------- /yolox/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | from .darknet import CSPDarknet, Darknet 6 | from .losses import IOUloss 7 | from .yolo_fpn import YOLOFPN 8 | from .yolo_head import YOLOXHead 9 | from .yolo_pafpn import YOLOPAFPN 10 | from .yolox import YOLOX 11 | -------------------------------------------------------------------------------- /yolox/models/losses.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class IOUloss(nn.Module): 11 | def __init__(self, reduction="none", loss_type="iou"): 12 | super(IOUloss, self).__init__() 13 | self.reduction = reduction 14 | self.loss_type = loss_type 15 | 16 | def forward(self, pred, target): 17 | assert pred.shape[0] == target.shape[0] 18 | 19 | pred = pred.view(-1, 4) 20 | target = target.view(-1, 4) 21 | tl = torch.max( 22 | (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) 23 | ) 24 | br = torch.min( 25 | (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) 26 | ) 27 | 28 | area_p = torch.prod(pred[:, 2:], 1) 29 | area_g = torch.prod(target[:, 2:], 1) 30 | 31 | en = (tl < br).type(tl.type()).prod(dim=1) 32 | area_i = torch.prod(br - tl, 1) * en 33 | iou = (area_i) / (area_p + area_g - area_i + 1e-16) 34 | 35 | if self.loss_type == "iou": 36 | loss = 1 - iou ** 2 37 | elif self.loss_type == "giou": 38 | c_tl = torch.min( 39 | (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) 40 | ) 41 | c_br = torch.max( 42 | (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) 43 | ) 44 | area_c = torch.prod(c_br - c_tl, 1) 45 | giou = iou - (area_c - area_i) / area_c.clamp(1e-16) 46 | loss = 1 - giou.clamp(min=-1.0, max=1.0) 47 | 48 | if self.reduction == "mean": 49 | loss = loss.mean() 50 | elif self.reduction == "sum": 51 | loss = loss.sum() 52 | 53 | return loss 54 | 55 | 56 | def sigmoid_focal_loss(inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2): 57 | """ 58 | Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. 59 | Args: 60 | inputs: A float tensor of arbitrary shape. 61 | The predictions for each example. 62 | targets: A float tensor with the same shape as inputs. Stores the binary 63 | classification label for each element in inputs 64 | (0 for the negative class and 1 for the positive class). 65 | alpha: (optional) Weighting factor in range (0,1) to balance 66 | positive vs negative examples. Default = -1 (no weighting). 67 | gamma: Exponent of the modulating factor (1 - p_t) to 68 | balance easy vs hard examples. 69 | Returns: 70 | Loss tensor 71 | """ 72 | prob = inputs.sigmoid() 73 | ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none") 74 | p_t = prob * targets + (1 - prob) * (1 - targets) 75 | loss = ce_loss * ((1 - p_t) ** gamma) 76 | 77 | if alpha >= 0: 78 | alpha_t = alpha * targets + (1 - alpha) * (1 - targets) 79 | loss = alpha_t * loss 80 | #return loss.mean(0).sum() / num_boxes 81 | return loss.sum() / num_boxes -------------------------------------------------------------------------------- /yolox/models/yolo_fpn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .darknet import Darknet 9 | from .network_blocks import BaseConv 10 | 11 | 12 | class YOLOFPN(nn.Module): 13 | """ 14 | YOLOFPN module. Darknet 53 is the default backbone of this model. 
15 | """ 16 | 17 | def __init__( 18 | self, 19 | depth=53, 20 | in_features=["dark3", "dark4", "dark5"], 21 | ): 22 | super().__init__() 23 | 24 | self.backbone = Darknet(depth) 25 | self.in_features = in_features 26 | 27 | # out 1 28 | self.out1_cbl = self._make_cbl(512, 256, 1) 29 | self.out1 = self._make_embedding([256, 512], 512 + 256) 30 | 31 | # out 2 32 | self.out2_cbl = self._make_cbl(256, 128, 1) 33 | self.out2 = self._make_embedding([128, 256], 256 + 128) 34 | 35 | # upsample 36 | self.upsample = nn.Upsample(scale_factor=2, mode="nearest") 37 | 38 | def _make_cbl(self, _in, _out, ks): 39 | return BaseConv(_in, _out, ks, stride=1, act="lrelu") 40 | 41 | def _make_embedding(self, filters_list, in_filters): 42 | m = nn.Sequential( 43 | *[ 44 | self._make_cbl(in_filters, filters_list[0], 1), 45 | self._make_cbl(filters_list[0], filters_list[1], 3), 46 | self._make_cbl(filters_list[1], filters_list[0], 1), 47 | self._make_cbl(filters_list[0], filters_list[1], 3), 48 | self._make_cbl(filters_list[1], filters_list[0], 1), 49 | ] 50 | ) 51 | return m 52 | 53 | def load_pretrained_model(self, filename="./weights/darknet53.mix.pth"): 54 | with open(filename, "rb") as f: 55 | state_dict = torch.load(f, map_location="cpu") 56 | print("loading pretrained weights...") 57 | self.backbone.load_state_dict(state_dict) 58 | 59 | def forward(self, inputs): 60 | """ 61 | Args: 62 | inputs (Tensor): input image. 63 | 64 | Returns: 65 | Tuple[Tensor]: FPN output features.. 66 | """ 67 | # backbone 68 | out_features = self.backbone(inputs) 69 | x2, x1, x0 = [out_features[f] for f in self.in_features] 70 | 71 | # yolo branch 1 72 | x1_in = self.out1_cbl(x0) 73 | x1_in = self.upsample(x1_in) 74 | x1_in = torch.cat([x1_in, x1], 1) 75 | out_dark4 = self.out1(x1_in) 76 | 77 | # yolo branch 2 78 | x2_in = self.out2_cbl(out_dark4) 79 | x2_in = self.upsample(x2_in) 80 | x2_in = torch.cat([x2_in, x2], 1) 81 | out_dark3 = self.out2(x2_in) 82 | 83 | outputs = (out_dark3, out_dark4, x0) 84 | return outputs 85 | -------------------------------------------------------------------------------- /yolox/models/yolo_pafpn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .darknet import CSPDarknet 9 | from .network_blocks import BaseConv, CSPLayer, DWConv 10 | 11 | 12 | class YOLOPAFPN(nn.Module): 13 | """ 14 | YOLOv3 model. Darknet 53 is the default backbone of this model. 
15 | """ 16 | 17 | def __init__( 18 | self, 19 | depth=1.0, 20 | width=1.0, 21 | in_features=("dark3", "dark4", "dark5"), 22 | in_channels=[256, 512, 1024], 23 | depthwise=False, 24 | act="silu", 25 | ): 26 | super().__init__() 27 | self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act) 28 | self.in_features = in_features 29 | self.in_channels = in_channels 30 | Conv = DWConv if depthwise else BaseConv 31 | 32 | self.upsample = nn.Upsample(scale_factor=2, mode="nearest") 33 | self.lateral_conv0 = BaseConv( 34 | int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act 35 | ) 36 | self.C3_p4 = CSPLayer( 37 | int(2 * in_channels[1] * width), 38 | int(in_channels[1] * width), 39 | round(3 * depth), 40 | False, 41 | depthwise=depthwise, 42 | act=act, 43 | ) # cat 44 | 45 | self.reduce_conv1 = BaseConv( 46 | int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act 47 | ) 48 | self.C3_p3 = CSPLayer( 49 | int(2 * in_channels[0] * width), 50 | int(in_channels[0] * width), 51 | round(3 * depth), 52 | False, 53 | depthwise=depthwise, 54 | act=act, 55 | ) 56 | 57 | # bottom-up conv 58 | self.bu_conv2 = Conv( 59 | int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act 60 | ) 61 | self.C3_n3 = CSPLayer( 62 | int(2 * in_channels[0] * width), 63 | int(in_channels[1] * width), 64 | round(3 * depth), 65 | False, 66 | depthwise=depthwise, 67 | act=act, 68 | ) 69 | 70 | # bottom-up conv 71 | self.bu_conv1 = Conv( 72 | int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act 73 | ) 74 | self.C3_n4 = CSPLayer( 75 | int(2 * in_channels[1] * width), 76 | int(in_channels[2] * width), 77 | round(3 * depth), 78 | False, 79 | depthwise=depthwise, 80 | act=act, 81 | ) 82 | 83 | def forward(self, input): 84 | """ 85 | Args: 86 | inputs: input images. 87 | 88 | Returns: 89 | Tuple[Tensor]: FPN feature. 90 | """ 91 | 92 | # backbone 93 | out_features = self.backbone(input) 94 | features = [out_features[f] for f in self.in_features] 95 | [x2, x1, x0] = features 96 | 97 | fpn_out0 = self.lateral_conv0(x0) # 1024->512/32 98 | f_out0 = self.upsample(fpn_out0) # 512/16 99 | f_out0 = torch.cat([f_out0, x1], 1) # 512->1024/16 100 | f_out0 = self.C3_p4(f_out0) # 1024->512/16 101 | 102 | fpn_out1 = self.reduce_conv1(f_out0) # 512->256/16 103 | f_out1 = self.upsample(fpn_out1) # 256/8 104 | f_out1 = torch.cat([f_out1, x2], 1) # 256->512/8 105 | pan_out2 = self.C3_p3(f_out1) # 512->256/8 106 | 107 | p_out1 = self.bu_conv2(pan_out2) # 256->256/16 108 | p_out1 = torch.cat([p_out1, fpn_out1], 1) # 256->512/16 109 | pan_out1 = self.C3_n3(p_out1) # 512->512/16 110 | 111 | p_out0 = self.bu_conv1(pan_out1) # 512->512/32 112 | p_out0 = torch.cat([p_out0, fpn_out0], 1) # 512->1024/32 113 | pan_out0 = self.C3_n4(p_out0) # 1024->1024/32 114 | 115 | outputs = (pan_out2, pan_out1, pan_out0) 116 | return outputs 117 | -------------------------------------------------------------------------------- /yolox/models/yolox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import torch.nn as nn 6 | 7 | from .yolo_head import YOLOXHead 8 | from .yolo_pafpn import YOLOPAFPN 9 | 10 | 11 | class YOLOX(nn.Module): 12 | """ 13 | YOLOX model module. The module list is defined by create_yolov3_modules function. 14 | The network returns loss values from three YOLO layers during training 15 | and detection results during test. 
16 | """ 17 | 18 | def __init__(self, backbone=None, head=None): 19 | super().__init__() 20 | if backbone is None: 21 | backbone = YOLOPAFPN() 22 | if head is None: 23 | head = YOLOXHead(80) 24 | 25 | self.backbone = backbone 26 | self.head = head 27 | 28 | def forward(self, x, targets=None): 29 | # fpn output content features of [dark3, dark4, dark5] 30 | fpn_outs = self.backbone(x) 31 | 32 | if self.training: 33 | assert targets is not None 34 | loss, iou_loss, conf_loss, cls_loss, l1_loss, num_fg = self.head( 35 | fpn_outs, targets, x 36 | ) 37 | outputs = { 38 | "total_loss": loss, 39 | "iou_loss": iou_loss, 40 | "l1_loss": l1_loss, 41 | "conf_loss": conf_loss, 42 | "cls_loss": cls_loss, 43 | "num_fg": num_fg, 44 | } 45 | else: 46 | outputs = self.head(fpn_outs) 47 | 48 | return outputs 49 | -------------------------------------------------------------------------------- /yolox/motdt_tracker/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | Replaced = 4 11 | 12 | 13 | class BaseTrack(object): 14 | _count = 0 15 | 16 | track_id = 0 17 | is_activated = False 18 | state = TrackState.New 19 | 20 | history = OrderedDict() 21 | features = [] 22 | curr_feature = None 23 | score = 0 24 | start_frame = 0 25 | frame_id = 0 26 | time_since_update = 0 27 | 28 | # multi-camera 29 | location = (np.inf, np.inf) 30 | 31 | @property 32 | def end_frame(self): 33 | return self.frame_id 34 | 35 | @staticmethod 36 | def next_id(): 37 | BaseTrack._count += 1 38 | return BaseTrack._count 39 | 40 | def activate(self, *args): 41 | raise NotImplementedError 42 | 43 | def predict(self): 44 | raise NotImplementedError 45 | 46 | def update(self, *args, **kwargs): 47 | raise NotImplementedError 48 | 49 | def mark_lost(self): 50 | self.state = TrackState.Lost 51 | 52 | def mark_removed(self): 53 | self.state = TrackState.Removed 54 | 55 | def mark_replaced(self): 56 | self.state = TrackState.Replaced 57 | -------------------------------------------------------------------------------- /yolox/motdt_tracker/matching.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import lap 4 | from scipy.spatial.distance import cdist 5 | 6 | from cython_bbox import bbox_overlaps as bbox_ious 7 | from yolox.motdt_tracker import kalman_filter 8 | 9 | 10 | def _indices_to_matches(cost_matrix, indices, thresh): 11 | matched_cost = cost_matrix[tuple(zip(*indices))] 12 | matched_mask = (matched_cost <= thresh) 13 | 14 | matches = indices[matched_mask] 15 | unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) 16 | unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) 17 | 18 | return matches, unmatched_a, unmatched_b 19 | 20 | 21 | def linear_assignment(cost_matrix, thresh): 22 | if cost_matrix.size == 0: 23 | return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) 24 | matches, unmatched_a, unmatched_b = [], [], [] 25 | cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) 26 | for ix, mx in enumerate(x): 27 | if mx >= 0: 28 | matches.append([ix, mx]) 29 | unmatched_a = np.where(x < 0)[0] 30 | unmatched_b = np.where(y < 0)[0] 31 | matches = np.asarray(matches) 32 | return matches, unmatched_a, unmatched_b 33 | 34 | 35 | def ious(atlbrs, btlbrs): 
36 | """ 37 | Compute cost based on IoU 38 | :type atlbrs: list[tlbr] | np.ndarray 39 | :type atlbrs: list[tlbr] | np.ndarray 40 | :rtype ious np.ndarray 41 | """ 42 | ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) 43 | if ious.size == 0: 44 | return ious 45 | 46 | ious = bbox_ious( 47 | np.ascontiguousarray(atlbrs, dtype=np.float), 48 | np.ascontiguousarray(btlbrs, dtype=np.float) 49 | ) 50 | 51 | return ious 52 | 53 | 54 | def iou_distance(atracks, btracks): 55 | """ 56 | Compute cost based on IoU 57 | :type atracks: list[STrack] 58 | :type btracks: list[STrack] 59 | :rtype cost_matrix np.ndarray 60 | """ 61 | atlbrs = [track.tlbr for track in atracks] 62 | btlbrs = [track.tlbr for track in btracks] 63 | _ious = ious(atlbrs, btlbrs) 64 | cost_matrix = 1 - _ious 65 | 66 | return cost_matrix 67 | 68 | 69 | def nearest_reid_distance(tracks, detections, metric='cosine'): 70 | """ 71 | Compute cost based on ReID features 72 | :type tracks: list[STrack] 73 | :type detections: list[BaseTrack] 74 | :rtype cost_matrix np.ndarray 75 | """ 76 | cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) 77 | if cost_matrix.size == 0: 78 | return cost_matrix 79 | 80 | det_features = np.asarray([track.curr_feature for track in detections], dtype=np.float32) 81 | for i, track in enumerate(tracks): 82 | cost_matrix[i, :] = np.maximum(0.0, cdist(track.features, det_features, metric).min(axis=0)) 83 | 84 | return cost_matrix 85 | 86 | 87 | def mean_reid_distance(tracks, detections, metric='cosine'): 88 | """ 89 | Compute cost based on ReID features 90 | :type tracks: list[STrack] 91 | :type detections: list[BaseTrack] 92 | :type metric: str 93 | :rtype cost_matrix np.ndarray 94 | """ 95 | cost_matrix = np.empty((len(tracks), len(detections)), dtype=np.float) 96 | if cost_matrix.size == 0: 97 | return cost_matrix 98 | 99 | track_features = np.asarray([track.curr_feature for track in tracks], dtype=np.float32) 100 | det_features = np.asarray([track.curr_feature for track in detections], dtype=np.float32) 101 | cost_matrix = cdist(track_features, det_features, metric) 102 | 103 | return cost_matrix 104 | 105 | 106 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): 107 | if cost_matrix.size == 0: 108 | return cost_matrix 109 | gating_dim = 2 if only_position else 4 110 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 111 | measurements = np.asarray([det.to_xyah() for det in detections]) 112 | for row, track in enumerate(tracks): 113 | gating_distance = kf.gating_distance( 114 | track.mean, track.covariance, measurements, only_position) 115 | cost_matrix[row, gating_distance > gating_threshold] = np.inf 116 | return cost_matrix -------------------------------------------------------------------------------- /yolox/tracker/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | 
BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed -------------------------------------------------------------------------------- /yolox/tracking_utils/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | mm.lap.default_solver = 'lap' 6 | 7 | from yolox.tracking_utils.io import read_results, unzip_objs 8 | 9 | 10 | class Evaluator(object): 11 | 12 | def __init__(self, data_root, seq_name, data_type): 13 | self.data_root = data_root 14 | self.seq_name = seq_name 15 | self.data_type = data_type 16 | 17 | self.load_annotations() 18 | self.reset_accumulator() 19 | 20 | def load_annotations(self): 21 | assert self.data_type == 'mot' 22 | 23 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 24 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 25 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 26 | 27 | def reset_accumulator(self): 28 | self.acc = mm.MOTAccumulator(auto_id=True) 29 | 30 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 31 | # results 32 | trk_tlwhs = np.copy(trk_tlwhs) 33 | trk_ids = np.copy(trk_ids) 34 | 35 | # gts 36 | gt_objs = self.gt_frame_dict.get(frame_id, []) 37 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 38 | 39 | # ignore boxes 40 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 41 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | #match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 57 | #match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 58 | #match_ious = iou_distance[match_is, match_js] 59 | 60 | #match_js = np.asarray(match_js, dtype=int) 61 | #match_js = match_js[np.logical_not(np.isnan(match_ious))] 62 | #keep[match_js] = False 63 | #trk_tlwhs = trk_tlwhs[keep] 64 | #trk_ids = trk_ids[keep] 65 | 66 | # get distance matrix 67 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 68 | 69 | # acc 70 | self.acc.update(gt_ids, trk_ids, iou_distance) 71 | 72 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 73 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 74 | else: 75 | events = None 76 | return events 77 | 78 | def eval_file(self, filename): 79 | self.reset_accumulator() 80 | 81 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 82 | #frames = sorted(list(set(self.gt_frame_dict.keys()) | 
set(result_frame_dict.keys()))) 83 | frames = sorted(list(set(result_frame_dict.keys()))) 84 | for frame_id in frames: 85 | trk_objs = result_frame_dict.get(frame_id, []) 86 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 87 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 88 | 89 | return self.acc 90 | 91 | @staticmethod 92 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 93 | names = copy.deepcopy(names) 94 | if metrics is None: 95 | metrics = mm.metrics.motchallenge_metrics 96 | metrics = copy.deepcopy(metrics) 97 | 98 | mh = mm.metrics.create() 99 | summary = mh.compute_many( 100 | accs, 101 | metrics=metrics, 102 | names=names, 103 | generate_overall=True 104 | ) 105 | 106 | return summary 107 | 108 | @staticmethod 109 | def save_summary(summary, filename): 110 | import pandas as pd 111 | writer = pd.ExcelWriter(filename) 112 | summary.to_excel(writer) 113 | writer.save() -------------------------------------------------------------------------------- /yolox/tracking_utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | 6 | def write_results(filename, results_dict: Dict, data_type: str): 7 | if not filename: 8 | return 9 | path = os.path.dirname(filename) 10 | if not os.path.exists(path): 11 | os.makedirs(path) 12 | 13 | if data_type in ('mot', 'mcmot', 'lab'): 14 | save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 15 | elif data_type == 'kitti': 16 | save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 17 | else: 18 | raise ValueError(data_type) 19 | 20 | with open(filename, 'w') as f: 21 | for frame_id, frame_data in results_dict.items(): 22 | if data_type == 'kitti': 23 | frame_id -= 1 24 | for tlwh, track_id in frame_data: 25 | if track_id < 0: 26 | continue 27 | x1, y1, w, h = tlwh 28 | x2, y2 = x1 + w, y1 + h 29 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 30 | f.write(line) 31 | 32 | 33 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 34 | if data_type in ('mot', 'lab'): 35 | read_fun = read_mot_results 36 | else: 37 | raise ValueError('Unknown data type: {}'.format(data_type)) 38 | 39 | return read_fun(filename, is_gt, is_ignore) 40 | 41 | 42 | """ 43 | labels={'ped', ... % 1 44 | 'person_on_vhcl', ... % 2 45 | 'car', ... % 3 46 | 'bicycle', ... % 4 47 | 'mbike', ... % 5 48 | 'non_mot_vhcl', ... % 6 49 | 'static_person', ... % 7 50 | 'distractor', ... % 8 51 | 'occluder', ... % 9 52 | 'occluder_on_grnd', ... %10 53 | 'occluder_full', ... % 11 54 | 'reflection', ... % 12 55 | 'crowd' ... 
% 13 56 | }; 57 | """ 58 | 59 | 60 | def read_mot_results(filename, is_gt, is_ignore): 61 | valid_labels = {1} 62 | ignore_labels = {2, 7, 8, 12} 63 | results_dict = dict() 64 | if os.path.isfile(filename): 65 | with open(filename, 'r') as f: 66 | for line in f.readlines(): 67 | linelist = line.split(',') 68 | if len(linelist) < 7: 69 | continue 70 | fid = int(linelist[0]) 71 | if fid < 1: 72 | continue 73 | results_dict.setdefault(fid, list()) 74 | 75 | box_size = float(linelist[4]) * float(linelist[5]) 76 | 77 | if is_gt: 78 | if 'MOT16-' in filename or 'MOT17-' in filename: 79 | label = int(float(linelist[7])) 80 | mark = int(float(linelist[6])) 81 | if mark == 0 or label not in valid_labels: 82 | continue 83 | score = 1 84 | elif is_ignore: 85 | if 'MOT16-' in filename or 'MOT17-' in filename: 86 | label = int(float(linelist[7])) 87 | vis_ratio = float(linelist[8]) 88 | if label not in ignore_labels and vis_ratio >= 0: 89 | continue 90 | else: 91 | continue 92 | score = 1 93 | else: 94 | score = float(linelist[6]) 95 | 96 | #if box_size > 7000: 97 | #if box_size <= 7000 or box_size >= 15000: 98 | #if box_size < 15000: 99 | #continue 100 | 101 | tlwh = tuple(map(float, linelist[2:6])) 102 | target_id = int(linelist[1]) 103 | 104 | results_dict[fid].append((tlwh, target_id, score)) 105 | 106 | return results_dict 107 | 108 | 109 | def unzip_objs(objs): 110 | if len(objs) > 0: 111 | tlwhs, ids, scores = zip(*objs) 112 | else: 113 | tlwhs, ids, scores = [], [], [] 114 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 115 | 116 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /yolox/tracking_utils/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | class Timer(object): 5 | """A simple timer.""" 6 | def __init__(self): 7 | self.total_time = 0. 8 | self.calls = 0 9 | self.start_time = 0. 10 | self.diff = 0. 11 | self.average_time = 0. 12 | 13 | self.duration = 0. 14 | 15 | def tic(self): 16 | # using time.time instead of time.clock because time time.clock 17 | # does not normalize for multithreading 18 | self.start_time = time.time() 19 | 20 | def toc(self, average=True): 21 | self.diff = time.time() - self.start_time 22 | self.total_time += self.diff 23 | self.calls += 1 24 | self.average_time = self.total_time / self.calls 25 | if average: 26 | self.duration = self.average_time 27 | else: 28 | self.duration = self.diff 29 | return self.duration 30 | 31 | def clear(self): 32 | self.total_time = 0. 33 | self.calls = 0 34 | self.start_time = 0. 35 | self.diff = 0. 36 | self.average_time = 0. 37 | self.duration = 0. -------------------------------------------------------------------------------- /yolox/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
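# [Editor's note] This package re-exports the training/inference helpers from
# the modules imported below, so callers can pull them straight from
# yolox.utils. A small illustrative sketch (the save directory is a
# placeholder):
#
#   from yolox.utils import setup_logger, fuse_model, get_model_info, MeterBuffer
#
#   setup_logger("YOLOX_outputs/example_exp", distributed_rank=0, filename="train_log.txt")
#   meters = MeterBuffer(window_size=20)  # smoothed loss/time meters (see metric.py)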
4 | 5 | from .allreduce_norm import * 6 | from .boxes import * 7 | from .checkpoint import load_ckpt, save_checkpoint 8 | from .demo_utils import * 9 | from .dist import * 10 | from .ema import ModelEMA 11 | from .logger import setup_logger 12 | from .lr_scheduler import LRScheduler 13 | from .metric import * 14 | from .model_utils import * 15 | from .setup_env import * 16 | from .visualize import * 17 | -------------------------------------------------------------------------------- /yolox/utils/allreduce_norm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import torch 6 | from torch import distributed as dist 7 | from torch import nn 8 | 9 | import pickle 10 | from collections import OrderedDict 11 | 12 | from .dist import _get_global_gloo_group, get_world_size 13 | 14 | ASYNC_NORM = ( 15 | nn.BatchNorm1d, 16 | nn.BatchNorm2d, 17 | nn.BatchNorm3d, 18 | nn.InstanceNorm1d, 19 | nn.InstanceNorm2d, 20 | nn.InstanceNorm3d, 21 | ) 22 | 23 | __all__ = [ 24 | "get_async_norm_states", 25 | "pyobj2tensor", 26 | "tensor2pyobj", 27 | "all_reduce", 28 | "all_reduce_norm", 29 | ] 30 | 31 | 32 | def get_async_norm_states(module): 33 | async_norm_states = OrderedDict() 34 | for name, child in module.named_modules(): 35 | if isinstance(child, ASYNC_NORM): 36 | for k, v in child.state_dict().items(): 37 | async_norm_states[".".join([name, k])] = v 38 | return async_norm_states 39 | 40 | 41 | def pyobj2tensor(pyobj, device="cuda"): 42 | """serialize picklable python object to tensor""" 43 | storage = torch.ByteStorage.from_buffer(pickle.dumps(pyobj)) 44 | return torch.ByteTensor(storage).to(device=device) 45 | 46 | 47 | def tensor2pyobj(tensor): 48 | """deserialize tensor to picklable python object""" 49 | return pickle.loads(tensor.cpu().numpy().tobytes()) 50 | 51 | 52 | def _get_reduce_op(op_name): 53 | return { 54 | "sum": dist.ReduceOp.SUM, 55 | "mean": dist.ReduceOp.SUM, 56 | }[op_name.lower()] 57 | 58 | 59 | def all_reduce(py_dict, op="sum", group=None): 60 | """ 61 | Apply all reduce function for python dict object. 62 | NOTE: make sure that every py_dict has the same keys and values are in the same shape. 63 | 64 | Args: 65 | py_dict (dict): dict to apply all reduce op. 66 | op (str): operator, could be "sum" or "mean". 67 | """ 68 | world_size = get_world_size() 69 | if world_size == 1: 70 | return py_dict 71 | if group is None: 72 | group = _get_global_gloo_group() 73 | if dist.get_world_size(group) == 1: 74 | return py_dict 75 | 76 | # all reduce logic across different devices. 77 | py_key = list(py_dict.keys()) 78 | py_key_tensor = pyobj2tensor(py_key) 79 | dist.broadcast(py_key_tensor, src=0) 80 | py_key = tensor2pyobj(py_key_tensor) 81 | 82 | tensor_shapes = [py_dict[k].shape for k in py_key] 83 | tensor_numels = [py_dict[k].numel() for k in py_key] 84 | 85 | flatten_tensor = torch.cat([py_dict[k].flatten() for k in py_key]) 86 | dist.all_reduce(flatten_tensor, op=_get_reduce_op(op)) 87 | if op == "mean": 88 | flatten_tensor /= world_size 89 | 90 | split_tensors = [ 91 | x.reshape(shape) 92 | for x, shape in zip(torch.split(flatten_tensor, tensor_numels), tensor_shapes) 93 | ] 94 | return OrderedDict({k: v for k, v in zip(py_key, split_tensors)}) 95 | 96 | 97 | def all_reduce_norm(module): 98 | """ 99 | All reduce norm statistics in different devices. 
100 | """ 101 | states = get_async_norm_states(module) 102 | states = all_reduce(states, op="mean") 103 | module.load_state_dict(states, strict=False) 104 | -------------------------------------------------------------------------------- /yolox/utils/checkpoint.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | from loguru import logger 5 | 6 | import torch 7 | 8 | import os 9 | import shutil 10 | 11 | 12 | def load_ckpt(model, ckpt): 13 | model_state_dict = model.state_dict() 14 | load_dict = {} 15 | for key_model, v in model_state_dict.items(): 16 | if key_model not in ckpt: 17 | logger.warning( 18 | "{} is not in the ckpt. Please double check and see if this is desired.".format( 19 | key_model 20 | ) 21 | ) 22 | continue 23 | v_ckpt = ckpt[key_model] 24 | if v.shape != v_ckpt.shape: 25 | logger.warning( 26 | "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format( 27 | key_model, v_ckpt.shape, key_model, v.shape 28 | ) 29 | ) 30 | continue 31 | load_dict[key_model] = v_ckpt 32 | 33 | model.load_state_dict(load_dict, strict=False) 34 | return model 35 | 36 | 37 | def save_checkpoint(state, is_best, save_dir, model_name=""): 38 | if not os.path.exists(save_dir): 39 | os.makedirs(save_dir) 40 | filename = os.path.join(save_dir, model_name + "_ckpt.pth.tar") 41 | torch.save(state, filename) 42 | if is_best: 43 | best_filename = os.path.join(save_dir, "best_ckpt.pth.tar") 44 | shutil.copyfile(filename, best_filename) 45 | -------------------------------------------------------------------------------- /yolox/utils/demo_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
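# [Editor's sketch] Typical post-processing flow for raw demo output (e.g. from
# the ONNXRuntime demo). Shapes, thresholds, and variable names below are
# illustrative rather than taken from the original scripts:
#
#   preds = demo_postprocess(raw_output, img_size=(640, 640))[0]  # (n_anchors, 5 + num_classes)
#   boxes = preds[:, :4]                   # (cx, cy, w, h) in input-image scale
#   scores = preds[:, 4:5] * preds[:, 5:]  # objectness * class probability
#   boxes_xyxy = boxes.copy()
#   boxes_xyxy[:, :2] = boxes[:, :2] - boxes[:, 2:] / 2.0
#   boxes_xyxy[:, 2:] = boxes[:, :2] + boxes[:, 2:] / 2.0
#   dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1)
#   # dets: (N, 6) array of [x1, y1, x2, y2, score, class_id], or None if empty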
4 | 5 | import numpy as np 6 | 7 | import os 8 | 9 | __all__ = ["mkdir", "nms", "multiclass_nms", "demo_postprocess"] 10 | 11 | 12 | def mkdir(path): 13 | if not os.path.exists(path): 14 | os.makedirs(path) 15 | 16 | 17 | def nms(boxes, scores, nms_thr): 18 | """Single class NMS implemented in Numpy.""" 19 | x1 = boxes[:, 0] 20 | y1 = boxes[:, 1] 21 | x2 = boxes[:, 2] 22 | y2 = boxes[:, 3] 23 | 24 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | order = scores.argsort()[::-1] 26 | 27 | keep = [] 28 | while order.size > 0: 29 | i = order[0] 30 | keep.append(i) 31 | xx1 = np.maximum(x1[i], x1[order[1:]]) 32 | yy1 = np.maximum(y1[i], y1[order[1:]]) 33 | xx2 = np.minimum(x2[i], x2[order[1:]]) 34 | yy2 = np.minimum(y2[i], y2[order[1:]]) 35 | 36 | w = np.maximum(0.0, xx2 - xx1 + 1) 37 | h = np.maximum(0.0, yy2 - yy1 + 1) 38 | inter = w * h 39 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 40 | 41 | inds = np.where(ovr <= nms_thr)[0] 42 | order = order[inds + 1] 43 | 44 | return keep 45 | 46 | 47 | def multiclass_nms(boxes, scores, nms_thr, score_thr): 48 | """Multiclass NMS implemented in Numpy""" 49 | final_dets = [] 50 | num_classes = scores.shape[1] 51 | for cls_ind in range(num_classes): 52 | cls_scores = scores[:, cls_ind] 53 | valid_score_mask = cls_scores > score_thr 54 | if valid_score_mask.sum() == 0: 55 | continue 56 | else: 57 | valid_scores = cls_scores[valid_score_mask] 58 | valid_boxes = boxes[valid_score_mask] 59 | keep = nms(valid_boxes, valid_scores, nms_thr) 60 | if len(keep) > 0: 61 | cls_inds = np.ones((len(keep), 1)) * cls_ind 62 | dets = np.concatenate( 63 | [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1 64 | ) 65 | final_dets.append(dets) 66 | if len(final_dets) == 0: 67 | return None 68 | return np.concatenate(final_dets, 0) 69 | 70 | 71 | def demo_postprocess(outputs, img_size, p6=False): 72 | 73 | grids = [] 74 | expanded_strides = [] 75 | 76 | if not p6: 77 | strides = [8, 16, 32] 78 | else: 79 | strides = [8, 16, 32, 64] 80 | 81 | hsizes = [img_size[0] // stride for stride in strides] 82 | wsizes = [img_size[1] // stride for stride in strides] 83 | 84 | for hsize, wsize, stride in zip(hsizes, wsizes, strides): 85 | xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize)) 86 | grid = np.stack((xv, yv), 2).reshape(1, -1, 2) 87 | grids.append(grid) 88 | shape = grid.shape[:2] 89 | expanded_strides.append(np.full((*shape, 1), stride)) 90 | 91 | grids = np.concatenate(grids, 1) 92 | expanded_strides = np.concatenate(expanded_strides, 1) 93 | outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides 94 | outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides 95 | 96 | return outputs 97 | -------------------------------------------------------------------------------- /yolox/utils/ema.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | import torch 5 | import torch.nn as nn 6 | 7 | import math 8 | from copy import deepcopy 9 | 10 | 11 | def is_parallel(model): 12 | """check if model is in parallel mode.""" 13 | 14 | parallel_type = ( 15 | nn.parallel.DataParallel, 16 | nn.parallel.DistributedDataParallel, 17 | ) 18 | return isinstance(model, parallel_type) 19 | 20 | 21 | def copy_attr(a, b, include=(), exclude=()): 22 | # Copy attributes from b to a, options to only include [...] and to exclude [...] 
23 | for k, v in b.__dict__.items(): 24 | if (len(include) and k not in include) or k.startswith("_") or k in exclude: 25 | continue 26 | else: 27 | setattr(a, k, v) 28 | 29 | 30 | class ModelEMA: 31 | """ 32 | Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models 33 | Keep a moving average of everything in the model state_dict (parameters and buffers). 34 | This is intended to allow functionality like 35 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 36 | A smoothed version of the weights is necessary for some training schemes to perform well. 37 | This class is sensitive where it is initialized in the sequence of model init, 38 | GPU assignment and distributed training wrappers. 39 | """ 40 | 41 | def __init__(self, model, decay=0.9999, updates=0): 42 | """ 43 | Args: 44 | model (nn.Module): model to apply EMA. 45 | decay (float): ema decay reate. 46 | updates (int): counter of EMA updates. 47 | """ 48 | # Create EMA(FP32) 49 | self.ema = deepcopy(model.module if is_parallel(model) else model).eval() 50 | self.updates = updates 51 | # decay exponential ramp (to help early epochs) 52 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) 53 | for p in self.ema.parameters(): 54 | p.requires_grad_(False) 55 | 56 | def update(self, model): 57 | # Update EMA parameters 58 | with torch.no_grad(): 59 | self.updates += 1 60 | d = self.decay(self.updates) 61 | 62 | msd = ( 63 | model.module.state_dict() if is_parallel(model) else model.state_dict() 64 | ) # model state_dict 65 | for k, v in self.ema.state_dict().items(): 66 | if v.dtype.is_floating_point: 67 | v *= d 68 | v += (1.0 - d) * msd[k].detach() 69 | 70 | def update_attr(self, model, include=(), exclude=("process_group", "reducer")): 71 | # Update EMA attributes 72 | copy_attr(self.ema, model, include, exclude) 73 | -------------------------------------------------------------------------------- /yolox/utils/logger.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | from loguru import logger 6 | 7 | import inspect 8 | import os 9 | import sys 10 | 11 | 12 | def get_caller_name(depth=0): 13 | """ 14 | Args: 15 | depth (int): Depth of caller conext, use 0 for caller depth. Default value: 0. 16 | 17 | Returns: 18 | str: module name of the caller 19 | """ 20 | # the following logic is a little bit faster than inspect.stack() logic 21 | frame = inspect.currentframe().f_back 22 | for _ in range(depth): 23 | frame = frame.f_back 24 | 25 | return frame.f_globals["__name__"] 26 | 27 | 28 | class StreamToLoguru: 29 | """ 30 | stream object that redirects writes to a logger instance. 31 | """ 32 | 33 | def __init__(self, level="INFO", caller_names=("apex", "pycocotools")): 34 | """ 35 | Args: 36 | level(str): log level string of loguru. Default value: "INFO". 37 | caller_names(tuple): caller names of redirected module. 38 | Default value: (apex, pycocotools). 
39 | """ 40 | self.level = level 41 | self.linebuf = "" 42 | self.caller_names = caller_names 43 | 44 | def write(self, buf): 45 | full_name = get_caller_name(depth=1) 46 | module_name = full_name.rsplit(".", maxsplit=-1)[0] 47 | if module_name in self.caller_names: 48 | for line in buf.rstrip().splitlines(): 49 | # use caller level log 50 | logger.opt(depth=2).log(self.level, line.rstrip()) 51 | else: 52 | sys.__stdout__.write(buf) 53 | 54 | def flush(self): 55 | pass 56 | 57 | 58 | def redirect_sys_output(log_level="INFO"): 59 | redirect_logger = StreamToLoguru(log_level) 60 | sys.stderr = redirect_logger 61 | sys.stdout = redirect_logger 62 | 63 | 64 | def setup_logger(save_dir, distributed_rank=0, filename="log.txt", mode="a"): 65 | """setup logger for training and testing. 66 | Args: 67 | save_dir(str): location to save log file 68 | distributed_rank(int): device rank when multi-gpu environment 69 | filename (string): log save name. 70 | mode(str): log file write mode, `append` or `override`. default is `a`. 71 | 72 | Return: 73 | logger instance. 74 | """ 75 | loguru_format = ( 76 | "{time:YYYY-MM-DD HH:mm:ss} | " 77 | "{level: <8} | " 78 | "{name}:{line} - {message}" 79 | ) 80 | 81 | logger.remove() 82 | save_file = os.path.join(save_dir, filename) 83 | if mode == "o" and os.path.exists(save_file): 84 | os.remove(save_file) 85 | # only keep logger in rank0 process 86 | if distributed_rank == 0: 87 | logger.add( 88 | sys.stderr, 89 | format=loguru_format, 90 | level="INFO", 91 | enqueue=True, 92 | ) 93 | logger.add(save_file) 94 | 95 | # redirect stdout/stderr to loguru 96 | redirect_sys_output("INFO") 97 | -------------------------------------------------------------------------------- /yolox/utils/metric.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | import numpy as np 5 | 6 | import torch 7 | 8 | import functools 9 | import os 10 | import time 11 | from collections import defaultdict, deque 12 | 13 | __all__ = [ 14 | "AverageMeter", 15 | "MeterBuffer", 16 | "get_total_and_free_memory_in_Mb", 17 | "occupy_mem", 18 | "gpu_mem_usage", 19 | ] 20 | 21 | 22 | def get_total_and_free_memory_in_Mb(cuda_device): 23 | devices_info_str = os.popen( 24 | "nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader" 25 | ) 26 | devices_info = devices_info_str.read().strip().split("\n") 27 | total, used = devices_info[int(cuda_device)].split(",") 28 | return int(total), int(used) 29 | 30 | 31 | def occupy_mem(cuda_device, mem_ratio=0.95): 32 | """ 33 | pre-allocate gpu memory for training to avoid memory Fragmentation. 34 | """ 35 | total, used = get_total_and_free_memory_in_Mb(cuda_device) 36 | max_mem = int(total * mem_ratio) 37 | block_mem = max_mem - used 38 | x = torch.cuda.FloatTensor(256, 1024, block_mem) 39 | del x 40 | time.sleep(5) 41 | 42 | 43 | def gpu_mem_usage(): 44 | """ 45 | Compute the GPU memory usage for the current device (MB). 46 | """ 47 | mem_usage_bytes = torch.cuda.max_memory_allocated() 48 | return mem_usage_bytes / (1024 * 1024) 49 | 50 | 51 | class AverageMeter: 52 | """Track a series of values and provide access to smoothed values over a 53 | window or the global series average. 
54 | """ 55 | 56 | def __init__(self, window_size=50): 57 | self._deque = deque(maxlen=window_size) 58 | self._total = 0.0 59 | self._count = 0 60 | 61 | def update(self, value): 62 | self._deque.append(value) 63 | self._count += 1 64 | self._total += value 65 | 66 | @property 67 | def median(self): 68 | d = np.array(list(self._deque)) 69 | return np.median(d) 70 | 71 | @property 72 | def avg(self): 73 | # if deque is empty, nan will be returned. 74 | d = np.array(list(self._deque)) 75 | return d.mean() 76 | 77 | @property 78 | def global_avg(self): 79 | return self._total / max(self._count, 1e-5) 80 | 81 | @property 82 | def latest(self): 83 | return self._deque[-1] if len(self._deque) > 0 else None 84 | 85 | @property 86 | def total(self): 87 | return self._total 88 | 89 | def reset(self): 90 | self._deque.clear() 91 | self._total = 0.0 92 | self._count = 0 93 | 94 | def clear(self): 95 | self._deque.clear() 96 | 97 | 98 | class MeterBuffer(defaultdict): 99 | """Computes and stores the average and current value""" 100 | 101 | def __init__(self, window_size=20): 102 | factory = functools.partial(AverageMeter, window_size=window_size) 103 | super().__init__(factory) 104 | 105 | def reset(self): 106 | for v in self.values(): 107 | v.reset() 108 | 109 | def get_filtered_meter(self, filter_key="time"): 110 | return {k: v for k, v in self.items() if filter_key in k} 111 | 112 | def update(self, values=None, **kwargs): 113 | if values is None: 114 | values = {} 115 | values.update(kwargs) 116 | for k, v in values.items(): 117 | if isinstance(v, torch.Tensor): 118 | v = v.detach() 119 | self[k].update(v) 120 | 121 | def clear_meters(self): 122 | for v in self.values(): 123 | v.clear() 124 | -------------------------------------------------------------------------------- /yolox/utils/model_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
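# [Editor's sketch] How these helpers are typically used before inference
# (the model/exp objects here are placeholders):
#
#   logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size)))
#   model = model.eval()
#   model = fuse_model(model)  # fold each BatchNorm into its preceding conv
#
# Fusion relies on the BatchNorm running statistics, so it is only valid for a
# model in eval() mode; the fused model is what gets exported or benchmarked.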
4 | 5 | import torch 6 | import torch.nn as nn 7 | from thop import profile 8 | 9 | from copy import deepcopy 10 | 11 | __all__ = [ 12 | "fuse_conv_and_bn", 13 | "fuse_model", 14 | "get_model_info", 15 | "replace_module", 16 | ] 17 | 18 | 19 | def get_model_info(model, tsize): 20 | 21 | stride = 64 22 | img = torch.zeros((1, 3, stride, stride), device=next(model.parameters()).device) 23 | flops, params = profile(deepcopy(model), inputs=(img,), verbose=False) 24 | params /= 1e6 25 | flops /= 1e9 26 | flops *= tsize[0] * tsize[1] / stride / stride * 2 # Gflops 27 | info = "Params: {:.2f}M, Gflops: {:.2f}".format(params, flops) 28 | return info 29 | 30 | 31 | def fuse_conv_and_bn(conv, bn): 32 | # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 33 | fusedconv = ( 34 | nn.Conv2d( 35 | conv.in_channels, 36 | conv.out_channels, 37 | kernel_size=conv.kernel_size, 38 | stride=conv.stride, 39 | padding=conv.padding, 40 | groups=conv.groups, 41 | bias=True, 42 | ) 43 | .requires_grad_(False) 44 | .to(conv.weight.device) 45 | ) 46 | 47 | # prepare filters 48 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 49 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 50 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape)) 51 | 52 | # prepare spatial bias 53 | b_conv = ( 54 | torch.zeros(conv.weight.size(0), device=conv.weight.device) 55 | if conv.bias is None 56 | else conv.bias 57 | ) 58 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div( 59 | torch.sqrt(bn.running_var + bn.eps) 60 | ) 61 | fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) 62 | 63 | return fusedconv 64 | 65 | 66 | def fuse_model(model): 67 | from yolox.models.network_blocks import BaseConv 68 | 69 | for m in model.modules(): 70 | if type(m) is BaseConv and hasattr(m, "bn"): 71 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 72 | delattr(m, "bn") # remove batchnorm 73 | m.forward = m.fuseforward # update forward 74 | return model 75 | 76 | 77 | def replace_module(module, replaced_module_type, new_module_type, replace_func=None): 78 | """ 79 | Replace given type in module to a new type. mostly used in deploy. 80 | 81 | Args: 82 | module (nn.Module): model to apply replace operation. 83 | replaced_module_type (Type): module type to be replaced. 84 | new_module_type (Type) 85 | replace_func (function): python function to describe replace logic. Defalut value None. 86 | 87 | Returns: 88 | model (nn.Module): module that already been replaced. 89 | """ 90 | 91 | def default_replace_func(replaced_module_type, new_module_type): 92 | return new_module_type() 93 | 94 | if replace_func is None: 95 | replace_func = default_replace_func 96 | 97 | model = module 98 | if isinstance(module, replaced_module_type): 99 | model = replace_func(replaced_module_type, new_module_type) 100 | else: # recurrsively replace 101 | for name, child in module.named_children(): 102 | new_child = replace_module(child, replaced_module_type, new_module_type) 103 | if new_child is not child: # child is already replaced 104 | model.add_module(name, new_child) 105 | 106 | return model 107 | -------------------------------------------------------------------------------- /yolox/utils/setup_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
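# [Editor's sketch] These helpers are meant to run once at process start-up,
# before data loaders or distributed training are set up (exact call sites vary
# across the training and demo scripts):
#
#   from yolox.utils import configure_nccl, configure_module
#
#   configure_module()  # raise the open-file limit, disable cv2 OpenCL/threads
#   configure_nccl()    # export NCCL env vars for multi-machine InfiniBand runs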
4 | 5 | import cv2 6 | 7 | import os 8 | import subprocess 9 | 10 | __all__ = ["configure_nccl", "configure_module"] 11 | 12 | 13 | def configure_nccl(): 14 | """Configure multi-machine environment variables of NCCL.""" 15 | os.environ["NCCL_LAUNCH_MODE"] = "PARALLEL" 16 | os.environ["NCCL_IB_HCA"] = subprocess.getoutput( 17 | "pushd /sys/class/infiniband/ > /dev/null; for i in mlx5_*; " 18 | "do cat $i/ports/1/gid_attrs/types/* 2>/dev/null " 19 | "| grep v >/dev/null && echo $i ; done; popd > /dev/null" 20 | ) 21 | os.environ["NCCL_IB_GID_INDEX"] = "3" 22 | os.environ["NCCL_IB_TC"] = "106" 23 | 24 | 25 | def configure_module(ulimit_value=8192): 26 | """ 27 | Configure pytorch module environment. setting of ulimit and cv2 will be set. 28 | 29 | Args: 30 | ulimit_value(int): default open file number on linux. Default value: 8192. 31 | """ 32 | # system setting 33 | try: 34 | import resource 35 | 36 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 37 | resource.setrlimit(resource.RLIMIT_NOFILE, (ulimit_value, rlimit[1])) 38 | except Exception: 39 | # Exception might be raised in Windows OS or rlimit reaches max limit number. 40 | # However, set rlimit value might not be necessary. 41 | pass 42 | 43 | # cv2 44 | # multiprocess might be harmful on performance of torch dataloader 45 | os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled" 46 | try: 47 | cv2.setNumThreads(0) 48 | cv2.ocl.setUseOpenCL(False) 49 | except Exception: 50 | # cv2 version mismatch might rasie exceptions. 51 | pass 52 | --------------------------------------------------------------------------------
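The model files above only define building blocks; nothing in this dump shows them wired together. The sketch below is an editor's addition, not a file from the repository: it assembles a detector in the same way the experiment files under exps/example/mot do. The depth, width, num_classes, and test-size values mirror the YOLOX-s MOT configuration and should be treated as assumptions, as should the dummy input.

import torch

from yolox.models import YOLOPAFPN, YOLOXHead, YOLOX
from yolox.utils import fuse_model, get_model_info

# YOLOX-s sized backbone and head for single-class (pedestrian) tracking.
depth, width, num_classes = 0.33, 0.50, 1
in_channels = [256, 512, 1024]
backbone = YOLOPAFPN(depth, width, in_channels=in_channels)
head = YOLOXHead(num_classes, width, in_channels=in_channels)
model = YOLOX(backbone, head).eval()

# Report parameter count and GFLOPs at the MOT test resolution.
print(get_model_info(model, (608, 1088)))

# Fold BatchNorm layers into the convolutions for faster inference.
model = fuse_model(model)

with torch.no_grad():
    dummy = torch.zeros(1, 3, 608, 1088)  # NCHW input at the MOT test size
    outputs = model(dummy)                # eval mode: decoded per-anchor predictions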