├── .dockerignore
├── .gitattributes
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── --bug-report.md
│   │   ├── --feature-request.md
│   │   └── -question.md
│   └── workflows
│       ├── ci-testing.yml
│       ├── greetings.yml
│       ├── rebase.yml
│       └── stale.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── data
│   ├── get_coco2017.sh
│   ├── get_voc.sh
│   └── mot.yaml
├── detect.py
├── hubconf.py
├── models
│   ├── __init__.py
│   ├── common.py
│   ├── experimental.py
│   ├── export.py
│   ├── hub
│   │   ├── yolov3-spp.yaml
│   │   ├── yolov5-fpn.yaml
│   │   └── yolov5-panet.yaml
│   ├── yolo.py
│   ├── yolov5_JDE.yaml
│   ├── yolov5l.yaml
│   ├── yolov5m.yaml
│   ├── yolov5s.yaml
│   └── yolov5x.yaml
├── mot_data
│   ├── caltech.10k.val
│   ├── caltech.train
│   ├── caltech.val
│   ├── citypersons.train
│   ├── citypersons.val
│   ├── cuhksysu.train
│   ├── cuhksysu.val
│   ├── eth.train
│   ├── mot16.train
│   ├── mot17.train
│   ├── mot19.train
│   ├── prw.train
│   └── prw.val
├── requirements.txt
├── test.py
├── track.py
├── tracker
│   ├── __init__.py
│   ├── basetrack.py
│   ├── matching.py
│   └── multitracker.py
├── tracker_utils
│   ├── datasets.py
│   ├── evaluation.py
│   ├── io.py
│   ├── kalman_filter.py
│   ├── log.py
│   ├── timer.py
│   ├── utils.py
│   └── visualization.py
├── train.py
├── utils
│   ├── __init__.py
│   ├── activations.py
│   ├── datasets.py
│   ├── general.py
│   ├── google_utils.py
│   └── torch_utils.py
└── weights
    └── download_weights.sh
/.dockerignore: -------------------------------------------------------------------------------- 1 | # Repo-specific DockerIgnore ------------------------------------------------------------------------------------------- 2 | # .git 3 | .cache 4 | .idea 5 | runs 6 | output 7 | coco 8 | storage.googleapis.com 9 | 10 | data/samples/* 11 | **/results*.txt 12 | *.jpg 13 | 14 | # Neural Network weights ----------------------------------------------------------------------------------------------- 15 | **/*.weights 16 | **/*.pt 17 | **/*.pth 18 | **/*.onnx 19 | **/*.mlmodel 20 | **/*.torchscript 21 | 22 | 23 | # Below Copied From .gitignore ----------------------------------------------------------------------------------------- 24 | # Below Copied From .gitignore ----------------------------------------------------------------------------------------- 25 | 26 | 27 | # GitHub Python GitIgnore ---------------------------------------------------------------------------------------------- 28 | # Byte-compiled / optimized / DLL files 29 | __pycache__/ 30 | *.py[cod] 31 | *$py.class 32 | 33 | # C extensions 34 | *.so 35 | 36 | # Distribution / packaging 37 | .Python 38 | env/ 39 | build/ 40 | develop-eggs/ 41 | dist/ 42 | downloads/ 43 | eggs/ 44 | .eggs/ 45 | lib/ 46 | lib64/ 47 | parts/ 48 | sdist/ 49 | var/ 50 | wheels/ 51 | *.egg-info/ 52 | .installed.cfg 53 | *.egg 54 | 55 | # PyInstaller 56 | # Usually these files are written by a python script from a template 57 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
58 | *.manifest 59 | *.spec 60 | 61 | # Installer logs 62 | pip-log.txt 63 | pip-delete-this-directory.txt 64 | 65 | # Unit test / coverage reports 66 | htmlcov/ 67 | .tox/ 68 | .coverage 69 | .coverage.* 70 | .cache 71 | nosetests.xml 72 | coverage.xml 73 | *.cover 74 | .hypothesis/ 75 | 76 | # Translations 77 | *.mo 78 | *.pot 79 | 80 | # Django stuff: 81 | *.log 82 | local_settings.py 83 | 84 | # Flask stuff: 85 | instance/ 86 | .webassets-cache 87 | 88 | # Scrapy stuff: 89 | .scrapy 90 | 91 | # Sphinx documentation 92 | docs/_build/ 93 | 94 | # PyBuilder 95 | target/ 96 | 97 | # Jupyter Notebook 98 | .ipynb_checkpoints 99 | 100 | # pyenv 101 | .python-version 102 | 103 | # celery beat schedule file 104 | celerybeat-schedule 105 | 106 | # SageMath parsed files 107 | *.sage.py 108 | 109 | # dotenv 110 | .env 111 | 112 | # virtualenv 113 | .venv 114 | venv/ 115 | ENV/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | 130 | 131 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore ----------------------------------------------- 132 | 133 | # General 134 | .DS_Store 135 | .AppleDouble 136 | .LSOverride 137 | 138 | # Icon must end with two \r 139 | Icon 140 | Icon? 141 | 142 | # Thumbnails 143 | ._* 144 | 145 | # Files that might appear in the root of a volume 146 | .DocumentRevisions-V100 147 | .fseventsd 148 | .Spotlight-V100 149 | .TemporaryItems 150 | .Trashes 151 | .VolumeIcon.icns 152 | .com.apple.timemachine.donotpresent 153 | 154 | # Directories potentially created on remote AFP share 155 | .AppleDB 156 | .AppleDesktop 157 | Network Trash Folder 158 | Temporary Items 159 | .apdisk 160 | 161 | 162 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore 163 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 164 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 165 | 166 | # User-specific stuff: 167 | .idea/* 168 | .idea/**/workspace.xml 169 | .idea/**/tasks.xml 170 | .idea/dictionaries 171 | .html # Bokeh Plots 172 | .pg # TensorFlow Frozen Graphs 173 | .avi # videos 174 | 175 | # Sensitive or high-churn files: 176 | .idea/**/dataSources/ 177 | .idea/**/dataSources.ids 178 | .idea/**/dataSources.local.xml 179 | .idea/**/sqlDataSources.xml 180 | .idea/**/dynamic.xml 181 | .idea/**/uiDesigner.xml 182 | 183 | # Gradle: 184 | .idea/**/gradle.xml 185 | .idea/**/libraries 186 | 187 | # CMake 188 | cmake-build-debug/ 189 | cmake-build-release/ 190 | 191 | # Mongo Explorer plugin: 192 | .idea/**/mongoSettings.xml 193 | 194 | ## File-based project format: 195 | *.iws 196 | 197 | ## Plugin-specific files: 198 | 199 | # IntelliJ 200 | out/ 201 | 202 | # mpeltonen/sbt-idea plugin 203 | .idea_modules/ 204 | 205 | # JIRA plugin 206 | atlassian-ide-plugin.xml 207 | 208 | # Cursive Clojure plugin 209 | .idea/replstate.xml 210 | 211 | # Crashlytics plugin (for Android Studio and IntelliJ) 212 | com_crashlytics_export_strings.xml 213 | crashlytics.properties 214 | crashlytics-build.properties 215 | fabric.properties 216 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # this drop notebooks from GitHub language stats 2 | *.ipynb linguist-vendored 3 | 
-------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/--bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41BBug report" 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | Before submitting a bug report, please be aware that your issue **must be reproducible** with all of the following, otherwise it is non-actionable, and we can not help you: 11 | - **Current repo**: run `git fetch && git status -uno` to check and `git pull` to update repo 12 | - **Common dataset**: coco.yaml or coco128.yaml 13 | - **Common environment**: Colab, Google Cloud, or Docker image. See https://github.com/ultralytics/yolov5#environments 14 | 15 | If this is a custom dataset/training question you **must include** your `train*.jpg`, `test*.jpg` and `results.png` figures, or we can not help you. You can generate these with `utils.plot_results()`. 16 | 17 | 18 | ## 🐛 Bug 19 | A clear and concise description of what the bug is. 20 | 21 | 22 | ## To Reproduce (REQUIRED) 23 | 24 | Input: 25 | ``` 26 | import torch 27 | 28 | a = torch.tensor([5]) 29 | c = a / 0 30 | ``` 31 | 32 | Output: 33 | ``` 34 | Traceback (most recent call last): 35 | File "/Users/glennjocher/opt/anaconda3/envs/env1/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code 36 | exec(code_obj, self.user_global_ns, self.user_ns) 37 | File "", line 5, in 38 | c = a / 0 39 | RuntimeError: ZeroDivisionError 40 | ``` 41 | 42 | 43 | ## Expected behavior 44 | A clear and concise description of what you expected to happen. 45 | 46 | 47 | ## Environment 48 | If applicable, add screenshots to help explain your problem. 49 | 50 | - OS: [e.g. Ubuntu] 51 | - GPU [e.g. 2080 Ti] 52 | 53 | 54 | ## Additional context 55 | Add any other context about the problem here. 
56 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/--feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F680Feature request" 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## 🚀 Feature 11 | 12 | 13 | ## Motivation 14 | 15 | 16 | 17 | ## Pitch 18 | 19 | 20 | 21 | ## Alternatives 22 | 23 | 24 | 25 | ## Additional context 26 | 27 | 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/-question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "❓Question" 3 | about: Ask a general question 4 | title: '' 5 | labels: question 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## ❔Question 11 | 12 | 13 | ## Additional context 14 | -------------------------------------------------------------------------------- /.github/workflows/ci-testing.yml: -------------------------------------------------------------------------------- 1 | name: CI CPU testing 2 | 3 | on: # https://help.github.com/en/actions/reference/events-that-trigger-workflows 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: "0 0 * * *" 8 | 9 | jobs: 10 | cpu-tests: 11 | 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | os: [ubuntu-latest, macos-latest, windows-latest] 17 | python-version: [3.8] 18 | model: ['yolov5s'] # models to test 19 | 20 | # Timeout: https://stackoverflow.com/a/59076067/4521646 21 | timeout-minutes: 50 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Set up Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | 29 | # Note: This uses an internal pip API and may not always work 30 | # https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow 31 | - name: Get pip cache 32 | id: pip-cache 33 | run: | 34 | python -c "from pip._internal.locations import USER_CACHE_DIR; print('::set-output name=dir::' + USER_CACHE_DIR)" 35 | 36 | - name: Cache pip 37 | uses: actions/cache@v1 38 | with: 39 | path: ${{ steps.pip-cache.outputs.dir }} 40 | key: ${{ runner.os }}-${{ matrix.python-version }}-pip-${{ hashFiles('requirements.txt') }} 41 | restore-keys: | 42 | ${{ runner.os }}-${{ matrix.python-version }}-pip- 43 | 44 | - name: Install dependencies 45 | run: | 46 | python -m pip install --upgrade pip 47 | pip install -qr requirements.txt -f https://download.pytorch.org/whl/cpu/torch_stable.html 48 | pip install -q onnx 49 | python --version 50 | pip --version 51 | pip list 52 | shell: bash 53 | 54 | - name: Download data 55 | run: | 56 | curl -L -o temp.zip https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip 57 | unzip -q temp.zip -d ../ 58 | rm temp.zip 59 | 60 | - name: Tests workflow 61 | run: | 62 | export PYTHONPATH="$PWD" # to run *.py. 
files in subdirectories 63 | di=cpu # inference devices # define device 64 | 65 | # train 66 | python train.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --cfg models/${{ matrix.model }}.yaml --epochs 1 --device $di 67 | # detect 68 | python detect.py --weights weights/${{ matrix.model }}.pt --device $di 69 | python detect.py --weights runs/exp0/weights/last.pt --device $di 70 | # test 71 | python test.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --device $di 72 | python test.py --img 256 --batch 8 --weights runs/exp0/weights/last.pt --device $di 73 | 74 | python models/yolo.py --cfg models/${{ matrix.model }}.yaml # inspect 75 | python models/export.py --img 256 --batch 1 --weights weights/${{ matrix.model }}.pt # export 76 | shell: bash 77 | -------------------------------------------------------------------------------- /.github/workflows/greetings.yml: -------------------------------------------------------------------------------- 1 | name: Greetings 2 | 3 | on: [pull_request_target, issues] 4 | 5 | jobs: 6 | greeting: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/first-interaction@v1 10 | with: 11 | repo-token: ${{ secrets.GITHUB_TOKEN }} 12 | pr-message: | 13 | Hello @${{ github.actor }}, thank you for submitting a PR! To allow your work to be integrated as seamlessly as possible, we advise you to: 14 | - Verify your PR is **up-to-date with origin/master.** If your PR is behind origin/master update by running the following, replacing 'feature' with the name of your local branch: 15 | ```bash 16 | git remote add upstream https://github.com/ultralytics/yolov5.git 17 | git fetch upstream 18 | git checkout feature # <----- replace 'feature' with local branch name 19 | git rebase upstream/master 20 | git push -u origin -f 21 | ``` 22 | - Verify all Continuous Integration (CI) **checks are passing**. 23 | - Reduce changes to the absolute **minimum** required for your bug fix or feature addition. _"It is not daily increase but daily decrease, hack away the unessential. The closer to the source, the less wastage there is."_ -Bruce Lee 24 | 25 | issue-message: | 26 | Hello @${{ github.actor }}, thank you for your interest in our work! Please visit our [Custom Training Tutorial](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data) to get started, and see our [Jupyter Notebook](https://github.com/ultralytics/yolov5/blob/master/tutorial.ipynb) Open In Colab, [Docker Image](https://hub.docker.com/r/ultralytics/yolov5), and [Google Cloud Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart) for example environments. 27 | 28 | If this is a bug report, please provide screenshots and **minimum viable code to reproduce your issue**, otherwise we can not help you. 29 | 30 | If this is a custom model or data training question, please note Ultralytics does **not** provide free personal support. As a leader in vision ML and AI, we do offer professional consulting, from simple expert advice up to delivery of fully customized, end-to-end production solutions for our clients, such as: 31 | - **Cloud-based AI** systems operating on **hundreds of HD video streams in realtime.** 32 | - **Edge AI** integrated into custom iOS and Android apps for realtime **30 FPS video inference.** 33 | - **Custom data training**, hyperparameter evolution, and model exportation to any destination. 34 | 35 | For more information please visit https://www.ultralytics.com. 
36 | -------------------------------------------------------------------------------- /.github/workflows/rebase.yml: -------------------------------------------------------------------------------- 1 | name: Automatic Rebase 2 | # https://github.com/marketplace/actions/automatic-rebase 3 | 4 | on: 5 | issue_comment: 6 | types: [created] 7 | 8 | jobs: 9 | rebase: 10 | name: Rebase 11 | if: github.event.issue.pull_request != '' && contains(github.event.comment.body, '/rebase') 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout the latest code 15 | uses: actions/checkout@v2 16 | with: 17 | fetch-depth: 0 18 | - name: Automatic Rebase 19 | uses: cirrus-actions/rebase@1.3.1 20 | env: 21 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 22 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Close stale issues 2 | on: 3 | schedule: 4 | - cron: "0 0 * * *" 5 | 6 | jobs: 7 | stale: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/stale@v1 11 | with: 12 | repo-token: ${{ secrets.GITHUB_TOKEN }} 13 | stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.' 14 | stale-pr-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.' 15 | days-before-stale: 30 16 | days-before-close: 5 17 | exempt-issue-label: 'documentation,tutorial' 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Repo-specific GitIgnore ---------------------------------------------------------------------------------------------- 2 | *.jpg 3 | *.jpeg 4 | *.png 5 | *.bmp 6 | *.tif 7 | *.tiff 8 | *.heic 9 | *.JPG 10 | *.JPEG 11 | *.PNG 12 | *.BMP 13 | *.TIF 14 | *.TIFF 15 | *.HEIC 16 | *.mp4 17 | *.mov 18 | *.MOV 19 | *.avi 20 | *.data 21 | *.json 22 | 23 | *.cfg 24 | !cfg/yolov3*.cfg 25 | 26 | storage.googleapis.com 27 | runs/* 28 | data/* 29 | !data/samples/zidane.jpg 30 | !data/samples/bus.jpg 31 | !data/coco.names 32 | !data/coco_paper.names 33 | !data/coco.data 34 | !data/coco_*.data 35 | !data/coco_*.txt 36 | !data/trainvalno5k.shapes 37 | !data/*.sh 38 | 39 | pycocotools/* 40 | results*.txt 41 | gcp_test*.sh 42 | 43 | # MATLAB GitIgnore ----------------------------------------------------------------------------------------------------- 44 | *.m~ 45 | *.mat 46 | !targets*.mat 47 | 48 | # Neural Network weights ----------------------------------------------------------------------------------------------- 49 | *.weights 50 | *.pt 51 | *.onnx 52 | *.mlmodel 53 | *.torchscript 54 | darknet53.conv.74 55 | yolov3-tiny.conv.15 56 | 57 | # GitHub Python GitIgnore ---------------------------------------------------------------------------------------------- 58 | # Byte-compiled / optimized / DLL files 59 | __pycache__/ 60 | *.py[cod] 61 | *$py.class 62 | 63 | # C extensions 64 | *.so 65 | 66 | # Distribution / packaging 67 | .Python 68 | env/ 69 | build/ 70 | develop-eggs/ 71 | dist/ 72 | downloads/ 73 | eggs/ 74 | .eggs/ 75 | lib/ 76 | lib64/ 77 | parts/ 78 | sdist/ 79 | var/ 80 | wheels/ 81 | *.egg-info/ 82 | .installed.cfg 83 | *.egg 84 | 85 | # PyInstaller 86 | # Usually these 
files are written by a python script from a template 87 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 88 | *.manifest 89 | *.spec 90 | 91 | # Installer logs 92 | pip-log.txt 93 | pip-delete-this-directory.txt 94 | 95 | # Unit test / coverage reports 96 | htmlcov/ 97 | .tox/ 98 | .coverage 99 | .coverage.* 100 | .cache 101 | nosetests.xml 102 | coverage.xml 103 | *.cover 104 | .hypothesis/ 105 | 106 | # Translations 107 | *.mo 108 | *.pot 109 | 110 | # Django stuff: 111 | *.log 112 | local_settings.py 113 | 114 | # Flask stuff: 115 | instance/ 116 | .webassets-cache 117 | 118 | # Scrapy stuff: 119 | .scrapy 120 | 121 | # Sphinx documentation 122 | docs/_build/ 123 | 124 | # PyBuilder 125 | target/ 126 | 127 | # Jupyter Notebook 128 | .ipynb_checkpoints 129 | 130 | # pyenv 131 | .python-version 132 | 133 | # celery beat schedule file 134 | celerybeat-schedule 135 | 136 | # SageMath parsed files 137 | *.sage.py 138 | 139 | # dotenv 140 | .env 141 | 142 | # virtualenv 143 | .venv 144 | venv/ 145 | ENV/ 146 | 147 | # Spyder project settings 148 | .spyderproject 149 | .spyproject 150 | 151 | # Rope project settings 152 | .ropeproject 153 | 154 | # mkdocs documentation 155 | /site 156 | 157 | # mypy 158 | .mypy_cache/ 159 | 160 | 161 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore ----------------------------------------------- 162 | 163 | # General 164 | .DS_Store 165 | .AppleDouble 166 | .LSOverride 167 | 168 | # Icon must end with two \r 169 | Icon 170 | Icon? 171 | 172 | # Thumbnails 173 | ._* 174 | 175 | # Files that might appear in the root of a volume 176 | .DocumentRevisions-V100 177 | .fseventsd 178 | .Spotlight-V100 179 | .TemporaryItems 180 | .Trashes 181 | .VolumeIcon.icns 182 | .com.apple.timemachine.donotpresent 183 | 184 | # Directories potentially created on remote AFP share 185 | .AppleDB 186 | .AppleDesktop 187 | Network Trash Folder 188 | Temporary Items 189 | .apdisk 190 | 191 | 192 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore 193 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 194 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 195 | 196 | # User-specific stuff: 197 | .idea/* 198 | .idea/**/workspace.xml 199 | .idea/**/tasks.xml 200 | .idea/dictionaries 201 | .html # Bokeh Plots 202 | .pg # TensorFlow Frozen Graphs 203 | .avi # videos 204 | 205 | # Sensitive or high-churn files: 206 | .idea/**/dataSources/ 207 | .idea/**/dataSources.ids 208 | .idea/**/dataSources.local.xml 209 | .idea/**/sqlDataSources.xml 210 | .idea/**/dynamic.xml 211 | .idea/**/uiDesigner.xml 212 | 213 | # Gradle: 214 | .idea/**/gradle.xml 215 | .idea/**/libraries 216 | 217 | # CMake 218 | cmake-build-debug/ 219 | cmake-build-release/ 220 | 221 | # Mongo Explorer plugin: 222 | .idea/**/mongoSettings.xml 223 | 224 | ## File-based project format: 225 | *.iws 226 | 227 | ## Plugin-specific files: 228 | 229 | # IntelliJ 230 | out/ 231 | 232 | # mpeltonen/sbt-idea plugin 233 | .idea_modules/ 234 | 235 | # JIRA plugin 236 | atlassian-ide-plugin.xml 237 | 238 | # Cursive Clojure plugin 239 | .idea/replstate.xml 240 | 241 | # Crashlytics plugin (for Android Studio and IntelliJ) 242 | com_crashlytics_export_strings.xml 243 | crashlytics.properties 244 | crashlytics-build.properties 245 | fabric.properties 246 | -------------------------------------------------------------------------------- /Dockerfile: 
-------------------------------------------------------------------------------- 1 | # Start FROM Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch 2 | FROM nvcr.io/nvidia/pytorch:20.03-py3 3 | 4 | # Install dependencies 5 | COPY requirements.txt . 6 | RUN pip install -r requirements.txt gsutil 7 | 8 | # Create working directory 9 | RUN mkdir -p /usr/src/app 10 | WORKDIR /usr/src/app 11 | 12 | # Copy contents 13 | COPY . /usr/src/app 14 | 15 | # Copy weights 16 | #RUN python3 -c "from models import *; \ 17 | #attempt_download('weights/yolov5s.pt'); \ 18 | #attempt_download('weights/yolov5m.pt'); \ 19 | #attempt_download('weights/yolov5l.pt')" 20 | 21 | 22 | # --------------------------------------------------- Extras Below --------------------------------------------------- 23 | 24 | # Build and Push 25 | # t=ultralytics/yolov5:latest && sudo docker build -t $t . && sudo docker push $t 26 | 27 | # Pull and Run 28 | # t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host $t 29 | 30 | # Pull and Run with local directory access 31 | # t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all -v "$(pwd)"/coco:/usr/src/coco $t 32 | 33 | # Kill all 34 | # sudo docker kill "$(sudo docker ps -q)" 35 | 36 | # Kill all image-based 37 | # sudo docker kill $(sudo docker ps -a -q --filter ancestor=ultralytics/yolov5:latest) 38 | 39 | # Bash into running container 40 | # sudo docker container exec -it ba65811811ab bash 41 | 42 | # Bash into stopped container 43 | # sudo docker commit 092b16b25c5b usr/resume && sudo docker run -it --gpus all --ipc=host -v "$(pwd)"/coco:/usr/src/coco --entrypoint=sh usr/resume 44 | 45 | # Send weights to GCP 46 | # python -c "from utils.utils import *; strip_optimizer('runs/exp0/weights/last.pt', 'temp.pt')" && gsutil cp temp.pt gs://* 47 | 48 | # Clean up 49 | # docker system prune -a --volumes 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | This repo is a codebase of the Joint Detection and Embedding (JDE) model. JDE is a fast and high-performance multiple-object tracker that learns the object detection task and the appearance embedding task simultaneously in a shared neural network. Following the recent release of YOLOv5, we replace the detector in JDE with YOLOv5 and achieve high performance on the MOT benchmark. For certain reasons, 3 | we cannot release our stronger version, but we hope this repo will help researchers/engineers develop more practical real-time MOT systems. 4 | 5 | # Requirements 6 | Just follow the environment configuration of [YOLOv5](https://github.com/ultralytics/yolov5). 7 | 8 | # Dataset zoo 9 | Just follow the [DATASET_ZOO](https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/DATASET_ZOO.md) of JDE.
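As context for how the joint detection-and-embedding formulation works in this codebase, the `Detect` layer in `models/yolo.py` (included further below) emits `na * (nc + 5)` detection channels plus `emb_dim` appearance-embedding channels from a single 1x1 convolution per scale, and L2-normalizes the embeddings before they are fused with the box predictions. The following is a minimal sketch of that channel split; the tensor sizes here are illustrative placeholders, not values taken from this repo's configs:

```python
import torch
import torch.nn.functional as F

# Illustrative sizes only; the real values come from the model YAML (e.g. yolov5_JDE.yaml).
na, nc, emb_dim = 3, 1, 256   # anchors per scale, classes, embedding dimension
no = nc + 5                   # detection outputs per anchor: box(4) + objectness + classes
bs, ny, nx = 2, 20, 20        # batch size and feature-map height/width

# One shared 1x1 conv produces both branches, as in Detect.__init__:
#   nn.Conv2d(ch, self.no * self.na + self.emb_dim, 1)
feat = torch.randn(bs, no * na + emb_dim, ny, nx)

# Split into the detection branch and the appearance-embedding branch (cf. Detect.forward)
det = feat[:, :na * no].view(bs, na, no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
emb = F.normalize(feat[:, na * no:].permute(0, 2, 3, 1).contiguous(), dim=-1)

print(det.shape)  # torch.Size([2, 3, 20, 20, 6])  -> box/objectness/class per anchor cell
print(emb.shape)  # torch.Size([2, 20, 20, 256])   -> one identity embedding per grid cell
```

The normalized embeddings are what the tracker in `tracker/multitracker.py` uses for appearance association, while the detection branch is decoded exactly as in YOLOv5.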
10 | 11 | 12 | ## Results on MOT16 Dataset 13 | | | MOTA | IDS |IDF1 | MOTP| FPS | Params(M) | 14 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | 15 | | JDE(1088x608) |68.5 |1496 |66.8 | 0.221 |21 | 298 | 16 | | Ours(1088x608) |71.0 |695 | 73.2 | 0.166 | 56 | 35 | 17 | 18 | ## Results on MOT20 Dataset 19 | | | MOTA | IDS |IDF1 | MOTP| FPS | Params(M) | 20 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | 21 | | JDE(1088x608) |49.1 |24507 |38.4 | 0.272 |14 | 298 | 22 | | Ours(1088x608) |55.3 |9190 | 47.5 | 0.287 | 24 | 35 | -------------------------------------------------------------------------------- /data/get_coco2017.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # COCO 2017 dataset http://cocodataset.org 3 | # Download command: bash yolov5/data/get_coco2017.sh 4 | # Train command: python train.py --data coco.yaml 5 | # Default dataset location is next to /yolov5: 6 | # /parent_folder 7 | # /coco 8 | # /yolov5 9 | 10 | 11 | # Download labels from Google Drive, accepting presented query 12 | filename="coco2017labels.zip" 13 | fileid="1cXZR_ckHki6nddOmcysCuuJFM--T-Q6L" 14 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 15 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 16 | rm ./cookie 17 | 18 | # Unzip labels 19 | unzip -q ${filename} # for coco.zip 20 | # tar -xzf ${filename} # for coco.tar.gz 21 | rm ${filename} 22 | 23 | # Download and unzip images 24 | cd coco/images 25 | f="train2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f # 19G, 118k images 26 | f="val2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f # 1G, 5k images 27 | # f="test2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f # 7G, 41k images 28 | 29 | # cd out 30 | cd ../.. 31 | -------------------------------------------------------------------------------- /data/get_voc.sh: -------------------------------------------------------------------------------- 1 | # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/ 2 | # Download command: bash ./data/get_voc.sh 3 | # Train command: python train.py --data voc.yaml 4 | # Default dataset location is next to /yolov5: 5 | # /parent_folder 6 | # /VOC 7 | # /yolov5 8 | 9 | 10 | start=`date +%s` 11 | 12 | # handle optional download dir 13 | if [ -z "$1" ] 14 | then 15 | # navigate to ~/tmp 16 | echo "navigating to ../tmp/ ..." 17 | mkdir -p ../tmp 18 | cd ../tmp/ 19 | else 20 | # check if is valid directory 21 | if [ ! -d $1 ]; then 22 | echo $1 "is not a valid directory" 23 | exit 0 24 | fi 25 | echo "navigating to" $1 "..." 26 | cd $1 27 | fi 28 | 29 | echo "Downloading VOC2007 trainval ..." 30 | # Download the data. 31 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 32 | echo "Downloading VOC2007 test data ..." 33 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 34 | echo "Done downloading." 35 | 36 | # Extract data 37 | echo "Extracting trainval ..." 38 | tar -xf VOCtrainval_06-Nov-2007.tar 39 | echo "Extracting test ..." 40 | tar -xf VOCtest_06-Nov-2007.tar 41 | echo "removing tars ..." 
42 | rm VOCtrainval_06-Nov-2007.tar 43 | rm VOCtest_06-Nov-2007.tar 44 | 45 | end=`date +%s` 46 | runtime=$((end-start)) 47 | 48 | echo "Completed in" $runtime "seconds" 49 | 50 | start=`date +%s` 51 | 52 | # handle optional download dir 53 | if [ -z "$1" ] 54 | then 55 | # navigate to ~/tmp 56 | echo "navigating to ../tmp/ ..." 57 | mkdir -p ../tmp 58 | cd ../tmp/ 59 | else 60 | # check if is valid directory 61 | if [ ! -d $1 ]; then 62 | echo $1 "is not a valid directory" 63 | exit 0 64 | fi 65 | echo "navigating to" $1 "..." 66 | cd $1 67 | fi 68 | 69 | echo "Downloading VOC2012 trainval ..." 70 | # Download the data. 71 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 72 | echo "Done downloading." 73 | 74 | 75 | # Extract data 76 | echo "Extracting trainval ..." 77 | tar -xf VOCtrainval_11-May-2012.tar 78 | echo "removing tar ..." 79 | rm VOCtrainval_11-May-2012.tar 80 | 81 | end=`date +%s` 82 | runtime=$((end-start)) 83 | 84 | echo "Completed in" $runtime "seconds" 85 | 86 | cd ../tmp 87 | echo "Spliting dataset..." 88 | python3 - "$@" < train.txt 148 | cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt 149 | 150 | python3 - "$@" <= 1 85 | p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() 86 | else: 87 | p, s, im0 = path, '', im0s 88 | 89 | save_path = str(Path(out) / Path(p).name) 90 | txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '') 91 | s += '%gx%g ' % img.shape[2:] # print string 92 | gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh 93 | if det is not None and len(det): 94 | # Rescale boxes from img_size to im0 size 95 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 96 | 97 | # Print results 98 | for c in det[:, -1].unique(): 99 | n = (det[:, -1] == c).sum() # detections per class 100 | s += '%g %ss, ' % (n, names[int(c)]) # add to string 101 | 102 | # Write results 103 | for *xyxy, conf, cls in det: 104 | if save_txt: # Write to file 105 | xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh 106 | with open(txt_path + '.txt', 'a') as f: 107 | f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format 108 | 109 | if save_img or view_img: # Add bbox to image 110 | label = '%s %.2f' % (names[int(cls)], conf) 111 | plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) 112 | 113 | # Print time (inference + NMS) 114 | print('%sDone. (%.3fs)' % (s, t2 - t1)) 115 | 116 | # Stream results 117 | if view_img: 118 | cv2.imshow(p, im0) 119 | if cv2.waitKey(1) == ord('q'): # q to quit 120 | raise StopIteration 121 | 122 | # Save results (image with detections) 123 | if save_img: 124 | if dataset.mode == 'images': 125 | cv2.imwrite(save_path, im0) 126 | else: 127 | if vid_path != save_path: # new video 128 | vid_path = save_path 129 | if isinstance(vid_writer, cv2.VideoWriter): 130 | vid_writer.release() # release previous video writer 131 | 132 | fourcc = 'mp4v' # output video codec 133 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 134 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 135 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 136 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) 137 | vid_writer.write(im0) 138 | 139 | if save_txt or save_img: 140 | print('Results saved to %s' % Path(out)) 141 | if platform == 'darwin' and not opt.update: # MacOS 142 | os.system('open ' + save_path) 143 | 144 | print('Done. 
(%.3fs)' % (time.time() - t0)) 145 | 146 | 147 | if __name__ == '__main__': 148 | parser = argparse.ArgumentParser() 149 | parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)') 150 | parser.add_argument('--source', type=str, default='inference/images', help='source') # file/folder, 0 for webcam 151 | parser.add_argument('--output', type=str, default='inference/output', help='output folder') # output folder 152 | parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)') 153 | parser.add_argument('--conf-thres', type=float, default=0.4, help='object confidence threshold') 154 | parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS') 155 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 156 | parser.add_argument('--view-img', action='store_true', help='display results') 157 | parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') 158 | parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3') 159 | parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') 160 | parser.add_argument('--augment', action='store_true', help='augmented inference') 161 | parser.add_argument('--update', action='store_true', help='update all models') 162 | opt = parser.parse_args() 163 | print(opt) 164 | 165 | with torch.no_grad(): 166 | if opt.update: # update all models (to fix SourceChangeWarning) 167 | for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']: 168 | detect() 169 | strip_optimizer(opt.weights) 170 | else: 171 | detect() 172 | -------------------------------------------------------------------------------- /hubconf.py: -------------------------------------------------------------------------------- 1 | """File for accessing YOLOv5 via PyTorch Hub https://pytorch.org/hub/ 2 | 3 | Usage: 4 | import torch 5 | model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, channels=3, classes=80) 6 | """ 7 | 8 | dependencies = ['torch', 'yaml'] 9 | import os 10 | 11 | import torch 12 | 13 | from models.yolo import Model 14 | from utils.google_utils import attempt_download 15 | 16 | 17 | def create(name, pretrained, channels, classes): 18 | """Creates a specified YOLOv5 model 19 | 20 | Arguments: 21 | name (str): name of model, i.e. 'yolov5s' 22 | pretrained (bool): load pretrained weights into the model 23 | channels (int): number of input channels 24 | classes (int): number of model classes 25 | 26 | Returns: 27 | pytorch model 28 | """ 29 | config = os.path.join(os.path.dirname(__file__), 'models', '%s.yaml' % name) # model.yaml path 30 | try: 31 | model = Model(config, channels, classes) 32 | if pretrained: 33 | ckpt = '%s.pt' % name # checkpoint filename 34 | attempt_download(ckpt) # download if not found locally 35 | state_dict = torch.load(ckpt, map_location=torch.device('cpu'))['model'].float().state_dict() # to FP32 36 | state_dict = {k: v for k, v in state_dict.items() if model.state_dict()[k].shape == v.shape} # filter 37 | model.load_state_dict(state_dict, strict=False) # load 38 | return model 39 | 40 | except Exception as e: 41 | help_url = 'https://github.com/ultralytics/yolov5/issues/36' 42 | s = 'Cache maybe be out of date, deleting cache and retrying may solve this. See %s for help.' 
% help_url 43 | raise Exception(s) from e 44 | 45 | 46 | def yolov5s(pretrained=False, channels=3, classes=80): 47 | """YOLOv5-small model from https://github.com/ultralytics/yolov5 48 | 49 | Arguments: 50 | pretrained (bool): load pretrained weights into the model, default=False 51 | channels (int): number of input channels, default=3 52 | classes (int): number of model classes, default=80 53 | 54 | Returns: 55 | pytorch model 56 | """ 57 | return create('yolov5s', pretrained, channels, classes) 58 | 59 | 60 | def yolov5m(pretrained=False, channels=3, classes=80): 61 | """YOLOv5-medium model from https://github.com/ultralytics/yolov5 62 | 63 | Arguments: 64 | pretrained (bool): load pretrained weights into the model, default=False 65 | channels (int): number of input channels, default=3 66 | classes (int): number of model classes, default=80 67 | 68 | Returns: 69 | pytorch model 70 | """ 71 | return create('yolov5m', pretrained, channels, classes) 72 | 73 | 74 | def yolov5l(pretrained=False, channels=3, classes=80): 75 | """YOLOv5-large model from https://github.com/ultralytics/yolov5 76 | 77 | Arguments: 78 | pretrained (bool): load pretrained weights into the model, default=False 79 | channels (int): number of input channels, default=3 80 | classes (int): number of model classes, default=80 81 | 82 | Returns: 83 | pytorch model 84 | """ 85 | return create('yolov5l', pretrained, channels, classes) 86 | 87 | 88 | def yolov5x(pretrained=False, channels=3, classes=80): 89 | """YOLOv5-xlarge model from https://github.com/ultralytics/yolov5 90 | 91 | Arguments: 92 | pretrained (bool): load pretrained weights into the model, default=False 93 | channels (int): number of input channels, default=3 94 | classes (int): number of model classes, default=80 95 | 96 | Returns: 97 | pytorch model 98 | """ 99 | return create('yolov5x', pretrained, channels, classes) 100 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaobin1231/YOLOv5_JDE/ea3149f280a2234a4bf840c7da8f5d0c77abd59d/models/__init__.py -------------------------------------------------------------------------------- /models/common.py: -------------------------------------------------------------------------------- 1 | # This file contains modules common to various models 2 | import math 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | 8 | def autopad(k, p=None): # kernel, padding 9 | # Pad to 'same' 10 | if p is None: 11 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 12 | return p 13 | 14 | 15 | def DWConv(c1, c2, k=1, s=1, act=True): 16 | # Depthwise convolution 17 | return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) 18 | 19 | 20 | class Conv(nn.Module): 21 | # Standard convolution 22 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 23 | super(Conv, self).__init__() 24 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) 25 | self.bn = nn.BatchNorm2d(c2) 26 | self.act = nn.Hardswish() if act else nn.Identity() 27 | 28 | def forward(self, x): 29 | return self.act(self.bn(self.conv(x))) 30 | 31 | def fuseforward(self, x): 32 | return self.act(self.conv(x)) 33 | 34 | 35 | class Bottleneck(nn.Module): 36 | # Standard bottleneck 37 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion 38 | super(Bottleneck, 
self).__init__() 39 | c_ = int(c2 * e) # hidden channels 40 | self.cv1 = Conv(c1, c_, 1, 1) 41 | self.cv2 = Conv(c_, c2, 3, 1, g=g) 42 | self.add = shortcut and c1 == c2 43 | 44 | def forward(self, x): 45 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 46 | 47 | 48 | class BottleneckCSP(nn.Module): 49 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 50 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 51 | super(BottleneckCSP, self).__init__() 52 | c_ = int(c2 * e) # hidden channels 53 | self.cv1 = Conv(c1, c_, 1, 1) 54 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 55 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 56 | self.cv4 = Conv(2 * c_, c2, 1, 1) 57 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 58 | self.act = nn.LeakyReLU(0.1, inplace=True) 59 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 60 | 61 | def forward(self, x): 62 | y1 = self.cv3(self.m(self.cv1(x))) 63 | y2 = self.cv2(x) 64 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 65 | 66 | 67 | class SPP(nn.Module): 68 | # Spatial pyramid pooling layer used in YOLOv3-SPP 69 | def __init__(self, c1, c2, k=(5, 9, 13)): 70 | super(SPP, self).__init__() 71 | c_ = c1 // 2 # hidden channels 72 | self.cv1 = Conv(c1, c_, 1, 1) 73 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) 74 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 75 | 76 | def forward(self, x): 77 | x = self.cv1(x) 78 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) 79 | 80 | 81 | class Focus(nn.Module): 82 | # Focus wh information into c-space 83 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 84 | super(Focus, self).__init__() 85 | self.conv = Conv(c1 * 4, c2, k, s, p, g, act) 86 | 87 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) 88 | return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) 89 | 90 | 91 | class Concat(nn.Module): 92 | # Concatenate a list of tensors along dimension 93 | def __init__(self, dimension=1): 94 | super(Concat, self).__init__() 95 | self.d = dimension 96 | 97 | def forward(self, x): 98 | return torch.cat(x, self.d) 99 | 100 | 101 | class Flatten(nn.Module): 102 | # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions 103 | @staticmethod 104 | def forward(x): 105 | return x.view(x.size(0), -1) 106 | 107 | 108 | class Classify(nn.Module): 109 | # Classification head, i.e. 
x(b,c1,20,20) to x(b,c2) 110 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups 111 | super(Classify, self).__init__() 112 | self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) 113 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) # to x(b,c2,1,1) 114 | self.flat = Flatten() 115 | 116 | def forward(self, x): 117 | z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list 118 | return self.flat(self.conv(z)) # flatten to x(b,c2) 119 | -------------------------------------------------------------------------------- /models/experimental.py: -------------------------------------------------------------------------------- 1 | # This file contains experimental modules 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from models.common import Conv, DWConv 8 | from utils.google_utils import attempt_download 9 | 10 | 11 | class CrossConv(nn.Module): 12 | # Cross Convolution Downsample 13 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 14 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 15 | super(CrossConv, self).__init__() 16 | c_ = int(c2 * e) # hidden channels 17 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 18 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 19 | self.add = shortcut and c1 == c2 20 | 21 | def forward(self, x): 22 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 23 | 24 | 25 | class C3(nn.Module): 26 | # Cross Convolution CSP 27 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 28 | super(C3, self).__init__() 29 | c_ = int(c2 * e) # hidden channels 30 | self.cv1 = Conv(c1, c_, 1, 1) 31 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 32 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 33 | self.cv4 = Conv(2 * c_, c2, 1, 1) 34 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 35 | self.act = nn.LeakyReLU(0.1, inplace=True) 36 | self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)]) 37 | 38 | def forward(self, x): 39 | y1 = self.cv3(self.m(self.cv1(x))) 40 | y2 = self.cv2(x) 41 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 42 | 43 | 44 | class Sum(nn.Module): 45 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 46 | def __init__(self, n, weight=False): # n: number of inputs 47 | super(Sum, self).__init__() 48 | self.weight = weight # apply weights boolean 49 | self.iter = range(n - 1) # iter object 50 | if weight: 51 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights 52 | 53 | def forward(self, x): 54 | y = x[0] # no weight 55 | if self.weight: 56 | w = torch.sigmoid(self.w) * 2 57 | for i in self.iter: 58 | y = y + x[i + 1] * w[i] 59 | else: 60 | for i in self.iter: 61 | y = y + x[i + 1] 62 | return y 63 | 64 | 65 | class GhostConv(nn.Module): 66 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 67 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 68 | super(GhostConv, self).__init__() 69 | c_ = c2 // 2 # hidden channels 70 | self.cv1 = Conv(c1, c_, k, s, g, act) 71 | self.cv2 = Conv(c_, c_, 5, 1, c_, act) 72 | 73 | def forward(self, x): 74 | y = self.cv1(x) 75 | return torch.cat([y, self.cv2(y)], 1) 76 | 77 | 78 | class GhostBottleneck(nn.Module): 79 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 80 | def __init__(self, c1, c2, k, s): 81 | 
super(GhostBottleneck, self).__init__() 82 | c_ = c2 // 2 83 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 84 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 85 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 86 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 87 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 88 | 89 | def forward(self, x): 90 | return self.conv(x) + self.shortcut(x) 91 | 92 | 93 | class MixConv2d(nn.Module): 94 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 95 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 96 | super(MixConv2d, self).__init__() 97 | groups = len(k) 98 | if equal_ch: # equal c_ per group 99 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 100 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 101 | else: # equal weight.numel() per group 102 | b = [c2] + [0] * groups 103 | a = np.eye(groups + 1, groups, k=-1) 104 | a -= np.roll(a, 1, axis=1) 105 | a *= np.array(k) ** 2 106 | a[0] = 1 107 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 108 | 109 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 110 | self.bn = nn.BatchNorm2d(c2) 111 | self.act = nn.LeakyReLU(0.1, inplace=True) 112 | 113 | def forward(self, x): 114 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 115 | 116 | 117 | class Ensemble(nn.ModuleList): 118 | # Ensemble of models 119 | def __init__(self): 120 | super(Ensemble, self).__init__() 121 | 122 | def forward(self, x, augment=False): 123 | y = [] 124 | for module in self: 125 | y.append(module(x, augment)[0]) 126 | # y = torch.stack(y).max(0)[0] # max ensemble 127 | # y = torch.cat(y, 1) # nms ensemble 128 | y = torch.stack(y).mean(0) # mean ensemble 129 | return y, None # inference, train output 130 | 131 | 132 | def attempt_load(weights, map_location=None): 133 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 134 | model = Ensemble() 135 | for w in weights if isinstance(weights, list) else [weights]: 136 | attempt_download(w) 137 | model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model 138 | 139 | if len(model) == 1: 140 | return model[-1] # return model 141 | else: 142 | print('Ensemble created with %s\n' % weights) 143 | for k in ['names', 'stride']: 144 | setattr(model, k, getattr(model[-1], k)) 145 | return model # return ensemble 146 | -------------------------------------------------------------------------------- /models/export.py: -------------------------------------------------------------------------------- 1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats 2 | 3 | Usage: 4 | $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1 5 | """ 6 | 7 | import argparse 8 | 9 | import torch 10 | 11 | from utils.google_utils import attempt_download 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') 16 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') 17 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 18 | opt = parser.parse_args() 19 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand 20 | print(opt) 21 | 22 | # Input 23 | img = 
torch.zeros((opt.batch_size, 3, *opt.img_size)) # image size(1,3,320,192) iDetection 24 | 25 | # Load PyTorch model 26 | attempt_download(opt.weights) 27 | model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float() 28 | model.eval() 29 | model.model[-1].export = True # set Detect() layer export=True 30 | y = model(img) # dry run 31 | 32 | # TorchScript export 33 | try: 34 | print('\nStarting TorchScript export with torch %s...' % torch.__version__) 35 | f = opt.weights.replace('.pt', '.torchscript.pt') # filename 36 | ts = torch.jit.trace(model, img) 37 | ts.save(f) 38 | print('TorchScript export success, saved as %s' % f) 39 | except Exception as e: 40 | print('TorchScript export failure: %s' % e) 41 | 42 | # ONNX export 43 | try: 44 | import onnx 45 | 46 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__) 47 | f = opt.weights.replace('.pt', '.onnx') # filename 48 | model.fuse() # only for ONNX 49 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], 50 | output_names=['classes', 'boxes'] if y is None else ['output']) 51 | 52 | # Checks 53 | onnx_model = onnx.load(f) # load onnx model 54 | onnx.checker.check_model(onnx_model) # check onnx model 55 | print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 56 | print('ONNX export success, saved as %s' % f) 57 | except Exception as e: 58 | print('ONNX export failure: %s' % e) 59 | 60 | # CoreML export 61 | try: 62 | import coremltools as ct 63 | 64 | print('\nStarting CoreML export with coremltools %s...' % ct.__version__) 65 | # convert model from torchscript and apply pixel scaling as per detect.py 66 | model = ct.convert(ts, inputs=[ct.ImageType(name='images', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) 67 | f = opt.weights.replace('.pt', '.mlmodel') # filename 68 | model.save(f) 69 | print('CoreML export success, saved as %s' % f) 70 | except Exception as e: 71 | print('CoreML export failure: %s' % e) 72 | 73 | # Finish 74 | print('\nExport complete. 
Visualize with https://github.com/lutzroeder/netron.') 75 | -------------------------------------------------------------------------------- /models/hub/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3-SPP head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, SPP, [512, [5, 9, 13]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /models/hub/yolov5-fpn.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, Bottleneck, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 6, BottleneckCSP, [1024]], # 9 25 | ] 26 | 27 | # YOLOv5 FPN head 28 | head: 29 | [[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large) 30 | 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium) 35 | 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small) 40 | 41 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | -------------------------------------------------------------------------------- 
/models/hub/yolov5-panet.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [116,90, 156,198, 373,326] # P5/32 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [10,13, 16,30, 33,23] # P3/8 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 PANet head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P5, P4, P3) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | from copy import deepcopy 4 | from pathlib import Path 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, Concat 11 | from models.experimental import MixConv2d, CrossConv, C3 12 | from utils.general import check_anchor_order, make_divisible, check_file 13 | from utils.torch_utils import ( 14 | time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, select_device) 15 | 16 | 17 | class Detect(nn.Module): 18 | def __init__(self, nc=80, anchors=(), ch=(), emb_dim=256): # detection layer 19 | super(Detect, self).__init__() 20 | self.stride = None # strides computed during build 21 | self.nc = nc # number of classes 22 | self.no = nc + 5 # number of outputs per anchor 23 | self.emb_dim = emb_dim # number of reid predictions dims 24 | self.nl = len(anchors) # number of detection layers 25 | self.na = len(anchors[0]) // 2 # number of anchors 26 | self.grid = [torch.zeros(1)] * self.nl # init grid 27 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 28 | self.register_buffer('anchors', a) # shape(nl,na,2) 29 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 30 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na + self.emb_dim, 1) for x in ch) # output conv 31 | self.export = False # onnx export 32 | 33 | def forward(self, x): 34 | # x = x.copy() # for profiling 35 | z, p, p_emb = [], [], [] # inference output 36 | self.training |= self.export 37 | for i in range(self.nl): 38 | x[i] = self.m[i](x[i]) # conv 39 | bs, _, 
ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 40 | p.append(x[i][:, :self.na * self.no].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()) 41 | p_emb.append(x[i][:, self.na * self.no:].permute(0, 2, 3, 1).contiguous()) 42 | 43 | if not self.training: # inference 44 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 45 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 46 | 47 | y = p[i].sigmoid() 48 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy 49 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 50 | embedding_pred = F.normalize(p_emb[i].unsqueeze(1).repeat(1, self.na, 1, 1, 1).contiguous(), dim=-1) 51 | fusion_res = torch.cat([y, embedding_pred], dim=-1) 52 | z.append(fusion_res.view(bs, -1, self.no + self.emb_dim)) 53 | 54 | return (p, p_emb) if self.training else (torch.cat(z, 1), p, p_emb) 55 | 56 | @staticmethod 57 | def _make_grid(nx=20, ny=20): 58 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 59 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 60 | 61 | 62 | class Model(nn.Module): 63 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None): # model, input channels, number of classes 64 | super(Model, self).__init__() 65 | if isinstance(cfg, dict): 66 | self.yaml = cfg # model dict 67 | else: # is *.yaml 68 | import yaml # for torch hub 69 | self.yaml_file = Path(cfg).name 70 | with open(cfg) as f: 71 | self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict 72 | 73 | # Define model 74 | if nc and nc != self.yaml['nc']: 75 | print('Overriding %s nc=%g with nc=%g' % (cfg, self.yaml['nc'], nc)) 76 | self.yaml['nc'] = nc # override yaml value 77 | 78 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist, ch_out 79 | # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) 80 | 81 | # Build strides, anchors 82 | m = self.model[-1] # Detect() 83 | if isinstance(m, Detect): 84 | s = 128 # 2x min stride 85 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[0]]) # forward 86 | m.anchors /= m.stride.view(-1, 1, 1) 87 | check_anchor_order(m) 88 | self.stride = m.stride 89 | self._initialize_biases() # only run once 90 | 91 | self.emb_dim, self.nID = self.yaml['emb_dim'], self.yaml['nID'] 92 | self.classifier = nn.Linear(self.emb_dim, self.nID) if self.nID > 0 else None 93 | 94 | # Init weights, biases 95 | initialize_weights(self) 96 | self.info() 97 | print('') 98 | 99 | def forward(self, x, augment=False, profile=False): 100 | if augment: 101 | img_size = x.shape[-2:] # height, width 102 | s = [1, 0.83, 0.67] # scales 103 | f = [None, 3, None] # flips (2-ud, 3-lr) 104 | y = [] # outputs 105 | for si, fi in zip(s, f): 106 | xi = scale_img(x.flip(fi) if fi else x, si) 107 | yi = self.forward_once(xi)[0] # forward 108 | # cv2.imwrite('img%g.jpg' % s, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1]) # save 109 | yi[..., :4] /= si # de-scale 110 | if fi == 2: 111 | yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud 112 | elif fi == 3: 113 | yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr 114 | y.append(yi) 115 | return torch.cat(y, 1), None # augmented inference, train 116 | else: 117 | return self.forward_once(x, profile) # single-scale inference, train 118 | 119 | def forward_once(self, x, profile=False): 120 | y, dt = [], [] # outputs 121 | for m in self.model: 122 | if m.f != -1: # if not from previous layer 123 | x = y[m.f] if isinstance(m.f, int) else [x if j 
== -1 else y[j] for j in m.f] # from earlier layers 124 | 125 | if profile: 126 | try: 127 | import thop 128 | o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # FLOPS 129 | except: 130 | o = 0 131 | t = time_synchronized() 132 | for _ in range(10): 133 | _ = m(x) 134 | dt.append((time_synchronized() - t) * 100) 135 | print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) 136 | 137 | x = m(x) # run 138 | y.append(x if m.i in self.save else None) # save output 139 | 140 | if profile: 141 | print('%.1fms total' % sum(dt)) 142 | return x 143 | 144 | def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 145 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 146 | m = self.model[-1] # Detect() module 147 | for mi, s in zip(m.m, m.stride): # from 148 | t_dim = mi.bias.size()[0] 149 | b = mi.bias[:t_dim-m.emb_dim].view(m.na, -1) # conv.bias(255) to (3,85) 150 | b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 151 | b[:, 5:m.no] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 152 | mi.bias = torch.nn.Parameter(torch.cat([b.view(-1), mi.bias[t_dim-m.emb_dim:]]), requires_grad=True) 153 | 154 | def _print_biases(self): 155 | m = self.model[-1] # Detect() module 156 | for mi in m.m: # from 157 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 158 | print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 159 | 160 | 161 | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers 162 | print('Fusing layers... ', end='') 163 | for m in self.model.modules(): 164 | if type(m) is Conv: 165 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatability 166 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 167 | m.bn = None # remove batchnorm 168 | m.forward = m.fuseforward # update forward 169 | self.info() 170 | return self 171 | 172 | def info(self): # print model information 173 | model_info(self) 174 | 175 | 176 | def parse_model(d, ch): # model_dict, input_channels(3) 177 | print('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) 178 | anchors, nc, gd, gw, emb_dim = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d['emb_dim'] 179 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 180 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 181 | 182 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 183 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 184 | m = eval(m) if isinstance(m, str) else m # eval strings 185 | for j, a in enumerate(args): 186 | try: 187 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 188 | except: 189 | pass 190 | 191 | n = max(round(n * gd), 1) if n > 1 else n # depth gain 192 | if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]: 193 | c1, c2 = ch[f], args[0] 194 | c2 = make_divisible(c2 * gw, 8) if c2 != no else c2 195 | 196 | args = [c1, c2, *args[1:]] 197 | if m in [BottleneckCSP, C3]: 198 | args.insert(2, n) 199 | n = 1 200 | elif m is nn.BatchNorm2d: 201 | args = [ch[f]] 202 | elif m is Concat: 203 | c2 = sum([ch[-1 if x == -1 else x + 1] for x in f]) 204 | elif m is Detect: 205 | args.append([ch[x + 1] for x in f]) 206 | args.append(emb_dim) 207 | if isinstance(args[1], int): # number of anchors 208 
| args[1] = [list(range(args[1] * 2))] * len(f) 209 | else: 210 | c2 = ch[f] 211 | 212 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module 213 | t = str(m)[8:-2].replace('__main__.', '') # module type 214 | np = sum([x.numel() for x in m_.parameters()]) # number params 215 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 216 | print('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print 217 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 218 | layers.append(m_) 219 | ch.append(c2) 220 | return nn.Sequential(*layers), sorted(save) 221 | 222 | 223 | if __name__ == '__main__': 224 | parser = argparse.ArgumentParser() 225 | parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml') 226 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 227 | opt = parser.parse_args() 228 | opt.cfg = check_file(opt.cfg) # check file 229 | device = select_device(opt.device) 230 | 231 | # Create model 232 | model = Model(opt.cfg).to(device) 233 | model.train() -------------------------------------------------------------------------------- /models/yolov5_JDE.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | emb_dim: 256 # reid dim 7 | nID: 14455 # total IDs of objects 8 | 9 | 10 | # anchors 11 | anchors: 12 | - [8,24, 11,34, 16,48, 23,68] # P3/8 13 | - [32,96, 45,135, 64,192, 90,271] # P4/16 14 | - [128,384, 180,540, 256,640, 512,640] # P5/32 15 | 16 | 17 | # YOLOv5 backbone 18 | backbone: 19 | # [from, number, module, args] 20 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 21 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 22 | [-1, 3, BottleneckCSP, [128]], 23 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 24 | [-1, 9, BottleneckCSP, [256]], 25 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 26 | [-1, 9, BottleneckCSP, [512]], 27 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 28 | [-1, 1, SPP, [1024, [5, 9, 13]]], 29 | [-1, 3, BottleneckCSP, [1024, False]], # 9 30 | ] 31 | 32 | # YOLOv5 head 33 | head: 34 | [[-1, 1, Conv, [512, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 37 | [-1, 3, BottleneckCSP, [512, False]], # 13 38 | 39 | [-1, 1, Conv, [256, 1, 1]], 40 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 41 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 42 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 43 | 44 | [-1, 1, Conv, [256, 3, 2]], 45 | [[-1, 14], 1, Concat, [1]], # cat head P4 46 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 47 | 48 | [-1, 1, Conv, [512, 3, 2]], 49 | [[-1, 10], 1, Concat, [1]], # cat head P5 50 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 51 | 52 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 53 | ] -------------------------------------------------------------------------------- /models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, 
module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.67 # model depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, 
Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5x.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.33 # model depth multiple 4 | width_multiple: 1.25 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # pip install -r requirements.txt 2 | Cython 3 | matplotlib>=3.2.2 4 | numpy>=1.18.5 5 | opencv-python>=4.1.2 6 | pillow 7 | # pycocotools>=2.0 8 | PyYAML>=5.3 9 | scipy>=1.4.1 10 | tensorboard>=2.2 11 | torch>=1.6.0 12 | torchvision>=0.7.0 13 | tqdm>=4.41.0 14 | 15 | # Conda commands (in place of pip) --------------------------------------------- 16 | # conda update -yn base -c defaults conda 17 | # conda 
install -yc anaconda numpy opencv matplotlib tqdm pillow ipython 18 | # conda install -yc conda-forge scikit-image pycocotools tensorboard 19 | # conda install -yc spyder-ide spyder-line-profiler 20 | # conda install -yc pytorch pytorch torchvision 21 | # conda install -yc conda-forge protobuf numpy && pip install onnx==1.6.0 # https://github.com/onnx/onnx#linux-and-macos 22 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import json 4 | import os 5 | import shutil 6 | from pathlib import Path 7 | import cv2 8 | from utils.datasets import letterbox 9 | 10 | import numpy as np 11 | import torch 12 | import yaml 13 | from tqdm import tqdm 14 | 15 | from models.experimental import attempt_load 16 | from utils.datasets import create_dataloader 17 | from utils.general import ( 18 | coco80_to_coco91_class, check_file, check_img_size, compute_loss, non_max_suppression, 19 | scale_coords, xyxy2xywh, clip_coords, plot_images, plot_test_images, xywh2xyxy, box_iou, output_to_target, ap_per_class) 20 | from utils.torch_utils import select_device, time_synchronized 21 | 22 | 23 | def test(data, 24 | weights=None, 25 | batch_size=16, 26 | imgsz=640, 27 | conf_thres=0.3, 28 | iou_thres=0.5, # for NMS 29 | save_json=False, 30 | single_cls=False, 31 | augment=False, 32 | verbose=False, 33 | model=None, 34 | dataloader=None, 35 | save_dir='', 36 | merge=False, 37 | emb_dim=256, 38 | save_txt=False): 39 | # Initialize/load model and set device 40 | training = model is not None 41 | if training: # called by train.py 42 | device = next(model.parameters()).device # get model device 43 | 44 | else: # called directly 45 | device = select_device(opt.device, batch_size=batch_size) 46 | merge, save_txt = opt.merge, opt.save_txt # use Merge NMS, save *.txt labels 47 | if save_txt: 48 | out = Path('inference/output') 49 | if os.path.exists(out): 50 | shutil.rmtree(out) # delete output folder 51 | os.makedirs(out) # make new output folder 52 | 53 | # Remove previous 54 | for f in glob.glob(str(Path(save_dir) / 'test_batch*.jpg')): 55 | os.remove(f) 56 | 57 | # Load model 58 | model = attempt_load(weights, map_location=device) # load FP32 model 59 | imgsz = [check_img_size(x, model.stride.max()) for x in imgsz] 60 | 61 | # Half 62 | half = device.type != 'cpu' # half precision only supported on CUDA 63 | if half: 64 | model.half() 65 | 66 | # Configure 67 | model.eval() 68 | with open(data) as f: 69 | data = yaml.load(f, Loader=yaml.FullLoader) # model dict 70 | nc = 1 if single_cls else int(data['nc']) # number of classes 71 | iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95 72 | niou = iouv.numel() 73 | 74 | # Dataloader 75 | if not training: 76 | if len(imgsz) == 1: 77 | img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img 78 | else: 79 | img = torch.zeros((1, 3, imgsz[1], imgsz[0]), device=device) 80 | _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once 81 | root = data['root'] 82 | path = data['test'] if opt.task == 'test' else data['test_emb'] # path to val/test images 83 | dataloader = create_dataloader(root, path, imgsz, batch_size, model.stride.max(), opt, 84 | hyp=None, augment=False, cache=False, pad=0.5, rect=False)[0] 85 | 86 | seen = 0 87 | names = model.names if hasattr(model, 'names') else model.module.names 88 | s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 
'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95') 89 | p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0. 90 | jdict, stats, ap, ap_class = [], [], [], [] 91 | loss = torch.zeros(4, device=device) 92 | for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): 93 | img = img.to(device, non_blocking=True) 94 | img = img.half() if half else img.float() # uint8 to fp16/32 95 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 96 | targets = targets.to(device) 97 | nb, _, height, width = img.shape # batch size, channels, height, width 98 | whwh = torch.Tensor([width, height, width, height]).to(device) 99 | 100 | # Disable gradients 101 | with torch.no_grad(): 102 | # Run model 103 | t = time_synchronized() 104 | inf_out, train_out_p, train_out_pemb = model(img, augment=augment) # inference and training outputs 105 | t0 += time_synchronized() - t 106 | 107 | # Compute loss 108 | if training: # if model has loss hyperparameters 109 | loss += compute_loss([x.float() for x in train_out_p], [x.float() for x in train_out_pemb], 110 | targets, model)[1][:4] # GIoU, obj, cls, lid 111 | 112 | # Run NMS 113 | t = time_synchronized() 114 | output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, 115 | merge=merge, emb_dim=emb_dim) 116 | t1 += time_synchronized() - t 117 | 118 | ''' 119 | images = letterbox(cv2.imread(paths[1]), [608,1088], auto=False, scaleup=False)[0] 120 | d = output[1] 121 | if d is None: 122 | continue 123 | for i in range(len(d)): 124 | cv2.rectangle(images, (int(d[i][0]), int(d[i][1])), (int(d[i][2]), int(d[i][3])), (0, 0, 255), 2) 125 | cv2.imshow("image", images) 126 | cv2.waitKey(0) 127 | ''' 128 | 129 | # Statistics per image 130 | for si, pred in enumerate(output): 131 | labels = targets[targets[:, 0] == si, 1:] 132 | nl = len(labels) 133 | tcls = labels[:, 0].tolist() if nl else [] # target class 134 | seen += 1 135 | 136 | if pred is None: 137 | if nl: 138 | stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) 139 | continue 140 | 141 | 142 | # Clip boxes to image bounds 143 | clip_coords(pred, (height, width)) 144 | 145 | # Assign all predictions as incorrect 146 | correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device) 147 | if nl: 148 | detected = [] # target indices 149 | tcls_tensor = labels[:, 0] 150 | 151 | # target boxes 152 | tbox = xywh2xyxy(labels[:, 2:6]) * whwh 153 | 154 | # Per target class 155 | for cls in torch.unique(tcls_tensor): 156 | ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1) # prediction indices 157 | pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1) # target indices 158 | 159 | # Search for detections 160 | if pi.shape[0]: 161 | # Prediction to target ious 162 | ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1) # best ious, indices 163 | 164 | # Append detections 165 | for j in (ious > iouv[0]).nonzero(as_tuple=False): 166 | d = ti[i[j]] # detected target 167 | if d not in detected: 168 | detected.append(d) 169 | correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn 170 | if len(detected) == nl: # all targets already located in image 171 | break 172 | 173 | # Append statistics (correct, conf, pcls, tcls) 174 | stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) 175 | 176 | # Plot images 177 | if batch_i < 1: 178 | f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i) # filename 179 | plot_images(img, targets, paths, str(f), names) # ground truth 180 | f = Path(save_dir) / ('test_batch%g_pred.jpg' % 
batch_i) 181 | plot_test_images(img, output_to_target(output, width, height), paths, str(f), names) # predictions 182 | 183 | # Compute statistics 184 | stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy 185 | if len(stats) and stats[0].any(): 186 | p, r, ap, f1, ap_class = ap_per_class(*stats) 187 | p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1) # [P, R, AP@0.5, AP@0.5:0.95] 188 | mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() 189 | nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class 190 | else: 191 | nt = torch.zeros(1) 192 | 193 | # Print results 194 | pf = '%20s' + '%12.3g' * 6 # print format 195 | print(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) 196 | 197 | # Print results per class 198 | if verbose and nc > 1 and len(stats): 199 | for i, c in enumerate(ap_class): 200 | print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) 201 | 202 | # Print speeds 203 | t = tuple(x / seen * 1E3 for x in (t0, t1, t0+t1)) + (imgsz[0], imgsz[1], batch_size) 204 | if not training: 205 | print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t) 206 | 207 | # Return results 208 | model.float() # for training 209 | maps = np.zeros(nc) + map 210 | for i, c in enumerate(ap_class): 211 | maps[c] = ap[i] 212 | return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t 213 | 214 | 215 | if __name__ == '__main__': 216 | parser = argparse.ArgumentParser(prog='test.py') 217 | parser.add_argument('--weights', nargs='+', type=str, default='runs/last.pt', help='model.pt path(s)') 218 | parser.add_argument('--data', type=str, default='data/mot.yaml', help='*.data path') 219 | parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch') 220 | parser.add_argument('--img-size', type=int, default=[1088,608], help='inference size (pixels)') 221 | parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold') 222 | parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS') 223 | parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file') 224 | parser.add_argument('--task', default='test', help="'val', 'test', 'study'") 225 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 226 | parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') 227 | parser.add_argument('--augment', action='store_true', help='augmented inference') 228 | parser.add_argument('--merge', action='store_true', help='use Merge NMS') 229 | parser.add_argument('--verbose', action='store_true', help='report mAP by class') 230 | parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') 231 | parser.add_argument('--emb_dim', action='store_true', help='dim of reid prediction', default=256) 232 | opt = parser.parse_args() 233 | opt.save_json |= opt.data.endswith('coco.yaml') 234 | opt.data = check_file(opt.data) # check file 235 | print(opt) 236 | 237 | if opt.task in ['val', 'test']: # run normally 238 | test(opt.data, 239 | opt.weights, 240 | opt.batch_size, 241 | opt.img_size, 242 | opt.conf_thres, 243 | opt.iou_thres, 244 | opt.save_json, 245 | opt.single_cls, 246 | opt.augment, 247 | opt.verbose, 248 | emb_dim=opt.emb_dim) 249 | 250 | elif opt.task == 'study': # run over a range of settings and save/plot 251 | for weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']: 252 | f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem) # filename to save to 253 | x = list(range(352, 832, 64)) # x axis 254 | y = [] # y axis 255 | for i in x: # img-size 256 | print('\nRunning %s point %s...' % (f, i)) 257 | r, _, t = test(opt.data, weights, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json) 258 | y.append(r + t) # results and times 259 | np.savetxt(f, y, fmt='%10.4g') # save 260 | os.system('zip -r study.zip study_*.txt') 261 | # plot_study_txt(f, x) # plot 262 | -------------------------------------------------------------------------------- /track.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import cv2 4 | import logging 5 | import argparse 6 | import motmetrics as mm 7 | 8 | import torch 9 | from tracker.multitracker import JDETracker 10 | from tracker_utils import visualization as vis 11 | from tracker_utils.log import logger 12 | from tracker_utils.timer import Timer 13 | from tracker_utils.evaluation import Evaluator 14 | # from tracker_utils.parse_config import parse_model_cfg 15 | import tracker_utils.datasets as datasets 16 | from tracker_utils.utils import * 17 | 18 | 19 | def write_results(filename, results, data_type): 20 | if data_type == 'mot': 21 | save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 22 | elif data_type == 'kitti': 23 | save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' 24 | else: 25 | raise ValueError(data_type) 26 | 27 | with open(filename, 'w') as f: 28 | for frame_id, tlwhs, track_ids in results: 29 | if data_type == 'kitti': 30 | frame_id -= 1 31 | for tlwh, track_id in zip(tlwhs, track_ids): 32 | if track_id < 0: 33 | continue 34 | x1, y1, w, h = tlwh 35 | x2, y2 = x1 + w, y1 + h 36 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h) 37 | f.write(line) 38 | logger.info('save results to {}'.format(filename)) 39 | 40 | 41 | def eval_seq(opt, dataloader, data_type, result_filename, save_dir=None, show_image=True, frame_rate=30): 42 | if save_dir: 43 | mkdir_if_missing(save_dir) 44 | tracker = JDETracker(opt, frame_rate=frame_rate) 45 | timer = Timer() 46 | results = [] 47 | frame_id = 0 48 | for path, img, img0 in dataloader: 49 | if frame_id % 
20 == 0: 50 | logger.info('Processing frame {} ({:.2f} fps)'.format(frame_id, 1./max(1e-5, timer.average_time))) 51 | 52 | # run tracking 53 | timer.tic() 54 | blob = torch.from_numpy(img).cuda().unsqueeze(0).half() 55 | online_targets = tracker.update(blob, img0, path) 56 | online_tlwhs = [] 57 | online_ids = [] 58 | for t in online_targets: 59 | tlwh = t.tlwh 60 | tid = t.track_id 61 | vertical = tlwh[2] / tlwh[3] > 1.6 62 | if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical: 63 | online_tlwhs.append(tlwh) 64 | online_ids.append(tid) 65 | timer.toc() 66 | # save results 67 | results.append((frame_id + 1, online_tlwhs, online_ids)) 68 | if show_image or save_dir is not None: 69 | online_im = vis.plot_tracking(img0, online_tlwhs, online_ids, frame_id=frame_id, 70 | fps=1. / timer.average_time) 71 | if show_image: 72 | cv2.imshow('online_im', online_im) 73 | if save_dir is not None: 74 | cv2.imwrite(os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), online_im) 75 | frame_id += 1 76 | # save results 77 | write_results(result_filename, results, data_type) 78 | return frame_id, timer.average_time, timer.calls 79 | 80 | 81 | 82 | def main(opt, data_root='/data/MOT16/train', det_root=None, seqs=('MOT16-05',), exp_name='demo', 83 | save_images=False, save_videos=False, show_image=True): 84 | logger.setLevel(logging.INFO) 85 | result_root = os.path.join(data_root, '..', 'results', exp_name) 86 | mkdir_if_missing(result_root) 87 | data_type = 'mot' 88 | 89 | # run tracking 90 | accs = [] 91 | n_frame = 0 92 | timer_avgs, timer_calls = [], [] 93 | for seq in seqs: 94 | output_dir = os.path.join(data_root, '..','outputs', exp_name, seq) if save_images or save_videos else None 95 | 96 | logger.info('start seq: {}'.format(seq)) 97 | dataloader = datasets.LoadImages(osp.join(data_root, seq, 'img1'), opt.img_size) 98 | result_filename = os.path.join(result_root, '{}.txt'.format(seq)) 99 | meta_info = open(os.path.join(data_root, seq, 'seqinfo.ini')).read() 100 | frame_rate = int(meta_info[meta_info.find('frameRate')+10:meta_info.find('\nseqLength')]) 101 | nf, ta, tc = eval_seq(opt, dataloader, data_type, result_filename, 102 | save_dir=output_dir, show_image=show_image, frame_rate=frame_rate) 103 | n_frame += nf 104 | timer_avgs.append(ta) 105 | timer_calls.append(tc) 106 | 107 | # eval 108 | logger.info('Evaluate seq: {}'.format(seq)) 109 | evaluator = Evaluator(data_root, seq, data_type) 110 | accs.append(evaluator.eval_file(result_filename)) 111 | if save_videos: 112 | output_video_path = osp.join(output_dir, '{}.mp4'.format(seq)) 113 | cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg -c:v copy {}'.format(output_dir, output_video_path) 114 | os.system(cmd_str) 115 | timer_avgs = np.asarray(timer_avgs) 116 | timer_calls = np.asarray(timer_calls) 117 | all_time = np.dot(timer_avgs, timer_calls) 118 | avg_time = all_time / np.sum(timer_calls) 119 | logger.info('Time elapsed: {:.2f} seconds, FPS: {:.2f}'.format(all_time, 1.0 / avg_time)) 120 | 121 | # get summary 122 | metrics = mm.metrics.motchallenge_metrics 123 | mh = mm.metrics.create() 124 | summary = Evaluator.get_summary(accs, seqs, metrics) 125 | strsummary = mm.io.render_summary( 126 | summary, 127 | formatters=mh.formatters, 128 | namemap=mm.io.motchallenge_metric_names 129 | ) 130 | print(strsummary) 131 | Evaluator.save_summary(summary, os.path.join(result_root, 'summary_{}.xlsx'.format(exp_name))) 132 | 133 | 134 | 135 | if __name__ == '__main__': 136 | parser = argparse.ArgumentParser(prog='track.py') 137 | parser.add_argument('--cfg', 
type=str, default='models/yolov5_JDE.yaml', help='model.yaml path') 138 | parser.add_argument('--weights', nargs='+', type=str, default='runs/last.pt', help='model.pt path(s)') 139 | parser.add_argument('--data', type=str, default='data/mot.yaml', help='*.data path') 140 | parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch') 141 | parser.add_argument('--img-size', type=int, default=[1088, 608], help='inference size (pixels)') 142 | parser.add_argument('--device', default='0, 1', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 143 | parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected') 144 | parser.add_argument('--conf-thres', type=float, default=0.1, help='object confidence threshold') 145 | parser.add_argument('--nms-thres', type=float, default=0.4, help='IOU threshold for NMS') 146 | parser.add_argument('--min-box-area', type=float, default=200, help='filter out tiny boxes') 147 | parser.add_argument('--track-buffer', type=int, default=30, help='tracking buffer') 148 | parser.add_argument('--test-mot16', default=False, action='store_true', help='tracking buffer') 149 | parser.add_argument('--save-images', default=True, action='store_true', help='save tracking results (image)') 150 | parser.add_argument('--save-videos', action='store_true', help='save tracking results (video)') 151 | opt = parser.parse_args() 152 | print(opt, end='\n\n') 153 | 154 | if not opt.test_mot16: 155 | seqs_str = '''MOT20-01 156 | MOT20-02 157 | MOT20-03 158 | MOT20-05 159 | ''' 160 | images = '/home/xb/huawei/MOT20' 161 | # seqs_str = '''MOT16-02 162 | # MOT16-04 163 | # MOT16-05 164 | # MOT16-09 165 | # MOT16-10 166 | # MOT16-11 167 | # MOT16-13 168 | # ''' 169 | # images = '/home/xb/huawei/MOT16' 170 | data_root = '%s/train' % images 171 | else: 172 | seqs_str = '''MOT16-01 173 | MOT16-03 174 | MOT16-06 175 | MOT16-07 176 | MOT16-08 177 | MOT16-12 178 | MOT16-14''' 179 | data_root = '/home/xb/huawei/MOT16/test' 180 | seqs = [seq.strip() for seq in seqs_str.split()] 181 | 182 | main(opt, 183 | data_root=data_root, 184 | seqs=seqs, 185 | exp_name=opt.weights.split('/')[-2], 186 | show_image=False, 187 | save_images=opt.save_images, 188 | save_videos=opt.save_videos) 189 | 190 | -------------------------------------------------------------------------------- /tracker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaobin1231/YOLOv5_JDE/ea3149f280a2234a4bf840c7da8f5d0c77abd59d/tracker/__init__.py -------------------------------------------------------------------------------- /tracker/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 
43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | 54 | -------------------------------------------------------------------------------- /tracker/matching.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | from scipy.spatial.distance import cdist 4 | import lap 5 | 6 | from cython_bbox import bbox_overlaps as bbox_ious 7 | from tracker_utils import kalman_filter 8 | 9 | def merge_matches(m1, m2, shape): 10 | O,P,Q = shape 11 | m1 = np.asarray(m1) 12 | m2 = np.asarray(m2) 13 | 14 | M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) 15 | M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) 16 | 17 | mask = M1*M2 18 | match = mask.nonzero() 19 | match = list(zip(match[0], match[1])) 20 | unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) 21 | unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) 22 | 23 | return match, unmatched_O, unmatched_Q 24 | 25 | 26 | def linear_assignment(cost_matrix, thresh): 27 | if cost_matrix.size == 0: 28 | return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) 29 | matches, unmatched_a, unmatched_b = [], [], [] 30 | cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) 31 | for ix, mx in enumerate(x): 32 | if mx >= 0: 33 | matches.append([ix, mx]) 34 | unmatched_a = np.where(x < 0)[0] 35 | unmatched_b = np.where(y < 0)[0] 36 | matches = np.asarray(matches) 37 | return matches, unmatched_a, unmatched_b 38 | 39 | 40 | def ious(atlbrs, btlbrs): 41 | """ 42 | Compute cost based on IoU 43 | :type atlbrs: list[tlbr] | np.ndarray 44 | :type atlbrs: list[tlbr] | np.ndarray 45 | 46 | :rtype ious np.ndarray 47 | """ 48 | ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) 49 | if ious.size == 0: 50 | return ious 51 | 52 | ious = bbox_ious( 53 | np.ascontiguousarray(atlbrs, dtype=np.float), 54 | np.ascontiguousarray(btlbrs, dtype=np.float) 55 | ) 56 | 57 | return ious 58 | 59 | 60 | def iou_distance(atracks, btracks): 61 | """ 62 | Compute cost based on IoU 63 | :type atracks: list[STrack] 64 | :type btracks: list[STrack] 65 | 66 | :rtype cost_matrix np.ndarray 67 | """ 68 | 69 | if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): 70 | atlbrs = atracks 71 | btlbrs = btracks 72 | else: 73 | atlbrs = [track.tlbr for track in atracks] 74 | btlbrs = [track.tlbr for track in btracks] 75 | _ious = ious(atlbrs, btlbrs) 76 | cost_matrix = 1 - _ious 77 | 78 | return cost_matrix 79 | 80 | def embedding_distance(tracks, detections, metric='cosine'): 81 | """ 82 | :param tracks: list[STrack] 83 | :param detections: list[BaseTrack] 84 | :param metric: 85 | :return: cost_matrix np.ndarray 86 | """ 87 | 88 | cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) 89 | if cost_matrix.size == 0: 90 | return cost_matrix 91 | det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) 92 | track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) 93 | cost_matrix = np.maximum(0.0, cdist(track_features, det_features)) # Nomalized features 94 | 95 | return cost_matrix 96 | 97 | 98 | def fuse_motion(kf, cost_matrix, tracks, 
detections, only_position=False, lambda_=0.98): 99 | if cost_matrix.size == 0: 100 | return cost_matrix 101 | gating_dim = 2 if only_position else 4 102 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 103 | measurements = np.asarray([det.to_xyah() for det in detections]) 104 | for row, track in enumerate(tracks): 105 | gating_distance = kf.gating_distance( 106 | track.mean, track.covariance, measurements, only_position, metric='maha') 107 | cost_matrix[row, gating_distance > gating_threshold] = np.inf 108 | cost_matrix[row] = lambda_ * cost_matrix[row] + (1-lambda_)* gating_distance 109 | return cost_matrix 110 | -------------------------------------------------------------------------------- /tracker/multitracker.py: -------------------------------------------------------------------------------- 1 | from numba import jit 2 | from collections import deque 3 | import torch 4 | import numpy as np 5 | from tracker_utils.kalman_filter import KalmanFilter 6 | from tracker_utils.log import logger 7 | from models.yolo import Model 8 | from tracker import matching 9 | from .basetrack import BaseTrack, TrackState 10 | import time 11 | from utils.general import non_max_suppression 12 | from tracker_utils.utils import scale_coords 13 | 14 | class STrack(BaseTrack): 15 | 16 | def __init__(self, tlwh, score, temp_feat, buffer_size=30): 17 | 18 | # wait activate 19 | self._tlwh = np.asarray(tlwh, dtype=np.float) 20 | self.kalman_filter = None 21 | self.mean, self.covariance = None, None 22 | self.is_activated = False 23 | 24 | self.score = score 25 | self.tracklet_len = 0 26 | 27 | self.smooth_feat = None 28 | self.update_features(temp_feat) 29 | self.features = deque([], maxlen=buffer_size) 30 | self.alpha = 0.9 31 | 32 | def update_features(self, feat): 33 | feat /= np.linalg.norm(feat) 34 | self.curr_feat = feat 35 | if self.smooth_feat is None: 36 | self.smooth_feat = feat 37 | else: 38 | self.smooth_feat = self.alpha *self.smooth_feat + (1-self.alpha) * feat 39 | self.features.append(feat) 40 | self.smooth_feat /= np.linalg.norm(self.smooth_feat) 41 | 42 | def predict(self): 43 | mean_state = self.mean.copy() 44 | if self.state != TrackState.Tracked: 45 | mean_state[7] = 0 46 | self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) 47 | 48 | @staticmethod 49 | def multi_predict(stracks, kalman_filter): 50 | if len(stracks) > 0: 51 | multi_mean = np.asarray([st.mean.copy() for st in stracks]) 52 | multi_covariance = np.asarray([st.covariance for st in stracks]) 53 | for i, st in enumerate(stracks): 54 | if st.state != TrackState.Tracked: 55 | multi_mean[i][7] = 0 56 | # multi_mean, multi_covariance = STrack.kalman_filter.multi_predict(multi_mean, multi_covariance) 57 | multi_mean, multi_covariance = kalman_filter.multi_predict(multi_mean, multi_covariance) 58 | for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): 59 | stracks[i].mean = mean 60 | stracks[i].covariance = cov 61 | 62 | def activate(self, kalman_filter, frame_id): 63 | """Start a new tracklet""" 64 | self.kalman_filter = kalman_filter 65 | self.track_id = self.next_id() 66 | self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) 67 | 68 | self.tracklet_len = 0 69 | self.state = TrackState.Tracked 70 | #self.is_activated = True 71 | self.frame_id = frame_id 72 | self.start_frame = frame_id 73 | 74 | def re_activate(self, new_track, frame_id, new_id=False): 75 | self.mean, self.covariance = self.kalman_filter.update( 76 | self.mean, self.covariance, 
self.tlwh_to_xyah(new_track.tlwh) 77 | ) 78 | 79 | self.update_features(new_track.curr_feat) 80 | self.tracklet_len = 0 81 | self.state = TrackState.Tracked 82 | self.is_activated = True 83 | self.frame_id = frame_id 84 | if new_id: 85 | self.track_id = self.next_id() 86 | 87 | def update(self, new_track, frame_id, update_feature=True): 88 | """ 89 | Update a matched track 90 | :type new_track: STrack 91 | :type frame_id: int 92 | :type update_feature: bool 93 | :return: 94 | """ 95 | self.frame_id = frame_id 96 | self.tracklet_len += 1 97 | 98 | new_tlwh = new_track.tlwh 99 | self.mean, self.covariance = self.kalman_filter.update( 100 | self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) 101 | self.state = TrackState.Tracked 102 | self.is_activated = True 103 | 104 | self.score = new_track.score 105 | if update_feature: 106 | self.update_features(new_track.curr_feat) 107 | 108 | @property 109 | @jit 110 | def tlwh(self): 111 | """Get current position in bounding box format `(top left x, top left y, 112 | width, height)`. 113 | """ 114 | if self.mean is None: 115 | return self._tlwh.copy() 116 | ret = self.mean[:4].copy() 117 | ret[2] *= ret[3] 118 | ret[:2] -= ret[2:] / 2 119 | return ret 120 | 121 | @property 122 | @jit 123 | def tlbr(self): 124 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 125 | `(top left, bottom right)`. 126 | """ 127 | ret = self.tlwh.copy() 128 | ret[2:] += ret[:2] 129 | return ret 130 | 131 | @staticmethod 132 | @jit 133 | def tlwh_to_xyah(tlwh): 134 | """Convert bounding box to format `(center x, center y, aspect ratio, 135 | height)`, where the aspect ratio is `width / height`. 136 | """ 137 | ret = np.asarray(tlwh).copy() 138 | ret[:2] += ret[2:] / 2 139 | ret[2] /= ret[3] 140 | return ret 141 | 142 | def to_xyah(self): 143 | return self.tlwh_to_xyah(self.tlwh) 144 | 145 | @staticmethod 146 | @jit 147 | def tlbr_to_tlwh(tlbr): 148 | ret = np.asarray(tlbr).copy() 149 | ret[2:] -= ret[:2] 150 | return ret 151 | 152 | @staticmethod 153 | @jit 154 | def tlwh_to_tlbr(tlwh): 155 | ret = np.asarray(tlwh).copy() 156 | ret[2:] += ret[:2] 157 | return ret 158 | 159 | def __repr__(self): 160 | return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) 161 | 162 | 163 | class JDETracker(object): 164 | def __init__(self, opt, frame_rate=30): 165 | self.opt = opt 166 | self.model = Model(opt.cfg, nc=1) 167 | ckpt = torch.load(opt.weights, map_location='cpu') 168 | ckpt['model'] = {k: v for k, v in ckpt['model'].float().state_dict().items() 169 | if k in self.model.state_dict() and self.model.state_dict()[k].shape == v.shape} 170 | self.model.load_state_dict(ckpt['model'], strict=False) 171 | self.model.cuda().half().eval() 172 | 173 | self.tracked_stracks = [] # type: list[STrack] 174 | self.lost_stracks = [] # type: list[STrack] 175 | self.removed_stracks = [] # type: list[STrack] 176 | 177 | self.frame_id = 0 178 | self.det_thresh = opt.conf_thres 179 | self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer) 180 | self.max_time_lost = self.buffer_size 181 | 182 | self.kalman_filter = KalmanFilter() 183 | 184 | def update(self, im_blob, img0, path): 185 | self.frame_id += 1 186 | activated_starcks = [] # for storing active tracks, for the current frame 187 | refind_stracks = [] # Lost Tracks whose detections are obtained in the current frame 188 | lost_stracks = [] # The tracks which are not obtained in the current frame but are not removed.(Lost for some time lesser than the threshold for removing) 189 | 
removed_stracks = [] 190 | 191 | t1 = time.time() 192 | ''' Step 1: Network forward, get detections & embeddings''' 193 | with torch.no_grad(): 194 | pred = self.model(im_blob)[0] 195 | # pred is tensor of all the proposals (default number of proposals: 54264). Proposals have information associated with the bounding box and embeddings 196 | pred = pred[pred[:, :, 4] > self.opt.conf_thres] 197 | # pred now has lesser number of proposals. Proposals rejected on basis of object confidence score 198 | if len(pred) > 0: 199 | dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres, self.opt.nms_thres)[0].cpu() 200 | 201 | # Final proposals are obtained in dets. Information of bounding box and embeddings also included 202 | # Next step changes the detection scales 203 | scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round() 204 | '''Detections is list of (x1, y1, x2, y2, object_conf, class_score, class_pred)''' 205 | # class_pred is the embeddings. 206 | 207 | detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f.numpy(), 30) for 208 | (tlbrs, f) in zip(dets[:, :5], dets[:, 6:])] 209 | else: 210 | detections = [] 211 | 212 | t2 = time.time() 213 | # print('Forward: {} s'.format(t2-t1)) 214 | 215 | ''' Add newly detected tracklets to tracked_stracks''' 216 | unconfirmed = [] 217 | tracked_stracks = [] # type: list[STrack] 218 | for track in self.tracked_stracks: 219 | if not track.is_activated: 220 | # previous tracks which are not active in the current frame are added in unconfirmed list 221 | unconfirmed.append(track) 222 | # print("Should not be here, in unconfirmed") 223 | else: 224 | # Active tracks are added to the local list 'tracked_stracks' 225 | tracked_stracks.append(track) 226 | 227 | ''' Step 2: First association, with embedding''' 228 | # Combining currently tracked_stracks and lost_stracks 229 | strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) 230 | # Predict the current location with KF 231 | STrack.multi_predict(strack_pool, self.kalman_filter) 232 | 233 | 234 | dists = matching.embedding_distance(strack_pool, detections) 235 | # dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections) 236 | dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections) 237 | # The dists is the list of distances of the detection with the tracks in strack_pool 238 | matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7) 239 | # The matches is the array for corresponding matches of the detection with the corresponding strack_pool 240 | 241 | for itracked, idet in matches: 242 | # itracked is the id of the track and idet is the detection 243 | track = strack_pool[itracked] 244 | det = detections[idet] 245 | if track.state == TrackState.Tracked: 246 | # If the track is active, add the detection to the track 247 | track.update(detections[idet], self.frame_id) 248 | activated_starcks.append(track) 249 | else: 250 | # We have obtained a detection from a track which is not active, hence put the track in refind_stracks list 251 | track.re_activate(det, self.frame_id, new_id=False) 252 | refind_stracks.append(track) 253 | 254 | # None of the steps below happen if there are no undetected tracks. 
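        # Association cascade so far: Step 2 above matched detections to the pooled tracks using
        # distances between L2-normalised ReID embeddings (matching.embedding_distance), fused with
        # the Kalman gating distance in matching.fuse_motion and solved by lap.lapjv with a cost
        # limit of 0.7. Tracks and detections left in u_track / u_detection fall through to the
        # IoU-based association in Step 3 below, which accepts a match only when 1 - IoU < 0.5.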
255 | ''' Step 3: Second association, with IOU''' 256 | detections = [detections[i] for i in u_detection] 257 | # detections is now a list of the unmatched detections 258 | r_tracked_stracks = [] # This is container for stracks which were tracked till the 259 | # previous frame but no detection was found for it in the current frame 260 | for i in u_track: 261 | if strack_pool[i].state == TrackState.Tracked: 262 | r_tracked_stracks.append(strack_pool[i]) 263 | dists = matching.iou_distance(r_tracked_stracks, detections) 264 | matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5) 265 | # matches is the list of detections which matched with corresponding tracks by IOU distance method 266 | for itracked, idet in matches: 267 | track = r_tracked_stracks[itracked] 268 | det = detections[idet] 269 | if track.state == TrackState.Tracked: 270 | track.update(det, self.frame_id) 271 | activated_starcks.append(track) 272 | else: 273 | track.re_activate(det, self.frame_id, new_id=False) 274 | refind_stracks.append(track) 275 | # Same process done for some unmatched detections, but now considering IOU_distance as measure 276 | 277 | for it in u_track: 278 | track = r_tracked_stracks[it] 279 | if not track.state == TrackState.Lost: 280 | track.mark_lost() 281 | lost_stracks.append(track) 282 | # If no detections are obtained for tracks (u_track), the tracks are added to lost_tracks list and are marked lost 283 | 284 | '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' 285 | detections = [detections[i] for i in u_detection] 286 | dists = matching.iou_distance(unconfirmed, detections) 287 | matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) 288 | for itracked, idet in matches: 289 | unconfirmed[itracked].update(detections[idet], self.frame_id) 290 | activated_starcks.append(unconfirmed[itracked]) 291 | 292 | # The tracks which are yet not matched 293 | for it in u_unconfirmed: 294 | track = unconfirmed[it] 295 | track.mark_removed() 296 | removed_stracks.append(track) 297 | 298 | # after all these confirmation steps, if a new detection is found, it is initialized for a new track 299 | """ Step 4: Init new stracks""" 300 | for inew in u_detection: 301 | track = detections[inew] 302 | if track.score < self.det_thresh: 303 | continue 304 | track.activate(self.kalman_filter, self.frame_id) 305 | activated_starcks.append(track) 306 | 307 | """ Step 5: Update state""" 308 | # If the tracks are lost for more frames than the threshold number, the tracks are removed. 309 | for track in self.lost_stracks: 310 | if self.frame_id - track.end_frame > self.max_time_lost: 311 | track.mark_removed() 312 | removed_stracks.append(track) 313 | # print('Remained match {} s'.format(t4-t3)) 314 | 315 | # Update the self.tracked_stracks and self.lost_stracks using the updates in this step. 
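        # Bookkeeping below: keep only the still-Tracked entries, merge in the newly activated and
        # re-found tracks (joint_stracks), drop from lost_stracks anything that is tracked again or
        # has been removed (sub_stracks), and resolve tracked/lost pairs whose IoU distance is below
        # 0.15 in favour of the longer-lived track (remove_duplicate_stracks). Only tracks with
        # is_activated set are returned to the caller.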
316 | self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] 317 | self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) 318 | self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) 319 | # self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost] # type: list[STrack] 320 | self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) 321 | self.lost_stracks.extend(lost_stracks) 322 | self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) 323 | self.removed_stracks.extend(removed_stracks) 324 | self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) 325 | 326 | # get scores of lost tracks 327 | output_stracks = [track for track in self.tracked_stracks if track.is_activated] 328 | 329 | logger.debug('===========Frame {}=========='.format(self.frame_id)) 330 | logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks])) 331 | logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks])) 332 | logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks])) 333 | logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks])) 334 | # print('Final {} s'.format(t5-t4)) 335 | return output_stracks 336 | 337 | def joint_stracks(tlista, tlistb): 338 | exists = {} 339 | res = [] 340 | for t in tlista: 341 | exists[t.track_id] = 1 342 | res.append(t) 343 | for t in tlistb: 344 | tid = t.track_id 345 | if not exists.get(tid, 0): 346 | exists[tid] = 1 347 | res.append(t) 348 | return res 349 | 350 | def sub_stracks(tlista, tlistb): 351 | stracks = {} 352 | for t in tlista: 353 | stracks[t.track_id] = t 354 | for t in tlistb: 355 | tid = t.track_id 356 | if stracks.get(tid, 0): 357 | del stracks[tid] 358 | return list(stracks.values()) 359 | 360 | def remove_duplicate_stracks(stracksa, stracksb): 361 | pdist = matching.iou_distance(stracksa, stracksb) 362 | pairs = np.where(pdist<0.15) 363 | dupa, dupb = list(), list() 364 | for p,q in zip(*pairs): 365 | timep = stracksa[p].frame_id - stracksa[p].start_frame 366 | timeq = stracksb[q].frame_id - stracksb[q].start_frame 367 | if timep > timeq: 368 | dupb.append(q) 369 | else: 370 | dupa.append(p) 371 | resa = [t for i,t in enumerate(stracksa) if not i in dupa] 372 | resb = [t for i,t in enumerate(stracksb) if not i in dupb] 373 | return resa, resb 374 | 375 | 376 | -------------------------------------------------------------------------------- /tracker_utils/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import math 3 | import os 4 | import os.path as osp 5 | import random 6 | import time 7 | from collections import OrderedDict 8 | 9 | import cv2 10 | import numpy as np 11 | import torch 12 | 13 | from torch.utils.data import Dataset 14 | from tracker_utils.utils import xyxy2xywh 15 | 16 | class LoadImages: # for inference 17 | def __init__(self, path, img_size=(1088, 608)): 18 | if os.path.isdir(path): 19 | image_format = ['.jpg', '.jpeg', '.png', '.tif'] 20 | self.files = sorted(glob.glob('%s/*.*' % path)) 21 | self.files = list(filter(lambda x: os.path.splitext(x)[1].lower() in image_format, self.files)) 22 | elif os.path.isfile(path): 23 | self.files = [path] 24 | 25 | self.nF = len(self.files) # number of image files 26 | self.width = img_size[0] 27 | self.height = img_size[1] 28 | self.count = 0 29 | 30 | assert self.nF > 0, 'No 
images found in ' + path 31 | 32 | def __iter__(self): 33 | self.count = -1 34 | return self 35 | 36 | def __next__(self): 37 | self.count += 1 38 | if self.count == self.nF: 39 | raise StopIteration 40 | img_path = self.files[self.count] 41 | 42 | # Read image 43 | img0 = cv2.imread(img_path) # BGR 44 | assert img0 is not None, 'Failed to load ' + img_path 45 | 46 | # Padded resize 47 | img, _, _, _ = letterbox(img0, height=self.height, width=self.width) 48 | 49 | # Normalize RGB 50 | img = img[:, :, ::-1].transpose(2, 0, 1) 51 | img = np.ascontiguousarray(img, dtype=np.float32) 52 | img /= 255.0 53 | 54 | # cv2.imwrite(img_path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image 55 | return img_path, img, img0 56 | 57 | def __getitem__(self, idx): 58 | idx = idx % self.nF 59 | img_path = self.files[idx] 60 | 61 | # Read image 62 | img0 = cv2.imread(img_path) # BGR 63 | assert img0 is not None, 'Failed to load ' + img_path 64 | 65 | # Padded resize 66 | img, _, _, _ = letterbox(img0, height=self.height, width=self.width) 67 | 68 | # Normalize RGB 69 | img = img[:, :, ::-1].transpose(2, 0, 1) 70 | img = np.ascontiguousarray(img, dtype=np.float32) 71 | img /= 255.0 72 | 73 | return img_path, img, img0 74 | 75 | def __len__(self): 76 | return self.nF # number of files 77 | 78 | 79 | class LoadVideo: # for inference 80 | def __init__(self, path, img_size=(1088, 608)): 81 | if not os.path.isfile(path): 82 | raise FileExistsError 83 | 84 | self.cap = cv2.VideoCapture(path) 85 | self.frame_rate = int(round(self.cap.get(cv2.CAP_PROP_FPS))) 86 | self.vw = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 87 | self.vh = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 88 | self.vn = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) 89 | 90 | self.width = img_size[0] 91 | self.height = img_size[1] 92 | self.count = 0 93 | 94 | self.w, self.h = self.get_size(self.vw, self.vh, self.width, self.height) 95 | print('Lenth of the video: {:d} frames'.format(self.vn)) 96 | 97 | def get_size(self, vw, vh, dw, dh): 98 | wa, ha = float(dw) / vw, float(dh) / vh 99 | a = min(wa, ha) 100 | return int(vw *a), int(vh*a) 101 | 102 | def __iter__(self): 103 | self.count = -1 104 | return self 105 | 106 | def __next__(self): 107 | self.count += 1 108 | if self.count == len(self): 109 | raise StopIteration 110 | # Read image 111 | res, img0 = self.cap.read() # BGR 112 | assert img0 is not None, 'Failed to load frame {:d}'.format(self.count) 113 | img0 = cv2.resize(img0, (self.w, self.h)) 114 | 115 | # Padded resize 116 | img, _, _, _ = letterbox(img0, height=self.height, width=self.width) 117 | 118 | # Normalize RGB 119 | img = img[:, :, ::-1].transpose(2, 0, 1) 120 | img = np.ascontiguousarray(img, dtype=np.float32) 121 | img /= 255.0 122 | 123 | # cv2.imwrite(img_path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image 124 | return self.count, img, img0 125 | 126 | def __len__(self): 127 | return self.vn # number of files 128 | 129 | 130 | class LoadImagesAndLabels: # for training 131 | def __init__(self, path, img_size=(1088,608), augment=False, transforms=None): 132 | with open(path, 'r') as file: 133 | self.img_files = file.readlines() 134 | self.img_files = [x.replace('\n', '') for x in self.img_files] 135 | self.img_files = list(filter(lambda x: len(x) > 0, self.img_files)) 136 | 137 | self.label_files = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt') 138 | for x in self.img_files] 139 | 140 | self.nF = 
len(self.img_files) # number of image files 141 | self.width = img_size[0] 142 | self.height = img_size[1] 143 | self.augment = augment 144 | self.transforms = transforms 145 | 146 | 147 | def __getitem__(self, files_index): 148 | img_path = self.img_files[files_index] 149 | label_path = self.label_files[files_index] 150 | return self.get_data(img_path, label_path) 151 | 152 | def get_data(self, img_path, label_path): 153 | height = self.height 154 | width = self.width 155 | img = cv2.imread(img_path) # BGR 156 | if img is None: 157 | raise ValueError('File corrupt {}'.format(img_path)) 158 | augment_hsv = True 159 | if self.augment and augment_hsv: 160 | # SV augmentation by 50% 161 | fraction = 0.50 162 | img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) 163 | S = img_hsv[:, :, 1].astype(np.float32) 164 | V = img_hsv[:, :, 2].astype(np.float32) 165 | 166 | a = (random.random() * 2 - 1) * fraction + 1 167 | S *= a 168 | if a > 1: 169 | np.clip(S, a_min=0, a_max=255, out=S) 170 | 171 | a = (random.random() * 2 - 1) * fraction + 1 172 | V *= a 173 | if a > 1: 174 | np.clip(V, a_min=0, a_max=255, out=V) 175 | 176 | img_hsv[:, :, 1] = S.astype(np.uint8) 177 | img_hsv[:, :, 2] = V.astype(np.uint8) 178 | cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) 179 | 180 | h, w, _ = img.shape 181 | img, ratio, padw, padh = letterbox(img, height=height, width=width) 182 | 183 | # Load labels 184 | if os.path.isfile(label_path): 185 | labels0 = np.loadtxt(label_path, dtype=np.float32).reshape(-1, 6) 186 | 187 | # Normalized xywh to pixel xyxy format 188 | labels = labels0.copy() 189 | labels[:, 2] = ratio * w * (labels0[:, 2] - labels0[:, 4] / 2) + padw 190 | labels[:, 3] = ratio * h * (labels0[:, 3] - labels0[:, 5] / 2) + padh 191 | labels[:, 4] = ratio * w * (labels0[:, 2] + labels0[:, 4] / 2) + padw 192 | labels[:, 5] = ratio * h * (labels0[:, 3] + labels0[:, 5] / 2) + padh 193 | else: 194 | labels = np.array([]) 195 | 196 | # Augment image and labels 197 | if self.augment: 198 | img, labels, M = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.50, 1.20)) 199 | 200 | 201 | plotFlag = False 202 | if plotFlag: 203 | import matplotlib 204 | matplotlib.use('Agg') 205 | import matplotlib.pyplot as plt 206 | plt.figure(figsize=(50, 50)) 207 | plt.imshow(img[:, :, ::-1]) 208 | plt.plot(labels[:, [1, 3, 3, 1, 1]].T, labels[:, [2, 2, 4, 4, 2]].T, '.-') 209 | plt.axis('off') 210 | plt.savefig('test.jpg') 211 | time.sleep(10) 212 | 213 | nL = len(labels) 214 | if nL > 0: 215 | # convert xyxy to xywh 216 | labels[:, 2:6] = xyxy2xywh(labels[:, 2:6].copy()) #/ height 217 | labels[:, 2] /= width 218 | labels[:, 3] /= height 219 | labels[:, 4] /= width 220 | labels[:, 5] /= height 221 | if self.augment: 222 | # random left-right flip 223 | lr_flip = True 224 | if lr_flip & (random.random() > 0.5): 225 | img = np.fliplr(img) 226 | if nL > 0: 227 | labels[:, 2] = 1 - labels[:, 2] 228 | 229 | img = np.ascontiguousarray(img[ :, :, ::-1]) # BGR to RGB 230 | if self.transforms is not None: 231 | img = self.transforms(img) 232 | 233 | return img, labels, img_path, (h, w) 234 | 235 | def __len__(self): 236 | return self.nF # number of batches 237 | 238 | 239 | def letterbox(img, height=608, width=1088, color=(127.5, 127.5, 127.5)): # resize a rectangular image to a padded rectangular 240 | shape = img.shape[:2] # shape = [height, width] 241 | ratio = min(float(height)/shape[0], float(width)/shape[1]) 242 | new_shape = (round(shape[1] * ratio), round(shape[0] * ratio)) # new_shape = [width, height] 
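# Worked example of the letterbox() resize/pad arithmetic (the dw/dh padding is computed just
# below), assuming a 1920x1080 source frame and the default 1088x608 network canvas; the
# numbers are illustrative only:
ratio_ex = min(608 / 1080, 1088 / 1920)                              # ~0.5630
new_w_ex, new_h_ex = round(1920 * ratio_ex), round(1080 * ratio_ex)  # (1081, 608)
dw_ex, dh_ex = (1088 - new_w_ex) / 2, (608 - new_h_ex) / 2           # 3.5 and 0.0
# -> 3 px of padding on the left, 4 px on the right, none on top/bottom; get_data() then maps
#    the normalised labels into this padded frame via x1 = ratio * w * (xc - bw / 2) + padw
#    (and likewise for y1/x2/y2).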
243 | dw = (width - new_shape[0]) / 2 # width padding 244 | dh = (height - new_shape[1]) / 2 # height padding 245 | top, bottom = round(dh - 0.1), round(dh + 0.1) 246 | left, right = round(dw - 0.1), round(dw + 0.1) 247 | img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border 248 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded rectangular 249 | return img, ratio, dw, dh 250 | 251 | 252 | def random_affine(img, targets=None, degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2), 253 | borderValue=(127.5, 127.5, 127.5)): 254 | # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) 255 | # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4 256 | 257 | border = 0 # width of added border (optional) 258 | height = img.shape[0] 259 | width = img.shape[1] 260 | 261 | # Rotation and Scale 262 | R = np.eye(3) 263 | a = random.random() * (degrees[1] - degrees[0]) + degrees[0] 264 | # a += random.choice([-180, -90, 0, 90]) # 90deg rotations added to small rotations 265 | s = random.random() * (scale[1] - scale[0]) + scale[0] 266 | R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s) 267 | 268 | # Translation 269 | T = np.eye(3) 270 | T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border # x translation (pixels) 271 | T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border # y translation (pixels) 272 | 273 | # Shear 274 | S = np.eye(3) 275 | S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # x shear (deg) 276 | S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # y shear (deg) 277 | 278 | M = S @ T @ R # Combined rotation matrix. ORDER IS IMPORTANT HERE!! 
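# Note on the composition above: applied to a homogeneous point p, M computes S @ (T @ (R @ p)),
# i.e. rotate/scale first, then translate, then shear, which is why the multiplication order
# matters. Tiny standalone check with fixed toy matrices (the real R/T/S above are drawn
# randomly per call):
import numpy as np
R_, T_, S_ = np.eye(3), np.eye(3), np.eye(3)
T_[0, 2] = 5.0                                   # a pure 5 px shift in x
p = np.array([10.0, 20.0, 1.0])
assert np.allclose((S_ @ T_ @ R_) @ p, [15.0, 20.0, 1.0])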
279 | imw = cv2.warpPerspective(img, M, dsize=(width, height), flags=cv2.INTER_LINEAR, 280 | borderValue=borderValue) # BGR order borderValue 281 | 282 | # Return warped points also 283 | if targets is not None: 284 | if len(targets) > 0: 285 | n = targets.shape[0] 286 | points = targets[:, 2:6].copy() 287 | area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1]) 288 | 289 | # warp points 290 | xy = np.ones((n * 4, 3)) 291 | xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 292 | xy = (xy @ M.T)[:, :2].reshape(n, 8) 293 | 294 | # create new boxes 295 | x = xy[:, [0, 2, 4, 6]] 296 | y = xy[:, [1, 3, 5, 7]] 297 | xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T 298 | 299 | # apply angle-based reduction 300 | radians = a * math.pi / 180 301 | reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5 302 | x = (xy[:, 2] + xy[:, 0]) / 2 303 | y = (xy[:, 3] + xy[:, 1]) / 2 304 | w = (xy[:, 2] - xy[:, 0]) * reduction 305 | h = (xy[:, 3] - xy[:, 1]) * reduction 306 | xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T 307 | 308 | # reject warped points outside of image 309 | np.clip(xy[:, 0], 0, width, out=xy[:, 0]) 310 | np.clip(xy[:, 2], 0, width, out=xy[:, 2]) 311 | np.clip(xy[:, 1], 0, height, out=xy[:, 1]) 312 | np.clip(xy[:, 3], 0, height, out=xy[:, 3]) 313 | w = xy[:, 2] - xy[:, 0] 314 | h = xy[:, 3] - xy[:, 1] 315 | area = w * h 316 | ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16)) 317 | i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10) 318 | 319 | targets = targets[i] 320 | targets[:, 2:6] = xy[i] 321 | 322 | return imw, targets, M 323 | else: 324 | return imw 325 | 326 | def collate_fn(batch): 327 | imgs, labels, paths, sizes = zip(*batch) 328 | batch_size = len(labels) 329 | imgs = torch.stack(imgs, 0) 330 | max_box_len = max([l.shape[0] for l in labels]) 331 | labels = [torch.from_numpy(l) for l in labels] 332 | filled_labels = torch.zeros(batch_size, max_box_len, 6) 333 | labels_len = torch.zeros(batch_size) 334 | 335 | for i in range(batch_size): 336 | isize = labels[i].shape[0] 337 | if len(labels[i])>0: 338 | filled_labels[i, :isize, :] = labels[i] 339 | labels_len[i] = isize 340 | 341 | return imgs, filled_labels, paths, sizes, labels_len.unsqueeze(1) 342 | 343 | 344 | class JointDataset(LoadImagesAndLabels): # for training 345 | def __init__(self, root, paths, img_size=(1088,608), augment=False, transforms=None): 346 | 347 | dataset_names = paths.keys() 348 | self.img_files = OrderedDict() 349 | self.label_files = OrderedDict() 350 | self.tid_num = OrderedDict() 351 | self.tid_start_index = OrderedDict() 352 | for ds, path in paths.items(): 353 | with open(path, 'r') as file: 354 | self.img_files[ds] = file.readlines() 355 | self.img_files[ds] = [osp.join(root, x.strip()) for x in self.img_files[ds]] 356 | self.img_files[ds] = list(filter(lambda x: len(x) > 0, self.img_files[ds])) 357 | 358 | self.label_files[ds] = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt') 359 | for x in self.img_files[ds]] 360 | 361 | for ds, label_paths in self.label_files.items(): 362 | max_index = -1 363 | for lp in label_paths: 364 | lb = np.loadtxt(lp) 365 | if len(lb) < 1: 366 | continue 367 | if len(lb.shape) < 2: 368 | img_max = lb[1] 369 | else: 370 | img_max = np.max(lb[:,1]) 371 | if img_max >max_index: 372 | max_index = img_max 373 | self.tid_num[ds] = max_index + 1 374 | 375 | last_index = 0 376 | for i, (k, v) 
in enumerate(self.tid_num.items()): 377 | self.tid_start_index[k] = last_index 378 | last_index += v 379 | 380 | self.nID = int(last_index+1) 381 | self.nds = [len(x) for x in self.img_files.values()] 382 | self.cds = [sum(self.nds[:i]) for i in range(len(self.nds))] 383 | self.nF = sum(self.nds) 384 | self.width = img_size[0] 385 | self.height = img_size[1] 386 | self.augment = augment 387 | self.transforms = transforms 388 | 389 | print('='*80) 390 | print('dataset summary') 391 | print(self.tid_num) 392 | print('total # identities:', self.nID) 393 | print('start index') 394 | print(self.tid_start_index) 395 | print('='*80) 396 | 397 | 398 | def __getitem__(self, files_index): 399 | """ 400 | Iterator function for train dataset 401 | """ 402 | for i, c in enumerate(self.cds): 403 | if files_index >= c: 404 | ds = list(self.label_files.keys())[i] 405 | start_index = c 406 | img_path = self.img_files[ds][files_index - start_index] 407 | label_path = self.label_files[ds][files_index - start_index] 408 | 409 | imgs, labels, img_path, (h, w) = self.get_data(img_path, label_path) 410 | for i, _ in enumerate(labels): 411 | if labels[i,1] > -1: 412 | labels[i,1] += self.tid_start_index[ds] 413 | 414 | return imgs, labels, img_path, (h, w) 415 | 416 | 417 | -------------------------------------------------------------------------------- /tracker_utils/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | mm.lap.default_solver = 'lap' 6 | from tracker_utils.io import read_results, unzip_objs 7 | 8 | 9 | class Evaluator(object): 10 | 11 | def __init__(self, data_root, seq_name, data_type): 12 | self.data_root = data_root 13 | self.seq_name = seq_name 14 | self.data_type = data_type 15 | 16 | self.load_annotations() 17 | self.reset_accumulator() 18 | 19 | def load_annotations(self): 20 | assert self.data_type == 'mot' 21 | 22 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 23 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 24 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 25 | 26 | def reset_accumulator(self): 27 | self.acc = mm.MOTAccumulator(auto_id=True) 28 | 29 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 30 | # results 31 | trk_tlwhs = np.copy(trk_tlwhs) 32 | trk_ids = np.copy(trk_ids) 33 | 34 | # gts 35 | gt_objs = self.gt_frame_dict.get(frame_id, []) 36 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 37 | 38 | # ignore boxes 39 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 40 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 41 | 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | 57 | # get distance matrix 58 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 59 | 60 | # acc 61 | self.acc.update(gt_ids, trk_ids, iou_distance) 62 | 63 | if rtn_events and 
iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 64 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 65 | else: 66 | events = None 67 | return events 68 | 69 | def eval_file(self, filename): 70 | self.reset_accumulator() 71 | 72 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 73 | frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 74 | for frame_id in frames: 75 | trk_objs = result_frame_dict.get(frame_id, []) 76 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 77 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 78 | 79 | return self.acc 80 | 81 | @staticmethod 82 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 83 | names = copy.deepcopy(names) 84 | if metrics is None: 85 | metrics = mm.metrics.motchallenge_metrics 86 | metrics = copy.deepcopy(metrics) 87 | 88 | mh = mm.metrics.create() 89 | summary = mh.compute_many( 90 | accs, 91 | metrics=metrics, 92 | names=names, 93 | generate_overall=True 94 | ) 95 | 96 | return summary 97 | 98 | @staticmethod 99 | def save_summary(summary, filename): 100 | import pandas as pd 101 | writer = pd.ExcelWriter(filename) 102 | summary.to_excel(writer) 103 | writer.save() 104 | -------------------------------------------------------------------------------- /tracker_utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | from tracker_utils.log import logger 6 | 7 | 8 | def write_results(filename, results_dict: Dict, data_type: str): 9 | if not filename: 10 | return 11 | path = os.path.dirname(filename) 12 | if not os.path.exists(path): 13 | os.makedirs(path) 14 | 15 | if data_type in ('mot', 'mcmot', 'lab'): 16 | save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 17 | elif data_type == 'kitti': 18 | save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 19 | else: 20 | raise ValueError(data_type) 21 | 22 | with open(filename, 'w') as f: 23 | for frame_id, frame_data in results_dict.items(): 24 | if data_type == 'kitti': 25 | frame_id -= 1 26 | for tlwh, track_id in frame_data: 27 | if track_id < 0: 28 | continue 29 | x1, y1, w, h = tlwh 30 | x2, y2 = x1 + w, y1 + h 31 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 32 | f.write(line) 33 | logger.info('Save results to {}'.format(filename)) 34 | 35 | 36 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 37 | if data_type in ('mot', 'lab'): 38 | read_fun = read_mot_results 39 | else: 40 | raise ValueError('Unknown data type: {}'.format(data_type)) 41 | 42 | return read_fun(filename, is_gt, is_ignore) 43 | 44 | 45 | """ 46 | labels={'ped', ... % 1 47 | 'person_on_vhcl', ... % 2 48 | 'car', ... % 3 49 | 'bicycle', ... % 4 50 | 'mbike', ... % 5 51 | 'non_mot_vhcl', ... % 6 52 | 'static_person', ... % 7 53 | 'distractor', ... % 8 54 | 'occluder', ... % 9 55 | 'occluder_on_grnd', ... %10 56 | 'occluder_full', ... % 11 57 | 'reflection', ... % 12 58 | 'crowd' ... 
% 13 59 | }; 60 | """ 61 | 62 | 63 | def read_mot_results(filename, is_gt, is_ignore): 64 | valid_labels = {1} 65 | ignore_labels = {2, 7, 8, 12} 66 | results_dict = dict() 67 | if os.path.isfile(filename): 68 | with open(filename, 'r') as f: 69 | for line in f.readlines(): 70 | linelist = line.split(',') 71 | if len(linelist) < 7: 72 | continue 73 | fid = int(linelist[0]) 74 | if fid < 1: 75 | continue 76 | results_dict.setdefault(fid, list()) 77 | 78 | if is_gt: 79 | if 'MOT16-' in filename or 'MOT17-' in filename: 80 | label = int(float(linelist[7])) 81 | mark = int(float(linelist[6])) 82 | if mark == 0 or label not in valid_labels: 83 | continue 84 | score = 1 85 | elif is_ignore: 86 | if 'MOT16-' in filename or 'MOT17-' in filename: 87 | label = int(float(linelist[7])) 88 | vis_ratio = float(linelist[8]) 89 | if label not in ignore_labels and vis_ratio >= 0: 90 | continue 91 | else: 92 | continue 93 | score = 1 94 | else: 95 | score = float(linelist[6]) 96 | 97 | tlwh = tuple(map(float, linelist[2:6])) 98 | target_id = int(linelist[1]) 99 | 100 | results_dict[fid].append((tlwh, target_id, score)) 101 | 102 | return results_dict 103 | 104 | 105 | def unzip_objs(objs): 106 | if len(objs) > 0: 107 | tlwhs, ids, scores = zip(*objs) 108 | else: 109 | tlwhs, ids, scores = [], [], [] 110 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 111 | 112 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /tracker_utils/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 
63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. Unobserved velocities are initialized to 0 mean. 105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | mean = np.dot(mean, self._motion_mat.T) 120 | covariance = np.linalg.multi_dot(( 121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 122 | 123 | return mean, covariance 124 | 125 | def project(self, mean, covariance): 126 | """Project state distribution to measurement space. 127 | 128 | Parameters 129 | ---------- 130 | mean : ndarray 131 | The state's mean vector (8 dimensional array). 132 | covariance : ndarray 133 | The state's covariance matrix (8x8 dimensional). 134 | 135 | Returns 136 | ------- 137 | (ndarray, ndarray) 138 | Returns the projected mean and covariance matrix of the given state 139 | estimate. 140 | 141 | """ 142 | std = [ 143 | self._std_weight_position * mean[3], 144 | self._std_weight_position * mean[3], 145 | 1e-1, 146 | self._std_weight_position * mean[3]] 147 | innovation_cov = np.diag(np.square(std)) 148 | 149 | mean = np.dot(self._update_mat, mean) 150 | covariance = np.linalg.multi_dot(( 151 | self._update_mat, covariance, self._update_mat.T)) 152 | return mean, covariance + innovation_cov 153 | 154 | def multi_predict(self, mean, covariance): 155 | """Run Kalman filter prediction step (Vectorized version). 156 | 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The Nx8 dimensional mean matrix of the object states at the previous 161 | time step. 162 | covariance : ndarray 163 | The Nx8x8 dimensional covariance matrics of the object states at the 164 | previous time step. 165 | 166 | Returns 167 | ------- 168 | (ndarray, ndarray) 169 | Returns the mean vector and covariance matrix of the predicted 170 | state. Unobserved velocities are initialized to 0 mean. 
171 | 172 | """ 173 | std_pos = [ 174 | self._std_weight_position * mean[:, 3], 175 | self._std_weight_position * mean[:, 3], 176 | 1e-2 * np.ones_like(mean[:, 3]), 177 | self._std_weight_position * mean[:, 3]] 178 | std_vel = [ 179 | self._std_weight_velocity * mean[:, 3], 180 | self._std_weight_velocity * mean[:, 3], 181 | 1e-5 * np.ones_like(mean[:, 3]), 182 | self._std_weight_velocity * mean[:, 3]] 183 | sqr = np.square(np.r_[std_pos, std_vel]).T 184 | 185 | motion_cov = [] 186 | for i in range(len(mean)): 187 | motion_cov.append(np.diag(sqr[i])) 188 | motion_cov = np.asarray(motion_cov) 189 | 190 | mean = np.dot(mean, self._motion_mat.T) 191 | left = np.dot(self._motion_mat, covariance).transpose((1,0,2)) 192 | covariance = np.dot(left, self._motion_mat.T) + motion_cov 193 | 194 | return mean, covariance 195 | 196 | def update(self, mean, covariance, measurement): 197 | """Run Kalman filter correction step. 198 | 199 | Parameters 200 | ---------- 201 | mean : ndarray 202 | The predicted state's mean vector (8 dimensional). 203 | covariance : ndarray 204 | The state's covariance matrix (8x8 dimensional). 205 | measurement : ndarray 206 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 207 | is the center position, a the aspect ratio, and h the height of the 208 | bounding box. 209 | 210 | Returns 211 | ------- 212 | (ndarray, ndarray) 213 | Returns the measurement-corrected state distribution. 214 | 215 | """ 216 | projected_mean, projected_cov = self.project(mean, covariance) 217 | 218 | chol_factor, lower = scipy.linalg.cho_factor( 219 | projected_cov, lower=True, check_finite=False) 220 | kalman_gain = scipy.linalg.cho_solve( 221 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 222 | check_finite=False).T 223 | innovation = measurement - projected_mean 224 | 225 | new_mean = mean + np.dot(innovation, kalman_gain.T) 226 | new_covariance = covariance - np.linalg.multi_dot(( 227 | kalman_gain, projected_cov, kalman_gain.T)) 228 | return new_mean, new_covariance 229 | 230 | def gating_distance(self, mean, covariance, measurements, 231 | only_position=False, metric='maha'): 232 | """Compute gating distance between state distribution and measurements. 233 | 234 | A suitable distance threshold can be obtained from `chi2inv95`. If 235 | `only_position` is False, the chi-square distribution has 4 degrees of 236 | freedom, otherwise 2. 237 | 238 | Parameters 239 | ---------- 240 | mean : ndarray 241 | Mean vector over the state distribution (8 dimensional). 242 | covariance : ndarray 243 | Covariance of the state distribution (8x8 dimensional). 244 | measurements : ndarray 245 | An Nx4 dimensional matrix of N measurements, each in 246 | format (x, y, a, h) where (x, y) is the bounding box center 247 | position, a the aspect ratio, and h the height. 248 | only_position : Optional[bool] 249 | If True, distance computation is done with respect to the bounding 250 | box center position only. 251 | 252 | Returns 253 | ------- 254 | ndarray 255 | Returns an array of length N, where the i-th element contains the 256 | squared Mahalanobis distance between (mean, covariance) and 257 | `measurements[i]`. 
258 | 259 | """ 260 | mean, covariance = self.project(mean, covariance) 261 | if only_position: 262 | mean, covariance = mean[:2], covariance[:2, :2] 263 | measurements = measurements[:, :2] 264 | 265 | d = measurements - mean 266 | if metric == 'gaussian': 267 | return np.sum(d * d, axis=1) 268 | elif metric == 'maha': 269 | cholesky_factor = np.linalg.cholesky(covariance) 270 | z = scipy.linalg.solve_triangular( 271 | cholesky_factor, d.T, lower=True, check_finite=False, 272 | overwrite_b=True) 273 | squared_maha = np.sum(z * z, axis=0) 274 | return squared_maha 275 | else: 276 | raise ValueError('invalid distance metric') 277 | 278 | -------------------------------------------------------------------------------- /tracker_utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.DEBUG) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | logger = get_logger('root') 19 | -------------------------------------------------------------------------------- /tracker_utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | self.duration = 0. 21 | 22 | def tic(self): 23 | # using time.time instead of time.clock because time time.clock 24 | # does not normalize for multithreading 25 | self.start_time = time.time() 26 | 27 | def toc(self, average=True): 28 | self.diff = time.time() - self.start_time 29 | self.total_time += self.diff 30 | self.calls += 1 31 | self.average_time = self.total_time / self.calls 32 | if average: 33 | self.duration = self.average_time 34 | else: 35 | self.duration = self.diff 36 | return self.duration 37 | 38 | def clear(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | self.average_time = 0. 44 | self.duration = 0. 
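# Typical use of the Timer above when timing per-frame tracking (illustrative only; the actual
# call sites in track.py may differ slightly):
timer = Timer()
for _ in range(100):
    timer.tic()
    # ... run detection + association for one frame ...
    timer.toc()                                  # accumulates total_time and call count
fps = 1.0 / max(1e-5, timer.average_time)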
45 | 46 | -------------------------------------------------------------------------------- /tracker_utils/utils.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | import os.path as osp 5 | 6 | import cv2 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import torch 10 | import torch.nn.functional as F 11 | from torchvision.ops import nms 12 | 13 | 14 | def mkdir_if_missing(dir): 15 | os.makedirs(dir, exist_ok=True) 16 | 17 | 18 | def float3(x): # format floats to 3 decimals 19 | return float(format(x, '.3f')) 20 | 21 | 22 | def init_seeds(seed=0): 23 | random.seed(seed) 24 | np.random.seed(seed) 25 | torch.manual_seed(seed) 26 | torch.cuda.manual_seed(seed) 27 | torch.cuda.manual_seed_all(seed) 28 | 29 | 30 | def load_classes(path): 31 | """ 32 | Loads class labels at 'path' 33 | """ 34 | fp = open(path, 'r') 35 | names = fp.read().split('\n') 36 | return list(filter(None, names)) # filter removes empty strings (such as last line) 37 | 38 | 39 | def model_info(model): 40 | """ 41 | Prints out a line-by-line description of a PyTorch model ending with a summary. 42 | """ 43 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 44 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 45 | print('\n%5s %50s %9s %12s %20s %12s %12s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 46 | for i, (name, p) in enumerate(model.named_parameters()): 47 | name = name.replace('module_list.', '') 48 | print('%5g %50s %9s %12g %20s %12.3g %12.3g' % ( 49 | i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 50 | print('Model Summary: %g layers, %g parameters, %g gradients\n' % (i + 1, n_p, n_g)) 51 | 52 | 53 | 54 | def plot_one_box(x, img, color=None, label=None, line_thickness=None): 55 | """ 56 | Plots one bounding box on image img. 57 | """ 58 | tl = line_thickness or round(0.0004 * max(img.shape[0:2])) + 1 # line thickness 59 | color = color or [random.randint(0, 255) for _ in range(3)] 60 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) 61 | cv2.rectangle(img, c1, c2, color, thickness=tl) 62 | if label: 63 | tf = max(tl - 1, 1) # font thickness 64 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] 65 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 66 | cv2.rectangle(img, c1, c2, color, -1) # filled 67 | cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) 68 | 69 | 70 | def weights_init_normal(m): 71 | classname = m.__class__.__name__ 72 | if classname.find('Conv') != -1: 73 | torch.nn.init.normal_(m.weight.data, 0.0, 0.03) 74 | elif classname.find('BatchNorm2d') != -1: 75 | torch.nn.init.normal_(m.weight.data, 1.0, 0.03) 76 | torch.nn.init.constant_(m.bias.data, 0.0) 77 | 78 | 79 | def xyxy2xywh(x): 80 | # Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h] 81 | # x, y are coordinates of center 82 | # (x1, y1) and (x2, y2) are coordinates of bottom left and top right respectively. 
83 | y = torch.zeros_like(x) if x.dtype is torch.float32 else np.zeros_like(x) 84 | y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center 85 | y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center 86 | y[:, 2] = x[:, 2] - x[:, 0] # width 87 | y[:, 3] = x[:, 3] - x[:, 1] # height 88 | return y 89 | 90 | 91 | def xywh2xyxy(x): 92 | # Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2] 93 | # x, y are coordinates of center 94 | # (x1, y1) and (x2, y2) are coordinates of bottom left and top right respectively. 95 | y = torch.zeros_like(x) if x.dtype is torch.float32 else np.zeros_like(x) 96 | y[:, 0] = (x[:, 0] - x[:, 2] / 2) # Bottom left x 97 | y[:, 1] = (x[:, 1] - x[:, 3] / 2) # Bottom left y 98 | y[:, 2] = (x[:, 0] + x[:, 2] / 2) # Top right x 99 | y[:, 3] = (x[:, 1] + x[:, 3] / 2) # Top right y 100 | return y 101 | 102 | 103 | def scale_coords(img_size, coords, img0_shape): 104 | # Rescale x1, y1, x2, y2 from 416 to image size 105 | gain_w = float(img_size[0]) / img0_shape[1] # gain = old / new 106 | gain_h = float(img_size[1]) / img0_shape[0] 107 | gain = min(gain_w, gain_h) 108 | pad_x = (img_size[0] - img0_shape[1] * gain) / 2 # width padding 109 | pad_y = (img_size[1] - img0_shape[0] * gain) / 2 # height padding 110 | coords[:, [0, 2]] -= pad_x 111 | coords[:, [1, 3]] -= pad_y 112 | coords[:, 0:4] /= gain 113 | coords[:, :4] = torch.clamp(coords[:, :4], min=0) 114 | return coords 115 | 116 | 117 | def ap_per_class(tp, conf, pred_cls, target_cls): 118 | """ Computes the average precision, given the recall and precision curves. 119 | Method originally from https://github.com/rafaelpadilla/Object-Detection-Metrics. 120 | # Arguments 121 | tp: True positives (list). 122 | conf: Objectness value from 0-1 (list). 123 | pred_cls: Predicted object classes (list). 124 | target_cls: True object classes (list). 125 | # Returns 126 | The average precision as computed in py-faster-rcnn. 127 | """ 128 | 129 | # lists/pytorch to numpy 130 | tp, conf, pred_cls, target_cls = np.array(tp), np.array(conf), np.array(pred_cls), np.array(target_cls) 131 | 132 | # Sort by objectness 133 | i = np.argsort(-conf) 134 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 135 | 136 | # Find unique classes 137 | unique_classes = np.unique(np.concatenate((pred_cls, target_cls), 0)) 138 | 139 | # Create Precision-Recall curve and compute AP for each class 140 | ap, p, r = [], [], [] 141 | for c in unique_classes: 142 | i = pred_cls == c 143 | n_gt = sum(target_cls == c) # Number of ground truth objects 144 | n_p = sum(i) # Number of predicted objects 145 | 146 | if (n_p == 0) and (n_gt == 0): 147 | continue 148 | elif (n_p == 0) or (n_gt == 0): 149 | ap.append(0) 150 | r.append(0) 151 | p.append(0) 152 | else: 153 | # Accumulate FPs and TPs 154 | fpc = np.cumsum(1 - tp[i]) 155 | tpc = np.cumsum(tp[i]) 156 | 157 | # Recall 158 | recall_curve = tpc / (n_gt + 1e-16) 159 | r.append(tpc[-1] / (n_gt + 1e-16)) 160 | 161 | # Precision 162 | precision_curve = tpc / (tpc + fpc) 163 | p.append(tpc[-1] / (tpc[-1] + fpc[-1])) 164 | 165 | # AP from recall-precision curve 166 | ap.append(compute_ap(recall_curve, precision_curve)) 167 | 168 | return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(p) 169 | 170 | 171 | def compute_ap(recall, precision): 172 | """ Computes the average precision, given the recall and precision curves. 173 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 174 | # Arguments 175 | recall: The recall curve (list). 176 | precision: The precision curve (list). 
177 | # Returns 178 | The average precision as computed in py-faster-rcnn. 179 | """ 180 | # correct AP calculation 181 | # first append sentinel values at the end 182 | 183 | mrec = np.concatenate(([0.], recall, [1.])) 184 | mpre = np.concatenate(([0.], precision, [0.])) 185 | 186 | # compute the precision envelope 187 | for i in range(mpre.size - 1, 0, -1): 188 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 189 | 190 | # to calculate area under PR curve, look for points 191 | # where X axis (recall) changes value 192 | i = np.where(mrec[1:] != mrec[:-1])[0] 193 | 194 | # and sum (\Delta recall) * prec 195 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 196 | return ap 197 | 198 | 199 | def bbox_iou(box1, box2, x1y1x2y2=False): 200 | """ 201 | Returns the IoU of two bounding boxes 202 | """ 203 | N, M = len(box1), len(box2) 204 | if x1y1x2y2: 205 | # Get the coordinates of bounding boxes 206 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] 207 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] 208 | else: 209 | # Transform from center and width to exact coordinates 210 | b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 211 | b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 212 | b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 213 | b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 214 | 215 | # get the coordinates of the intersection rectangle 216 | inter_rect_x1 = torch.max(b1_x1.unsqueeze(1), b2_x1) 217 | inter_rect_y1 = torch.max(b1_y1.unsqueeze(1), b2_y1) 218 | inter_rect_x2 = torch.min(b1_x2.unsqueeze(1), b2_x2) 219 | inter_rect_y2 = torch.min(b1_y2.unsqueeze(1), b2_y2) 220 | # Intersection area 221 | inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1, 0) * torch.clamp(inter_rect_y2 - inter_rect_y1, 0) 222 | # Union Area 223 | b1_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1)) 224 | b1_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1)).view(-1,1).expand(N,M) 225 | b2_area = ((b2_x2 - b2_x1) * (b2_y2 - b2_y1)).view(1,-1).expand(N,M) 226 | 227 | return inter_area / (b1_area + b2_area - inter_area + 1e-16) 228 | 229 | 230 | def build_targets_max(target, anchor_wh, nA, nC, nGh, nGw): 231 | """ 232 | returns nT, nCorrect, tx, ty, tw, th, tconf, tcls 233 | """ 234 | nB = len(target) # number of images in batch 235 | 236 | txy = torch.zeros(nB, nA, nGh, nGw, 2).cuda() # batch size, anchors, grid size 237 | twh = torch.zeros(nB, nA, nGh, nGw, 2).cuda() 238 | tconf = torch.LongTensor(nB, nA, nGh, nGw).fill_(0).cuda() 239 | tcls = torch.ByteTensor(nB, nA, nGh, nGw, nC).fill_(0).cuda() # nC = number of classes 240 | tid = torch.LongTensor(nB, nA, nGh, nGw, 1).fill_(-1).cuda() 241 | for b in range(nB): 242 | t = target[b] 243 | t_id = t[:, 1].clone().long().cuda() 244 | t = t[:,[0,2,3,4,5]] 245 | nTb = len(t) # number of targets 246 | if nTb == 0: 247 | continue 248 | 249 | #gxy, gwh = t[:, 1:3] * nG, t[:, 3:5] * nG 250 | gxy, gwh = t[: , 1:3].clone() , t[:, 3:5].clone() 251 | gxy[:, 0] = gxy[:, 0] * nGw 252 | gxy[:, 1] = gxy[:, 1] * nGh 253 | gwh[:, 0] = gwh[:, 0] * nGw 254 | gwh[:, 1] = gwh[:, 1] * nGh 255 | gi = torch.clamp(gxy[:, 0], min=0, max=nGw -1).long() 256 | gj = torch.clamp(gxy[:, 1], min=0, max=nGh -1).long() 257 | 258 | # Get grid box indices and prevent overflows (i.e. 
13.01 on 13 anchors) 259 | #gi, gj = torch.clamp(gxy.long(), min=0, max=nG - 1).t() 260 | #gi, gj = gxy.long().t() 261 | 262 | # iou of targets-anchors (using wh only) 263 | box1 = gwh 264 | box2 = anchor_wh.unsqueeze(1) 265 | inter_area = torch.min(box1, box2).prod(2) 266 | iou = inter_area / (box1.prod(1) + box2.prod(2) - inter_area + 1e-16) 267 | 268 | # Select best iou_pred and anchor 269 | iou_best, a = iou.max(0) # best anchor [0-2] for each target 270 | 271 | # Select best unique target-anchor combinations 272 | if nTb > 1: 273 | _, iou_order = torch.sort(-iou_best) # best to worst 274 | 275 | # Unique anchor selection 276 | u = torch.stack((gi, gj, a), 0)[:, iou_order] 277 | # _, first_unique = np.unique(u, axis=1, return_index=True) # first unique indices 278 | first_unique = return_torch_unique_index(u, torch.unique(u, dim=1)) # torch alternative 279 | i = iou_order[first_unique] 280 | # best anchor must share significant commonality (iou) with target 281 | i = i[iou_best[i] > 0.60] # TODO: examine arbitrary threshold 282 | if len(i) == 0: 283 | continue 284 | 285 | a, gj, gi, t = a[i], gj[i], gi[i], t[i] 286 | t_id = t_id[i] 287 | if len(t.shape) == 1: 288 | t = t.view(1, 5) 289 | else: 290 | if iou_best < 0.60: 291 | continue 292 | 293 | tc, gxy, gwh = t[:, 0].long(), t[:, 1:3].clone(), t[:, 3:5].clone() 294 | gxy[:, 0] = gxy[:, 0] * nGw 295 | gxy[:, 1] = gxy[:, 1] * nGh 296 | gwh[:, 0] = gwh[:, 0] * nGw 297 | gwh[:, 1] = gwh[:, 1] * nGh 298 | 299 | # XY coordinates 300 | txy[b, a, gj, gi] = gxy - gxy.floor() 301 | 302 | # Width and height 303 | twh[b, a, gj, gi] = torch.log(gwh / anchor_wh[a]) # yolo method 304 | # twh[b, a, gj, gi] = torch.sqrt(gwh / anchor_wh[a]) / 2 # power method 305 | 306 | # One-hot encoding of label 307 | tcls[b, a, gj, gi, tc] = 1 308 | tconf[b, a, gj, gi] = 1 309 | tid[b, a, gj, gi] = t_id.unsqueeze(1) 310 | tbox = torch.cat([txy, twh], -1) 311 | return tconf, tbox, tid 312 | 313 | 314 | 315 | def build_targets_thres(target, anchor_wh, nA, nC, nGh, nGw): 316 | ID_THRESH = 0.5 317 | FG_THRESH = 0.5 318 | BG_THRESH = 0.4 319 | nB = len(target) # number of images in batch 320 | assert(len(anchor_wh)==nA) 321 | 322 | tbox = torch.zeros(nB, nA, nGh, nGw, 4).cuda() # batch size, anchors, grid size 323 | tconf = torch.LongTensor(nB, nA, nGh, nGw).fill_(0).cuda() 324 | tid = torch.LongTensor(nB, nA, nGh, nGw, 1).fill_(-1).cuda() 325 | for b in range(nB): 326 | t = target[b] 327 | t_id = t[:, 1].clone().long().cuda() 328 | t = t[:,[0,2,3,4,5]] 329 | nTb = len(t) # number of targets 330 | if nTb == 0: 331 | continue 332 | 333 | gxy, gwh = t[: , 1:3].clone() , t[:, 3:5].clone() 334 | gxy[:, 0] = gxy[:, 0] * nGw 335 | gxy[:, 1] = gxy[:, 1] * nGh 336 | gwh[:, 0] = gwh[:, 0] * nGw 337 | gwh[:, 1] = gwh[:, 1] * nGh 338 | gxy[:, 0] = torch.clamp(gxy[:, 0], min=0, max=nGw -1) 339 | gxy[:, 1] = torch.clamp(gxy[:, 1], min=0, max=nGh -1) 340 | 341 | gt_boxes = torch.cat([gxy, gwh], dim=1) # Shape Ngx4 (xc, yc, w, h) 342 | 343 | anchor_mesh = generate_anchor(nGh, nGw, anchor_wh) 344 | anchor_list = anchor_mesh.permute(0,2,3,1).contiguous().view(-1, 4) # Shpae (nA x nGh x nGw) x 4 345 | #print(anchor_list.shape, gt_boxes.shape) 346 | iou_pdist = bbox_iou(anchor_list, gt_boxes) # Shape (nA x nGh x nGw) x Ng 347 | iou_max, max_gt_index = torch.max(iou_pdist, dim=1) # Shape (nA x nGh x nGw), both 348 | 349 | iou_map = iou_max.view(nA, nGh, nGw) 350 | gt_index_map = max_gt_index.view(nA, nGh, nGw) 351 | 352 | #nms_map = pooling_nms(iou_map, 3) 353 | 354 | id_index = iou_map > 
ID_THRESH 355 | fg_index = iou_map > FG_THRESH 356 | bg_index = iou_map < BG_THRESH 357 | ign_index = (iou_map < FG_THRESH) * (iou_map > BG_THRESH) 358 | tconf[b][fg_index] = 1 359 | tconf[b][bg_index] = 0 360 | tconf[b][ign_index] = -1 361 | 362 | gt_index = gt_index_map[fg_index] 363 | gt_box_list = gt_boxes[gt_index] 364 | gt_id_list = t_id[gt_index_map[id_index]] 365 | #print(gt_index.shape, gt_index_map[id_index].shape, gt_boxes.shape) 366 | if torch.sum(fg_index) > 0: 367 | tid[b][id_index] = gt_id_list.unsqueeze(1) 368 | fg_anchor_list = anchor_list.view(nA, nGh, nGw, 4)[fg_index] 369 | delta_target = encode_delta(gt_box_list, fg_anchor_list) 370 | tbox[b][fg_index] = delta_target 371 | return tconf, tbox, tid 372 | 373 | def generate_anchor(nGh, nGw, anchor_wh): 374 | nA = len(anchor_wh) 375 | yy, xx =torch.meshgrid(torch.arange(nGh), torch.arange(nGw)) 376 | xx, yy = xx.cuda(), yy.cuda() 377 | 378 | mesh = torch.stack([xx, yy], dim=0) # Shape 2, nGh, nGw 379 | mesh = mesh.unsqueeze(0).repeat(nA,1,1,1).float() # Shape nA x 2 x nGh x nGw 380 | anchor_offset_mesh = anchor_wh.unsqueeze(-1).unsqueeze(-1).repeat(1, 1, nGh,nGw) # Shape nA x 2 x nGh x nGw 381 | anchor_mesh = torch.cat([mesh, anchor_offset_mesh], dim=1) # Shape nA x 4 x nGh x nGw 382 | return anchor_mesh 383 | 384 | def encode_delta(gt_box_list, fg_anchor_list): 385 | px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:,1], \ 386 | fg_anchor_list[:, 2], fg_anchor_list[:,3] 387 | gx, gy, gw, gh = gt_box_list[:, 0], gt_box_list[:, 1], \ 388 | gt_box_list[:, 2], gt_box_list[:, 3] 389 | dx = (gx - px) / pw 390 | dy = (gy - py) / ph 391 | dw = torch.log(gw/pw) 392 | dh = torch.log(gh/ph) 393 | return torch.stack([dx, dy, dw, dh], dim=1) 394 | 395 | def decode_delta(delta, fg_anchor_list): 396 | px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:,1], \ 397 | fg_anchor_list[:, 2], fg_anchor_list[:,3] 398 | dx, dy, dw, dh = delta[:, 0], delta[:, 1], delta[:, 2], delta[:, 3] 399 | gx = pw * dx + px 400 | gy = ph * dy + py 401 | gw = pw * torch.exp(dw) 402 | gh = ph * torch.exp(dh) 403 | return torch.stack([gx, gy, gw, gh], dim=1) 404 | 405 | def decode_delta_map(delta_map, anchors): 406 | ''' 407 | :param: delta_map, shape (nB, nA, nGh, nGw, 4) 408 | :param: anchors, shape (nA,4) 409 | ''' 410 | nB, nA, nGh, nGw, _ = delta_map.shape 411 | anchor_mesh = generate_anchor(nGh, nGw, anchors) 412 | anchor_mesh = anchor_mesh.permute(0,2,3,1).contiguous() # Shpae (nA x nGh x nGw) x 4 413 | anchor_mesh = anchor_mesh.unsqueeze(0).repeat(nB,1,1,1,1) 414 | pred_list = decode_delta(delta_map.view(-1,4), anchor_mesh.view(-1,4)) 415 | pred_map = pred_list.view(nB, nA, nGh, nGw, 4) 416 | return pred_map 417 | 418 | 419 | def pooling_nms(heatmap, kernel=1): 420 | pad = (kernel -1 ) // 2 421 | hmax = F.max_pool2d(heatmap, (kernel, kernel), stride=1, padding=pad) 422 | keep = (hmax == heatmap).float() 423 | return keep * heatmap 424 | 425 | def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method='standard'): 426 | """ 427 | Removes detections with lower object confidence score than 'conf_thres' 428 | Non-Maximum Suppression to further filter detections. 
429 | Returns detections with shape: 430 | (x1, y1, x2, y2, object_conf, class_score, class_pred) 431 | Args: 432 | prediction, 433 | conf_thres, 434 | nms_thres, 435 | method = 'standard' or 'fast' 436 | """ 437 | 438 | output = [None for _ in range(len(prediction))] 439 | for image_i, pred in enumerate(prediction): 440 | # Filter out confidence scores below threshold 441 | # Get score and class with highest confidence 442 | 443 | v = pred[:, 4] > conf_thres 444 | v = v.nonzero().squeeze() 445 | if len(v.shape) == 0: 446 | v = v.unsqueeze(0) 447 | 448 | pred = pred[v] 449 | 450 | # If none are remaining => process next image 451 | nP = pred.shape[0] 452 | if not nP: 453 | continue 454 | # From (center x, center y, width, height) to (x1, y1, x2, y2) 455 | pred[:, :4] = xywh2xyxy(pred[:, :4]) 456 | 457 | 458 | # Non-maximum suppression 459 | if method == 'standard': 460 | nms_indices = nms(pred[:, :4], pred[:, 4], nms_thres) 461 | elif method == 'fast': 462 | nms_indices = fast_nms(pred[:, :4], pred[:, 4], iou_thres=nms_thres, conf_thres=conf_thres) 463 | else: 464 | raise ValueError('Invalid NMS type!') 465 | det_max = pred[nms_indices] 466 | 467 | if len(det_max) > 0: 468 | # Add max detections to outputs 469 | output[image_i] = det_max if output[image_i] is None else torch.cat((output[image_i], det_max)) 470 | 471 | return output 472 | 473 | def fast_nms(boxes, scores, iou_thres:float=0.5, top_k:int=200, second_threshold:bool=False, conf_thres:float=0.5): 474 | ''' 475 | Vectorized, approximated, fast NMS, adopted from YOLACT: 476 | https://github.com/dbolya/yolact/blob/master/layers/functions/detection.py 477 | The original version is for multi-class NMS, here we simplify the code for single-class NMS 478 | ''' 479 | scores, idx = scores.sort(0, descending=True) 480 | 481 | idx = idx[:top_k].contiguous() 482 | scores = scores[:top_k] 483 | num_dets = idx.size() 484 | 485 | boxes = boxes[idx, :] 486 | 487 | iou = jaccard(boxes, boxes) 488 | iou.triu_(diagonal=1) 489 | iou_max, _ = iou.max(dim=0) 490 | 491 | keep = (iou_max <= iou_thres) 492 | 493 | if second_threshold: 494 | keep *= (scores > self.conf_thresh) 495 | 496 | return idx[keep] 497 | 498 | 499 | 500 | @torch.jit.script 501 | def intersect(box_a, box_b): 502 | """ We resize both tensors to [A,B,2] without new malloc: 503 | [A,2] -> [A,1,2] -> [A,B,2] 504 | [B,2] -> [1,B,2] -> [A,B,2] 505 | Then we compute the area of intersect between box_a and box_b. 506 | Args: 507 | box_a: (tensor) bounding boxes, Shape: [n,A,4]. 508 | box_b: (tensor) bounding boxes, Shape: [n,B,4]. 509 | Return: 510 | (tensor) intersection area, Shape: [n,A,B]. 511 | """ 512 | n = box_a.size(0) 513 | A = box_a.size(1) 514 | B = box_b.size(1) 515 | max_xy = torch.min(box_a[:, :, 2:].unsqueeze(2).expand(n, A, B, 2), 516 | box_b[:, :, 2:].unsqueeze(1).expand(n, A, B, 2)) 517 | min_xy = torch.max(box_a[:, :, :2].unsqueeze(2).expand(n, A, B, 2), 518 | box_b[:, :, :2].unsqueeze(1).expand(n, A, B, 2)) 519 | inter = torch.clamp((max_xy - min_xy), min=0) 520 | return inter[:, :, :, 0] * inter[:, :, :, 1] 521 | 522 | 523 | 524 | def jaccard(box_a, box_b, iscrowd:bool=False): 525 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 526 | is simply the intersection over union of two boxes. Here we operate on 527 | ground truth boxes and default boxes. If iscrowd=True, put the crowd in box_b. 
528 | E.g.: 529 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 530 | Args: 531 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] 532 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] 533 | Return: 534 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] 535 | """ 536 | use_batch = True 537 | if box_a.dim() == 2: 538 | use_batch = False 539 | box_a = box_a[None, ...] 540 | box_b = box_b[None, ...] 541 | 542 | inter = intersect(box_a, box_b) 543 | area_a = ((box_a[:, :, 2]-box_a[:, :, 0]) * 544 | (box_a[:, :, 3]-box_a[:, :, 1])).unsqueeze(2).expand_as(inter) # [A,B] 545 | area_b = ((box_b[:, :, 2]-box_b[:, :, 0]) * 546 | (box_b[:, :, 3]-box_b[:, :, 1])).unsqueeze(1).expand_as(inter) # [A,B] 547 | union = area_a + area_b - inter 548 | 549 | out = inter / area_a if iscrowd else inter / union 550 | return out if use_batch else out.squeeze(0) 551 | 552 | 553 | def return_torch_unique_index(u, uv): 554 | n = uv.shape[1] # number of columns 555 | first_unique = torch.zeros(n, device=u.device).long() 556 | for j in range(n): 557 | first_unique[j] = (uv[:, j:j + 1] == u).all(0).nonzero()[0] 558 | 559 | return first_unique 560 | 561 | 562 | def strip_optimizer_from_checkpoint(filename='weights/best.pt'): 563 | # Strip optimizer from *.pt files for lighter files (reduced by 2/3 size) 564 | a = torch.load(filename, map_location='cpu') 565 | a['optimizer'] = [] 566 | torch.save(a, filename.replace('.pt', '_lite.pt')) 567 | 568 | 569 | def plot_results(): 570 | """ 571 | Plot YOLO training results from the file 'results.txt' 572 | Example of what this is trying to plot can be found at: 573 | https://user-images.githubusercontent.com/26833433/63258271-fe9d5300-c27b-11e9-9a15-95038daf4438.png 574 | An example results.txt file: 575 | import os; os.system('wget https://storage.googleapis.com/ultralytics/yolov3/results_v1.txt') 576 | """ 577 | plt.figure(figsize=(14, 7)) 578 | s = ['X + Y', 'Width + Height', 'Confidence', 'Classification', 'Total Loss', 'mAP', 'Recall', 'Precision'] 579 | files = sorted(glob.glob('results*.txt')) 580 | for f in files: 581 | results = np.loadtxt(f, usecols=[2, 3, 4, 5, 6, 9, 10, 11]).T # column 11 is mAP 582 | x = range(1, results.shape[1]) 583 | for i in range(8): 584 | plt.subplot(2, 4, i + 1) 585 | plt.plot(x, results[i, x], marker='.', label=f) 586 | plt.title(s[i]) 587 | if i == 0: 588 | plt.legend() 589 | -------------------------------------------------------------------------------- /tracker_utils/visualization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def tlwhs_to_tlbrs(tlwhs): 6 | tlbrs = np.copy(tlwhs) 7 | if len(tlbrs) == 0: 8 | return tlbrs 9 | tlbrs[:, 2] += tlwhs[:, 0] 10 | tlbrs[:, 3] += tlwhs[:, 1] 11 | return tlbrs 12 | 13 | 14 | def get_color(idx): 15 | idx = idx * 3 16 | color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255) 17 | 18 | return color 19 | 20 | 21 | def resize_image(image, max_size=800): 22 | if max(image.shape[:2]) > max_size: 23 | scale = float(max_size) / max(image.shape[:2]) 24 | image = cv2.resize(image, None, fx=scale, fy=scale) 25 | return image 26 | 27 | 28 | def plot_tracking(image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0., ids2=None): 29 | im = np.ascontiguousarray(np.copy(image)) 30 | im_h, im_w = im.shape[:2] 31 | 32 | top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255 33 | 34 | text_scale = max(1, image.shape[1] / 1600.) 
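# The get_color() hash above gives every track id a stable BGR colour, so the same identity keeps
# its colour across frames without any lookup table. Small check of what the formula yields (the
# values follow directly from the constants above, nothing is tuned):
assert get_color(1) == (111, 51, 87)     # idx*3 = 3 -> (37*3 % 255, 17*3 % 255, 29*3 % 255)
assert get_color(1) == get_color(86)     # ids 85 apart collide, since everything is taken mod 255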
35 | text_thickness = 1 if text_scale > 1.1 else 1 36 | line_thickness = max(1, int(image.shape[1] / 500.)) 37 | 38 | radius = max(5, int(im_w/140.)) 39 | cv2.putText(im, 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)), 40 | (0, int(15 * text_scale)), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), thickness=2) 41 | 42 | for i, tlwh in enumerate(tlwhs): 43 | x1, y1, w, h = tlwh 44 | intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h))) 45 | obj_id = int(obj_ids[i]) 46 | id_text = '{}'.format(int(obj_id)) 47 | if ids2 is not None: 48 | id_text = id_text + ', {}'.format(int(ids2[i])) 49 | _line_thickness = 1 if obj_id <= 0 else line_thickness 50 | color = get_color(abs(obj_id)) 51 | cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness) 52 | cv2.putText(im, id_text, (intbox[0], intbox[1] + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), 53 | thickness=text_thickness) 54 | return im 55 | 56 | 57 | def plot_trajectory(image, tlwhs, track_ids): 58 | image = image.copy() 59 | for one_tlwhs, track_id in zip(tlwhs, track_ids): 60 | color = get_color(int(track_id)) 61 | for tlwh in one_tlwhs: 62 | x1, y1, w, h = tuple(map(int, tlwh)) 63 | cv2.circle(image, (int(x1 + 0.5 * w), int(y1 + h)), 2, color, thickness=2) 64 | 65 | return image 66 | 67 | 68 | def plot_detections(image, tlbrs, scores=None, color=(255, 0, 0), ids=None): 69 | im = np.copy(image) 70 | text_scale = max(1, image.shape[1] / 800.) 71 | thickness = 2 if text_scale > 1.3 else 1 72 | for i, det in enumerate(tlbrs): 73 | x1, y1, x2, y2 = np.asarray(det[:4], dtype=np.int) 74 | if len(det) >= 7: 75 | label = 'det' if det[5] > 0 else 'trk' 76 | if ids is not None: 77 | text = '{}# {:.2f}: {:d}'.format(label, det[6], ids[i]) 78 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), 79 | thickness=thickness) 80 | else: 81 | text = '{}# {:.2f}'.format(label, det[6]) 82 | 83 | if scores is not None: 84 | text = '{:.2f}'.format(scores[i]) 85 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), 86 | thickness=thickness) 87 | 88 | cv2.rectangle(im, (x1, y1), (x2, y2), color, 2) 89 | 90 | return im 91 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaobin1231/YOLOv5_JDE/ea3149f280a2234a4bf840c7da8f5d0c77abd59d/utils/__init__.py -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | # Swish https://arxiv.org/pdf/1905.02244.pdf --------------------------------------------------------------------------- 7 | class Swish(nn.Module): # 8 | @staticmethod 9 | def forward(x): 10 | return x * torch.sigmoid(x) 11 | 12 | 13 | class HardSwish(nn.Module): 14 | @staticmethod 15 | def forward(x): 16 | return x * F.hardtanh(x + 3, 0., 6., True) / 6. 
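# The activation modules above are stateless and intended as drop-in replacements for the
# detector's default activation. Minimal usage sketch (assumes the torch import at the top of
# this file; shapes are arbitrary):
act = HardSwish()
y = act(torch.randn(1, 16, 32, 32))   # same shape out: x * hardtanh(x + 3, 0, 6) / 6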
17 | 18 | 19 | class MemoryEfficientSwish(nn.Module): 20 | class F(torch.autograd.Function): 21 | @staticmethod 22 | def forward(ctx, x): 23 | ctx.save_for_backward(x) 24 | return x * torch.sigmoid(x) 25 | 26 | @staticmethod 27 | def backward(ctx, grad_output): 28 | x = ctx.saved_tensors[0] 29 | sx = torch.sigmoid(x) 30 | return grad_output * (sx * (1 + x * (1 - sx))) 31 | 32 | def forward(self, x): 33 | return self.F.apply(x) 34 | 35 | 36 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 37 | class Mish(nn.Module): 38 | @staticmethod 39 | def forward(x): 40 | return x * F.softplus(x).tanh() 41 | 42 | 43 | class MemoryEfficientMish(nn.Module): 44 | class F(torch.autograd.Function): 45 | @staticmethod 46 | def forward(ctx, x): 47 | ctx.save_for_backward(x) 48 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 49 | 50 | @staticmethod 51 | def backward(ctx, grad_output): 52 | x = ctx.saved_tensors[0] 53 | sx = torch.sigmoid(x) 54 | fx = F.softplus(x).tanh() 55 | return grad_output * (fx + x * sx * (1 - fx * fx)) 56 | 57 | def forward(self, x): 58 | return self.F.apply(x) 59 | 60 | 61 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 62 | class FReLU(nn.Module): 63 | def __init__(self, c1, k=3): # ch_in, kernel 64 | super().__init__() 65 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1) 66 | self.bn = nn.BatchNorm2d(c1) 67 | 68 | def forward(self, x): 69 | return torch.max(x, self.bn(self.conv(x))) 70 | -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | # pip install --upgrade google-cloud-storage 3 | # from google.cloud import storage 4 | 5 | import os 6 | import platform 7 | import time 8 | from pathlib import Path 9 | 10 | 11 | def attempt_download(weights): 12 | # Attempt to download pretrained weights if not found locally 13 | weights = weights.strip().replace("'", '') 14 | msg = weights + ' missing, try downloading from https://drive.google.com/drive/folders/1Drs_Aiu7xx6S-ix95f9kNsA6ueKRpN2J' 15 | 16 | r = 1 # return 17 | if len(weights) > 0 and not os.path.isfile(weights): 18 | d = {'yolov3-spp.pt': '1mM67oNw4fZoIOL1c8M3hHmj66d8e-ni_', # yolov3-spp.yaml 19 | 'yolov5s.pt': '1R5T6rIyy3lLwgFXNms8whc-387H0tMQO', # yolov5s.yaml 20 | 'yolov5m.pt': '1vobuEExpWQVpXExsJ2w-Mbf3HJjWkQJr', # yolov5m.yaml 21 | 'yolov5l.pt': '1hrlqD1Wdei7UT4OgT785BEk1JwnSvNEV', # yolov5l.yaml 22 | 'yolov5x.pt': '1mM8aZJlWTxOg7BZJvNUMrTnA2AbeCVzS', # yolov5x.yaml 23 | } 24 | 25 | file = Path(weights).name 26 | if file in d: 27 | r = gdrive_download(id=d[file], name=weights) 28 | 29 | if not (r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6): # weights exist and > 1MB 30 | os.remove(weights) if os.path.exists(weights) else None # remove partial downloads 31 | s = 'curl -L -o %s "storage.googleapis.com/ultralytics/yolov5/ckpt/%s"' % (weights, file) 32 | r = os.system(s) # execute, capture return values 33 | 34 | # Error check 35 | if not (r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6): # weights exist and > 1MB 36 | os.remove(weights) if os.path.exists(weights) else None # remove partial downloads 37 | raise Exception(msg) 38 | 39 | 40 | def 
gdrive_download(id='1n_oKgR81BJtqk75b00eAjdv03qVCQn2f', name='coco128.zip'): 41 | # Downloads a file from Google Drive, accepting presented query 42 | # from utils.google_utils import *; gdrive_download() 43 | t = time.time() 44 | 45 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='') 46 | os.remove(name) if os.path.exists(name) else None # remove existing 47 | os.remove('cookie') if os.path.exists('cookie') else None 48 | 49 | # Attempt file download 50 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 51 | os.system('curl -c ./cookie -s -L "drive.google.com/uc?export=download&id=%s" > %s ' % (id, out)) 52 | if os.path.exists('cookie'): # large file 53 | s = 'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm=%s&id=%s" -o %s' % (get_token(), id, name) 54 | else: # small file 55 | s = 'curl -s -L -o %s "drive.google.com/uc?export=download&id=%s"' % (name, id) 56 | r = os.system(s) # execute, capture return values 57 | os.remove('cookie') if os.path.exists('cookie') else None 58 | 59 | # Error check 60 | if r != 0: 61 | os.remove(name) if os.path.exists(name) else None # remove partial 62 | print('Download error ') # raise Exception('Download error') 63 | return r 64 | 65 | # Unzip if archive 66 | if name.endswith('.zip'): 67 | print('unzipping... ', end='') 68 | os.system('unzip -q %s' % name) # unzip 69 | os.remove(name) # remove zip to free space 70 | 71 | print('Done (%.1fs)' % (time.time() - t)) 72 | return r 73 | 74 | 75 | def get_token(cookie="./cookie"): 76 | with open(cookie) as f: 77 | for line in f: 78 | if "download" in line: 79 | return line.split()[-1] 80 | return "" 81 | 82 | # def upload_blob(bucket_name, source_file_name, destination_blob_name): 83 | # # Uploads a file to a bucket 84 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 85 | # 86 | # storage_client = storage.Client() 87 | # bucket = storage_client.get_bucket(bucket_name) 88 | # blob = bucket.blob(destination_blob_name) 89 | # 90 | # blob.upload_from_filename(source_file_name) 91 | # 92 | # print('File {} uploaded to {}.'.format( 93 | # source_file_name, 94 | # destination_blob_name)) 95 | # 96 | # 97 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 98 | # # Uploads a blob from a bucket 99 | # storage_client = storage.Client() 100 | # bucket = storage_client.get_bucket(bucket_name) 101 | # blob = bucket.blob(source_blob_name) 102 | # 103 | # blob.download_to_filename(destination_file_name) 104 | # 105 | # print('Blob {} downloaded to {}.'.format( 106 | # source_blob_name, 107 | # destination_file_name)) 108 | -------------------------------------------------------------------------------- /utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | import os 3 | import time 4 | from copy import deepcopy 5 | 6 | import torch 7 | import torch.backends.cudnn as cudnn 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torchvision.models as models 11 | 12 | 13 | def init_seeds(seed=0): 14 | torch.manual_seed(seed) 15 | 16 | # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html 17 | if seed == 0: # slower, more reproducible 18 | cudnn.deterministic = True 19 | cudnn.benchmark = False 20 | else: # faster, less reproducible 21 | cudnn.deterministic = False 22 | cudnn.benchmark = True 23 | 24 | 25 | def select_device(device='', batch_size=None): 26 | # device 
= 'cpu' or '0' or '0,1,2,3'
27 |     cpu_request = device.lower() == 'cpu'
28 |     if device and not cpu_request:  # if device requested other than 'cpu'
29 |         os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
30 |         assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device  # check availability
31 |
32 |     cuda = False if cpu_request else torch.cuda.is_available()
33 |     if cuda:
34 |         c = 1024 ** 2  # bytes to MB
35 |         ng = torch.cuda.device_count()
36 |         if ng > 1 and batch_size:  # check that batch_size is compatible with device_count
37 |             assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng)
38 |         x = [torch.cuda.get_device_properties(i) for i in range(ng)]
39 |         s = 'Using CUDA '
40 |         for i in range(0, ng):
41 |             if i == 1:
42 |                 s = ' ' * len(s)
43 |             print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" %
44 |                   (s, i, x[i].name, x[i].total_memory / c))
45 |     else:
46 |         print('Using CPU')
47 |
48 |     print('')  # skip a line
49 |     return torch.device('cuda:0' if cuda else 'cpu')
50 |
51 |
52 | def time_synchronized():
53 |     torch.cuda.synchronize() if torch.cuda.is_available() else None
54 |     return time.time()
55 |
56 |
57 | def is_parallel(model):
58 |     # Returns True if the model is wrapped in DP or DDP
59 |     return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
60 |
61 |
62 | def initialize_weights(model):
63 |     for m in model.modules():
64 |         t = type(m)
65 |         if t is nn.Conv2d:
66 |             pass  # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
67 |         elif t is nn.BatchNorm2d:
68 |             m.eps = 1e-3
69 |             m.momentum = 0.03
70 |         elif t in [nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
71 |             m.inplace = True
72 |         elif t is nn.Linear:
73 |             nn.init.xavier_uniform_(m.weight)
74 |
75 |
76 | def find_modules(model, mclass=nn.Conv2d):
77 |     # finds layer indices matching module class 'mclass'
78 |     return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
79 |
80 |
81 | def sparsity(model):
82 |     # Return global model sparsity
83 |     a, b = 0., 0.
84 |     for p in model.parameters():
85 |         a += p.numel()
86 |         b += (p == 0).sum()
87 |     return b / a
88 |
89 |
90 | def prune(model, amount=0.3):
91 |     # Prune model to requested global sparsity
92 |     import torch.nn.utils.prune as prune
93 |     print('Pruning model... ', end='')
94 |     for name, m in model.named_modules():
95 |         if isinstance(m, nn.Conv2d):
96 |             prune.l1_unstructured(m, name='weight', amount=amount)  # prune
97 |             prune.remove(m, 'weight')  # make permanent
98 |     print(' %.3g global sparsity' % sparsity(model))
99 |
100 |
101 | def fuse_conv_and_bn(conv, bn):
102 |     # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
103 |     with torch.no_grad():
104 |         # init
105 |         fusedconv = nn.Conv2d(conv.in_channels,
106 |                               conv.out_channels,
107 |                               kernel_size=conv.kernel_size,
108 |                               stride=conv.stride,
109 |                               padding=conv.padding,
110 |                               bias=True).to(conv.weight.device)
111 |
112 |         # prepare filters
113 |         w_conv = conv.weight.clone().view(conv.out_channels, -1)
114 |         w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
115 |         fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
116 |
117 |         # prepare spatial bias
118 |         b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
119 |         b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
120 |         fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
121 |
122 |         return fusedconv
123 |
124 |
125 | def model_info(model, verbose=False):
126 |     # Prints a line-by-line description of a PyTorch model
127 |     n_p = sum(x.numel() for x in model.parameters())  # number parameters
128 |     n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
129 |     if verbose:
130 |         print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
131 |         for i, (name, p) in enumerate(model.named_parameters()):
132 |             name = name.replace('module_list.', '')
133 |             print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
134 |                   (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
135 |
136 |     try:  # FLOPS
137 |         from thop import profile
138 |         flops = profile(deepcopy(model), inputs=(torch.zeros(1, 3, 64, 64),), verbose=False)[0] / 1E9 * 2
139 |         fs = ', %.1f GFLOPS' % (flops * 100)  # 64x64 profile scaled to 640x640 (100x the pixels)
140 |     except Exception:  # thop not installed or profiling failed
141 |         fs = ''
142 |
143 |     print('Model Summary: %g layers, %g parameters, %g gradients%s' % (len(list(model.parameters())), n_p, n_g, fs))
144 |
145 |
146 | def load_classifier(name='resnet101', n=2):
147 |     # Loads a pretrained model reshaped to n-class output
148 |     model = models.__dict__[name](pretrained=True)
149 |
150 |     # Display model properties
151 |     input_size = [3, 224, 224]
152 |     input_space = 'RGB'
153 |     input_range = [0, 1]
154 |     mean = [0.485, 0.456, 0.406]
155 |     std = [0.229, 0.224, 0.225]
156 |     for x in ['input_size', 'input_space', 'input_range', 'mean', 'std']:  # iterate over names so eval() resolves the locals
157 |         print(x + ' =', eval(x))
158 |
159 |     # Reshape output to n classes
160 |     filters = model.fc.weight.shape[1]
161 |     model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
162 |     model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True)
163 |     model.fc.out_features = n
164 |     return model
165 |
166 |
167 | def scale_img(img, ratio=1.0, same_shape=False):  # img(16,3,256,416), r=ratio
168 |     # scales img(bs,3,y,x) by ratio
169 |     if ratio == 1.0:
170 |         return img
171 |     else:
172 |         h, w = img.shape[2:]
173 |         s = (int(h * ratio), int(w * ratio))  # new size
174 |         img = F.interpolate(img, size=s, mode='bilinear', align_corners=False)  # resize
175 |         if not same_shape:  # pad/crop img
176 |             gs = 32  # (pixels) grid size
177 |             h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
178 |         return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447)  # value = imagenet mean
179 |
180 |
181 | def copy_attr(a, b, include=(), exclude=()):
182 |     # Copy attributes from b to a, with options to only include [...] and to exclude [...]
183 |     for k, v in b.__dict__.items():
184 |         if (len(include) and k not in include) or k.startswith('_') or k in exclude:
185 |             continue
186 |         else:
187 |             setattr(a, k, v)
188 |
189 |
190 | class ModelEMA:
191 |     """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
192 |     Keep a moving average of everything in the model state_dict (parameters and buffers).
193 |     This is intended to allow functionality like
194 |     https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
195 |     A smoothed version of the weights is necessary for some training schemes to perform well.
196 |     This class is sensitive to where it is initialized in the sequence of model init,
197 |     GPU assignment and distributed training wrappers.
198 |     """
199 |
200 |     def __init__(self, model, decay=0.9999, updates=0):
201 |         # Create EMA
202 |         self.ema = deepcopy(model.module if is_parallel(model) else model).eval()  # FP32 EMA
203 |         # if next(model.parameters()).device.type != 'cpu':
204 |         #     self.ema.half()  # FP16 EMA
205 |         self.updates = updates  # number of EMA updates
206 |         self.decay = lambda x: decay * (1 - math.exp(-x / 2000))  # decay exponential ramp (to help early epochs)
207 |         for p in self.ema.parameters():
208 |             p.requires_grad_(False)
209 |
210 |     def update(self, model):
211 |         # Update EMA parameters
212 |         with torch.no_grad():
213 |             self.updates += 1
214 |             d = self.decay(self.updates)
215 |
216 |             msd = model.module.state_dict() if is_parallel(model) else model.state_dict()  # model state_dict
217 |             for k, v in self.ema.state_dict().items():
218 |                 if v.dtype.is_floating_point:
219 |                     v *= d
220 |                     v += (1. - d) * msd[k].detach()
221 |
222 |     def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
223 |         # Update EMA attributes
224 |         copy_attr(self.ema, model, include, exclude)
225 |
--------------------------------------------------------------------------------
/weights/download_weights.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Download common models
3 |
4 | python -c "
5 | from utils.google_utils import *;
6 | attempt_download('weights/yolov5s.pt');
7 | attempt_download('weights/yolov5m.pt');
8 | attempt_download('weights/yolov5l.pt');
9 | attempt_download('weights/yolov5x.pt')
10 | "
11 |
--------------------------------------------------------------------------------
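For orientation, the snippet below is a minimal sketch of how the ModelEMA class defined in utils/torch_utils.py is typically wired into a training loop. It is not part of the repository: the tiny placeholder model, optimizer, data, loss and output path exist only for illustration, and the project's own train.py remains the authoritative usage.

# Minimal, hedged sketch (not part of this repository): typical ModelEMA usage in a training loop.
import torch
import torch.nn as nn
from utils.torch_utils import ModelEMA

model = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.BatchNorm2d(8), nn.ReLU())  # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
ema = ModelEMA(model)  # create the EMA copy after model init / GPU assignment, before training starts

for _ in range(3):  # placeholder loop standing in for epochs/batches
    imgs = torch.randn(2, 3, 32, 32)  # placeholder batch
    loss = model(imgs).mean()         # placeholder loss
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    ema.update(model)  # blend the current weights into the EMA after every optimizer step

ema.update_attr(model)  # copy non-tensor attributes from the live model before checkpointing
torch.save({'model': ema.ema}, 'ema_sketch.pt')  # the smoothed weights are what get saved and evaluated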