├── .dockerignore
├── .gitattributes
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── --bug-report.md
    │   ├── --feature-request.md
    │   └── -question.md
    └── workflows
    │   ├── ci-testing.yml
    │   ├── greetings.yml
    │   ├── rebase.yml
    │   └── stale.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── data
    ├── get_coco2017.sh
    ├── get_voc.sh
    └── mot.yaml
├── detect.py
├── hubconf.py
├── models
    ├── __init__.py
    ├── common.py
    ├── experimental.py
    ├── export.py
    ├── hub
    │   ├── yolov3-spp.yaml
    │   ├── yolov5-fpn.yaml
    │   └── yolov5-panet.yaml
    ├── yolo.py
    ├── yolov5_JDE.yaml
    ├── yolov5l.yaml
    ├── yolov5m.yaml
    ├── yolov5s.yaml
    └── yolov5x.yaml
├── mot_data
    ├── caltech.10k.val
    ├── caltech.train
    ├── caltech.val
    ├── citypersons.train
    ├── citypersons.val
    ├── cuhksysu.train
    ├── cuhksysu.val
    ├── eth.train
    ├── mot16.train
    ├── mot17.train
    ├── mot19.train
    ├── prw.train
    └── prw.val
├── requirements.txt
├── test.py
├── track.py
├── tracker
    ├── __init__.py
    ├── basetrack.py
    ├── matching.py
    └── multitracker.py
├── tracker_utils
    ├── datasets.py
    ├── evaluation.py
    ├── io.py
    ├── kalman_filter.py
    ├── log.py
    ├── timer.py
    ├── utils.py
    └── visualization.py
├── train.py
├── utils
    ├── __init__.py
    ├── activations.py
    ├── datasets.py
    ├── general.py
    ├── google_utils.py
    └── torch_utils.py
└── weights
    └── download_weights.sh


/.dockerignore:
--------------------------------------------------------------------------------
  1 | # Repo-specific DockerIgnore -------------------------------------------------------------------------------------------
  2 | # .git
  3 | .cache
  4 | .idea
  5 | runs
  6 | output
  7 | coco
  8 | storage.googleapis.com
  9 | 
 10 | data/samples/*
 11 | **/results*.txt
 12 | *.jpg
 13 | 
 14 | # Neural Network weights -----------------------------------------------------------------------------------------------
 15 | **/*.weights
 16 | **/*.pt
 17 | **/*.pth
 18 | **/*.onnx
 19 | **/*.mlmodel
 20 | **/*.torchscript
 21 | 
 22 | 
 23 | # Below Copied From .gitignore -----------------------------------------------------------------------------------------
 24 | # Below Copied From .gitignore -----------------------------------------------------------------------------------------
 25 | 
 26 | 
 27 | # GitHub Python GitIgnore ----------------------------------------------------------------------------------------------
 28 | # Byte-compiled / optimized / DLL files
 29 | __pycache__/
 30 | *.py[cod]
 31 | *$py.class
 32 | 
 33 | # C extensions
 34 | *.so
 35 | 
 36 | # Distribution / packaging
 37 | .Python
 38 | env/
 39 | build/
 40 | develop-eggs/
 41 | dist/
 42 | downloads/
 43 | eggs/
 44 | .eggs/
 45 | lib/
 46 | lib64/
 47 | parts/
 48 | sdist/
 49 | var/
 50 | wheels/
 51 | *.egg-info/
 52 | .installed.cfg
 53 | *.egg
 54 | 
 55 | # PyInstaller
 56 | #  Usually these files are written by a python script from a template
 57 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 58 | *.manifest
 59 | *.spec
 60 | 
 61 | # Installer logs
 62 | pip-log.txt
 63 | pip-delete-this-directory.txt
 64 | 
 65 | # Unit test / coverage reports
 66 | htmlcov/
 67 | .tox/
 68 | .coverage
 69 | .coverage.*
 70 | .cache
 71 | nosetests.xml
 72 | coverage.xml
 73 | *.cover
 74 | .hypothesis/
 75 | 
 76 | # Translations
 77 | *.mo
 78 | *.pot
 79 | 
 80 | # Django stuff:
 81 | *.log
 82 | local_settings.py
 83 | 
 84 | # Flask stuff:
 85 | instance/
 86 | .webassets-cache
 87 | 
 88 | # Scrapy stuff:
 89 | .scrapy
 90 | 
 91 | # Sphinx documentation
 92 | docs/_build/
 93 | 
 94 | # PyBuilder
 95 | target/
 96 | 
 97 | # Jupyter Notebook
 98 | .ipynb_checkpoints
 99 | 
100 | # pyenv
101 | .python-version
102 | 
103 | # celery beat schedule file
104 | celerybeat-schedule
105 | 
106 | # SageMath parsed files
107 | *.sage.py
108 | 
109 | # dotenv
110 | .env
111 | 
112 | # virtualenv
113 | .venv
114 | venv/
115 | ENV/
116 | 
117 | # Spyder project settings
118 | .spyderproject
119 | .spyproject
120 | 
121 | # Rope project settings
122 | .ropeproject
123 | 
124 | # mkdocs documentation
125 | /site
126 | 
127 | # mypy
128 | .mypy_cache/
129 | 
130 | 
131 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore -----------------------------------------------
132 | 
133 | # General
134 | .DS_Store
135 | .AppleDouble
136 | .LSOverride
137 | 
138 | # Icon must end with two \r
139 | Icon
140 | Icon?
141 | 
142 | # Thumbnails
143 | ._*
144 | 
145 | # Files that might appear in the root of a volume
146 | .DocumentRevisions-V100
147 | .fseventsd
148 | .Spotlight-V100
149 | .TemporaryItems
150 | .Trashes
151 | .VolumeIcon.icns
152 | .com.apple.timemachine.donotpresent
153 | 
154 | # Directories potentially created on remote AFP share
155 | .AppleDB
156 | .AppleDesktop
157 | Network Trash Folder
158 | Temporary Items
159 | .apdisk
160 | 
161 | 
162 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore
163 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
164 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
165 | 
166 | # User-specific stuff:
167 | .idea/*
168 | .idea/**/workspace.xml
169 | .idea/**/tasks.xml
170 | .idea/dictionaries
171 | .html  # Bokeh Plots
172 | .pg  # TensorFlow Frozen Graphs
173 | .avi # videos
174 | 
175 | # Sensitive or high-churn files:
176 | .idea/**/dataSources/
177 | .idea/**/dataSources.ids
178 | .idea/**/dataSources.local.xml
179 | .idea/**/sqlDataSources.xml
180 | .idea/**/dynamic.xml
181 | .idea/**/uiDesigner.xml
182 | 
183 | # Gradle:
184 | .idea/**/gradle.xml
185 | .idea/**/libraries
186 | 
187 | # CMake
188 | cmake-build-debug/
189 | cmake-build-release/
190 | 
191 | # Mongo Explorer plugin:
192 | .idea/**/mongoSettings.xml
193 | 
194 | ## File-based project format:
195 | *.iws
196 | 
197 | ## Plugin-specific files:
198 | 
199 | # IntelliJ
200 | out/
201 | 
202 | # mpeltonen/sbt-idea plugin
203 | .idea_modules/
204 | 
205 | # JIRA plugin
206 | atlassian-ide-plugin.xml
207 | 
208 | # Cursive Clojure plugin
209 | .idea/replstate.xml
210 | 
211 | # Crashlytics plugin (for Android Studio and IntelliJ)
212 | com_crashlytics_export_strings.xml
213 | crashlytics.properties
214 | crashlytics-build.properties
215 | fabric.properties
216 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # this drop notebooks from GitHub language stats
2 | *.ipynb linguist-vendored
3 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/--bug-report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: "\U0001F41BBug report"
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: bug
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | Before submitting a bug report, please be aware that your issue **must be reproducible** with all of the following, otherwise it is non-actionable, and we can not help you:
11 |  - **Current repo**: run `git fetch && git status -uno` to check and `git pull` to update repo
12 |  - **Common dataset**: coco.yaml or coco128.yaml
13 |  - **Common environment**: Colab, Google Cloud, or Docker image. See https://github.com/ultralytics/yolov5#environments
14 |  
15 | If this is a custom dataset/training question you **must include** your `train*.jpg`, `test*.jpg` and `results.png` figures, or we can not help you. You can generate these with `utils.plot_results()`.
16 | 
17 | 
18 | ## 🐛 Bug
19 | A clear and concise description of what the bug is.
20 | 
21 | 
22 | ## To Reproduce (REQUIRED)
23 | 
24 | Input:
25 | ```
26 | import torch
27 | 
28 | a = torch.tensor([5])
29 | c = a / 0
30 | ```
31 | 
32 | Output:
33 | ```
34 | Traceback (most recent call last):
35 |   File "/Users/glennjocher/opt/anaconda3/envs/env1/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
36 |     exec(code_obj, self.user_global_ns, self.user_ns)
37 |   File "<ipython-input-5-be04c762b799>", line 5, in <module>
38 |     c = a / 0
39 | RuntimeError: ZeroDivisionError
40 | ```
41 | 
42 | 
43 | ## Expected behavior
44 | A clear and concise description of what you expected to happen.
45 | 
46 | 
47 | ## Environment
48 | If applicable, add screenshots to help explain your problem.
49 | 
50 |  - OS: [e.g. Ubuntu]
51 |  - GPU [e.g. 2080 Ti]
52 | 
53 | 
54 | ## Additional context
55 | Add any other context about the problem here.
56 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/--feature-request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: "\U0001F680Feature request"
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: enhancement
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | ## 🚀 Feature
11 | <!-- A clear and concise description of the feature proposal -->
12 | 
13 | ## Motivation
14 | 
15 | <!-- Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too -->
16 | 
17 | ## Pitch
18 | 
19 | <!-- A clear and concise description of what you want to happen. -->
20 | 
21 | ## Alternatives
22 | 
23 | <!-- A clear and concise description of any alternative solutions or features you've considered, if any. -->
24 | 
25 | ## Additional context
26 | 
27 | <!-- Add any other context or screenshots about the feature request here. -->
28 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/-question.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: "❓Question"
 3 | about: Ask a general question
 4 | title: ''
 5 | labels: question
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | ## ❔Question
11 | 
12 | 
13 | ## Additional context
14 | 


--------------------------------------------------------------------------------
/.github/workflows/ci-testing.yml:
--------------------------------------------------------------------------------
 1 | name: CI CPU testing
 2 | 
 3 | on:  # https://help.github.com/en/actions/reference/events-that-trigger-workflows
 4 |   push:
 5 |   pull_request:
 6 |   schedule:
 7 |     - cron: "0 0 * * *"
 8 | 
 9 | jobs:
10 |   cpu-tests:
11 | 
12 |     runs-on: ${{ matrix.os }}
13 |     strategy:
14 |       fail-fast: false
15 |       matrix:
16 |         os: [ubuntu-latest, macos-latest, windows-latest]
17 |         python-version: [3.8]
18 |         model: ['yolov5s']  # models to test
19 | 
20 |     # Timeout: https://stackoverflow.com/a/59076067/4521646
21 |     timeout-minutes: 50
22 |     steps:
23 |       - uses: actions/checkout@v2
24 |       - name: Set up Python ${{ matrix.python-version }}
25 |         uses: actions/setup-python@v2
26 |         with:
27 |           python-version: ${{ matrix.python-version }}
28 | 
29 |       # Note: This uses an internal pip API and may not always work
30 |       # https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow
31 |       - name: Get pip cache
32 |         id: pip-cache
33 |         run: |
34 |           python -c "from pip._internal.locations import USER_CACHE_DIR; print('::set-output name=dir::' + USER_CACHE_DIR)"
35 | 
36 |       - name: Cache pip
37 |         uses: actions/cache@v1
38 |         with:
39 |           path: ${{ steps.pip-cache.outputs.dir }}
40 |           key: ${{ runner.os }}-${{ matrix.python-version }}-pip-${{ hashFiles('requirements.txt') }}
41 |           restore-keys: |
42 |             ${{ runner.os }}-${{ matrix.python-version }}-pip-
43 | 
44 |       - name: Install dependencies
45 |         run: |
46 |           python -m pip install --upgrade pip
47 |           pip install -qr requirements.txt -f https://download.pytorch.org/whl/cpu/torch_stable.html
48 |           pip install -q onnx
49 |           python --version
50 |           pip --version
51 |           pip list
52 |         shell: bash
53 | 
54 |       - name: Download data
55 |         run: |
56 |           curl -L -o temp.zip https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
57 |           unzip -q temp.zip -d ../
58 |           rm temp.zip
59 | 
60 |       - name: Tests workflow
61 |         run: |
62 |           export PYTHONPATH="$PWD"  # to run *.py. files in subdirectories
63 |           di=cpu # inference devices  # define device
64 | 
65 |           # train
66 |           python train.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --cfg models/${{ matrix.model }}.yaml --epochs 1 --device $di
67 |           # detect
68 |           python detect.py --weights weights/${{ matrix.model }}.pt --device $di
69 |           python detect.py --weights runs/exp0/weights/last.pt --device $di
70 |           # test
71 |           python test.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --device $di
72 |           python test.py --img 256 --batch 8 --weights runs/exp0/weights/last.pt --device $di
73 | 
74 |           python models/yolo.py --cfg models/${{ matrix.model }}.yaml  # inspect
75 |           python models/export.py --img 256 --batch 1 --weights weights/${{ matrix.model }}.pt  # export
76 |         shell: bash
77 | 


--------------------------------------------------------------------------------
/.github/workflows/greetings.yml:
--------------------------------------------------------------------------------
 1 | name: Greetings
 2 | 
 3 | on: [pull_request_target, issues]
 4 | 
 5 | jobs:
 6 |   greeting:
 7 |     runs-on: ubuntu-latest
 8 |     steps:
 9 |       - uses: actions/first-interaction@v1
10 |         with:
11 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
12 |           pr-message: |
13 |             Hello @${{ github.actor }}, thank you for submitting a PR! To allow your work to be integrated as seamlessly as possible, we advise you to:
14 |             - Verify your PR is **up-to-date with origin/master.** If your PR is behind origin/master update by running the following, replacing 'feature' with the name of your local branch:
15 |             ```bash
16 |             git remote add upstream https://github.com/ultralytics/yolov5.git
17 |             git fetch upstream
18 |             git checkout feature  # <----- replace 'feature' with local branch name
19 |             git rebase upstream/master
20 |             git push -u origin -f
21 |             ```
22 |             - Verify all Continuous Integration (CI) **checks are passing**.
23 |             - Reduce changes to the absolute **minimum** required for your bug fix or feature addition. _"It is not daily increase but daily decrease, hack away the unessential. The closer to the source, the less wastage there is."_  -Bruce Lee
24 | 
25 |           issue-message: |
26 |             Hello @${{ github.actor }}, thank you for your interest in our work! Please visit our [Custom Training Tutorial](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data) to get started, and see our [Jupyter Notebook](https://github.com/ultralytics/yolov5/blob/master/tutorial.ipynb) <a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>, [Docker Image](https://hub.docker.com/r/ultralytics/yolov5), and [Google Cloud Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart) for example environments.
27 | 
28 |             If this is a bug report, please provide screenshots and **minimum viable code to reproduce your issue**, otherwise we can not help you.
29 | 
30 |             If this is a custom model or data training question, please note Ultralytics does **not** provide free personal support. As a leader in vision ML and AI, we do offer professional consulting, from simple expert advice up to delivery of fully customized, end-to-end production solutions for our clients, such as:
31 |             - **Cloud-based AI** systems operating on **hundreds of HD video streams in realtime.**
32 |             - **Edge AI** integrated into custom iOS and Android apps for realtime **30 FPS video inference.**
33 |             - **Custom data training**, hyperparameter evolution, and model exportation to any destination.
34 | 
35 |             For more information please visit https://www.ultralytics.com.
36 | 


--------------------------------------------------------------------------------
/.github/workflows/rebase.yml:
--------------------------------------------------------------------------------
 1 | name: Automatic Rebase
 2 | # https://github.com/marketplace/actions/automatic-rebase
 3 | 
 4 | on:
 5 |   issue_comment:
 6 |     types: [created]
 7 | 
 8 | jobs:
 9 |   rebase:
10 |     name: Rebase
11 |     if: github.event.issue.pull_request != '' && contains(github.event.comment.body, '/rebase')
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |     - name: Checkout the latest code
15 |       uses: actions/checkout@v2
16 |       with:
17 |         fetch-depth: 0
18 |     - name: Automatic Rebase
19 |       uses: cirrus-actions/rebase@1.3.1
20 |       env:
21 |         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
22 | 


--------------------------------------------------------------------------------
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
 1 | name: Close stale issues
 2 | on:
 3 |   schedule:
 4 |     - cron: "0 0 * * *"
 5 | 
 6 | jobs:
 7 |   stale:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - uses: actions/stale@v1
11 |         with:
12 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
13 |           stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.'
14 |           stale-pr-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.'
15 |           days-before-stale: 30
16 |           days-before-close: 5
17 |           exempt-issue-label: 'documentation,tutorial'
18 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Repo-specific GitIgnore ----------------------------------------------------------------------------------------------
  2 | *.jpg
  3 | *.jpeg
  4 | *.png
  5 | *.bmp
  6 | *.tif
  7 | *.tiff
  8 | *.heic
  9 | *.JPG
 10 | *.JPEG
 11 | *.PNG
 12 | *.BMP
 13 | *.TIF
 14 | *.TIFF
 15 | *.HEIC
 16 | *.mp4
 17 | *.mov
 18 | *.MOV
 19 | *.avi
 20 | *.data
 21 | *.json
 22 | 
 23 | *.cfg
 24 | !cfg/yolov3*.cfg
 25 | 
 26 | storage.googleapis.com
 27 | runs/*
 28 | data/*
 29 | !data/samples/zidane.jpg
 30 | !data/samples/bus.jpg
 31 | !data/coco.names
 32 | !data/coco_paper.names
 33 | !data/coco.data
 34 | !data/coco_*.data
 35 | !data/coco_*.txt
 36 | !data/trainvalno5k.shapes
 37 | !data/*.sh
 38 | 
 39 | pycocotools/*
 40 | results*.txt
 41 | gcp_test*.sh
 42 | 
 43 | # MATLAB GitIgnore -----------------------------------------------------------------------------------------------------
 44 | *.m~
 45 | *.mat
 46 | !targets*.mat
 47 | 
 48 | # Neural Network weights -----------------------------------------------------------------------------------------------
 49 | *.weights
 50 | *.pt
 51 | *.onnx
 52 | *.mlmodel
 53 | *.torchscript
 54 | darknet53.conv.74
 55 | yolov3-tiny.conv.15
 56 | 
 57 | # GitHub Python GitIgnore ----------------------------------------------------------------------------------------------
 58 | # Byte-compiled / optimized / DLL files
 59 | __pycache__/
 60 | *.py[cod]
 61 | *$py.class
 62 | 
 63 | # C extensions
 64 | *.so
 65 | 
 66 | # Distribution / packaging
 67 | .Python
 68 | env/
 69 | build/
 70 | develop-eggs/
 71 | dist/
 72 | downloads/
 73 | eggs/
 74 | .eggs/
 75 | lib/
 76 | lib64/
 77 | parts/
 78 | sdist/
 79 | var/
 80 | wheels/
 81 | *.egg-info/
 82 | .installed.cfg
 83 | *.egg
 84 | 
 85 | # PyInstaller
 86 | #  Usually these files are written by a python script from a template
 87 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 88 | *.manifest
 89 | *.spec
 90 | 
 91 | # Installer logs
 92 | pip-log.txt
 93 | pip-delete-this-directory.txt
 94 | 
 95 | # Unit test / coverage reports
 96 | htmlcov/
 97 | .tox/
 98 | .coverage
 99 | .coverage.*
100 | .cache
101 | nosetests.xml
102 | coverage.xml
103 | *.cover
104 | .hypothesis/
105 | 
106 | # Translations
107 | *.mo
108 | *.pot
109 | 
110 | # Django stuff:
111 | *.log
112 | local_settings.py
113 | 
114 | # Flask stuff:
115 | instance/
116 | .webassets-cache
117 | 
118 | # Scrapy stuff:
119 | .scrapy
120 | 
121 | # Sphinx documentation
122 | docs/_build/
123 | 
124 | # PyBuilder
125 | target/
126 | 
127 | # Jupyter Notebook
128 | .ipynb_checkpoints
129 | 
130 | # pyenv
131 | .python-version
132 | 
133 | # celery beat schedule file
134 | celerybeat-schedule
135 | 
136 | # SageMath parsed files
137 | *.sage.py
138 | 
139 | # dotenv
140 | .env
141 | 
142 | # virtualenv
143 | .venv
144 | venv/
145 | ENV/
146 | 
147 | # Spyder project settings
148 | .spyderproject
149 | .spyproject
150 | 
151 | # Rope project settings
152 | .ropeproject
153 | 
154 | # mkdocs documentation
155 | /site
156 | 
157 | # mypy
158 | .mypy_cache/
159 | 
160 | 
161 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore -----------------------------------------------
162 | 
163 | # General
164 | .DS_Store
165 | .AppleDouble
166 | .LSOverride
167 | 
168 | # Icon must end with two \r
169 | Icon
170 | Icon?
171 | 
172 | # Thumbnails
173 | ._*
174 | 
175 | # Files that might appear in the root of a volume
176 | .DocumentRevisions-V100
177 | .fseventsd
178 | .Spotlight-V100
179 | .TemporaryItems
180 | .Trashes
181 | .VolumeIcon.icns
182 | .com.apple.timemachine.donotpresent
183 | 
184 | # Directories potentially created on remote AFP share
185 | .AppleDB
186 | .AppleDesktop
187 | Network Trash Folder
188 | Temporary Items
189 | .apdisk
190 | 
191 | 
192 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore
193 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
194 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
195 | 
196 | # User-specific stuff:
197 | .idea/*
198 | .idea/**/workspace.xml
199 | .idea/**/tasks.xml
200 | .idea/dictionaries
201 | .html  # Bokeh Plots
202 | .pg  # TensorFlow Frozen Graphs
203 | .avi # videos
204 | 
205 | # Sensitive or high-churn files:
206 | .idea/**/dataSources/
207 | .idea/**/dataSources.ids
208 | .idea/**/dataSources.local.xml
209 | .idea/**/sqlDataSources.xml
210 | .idea/**/dynamic.xml
211 | .idea/**/uiDesigner.xml
212 | 
213 | # Gradle:
214 | .idea/**/gradle.xml
215 | .idea/**/libraries
216 | 
217 | # CMake
218 | cmake-build-debug/
219 | cmake-build-release/
220 | 
221 | # Mongo Explorer plugin:
222 | .idea/**/mongoSettings.xml
223 | 
224 | ## File-based project format:
225 | *.iws
226 | 
227 | ## Plugin-specific files:
228 | 
229 | # IntelliJ
230 | out/
231 | 
232 | # mpeltonen/sbt-idea plugin
233 | .idea_modules/
234 | 
235 | # JIRA plugin
236 | atlassian-ide-plugin.xml
237 | 
238 | # Cursive Clojure plugin
239 | .idea/replstate.xml
240 | 
241 | # Crashlytics plugin (for Android Studio and IntelliJ)
242 | com_crashlytics_export_strings.xml
243 | crashlytics.properties
244 | crashlytics-build.properties
245 | fabric.properties
246 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Start FROM Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch
 2 | FROM nvcr.io/nvidia/pytorch:20.03-py3
 3 | 
 4 | # Install dependencies
 5 | COPY requirements.txt .
 6 | RUN pip install -r requirements.txt gsutil
 7 | 
 8 | # Create working directory
 9 | RUN mkdir -p /usr/src/app
10 | WORKDIR /usr/src/app
11 | 
12 | # Copy contents
13 | COPY . /usr/src/app
14 | 
15 | # Copy weights
16 | #RUN python3 -c "from models import *; \
17 | #attempt_download('weights/yolov5s.pt'); \
18 | #attempt_download('weights/yolov5m.pt'); \
19 | #attempt_download('weights/yolov5l.pt')"
20 | 
21 | 
22 | # ---------------------------------------------------  Extras Below  ---------------------------------------------------
23 | 
24 | # Build and Push
25 | # t=ultralytics/yolov5:latest && sudo docker build -t $t . && sudo docker push $t
26 | 
27 | # Pull and Run
28 | # t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host $t
29 | 
30 | # Pull and Run with local directory access
31 | # t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all -v "$(pwd)"/coco:/usr/src/coco $t
32 | 
33 | # Kill all
34 | # sudo docker kill "$(sudo docker ps -q)"
35 | 
36 | # Kill all image-based
37 | # sudo docker kill $(sudo docker ps -a -q --filter ancestor=ultralytics/yolov5:latest)
38 | 
39 | # Bash into running container
40 | # sudo docker container exec -it ba65811811ab bash
41 | 
42 | # Bash into stopped container
43 | # sudo docker commit 092b16b25c5b usr/resume && sudo docker run -it --gpus all --ipc=host -v "$(pwd)"/coco:/usr/src/coco --entrypoint=sh usr/resume
44 | 
45 | # Send weights to GCP
46 | # python -c "from utils.utils import *; strip_optimizer('runs/exp0/weights/last.pt', 'temp.pt')" && gsutil cp temp.pt gs://*
47 | 
48 | # Clean up
49 | # docker system prune -a --volumes
50 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Introduction
 2 | This repo is the a codebase of the Joint Detection and Embedding (JDE) model. JDE is a fast and high-performance multiple-object tracker that learns the object detection task and appearance embedding task simutaneously in a shared neural network. Due to the recent release of YOLOv5, we replace the detector in JDE with YOLOv5 and achieve high performance on MOT Benchmark. For some reasons, 
 3 | we can't release our stronger version, but we hope this repo will help researches/engineers to develop more practical real-time MOT systems.
 4 | 
 5 | # Requirements
 6 | Just follow the environmnet configuration of [YOLOv5](https://github.com/ultralytics/yolov5).
 7 | 
 8 | # Dataset zoo
 9 | Just follow the [DATASET_ZOO](https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/DATASET_ZOO.md) of JDE.
10 | 
11 | 
12 | ## Results on MOT16 Dataset
13 | |  | MOTA | IDS |IDF1 | MOTP| FPS | Params(M) |
14 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
15 | | JDE(1088x608) |68.5 |1496 |66.8 | 0.221 |21 | 298 |
16 | | Ours(1088x608) |71.0 |695 | 73.2 | 0.166 | 56 | 35 |
17 | 
18 | ## Results on MOT20 Dataset
19 | |  | MOTA | IDS |IDF1 | MOTP| FPS | Params(M) |
20 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
21 | | JDE(1088x608) |49.1 |24507 |38.4 | 0.272 |14 | 298 |
22 | | Ours(1088x608) |55.3 |9190 | 47.5 | 0.287 | 24 | 35 |


--------------------------------------------------------------------------------
/data/get_coco2017.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # COCO 2017 dataset http://cocodataset.org
 3 | # Download command: bash yolov5/data/get_coco2017.sh
 4 | # Train command: python train.py --data coco.yaml
 5 | # Default dataset location is next to /yolov5:
 6 | #   /parent_folder
 7 | #     /coco
 8 | #     /yolov5
 9 | 
10 | 
11 | # Download labels from Google Drive, accepting presented query
12 | filename="coco2017labels.zip"
13 | fileid="1cXZR_ckHki6nddOmcysCuuJFM--T-Q6L"
14 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null
15 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename}
16 | rm ./cookie
17 | 
18 | # Unzip labels
19 | unzip -q ${filename}  # for coco.zip
20 | # tar -xzf ${filename}  # for coco.tar.gz
21 | rm ${filename}
22 | 
23 | # Download and unzip images
24 | cd coco/images
25 | f="train2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f  # 19G, 118k images
26 | f="val2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f  # 1G, 5k images
27 | # f="test2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f  # 7G,  41k images
28 | 
29 | # cd out
30 | cd ../..
31 | 


--------------------------------------------------------------------------------
/data/get_voc.sh:
--------------------------------------------------------------------------------
  1 | # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
  2 | # Download command: bash ./data/get_voc.sh
  3 | # Train command: python train.py --data voc.yaml
  4 | # Default dataset location is next to /yolov5:
  5 | #   /parent_folder
  6 | #     /VOC
  7 | #     /yolov5
  8 | 
  9 | 
 10 | start=`date +%s`
 11 | 
 12 | # handle optional download dir
 13 | if [ -z "$1" ]
 14 |   then
 15 |     # navigate to ~/tmp
 16 |     echo "navigating to ../tmp/ ..."
 17 |     mkdir -p ../tmp
 18 |     cd ../tmp/
 19 |   else
 20 |     # check if is valid directory
 21 |     if [ ! -d $1 ]; then
 22 |         echo $1 "is not a valid directory"
 23 |         exit 0
 24 |     fi
 25 |     echo "navigating to" $1 "..."
 26 |     cd $1
 27 | fi
 28 | 
 29 | echo "Downloading VOC2007 trainval ..."
 30 | # Download the data.
 31 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
 32 | echo "Downloading VOC2007 test data ..."
 33 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
 34 | echo "Done downloading."
 35 | 
 36 | # Extract data
 37 | echo "Extracting trainval ..."
 38 | tar -xf VOCtrainval_06-Nov-2007.tar
 39 | echo "Extracting test ..."
 40 | tar -xf VOCtest_06-Nov-2007.tar
 41 | echo "removing tars ..."
 42 | rm VOCtrainval_06-Nov-2007.tar
 43 | rm VOCtest_06-Nov-2007.tar
 44 | 
 45 | end=`date +%s`
 46 | runtime=$((end-start))
 47 | 
 48 | echo "Completed in" $runtime "seconds"
 49 | 
 50 | start=`date +%s`
 51 | 
 52 | # handle optional download dir
 53 | if [ -z "$1" ]
 54 |   then
 55 |     # navigate to ~/tmp
 56 |     echo "navigating to ../tmp/ ..."
 57 |     mkdir -p ../tmp
 58 |     cd ../tmp/
 59 |   else
 60 |     # check if is valid directory
 61 |     if [ ! -d $1 ]; then
 62 |         echo $1 "is not a valid directory"
 63 |         exit 0
 64 |     fi
 65 |     echo "navigating to" $1 "..."
 66 |     cd $1
 67 | fi
 68 | 
 69 | echo "Downloading VOC2012 trainval ..."
 70 | # Download the data.
 71 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
 72 | echo "Done downloading."
 73 | 
 74 | 
 75 | # Extract data
 76 | echo "Extracting trainval ..."
 77 | tar -xf VOCtrainval_11-May-2012.tar
 78 | echo "removing tar ..."
 79 | rm VOCtrainval_11-May-2012.tar
 80 | 
 81 | end=`date +%s`
 82 | runtime=$((end-start))
 83 | 
 84 | echo "Completed in" $runtime "seconds"
 85 | 
 86 | cd ../tmp
 87 | echo "Spliting dataset..."
 88 | python3 - "$@" <<END
 89 | import xml.etree.ElementTree as ET
 90 | import pickle
 91 | import os
 92 | from os import listdir, getcwd
 93 | from os.path import join
 94 | 
 95 | sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
 96 | 
 97 | classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
 98 | 
 99 | 
100 | def convert(size, box):
101 |     dw = 1./(size[0])
102 |     dh = 1./(size[1])
103 |     x = (box[0] + box[1])/2.0 - 1
104 |     y = (box[2] + box[3])/2.0 - 1
105 |     w = box[1] - box[0]
106 |     h = box[3] - box[2]
107 |     x = x*dw
108 |     w = w*dw
109 |     y = y*dh
110 |     h = h*dh
111 |     return (x,y,w,h)
112 | 
113 | def convert_annotation(year, image_id):
114 |     in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
115 |     out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w')
116 |     tree=ET.parse(in_file)
117 |     root = tree.getroot()
118 |     size = root.find('size')
119 |     w = int(size.find('width').text)
120 |     h = int(size.find('height').text)
121 | 
122 |     for obj in root.iter('object'):
123 |         difficult = obj.find('difficult').text
124 |         cls = obj.find('name').text
125 |         if cls not in classes or int(difficult)==1:
126 |             continue
127 |         cls_id = classes.index(cls)
128 |         xmlbox = obj.find('bndbox')
129 |         b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
130 |         bb = convert((w,h), b)
131 |         out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
132 | 
133 | wd = getcwd()
134 | 
135 | for year, image_set in sets:
136 |     if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)):
137 |         os.makedirs('VOCdevkit/VOC%s/labels/'%(year))
138 |     image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
139 |     list_file = open('%s_%s.txt'%(year, image_set), 'w')
140 |     for image_id in image_ids:
141 |         list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id))
142 |         convert_annotation(year, image_id)
143 |     list_file.close()
144 | 
145 | END
146 | 
147 | cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt
148 | cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt
149 | 
150 | python3 - "$@" <<END
151 | 
152 | import shutil
153 | import os
154 | os.system('mkdir ../VOC/')
155 | os.system('mkdir ../VOC/images')
156 | os.system('mkdir ../VOC/images/train')
157 | os.system('mkdir ../VOC/images/val')
158 | 
159 | os.system('mkdir ../VOC/labels')
160 | os.system('mkdir ../VOC/labels/train')
161 | os.system('mkdir ../VOC/labels/val')
162 | 
163 | import os
164 | print(os.path.exists('../tmp/train.txt'))
165 | f = open('../tmp/train.txt', 'r')
166 | lines = f.readlines()
167 | 
168 | for line in lines:
169 |     #print(line.split('/')[-1][:-1])
170 |     line = "/".join(line.split('/')[2:])
171 |     #print(line)
172 |     if (os.path.exists("../" + line[:-1])):
173 |         os.system("cp ../"+ line[:-1] + " ../VOC/images/train")
174 |         
175 | print(os.path.exists('../tmp/train.txt'))
176 | f = open('../tmp/train.txt', 'r')
177 | lines = f.readlines()
178 | 
179 | for line in lines:
180 |     #print(line.split('/')[-1][:-1])
181 |     line = "/".join(line.split('/')[2:])
182 |     line = line.replace('JPEGImages', 'labels')
183 |     line = line.replace('jpg', 'txt')
184 |     #print(line)
185 |     if (os.path.exists("../" + line[:-1])):
186 |         os.system("cp ../"+ line[:-1] + " ../VOC/labels/train")
187 | 
188 | print(os.path.exists('../tmp/2007_test.txt'))
189 | f = open('../tmp/2007_test.txt', 'r')
190 | lines = f.readlines()
191 | 
192 | for line in lines:
193 |     #print(line.split('/')[-1][:-1])
194 |     line = "/".join(line.split('/')[2:])
195 |     
196 |     if (os.path.exists("../" + line[:-1])):
197 |         os.system("cp ../"+ line[:-1] + " ../VOC/images/val")
198 | 
199 | print(os.path.exists('../tmp/2007_test.txt'))
200 | f = open('../tmp/2007_test.txt', 'r')
201 | lines = f.readlines()
202 | 
203 | for line in lines:
204 |     #print(line.split('/')[-1][:-1])
205 |     line = "/".join(line.split('/')[2:])
206 |     line = line.replace('JPEGImages', 'labels')
207 |     line = line.replace('jpg', 'txt')
208 |     #print(line)
209 |     if (os.path.exists("../" + line[:-1])):
210 |         os.system("cp ../"+ line[:-1] + " ../VOC/labels/val")
211 | 
212 | END
213 | 
214 | rm -rf ../tmp  # remove temporary directory
215 | echo "VOC download done."
216 | 


--------------------------------------------------------------------------------
/data/mot.yaml:
--------------------------------------------------------------------------------
 1 | root: /home/xb/data
 2 | 
 3 | train: { mot17: ./mot_data/mot17.train,
 4 |          caltech: ./mot_data/caltech.train,
 5 |          cuhksysu: ./mot_data/cuhksysu.train,
 6 |          prw: ./mot_data/prw.train,
 7 |          eth: ./mot_data/eth.train
 8 |        }
 9 | 
10 | test_emb: { cuhksysu: ./mot_data/cuhksysu.val,
11 |             prw: ./mot_data/prw.val
12 |           }
13 | 
14 | 
15 | test: { cuhksysu: ./mot_data/cuhksysu.val,
16 |         prw: ./mot_data/prw.val
17 |       }
18 | 
19 | 
20 | # number of classes
21 | nc: 1
22 | 
23 | # class names
24 | names: ['person']


--------------------------------------------------------------------------------
/detect.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | import platform
  4 | import shutil
  5 | import time
  6 | from pathlib import Path
  7 | 
  8 | import cv2
  9 | import torch
 10 | import torch.backends.cudnn as cudnn
 11 | from numpy import random
 12 | 
 13 | from models.experimental import attempt_load
 14 | from utils.datasets import LoadStreams, LoadImages
 15 | from utils.general import (
 16 |     check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, plot_one_box, strip_optimizer)
 17 | from utils.torch_utils import select_device, load_classifier, time_synchronized
 18 | 
 19 | 
 20 | def detect(save_img=False):
 21 |     out, source, weights, view_img, save_txt, imgsz = \
 22 |         opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
 23 |     webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')
 24 | 
 25 |     # Initialize
 26 |     device = select_device(opt.device)
 27 |     if os.path.exists(out):
 28 |         shutil.rmtree(out)  # delete output folder
 29 |     os.makedirs(out)  # make new output folder
 30 |     half = device.type != 'cpu'  # half precision only supported on CUDA
 31 | 
 32 |     # Load model
 33 |     model = attempt_load(weights, map_location=device)  # load FP32 model
 34 |     imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
 35 |     if half:
 36 |         model.half()  # to FP16
 37 | 
 38 |     # Second-stage classifier
 39 |     classify = False
 40 |     if classify:
 41 |         modelc = load_classifier(name='resnet101', n=2)  # initialize
 42 |         modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
 43 |         modelc.to(device).eval()
 44 | 
 45 |     # Set Dataloader
 46 |     vid_path, vid_writer = None, None
 47 |     if webcam:
 48 |         view_img = True
 49 |         cudnn.benchmark = True  # set True to speed up constant image size inference
 50 |         dataset = LoadStreams(source, img_size=imgsz)
 51 |     else:
 52 |         save_img = True
 53 |         dataset = LoadImages(source, img_size=imgsz)
 54 | 
 55 |     # Get names and colors
 56 |     names = model.module.names if hasattr(model, 'module') else model.names
 57 |     colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]
 58 | 
 59 |     # Run inference
 60 |     t0 = time.time()
 61 |     img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
 62 |     _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
 63 |     for path, img, im0s, vid_cap in dataset:
 64 |         img = torch.from_numpy(img).to(device)
 65 |         img = img.half() if half else img.float()  # uint8 to fp16/32
 66 |         img /= 255.0  # 0 - 255 to 0.0 - 1.0
 67 |         if img.ndimension() == 3:
 68 |             img = img.unsqueeze(0)
 69 | 
 70 |         # Inference
 71 |         t1 = time_synchronized()
 72 |         pred = model(img, augment=opt.augment)[0]
 73 | 
 74 |         # Apply NMS
 75 |         pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
 76 |         t2 = time_synchronized()
 77 | 
 78 |         # Apply Classifier
 79 |         if classify:
 80 |             pred = apply_classifier(pred, modelc, img, im0s)
 81 | 
 82 |         # Process detections
 83 |         for i, det in enumerate(pred):  # detections per image
 84 |             if webcam:  # batch_size >= 1
 85 |                 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
 86 |             else:
 87 |                 p, s, im0 = path, '', im0s
 88 | 
 89 |             save_path = str(Path(out) / Path(p).name)
 90 |             txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
 91 |             s += '%gx%g ' % img.shape[2:]  # print string
 92 |             gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
 93 |             if det is not None and len(det):
 94 |                 # Rescale boxes from img_size to im0 size
 95 |                 det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
 96 | 
 97 |                 # Print results
 98 |                 for c in det[:, -1].unique():
 99 |                     n = (det[:, -1] == c).sum()  # detections per class
100 |                     s += '%g %ss, ' % (n, names[int(c)])  # add to string
101 | 
102 |                 # Write results
103 |                 for *xyxy, conf, cls in det:
104 |                     if save_txt:  # Write to file
105 |                         xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
106 |                         with open(txt_path + '.txt', 'a') as f:
107 |                             f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format
108 | 
109 |                     if save_img or view_img:  # Add bbox to image
110 |                         label = '%s %.2f' % (names[int(cls)], conf)
111 |                         plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
112 | 
113 |             # Print time (inference + NMS)
114 |             print('%sDone. (%.3fs)' % (s, t2 - t1))
115 | 
116 |             # Stream results
117 |             if view_img:
118 |                 cv2.imshow(p, im0)
119 |                 if cv2.waitKey(1) == ord('q'):  # q to quit
120 |                     raise StopIteration
121 | 
122 |             # Save results (image with detections)
123 |             if save_img:
124 |                 if dataset.mode == 'images':
125 |                     cv2.imwrite(save_path, im0)
126 |                 else:
127 |                     if vid_path != save_path:  # new video
128 |                         vid_path = save_path
129 |                         if isinstance(vid_writer, cv2.VideoWriter):
130 |                             vid_writer.release()  # release previous video writer
131 | 
132 |                         fourcc = 'mp4v'  # output video codec
133 |                         fps = vid_cap.get(cv2.CAP_PROP_FPS)
134 |                         w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
135 |                         h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
136 |                         vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
137 |                     vid_writer.write(im0)
138 | 
139 |     if save_txt or save_img:
140 |         print('Results saved to %s' % Path(out))
141 |         if platform == 'darwin' and not opt.update:  # MacOS
142 |             os.system('open ' + save_path)
143 | 
144 |     print('Done. (%.3fs)' % (time.time() - t0))
145 | 
146 | 
147 | if __name__ == '__main__':
148 |     parser = argparse.ArgumentParser()
149 |     parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
150 |     parser.add_argument('--source', type=str, default='inference/images', help='source')  # file/folder, 0 for webcam
151 |     parser.add_argument('--output', type=str, default='inference/output', help='output folder')  # output folder
152 |     parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
153 |     parser.add_argument('--conf-thres', type=float, default=0.4, help='object confidence threshold')
154 |     parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
155 |     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
156 |     parser.add_argument('--view-img', action='store_true', help='display results')
157 |     parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
158 |     parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
159 |     parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
160 |     parser.add_argument('--augment', action='store_true', help='augmented inference')
161 |     parser.add_argument('--update', action='store_true', help='update all models')
162 |     opt = parser.parse_args()
163 |     print(opt)
164 | 
165 |     with torch.no_grad():
166 |         if opt.update:  # update all models (to fix SourceChangeWarning)
167 |             for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
168 |                 detect()
169 |                 strip_optimizer(opt.weights)
170 |         else:
171 |             detect()
172 | 


--------------------------------------------------------------------------------
/hubconf.py:
--------------------------------------------------------------------------------
  1 | """File for accessing YOLOv5 via PyTorch Hub https://pytorch.org/hub/
  2 | 
  3 | Usage:
  4 |     import torch
  5 |     model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, channels=3, classes=80)
  6 | """
  7 | 
  8 | dependencies = ['torch', 'yaml']
  9 | import os
 10 | 
 11 | import torch
 12 | 
 13 | from models.yolo import Model
 14 | from utils.google_utils import attempt_download
 15 | 
 16 | 
 17 | def create(name, pretrained, channels, classes):
 18 |     """Creates a specified YOLOv5 model
 19 | 
 20 |     Arguments:
 21 |         name (str): name of model, i.e. 'yolov5s'
 22 |         pretrained (bool): load pretrained weights into the model
 23 |         channels (int): number of input channels
 24 |         classes (int): number of model classes
 25 | 
 26 |     Returns:
 27 |         pytorch model
 28 |     """
 29 |     config = os.path.join(os.path.dirname(__file__), 'models', '%s.yaml' % name)  # model.yaml path
 30 |     try:
 31 |         model = Model(config, channels, classes)
 32 |         if pretrained:
 33 |             ckpt = '%s.pt' % name  # checkpoint filename
 34 |             attempt_download(ckpt)  # download if not found locally
 35 |             state_dict = torch.load(ckpt, map_location=torch.device('cpu'))['model'].float().state_dict()  # to FP32
 36 |             state_dict = {k: v for k, v in state_dict.items() if model.state_dict()[k].shape == v.shape}  # filter
 37 |             model.load_state_dict(state_dict, strict=False)  # load
 38 |         return model
 39 | 
 40 |     except Exception as e:
 41 |         help_url = 'https://github.com/ultralytics/yolov5/issues/36'
 42 |         s = 'Cache maybe be out of date, deleting cache and retrying may solve this. See %s for help.' % help_url
 43 |         raise Exception(s) from e
 44 | 
 45 | 
 46 | def yolov5s(pretrained=False, channels=3, classes=80):
 47 |     """YOLOv5-small model from https://github.com/ultralytics/yolov5
 48 | 
 49 |     Arguments:
 50 |         pretrained (bool): load pretrained weights into the model, default=False
 51 |         channels (int): number of input channels, default=3
 52 |         classes (int): number of model classes, default=80
 53 | 
 54 |     Returns:
 55 |         pytorch model
 56 |     """
 57 |     return create('yolov5s', pretrained, channels, classes)
 58 | 
 59 | 
 60 | def yolov5m(pretrained=False, channels=3, classes=80):
 61 |     """YOLOv5-medium model from https://github.com/ultralytics/yolov5
 62 | 
 63 |     Arguments:
 64 |         pretrained (bool): load pretrained weights into the model, default=False
 65 |         channels (int): number of input channels, default=3
 66 |         classes (int): number of model classes, default=80
 67 | 
 68 |     Returns:
 69 |         pytorch model
 70 |     """
 71 |     return create('yolov5m', pretrained, channels, classes)
 72 | 
 73 | 
 74 | def yolov5l(pretrained=False, channels=3, classes=80):
 75 |     """YOLOv5-large model from https://github.com/ultralytics/yolov5
 76 | 
 77 |     Arguments:
 78 |         pretrained (bool): load pretrained weights into the model, default=False
 79 |         channels (int): number of input channels, default=3
 80 |         classes (int): number of model classes, default=80
 81 | 
 82 |     Returns:
 83 |         pytorch model
 84 |     """
 85 |     return create('yolov5l', pretrained, channels, classes)
 86 | 
 87 | 
 88 | def yolov5x(pretrained=False, channels=3, classes=80):
 89 |     """YOLOv5-xlarge model from https://github.com/ultralytics/yolov5
 90 | 
 91 |     Arguments:
 92 |         pretrained (bool): load pretrained weights into the model, default=False
 93 |         channels (int): number of input channels, default=3
 94 |         classes (int): number of model classes, default=80
 95 | 
 96 |     Returns:
 97 |         pytorch model
 98 |     """
 99 |     return create('yolov5x', pretrained, channels, classes)
100 | 


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaobin1231/YOLOv5_JDE/ea3149f280a2234a4bf840c7da8f5d0c77abd59d/models/__init__.py


--------------------------------------------------------------------------------
/models/common.py:
--------------------------------------------------------------------------------
  1 | # This file contains modules common to various models
  2 | import math
  3 | 
  4 | import torch
  5 | import torch.nn as nn
  6 | 
  7 | 
  8 | def autopad(k, p=None):  # kernel, padding
  9 |     # Pad to 'same'
 10 |     if p is None:
 11 |         p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
 12 |     return p
 13 | 
 14 | 
 15 | def DWConv(c1, c2, k=1, s=1, act=True):
 16 |     # Depthwise convolution
 17 |     return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
 18 | 
 19 | 
 20 | class Conv(nn.Module):
 21 |     # Standard convolution
 22 |     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
 23 |         super(Conv, self).__init__()
 24 |         self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
 25 |         self.bn = nn.BatchNorm2d(c2)
 26 |         self.act = nn.Hardswish() if act else nn.Identity()
 27 | 
 28 |     def forward(self, x):
 29 |         return self.act(self.bn(self.conv(x)))
 30 | 
 31 |     def fuseforward(self, x):
 32 |         return self.act(self.conv(x))
 33 | 
 34 | 
 35 | class Bottleneck(nn.Module):
 36 |     # Standard bottleneck
 37 |     def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
 38 |         super(Bottleneck, self).__init__()
 39 |         c_ = int(c2 * e)  # hidden channels
 40 |         self.cv1 = Conv(c1, c_, 1, 1)
 41 |         self.cv2 = Conv(c_, c2, 3, 1, g=g)
 42 |         self.add = shortcut and c1 == c2
 43 | 
 44 |     def forward(self, x):
 45 |         return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
 46 | 
 47 | 
 48 | class BottleneckCSP(nn.Module):
 49 |     # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
 50 |     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
 51 |         super(BottleneckCSP, self).__init__()
 52 |         c_ = int(c2 * e)  # hidden channels
 53 |         self.cv1 = Conv(c1, c_, 1, 1)
 54 |         self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
 55 |         self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
 56 |         self.cv4 = Conv(2 * c_, c2, 1, 1)
 57 |         self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
 58 |         self.act = nn.LeakyReLU(0.1, inplace=True)
 59 |         self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
 60 | 
 61 |     def forward(self, x):
 62 |         y1 = self.cv3(self.m(self.cv1(x)))
 63 |         y2 = self.cv2(x)
 64 |         return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
 65 | 
 66 | 
 67 | class SPP(nn.Module):
 68 |     # Spatial pyramid pooling layer used in YOLOv3-SPP
 69 |     def __init__(self, c1, c2, k=(5, 9, 13)):
 70 |         super(SPP, self).__init__()
 71 |         c_ = c1 // 2  # hidden channels
 72 |         self.cv1 = Conv(c1, c_, 1, 1)
 73 |         self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
 74 |         self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
 75 | 
 76 |     def forward(self, x):
 77 |         x = self.cv1(x)
 78 |         return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
 79 | 
 80 | 
 81 | class Focus(nn.Module):
 82 |     # Focus wh information into c-space
 83 |     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
 84 |         super(Focus, self).__init__()
 85 |         self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
 86 | 
 87 |     def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
 88 |         return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
 89 | 
 90 | 
 91 | class Concat(nn.Module):
 92 |     # Concatenate a list of tensors along dimension
 93 |     def __init__(self, dimension=1):
 94 |         super(Concat, self).__init__()
 95 |         self.d = dimension
 96 | 
 97 |     def forward(self, x):
 98 |         return torch.cat(x, self.d)
 99 | 
100 | 
101 | class Flatten(nn.Module):
102 |     # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions
103 |     @staticmethod
104 |     def forward(x):
105 |         return x.view(x.size(0), -1)
106 | 
107 | 
108 | class Classify(nn.Module):
109 |     # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
110 |     def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
111 |         super(Classify, self).__init__()
112 |         self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
113 |         self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)  # to x(b,c2,1,1)
114 |         self.flat = Flatten()
115 | 
116 |     def forward(self, x):
117 |         z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)  # cat if list
118 |         return self.flat(self.conv(z))  # flatten to x(b,c2)
119 | 


--------------------------------------------------------------------------------
/models/experimental.py:
--------------------------------------------------------------------------------
  1 | # This file contains experimental modules
  2 | 
  3 | import numpy as np
  4 | import torch
  5 | import torch.nn as nn
  6 | 
  7 | from models.common import Conv, DWConv
  8 | from utils.google_utils import attempt_download
  9 | 
 10 | 
 11 | class CrossConv(nn.Module):
 12 |     # Cross Convolution Downsample
 13 |     def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
 14 |         # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
 15 |         super(CrossConv, self).__init__()
 16 |         c_ = int(c2 * e)  # hidden channels
 17 |         self.cv1 = Conv(c1, c_, (1, k), (1, s))
 18 |         self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
 19 |         self.add = shortcut and c1 == c2
 20 | 
 21 |     def forward(self, x):
 22 |         return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
 23 | 
 24 | 
 25 | class C3(nn.Module):
 26 |     # Cross Convolution CSP
 27 |     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
 28 |         super(C3, self).__init__()
 29 |         c_ = int(c2 * e)  # hidden channels
 30 |         self.cv1 = Conv(c1, c_, 1, 1)
 31 |         self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
 32 |         self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
 33 |         self.cv4 = Conv(2 * c_, c2, 1, 1)
 34 |         self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
 35 |         self.act = nn.LeakyReLU(0.1, inplace=True)
 36 |         self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
 37 | 
 38 |     def forward(self, x):
 39 |         y1 = self.cv3(self.m(self.cv1(x)))
 40 |         y2 = self.cv2(x)
 41 |         return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
 42 | 
 43 | 
 44 | class Sum(nn.Module):
 45 |     # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
 46 |     def __init__(self, n, weight=False):  # n: number of inputs
 47 |         super(Sum, self).__init__()
 48 |         self.weight = weight  # apply weights boolean
 49 |         self.iter = range(n - 1)  # iter object
 50 |         if weight:
 51 |             self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)  # layer weights
 52 | 
 53 |     def forward(self, x):
 54 |         y = x[0]  # no weight
 55 |         if self.weight:
 56 |             w = torch.sigmoid(self.w) * 2
 57 |             for i in self.iter:
 58 |                 y = y + x[i + 1] * w[i]
 59 |         else:
 60 |             for i in self.iter:
 61 |                 y = y + x[i + 1]
 62 |         return y
 63 | 
 64 | 
 65 | class GhostConv(nn.Module):
 66 |     # Ghost Convolution https://github.com/huawei-noah/ghostnet
 67 |     def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
 68 |         super(GhostConv, self).__init__()
 69 |         c_ = c2 // 2  # hidden channels
 70 |         self.cv1 = Conv(c1, c_, k, s, g, act)
 71 |         self.cv2 = Conv(c_, c_, 5, 1, c_, act)
 72 | 
 73 |     def forward(self, x):
 74 |         y = self.cv1(x)
 75 |         return torch.cat([y, self.cv2(y)], 1)
 76 | 
 77 | 
 78 | class GhostBottleneck(nn.Module):
 79 |     # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
 80 |     def __init__(self, c1, c2, k, s):
 81 |         super(GhostBottleneck, self).__init__()
 82 |         c_ = c2 // 2
 83 |         self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1),  # pw
 84 |                                   DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
 85 |                                   GhostConv(c_, c2, 1, 1, act=False))  # pw-linear
 86 |         self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
 87 |                                       Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
 88 | 
 89 |     def forward(self, x):
 90 |         return self.conv(x) + self.shortcut(x)
 91 | 
 92 | 
 93 | class MixConv2d(nn.Module):
 94 |     # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
 95 |     def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
 96 |         super(MixConv2d, self).__init__()
 97 |         groups = len(k)
 98 |         if equal_ch:  # equal c_ per group
 99 |             i = torch.linspace(0, groups - 1E-6, c2).floor()  # c2 indices
100 |             c_ = [(i == g).sum() for g in range(groups)]  # intermediate channels
101 |         else:  # equal weight.numel() per group
102 |             b = [c2] + [0] * groups
103 |             a = np.eye(groups + 1, groups, k=-1)
104 |             a -= np.roll(a, 1, axis=1)
105 |             a *= np.array(k) ** 2
106 |             a[0] = 1
107 |             c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b
108 | 
109 |         self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
110 |         self.bn = nn.BatchNorm2d(c2)
111 |         self.act = nn.LeakyReLU(0.1, inplace=True)
112 | 
113 |     def forward(self, x):
114 |         return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
115 | 
116 | 
117 | class Ensemble(nn.ModuleList):
118 |     # Ensemble of models
119 |     def __init__(self):
120 |         super(Ensemble, self).__init__()
121 | 
122 |     def forward(self, x, augment=False):
123 |         y = []
124 |         for module in self:
125 |             y.append(module(x, augment)[0])
126 |         # y = torch.stack(y).max(0)[0]  # max ensemble
127 |         # y = torch.cat(y, 1)  # nms ensemble
128 |         y = torch.stack(y).mean(0)  # mean ensemble
129 |         return y, None  # inference, train output
130 | 
131 | 
132 | def attempt_load(weights, map_location=None):
133 |     # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
134 |     model = Ensemble()
135 |     for w in weights if isinstance(weights, list) else [weights]:
136 |         attempt_download(w)
137 |         model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval())  # load FP32 model
138 | 
139 |     if len(model) == 1:
140 |         return model[-1]  # return model
141 |     else:
142 |         print('Ensemble created with %s\n' % weights)
143 |         for k in ['names', 'stride']:
144 |             setattr(model, k, getattr(model[-1], k))
145 |         return model  # return ensemble
146 | 


--------------------------------------------------------------------------------
/models/export.py:
--------------------------------------------------------------------------------
 1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats
 2 | 
 3 | Usage:
 4 |     $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
 5 | """
 6 | 
 7 | import argparse
 8 | 
 9 | import torch
10 | 
11 | from utils.google_utils import attempt_download
12 | 
13 | if __name__ == '__main__':
14 |     parser = argparse.ArgumentParser()
15 |     parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')
16 |     parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
17 |     parser.add_argument('--batch-size', type=int, default=1, help='batch size')
18 |     opt = parser.parse_args()
19 |     opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
20 |     print(opt)
21 | 
22 |     # Input
23 |     img = torch.zeros((opt.batch_size, 3, *opt.img_size))  # image size(1,3,320,192) iDetection
24 | 
25 |     # Load PyTorch model
26 |     attempt_download(opt.weights)
27 |     model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float()
28 |     model.eval()
29 |     model.model[-1].export = True  # set Detect() layer export=True
30 |     y = model(img)  # dry run
31 | 
32 |     # TorchScript export
33 |     try:
34 |         print('\nStarting TorchScript export with torch %s...' % torch.__version__)
35 |         f = opt.weights.replace('.pt', '.torchscript.pt')  # filename
36 |         ts = torch.jit.trace(model, img)
37 |         ts.save(f)
38 |         print('TorchScript export success, saved as %s' % f)
39 |     except Exception as e:
40 |         print('TorchScript export failure: %s' % e)
41 | 
42 |     # ONNX export
43 |     try:
44 |         import onnx
45 | 
46 |         print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
47 |         f = opt.weights.replace('.pt', '.onnx')  # filename
48 |         model.fuse()  # only for ONNX
49 |         torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
50 |                           output_names=['classes', 'boxes'] if y is None else ['output'])
51 | 
52 |         # Checks
53 |         onnx_model = onnx.load(f)  # load onnx model
54 |         onnx.checker.check_model(onnx_model)  # check onnx model
55 |         print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
56 |         print('ONNX export success, saved as %s' % f)
57 |     except Exception as e:
58 |         print('ONNX export failure: %s' % e)
59 | 
60 |     # CoreML export
61 |     try:
62 |         import coremltools as ct
63 | 
64 |         print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
65 |         # convert model from torchscript and apply pixel scaling as per detect.py
66 |         model = ct.convert(ts, inputs=[ct.ImageType(name='images', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
67 |         f = opt.weights.replace('.pt', '.mlmodel')  # filename
68 |         model.save(f)
69 |         print('CoreML export success, saved as %s' % f)
70 |     except Exception as e:
71 |         print('CoreML export failure: %s' % e)
72 | 
73 |     # Finish
74 |     print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.')
75 | 


--------------------------------------------------------------------------------
/models/hub/yolov3-spp.yaml:
--------------------------------------------------------------------------------
 1 | # parameters
 2 | nc: 80  # number of classes
 3 | depth_multiple: 1.0  # model depth multiple
 4 | width_multiple: 1.0  # layer channel multiple
 5 | 
 6 | # anchors
 7 | anchors:
 8 |   - [10,13, 16,30, 33,23]  # P3/8
 9 |   - [30,61, 62,45, 59,119]  # P4/16
10 |   - [116,90, 156,198, 373,326]  # P5/32
11 | 
12 | # darknet53 backbone
13 | backbone:
14 |   # [from, number, module, args]
15 |   [[-1, 1, Conv, [32, 3, 1]],  # 0
16 |    [-1, 1, Conv, [64, 3, 2]],  # 1-P1/2
17 |    [-1, 1, Bottleneck, [64]],
18 |    [-1, 1, Conv, [128, 3, 2]],  # 3-P2/4
19 |    [-1, 2, Bottleneck, [128]],
20 |    [-1, 1, Conv, [256, 3, 2]],  # 5-P3/8
21 |    [-1, 8, Bottleneck, [256]],
22 |    [-1, 1, Conv, [512, 3, 2]],  # 7-P4/16
23 |    [-1, 8, Bottleneck, [512]],
24 |    [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25 |    [-1, 4, Bottleneck, [1024]],  # 10
26 |   ]
27 | 
28 | # YOLOv3-SPP head
29 | head:
30 |   [[-1, 1, Bottleneck, [1024, False]],
31 |    [-1, 1, SPP, [512, [5, 9, 13]]],
32 |    [-1, 1, Conv, [1024, 3, 1]],
33 |    [-1, 1, Conv, [512, 1, 1]],
34 |    [-1, 1, Conv, [1024, 3, 1]],  # 15 (P5/32-large)
35 | 
36 |    [-2, 1, Conv, [256, 1, 1]],
37 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38 |    [[-1, 8], 1, Concat, [1]],  # cat backbone P4
39 |    [-1, 1, Bottleneck, [512, False]],
40 |    [-1, 1, Bottleneck, [512, False]],
41 |    [-1, 1, Conv, [256, 1, 1]],
42 |    [-1, 1, Conv, [512, 3, 1]],  # 22 (P4/16-medium)
43 | 
44 |    [-2, 1, Conv, [128, 1, 1]],
45 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P3
47 |    [-1, 1, Bottleneck, [256, False]],
48 |    [-1, 2, Bottleneck, [256, False]],  # 27 (P3/8-small)
49 | 
50 |    [[27, 22, 15], 1, Detect, [nc, anchors]],   # Detect(P3, P4, P5)
51 |   ]
52 | 


--------------------------------------------------------------------------------
/models/hub/yolov5-fpn.yaml:
--------------------------------------------------------------------------------
 1 | # parameters
 2 | nc: 80  # number of classes
 3 | depth_multiple: 1.0  # model depth multiple
 4 | width_multiple: 1.0  # layer channel multiple
 5 | 
 6 | # anchors
 7 | anchors:
 8 |   - [10,13, 16,30, 33,23]  # P3/8
 9 |   - [30,61, 62,45, 59,119]  # P4/16
10 |   - [116,90, 156,198, 373,326]  # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   # [from, number, module, args]
15 |   [[-1, 1, Focus, [64, 3]],  # 0-P1/2
16 |    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
17 |    [-1, 3, Bottleneck, [128]],
18 |    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
19 |    [-1, 9, BottleneckCSP, [256]],
20 |    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
21 |    [-1, 9, BottleneckCSP, [512]],
22 |    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 |    [-1, 1, SPP, [1024, [5, 9, 13]]],
24 |    [-1, 6, BottleneckCSP, [1024]],  # 9
25 |   ]
26 | 
27 | # YOLOv5 FPN head
28 | head:
29 |   [[-1, 3, BottleneckCSP, [1024, False]],  # 10 (P5/32-large)
30 | 
31 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
33 |    [-1, 1, Conv, [512, 1, 1]],
34 |    [-1, 3, BottleneckCSP, [512, False]],  # 14 (P4/16-medium)
35 | 
36 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 |    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
38 |    [-1, 1, Conv, [256, 1, 1]],
39 |    [-1, 3, BottleneckCSP, [256, False]],  # 18 (P3/8-small)
40 | 
41 |    [[18, 14, 10], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
42 |   ]
43 | 


--------------------------------------------------------------------------------
/models/hub/yolov5-panet.yaml:
--------------------------------------------------------------------------------
 1 | # parameters
 2 | nc: 80  # number of classes
 3 | depth_multiple: 1.0  # model depth multiple
 4 | width_multiple: 1.0  # layer channel multiple
 5 | 
 6 | # anchors
 7 | anchors:
 8 |   - [116,90, 156,198, 373,326]  # P5/32
 9 |   - [30,61, 62,45, 59,119]  # P4/16
10 |   - [10,13, 16,30, 33,23]  # P3/8
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   # [from, number, module, args]
15 |   [[-1, 1, Focus, [64, 3]],  # 0-P1/2
16 |    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
17 |    [-1, 3, BottleneckCSP, [128]],
18 |    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
19 |    [-1, 9, BottleneckCSP, [256]],
20 |    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
21 |    [-1, 9, BottleneckCSP, [512]],
22 |    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 |    [-1, 1, SPP, [1024, [5, 9, 13]]],
24 |    [-1, 3, BottleneckCSP, [1024, False]],  # 9
25 |   ]
26 | 
27 | # YOLOv5 PANet head
28 | head:
29 |   [[-1, 1, Conv, [512, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
32 |    [-1, 3, BottleneckCSP, [512, False]],  # 13
33 | 
34 |    [-1, 1, Conv, [256, 1, 1]],
35 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 |    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
37 |    [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)
38 | 
39 |    [-1, 1, Conv, [256, 3, 2]],
40 |    [[-1, 14], 1, Concat, [1]],  # cat head P4
41 |    [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)
42 | 
43 |    [-1, 1, Conv, [512, 3, 2]],
44 |    [[-1, 10], 1, Concat, [1]],  # cat head P5
45 |    [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)
46 | 
47 |    [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P5, P4, P3)
48 |   ]
49 | 


--------------------------------------------------------------------------------
/models/yolo.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import math
  3 | from copy import deepcopy
  4 | from pathlib import Path
  5 | 
  6 | import torch
  7 | import torch.nn as nn
  8 | import torch.nn.functional as F
  9 | 
 10 | from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, Concat
 11 | from models.experimental import MixConv2d, CrossConv, C3
 12 | from utils.general import check_anchor_order, make_divisible, check_file
 13 | from utils.torch_utils import (
 14 |     time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, select_device)
 15 | 
 16 | 
 17 | class Detect(nn.Module):
 18 |     def __init__(self, nc=80, anchors=(), ch=(), emb_dim=256):  # detection layer
 19 |         super(Detect, self).__init__()
 20 |         self.stride = None  # strides computed during build
 21 |         self.nc = nc  # number of classes
 22 |         self.no = nc + 5  # number of outputs per anchor
 23 |         self.emb_dim = emb_dim   # number of reid predictions dims
 24 |         self.nl = len(anchors)  # number of detection layers
 25 |         self.na = len(anchors[0]) // 2  # number of anchors
 26 |         self.grid = [torch.zeros(1)] * self.nl  # init grid
 27 |         a = torch.tensor(anchors).float().view(self.nl, -1, 2)
 28 |         self.register_buffer('anchors', a)  # shape(nl,na,2)
 29 |         self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
 30 |         self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na + self.emb_dim, 1) for x in ch)  # output conv
 31 |         self.export = False  # onnx export
 32 | 
 33 |     def forward(self, x):
 34 |         # x = x.copy()  # for profiling
 35 |         z, p, p_emb = [], [], []  # inference output
 36 |         self.training |= self.export
 37 |         for i in range(self.nl):
 38 |             x[i] = self.m[i](x[i])  # conv
 39 |             bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
 40 |             p.append(x[i][:, :self.na * self.no].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous())
 41 |             p_emb.append(x[i][:, self.na * self.no:].permute(0, 2, 3, 1).contiguous())
 42 | 
 43 |             if not self.training:  # inference
 44 |                 if self.grid[i].shape[2:4] != x[i].shape[2:4]:
 45 |                     self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
 46 | 
 47 |                 y = p[i].sigmoid()
 48 |                 y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
 49 |                 y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
 50 |                 embedding_pred = F.normalize(p_emb[i].unsqueeze(1).repeat(1, self.na, 1, 1, 1).contiguous(), dim=-1)
 51 |                 fusion_res = torch.cat([y, embedding_pred], dim=-1)
 52 |                 z.append(fusion_res.view(bs, -1, self.no + self.emb_dim))
 53 | 
 54 |         return (p, p_emb) if self.training else (torch.cat(z, 1), p, p_emb)
 55 | 
 56 |     @staticmethod
 57 |     def _make_grid(nx=20, ny=20):
 58 |         yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
 59 |         return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
 60 | 
 61 | 
 62 | class Model(nn.Module):
 63 |     def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None):  # model, input channels, number of classes
 64 |         super(Model, self).__init__()
 65 |         if isinstance(cfg, dict):
 66 |             self.yaml = cfg  # model dict
 67 |         else:  # is *.yaml
 68 |             import yaml  # for torch hub
 69 |             self.yaml_file = Path(cfg).name
 70 |             with open(cfg) as f:
 71 |                 self.yaml = yaml.load(f, Loader=yaml.FullLoader)  # model dict
 72 | 
 73 |         # Define model
 74 |         if nc and nc != self.yaml['nc']:
 75 |             print('Overriding %s nc=%g with nc=%g' % (cfg, self.yaml['nc'], nc))
 76 |             self.yaml['nc'] = nc  # override yaml value
 77 | 
 78 |         self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist, ch_out
 79 |         # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])
 80 | 
 81 |         # Build strides, anchors
 82 |         m = self.model[-1]  # Detect()
 83 |         if isinstance(m, Detect):
 84 |             s = 128  # 2x min stride
 85 |             m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[0]])  # forward
 86 |             m.anchors /= m.stride.view(-1, 1, 1)
 87 |             check_anchor_order(m)
 88 |             self.stride = m.stride
 89 |             self._initialize_biases()  # only run once
 90 | 
 91 |         self.emb_dim, self.nID = self.yaml['emb_dim'], self.yaml['nID']
 92 |         self.classifier = nn.Linear(self.emb_dim, self.nID) if self.nID > 0 else None
 93 | 
 94 |         # Init weights, biases
 95 |         initialize_weights(self)
 96 |         self.info()
 97 |         print('')
 98 | 
 99 |     def forward(self, x, augment=False, profile=False):
100 |         if augment:
101 |             img_size = x.shape[-2:]  # height, width
102 |             s = [1, 0.83, 0.67]  # scales
103 |             f = [None, 3, None]  # flips (2-ud, 3-lr)
104 |             y = []  # outputs
105 |             for si, fi in zip(s, f):
106 |                 xi = scale_img(x.flip(fi) if fi else x, si)
107 |                 yi = self.forward_once(xi)[0]  # forward
108 |                 # cv2.imwrite('img%g.jpg' % s, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
109 |                 yi[..., :4] /= si  # de-scale
110 |                 if fi == 2:
111 |                     yi[..., 1] = img_size[0] - yi[..., 1]  # de-flip ud
112 |                 elif fi == 3:
113 |                     yi[..., 0] = img_size[1] - yi[..., 0]  # de-flip lr
114 |                 y.append(yi)
115 |             return torch.cat(y, 1), None  # augmented inference, train
116 |         else:
117 |             return self.forward_once(x, profile)  # single-scale inference, train
118 | 
119 |     def forward_once(self, x, profile=False):
120 |         y, dt = [], []  # outputs
121 |         for m in self.model:
122 |             if m.f != -1:  # if not from previous layer
123 |                 x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
124 | 
125 |             if profile:
126 |                 try:
127 |                     import thop
128 |                     o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2  # FLOPS
129 |                 except:
130 |                     o = 0
131 |                 t = time_synchronized()
132 |                 for _ in range(10):
133 |                     _ = m(x)
134 |                 dt.append((time_synchronized() - t) * 100)
135 |                 print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type))
136 | 
137 |             x = m(x)  # run
138 |             y.append(x if m.i in self.save else None)  # save output
139 | 
140 |         if profile:
141 |             print('%.1fms total' % sum(dt))
142 |         return x
143 | 
144 |     def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
145 |         # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
146 |         m = self.model[-1]  # Detect() module
147 |         for mi, s in zip(m.m, m.stride):  # from
148 |             t_dim = mi.bias.size()[0]
149 |             b = mi.bias[:t_dim-m.emb_dim].view(m.na, -1)  # conv.bias(255) to (3,85)
150 |             b[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
151 |             b[:, 5:m.no] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
152 |             mi.bias = torch.nn.Parameter(torch.cat([b.view(-1), mi.bias[t_dim-m.emb_dim:]]), requires_grad=True)
153 | 
154 |     def _print_biases(self):
155 |         m = self.model[-1]  # Detect() module
156 |         for mi in m.m:  # from
157 |             b = mi.bias.detach().view(m.na, -1).T  # conv.bias(255) to (3,85)
158 |             print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))
159 | 
160 | 
161 |     def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
162 |         print('Fusing layers... ', end='')
163 |         for m in self.model.modules():
164 |             if type(m) is Conv:
165 |                 m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatability
166 |                 m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
167 |                 m.bn = None  # remove batchnorm
168 |                 m.forward = m.fuseforward  # update forward
169 |         self.info()
170 |         return self
171 | 
172 |     def info(self):  # print model information
173 |         model_info(self)
174 | 
175 | 
176 | def parse_model(d, ch):  # model_dict, input_channels(3)
177 |     print('\n%3s%18s%3s%10s  %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
178 |     anchors, nc, gd, gw, emb_dim = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d['emb_dim']
179 |     na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
180 |     no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
181 | 
182 |     layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
183 |     for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
184 |         m = eval(m) if isinstance(m, str) else m  # eval strings
185 |         for j, a in enumerate(args):
186 |             try:
187 |                 args[j] = eval(a) if isinstance(a, str) else a  # eval strings
188 |             except:
189 |                 pass
190 | 
191 |         n = max(round(n * gd), 1) if n > 1 else n  # depth gain
192 |         if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
193 |             c1, c2 = ch[f], args[0]
194 |             c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
195 | 
196 |             args = [c1, c2, *args[1:]]
197 |             if m in [BottleneckCSP, C3]:
198 |                 args.insert(2, n)
199 |                 n = 1
200 |         elif m is nn.BatchNorm2d:
201 |             args = [ch[f]]
202 |         elif m is Concat:
203 |             c2 = sum([ch[-1 if x == -1 else x + 1] for x in f])
204 |         elif m is Detect:
205 |             args.append([ch[x + 1] for x in f])
206 |             args.append(emb_dim)
207 |             if isinstance(args[1], int):  # number of anchors
208 |                 args[1] = [list(range(args[1] * 2))] * len(f)
209 |         else:
210 |             c2 = ch[f]
211 | 
212 |         m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
213 |         t = str(m)[8:-2].replace('__main__.', '')  # module type
214 |         np = sum([x.numel() for x in m_.parameters()])  # number params
215 |         m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
216 |         print('%3s%18s%3s%10.0f  %-40s%-30s' % (i, f, n, np, t, args))  # print
217 |         save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
218 |         layers.append(m_)
219 |         ch.append(c2)
220 |     return nn.Sequential(*layers), sorted(save)
221 | 
222 | 
223 | if __name__ == '__main__':
224 |     parser = argparse.ArgumentParser()
225 |     parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
226 |     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
227 |     opt = parser.parse_args()
228 |     opt.cfg = check_file(opt.cfg)  # check file
229 |     device = select_device(opt.device)
230 | 
231 |     # Create model
232 |     model = Model(opt.cfg).to(device)
233 |     model.train()


--------------------------------------------------------------------------------
/models/yolov5_JDE.yaml:
--------------------------------------------------------------------------------
 1 | # parameters
 2 | nc: 1  # number of classes
 3 | depth_multiple: 0.33  # model depth multiple
 4 | width_multiple: 0.50  # layer channel multiple
 5 | 
 6 | emb_dim: 256  # reid dim
 7 | nID: 14455    # total IDs of objects
 8 | 
 9 | 
10 | # anchors
11 | anchors:
12 |   - [8,24, 11,34, 16,48, 23,68]  # P3/8
13 |   - [32,96, 45,135, 64,192, 90,271]  # P4/16
14 |   - [128,384, 180,540, 256,640, 512,640]  # P5/32
15 | 
16 | 
17 | # YOLOv5 backbone
18 | backbone:
19 |   # [from, number, module, args]
20 |   [[-1, 1, Focus, [64, 3]],  # 0-P1/2
21 |    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
22 |    [-1, 3, BottleneckCSP, [128]],
23 |    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
24 |    [-1, 9, BottleneckCSP, [256]],
25 |    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
26 |    [-1, 9, BottleneckCSP, [512]],
27 |    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
28 |    [-1, 1, SPP, [1024, [5, 9, 13]]],
29 |    [-1, 3, BottleneckCSP, [1024, False]],  # 9
30 |   ]
31 | 
32 | # YOLOv5 head
33 | head:
34 |   [[-1, 1, Conv, [512, 1, 1]],
35 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
37 |    [-1, 3, BottleneckCSP, [512, False]],  # 13
38 | 
39 |    [-1, 1, Conv, [256, 1, 1]],
40 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
41 |    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
42 |    [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)
43 | 
44 |    [-1, 1, Conv, [256, 3, 2]],
45 |    [[-1, 14], 1, Concat, [1]],  # cat head P4
46 |    [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)
47 | 
48 |    [-1, 1, Conv, [512, 3, 2]],
49 |    [[-1, 10], 1, Concat, [1]],  # cat head P5
50 |    [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)
51 | 
52 |    [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
53 |   ]


--------------------------------------------------------------------------------
/models/yolov5l.yaml:
--------------------------------------------------------------------------------
 1 | # parameters
 2 | nc: 80  # number of classes
 3 | depth_multiple: 1.0  # model depth multiple
 4 | width_multiple: 1.0  # layer channel multiple
 5 | 
 6 | # anchors
 7 | anchors:
 8 |   - [10,13, 16,30, 33,23]  # P3/8
 9 |   - [30,61, 62,45, 59,119]  # P4/16
10 |   - [116,90, 156,198, 373,326]  # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   # [from, number, module, args]
15 |   [[-1, 1, Focus, [64, 3]],  # 0-P1/2
16 |    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
17 |    [-1, 3, BottleneckCSP, [128]],
18 |    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
19 |    [-1, 9, BottleneckCSP, [256]],
20 |    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
21 |    [-1, 9, BottleneckCSP, [512]],
22 |    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
23 |    [-1, 1, SPP, [1024, [5, 9, 13]]],
24 |    [-1, 3, BottleneckCSP, [1024, False]],  # 9
25 |   ]
26 | 
27 | # YOLOv5 head
28 | head:
29 |   [[-1, 1, Conv, [512, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
32 |    [-1, 3, BottleneckCSP, [512, False]],  # 13
33 | 
34 |    [-1, 1, Conv, [256, 1, 1]],
35 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 |    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
37 |    [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)
38 | 
39 |    [-1, 1, Conv, [256, 3, 2]],
40 |    [[-1, 14], 1, Concat, [1]],  # cat head P4
41 |    [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)
42 | 
43 |    [-1, 1, Conv, [512, 3, 2]],
44 |    [[-1, 10], 1, Concat, [1]],  # cat head P5
45 |    [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)
46 | 
47 |    [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
48 |   ]
49 | 


--------------------------------------------------------------------------------
/models/yolov5m.yaml:
--------------------------------------------------------------------------------
 1 | # parameters
 2 | nc: 80  # number of classes
 3 | depth_multiple: 0.67  # model depth multiple
 4 | width_multiple: 0.75  # layer channel multiple
 5 | 
 6 | # anchors
 7 | anchors:
 8 |   - [10,13, 16,30, 33,23]  # P3/8
 9 |   - [30,61, 62,45, 59,119]  # P4/16
10 |   - [116,90, 156,198, 373,326]  # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   # [from, number, module, args]
15 |   [[-1, 1, Focus, [64, 3]],  # 0-P1/2
16 |    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
17 |    [-1, 3, BottleneckCSP, [128]],
18 |    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
19 |    [-1, 9, BottleneckCSP, [256]],
20 |    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
21 |    [-1, 9, BottleneckCSP, [512]],
22 |    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
23 |    [-1, 1, SPP, [1024, [5, 9, 13]]],
24 |    [-1, 3, BottleneckCSP, [1024, False]],  # 9
25 |   ]
26 | 
27 | # YOLOv5 head
28 | head:
29 |   [[-1, 1, Conv, [512, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
32 |    [-1, 3, BottleneckCSP, [512, False]],  # 13
33 | 
34 |    [-1, 1, Conv, [256, 1, 1]],
35 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 |    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
37 |    [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)
38 | 
39 |    [-1, 1, Conv, [256, 3, 2]],
40 |    [[-1, 14], 1, Concat, [1]],  # cat head P4
41 |    [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)
42 | 
43 |    [-1, 1, Conv, [512, 3, 2]],
44 |    [[-1, 10], 1, Concat, [1]],  # cat head P5
45 |    [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)
46 | 
47 |    [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
48 |   ]
49 | 


--------------------------------------------------------------------------------
/models/yolov5s.yaml:
--------------------------------------------------------------------------------
 1 | # parameters
 2 | nc: 80  # number of classes
 3 | depth_multiple: 0.33  # model depth multiple
 4 | width_multiple: 0.50  # layer channel multiple
 5 | 
 6 | # anchors
 7 | anchors:
 8 |   - [10,13, 16,30, 33,23]  # P3/8
 9 |   - [30,61, 62,45, 59,119]  # P4/16
10 |   - [116,90, 156,198, 373,326]  # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   # [from, number, module, args]
15 |   [[-1, 1, Focus, [64, 3]],  # 0-P1/2
16 |    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
17 |    [-1, 3, BottleneckCSP, [128]],
18 |    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
19 |    [-1, 9, BottleneckCSP, [256]],
20 |    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
21 |    [-1, 9, BottleneckCSP, [512]],
22 |    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
23 |    [-1, 1, SPP, [1024, [5, 9, 13]]],
24 |    [-1, 3, BottleneckCSP, [1024, False]],  # 9
25 |   ]
26 | 
27 | # YOLOv5 head
28 | head:
29 |   [[-1, 1, Conv, [512, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
32 |    [-1, 3, BottleneckCSP, [512, False]],  # 13
33 | 
34 |    [-1, 1, Conv, [256, 1, 1]],
35 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 |    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
37 |    [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)
38 | 
39 |    [-1, 1, Conv, [256, 3, 2]],
40 |    [[-1, 14], 1, Concat, [1]],  # cat head P4
41 |    [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)
42 | 
43 |    [-1, 1, Conv, [512, 3, 2]],
44 |    [[-1, 10], 1, Concat, [1]],  # cat head P5
45 |    [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)
46 | 
47 |    [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
48 |   ]
49 | 


--------------------------------------------------------------------------------
/models/yolov5x.yaml:
--------------------------------------------------------------------------------
 1 | # parameters
 2 | nc: 80  # number of classes
 3 | depth_multiple: 1.33  # model depth multiple
 4 | width_multiple: 1.25  # layer channel multiple
 5 | 
 6 | # anchors
 7 | anchors:
 8 |   - [10,13, 16,30, 33,23]  # P3/8
 9 |   - [30,61, 62,45, 59,119]  # P4/16
10 |   - [116,90, 156,198, 373,326]  # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   # [from, number, module, args]
15 |   [[-1, 1, Focus, [64, 3]],  # 0-P1/2
16 |    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
17 |    [-1, 3, BottleneckCSP, [128]],
18 |    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
19 |    [-1, 9, BottleneckCSP, [256]],
20 |    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
21 |    [-1, 9, BottleneckCSP, [512]],
22 |    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
23 |    [-1, 1, SPP, [1024, [5, 9, 13]]],
24 |    [-1, 3, BottleneckCSP, [1024, False]],  # 9
25 |   ]
26 | 
27 | # YOLOv5 head
28 | head:
29 |   [[-1, 1, Conv, [512, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
32 |    [-1, 3, BottleneckCSP, [512, False]],  # 13
33 | 
34 |    [-1, 1, Conv, [256, 1, 1]],
35 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 |    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
37 |    [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)
38 | 
39 |    [-1, 1, Conv, [256, 3, 2]],
40 |    [[-1, 14], 1, Concat, [1]],  # cat head P4
41 |    [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)
42 | 
43 |    [-1, 1, Conv, [512, 3, 2]],
44 |    [[-1, 10], 1, Concat, [1]],  # cat head P5
45 |    [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)
46 | 
47 |    [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
48 |   ]
49 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # pip install -r requirements.txt
 2 | Cython
 3 | matplotlib>=3.2.2
 4 | numpy>=1.18.5
 5 | opencv-python>=4.1.2
 6 | pillow
 7 | # pycocotools>=2.0
 8 | PyYAML>=5.3
 9 | scipy>=1.4.1
10 | tensorboard>=2.2
11 | torch>=1.6.0
12 | torchvision>=0.7.0
13 | tqdm>=4.41.0
14 | 
15 | # Conda commands (in place of pip) ---------------------------------------------
16 | # conda update -yn base -c defaults conda
17 | # conda install -yc anaconda numpy opencv matplotlib tqdm pillow ipython
18 | # conda install -yc conda-forge scikit-image pycocotools tensorboard
19 | # conda install -yc spyder-ide spyder-line-profiler
20 | # conda install -yc pytorch pytorch torchvision
21 | # conda install -yc conda-forge protobuf numpy && pip install onnx==1.6.0  # https://github.com/onnx/onnx#linux-and-macos
22 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import glob
  3 | import json
  4 | import os
  5 | import shutil
  6 | from pathlib import Path
  7 | import cv2
  8 | from utils.datasets import letterbox
  9 | 
 10 | import numpy as np
 11 | import torch
 12 | import yaml
 13 | from tqdm import tqdm
 14 | 
 15 | from models.experimental import attempt_load
 16 | from utils.datasets import create_dataloader
 17 | from utils.general import (
 18 |     coco80_to_coco91_class, check_file, check_img_size, compute_loss, non_max_suppression,
 19 |     scale_coords, xyxy2xywh, clip_coords, plot_images, plot_test_images, xywh2xyxy, box_iou, output_to_target, ap_per_class)
 20 | from utils.torch_utils import select_device, time_synchronized
 21 | 
 22 | 
 23 | def test(data,
 24 |          weights=None,
 25 |          batch_size=16,
 26 |          imgsz=640,
 27 |          conf_thres=0.3,
 28 |          iou_thres=0.5,  # for NMS
 29 |          save_json=False,
 30 |          single_cls=False,
 31 |          augment=False,
 32 |          verbose=False,
 33 |          model=None,
 34 |          dataloader=None,
 35 |          save_dir='',
 36 |          merge=False,
 37 |          emb_dim=256,
 38 |          save_txt=False):
 39 |     # Initialize/load model and set device
 40 |     training = model is not None
 41 |     if training:  # called by train.py
 42 |         device = next(model.parameters()).device  # get model device
 43 | 
 44 |     else:  # called directly
 45 |         device = select_device(opt.device, batch_size=batch_size)
 46 |         merge, save_txt = opt.merge, opt.save_txt  # use Merge NMS, save *.txt labels
 47 |         if save_txt:
 48 |             out = Path('inference/output')
 49 |             if os.path.exists(out):
 50 |                 shutil.rmtree(out)  # delete output folder
 51 |             os.makedirs(out)  # make new output folder
 52 | 
 53 |         # Remove previous
 54 |         for f in glob.glob(str(Path(save_dir) / 'test_batch*.jpg')):
 55 |             os.remove(f)
 56 | 
 57 |         # Load model
 58 |         model = attempt_load(weights, map_location=device)  # load FP32 model
 59 |         imgsz = [check_img_size(x, model.stride.max()) for x in imgsz]
 60 | 
 61 |     # Half
 62 |     half = device.type != 'cpu'  # half precision only supported on CUDA
 63 |     if half:
 64 |         model.half()
 65 | 
 66 |     # Configure
 67 |     model.eval()
 68 |     with open(data) as f:
 69 |         data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
 70 |     nc = 1 if single_cls else int(data['nc'])  # number of classes
 71 |     iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
 72 |     niou = iouv.numel()
 73 | 
 74 |     # Dataloader
 75 |     if not training:
 76 |         if len(imgsz) == 1:
 77 |             img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
 78 |         else:
 79 |             img = torch.zeros((1, 3, imgsz[1], imgsz[0]), device=device)
 80 |         _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
 81 |         root = data['root']
 82 |         path = data['test'] if opt.task == 'test' else data['test_emb']  # path to val/test images
 83 |         dataloader = create_dataloader(root, path, imgsz, batch_size, model.stride.max(), opt,
 84 |                                        hyp=None, augment=False, cache=False, pad=0.5, rect=False)[0]
 85 | 
 86 |     seen = 0
 87 |     names = model.names if hasattr(model, 'names') else model.module.names
 88 |     s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
 89 |     p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
 90 |     jdict, stats, ap, ap_class = [], [], [], []
 91 |     loss = torch.zeros(4, device=device)
 92 |     for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
 93 |         img = img.to(device, non_blocking=True)
 94 |         img = img.half() if half else img.float()  # uint8 to fp16/32
 95 |         img /= 255.0  # 0 - 255 to 0.0 - 1.0
 96 |         targets = targets.to(device)
 97 |         nb, _, height, width = img.shape  # batch size, channels, height, width
 98 |         whwh = torch.Tensor([width, height, width, height]).to(device)
 99 | 
100 |         # Disable gradients
101 |         with torch.no_grad():
102 |             # Run model
103 |             t = time_synchronized()
104 |             inf_out, train_out_p, train_out_pemb = model(img, augment=augment)  # inference and training outputs
105 |             t0 += time_synchronized() - t
106 | 
107 |             # Compute loss
108 |             if training:  # if model has loss hyperparameters
109 |                 loss += compute_loss([x.float() for x in train_out_p], [x.float() for x in train_out_pemb],
110 |                                      targets, model)[1][:4]  # GIoU, obj, cls, lid
111 | 
112 |             # Run NMS
113 |             t = time_synchronized()
114 |             output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres,
115 |                                          merge=merge, emb_dim=emb_dim)
116 |             t1 += time_synchronized() - t
117 | 
118 |             '''
119 |             images = letterbox(cv2.imread(paths[1]), [608,1088], auto=False, scaleup=False)[0]
120 |             d = output[1]
121 |             if d is None:
122 |                 continue
123 |             for i in range(len(d)):
124 |                 cv2.rectangle(images, (int(d[i][0]), int(d[i][1])), (int(d[i][2]), int(d[i][3])), (0, 0, 255), 2)
125 |             cv2.imshow("image", images)
126 |             cv2.waitKey(0)
127 |             '''
128 | 
129 |         # Statistics per image
130 |         for si, pred in enumerate(output):
131 |             labels = targets[targets[:, 0] == si, 1:]
132 |             nl = len(labels)
133 |             tcls = labels[:, 0].tolist() if nl else []  # target class
134 |             seen += 1
135 | 
136 |             if pred is None:
137 |                 if nl:
138 |                     stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
139 |                 continue
140 | 
141 | 
142 |             # Clip boxes to image bounds
143 |             clip_coords(pred, (height, width))
144 | 
145 |             # Assign all predictions as incorrect
146 |             correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
147 |             if nl:
148 |                 detected = []  # target indices
149 |                 tcls_tensor = labels[:, 0]
150 | 
151 |                 # target boxes
152 |                 tbox = xywh2xyxy(labels[:, 2:6]) * whwh
153 | 
154 |                 # Per target class
155 |                 for cls in torch.unique(tcls_tensor):
156 |                     ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # prediction indices
157 |                     pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # target indices
158 | 
159 |                     # Search for detections
160 |                     if pi.shape[0]:
161 |                         # Prediction to target ious
162 |                         ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices
163 | 
164 |                         # Append detections
165 |                         for j in (ious > iouv[0]).nonzero(as_tuple=False):
166 |                             d = ti[i[j]]  # detected target
167 |                             if d not in detected:
168 |                                 detected.append(d)
169 |                                 correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
170 |                                 if len(detected) == nl:  # all targets already located in image
171 |                                     break
172 | 
173 |             # Append statistics (correct, conf, pcls, tcls)
174 |             stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
175 | 
176 |         # Plot images
177 |         if  batch_i < 1:
178 |             f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i)  # filename
179 |             plot_images(img, targets, paths, str(f), names)  # ground truth
180 |             f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i)
181 |             plot_test_images(img, output_to_target(output, width, height), paths, str(f), names)  # predictions
182 | 
183 |     # Compute statistics
184 |     stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
185 |     if len(stats) and stats[0].any():
186 |         p, r, ap, f1, ap_class = ap_per_class(*stats)
187 |         p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, AP@0.5, AP@0.5:0.95]
188 |         mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
189 |         nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
190 |     else:
191 |         nt = torch.zeros(1)
192 | 
193 |     # Print results
194 |     pf = '%20s' + '%12.3g' * 6  # print format
195 |     print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
196 | 
197 |     # Print results per class
198 |     if verbose and nc > 1 and len(stats):
199 |         for i, c in enumerate(ap_class):
200 |             print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
201 | 
202 |     # Print speeds
203 |     t = tuple(x / seen * 1E3 for x in (t0, t1, t0+t1)) + (imgsz[0], imgsz[1], batch_size)
204 |     if not training:
205 |         print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
206 | 
207 |     # Return results
208 |     model.float()  # for training
209 |     maps = np.zeros(nc) + map
210 |     for i, c in enumerate(ap_class):
211 |         maps[c] = ap[i]
212 |     return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
213 | 
214 | 
215 | if __name__ == '__main__':
216 |     parser = argparse.ArgumentParser(prog='test.py')
217 |     parser.add_argument('--weights', nargs='+', type=str, default='runs/last.pt', help='model.pt path(s)')
218 |     parser.add_argument('--data', type=str, default='data/mot.yaml', help='*.data path')
219 |     parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
220 |     parser.add_argument('--img-size', type=int, default=[1088,608], help='inference size (pixels)')
221 |     parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
222 |     parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
223 |     parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
224 |     parser.add_argument('--task', default='test', help="'val', 'test', 'study'")
225 |     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
226 |     parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
227 |     parser.add_argument('--augment', action='store_true', help='augmented inference')
228 |     parser.add_argument('--merge', action='store_true', help='use Merge NMS')
229 |     parser.add_argument('--verbose', action='store_true', help='report mAP by class')
230 |     parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
231 |     parser.add_argument('--emb_dim', action='store_true', help='dim of reid prediction', default=256)
232 |     opt = parser.parse_args()
233 |     opt.save_json |= opt.data.endswith('coco.yaml')
234 |     opt.data = check_file(opt.data)  # check file
235 |     print(opt)
236 | 
237 |     if opt.task in ['val', 'test']:  # run normally
238 |         test(opt.data,
239 |              opt.weights,
240 |              opt.batch_size,
241 |              opt.img_size,
242 |              opt.conf_thres,
243 |              opt.iou_thres,
244 |              opt.save_json,
245 |              opt.single_cls,
246 |              opt.augment,
247 |              opt.verbose,
248 |              emb_dim=opt.emb_dim)
249 | 
250 |     elif opt.task == 'study':  # run over a range of settings and save/plot
251 |         for weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
252 |             f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem)  # filename to save to
253 |             x = list(range(352, 832, 64))  # x axis
254 |             y = []  # y axis
255 |             for i in x:  # img-size
256 |                 print('\nRunning %s point %s...' % (f, i))
257 |                 r, _, t = test(opt.data, weights, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json)
258 |                 y.append(r + t)  # results and times
259 |             np.savetxt(f, y, fmt='%10.4g')  # save
260 |         os.system('zip -r study.zip study_*.txt')
261 |         # plot_study_txt(f, x)  # plot
262 | 


--------------------------------------------------------------------------------
/track.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import os.path as osp
  3 | import cv2
  4 | import logging
  5 | import argparse
  6 | import motmetrics as mm
  7 | 
  8 | import torch
  9 | from tracker.multitracker import JDETracker
 10 | from tracker_utils import visualization as vis
 11 | from tracker_utils.log import logger
 12 | from tracker_utils.timer import Timer
 13 | from tracker_utils.evaluation import Evaluator
 14 | # from tracker_utils.parse_config import parse_model_cfg
 15 | import tracker_utils.datasets as datasets
 16 | from tracker_utils.utils import *
 17 | 
 18 | 
 19 | def write_results(filename, results, data_type):
 20 |     if data_type == 'mot':
 21 |         save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n'
 22 |     elif data_type == 'kitti':
 23 |         save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
 24 |     else:
 25 |         raise ValueError(data_type)
 26 | 
 27 |     with open(filename, 'w') as f:
 28 |         for frame_id, tlwhs, track_ids in results:
 29 |             if data_type == 'kitti':
 30 |                 frame_id -= 1
 31 |             for tlwh, track_id in zip(tlwhs, track_ids):
 32 |                 if track_id < 0:
 33 |                     continue
 34 |                 x1, y1, w, h = tlwh
 35 |                 x2, y2 = x1 + w, y1 + h
 36 |                 line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h)
 37 |                 f.write(line)
 38 |     logger.info('save results to {}'.format(filename))
 39 | 
 40 | 
 41 | def eval_seq(opt, dataloader, data_type, result_filename, save_dir=None, show_image=True, frame_rate=30):
 42 |     if save_dir:
 43 |         mkdir_if_missing(save_dir)
 44 |     tracker = JDETracker(opt, frame_rate=frame_rate)
 45 |     timer = Timer()
 46 |     results = []
 47 |     frame_id = 0
 48 |     for path, img, img0 in dataloader:
 49 |         if frame_id % 20 == 0:
 50 |             logger.info('Processing frame {} ({:.2f} fps)'.format(frame_id, 1./max(1e-5, timer.average_time)))
 51 | 
 52 |         # run tracking
 53 |         timer.tic()
 54 |         blob = torch.from_numpy(img).cuda().unsqueeze(0).half()
 55 |         online_targets = tracker.update(blob, img0, path)
 56 |         online_tlwhs = []
 57 |         online_ids = []
 58 |         for t in online_targets:
 59 |             tlwh = t.tlwh
 60 |             tid = t.track_id
 61 |             vertical = tlwh[2] / tlwh[3] > 1.6
 62 |             if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical:
 63 |                 online_tlwhs.append(tlwh)
 64 |                 online_ids.append(tid)
 65 |         timer.toc()
 66 |         # save results
 67 |         results.append((frame_id + 1, online_tlwhs, online_ids))
 68 |         if show_image or save_dir is not None:
 69 |             online_im = vis.plot_tracking(img0, online_tlwhs, online_ids, frame_id=frame_id,
 70 |                                           fps=1. / timer.average_time)
 71 |         if show_image:
 72 |             cv2.imshow('online_im', online_im)
 73 |         if save_dir is not None:
 74 |             cv2.imwrite(os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), online_im)
 75 |         frame_id += 1
 76 |     # save results
 77 |     write_results(result_filename, results, data_type)
 78 |     return frame_id, timer.average_time, timer.calls
 79 | 
 80 | 
 81 | 
 82 | def main(opt, data_root='/data/MOT16/train', det_root=None, seqs=('MOT16-05',), exp_name='demo', 
 83 |          save_images=False, save_videos=False, show_image=True):
 84 |     logger.setLevel(logging.INFO)
 85 |     result_root = os.path.join(data_root, '..', 'results', exp_name)
 86 |     mkdir_if_missing(result_root)
 87 |     data_type = 'mot'
 88 | 
 89 |     # run tracking
 90 |     accs = []
 91 |     n_frame = 0
 92 |     timer_avgs, timer_calls = [], []
 93 |     for seq in seqs:
 94 |         output_dir = os.path.join(data_root, '..','outputs', exp_name, seq) if save_images or save_videos else None
 95 | 
 96 |         logger.info('start seq: {}'.format(seq))
 97 |         dataloader = datasets.LoadImages(osp.join(data_root, seq, 'img1'), opt.img_size)
 98 |         result_filename = os.path.join(result_root, '{}.txt'.format(seq))
 99 |         meta_info = open(os.path.join(data_root, seq, 'seqinfo.ini')).read() 
100 |         frame_rate = int(meta_info[meta_info.find('frameRate')+10:meta_info.find('\nseqLength')])
101 |         nf, ta, tc = eval_seq(opt, dataloader, data_type, result_filename,
102 |                               save_dir=output_dir, show_image=show_image, frame_rate=frame_rate)
103 |         n_frame += nf
104 |         timer_avgs.append(ta)
105 |         timer_calls.append(tc)
106 | 
107 |         # eval
108 |         logger.info('Evaluate seq: {}'.format(seq))
109 |         evaluator = Evaluator(data_root, seq, data_type)
110 |         accs.append(evaluator.eval_file(result_filename))
111 |         if save_videos:
112 |             output_video_path = osp.join(output_dir, '{}.mp4'.format(seq))
113 |             cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg -c:v copy {}'.format(output_dir, output_video_path)
114 |             os.system(cmd_str)
115 |     timer_avgs = np.asarray(timer_avgs)
116 |     timer_calls = np.asarray(timer_calls)
117 |     all_time = np.dot(timer_avgs, timer_calls)
118 |     avg_time = all_time / np.sum(timer_calls)
119 |     logger.info('Time elapsed: {:.2f} seconds, FPS: {:.2f}'.format(all_time, 1.0 / avg_time))
120 | 
121 |     # get summary
122 |     metrics = mm.metrics.motchallenge_metrics
123 |     mh = mm.metrics.create()
124 |     summary = Evaluator.get_summary(accs, seqs, metrics)
125 |     strsummary = mm.io.render_summary(
126 |         summary,
127 |         formatters=mh.formatters,
128 |         namemap=mm.io.motchallenge_metric_names
129 |     )
130 |     print(strsummary)
131 |     Evaluator.save_summary(summary, os.path.join(result_root, 'summary_{}.xlsx'.format(exp_name)))
132 | 
133 | 
134 | 
135 | if __name__ == '__main__':
136 |     parser = argparse.ArgumentParser(prog='track.py')
137 |     parser.add_argument('--cfg', type=str, default='models/yolov5_JDE.yaml', help='model.yaml path')
138 |     parser.add_argument('--weights', nargs='+', type=str, default='runs/last.pt', help='model.pt path(s)')
139 |     parser.add_argument('--data', type=str, default='data/mot.yaml', help='*.data path')
140 |     parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
141 |     parser.add_argument('--img-size', type=int, default=[1088, 608], help='inference size (pixels)')
142 |     parser.add_argument('--device', default='0, 1', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
143 |     parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
144 |     parser.add_argument('--conf-thres', type=float, default=0.1, help='object confidence threshold')
145 |     parser.add_argument('--nms-thres', type=float, default=0.4, help='IOU threshold for NMS')
146 |     parser.add_argument('--min-box-area', type=float, default=200, help='filter out tiny boxes')
147 |     parser.add_argument('--track-buffer', type=int, default=30, help='tracking buffer')
148 |     parser.add_argument('--test-mot16', default=False, action='store_true', help='tracking buffer')
149 |     parser.add_argument('--save-images', default=True, action='store_true', help='save tracking results (image)')
150 |     parser.add_argument('--save-videos', action='store_true', help='save tracking results (video)')
151 |     opt = parser.parse_args()
152 |     print(opt, end='\n\n')
153 |  
154 |     if not opt.test_mot16:
155 |         seqs_str = '''MOT20-01
156 |                       MOT20-02
157 |                       MOT20-03
158 |                       MOT20-05
159 |                     '''
160 |         images = '/home/xb/huawei/MOT20'
161 |         # seqs_str = '''MOT16-02
162 |         #               MOT16-04
163 |         #               MOT16-05
164 |         #               MOT16-09
165 |         #               MOT16-10
166 |         #               MOT16-11
167 |         #               MOT16-13
168 |         #             '''
169 |         # images = '/home/xb/huawei/MOT16'
170 |         data_root = '%s/train' % images
171 |     else:
172 |         seqs_str = '''MOT16-01
173 |                      MOT16-03
174 |                      MOT16-06
175 |                      MOT16-07
176 |                      MOT16-08
177 |                      MOT16-12
178 |                      MOT16-14'''
179 |         data_root = '/home/xb/huawei/MOT16/test'
180 |     seqs = [seq.strip() for seq in seqs_str.split()]
181 | 
182 |     main(opt,
183 |          data_root=data_root,
184 |          seqs=seqs,
185 |          exp_name=opt.weights.split('/')[-2],
186 |          show_image=False,
187 |          save_images=opt.save_images, 
188 |          save_videos=opt.save_videos)
189 | 
190 | 


--------------------------------------------------------------------------------
/tracker/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaobin1231/YOLOv5_JDE/ea3149f280a2234a4bf840c7da8f5d0c77abd59d/tracker/__init__.py


--------------------------------------------------------------------------------
/tracker/basetrack.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from collections import OrderedDict
 3 | 
 4 | 
 5 | class TrackState(object):
 6 |     New = 0
 7 |     Tracked = 1
 8 |     Lost = 2
 9 |     Removed = 3
10 | 
11 | 
12 | class BaseTrack(object):
13 |     _count = 0
14 | 
15 |     track_id = 0
16 |     is_activated = False
17 |     state = TrackState.New
18 | 
19 |     history = OrderedDict()
20 |     features = []
21 |     curr_feature = None
22 |     score = 0
23 |     start_frame = 0
24 |     frame_id = 0
25 |     time_since_update = 0
26 | 
27 |     # multi-camera
28 |     location = (np.inf, np.inf)
29 | 
30 |     @property
31 |     def end_frame(self):
32 |         return self.frame_id
33 | 
34 |     @staticmethod
35 |     def next_id():
36 |         BaseTrack._count += 1
37 |         return BaseTrack._count
38 | 
39 |     def activate(self, *args):
40 |         raise NotImplementedError
41 | 
42 |     def predict(self):
43 |         raise NotImplementedError
44 | 
45 |     def update(self, *args, **kwargs):
46 |         raise NotImplementedError
47 | 
48 |     def mark_lost(self):
49 |         self.state = TrackState.Lost
50 | 
51 |     def mark_removed(self):
52 |         self.state = TrackState.Removed
53 | 
54 | 


--------------------------------------------------------------------------------
/tracker/matching.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import scipy
  3 | from scipy.spatial.distance import cdist
  4 | import lap
  5 | 
  6 | from cython_bbox import bbox_overlaps as bbox_ious
  7 | from tracker_utils import kalman_filter
  8 | 
  9 | def merge_matches(m1, m2, shape):
 10 |     O,P,Q = shape
 11 |     m1 = np.asarray(m1)
 12 |     m2 = np.asarray(m2)
 13 | 
 14 |     M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
 15 |     M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
 16 | 
 17 |     mask = M1*M2
 18 |     match = mask.nonzero()
 19 |     match = list(zip(match[0], match[1]))
 20 |     unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
 21 |     unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))
 22 | 
 23 |     return match, unmatched_O, unmatched_Q
 24 | 
 25 | 
 26 | def linear_assignment(cost_matrix, thresh):
 27 |     if cost_matrix.size == 0:
 28 |         return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
 29 |     matches, unmatched_a, unmatched_b = [], [], []
 30 |     cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
 31 |     for ix, mx in enumerate(x):
 32 |         if mx >= 0:
 33 |             matches.append([ix, mx])
 34 |     unmatched_a = np.where(x < 0)[0]
 35 |     unmatched_b = np.where(y < 0)[0]
 36 |     matches = np.asarray(matches)
 37 |     return matches, unmatched_a, unmatched_b
 38 |             
 39 | 
 40 | def ious(atlbrs, btlbrs):
 41 |     """
 42 |     Compute cost based on IoU
 43 |     :type atlbrs: list[tlbr] | np.ndarray
 44 |     :type atlbrs: list[tlbr] | np.ndarray
 45 | 
 46 |     :rtype ious np.ndarray
 47 |     """
 48 |     ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float)
 49 |     if ious.size == 0:
 50 |         return ious
 51 | 
 52 |     ious = bbox_ious(
 53 |         np.ascontiguousarray(atlbrs, dtype=np.float),
 54 |         np.ascontiguousarray(btlbrs, dtype=np.float)
 55 |     )
 56 | 
 57 |     return ious
 58 | 
 59 | 
 60 | def iou_distance(atracks, btracks):
 61 |     """
 62 |     Compute cost based on IoU
 63 |     :type atracks: list[STrack]
 64 |     :type btracks: list[STrack]
 65 | 
 66 |     :rtype cost_matrix np.ndarray
 67 |     """
 68 | 
 69 |     if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
 70 |         atlbrs = atracks
 71 |         btlbrs = btracks
 72 |     else:
 73 |         atlbrs = [track.tlbr for track in atracks]
 74 |         btlbrs = [track.tlbr for track in btracks]
 75 |     _ious = ious(atlbrs, btlbrs)
 76 |     cost_matrix = 1 - _ious
 77 | 
 78 |     return cost_matrix
 79 | 
 80 | def embedding_distance(tracks, detections, metric='cosine'):
 81 |     """
 82 |     :param tracks: list[STrack]
 83 |     :param detections: list[BaseTrack]
 84 |     :param metric:
 85 |     :return: cost_matrix np.ndarray
 86 |     """
 87 | 
 88 |     cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float)
 89 |     if cost_matrix.size == 0:
 90 |         return cost_matrix
 91 |     det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float)
 92 |     track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float)
 93 |     cost_matrix = np.maximum(0.0, cdist(track_features, det_features)) # Nomalized features
 94 | 
 95 |     return cost_matrix
 96 | 
 97 | 
 98 | def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
 99 |     if cost_matrix.size == 0:
100 |         return cost_matrix
101 |     gating_dim = 2 if only_position else 4
102 |     gating_threshold = kalman_filter.chi2inv95[gating_dim]
103 |     measurements = np.asarray([det.to_xyah() for det in detections])
104 |     for row, track in enumerate(tracks):
105 |         gating_distance = kf.gating_distance(
106 |             track.mean, track.covariance, measurements, only_position, metric='maha')
107 |         cost_matrix[row, gating_distance > gating_threshold] = np.inf
108 |         cost_matrix[row] = lambda_ * cost_matrix[row] + (1-lambda_)* gating_distance
109 |     return cost_matrix
110 | 


--------------------------------------------------------------------------------
/tracker/multitracker.py:
--------------------------------------------------------------------------------
  1 | from numba import jit
  2 | from collections import deque
  3 | import torch
  4 | import numpy as np
  5 | from tracker_utils.kalman_filter import KalmanFilter
  6 | from tracker_utils.log import logger
  7 | from models.yolo import Model
  8 | from tracker import matching
  9 | from .basetrack import BaseTrack, TrackState
 10 | import time
 11 | from utils.general import non_max_suppression
 12 | from tracker_utils.utils import scale_coords
 13 | 
 14 | class STrack(BaseTrack):
 15 | 
 16 |     def __init__(self, tlwh, score, temp_feat, buffer_size=30):
 17 | 
 18 |         # wait activate
 19 |         self._tlwh = np.asarray(tlwh, dtype=np.float)
 20 |         self.kalman_filter = None
 21 |         self.mean, self.covariance = None, None
 22 |         self.is_activated = False
 23 | 
 24 |         self.score = score
 25 |         self.tracklet_len = 0
 26 | 
 27 |         self.smooth_feat = None
 28 |         self.update_features(temp_feat)
 29 |         self.features = deque([], maxlen=buffer_size)
 30 |         self.alpha = 0.9
 31 |     
 32 |     def update_features(self, feat):
 33 |         feat /= np.linalg.norm(feat)
 34 |         self.curr_feat = feat 
 35 |         if self.smooth_feat is None:
 36 |             self.smooth_feat = feat
 37 |         else:
 38 |             self.smooth_feat = self.alpha *self.smooth_feat + (1-self.alpha) * feat
 39 |         self.features.append(feat)
 40 |         self.smooth_feat /= np.linalg.norm(self.smooth_feat)
 41 | 
 42 |     def predict(self):
 43 |         mean_state = self.mean.copy()
 44 |         if self.state != TrackState.Tracked:
 45 |             mean_state[7] = 0
 46 |         self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
 47 |         
 48 |     @staticmethod
 49 |     def multi_predict(stracks, kalman_filter):
 50 |         if len(stracks) > 0:
 51 |             multi_mean = np.asarray([st.mean.copy() for st in stracks])
 52 |             multi_covariance = np.asarray([st.covariance for st in stracks])
 53 |             for i, st in enumerate(stracks):
 54 |                 if st.state != TrackState.Tracked:
 55 |                     multi_mean[i][7] = 0
 56 | #            multi_mean, multi_covariance = STrack.kalman_filter.multi_predict(multi_mean, multi_covariance)
 57 |             multi_mean, multi_covariance = kalman_filter.multi_predict(multi_mean, multi_covariance)
 58 |             for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
 59 |                 stracks[i].mean = mean
 60 |                 stracks[i].covariance = cov
 61 | 
 62 |     def activate(self, kalman_filter, frame_id):
 63 |         """Start a new tracklet"""
 64 |         self.kalman_filter = kalman_filter
 65 |         self.track_id = self.next_id()
 66 |         self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh))
 67 | 
 68 |         self.tracklet_len = 0
 69 |         self.state = TrackState.Tracked
 70 |         #self.is_activated = True
 71 |         self.frame_id = frame_id
 72 |         self.start_frame = frame_id
 73 | 
 74 |     def re_activate(self, new_track, frame_id, new_id=False):
 75 |         self.mean, self.covariance = self.kalman_filter.update(
 76 |             self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh)
 77 |         )
 78 | 
 79 |         self.update_features(new_track.curr_feat)
 80 |         self.tracklet_len = 0
 81 |         self.state = TrackState.Tracked
 82 |         self.is_activated = True
 83 |         self.frame_id = frame_id
 84 |         if new_id:
 85 |             self.track_id = self.next_id()
 86 | 
 87 |     def update(self, new_track, frame_id, update_feature=True):
 88 |         """
 89 |         Update a matched track
 90 |         :type new_track: STrack
 91 |         :type frame_id: int
 92 |         :type update_feature: bool
 93 |         :return:
 94 |         """
 95 |         self.frame_id = frame_id
 96 |         self.tracklet_len += 1
 97 | 
 98 |         new_tlwh = new_track.tlwh
 99 |         self.mean, self.covariance = self.kalman_filter.update(
100 |             self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh))
101 |         self.state = TrackState.Tracked
102 |         self.is_activated = True
103 | 
104 |         self.score = new_track.score
105 |         if update_feature:
106 |             self.update_features(new_track.curr_feat)
107 | 
108 |     @property
109 |     @jit
110 |     def tlwh(self):
111 |         """Get current position in bounding box format `(top left x, top left y,
112 |                 width, height)`.
113 |         """
114 |         if self.mean is None:
115 |             return self._tlwh.copy()
116 |         ret = self.mean[:4].copy()
117 |         ret[2] *= ret[3]
118 |         ret[:2] -= ret[2:] / 2
119 |         return ret
120 | 
121 |     @property
122 |     @jit
123 |     def tlbr(self):
124 |         """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
125 |         `(top left, bottom right)`.
126 |         """
127 |         ret = self.tlwh.copy()
128 |         ret[2:] += ret[:2]
129 |         return ret
130 | 
131 |     @staticmethod
132 |     @jit
133 |     def tlwh_to_xyah(tlwh):
134 |         """Convert bounding box to format `(center x, center y, aspect ratio,
135 |         height)`, where the aspect ratio is `width / height`.
136 |         """
137 |         ret = np.asarray(tlwh).copy()
138 |         ret[:2] += ret[2:] / 2
139 |         ret[2] /= ret[3]
140 |         return ret
141 | 
142 |     def to_xyah(self):
143 |         return self.tlwh_to_xyah(self.tlwh)
144 | 
145 |     @staticmethod
146 |     @jit
147 |     def tlbr_to_tlwh(tlbr):
148 |         ret = np.asarray(tlbr).copy()
149 |         ret[2:] -= ret[:2]
150 |         return ret
151 | 
152 |     @staticmethod
153 |     @jit
154 |     def tlwh_to_tlbr(tlwh):
155 |         ret = np.asarray(tlwh).copy()
156 |         ret[2:] += ret[:2]
157 |         return ret
158 | 
159 |     def __repr__(self):
160 |         return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
161 | 
162 | 
163 | class JDETracker(object):
164 |     def __init__(self, opt, frame_rate=30):
165 |         self.opt = opt
166 |         self.model = Model(opt.cfg, nc=1)
167 |         ckpt = torch.load(opt.weights, map_location='cpu')
168 |         ckpt['model'] = {k: v for k, v in ckpt['model'].float().state_dict().items()
169 |                          if k in self.model.state_dict() and self.model.state_dict()[k].shape == v.shape}
170 |         self.model.load_state_dict(ckpt['model'], strict=False)
171 |         self.model.cuda().half().eval()
172 | 
173 |         self.tracked_stracks = []  # type: list[STrack]
174 |         self.lost_stracks = []  # type: list[STrack]
175 |         self.removed_stracks = []  # type: list[STrack]
176 | 
177 |         self.frame_id = 0
178 |         self.det_thresh = opt.conf_thres
179 |         self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
180 |         self.max_time_lost = self.buffer_size
181 | 
182 |         self.kalman_filter = KalmanFilter()
183 | 
184 |     def update(self, im_blob, img0, path):
185 |         self.frame_id += 1
186 |         activated_starcks = []      # for storing active tracks, for the current frame
187 |         refind_stracks = []         # Lost Tracks whose detections are obtained in the current frame
188 |         lost_stracks = []           # The tracks which are not obtained in the current frame but are not removed.(Lost for some time lesser than the threshold for removing)
189 |         removed_stracks = []
190 | 
191 |         t1 = time.time()
192 |         ''' Step 1: Network forward, get detections & embeddings'''
193 |         with torch.no_grad():
194 |             pred = self.model(im_blob)[0]
195 |         # pred is tensor of all the proposals (default number of proposals: 54264). Proposals have information associated with the bounding box and embeddings
196 |         pred = pred[pred[:, :, 4] > self.opt.conf_thres]
197 |         # pred now has lesser number of proposals. Proposals rejected on basis of object confidence score
198 |         if len(pred) > 0:
199 |             dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres, self.opt.nms_thres)[0].cpu()
200 | 
201 |             # Final proposals are obtained in dets. Information of bounding box and embeddings also included
202 |             # Next step changes the detection scales
203 |             scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round()
204 |             '''Detections is list of (x1, y1, x2, y2, object_conf, class_score, class_pred)'''
205 |             # class_pred is the embeddings.
206 | 
207 |             detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f.numpy(), 30) for
208 |                           (tlbrs, f) in zip(dets[:, :5], dets[:, 6:])]
209 |         else:
210 |             detections = []
211 | 
212 |         t2 = time.time()
213 |         # print('Forward: {} s'.format(t2-t1))
214 | 
215 |         ''' Add newly detected tracklets to tracked_stracks'''
216 |         unconfirmed = []
217 |         tracked_stracks = []  # type: list[STrack]
218 |         for track in self.tracked_stracks:
219 |             if not track.is_activated:
220 |                 # previous tracks which are not active in the current frame are added in unconfirmed list
221 |                 unconfirmed.append(track)
222 |                 # print("Should not be here, in unconfirmed")
223 |             else:
224 |                 # Active tracks are added to the local list 'tracked_stracks'
225 |                 tracked_stracks.append(track)
226 | 
227 |         ''' Step 2: First association, with embedding'''
228 |         # Combining currently tracked_stracks and lost_stracks
229 |         strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
230 |         # Predict the current location with KF
231 |         STrack.multi_predict(strack_pool, self.kalman_filter)
232 | 
233 | 
234 |         dists = matching.embedding_distance(strack_pool, detections)
235 |         # dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
236 |         dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
237 |         # The dists is the list of distances of the detection with the tracks in strack_pool
238 |         matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
239 |         # The matches is the array for corresponding matches of the detection with the corresponding strack_pool
240 | 
241 |         for itracked, idet in matches:
242 |             # itracked is the id of the track and idet is the detection
243 |             track = strack_pool[itracked]
244 |             det = detections[idet]
245 |             if track.state == TrackState.Tracked:
246 |                 # If the track is active, add the detection to the track
247 |                 track.update(detections[idet], self.frame_id)
248 |                 activated_starcks.append(track)
249 |             else:
250 |                 # We have obtained a detection from a track which is not active, hence put the track in refind_stracks list
251 |                 track.re_activate(det, self.frame_id, new_id=False)
252 |                 refind_stracks.append(track)
253 | 
254 |         # None of the steps below happen if there are no undetected tracks.
255 |         ''' Step 3: Second association, with IOU'''
256 |         detections = [detections[i] for i in u_detection]
257 |         # detections is now a list of the unmatched detections
258 |         r_tracked_stracks = [] # This is container for stracks which were tracked till the
259 |         # previous frame but no detection was found for it in the current frame
260 |         for i in u_track:
261 |             if strack_pool[i].state == TrackState.Tracked:
262 |                 r_tracked_stracks.append(strack_pool[i])
263 |         dists = matching.iou_distance(r_tracked_stracks, detections)
264 |         matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)
265 |         # matches is the list of detections which matched with corresponding tracks by IOU distance method
266 |         for itracked, idet in matches:
267 |             track = r_tracked_stracks[itracked]
268 |             det = detections[idet]
269 |             if track.state == TrackState.Tracked:
270 |                 track.update(det, self.frame_id)
271 |                 activated_starcks.append(track)
272 |             else:
273 |                 track.re_activate(det, self.frame_id, new_id=False)
274 |                 refind_stracks.append(track)
275 |         # Same process done for some unmatched detections, but now considering IOU_distance as measure
276 | 
277 |         for it in u_track:
278 |             track = r_tracked_stracks[it]
279 |             if not track.state == TrackState.Lost:
280 |                 track.mark_lost()
281 |                 lost_stracks.append(track)
282 |         # If no detections are obtained for tracks (u_track), the tracks are added to lost_tracks list and are marked lost
283 | 
284 |         '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
285 |         detections = [detections[i] for i in u_detection]
286 |         dists = matching.iou_distance(unconfirmed, detections)
287 |         matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
288 |         for itracked, idet in matches:
289 |             unconfirmed[itracked].update(detections[idet], self.frame_id)
290 |             activated_starcks.append(unconfirmed[itracked])
291 | 
292 |         # The tracks which are yet not matched
293 |         for it in u_unconfirmed:
294 |             track = unconfirmed[it]
295 |             track.mark_removed()
296 |             removed_stracks.append(track)
297 | 
298 |         # after all these confirmation steps, if a new detection is found, it is initialized for a new track
299 |         """ Step 4: Init new stracks"""
300 |         for inew in u_detection:
301 |             track = detections[inew]
302 |             if track.score < self.det_thresh:
303 |                 continue
304 |             track.activate(self.kalman_filter, self.frame_id)
305 |             activated_starcks.append(track)
306 | 
307 |         """ Step 5: Update state"""
308 |         # If the tracks are lost for more frames than the threshold number, the tracks are removed.
309 |         for track in self.lost_stracks:
310 |             if self.frame_id - track.end_frame > self.max_time_lost:
311 |                 track.mark_removed()
312 |                 removed_stracks.append(track)
313 |         # print('Remained match {} s'.format(t4-t3))
314 | 
315 |         # Update the self.tracked_stracks and self.lost_stracks using the updates in this step.
316 |         self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
317 |         self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
318 |         self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
319 |         # self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost]  # type: list[STrack]
320 |         self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
321 |         self.lost_stracks.extend(lost_stracks)
322 |         self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
323 |         self.removed_stracks.extend(removed_stracks)
324 |         self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
325 | 
326 |         # get scores of lost tracks
327 |         output_stracks = [track for track in self.tracked_stracks if track.is_activated]
328 | 
329 |         logger.debug('===========Frame {}=========='.format(self.frame_id))
330 |         logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
331 |         logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
332 |         logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
333 |         logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))
334 |         # print('Final {} s'.format(t5-t4))
335 |         return output_stracks
336 | 
337 | def joint_stracks(tlista, tlistb):
338 |     exists = {}
339 |     res = []
340 |     for t in tlista:
341 |         exists[t.track_id] = 1
342 |         res.append(t)
343 |     for t in tlistb:
344 |         tid = t.track_id
345 |         if not exists.get(tid, 0):
346 |             exists[tid] = 1
347 |             res.append(t)
348 |     return res
349 | 
350 | def sub_stracks(tlista, tlistb):
351 |     stracks = {}
352 |     for t in tlista:
353 |         stracks[t.track_id] = t
354 |     for t in tlistb:
355 |         tid = t.track_id
356 |         if stracks.get(tid, 0):
357 |             del stracks[tid]
358 |     return list(stracks.values())
359 | 
360 | def remove_duplicate_stracks(stracksa, stracksb):
361 |     pdist = matching.iou_distance(stracksa, stracksb)
362 |     pairs = np.where(pdist<0.15)
363 |     dupa, dupb = list(), list()
364 |     for p,q in zip(*pairs):
365 |         timep = stracksa[p].frame_id - stracksa[p].start_frame
366 |         timeq = stracksb[q].frame_id - stracksb[q].start_frame
367 |         if timep > timeq:
368 |             dupb.append(q)
369 |         else:
370 |             dupa.append(p)
371 |     resa = [t for i,t in enumerate(stracksa) if not i in dupa]
372 |     resb = [t for i,t in enumerate(stracksb) if not i in dupb]
373 |     return resa, resb
374 |             
375 | 
376 | 


--------------------------------------------------------------------------------
/tracker_utils/datasets.py:
--------------------------------------------------------------------------------
  1 | import glob
  2 | import math
  3 | import os
  4 | import os.path as osp
  5 | import random
  6 | import time
  7 | from collections import OrderedDict
  8 | 
  9 | import cv2
 10 | import numpy as np
 11 | import torch
 12 | 
 13 | from torch.utils.data import Dataset
 14 | from tracker_utils.utils import xyxy2xywh
 15 | 
 16 | class LoadImages:  # for inference
 17 |     def __init__(self, path, img_size=(1088, 608)):
 18 |         if os.path.isdir(path):
 19 |             image_format = ['.jpg', '.jpeg', '.png', '.tif']
 20 |             self.files = sorted(glob.glob('%s/*.*' % path))
 21 |             self.files = list(filter(lambda x: os.path.splitext(x)[1].lower() in image_format, self.files))
 22 |         elif os.path.isfile(path):
 23 |             self.files = [path]
 24 | 
 25 |         self.nF = len(self.files)  # number of image files
 26 |         self.width = img_size[0]
 27 |         self.height = img_size[1]
 28 |         self.count = 0
 29 | 
 30 |         assert self.nF > 0, 'No images found in ' + path
 31 | 
 32 |     def __iter__(self):
 33 |         self.count = -1
 34 |         return self
 35 | 
 36 |     def __next__(self):
 37 |         self.count += 1
 38 |         if self.count == self.nF:
 39 |             raise StopIteration
 40 |         img_path = self.files[self.count]
 41 | 
 42 |         # Read image
 43 |         img0 = cv2.imread(img_path)  # BGR
 44 |         assert img0 is not None, 'Failed to load ' + img_path
 45 | 
 46 |         # Padded resize
 47 |         img, _, _, _ = letterbox(img0, height=self.height, width=self.width)
 48 | 
 49 |         # Normalize RGB
 50 |         img = img[:, :, ::-1].transpose(2, 0, 1)
 51 |         img = np.ascontiguousarray(img, dtype=np.float32)
 52 |         img /= 255.0
 53 | 
 54 |         # cv2.imwrite(img_path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
 55 |         return img_path, img, img0
 56 |     
 57 |     def __getitem__(self, idx):
 58 |         idx = idx % self.nF 
 59 |         img_path = self.files[idx]
 60 | 
 61 |         # Read image
 62 |         img0 = cv2.imread(img_path)  # BGR
 63 |         assert img0 is not None, 'Failed to load ' + img_path
 64 | 
 65 |         # Padded resize
 66 |         img, _, _, _ = letterbox(img0, height=self.height, width=self.width)
 67 | 
 68 |         # Normalize RGB
 69 |         img = img[:, :, ::-1].transpose(2, 0, 1)
 70 |         img = np.ascontiguousarray(img, dtype=np.float32)
 71 |         img /= 255.0
 72 | 
 73 |         return img_path, img, img0
 74 | 
 75 |     def __len__(self):
 76 |         return self.nF  # number of files
 77 | 
 78 | 
 79 | class LoadVideo:  # for inference
 80 |     def __init__(self, path, img_size=(1088, 608)):
 81 |         if not os.path.isfile(path):
 82 |             raise FileExistsError
 83 |         
 84 |         self.cap = cv2.VideoCapture(path)        
 85 |         self.frame_rate = int(round(self.cap.get(cv2.CAP_PROP_FPS)))
 86 |         self.vw = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
 87 |         self.vh = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 88 |         self.vn = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
 89 | 
 90 |         self.width = img_size[0]
 91 |         self.height = img_size[1]
 92 |         self.count = 0
 93 | 
 94 |         self.w, self.h = self.get_size(self.vw, self.vh, self.width, self.height)
 95 |         print('Lenth of the video: {:d} frames'.format(self.vn))
 96 | 
 97 |     def get_size(self, vw, vh, dw, dh):
 98 |         wa, ha = float(dw) / vw, float(dh) / vh
 99 |         a = min(wa, ha)
100 |         return int(vw *a), int(vh*a)
101 | 
102 |     def __iter__(self):
103 |         self.count = -1
104 |         return self
105 | 
106 |     def __next__(self):
107 |         self.count += 1
108 |         if self.count == len(self):
109 |             raise StopIteration
110 |         # Read image
111 |         res, img0 = self.cap.read()  # BGR
112 |         assert img0 is not None, 'Failed to load frame {:d}'.format(self.count)
113 |         img0 = cv2.resize(img0, (self.w, self.h))
114 | 
115 |         # Padded resize
116 |         img, _, _, _ = letterbox(img0, height=self.height, width=self.width)
117 | 
118 |         # Normalize RGB
119 |         img = img[:, :, ::-1].transpose(2, 0, 1)
120 |         img = np.ascontiguousarray(img, dtype=np.float32)
121 |         img /= 255.0
122 | 
123 |         # cv2.imwrite(img_path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
124 |         return self.count, img, img0
125 |     
126 |     def __len__(self):
127 |         return self.vn  # number of files
128 | 
129 | 
130 | class LoadImagesAndLabels:  # for training
131 |     def __init__(self, path, img_size=(1088,608),  augment=False, transforms=None):
132 |         with open(path, 'r') as file:
133 |             self.img_files = file.readlines()
134 |             self.img_files = [x.replace('\n', '') for x in self.img_files]
135 |             self.img_files = list(filter(lambda x: len(x) > 0, self.img_files))
136 | 
137 |         self.label_files = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt')
138 |                             for x in self.img_files]
139 | 
140 |         self.nF = len(self.img_files)  # number of image files
141 |         self.width = img_size[0]
142 |         self.height = img_size[1]
143 |         self.augment = augment
144 |         self.transforms = transforms
145 | 
146 | 
147 |     def __getitem__(self, files_index):
148 |         img_path = self.img_files[files_index]
149 |         label_path = self.label_files[files_index]
150 |         return self.get_data(img_path, label_path)
151 | 
152 |     def get_data(self, img_path, label_path):
153 |         height = self.height
154 |         width = self.width
155 |         img = cv2.imread(img_path)  # BGR
156 |         if img is None:
157 |             raise ValueError('File corrupt {}'.format(img_path))
158 |         augment_hsv = True
159 |         if self.augment and augment_hsv:
160 |             # SV augmentation by 50%
161 |             fraction = 0.50
162 |             img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
163 |             S = img_hsv[:, :, 1].astype(np.float32)
164 |             V = img_hsv[:, :, 2].astype(np.float32)
165 | 
166 |             a = (random.random() * 2 - 1) * fraction + 1
167 |             S *= a
168 |             if a > 1:
169 |                 np.clip(S, a_min=0, a_max=255, out=S)
170 | 
171 |             a = (random.random() * 2 - 1) * fraction + 1
172 |             V *= a
173 |             if a > 1:
174 |                 np.clip(V, a_min=0, a_max=255, out=V)
175 | 
176 |             img_hsv[:, :, 1] = S.astype(np.uint8)
177 |             img_hsv[:, :, 2] = V.astype(np.uint8)
178 |             cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
179 | 
180 |         h, w, _ = img.shape
181 |         img, ratio, padw, padh = letterbox(img, height=height, width=width)
182 | 
183 |         # Load labels
184 |         if os.path.isfile(label_path):
185 |             labels0 = np.loadtxt(label_path, dtype=np.float32).reshape(-1, 6)
186 | 
187 |             # Normalized xywh to pixel xyxy format
188 |             labels = labels0.copy()
189 |             labels[:, 2] = ratio * w * (labels0[:, 2] - labels0[:, 4] / 2) + padw
190 |             labels[:, 3] = ratio * h * (labels0[:, 3] - labels0[:, 5] / 2) + padh
191 |             labels[:, 4] = ratio * w * (labels0[:, 2] + labels0[:, 4] / 2) + padw
192 |             labels[:, 5] = ratio * h * (labels0[:, 3] + labels0[:, 5] / 2) + padh
193 |         else:
194 |             labels = np.array([])
195 | 
196 |         # Augment image and labels
197 |         if self.augment:
198 |             img, labels, M = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.50, 1.20))
199 | 
200 |     
201 |         plotFlag = False
202 |         if plotFlag:
203 |             import matplotlib
204 |             matplotlib.use('Agg')
205 |             import matplotlib.pyplot as plt
206 |             plt.figure(figsize=(50, 50)) 
207 |             plt.imshow(img[:, :, ::-1])
208 |             plt.plot(labels[:, [1, 3, 3, 1, 1]].T, labels[:, [2, 2, 4, 4, 2]].T, '.-')
209 |             plt.axis('off')
210 |             plt.savefig('test.jpg')
211 |             time.sleep(10)
212 | 
213 |         nL = len(labels)
214 |         if nL > 0:
215 |             # convert xyxy to xywh
216 |             labels[:, 2:6] = xyxy2xywh(labels[:, 2:6].copy()) #/ height
217 |             labels[:, 2] /= width
218 |             labels[:, 3] /= height
219 |             labels[:, 4] /= width
220 |             labels[:, 5] /= height
221 |         if self.augment:
222 |             # random left-right flip
223 |             lr_flip = True
224 |             if lr_flip & (random.random() > 0.5):
225 |                 img = np.fliplr(img)
226 |                 if nL > 0:
227 |                     labels[:, 2] = 1 - labels[:, 2]
228 |        
229 |         img = np.ascontiguousarray(img[ :, :, ::-1]) # BGR to RGB
230 |         if self.transforms is not None:
231 |             img = self.transforms(img)
232 | 
233 |         return img, labels, img_path, (h, w)
234 | 
235 |     def __len__(self):
236 |         return self.nF  # number of batches
237 | 
238 | 
239 | def letterbox(img, height=608, width=1088, color=(127.5, 127.5, 127.5)):  # resize a rectangular image to a padded rectangular 
240 |     shape = img.shape[:2]  # shape = [height, width]
241 |     ratio = min(float(height)/shape[0], float(width)/shape[1])
242 |     new_shape = (round(shape[1] * ratio), round(shape[0] * ratio)) # new_shape = [width, height]
243 |     dw = (width - new_shape[0]) / 2  # width padding
244 |     dh = (height - new_shape[1]) / 2  # height padding
245 |     top, bottom = round(dh - 0.1), round(dh + 0.1)
246 |     left, right = round(dw - 0.1), round(dw + 0.1)
247 |     img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA)  # resized, no border
248 |     img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # padded rectangular
249 |     return img, ratio, dw, dh
250 | 
251 | 
252 | def random_affine(img, targets=None, degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
253 |                   borderValue=(127.5, 127.5, 127.5)):
254 |     # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
255 |     # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
256 | 
257 |     border = 0  # width of added border (optional)
258 |     height = img.shape[0]
259 |     width = img.shape[1]
260 | 
261 |     # Rotation and Scale
262 |     R = np.eye(3)
263 |     a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
264 |     # a += random.choice([-180, -90, 0, 90])  # 90deg rotations added to small rotations
265 |     s = random.random() * (scale[1] - scale[0]) + scale[0]
266 |     R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)
267 | 
268 |     # Translation
269 |     T = np.eye(3)
270 |     T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border  # x translation (pixels)
271 |     T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border  # y translation (pixels)
272 | 
273 |     # Shear
274 |     S = np.eye(3)
275 |     S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # x shear (deg)
276 |     S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # y shear (deg)
277 | 
278 |     M = S @ T @ R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
279 |     imw = cv2.warpPerspective(img, M, dsize=(width, height), flags=cv2.INTER_LINEAR,
280 |                               borderValue=borderValue)  # BGR order borderValue
281 | 
282 |     # Return warped points also
283 |     if targets is not None:
284 |         if len(targets) > 0:
285 |             n = targets.shape[0]
286 |             points = targets[:, 2:6].copy()
287 |             area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])
288 | 
289 |             # warp points
290 |             xy = np.ones((n * 4, 3))
291 |             xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
292 |             xy = (xy @ M.T)[:, :2].reshape(n, 8)
293 | 
294 |             # create new boxes
295 |             x = xy[:, [0, 2, 4, 6]]
296 |             y = xy[:, [1, 3, 5, 7]]
297 |             xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
298 | 
299 |             # apply angle-based reduction
300 |             radians = a * math.pi / 180
301 |             reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
302 |             x = (xy[:, 2] + xy[:, 0]) / 2
303 |             y = (xy[:, 3] + xy[:, 1]) / 2
304 |             w = (xy[:, 2] - xy[:, 0]) * reduction
305 |             h = (xy[:, 3] - xy[:, 1]) * reduction
306 |             xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
307 | 
308 |             # reject warped points outside of image
309 |             np.clip(xy[:, 0], 0, width, out=xy[:, 0])
310 |             np.clip(xy[:, 2], 0, width, out=xy[:, 2])
311 |             np.clip(xy[:, 1], 0, height, out=xy[:, 1])
312 |             np.clip(xy[:, 3], 0, height, out=xy[:, 3])
313 |             w = xy[:, 2] - xy[:, 0]
314 |             h = xy[:, 3] - xy[:, 1]
315 |             area = w * h
316 |             ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
317 |             i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
318 | 
319 |             targets = targets[i]
320 |             targets[:, 2:6] = xy[i]
321 | 
322 |         return imw, targets, M
323 |     else:
324 |         return imw
325 | 
326 | def collate_fn(batch):
327 |     imgs, labels, paths, sizes = zip(*batch)
328 |     batch_size = len(labels)
329 |     imgs = torch.stack(imgs, 0)
330 |     max_box_len = max([l.shape[0] for l in labels])
331 |     labels = [torch.from_numpy(l) for l in labels]
332 |     filled_labels = torch.zeros(batch_size, max_box_len, 6)
333 |     labels_len = torch.zeros(batch_size)
334 | 
335 |     for i in range(batch_size):
336 |         isize = labels[i].shape[0]
337 |         if len(labels[i])>0:
338 |             filled_labels[i, :isize, :] = labels[i]
339 |         labels_len[i] = isize
340 | 
341 |     return imgs, filled_labels, paths, sizes, labels_len.unsqueeze(1)
342 | 
343 | 
344 | class JointDataset(LoadImagesAndLabels):  # for training
345 |     def __init__(self, root, paths, img_size=(1088,608), augment=False, transforms=None):
346 |         
347 |         dataset_names = paths.keys()
348 |         self.img_files = OrderedDict()
349 |         self.label_files = OrderedDict()
350 |         self.tid_num = OrderedDict()
351 |         self.tid_start_index = OrderedDict()
352 |         for ds, path in paths.items():
353 |             with open(path, 'r') as file:
354 |                 self.img_files[ds] = file.readlines()
355 |                 self.img_files[ds] = [osp.join(root, x.strip()) for x in self.img_files[ds]]
356 |                 self.img_files[ds] = list(filter(lambda x: len(x) > 0, self.img_files[ds]))
357 | 
358 |             self.label_files[ds] = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt')
359 |                                 for x in self.img_files[ds]]
360 | 
361 |         for ds, label_paths in self.label_files.items():
362 |             max_index = -1
363 |             for lp in label_paths:
364 |                 lb = np.loadtxt(lp)
365 |                 if len(lb) < 1:
366 |                     continue
367 |                 if len(lb.shape) < 2:
368 |                     img_max = lb[1]
369 |                 else:
370 |                     img_max = np.max(lb[:,1])
371 |                 if img_max >max_index:
372 |                     max_index = img_max 
373 |             self.tid_num[ds] = max_index + 1
374 |         
375 |         last_index = 0
376 |         for i, (k, v) in enumerate(self.tid_num.items()):
377 |             self.tid_start_index[k] = last_index
378 |             last_index += v
379 |         
380 |         self.nID = int(last_index+1)
381 |         self.nds = [len(x) for x in self.img_files.values()]
382 |         self.cds = [sum(self.nds[:i]) for i in range(len(self.nds))]
383 |         self.nF = sum(self.nds)
384 |         self.width = img_size[0]
385 |         self.height = img_size[1]
386 |         self.augment = augment
387 |         self.transforms = transforms
388 |         
389 |         print('='*80)
390 |         print('dataset summary')
391 |         print(self.tid_num)
392 |         print('total # identities:', self.nID)
393 |         print('start index')
394 |         print(self.tid_start_index)
395 |         print('='*80)
396 |         
397 | 
398 |     def __getitem__(self, files_index):
399 |         """
400 |         Iterator function for train dataset
401 |         """
402 |         for i, c in enumerate(self.cds):
403 |             if files_index >= c: 
404 |                 ds = list(self.label_files.keys())[i]
405 |                 start_index = c
406 |         img_path = self.img_files[ds][files_index - start_index]
407 |         label_path = self.label_files[ds][files_index - start_index]
408 |         
409 |         imgs, labels, img_path, (h, w) = self.get_data(img_path, label_path) 
410 |         for i, _ in enumerate(labels):
411 |             if labels[i,1] > -1:
412 |                 labels[i,1] += self.tid_start_index[ds]
413 |         
414 |         return imgs, labels, img_path, (h, w) 
415 | 
416 | 
417 | 


--------------------------------------------------------------------------------
/tracker_utils/evaluation.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import copy
  4 | import motmetrics as mm
  5 | mm.lap.default_solver = 'lap'
  6 | from tracker_utils.io import read_results, unzip_objs
  7 | 
  8 | 
  9 | class Evaluator(object):
 10 | 
 11 |     def __init__(self, data_root, seq_name, data_type):
 12 |         self.data_root = data_root
 13 |         self.seq_name = seq_name
 14 |         self.data_type = data_type
 15 | 
 16 |         self.load_annotations()
 17 |         self.reset_accumulator()
 18 | 
 19 |     def load_annotations(self):
 20 |         assert self.data_type == 'mot'
 21 | 
 22 |         gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt')
 23 |         self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True)
 24 |         self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True)
 25 | 
 26 |     def reset_accumulator(self):
 27 |         self.acc = mm.MOTAccumulator(auto_id=True)
 28 | 
 29 |     def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
 30 |         # results
 31 |         trk_tlwhs = np.copy(trk_tlwhs)
 32 |         trk_ids = np.copy(trk_ids)
 33 | 
 34 |         # gts
 35 |         gt_objs = self.gt_frame_dict.get(frame_id, [])
 36 |         gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]
 37 | 
 38 |         # ignore boxes
 39 |         ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
 40 |         ignore_tlwhs = unzip_objs(ignore_objs)[0]
 41 | 
 42 | 
 43 |         # remove ignored results
 44 |         keep = np.ones(len(trk_tlwhs), dtype=bool)
 45 |         iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5)
 46 |         if len(iou_distance) > 0:
 47 |             match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
 48 |             match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
 49 |             match_ious = iou_distance[match_is, match_js]
 50 | 
 51 |             match_js = np.asarray(match_js, dtype=int)
 52 |             match_js = match_js[np.logical_not(np.isnan(match_ious))]
 53 |             keep[match_js] = False
 54 |             trk_tlwhs = trk_tlwhs[keep]
 55 |             trk_ids = trk_ids[keep]
 56 | 
 57 |         # get distance matrix
 58 |         iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)
 59 | 
 60 |         # acc
 61 |         self.acc.update(gt_ids, trk_ids, iou_distance)
 62 | 
 63 |         if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'):
 64 |             events = self.acc.last_mot_events  # only supported by https://github.com/longcw/py-motmetrics
 65 |         else:
 66 |             events = None
 67 |         return events
 68 | 
 69 |     def eval_file(self, filename):
 70 |         self.reset_accumulator()
 71 | 
 72 |         result_frame_dict = read_results(filename, self.data_type, is_gt=False)
 73 |         frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys())))
 74 |         for frame_id in frames:
 75 |             trk_objs = result_frame_dict.get(frame_id, [])
 76 |             trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
 77 |             self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False)
 78 | 
 79 |         return self.acc
 80 | 
 81 |     @staticmethod
 82 |     def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')):
 83 |         names = copy.deepcopy(names)
 84 |         if metrics is None:
 85 |             metrics = mm.metrics.motchallenge_metrics
 86 |         metrics = copy.deepcopy(metrics)
 87 | 
 88 |         mh = mm.metrics.create()
 89 |         summary = mh.compute_many(
 90 |             accs,
 91 |             metrics=metrics,
 92 |             names=names,
 93 |             generate_overall=True
 94 |         )
 95 | 
 96 |         return summary
 97 | 
 98 |     @staticmethod
 99 |     def save_summary(summary, filename):
100 |         import pandas as pd
101 |         writer = pd.ExcelWriter(filename)
102 |         summary.to_excel(writer)
103 |         writer.save()
104 | 


--------------------------------------------------------------------------------
/tracker_utils/io.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from typing import Dict
  3 | import numpy as np
  4 | 
  5 | from tracker_utils.log import logger
  6 | 
  7 | 
  8 | def write_results(filename, results_dict: Dict, data_type: str):
  9 |     if not filename:
 10 |         return
 11 |     path = os.path.dirname(filename)
 12 |     if not os.path.exists(path):
 13 |         os.makedirs(path)
 14 | 
 15 |     if data_type in ('mot', 'mcmot', 'lab'):
 16 |         save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n'
 17 |     elif data_type == 'kitti':
 18 |         save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n'
 19 |     else:
 20 |         raise ValueError(data_type)
 21 | 
 22 |     with open(filename, 'w') as f:
 23 |         for frame_id, frame_data in results_dict.items():
 24 |             if data_type == 'kitti':
 25 |                 frame_id -= 1
 26 |             for tlwh, track_id in frame_data:
 27 |                 if track_id < 0:
 28 |                     continue
 29 |                 x1, y1, w, h = tlwh
 30 |                 x2, y2 = x1 + w, y1 + h
 31 |                 line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0)
 32 |                 f.write(line)
 33 |     logger.info('Save results to {}'.format(filename))
 34 | 
 35 | 
 36 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False):
 37 |     if data_type in ('mot', 'lab'):
 38 |         read_fun = read_mot_results
 39 |     else:
 40 |         raise ValueError('Unknown data type: {}'.format(data_type))
 41 | 
 42 |     return read_fun(filename, is_gt, is_ignore)
 43 | 
 44 | 
 45 | """
 46 | labels={'ped', ...			% 1
 47 | 'person_on_vhcl', ...	% 2
 48 | 'car', ...				% 3
 49 | 'bicycle', ...			% 4
 50 | 'mbike', ...			% 5
 51 | 'non_mot_vhcl', ...		% 6
 52 | 'static_person', ...	% 7
 53 | 'distractor', ...		% 8
 54 | 'occluder', ...			% 9
 55 | 'occluder_on_grnd', ...		%10
 56 | 'occluder_full', ...		% 11
 57 | 'reflection', ...		% 12
 58 | 'crowd' ...			% 13
 59 | };
 60 | """
 61 | 
 62 | 
 63 | def read_mot_results(filename, is_gt, is_ignore):
 64 |     valid_labels = {1}
 65 |     ignore_labels = {2, 7, 8, 12}
 66 |     results_dict = dict()
 67 |     if os.path.isfile(filename):
 68 |         with open(filename, 'r') as f:
 69 |             for line in f.readlines():
 70 |                 linelist = line.split(',')
 71 |                 if len(linelist) < 7:
 72 |                     continue
 73 |                 fid = int(linelist[0])
 74 |                 if fid < 1:
 75 |                     continue
 76 |                 results_dict.setdefault(fid, list())
 77 | 
 78 |                 if is_gt:
 79 |                     if 'MOT16-' in filename or 'MOT17-' in filename:
 80 |                         label = int(float(linelist[7]))
 81 |                         mark = int(float(linelist[6]))
 82 |                         if mark == 0 or label not in valid_labels:
 83 |                             continue
 84 |                     score = 1
 85 |                 elif is_ignore:
 86 |                     if 'MOT16-' in filename or 'MOT17-' in filename:
 87 |                         label = int(float(linelist[7]))
 88 |                         vis_ratio = float(linelist[8])
 89 |                         if label not in ignore_labels and vis_ratio >= 0:
 90 |                             continue
 91 |                     else:
 92 |                         continue
 93 |                     score = 1
 94 |                 else:
 95 |                     score = float(linelist[6])
 96 | 
 97 |                 tlwh = tuple(map(float, linelist[2:6]))
 98 |                 target_id = int(linelist[1])
 99 | 
100 |                 results_dict[fid].append((tlwh, target_id, score))
101 | 
102 |     return results_dict
103 | 
104 | 
105 | def unzip_objs(objs):
106 |     if len(objs) > 0:
107 |         tlwhs, ids, scores = zip(*objs)
108 |     else:
109 |         tlwhs, ids, scores = [], [], []
110 |     tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
111 | 
112 |     return tlwhs, ids, scores


--------------------------------------------------------------------------------
/tracker_utils/kalman_filter.py:
--------------------------------------------------------------------------------
  1 | # vim: expandtab:ts=4:sw=4
  2 | import numpy as np
  3 | import scipy.linalg
  4 | 
  5 | 
  6 | """
  7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of
  8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
  9 | function and used as Mahalanobis gating threshold.
 10 | """
 11 | chi2inv95 = {
 12 |     1: 3.8415,
 13 |     2: 5.9915,
 14 |     3: 7.8147,
 15 |     4: 9.4877,
 16 |     5: 11.070,
 17 |     6: 12.592,
 18 |     7: 14.067,
 19 |     8: 15.507,
 20 |     9: 16.919}
 21 | 
 22 | 
 23 | class KalmanFilter(object):
 24 |     """
 25 |     A simple Kalman filter for tracking bounding boxes in image space.
 26 | 
 27 |     The 8-dimensional state space
 28 | 
 29 |         x, y, a, h, vx, vy, va, vh
 30 | 
 31 |     contains the bounding box center position (x, y), aspect ratio a, height h,
 32 |     and their respective velocities.
 33 | 
 34 |     Object motion follows a constant velocity model. The bounding box location
 35 |     (x, y, a, h) is taken as direct observation of the state space (linear
 36 |     observation model).
 37 | 
 38 |     """
 39 | 
 40 |     def __init__(self):
 41 |         ndim, dt = 4, 1.
 42 | 
 43 |         # Create Kalman filter model matrices.
 44 |         self._motion_mat = np.eye(2 * ndim, 2 * ndim)
 45 |         for i in range(ndim):
 46 |             self._motion_mat[i, ndim + i] = dt
 47 |         self._update_mat = np.eye(ndim, 2 * ndim)
 48 | 
 49 |         # Motion and observation uncertainty are chosen relative to the current
 50 |         # state estimate. These weights control the amount of uncertainty in
 51 |         # the model. This is a bit hacky.
 52 |         self._std_weight_position = 1. / 20
 53 |         self._std_weight_velocity = 1. / 160
 54 | 
 55 |     def initiate(self, measurement):
 56 |         """Create track from unassociated measurement.
 57 | 
 58 |         Parameters
 59 |         ----------
 60 |         measurement : ndarray
 61 |             Bounding box coordinates (x, y, a, h) with center position (x, y),
 62 |             aspect ratio a, and height h.
 63 | 
 64 |         Returns
 65 |         -------
 66 |         (ndarray, ndarray)
 67 |             Returns the mean vector (8 dimensional) and covariance matrix (8x8
 68 |             dimensional) of the new track. Unobserved velocities are initialized
 69 |             to 0 mean.
 70 | 
 71 |         """
 72 |         mean_pos = measurement
 73 |         mean_vel = np.zeros_like(mean_pos)
 74 |         mean = np.r_[mean_pos, mean_vel]
 75 | 
 76 |         std = [
 77 |             2 * self._std_weight_position * measurement[3],
 78 |             2 * self._std_weight_position * measurement[3],
 79 |             1e-2,
 80 |             2 * self._std_weight_position * measurement[3],
 81 |             10 * self._std_weight_velocity * measurement[3],
 82 |             10 * self._std_weight_velocity * measurement[3],
 83 |             1e-5,
 84 |             10 * self._std_weight_velocity * measurement[3]]
 85 |         covariance = np.diag(np.square(std))
 86 |         return mean, covariance
 87 | 
 88 |     def predict(self, mean, covariance):
 89 |         """Run Kalman filter prediction step.
 90 | 
 91 |         Parameters
 92 |         ----------
 93 |         mean : ndarray
 94 |             The 8 dimensional mean vector of the object state at the previous
 95 |             time step.
 96 |         covariance : ndarray
 97 |             The 8x8 dimensional covariance matrix of the object state at the
 98 |             previous time step.
 99 | 
100 |         Returns
101 |         -------
102 |         (ndarray, ndarray)
103 |             Returns the mean vector and covariance matrix of the predicted
104 |             state. Unobserved velocities are initialized to 0 mean.
105 | 
106 |         """
107 |         std_pos = [
108 |             self._std_weight_position * mean[3],
109 |             self._std_weight_position * mean[3],
110 |             1e-2,
111 |             self._std_weight_position * mean[3]]
112 |         std_vel = [
113 |             self._std_weight_velocity * mean[3],
114 |             self._std_weight_velocity * mean[3],
115 |             1e-5,
116 |             self._std_weight_velocity * mean[3]]
117 |         motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
118 | 
119 |         mean = np.dot(mean, self._motion_mat.T)
120 |         covariance = np.linalg.multi_dot((
121 |             self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
122 | 
123 |         return mean, covariance
124 | 
125 |     def project(self, mean, covariance):
126 |         """Project state distribution to measurement space.
127 | 
128 |         Parameters
129 |         ----------
130 |         mean : ndarray
131 |             The state's mean vector (8 dimensional array).
132 |         covariance : ndarray
133 |             The state's covariance matrix (8x8 dimensional).
134 | 
135 |         Returns
136 |         -------
137 |         (ndarray, ndarray)
138 |             Returns the projected mean and covariance matrix of the given state
139 |             estimate.
140 | 
141 |         """
142 |         std = [
143 |             self._std_weight_position * mean[3],
144 |             self._std_weight_position * mean[3],
145 |             1e-1,
146 |             self._std_weight_position * mean[3]]
147 |         innovation_cov = np.diag(np.square(std))
148 | 
149 |         mean = np.dot(self._update_mat, mean)
150 |         covariance = np.linalg.multi_dot((
151 |             self._update_mat, covariance, self._update_mat.T))
152 |         return mean, covariance + innovation_cov
153 |     
154 |     def multi_predict(self, mean, covariance):
155 |         """Run Kalman filter prediction step (Vectorized version).
156 | 
157 |         Parameters
158 |         ----------
159 |         mean : ndarray
160 |             The Nx8 dimensional mean matrix of the object states at the previous
161 |             time step.
162 |         covariance : ndarray
163 |             The Nx8x8 dimensional covariance matrics of the object states at the
164 |             previous time step.
165 | 
166 |         Returns
167 |         -------
168 |         (ndarray, ndarray)
169 |             Returns the mean vector and covariance matrix of the predicted
170 |             state. Unobserved velocities are initialized to 0 mean.
171 | 
172 |         """
173 |         std_pos = [
174 |             self._std_weight_position * mean[:, 3],
175 |             self._std_weight_position * mean[:, 3],
176 |             1e-2 * np.ones_like(mean[:, 3]),
177 |             self._std_weight_position * mean[:, 3]]
178 |         std_vel = [
179 |             self._std_weight_velocity * mean[:, 3],
180 |             self._std_weight_velocity * mean[:, 3],
181 |             1e-5 * np.ones_like(mean[:, 3]),
182 |             self._std_weight_velocity * mean[:, 3]]
183 |         sqr = np.square(np.r_[std_pos, std_vel]).T
184 |         
185 |         motion_cov = []
186 |         for i in range(len(mean)):
187 |             motion_cov.append(np.diag(sqr[i]))
188 |         motion_cov = np.asarray(motion_cov)
189 |             
190 |         mean = np.dot(mean, self._motion_mat.T)
191 |         left = np.dot(self._motion_mat, covariance).transpose((1,0,2))
192 |         covariance = np.dot(left, self._motion_mat.T) + motion_cov
193 | 
194 |         return mean, covariance
195 | 
196 |     def update(self, mean, covariance, measurement):
197 |         """Run Kalman filter correction step.
198 | 
199 |         Parameters
200 |         ----------
201 |         mean : ndarray
202 |             The predicted state's mean vector (8 dimensional).
203 |         covariance : ndarray
204 |             The state's covariance matrix (8x8 dimensional).
205 |         measurement : ndarray
206 |             The 4 dimensional measurement vector (x, y, a, h), where (x, y)
207 |             is the center position, a the aspect ratio, and h the height of the
208 |             bounding box.
209 | 
210 |         Returns
211 |         -------
212 |         (ndarray, ndarray)
213 |             Returns the measurement-corrected state distribution.
214 | 
215 |         """
216 |         projected_mean, projected_cov = self.project(mean, covariance)
217 | 
218 |         chol_factor, lower = scipy.linalg.cho_factor(
219 |             projected_cov, lower=True, check_finite=False)
220 |         kalman_gain = scipy.linalg.cho_solve(
221 |             (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
222 |             check_finite=False).T
223 |         innovation = measurement - projected_mean
224 | 
225 |         new_mean = mean + np.dot(innovation, kalman_gain.T)
226 |         new_covariance = covariance - np.linalg.multi_dot((
227 |             kalman_gain, projected_cov, kalman_gain.T))
228 |         return new_mean, new_covariance
229 | 
230 |     def gating_distance(self, mean, covariance, measurements,
231 |                         only_position=False, metric='maha'):
232 |         """Compute gating distance between state distribution and measurements.
233 | 
234 |         A suitable distance threshold can be obtained from `chi2inv95`. If
235 |         `only_position` is False, the chi-square distribution has 4 degrees of
236 |         freedom, otherwise 2.
237 | 
238 |         Parameters
239 |         ----------
240 |         mean : ndarray
241 |             Mean vector over the state distribution (8 dimensional).
242 |         covariance : ndarray
243 |             Covariance of the state distribution (8x8 dimensional).
244 |         measurements : ndarray
245 |             An Nx4 dimensional matrix of N measurements, each in
246 |             format (x, y, a, h) where (x, y) is the bounding box center
247 |             position, a the aspect ratio, and h the height.
248 |         only_position : Optional[bool]
249 |             If True, distance computation is done with respect to the bounding
250 |             box center position only.
251 | 
252 |         Returns
253 |         -------
254 |         ndarray
255 |             Returns an array of length N, where the i-th element contains the
256 |             squared Mahalanobis distance between (mean, covariance) and
257 |             `measurements[i]`.
258 | 
259 |         """
260 |         mean, covariance = self.project(mean, covariance)
261 |         if only_position:
262 |             mean, covariance = mean[:2], covariance[:2, :2]
263 |             measurements = measurements[:, :2]
264 |         
265 |         d = measurements - mean
266 |         if metric == 'gaussian':
267 |             return np.sum(d * d, axis=1)
268 |         elif metric == 'maha':
269 |             cholesky_factor = np.linalg.cholesky(covariance)
270 |             z = scipy.linalg.solve_triangular(
271 |                 cholesky_factor, d.T, lower=True, check_finite=False,
272 |                 overwrite_b=True)
273 |             squared_maha = np.sum(z * z, axis=0)
274 |             return squared_maha
275 |         else:
276 |             raise ValueError('invalid distance metric')
277 | 
278 | 


--------------------------------------------------------------------------------
/tracker_utils/log.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | 
 4 | def get_logger(name='root'):
 5 |     formatter = logging.Formatter(
 6 |         # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s')
 7 |         fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
 8 | 
 9 |     handler = logging.StreamHandler()
10 |     handler.setFormatter(formatter)
11 | 
12 |     logger = logging.getLogger(name)
13 |     logger.setLevel(logging.DEBUG)
14 |     logger.addHandler(handler)
15 |     return logger
16 | 
17 | 
18 | logger = get_logger('root')
19 | 


--------------------------------------------------------------------------------
/tracker_utils/timer.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import time
 9 | 
10 | 
11 | class Timer(object):
12 |     """A simple timer."""
13 |     def __init__(self):
14 |         self.total_time = 0.
15 |         self.calls = 0
16 |         self.start_time = 0.
17 |         self.diff = 0.
18 |         self.average_time = 0.
19 | 
20 |         self.duration = 0.
21 | 
22 |     def tic(self):
23 |         # using time.time instead of time.clock because time time.clock
24 |         # does not normalize for multithreading
25 |         self.start_time = time.time()
26 | 
27 |     def toc(self, average=True):
28 |         self.diff = time.time() - self.start_time
29 |         self.total_time += self.diff
30 |         self.calls += 1
31 |         self.average_time = self.total_time / self.calls
32 |         if average:
33 |             self.duration = self.average_time
34 |         else:
35 |             self.duration = self.diff
36 |         return self.duration
37 | 
38 |     def clear(self):
39 |         self.total_time = 0.
40 |         self.calls = 0
41 |         self.start_time = 0.
42 |         self.diff = 0.
43 |         self.average_time = 0.
44 |         self.duration = 0.
45 | 
46 | 


--------------------------------------------------------------------------------
/tracker_utils/utils.py:
--------------------------------------------------------------------------------
  1 | import glob
  2 | import random
  3 | import os
  4 | import os.path as osp
  5 | 
  6 | import cv2
  7 | import matplotlib.pyplot as plt
  8 | import numpy as np
  9 | import torch
 10 | import torch.nn.functional as F
 11 | from torchvision.ops import nms
 12 | 
 13 | 
 14 | def mkdir_if_missing(dir):
 15 |     os.makedirs(dir, exist_ok=True)
 16 | 
 17 | 
 18 | def float3(x):  # format floats to 3 decimals
 19 |     return float(format(x, '.3f'))
 20 | 
 21 | 
 22 | def init_seeds(seed=0):
 23 |     random.seed(seed)
 24 |     np.random.seed(seed)
 25 |     torch.manual_seed(seed)
 26 |     torch.cuda.manual_seed(seed)
 27 |     torch.cuda.manual_seed_all(seed)
 28 | 
 29 | 
 30 | def load_classes(path):
 31 |     """
 32 |     Loads class labels at 'path'
 33 |     """
 34 |     fp = open(path, 'r')
 35 |     names = fp.read().split('\n')
 36 |     return list(filter(None, names))  # filter removes empty strings (such as last line)
 37 | 
 38 | 
 39 | def model_info(model):  
 40 |     """
 41 |     Prints out a line-by-line description of a PyTorch model ending with a summary.
 42 |     """
 43 |     n_p = sum(x.numel() for x in model.parameters())  # number parameters
 44 |     n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
 45 |     print('\n%5s %50s %9s %12s %20s %12s %12s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
 46 |     for i, (name, p) in enumerate(model.named_parameters()):
 47 |         name = name.replace('module_list.', '')
 48 |         print('%5g %50s %9s %12g %20s %12.3g %12.3g' % (
 49 |             i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
 50 |     print('Model Summary: %g layers, %g parameters, %g gradients\n' % (i + 1, n_p, n_g))
 51 | 
 52 | 
 53 | 
 54 | def plot_one_box(x, img, color=None, label=None, line_thickness=None):
 55 |     """
 56 |     Plots one bounding box on image img.
 57 |     """
 58 |     tl = line_thickness or round(0.0004 * max(img.shape[0:2])) + 1  # line thickness
 59 |     color = color or [random.randint(0, 255) for _ in range(3)]
 60 |     c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
 61 |     cv2.rectangle(img, c1, c2, color, thickness=tl)
 62 |     if label:
 63 |         tf = max(tl - 1, 1)  # font thickness
 64 |         t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
 65 |         c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
 66 |         cv2.rectangle(img, c1, c2, color, -1)  # filled
 67 |         cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
 68 | 
 69 | 
 70 | def weights_init_normal(m):
 71 |     classname = m.__class__.__name__
 72 |     if classname.find('Conv') != -1:
 73 |         torch.nn.init.normal_(m.weight.data, 0.0, 0.03)
 74 |     elif classname.find('BatchNorm2d') != -1:
 75 |         torch.nn.init.normal_(m.weight.data, 1.0, 0.03)
 76 |         torch.nn.init.constant_(m.bias.data, 0.0)
 77 | 
 78 | 
 79 | def xyxy2xywh(x):
 80 |     # Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h]
 81 |     # x, y are coordinates of center 
 82 |     # (x1, y1) and (x2, y2) are coordinates of bottom left and top right respectively. 
 83 |     y = torch.zeros_like(x) if x.dtype is torch.float32 else np.zeros_like(x)
 84 |     y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
 85 |     y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
 86 |     y[:, 2] = x[:, 2] - x[:, 0]  # width
 87 |     y[:, 3] = x[:, 3] - x[:, 1]  # height
 88 |     return y
 89 | 
 90 | 
 91 | def xywh2xyxy(x):
 92 |     # Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
 93 |     # x, y are coordinates of center 
 94 |     # (x1, y1) and (x2, y2) are coordinates of bottom left and top right respectively. 
 95 |     y = torch.zeros_like(x) if x.dtype is torch.float32 else np.zeros_like(x)
 96 |     y[:, 0] = (x[:, 0] - x[:, 2] / 2)  # Bottom left x
 97 |     y[:, 1] = (x[:, 1] - x[:, 3] / 2)  # Bottom left y
 98 |     y[:, 2] = (x[:, 0] + x[:, 2] / 2)  # Top right x
 99 |     y[:, 3] = (x[:, 1] + x[:, 3] / 2)  # Top right y
100 |     return y
101 | 
102 | 
103 | def scale_coords(img_size, coords, img0_shape):
104 |     # Rescale x1, y1, x2, y2 from 416 to image size
105 |     gain_w = float(img_size[0]) / img0_shape[1]  # gain  = old / new
106 |     gain_h = float(img_size[1]) / img0_shape[0]
107 |     gain = min(gain_w, gain_h)
108 |     pad_x = (img_size[0] - img0_shape[1] * gain) / 2  # width padding
109 |     pad_y = (img_size[1] - img0_shape[0] * gain) / 2  # height padding
110 |     coords[:, [0, 2]] -= pad_x
111 |     coords[:, [1, 3]] -= pad_y
112 |     coords[:, 0:4] /= gain
113 |     coords[:, :4] = torch.clamp(coords[:, :4], min=0)
114 |     return coords
115 | 
116 | 
117 | def ap_per_class(tp, conf, pred_cls, target_cls):
118 |     """ Computes the average precision, given the recall and precision curves.
119 |     Method originally from https://github.com/rafaelpadilla/Object-Detection-Metrics.
120 |     # Arguments
121 |         tp:    True positives (list).
122 |         conf:  Objectness value from 0-1 (list).
123 |         pred_cls: Predicted object classes (list).
124 |         target_cls: True object classes (list).
125 |     # Returns
126 |         The average precision as computed in py-faster-rcnn.
127 |     """
128 | 
129 |     # lists/pytorch to numpy
130 |     tp, conf, pred_cls, target_cls = np.array(tp), np.array(conf), np.array(pred_cls), np.array(target_cls)
131 | 
132 |     # Sort by objectness
133 |     i = np.argsort(-conf)
134 |     tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
135 | 
136 |     # Find unique classes
137 |     unique_classes = np.unique(np.concatenate((pred_cls, target_cls), 0))
138 | 
139 |     # Create Precision-Recall curve and compute AP for each class
140 |     ap, p, r = [], [], []
141 |     for c in unique_classes:
142 |         i = pred_cls == c
143 |         n_gt = sum(target_cls == c)  # Number of ground truth objects
144 |         n_p = sum(i)  # Number of predicted objects
145 | 
146 |         if (n_p == 0) and (n_gt == 0):
147 |             continue
148 |         elif (n_p == 0) or (n_gt == 0):
149 |             ap.append(0)
150 |             r.append(0)
151 |             p.append(0)
152 |         else:
153 |             # Accumulate FPs and TPs
154 |             fpc = np.cumsum(1 - tp[i])
155 |             tpc = np.cumsum(tp[i])
156 | 
157 |             # Recall
158 |             recall_curve = tpc / (n_gt + 1e-16)
159 |             r.append(tpc[-1] / (n_gt + 1e-16))
160 | 
161 |             # Precision
162 |             precision_curve = tpc / (tpc + fpc)
163 |             p.append(tpc[-1] / (tpc[-1] + fpc[-1]))
164 | 
165 |             # AP from recall-precision curve
166 |             ap.append(compute_ap(recall_curve, precision_curve))
167 | 
168 |     return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(p)
169 | 
170 | 
171 | def compute_ap(recall, precision):
172 |     """ Computes the average precision, given the recall and precision curves.
173 |     Code originally from https://github.com/rbgirshick/py-faster-rcnn.
174 |     # Arguments
175 |         recall:    The recall curve (list).
176 |         precision: The precision curve (list).
177 |     # Returns
178 |         The average precision as computed in py-faster-rcnn.
179 |     """
180 |     # correct AP calculation
181 |     # first append sentinel values at the end
182 | 
183 |     mrec = np.concatenate(([0.], recall, [1.]))
184 |     mpre = np.concatenate(([0.], precision, [0.]))
185 | 
186 |     # compute the precision envelope
187 |     for i in range(mpre.size - 1, 0, -1):
188 |         mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
189 | 
190 |     # to calculate area under PR curve, look for points
191 |     # where X axis (recall) changes value
192 |     i = np.where(mrec[1:] != mrec[:-1])[0]
193 | 
194 |     # and sum (\Delta recall) * prec
195 |     ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
196 |     return ap
197 | 
198 | 
199 | def bbox_iou(box1, box2, x1y1x2y2=False):
200 |     """
201 |     Returns the IoU of two bounding boxes
202 |     """
203 |     N, M = len(box1), len(box2)
204 |     if x1y1x2y2:
205 |         # Get the coordinates of bounding boxes
206 |         b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
207 |         b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
208 |     else:
209 |         # Transform from center and width to exact coordinates
210 |         b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
211 |         b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
212 |         b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
213 |         b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
214 | 
215 |     # get the coordinates of the intersection rectangle
216 |     inter_rect_x1 = torch.max(b1_x1.unsqueeze(1), b2_x1)
217 |     inter_rect_y1 = torch.max(b1_y1.unsqueeze(1), b2_y1)
218 |     inter_rect_x2 = torch.min(b1_x2.unsqueeze(1), b2_x2)
219 |     inter_rect_y2 = torch.min(b1_y2.unsqueeze(1), b2_y2)
220 |     # Intersection area
221 |     inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1, 0) * torch.clamp(inter_rect_y2 - inter_rect_y1, 0)
222 |     # Union Area
223 |     b1_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1))
224 |     b1_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1)).view(-1,1).expand(N,M)
225 |     b2_area = ((b2_x2 - b2_x1) * (b2_y2 - b2_y1)).view(1,-1).expand(N,M)
226 | 
227 |     return inter_area / (b1_area + b2_area - inter_area + 1e-16)
228 | 
229 | 
230 | def build_targets_max(target, anchor_wh, nA, nC, nGh, nGw):
231 |     """
232 |     returns nT, nCorrect, tx, ty, tw, th, tconf, tcls
233 |     """
234 |     nB = len(target)  # number of images in batch
235 | 
236 |     txy = torch.zeros(nB, nA, nGh, nGw, 2).cuda()  # batch size, anchors, grid size
237 |     twh = torch.zeros(nB, nA, nGh, nGw, 2).cuda()
238 |     tconf = torch.LongTensor(nB, nA, nGh, nGw).fill_(0).cuda()
239 |     tcls = torch.ByteTensor(nB, nA, nGh, nGw, nC).fill_(0).cuda()  # nC = number of classes
240 |     tid = torch.LongTensor(nB, nA, nGh, nGw, 1).fill_(-1).cuda() 
241 |     for b in range(nB):
242 |         t = target[b]
243 |         t_id = t[:, 1].clone().long().cuda()
244 |         t = t[:,[0,2,3,4,5]]
245 |         nTb = len(t)  # number of targets
246 |         if nTb == 0:
247 |             continue
248 | 
249 |         #gxy, gwh = t[:, 1:3] * nG, t[:, 3:5] * nG
250 |         gxy, gwh = t[: , 1:3].clone() , t[:, 3:5].clone()
251 |         gxy[:, 0] = gxy[:, 0] * nGw
252 |         gxy[:, 1] = gxy[:, 1] * nGh
253 |         gwh[:, 0] = gwh[:, 0] * nGw
254 |         gwh[:, 1] = gwh[:, 1] * nGh
255 |         gi = torch.clamp(gxy[:, 0], min=0, max=nGw -1).long()
256 |         gj = torch.clamp(gxy[:, 1], min=0, max=nGh -1).long()
257 | 
258 |         # Get grid box indices and prevent overflows (i.e. 13.01 on 13 anchors)
259 |         #gi, gj = torch.clamp(gxy.long(), min=0, max=nG - 1).t()
260 |         #gi, gj = gxy.long().t()
261 | 
262 |         # iou of targets-anchors (using wh only)
263 |         box1 = gwh
264 |         box2 = anchor_wh.unsqueeze(1)
265 |         inter_area = torch.min(box1, box2).prod(2)
266 |         iou = inter_area / (box1.prod(1) + box2.prod(2) - inter_area + 1e-16)
267 | 
268 |         # Select best iou_pred and anchor
269 |         iou_best, a = iou.max(0)  # best anchor [0-2] for each target
270 | 
271 |         # Select best unique target-anchor combinations
272 |         if nTb > 1:
273 |             _, iou_order = torch.sort(-iou_best)  # best to worst
274 | 
275 |             # Unique anchor selection
276 |             u = torch.stack((gi, gj, a), 0)[:, iou_order]
277 |             # _, first_unique = np.unique(u, axis=1, return_index=True)  # first unique indices
278 |             first_unique = return_torch_unique_index(u, torch.unique(u, dim=1))  # torch alternative
279 |             i = iou_order[first_unique]
280 |             # best anchor must share significant commonality (iou) with target
281 |             i = i[iou_best[i] > 0.60]  # TODO: examine arbitrary threshold
282 |             if len(i) == 0:
283 |                 continue
284 | 
285 |             a, gj, gi, t = a[i], gj[i], gi[i], t[i]
286 |             t_id = t_id[i]
287 |             if len(t.shape) == 1:
288 |                 t = t.view(1, 5)
289 |         else:
290 |             if iou_best < 0.60:
291 |                 continue
292 |         
293 |         tc, gxy, gwh = t[:, 0].long(), t[:, 1:3].clone(), t[:, 3:5].clone()
294 |         gxy[:, 0] = gxy[:, 0] * nGw
295 |         gxy[:, 1] = gxy[:, 1] * nGh
296 |         gwh[:, 0] = gwh[:, 0] * nGw
297 |         gwh[:, 1] = gwh[:, 1] * nGh
298 | 
299 |         # XY coordinates
300 |         txy[b, a, gj, gi] = gxy - gxy.floor()
301 | 
302 |         # Width and height
303 |         twh[b, a, gj, gi] = torch.log(gwh / anchor_wh[a])  # yolo method
304 |         # twh[b, a, gj, gi] = torch.sqrt(gwh / anchor_wh[a]) / 2 # power method
305 | 
306 |         # One-hot encoding of label
307 |         tcls[b, a, gj, gi, tc] = 1
308 |         tconf[b, a, gj, gi] = 1
309 |         tid[b, a, gj, gi] = t_id.unsqueeze(1)
310 |     tbox = torch.cat([txy, twh], -1)
311 |     return tconf, tbox, tid
312 | 
313 | 
314 | 
315 | def build_targets_thres(target, anchor_wh, nA, nC, nGh, nGw):
316 |     ID_THRESH = 0.5
317 |     FG_THRESH = 0.5
318 |     BG_THRESH = 0.4
319 |     nB = len(target)  # number of images in batch
320 |     assert(len(anchor_wh)==nA)
321 | 
322 |     tbox = torch.zeros(nB, nA, nGh, nGw, 4).cuda()  # batch size, anchors, grid size
323 |     tconf = torch.LongTensor(nB, nA, nGh, nGw).fill_(0).cuda()
324 |     tid = torch.LongTensor(nB, nA, nGh, nGw, 1).fill_(-1).cuda() 
325 |     for b in range(nB):
326 |         t = target[b]
327 |         t_id = t[:, 1].clone().long().cuda()
328 |         t = t[:,[0,2,3,4,5]]
329 |         nTb = len(t)  # number of targets
330 |         if nTb == 0:
331 |             continue
332 | 
333 |         gxy, gwh = t[: , 1:3].clone() , t[:, 3:5].clone()
334 |         gxy[:, 0] = gxy[:, 0] * nGw
335 |         gxy[:, 1] = gxy[:, 1] * nGh
336 |         gwh[:, 0] = gwh[:, 0] * nGw
337 |         gwh[:, 1] = gwh[:, 1] * nGh
338 |         gxy[:, 0] = torch.clamp(gxy[:, 0], min=0, max=nGw -1)
339 |         gxy[:, 1] = torch.clamp(gxy[:, 1], min=0, max=nGh -1)
340 | 
341 |         gt_boxes = torch.cat([gxy, gwh], dim=1)                                            # Shape Ngx4 (xc, yc, w, h)
342 |         
343 |         anchor_mesh = generate_anchor(nGh, nGw, anchor_wh)
344 |         anchor_list = anchor_mesh.permute(0,2,3,1).contiguous().view(-1, 4)              # Shpae (nA x nGh x nGw) x 4
345 |         #print(anchor_list.shape, gt_boxes.shape)
346 |         iou_pdist = bbox_iou(anchor_list, gt_boxes)                                      # Shape (nA x nGh x nGw) x Ng
347 |         iou_max, max_gt_index = torch.max(iou_pdist, dim=1)                              # Shape (nA x nGh x nGw), both
348 | 
349 |         iou_map = iou_max.view(nA, nGh, nGw)       
350 |         gt_index_map = max_gt_index.view(nA, nGh, nGw)
351 | 
352 |         #nms_map = pooling_nms(iou_map, 3)
353 |         
354 |         id_index = iou_map > ID_THRESH
355 |         fg_index = iou_map > FG_THRESH                                                    
356 |         bg_index = iou_map < BG_THRESH 
357 |         ign_index = (iou_map < FG_THRESH) * (iou_map > BG_THRESH)
358 |         tconf[b][fg_index] = 1
359 |         tconf[b][bg_index] = 0
360 |         tconf[b][ign_index] = -1
361 | 
362 |         gt_index = gt_index_map[fg_index]
363 |         gt_box_list = gt_boxes[gt_index]
364 |         gt_id_list = t_id[gt_index_map[id_index]]
365 |         #print(gt_index.shape, gt_index_map[id_index].shape, gt_boxes.shape)
366 |         if torch.sum(fg_index) > 0:
367 |             tid[b][id_index] =  gt_id_list.unsqueeze(1)
368 |             fg_anchor_list = anchor_list.view(nA, nGh, nGw, 4)[fg_index] 
369 |             delta_target = encode_delta(gt_box_list, fg_anchor_list)
370 |             tbox[b][fg_index] = delta_target
371 |     return tconf, tbox, tid
372 | 
373 | def generate_anchor(nGh, nGw, anchor_wh):
374 |     nA = len(anchor_wh)
375 |     yy, xx =torch.meshgrid(torch.arange(nGh), torch.arange(nGw))
376 |     xx, yy = xx.cuda(), yy.cuda()
377 | 
378 |     mesh = torch.stack([xx, yy], dim=0)                                              # Shape 2, nGh, nGw
379 |     mesh = mesh.unsqueeze(0).repeat(nA,1,1,1).float()                                # Shape nA x 2 x nGh x nGw
380 |     anchor_offset_mesh = anchor_wh.unsqueeze(-1).unsqueeze(-1).repeat(1, 1, nGh,nGw) # Shape nA x 2 x nGh x nGw
381 |     anchor_mesh = torch.cat([mesh, anchor_offset_mesh], dim=1)                       # Shape nA x 4 x nGh x nGw
382 |     return anchor_mesh
383 | 
384 | def encode_delta(gt_box_list, fg_anchor_list):
385 |     px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:,1], \
386 |                      fg_anchor_list[:, 2], fg_anchor_list[:,3]
387 |     gx, gy, gw, gh = gt_box_list[:, 0], gt_box_list[:, 1], \
388 |                      gt_box_list[:, 2], gt_box_list[:, 3]
389 |     dx = (gx - px) / pw
390 |     dy = (gy - py) / ph
391 |     dw = torch.log(gw/pw)
392 |     dh = torch.log(gh/ph)
393 |     return torch.stack([dx, dy, dw, dh], dim=1)
394 | 
395 | def decode_delta(delta, fg_anchor_list):
396 |     px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:,1], \
397 |                      fg_anchor_list[:, 2], fg_anchor_list[:,3]
398 |     dx, dy, dw, dh = delta[:, 0], delta[:, 1], delta[:, 2], delta[:, 3]
399 |     gx = pw * dx + px
400 |     gy = ph * dy + py
401 |     gw = pw * torch.exp(dw)
402 |     gh = ph * torch.exp(dh)
403 |     return torch.stack([gx, gy, gw, gh], dim=1)
404 | 
405 | def decode_delta_map(delta_map, anchors):
406 |     '''
407 |     :param: delta_map, shape (nB, nA, nGh, nGw, 4)
408 |     :param: anchors, shape (nA,4)
409 |     '''
410 |     nB, nA, nGh, nGw, _ = delta_map.shape
411 |     anchor_mesh = generate_anchor(nGh, nGw, anchors) 
412 |     anchor_mesh = anchor_mesh.permute(0,2,3,1).contiguous()              # Shpae (nA x nGh x nGw) x 4
413 |     anchor_mesh = anchor_mesh.unsqueeze(0).repeat(nB,1,1,1,1)
414 |     pred_list = decode_delta(delta_map.view(-1,4), anchor_mesh.view(-1,4))
415 |     pred_map = pred_list.view(nB, nA, nGh, nGw, 4)
416 |     return pred_map
417 | 
418 | 
419 | def pooling_nms(heatmap, kernel=1):
420 |     pad = (kernel -1 ) // 2
421 |     hmax = F.max_pool2d(heatmap, (kernel, kernel), stride=1, padding=pad)
422 |     keep = (hmax == heatmap).float()
423 |     return keep * heatmap
424 | 
425 | def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method='standard'):
426 |     """
427 |     Removes detections with lower object confidence score than 'conf_thres'
428 |     Non-Maximum Suppression to further filter detections.
429 |     Returns detections with shape:
430 |         (x1, y1, x2, y2, object_conf, class_score, class_pred)
431 |     Args:
432 |         prediction,
433 |         conf_thres,
434 |         nms_thres,
435 |         method = 'standard' or 'fast'
436 |     """
437 | 
438 |     output = [None for _ in range(len(prediction))]
439 |     for image_i, pred in enumerate(prediction):
440 |         # Filter out confidence scores below threshold
441 |         # Get score and class with highest confidence
442 | 
443 |         v = pred[:, 4] > conf_thres
444 |         v = v.nonzero().squeeze()
445 |         if len(v.shape) == 0:
446 |             v = v.unsqueeze(0)
447 | 
448 |         pred = pred[v]
449 | 
450 |         # If none are remaining => process next image
451 |         nP = pred.shape[0]
452 |         if not nP:
453 |             continue
454 |         # From (center x, center y, width, height) to (x1, y1, x2, y2)
455 |         pred[:, :4] = xywh2xyxy(pred[:, :4])
456 | 
457 |         
458 |         # Non-maximum suppression
459 |         if method == 'standard':
460 |             nms_indices = nms(pred[:, :4], pred[:, 4], nms_thres)
461 |         elif method == 'fast':
462 |             nms_indices = fast_nms(pred[:, :4], pred[:, 4], iou_thres=nms_thres, conf_thres=conf_thres)
463 |         else:
464 |             raise ValueError('Invalid NMS type!')
465 |         det_max = pred[nms_indices]        
466 | 
467 |         if len(det_max) > 0:
468 |             # Add max detections to outputs
469 |             output[image_i] = det_max if output[image_i] is None else torch.cat((output[image_i], det_max))
470 | 
471 |     return output
472 | 
473 | def fast_nms(boxes, scores, iou_thres:float=0.5, top_k:int=200, second_threshold:bool=False, conf_thres:float=0.5):
474 |     '''
475 |     Vectorized, approximated, fast NMS, adopted from YOLACT:
476 |     https://github.com/dbolya/yolact/blob/master/layers/functions/detection.py
477 |     The original version is for multi-class NMS, here we simplify the code for single-class NMS
478 |     '''
479 |     scores, idx = scores.sort(0, descending=True)
480 |     
481 |     idx = idx[:top_k].contiguous()
482 |     scores = scores[:top_k]
483 |     num_dets = idx.size()
484 | 
485 |     boxes = boxes[idx, :]
486 | 
487 |     iou = jaccard(boxes, boxes)
488 |     iou.triu_(diagonal=1)
489 |     iou_max, _ = iou.max(dim=0)
490 | 
491 |     keep = (iou_max <= iou_thres)
492 | 
493 |     if second_threshold:
494 |         keep *= (scores > self.conf_thresh)
495 | 
496 |     return idx[keep]
497 | 
498 | 
499 | 
500 | @torch.jit.script
501 | def intersect(box_a, box_b):
502 |     """ We resize both tensors to [A,B,2] without new malloc:
503 |     [A,2] -> [A,1,2] -> [A,B,2]
504 |     [B,2] -> [1,B,2] -> [A,B,2]
505 |     Then we compute the area of intersect between box_a and box_b.
506 |     Args:
507 |       box_a: (tensor) bounding boxes, Shape: [n,A,4].
508 |       box_b: (tensor) bounding boxes, Shape: [n,B,4].
509 |     Return:
510 |       (tensor) intersection area, Shape: [n,A,B].
511 |     """
512 |     n = box_a.size(0)
513 |     A = box_a.size(1)
514 |     B = box_b.size(1)
515 |     max_xy = torch.min(box_a[:, :, 2:].unsqueeze(2).expand(n, A, B, 2),
516 |                        box_b[:, :, 2:].unsqueeze(1).expand(n, A, B, 2))
517 |     min_xy = torch.max(box_a[:, :, :2].unsqueeze(2).expand(n, A, B, 2),
518 |                        box_b[:, :, :2].unsqueeze(1).expand(n, A, B, 2))
519 |     inter = torch.clamp((max_xy - min_xy), min=0)
520 |     return inter[:, :, :, 0] * inter[:, :, :, 1]
521 | 
522 | 
523 | 
524 | def jaccard(box_a, box_b, iscrowd:bool=False):
525 |     """Compute the jaccard overlap of two sets of boxes.  The jaccard overlap
526 |     is simply the intersection over union of two boxes.  Here we operate on
527 |     ground truth boxes and default boxes. If iscrowd=True, put the crowd in box_b.
528 |     E.g.:
529 |         A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
530 |     Args:
531 |         box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
532 |         box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
533 |     Return:
534 |         jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
535 |     """
536 |     use_batch = True
537 |     if box_a.dim() == 2:
538 |         use_batch = False
539 |         box_a = box_a[None, ...]
540 |         box_b = box_b[None, ...]
541 | 
542 |     inter = intersect(box_a, box_b)
543 |     area_a = ((box_a[:, :, 2]-box_a[:, :, 0]) *
544 |               (box_a[:, :, 3]-box_a[:, :, 1])).unsqueeze(2).expand_as(inter)  # [A,B]
545 |     area_b = ((box_b[:, :, 2]-box_b[:, :, 0]) *
546 |               (box_b[:, :, 3]-box_b[:, :, 1])).unsqueeze(1).expand_as(inter)  # [A,B]
547 |     union = area_a + area_b - inter
548 | 
549 |     out = inter / area_a if iscrowd else inter / union
550 |     return out if use_batch else out.squeeze(0)
551 | 
552 | 
553 | def return_torch_unique_index(u, uv):
554 |     n = uv.shape[1]  # number of columns
555 |     first_unique = torch.zeros(n, device=u.device).long()
556 |     for j in range(n):
557 |         first_unique[j] = (uv[:, j:j + 1] == u).all(0).nonzero()[0]
558 | 
559 |     return first_unique
560 | 
561 | 
562 | def strip_optimizer_from_checkpoint(filename='weights/best.pt'):
563 |     # Strip optimizer from *.pt files for lighter files (reduced by 2/3 size)
564 |     a = torch.load(filename, map_location='cpu')
565 |     a['optimizer'] = []
566 |     torch.save(a, filename.replace('.pt', '_lite.pt'))
567 | 
568 | 
569 | def plot_results():
570 |     """
571 |     Plot YOLO training results from the file 'results.txt'
572 |     Example of what this is trying to plot can be found at: 
573 |     https://user-images.githubusercontent.com/26833433/63258271-fe9d5300-c27b-11e9-9a15-95038daf4438.png
574 |     An example results.txt file:
575 |     import os; os.system('wget https://storage.googleapis.com/ultralytics/yolov3/results_v1.txt')
576 |     """
577 |     plt.figure(figsize=(14, 7))
578 |     s = ['X + Y', 'Width + Height', 'Confidence', 'Classification', 'Total Loss', 'mAP', 'Recall', 'Precision']
579 |     files = sorted(glob.glob('results*.txt'))
580 |     for f in files:
581 |         results = np.loadtxt(f, usecols=[2, 3, 4, 5, 6, 9, 10, 11]).T  # column 11 is mAP
582 |         x = range(1, results.shape[1])
583 |         for i in range(8):
584 |             plt.subplot(2, 4, i + 1)
585 |             plt.plot(x, results[i, x], marker='.', label=f)
586 |             plt.title(s[i])
587 |             if i == 0:
588 |                 plt.legend()
589 | 


--------------------------------------------------------------------------------
/tracker_utils/visualization.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import cv2
 3 | 
 4 | 
 5 | def tlwhs_to_tlbrs(tlwhs):
 6 |     tlbrs = np.copy(tlwhs)
 7 |     if len(tlbrs) == 0:
 8 |         return tlbrs
 9 |     tlbrs[:, 2] += tlwhs[:, 0]
10 |     tlbrs[:, 3] += tlwhs[:, 1]
11 |     return tlbrs
12 | 
13 | 
14 | def get_color(idx):
15 |     idx = idx * 3
16 |     color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
17 | 
18 |     return color
19 | 
20 | 
21 | def resize_image(image, max_size=800):
22 |     if max(image.shape[:2]) > max_size:
23 |         scale = float(max_size) / max(image.shape[:2])
24 |         image = cv2.resize(image, None, fx=scale, fy=scale)
25 |     return image
26 | 
27 | 
28 | def plot_tracking(image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0., ids2=None):
29 |     im = np.ascontiguousarray(np.copy(image))
30 |     im_h, im_w = im.shape[:2]
31 | 
32 |     top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255
33 | 
34 |     text_scale = max(1, image.shape[1] / 1600.)
35 |     text_thickness = 1 if text_scale > 1.1 else 1
36 |     line_thickness = max(1, int(image.shape[1] / 500.))
37 | 
38 |     radius = max(5, int(im_w/140.))
39 |     cv2.putText(im, 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)),
40 |                 (0, int(15 * text_scale)), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), thickness=2)
41 | 
42 |     for i, tlwh in enumerate(tlwhs):
43 |         x1, y1, w, h = tlwh
44 |         intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
45 |         obj_id = int(obj_ids[i])
46 |         id_text = '{}'.format(int(obj_id))
47 |         if ids2 is not None:
48 |             id_text = id_text + ', {}'.format(int(ids2[i]))
49 |         _line_thickness = 1 if obj_id <= 0 else line_thickness
50 |         color = get_color(abs(obj_id))
51 |         cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness)
52 |         cv2.putText(im, id_text, (intbox[0], intbox[1] + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255),
53 |                     thickness=text_thickness)
54 |     return im
55 | 
56 | 
57 | def plot_trajectory(image, tlwhs, track_ids):
58 |     image = image.copy()
59 |     for one_tlwhs, track_id in zip(tlwhs, track_ids):
60 |         color = get_color(int(track_id))
61 |         for tlwh in one_tlwhs:
62 |             x1, y1, w, h = tuple(map(int, tlwh))
63 |             cv2.circle(image, (int(x1 + 0.5 * w), int(y1 + h)), 2, color, thickness=2)
64 | 
65 |     return image
66 | 
67 | 
68 | def plot_detections(image, tlbrs, scores=None, color=(255, 0, 0), ids=None):
69 |     im = np.copy(image)
70 |     text_scale = max(1, image.shape[1] / 800.)
71 |     thickness = 2 if text_scale > 1.3 else 1
72 |     for i, det in enumerate(tlbrs):
73 |         x1, y1, x2, y2 = np.asarray(det[:4], dtype=np.int)
74 |         if len(det) >= 7:
75 |             label = 'det' if det[5] > 0 else 'trk'
76 |             if ids is not None:
77 |                 text = '{}# {:.2f}: {:d}'.format(label, det[6], ids[i])
78 |                 cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255),
79 |                             thickness=thickness)
80 |             else:
81 |                 text = '{}# {:.2f}'.format(label, det[6])
82 | 
83 |         if scores is not None:
84 |             text = '{:.2f}'.format(scores[i])
85 |             cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255),
86 |                         thickness=thickness)
87 | 
88 |         cv2.rectangle(im, (x1, y1), (x2, y2), color, 2)
89 | 
90 |     return im
91 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaobin1231/YOLOv5_JDE/ea3149f280a2234a4bf840c7da8f5d0c77abd59d/utils/__init__.py


--------------------------------------------------------------------------------
/utils/activations.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | 
 5 | 
 6 | # Swish https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------
 7 | class Swish(nn.Module):  #
 8 |     @staticmethod
 9 |     def forward(x):
10 |         return x * torch.sigmoid(x)
11 | 
12 | 
13 | class HardSwish(nn.Module):
14 |     @staticmethod
15 |     def forward(x):
16 |         return x * F.hardtanh(x + 3, 0., 6., True) / 6.
17 | 
18 | 
19 | class MemoryEfficientSwish(nn.Module):
20 |     class F(torch.autograd.Function):
21 |         @staticmethod
22 |         def forward(ctx, x):
23 |             ctx.save_for_backward(x)
24 |             return x * torch.sigmoid(x)
25 | 
26 |         @staticmethod
27 |         def backward(ctx, grad_output):
28 |             x = ctx.saved_tensors[0]
29 |             sx = torch.sigmoid(x)
30 |             return grad_output * (sx * (1 + x * (1 - sx)))
31 | 
32 |     def forward(self, x):
33 |         return self.F.apply(x)
34 | 
35 | 
36 | # Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
37 | class Mish(nn.Module):
38 |     @staticmethod
39 |     def forward(x):
40 |         return x * F.softplus(x).tanh()
41 | 
42 | 
43 | class MemoryEfficientMish(nn.Module):
44 |     class F(torch.autograd.Function):
45 |         @staticmethod
46 |         def forward(ctx, x):
47 |             ctx.save_for_backward(x)
48 |             return x.mul(torch.tanh(F.softplus(x)))  # x * tanh(ln(1 + exp(x)))
49 | 
50 |         @staticmethod
51 |         def backward(ctx, grad_output):
52 |             x = ctx.saved_tensors[0]
53 |             sx = torch.sigmoid(x)
54 |             fx = F.softplus(x).tanh()
55 |             return grad_output * (fx + x * sx * (1 - fx * fx))
56 | 
57 |     def forward(self, x):
58 |         return self.F.apply(x)
59 | 
60 | 
61 | # FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
62 | class FReLU(nn.Module):
63 |     def __init__(self, c1, k=3):  # ch_in, kernel
64 |         super().__init__()
65 |         self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1)
66 |         self.bn = nn.BatchNorm2d(c1)
67 | 
68 |     def forward(self, x):
69 |         return torch.max(x, self.bn(self.conv(x)))
70 | 


--------------------------------------------------------------------------------
/utils/google_utils.py:
--------------------------------------------------------------------------------
  1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries
  2 | # pip install --upgrade google-cloud-storage
  3 | # from google.cloud import storage
  4 | 
  5 | import os
  6 | import platform
  7 | import time
  8 | from pathlib import Path
  9 | 
 10 | 
 11 | def attempt_download(weights):
 12 |     # Attempt to download pretrained weights if not found locally
 13 |     weights = weights.strip().replace("'", '')
 14 |     msg = weights + ' missing, try downloading from https://drive.google.com/drive/folders/1Drs_Aiu7xx6S-ix95f9kNsA6ueKRpN2J'
 15 | 
 16 |     r = 1  # return
 17 |     if len(weights) > 0 and not os.path.isfile(weights):
 18 |         d = {'yolov3-spp.pt': '1mM67oNw4fZoIOL1c8M3hHmj66d8e-ni_',  # yolov3-spp.yaml
 19 |              'yolov5s.pt': '1R5T6rIyy3lLwgFXNms8whc-387H0tMQO',  # yolov5s.yaml
 20 |              'yolov5m.pt': '1vobuEExpWQVpXExsJ2w-Mbf3HJjWkQJr',  # yolov5m.yaml
 21 |              'yolov5l.pt': '1hrlqD1Wdei7UT4OgT785BEk1JwnSvNEV',  # yolov5l.yaml
 22 |              'yolov5x.pt': '1mM8aZJlWTxOg7BZJvNUMrTnA2AbeCVzS',  # yolov5x.yaml
 23 |              }
 24 | 
 25 |         file = Path(weights).name
 26 |         if file in d:
 27 |             r = gdrive_download(id=d[file], name=weights)
 28 | 
 29 |         if not (r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6):  # weights exist and > 1MB
 30 |             os.remove(weights) if os.path.exists(weights) else None  # remove partial downloads
 31 |             s = 'curl -L -o %s "storage.googleapis.com/ultralytics/yolov5/ckpt/%s"' % (weights, file)
 32 |             r = os.system(s)  # execute, capture return values
 33 | 
 34 |             # Error check
 35 |             if not (r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6):  # weights exist and > 1MB
 36 |                 os.remove(weights) if os.path.exists(weights) else None  # remove partial downloads
 37 |                 raise Exception(msg)
 38 | 
 39 | 
 40 | def gdrive_download(id='1n_oKgR81BJtqk75b00eAjdv03qVCQn2f', name='coco128.zip'):
 41 |     # Downloads a file from Google Drive, accepting presented query
 42 |     # from utils.google_utils import *; gdrive_download()
 43 |     t = time.time()
 44 | 
 45 |     print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='')
 46 |     os.remove(name) if os.path.exists(name) else None  # remove existing
 47 |     os.remove('cookie') if os.path.exists('cookie') else None
 48 | 
 49 |     # Attempt file download
 50 |     out = "NUL" if platform.system() == "Windows" else "/dev/null"
 51 |     os.system('curl -c ./cookie -s -L "drive.google.com/uc?export=download&id=%s" > %s ' % (id, out))
 52 |     if os.path.exists('cookie'):  # large file
 53 |         s = 'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm=%s&id=%s" -o %s' % (get_token(), id, name)
 54 |     else:  # small file
 55 |         s = 'curl -s -L -o %s "drive.google.com/uc?export=download&id=%s"' % (name, id)
 56 |     r = os.system(s)  # execute, capture return values
 57 |     os.remove('cookie') if os.path.exists('cookie') else None
 58 | 
 59 |     # Error check
 60 |     if r != 0:
 61 |         os.remove(name) if os.path.exists(name) else None  # remove partial
 62 |         print('Download error ')  # raise Exception('Download error')
 63 |         return r
 64 | 
 65 |     # Unzip if archive
 66 |     if name.endswith('.zip'):
 67 |         print('unzipping... ', end='')
 68 |         os.system('unzip -q %s' % name)  # unzip
 69 |         os.remove(name)  # remove zip to free space
 70 | 
 71 |     print('Done (%.1fs)' % (time.time() - t))
 72 |     return r
 73 | 
 74 | 
 75 | def get_token(cookie="./cookie"):
 76 |     with open(cookie) as f:
 77 |         for line in f:
 78 |             if "download" in line:
 79 |                 return line.split()[-1]
 80 |     return ""
 81 | 
 82 | # def upload_blob(bucket_name, source_file_name, destination_blob_name):
 83 | #     # Uploads a file to a bucket
 84 | #     # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
 85 | #
 86 | #     storage_client = storage.Client()
 87 | #     bucket = storage_client.get_bucket(bucket_name)
 88 | #     blob = bucket.blob(destination_blob_name)
 89 | #
 90 | #     blob.upload_from_filename(source_file_name)
 91 | #
 92 | #     print('File {} uploaded to {}.'.format(
 93 | #         source_file_name,
 94 | #         destination_blob_name))
 95 | #
 96 | #
 97 | # def download_blob(bucket_name, source_blob_name, destination_file_name):
 98 | #     # Uploads a blob from a bucket
 99 | #     storage_client = storage.Client()
100 | #     bucket = storage_client.get_bucket(bucket_name)
101 | #     blob = bucket.blob(source_blob_name)
102 | #
103 | #     blob.download_to_filename(destination_file_name)
104 | #
105 | #     print('Blob {} downloaded to {}.'.format(
106 | #         source_blob_name,
107 | #         destination_file_name))
108 | 


--------------------------------------------------------------------------------
/utils/torch_utils.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import os
  3 | import time
  4 | from copy import deepcopy
  5 | 
  6 | import torch
  7 | import torch.backends.cudnn as cudnn
  8 | import torch.nn as nn
  9 | import torch.nn.functional as F
 10 | import torchvision.models as models
 11 | 
 12 | 
 13 | def init_seeds(seed=0):
 14 |     torch.manual_seed(seed)
 15 | 
 16 |     # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html
 17 |     if seed == 0:  # slower, more reproducible
 18 |         cudnn.deterministic = True
 19 |         cudnn.benchmark = False
 20 |     else:  # faster, less reproducible
 21 |         cudnn.deterministic = False
 22 |         cudnn.benchmark = True
 23 | 
 24 | 
 25 | def select_device(device='', batch_size=None):
 26 |     # device = 'cpu' or '0' or '0,1,2,3'
 27 |     cpu_request = device.lower() == 'cpu'
 28 |     if device and not cpu_request:  # if device requested other than 'cpu'
 29 |         os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
 30 |         assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device  # check availablity
 31 | 
 32 |     cuda = False if cpu_request else torch.cuda.is_available()
 33 |     if cuda:
 34 |         c = 1024 ** 2  # bytes to MB
 35 |         ng = torch.cuda.device_count()
 36 |         if ng > 1 and batch_size:  # check that batch_size is compatible with device_count
 37 |             assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng)
 38 |         x = [torch.cuda.get_device_properties(i) for i in range(ng)]
 39 |         s = 'Using CUDA '
 40 |         for i in range(0, ng):
 41 |             if i == 1:
 42 |                 s = ' ' * len(s)
 43 |             print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" %
 44 |                   (s, i, x[i].name, x[i].total_memory / c))
 45 |     else:
 46 |         print('Using CPU')
 47 | 
 48 |     print('')  # skip a line
 49 |     return torch.device('cuda:0' if cuda else 'cpu')
 50 | 
 51 | 
 52 | def time_synchronized():
 53 |     torch.cuda.synchronize() if torch.cuda.is_available() else None
 54 |     return time.time()
 55 | 
 56 | 
 57 | def is_parallel(model):
 58 |     # is model is parallel with DP or DDP
 59 |     return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
 60 | 
 61 | 
 62 | def initialize_weights(model):
 63 |     for m in model.modules():
 64 |         t = type(m)
 65 |         if t is nn.Conv2d:
 66 |             pass  # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
 67 |         elif t is nn.BatchNorm2d:
 68 |             m.eps = 1e-3
 69 |             m.momentum = 0.03
 70 |         elif t in [nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
 71 |             m.inplace = True
 72 |         elif t is nn.Linear:
 73 |             nn.init.xavier_uniform_(m.weight)
 74 | 
 75 | 
 76 | def find_modules(model, mclass=nn.Conv2d):
 77 |     # finds layer indices matching module class 'mclass'
 78 |     return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
 79 | 
 80 | 
 81 | def sparsity(model):
 82 |     # Return global model sparsity
 83 |     a, b = 0., 0.
 84 |     for p in model.parameters():
 85 |         a += p.numel()
 86 |         b += (p == 0).sum()
 87 |     return b / a
 88 | 
 89 | 
 90 | def prune(model, amount=0.3):
 91 |     # Prune model to requested global sparsity
 92 |     import torch.nn.utils.prune as prune
 93 |     print('Pruning model... ', end='')
 94 |     for name, m in model.named_modules():
 95 |         if isinstance(m, nn.Conv2d):
 96 |             prune.l1_unstructured(m, name='weight', amount=amount)  # prune
 97 |             prune.remove(m, 'weight')  # make permanent
 98 |     print(' %.3g global sparsity' % sparsity(model))
 99 | 
100 | 
101 | def fuse_conv_and_bn(conv, bn):
102 |     # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
103 |     with torch.no_grad():
104 |         # init
105 |         fusedconv = nn.Conv2d(conv.in_channels,
106 |                               conv.out_channels,
107 |                               kernel_size=conv.kernel_size,
108 |                               stride=conv.stride,
109 |                               padding=conv.padding,
110 |                               bias=True).to(conv.weight.device)
111 | 
112 |         # prepare filters
113 |         w_conv = conv.weight.clone().view(conv.out_channels, -1)
114 |         w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
115 |         fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
116 | 
117 |         # prepare spatial bias
118 |         b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
119 |         b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
120 |         fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
121 | 
122 |         return fusedconv
123 | 
124 | 
125 | def model_info(model, verbose=False):
126 |     # Plots a line-by-line description of a PyTorch model
127 |     n_p = sum(x.numel() for x in model.parameters())  # number parameters
128 |     n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
129 |     if verbose:
130 |         print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
131 |         for i, (name, p) in enumerate(model.named_parameters()):
132 |             name = name.replace('module_list.', '')
133 |             print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
134 |                   (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
135 | 
136 |     try:  # FLOPS
137 |         from thop import profile
138 |         flops = profile(deepcopy(model), inputs=(torch.zeros(1, 3, 64, 64),), verbose=False)[0] / 1E9 * 2
139 |         fs = ', %.1f GFLOPS' % (flops * 100)  # 640x640 FLOPS
140 |     except:
141 |         fs = ''
142 | 
143 |     print('Model Summary: %g layers, %g parameters, %g gradients%s' % (len(list(model.parameters())), n_p, n_g, fs))
144 | 
145 | 
146 | def load_classifier(name='resnet101', n=2):
147 |     # Loads a pretrained model reshaped to n-class output
148 |     model = models.__dict__[name](pretrained=True)
149 | 
150 |     # Display model properties
151 |     input_size = [3, 224, 224]
152 |     input_space = 'RGB'
153 |     input_range = [0, 1]
154 |     mean = [0.485, 0.456, 0.406]
155 |     std = [0.229, 0.224, 0.225]
156 |     for x in [input_size, input_space, input_range, mean, std]:
157 |         print(x + ' =', eval(x))
158 | 
159 |     # Reshape output to n classes
160 |     filters = model.fc.weight.shape[1]
161 |     model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
162 |     model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True)
163 |     model.fc.out_features = n
164 |     return model
165 | 
166 | 
167 | def scale_img(img, ratio=1.0, same_shape=False):  # img(16,3,256,416), r=ratio
168 |     # scales img(bs,3,y,x) by ratio
169 |     if ratio == 1.0:
170 |         return img
171 |     else:
172 |         h, w = img.shape[2:]
173 |         s = (int(h * ratio), int(w * ratio))  # new size
174 |         img = F.interpolate(img, size=s, mode='bilinear', align_corners=False)  # resize
175 |         if not same_shape:  # pad/crop img
176 |             gs = 32  # (pixels) grid size
177 |             h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
178 |         return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447)  # value = imagenet mean
179 | 
180 | 
181 | def copy_attr(a, b, include=(), exclude=()):
182 |     # Copy attributes from b to a, options to only include [...] and to exclude [...]
183 |     for k, v in b.__dict__.items():
184 |         if (len(include) and k not in include) or k.startswith('_') or k in exclude:
185 |             continue
186 |         else:
187 |             setattr(a, k, v)
188 | 
189 | 
190 | class ModelEMA:
191 |     """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
192 |     Keep a moving average of everything in the model state_dict (parameters and buffers).
193 |     This is intended to allow functionality like
194 |     https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
195 |     A smoothed version of the weights is necessary for some training schemes to perform well.
196 |     This class is sensitive where it is initialized in the sequence of model init,
197 |     GPU assignment and distributed training wrappers.
198 |     """
199 | 
200 |     def __init__(self, model, decay=0.9999, updates=0):
201 |         # Create EMA
202 |         self.ema = deepcopy(model.module if is_parallel(model) else model).eval()  # FP32 EMA
203 |         # if next(model.parameters()).device.type != 'cpu':
204 |         #     self.ema.half()  # FP16 EMA
205 |         self.updates = updates  # number of EMA updates
206 |         self.decay = lambda x: decay * (1 - math.exp(-x / 2000))  # decay exponential ramp (to help early epochs)
207 |         for p in self.ema.parameters():
208 |             p.requires_grad_(False)
209 | 
210 |     def update(self, model):
211 |         # Update EMA parameters
212 |         with torch.no_grad():
213 |             self.updates += 1
214 |             d = self.decay(self.updates)
215 | 
216 |             msd = model.module.state_dict() if is_parallel(model) else model.state_dict()  # model state_dict
217 |             for k, v in self.ema.state_dict().items():
218 |                 if v.dtype.is_floating_point:
219 |                     v *= d
220 |                     v += (1. - d) * msd[k].detach()
221 | 
222 |     def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
223 |         # Update EMA attributes
224 |         copy_attr(self.ema, model, include, exclude)
225 | 


--------------------------------------------------------------------------------
/weights/download_weights.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Download common models
 3 | 
 4 | python -c "
 5 | from utils.google_utils import *;
 6 | attempt_download('weights/yolov5s.pt');
 7 | attempt_download('weights/yolov5m.pt');
 8 | attempt_download('weights/yolov5l.pt');
 9 | attempt_download('weights/yolov5x.pt')
10 | "
11 | 


--------------------------------------------------------------------------------