├── .gitignore ├── LICENSE ├── README.md ├── convert_yolo.py ├── convert_yolov5.py ├── convertor ├── __init__.py ├── yolo │ ├── convert_tf_onnx.py │ └── convert_tf_tflite.py └── yolov5 │ ├── __init__.py │ ├── convert_onnx_tf.py │ ├── convert_onnx_vino.py │ ├── convert_tf_onnx.py │ ├── convert_tf_tflite.py │ └── convert_torch_onnx.py ├── datasets ├── .gitignore ├── README.md ├── create_small_dataset.py └── download_coco_val2017.sh ├── detect.py ├── detect_all.sh ├── detector ├── .gitignore ├── __init__.py ├── base.py ├── yolo.py └── yolov5.py ├── download_font.sh ├── download_superres.sh ├── evaluate_all.sh ├── ipynb ├── accumulate.ipynb ├── map.png ├── time.png └── time_vs_map.png ├── labels └── coco_labels.txt ├── models ├── __init__.py ├── common.py ├── experimental.py ├── export.py ├── tf_yolo.py ├── tf_yolov5.py ├── yolo.py ├── yolov5l.yaml ├── yolov5m.yaml ├── yolov5s.yaml └── yolov5x.yaml ├── requirements.txt ├── tools ├── convert_yolo_config_csv.py ├── yolov3-spp.cfg ├── yolov3-spp.csv ├── yolov3-spp.xlsx ├── yolov3-tiny.cfg ├── yolov3-tiny.csv ├── yolov3-tiny.xlsx ├── yolov3.cfg ├── yolov3.csv ├── yolov3.xlsx ├── yolov4-csp.cfg ├── yolov4-csp.csv ├── yolov4-csp.xlsx ├── yolov4-tiny.cfg ├── yolov4-tiny.csv ├── yolov4-tiny.xlsx ├── yolov4.cfg ├── yolov4.csv ├── yolov4.xlsx ├── yolov4x-mish.cfg ├── yolov4x-mish.csv └── yolov4x-mish.xlsx ├── utils ├── __init__.py ├── activations.py ├── autoanchor.py ├── convert_tflite.py ├── datasets.py ├── general.py ├── google_utils.py ├── image_preproc.py ├── loss.py ├── metrics.py ├── nms.py ├── plots.py └── torch_utils.py └── weights ├── .gitignore ├── Dockerfile ├── README.md ├── build_docker.sh ├── compile_edgetpu.sh ├── download_yolo.sh └── download_yolov5.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # original 132 | TakaoGothic.ttf 133 | superres 134 | results 135 | .DS_Store 136 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Tetsutaro Maruyama 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # yolo_various_framework 2 | 3 | run YOLO (object detection model) on various frameworks 4 | 5 | ## articles corresponding to this repository (Qiita) 6 | 7 | the following articles were written in Japanese only.
8 | 9 | - [YOLOを色んなフレームワークに変換して動かしてみた件](https://qiita.com/tetutaro/items/7adf3b22e4c42a17fe66) (converting YOLO to various frameworks and running it) 10 | - [OpenCVで簡単に実装できる画像前処理](https://qiita.com/tetutaro/items/443dcd22e8aee4884ea8) (image preprocessing that is easy to implement with OpenCV) 11 | 12 | ## my motivations 13 | 14 | - I want to run object detection models on my PC 15 | - I don't have enough money to buy a GPU 16 | - I want to know which framework is the fastest in terms of elapsed time 17 | - I want to confirm that the predicted results do not change much when I convert pre-trained weights to another framework 18 | 19 | ## object detection models 20 | 21 | - [YOLO V3](https://github.com/pjreddie/darknet) 22 | - yolov3-tiny 23 | - input image size: 512x512 24 | - yolov3 25 | - input image size: 512x512 26 | - yolov3-spp 27 | - input image size: 512x512 28 | - [YOLO V4](https://github.com/AlexeyAB/darknet) 29 | - yolov4-tiny 30 | - input image size: 512x512 31 | - yolov4 32 | - input image size: 512x512 33 | - [IN-PROGRESS] yolov4-csp ([Scaled-YOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4/tree/yolov4-csp)) 34 | - input image size: 512x512 35 | - [IN-PROGRESS] yolov4x-mish ([YOLOv4-large P5](https://github.com/WongKinYiu/ScaledYOLOv4/tree/yolov4-large)) 36 | - input image size: 512x512 37 | - [IN-PROGRESS] YOLO V3 & V4 can run only on `tf`, `tflite` and `tf_onnx` 38 | - [YOLO V5](https://github.com/ultralytics/yolov5) 39 | - yolov5s 40 | - input image size: 640x640 41 | - yolov5m 42 | - input image size: 640x640 43 | - yolov5l 44 | - input image size: 640x640 45 | - yolov5x 46 | - input image size: 640x640 47 | 48 | ## deep learning frameworks 49 | 50 | all deep learning frameworks below were run on Python 3.7.9 51 | 52 | - PyTorch (State Dict) 53 | - torch 1.8.1 54 | - TensorFlow (Frozen Graph) 55 | - tensorflow 2.4.1 56 | - TensorFlow Lite (Flat Buffer) 57 | - tflite-runtime 2.5.0 58 | - ONNX 59 | - onnxruntime 1.7.0 60 | - OpenVINO 61 | - OpenVINO 2021.2.185 62 | 63 | ## libraries to convert 64 | 65 | - onnx 1.8.1 66 | - onnx-tf 1.7.0 67 | - tf2onnx 1.8.4 68 | 69 | ## preparation 70 | 71 | - download the font and the trained super-resolution model for the detector 72 | - `./download_font.sh` 73 | - `./download_superres.sh` 74 | - download the COCO dataset and create a small dataset (convert annotations) 75 | - see [datasets/README](https://github.com/tetutaro/yolo_various_framework/tree/main/datasets) 76 | - download pre-trained weights 77 | - see [weights/README](https://github.com/tetutaro/yolo_various_framework/tree/main/weights) 78 | - convert pre-trained weights to various frameworks 79 | - `./convert_yolo.py` 80 | - `./convert_yolov5.py` 81 | - (if you want to calculate metrics) please install [`object_detection_metrics`](https://github.com/tetutaro/object_detection_metrics) 82 | - `> pip install "git+https://github.com/tetutaro/object_detection_metrics.git"` 83 | 84 | ## usage 85 | 86 | ``` 87 | usage: detect.py [-h] 88 | -m {yolov3-tiny,yolov3,yolov4-tiny,yolov4,yolov5s,yolov5m,yolov5l,yolov5x} 89 | -f {torch,torch_onnx,onnx_vino,onnx_tf,tf,tflite,tf_onnx} 90 | [-q {fp32,fp16,int8}] 91 | -d IMAGE_DIR 92 | [-c CONF_THRESHOLD] 93 | [-i IOU_THRESHOLD] 94 | [--clarify-image] 95 | [--use-superres] 96 | [--disable-soft-nms] 97 | 98 | detect objects from images 99 | 100 | optional arguments: 101 | -h, --help show this help message and exit 102 | -m MODEL, --model MODEL 103 | model name 104 | -f FRAMEWORK, --framework FRAMEWORK 105 | framework 106 | -q QUANTIZE, --quantize QUANTIZE 107 | quantization mode (TensorFlow Lite only) 108 | default: fp32 109 | -d IMAGE_DIR, --image-dir
IMAGE_DIR 110 | directory contains images to detect objects 111 | -c CONF_THRESHOLD, --conf-threshold CONF_THRESHOLD 112 | threshold of confidence score to adopt bounding boxes 113 | default: 0.3 114 | -i IOU_THRESHOLD, --iou-threshold IOU_THRESHOLD 115 | threshold of IoU to eliminate bounding boxes in NMS 116 | default: 0.45 117 | --clarify-image 118 | image preprocessing 119 | --use-superres 120 | use Super-Resolution at image preprocessing 121 | --disable-soft-nms use hard-NMS instead of soft-NMS 122 | ``` 123 | 124 | ## frameworks 125 | 126 | ``` 127 | torch PyTorch (state dict) 128 | torch_onnx ONNX converted from PyTorch 129 | onnx_vino OpenVINO converted from torch_onnx 130 | onnx_tf TensorFlow (SavedModel) converted from torch_onnx 131 | tf TensorFlow (FrozenGraph) 132 | tflite TensorFlow Lite 133 | tf_onnx ONNX converted from TensorFlow 134 | ``` 135 | 136 | ## results 137 | 138 | ### conditions 139 | 140 | NOTE: the following data was measured under the conditions below. Just FYI. 141 | 142 | - images: 100 images randomly selected from the COCO val2017 dataset 143 | - elapsed time includes the time for... 144 | - image preprocessing (sketched below) 145 | - adjusting white balance 146 | - smoothing the image with a Gaussian blur 147 | - correcting contrast (brightening dark areas) 148 | - clarifying the image using Super-Resolution 149 | - resizing the image and putting it on a square, gray background image 150 | - inference 151 | - filtering bounding boxes 152 | - applying anchors 153 | - some models do this inside the model 154 | - YOLO V5 (tf, tflite, tf_onnx) 155 | - calculating confidence scores 156 | - NMS 157 | - soft NMS 158 | - eliminating small, low-confidence bounding boxes that lie inside bigger, more confident ones 159 | - the input image size differs depending on the model 160 | - YOLO V3/V4: 512 x 512 161 | - YOLO V5: 640 x 640 162 | - confidence score threshold is the same regardless of the model 163 | - `0.3` 164 | - IoU threshold is the same regardless of the model 165 | - `0.45` 166 | - tool for calculating mAP: [`object_detection_metrics`](https://github.com/tetutaro/object_detection_metrics) (may be WRONG!!)
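The image preprocessing listed above (the `--clarify-image` / `--use-superres` options) can be sketched with plain OpenCV and NumPy. The snippet below is only an illustration of the idea, not the actual code in `utils/image_preproc.py`: the gray-world white balance, the CLAHE-based contrast correction and the function names are assumptions made for this example, and the Super-Resolution step (`superres/ESPCN_x4.pb`) is omitted.

```
import cv2
import numpy as np


def clarify_image(image: np.ndarray) -> np.ndarray:
    # adjust white balance (gray-world assumption)
    balanced = image.astype(np.float32)
    balanced *= balanced.mean() / balanced.mean(axis=(0, 1))
    balanced = np.clip(balanced, 0, 255).astype(np.uint8)
    # smooth the image with a Gaussian blur
    smoothed = cv2.GaussianBlur(balanced, (3, 3), 0)
    # correct contrast (brighten dark areas); CLAHE on the L channel
    # stands in here for whatever correction the repository actually uses
    lab = cv2.cvtColor(smoothed, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return cv2.cvtColor(cv2.merge((clahe.apply(l), a, b)), cv2.COLOR_LAB2BGR)


def letterbox(image: np.ndarray, size: int = 640) -> np.ndarray:
    # resize keeping the aspect ratio and put the result on a square gray
    # background (512 for YOLO V3/V4, 640 for YOLO V5)
    height, width = image.shape[:2]
    scale = size / max(height, width)
    resized = cv2.resize(image, (int(width * scale), int(height * scale)))
    background = np.full((size, size, 3), 114, dtype=np.uint8)
    top = (size - resized.shape[0]) // 2
    left = (size - resized.shape[1]) // 2
    background[top:top + resized.shape[0], left:left + resized.shape[1]] = resized
    return background
```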
167 | 168 | ### binary size 169 | 170 | | Model | torch | torch_onnx | onnx_vino | onnx_tf | tf | tflite (fp32) | tflite (fp16) | tflite (int8) | tf_onnx | 171 | |:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| 172 | | yolov3-tiny | | | | | 34MB | 34MB | 17MB | 8.6MB | 34MB | 173 | | yolov3 | | | | | 237MB | 236MB | 118MB | 60MB | 236MB | 174 | | yolov3-spp | | | | | 241MB | 240MB | 120MB | 61MB | 240MB | 175 | | yolov4-tiny | | | | | 23MB | 23MB | 12MB | 5.9MB | 23MB | 176 | | yolov4 | | | | | 246MB | 246MB | 123MB | 63MB | 246MB | 177 | | yolov5s | 28MB | 28MB | 28MB | 28MB | 28MB | 28MB | 14MB | 7.4MB | 28MB | 178 | | yolov5m | 82MB | 82MB | 81MB | 82MB | 82MB | 82MB | 41MB | 21MB | 82MB | 179 | | yolov5l | 180MB | 179MB | 179MB | 180MB | 180MB | 180MB | 90MB | 46MB | 180MB | 180 | | yolov5x | 335MB | 335MB | 335MB | 336MB | 336MB | 335MB | 168MB | 85MB | 335MB | 181 | 182 | ### my environment 183 | 184 | - MacBook Air (Retina, 2020) 185 | - CPU: 1.1GHz quad core Intel Core i5 186 | - Memory: 16GB 3733MHz LPDDR4X 187 | 188 | ### elapsed time per image 189 | 190 | ![](ipynb/time.png) 191 | 192 | ### accuracy (mAP: mean Average Precision) 193 | 194 | ![](ipynb/map.png) 195 | 196 | ### time vs accuracy 197 | 198 | ![](ipynb/time_vs_map.png) 199 | -------------------------------------------------------------------------------- /convert_yolo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # from convertor.yolo.convert_torch_onnx import yolo_convert_torch_onnx 4 | # from convertor.yolo.convert_onnx_vino import yolo_convert_onnx_vino 5 | # from convertor.yolo.convert_onnx_tf import yolo_convert_onnx_tf 6 | from convertor.yolo.convert_tf_tflite import yolo_convert_tf_tflite 7 | from convertor.yolo.convert_tf_onnx import yolo_convert_tf_onnx 8 | 9 | IMAGE_SIZES = { 10 | 'yolov3-tiny': 512, 11 | 'yolov3': 512, 12 | 'yolov3-spp': 512, 13 | 'yolov4-tiny': 512, 14 | 'yolov4': 512, 15 | 'yolov4-csp': 512, 16 | 'yolov4x-mish': 512, 17 | } 18 | DIRECTORY = 'weights/yolo' 19 | 20 | 21 | if __name__ == '__main__': 22 | for model in [ 23 | 'yolov3-tiny', 'yolov3', 'yolov3-spp', 24 | 'yolov4-tiny', 'yolov4', 25 | # 'yolov4-csp', 'yolov4x-mish', 26 | ]: 27 | imgsize = IMAGE_SIZES[model] 28 | # yolo_convert_torch_onnx( 29 | # model=model, 30 | # directory=DIRECTORY, 31 | # imgsize=[imgsize, imgsize] 32 | # ) 33 | # yolo_convert_onnx_vino( 34 | # model=model, 35 | # directory=DIRECTORY, 36 | # imgsize=[imgsize, imgsize] 37 | # ) 38 | # yolo_convert_onnx_tf( 39 | # model=model, 40 | # directory=DIRECTORY 41 | # ) 42 | yolo_convert_tf_tflite( 43 | model=model, 44 | directory=DIRECTORY, 45 | imgsize=[imgsize, imgsize] 46 | ) 47 | yolo_convert_tf_onnx( 48 | model=model, 49 | directory=DIRECTORY 50 | ) 51 | -------------------------------------------------------------------------------- /convert_yolov5.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from convertor.yolov5.convert_torch_onnx import yolov5_convert_torch_onnx 4 | from convertor.yolov5.convert_onnx_vino import yolov5_convert_onnx_vino 5 | from convertor.yolov5.convert_onnx_tf import yolov5_convert_onnx_tf 6 | from convertor.yolov5.convert_tf_tflite import yolov5_convert_tf_tflite 7 | from convertor.yolov5.convert_tf_onnx import yolov5_convert_tf_onnx 8 | 9 | IMAGE_SIZE = 640 10 | DIRECTORY = 'weights/yolov5' 11 | 12 | 13 | if __name__ == '__main__':
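    # conversion chain run for every YOLOv5 variant (s, m, l, x), in the
    # order of the calls below: PyTorch .pt -> ONNX, ONNX -> OpenVINO,
    # ONNX -> TensorFlow, TensorFlow -> TFLite (fp32/fp16/int8), TensorFlow -> ONNX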
14 | for x in ['s', 'm', 'l', 'x']: 15 | model = f'yolov5{x}' 16 | yolov5_convert_torch_onnx( 17 | model=model, 18 | directory=DIRECTORY, 19 | imgsize=[IMAGE_SIZE, IMAGE_SIZE] 20 | ) 21 | yolov5_convert_onnx_vino( 22 | model=model, 23 | directory=DIRECTORY, 24 | imgsize=[IMAGE_SIZE, IMAGE_SIZE] 25 | ) 26 | yolov5_convert_onnx_tf( 27 | model=model, 28 | directory=DIRECTORY 29 | ) 30 | yolov5_convert_tf_tflite( 31 | model=model, 32 | directory=DIRECTORY, 33 | imgsize=[IMAGE_SIZE, IMAGE_SIZE] 34 | ) 35 | yolov5_convert_tf_onnx( 36 | model=model, 37 | directory=DIRECTORY 38 | ) 39 | -------------------------------------------------------------------------------- /convertor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/convertor/__init__.py -------------------------------------------------------------------------------- /convertor/yolo/convert_tf_onnx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- cofing:utf-8 -*- 3 | import os 4 | import subprocess 5 | 6 | 7 | def yolo_convert_tf_onnx( 8 | model: str, 9 | directory: str 10 | ) -> None: 11 | path_pb = f'{directory}/{model}.pb' 12 | path_onnx = f'{directory}/tf_{model}.onnx' 13 | if not os.path.isfile(path_pb): 14 | return 15 | if os.path.isfile(path_onnx): 16 | return 17 | options = list() 18 | options.append(f'--graphdef {path_pb}') 19 | options.append(f'--output {path_onnx}') 20 | options.append('--inputs x:0') 21 | if model in ['yolov3-tiny', 'yolov4-tiny']: 22 | options.append('--outputs Identity:0,Identity_1:0') 23 | else: 24 | options.append('--outputs Identity:0,Identity_1:0,Identity_2:0') 25 | options.append('--opset 12') 26 | options.append('--inputs-as-nchw x:0') 27 | cmd = ' '.join(['python -m tf2onnx.convert'] + options) 28 | cmd = cmd.split() 29 | subprocess.run(cmd) 30 | return 31 | -------------------------------------------------------------------------------- /convertor/yolo/convert_tf_tflite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from typing import List 4 | import tensorflow as tf 5 | import os 6 | import math 7 | import numpy as np 8 | # YOLO V3, V4 9 | from models.tf_yolo import ( 10 | tf_YoloV3_tiny, 11 | tf_YoloV3, 12 | tf_YoloV3_spp, 13 | tf_YoloV4_tiny, 14 | tf_YoloV4, 15 | tf_YoloV4_csp, 16 | tf_YoloV4x_mish, 17 | ) 18 | from utils.convert_tflite import ( 19 | save_frozen_graph, 20 | convert_tflite_fp32, 21 | convert_tflite_fp16, 22 | convert_tflite_int8, 23 | ) 24 | 25 | NUM_CLASS = 80 26 | MODEL_CLASS = { 27 | 'yolov3-tiny': tf_YoloV3_tiny, 28 | 'yolov3': tf_YoloV3, 29 | 'yolov3-spp': tf_YoloV3_spp, 30 | 'yolov4-tiny': tf_YoloV4_tiny, 31 | 'yolov4': tf_YoloV4, 32 | 'yolov4-csp': tf_YoloV4_csp, 33 | 'yolov4x-mish': tf_YoloV4x_mish, 34 | } 35 | MODEL_SHAPE = { 36 | 'yolov3-tiny': { 37 | 'nlayers': 13, 38 | 'nobn_layers': [9, 12], 39 | }, 40 | 'yolov3': { 41 | 'nlayers': 75, 42 | 'nobn_layers': [58, 66, 74], 43 | }, 44 | 'yolov3-spp': { 45 | 'nlayers': 76, 46 | 'nobn_layers': [59, 67, 75], 47 | }, 48 | 'yolov4-tiny': { 49 | 'nlayers': 21, 50 | 'nobn_layers': [17, 20], 51 | }, 52 | 'yolov4': { 53 | 'nlayers': 110, 54 | 'nobn_layers': [93, 101, 109], 55 | }, 56 | 'yolov4-csp': { 57 | 'nlayers': 115, 58 | 'nobn_layers': [94, 104, 114], 59 | }, 60 | 'yolov4x-mish': { 61 | 'nlayers': 137, 62 | 
'nobn_layers': [112, 124, 136], 63 | }, 64 | } 65 | STRIDES = [8, 16, 32, 64, 128] 66 | DEBUG = False 67 | 68 | 69 | def _load_darknet_weights( 70 | model: str, 71 | path_weights: str, 72 | model_keras: tf.keras.Model 73 | ) -> None: 74 | rf = open(path_weights, 'rb') 75 | major, minor, revision, seen, _ = np.fromfile( 76 | rf, dtype=np.int32, count=5 77 | ) 78 | nlayers = MODEL_SHAPE[model]['nlayers'] 79 | nobn_layers = MODEL_SHAPE[model]['nobn_layers'] 80 | if DEBUG: 81 | print(model) 82 | for i, layers in enumerate(model_keras.weighted_layers): 83 | print(i, layers) 84 | assert len(model_keras.weighted_layers) == nlayers 85 | for i, layers in enumerate(model_keras.weighted_layers): 86 | conv_layer = layers.conv 87 | norm_layer = layers.norm 88 | input_shape = layers.input_shape 89 | filters = conv_layer.filters 90 | k_size = conv_layer.kernel_size[0] 91 | in_dim = input_shape[-1] 92 | if i not in nobn_layers: 93 | # darknet weights: [beta, gamma, mean, variance] 94 | bn_weights = np.fromfile( 95 | rf, dtype=np.float32, count=(4 * filters) 96 | ) 97 | # tf weights: [gamma, beta, mean, variance] 98 | bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]] 99 | else: 100 | conv_bias = np.fromfile( 101 | rf, dtype=np.float32, count=filters 102 | ) 103 | if model in ['yolov4-csp', 'yolov4x-mish']: 104 | # https://arxiv.org/abs/1708.02002 105 | offset = np.where(np.array(nobn_layers) == i)[0][0] 106 | stride = STRIDES[offset] 107 | conv_bias = conv_bias.reshape(3, 85) 108 | conv_bias[:, 4] += math.log(8 / (640 / stride) ** 2) 109 | conv_bias[:, 5:] += math.log(0.6 / (80 - 0.99)) 110 | conv_bias = conv_bias.reshape(-1) 111 | # darknet shape (out_dim, in_dim, height, width) 112 | conv_shape = (filters, in_dim, k_size, k_size) 113 | conv_weights = np.fromfile( 114 | rf, dtype=np.float32, count=np.product(conv_shape) 115 | ) 116 | # tf shape (height, width, in_dim, out_dim) 117 | conv_weights = conv_weights.reshape(conv_shape).transpose( 118 | [2, 3, 1, 0] 119 | ) 120 | if i not in nobn_layers: 121 | assert norm_layer.__class__.__name__ == 'BatchNormalization' 122 | conv_layer.set_weights([conv_weights]) 123 | norm_layer.set_weights(bn_weights) 124 | else: 125 | assert norm_layer.__class__.__name__ == 'function' 126 | conv_layer.set_weights([conv_weights, conv_bias]) 127 | rest = len(rf.read()) 128 | assert rest == 0, f'failed to read all data: {rest}' 129 | rf.close() 130 | return 131 | 132 | 133 | def yolo_convert_tf_tflite( 134 | model: str, 135 | directory: str, 136 | imgsize: List[int] 137 | ) -> None: 138 | path_weights = f'{directory}/{model}.weights' 139 | if not os.path.isfile(path_weights): 140 | print(f'ERROR: {path_weights} not found') 141 | return 142 | # load model 143 | model_keras = MODEL_CLASS[model](nc=NUM_CLASS) 144 | model_keras.build(input_shape=(1, *imgsize, 3)) 145 | # dummy run 146 | dummy_image_tf = tf.zeros((1, *imgsize, 3), dtype=tf.float32) # NHWC 147 | y = model_keras(dummy_image_tf) 148 | for yy in y: 149 | _ = yy.numpy() 150 | # model_keras.summary() 151 | # load weights 152 | _load_darknet_weights( 153 | model=model, path_weights=path_weights, model_keras=model_keras 154 | ) 155 | # save as Frozen Graph 156 | input_keras = tf.keras.Input( 157 | shape=(*imgsize, 3), batch_size=1, dtype=tf.float32 158 | ) 159 | save_frozen_graph( 160 | path_pb=f'{directory}/{model}.pb', 161 | model_keras=model_keras, 162 | input_keras=input_keras 163 | ) 164 | # convert TFLite model 165 | path_tflite = f'{directory}/{model}_fp32.tflite' 166 | 
convert_tflite_fp32(path_tflite=path_tflite, model_keras=model_keras) 167 | path_tflite = f'{directory}/{model}_fp16.tflite' 168 | convert_tflite_fp16(path_tflite=path_tflite, model_keras=model_keras) 169 | path_tflite = f'{directory}/{model}_int8.tflite' 170 | convert_tflite_int8( 171 | path_tflite=path_tflite, 172 | imgsize=imgsize, 173 | model_keras=model_keras 174 | ) 175 | return 176 | -------------------------------------------------------------------------------- /convertor/yolov5/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/convertor/yolov5/__init__.py -------------------------------------------------------------------------------- /convertor/yolov5/convert_onnx_tf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import os 4 | import onnx 5 | from onnx_tf.backend import prepare 6 | 7 | 8 | def yolov5_convert_onnx_tf(model: str, directory: str) -> None: 9 | path_onnx = f'{directory}/{model}.onnx' 10 | if not os.path.isfile(path_onnx): 11 | return 12 | path_onnx_tf = f'{directory}/onnx_tf_{model}' 13 | if os.path.isdir(path_onnx_tf): 14 | return 15 | model_onnx = onnx.load(path_onnx) 16 | tf_rep = prepare(model_onnx) 17 | tf_rep.export_graph(path_onnx_tf) 18 | return 19 | -------------------------------------------------------------------------------- /convertor/yolov5/convert_onnx_vino.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from typing import List 4 | import os 5 | import subprocess 6 | 7 | OPENVINO_VERSION = "2021" 8 | 9 | 10 | def yolov5_convert_onnx_vino( 11 | model: str, 12 | directory: str, 13 | imgsize: List[int] 14 | ) -> None: 15 | path_onnx = f'{directory}/{model}.onnx' 16 | dir_vino = f'{directory}/onnx_vino_{model}' 17 | if not os.path.isfile(path_onnx): 18 | return 19 | if os.path.isdir(dir_vino): 20 | return 21 | path_mo = f'/opt/intel/openvino_{OPENVINO_VERSION}/' 22 | path_mo += 'deployment_tools/model_optimizer/mo.py' 23 | options = list() 24 | options.append(f'--input_model {path_onnx}') 25 | options.append(f'--model_name {model}') 26 | options.append(f'--output_dir {dir_vino}') 27 | options.append('--data_type FP32') 28 | options.append('--input images') 29 | options.append(f'--input_shape [1,3,{imgsize[0]},{imgsize[1]}]') 30 | cmd = ' '.join(['python', path_mo] + options) 31 | cmd = cmd.split() 32 | subprocess.run(cmd) 33 | return 34 | -------------------------------------------------------------------------------- /convertor/yolov5/convert_tf_onnx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- cofing:utf-8 -*- 3 | import os 4 | import subprocess 5 | 6 | 7 | def yolov5_convert_tf_onnx( 8 | model: str, 9 | directory: str 10 | ) -> None: 11 | path_pb = f'{directory}/{model}.pb' 12 | path_onnx = f'{directory}/tf_{model}.onnx' 13 | if not os.path.isfile(path_pb): 14 | return 15 | if os.path.isfile(path_onnx): 16 | return 17 | options = list() 18 | options.append(f'--graphdef {path_pb}') 19 | options.append(f'--output {path_onnx}') 20 | options.append('--inputs x:0') 21 | options.append('--outputs Identity:0') 22 | options.append('--opset 12') 23 | options.append('--inputs-as-nchw x:0') 24 | cmd = ' '.join(['python -m tf2onnx.convert'] + options) 25 | 
cmd = cmd.split() 26 | subprocess.run(cmd) 27 | return 28 | -------------------------------------------------------------------------------- /convertor/yolov5/convert_tf_tflite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from typing import List, Dict 4 | import os 5 | import yaml 6 | # Torch 7 | import torch 8 | # TensorFlow 9 | import tensorflow as tf 10 | # YOLO V5 11 | from models.tf_yolov5 import tf_YoloV5, tf_Detect 12 | from utils.convert_tflite import ( 13 | save_frozen_graph, 14 | convert_tflite_fp32, 15 | convert_tflite_fp16, 16 | convert_tflite_int8, 17 | ) 18 | 19 | 20 | def _convert_tf_keras_model( 21 | model: str, 22 | imgsize: List[int], 23 | model_torch: torch.nn.Module, 24 | nclasses: int, 25 | config: Dict 26 | ) -> tf.keras.Model: 27 | model_tf = tf_YoloV5( 28 | model_torch=model_torch, 29 | nclasses=nclasses, 30 | config=config 31 | ) 32 | m = model_tf.model.layers[-1] 33 | assert isinstance(m, tf_Detect), "the last layer must be Detect" 34 | m.training = False 35 | # dummy run and check output 36 | dummy_image_tf = tf.zeros((1, *imgsize, 3)) # NHWC 37 | y = model_tf.predict(dummy_image_tf) 38 | for yy in y: 39 | _ = yy.numpy() 40 | # create keras model 41 | inputs_keras = tf.keras.Input( 42 | shape=(*imgsize, 3), batch_size=1 43 | ) 44 | outputs_keras = model_tf.predict(inputs=inputs_keras) 45 | model_keras = tf.keras.Model( 46 | inputs=inputs_keras, 47 | outputs=outputs_keras, 48 | name=model 49 | ) 50 | # model_keras.summary() 51 | return model_keras 52 | 53 | 54 | def yolov5_convert_tf_tflite( 55 | model: str, 56 | directory: str, 57 | imgsize: List[int] 58 | ) -> None: 59 | path_weights = f'{directory}/{model}.pt' 60 | if not os.path.isfile(path_weights): 61 | print(f'ERROR: {path_weights} not found') 62 | return 63 | # dummy image 64 | dummy_image_torch = torch.zeros((1, 3, *imgsize)) # NCHW 65 | # Load PyTorch model 66 | model_torch = torch.load( 67 | path_weights, 68 | map_location='cpu' 69 | )['model'].float() # .fuse() 70 | model_torch.eval() 71 | # export=True to export Detect Layer 72 | model_torch.model[-1].export = False 73 | # dry run 74 | y = model_torch(dummy_image_torch) 75 | # number of classes 76 | nclasses = y[0].shape[-1] - 5 77 | # load configuration for the model 78 | path_config = f'models/{model}.yaml' 79 | with open(path_config, 'rt') as rf: 80 | config = yaml.safe_load(rf) 81 | # TensorFlow Keras export 82 | model_keras = _convert_tf_keras_model( 83 | model=model, 84 | imgsize=imgsize, 85 | model_torch=model_torch, 86 | nclasses=nclasses, 87 | config=config 88 | ) 89 | # save as Frozen Graph 90 | save_frozen_graph( 91 | path_pb=f'{directory}/{model}.pb', model_keras=model_keras 92 | ) 93 | # convert TFLite model 94 | path_tflite = f'{directory}/{model}_fp32.tflite' 95 | convert_tflite_fp32(path_tflite=path_tflite, model_keras=model_keras) 96 | path_tflite = f'{directory}/{model}_fp16.tflite' 97 | convert_tflite_fp16(path_tflite=path_tflite, model_keras=model_keras) 98 | path_tflite = f'{directory}/{model}_int8.tflite' 99 | convert_tflite_int8( 100 | path_tflite=path_tflite, 101 | imgsize=imgsize, 102 | model_keras=model_keras 103 | ) 104 | return 105 | -------------------------------------------------------------------------------- /convertor/yolov5/convert_torch_onnx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from typing import List 4 | import os 5 
| import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import onnx 9 | 10 | 11 | # export-friendly version of nn.SiLU() 12 | class SiLU(nn.Module): 13 | @staticmethod 14 | def forward(x): 15 | return x * torch.sigmoid(x) 16 | 17 | 18 | # export-friendly version of nn.Hardswish() 19 | class Hardswish(nn.Module): 20 | @staticmethod 21 | def forward(x): 22 | # for torchscript and CoreML 23 | # return x * F.hardsigmoid(x) 24 | # for torchscript, CoreML and ONNX 25 | return x * F.hardtanh(x + 3, 0., 6.) / 6. 26 | 27 | 28 | def yolov5_convert_torch_onnx( 29 | model: str, 30 | directory: str, 31 | imgsize: List[int], 32 | repo: str = 'ultralytics/yolov5:v4.0' 33 | ) -> None: 34 | path_weight = f'{directory}/{model}.pt' 35 | if not os.path.isfile(path_weight): 36 | return 37 | path_torch = f'{directory}/{model}.pth' 38 | path_onnx = f'{directory}/{model}.onnx' 39 | dummy_image = torch.zeros(1, 3, *imgsize) 40 | model_torch = torch.hub.load(repo, model) 41 | ckpt = torch.load( 42 | path_weight, map_location='cpu' 43 | )['model'] 44 | model_torch.load_state_dict(ckpt.state_dict()) 45 | model_torch.names = ckpt.names 46 | # save state dict 47 | if not os.path.isfile(path_torch): 48 | torch.save(model_torch.state_dict(), path_torch) 49 | if os.path.isfile(path_onnx): 50 | return 51 | model_torch_onnx = model_torch.fuse() 52 | model_torch_onnx.eval() 53 | for k, m in model_torch_onnx.named_modules(): 54 | m._non_persistent_buffers_set = set() 55 | if m.__class__.__name__ == 'Conv': 56 | if isinstance(m.act, nn.Hardswish): 57 | m.act = Hardswish() 58 | elif isinstance(m.act, nn.SiLU): 59 | m.act = SiLU() 60 | model_torch_onnx.model[-1].export = True 61 | _ = model_torch_onnx(dummy_image) 62 | print('Starting ONNX export with onnx %s...' % onnx.__version__) 63 | try: 64 | torch.onnx.export( 65 | model_torch_onnx, dummy_image, path_onnx, 66 | verbose=False, 67 | opset_version=12, 68 | input_names=['images'], 69 | output_names=['output', 'output_1', 'output_2'] 70 | ) 71 | model_onnx = onnx.load(path_onnx) 72 | onnx.checker.check_model(model_onnx) 73 | print('ONNX export success: %s' % path_onnx) 74 | except Exception as e: 75 | print('ONNX export failure: %s' % e) 76 | return 77 | -------------------------------------------------------------------------------- /datasets/.gitignore: -------------------------------------------------------------------------------- 1 | annotations 2 | val2017 3 | sample_dataset 4 | -------------------------------------------------------------------------------- /datasets/README.md: -------------------------------------------------------------------------------- 1 | ## download COCO dataset and create small dataset 2 | 3 | - download COCO dataset (val2017) and its annotations 4 | - `> ./download_coco_val2017.sh` 5 | - create small dataset and convert annotations to json lines format 6 | - `> ./create_small_dataset.py` 7 | 8 | ## usage 9 | 10 | ``` 11 | usage: create_small_dataset.py [-h] [--number NUMBER] [--directory DIRECTORY] 12 | 13 | create small dataset from COCO val2017 dataset 14 | 15 | optional arguments: 16 | -h, --help show this help message and exit 17 | --number NUMBER, -n NUMBER 18 | number of images (default: 10) 19 | --directory DIRECTORY, -d DIRECTORY 20 | directory name (default: "sample_dataset") 21 | ``` 22 | -------------------------------------------------------------------------------- /datasets/create_small_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2
# -*- coding:utf-8 -*- 3 | import os 4 | import glob 5 | import shutil 6 | from collections import defaultdict 7 | import numpy as np 8 | import simplejson as json 9 | import argparse 10 | 11 | 12 | def create_dataset(number: int, directory: str) -> None: 13 | # delete old dataset and create new dataset 14 | if os.path.isdir(directory): 15 | shutil.rmtree(directory) 16 | os.makedirs(directory) 17 | # select images randomly 18 | fns = glob.glob('val2017/*') 19 | fns = [ 20 | os.path.basename(x) for x in fns 21 | if x.endswith(('.png', '.jpg')) 22 | ] 23 | np.random.shuffle(fns) 24 | fns = sorted(fns[:number]) 25 | # convert COCO annotations to the format of `object_detection_metrics` 26 | with open('annotations/instances_val2017.json', 'rt') as rf: 27 | orig_anns = json.load(fp=rf) 28 | bboxes = defaultdict(list) 29 | for ann in orig_anns['annotations']: 30 | image_id = '%012d' % ann['image_id'] 31 | bboxes[image_id].append({ 32 | 'category_id': ann['category_id'], 33 | 'bbox': [float(x) for x in ann['bbox']], 34 | }) 35 | # copy images and dump annotations 36 | new_anns = list() 37 | for fn in fns: 38 | shutil.copy(f'val2017/{fn}', f'{directory}/{fn}') 39 | image_id = os.path.splitext(fn)[0] 40 | new_anns.append({ 41 | 'image_id': image_id, 42 | 'bboxes': bboxes[image_id] 43 | }) 44 | with open(f'{directory}/ground_truths.jsonl', 'wt') as wf: 45 | for ann in new_anns: 46 | wf.write(json.dumps(ann) + '\n') 47 | return 48 | 49 | 50 | if __name__ == '__main__': 51 | if not (os.path.isdir('val2017') and os.path.isdir('annotations')): 52 | raise SystemError('run `download_coco_val2017.sh` first') 53 | parser = argparse.ArgumentParser( 54 | description='create small dataset from COCO val2017 dataset' 55 | ) 56 | parser.add_argument( 57 | '--number', '-n', type=int, default=10, 58 | help='number of images (default: 10)' 59 | ) 60 | parser.add_argument( 61 | '--directory', '-d', type=str, default='sample_dataset', 62 | help='directory name (defalt: "sample_dataset")' 63 | ) 64 | args = parser.parse_args() 65 | create_dataset(**vars(args)) 66 | -------------------------------------------------------------------------------- /datasets/download_coco_val2017.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if [ ! -d "val2017" ]; then 3 | if [ ! -f "val2017.zip" ]; then 4 | wget http://images.cocodataset.org/zips/val2017.zip 5 | fi 6 | unzip val2017.zip 7 | rm -f val2017.zip 8 | fi 9 | if [ ! -d "annotations" ]; then 10 | if [ ! 
-f "annotations_trainval2017.zip" ]; then 11 | wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip 12 | fi 13 | unzip annotations_trainval2017.zip 14 | rm -f annotations_trainval2017.zip 15 | fi 16 | -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import os 4 | import argparse 5 | from detector.base import Config 6 | from detector.yolov5 import DetectorYoloV5 7 | from detector.yolo import DetectorYolo 8 | 9 | 10 | def main(config: Config) -> None: 11 | if config.model.startswith(('yolov3', 'yolov4')): 12 | detector = DetectorYolo(config=config) 13 | elif config.model.startswith('yolov5'): 14 | detector = DetectorYoloV5(config=config) 15 | else: 16 | raise SystemError(f'model is incorrect ({config.model})') 17 | detector.print_header() 18 | for sess in detector.yield_session(): 19 | detector.inference(sess=sess) 20 | detector.print_result(sess=sess) 21 | detector.dump_result(sess=sess) 22 | detector.dump_image(sess=sess) 23 | detector.close() 24 | return 25 | 26 | 27 | if __name__ == '__main__': 28 | parser = argparse.ArgumentParser( 29 | description='detect objects from images' 30 | ) 31 | parser.add_argument( 32 | '-m', '--model', type=str, required=True, choices=[ 33 | 'yolov3-tiny', 'yolov3', 'yolov3-spp', 34 | 'yolov4-tiny', 'yolov4', 'yolov4-csp', 'yolov4x-mish', 35 | 'yolov5s', 'yolov5m', 'yolov5l', 'yolov5x', 36 | ], help='model name' 37 | ) 38 | parser.add_argument( 39 | '-f', '--framework', type=str, required=True, choices=[ 40 | 'torch', 'torch_onnx', 'onnx_vino', 'onnx_tf', 41 | 'tf', 'tflite', 'tf_onnx' 42 | ], help='framework' 43 | ) 44 | parser.add_argument( 45 | '-q', '--quantize', type=str, default='fp32', choices=[ 46 | 'fp32', 'fp16', 'int8' 47 | ], help='quantization mode (TensorFlow Lite only)' 48 | ) 49 | parser.add_argument( 50 | '-d', '--image-dir', type=str, required=True, 51 | help='directory contains images to detect objects' 52 | ) 53 | parser.add_argument( 54 | '-c', '--conf-threshold', type=float, default=0.3, 55 | help='threshold of confidence score to adopt bounding boxes' 56 | ) 57 | parser.add_argument( 58 | '-i', '--iou-threshold', type=float, default=0.45, 59 | help='threshold of IoU to eliminte bounding boxes in NMS' 60 | ) 61 | parser.add_argument( 62 | '--clarify-image', action='store_true', 63 | help='image preprocessing' 64 | ) 65 | parser.add_argument( 66 | '--use-superres', action='store_true', 67 | help='use Super-Resolution at image preprocessing' 68 | ) 69 | parser.add_argument( 70 | '--disable-soft-nms', action='store_true', 71 | help='use hard-NMS instead of soft-NMS' 72 | ) 73 | args = parser.parse_args() 74 | if not os.path.isdir(args.image_dir): 75 | raise ValueError( 76 | f'image directory not found ({args.image_dir})' 77 | ) 78 | if (args.conf_threshold < 0.0) or (args.conf_threshold >= 1.0): 79 | raise ValueError( 80 | f'confidence threshold is incorrect ({args.conf_threshold})' 81 | ) 82 | if (args.iou_threshold < 0.0) or (args.iou_threshold >= 1.0): 83 | raise ValueError( 84 | f'IoU threshold is incorrect ({args.iou_threshold})' 85 | ) 86 | config = Config(**vars(args)) 87 | main(config=config) 88 | -------------------------------------------------------------------------------- /detect_all.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if [ $# != 1 ]; then 3 | echo 
"Usage: $0 [dir]" 4 | exit 1 5 | fi 6 | dir=$1 7 | if [ ! -d ${dir} ]; then 8 | echo "${dir} not found" 9 | exit 1 10 | fi 11 | datanames=(${dir//\// }) 12 | dataname=${datanames[${#datanames[@]}-1]} 13 | models=( 14 | "yolov3-tiny" "yolov3" "yolov3-spp" "yolov4-tiny" "yolov4" 15 | ) 16 | frames=( 17 | "tf" "tf_onnx" 18 | ) 19 | modelsv5=( 20 | "yolov5s" "yolov5m" "yolov5l" "yolov5x" 21 | ) 22 | framesv5=( 23 | "torch" "torch_onnx" "onnx_vino" "onnx_tf" "tf" "tf_onnx" 24 | ) 25 | quants=( 26 | "fp32" "fp16" 27 | ) 28 | for frame in ${frames[@]} ; do 29 | for model in ${models[@]} ; do 30 | rdir="results/${dataname}/${model}_${frame}" 31 | if [ ! -d ${rdir} ] ; then 32 | ./detect.py -m ${model} -f ${frame} -d ${dir} 33 | fi 34 | done 35 | done 36 | for quant in ${quants[@]} ; do 37 | for model in ${models[@]} ; do 38 | rdir="results/${dataname}/${model}_tflite_${quant}" 39 | if [ ! -d ${rdir} ] ; then 40 | ./detect.py -m ${model} -f tflite -q ${quant} -d ${dir} 41 | fi 42 | done 43 | done 44 | for frame in ${framesv5[@]} ; do 45 | for model in ${modelsv5[@]} ; do 46 | rdir="results/${dataname}/${model}_${frame}" 47 | if [ ! -d ${rdir} ] ; then 48 | ./detect.py -m ${model} -f ${frame} -d ${dir} 49 | fi 50 | done 51 | done 52 | for quant in ${quants[@]} ; do 53 | for model in ${modelsv5[@]} ; do 54 | rdir="results/${dataname}/${model}_tflite_${quant}" 55 | if [ ! -d ${rdir} ] ; then 56 | ./detect.py -m ${model} -f tflite -q ${quant} -d ${dir} 57 | fi 58 | done 59 | done 60 | -------------------------------------------------------------------------------- /detector/.gitignore: -------------------------------------------------------------------------------- 1 | TakaoGothic.ttf 2 | -------------------------------------------------------------------------------- /detector/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/detector/__init__.py -------------------------------------------------------------------------------- /download_font.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -f TakaoGothic.ttf ]; then 3 | exit 4 | fi 5 | wget https://launchpad.net/takao-fonts/trunk/15.03/+download/TakaoFonts_00303.01.zip 6 | unzip TakaoFonts_00303.01.zip 7 | mv TakaoFonts_00303.01/TakaoGothic.ttf . 8 | rm -rf TakaoFonts_00303.01 9 | rm -f TakaoFonts_00303.01.zip 10 | -------------------------------------------------------------------------------- /download_superres.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [ ! -d "superres" ]; then 4 | mkdir superres 5 | fi 6 | # if [ ! -f "superres/LapSRN_x8.pb" ]; then 7 | # wget -P superres https://github.com/fannymonori/TF-LapSRN/raw/master/export/LapSRN_x8.pb 8 | # fi 9 | if [ ! -f "superres/ESPCN_x4.pb" ]; then 10 | wget -P superres https://github.com/fannymonori/TF-ESPCN/raw/master/export/ESPCN_x4.pb 11 | fi 12 | -------------------------------------------------------------------------------- /evaluate_all.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if [ $# != 1 ]; then 3 | echo "Usage: $0 [dir]" 4 | exit 1 5 | fi 6 | dir=$1 7 | if [ ! 
-d ${dir} ]; then 8 | echo "${dir} not found" 9 | exit 1 10 | fi 11 | datanames=(${dir//\// }) 12 | dataname=${datanames[${#datanames[@]}-1]} 13 | models=( 14 | "yolov3-tiny" "yolov3" "yolov3-spp" "yolov4-tiny" "yolov4" 15 | ) 16 | frames=( 17 | "tf" "tf_onnx" 18 | ) 19 | modelsv5=( 20 | "yolov5s" "yolov5m" "yolov5l" "yolov5x" 21 | ) 22 | framesv5=( 23 | "torch" "torch_onnx" "onnx_vino" "onnx_tf" "tf" "tf_onnx" 24 | ) 25 | quants=( 26 | "fp32" "fp16" 27 | ) 28 | for frame in ${frames[@]} ; do 29 | for model in ${models[@]} ; do 30 | object_detection_metrics -t ${dir}/ground_truths.jsonl -p results/${dataname}/${model}_${frame}/predictions.jsonl 31 | done 32 | done 33 | for quant in ${quants[@]} ; do 34 | for model in ${models[@]} ; do 35 | object_detection_metrics -t ${dir}/ground_truths.jsonl -p results/${dataname}/${model}_tflite_${quant}/predictions.jsonl 36 | done 37 | done 38 | for frame in ${framesv5[@]} ; do 39 | for model in ${modelsv5[@]} ; do 40 | object_detection_metrics -t ${dir}/ground_truths.jsonl -p results/${dataname}/${model}_${frame}/predictions.jsonl 41 | done 42 | done 43 | for quant in ${quants[@]} ; do 44 | for model in ${modelsv5[@]} ; do 45 | object_detection_metrics -t ${dir}/ground_truths.jsonl -p results/${dataname}/${model}_tflite_${quant}/predictions.jsonl 46 | done 47 | done 48 | -------------------------------------------------------------------------------- /ipynb/map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/ipynb/map.png -------------------------------------------------------------------------------- /ipynb/time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/ipynb/time.png -------------------------------------------------------------------------------- /ipynb/time_vs_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/ipynb/time_vs_map.png -------------------------------------------------------------------------------- /labels/coco_labels.txt: -------------------------------------------------------------------------------- 1 | 0 person 2 | 1 bicycle 3 | 2 car 4 | 3 motorcycle 5 | 4 airplane 6 | 5 bus 7 | 6 train 8 | 7 truck 9 | 8 boat 10 | 9 traffic light 11 | 10 fire hydrant 12 | 12 stop sign 13 | 13 parking meter 14 | 14 bench 15 | 15 bird 16 | 16 cat 17 | 17 dog 18 | 18 horse 19 | 19 sheep 20 | 20 cow 21 | 21 elephant 22 | 22 bear 23 | 23 zebra 24 | 24 giraffe 25 | 26 backpack 26 | 27 umbrella 27 | 30 handbag 28 | 31 tie 29 | 32 suitcase 30 | 33 frisbee 31 | 34 skis 32 | 35 snowboard 33 | 36 sports ball 34 | 37 kite 35 | 38 baseball bat 36 | 39 baseball glove 37 | 40 skateboard 38 | 41 surfboard 39 | 42 tennis racket 40 | 43 bottle 41 | 45 wine glass 42 | 46 cup 43 | 47 fork 44 | 48 knife 45 | 49 spoon 46 | 50 bowl 47 | 51 banana 48 | 52 apple 49 | 53 sandwich 50 | 54 orange 51 | 55 broccoli 52 | 56 carrot 53 | 57 hot dog 54 | 58 pizza 55 | 59 donut 56 | 60 cake 57 | 61 chair 58 | 62 couch 59 | 63 potted plant 60 | 64 bed 61 | 66 dining table 62 | 69 toilet 63 | 71 tv 64 | 72 laptop 65 | 73 mouse 66 | 74 remote 67 | 75 keyboard 68 | 76 cell phone 69 | 77 microwave 70 | 78 oven 71 | 79 toaster 72 | 80 sink 73 | 81 
refrigerator 74 | 83 book 75 | 84 clock 76 | 85 vase 77 | 86 scissors 78 | 87 teddy bear 79 | 88 hair drier 80 | 89 toothbrush -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/models/__init__.py -------------------------------------------------------------------------------- /models/experimental.py: -------------------------------------------------------------------------------- 1 | # This file contains experimental modules 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from models.common import Conv, DWConv 8 | from utils.google_utils import attempt_download 9 | 10 | 11 | class CrossConv(nn.Module): 12 | # Cross Convolution Downsample 13 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 14 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 15 | super(CrossConv, self).__init__() 16 | c_ = int(c2 * e) # hidden channels 17 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 18 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 19 | self.add = shortcut and c1 == c2 20 | 21 | def forward(self, x): 22 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 23 | 24 | 25 | class Sum(nn.Module): 26 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 27 | def __init__(self, n, weight=False): # n: number of inputs 28 | super(Sum, self).__init__() 29 | self.weight = weight # apply weights boolean 30 | self.iter = range(n - 1) # iter object 31 | if weight: 32 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights 33 | 34 | def forward(self, x): 35 | y = x[0] # no weight 36 | if self.weight: 37 | w = torch.sigmoid(self.w) * 2 38 | for i in self.iter: 39 | y = y + x[i + 1] * w[i] 40 | else: 41 | for i in self.iter: 42 | y = y + x[i + 1] 43 | return y 44 | 45 | 46 | class GhostConv(nn.Module): 47 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 48 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 49 | super(GhostConv, self).__init__() 50 | c_ = c2 // 2 # hidden channels 51 | self.cv1 = Conv(c1, c_, k, s, None, g, act) 52 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) 53 | 54 | def forward(self, x): 55 | y = self.cv1(x) 56 | return torch.cat([y, self.cv2(y)], 1) 57 | 58 | 59 | class GhostBottleneck(nn.Module): 60 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 61 | def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride 62 | super(GhostBottleneck, self).__init__() 63 | c_ = c2 // 2 64 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 65 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 66 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 67 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 68 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 69 | 70 | def forward(self, x): 71 | return self.conv(x) + self.shortcut(x) 72 | 73 | 74 | class MixConv2d(nn.Module): 75 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 76 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 77 | super(MixConv2d, self).__init__() 78 | groups = len(k) 79 | if equal_ch: # equal c_ per group 80 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 81 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 82 | 
else: # equal weight.numel() per group 83 | b = [c2] + [0] * groups 84 | a = np.eye(groups + 1, groups, k=-1) 85 | a -= np.roll(a, 1, axis=1) 86 | a *= np.array(k) ** 2 87 | a[0] = 1 88 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 89 | 90 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 91 | self.bn = nn.BatchNorm2d(c2) 92 | self.act = nn.LeakyReLU(0.1, inplace=True) 93 | 94 | def forward(self, x): 95 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 96 | 97 | 98 | class Ensemble(nn.ModuleList): 99 | # Ensemble of models 100 | def __init__(self): 101 | super(Ensemble, self).__init__() 102 | 103 | def forward(self, x, augment=False): 104 | y = [] 105 | for module in self: 106 | y.append(module(x, augment)[0]) 107 | # y = torch.stack(y).max(0)[0] # max ensemble 108 | # y = torch.stack(y).mean(0) # mean ensemble 109 | y = torch.cat(y, 1) # nms ensemble 110 | return y, None # inference, train output 111 | 112 | 113 | def attempt_load(weights, map_location=None): 114 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 115 | model = Ensemble() 116 | for w in weights if isinstance(weights, list) else [weights]: 117 | attempt_download(w) 118 | model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model 119 | 120 | # Compatibility updates 121 | for m in model.modules(): 122 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: 123 | m.inplace = True # pytorch 1.7.0 compatibility 124 | elif type(m) is Conv: 125 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 126 | 127 | if len(model) == 1: 128 | return model[-1] # return model 129 | else: 130 | print('Ensemble created with %s\n' % weights) 131 | for k in ['names', 'stride']: 132 | setattr(model, k, getattr(model[-1], k)) 133 | return model # return ensemble 134 | -------------------------------------------------------------------------------- /models/export.py: -------------------------------------------------------------------------------- 1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats 2 | 3 | Usage: 4 | $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1 5 | """ 6 | 7 | import argparse 8 | import sys 9 | import time 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | import models 17 | from models.experimental import attempt_load 18 | from utils.activations import Hardswish, SiLU 19 | from utils.general import set_logging, check_img_size 20 | 21 | if __name__ == '__main__': 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/ 24 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width 25 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 26 | opt = parser.parse_args() 27 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand 28 | print(opt) 29 | set_logging() 30 | t = time.time() 31 | 32 | # Load PyTorch model 33 | model = attempt_load(opt.weights, map_location=torch.device('cpu')) # load FP32 model 34 | labels = model.names 35 | 36 | # Checks 37 | gs = int(max(model.stride)) # grid size (max stride) 38 | opt.img_size = [check_img_size(x, gs) for x in 
opt.img_size] # verify img_size are gs-multiples 39 | 40 | # Input 41 | img = torch.zeros(opt.batch_size, 3, *opt.img_size) # image size(1,3,320,192) iDetection 42 | 43 | # Update model 44 | for k, m in model.named_modules(): 45 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 46 | if isinstance(m, models.common.Conv): # assign export-friendly activations 47 | if isinstance(m.act, nn.Hardswish): 48 | m.act = Hardswish() 49 | elif isinstance(m.act, nn.SiLU): 50 | m.act = SiLU() 51 | # elif isinstance(m, models.yolo.Detect): 52 | # m.forward = m.forward_export # assign forward (optional) 53 | model.model[-1].export = True # set Detect() layer export=True 54 | y = model(img) # dry run 55 | 56 | # TorchScript export 57 | try: 58 | print('\nStarting TorchScript export with torch %s...' % torch.__version__) 59 | f = opt.weights.replace('.pt', '.torchscript.pt') # filename 60 | ts = torch.jit.trace(model, img) 61 | ts.save(f) 62 | print('TorchScript export success, saved as %s' % f) 63 | except Exception as e: 64 | print('TorchScript export failure: %s' % e) 65 | 66 | # ONNX export 67 | try: 68 | import onnx 69 | 70 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__) 71 | f = opt.weights.replace('.pt', '.onnx') # filename 72 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], 73 | output_names=['classes', 'boxes'] if y is None else ['output']) 74 | 75 | # Checks 76 | onnx_model = onnx.load(f) # load onnx model 77 | onnx.checker.check_model(onnx_model) # check onnx model 78 | # print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 79 | print('ONNX export success, saved as %s' % f) 80 | except Exception as e: 81 | print('ONNX export failure: %s' % e) 82 | 83 | # CoreML export 84 | try: 85 | import coremltools as ct 86 | 87 | print('\nStarting CoreML export with coremltools %s...' % ct.__version__) 88 | # convert model from torchscript and apply pixel scaling as per detect.py 89 | model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) 90 | f = opt.weights.replace('.pt', '.mlmodel') # filename 91 | model.save(f) 92 | print('CoreML export success, saved as %s' % f) 93 | except Exception as e: 94 | print('CoreML export failure: %s' % e) 95 | 96 | # Finish 97 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' 
% (time.time() - t)) 98 | -------------------------------------------------------------------------------- /models/yolo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import sys 4 | from copy import deepcopy 5 | 6 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 7 | logger = logging.getLogger(__name__) 8 | 9 | from models.common import * 10 | from models.experimental import * 11 | from utils.autoanchor import check_anchor_order 12 | from utils.general import make_divisible, check_file, set_logging 13 | from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ 14 | select_device, copy_attr 15 | 16 | try: 17 | import thop # for FLOPS computation 18 | except ImportError: 19 | thop = None 20 | 21 | 22 | class Detect(nn.Module): 23 | stride = None # strides computed during build 24 | export = False # onnx export 25 | 26 | def __init__(self, nc=80, anchors=(), ch=()): # detection layer 27 | super(Detect, self).__init__() 28 | self.nc = nc # number of classes 29 | self.no = nc + 5 # number of outputs per anchor 30 | self.nl = len(anchors) # number of detection layers 31 | self.na = len(anchors[0]) // 2 # number of anchors 32 | self.grid = [torch.zeros(1)] * self.nl # init grid 33 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 34 | self.register_buffer('anchors', a) # shape(nl,na,2) 35 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 36 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 37 | 38 | def forward(self, x): 39 | # x = x.copy() # for profiling 40 | z = [] # inference output 41 | self.training |= self.export 42 | for i in range(self.nl): 43 | x[i] = self.m[i](x[i]) # conv 44 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 45 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 46 | 47 | if not self.training: # inference 48 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 49 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 50 | 51 | y = x[i].sigmoid() 52 | y[..., 0:2] = (y[..., 0:2] * 2. 
- 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy 53 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 54 | z.append(y.view(bs, -1, self.no)) 55 | 56 | return x if self.training else (torch.cat(z, 1), x) 57 | 58 | @staticmethod 59 | def _make_grid(nx=20, ny=20): 60 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 61 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 62 | 63 | 64 | class Model(nn.Module): 65 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None): # model, input channels, number of classes 66 | super(Model, self).__init__() 67 | if isinstance(cfg, dict): 68 | self.yaml = cfg # model dict 69 | else: # is *.yaml 70 | import yaml # for torch hub 71 | self.yaml_file = Path(cfg).name 72 | with open(cfg) as f: 73 | self.yaml = yaml.load(f, Loader=yaml.SafeLoader) # model dict 74 | 75 | # Define model 76 | ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels 77 | if nc and nc != self.yaml['nc']: 78 | logger.info('Overriding model.yaml nc=%g with nc=%g' % (self.yaml['nc'], nc)) 79 | self.yaml['nc'] = nc # override yaml value 80 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist 81 | self.names = [str(i) for i in range(self.yaml['nc'])] # default names 82 | # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) 83 | 84 | # Build strides, anchors 85 | m = self.model[-1] # Detect() 86 | if isinstance(m, Detect): 87 | s = 256 # 2x min stride 88 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 89 | m.anchors /= m.stride.view(-1, 1, 1) 90 | check_anchor_order(m) 91 | self.stride = m.stride 92 | self._initialize_biases() # only run once 93 | # print('Strides: %s' % m.stride.tolist()) 94 | 95 | # Init weights, biases 96 | initialize_weights(self) 97 | self.info() 98 | logger.info('') 99 | 100 | def forward(self, x, augment=False, profile=False): 101 | if augment: 102 | img_size = x.shape[-2:] # height, width 103 | s = [1, 0.83, 0.67] # scales 104 | f = [None, 3, None] # flips (2-ud, 3-lr) 105 | y = [] # outputs 106 | for si, fi in zip(s, f): 107 | xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) 108 | yi = self.forward_once(xi)[0] # forward 109 | # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save 110 | yi[..., :4] /= si # de-scale 111 | if fi == 2: 112 | yi[..., 1] = img_size[0] - 1 - yi[..., 1] # de-flip ud 113 | elif fi == 3: 114 | yi[..., 0] = img_size[1] - 1 - yi[..., 0] # de-flip lr 115 | y.append(yi) 116 | return torch.cat(y, 1), None # augmented inference, train 117 | else: 118 | return self.forward_once(x, profile) # single-scale inference, train 119 | 120 | def forward_once(self, x, profile=False): 121 | y, dt = [], [] # outputs 122 | for m in self.model: 123 | if m.f != -1: # if not from previous layer 124 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 125 | 126 | if profile: 127 | o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS 128 | t = time_synchronized() 129 | for _ in range(10): 130 | _ = m(x) 131 | dt.append((time_synchronized() - t) * 100) 132 | print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) 133 | 134 | x = m(x) # run 135 | y.append(x if m.i in self.save else None) # save output 136 | 137 | if profile: 138 | print('%.1fms total' % sum(dt)) 139 | return x 140 | 141 | def _initialize_biases(self, cf=None): # initialize biases into 
Detect(), cf is class frequency 142 | # https://arxiv.org/abs/1708.02002 section 3.3 143 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 144 | m = self.model[-1] # Detect() module 145 | for mi, s in zip(m.m, m.stride): # from 146 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 147 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 148 | b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 149 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 150 | 151 | def _print_biases(self): 152 | m = self.model[-1] # Detect() module 153 | for mi in m.m: # from 154 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 155 | print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 156 | 157 | # def _print_weights(self): 158 | # for m in self.model.modules(): 159 | # if type(m) is Bottleneck: 160 | # print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights 161 | 162 | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers 163 | print('Fusing layers... ') 164 | for m in self.model.modules(): 165 | if type(m) is Conv and hasattr(m, 'bn'): 166 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 167 | delattr(m, 'bn') # remove batchnorm 168 | m.forward = m.fuseforward # update forward 169 | self.info() 170 | return self 171 | 172 | def nms(self, mode=True): # add or remove NMS module 173 | present = type(self.model[-1]) is NMS # last layer is NMS 174 | if mode and not present: 175 | print('Adding NMS... ') 176 | m = NMS() # module 177 | m.f = -1 # from 178 | m.i = self.model[-1].i + 1 # index 179 | self.model.add_module(name='%s' % m.i, module=m) # add 180 | self.eval() 181 | elif not mode and present: 182 | print('Removing NMS... ') 183 | self.model = self.model[:-1] # remove 184 | return self 185 | 186 | def autoshape(self): # add autoShape module 187 | print('Adding autoShape... 
') 188 | m = autoShape(self) # wrap model 189 | copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes 190 | return m 191 | 192 | def info(self, verbose=False, img_size=640): # print model information 193 | model_info(self, verbose, img_size) 194 | 195 | 196 | def parse_model(d, ch): # model_dict, input_channels(3) 197 | logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) 198 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 199 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 200 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 201 | 202 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 203 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 204 | m = eval(m) if isinstance(m, str) else m # eval strings 205 | for j, a in enumerate(args): 206 | try: 207 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 208 | except: 209 | pass 210 | 211 | n = max(round(n * gd), 1) if n > 1 else n # depth gain 212 | if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, 213 | C3]: 214 | c1, c2 = ch[f], args[0] 215 | if c2 != no: # if not output 216 | c2 = make_divisible(c2 * gw, 8) 217 | 218 | args = [c1, c2, *args[1:]] 219 | if m in [BottleneckCSP, C3]: 220 | args.insert(2, n) # number of repeats 221 | n = 1 222 | elif m is nn.BatchNorm2d: 223 | args = [ch[f]] 224 | elif m is Concat: 225 | c2 = sum([ch[x] for x in f]) 226 | elif m is Detect: 227 | args.append([ch[x] for x in f]) 228 | if isinstance(args[1], int): # number of anchors 229 | args[1] = [list(range(args[1] * 2))] * len(f) 230 | elif m is Contract: 231 | c2 = ch[f] * args[0] ** 2 232 | elif m is Expand: 233 | c2 = ch[f] // args[0] ** 2 234 | else: 235 | c2 = ch[f] 236 | 237 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module 238 | t = str(m)[8:-2].replace('__main__.', '') # module type 239 | np = sum([x.numel() for x in m_.parameters()]) # number params 240 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 241 | logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print 242 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 243 | layers.append(m_) 244 | if i == 0: 245 | ch = [] 246 | ch.append(c2) 247 | return nn.Sequential(*layers), sorted(save) 248 | 249 | 250 | if __name__ == '__main__': 251 | parser = argparse.ArgumentParser() 252 | parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml') 253 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 254 | opt = parser.parse_args() 255 | opt.cfg = check_file(opt.cfg) # check file 256 | set_logging() 257 | device = select_device(opt.device) 258 | 259 | # Create model 260 | model = Model(opt.cfg).to(device) 261 | model.train() 262 | 263 | # Profile 264 | # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device) 265 | # y = model(img, profile=True) 266 | 267 | # Tensorboard 268 | # from torch.utils.tensorboard import SummaryWriter 269 | # tb_writer = SummaryWriter() 270 | # print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/") 271 | # tb_writer.add_graph(model.model, img) # add model to tensorboard 272 | # tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard 273 | -------------------------------------------------------------------------------- /models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.67 # model depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 
| [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5x.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.33 # model depth multiple 4 | width_multiple: 1.25 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 
17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | matplotlib 3 | requests 4 | opencv-contrib-python 5 | Pillow 6 | torch 7 | torchvision 8 | tensorflow 9 | tensorflow-addons 10 | onnx 11 | onnxruntime 12 | onnx-tf 13 | tf2onnx 14 | PyYAML 15 | simplejson 16 | tqdm 17 | -------------------------------------------------------------------------------- /tools/convert_yolo_config_csv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from collections import OrderedDict 4 | import configparser 5 | import os 6 | import pandas as pd 7 | import argparse 8 | 9 | 10 | class LayerDict(OrderedDict): 11 | def __init__(self): 12 | super().__init__() 13 | self.seq = 0 14 | 15 | def __setitem__(self, key, val): 16 | if key.startswith('#'): 17 | return 18 | if isinstance(val, dict): 19 | if key == 'net': 20 | return 21 | self.seq += 1 22 | key = f'{self.seq}:{key}' 23 | super().__setitem__(key, val) 24 | return 25 | 26 | 27 | def main(config: str) -> None: 28 | if not os.path.isfile(config): 29 | raise ValueError(f'config({config}) is not exists') 30 | cfg = configparser.ConfigParser( 31 | defaults=None, 32 | dict_type=LayerDict, 33 | strict=False, 34 | empty_lines_in_values=False, 35 | comment_prefixes=(';', '#'), 36 | allow_no_value=True 37 | ) 38 | cfg.read(config) 39 | layers = list() 40 | convs = 0 41 | for i, section in enumerate(cfg.sections()): 42 | layer_type = section.split(':')[1] 43 | layer = dict(cfg.items(section)) 44 | layer['no.'] = i 45 | layer['type'] = layer_type 46 | if layer_type == 'convolutional': 47 | layer['convs'] = convs 48 | convs += 1 49 | layers.append(layer) 50 | df = pd.DataFrame(layers) 51 | needed_columns = [ 52 | 'type', 'convs', 'filters', 'size', 'stride', 'pad', 53 | 'activation', 'batch_normalize', 54 | 'no.', 'from', 'layers' 55 | ] 56 | for nc in needed_columns: 57 | if nc not in list(df.columns): 58 | df[nc] = [None] * df.shape[0] 59 | df = df[needed_columns] 60 | csv = config.replace('.cfg', '.csv') 61 | df.to_csv(csv, encoding='utf_8_sig', index=False) 62 | return 63 | 64 | 65 | if __name__ == '__main__': 66 | parser = argparse.ArgumentParser( 67 | description='convert YOLO config file(.cfg) to csv (utf-8 BOM)' 68 | ) 69 | parser.add_argument( 70 | 'config', type=str, help='YOLO config file' 71 | ) 72 | args = parser.parse_args() 73 | main(**vars(args)) 74 | -------------------------------------------------------------------------------- /tools/yolov3-spp.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | 
batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 
250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | 
batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=255 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=80 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | 
pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=255 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 729 | classes=80 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=255 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 816 | classes=80 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | 823 | -------------------------------------------------------------------------------- /tools/yolov3-spp.csv: -------------------------------------------------------------------------------- 1 | type,convs,filters,size,stride,pad,activation,batch_normalize,no.,from,layers 2 | convolutional,0.0,32,3,1,1,leaky,1,0,, 3 | convolutional,1.0,64,3,2,1,leaky,1,1,, 4 | convolutional,2.0,32,1,1,1,leaky,1,2,, 5 | convolutional,3.0,64,3,1,1,leaky,1,3,, 6 | shortcut,,,,,,linear,,4,-3, 7 | convolutional,4.0,128,3,2,1,leaky,1,5,, 8 | convolutional,5.0,64,1,1,1,leaky,1,6,, 9 | convolutional,6.0,128,3,1,1,leaky,1,7,, 10 | shortcut,,,,,,linear,,8,-3, 11 | convolutional,7.0,64,1,1,1,leaky,1,9,, 12 | convolutional,8.0,128,3,1,1,leaky,1,10,, 13 | shortcut,,,,,,linear,,11,-3, 14 | convolutional,9.0,256,3,2,1,leaky,1,12,, 15 | convolutional,10.0,128,1,1,1,leaky,1,13,, 16 | convolutional,11.0,256,3,1,1,leaky,1,14,, 17 | shortcut,,,,,,linear,,15,-3, 18 | convolutional,12.0,128,1,1,1,leaky,1,16,, 19 | convolutional,13.0,256,3,1,1,leaky,1,17,, 20 | shortcut,,,,,,linear,,18,-3, 21 | convolutional,14.0,128,1,1,1,leaky,1,19,, 22 | convolutional,15.0,256,3,1,1,leaky,1,20,, 23 | shortcut,,,,,,linear,,21,-3, 24 | convolutional,16.0,128,1,1,1,leaky,1,22,, 25 | convolutional,17.0,256,3,1,1,leaky,1,23,, 26 | shortcut,,,,,,linear,,24,-3, 27 | 
convolutional,18.0,128,1,1,1,leaky,1,25,, 28 | convolutional,19.0,256,3,1,1,leaky,1,26,, 29 | shortcut,,,,,,linear,,27,-3, 30 | convolutional,20.0,128,1,1,1,leaky,1,28,, 31 | convolutional,21.0,256,3,1,1,leaky,1,29,, 32 | shortcut,,,,,,linear,,30,-3, 33 | convolutional,22.0,128,1,1,1,leaky,1,31,, 34 | convolutional,23.0,256,3,1,1,leaky,1,32,, 35 | shortcut,,,,,,linear,,33,-3, 36 | convolutional,24.0,128,1,1,1,leaky,1,34,, 37 | convolutional,25.0,256,3,1,1,leaky,1,35,, 38 | shortcut,,,,,,linear,,36,-3, 39 | convolutional,26.0,512,3,2,1,leaky,1,37,, 40 | convolutional,27.0,256,1,1,1,leaky,1,38,, 41 | convolutional,28.0,512,3,1,1,leaky,1,39,, 42 | shortcut,,,,,,linear,,40,-3, 43 | convolutional,29.0,256,1,1,1,leaky,1,41,, 44 | convolutional,30.0,512,3,1,1,leaky,1,42,, 45 | shortcut,,,,,,linear,,43,-3, 46 | convolutional,31.0,256,1,1,1,leaky,1,44,, 47 | convolutional,32.0,512,3,1,1,leaky,1,45,, 48 | shortcut,,,,,,linear,,46,-3, 49 | convolutional,33.0,256,1,1,1,leaky,1,47,, 50 | convolutional,34.0,512,3,1,1,leaky,1,48,, 51 | shortcut,,,,,,linear,,49,-3, 52 | convolutional,35.0,256,1,1,1,leaky,1,50,, 53 | convolutional,36.0,512,3,1,1,leaky,1,51,, 54 | shortcut,,,,,,linear,,52,-3, 55 | convolutional,37.0,256,1,1,1,leaky,1,53,, 56 | convolutional,38.0,512,3,1,1,leaky,1,54,, 57 | shortcut,,,,,,linear,,55,-3, 58 | convolutional,39.0,256,1,1,1,leaky,1,56,, 59 | convolutional,40.0,512,3,1,1,leaky,1,57,, 60 | shortcut,,,,,,linear,,58,-3, 61 | convolutional,41.0,256,1,1,1,leaky,1,59,, 62 | convolutional,42.0,512,3,1,1,leaky,1,60,, 63 | shortcut,,,,,,linear,,61,-3, 64 | convolutional,43.0,1024,3,2,1,leaky,1,62,, 65 | convolutional,44.0,512,1,1,1,leaky,1,63,, 66 | convolutional,45.0,1024,3,1,1,leaky,1,64,, 67 | shortcut,,,,,,linear,,65,-3, 68 | convolutional,46.0,512,1,1,1,leaky,1,66,, 69 | convolutional,47.0,1024,3,1,1,leaky,1,67,, 70 | shortcut,,,,,,linear,,68,-3, 71 | convolutional,48.0,512,1,1,1,leaky,1,69,, 72 | convolutional,49.0,1024,3,1,1,leaky,1,70,, 73 | shortcut,,,,,,linear,,71,-3, 74 | convolutional,50.0,512,1,1,1,leaky,1,72,, 75 | convolutional,51.0,1024,3,1,1,leaky,1,73,, 76 | shortcut,,,,,,linear,,74,-3, 77 | convolutional,52.0,512,1,1,1,leaky,1,75,, 78 | convolutional,53.0,1024,3,1,1,leaky,1,76,, 79 | convolutional,54.0,512,1,1,1,leaky,1,77,, 80 | maxpool,,,5,1,,,,78,, 81 | route,,,,,,,,79,,-2 82 | maxpool,,,9,1,,,,80,, 83 | route,,,,,,,,81,,-4 84 | maxpool,,,13,1,,,,82,, 85 | route,,,,,,,,83,,"-1,-3,-5,-6" 86 | convolutional,55.0,512,1,1,1,leaky,1,84,, 87 | convolutional,56.0,1024,3,1,1,leaky,1,85,, 88 | convolutional,57.0,512,1,1,1,leaky,1,86,, 89 | convolutional,58.0,1024,3,1,1,leaky,1,87,, 90 | convolutional,59.0,255,1,1,1,linear,,88,, 91 | yolo,,,,,,,,89,, 92 | route,,,,,,,,90,,-4 93 | convolutional,60.0,256,1,1,1,leaky,1,91,, 94 | upsample,,,,2,,,,92,, 95 | route,,,,,,,,93,,"-1, 61" 96 | convolutional,61.0,256,1,1,1,leaky,1,94,, 97 | convolutional,62.0,512,3,1,1,leaky,1,95,, 98 | convolutional,63.0,256,1,1,1,leaky,1,96,, 99 | convolutional,64.0,512,3,1,1,leaky,1,97,, 100 | convolutional,65.0,256,1,1,1,leaky,1,98,, 101 | convolutional,66.0,512,3,1,1,leaky,1,99,, 102 | convolutional,67.0,255,1,1,1,linear,,100,, 103 | yolo,,,,,,,,101,, 104 | route,,,,,,,,102,,-4 105 | convolutional,68.0,128,1,1,1,leaky,1,103,, 106 | upsample,,,,2,,,,104,, 107 | route,,,,,,,,105,,"-1, 36" 108 | convolutional,69.0,128,1,1,1,leaky,1,106,, 109 | convolutional,70.0,256,3,1,1,leaky,1,107,, 110 | convolutional,71.0,128,1,1,1,leaky,1,108,, 111 | convolutional,72.0,256,3,1,1,leaky,1,109,, 112 | 
convolutional,73.0,128,1,1,1,leaky,1,110,, 113 | convolutional,74.0,256,3,1,1,leaky,1,111,, 114 | convolutional,75.0,255,1,1,1,linear,,112,, 115 | yolo,,,,,,,,113,, 116 | -------------------------------------------------------------------------------- /tools/yolov3-spp.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/tools/yolov3-spp.xlsx -------------------------------------------------------------------------------- /tools/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | 
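Note: the per-layer .csv that sits next to each Darknet .cfg under tools/ is produced by tools/convert_yolo_config_csv.py (listed earlier in this dump). A minimal sketch of regenerating tools/yolov3-tiny.csv — an illustration, not a file in this repository — assuming the repository root as the working directory and pandas installed per requirements.txt; it simply drives the script's own argparse entry point:

import runpy
import sys

# Equivalent to running: python tools/convert_yolo_config_csv.py tools/yolov3-tiny.cfg
sys.argv = ['convert_yolo_config_csv.py', 'tools/yolov3-tiny.cfg']
runpy.run_path('tools/convert_yolo_config_csv.py', run_name='__main__')
# main() replaces '.cfg' with '.csv', so this writes tools/yolov3-tiny.csv
# (UTF-8 with BOM), one row per layer section ([net] is skipped by LayerDict),
# with the same columns as the file that follows.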
-------------------------------------------------------------------------------- /tools/yolov3-tiny.csv: -------------------------------------------------------------------------------- 1 | type,convs,filters,size,stride,pad,activation,batch_normalize,no.,from,layers 2 | convolutional,0.0,16,3,1,1,leaky,1,0,, 3 | maxpool,,,2,2,,,,1,, 4 | convolutional,1.0,32,3,1,1,leaky,1,2,, 5 | maxpool,,,2,2,,,,3,, 6 | convolutional,2.0,64,3,1,1,leaky,1,4,, 7 | maxpool,,,2,2,,,,5,, 8 | convolutional,3.0,128,3,1,1,leaky,1,6,, 9 | maxpool,,,2,2,,,,7,, 10 | convolutional,4.0,256,3,1,1,leaky,1,8,, 11 | maxpool,,,2,2,,,,9,, 12 | convolutional,5.0,512,3,1,1,leaky,1,10,, 13 | maxpool,,,2,1,,,,11,, 14 | convolutional,6.0,1024,3,1,1,leaky,1,12,, 15 | convolutional,7.0,256,1,1,1,leaky,1,13,, 16 | convolutional,8.0,512,3,1,1,leaky,1,14,, 17 | convolutional,9.0,255,1,1,1,linear,,15,, 18 | yolo,,,,,,,,16,, 19 | route,,,,,,,,17,,-4 20 | convolutional,10.0,128,1,1,1,leaky,1,18,, 21 | upsample,,,,2,,,,19,, 22 | route,,,,,,,,20,,"-1, 8" 23 | convolutional,11.0,256,3,1,1,leaky,1,21,, 24 | convolutional,12.0,255,1,1,1,linear,,22,, 25 | yolo,,,,,,,,23,, 26 | -------------------------------------------------------------------------------- /tools/yolov3-tiny.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/tools/yolov3-tiny.xlsx -------------------------------------------------------------------------------- /tools/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=16 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | 
[convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | 
activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 
561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | 
activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | 790 | -------------------------------------------------------------------------------- /tools/yolov3.csv: -------------------------------------------------------------------------------- 1 | type,convs,filters,size,stride,pad,activation,batch_normalize,no.,from,layers 2 | convolutional,0.0,32,3,1,1,leaky,1,0,, 3 | convolutional,1.0,64,3,2,1,leaky,1,1,, 4 | convolutional,2.0,32,1,1,1,leaky,1,2,, 5 | convolutional,3.0,64,3,1,1,leaky,1,3,, 6 | shortcut,,,,,,linear,,4,-3, 7 | convolutional,4.0,128,3,2,1,leaky,1,5,, 8 | convolutional,5.0,64,1,1,1,leaky,1,6,, 9 | convolutional,6.0,128,3,1,1,leaky,1,7,, 10 | shortcut,,,,,,linear,,8,-3, 11 | convolutional,7.0,64,1,1,1,leaky,1,9,, 12 | convolutional,8.0,128,3,1,1,leaky,1,10,, 13 | shortcut,,,,,,linear,,11,-3, 14 | convolutional,9.0,256,3,2,1,leaky,1,12,, 15 | convolutional,10.0,128,1,1,1,leaky,1,13,, 16 | convolutional,11.0,256,3,1,1,leaky,1,14,, 17 | shortcut,,,,,,linear,,15,-3, 18 | convolutional,12.0,128,1,1,1,leaky,1,16,, 19 | convolutional,13.0,256,3,1,1,leaky,1,17,, 20 | shortcut,,,,,,linear,,18,-3, 21 | convolutional,14.0,128,1,1,1,leaky,1,19,, 22 | convolutional,15.0,256,3,1,1,leaky,1,20,, 23 | shortcut,,,,,,linear,,21,-3, 24 | convolutional,16.0,128,1,1,1,leaky,1,22,, 25 | convolutional,17.0,256,3,1,1,leaky,1,23,, 26 | shortcut,,,,,,linear,,24,-3, 27 | convolutional,18.0,128,1,1,1,leaky,1,25,, 28 | convolutional,19.0,256,3,1,1,leaky,1,26,, 29 | shortcut,,,,,,linear,,27,-3, 30 | convolutional,20.0,128,1,1,1,leaky,1,28,, 31 | convolutional,21.0,256,3,1,1,leaky,1,29,, 32 | shortcut,,,,,,linear,,30,-3, 33 | convolutional,22.0,128,1,1,1,leaky,1,31,, 34 | convolutional,23.0,256,3,1,1,leaky,1,32,, 35 | shortcut,,,,,,linear,,33,-3, 36 | convolutional,24.0,128,1,1,1,leaky,1,34,, 37 | convolutional,25.0,256,3,1,1,leaky,1,35,, 38 | shortcut,,,,,,linear,,36,-3, 39 | convolutional,26.0,512,3,2,1,leaky,1,37,, 40 | convolutional,27.0,256,1,1,1,leaky,1,38,, 41 | convolutional,28.0,512,3,1,1,leaky,1,39,, 42 | shortcut,,,,,,linear,,40,-3, 43 | convolutional,29.0,256,1,1,1,leaky,1,41,, 44 | convolutional,30.0,512,3,1,1,leaky,1,42,, 45 | shortcut,,,,,,linear,,43,-3, 46 | convolutional,31.0,256,1,1,1,leaky,1,44,, 47 | convolutional,32.0,512,3,1,1,leaky,1,45,, 48 | shortcut,,,,,,linear,,46,-3, 49 | convolutional,33.0,256,1,1,1,leaky,1,47,, 50 | convolutional,34.0,512,3,1,1,leaky,1,48,, 51 | shortcut,,,,,,linear,,49,-3, 52 | convolutional,35.0,256,1,1,1,leaky,1,50,, 53 | convolutional,36.0,512,3,1,1,leaky,1,51,, 54 | shortcut,,,,,,linear,,52,-3, 55 | convolutional,37.0,256,1,1,1,leaky,1,53,, 56 | convolutional,38.0,512,3,1,1,leaky,1,54,, 57 | shortcut,,,,,,linear,,55,-3, 58 | convolutional,39.0,256,1,1,1,leaky,1,56,, 59 | convolutional,40.0,512,3,1,1,leaky,1,57,, 60 | shortcut,,,,,,linear,,58,-3, 61 | convolutional,41.0,256,1,1,1,leaky,1,59,, 62 | convolutional,42.0,512,3,1,1,leaky,1,60,, 63 | shortcut,,,,,,linear,,61,-3, 64 | convolutional,43.0,1024,3,2,1,leaky,1,62,, 65 | convolutional,44.0,512,1,1,1,leaky,1,63,, 66 | convolutional,45.0,1024,3,1,1,leaky,1,64,, 67 | shortcut,,,,,,linear,,65,-3, 68 | convolutional,46.0,512,1,1,1,leaky,1,66,, 69 | convolutional,47.0,1024,3,1,1,leaky,1,67,, 70 | shortcut,,,,,,linear,,68,-3, 71 | convolutional,48.0,512,1,1,1,leaky,1,69,, 72 | 
convolutional,49.0,1024,3,1,1,leaky,1,70,, 73 | shortcut,,,,,,linear,,71,-3, 74 | convolutional,50.0,512,1,1,1,leaky,1,72,, 75 | convolutional,51.0,1024,3,1,1,leaky,1,73,, 76 | shortcut,,,,,,linear,,74,-3, 77 | convolutional,52.0,512,1,1,1,leaky,1,75,, 78 | convolutional,53.0,1024,3,1,1,leaky,1,76,, 79 | convolutional,54.0,512,1,1,1,leaky,1,77,, 80 | convolutional,55.0,1024,3,1,1,leaky,1,78,, 81 | convolutional,56.0,512,1,1,1,leaky,1,79,, 82 | convolutional,57.0,1024,3,1,1,leaky,1,80,, 83 | convolutional,58.0,255,1,1,1,linear,,81,, 84 | yolo,,,,,,,,82,, 85 | route,,,,,,,,83,,-4 86 | convolutional,59.0,256,1,1,1,leaky,1,84,, 87 | upsample,,,,2,,,,85,, 88 | route,,,,,,,,86,,"-1, 61" 89 | convolutional,60.0,256,1,1,1,leaky,1,87,, 90 | convolutional,61.0,512,3,1,1,leaky,1,88,, 91 | convolutional,62.0,256,1,1,1,leaky,1,89,, 92 | convolutional,63.0,512,3,1,1,leaky,1,90,, 93 | convolutional,64.0,256,1,1,1,leaky,1,91,, 94 | convolutional,65.0,512,3,1,1,leaky,1,92,, 95 | convolutional,66.0,255,1,1,1,linear,,93,, 96 | yolo,,,,,,,,94,, 97 | route,,,,,,,,95,,-4 98 | convolutional,67.0,128,1,1,1,leaky,1,96,, 99 | upsample,,,,2,,,,97,, 100 | route,,,,,,,,98,,"-1, 36" 101 | convolutional,68.0,128,1,1,1,leaky,1,99,, 102 | convolutional,69.0,256,3,1,1,leaky,1,100,, 103 | convolutional,70.0,128,1,1,1,leaky,1,101,, 104 | convolutional,71.0,256,3,1,1,leaky,1,102,, 105 | convolutional,72.0,128,1,1,1,leaky,1,103,, 106 | convolutional,73.0,256,3,1,1,leaky,1,104,, 107 | convolutional,74.0,255,1,1,1,linear,,105,, 108 | yolo,,,,,,,,106,, 109 | -------------------------------------------------------------------------------- /tools/yolov3.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/tools/yolov3.xlsx -------------------------------------------------------------------------------- /tools/yolov4-csp.csv: -------------------------------------------------------------------------------- 1 | type,convs,filters,size,stride,pad,activation,batch_normalize,no.,from,layers 2 | convolutional,0.0,32,3,1,1,mish,1,0,, 3 | convolutional,1.0,64,3,2,1,mish,1,1,, 4 | convolutional,2.0,32,1,1,1,mish,1,2,, 5 | convolutional,3.0,64,3,1,1,mish,1,3,, 6 | shortcut,,,,,,linear,,4,-3, 7 | convolutional,4.0,128,3,2,1,mish,1,5,, 8 | convolutional,5.0,64,1,1,1,mish,1,6,, 9 | route,,,,,,,,7,,-2 10 | convolutional,6.0,64,1,1,1,mish,1,8,, 11 | convolutional,7.0,64,1,1,1,mish,1,9,, 12 | convolutional,8.0,64,3,1,1,mish,1,10,, 13 | shortcut,,,,,,linear,,11,-3, 14 | convolutional,9.0,64,1,1,1,mish,1,12,, 15 | convolutional,10.0,64,3,1,1,mish,1,13,, 16 | shortcut,,,,,,linear,,14,-3, 17 | convolutional,11.0,64,1,1,1,mish,1,15,, 18 | route,,,,,,,,16,,"-1,-10" 19 | convolutional,12.0,128,1,1,1,mish,1,17,, 20 | convolutional,13.0,256,3,2,1,mish,1,18,, 21 | convolutional,14.0,128,1,1,1,mish,1,19,, 22 | route,,,,,,,,20,,-2 23 | convolutional,15.0,128,1,1,1,mish,1,21,, 24 | convolutional,16.0,128,1,1,1,mish,1,22,, 25 | convolutional,17.0,128,3,1,1,mish,1,23,, 26 | shortcut,,,,,,linear,,24,-3, 27 | convolutional,18.0,128,1,1,1,mish,1,25,, 28 | convolutional,19.0,128,3,1,1,mish,1,26,, 29 | shortcut,,,,,,linear,,27,-3, 30 | convolutional,20.0,128,1,1,1,mish,1,28,, 31 | convolutional,21.0,128,3,1,1,mish,1,29,, 32 | shortcut,,,,,,linear,,30,-3, 33 | convolutional,22.0,128,1,1,1,mish,1,31,, 34 | convolutional,23.0,128,3,1,1,mish,1,32,, 35 | shortcut,,,,,,linear,,33,-3, 36 | convolutional,24.0,128,1,1,1,mish,1,34,, 37 | 
convolutional,25.0,128,3,1,1,mish,1,35,, 38 | shortcut,,,,,,linear,,36,-3, 39 | convolutional,26.0,128,1,1,1,mish,1,37,, 40 | convolutional,27.0,128,3,1,1,mish,1,38,, 41 | shortcut,,,,,,linear,,39,-3, 42 | convolutional,28.0,128,1,1,1,mish,1,40,, 43 | convolutional,29.0,128,3,1,1,mish,1,41,, 44 | shortcut,,,,,,linear,,42,-3, 45 | convolutional,30.0,128,1,1,1,mish,1,43,, 46 | convolutional,31.0,128,3,1,1,mish,1,44,, 47 | shortcut,,,,,,linear,,45,-3, 48 | convolutional,32.0,128,1,1,1,mish,1,46,, 49 | route,,,,,,,,47,,"-1,-28" 50 | convolutional,33.0,256,1,1,1,mish,1,48,, 51 | convolutional,34.0,512,3,2,1,mish,1,49,, 52 | convolutional,35.0,256,1,1,1,mish,1,50,, 53 | route,,,,,,,,51,,-2 54 | convolutional,36.0,256,1,1,1,mish,1,52,, 55 | convolutional,37.0,256,1,1,1,mish,1,53,, 56 | convolutional,38.0,256,3,1,1,mish,1,54,, 57 | shortcut,,,,,,linear,,55,-3, 58 | convolutional,39.0,256,1,1,1,mish,1,56,, 59 | convolutional,40.0,256,3,1,1,mish,1,57,, 60 | shortcut,,,,,,linear,,58,-3, 61 | convolutional,41.0,256,1,1,1,mish,1,59,, 62 | convolutional,42.0,256,3,1,1,mish,1,60,, 63 | shortcut,,,,,,linear,,61,-3, 64 | convolutional,43.0,256,1,1,1,mish,1,62,, 65 | convolutional,44.0,256,3,1,1,mish,1,63,, 66 | shortcut,,,,,,linear,,64,-3, 67 | convolutional,45.0,256,1,1,1,mish,1,65,, 68 | convolutional,46.0,256,3,1,1,mish,1,66,, 69 | shortcut,,,,,,linear,,67,-3, 70 | convolutional,47.0,256,1,1,1,mish,1,68,, 71 | convolutional,48.0,256,3,1,1,mish,1,69,, 72 | shortcut,,,,,,linear,,70,-3, 73 | convolutional,49.0,256,1,1,1,mish,1,71,, 74 | convolutional,50.0,256,3,1,1,mish,1,72,, 75 | shortcut,,,,,,linear,,73,-3, 76 | convolutional,51.0,256,1,1,1,mish,1,74,, 77 | convolutional,52.0,256,3,1,1,mish,1,75,, 78 | shortcut,,,,,,linear,,76,-3, 79 | convolutional,53.0,256,1,1,1,mish,1,77,, 80 | route,,,,,,,,78,,"-1,-28" 81 | convolutional,54.0,512,1,1,1,mish,1,79,, 82 | convolutional,55.0,1024,3,2,1,mish,1,80,, 83 | convolutional,56.0,512,1,1,1,mish,1,81,, 84 | route,,,,,,,,82,,-2 85 | convolutional,57.0,512,1,1,1,mish,1,83,, 86 | convolutional,58.0,512,1,1,1,mish,1,84,, 87 | convolutional,59.0,512,3,1,1,mish,1,85,, 88 | shortcut,,,,,,linear,,86,-3, 89 | convolutional,60.0,512,1,1,1,mish,1,87,, 90 | convolutional,61.0,512,3,1,1,mish,1,88,, 91 | shortcut,,,,,,linear,,89,-3, 92 | convolutional,62.0,512,1,1,1,mish,1,90,, 93 | convolutional,63.0,512,3,1,1,mish,1,91,, 94 | shortcut,,,,,,linear,,92,-3, 95 | convolutional,64.0,512,1,1,1,mish,1,93,, 96 | convolutional,65.0,512,3,1,1,mish,1,94,, 97 | shortcut,,,,,,linear,,95,-3, 98 | convolutional,66.0,512,1,1,1,mish,1,96,, 99 | route,,,,,,,,97,,"-1,-16" 100 | convolutional,67.0,1024,1,1,1,mish,1,98,, 101 | convolutional,68.0,512,1,1,1,mish,1,99,, 102 | route,,,,,,,,100,,-2 103 | convolutional,69.0,512,1,1,1,mish,1,101,, 104 | convolutional,70.0,512,3,1,1,mish,1,102,, 105 | convolutional,71.0,512,1,1,1,mish,1,103,, 106 | maxpool,,,5,1,,,,104,, 107 | route,,,,,,,,105,,-2 108 | maxpool,,,9,1,,,,106,, 109 | route,,,,,,,,107,,-4 110 | maxpool,,,13,1,,,,108,, 111 | route,,,,,,,,109,,"-1,-3,-5,-6" 112 | convolutional,72.0,512,1,1,1,mish,1,110,, 113 | convolutional,73.0,512,3,1,1,mish,1,111,, 114 | route,,,,,,,,112,,"-1, -13" 115 | convolutional,74.0,512,1,1,1,mish,1,113,, 116 | convolutional,75.0,256,1,1,1,mish,1,114,, 117 | upsample,,,,2,,,,115,, 118 | route,,,,,,,,116,,79 119 | convolutional,76.0,256,1,1,1,mish,1,117,, 120 | route,,,,,,,,118,,"-1, -3" 121 | convolutional,77.0,256,1,1,1,mish,1,119,, 122 | convolutional,78.0,256,1,1,1,mish,1,120,, 123 | route,,,,,,,,121,,-2 124 | 
convolutional,79.0,256,1,1,1,mish,1,122,, 125 | convolutional,80.0,256,3,1,1,mish,1,123,, 126 | convolutional,81.0,256,1,1,1,mish,1,124,, 127 | convolutional,82.0,256,3,1,1,mish,1,125,, 128 | route,,,,,,,,126,,"-1, -6" 129 | convolutional,83.0,256,1,1,1,mish,1,127,, 130 | convolutional,84.0,128,1,1,1,mish,1,128,, 131 | upsample,,,,2,,,,129,, 132 | route,,,,,,,,130,,48 133 | convolutional,85.0,128,1,1,1,mish,1,131,, 134 | route,,,,,,,,132,,"-1, -3" 135 | convolutional,86.0,128,1,1,1,mish,1,133,, 136 | convolutional,87.0,128,1,1,1,mish,1,134,, 137 | route,,,,,,,,135,,-2 138 | convolutional,88.0,128,1,1,1,mish,1,136,, 139 | convolutional,89.0,128,3,1,1,mish,1,137,, 140 | convolutional,90.0,128,1,1,1,mish,1,138,, 141 | convolutional,91.0,128,3,1,1,mish,1,139,, 142 | route,,,,,,,,140,,"-1, -6" 143 | convolutional,92.0,128,1,1,1,mish,1,141,, 144 | convolutional,93.0,256,3,1,1,mish,1,142,, 145 | convolutional,94.0,255,1,1,1,logistic,,143,, 146 | yolo,,,,,,,,144,, 147 | route,,,,,,,,145,,-4 148 | convolutional,95.0,256,3,2,1,mish,1,146,, 149 | route,,,,,,,,147,,"-1, -20" 150 | convolutional,96.0,256,1,1,1,mish,1,148,, 151 | convolutional,97.0,256,1,1,1,mish,1,149,, 152 | route,,,,,,,,150,,-2 153 | convolutional,98.0,256,1,1,1,mish,1,151,, 154 | convolutional,99.0,256,3,1,1,mish,1,152,, 155 | convolutional,100.0,256,1,1,1,mish,1,153,, 156 | convolutional,101.0,256,3,1,1,mish,1,154,, 157 | route,,,,,,,,155,,"-1,-6" 158 | convolutional,102.0,256,1,1,1,mish,1,156,, 159 | convolutional,103.0,512,3,1,1,mish,1,157,, 160 | convolutional,104.0,255,1,1,1,logistic,,158,, 161 | yolo,,,,,,,,159,, 162 | route,,,,,,,,160,,-4 163 | convolutional,105.0,512,3,2,1,mish,1,161,, 164 | route,,,,,,,,162,,"-1, -49" 165 | convolutional,106.0,512,1,1,1,mish,1,163,, 166 | convolutional,107.0,512,1,1,1,mish,1,164,, 167 | route,,,,,,,,165,,-2 168 | convolutional,108.0,512,1,1,1,mish,1,166,, 169 | convolutional,109.0,512,3,1,1,mish,1,167,, 170 | convolutional,110.0,512,1,1,1,mish,1,168,, 171 | convolutional,111.0,512,3,1,1,mish,1,169,, 172 | route,,,,,,,,170,,"-1,-6" 173 | convolutional,112.0,512,1,1,1,mish,1,171,, 174 | convolutional,113.0,1024,3,1,1,mish,1,172,, 175 | convolutional,114.0,255,1,1,1,logistic,,173,, 176 | yolo,,,,,,,,174,, 177 | -------------------------------------------------------------------------------- /tools/yolov4-csp.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/tools/yolov4-csp.xlsx -------------------------------------------------------------------------------- /tools/yolov4-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00261 19 | burn_in=1000 20 | 21 | max_batches = 2000200 22 | policy=steps 23 | steps=1600000,1800000 24 | scales=.1,.1 25 | 26 | 27 | #weights_reject_freq=1001 28 | #ema_alpha=0.9998 29 | #equidistant_point=1000 30 | #num_sigmas_reject_badlabels=3 31 | #badlabels_rejection_percentage=0.2 32 | 33 | 34 | [convolutional] 35 | batch_normalize=1 36 | filters=32 37 | size=3 38 | stride=2 39 | pad=1 40 | activation=leaky 41 | 42 | [convolutional] 43 | batch_normalize=1 44 | filters=64 45 | size=3 46 | stride=2 47 | pad=1 48 | 
activation=leaky 49 | 50 | [convolutional] 51 | batch_normalize=1 52 | filters=64 53 | size=3 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [route] 59 | layers=-1 60 | groups=2 61 | group_id=1 62 | 63 | [convolutional] 64 | batch_normalize=1 65 | filters=32 66 | size=3 67 | stride=1 68 | pad=1 69 | activation=leaky 70 | 71 | [convolutional] 72 | batch_normalize=1 73 | filters=32 74 | size=3 75 | stride=1 76 | pad=1 77 | activation=leaky 78 | 79 | [route] 80 | layers = -1,-2 81 | 82 | [convolutional] 83 | batch_normalize=1 84 | filters=64 85 | size=1 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [route] 91 | layers = -6,-1 92 | 93 | [maxpool] 94 | size=2 95 | stride=2 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=128 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | [route] 106 | layers=-1 107 | groups=2 108 | group_id=1 109 | 110 | [convolutional] 111 | batch_normalize=1 112 | filters=64 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [convolutional] 119 | batch_normalize=1 120 | filters=64 121 | size=3 122 | stride=1 123 | pad=1 124 | activation=leaky 125 | 126 | [route] 127 | layers = -1,-2 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=128 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [route] 138 | layers = -6,-1 139 | 140 | [maxpool] 141 | size=2 142 | stride=2 143 | 144 | [convolutional] 145 | batch_normalize=1 146 | filters=256 147 | size=3 148 | stride=1 149 | pad=1 150 | activation=leaky 151 | 152 | [route] 153 | layers=-1 154 | groups=2 155 | group_id=1 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=128 160 | size=3 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=128 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [route] 174 | layers = -1,-2 175 | 176 | [convolutional] 177 | batch_normalize=1 178 | filters=256 179 | size=1 180 | stride=1 181 | pad=1 182 | activation=leaky 183 | 184 | [route] 185 | layers = -6,-1 186 | 187 | [maxpool] 188 | size=2 189 | stride=2 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=512 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | ################################## 200 | 201 | [convolutional] 202 | batch_normalize=1 203 | filters=256 204 | size=1 205 | stride=1 206 | pad=1 207 | activation=leaky 208 | 209 | [convolutional] 210 | batch_normalize=1 211 | filters=512 212 | size=3 213 | stride=1 214 | pad=1 215 | activation=leaky 216 | 217 | [convolutional] 218 | size=1 219 | stride=1 220 | pad=1 221 | filters=255 222 | activation=linear 223 | 224 | 225 | 226 | [yolo] 227 | mask = 3,4,5 228 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 229 | classes=80 230 | num=6 231 | jitter=.3 232 | scale_x_y = 1.05 233 | cls_normalizer=1.0 234 | iou_normalizer=0.07 235 | iou_loss=ciou 236 | ignore_thresh = .7 237 | truth_thresh = 1 238 | random=0 239 | resize=1.5 240 | nms_kind=greedynms 241 | beta_nms=0.6 242 | #new_coords=1 243 | #scale_x_y = 2.0 244 | 245 | [route] 246 | layers = -4 247 | 248 | [convolutional] 249 | batch_normalize=1 250 | filters=128 251 | size=1 252 | stride=1 253 | pad=1 254 | activation=leaky 255 | 256 | [upsample] 257 | stride=2 258 | 259 | [route] 260 | layers = -1, 23 261 | 262 | [convolutional] 263 | batch_normalize=1 264 | filters=256 265 | size=3 266 | stride=1 267 | pad=1 268 | activation=leaky 269 | 270 | [convolutional] 271 | size=1 
272 | stride=1 273 | pad=1 274 | filters=255 275 | activation=linear 276 | 277 | [yolo] 278 | mask = 1,2,3 279 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 280 | classes=80 281 | num=6 282 | jitter=.3 283 | scale_x_y = 1.05 284 | cls_normalizer=1.0 285 | iou_normalizer=0.07 286 | iou_loss=ciou 287 | ignore_thresh = .7 288 | truth_thresh = 1 289 | random=0 290 | resize=1.5 291 | nms_kind=greedynms 292 | beta_nms=0.6 293 | #new_coords=1 294 | #scale_x_y = 2.0 295 | -------------------------------------------------------------------------------- /tools/yolov4-tiny.csv: -------------------------------------------------------------------------------- 1 | type,convs,filters,size,stride,pad,activation,batch_normalize,no.,from,layers 2 | convolutional,0.0,32,3,2,1,leaky,1,0,, 3 | convolutional,1.0,64,3,2,1,leaky,1,1,, 4 | convolutional,2.0,64,3,1,1,leaky,1,2,, 5 | route,,,,,,,,3,,-1 6 | convolutional,3.0,32,3,1,1,leaky,1,4,, 7 | convolutional,4.0,32,3,1,1,leaky,1,5,, 8 | route,,,,,,,,6,,"-1,-2" 9 | convolutional,5.0,64,1,1,1,leaky,1,7,, 10 | route,,,,,,,,8,,"-6,-1" 11 | maxpool,,,2,2,,,,9,, 12 | convolutional,6.0,128,3,1,1,leaky,1,10,, 13 | route,,,,,,,,11,,-1 14 | convolutional,7.0,64,3,1,1,leaky,1,12,, 15 | convolutional,8.0,64,3,1,1,leaky,1,13,, 16 | route,,,,,,,,14,,"-1,-2" 17 | convolutional,9.0,128,1,1,1,leaky,1,15,, 18 | route,,,,,,,,16,,"-6,-1" 19 | maxpool,,,2,2,,,,17,, 20 | convolutional,10.0,256,3,1,1,leaky,1,18,, 21 | route,,,,,,,,19,,-1 22 | convolutional,11.0,128,3,1,1,leaky,1,20,, 23 | convolutional,12.0,128,3,1,1,leaky,1,21,, 24 | route,,,,,,,,22,,"-1,-2" 25 | convolutional,13.0,256,1,1,1,leaky,1,23,, 26 | route,,,,,,,,24,,"-6,-1" 27 | maxpool,,,2,2,,,,25,, 28 | convolutional,14.0,512,3,1,1,leaky,1,26,, 29 | convolutional,15.0,256,1,1,1,leaky,1,27,, 30 | convolutional,16.0,512,3,1,1,leaky,1,28,, 31 | convolutional,17.0,255,1,1,1,linear,,29,, 32 | yolo,,,,,,,,30,, 33 | route,,,,,,,,31,,-4 34 | convolutional,18.0,128,1,1,1,leaky,1,32,, 35 | upsample,,,,2,,,,33,, 36 | route,,,,,,,,34,,"-1, 23" 37 | convolutional,19.0,256,3,1,1,leaky,1,35,, 38 | convolutional,20.0,255,1,1,1,linear,,36,, 39 | yolo,,,,,,,,37,, 40 | -------------------------------------------------------------------------------- /tools/yolov4-tiny.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/tools/yolov4-tiny.xlsx -------------------------------------------------------------------------------- /tools/yolov4.csv: -------------------------------------------------------------------------------- 1 | no.,type,convs,filters,size,stride,pad,activation,batch_normalize,from,layers 2 | 0,convolutional,0.0,32,3,1,1,mish,1,, 3 | 1,convolutional,1.0,64,3,2,1,mish,1,, 4 | 2,convolutional,2.0,64,1,1,1,mish,1,, 5 | 3,route,,,,,,,,,-2 6 | 4,convolutional,3.0,64,1,1,1,mish,1,, 7 | 5,convolutional,4.0,32,1,1,1,mish,1,, 8 | 6,convolutional,5.0,64,3,1,1,mish,1,, 9 | 7,shortcut,,,,,,linear,,-3, 10 | 8,convolutional,6.0,64,1,1,1,mish,1,, 11 | 9,route,,,,,,,,,"-1,-7" 12 | 10,convolutional,7.0,64,1,1,1,mish,1,, 13 | 11,convolutional,8.0,128,3,2,1,mish,1,, 14 | 12,convolutional,9.0,64,1,1,1,mish,1,, 15 | 13,route,,,,,,,,,-2 16 | 14,convolutional,10.0,64,1,1,1,mish,1,, 17 | 15,convolutional,11.0,64,1,1,1,mish,1,, 18 | 16,convolutional,12.0,64,3,1,1,mish,1,, 19 | 17,shortcut,,,,,,linear,,-3, 20 | 18,convolutional,13.0,64,1,1,1,mish,1,, 21 | 19,convolutional,14.0,64,3,1,1,mish,1,, 22 | 
20,shortcut,,,,,,linear,,-3, 23 | 21,convolutional,15.0,64,1,1,1,mish,1,, 24 | 22,route,,,,,,,,,"-1,-10" 25 | 23,convolutional,16.0,128,1,1,1,mish,1,, 26 | 24,convolutional,17.0,256,3,2,1,mish,1,, 27 | 25,convolutional,18.0,128,1,1,1,mish,1,, 28 | 26,route,,,,,,,,,-2 29 | 27,convolutional,19.0,128,1,1,1,mish,1,, 30 | 28,convolutional,20.0,128,1,1,1,mish,1,, 31 | 29,convolutional,21.0,128,3,1,1,mish,1,, 32 | 30,shortcut,,,,,,linear,,-3, 33 | 31,convolutional,22.0,128,1,1,1,mish,1,, 34 | 32,convolutional,23.0,128,3,1,1,mish,1,, 35 | 33,shortcut,,,,,,linear,,-3, 36 | 34,convolutional,24.0,128,1,1,1,mish,1,, 37 | 35,convolutional,25.0,128,3,1,1,mish,1,, 38 | 36,shortcut,,,,,,linear,,-3, 39 | 37,convolutional,26.0,128,1,1,1,mish,1,, 40 | 38,convolutional,27.0,128,3,1,1,mish,1,, 41 | 39,shortcut,,,,,,linear,,-3, 42 | 40,convolutional,28.0,128,1,1,1,mish,1,, 43 | 41,convolutional,29.0,128,3,1,1,mish,1,, 44 | 42,shortcut,,,,,,linear,,-3, 45 | 43,convolutional,30.0,128,1,1,1,mish,1,, 46 | 44,convolutional,31.0,128,3,1,1,mish,1,, 47 | 45,shortcut,,,,,,linear,,-3, 48 | 46,convolutional,32.0,128,1,1,1,mish,1,, 49 | 47,convolutional,33.0,128,3,1,1,mish,1,, 50 | 48,shortcut,,,,,,linear,,-3, 51 | 49,convolutional,34.0,128,1,1,1,mish,1,, 52 | 50,convolutional,35.0,128,3,1,1,mish,1,, 53 | 51,shortcut,,,,,,linear,,-3, 54 | 52,convolutional,36.0,128,1,1,1,mish,1,, 55 | 53,route,,,,,,,,,"-1,-28" 56 | 54,convolutional,37.0,256,1,1,1,mish,1,, 57 | 55,convolutional,38.0,512,3,2,1,mish,1,, 58 | 56,convolutional,39.0,256,1,1,1,mish,1,, 59 | 57,route,,,,,,,,,-2 60 | 58,convolutional,40.0,256,1,1,1,mish,1,, 61 | 59,convolutional,41.0,256,1,1,1,mish,1,, 62 | 60,convolutional,42.0,256,3,1,1,mish,1,, 63 | 61,shortcut,,,,,,linear,,-3, 64 | 62,convolutional,43.0,256,1,1,1,mish,1,, 65 | 63,convolutional,44.0,256,3,1,1,mish,1,, 66 | 64,shortcut,,,,,,linear,,-3, 67 | 65,convolutional,45.0,256,1,1,1,mish,1,, 68 | 66,convolutional,46.0,256,3,1,1,mish,1,, 69 | 67,shortcut,,,,,,linear,,-3, 70 | 68,convolutional,47.0,256,1,1,1,mish,1,, 71 | 69,convolutional,48.0,256,3,1,1,mish,1,, 72 | 70,shortcut,,,,,,linear,,-3, 73 | 71,convolutional,49.0,256,1,1,1,mish,1,, 74 | 72,convolutional,50.0,256,3,1,1,mish,1,, 75 | 73,shortcut,,,,,,linear,,-3, 76 | 74,convolutional,51.0,256,1,1,1,mish,1,, 77 | 75,convolutional,52.0,256,3,1,1,mish,1,, 78 | 76,shortcut,,,,,,linear,,-3, 79 | 77,convolutional,53.0,256,1,1,1,mish,1,, 80 | 78,convolutional,54.0,256,3,1,1,mish,1,, 81 | 79,shortcut,,,,,,linear,,-3, 82 | 80,convolutional,55.0,256,1,1,1,mish,1,, 83 | 81,convolutional,56.0,256,3,1,1,mish,1,, 84 | 82,shortcut,,,,,,linear,,-3, 85 | 83,convolutional,57.0,256,1,1,1,mish,1,, 86 | 84,route,,,,,,,,,"-1,-28" 87 | 85,convolutional,58.0,512,1,1,1,mish,1,, 88 | 86,convolutional,59.0,1024,3,2,1,mish,1,, 89 | 87,convolutional,60.0,512,1,1,1,mish,1,, 90 | 88,route,,,,,,,,,-2 91 | 89,convolutional,61.0,512,1,1,1,mish,1,, 92 | 90,convolutional,62.0,512,1,1,1,mish,1,, 93 | 91,convolutional,63.0,512,3,1,1,mish,1,, 94 | 92,shortcut,,,,,,linear,,-3, 95 | 93,convolutional,64.0,512,1,1,1,mish,1,, 96 | 94,convolutional,65.0,512,3,1,1,mish,1,, 97 | 95,shortcut,,,,,,linear,,-3, 98 | 96,convolutional,66.0,512,1,1,1,mish,1,, 99 | 97,convolutional,67.0,512,3,1,1,mish,1,, 100 | 98,shortcut,,,,,,linear,,-3, 101 | 99,convolutional,68.0,512,1,1,1,mish,1,, 102 | 100,convolutional,69.0,512,3,1,1,mish,1,, 103 | 101,shortcut,,,,,,linear,,-3, 104 | 102,convolutional,70.0,512,1,1,1,mish,1,, 105 | 103,route,,,,,,,,,"-1,-16" 106 | 104,convolutional,71.0,1024,1,1,1,mish,1,, 107 | 
105,convolutional,72.0,512,1,1,1,leaky,1,, 108 | 106,convolutional,73.0,1024,3,1,1,leaky,1,, 109 | 107,convolutional,74.0,512,1,1,1,leaky,1,, 110 | 108,maxpool,,,5,1,,,,, 111 | 109,route,,,,,,,,,-2 112 | 110,maxpool,,,9,1,,,,, 113 | 111,route,,,,,,,,,-4 114 | 112,maxpool,,,13,1,,,,, 115 | 113,route,,,,,,,,,"-1,-3,-5,-6" 116 | 114,convolutional,75.0,512,1,1,1,leaky,1,, 117 | 115,convolutional,76.0,1024,3,1,1,leaky,1,, 118 | 116,convolutional,77.0,512,1,1,1,leaky,1,, 119 | 117,convolutional,78.0,256,1,1,1,leaky,1,, 120 | 118,upsample,,,,2,,,,, 121 | 119,route,,,,,,,,,85 122 | 120,convolutional,79.0,256,1,1,1,leaky,1,, 123 | 121,route,,,,,,,,,"-1, -3" 124 | 122,convolutional,80.0,256,1,1,1,leaky,1,, 125 | 123,convolutional,81.0,512,3,1,1,leaky,1,, 126 | 124,convolutional,82.0,256,1,1,1,leaky,1,, 127 | 125,convolutional,83.0,512,3,1,1,leaky,1,, 128 | 126,convolutional,84.0,256,1,1,1,leaky,1,, 129 | 127,convolutional,85.0,128,1,1,1,leaky,1,, 130 | 128,upsample,,,,2,,,,, 131 | 129,route,,,,,,,,,54 132 | 130,convolutional,86.0,128,1,1,1,leaky,1,, 133 | 131,route,,,,,,,,,"-1, -3" 134 | 132,convolutional,87.0,128,1,1,1,leaky,1,, 135 | 133,convolutional,88.0,256,3,1,1,leaky,1,, 136 | 134,convolutional,89.0,128,1,1,1,leaky,1,, 137 | 135,convolutional,90.0,256,3,1,1,leaky,1,, 138 | 136,convolutional,91.0,128,1,1,1,leaky,1,, 139 | 137,convolutional,92.0,256,3,1,1,leaky,1,, 140 | 138,convolutional,93.0,255,1,1,1,linear,,, 141 | 139,yolo,,,,,,,,, 142 | 140,route,,,,,,,,,-4 143 | 141,convolutional,94.0,256,3,2,1,leaky,1,, 144 | 142,route,,,,,,,,,"-1, -16" 145 | 143,convolutional,95.0,256,1,1,1,leaky,1,, 146 | 144,convolutional,96.0,512,3,1,1,leaky,1,, 147 | 145,convolutional,97.0,256,1,1,1,leaky,1,, 148 | 146,convolutional,98.0,512,3,1,1,leaky,1,, 149 | 147,convolutional,99.0,256,1,1,1,leaky,1,, 150 | 148,convolutional,100.0,512,3,1,1,leaky,1,, 151 | 149,convolutional,101.0,255,1,1,1,linear,,, 152 | 150,yolo,,,,,,,,, 153 | 151,route,,,,,,,,,-4 154 | 152,convolutional,102.0,512,3,2,1,leaky,1,, 155 | 153,route,,,,,,,,,"-1, -37" 156 | 154,convolutional,103.0,512,1,1,1,leaky,1,, 157 | 155,convolutional,104.0,1024,3,1,1,leaky,1,, 158 | 156,convolutional,105.0,512,1,1,1,leaky,1,, 159 | 157,convolutional,106.0,1024,3,1,1,leaky,1,, 160 | 158,convolutional,107.0,512,1,1,1,leaky,1,, 161 | 159,convolutional,108.0,1024,3,1,1,leaky,1,, 162 | 160,convolutional,109.0,255,1,1,1,linear,,, 163 | 161,yolo,,,,,,,,, 164 | -------------------------------------------------------------------------------- /tools/yolov4.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/tools/yolov4.xlsx -------------------------------------------------------------------------------- /tools/yolov4x-mish.csv: -------------------------------------------------------------------------------- 1 | type,convs,filters,size,stride,pad,activation,batch_normalize,no.,from,layers 2 | convolutional,0.0,32,3,1,1,mish,1,0,, 3 | convolutional,1.0,80,3,2,1,mish,1,1,, 4 | convolutional,2.0,40,1,1,1,mish,1,2,, 5 | convolutional,3.0,80,3,1,1,mish,1,3,, 6 | shortcut,,,,,,linear,,4,-3, 7 | convolutional,4.0,160,3,2,1,mish,1,5,, 8 | convolutional,5.0,80,1,1,1,mish,1,6,, 9 | route,,,,,,,,7,,-2 10 | convolutional,6.0,80,1,1,1,mish,1,8,, 11 | convolutional,7.0,80,1,1,1,mish,1,9,, 12 | convolutional,8.0,80,3,1,1,mish,1,10,, 13 | shortcut,,,,,,linear,,11,-3, 14 | convolutional,9.0,80,1,1,1,mish,1,12,, 15 | 
convolutional,10.0,80,3,1,1,mish,1,13,, 16 | shortcut,,,,,,linear,,14,-3, 17 | convolutional,11.0,80,1,1,1,mish,1,15,, 18 | convolutional,12.0,80,3,1,1,mish,1,16,, 19 | shortcut,,,,,,linear,,17,-3, 20 | convolutional,13.0,80,1,1,1,mish,1,18,, 21 | route,,,,,,,,19,,"-1,-13" 22 | convolutional,14.0,160,1,1,1,mish,1,20,, 23 | convolutional,15.0,320,3,2,1,mish,1,21,, 24 | convolutional,16.0,160,1,1,1,mish,1,22,, 25 | route,,,,,,,,23,,-2 26 | convolutional,17.0,160,1,1,1,mish,1,24,, 27 | convolutional,18.0,160,1,1,1,mish,1,25,, 28 | convolutional,19.0,160,3,1,1,mish,1,26,, 29 | shortcut,,,,,,linear,,27,-3, 30 | convolutional,20.0,160,1,1,1,mish,1,28,, 31 | convolutional,21.0,160,3,1,1,mish,1,29,, 32 | shortcut,,,,,,linear,,30,-3, 33 | convolutional,22.0,160,1,1,1,mish,1,31,, 34 | convolutional,23.0,160,3,1,1,mish,1,32,, 35 | shortcut,,,,,,linear,,33,-3, 36 | convolutional,24.0,160,1,1,1,mish,1,34,, 37 | convolutional,25.0,160,3,1,1,mish,1,35,, 38 | shortcut,,,,,,linear,,36,-3, 39 | convolutional,26.0,160,1,1,1,mish,1,37,, 40 | convolutional,27.0,160,3,1,1,mish,1,38,, 41 | shortcut,,,,,,linear,,39,-3, 42 | convolutional,28.0,160,1,1,1,mish,1,40,, 43 | convolutional,29.0,160,3,1,1,mish,1,41,, 44 | shortcut,,,,,,linear,,42,-3, 45 | convolutional,30.0,160,1,1,1,mish,1,43,, 46 | convolutional,31.0,160,3,1,1,mish,1,44,, 47 | shortcut,,,,,,linear,,45,-3, 48 | convolutional,32.0,160,1,1,1,mish,1,46,, 49 | convolutional,33.0,160,3,1,1,mish,1,47,, 50 | shortcut,,,,,,linear,,48,-3, 51 | convolutional,34.0,160,1,1,1,mish,1,49,, 52 | convolutional,35.0,160,3,1,1,mish,1,50,, 53 | shortcut,,,,,,linear,,51,-3, 54 | convolutional,36.0,160,1,1,1,mish,1,52,, 55 | convolutional,37.0,160,3,1,1,mish,1,53,, 56 | shortcut,,,,,,linear,,54,-3, 57 | convolutional,38.0,160,1,1,1,mish,1,55,, 58 | route,,,,,,,,56,,"-1,-34" 59 | convolutional,39.0,320,1,1,1,mish,1,57,, 60 | convolutional,40.0,640,3,2,1,mish,1,58,, 61 | convolutional,41.0,320,1,1,1,mish,1,59,, 62 | route,,,,,,,,60,,-2 63 | convolutional,42.0,320,1,1,1,mish,1,61,, 64 | convolutional,43.0,320,1,1,1,mish,1,62,, 65 | convolutional,44.0,320,3,1,1,mish,1,63,, 66 | shortcut,,,,,,linear,,64,-3, 67 | convolutional,45.0,320,1,1,1,mish,1,65,, 68 | convolutional,46.0,320,3,1,1,mish,1,66,, 69 | shortcut,,,,,,linear,,67,-3, 70 | convolutional,47.0,320,1,1,1,mish,1,68,, 71 | convolutional,48.0,320,3,1,1,mish,1,69,, 72 | shortcut,,,,,,linear,,70,-3, 73 | convolutional,49.0,320,1,1,1,mish,1,71,, 74 | convolutional,50.0,320,3,1,1,mish,1,72,, 75 | shortcut,,,,,,linear,,73,-3, 76 | convolutional,51.0,320,1,1,1,mish,1,74,, 77 | convolutional,52.0,320,3,1,1,mish,1,75,, 78 | shortcut,,,,,,linear,,76,-3, 79 | convolutional,53.0,320,1,1,1,mish,1,77,, 80 | convolutional,54.0,320,3,1,1,mish,1,78,, 81 | shortcut,,,,,,linear,,79,-3, 82 | convolutional,55.0,320,1,1,1,mish,1,80,, 83 | convolutional,56.0,320,3,1,1,mish,1,81,, 84 | shortcut,,,,,,linear,,82,-3, 85 | convolutional,57.0,320,1,1,1,mish,1,83,, 86 | convolutional,58.0,320,3,1,1,mish,1,84,, 87 | shortcut,,,,,,linear,,85,-3, 88 | convolutional,59.0,320,1,1,1,mish,1,86,, 89 | convolutional,60.0,320,3,1,1,mish,1,87,, 90 | shortcut,,,,,,linear,,88,-3, 91 | convolutional,61.0,320,1,1,1,mish,1,89,, 92 | convolutional,62.0,320,3,1,1,mish,1,90,, 93 | shortcut,,,,,,linear,,91,-3, 94 | convolutional,63.0,320,1,1,1,mish,1,92,, 95 | route,,,,,,,,93,,"-1,-34" 96 | convolutional,64.0,640,1,1,1,mish,1,94,, 97 | convolutional,65.0,1280,3,2,1,mish,1,95,, 98 | convolutional,66.0,640,1,1,1,mish,1,96,, 99 | route,,,,,,,,97,,-2 100 | 
convolutional,67.0,640,1,1,1,mish,1,98,, 101 | convolutional,68.0,640,1,1,1,mish,1,99,, 102 | convolutional,69.0,640,3,1,1,mish,1,100,, 103 | shortcut,,,,,,linear,,101,-3, 104 | convolutional,70.0,640,1,1,1,mish,1,102,, 105 | convolutional,71.0,640,3,1,1,mish,1,103,, 106 | shortcut,,,,,,linear,,104,-3, 107 | convolutional,72.0,640,1,1,1,mish,1,105,, 108 | convolutional,73.0,640,3,1,1,mish,1,106,, 109 | shortcut,,,,,,linear,,107,-3, 110 | convolutional,74.0,640,1,1,1,mish,1,108,, 111 | convolutional,75.0,640,3,1,1,mish,1,109,, 112 | shortcut,,,,,,linear,,110,-3, 113 | convolutional,76.0,640,1,1,1,mish,1,111,, 114 | convolutional,77.0,640,3,1,1,mish,1,112,, 115 | shortcut,,,,,,linear,,113,-3, 116 | convolutional,78.0,640,1,1,1,mish,1,114,, 117 | route,,,,,,,,115,,"-1,-19" 118 | convolutional,79.0,1280,1,1,1,mish,1,116,, 119 | convolutional,80.0,640,1,1,1,mish,1,117,, 120 | route,,,,,,,,118,,-2 121 | convolutional,81.0,640,1,1,1,mish,1,119,, 122 | convolutional,82.0,640,3,1,1,mish,1,120,, 123 | convolutional,83.0,640,1,1,1,mish,1,121,, 124 | maxpool,,,5,1,,,,122,, 125 | route,,,,,,,,123,,-2 126 | maxpool,,,9,1,,,,124,, 127 | route,,,,,,,,125,,-4 128 | maxpool,,,13,1,,,,126,, 129 | route,,,,,,,,127,,"-1,-3,-5,-6" 130 | convolutional,84.0,640,1,1,1,mish,1,128,, 131 | convolutional,85.0,640,3,1,1,mish,1,129,, 132 | convolutional,86.0,640,1,1,1,mish,1,130,, 133 | convolutional,87.0,640,3,1,1,mish,1,131,, 134 | route,,,,,,,,132,,"-1, -15" 135 | convolutional,88.0,640,1,1,1,mish,1,133,, 136 | convolutional,89.0,320,1,1,1,mish,1,134,, 137 | upsample,,,,2,,,,135,, 138 | route,,,,,,,,136,,94 139 | convolutional,90.0,320,1,1,1,mish,1,137,, 140 | route,,,,,,,,138,,"-1, -3" 141 | convolutional,91.0,320,1,1,1,mish,1,139,, 142 | convolutional,92.0,320,1,1,1,mish,1,140,, 143 | route,,,,,,,,141,,-2 144 | convolutional,93.0,320,1,1,1,mish,1,142,, 145 | convolutional,94.0,320,3,1,1,mish,1,143,, 146 | convolutional,95.0,320,1,1,1,mish,1,144,, 147 | convolutional,96.0,320,3,1,1,mish,1,145,, 148 | convolutional,97.0,320,1,1,1,mish,1,146,, 149 | convolutional,98.0,320,3,1,1,mish,1,147,, 150 | route,,,,,,,,148,,"-1, -8" 151 | convolutional,99.0,320,1,1,1,mish,1,149,, 152 | convolutional,100.0,160,1,1,1,mish,1,150,, 153 | upsample,,,,2,,,,151,, 154 | route,,,,,,,,152,,57 155 | convolutional,101.0,160,1,1,1,mish,1,153,, 156 | route,,,,,,,,154,,"-1, -3" 157 | convolutional,102.0,160,1,1,1,mish,1,155,, 158 | convolutional,103.0,160,1,1,1,mish,1,156,, 159 | route,,,,,,,,157,,-2 160 | convolutional,104.0,160,1,1,1,mish,1,158,, 161 | convolutional,105.0,160,3,1,1,mish,1,159,, 162 | convolutional,106.0,160,1,1,1,mish,1,160,, 163 | convolutional,107.0,160,3,1,1,mish,1,161,, 164 | convolutional,108.0,160,1,1,1,mish,1,162,, 165 | convolutional,109.0,160,3,1,1,mish,1,163,, 166 | route,,,,,,,,164,,"-1, -8" 167 | convolutional,110.0,160,1,1,1,mish,1,165,, 168 | convolutional,111.0,320,3,1,1,mish,1,166,, 169 | convolutional,112.0,255,1,1,1,logistic,,167,, 170 | yolo,,,,,,,,168,, 171 | route,,,,,,,,169,,-4 172 | convolutional,113.0,320,3,2,1,mish,1,170,, 173 | route,,,,,,,,171,,"-1, -22" 174 | convolutional,114.0,320,1,1,1,mish,1,172,, 175 | convolutional,115.0,320,1,1,1,mish,1,173,, 176 | route,,,,,,,,174,,-2 177 | convolutional,116.0,320,1,1,1,mish,1,175,, 178 | convolutional,117.0,320,3,1,1,mish,1,176,, 179 | convolutional,118.0,320,1,1,1,mish,1,177,, 180 | convolutional,119.0,320,3,1,1,mish,1,178,, 181 | convolutional,120.0,320,1,1,1,mish,1,179,, 182 | convolutional,121.0,320,3,1,1,mish,1,180,, 183 | route,,,,,,,,181,,"-1,-8" 
184 | convolutional,122.0,320,1,1,1,mish,1,182,, 185 | convolutional,123.0,640,3,1,1,mish,1,183,, 186 | convolutional,124.0,255,1,1,1,logistic,,184,, 187 | yolo,,,,,,,,185,, 188 | route,,,,,,,,186,,-4 189 | convolutional,125.0,640,3,2,1,mish,1,187,, 190 | route,,,,,,,,188,,"-1, -55" 191 | convolutional,126.0,640,1,1,1,mish,1,189,, 192 | convolutional,127.0,640,1,1,1,mish,1,190,, 193 | route,,,,,,,,191,,-2 194 | convolutional,128.0,640,1,1,1,mish,1,192,, 195 | convolutional,129.0,640,3,1,1,mish,1,193,, 196 | convolutional,130.0,640,1,1,1,mish,1,194,, 197 | convolutional,131.0,640,3,1,1,mish,1,195,, 198 | convolutional,132.0,640,1,1,1,mish,1,196,, 199 | convolutional,133.0,640,3,1,1,mish,1,197,, 200 | route,,,,,,,,198,,"-1,-8" 201 | convolutional,134.0,640,1,1,1,mish,1,199,, 202 | convolutional,135.0,1280,3,1,1,mish,1,200,, 203 | convolutional,136.0,255,1,1,1,logistic,,201,, 204 | yolo,,,,,,,,202,, 205 | -------------------------------------------------------------------------------- /tools/yolov4x-mish.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/tools/yolov4x-mish.xlsx -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/utils/__init__.py -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | # Activation functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | # SiLU https://arxiv.org/pdf/1606.08415.pdf ---------------------------------------------------------------------------- 9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 10 | @staticmethod 11 | def forward(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 16 | @staticmethod 17 | def forward(x): 18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 19 | return x * F.hardtanh(x + 3, 0., 6.) / 6. 
# for torchscript, CoreML and ONNX 20 | 21 | 22 | class MemoryEfficientSwish(nn.Module): 23 | class F(torch.autograd.Function): 24 | @staticmethod 25 | def forward(ctx, x): 26 | ctx.save_for_backward(x) 27 | return x * torch.sigmoid(x) 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | x = ctx.saved_tensors[0] 32 | sx = torch.sigmoid(x) 33 | return grad_output * (sx * (1 + x * (1 - sx))) 34 | 35 | def forward(self, x): 36 | return self.F.apply(x) 37 | 38 | 39 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 40 | class Mish(nn.Module): 41 | @staticmethod 42 | def forward(x): 43 | return x * F.softplus(x).tanh() 44 | 45 | 46 | class MemoryEfficientMish(nn.Module): 47 | class F(torch.autograd.Function): 48 | @staticmethod 49 | def forward(ctx, x): 50 | ctx.save_for_backward(x) 51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 52 | 53 | @staticmethod 54 | def backward(ctx, grad_output): 55 | x = ctx.saved_tensors[0] 56 | sx = torch.sigmoid(x) 57 | fx = F.softplus(x).tanh() 58 | return grad_output * (fx + x * sx * (1 - fx * fx)) 59 | 60 | def forward(self, x): 61 | return self.F.apply(x) 62 | 63 | 64 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 65 | class FReLU(nn.Module): 66 | def __init__(self, c1, k=3): # ch_in, kernel 67 | super().__init__() 68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 69 | self.bn = nn.BatchNorm2d(c1) 70 | 71 | def forward(self, x): 72 | return torch.max(x, self.bn(self.conv(x))) 73 | -------------------------------------------------------------------------------- /utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | # Auto-anchor utils 2 | 3 | import numpy as np 4 | import torch 5 | import yaml 6 | from scipy.cluster.vq import kmeans 7 | from tqdm import tqdm 8 | 9 | from utils.general import colorstr 10 | 11 | 12 | def check_anchor_order(m): 13 | # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary 14 | a = m.anchor_grid.prod(-1).view(-1) # anchor area 15 | da = a[-1] - a[0] # delta a 16 | ds = m.stride[-1] - m.stride[0] # delta s 17 | if da.sign() != ds.sign(): # same order 18 | print('Reversing anchor order') 19 | m.anchors[:] = m.anchors.flip(0) 20 | m.anchor_grid[:] = m.anchor_grid.flip(0) 21 | 22 | 23 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 24 | # Check anchor fit to data, recompute if necessary 25 | prefix = colorstr('autoanchor: ') 26 | print(f'\n{prefix}Analyzing anchors... ', end='') 27 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() 28 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 29 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 30 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 31 | 32 | def metric(k): # compute metric 33 | r = wh[:, None] / k[None] 34 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 35 | best = x.max(1)[0] # best_x 36 | aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold 37 | bpr = (best > 1. 
/ thr).float().mean() # best possible recall 38 | return bpr, aat 39 | 40 | bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2)) 41 | print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='') 42 | if bpr < 0.98: # threshold to recompute 43 | print('. Attempting to improve anchors, please wait...') 44 | na = m.anchor_grid.numel() // 2 # number of anchors 45 | new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 46 | new_bpr = metric(new_anchors.reshape(-1, 2))[0] 47 | if new_bpr > bpr: # replace anchors 48 | new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors) 49 | m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid) # for inference 50 | m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss 51 | check_anchor_order(m) 52 | print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.') 53 | else: 54 | print(f'{prefix}Original anchors better than new anchors. Proceeding with original anchors.') 55 | print('') # newline 56 | 57 | 58 | def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 59 | """ Creates kmeans-evolved anchors from training dataset 60 | 61 | Arguments: 62 | path: path to dataset *.yaml, or a loaded dataset 63 | n: number of anchors 64 | img_size: image size used for training 65 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 66 | gen: generations to evolve anchors using genetic algorithm 67 | verbose: print all results 68 | 69 | Return: 70 | k: kmeans evolved anchors 71 | 72 | Usage: 73 | from utils.autoanchor import *; _ = kmean_anchors() 74 | """ 75 | thr = 1. / thr 76 | prefix = colorstr('autoanchor: ') 77 | 78 | def metric(k, wh): # compute metrics 79 | r = wh[:, None] / k[None] 80 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 81 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 82 | return x, x.max(1)[0] # x, best_x 83 | 84 | def anchor_fitness(k): # mutation fitness 85 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 86 | return (best * (best > thr).float()).mean() # fitness 87 | 88 | def print_results(k): 89 | k = k[np.argsort(k.prod(1))] # sort small to large 90 | x, best = metric(k, wh0) 91 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 92 | print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr') 93 | print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' 94 | f'past_thr={x[x > thr].mean():.3f}-mean: ', end='') 95 | for i, x in enumerate(k): 96 | print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg 97 | return k 98 | 99 | if isinstance(path, str): # *.yaml file 100 | with open(path) as f: 101 | data_dict = yaml.load(f, Loader=yaml.SafeLoader) # model dict 102 | from utils.datasets import LoadImagesAndLabels 103 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 104 | else: 105 | dataset = path # dataset 106 | 107 | # Get label wh 108 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 109 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 110 | 111 | # Filter 112 | i = (wh0 < 3.0).any(1).sum() 113 | if i: 114 | print(f'{prefix}WARNING: Extremely small objects found. 
{i} of {len(wh0)} labels are < 3 pixels in size.') 115 | wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels 116 | # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 117 | 118 | # Kmeans calculation 119 | print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...') 120 | s = wh.std(0) # sigmas for whitening 121 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 122 | k *= s 123 | wh = torch.tensor(wh, dtype=torch.float32) # filtered 124 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered 125 | k = print_results(k) 126 | 127 | # Plot 128 | # k, d = [None] * 20, [None] * 20 129 | # for i in tqdm(range(1, 21)): 130 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 131 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) 132 | # ax = ax.ravel() 133 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 134 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 135 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 136 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 137 | # fig.savefig('wh.png', dpi=200) 138 | 139 | # Evolve 140 | npr = np.random 141 | f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma 142 | pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:') # progress bar 143 | for _ in pbar: 144 | v = np.ones(sh) 145 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 146 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 147 | kg = (k.copy() * v).clip(min=2.0) 148 | fg = anchor_fitness(kg) 149 | if fg > f: 150 | f, k = fg, kg.copy() 151 | pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}' 152 | if verbose: 153 | print_results(k) 154 | 155 | return print_results(k) 156 | -------------------------------------------------------------------------------- /utils/convert_tflite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from typing import List, Optional 4 | import os 5 | import time 6 | import glob 7 | import cv2 8 | import numpy as np 9 | import tensorflow as tf 10 | from tensorflow.python.framework.convert_to_constants import ( 11 | convert_variables_to_constants_v2 12 | ) 13 | 14 | NUM_TRAINING_IMAGES = 100 15 | 16 | 17 | def save_frozen_graph( 18 | path_pb: str, 19 | model_keras: tf.keras.Model, 20 | input_keras: Optional[tf.keras.Input] = None 21 | ) -> None: 22 | if os.path.isfile(path_pb): 23 | return 24 | if input_keras is None: 25 | input_shape = model_keras.inputs[0].shape 26 | input_dtype = model_keras.inputs[0].dtype 27 | else: 28 | input_shape = input_keras.shape 29 | input_dtype = input_keras.dtype 30 | full_model = tf.function(lambda x: model_keras(x)) 31 | full_model = full_model.get_concrete_function( 32 | tf.TensorSpec(input_shape, input_dtype) 33 | ) 34 | frozen_func = convert_variables_to_constants_v2(full_model) 35 | frozen_func.graph.as_graph_def() 36 | # check inputs and outputs of frozen graph 37 | # print(frozen_func.inputs) 38 | # print(frozen_func.outputs) 39 | tf.io.write_graph( 40 | graph_or_graph_def=frozen_func.graph, 41 | logdir=os.path.dirname(path_pb), 42 | name=os.path.basename(path_pb), 43 | as_text=False 44 | ) 45 | return 46 | 47 | 48 | def load_frozen_graph( 49 | path_pb: str, 50 | inputs: List[str], 51 | outputs: List[str] 52 | ) -> tf.function: 53 | with tf.io.gfile.GFile(path_pb, "rb") as rf: 54 | graph_def = 
tf.compat.v1.GraphDef() 55 | graph_def.ParseFromString(rf.read()) 56 | 57 | def wrap_frozen_graph(graph_def, inputs, outputs): 58 | def _imports_graph_def(): 59 | tf.compat.v1.import_graph_def(graph_def, name="") 60 | 61 | wrapped_import = tf.compat.v1.wrap_function( 62 | _imports_graph_def, [] 63 | ) 64 | import_graph = wrapped_import.graph 65 | return wrapped_import.prune( 66 | tf.nest.map_structure(import_graph.as_graph_element, inputs), 67 | tf.nest.map_structure(import_graph.as_graph_element, outputs) 68 | ) 69 | 70 | # the name of inputs and outputs can be known with printing 71 | # frozen_func.inputs/outputs when `save_frozen_graph()` 72 | frozen_func = wrap_frozen_graph( 73 | graph_def=graph_def, inputs=inputs, outputs=outputs 74 | ) 75 | return frozen_func 76 | 77 | 78 | def convert_tflite_fp32( 79 | path_tflite: str, 80 | model_keras: tf.keras.Model 81 | ) -> None: 82 | if os.path.isfile(path_tflite): 83 | return 84 | converter = tf.lite.TFLiteConverter.from_keras_model(model_keras) 85 | converter.allow_custom_ops = False 86 | converter.experimental_new_converter = True 87 | model_tflite = converter.convert() 88 | open(path_tflite, "wb").write(model_tflite) 89 | return 90 | 91 | 92 | def convert_tflite_fp16( 93 | path_tflite: str, 94 | model_keras: tf.keras.Model 95 | ) -> None: 96 | if os.path.isfile(path_tflite): 97 | return 98 | converter = tf.lite.TFLiteConverter.from_keras_model(model_keras) 99 | converter.optimizations = [ 100 | tf.lite.Optimize.DEFAULT 101 | ] 102 | converter.target_spec.supported_types = [ 103 | tf.float16 104 | ] 105 | converter.allow_custom_ops = False 106 | converter.experimental_new_converter = True 107 | model_tflite = converter.convert() 108 | open(path_tflite, "wb").write(model_tflite) 109 | return 110 | 111 | 112 | def convert_tflite_int8( 113 | path_tflite: str, 114 | imgsize: List[int], 115 | model_keras: tf.keras.Model 116 | ) -> None: 117 | if not os.path.isdir('datasets/val2017'): 118 | raise SystemError( 119 | 'you need COCO 2017 val dataset for post-training' 120 | ) 121 | if os.path.isfile(path_tflite): 122 | return 123 | 124 | def representative_dataset_gen(): 125 | images = glob.glob('datasets/val2017/*.jpg') 126 | np.random.shuffle(images) 127 | for i, ipath in enumerate(images[:NUM_TRAINING_IMAGES]): 128 | img = cv2.imread(ipath) 129 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 130 | ih = img.shape[0] 131 | iw = img.shape[1] 132 | scale = min(imgsize[0] / ih, imgsize[1] / iw) 133 | nh = int(ih * scale) 134 | nw = int(iw * scale) 135 | oh = (imgsize[0] - nh) // 2 136 | ow = (imgsize[1] - nw) // 2 137 | if scale >= 1: 138 | interpolation = cv2.INTER_CUBIC 139 | else: 140 | interpolation = cv2.INTER_AREA 141 | nimg = cv2.resize( 142 | img.copy(), (nw, nh), 143 | interpolation=interpolation 144 | ) 145 | rimg = np.full((*imgsize, 3), 128, dtype=np.uint8) 146 | rimg[oh:oh + nh, ow:ow + nw, :] = nimg 147 | rimg = rimg[np.newaxis, ...].astype(np.float32) 148 | rimg /= 255.0 149 | yield [rimg] 150 | if i % 10 == 9: 151 | print(f'post-training... 
({i}/{NUM_TRAINING_IMAGES})') 152 | return 153 | 154 | converter = tf.lite.TFLiteConverter.from_keras_model(model_keras) 155 | converter.optimizations = [ 156 | tf.lite.Optimize.DEFAULT 157 | ] 158 | converter.representative_dataset = representative_dataset_gen 159 | converter.target_spec.supported_ops = [ 160 | tf.lite.OpsSet.TFLITE_BUILTINS_INT8, 161 | tf.lite.OpsSet.SELECT_TF_OPS 162 | ] 163 | converter.inference_input_type = tf.uint8 164 | converter.inference_output_type = tf.uint8 165 | converter.allow_custom_ops = True 166 | converter.experimental_new_converter = True 167 | model_tflite = converter.convert() 168 | open(path_tflite, "wb").write(model_tflite) 169 | return 170 | 171 | 172 | def _print_detail(details: List) -> None: 173 | for i, detail in enumerate(details): 174 | print("{}: index={} shape={} dtype={}".format( 175 | i, detail['index'], detail['shape'], detail['dtype'] 176 | )) 177 | return 178 | 179 | 180 | def test_tflite(path_tflite: str, mode: str) -> None: 181 | assert mode in ['fp32', 'fp16', 'int8'] 182 | if not os.path.isfile(path_tflite): 183 | print(f'ERROR: {path_tflite} not found') 184 | return 185 | print(f'MODEL: {path_tflite}') 186 | interpreter = tf.lite.Interpreter(path_tflite) 187 | interpreter.allocate_tensors() 188 | input_details = interpreter.get_input_details() 189 | print('input details') 190 | _print_detail(input_details) 191 | output_details = interpreter.get_output_details() 192 | print('output details') 193 | _print_detail(output_details) 194 | input_shape = input_details[0]['shape'] 195 | input_data = np.array( 196 | np.random.randint(0, 256, input_shape) 197 | ) 198 | if mode == 'int8': 199 | input_data = input_data.astype(np.uint8) 200 | else: 201 | input_data = (input_data / 255.0).astype(np.float32) 202 | interpreter.set_tensor(input_details[0]['index'], input_data) 203 | start_time = time.perf_counter() 204 | interpreter.invoke() 205 | end_time = time.perf_counter() 206 | elapsed = round((end_time - start_time) * 1000, 3) 207 | print(f'elapsed time taken for inference: {elapsed}[ms]') 208 | output_data = [ 209 | interpreter.get_tensor( 210 | output_details[i]['index'] 211 | ) for i in range(len(output_details)) 212 | ] 213 | for i, out in enumerate(output_data): 214 | out_shape = output_details[i]['shape'] 215 | assert len(out.shape) == len(out_shape) 216 | for j, v in enumerate(out.shape): 217 | assert v == out_shape[j] 218 | return 219 | -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # Google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | 3 | import os 4 | import platform 5 | import subprocess 6 | import time 7 | from pathlib import Path 8 | 9 | import requests 10 | import torch 11 | 12 | 13 | def gsutil_getsize(url=''): 14 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 15 | s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8') 16 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 17 | 18 | 19 | def attempt_download(file, repo='ultralytics/yolov5'): 20 | # Attempt file download if does not exist 21 | file = Path(str(file).strip().replace("'", '').lower()) 22 | 23 | if not file.exists(): 24 | try: 25 | response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json() # github api 26 | assets = [x['name'] for x in response['assets']] # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...] 
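            # the release tag read on the next line is later combined with the asset
            # name as https://github.com/{repo}/releases/download/{tag}/{name},
            # so tag and assets must describe the same GitHub release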
27 | tag = response['tag_name'] # i.e. 'v1.0' 28 | except: # fallback plan 29 | assets = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt'] 30 | tag = subprocess.check_output('git tag', shell=True).decode().split()[-1] 31 | 32 | name = file.name 33 | if name in assets: 34 | msg = f'{file} missing, try downloading from https://github.com/{repo}/releases/' 35 | redundant = False # second download option 36 | try: # GitHub 37 | url = f'https://github.com/{repo}/releases/download/{tag}/{name}' 38 | print(f'Downloading {url} to {file}...') 39 | torch.hub.download_url_to_file(url, file) 40 | assert file.exists() and file.stat().st_size > 1E6 # check 41 | except Exception as e: # GCP 42 | print(f'Download error: {e}') 43 | assert redundant, 'No secondary mirror' 44 | url = f'https://storage.googleapis.com/{repo}/ckpt/{name}' 45 | print(f'Downloading {url} to {file}...') 46 | os.system(f'curl -L {url} -o {file}') # torch.hub.download_url_to_file(url, weights) 47 | finally: 48 | if not file.exists() or file.stat().st_size < 1E6: # check 49 | file.unlink(missing_ok=True) # remove partial downloads 50 | print(f'ERROR: Download failure: {msg}') 51 | print('') 52 | return 53 | 54 | 55 | def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'): 56 | # Downloads a file from Google Drive. from yolov5.utils.google_utils import *; gdrive_download() 57 | t = time.time() 58 | file = Path(file) 59 | cookie = Path('cookie') # gdrive cookie 60 | print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='') 61 | file.unlink(missing_ok=True) # remove existing file 62 | cookie.unlink(missing_ok=True) # remove existing cookie 63 | 64 | # Attempt file download 65 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 66 | os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}') 67 | if os.path.exists('cookie'): # large file 68 | s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}' 69 | else: # small file 70 | s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"' 71 | r = os.system(s) # execute, capture return 72 | cookie.unlink(missing_ok=True) # remove existing cookie 73 | 74 | # Error check 75 | if r != 0: 76 | file.unlink(missing_ok=True) # remove partial 77 | print('Download error ') # raise Exception('Download error') 78 | return r 79 | 80 | # Unzip if archive 81 | if file.suffix == '.zip': 82 | print('unzipping... 
', end='') 83 | os.system(f'unzip -q {file}') # unzip 84 | file.unlink() # remove zip to free space 85 | 86 | print(f'Done ({time.time() - t:.1f}s)') 87 | return r 88 | 89 | 90 | def get_token(cookie="./cookie"): 91 | with open(cookie) as f: 92 | for line in f: 93 | if "download" in line: 94 | return line.split()[-1] 95 | return "" 96 | 97 | # def upload_blob(bucket_name, source_file_name, destination_blob_name): 98 | # # Uploads a file to a bucket 99 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 100 | # 101 | # storage_client = storage.Client() 102 | # bucket = storage_client.get_bucket(bucket_name) 103 | # blob = bucket.blob(destination_blob_name) 104 | # 105 | # blob.upload_from_filename(source_file_name) 106 | # 107 | # print('File {} uploaded to {}.'.format( 108 | # source_file_name, 109 | # destination_blob_name)) 110 | # 111 | # 112 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 113 | # # Uploads a blob from a bucket 114 | # storage_client = storage.Client() 115 | # bucket = storage_client.get_bucket(bucket_name) 116 | # blob = bucket.blob(source_blob_name) 117 | # 118 | # blob.download_to_filename(destination_file_name) 119 | # 120 | # print('Blob {} downloaded to {}.'.format( 121 | # source_blob_name, 122 | # destination_file_name)) 123 | -------------------------------------------------------------------------------- /utils/image_preproc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import numpy as np 4 | import cv2 5 | from cv2 import dnn_superres_DnnSuperResImpl 6 | 7 | 8 | def adjust_white_balance(image: np.ndarray) -> np.ndarray: 9 | # white balance adjustment for strong neutral white 10 | image = cv2.cvtColor(image, cv2.COLOR_BGR2LAB) 11 | avg_a = np.average(image[:, :, 1]) 12 | avg_b = np.average(image[:, :, 2]) 13 | image[:, :, 1] = image[:, :, 1] - ( 14 | (avg_a - 128) * (image[:, :, 0] / 255.0) * 1.1 15 | ) 16 | image[:, :, 2] = image[:, :, 2] - ( 17 | (avg_b - 128) * (image[:, :, 0] / 255.0) * 1.1 18 | ) 19 | image = cv2.cvtColor(image, cv2.COLOR_LAB2BGR) 20 | return image 21 | 22 | 23 | def smooth_image(image: np.ndarray) -> np.ndarray: 24 | # image smoothing for noise removal 25 | return cv2.GaussianBlur(image, (5, 5), 0) 26 | 27 | 28 | def correct_contrast(image: np.ndarray) -> np.ndarray: 29 | # contrast correction that brightens dark areas 30 | b = 10 # HEURISTIC !! 
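    # gamma = b / sqrt(mean brightness): the darker the input, the larger the
    # gamma, and the 255 * (i / 255) ** (1 / gamma) look-up table built below
    # brightens it more aggressively (an image with mean ~100 passes through
    # essentially unchanged, since gamma is then 1)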
31 | gamma = 1 / np.sqrt(image.mean()) * b 32 | g_table = np.array([ 33 | ((i / 255.0) ** (1 / gamma)) * 255 34 | for i in np.arange(0, 256) 35 | ]).astype("uint8") 36 | return cv2.LUT(image, g_table) 37 | 38 | 39 | def levelize_histogram(image: np.ndarray) -> np.ndarray: 40 | # make color distributions even 41 | for i in range(3): 42 | image[:, :, i] = cv2.equalizeHist(image[:, :, i]) 43 | return image 44 | 45 | 46 | def correct_contrast_using_lut(image: np.ndarray) -> np.ndarray: 47 | # contrast correction using look-up-table 48 | a = 10 49 | c_table = np.array([ 50 | 255.0 / (1 + np.exp(-a * (i - 128) / 255)) 51 | for i in np.arange(0, 256) 52 | ]).astype("uint8") 53 | return cv2.LUT(image, c_table) 54 | 55 | 56 | def upsample_image( 57 | image: np.ndarray, 58 | sr: dnn_superres_DnnSuperResImpl 59 | ) -> np.ndarray: 60 | # increase resolution with super-resolution to make the image clearer 61 | # and then shrink the image 62 | prev_height = image.shape[0] 63 | prev_width = image.shape[1] 64 | # sr.upsample() (super-resolution) is too slow when the image is big 65 | # so, do super-resolution after shrinking image 66 | image = cv2.resize( 67 | image, (prev_width // 2, prev_height // 2), 68 | interpolation=cv2.INTER_AREA 69 | ) 70 | image = sr.upsample(image) 71 | # restore image size 72 | image = cv2.resize( 73 | image, (prev_width, prev_height), 74 | interpolation=cv2.INTER_AREA 75 | ) 76 | return image 77 | -------------------------------------------------------------------------------- /utils/loss.py: -------------------------------------------------------------------------------- 1 | # Loss functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from utils.general import bbox_iou 7 | from utils.torch_utils import is_parallel 8 | 9 | 10 | def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 11 | # return positive, negative label smoothing BCE targets 12 | return 1.0 - 0.5 * eps, 0.5 * eps 13 | 14 | 15 | class BCEBlurWithLogitsLoss(nn.Module): 16 | # BCEwithLogitLoss() with reduced missing label effects. 17 | def __init__(self, alpha=0.05): 18 | super(BCEBlurWithLogitsLoss, self).__init__() 19 | self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss() 20 | self.alpha = alpha 21 | 22 | def forward(self, pred, true): 23 | loss = self.loss_fcn(pred, true) 24 | pred = torch.sigmoid(pred) # prob from logits 25 | dx = pred - true # reduce only missing label effects 26 | # dx = (pred - true).abs() # reduce missing label and false label effects 27 | alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4)) 28 | loss *= alpha_factor 29 | return loss.mean() 30 | 31 | 32 | class FocalLoss(nn.Module): 33 | # Wraps focal loss around existing loss_fcn(), i.e. 
criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) 34 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 35 | super(FocalLoss, self).__init__() 36 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() 37 | self.gamma = gamma 38 | self.alpha = alpha 39 | self.reduction = loss_fcn.reduction 40 | self.loss_fcn.reduction = 'none' # required to apply FL to each element 41 | 42 | def forward(self, pred, true): 43 | loss = self.loss_fcn(pred, true) 44 | # p_t = torch.exp(-loss) 45 | # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability 46 | 47 | # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py 48 | pred_prob = torch.sigmoid(pred) # prob from logits 49 | p_t = true * pred_prob + (1 - true) * (1 - pred_prob) 50 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 51 | modulating_factor = (1.0 - p_t) ** self.gamma 52 | loss *= alpha_factor * modulating_factor 53 | 54 | if self.reduction == 'mean': 55 | return loss.mean() 56 | elif self.reduction == 'sum': 57 | return loss.sum() 58 | else: # 'none' 59 | return loss 60 | 61 | 62 | class QFocalLoss(nn.Module): 63 | # Wraps Quality focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) 64 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 65 | super(QFocalLoss, self).__init__() 66 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() 67 | self.gamma = gamma 68 | self.alpha = alpha 69 | self.reduction = loss_fcn.reduction 70 | self.loss_fcn.reduction = 'none' # required to apply FL to each element 71 | 72 | def forward(self, pred, true): 73 | loss = self.loss_fcn(pred, true) 74 | 75 | pred_prob = torch.sigmoid(pred) # prob from logits 76 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 77 | modulating_factor = torch.abs(true - pred_prob) ** self.gamma 78 | loss *= alpha_factor * modulating_factor 79 | 80 | if self.reduction == 'mean': 81 | return loss.mean() 82 | elif self.reduction == 'sum': 83 | return loss.sum() 84 | else: # 'none' 85 | return loss 86 | 87 | 88 | class ComputeLoss: 89 | # Compute losses 90 | def __init__(self, model, autobalance=False): 91 | super(ComputeLoss, self).__init__() 92 | device = next(model.parameters()).device # get model device 93 | h = model.hyp # hyperparameters 94 | 95 | # Define criteria 96 | BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) 97 | BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) 98 | 99 | # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 100 | self.cp, self.cn = smooth_BCE(eps=0.0) 101 | 102 | # Focal loss 103 | g = h['fl_gamma'] # focal loss gamma 104 | if g > 0: 105 | BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) 106 | 107 | det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module 108 | self.balance = {3: [4.0, 1.0, 0.4], 4: [4.0, 1.0, 0.25, 0.06], 5: [4.0, 1.0, 0.25, 0.06, .02]}[det.nl] 109 | self.ssi = (det.stride == 16).nonzero(as_tuple=False).item() # stride 16 index 110 | self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, model.gr, h, autobalance 111 | for k in 'na', 'nc', 'nl', 'anchors': 112 | setattr(self, k, getattr(det, k)) 113 | 114 | def __call__(self, p, targets): # predictions, targets, model 115 | device = targets.device 116 | lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, 
device=device) 117 | tcls, tbox, indices, anchors = self.build_targets(p, targets) # targets 118 | 119 | # Losses 120 | for i, pi in enumerate(p): # layer index, layer predictions 121 | b, a, gj, gi = indices[i] # image, anchor, gridy, gridx 122 | tobj = torch.zeros_like(pi[..., 0], device=device) # target obj 123 | 124 | n = b.shape[0] # number of targets 125 | if n: 126 | ps = pi[b, a, gj, gi] # prediction subset corresponding to targets 127 | 128 | # Regression 129 | pxy = ps[:, :2].sigmoid() * 2. - 0.5 130 | pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] 131 | pbox = torch.cat((pxy, pwh), 1) # predicted box 132 | iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) 133 | lbox += (1.0 - iou).mean() # iou loss 134 | 135 | # Objectness 136 | tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio 137 | 138 | # Classification 139 | if self.nc > 1: # cls loss (only if multiple classes) 140 | t = torch.full_like(ps[:, 5:], self.cn, device=device) # targets 141 | t[range(n), tcls[i]] = self.cp 142 | lcls += self.BCEcls(ps[:, 5:], t) # BCE 143 | 144 | # Append targets to text file 145 | # with open('targets.txt', 'a') as file: 146 | # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] 147 | 148 | obji = self.BCEobj(pi[..., 4], tobj) 149 | lobj += obji * self.balance[i] # obj loss 150 | if self.autobalance: 151 | self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() 152 | 153 | if self.autobalance: 154 | self.balance = [x / self.balance[self.ssi] for x in self.balance] 155 | lbox *= self.hyp['box'] 156 | lobj *= self.hyp['obj'] 157 | lcls *= self.hyp['cls'] 158 | bs = tobj.shape[0] # batch size 159 | 160 | loss = lbox + lobj + lcls 161 | return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach() 162 | 163 | def build_targets(self, p, targets): 164 | # Build targets for compute_loss(), input targets(image,class,x,y,w,h) 165 | na, nt = self.na, targets.shape[0] # number of anchors, targets 166 | tcls, tbox, indices, anch = [], [], [], [] 167 | gain = torch.ones(7, device=targets.device) # normalized to gridspace gain 168 | ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) 169 | targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices 170 | 171 | g = 0.5 # bias 172 | off = torch.tensor([[0, 0], 173 | [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m 174 | # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm 175 | ], device=targets.device).float() * g # offsets 176 | 177 | for i in range(self.nl): 178 | anchors = self.anchors[i] 179 | gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain 180 | 181 | # Match targets to anchors 182 | t = targets * gain 183 | if nt: 184 | # Matches 185 | r = t[:, :, 4:6] / anchors[:, None] # wh ratio 186 | j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t'] # compare 187 | # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) 188 | t = t[j] # filter 189 | 190 | # Offsets 191 | gxy = t[:, 2:4] # grid xy 192 | gxi = gain[[2, 3]] - gxy # inverse 193 | j, k = ((gxy % 1. < g) & (gxy > 1.)).T 194 | l, m = ((gxi % 1. 
< g) & (gxi > 1.)).T 195 | j = torch.stack((torch.ones_like(j), j, k, l, m)) 196 | t = t.repeat((5, 1, 1))[j] 197 | offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] 198 | else: 199 | t = targets[0] 200 | offsets = 0 201 | 202 | # Define 203 | b, c = t[:, :2].long().T # image, class 204 | gxy = t[:, 2:4] # grid xy 205 | gwh = t[:, 4:6] # grid wh 206 | gij = (gxy - offsets).long() 207 | gi, gj = gij.T # grid xy indices 208 | 209 | # Append 210 | a = t[:, 6].long() # anchor indices 211 | indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices 212 | tbox.append(torch.cat((gxy - gij, gwh), 1)) # box 213 | anch.append(anchors[a]) # anchors 214 | tcls.append(c) # class 215 | 216 | return tcls, tbox, indices, anch 217 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | # Model validation metrics 2 | 3 | from pathlib import Path 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import torch 8 | 9 | from . import general 10 | 11 | 12 | def fitness(x): 13 | # Model fitness as a weighted combination of metrics 14 | w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] 15 | return (x[:, :4] * w).sum(1) 16 | 17 | 18 | def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=()): 19 | """ Compute the average precision, given the recall and precision curves. 20 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 21 | # Arguments 22 | tp: True positives (nparray, nx1 or nx10). 23 | conf: Objectness value from 0-1 (nparray). 24 | pred_cls: Predicted object classes (nparray). 25 | target_cls: True object classes (nparray). 26 | plot: Plot precision-recall curve at mAP@0.5 27 | save_dir: Plot save directory 28 | # Returns 29 | The average precision as computed in py-faster-rcnn. 
30 | """ 31 | 32 | # Sort by objectness 33 | i = np.argsort(-conf) 34 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 35 | 36 | # Find unique classes 37 | unique_classes = np.unique(target_cls) 38 | nc = unique_classes.shape[0] # number of classes, number of detections 39 | 40 | # Create Precision-Recall curve and compute AP for each class 41 | px, py = np.linspace(0, 1, 1000), [] # for plotting 42 | ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) 43 | for ci, c in enumerate(unique_classes): 44 | i = pred_cls == c 45 | n_l = (target_cls == c).sum() # number of labels 46 | n_p = i.sum() # number of predictions 47 | 48 | if n_p == 0 or n_l == 0: 49 | continue 50 | else: 51 | # Accumulate FPs and TPs 52 | fpc = (1 - tp[i]).cumsum(0) 53 | tpc = tp[i].cumsum(0) 54 | 55 | # Recall 56 | recall = tpc / (n_l + 1e-16) # recall curve 57 | r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases 58 | 59 | # Precision 60 | precision = tpc / (tpc + fpc) # precision curve 61 | p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score 62 | 63 | # AP from recall-precision curve 64 | for j in range(tp.shape[1]): 65 | ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) 66 | if plot and j == 0: 67 | py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 68 | 69 | # Compute F1 (harmonic mean of precision and recall) 70 | f1 = 2 * p * r / (p + r + 1e-16) 71 | if plot: 72 | plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names) 73 | plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1') 74 | plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision') 75 | plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall') 76 | 77 | i = f1.mean(0).argmax() # max F1 index 78 | return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype('int32') 79 | 80 | 81 | def compute_ap(recall, precision): 82 | """ Compute the average precision, given the recall and precision curves 83 | # Arguments 84 | recall: The recall curve (list) 85 | precision: The precision curve (list) 86 | # Returns 87 | Average precision, precision curve, recall curve 88 | """ 89 | 90 | # Append sentinel values to beginning and end 91 | mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01])) 92 | mpre = np.concatenate(([1.], precision, [0.])) 93 | 94 | # Compute the precision envelope 95 | mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) 96 | 97 | # Integrate area under curve 98 | method = 'interp' # methods: 'continuous', 'interp' 99 | if method == 'interp': 100 | x = np.linspace(0, 1, 101) # 101-point interp (COCO) 101 | ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate 102 | else: # 'continuous' 103 | i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes 104 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve 105 | 106 | return ap, mpre, mrec 107 | 108 | 109 | class ConfusionMatrix: 110 | # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix 111 | def __init__(self, nc, conf=0.25, iou_thres=0.45): 112 | self.matrix = np.zeros((nc + 1, nc + 1)) 113 | self.nc = nc # number of classes 114 | self.conf = conf 115 | self.iou_thres = iou_thres 116 | 117 | def process_batch(self, detections, labels): 118 | """ 119 | Return intersection-over-union (Jaccard index) of boxes. 120 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
121 | Arguments: 122 | detections (Array[N, 6]), x1, y1, x2, y2, conf, class 123 | labels (Array[M, 5]), class, x1, y1, x2, y2 124 | Returns: 125 | None, updates confusion matrix accordingly 126 | """ 127 | detections = detections[detections[:, 4] > self.conf] 128 | gt_classes = labels[:, 0].int() 129 | detection_classes = detections[:, 5].int() 130 | iou = general.box_iou(labels[:, 1:], detections[:, :4]) 131 | 132 | x = torch.where(iou > self.iou_thres) 133 | if x[0].shape[0]: 134 | matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() 135 | if x[0].shape[0] > 1: 136 | matches = matches[matches[:, 2].argsort()[::-1]] 137 | matches = matches[np.unique(matches[:, 1], return_index=True)[1]] 138 | matches = matches[matches[:, 2].argsort()[::-1]] 139 | matches = matches[np.unique(matches[:, 0], return_index=True)[1]] 140 | else: 141 | matches = np.zeros((0, 3)) 142 | 143 | n = matches.shape[0] > 0 144 | m0, m1, _ = matches.transpose().astype(np.int16) 145 | for i, gc in enumerate(gt_classes): 146 | j = m0 == i 147 | if n and sum(j) == 1: 148 | self.matrix[gc, detection_classes[m1[j]]] += 1 # correct 149 | else: 150 | self.matrix[gc, self.nc] += 1 # background FP 151 | 152 | if n: 153 | for i, dc in enumerate(detection_classes): 154 | if not any(m1 == i): 155 | self.matrix[self.nc, dc] += 1 # background FN 156 | 157 | def matrix(self): 158 | return self.matrix 159 | 160 | def plot(self, save_dir='', names=()): 161 | try: 162 | import seaborn as sn 163 | 164 | array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) # normalize 165 | array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) 166 | 167 | fig = plt.figure(figsize=(12, 9), tight_layout=True) 168 | sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size 169 | labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels 170 | sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True, 171 | xticklabels=names + ['background FN'] if labels else "auto", 172 | yticklabels=names + ['background FP'] if labels else "auto").set_facecolor((1, 1, 1)) 173 | fig.axes[0].set_xlabel('True') 174 | fig.axes[0].set_ylabel('Predicted') 175 | fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) 176 | except Exception as e: 177 | pass 178 | 179 | def print(self): 180 | for i in range(self.nc + 1): 181 | print(' '.join(map(str, self.matrix[i]))) 182 | 183 | 184 | # Plots ---------------------------------------------------------------------------------------------------------------- 185 | 186 | def plot_pr_curve(px, py, ap, save_dir='pr_curve.png', names=()): 187 | # Precision-recall curve 188 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) 189 | py = np.stack(py, axis=1) 190 | 191 | if 0 < len(names) < 21: # display per-class legend if < 21 classes 192 | for i, y in enumerate(py.T): 193 | ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision) 194 | else: 195 | ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision) 196 | 197 | ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) 198 | ax.set_xlabel('Recall') 199 | ax.set_ylabel('Precision') 200 | ax.set_xlim(0, 1) 201 | ax.set_ylim(0, 1) 202 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") 203 | fig.savefig(Path(save_dir), dpi=250) 204 | 205 | 206 | def plot_mc_curve(px, py, save_dir='mc_curve.png', names=(), xlabel='Confidence', 
ylabel='Metric'): 207 | # Metric-confidence curve 208 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) 209 | 210 | if 0 < len(names) < 21: # display per-class legend if < 21 classes 211 | for i, y in enumerate(py): 212 | ax.plot(px, y, linewidth=1, label=f'{names[i]}') # plot(confidence, metric) 213 | else: 214 | ax.plot(px, py.T, linewidth=1, color='grey') # plot(confidence, metric) 215 | 216 | y = py.mean(0) 217 | ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}') 218 | ax.set_xlabel(xlabel) 219 | ax.set_ylabel(ylabel) 220 | ax.set_xlim(0, 1) 221 | ax.set_ylim(0, 1) 222 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") 223 | fig.savefig(Path(save_dir), dpi=250) 224 | -------------------------------------------------------------------------------- /utils/nms.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import numpy as np 4 | 5 | 6 | # Intersection over Union 7 | # bboxesX[:4] is numpy array of xyxy (xmin, ymin, xmax, ymax) 8 | # bboxes1: the bounding box which has the highest confidence score 9 | # bboxes2: the bounding boxes of the same category except the one above 10 | def bboxes_iou( 11 | bboxes1: np.ndarray, 12 | bboxes2: np.ndarray 13 | ) -> np.ndarray: 14 | bboxes1_area = ( 15 | bboxes1[:, 2] - bboxes1[:, 0] 16 | ) * ( 17 | bboxes1[:, 3] - bboxes1[:, 1] 18 | ) 19 | bboxes2_area = ( 20 | bboxes2[:, 2] - bboxes2[:, 0] 21 | ) * ( 22 | bboxes2[:, 3] - bboxes2[:, 1] 23 | ) 24 | left_ups = np.maximum(bboxes1[:, :2], bboxes2[:, :2]) 25 | right_downs = np.minimum(bboxes1[:, 2:4], bboxes2[:, 2:4]) 26 | intersections = np.maximum(right_downs - left_ups, 0.0) 27 | inter_areas = intersections[:, 0] * intersections[:, 1] 28 | union_areas = bboxes1_area + bboxes2_area - inter_areas 29 | ious = np.maximum( 30 | 1.0 * inter_areas / union_areas, 31 | np.finfo(np.float32).eps 32 | ) 33 | return ious 34 | 35 | 36 | # filter bounding boxes using (soft) Non-Maximum Suppression 37 | # paper of soft NMS: https://arxiv.org/abs/1704.04503 38 | # bboxes is numpy array of 39 | # offset 0-3: xyxy (xmin, ymin, xmax, ymax) 40 | # offset 4: category id (int) 41 | # offset 5: confidence score 42 | def filter_bboxes( 43 | bboxes: np.ndarray, 44 | conf_threshold: float = 0.3, 45 | iou_threshold: float = 0.45, 46 | disable_soft_nms: bool = False 47 | ) -> np.ndarray: 48 | if bboxes.shape[0] == 0: 49 | return bboxes 50 | # filter by confidence threshold 51 | bboxes = bboxes[bboxes[:, 5] > conf_threshold] 52 | if bboxes.shape[0] == 0: 53 | return bboxes 54 | # confidence for soft NMS 55 | bboxes = np.insert(bboxes, 6, bboxes[:, 5], axis=1) 56 | # (soft) NMS for each class 57 | unique_category_ids = list(set(bboxes[:, 4])) 58 | best_bboxes = list() 59 | for cat in unique_category_ids: 60 | cat_bboxes = bboxes[bboxes[:, 4] == cat] 61 | while cat_bboxes.shape[0] > 0: 62 | if cat_bboxes.shape[0] == 1: 63 | best_bboxes.append(cat_bboxes) 64 | break 65 | max_conf = np.argmax(cat_bboxes[:, 6]) 66 | best_bbox = cat_bboxes[max_conf:max_conf + 1] 67 | best_bboxes.append(best_bbox) 68 | cat_bboxes = np.delete(cat_bboxes, max_conf, axis=0) 69 | ious = bboxes_iou( 70 | bboxes1=best_bbox, 71 | bboxes2=cat_bboxes 72 | ) 73 | if disable_soft_nms: 74 | cat_bboxes = cat_bboxes[ious < iou_threshold] 75 | else: 76 | iou_mask = (ious >= iou_threshold).astype(float) 77 | cat_bboxes[:, 6] = cat_bboxes[:, 6] * ( 78 | 1.0 - (ious * iou_mask) 79 | ) 80 | cat_bboxes =
cat_bboxes[cat_bboxes[:, 6] > conf_threshold] 81 | return np.concatenate(best_bboxes, axis=0)[:, :6] 82 | -------------------------------------------------------------------------------- /utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | # PyTorch utils 2 | 3 | import logging 4 | import math 5 | import os 6 | import subprocess 7 | import time 8 | from contextlib import contextmanager 9 | from copy import deepcopy 10 | from pathlib import Path 11 | 12 | import torch 13 | import torch.backends.cudnn as cudnn 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | import torchvision 17 | 18 | try: 19 | import thop # for FLOPS computation 20 | except ImportError: 21 | thop = None 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | @contextmanager 26 | def torch_distributed_zero_first(local_rank: int): 27 | """ 28 | Decorator to make all processes in distributed training wait for each local_master to do something. 29 | """ 30 | if local_rank not in [-1, 0]: 31 | torch.distributed.barrier() 32 | yield 33 | if local_rank == 0: 34 | torch.distributed.barrier() 35 | 36 | 37 | def init_torch_seeds(seed=0): 38 | # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html 39 | torch.manual_seed(seed) 40 | if seed == 0: # slower, more reproducible 41 | cudnn.benchmark, cudnn.deterministic = False, True 42 | else: # faster, less reproducible 43 | cudnn.benchmark, cudnn.deterministic = True, False 44 | 45 | 46 | def git_describe(): 47 | # return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe 48 | if Path('.git').exists(): 49 | return subprocess.check_output('git describe --tags --long --always', shell=True).decode('utf-8')[:-1] 50 | else: 51 | return '' 52 | 53 | 54 | def select_device(device='', batch_size=None): 55 | # device = 'cpu' or '0' or '0,1,2,3' 56 | s = f'YOLOv5 {git_describe()} torch {torch.__version__} ' # string 57 | cpu = device.lower() == 'cpu' 58 | if cpu: 59 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False 60 | elif device: # non-cpu device requested 61 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 62 | assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availability 63 | 64 | cuda = not cpu and torch.cuda.is_available() 65 | if cuda: 66 | n = torch.cuda.device_count() 67 | if n > 1 and batch_size: # check that batch_size is compatible with device_count 68 | assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}' 69 | space = ' ' * len(s) 70 | for i, d in enumerate(device.split(',') if device else range(n)): 71 | p = torch.cuda.get_device_properties(i) 72 | s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2}MB)\n" # bytes to MB 73 | else: 74 | s += 'CPU\n' 75 | 76 | logger.info(s) # skip a line 77 | return torch.device('cuda:0' if cuda else 'cpu') 78 | 79 | 80 | def time_synchronized(): 81 | # pytorch-accurate time 82 | if torch.cuda.is_available(): 83 | torch.cuda.synchronize() 84 | return time.time() 85 | 86 | 87 | def profile(x, ops, n=100, device=None): 88 | # profile a pytorch module or list of modules. 
Example usage: 89 | # x = torch.randn(16, 3, 640, 640) # input 90 | # m1 = lambda x: x * torch.sigmoid(x) 91 | # m2 = nn.SiLU() 92 | # profile(x, [m1, m2], n=100) # profile speed over 100 iterations 93 | 94 | device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 95 | x = x.to(device) 96 | x.requires_grad = True 97 | print(torch.__version__, device.type, torch.cuda.get_device_properties(0) if device.type == 'cuda' else '') 98 | print(f"\n{'Params':>12s}{'GFLOPS':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}") 99 | for m in ops if isinstance(ops, list) else [ops]: 100 | m = m.to(device) if hasattr(m, 'to') else m # device 101 | m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m # type 102 | dtf, dtb, t = 0., 0., [0., 0., 0.] # dt forward, backward 103 | try: 104 | flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPS 105 | except: 106 | flops = 0 107 | 108 | for _ in range(n): 109 | t[0] = time_synchronized() 110 | y = m(x) 111 | t[1] = time_synchronized() 112 | try: 113 | _ = y.sum().backward() 114 | t[2] = time_synchronized() 115 | except: # no backward method 116 | t[2] = float('nan') 117 | dtf += (t[1] - t[0]) * 1000 / n # ms per op forward 118 | dtb += (t[2] - t[1]) * 1000 / n # ms per op backward 119 | 120 | s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' 121 | s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list' 122 | p = sum(list(x.numel() for x in m.parameters())) if isinstance(m, nn.Module) else 0 # parameters 123 | print(f'{p:12.4g}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}') 124 | 125 | 126 | def is_parallel(model): 127 | return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) 128 | 129 | 130 | def intersect_dicts(da, db, exclude=()): 131 | # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values 132 | return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} 133 | 134 | 135 | def initialize_weights(model): 136 | for m in model.modules(): 137 | t = type(m) 138 | if t is nn.Conv2d: 139 | pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 140 | elif t is nn.BatchNorm2d: 141 | m.eps = 1e-3 142 | m.momentum = 0.03 143 | elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 144 | m.inplace = True 145 | 146 | 147 | def find_modules(model, mclass=nn.Conv2d): 148 | # Finds layer indices matching module class 'mclass' 149 | return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] 150 | 151 | 152 | def sparsity(model): 153 | # Return global model sparsity 154 | a, b = 0., 0. 155 | for p in model.parameters(): 156 | a += p.numel() 157 | b += (p == 0).sum() 158 | return b / a 159 | 160 | 161 | def prune(model, amount=0.3): 162 | # Prune model to requested global sparsity 163 | import torch.nn.utils.prune as prune 164 | print('Pruning model... 
', end='') 165 | for name, m in model.named_modules(): 166 | if isinstance(m, nn.Conv2d): 167 | prune.l1_unstructured(m, name='weight', amount=amount) # prune 168 | prune.remove(m, 'weight') # make permanent 169 | print(' %.3g global sparsity' % sparsity(model)) 170 | 171 | 172 | def fuse_conv_and_bn(conv, bn): 173 | # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 174 | fusedconv = nn.Conv2d(conv.in_channels, 175 | conv.out_channels, 176 | kernel_size=conv.kernel_size, 177 | stride=conv.stride, 178 | padding=conv.padding, 179 | groups=conv.groups, 180 | bias=True).requires_grad_(False).to(conv.weight.device) 181 | 182 | # prepare filters 183 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 184 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 185 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 186 | 187 | # prepare spatial bias 188 | b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias 189 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 190 | fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) 191 | 192 | return fusedconv 193 | 194 | 195 | def model_info(model, verbose=False, img_size=640): 196 | # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320] 197 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 198 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 199 | if verbose: 200 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 201 | for i, (name, p) in enumerate(model.named_parameters()): 202 | name = name.replace('module_list.', '') 203 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 204 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 205 | 206 | try: # FLOPS 207 | from thop import profile 208 | stride = int(model.stride.max()) if hasattr(model, 'stride') else 32 209 | img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device) # input 210 | flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2 # stride GFLOPS 211 | img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float 212 | fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / stride) # 640x640 GFLOPS 213 | except (ImportError, Exception): 214 | fs = '' 215 | 216 | logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") 217 | 218 | 219 | def load_classifier(name='resnet101', n=2): 220 | # Loads a pretrained model reshaped to n-class output 221 | model = torchvision.models.__dict__[name](pretrained=True) 222 | 223 | # ResNet model properties 224 | # input_size = [3, 224, 224] 225 | # input_space = 'RGB' 226 | # input_range = [0, 1] 227 | # mean = [0.485, 0.456, 0.406] 228 | # std = [0.229, 0.224, 0.225] 229 | 230 | # Reshape output to n classes 231 | filters = model.fc.weight.shape[1] 232 | model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True) 233 | model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True) 234 | model.fc.out_features = n 235 | return model 236 | 237 | 238 | def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) 239 | # scales img(bs,3,y,x) by ratio constrained to gs-multiple 240 | if ratio 
== 1.0: 241 | return img 242 | else: 243 | h, w = img.shape[2:] 244 | s = (int(h * ratio), int(w * ratio)) # new size 245 | img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize 246 | if not same_shape: # pad/crop img 247 | h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)] 248 | return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean 249 | 250 | 251 | def copy_attr(a, b, include=(), exclude=()): 252 | # Copy attributes from b to a, options to only include [...] and to exclude [...] 253 | for k, v in b.__dict__.items(): 254 | if (len(include) and k not in include) or k.startswith('_') or k in exclude: 255 | continue 256 | else: 257 | setattr(a, k, v) 258 | 259 | 260 | class ModelEMA: 261 | """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models 262 | Keep a moving average of everything in the model state_dict (parameters and buffers). 263 | This is intended to allow functionality like 264 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 265 | A smoothed version of the weights is necessary for some training schemes to perform well. 266 | This class is sensitive where it is initialized in the sequence of model init, 267 | GPU assignment and distributed training wrappers. 268 | """ 269 | 270 | def __init__(self, model, decay=0.9999, updates=0): 271 | # Create EMA 272 | self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA 273 | # if next(model.parameters()).device.type != 'cpu': 274 | # self.ema.half() # FP16 EMA 275 | self.updates = updates # number of EMA updates 276 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs) 277 | for p in self.ema.parameters(): 278 | p.requires_grad_(False) 279 | 280 | def update(self, model): 281 | # Update EMA parameters 282 | with torch.no_grad(): 283 | self.updates += 1 284 | d = self.decay(self.updates) 285 | 286 | msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict 287 | for k, v in self.ema.state_dict().items(): 288 | if v.dtype.is_floating_point: 289 | v *= d 290 | v += (1. 
- d) * msd[k].detach() 291 | 292 | def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): 293 | # Update EMA attributes 294 | copy_attr(self.ema, model, include, exclude) 295 | -------------------------------------------------------------------------------- /weights/.gitignore: -------------------------------------------------------------------------------- 1 | yolo 2 | yolov5 3 | -------------------------------------------------------------------------------- /weights/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:latest 2 | USER root 3 | RUN apt-get update && \ 4 | apt-get install -y --no-install-recommends \ 5 | curl gnupg2 ca-certificates && \ 6 | curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ 7 | echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | tee /etc/apt/sources.list.d/coral-edgetpu.list 8 | RUN apt-get update && \ 9 | apt-get install -y --no-install-recommends \ 10 | edgetpu-compiler 11 | -------------------------------------------------------------------------------- /weights/README.md: -------------------------------------------------------------------------------- 1 | # download YOLO pre-trained weights 2 | 3 | ## YOLO V3 and V4 4 | 5 | `> ./download_yolo.sh` 6 | 7 | ## YOLO V5 8 | 9 | `> ./download_yolov5.py` 10 | 11 | ## [optional] compile TFLite FlatBuffers for EdgeTPU 12 | 13 | ### setup 14 | 15 | - install docker on your PC 16 | - `> build_docker.sh` 17 | - convert pre-trained weights 18 | - go up one directory (to the repository root) 19 | - `> convert_yolo.py` 20 | - `> convert_yolov5.py` 21 | 22 | ### compile 23 | 24 | - run docker 25 | - ex.) compile yolov3-tiny for Edge TPU 26 | - `> compile_edgetpu.sh yolo/yolov3-tiny_int8.tflite` 27 | - → `yolo/yolov3-tiny_int8_edgetpu.tflite` will be created 28 | - ex.) compile yolov5s for Edge TPU 29 | - `> compile_edgetpu.sh yolov5/yolov5s_int8.tflite` 30 | - → `yolov5/yolov5s_int8_edgetpu.tflite` will be created 31 | 32 | ### notices 33 | 34 | - quantization of the compiled model must be `int8` 35 | - the filename must match `[yolo|yolov5]/*_int8.tflite` 36 | - the compiled binary runs very slowly because most of the subgraphs are not mapped onto the Edge TPU. 37 | 38 | the case of yolov3-tiny 39 | ``` 40 | > ./compile_edgetpu.sh yolo/yolov3-tiny_int8.tflite 41 | Edge TPU Compiler version 15.0.340273435 42 | 43 | Model compiled successfully in 577 ms. 44 | 45 | Input model: /home/yolo/yolov3-tiny_int8.tflite 46 | Input size: 8.58MiB 47 | Output model: /home/yolo/yolov3-tiny_int8_edgetpu.tflite 48 | Output size: 8.70MiB 49 | On-chip memory used for caching model parameters: 3.00KiB 50 | On-chip memory remaining for caching model parameters: 7.67MiB 51 | Off-chip memory used for streaming uncached model parameters: 0.00B 52 | Number of Edge TPU subgraphs: 1 53 | Total number of operations: 36 54 | Operation log: /home/yolo/yolov3-tiny_int8_edgetpu.log 55 | 56 | Model successfully compiled but not all operations are supported by the Edge TPU. A percentage of the model will instead run on the CPU, which is slower. If possible, consider updating your model to use only operations supported by the Edge TPU. For details, visit g.co/coral/model-reqs.
57 | Number of operations that will run on Edge TPU: 2 58 | Number of operations that will run on CPU: 34 59 | 60 | Operator Count Status 61 | 62 | MAX_POOL_2D 6 More than one subgraph is not supported 63 | QUANTIZE 2 Operation is otherwise supported, but not mapped due to some unspecified limitation 64 | QUANTIZE 1 Mapped to Edge TPU 65 | QUANTIZE 1 More than one subgraph is not supported 66 | CONV_2D 1 Mapped to Edge TPU 67 | CONV_2D 12 More than one subgraph is not supported 68 | RESIZE_NEAREST_NEIGHBOR 1 Operation version not supported 69 | LEAKY_RELU 11 Operation not supported 70 | CONCATENATION 1 More than one subgraph is not supported 71 | ``` 72 | 73 | the case of yolov5s 74 | ``` 75 | > ./compile_edgetpu.sh yolov5/yolov5s_int8.tflite 76 | Edge TPU Compiler version 15.0.340273435 77 | 78 | Model compiled successfully in 93 ms. 79 | 80 | Input model: /home/yolov5/yolov5s_int8.tflite 81 | Input size: 7.39MiB 82 | Output model: /home/yolov5/yolov5s_int8_edgetpu.tflite 83 | Output size: 7.34MiB 84 | On-chip memory used for caching model parameters: 0.00B 85 | On-chip memory remaining for caching model parameters: 8.05MiB 86 | Off-chip memory used for streaming uncached model parameters: 0.00B 87 | Number of Edge TPU subgraphs: 1 88 | Total number of operations: 294 89 | Operation log: /home/yolov5/yolov5s_int8_edgetpu.log 90 | 91 | Model successfully compiled but not all operations are supported by the Edge TPU. A percentage of the model will instead run on the CPU, which is slower. If possible, consider updating your model to use only operations supported by the Edge TPU. For details, visit g.co/coral/model-reqs. 92 | Number of operations that will run on Edge TPU: 1 93 | Number of operations that will run on CPU: 293 94 | 95 | Operator Count Status 96 | 97 | QUANTIZE 1 Mapped to Edge TPU 98 | QUANTIZE 1 Operation is otherwise supported, but not mapped due to some unspecified limitation 99 | QUANTIZE 24 More than one subgraph is not supported 100 | LOGISTIC 62 More than one subgraph is not supported 101 | TRANSPOSE 3 Operation not supported 102 | SUB 3 More than one subgraph is not supported 103 | CONCATENATION 1 Operation is otherwise supported, but not mapped due to some unspecified limitation 104 | CONCATENATION 17 More than one subgraph is not supported 105 | MAX_POOL_2D 3 More than one subgraph is not supported 106 | STRIDED_SLICE 9 More than one subgraph is not supported 107 | STRIDED_SLICE 4 Only Strided-Slice with unitary strides supported 108 | CONV_2D 62 More than one subgraph is not supported 109 | MUL 80 More than one subgraph is not supported 110 | RESIZE_NEAREST_NEIGHBOR 2 Operation version not supported 111 | RESHAPE 3 Operation is otherwise supported, but not mapped due to some unspecified limitation 112 | RESHAPE 3 More than one subgraph is not supported 113 | PAD 6 More than one subgraph is not supported 114 | ADD 10 More than one subgraph is not supported 115 | ``` 116 | -------------------------------------------------------------------------------- /weights/build_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | docker build -t edgetpu_env . 3 | -------------------------------------------------------------------------------- /weights/compile_edgetpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ $# != 1 ]; then 3 | echo "Usage: $0 " 4 | exit 5 | elif [ ! 
-f $1 ] || [ ${1##*_} != 'int8.tflite' ]; then 6 | echo "Usage: $0 " 7 | exit 8 | fi 9 | dir=${1%/*} 10 | docker run --rm -it --env 'TZ=Asia/Tokyo' --volume ${PWD}:/home --name edgetpu_compile edgetpu_env /usr/bin/edgetpu_compiler -s /home/$1 -o /home/${dir} 11 | -------------------------------------------------------------------------------- /weights/download_yolo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [ ! -d yolo ] ; then 4 | mkdir yolo 5 | fi 6 | if [ ! -f yolo/yolov3-tiny.weights ] ; then 7 | wget -O yolo/yolov3-tiny.weights https://pjreddie.com/media/files/yolov3-tiny.weights 8 | fi 9 | if [ ! -f yolo/yolov3.weights ] ; then 10 | wget -O yolo/yolov3.weights https://pjreddie.com/media/files/yolov3.weights 11 | fi 12 | if [ ! -f yolo/yolov3-spp.weights ] ; then 13 | wget -O yolo/yolov3-spp.weights https://pjreddie.com/media/files/yolov3-spp.weights 14 | fi 15 | if [ ! -f yolo/yolov4-tiny.weights ] ; then 16 | wget -O yolo/yolov4-tiny.weights https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights 17 | fi 18 | if [ ! -f yolo/yolov4.weights ] ; then 19 | wget -O yolo/yolov4.weights https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights 20 | fi 21 | if [ ! -f yolo/yolov4-csp.weights ] ; then 22 | wget -O yolo/yolov4-csp.weights https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.weights 23 | fi 24 | if [ ! -f yolo/yolov4x-mish.weights ] ; then 25 | wget -O yolo/yolov4x-mish.weights https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.weights 26 | fi 27 | -------------------------------------------------------------------------------- /weights/download_yolov5.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import os 4 | from pathlib import Path 5 | import requests 6 | import torch 7 | 8 | directory = 'yolov5' 9 | 10 | 11 | def download_weight( 12 | model: str, 13 | repo: str = 'ultralytics/yolov5' 14 | ) -> None: 15 | assets = [ 16 | 'yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt' 17 | ] 18 | assert model in assets, 'invalid model name' 19 | fpath = Path(os.path.join( 20 | directory, model.strip().replace("'", '').lower() 21 | )) 22 | if fpath.exists(): 23 | return 24 | try: 25 | response = requests.get( 26 | f'https://api.github.com/repos/{repo}/releases/latest' 27 | ).json() # github api 28 | tag = response['tag_name'] 29 | except Exception: 30 | print('cannot get tag name') 31 | return 32 | name = fpath.name 33 | redundant = False # second download option 34 | try: # GitHub 35 | url = f'https://github.com/{repo}/releases/download/{tag}/{name}' 36 | print(f'Downloading {model}...') 37 | torch.hub.download_url_to_file(url, fpath) 38 | # check 39 | assert fpath.exists() and fpath.stat().st_size > 1E6 40 | except Exception as e: # GCP 41 | print(f'Download error: {e}') 42 | assert redundant, 'No secondary mirror' 43 | url = f'https://storage.googleapis.com/{repo}/ckpt/{name}' 44 | print(f'Downloading {model}...') 45 | os.system(f'curl -L {url} -o {name}') 46 | finally: 47 | # check 48 | if not fpath.exists() or fpath.stat().st_size < 1E6: 49 | # remove partial downloads 50 | fpath.unlink(missing_ok=True) 51 | print('ERROR: Download failure') 52 | return 53 | 54 | 55 | if __name__ == '__main__': 56 | os.makedirs(directory, exist_ok=True) 57 | for x in ['s', 'm', 'l', 'x']: 
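        # skip weights that are already on disk, otherwise fetch the release asset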
58 | if os.path.isfile(f'{directory}/yolov5{x}.pt'): 59 | continue 60 | download_weight(model=f'yolov5{x}.pt') 61 | --------------------------------------------------------------------------------
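As a usage note for the (soft) NMS helper above: `filter_bboxes()` in `utils/nms.py` takes raw detections as an (N, 6) array — `xmin, ymin, xmax, ymax, category id, confidence` — and returns the surviving boxes in the same layout. A minimal sketch of a call (the sample boxes and scores are made-up values, and the repository root is assumed to be on `PYTHONPATH`):

```python
import numpy as np

from utils.nms import filter_bboxes

# three candidate detections for category 0:
# two heavily overlapping boxes and one separate box
detections = np.array([
    [10.0, 10.0, 110.0, 110.0, 0.0, 0.90],   # xmin, ymin, xmax, ymax, category id, confidence
    [12.0, 14.0, 108.0, 112.0, 0.0, 0.75],   # overlaps the first box -> down-weighted by soft NMS
    [200.0, 50.0, 300.0, 150.0, 0.0, 0.60],
])

kept = filter_bboxes(
    detections,
    conf_threshold=0.3,
    iou_threshold=0.45,
    disable_soft_nms=False,  # set True for classic (hard) NMS
)
print(kept)  # surviving boxes, still (M, 6) in the same column layout
```

With the default `disable_soft_nms=False`, overlapping boxes are down-weighted rather than removed outright, and any box whose decayed confidence falls below `conf_threshold` is dropped.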