├── .gitignore ├── LICENSE ├── README.md ├── convert_yolo.py ├── convert_yolov5.py ├── convertor ├── __init__.py ├── yolo │ ├── convert_tf_onnx.py │ └── convert_tf_tflite.py └── yolov5 │ ├── __init__.py │ ├── convert_onnx_tf.py │ ├── convert_onnx_vino.py │ ├── convert_tf_onnx.py │ ├── convert_tf_tflite.py │ └── convert_torch_onnx.py ├── datasets ├── .gitignore ├── README.md ├── create_small_dataset.py └── download_coco_val2017.sh ├── detect.py ├── detect_all.sh ├── detector ├── .gitignore ├── __init__.py ├── base.py ├── yolo.py └── yolov5.py ├── download_font.sh ├── download_superres.sh ├── evaluate_all.sh ├── ipynb ├── accumulate.ipynb ├── map.png ├── time.png └── time_vs_map.png ├── labels └── coco_labels.txt ├── models ├── __init__.py ├── common.py ├── experimental.py ├── export.py ├── tf_yolo.py ├── tf_yolov5.py ├── yolo.py ├── yolov5l.yaml ├── yolov5m.yaml ├── yolov5s.yaml └── yolov5x.yaml ├── requirements.txt ├── tools ├── convert_yolo_config_csv.py ├── yolov3-spp.cfg ├── yolov3-spp.csv ├── yolov3-spp.xlsx ├── yolov3-tiny.cfg ├── yolov3-tiny.csv ├── yolov3-tiny.xlsx ├── yolov3.cfg ├── yolov3.csv ├── yolov3.xlsx ├── yolov4-csp.cfg ├── yolov4-csp.csv ├── yolov4-csp.xlsx ├── yolov4-tiny.cfg ├── yolov4-tiny.csv ├── yolov4-tiny.xlsx ├── yolov4.cfg ├── yolov4.csv ├── yolov4.xlsx ├── yolov4x-mish.cfg ├── yolov4x-mish.csv └── yolov4x-mish.xlsx ├── utils ├── __init__.py ├── activations.py ├── autoanchor.py ├── convert_tflite.py ├── datasets.py ├── general.py ├── google_utils.py ├── image_preproc.py ├── loss.py ├── metrics.py ├── nms.py ├── plots.py └── torch_utils.py └── weights ├── .gitignore ├── Dockerfile ├── README.md ├── build_docker.sh ├── compile_edgetpu.sh ├── download_yolo.sh └── download_yolov5.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # original 132 | TakaoGothic.ttf 133 | superres 134 | results 135 | .DS_Store 136 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Tetsutaro Maruyama 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # yolo_various_framework 2 | 3 | run YOLO (object detection model) on various frameworks 4 | 5 | ## articles corresponding to this repository (Qiita) 6 | 7 | the following articles were written in Japanese only.
8 | 9 | - [YOLOを色んなフレームワークに変換して動かしてみた件](https://qiita.com/tetutaro/items/7adf3b22e4c42a17fe66) (converting YOLO to various frameworks and running it) 10 | - [OpenCVで簡単に実装できる画像前処理](https://qiita.com/tetutaro/items/443dcd22e8aee4884ea8) (image preprocessing that is easy to implement with OpenCV) 11 | 12 | ## my motivations 13 | 14 | - I want to run object detection models on my PC 15 | - I don't have enough money to buy a GPU 16 | - I want to know which framework is the fastest in terms of elapsed time 17 | - I want to confirm that the predicted results do not change much when I convert pre-trained weights to another framework 18 | 19 | ## object detection models 20 | 21 | - [YOLO V3](https://github.com/pjreddie/darknet) 22 | - yolov3-tiny 23 | - input image size: 512x512 24 | - yolov3 25 | - input image size: 512x512 26 | - yolov3-spp 27 | - input image size: 512x512 28 | - [YOLO V4](https://github.com/AlexeyAB/darknet) 29 | - yolov4-tiny 30 | - input image size: 512x512 31 | - yolov4 32 | - input image size: 512x512 33 | - [IN-PROGRESS] yolov4-csp ([Scaled-YOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4/tree/yolov4-csp)) 34 | - input image size: 512x512 35 | - [IN-PROGRESS] yolov4x-mish ([YOLOv4-large P5](https://github.com/WongKinYiu/ScaledYOLOv4/tree/yolov4-large)) 36 | - input image size: 512x512 37 | - [IN-PROGRESS] YOLO V3 & V4 can run only on `tf`, `tflite` and `tf_onnx` 38 | - [YOLO V5](https://github.com/ultralytics/yolov5) 39 | - yolov5s 40 | - input image size: 640x640 41 | - yolov5m 42 | - input image size: 640x640 43 | - yolov5l 44 | - input image size: 640x640 45 | - yolov5x 46 | - input image size: 640x640 47 | 48 | ## deep learning frameworks 49 | 50 | all deep learning frameworks below were run on Python 3.7.9 51 | 52 | - PyTorch (State Dict) 53 | - torch 1.8.1 54 | - TensorFlow (Frozen Graph) 55 | - tensorflow 2.4.1 56 | - TensorFlow Lite (Flat Buffer) 57 | - tflite-runtime 2.5.0 58 | - ONNX 59 | - onnxruntime 1.7.0 60 | - OpenVINO 61 | - OpenVINO 2021.2.185 62 | 63 | ## libraries to convert 64 | 65 | - onnx 1.8.1 66 | - onnx-tf 1.7.0 67 | - tf2onnx 1.8.4 68 | 69 | ## preparation 70 | 71 | - download the font and the trained super-resolution model for the detector 72 | - `./download_font.sh` 73 | - `./download_superres.sh` 74 | - download the COCO dataset and create a small dataset (convert annotations) 75 | - see [datasets/README](https://github.com/tetutaro/yolo_various_framework/tree/main/datasets) 76 | - download pre-trained weights 77 | - see [weights/README](https://github.com/tetutaro/yolo_various_framework/tree/main/weights) 78 | - convert pre-trained weights to various frameworks 79 | - `./convert_yolo.py` 80 | - `./convert_yolov5.py` 81 | - (if you want to calculate metrics) please install [`object_detection_metrics`](https://github.com/tetutaro/object_detection_metrics) 82 | - `> pip install "git+https://github.com/tetutaro/object_detection_metrics.git"` 83 | 84 | ## usage 85 | 86 | ``` 87 | usage: detect.py [-h] 88 | -m {yolov3-tiny,yolov3,yolov4-tiny,yolov4,yolov5s,yolov5m,yolov5l,yolov5x} 89 | -f {torch,torch_onnx,onnx_vino,onnx_tf,tf,tflite,tf_onnx} 90 | [-q {fp32,fp16,int8}] 91 | -d IMAGE_DIR 92 | [-c CONF_THRESHOLD] 93 | [-i IOU_THRESHOLD] 94 | [--clarify-image] 95 | [--use-superres] 96 | [--disable-soft-nms] 97 | 98 | detect objects from images 99 | 100 | optional arguments: 101 | -h, --help show this help message and exit 102 | -m MODEL, --model MODEL 103 | model name 104 | -f FRAMEWORK, --framework FRAMEWORK 105 | framework 106 | -q QUANTIZE, --quantize QUANTIZE 107 | quantization mode (TensorFlow Lite only) 108 | default: fp32 109 | -d IMAGE_DIR, --image-dir
IMAGE_DIR 110 | directory contains images to detect objects 111 | -c CONF_THRESHOLD, --conf-threshold CONF_THRESHOLD 112 | threshold of confidence score to adopt bounding boxes 113 | default: 0.3 114 | -i IOU_THRESHOLD, --iou-threshold IOU_THRESHOLD 115 | threshold of IoU to eliminate bounding boxes in NMS 116 | default: 0.45 117 | --clarify-image 118 | image preprocessing 119 | --use-superres 120 | use Super-Resolution at image preprocessing 121 | --disable-soft-nms use hard-NMS instead of soft-NMS 122 | ``` 123 | 124 | ## frameworks 125 | 126 | ``` 127 | torch PyTorch (state dict) 128 | torch_onnx ONNX converted from PyTorch 129 | onnx_vino OpenVINO converted from torch_onnx 130 | onnx_tf TensorFlow (SavedModel) converted from torch_onnx 131 | tf TensorFlow (FrozenGraph) 132 | tflite TensorFlow Lite 133 | tf_onnx ONNX converted from TensorFlow 134 | ``` 135 | 136 | ## results 137 | 138 | ### conditions 139 | 140 | NOTE: the following data was measured under the conditions below. Just FYI. 141 | 142 | - images: 100 images randomly selected from the COCO val2017 dataset 143 | - elapsed time includes the time for... 144 | - image preprocessing (sketched below) 145 | - adjusting white balance 146 | - smoothing the image with a Gaussian blur 147 | - correcting contrast (brightening dark areas) 148 | - clarifying the image using Super-Resolution 149 | - resizing the image and putting it on a square, gray background image 150 | - inference 151 | - filtering bounding boxes 152 | - applying anchors 153 | - some models do this inside the model 154 | - YOLO V5 (tf, tflite, tf_onnx) 155 | - calculating confidence scores 156 | - NMS 157 | - soft NMS 158 | - eliminating small, low-confidence bounding boxes that lie inside bigger, more confident ones 159 | - the input image size differs depending on the model 160 | - YOLO V3/V4: 512 x 512 161 | - YOLO V5: 640 x 640 162 | - confidence score threshold is the same regardless of the model 163 | - `0.3` 164 | - IoU threshold is the same regardless of the model 165 | - `0.45` 166 | - tool for calculating mAP: [`object_detection_metrics`](https://github.com/tetutaro/object_detection_metrics) (may be WRONG!!)
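The image preprocessing listed above (the `--clarify-image` / `--use-superres` options) can be sketched with plain OpenCV and NumPy. The snippet below is only an illustration of the idea, not the actual code in `utils/image_preproc.py`: the gray-world white balance, the CLAHE-based contrast correction and the function names are assumptions made for this example, and the Super-Resolution step (`superres/ESPCN_x4.pb`) is omitted.

```
import cv2
import numpy as np


def clarify_image(image: np.ndarray) -> np.ndarray:
    # adjust white balance (gray-world assumption)
    balanced = image.astype(np.float32)
    balanced *= balanced.mean() / balanced.mean(axis=(0, 1))
    balanced = np.clip(balanced, 0, 255).astype(np.uint8)
    # smooth the image with a Gaussian blur
    smoothed = cv2.GaussianBlur(balanced, (3, 3), 0)
    # correct contrast (brighten dark areas); CLAHE on the L channel
    # stands in here for whatever correction the repository actually uses
    lab = cv2.cvtColor(smoothed, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return cv2.cvtColor(cv2.merge((clahe.apply(l), a, b)), cv2.COLOR_LAB2BGR)


def letterbox(image: np.ndarray, size: int = 640) -> np.ndarray:
    # resize keeping the aspect ratio and put the result on a square gray
    # background (512 for YOLO V3/V4, 640 for YOLO V5)
    height, width = image.shape[:2]
    scale = size / max(height, width)
    resized = cv2.resize(image, (int(width * scale), int(height * scale)))
    background = np.full((size, size, 3), 114, dtype=np.uint8)
    top = (size - resized.shape[0]) // 2
    left = (size - resized.shape[1]) // 2
    background[top:top + resized.shape[0], left:left + resized.shape[1]] = resized
    return background
```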
167 | 168 | ### binary size 169 | 170 | | Model | torch | torch_onnx | onnx_vino | onnx_tf | tf | tflite (fp32) | tflite (fp16) | tflite (int8) | tf_onnx | 171 | |:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| 172 | | yolov3-tiny | | | | | 34MB | 34MB | 17MB | 8.6MB | 34MB | 173 | | yolov3 | | | | | 237MB | 236MB | 118MB | 60MB | 236MB | 174 | | yolov3-spp | | | | | 241MB | 240MB | 120MB | 61MB | 240MB | 175 | | yolov4-tiny | | | | | 23MB | 23MB | 12MB | 5.9MB | 23MB | 176 | | yolov4 | | | | | 246MB | 246MB | 123MB | 63MB | 246MB | 177 | | yolov5s | 28MB | 28MB | 28MB | 28MB | 28MB | 28MB | 14MB | 7.4MB | 28MB | 178 | | yolov5m | 82MB | 82MB | 81MB | 82MB | 82MB | 82MB | 41MB | 21MB | 82MB | 179 | | yolov5l | 180MB | 179MB | 179MB | 180MB | 180MB | 180MB | 90MB | 46MB | 180MB | 180 | | yolov5x | 335MB | 335MB | 335MB | 336MB | 336MB | 335MB | 168MB | 85MB | 335MB | 181 | 182 | ### my environment 183 | 184 | - MacBook Air (Retina, 2020) 185 | - CPU: 1.1GHz quad core Intel Core i5 186 | - Memory: 16GB 3733MHz LPDDR4X 187 | 188 | ### elapsed time per image 189 | 190 | ![](ipynb/time.png) 191 | 192 | ### accuracy (mAP: mean Average Precision) 193 | 194 | ![](ipynb/map.png) 195 | 196 | ### time vs accuracy 197 | 198 | ![](ipynb/time_vs_map.png) 199 | -------------------------------------------------------------------------------- /convert_yolo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # from convertor.yolo.convert_torch_onnx import yolo_convert_torch_onnx 4 | # from convertor.yolo.convert_onnx_vino import yolo_convert_onnx_vino 5 | # from convertor.yolo.convert_onnx_tf import yolo_convert_onnx_tf 6 | from convertor.yolo.convert_tf_tflite import yolo_convert_tf_tflite 7 | from convertor.yolo.convert_tf_onnx import yolo_convert_tf_onnx 8 | 9 | IMAGE_SIZES = { 10 | 'yolov3-tiny': 512, 11 | 'yolov3': 512, 12 | 'yolov3-spp': 512, 13 | 'yolov4-tiny': 512, 14 | 'yolov4': 512, 15 | 'yolov4-csp': 512, 16 | 'yolov4x-mish': 512, 17 | } 18 | DIRECTORY = 'weights/yolo' 19 | 20 | 21 | if __name__ == '__main__': 22 | for model in [ 23 | 'yolov3-tiny', 'yolov3', 'yolov3-spp', 24 | 'yolov4-tiny', 'yolov4', 25 | # 'yolov4-csp', 'yolov4x-mish', 26 | ]: 27 | imgsize = IMAGE_SIZES[model] 28 | # yolo_convert_torch_onnx( 29 | # model=model, 30 | # directory=DIRECTORY, 31 | # imgsize=[imgsize, imgsize] 32 | # ) 33 | # yolo_convert_onnx_vino( 34 | # model=model, 35 | # directory=DIRECTORY, 36 | # imgsize=[imgsize, imgsize] 37 | # ) 38 | # yolo_convert_onnx_tf( 39 | # model=model, 40 | # directory=DIRECTORY 41 | # ) 42 | yolo_convert_tf_tflite( 43 | model=model, 44 | directory=DIRECTORY, 45 | imgsize=[imgsize, imgsize] 46 | ) 47 | yolo_convert_tf_onnx( 48 | model=model, 49 | directory=DIRECTORY 50 | ) 51 | -------------------------------------------------------------------------------- /convert_yolov5.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from convertor.yolov5.convert_torch_onnx import yolov5_convert_torch_onnx 4 | from convertor.yolov5.convert_onnx_vino import yolov5_convert_onnx_vino 5 | from convertor.yolov5.convert_onnx_tf import yolov5_convert_onnx_tf 6 | from convertor.yolov5.convert_tf_tflite import yolov5_convert_tf_tflite 7 | from convertor.yolov5.convert_tf_onnx import yolov5_convert_tf_onnx 8 | 9 | IMAGE_SIZE = 640 10 | DIRECTORY = 'weights/yolov5' 11 | 12 | 13 | if __name__ == '__main__':
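    # conversion chain run for every YOLOv5 variant (s, m, l, x), in the
    # order of the calls below: PyTorch .pt -> ONNX, ONNX -> OpenVINO,
    # ONNX -> TensorFlow, TensorFlow -> TFLite (fp32/fp16/int8), TensorFlow -> ONNX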
14 | for x in ['s', 'm', 'l', 'x']: 15 | model = f'yolov5{x}' 16 | yolov5_convert_torch_onnx( 17 | model=model, 18 | directory=DIRECTORY, 19 | imgsize=[IMAGE_SIZE, IMAGE_SIZE] 20 | ) 21 | yolov5_convert_onnx_vino( 22 | model=model, 23 | directory=DIRECTORY, 24 | imgsize=[IMAGE_SIZE, IMAGE_SIZE] 25 | ) 26 | yolov5_convert_onnx_tf( 27 | model=model, 28 | directory=DIRECTORY 29 | ) 30 | yolov5_convert_tf_tflite( 31 | model=model, 32 | directory=DIRECTORY, 33 | imgsize=[IMAGE_SIZE, IMAGE_SIZE] 34 | ) 35 | yolov5_convert_tf_onnx( 36 | model=model, 37 | directory=DIRECTORY 38 | ) 39 | -------------------------------------------------------------------------------- /convertor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/convertor/__init__.py -------------------------------------------------------------------------------- /convertor/yolo/convert_tf_onnx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- cofing:utf-8 -*- 3 | import os 4 | import subprocess 5 | 6 | 7 | def yolo_convert_tf_onnx( 8 | model: str, 9 | directory: str 10 | ) -> None: 11 | path_pb = f'{directory}/{model}.pb' 12 | path_onnx = f'{directory}/tf_{model}.onnx' 13 | if not os.path.isfile(path_pb): 14 | return 15 | if os.path.isfile(path_onnx): 16 | return 17 | options = list() 18 | options.append(f'--graphdef {path_pb}') 19 | options.append(f'--output {path_onnx}') 20 | options.append('--inputs x:0') 21 | if model in ['yolov3-tiny', 'yolov4-tiny']: 22 | options.append('--outputs Identity:0,Identity_1:0') 23 | else: 24 | options.append('--outputs Identity:0,Identity_1:0,Identity_2:0') 25 | options.append('--opset 12') 26 | options.append('--inputs-as-nchw x:0') 27 | cmd = ' '.join(['python -m tf2onnx.convert'] + options) 28 | cmd = cmd.split() 29 | subprocess.run(cmd) 30 | return 31 | -------------------------------------------------------------------------------- /convertor/yolo/convert_tf_tflite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from typing import List 4 | import tensorflow as tf 5 | import os 6 | import math 7 | import numpy as np 8 | # YOLO V3, V4 9 | from models.tf_yolo import ( 10 | tf_YoloV3_tiny, 11 | tf_YoloV3, 12 | tf_YoloV3_spp, 13 | tf_YoloV4_tiny, 14 | tf_YoloV4, 15 | tf_YoloV4_csp, 16 | tf_YoloV4x_mish, 17 | ) 18 | from utils.convert_tflite import ( 19 | save_frozen_graph, 20 | convert_tflite_fp32, 21 | convert_tflite_fp16, 22 | convert_tflite_int8, 23 | ) 24 | 25 | NUM_CLASS = 80 26 | MODEL_CLASS = { 27 | 'yolov3-tiny': tf_YoloV3_tiny, 28 | 'yolov3': tf_YoloV3, 29 | 'yolov3-spp': tf_YoloV3_spp, 30 | 'yolov4-tiny': tf_YoloV4_tiny, 31 | 'yolov4': tf_YoloV4, 32 | 'yolov4-csp': tf_YoloV4_csp, 33 | 'yolov4x-mish': tf_YoloV4x_mish, 34 | } 35 | MODEL_SHAPE = { 36 | 'yolov3-tiny': { 37 | 'nlayers': 13, 38 | 'nobn_layers': [9, 12], 39 | }, 40 | 'yolov3': { 41 | 'nlayers': 75, 42 | 'nobn_layers': [58, 66, 74], 43 | }, 44 | 'yolov3-spp': { 45 | 'nlayers': 76, 46 | 'nobn_layers': [59, 67, 75], 47 | }, 48 | 'yolov4-tiny': { 49 | 'nlayers': 21, 50 | 'nobn_layers': [17, 20], 51 | }, 52 | 'yolov4': { 53 | 'nlayers': 110, 54 | 'nobn_layers': [93, 101, 109], 55 | }, 56 | 'yolov4-csp': { 57 | 'nlayers': 115, 58 | 'nobn_layers': [94, 104, 114], 59 | }, 60 | 'yolov4x-mish': { 61 | 'nlayers': 137, 62 | 
'nobn_layers': [112, 124, 136], 63 | }, 64 | } 65 | STRIDES = [8, 16, 32, 64, 128] 66 | DEBUG = False 67 | 68 | 69 | def _load_darknet_weights( 70 | model: str, 71 | path_weights: str, 72 | model_keras: tf.keras.Model 73 | ) -> None: 74 | rf = open(path_weights, 'rb') 75 | major, minor, revision, seen, _ = np.fromfile( 76 | rf, dtype=np.int32, count=5 77 | ) 78 | nlayers = MODEL_SHAPE[model]['nlayers'] 79 | nobn_layers = MODEL_SHAPE[model]['nobn_layers'] 80 | if DEBUG: 81 | print(model) 82 | for i, layers in enumerate(model_keras.weighted_layers): 83 | print(i, layers) 84 | assert len(model_keras.weighted_layers) == nlayers 85 | for i, layers in enumerate(model_keras.weighted_layers): 86 | conv_layer = layers.conv 87 | norm_layer = layers.norm 88 | input_shape = layers.input_shape 89 | filters = conv_layer.filters 90 | k_size = conv_layer.kernel_size[0] 91 | in_dim = input_shape[-1] 92 | if i not in nobn_layers: 93 | # darknet weights: [beta, gamma, mean, variance] 94 | bn_weights = np.fromfile( 95 | rf, dtype=np.float32, count=(4 * filters) 96 | ) 97 | # tf weights: [gamma, beta, mean, variance] 98 | bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]] 99 | else: 100 | conv_bias = np.fromfile( 101 | rf, dtype=np.float32, count=filters 102 | ) 103 | if model in ['yolov4-csp', 'yolov4x-mish']: 104 | # https://arxiv.org/abs/1708.02002 105 | offset = np.where(np.array(nobn_layers) == i)[0][0] 106 | stride = STRIDES[offset] 107 | conv_bias = conv_bias.reshape(3, 85) 108 | conv_bias[:, 4] += math.log(8 / (640 / stride) ** 2) 109 | conv_bias[:, 5:] += math.log(0.6 / (80 - 0.99)) 110 | conv_bias = conv_bias.reshape(-1) 111 | # darknet shape (out_dim, in_dim, height, width) 112 | conv_shape = (filters, in_dim, k_size, k_size) 113 | conv_weights = np.fromfile( 114 | rf, dtype=np.float32, count=np.product(conv_shape) 115 | ) 116 | # tf shape (height, width, in_dim, out_dim) 117 | conv_weights = conv_weights.reshape(conv_shape).transpose( 118 | [2, 3, 1, 0] 119 | ) 120 | if i not in nobn_layers: 121 | assert norm_layer.__class__.__name__ == 'BatchNormalization' 122 | conv_layer.set_weights([conv_weights]) 123 | norm_layer.set_weights(bn_weights) 124 | else: 125 | assert norm_layer.__class__.__name__ == 'function' 126 | conv_layer.set_weights([conv_weights, conv_bias]) 127 | rest = len(rf.read()) 128 | assert rest == 0, f'failed to read all data: {rest}' 129 | rf.close() 130 | return 131 | 132 | 133 | def yolo_convert_tf_tflite( 134 | model: str, 135 | directory: str, 136 | imgsize: List[int] 137 | ) -> None: 138 | path_weights = f'{directory}/{model}.weights' 139 | if not os.path.isfile(path_weights): 140 | print(f'ERROR: {path_weights} not found') 141 | return 142 | # load model 143 | model_keras = MODEL_CLASS[model](nc=NUM_CLASS) 144 | model_keras.build(input_shape=(1, *imgsize, 3)) 145 | # dummy run 146 | dummy_image_tf = tf.zeros((1, *imgsize, 3), dtype=tf.float32) # NHWC 147 | y = model_keras(dummy_image_tf) 148 | for yy in y: 149 | _ = yy.numpy() 150 | # model_keras.summary() 151 | # load weights 152 | _load_darknet_weights( 153 | model=model, path_weights=path_weights, model_keras=model_keras 154 | ) 155 | # save as Frozen Graph 156 | input_keras = tf.keras.Input( 157 | shape=(*imgsize, 3), batch_size=1, dtype=tf.float32 158 | ) 159 | save_frozen_graph( 160 | path_pb=f'{directory}/{model}.pb', 161 | model_keras=model_keras, 162 | input_keras=input_keras 163 | ) 164 | # convert TFLite model 165 | path_tflite = f'{directory}/{model}_fp32.tflite' 166 | 
convert_tflite_fp32(path_tflite=path_tflite, model_keras=model_keras) 167 | path_tflite = f'{directory}/{model}_fp16.tflite' 168 | convert_tflite_fp16(path_tflite=path_tflite, model_keras=model_keras) 169 | path_tflite = f'{directory}/{model}_int8.tflite' 170 | convert_tflite_int8( 171 | path_tflite=path_tflite, 172 | imgsize=imgsize, 173 | model_keras=model_keras 174 | ) 175 | return 176 | -------------------------------------------------------------------------------- /convertor/yolov5/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/convertor/yolov5/__init__.py -------------------------------------------------------------------------------- /convertor/yolov5/convert_onnx_tf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import os 4 | import onnx 5 | from onnx_tf.backend import prepare 6 | 7 | 8 | def yolov5_convert_onnx_tf(model: str, directory: str) -> None: 9 | path_onnx = f'{directory}/{model}.onnx' 10 | if not os.path.isfile(path_onnx): 11 | return 12 | path_onnx_tf = f'{directory}/onnx_tf_{model}' 13 | if os.path.isdir(path_onnx_tf): 14 | return 15 | model_onnx = onnx.load(path_onnx) 16 | tf_rep = prepare(model_onnx) 17 | tf_rep.export_graph(path_onnx_tf) 18 | return 19 | -------------------------------------------------------------------------------- /convertor/yolov5/convert_onnx_vino.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from typing import List 4 | import os 5 | import subprocess 6 | 7 | OPENVINO_VERSION = "2021" 8 | 9 | 10 | def yolov5_convert_onnx_vino( 11 | model: str, 12 | directory: str, 13 | imgsize: List[int] 14 | ) -> None: 15 | path_onnx = f'{directory}/{model}.onnx' 16 | dir_vino = f'{directory}/onnx_vino_{model}' 17 | if not os.path.isfile(path_onnx): 18 | return 19 | if os.path.isdir(dir_vino): 20 | return 21 | path_mo = f'/opt/intel/openvino_{OPENVINO_VERSION}/' 22 | path_mo += 'deployment_tools/model_optimizer/mo.py' 23 | options = list() 24 | options.append(f'--input_model {path_onnx}') 25 | options.append(f'--model_name {model}') 26 | options.append(f'--output_dir {dir_vino}') 27 | options.append('--data_type FP32') 28 | options.append('--input images') 29 | options.append(f'--input_shape [1,3,{imgsize[0]},{imgsize[1]}]') 30 | cmd = ' '.join(['python', path_mo] + options) 31 | cmd = cmd.split() 32 | subprocess.run(cmd) 33 | return 34 | -------------------------------------------------------------------------------- /convertor/yolov5/convert_tf_onnx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- cofing:utf-8 -*- 3 | import os 4 | import subprocess 5 | 6 | 7 | def yolov5_convert_tf_onnx( 8 | model: str, 9 | directory: str 10 | ) -> None: 11 | path_pb = f'{directory}/{model}.pb' 12 | path_onnx = f'{directory}/tf_{model}.onnx' 13 | if not os.path.isfile(path_pb): 14 | return 15 | if os.path.isfile(path_onnx): 16 | return 17 | options = list() 18 | options.append(f'--graphdef {path_pb}') 19 | options.append(f'--output {path_onnx}') 20 | options.append('--inputs x:0') 21 | options.append('--outputs Identity:0') 22 | options.append('--opset 12') 23 | options.append('--inputs-as-nchw x:0') 24 | cmd = ' '.join(['python -m tf2onnx.convert'] + options) 25 | 
cmd = cmd.split() 26 | subprocess.run(cmd) 27 | return 28 | -------------------------------------------------------------------------------- /convertor/yolov5/convert_tf_tflite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from typing import List, Dict 4 | import os 5 | import yaml 6 | # Torch 7 | import torch 8 | # TensorFlow 9 | import tensorflow as tf 10 | # YOLO V5 11 | from models.tf_yolov5 import tf_YoloV5, tf_Detect 12 | from utils.convert_tflite import ( 13 | save_frozen_graph, 14 | convert_tflite_fp32, 15 | convert_tflite_fp16, 16 | convert_tflite_int8, 17 | ) 18 | 19 | 20 | def _convert_tf_keras_model( 21 | model: str, 22 | imgsize: List[int], 23 | model_torch: torch.nn.Module, 24 | nclasses: int, 25 | config: Dict 26 | ) -> tf.keras.Model: 27 | model_tf = tf_YoloV5( 28 | model_torch=model_torch, 29 | nclasses=nclasses, 30 | config=config 31 | ) 32 | m = model_tf.model.layers[-1] 33 | assert isinstance(m, tf_Detect), "the last layer must be Detect" 34 | m.training = False 35 | # dummy run and check output 36 | dummy_image_tf = tf.zeros((1, *imgsize, 3)) # NHWC 37 | y = model_tf.predict(dummy_image_tf) 38 | for yy in y: 39 | _ = yy.numpy() 40 | # create keras model 41 | inputs_keras = tf.keras.Input( 42 | shape=(*imgsize, 3), batch_size=1 43 | ) 44 | outputs_keras = model_tf.predict(inputs=inputs_keras) 45 | model_keras = tf.keras.Model( 46 | inputs=inputs_keras, 47 | outputs=outputs_keras, 48 | name=model 49 | ) 50 | # model_keras.summary() 51 | return model_keras 52 | 53 | 54 | def yolov5_convert_tf_tflite( 55 | model: str, 56 | directory: str, 57 | imgsize: List[int] 58 | ) -> None: 59 | path_weights = f'{directory}/{model}.pt' 60 | if not os.path.isfile(path_weights): 61 | print(f'ERROR: {path_weights} not found') 62 | return 63 | # dummy image 64 | dummy_image_torch = torch.zeros((1, 3, *imgsize)) # NCHW 65 | # Load PyTorch model 66 | model_torch = torch.load( 67 | path_weights, 68 | map_location='cpu' 69 | )['model'].float() # .fuse() 70 | model_torch.eval() 71 | # export=True to export Detect Layer 72 | model_torch.model[-1].export = False 73 | # dry run 74 | y = model_torch(dummy_image_torch) 75 | # number of classes 76 | nclasses = y[0].shape[-1] - 5 77 | # load configuration for the model 78 | path_config = f'models/{model}.yaml' 79 | with open(path_config, 'rt') as rf: 80 | config = yaml.safe_load(rf) 81 | # TensorFlow Keras export 82 | model_keras = _convert_tf_keras_model( 83 | model=model, 84 | imgsize=imgsize, 85 | model_torch=model_torch, 86 | nclasses=nclasses, 87 | config=config 88 | ) 89 | # save as Frozen Graph 90 | save_frozen_graph( 91 | path_pb=f'{directory}/{model}.pb', model_keras=model_keras 92 | ) 93 | # convert TFLite model 94 | path_tflite = f'{directory}/{model}_fp32.tflite' 95 | convert_tflite_fp32(path_tflite=path_tflite, model_keras=model_keras) 96 | path_tflite = f'{directory}/{model}_fp16.tflite' 97 | convert_tflite_fp16(path_tflite=path_tflite, model_keras=model_keras) 98 | path_tflite = f'{directory}/{model}_int8.tflite' 99 | convert_tflite_int8( 100 | path_tflite=path_tflite, 101 | imgsize=imgsize, 102 | model_keras=model_keras 103 | ) 104 | return 105 | -------------------------------------------------------------------------------- /convertor/yolov5/convert_torch_onnx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from typing import List 4 | import os 5 
| import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import onnx 9 | 10 | 11 | # export-friendly version of nn.SiLU() 12 | class SiLU(nn.Module): 13 | @staticmethod 14 | def forward(x): 15 | return x * torch.sigmoid(x) 16 | 17 | 18 | # export-friendly version of nn.Hardswish() 19 | class Hardswish(nn.Module): 20 | @staticmethod 21 | def forward(x): 22 | # for torchscript and CoreML 23 | # return x * F.hardsigmoid(x) 24 | # for torchscript, CoreML and ONNX 25 | return x * F.hardtanh(x + 3, 0., 6.) / 6. 26 | 27 | 28 | def yolov5_convert_torch_onnx( 29 | model: str, 30 | directory: str, 31 | imgsize: List[int], 32 | repo: str = 'ultralytics/yolov5:v4.0' 33 | ) -> None: 34 | path_weight = f'{directory}/{model}.pt' 35 | if not os.path.isfile(path_weight): 36 | return 37 | path_torch = f'{directory}/{model}.pth' 38 | path_onnx = f'{directory}/{model}.onnx' 39 | dummy_image = torch.zeros(1, 3, *imgsize) 40 | model_torch = torch.hub.load(repo, model) 41 | ckpt = torch.load( 42 | path_weight, map_location='cpu' 43 | )['model'] 44 | model_torch.load_state_dict(ckpt.state_dict()) 45 | model_torch.names = ckpt.names 46 | # save state dict 47 | if not os.path.isfile(path_torch): 48 | torch.save(model_torch.state_dict(), path_torch) 49 | if os.path.isfile(path_onnx): 50 | return 51 | model_torch_onnx = model_torch.fuse() 52 | model_torch_onnx.eval() 53 | for k, m in model_torch_onnx.named_modules(): 54 | m._non_persistent_buffers_set = set() 55 | if m.__class__.__name__ == 'Conv': 56 | if isinstance(m.act, nn.Hardswish): 57 | m.act = Hardswish() 58 | elif isinstance(m.act, nn.SiLU): 59 | m.act = SiLU() 60 | model_torch_onnx.model[-1].export = True 61 | _ = model_torch_onnx(dummy_image) 62 | print('Starting ONNX export with onnx %s...' % onnx.__version__) 63 | try: 64 | torch.onnx.export( 65 | model_torch_onnx, dummy_image, path_onnx, 66 | verbose=False, 67 | opset_version=12, 68 | input_names=['images'], 69 | output_names=['output', 'output_1', 'output_2'] 70 | ) 71 | model_onnx = onnx.load(path_onnx) 72 | onnx.checker.check_model(model_onnx) 73 | print('ONNX export success: %s' % path_onnx) 74 | except Exception as e: 75 | print('ONNX export failure: %s' % e) 76 | return 77 | -------------------------------------------------------------------------------- /datasets/.gitignore: -------------------------------------------------------------------------------- 1 | annotations 2 | val2017 3 | sample_dataset 4 | -------------------------------------------------------------------------------- /datasets/README.md: -------------------------------------------------------------------------------- 1 | ## download COCO dataset and create small dataset 2 | 3 | - download COCO dataset (val2017) and its annotations 4 | - `> ./download_coco_val2017.sh` 5 | - create small dataset and convert annotations to json lines format 6 | - `> ./create_small_dataset.py` 7 | 8 | ## usage 9 | 10 | ``` 11 | usage: create_small_dataset.py [-h] [--number NUMBER] [--directory DIRECTORY] 12 | 13 | create small dataset from COCO val2017 dataset 14 | 15 | optional arguments: 16 | -h, --help show this help message and exit 17 | --number NUMBER, -n NUMBER 18 | number of images (default: 10) 19 | --directory DIRECTORY, -d DIRECTORY 20 | directory name (default: "sample_dataset") 21 | ``` 22 | -------------------------------------------------------------------------------- /datasets/create_small_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2
# -*- coding:utf-8 -*- 3 | import os 4 | import glob 5 | import shutil 6 | from collections import defaultdict 7 | import numpy as np 8 | import simplejson as json 9 | import argparse 10 | 11 | 12 | def create_dataset(number: int, directory: str) -> None: 13 | # delete old dataset and create new dataset 14 | if os.path.isdir(directory): 15 | shutil.rmtree(directory) 16 | os.makedirs(directory) 17 | # select images randomly 18 | fns = glob.glob('val2017/*') 19 | fns = [ 20 | os.path.basename(x) for x in fns 21 | if x.endswith(('.png', '.jpg')) 22 | ] 23 | np.random.shuffle(fns) 24 | fns = sorted(fns[:number]) 25 | # convert COCO annotations to the format of `object_detection_metrics` 26 | with open('annotations/instances_val2017.json', 'rt') as rf: 27 | orig_anns = json.load(fp=rf) 28 | bboxes = defaultdict(list) 29 | for ann in orig_anns['annotations']: 30 | image_id = '%012d' % ann['image_id'] 31 | bboxes[image_id].append({ 32 | 'category_id': ann['category_id'], 33 | 'bbox': [float(x) for x in ann['bbox']], 34 | }) 35 | # copy images and dump annotations 36 | new_anns = list() 37 | for fn in fns: 38 | shutil.copy(f'val2017/{fn}', f'{directory}/{fn}') 39 | image_id = os.path.splitext(fn)[0] 40 | new_anns.append({ 41 | 'image_id': image_id, 42 | 'bboxes': bboxes[image_id] 43 | }) 44 | with open(f'{directory}/ground_truths.jsonl', 'wt') as wf: 45 | for ann in new_anns: 46 | wf.write(json.dumps(ann) + '\n') 47 | return 48 | 49 | 50 | if __name__ == '__main__': 51 | if not (os.path.isdir('val2017') and os.path.isdir('annotations')): 52 | raise SystemError('run `download_coco_val2017.sh` first') 53 | parser = argparse.ArgumentParser( 54 | description='create small dataset from COCO val2017 dataset' 55 | ) 56 | parser.add_argument( 57 | '--number', '-n', type=int, default=10, 58 | help='number of images (default: 10)' 59 | ) 60 | parser.add_argument( 61 | '--directory', '-d', type=str, default='sample_dataset', 62 | help='directory name (defalt: "sample_dataset")' 63 | ) 64 | args = parser.parse_args() 65 | create_dataset(**vars(args)) 66 | -------------------------------------------------------------------------------- /datasets/download_coco_val2017.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if [ ! -d "val2017" ]; then 3 | if [ ! -f "val2017.zip" ]; then 4 | wget http://images.cocodataset.org/zips/val2017.zip 5 | fi 6 | unzip val2017.zip 7 | rm -f val2017.zip 8 | fi 9 | if [ ! -d "annotations" ]; then 10 | if [ ! 
-f "annotations_trainval2017.zip" ]; then 11 | wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip 12 | fi 13 | unzip annotations_trainval2017.zip 14 | rm -f annotations_trainval2017.zip 15 | fi 16 | -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import os 4 | import argparse 5 | from detector.base import Config 6 | from detector.yolov5 import DetectorYoloV5 7 | from detector.yolo import DetectorYolo 8 | 9 | 10 | def main(config: Config) -> None: 11 | if config.model.startswith(('yolov3', 'yolov4')): 12 | detector = DetectorYolo(config=config) 13 | elif config.model.startswith('yolov5'): 14 | detector = DetectorYoloV5(config=config) 15 | else: 16 | raise SystemError(f'model is incorrect ({config.model})') 17 | detector.print_header() 18 | for sess in detector.yield_session(): 19 | detector.inference(sess=sess) 20 | detector.print_result(sess=sess) 21 | detector.dump_result(sess=sess) 22 | detector.dump_image(sess=sess) 23 | detector.close() 24 | return 25 | 26 | 27 | if __name__ == '__main__': 28 | parser = argparse.ArgumentParser( 29 | description='detect objects from images' 30 | ) 31 | parser.add_argument( 32 | '-m', '--model', type=str, required=True, choices=[ 33 | 'yolov3-tiny', 'yolov3', 'yolov3-spp', 34 | 'yolov4-tiny', 'yolov4', 'yolov4-csp', 'yolov4x-mish', 35 | 'yolov5s', 'yolov5m', 'yolov5l', 'yolov5x', 36 | ], help='model name' 37 | ) 38 | parser.add_argument( 39 | '-f', '--framework', type=str, required=True, choices=[ 40 | 'torch', 'torch_onnx', 'onnx_vino', 'onnx_tf', 41 | 'tf', 'tflite', 'tf_onnx' 42 | ], help='framework' 43 | ) 44 | parser.add_argument( 45 | '-q', '--quantize', type=str, default='fp32', choices=[ 46 | 'fp32', 'fp16', 'int8' 47 | ], help='quantization mode (TensorFlow Lite only)' 48 | ) 49 | parser.add_argument( 50 | '-d', '--image-dir', type=str, required=True, 51 | help='directory contains images to detect objects' 52 | ) 53 | parser.add_argument( 54 | '-c', '--conf-threshold', type=float, default=0.3, 55 | help='threshold of confidence score to adopt bounding boxes' 56 | ) 57 | parser.add_argument( 58 | '-i', '--iou-threshold', type=float, default=0.45, 59 | help='threshold of IoU to eliminte bounding boxes in NMS' 60 | ) 61 | parser.add_argument( 62 | '--clarify-image', action='store_true', 63 | help='image preprocessing' 64 | ) 65 | parser.add_argument( 66 | '--use-superres', action='store_true', 67 | help='use Super-Resolution at image preprocessing' 68 | ) 69 | parser.add_argument( 70 | '--disable-soft-nms', action='store_true', 71 | help='use hard-NMS instead of soft-NMS' 72 | ) 73 | args = parser.parse_args() 74 | if not os.path.isdir(args.image_dir): 75 | raise ValueError( 76 | f'image directory not found ({args.image_dir})' 77 | ) 78 | if (args.conf_threshold < 0.0) or (args.conf_threshold >= 1.0): 79 | raise ValueError( 80 | f'confidence threshold is incorrect ({args.conf_threshold})' 81 | ) 82 | if (args.iou_threshold < 0.0) or (args.iou_threshold >= 1.0): 83 | raise ValueError( 84 | f'IoU threshold is incorrect ({args.iou_threshold})' 85 | ) 86 | config = Config(**vars(args)) 87 | main(config=config) 88 | -------------------------------------------------------------------------------- /detect_all.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if [ $# != 1 ]; then 3 | echo 
"Usage: $0 [dir]" 4 | exit 1 5 | fi 6 | dir=$1 7 | if [ ! -d ${dir} ]; then 8 | echo "${dir} not found" 9 | exit 1 10 | fi 11 | datanames=(${dir//\// }) 12 | dataname=${datanames[${#datanames[@]}-1]} 13 | models=( 14 | "yolov3-tiny" "yolov3" "yolov3-spp" "yolov4-tiny" "yolov4" 15 | ) 16 | frames=( 17 | "tf" "tf_onnx" 18 | ) 19 | modelsv5=( 20 | "yolov5s" "yolov5m" "yolov5l" "yolov5x" 21 | ) 22 | framesv5=( 23 | "torch" "torch_onnx" "onnx_vino" "onnx_tf" "tf" "tf_onnx" 24 | ) 25 | quants=( 26 | "fp32" "fp16" 27 | ) 28 | for frame in ${frames[@]} ; do 29 | for model in ${models[@]} ; do 30 | rdir="results/${dataname}/${model}_${frame}" 31 | if [ ! -d ${rdir} ] ; then 32 | ./detect.py -m ${model} -f ${frame} -d ${dir} 33 | fi 34 | done 35 | done 36 | for quant in ${quants[@]} ; do 37 | for model in ${models[@]} ; do 38 | rdir="results/${dataname}/${model}_tflite_${quant}" 39 | if [ ! -d ${rdir} ] ; then 40 | ./detect.py -m ${model} -f tflite -q ${quant} -d ${dir} 41 | fi 42 | done 43 | done 44 | for frame in ${framesv5[@]} ; do 45 | for model in ${modelsv5[@]} ; do 46 | rdir="results/${dataname}/${model}_${frame}" 47 | if [ ! -d ${rdir} ] ; then 48 | ./detect.py -m ${model} -f ${frame} -d ${dir} 49 | fi 50 | done 51 | done 52 | for quant in ${quants[@]} ; do 53 | for model in ${modelsv5[@]} ; do 54 | rdir="results/${dataname}/${model}_tflite_${quant}" 55 | if [ ! -d ${rdir} ] ; then 56 | ./detect.py -m ${model} -f tflite -q ${quant} -d ${dir} 57 | fi 58 | done 59 | done 60 | -------------------------------------------------------------------------------- /detector/.gitignore: -------------------------------------------------------------------------------- 1 | TakaoGothic.ttf 2 | -------------------------------------------------------------------------------- /detector/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/detector/__init__.py -------------------------------------------------------------------------------- /download_font.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -f TakaoGothic.ttf ]; then 3 | exit 4 | fi 5 | wget https://launchpad.net/takao-fonts/trunk/15.03/+download/TakaoFonts_00303.01.zip 6 | unzip TakaoFonts_00303.01.zip 7 | mv TakaoFonts_00303.01/TakaoGothic.ttf . 8 | rm -rf TakaoFonts_00303.01 9 | rm -f TakaoFonts_00303.01.zip 10 | -------------------------------------------------------------------------------- /download_superres.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [ ! -d "superres" ]; then 4 | mkdir superres 5 | fi 6 | # if [ ! -f "superres/LapSRN_x8.pb" ]; then 7 | # wget -P superres https://github.com/fannymonori/TF-LapSRN/raw/master/export/LapSRN_x8.pb 8 | # fi 9 | if [ ! -f "superres/ESPCN_x4.pb" ]; then 10 | wget -P superres https://github.com/fannymonori/TF-ESPCN/raw/master/export/ESPCN_x4.pb 11 | fi 12 | -------------------------------------------------------------------------------- /evaluate_all.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if [ $# != 1 ]; then 3 | echo "Usage: $0 [dir]" 4 | exit 1 5 | fi 6 | dir=$1 7 | if [ ! 
-d ${dir} ]; then 8 | echo "${dir} not found" 9 | exit 1 10 | fi 11 | datanames=(${dir//\// }) 12 | dataname=${datanames[${#datanames[@]}-1]} 13 | models=( 14 | "yolov3-tiny" "yolov3" "yolov3-spp" "yolov4-tiny" "yolov4" 15 | ) 16 | frames=( 17 | "tf" "tf_onnx" 18 | ) 19 | modelsv5=( 20 | "yolov5s" "yolov5m" "yolov5l" "yolov5x" 21 | ) 22 | framesv5=( 23 | "torch" "torch_onnx" "onnx_vino" "onnx_tf" "tf" "tf_onnx" 24 | ) 25 | quants=( 26 | "fp32" "fp16" 27 | ) 28 | for frame in ${frames[@]} ; do 29 | for model in ${models[@]} ; do 30 | object_detection_metrics -t ${dir}/ground_truths.jsonl -p results/${dataname}/${model}_${frame}/predictions.jsonl 31 | done 32 | done 33 | for quant in ${quants[@]} ; do 34 | for model in ${models[@]} ; do 35 | object_detection_metrics -t ${dir}/ground_truths.jsonl -p results/${dataname}/${model}_tflite_${quant}/predictions.jsonl 36 | done 37 | done 38 | for frame in ${framesv5[@]} ; do 39 | for model in ${modelsv5[@]} ; do 40 | object_detection_metrics -t ${dir}/ground_truths.jsonl -p results/${dataname}/${model}_${frame}/predictions.jsonl 41 | done 42 | done 43 | for quant in ${quants[@]} ; do 44 | for model in ${modelsv5[@]} ; do 45 | object_detection_metrics -t ${dir}/ground_truths.jsonl -p results/${dataname}/${model}_tflite_${quant}/predictions.jsonl 46 | done 47 | done 48 | -------------------------------------------------------------------------------- /ipynb/map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/ipynb/map.png -------------------------------------------------------------------------------- /ipynb/time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/ipynb/time.png -------------------------------------------------------------------------------- /ipynb/time_vs_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/ipynb/time_vs_map.png -------------------------------------------------------------------------------- /labels/coco_labels.txt: -------------------------------------------------------------------------------- 1 | 0 person 2 | 1 bicycle 3 | 2 car 4 | 3 motorcycle 5 | 4 airplane 6 | 5 bus 7 | 6 train 8 | 7 truck 9 | 8 boat 10 | 9 traffic light 11 | 10 fire hydrant 12 | 12 stop sign 13 | 13 parking meter 14 | 14 bench 15 | 15 bird 16 | 16 cat 17 | 17 dog 18 | 18 horse 19 | 19 sheep 20 | 20 cow 21 | 21 elephant 22 | 22 bear 23 | 23 zebra 24 | 24 giraffe 25 | 26 backpack 26 | 27 umbrella 27 | 30 handbag 28 | 31 tie 29 | 32 suitcase 30 | 33 frisbee 31 | 34 skis 32 | 35 snowboard 33 | 36 sports ball 34 | 37 kite 35 | 38 baseball bat 36 | 39 baseball glove 37 | 40 skateboard 38 | 41 surfboard 39 | 42 tennis racket 40 | 43 bottle 41 | 45 wine glass 42 | 46 cup 43 | 47 fork 44 | 48 knife 45 | 49 spoon 46 | 50 bowl 47 | 51 banana 48 | 52 apple 49 | 53 sandwich 50 | 54 orange 51 | 55 broccoli 52 | 56 carrot 53 | 57 hot dog 54 | 58 pizza 55 | 59 donut 56 | 60 cake 57 | 61 chair 58 | 62 couch 59 | 63 potted plant 60 | 64 bed 61 | 66 dining table 62 | 69 toilet 63 | 71 tv 64 | 72 laptop 65 | 73 mouse 66 | 74 remote 67 | 75 keyboard 68 | 76 cell phone 69 | 77 microwave 70 | 78 oven 71 | 79 toaster 72 | 80 sink 73 | 81 
refrigerator 74 | 83 book 75 | 84 clock 76 | 85 vase 77 | 86 scissors 78 | 87 teddy bear 79 | 88 hair drier 80 | 89 toothbrush -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/models/__init__.py -------------------------------------------------------------------------------- /models/experimental.py: -------------------------------------------------------------------------------- 1 | # This file contains experimental modules 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from models.common import Conv, DWConv 8 | from utils.google_utils import attempt_download 9 | 10 | 11 | class CrossConv(nn.Module): 12 | # Cross Convolution Downsample 13 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 14 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 15 | super(CrossConv, self).__init__() 16 | c_ = int(c2 * e) # hidden channels 17 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 18 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 19 | self.add = shortcut and c1 == c2 20 | 21 | def forward(self, x): 22 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 23 | 24 | 25 | class Sum(nn.Module): 26 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 27 | def __init__(self, n, weight=False): # n: number of inputs 28 | super(Sum, self).__init__() 29 | self.weight = weight # apply weights boolean 30 | self.iter = range(n - 1) # iter object 31 | if weight: 32 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights 33 | 34 | def forward(self, x): 35 | y = x[0] # no weight 36 | if self.weight: 37 | w = torch.sigmoid(self.w) * 2 38 | for i in self.iter: 39 | y = y + x[i + 1] * w[i] 40 | else: 41 | for i in self.iter: 42 | y = y + x[i + 1] 43 | return y 44 | 45 | 46 | class GhostConv(nn.Module): 47 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 48 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 49 | super(GhostConv, self).__init__() 50 | c_ = c2 // 2 # hidden channels 51 | self.cv1 = Conv(c1, c_, k, s, None, g, act) 52 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) 53 | 54 | def forward(self, x): 55 | y = self.cv1(x) 56 | return torch.cat([y, self.cv2(y)], 1) 57 | 58 | 59 | class GhostBottleneck(nn.Module): 60 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 61 | def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride 62 | super(GhostBottleneck, self).__init__() 63 | c_ = c2 // 2 64 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 65 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 66 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 67 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 68 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 69 | 70 | def forward(self, x): 71 | return self.conv(x) + self.shortcut(x) 72 | 73 | 74 | class MixConv2d(nn.Module): 75 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 76 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 77 | super(MixConv2d, self).__init__() 78 | groups = len(k) 79 | if equal_ch: # equal c_ per group 80 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 81 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 82 | 
else: # equal weight.numel() per group 83 | b = [c2] + [0] * groups 84 | a = np.eye(groups + 1, groups, k=-1) 85 | a -= np.roll(a, 1, axis=1) 86 | a *= np.array(k) ** 2 87 | a[0] = 1 88 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 89 | 90 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 91 | self.bn = nn.BatchNorm2d(c2) 92 | self.act = nn.LeakyReLU(0.1, inplace=True) 93 | 94 | def forward(self, x): 95 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 96 | 97 | 98 | class Ensemble(nn.ModuleList): 99 | # Ensemble of models 100 | def __init__(self): 101 | super(Ensemble, self).__init__() 102 | 103 | def forward(self, x, augment=False): 104 | y = [] 105 | for module in self: 106 | y.append(module(x, augment)[0]) 107 | # y = torch.stack(y).max(0)[0] # max ensemble 108 | # y = torch.stack(y).mean(0) # mean ensemble 109 | y = torch.cat(y, 1) # nms ensemble 110 | return y, None # inference, train output 111 | 112 | 113 | def attempt_load(weights, map_location=None): 114 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 115 | model = Ensemble() 116 | for w in weights if isinstance(weights, list) else [weights]: 117 | attempt_download(w) 118 | model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model 119 | 120 | # Compatibility updates 121 | for m in model.modules(): 122 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: 123 | m.inplace = True # pytorch 1.7.0 compatibility 124 | elif type(m) is Conv: 125 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 126 | 127 | if len(model) == 1: 128 | return model[-1] # return model 129 | else: 130 | print('Ensemble created with %s\n' % weights) 131 | for k in ['names', 'stride']: 132 | setattr(model, k, getattr(model[-1], k)) 133 | return model # return ensemble 134 | -------------------------------------------------------------------------------- /models/export.py: -------------------------------------------------------------------------------- 1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats 2 | 3 | Usage: 4 | $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1 5 | """ 6 | 7 | import argparse 8 | import sys 9 | import time 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | import models 17 | from models.experimental import attempt_load 18 | from utils.activations import Hardswish, SiLU 19 | from utils.general import set_logging, check_img_size 20 | 21 | if __name__ == '__main__': 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/ 24 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width 25 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 26 | opt = parser.parse_args() 27 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand 28 | print(opt) 29 | set_logging() 30 | t = time.time() 31 | 32 | # Load PyTorch model 33 | model = attempt_load(opt.weights, map_location=torch.device('cpu')) # load FP32 model 34 | labels = model.names 35 | 36 | # Checks 37 | gs = int(max(model.stride)) # grid size (max stride) 38 | opt.img_size = [check_img_size(x, gs) for x in 
opt.img_size] # verify img_size are gs-multiples 39 | 40 | # Input 41 | img = torch.zeros(opt.batch_size, 3, *opt.img_size) # image size(1,3,320,192) iDetection 42 | 43 | # Update model 44 | for k, m in model.named_modules(): 45 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 46 | if isinstance(m, models.common.Conv): # assign export-friendly activations 47 | if isinstance(m.act, nn.Hardswish): 48 | m.act = Hardswish() 49 | elif isinstance(m.act, nn.SiLU): 50 | m.act = SiLU() 51 | # elif isinstance(m, models.yolo.Detect): 52 | # m.forward = m.forward_export # assign forward (optional) 53 | model.model[-1].export = True # set Detect() layer export=True 54 | y = model(img) # dry run 55 | 56 | # TorchScript export 57 | try: 58 | print('\nStarting TorchScript export with torch %s...' % torch.__version__) 59 | f = opt.weights.replace('.pt', '.torchscript.pt') # filename 60 | ts = torch.jit.trace(model, img) 61 | ts.save(f) 62 | print('TorchScript export success, saved as %s' % f) 63 | except Exception as e: 64 | print('TorchScript export failure: %s' % e) 65 | 66 | # ONNX export 67 | try: 68 | import onnx 69 | 70 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__) 71 | f = opt.weights.replace('.pt', '.onnx') # filename 72 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], 73 | output_names=['classes', 'boxes'] if y is None else ['output']) 74 | 75 | # Checks 76 | onnx_model = onnx.load(f) # load onnx model 77 | onnx.checker.check_model(onnx_model) # check onnx model 78 | # print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 79 | print('ONNX export success, saved as %s' % f) 80 | except Exception as e: 81 | print('ONNX export failure: %s' % e) 82 | 83 | # CoreML export 84 | try: 85 | import coremltools as ct 86 | 87 | print('\nStarting CoreML export with coremltools %s...' % ct.__version__) 88 | # convert model from torchscript and apply pixel scaling as per detect.py 89 | model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) 90 | f = opt.weights.replace('.pt', '.mlmodel') # filename 91 | model.save(f) 92 | print('CoreML export success, saved as %s' % f) 93 | except Exception as e: 94 | print('CoreML export failure: %s' % e) 95 | 96 | # Finish 97 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' 
% (time.time() - t)) 98 | -------------------------------------------------------------------------------- /models/yolo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import sys 4 | from copy import deepcopy 5 | 6 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 7 | logger = logging.getLogger(__name__) 8 | 9 | from models.common import * 10 | from models.experimental import * 11 | from utils.autoanchor import check_anchor_order 12 | from utils.general import make_divisible, check_file, set_logging 13 | from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ 14 | select_device, copy_attr 15 | 16 | try: 17 | import thop # for FLOPS computation 18 | except ImportError: 19 | thop = None 20 | 21 | 22 | class Detect(nn.Module): 23 | stride = None # strides computed during build 24 | export = False # onnx export 25 | 26 | def __init__(self, nc=80, anchors=(), ch=()): # detection layer 27 | super(Detect, self).__init__() 28 | self.nc = nc # number of classes 29 | self.no = nc + 5 # number of outputs per anchor 30 | self.nl = len(anchors) # number of detection layers 31 | self.na = len(anchors[0]) // 2 # number of anchors 32 | self.grid = [torch.zeros(1)] * self.nl # init grid 33 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 34 | self.register_buffer('anchors', a) # shape(nl,na,2) 35 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 36 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 37 | 38 | def forward(self, x): 39 | # x = x.copy() # for profiling 40 | z = [] # inference output 41 | self.training |= self.export 42 | for i in range(self.nl): 43 | x[i] = self.m[i](x[i]) # conv 44 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 45 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 46 | 47 | if not self.training: # inference 48 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 49 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 50 | 51 | y = x[i].sigmoid() 52 | y[..., 0:2] = (y[..., 0:2] * 2. 
- 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy 53 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 54 | z.append(y.view(bs, -1, self.no)) 55 | 56 | return x if self.training else (torch.cat(z, 1), x) 57 | 58 | @staticmethod 59 | def _make_grid(nx=20, ny=20): 60 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 61 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 62 | 63 | 64 | class Model(nn.Module): 65 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None): # model, input channels, number of classes 66 | super(Model, self).__init__() 67 | if isinstance(cfg, dict): 68 | self.yaml = cfg # model dict 69 | else: # is *.yaml 70 | import yaml # for torch hub 71 | self.yaml_file = Path(cfg).name 72 | with open(cfg) as f: 73 | self.yaml = yaml.load(f, Loader=yaml.SafeLoader) # model dict 74 | 75 | # Define model 76 | ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels 77 | if nc and nc != self.yaml['nc']: 78 | logger.info('Overriding model.yaml nc=%g with nc=%g' % (self.yaml['nc'], nc)) 79 | self.yaml['nc'] = nc # override yaml value 80 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist 81 | self.names = [str(i) for i in range(self.yaml['nc'])] # default names 82 | # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) 83 | 84 | # Build strides, anchors 85 | m = self.model[-1] # Detect() 86 | if isinstance(m, Detect): 87 | s = 256 # 2x min stride 88 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 89 | m.anchors /= m.stride.view(-1, 1, 1) 90 | check_anchor_order(m) 91 | self.stride = m.stride 92 | self._initialize_biases() # only run once 93 | # print('Strides: %s' % m.stride.tolist()) 94 | 95 | # Init weights, biases 96 | initialize_weights(self) 97 | self.info() 98 | logger.info('') 99 | 100 | def forward(self, x, augment=False, profile=False): 101 | if augment: 102 | img_size = x.shape[-2:] # height, width 103 | s = [1, 0.83, 0.67] # scales 104 | f = [None, 3, None] # flips (2-ud, 3-lr) 105 | y = [] # outputs 106 | for si, fi in zip(s, f): 107 | xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) 108 | yi = self.forward_once(xi)[0] # forward 109 | # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save 110 | yi[..., :4] /= si # de-scale 111 | if fi == 2: 112 | yi[..., 1] = img_size[0] - 1 - yi[..., 1] # de-flip ud 113 | elif fi == 3: 114 | yi[..., 0] = img_size[1] - 1 - yi[..., 0] # de-flip lr 115 | y.append(yi) 116 | return torch.cat(y, 1), None # augmented inference, train 117 | else: 118 | return self.forward_once(x, profile) # single-scale inference, train 119 | 120 | def forward_once(self, x, profile=False): 121 | y, dt = [], [] # outputs 122 | for m in self.model: 123 | if m.f != -1: # if not from previous layer 124 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 125 | 126 | if profile: 127 | o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS 128 | t = time_synchronized() 129 | for _ in range(10): 130 | _ = m(x) 131 | dt.append((time_synchronized() - t) * 100) 132 | print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) 133 | 134 | x = m(x) # run 135 | y.append(x if m.i in self.save else None) # save output 136 | 137 | if profile: 138 | print('%.1fms total' % sum(dt)) 139 | return x 140 | 141 | def _initialize_biases(self, cf=None): # initialize biases into 
Detect(), cf is class frequency 142 | # https://arxiv.org/abs/1708.02002 section 3.3 143 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 144 | m = self.model[-1] # Detect() module 145 | for mi, s in zip(m.m, m.stride): # from 146 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 147 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 148 | b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 149 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 150 | 151 | def _print_biases(self): 152 | m = self.model[-1] # Detect() module 153 | for mi in m.m: # from 154 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 155 | print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 156 | 157 | # def _print_weights(self): 158 | # for m in self.model.modules(): 159 | # if type(m) is Bottleneck: 160 | # print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights 161 | 162 | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers 163 | print('Fusing layers... ') 164 | for m in self.model.modules(): 165 | if type(m) is Conv and hasattr(m, 'bn'): 166 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 167 | delattr(m, 'bn') # remove batchnorm 168 | m.forward = m.fuseforward # update forward 169 | self.info() 170 | return self 171 | 172 | def nms(self, mode=True): # add or remove NMS module 173 | present = type(self.model[-1]) is NMS # last layer is NMS 174 | if mode and not present: 175 | print('Adding NMS... ') 176 | m = NMS() # module 177 | m.f = -1 # from 178 | m.i = self.model[-1].i + 1 # index 179 | self.model.add_module(name='%s' % m.i, module=m) # add 180 | self.eval() 181 | elif not mode and present: 182 | print('Removing NMS... ') 183 | self.model = self.model[:-1] # remove 184 | return self 185 | 186 | def autoshape(self): # add autoShape module 187 | print('Adding autoShape... 
') 188 | m = autoShape(self) # wrap model 189 | copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes 190 | return m 191 | 192 | def info(self, verbose=False, img_size=640): # print model information 193 | model_info(self, verbose, img_size) 194 | 195 | 196 | def parse_model(d, ch): # model_dict, input_channels(3) 197 | logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) 198 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 199 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 200 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 201 | 202 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 203 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 204 | m = eval(m) if isinstance(m, str) else m # eval strings 205 | for j, a in enumerate(args): 206 | try: 207 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 208 | except: 209 | pass 210 | 211 | n = max(round(n * gd), 1) if n > 1 else n # depth gain 212 | if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, 213 | C3]: 214 | c1, c2 = ch[f], args[0] 215 | if c2 != no: # if not output 216 | c2 = make_divisible(c2 * gw, 8) 217 | 218 | args = [c1, c2, *args[1:]] 219 | if m in [BottleneckCSP, C3]: 220 | args.insert(2, n) # number of repeats 221 | n = 1 222 | elif m is nn.BatchNorm2d: 223 | args = [ch[f]] 224 | elif m is Concat: 225 | c2 = sum([ch[x] for x in f]) 226 | elif m is Detect: 227 | args.append([ch[x] for x in f]) 228 | if isinstance(args[1], int): # number of anchors 229 | args[1] = [list(range(args[1] * 2))] * len(f) 230 | elif m is Contract: 231 | c2 = ch[f] * args[0] ** 2 232 | elif m is Expand: 233 | c2 = ch[f] // args[0] ** 2 234 | else: 235 | c2 = ch[f] 236 | 237 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module 238 | t = str(m)[8:-2].replace('__main__.', '') # module type 239 | np = sum([x.numel() for x in m_.parameters()]) # number params 240 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 241 | logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print 242 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 243 | layers.append(m_) 244 | if i == 0: 245 | ch = [] 246 | ch.append(c2) 247 | return nn.Sequential(*layers), sorted(save) 248 | 249 | 250 | if __name__ == '__main__': 251 | parser = argparse.ArgumentParser() 252 | parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml') 253 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 254 | opt = parser.parse_args() 255 | opt.cfg = check_file(opt.cfg) # check file 256 | set_logging() 257 | device = select_device(opt.device) 258 | 259 | # Create model 260 | model = Model(opt.cfg).to(device) 261 | model.train() 262 | 263 | # Profile 264 | # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device) 265 | # y = model(img, profile=True) 266 | 267 | # Tensorboard 268 | # from torch.utils.tensorboard import SummaryWriter 269 | # tb_writer = SummaryWriter() 270 | # print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/") 271 | # tb_writer.add_graph(model.model, img) # add model to tensorboard 272 | # tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard 273 | -------------------------------------------------------------------------------- /models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.67 # model depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 
| [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5x.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.33 # model depth multiple 4 | width_multiple: 1.25 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 
17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | matplotlib 3 | requests 4 | opencv-contrib-python 5 | Pillow 6 | torch 7 | torchvision 8 | tensorflow 9 | tensorflow-addons 10 | onnx 11 | onnxruntime 12 | onnx-tf 13 | tf2onnx 14 | PyYAML 15 | simplejson 16 | tqdm 17 | -------------------------------------------------------------------------------- /tools/convert_yolo_config_csv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from collections import OrderedDict 4 | import configparser 5 | import os 6 | import pandas as pd 7 | import argparse 8 | 9 | 10 | class LayerDict(OrderedDict): 11 | def __init__(self): 12 | super().__init__() 13 | self.seq = 0 14 | 15 | def __setitem__(self, key, val): 16 | if key.startswith('#'): 17 | return 18 | if isinstance(val, dict): 19 | if key == 'net': 20 | return 21 | self.seq += 1 22 | key = f'{self.seq}:{key}' 23 | super().__setitem__(key, val) 24 | return 25 | 26 | 27 | def main(config: str) -> None: 28 | if not os.path.isfile(config): 29 | raise ValueError(f'config({config}) is not exists') 30 | cfg = configparser.ConfigParser( 31 | defaults=None, 32 | dict_type=LayerDict, 33 | strict=False, 34 | empty_lines_in_values=False, 35 | comment_prefixes=(';', '#'), 36 | allow_no_value=True 37 | ) 38 | cfg.read(config) 39 | layers = list() 40 | convs = 0 41 | for i, section in enumerate(cfg.sections()): 42 | layer_type = section.split(':')[1] 43 | layer = dict(cfg.items(section)) 44 | layer['no.'] = i 45 | layer['type'] = layer_type 46 | if layer_type == 'convolutional': 47 | layer['convs'] = convs 48 | convs += 1 49 | layers.append(layer) 50 | df = pd.DataFrame(layers) 51 | needed_columns = [ 52 | 'type', 'convs', 'filters', 'size', 'stride', 'pad', 53 | 'activation', 'batch_normalize', 54 | 'no.', 'from', 'layers' 55 | ] 56 | for nc in needed_columns: 57 | if nc not in list(df.columns): 58 | df[nc] = [None] * df.shape[0] 59 | df = df[needed_columns] 60 | csv = config.replace('.cfg', '.csv') 61 | df.to_csv(csv, encoding='utf_8_sig', index=False) 62 | return 63 | 64 | 65 | if __name__ == '__main__': 66 | parser = argparse.ArgumentParser( 67 | description='convert YOLO config file(.cfg) to csv (utf-8 BOM)' 68 | ) 69 | parser.add_argument( 70 | 'config', type=str, help='YOLO config file' 71 | ) 72 | args = parser.parse_args() 73 | main(**vars(args)) 74 | -------------------------------------------------------------------------------- /tools/yolov3-spp.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | 
batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 
250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | 
batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=255 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=80 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | 
pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=255 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 729 | classes=80 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=255 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 816 | classes=80 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | 823 | -------------------------------------------------------------------------------- /tools/yolov3-spp.csv: -------------------------------------------------------------------------------- 1 | type,convs,filters,size,stride,pad,activation,batch_normalize,no.,from,layers 2 | convolutional,0.0,32,3,1,1,leaky,1,0,, 3 | convolutional,1.0,64,3,2,1,leaky,1,1,, 4 | convolutional,2.0,32,1,1,1,leaky,1,2,, 5 | convolutional,3.0,64,3,1,1,leaky,1,3,, 6 | shortcut,,,,,,linear,,4,-3, 7 | convolutional,4.0,128,3,2,1,leaky,1,5,, 8 | convolutional,5.0,64,1,1,1,leaky,1,6,, 9 | convolutional,6.0,128,3,1,1,leaky,1,7,, 10 | shortcut,,,,,,linear,,8,-3, 11 | convolutional,7.0,64,1,1,1,leaky,1,9,, 12 | convolutional,8.0,128,3,1,1,leaky,1,10,, 13 | shortcut,,,,,,linear,,11,-3, 14 | convolutional,9.0,256,3,2,1,leaky,1,12,, 15 | convolutional,10.0,128,1,1,1,leaky,1,13,, 16 | convolutional,11.0,256,3,1,1,leaky,1,14,, 17 | shortcut,,,,,,linear,,15,-3, 18 | convolutional,12.0,128,1,1,1,leaky,1,16,, 19 | convolutional,13.0,256,3,1,1,leaky,1,17,, 20 | shortcut,,,,,,linear,,18,-3, 21 | convolutional,14.0,128,1,1,1,leaky,1,19,, 22 | convolutional,15.0,256,3,1,1,leaky,1,20,, 23 | shortcut,,,,,,linear,,21,-3, 24 | convolutional,16.0,128,1,1,1,leaky,1,22,, 25 | convolutional,17.0,256,3,1,1,leaky,1,23,, 26 | shortcut,,,,,,linear,,24,-3, 27 | 
convolutional,18.0,128,1,1,1,leaky,1,25,, 28 | convolutional,19.0,256,3,1,1,leaky,1,26,, 29 | shortcut,,,,,,linear,,27,-3, 30 | convolutional,20.0,128,1,1,1,leaky,1,28,, 31 | convolutional,21.0,256,3,1,1,leaky,1,29,, 32 | shortcut,,,,,,linear,,30,-3, 33 | convolutional,22.0,128,1,1,1,leaky,1,31,, 34 | convolutional,23.0,256,3,1,1,leaky,1,32,, 35 | shortcut,,,,,,linear,,33,-3, 36 | convolutional,24.0,128,1,1,1,leaky,1,34,, 37 | convolutional,25.0,256,3,1,1,leaky,1,35,, 38 | shortcut,,,,,,linear,,36,-3, 39 | convolutional,26.0,512,3,2,1,leaky,1,37,, 40 | convolutional,27.0,256,1,1,1,leaky,1,38,, 41 | convolutional,28.0,512,3,1,1,leaky,1,39,, 42 | shortcut,,,,,,linear,,40,-3, 43 | convolutional,29.0,256,1,1,1,leaky,1,41,, 44 | convolutional,30.0,512,3,1,1,leaky,1,42,, 45 | shortcut,,,,,,linear,,43,-3, 46 | convolutional,31.0,256,1,1,1,leaky,1,44,, 47 | convolutional,32.0,512,3,1,1,leaky,1,45,, 48 | shortcut,,,,,,linear,,46,-3, 49 | convolutional,33.0,256,1,1,1,leaky,1,47,, 50 | convolutional,34.0,512,3,1,1,leaky,1,48,, 51 | shortcut,,,,,,linear,,49,-3, 52 | convolutional,35.0,256,1,1,1,leaky,1,50,, 53 | convolutional,36.0,512,3,1,1,leaky,1,51,, 54 | shortcut,,,,,,linear,,52,-3, 55 | convolutional,37.0,256,1,1,1,leaky,1,53,, 56 | convolutional,38.0,512,3,1,1,leaky,1,54,, 57 | shortcut,,,,,,linear,,55,-3, 58 | convolutional,39.0,256,1,1,1,leaky,1,56,, 59 | convolutional,40.0,512,3,1,1,leaky,1,57,, 60 | shortcut,,,,,,linear,,58,-3, 61 | convolutional,41.0,256,1,1,1,leaky,1,59,, 62 | convolutional,42.0,512,3,1,1,leaky,1,60,, 63 | shortcut,,,,,,linear,,61,-3, 64 | convolutional,43.0,1024,3,2,1,leaky,1,62,, 65 | convolutional,44.0,512,1,1,1,leaky,1,63,, 66 | convolutional,45.0,1024,3,1,1,leaky,1,64,, 67 | shortcut,,,,,,linear,,65,-3, 68 | convolutional,46.0,512,1,1,1,leaky,1,66,, 69 | convolutional,47.0,1024,3,1,1,leaky,1,67,, 70 | shortcut,,,,,,linear,,68,-3, 71 | convolutional,48.0,512,1,1,1,leaky,1,69,, 72 | convolutional,49.0,1024,3,1,1,leaky,1,70,, 73 | shortcut,,,,,,linear,,71,-3, 74 | convolutional,50.0,512,1,1,1,leaky,1,72,, 75 | convolutional,51.0,1024,3,1,1,leaky,1,73,, 76 | shortcut,,,,,,linear,,74,-3, 77 | convolutional,52.0,512,1,1,1,leaky,1,75,, 78 | convolutional,53.0,1024,3,1,1,leaky,1,76,, 79 | convolutional,54.0,512,1,1,1,leaky,1,77,, 80 | maxpool,,,5,1,,,,78,, 81 | route,,,,,,,,79,,-2 82 | maxpool,,,9,1,,,,80,, 83 | route,,,,,,,,81,,-4 84 | maxpool,,,13,1,,,,82,, 85 | route,,,,,,,,83,,"-1,-3,-5,-6" 86 | convolutional,55.0,512,1,1,1,leaky,1,84,, 87 | convolutional,56.0,1024,3,1,1,leaky,1,85,, 88 | convolutional,57.0,512,1,1,1,leaky,1,86,, 89 | convolutional,58.0,1024,3,1,1,leaky,1,87,, 90 | convolutional,59.0,255,1,1,1,linear,,88,, 91 | yolo,,,,,,,,89,, 92 | route,,,,,,,,90,,-4 93 | convolutional,60.0,256,1,1,1,leaky,1,91,, 94 | upsample,,,,2,,,,92,, 95 | route,,,,,,,,93,,"-1, 61" 96 | convolutional,61.0,256,1,1,1,leaky,1,94,, 97 | convolutional,62.0,512,3,1,1,leaky,1,95,, 98 | convolutional,63.0,256,1,1,1,leaky,1,96,, 99 | convolutional,64.0,512,3,1,1,leaky,1,97,, 100 | convolutional,65.0,256,1,1,1,leaky,1,98,, 101 | convolutional,66.0,512,3,1,1,leaky,1,99,, 102 | convolutional,67.0,255,1,1,1,linear,,100,, 103 | yolo,,,,,,,,101,, 104 | route,,,,,,,,102,,-4 105 | convolutional,68.0,128,1,1,1,leaky,1,103,, 106 | upsample,,,,2,,,,104,, 107 | route,,,,,,,,105,,"-1, 36" 108 | convolutional,69.0,128,1,1,1,leaky,1,106,, 109 | convolutional,70.0,256,3,1,1,leaky,1,107,, 110 | convolutional,71.0,128,1,1,1,leaky,1,108,, 111 | convolutional,72.0,256,3,1,1,leaky,1,109,, 112 | 
convolutional,73.0,128,1,1,1,leaky,1,110,, 113 | convolutional,74.0,256,3,1,1,leaky,1,111,, 114 | convolutional,75.0,255,1,1,1,linear,,112,, 115 | yolo,,,,,,,,113,, 116 | -------------------------------------------------------------------------------- /tools/yolov3-spp.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/tools/yolov3-spp.xlsx -------------------------------------------------------------------------------- /tools/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | 
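Note: the per-layer .csv that sits next to each Darknet .cfg under tools/ is produced by tools/convert_yolo_config_csv.py (listed earlier in this dump). A minimal sketch of regenerating tools/yolov3-tiny.csv — an illustration, not a file in this repository — assuming the repository root as the working directory and pandas installed per requirements.txt; it simply drives the script's own argparse entry point:

import runpy
import sys

# Equivalent to running: python tools/convert_yolo_config_csv.py tools/yolov3-tiny.cfg
sys.argv = ['convert_yolo_config_csv.py', 'tools/yolov3-tiny.cfg']
runpy.run_path('tools/convert_yolo_config_csv.py', run_name='__main__')
# main() replaces '.cfg' with '.csv', so this writes tools/yolov3-tiny.csv
# (UTF-8 with BOM), one row per layer section ([net] is skipped by LayerDict),
# with the same columns as the file that follows.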
-------------------------------------------------------------------------------- /tools/yolov3-tiny.csv: -------------------------------------------------------------------------------- 1 | type,convs,filters,size,stride,pad,activation,batch_normalize,no.,from,layers 2 | convolutional,0.0,16,3,1,1,leaky,1,0,, 3 | maxpool,,,2,2,,,,1,, 4 | convolutional,1.0,32,3,1,1,leaky,1,2,, 5 | maxpool,,,2,2,,,,3,, 6 | convolutional,2.0,64,3,1,1,leaky,1,4,, 7 | maxpool,,,2,2,,,,5,, 8 | convolutional,3.0,128,3,1,1,leaky,1,6,, 9 | maxpool,,,2,2,,,,7,, 10 | convolutional,4.0,256,3,1,1,leaky,1,8,, 11 | maxpool,,,2,2,,,,9,, 12 | convolutional,5.0,512,3,1,1,leaky,1,10,, 13 | maxpool,,,2,1,,,,11,, 14 | convolutional,6.0,1024,3,1,1,leaky,1,12,, 15 | convolutional,7.0,256,1,1,1,leaky,1,13,, 16 | convolutional,8.0,512,3,1,1,leaky,1,14,, 17 | convolutional,9.0,255,1,1,1,linear,,15,, 18 | yolo,,,,,,,,16,, 19 | route,,,,,,,,17,,-4 20 | convolutional,10.0,128,1,1,1,leaky,1,18,, 21 | upsample,,,,2,,,,19,, 22 | route,,,,,,,,20,,"-1, 8" 23 | convolutional,11.0,256,3,1,1,leaky,1,21,, 24 | convolutional,12.0,255,1,1,1,linear,,22,, 25 | yolo,,,,,,,,23,, 26 | -------------------------------------------------------------------------------- /tools/yolov3-tiny.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/tools/yolov3-tiny.xlsx -------------------------------------------------------------------------------- /tools/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=16 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | 
[convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | 
activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 
561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | 
activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | 790 | -------------------------------------------------------------------------------- /tools/yolov3.csv: -------------------------------------------------------------------------------- 1 | type,convs,filters,size,stride,pad,activation,batch_normalize,no.,from,layers 2 | convolutional,0.0,32,3,1,1,leaky,1,0,, 3 | convolutional,1.0,64,3,2,1,leaky,1,1,, 4 | convolutional,2.0,32,1,1,1,leaky,1,2,, 5 | convolutional,3.0,64,3,1,1,leaky,1,3,, 6 | shortcut,,,,,,linear,,4,-3, 7 | convolutional,4.0,128,3,2,1,leaky,1,5,, 8 | convolutional,5.0,64,1,1,1,leaky,1,6,, 9 | convolutional,6.0,128,3,1,1,leaky,1,7,, 10 | shortcut,,,,,,linear,,8,-3, 11 | convolutional,7.0,64,1,1,1,leaky,1,9,, 12 | convolutional,8.0,128,3,1,1,leaky,1,10,, 13 | shortcut,,,,,,linear,,11,-3, 14 | convolutional,9.0,256,3,2,1,leaky,1,12,, 15 | convolutional,10.0,128,1,1,1,leaky,1,13,, 16 | convolutional,11.0,256,3,1,1,leaky,1,14,, 17 | shortcut,,,,,,linear,,15,-3, 18 | convolutional,12.0,128,1,1,1,leaky,1,16,, 19 | convolutional,13.0,256,3,1,1,leaky,1,17,, 20 | shortcut,,,,,,linear,,18,-3, 21 | convolutional,14.0,128,1,1,1,leaky,1,19,, 22 | convolutional,15.0,256,3,1,1,leaky,1,20,, 23 | shortcut,,,,,,linear,,21,-3, 24 | convolutional,16.0,128,1,1,1,leaky,1,22,, 25 | convolutional,17.0,256,3,1,1,leaky,1,23,, 26 | shortcut,,,,,,linear,,24,-3, 27 | convolutional,18.0,128,1,1,1,leaky,1,25,, 28 | convolutional,19.0,256,3,1,1,leaky,1,26,, 29 | shortcut,,,,,,linear,,27,-3, 30 | convolutional,20.0,128,1,1,1,leaky,1,28,, 31 | convolutional,21.0,256,3,1,1,leaky,1,29,, 32 | shortcut,,,,,,linear,,30,-3, 33 | convolutional,22.0,128,1,1,1,leaky,1,31,, 34 | convolutional,23.0,256,3,1,1,leaky,1,32,, 35 | shortcut,,,,,,linear,,33,-3, 36 | convolutional,24.0,128,1,1,1,leaky,1,34,, 37 | convolutional,25.0,256,3,1,1,leaky,1,35,, 38 | shortcut,,,,,,linear,,36,-3, 39 | convolutional,26.0,512,3,2,1,leaky,1,37,, 40 | convolutional,27.0,256,1,1,1,leaky,1,38,, 41 | convolutional,28.0,512,3,1,1,leaky,1,39,, 42 | shortcut,,,,,,linear,,40,-3, 43 | convolutional,29.0,256,1,1,1,leaky,1,41,, 44 | convolutional,30.0,512,3,1,1,leaky,1,42,, 45 | shortcut,,,,,,linear,,43,-3, 46 | convolutional,31.0,256,1,1,1,leaky,1,44,, 47 | convolutional,32.0,512,3,1,1,leaky,1,45,, 48 | shortcut,,,,,,linear,,46,-3, 49 | convolutional,33.0,256,1,1,1,leaky,1,47,, 50 | convolutional,34.0,512,3,1,1,leaky,1,48,, 51 | shortcut,,,,,,linear,,49,-3, 52 | convolutional,35.0,256,1,1,1,leaky,1,50,, 53 | convolutional,36.0,512,3,1,1,leaky,1,51,, 54 | shortcut,,,,,,linear,,52,-3, 55 | convolutional,37.0,256,1,1,1,leaky,1,53,, 56 | convolutional,38.0,512,3,1,1,leaky,1,54,, 57 | shortcut,,,,,,linear,,55,-3, 58 | convolutional,39.0,256,1,1,1,leaky,1,56,, 59 | convolutional,40.0,512,3,1,1,leaky,1,57,, 60 | shortcut,,,,,,linear,,58,-3, 61 | convolutional,41.0,256,1,1,1,leaky,1,59,, 62 | convolutional,42.0,512,3,1,1,leaky,1,60,, 63 | shortcut,,,,,,linear,,61,-3, 64 | convolutional,43.0,1024,3,2,1,leaky,1,62,, 65 | convolutional,44.0,512,1,1,1,leaky,1,63,, 66 | convolutional,45.0,1024,3,1,1,leaky,1,64,, 67 | shortcut,,,,,,linear,,65,-3, 68 | convolutional,46.0,512,1,1,1,leaky,1,66,, 69 | convolutional,47.0,1024,3,1,1,leaky,1,67,, 70 | shortcut,,,,,,linear,,68,-3, 71 | convolutional,48.0,512,1,1,1,leaky,1,69,, 72 | 
convolutional,49.0,1024,3,1,1,leaky,1,70,, 73 | shortcut,,,,,,linear,,71,-3, 74 | convolutional,50.0,512,1,1,1,leaky,1,72,, 75 | convolutional,51.0,1024,3,1,1,leaky,1,73,, 76 | shortcut,,,,,,linear,,74,-3, 77 | convolutional,52.0,512,1,1,1,leaky,1,75,, 78 | convolutional,53.0,1024,3,1,1,leaky,1,76,, 79 | convolutional,54.0,512,1,1,1,leaky,1,77,, 80 | convolutional,55.0,1024,3,1,1,leaky,1,78,, 81 | convolutional,56.0,512,1,1,1,leaky,1,79,, 82 | convolutional,57.0,1024,3,1,1,leaky,1,80,, 83 | convolutional,58.0,255,1,1,1,linear,,81,, 84 | yolo,,,,,,,,82,, 85 | route,,,,,,,,83,,-4 86 | convolutional,59.0,256,1,1,1,leaky,1,84,, 87 | upsample,,,,2,,,,85,, 88 | route,,,,,,,,86,,"-1, 61" 89 | convolutional,60.0,256,1,1,1,leaky,1,87,, 90 | convolutional,61.0,512,3,1,1,leaky,1,88,, 91 | convolutional,62.0,256,1,1,1,leaky,1,89,, 92 | convolutional,63.0,512,3,1,1,leaky,1,90,, 93 | convolutional,64.0,256,1,1,1,leaky,1,91,, 94 | convolutional,65.0,512,3,1,1,leaky,1,92,, 95 | convolutional,66.0,255,1,1,1,linear,,93,, 96 | yolo,,,,,,,,94,, 97 | route,,,,,,,,95,,-4 98 | convolutional,67.0,128,1,1,1,leaky,1,96,, 99 | upsample,,,,2,,,,97,, 100 | route,,,,,,,,98,,"-1, 36" 101 | convolutional,68.0,128,1,1,1,leaky,1,99,, 102 | convolutional,69.0,256,3,1,1,leaky,1,100,, 103 | convolutional,70.0,128,1,1,1,leaky,1,101,, 104 | convolutional,71.0,256,3,1,1,leaky,1,102,, 105 | convolutional,72.0,128,1,1,1,leaky,1,103,, 106 | convolutional,73.0,256,3,1,1,leaky,1,104,, 107 | convolutional,74.0,255,1,1,1,linear,,105,, 108 | yolo,,,,,,,,106,, 109 | -------------------------------------------------------------------------------- /tools/yolov3.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/tools/yolov3.xlsx -------------------------------------------------------------------------------- /tools/yolov4-csp.csv: -------------------------------------------------------------------------------- 1 | type,convs,filters,size,stride,pad,activation,batch_normalize,no.,from,layers 2 | convolutional,0.0,32,3,1,1,mish,1,0,, 3 | convolutional,1.0,64,3,2,1,mish,1,1,, 4 | convolutional,2.0,32,1,1,1,mish,1,2,, 5 | convolutional,3.0,64,3,1,1,mish,1,3,, 6 | shortcut,,,,,,linear,,4,-3, 7 | convolutional,4.0,128,3,2,1,mish,1,5,, 8 | convolutional,5.0,64,1,1,1,mish,1,6,, 9 | route,,,,,,,,7,,-2 10 | convolutional,6.0,64,1,1,1,mish,1,8,, 11 | convolutional,7.0,64,1,1,1,mish,1,9,, 12 | convolutional,8.0,64,3,1,1,mish,1,10,, 13 | shortcut,,,,,,linear,,11,-3, 14 | convolutional,9.0,64,1,1,1,mish,1,12,, 15 | convolutional,10.0,64,3,1,1,mish,1,13,, 16 | shortcut,,,,,,linear,,14,-3, 17 | convolutional,11.0,64,1,1,1,mish,1,15,, 18 | route,,,,,,,,16,,"-1,-10" 19 | convolutional,12.0,128,1,1,1,mish,1,17,, 20 | convolutional,13.0,256,3,2,1,mish,1,18,, 21 | convolutional,14.0,128,1,1,1,mish,1,19,, 22 | route,,,,,,,,20,,-2 23 | convolutional,15.0,128,1,1,1,mish,1,21,, 24 | convolutional,16.0,128,1,1,1,mish,1,22,, 25 | convolutional,17.0,128,3,1,1,mish,1,23,, 26 | shortcut,,,,,,linear,,24,-3, 27 | convolutional,18.0,128,1,1,1,mish,1,25,, 28 | convolutional,19.0,128,3,1,1,mish,1,26,, 29 | shortcut,,,,,,linear,,27,-3, 30 | convolutional,20.0,128,1,1,1,mish,1,28,, 31 | convolutional,21.0,128,3,1,1,mish,1,29,, 32 | shortcut,,,,,,linear,,30,-3, 33 | convolutional,22.0,128,1,1,1,mish,1,31,, 34 | convolutional,23.0,128,3,1,1,mish,1,32,, 35 | shortcut,,,,,,linear,,33,-3, 36 | convolutional,24.0,128,1,1,1,mish,1,34,, 37 | 
convolutional,25.0,128,3,1,1,mish,1,35,, 38 | shortcut,,,,,,linear,,36,-3, 39 | convolutional,26.0,128,1,1,1,mish,1,37,, 40 | convolutional,27.0,128,3,1,1,mish,1,38,, 41 | shortcut,,,,,,linear,,39,-3, 42 | convolutional,28.0,128,1,1,1,mish,1,40,, 43 | convolutional,29.0,128,3,1,1,mish,1,41,, 44 | shortcut,,,,,,linear,,42,-3, 45 | convolutional,30.0,128,1,1,1,mish,1,43,, 46 | convolutional,31.0,128,3,1,1,mish,1,44,, 47 | shortcut,,,,,,linear,,45,-3, 48 | convolutional,32.0,128,1,1,1,mish,1,46,, 49 | route,,,,,,,,47,,"-1,-28" 50 | convolutional,33.0,256,1,1,1,mish,1,48,, 51 | convolutional,34.0,512,3,2,1,mish,1,49,, 52 | convolutional,35.0,256,1,1,1,mish,1,50,, 53 | route,,,,,,,,51,,-2 54 | convolutional,36.0,256,1,1,1,mish,1,52,, 55 | convolutional,37.0,256,1,1,1,mish,1,53,, 56 | convolutional,38.0,256,3,1,1,mish,1,54,, 57 | shortcut,,,,,,linear,,55,-3, 58 | convolutional,39.0,256,1,1,1,mish,1,56,, 59 | convolutional,40.0,256,3,1,1,mish,1,57,, 60 | shortcut,,,,,,linear,,58,-3, 61 | convolutional,41.0,256,1,1,1,mish,1,59,, 62 | convolutional,42.0,256,3,1,1,mish,1,60,, 63 | shortcut,,,,,,linear,,61,-3, 64 | convolutional,43.0,256,1,1,1,mish,1,62,, 65 | convolutional,44.0,256,3,1,1,mish,1,63,, 66 | shortcut,,,,,,linear,,64,-3, 67 | convolutional,45.0,256,1,1,1,mish,1,65,, 68 | convolutional,46.0,256,3,1,1,mish,1,66,, 69 | shortcut,,,,,,linear,,67,-3, 70 | convolutional,47.0,256,1,1,1,mish,1,68,, 71 | convolutional,48.0,256,3,1,1,mish,1,69,, 72 | shortcut,,,,,,linear,,70,-3, 73 | convolutional,49.0,256,1,1,1,mish,1,71,, 74 | convolutional,50.0,256,3,1,1,mish,1,72,, 75 | shortcut,,,,,,linear,,73,-3, 76 | convolutional,51.0,256,1,1,1,mish,1,74,, 77 | convolutional,52.0,256,3,1,1,mish,1,75,, 78 | shortcut,,,,,,linear,,76,-3, 79 | convolutional,53.0,256,1,1,1,mish,1,77,, 80 | route,,,,,,,,78,,"-1,-28" 81 | convolutional,54.0,512,1,1,1,mish,1,79,, 82 | convolutional,55.0,1024,3,2,1,mish,1,80,, 83 | convolutional,56.0,512,1,1,1,mish,1,81,, 84 | route,,,,,,,,82,,-2 85 | convolutional,57.0,512,1,1,1,mish,1,83,, 86 | convolutional,58.0,512,1,1,1,mish,1,84,, 87 | convolutional,59.0,512,3,1,1,mish,1,85,, 88 | shortcut,,,,,,linear,,86,-3, 89 | convolutional,60.0,512,1,1,1,mish,1,87,, 90 | convolutional,61.0,512,3,1,1,mish,1,88,, 91 | shortcut,,,,,,linear,,89,-3, 92 | convolutional,62.0,512,1,1,1,mish,1,90,, 93 | convolutional,63.0,512,3,1,1,mish,1,91,, 94 | shortcut,,,,,,linear,,92,-3, 95 | convolutional,64.0,512,1,1,1,mish,1,93,, 96 | convolutional,65.0,512,3,1,1,mish,1,94,, 97 | shortcut,,,,,,linear,,95,-3, 98 | convolutional,66.0,512,1,1,1,mish,1,96,, 99 | route,,,,,,,,97,,"-1,-16" 100 | convolutional,67.0,1024,1,1,1,mish,1,98,, 101 | convolutional,68.0,512,1,1,1,mish,1,99,, 102 | route,,,,,,,,100,,-2 103 | convolutional,69.0,512,1,1,1,mish,1,101,, 104 | convolutional,70.0,512,3,1,1,mish,1,102,, 105 | convolutional,71.0,512,1,1,1,mish,1,103,, 106 | maxpool,,,5,1,,,,104,, 107 | route,,,,,,,,105,,-2 108 | maxpool,,,9,1,,,,106,, 109 | route,,,,,,,,107,,-4 110 | maxpool,,,13,1,,,,108,, 111 | route,,,,,,,,109,,"-1,-3,-5,-6" 112 | convolutional,72.0,512,1,1,1,mish,1,110,, 113 | convolutional,73.0,512,3,1,1,mish,1,111,, 114 | route,,,,,,,,112,,"-1, -13" 115 | convolutional,74.0,512,1,1,1,mish,1,113,, 116 | convolutional,75.0,256,1,1,1,mish,1,114,, 117 | upsample,,,,2,,,,115,, 118 | route,,,,,,,,116,,79 119 | convolutional,76.0,256,1,1,1,mish,1,117,, 120 | route,,,,,,,,118,,"-1, -3" 121 | convolutional,77.0,256,1,1,1,mish,1,119,, 122 | convolutional,78.0,256,1,1,1,mish,1,120,, 123 | route,,,,,,,,121,,-2 124 | 
convolutional,79.0,256,1,1,1,mish,1,122,, 125 | convolutional,80.0,256,3,1,1,mish,1,123,, 126 | convolutional,81.0,256,1,1,1,mish,1,124,, 127 | convolutional,82.0,256,3,1,1,mish,1,125,, 128 | route,,,,,,,,126,,"-1, -6" 129 | convolutional,83.0,256,1,1,1,mish,1,127,, 130 | convolutional,84.0,128,1,1,1,mish,1,128,, 131 | upsample,,,,2,,,,129,, 132 | route,,,,,,,,130,,48 133 | convolutional,85.0,128,1,1,1,mish,1,131,, 134 | route,,,,,,,,132,,"-1, -3" 135 | convolutional,86.0,128,1,1,1,mish,1,133,, 136 | convolutional,87.0,128,1,1,1,mish,1,134,, 137 | route,,,,,,,,135,,-2 138 | convolutional,88.0,128,1,1,1,mish,1,136,, 139 | convolutional,89.0,128,3,1,1,mish,1,137,, 140 | convolutional,90.0,128,1,1,1,mish,1,138,, 141 | convolutional,91.0,128,3,1,1,mish,1,139,, 142 | route,,,,,,,,140,,"-1, -6" 143 | convolutional,92.0,128,1,1,1,mish,1,141,, 144 | convolutional,93.0,256,3,1,1,mish,1,142,, 145 | convolutional,94.0,255,1,1,1,logistic,,143,, 146 | yolo,,,,,,,,144,, 147 | route,,,,,,,,145,,-4 148 | convolutional,95.0,256,3,2,1,mish,1,146,, 149 | route,,,,,,,,147,,"-1, -20" 150 | convolutional,96.0,256,1,1,1,mish,1,148,, 151 | convolutional,97.0,256,1,1,1,mish,1,149,, 152 | route,,,,,,,,150,,-2 153 | convolutional,98.0,256,1,1,1,mish,1,151,, 154 | convolutional,99.0,256,3,1,1,mish,1,152,, 155 | convolutional,100.0,256,1,1,1,mish,1,153,, 156 | convolutional,101.0,256,3,1,1,mish,1,154,, 157 | route,,,,,,,,155,,"-1,-6" 158 | convolutional,102.0,256,1,1,1,mish,1,156,, 159 | convolutional,103.0,512,3,1,1,mish,1,157,, 160 | convolutional,104.0,255,1,1,1,logistic,,158,, 161 | yolo,,,,,,,,159,, 162 | route,,,,,,,,160,,-4 163 | convolutional,105.0,512,3,2,1,mish,1,161,, 164 | route,,,,,,,,162,,"-1, -49" 165 | convolutional,106.0,512,1,1,1,mish,1,163,, 166 | convolutional,107.0,512,1,1,1,mish,1,164,, 167 | route,,,,,,,,165,,-2 168 | convolutional,108.0,512,1,1,1,mish,1,166,, 169 | convolutional,109.0,512,3,1,1,mish,1,167,, 170 | convolutional,110.0,512,1,1,1,mish,1,168,, 171 | convolutional,111.0,512,3,1,1,mish,1,169,, 172 | route,,,,,,,,170,,"-1,-6" 173 | convolutional,112.0,512,1,1,1,mish,1,171,, 174 | convolutional,113.0,1024,3,1,1,mish,1,172,, 175 | convolutional,114.0,255,1,1,1,logistic,,173,, 176 | yolo,,,,,,,,174,, 177 | -------------------------------------------------------------------------------- /tools/yolov4-csp.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/tools/yolov4-csp.xlsx -------------------------------------------------------------------------------- /tools/yolov4-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00261 19 | burn_in=1000 20 | 21 | max_batches = 2000200 22 | policy=steps 23 | steps=1600000,1800000 24 | scales=.1,.1 25 | 26 | 27 | #weights_reject_freq=1001 28 | #ema_alpha=0.9998 29 | #equidistant_point=1000 30 | #num_sigmas_reject_badlabels=3 31 | #badlabels_rejection_percentage=0.2 32 | 33 | 34 | [convolutional] 35 | batch_normalize=1 36 | filters=32 37 | size=3 38 | stride=2 39 | pad=1 40 | activation=leaky 41 | 42 | [convolutional] 43 | batch_normalize=1 44 | filters=64 45 | size=3 46 | stride=2 47 | pad=1 48 | 
activation=leaky 49 | 50 | [convolutional] 51 | batch_normalize=1 52 | filters=64 53 | size=3 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [route] 59 | layers=-1 60 | groups=2 61 | group_id=1 62 | 63 | [convolutional] 64 | batch_normalize=1 65 | filters=32 66 | size=3 67 | stride=1 68 | pad=1 69 | activation=leaky 70 | 71 | [convolutional] 72 | batch_normalize=1 73 | filters=32 74 | size=3 75 | stride=1 76 | pad=1 77 | activation=leaky 78 | 79 | [route] 80 | layers = -1,-2 81 | 82 | [convolutional] 83 | batch_normalize=1 84 | filters=64 85 | size=1 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [route] 91 | layers = -6,-1 92 | 93 | [maxpool] 94 | size=2 95 | stride=2 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=128 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | [route] 106 | layers=-1 107 | groups=2 108 | group_id=1 109 | 110 | [convolutional] 111 | batch_normalize=1 112 | filters=64 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [convolutional] 119 | batch_normalize=1 120 | filters=64 121 | size=3 122 | stride=1 123 | pad=1 124 | activation=leaky 125 | 126 | [route] 127 | layers = -1,-2 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=128 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [route] 138 | layers = -6,-1 139 | 140 | [maxpool] 141 | size=2 142 | stride=2 143 | 144 | [convolutional] 145 | batch_normalize=1 146 | filters=256 147 | size=3 148 | stride=1 149 | pad=1 150 | activation=leaky 151 | 152 | [route] 153 | layers=-1 154 | groups=2 155 | group_id=1 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=128 160 | size=3 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=128 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [route] 174 | layers = -1,-2 175 | 176 | [convolutional] 177 | batch_normalize=1 178 | filters=256 179 | size=1 180 | stride=1 181 | pad=1 182 | activation=leaky 183 | 184 | [route] 185 | layers = -6,-1 186 | 187 | [maxpool] 188 | size=2 189 | stride=2 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=512 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | ################################## 200 | 201 | [convolutional] 202 | batch_normalize=1 203 | filters=256 204 | size=1 205 | stride=1 206 | pad=1 207 | activation=leaky 208 | 209 | [convolutional] 210 | batch_normalize=1 211 | filters=512 212 | size=3 213 | stride=1 214 | pad=1 215 | activation=leaky 216 | 217 | [convolutional] 218 | size=1 219 | stride=1 220 | pad=1 221 | filters=255 222 | activation=linear 223 | 224 | 225 | 226 | [yolo] 227 | mask = 3,4,5 228 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 229 | classes=80 230 | num=6 231 | jitter=.3 232 | scale_x_y = 1.05 233 | cls_normalizer=1.0 234 | iou_normalizer=0.07 235 | iou_loss=ciou 236 | ignore_thresh = .7 237 | truth_thresh = 1 238 | random=0 239 | resize=1.5 240 | nms_kind=greedynms 241 | beta_nms=0.6 242 | #new_coords=1 243 | #scale_x_y = 2.0 244 | 245 | [route] 246 | layers = -4 247 | 248 | [convolutional] 249 | batch_normalize=1 250 | filters=128 251 | size=1 252 | stride=1 253 | pad=1 254 | activation=leaky 255 | 256 | [upsample] 257 | stride=2 258 | 259 | [route] 260 | layers = -1, 23 261 | 262 | [convolutional] 263 | batch_normalize=1 264 | filters=256 265 | size=3 266 | stride=1 267 | pad=1 268 | activation=leaky 269 | 270 | [convolutional] 271 | size=1 
272 | stride=1 273 | pad=1 274 | filters=255 275 | activation=linear 276 | 277 | [yolo] 278 | mask = 1,2,3 279 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 280 | classes=80 281 | num=6 282 | jitter=.3 283 | scale_x_y = 1.05 284 | cls_normalizer=1.0 285 | iou_normalizer=0.07 286 | iou_loss=ciou 287 | ignore_thresh = .7 288 | truth_thresh = 1 289 | random=0 290 | resize=1.5 291 | nms_kind=greedynms 292 | beta_nms=0.6 293 | #new_coords=1 294 | #scale_x_y = 2.0 295 | -------------------------------------------------------------------------------- /tools/yolov4-tiny.csv: -------------------------------------------------------------------------------- 1 | type,convs,filters,size,stride,pad,activation,batch_normalize,no.,from,layers 2 | convolutional,0.0,32,3,2,1,leaky,1,0,, 3 | convolutional,1.0,64,3,2,1,leaky,1,1,, 4 | convolutional,2.0,64,3,1,1,leaky,1,2,, 5 | route,,,,,,,,3,,-1 6 | convolutional,3.0,32,3,1,1,leaky,1,4,, 7 | convolutional,4.0,32,3,1,1,leaky,1,5,, 8 | route,,,,,,,,6,,"-1,-2" 9 | convolutional,5.0,64,1,1,1,leaky,1,7,, 10 | route,,,,,,,,8,,"-6,-1" 11 | maxpool,,,2,2,,,,9,, 12 | convolutional,6.0,128,3,1,1,leaky,1,10,, 13 | route,,,,,,,,11,,-1 14 | convolutional,7.0,64,3,1,1,leaky,1,12,, 15 | convolutional,8.0,64,3,1,1,leaky,1,13,, 16 | route,,,,,,,,14,,"-1,-2" 17 | convolutional,9.0,128,1,1,1,leaky,1,15,, 18 | route,,,,,,,,16,,"-6,-1" 19 | maxpool,,,2,2,,,,17,, 20 | convolutional,10.0,256,3,1,1,leaky,1,18,, 21 | route,,,,,,,,19,,-1 22 | convolutional,11.0,128,3,1,1,leaky,1,20,, 23 | convolutional,12.0,128,3,1,1,leaky,1,21,, 24 | route,,,,,,,,22,,"-1,-2" 25 | convolutional,13.0,256,1,1,1,leaky,1,23,, 26 | route,,,,,,,,24,,"-6,-1" 27 | maxpool,,,2,2,,,,25,, 28 | convolutional,14.0,512,3,1,1,leaky,1,26,, 29 | convolutional,15.0,256,1,1,1,leaky,1,27,, 30 | convolutional,16.0,512,3,1,1,leaky,1,28,, 31 | convolutional,17.0,255,1,1,1,linear,,29,, 32 | yolo,,,,,,,,30,, 33 | route,,,,,,,,31,,-4 34 | convolutional,18.0,128,1,1,1,leaky,1,32,, 35 | upsample,,,,2,,,,33,, 36 | route,,,,,,,,34,,"-1, 23" 37 | convolutional,19.0,256,3,1,1,leaky,1,35,, 38 | convolutional,20.0,255,1,1,1,linear,,36,, 39 | yolo,,,,,,,,37,, 40 | -------------------------------------------------------------------------------- /tools/yolov4-tiny.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/tools/yolov4-tiny.xlsx -------------------------------------------------------------------------------- /tools/yolov4.csv: -------------------------------------------------------------------------------- 1 | no.,type,convs,filters,size,stride,pad,activation,batch_normalize,from,layers 2 | 0,convolutional,0.0,32,3,1,1,mish,1,, 3 | 1,convolutional,1.0,64,3,2,1,mish,1,, 4 | 2,convolutional,2.0,64,1,1,1,mish,1,, 5 | 3,route,,,,,,,,,-2 6 | 4,convolutional,3.0,64,1,1,1,mish,1,, 7 | 5,convolutional,4.0,32,1,1,1,mish,1,, 8 | 6,convolutional,5.0,64,3,1,1,mish,1,, 9 | 7,shortcut,,,,,,linear,,-3, 10 | 8,convolutional,6.0,64,1,1,1,mish,1,, 11 | 9,route,,,,,,,,,"-1,-7" 12 | 10,convolutional,7.0,64,1,1,1,mish,1,, 13 | 11,convolutional,8.0,128,3,2,1,mish,1,, 14 | 12,convolutional,9.0,64,1,1,1,mish,1,, 15 | 13,route,,,,,,,,,-2 16 | 14,convolutional,10.0,64,1,1,1,mish,1,, 17 | 15,convolutional,11.0,64,1,1,1,mish,1,, 18 | 16,convolutional,12.0,64,3,1,1,mish,1,, 19 | 17,shortcut,,,,,,linear,,-3, 20 | 18,convolutional,13.0,64,1,1,1,mish,1,, 21 | 19,convolutional,14.0,64,3,1,1,mish,1,, 22 | 
20,shortcut,,,,,,linear,,-3, 23 | 21,convolutional,15.0,64,1,1,1,mish,1,, 24 | 22,route,,,,,,,,,"-1,-10" 25 | 23,convolutional,16.0,128,1,1,1,mish,1,, 26 | 24,convolutional,17.0,256,3,2,1,mish,1,, 27 | 25,convolutional,18.0,128,1,1,1,mish,1,, 28 | 26,route,,,,,,,,,-2 29 | 27,convolutional,19.0,128,1,1,1,mish,1,, 30 | 28,convolutional,20.0,128,1,1,1,mish,1,, 31 | 29,convolutional,21.0,128,3,1,1,mish,1,, 32 | 30,shortcut,,,,,,linear,,-3, 33 | 31,convolutional,22.0,128,1,1,1,mish,1,, 34 | 32,convolutional,23.0,128,3,1,1,mish,1,, 35 | 33,shortcut,,,,,,linear,,-3, 36 | 34,convolutional,24.0,128,1,1,1,mish,1,, 37 | 35,convolutional,25.0,128,3,1,1,mish,1,, 38 | 36,shortcut,,,,,,linear,,-3, 39 | 37,convolutional,26.0,128,1,1,1,mish,1,, 40 | 38,convolutional,27.0,128,3,1,1,mish,1,, 41 | 39,shortcut,,,,,,linear,,-3, 42 | 40,convolutional,28.0,128,1,1,1,mish,1,, 43 | 41,convolutional,29.0,128,3,1,1,mish,1,, 44 | 42,shortcut,,,,,,linear,,-3, 45 | 43,convolutional,30.0,128,1,1,1,mish,1,, 46 | 44,convolutional,31.0,128,3,1,1,mish,1,, 47 | 45,shortcut,,,,,,linear,,-3, 48 | 46,convolutional,32.0,128,1,1,1,mish,1,, 49 | 47,convolutional,33.0,128,3,1,1,mish,1,, 50 | 48,shortcut,,,,,,linear,,-3, 51 | 49,convolutional,34.0,128,1,1,1,mish,1,, 52 | 50,convolutional,35.0,128,3,1,1,mish,1,, 53 | 51,shortcut,,,,,,linear,,-3, 54 | 52,convolutional,36.0,128,1,1,1,mish,1,, 55 | 53,route,,,,,,,,,"-1,-28" 56 | 54,convolutional,37.0,256,1,1,1,mish,1,, 57 | 55,convolutional,38.0,512,3,2,1,mish,1,, 58 | 56,convolutional,39.0,256,1,1,1,mish,1,, 59 | 57,route,,,,,,,,,-2 60 | 58,convolutional,40.0,256,1,1,1,mish,1,, 61 | 59,convolutional,41.0,256,1,1,1,mish,1,, 62 | 60,convolutional,42.0,256,3,1,1,mish,1,, 63 | 61,shortcut,,,,,,linear,,-3, 64 | 62,convolutional,43.0,256,1,1,1,mish,1,, 65 | 63,convolutional,44.0,256,3,1,1,mish,1,, 66 | 64,shortcut,,,,,,linear,,-3, 67 | 65,convolutional,45.0,256,1,1,1,mish,1,, 68 | 66,convolutional,46.0,256,3,1,1,mish,1,, 69 | 67,shortcut,,,,,,linear,,-3, 70 | 68,convolutional,47.0,256,1,1,1,mish,1,, 71 | 69,convolutional,48.0,256,3,1,1,mish,1,, 72 | 70,shortcut,,,,,,linear,,-3, 73 | 71,convolutional,49.0,256,1,1,1,mish,1,, 74 | 72,convolutional,50.0,256,3,1,1,mish,1,, 75 | 73,shortcut,,,,,,linear,,-3, 76 | 74,convolutional,51.0,256,1,1,1,mish,1,, 77 | 75,convolutional,52.0,256,3,1,1,mish,1,, 78 | 76,shortcut,,,,,,linear,,-3, 79 | 77,convolutional,53.0,256,1,1,1,mish,1,, 80 | 78,convolutional,54.0,256,3,1,1,mish,1,, 81 | 79,shortcut,,,,,,linear,,-3, 82 | 80,convolutional,55.0,256,1,1,1,mish,1,, 83 | 81,convolutional,56.0,256,3,1,1,mish,1,, 84 | 82,shortcut,,,,,,linear,,-3, 85 | 83,convolutional,57.0,256,1,1,1,mish,1,, 86 | 84,route,,,,,,,,,"-1,-28" 87 | 85,convolutional,58.0,512,1,1,1,mish,1,, 88 | 86,convolutional,59.0,1024,3,2,1,mish,1,, 89 | 87,convolutional,60.0,512,1,1,1,mish,1,, 90 | 88,route,,,,,,,,,-2 91 | 89,convolutional,61.0,512,1,1,1,mish,1,, 92 | 90,convolutional,62.0,512,1,1,1,mish,1,, 93 | 91,convolutional,63.0,512,3,1,1,mish,1,, 94 | 92,shortcut,,,,,,linear,,-3, 95 | 93,convolutional,64.0,512,1,1,1,mish,1,, 96 | 94,convolutional,65.0,512,3,1,1,mish,1,, 97 | 95,shortcut,,,,,,linear,,-3, 98 | 96,convolutional,66.0,512,1,1,1,mish,1,, 99 | 97,convolutional,67.0,512,3,1,1,mish,1,, 100 | 98,shortcut,,,,,,linear,,-3, 101 | 99,convolutional,68.0,512,1,1,1,mish,1,, 102 | 100,convolutional,69.0,512,3,1,1,mish,1,, 103 | 101,shortcut,,,,,,linear,,-3, 104 | 102,convolutional,70.0,512,1,1,1,mish,1,, 105 | 103,route,,,,,,,,,"-1,-16" 106 | 104,convolutional,71.0,1024,1,1,1,mish,1,, 107 | 
105,convolutional,72.0,512,1,1,1,leaky,1,, 108 | 106,convolutional,73.0,1024,3,1,1,leaky,1,, 109 | 107,convolutional,74.0,512,1,1,1,leaky,1,, 110 | 108,maxpool,,,5,1,,,,, 111 | 109,route,,,,,,,,,-2 112 | 110,maxpool,,,9,1,,,,, 113 | 111,route,,,,,,,,,-4 114 | 112,maxpool,,,13,1,,,,, 115 | 113,route,,,,,,,,,"-1,-3,-5,-6" 116 | 114,convolutional,75.0,512,1,1,1,leaky,1,, 117 | 115,convolutional,76.0,1024,3,1,1,leaky,1,, 118 | 116,convolutional,77.0,512,1,1,1,leaky,1,, 119 | 117,convolutional,78.0,256,1,1,1,leaky,1,, 120 | 118,upsample,,,,2,,,,, 121 | 119,route,,,,,,,,,85 122 | 120,convolutional,79.0,256,1,1,1,leaky,1,, 123 | 121,route,,,,,,,,,"-1, -3" 124 | 122,convolutional,80.0,256,1,1,1,leaky,1,, 125 | 123,convolutional,81.0,512,3,1,1,leaky,1,, 126 | 124,convolutional,82.0,256,1,1,1,leaky,1,, 127 | 125,convolutional,83.0,512,3,1,1,leaky,1,, 128 | 126,convolutional,84.0,256,1,1,1,leaky,1,, 129 | 127,convolutional,85.0,128,1,1,1,leaky,1,, 130 | 128,upsample,,,,2,,,,, 131 | 129,route,,,,,,,,,54 132 | 130,convolutional,86.0,128,1,1,1,leaky,1,, 133 | 131,route,,,,,,,,,"-1, -3" 134 | 132,convolutional,87.0,128,1,1,1,leaky,1,, 135 | 133,convolutional,88.0,256,3,1,1,leaky,1,, 136 | 134,convolutional,89.0,128,1,1,1,leaky,1,, 137 | 135,convolutional,90.0,256,3,1,1,leaky,1,, 138 | 136,convolutional,91.0,128,1,1,1,leaky,1,, 139 | 137,convolutional,92.0,256,3,1,1,leaky,1,, 140 | 138,convolutional,93.0,255,1,1,1,linear,,, 141 | 139,yolo,,,,,,,,, 142 | 140,route,,,,,,,,,-4 143 | 141,convolutional,94.0,256,3,2,1,leaky,1,, 144 | 142,route,,,,,,,,,"-1, -16" 145 | 143,convolutional,95.0,256,1,1,1,leaky,1,, 146 | 144,convolutional,96.0,512,3,1,1,leaky,1,, 147 | 145,convolutional,97.0,256,1,1,1,leaky,1,, 148 | 146,convolutional,98.0,512,3,1,1,leaky,1,, 149 | 147,convolutional,99.0,256,1,1,1,leaky,1,, 150 | 148,convolutional,100.0,512,3,1,1,leaky,1,, 151 | 149,convolutional,101.0,255,1,1,1,linear,,, 152 | 150,yolo,,,,,,,,, 153 | 151,route,,,,,,,,,-4 154 | 152,convolutional,102.0,512,3,2,1,leaky,1,, 155 | 153,route,,,,,,,,,"-1, -37" 156 | 154,convolutional,103.0,512,1,1,1,leaky,1,, 157 | 155,convolutional,104.0,1024,3,1,1,leaky,1,, 158 | 156,convolutional,105.0,512,1,1,1,leaky,1,, 159 | 157,convolutional,106.0,1024,3,1,1,leaky,1,, 160 | 158,convolutional,107.0,512,1,1,1,leaky,1,, 161 | 159,convolutional,108.0,1024,3,1,1,leaky,1,, 162 | 160,convolutional,109.0,255,1,1,1,linear,,, 163 | 161,yolo,,,,,,,,, 164 | -------------------------------------------------------------------------------- /tools/yolov4.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/tools/yolov4.xlsx -------------------------------------------------------------------------------- /tools/yolov4x-mish.csv: -------------------------------------------------------------------------------- 1 | type,convs,filters,size,stride,pad,activation,batch_normalize,no.,from,layers 2 | convolutional,0.0,32,3,1,1,mish,1,0,, 3 | convolutional,1.0,80,3,2,1,mish,1,1,, 4 | convolutional,2.0,40,1,1,1,mish,1,2,, 5 | convolutional,3.0,80,3,1,1,mish,1,3,, 6 | shortcut,,,,,,linear,,4,-3, 7 | convolutional,4.0,160,3,2,1,mish,1,5,, 8 | convolutional,5.0,80,1,1,1,mish,1,6,, 9 | route,,,,,,,,7,,-2 10 | convolutional,6.0,80,1,1,1,mish,1,8,, 11 | convolutional,7.0,80,1,1,1,mish,1,9,, 12 | convolutional,8.0,80,3,1,1,mish,1,10,, 13 | shortcut,,,,,,linear,,11,-3, 14 | convolutional,9.0,80,1,1,1,mish,1,12,, 15 | 
convolutional,10.0,80,3,1,1,mish,1,13,, 16 | shortcut,,,,,,linear,,14,-3, 17 | convolutional,11.0,80,1,1,1,mish,1,15,, 18 | convolutional,12.0,80,3,1,1,mish,1,16,, 19 | shortcut,,,,,,linear,,17,-3, 20 | convolutional,13.0,80,1,1,1,mish,1,18,, 21 | route,,,,,,,,19,,"-1,-13" 22 | convolutional,14.0,160,1,1,1,mish,1,20,, 23 | convolutional,15.0,320,3,2,1,mish,1,21,, 24 | convolutional,16.0,160,1,1,1,mish,1,22,, 25 | route,,,,,,,,23,,-2 26 | convolutional,17.0,160,1,1,1,mish,1,24,, 27 | convolutional,18.0,160,1,1,1,mish,1,25,, 28 | convolutional,19.0,160,3,1,1,mish,1,26,, 29 | shortcut,,,,,,linear,,27,-3, 30 | convolutional,20.0,160,1,1,1,mish,1,28,, 31 | convolutional,21.0,160,3,1,1,mish,1,29,, 32 | shortcut,,,,,,linear,,30,-3, 33 | convolutional,22.0,160,1,1,1,mish,1,31,, 34 | convolutional,23.0,160,3,1,1,mish,1,32,, 35 | shortcut,,,,,,linear,,33,-3, 36 | convolutional,24.0,160,1,1,1,mish,1,34,, 37 | convolutional,25.0,160,3,1,1,mish,1,35,, 38 | shortcut,,,,,,linear,,36,-3, 39 | convolutional,26.0,160,1,1,1,mish,1,37,, 40 | convolutional,27.0,160,3,1,1,mish,1,38,, 41 | shortcut,,,,,,linear,,39,-3, 42 | convolutional,28.0,160,1,1,1,mish,1,40,, 43 | convolutional,29.0,160,3,1,1,mish,1,41,, 44 | shortcut,,,,,,linear,,42,-3, 45 | convolutional,30.0,160,1,1,1,mish,1,43,, 46 | convolutional,31.0,160,3,1,1,mish,1,44,, 47 | shortcut,,,,,,linear,,45,-3, 48 | convolutional,32.0,160,1,1,1,mish,1,46,, 49 | convolutional,33.0,160,3,1,1,mish,1,47,, 50 | shortcut,,,,,,linear,,48,-3, 51 | convolutional,34.0,160,1,1,1,mish,1,49,, 52 | convolutional,35.0,160,3,1,1,mish,1,50,, 53 | shortcut,,,,,,linear,,51,-3, 54 | convolutional,36.0,160,1,1,1,mish,1,52,, 55 | convolutional,37.0,160,3,1,1,mish,1,53,, 56 | shortcut,,,,,,linear,,54,-3, 57 | convolutional,38.0,160,1,1,1,mish,1,55,, 58 | route,,,,,,,,56,,"-1,-34" 59 | convolutional,39.0,320,1,1,1,mish,1,57,, 60 | convolutional,40.0,640,3,2,1,mish,1,58,, 61 | convolutional,41.0,320,1,1,1,mish,1,59,, 62 | route,,,,,,,,60,,-2 63 | convolutional,42.0,320,1,1,1,mish,1,61,, 64 | convolutional,43.0,320,1,1,1,mish,1,62,, 65 | convolutional,44.0,320,3,1,1,mish,1,63,, 66 | shortcut,,,,,,linear,,64,-3, 67 | convolutional,45.0,320,1,1,1,mish,1,65,, 68 | convolutional,46.0,320,3,1,1,mish,1,66,, 69 | shortcut,,,,,,linear,,67,-3, 70 | convolutional,47.0,320,1,1,1,mish,1,68,, 71 | convolutional,48.0,320,3,1,1,mish,1,69,, 72 | shortcut,,,,,,linear,,70,-3, 73 | convolutional,49.0,320,1,1,1,mish,1,71,, 74 | convolutional,50.0,320,3,1,1,mish,1,72,, 75 | shortcut,,,,,,linear,,73,-3, 76 | convolutional,51.0,320,1,1,1,mish,1,74,, 77 | convolutional,52.0,320,3,1,1,mish,1,75,, 78 | shortcut,,,,,,linear,,76,-3, 79 | convolutional,53.0,320,1,1,1,mish,1,77,, 80 | convolutional,54.0,320,3,1,1,mish,1,78,, 81 | shortcut,,,,,,linear,,79,-3, 82 | convolutional,55.0,320,1,1,1,mish,1,80,, 83 | convolutional,56.0,320,3,1,1,mish,1,81,, 84 | shortcut,,,,,,linear,,82,-3, 85 | convolutional,57.0,320,1,1,1,mish,1,83,, 86 | convolutional,58.0,320,3,1,1,mish,1,84,, 87 | shortcut,,,,,,linear,,85,-3, 88 | convolutional,59.0,320,1,1,1,mish,1,86,, 89 | convolutional,60.0,320,3,1,1,mish,1,87,, 90 | shortcut,,,,,,linear,,88,-3, 91 | convolutional,61.0,320,1,1,1,mish,1,89,, 92 | convolutional,62.0,320,3,1,1,mish,1,90,, 93 | shortcut,,,,,,linear,,91,-3, 94 | convolutional,63.0,320,1,1,1,mish,1,92,, 95 | route,,,,,,,,93,,"-1,-34" 96 | convolutional,64.0,640,1,1,1,mish,1,94,, 97 | convolutional,65.0,1280,3,2,1,mish,1,95,, 98 | convolutional,66.0,640,1,1,1,mish,1,96,, 99 | route,,,,,,,,97,,-2 100 | 
convolutional,67.0,640,1,1,1,mish,1,98,, 101 | convolutional,68.0,640,1,1,1,mish,1,99,, 102 | convolutional,69.0,640,3,1,1,mish,1,100,, 103 | shortcut,,,,,,linear,,101,-3, 104 | convolutional,70.0,640,1,1,1,mish,1,102,, 105 | convolutional,71.0,640,3,1,1,mish,1,103,, 106 | shortcut,,,,,,linear,,104,-3, 107 | convolutional,72.0,640,1,1,1,mish,1,105,, 108 | convolutional,73.0,640,3,1,1,mish,1,106,, 109 | shortcut,,,,,,linear,,107,-3, 110 | convolutional,74.0,640,1,1,1,mish,1,108,, 111 | convolutional,75.0,640,3,1,1,mish,1,109,, 112 | shortcut,,,,,,linear,,110,-3, 113 | convolutional,76.0,640,1,1,1,mish,1,111,, 114 | convolutional,77.0,640,3,1,1,mish,1,112,, 115 | shortcut,,,,,,linear,,113,-3, 116 | convolutional,78.0,640,1,1,1,mish,1,114,, 117 | route,,,,,,,,115,,"-1,-19" 118 | convolutional,79.0,1280,1,1,1,mish,1,116,, 119 | convolutional,80.0,640,1,1,1,mish,1,117,, 120 | route,,,,,,,,118,,-2 121 | convolutional,81.0,640,1,1,1,mish,1,119,, 122 | convolutional,82.0,640,3,1,1,mish,1,120,, 123 | convolutional,83.0,640,1,1,1,mish,1,121,, 124 | maxpool,,,5,1,,,,122,, 125 | route,,,,,,,,123,,-2 126 | maxpool,,,9,1,,,,124,, 127 | route,,,,,,,,125,,-4 128 | maxpool,,,13,1,,,,126,, 129 | route,,,,,,,,127,,"-1,-3,-5,-6" 130 | convolutional,84.0,640,1,1,1,mish,1,128,, 131 | convolutional,85.0,640,3,1,1,mish,1,129,, 132 | convolutional,86.0,640,1,1,1,mish,1,130,, 133 | convolutional,87.0,640,3,1,1,mish,1,131,, 134 | route,,,,,,,,132,,"-1, -15" 135 | convolutional,88.0,640,1,1,1,mish,1,133,, 136 | convolutional,89.0,320,1,1,1,mish,1,134,, 137 | upsample,,,,2,,,,135,, 138 | route,,,,,,,,136,,94 139 | convolutional,90.0,320,1,1,1,mish,1,137,, 140 | route,,,,,,,,138,,"-1, -3" 141 | convolutional,91.0,320,1,1,1,mish,1,139,, 142 | convolutional,92.0,320,1,1,1,mish,1,140,, 143 | route,,,,,,,,141,,-2 144 | convolutional,93.0,320,1,1,1,mish,1,142,, 145 | convolutional,94.0,320,3,1,1,mish,1,143,, 146 | convolutional,95.0,320,1,1,1,mish,1,144,, 147 | convolutional,96.0,320,3,1,1,mish,1,145,, 148 | convolutional,97.0,320,1,1,1,mish,1,146,, 149 | convolutional,98.0,320,3,1,1,mish,1,147,, 150 | route,,,,,,,,148,,"-1, -8" 151 | convolutional,99.0,320,1,1,1,mish,1,149,, 152 | convolutional,100.0,160,1,1,1,mish,1,150,, 153 | upsample,,,,2,,,,151,, 154 | route,,,,,,,,152,,57 155 | convolutional,101.0,160,1,1,1,mish,1,153,, 156 | route,,,,,,,,154,,"-1, -3" 157 | convolutional,102.0,160,1,1,1,mish,1,155,, 158 | convolutional,103.0,160,1,1,1,mish,1,156,, 159 | route,,,,,,,,157,,-2 160 | convolutional,104.0,160,1,1,1,mish,1,158,, 161 | convolutional,105.0,160,3,1,1,mish,1,159,, 162 | convolutional,106.0,160,1,1,1,mish,1,160,, 163 | convolutional,107.0,160,3,1,1,mish,1,161,, 164 | convolutional,108.0,160,1,1,1,mish,1,162,, 165 | convolutional,109.0,160,3,1,1,mish,1,163,, 166 | route,,,,,,,,164,,"-1, -8" 167 | convolutional,110.0,160,1,1,1,mish,1,165,, 168 | convolutional,111.0,320,3,1,1,mish,1,166,, 169 | convolutional,112.0,255,1,1,1,logistic,,167,, 170 | yolo,,,,,,,,168,, 171 | route,,,,,,,,169,,-4 172 | convolutional,113.0,320,3,2,1,mish,1,170,, 173 | route,,,,,,,,171,,"-1, -22" 174 | convolutional,114.0,320,1,1,1,mish,1,172,, 175 | convolutional,115.0,320,1,1,1,mish,1,173,, 176 | route,,,,,,,,174,,-2 177 | convolutional,116.0,320,1,1,1,mish,1,175,, 178 | convolutional,117.0,320,3,1,1,mish,1,176,, 179 | convolutional,118.0,320,1,1,1,mish,1,177,, 180 | convolutional,119.0,320,3,1,1,mish,1,178,, 181 | convolutional,120.0,320,1,1,1,mish,1,179,, 182 | convolutional,121.0,320,3,1,1,mish,1,180,, 183 | route,,,,,,,,181,,"-1,-8" 
184 | convolutional,122.0,320,1,1,1,mish,1,182,, 185 | convolutional,123.0,640,3,1,1,mish,1,183,, 186 | convolutional,124.0,255,1,1,1,logistic,,184,, 187 | yolo,,,,,,,,185,, 188 | route,,,,,,,,186,,-4 189 | convolutional,125.0,640,3,2,1,mish,1,187,, 190 | route,,,,,,,,188,,"-1, -55" 191 | convolutional,126.0,640,1,1,1,mish,1,189,, 192 | convolutional,127.0,640,1,1,1,mish,1,190,, 193 | route,,,,,,,,191,,-2 194 | convolutional,128.0,640,1,1,1,mish,1,192,, 195 | convolutional,129.0,640,3,1,1,mish,1,193,, 196 | convolutional,130.0,640,1,1,1,mish,1,194,, 197 | convolutional,131.0,640,3,1,1,mish,1,195,, 198 | convolutional,132.0,640,1,1,1,mish,1,196,, 199 | convolutional,133.0,640,3,1,1,mish,1,197,, 200 | route,,,,,,,,198,,"-1,-8" 201 | convolutional,134.0,640,1,1,1,mish,1,199,, 202 | convolutional,135.0,1280,3,1,1,mish,1,200,, 203 | convolutional,136.0,255,1,1,1,logistic,,201,, 204 | yolo,,,,,,,,202,, 205 | -------------------------------------------------------------------------------- /tools/yolov4x-mish.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/tools/yolov4x-mish.xlsx -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tetutaro/yolo_various_framework/ad547d1567017990ab03ef6226074128c4174a77/utils/__init__.py -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | # Activation functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | # SiLU https://arxiv.org/pdf/1606.08415.pdf ---------------------------------------------------------------------------- 9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 10 | @staticmethod 11 | def forward(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 16 | @staticmethod 17 | def forward(x): 18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 19 | return x * F.hardtanh(x + 3, 0., 6.) / 6. 
# for torchscript, CoreML and ONNX 20 | 21 | 22 | class MemoryEfficientSwish(nn.Module): 23 | class F(torch.autograd.Function): 24 | @staticmethod 25 | def forward(ctx, x): 26 | ctx.save_for_backward(x) 27 | return x * torch.sigmoid(x) 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | x = ctx.saved_tensors[0] 32 | sx = torch.sigmoid(x) 33 | return grad_output * (sx * (1 + x * (1 - sx))) 34 | 35 | def forward(self, x): 36 | return self.F.apply(x) 37 | 38 | 39 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 40 | class Mish(nn.Module): 41 | @staticmethod 42 | def forward(x): 43 | return x * F.softplus(x).tanh() 44 | 45 | 46 | class MemoryEfficientMish(nn.Module): 47 | class F(torch.autograd.Function): 48 | @staticmethod 49 | def forward(ctx, x): 50 | ctx.save_for_backward(x) 51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 52 | 53 | @staticmethod 54 | def backward(ctx, grad_output): 55 | x = ctx.saved_tensors[0] 56 | sx = torch.sigmoid(x) 57 | fx = F.softplus(x).tanh() 58 | return grad_output * (fx + x * sx * (1 - fx * fx)) 59 | 60 | def forward(self, x): 61 | return self.F.apply(x) 62 | 63 | 64 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 65 | class FReLU(nn.Module): 66 | def __init__(self, c1, k=3): # ch_in, kernel 67 | super().__init__() 68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 69 | self.bn = nn.BatchNorm2d(c1) 70 | 71 | def forward(self, x): 72 | return torch.max(x, self.bn(self.conv(x))) 73 | -------------------------------------------------------------------------------- /utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | # Auto-anchor utils 2 | 3 | import numpy as np 4 | import torch 5 | import yaml 6 | from scipy.cluster.vq import kmeans 7 | from tqdm import tqdm 8 | 9 | from utils.general import colorstr 10 | 11 | 12 | def check_anchor_order(m): 13 | # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary 14 | a = m.anchor_grid.prod(-1).view(-1) # anchor area 15 | da = a[-1] - a[0] # delta a 16 | ds = m.stride[-1] - m.stride[0] # delta s 17 | if da.sign() != ds.sign(): # same order 18 | print('Reversing anchor order') 19 | m.anchors[:] = m.anchors.flip(0) 20 | m.anchor_grid[:] = m.anchor_grid.flip(0) 21 | 22 | 23 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 24 | # Check anchor fit to data, recompute if necessary 25 | prefix = colorstr('autoanchor: ') 26 | print(f'\n{prefix}Analyzing anchors... ', end='') 27 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() 28 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 29 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 30 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 31 | 32 | def metric(k): # compute metric 33 | r = wh[:, None] / k[None] 34 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 35 | best = x.max(1)[0] # best_x 36 | aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold 37 | bpr = (best > 1. 
/ thr).float().mean() # best possible recall 38 | return bpr, aat 39 | 40 | bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2)) 41 | print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='') 42 | if bpr < 0.98: # threshold to recompute 43 | print('. Attempting to improve anchors, please wait...') 44 | na = m.anchor_grid.numel() // 2 # number of anchors 45 | new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 46 | new_bpr = metric(new_anchors.reshape(-1, 2))[0] 47 | if new_bpr > bpr: # replace anchors 48 | new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors) 49 | m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid) # for inference 50 | m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss 51 | check_anchor_order(m) 52 | print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.') 53 | else: 54 | print(f'{prefix}Original anchors better than new anchors. Proceeding with original anchors.') 55 | print('') # newline 56 | 57 | 58 | def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 59 | """ Creates kmeans-evolved anchors from training dataset 60 | 61 | Arguments: 62 | path: path to dataset *.yaml, or a loaded dataset 63 | n: number of anchors 64 | img_size: image size used for training 65 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 66 | gen: generations to evolve anchors using genetic algorithm 67 | verbose: print all results 68 | 69 | Return: 70 | k: kmeans evolved anchors 71 | 72 | Usage: 73 | from utils.autoanchor import *; _ = kmean_anchors() 74 | """ 75 | thr = 1. / thr 76 | prefix = colorstr('autoanchor: ') 77 | 78 | def metric(k, wh): # compute metrics 79 | r = wh[:, None] / k[None] 80 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 81 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 82 | return x, x.max(1)[0] # x, best_x 83 | 84 | def anchor_fitness(k): # mutation fitness 85 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 86 | return (best * (best > thr).float()).mean() # fitness 87 | 88 | def print_results(k): 89 | k = k[np.argsort(k.prod(1))] # sort small to large 90 | x, best = metric(k, wh0) 91 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 92 | print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr') 93 | print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' 94 | f'past_thr={x[x > thr].mean():.3f}-mean: ', end='') 95 | for i, x in enumerate(k): 96 | print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg 97 | return k 98 | 99 | if isinstance(path, str): # *.yaml file 100 | with open(path) as f: 101 | data_dict = yaml.load(f, Loader=yaml.SafeLoader) # model dict 102 | from utils.datasets import LoadImagesAndLabels 103 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 104 | else: 105 | dataset = path # dataset 106 | 107 | # Get label wh 108 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 109 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 110 | 111 | # Filter 112 | i = (wh0 < 3.0).any(1).sum() 113 | if i: 114 | print(f'{prefix}WARNING: Extremely small objects found. 
{i} of {len(wh0)} labels are < 3 pixels in size.') 115 | wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels 116 | # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 117 | 118 | # Kmeans calculation 119 | print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...') 120 | s = wh.std(0) # sigmas for whitening 121 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 122 | k *= s 123 | wh = torch.tensor(wh, dtype=torch.float32) # filtered 124 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered 125 | k = print_results(k) 126 | 127 | # Plot 128 | # k, d = [None] * 20, [None] * 20 129 | # for i in tqdm(range(1, 21)): 130 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 131 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) 132 | # ax = ax.ravel() 133 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 134 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 135 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 136 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 137 | # fig.savefig('wh.png', dpi=200) 138 | 139 | # Evolve 140 | npr = np.random 141 | f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma 142 | pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:') # progress bar 143 | for _ in pbar: 144 | v = np.ones(sh) 145 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 146 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 147 | kg = (k.copy() * v).clip(min=2.0) 148 | fg = anchor_fitness(kg) 149 | if fg > f: 150 | f, k = fg, kg.copy() 151 | pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}' 152 | if verbose: 153 | print_results(k) 154 | 155 | return print_results(k) 156 | -------------------------------------------------------------------------------- /utils/convert_tflite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from typing import List, Optional 4 | import os 5 | import time 6 | import glob 7 | import cv2 8 | import numpy as np 9 | import tensorflow as tf 10 | from tensorflow.python.framework.convert_to_constants import ( 11 | convert_variables_to_constants_v2 12 | ) 13 | 14 | NUM_TRAINING_IMAGES = 100 15 | 16 | 17 | def save_frozen_graph( 18 | path_pb: str, 19 | model_keras: tf.keras.Model, 20 | input_keras: Optional[tf.keras.Input] = None 21 | ) -> None: 22 | if os.path.isfile(path_pb): 23 | return 24 | if input_keras is None: 25 | input_shape = model_keras.inputs[0].shape 26 | input_dtype = model_keras.inputs[0].dtype 27 | else: 28 | input_shape = input_keras.shape 29 | input_dtype = input_keras.dtype 30 | full_model = tf.function(lambda x: model_keras(x)) 31 | full_model = full_model.get_concrete_function( 32 | tf.TensorSpec(input_shape, input_dtype) 33 | ) 34 | frozen_func = convert_variables_to_constants_v2(full_model) 35 | frozen_func.graph.as_graph_def() 36 | # check inputs and outputs of frozen graph 37 | # print(frozen_func.inputs) 38 | # print(frozen_func.outputs) 39 | tf.io.write_graph( 40 | graph_or_graph_def=frozen_func.graph, 41 | logdir=os.path.dirname(path_pb), 42 | name=os.path.basename(path_pb), 43 | as_text=False 44 | ) 45 | return 46 | 47 | 48 | def load_frozen_graph( 49 | path_pb: str, 50 | inputs: List[str], 51 | outputs: List[str] 52 | ) -> tf.function: 53 | with tf.io.gfile.GFile(path_pb, "rb") as rf: 54 | graph_def = 
tf.compat.v1.GraphDef() 55 | graph_def.ParseFromString(rf.read()) 56 | 57 | def wrap_frozen_graph(graph_def, inputs, outputs): 58 | def _imports_graph_def(): 59 | tf.compat.v1.import_graph_def(graph_def, name="") 60 | 61 | wrapped_import = tf.compat.v1.wrap_function( 62 | _imports_graph_def, [] 63 | ) 64 | import_graph = wrapped_import.graph 65 | return wrapped_import.prune( 66 | tf.nest.map_structure(import_graph.as_graph_element, inputs), 67 | tf.nest.map_structure(import_graph.as_graph_element, outputs) 68 | ) 69 | 70 | # the name of inputs and outputs can be known with printing 71 | # frozen_func.inputs/outputs when `save_frozen_graph()` 72 | frozen_func = wrap_frozen_graph( 73 | graph_def=graph_def, inputs=inputs, outputs=outputs 74 | ) 75 | return frozen_func 76 | 77 | 78 | def convert_tflite_fp32( 79 | path_tflite: str, 80 | model_keras: tf.keras.Model 81 | ) -> None: 82 | if os.path.isfile(path_tflite): 83 | return 84 | converter = tf.lite.TFLiteConverter.from_keras_model(model_keras) 85 | converter.allow_custom_ops = False 86 | converter.experimental_new_converter = True 87 | model_tflite = converter.convert() 88 | open(path_tflite, "wb").write(model_tflite) 89 | return 90 | 91 | 92 | def convert_tflite_fp16( 93 | path_tflite: str, 94 | model_keras: tf.keras.Model 95 | ) -> None: 96 | if os.path.isfile(path_tflite): 97 | return 98 | converter = tf.lite.TFLiteConverter.from_keras_model(model_keras) 99 | converter.optimizations = [ 100 | tf.lite.Optimize.DEFAULT 101 | ] 102 | converter.target_spec.supported_types = [ 103 | tf.float16 104 | ] 105 | converter.allow_custom_ops = False 106 | converter.experimental_new_converter = True 107 | model_tflite = converter.convert() 108 | open(path_tflite, "wb").write(model_tflite) 109 | return 110 | 111 | 112 | def convert_tflite_int8( 113 | path_tflite: str, 114 | imgsize: List[int], 115 | model_keras: tf.keras.Model 116 | ) -> None: 117 | if not os.path.isdir('datasets/val2017'): 118 | raise SystemError( 119 | 'you need COCO 2017 val dataset for post-training' 120 | ) 121 | if os.path.isfile(path_tflite): 122 | return 123 | 124 | def representative_dataset_gen(): 125 | images = glob.glob('datasets/val2017/*.jpg') 126 | np.random.shuffle(images) 127 | for i, ipath in enumerate(images[:NUM_TRAINING_IMAGES]): 128 | img = cv2.imread(ipath) 129 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 130 | ih = img.shape[0] 131 | iw = img.shape[1] 132 | scale = min(imgsize[0] / ih, imgsize[1] / iw) 133 | nh = int(ih * scale) 134 | nw = int(iw * scale) 135 | oh = (imgsize[0] - nh) // 2 136 | ow = (imgsize[1] - nw) // 2 137 | if scale >= 1: 138 | interpolation = cv2.INTER_CUBIC 139 | else: 140 | interpolation = cv2.INTER_AREA 141 | nimg = cv2.resize( 142 | img.copy(), (nw, nh), 143 | interpolation=interpolation 144 | ) 145 | rimg = np.full((*imgsize, 3), 128, dtype=np.uint8) 146 | rimg[oh:oh + nh, ow:ow + nw, :] = nimg 147 | rimg = rimg[np.newaxis, ...].astype(np.float32) 148 | rimg /= 255.0 149 | yield [rimg] 150 | if i % 10 == 9: 151 | print(f'post-training... 
({i}/{NUM_TRAINING_IMAGES})') 152 | return 153 | 154 | converter = tf.lite.TFLiteConverter.from_keras_model(model_keras) 155 | converter.optimizations = [ 156 | tf.lite.Optimize.DEFAULT 157 | ] 158 | converter.representative_dataset = representative_dataset_gen 159 | converter.target_spec.supported_ops = [ 160 | tf.lite.OpsSet.TFLITE_BUILTINS_INT8, 161 | tf.lite.OpsSet.SELECT_TF_OPS 162 | ] 163 | converter.inference_input_type = tf.uint8 164 | converter.inference_output_type = tf.uint8 165 | converter.allow_custom_ops = True 166 | converter.experimental_new_converter = True 167 | model_tflite = converter.convert() 168 | open(path_tflite, "wb").write(model_tflite) 169 | return 170 | 171 | 172 | def _print_detail(details: List) -> None: 173 | for i, detail in enumerate(details): 174 | print("{}: index={} shape={} dtype={}".format( 175 | i, detail['index'], detail['shape'], detail['dtype'] 176 | )) 177 | return 178 | 179 | 180 | def test_tflite(path_tflite: str, mode: str) -> None: 181 | assert mode in ['fp32', 'fp16', 'int8'] 182 | if not os.path.isfile(path_tflite): 183 | print(f'ERROR: {path_tflite} not found') 184 | return 185 | print(f'MODEL: {path_tflite}') 186 | interpreter = tf.lite.Interpreter(path_tflite) 187 | interpreter.allocate_tensors() 188 | input_details = interpreter.get_input_details() 189 | print('input details') 190 | _print_detail(input_details) 191 | output_details = interpreter.get_output_details() 192 | print('output details') 193 | _print_detail(output_details) 194 | input_shape = input_details[0]['shape'] 195 | input_data = np.array( 196 | np.random.randint(0, 256, input_shape) 197 | ) 198 | if mode == 'int8': 199 | input_data = input_data.astype(np.uint8) 200 | else: 201 | input_data = (input_data / 255.0).astype(np.float32) 202 | interpreter.set_tensor(input_details[0]['index'], input_data) 203 | start_time = time.perf_counter() 204 | interpreter.invoke() 205 | end_time = time.perf_counter() 206 | elapsed = round((end_time - start_time) * 1000, 3) 207 | print(f'elapsed time taken for inference: {elapsed}[ms]') 208 | output_data = [ 209 | interpreter.get_tensor( 210 | output_details[i]['index'] 211 | ) for i in range(len(output_details)) 212 | ] 213 | for i, out in enumerate(output_data): 214 | out_shape = output_details[i]['shape'] 215 | assert len(out.shape) == len(out_shape) 216 | for j, v in enumerate(out.shape): 217 | assert v == out_shape[j] 218 | return 219 | -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # Google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | 3 | import os 4 | import platform 5 | import subprocess 6 | import time 7 | from pathlib import Path 8 | 9 | import requests 10 | import torch 11 | 12 | 13 | def gsutil_getsize(url=''): 14 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 15 | s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8') 16 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 17 | 18 | 19 | def attempt_download(file, repo='ultralytics/yolov5'): 20 | # Attempt file download if does not exist 21 | file = Path(str(file).strip().replace("'", '').lower()) 22 | 23 | if not file.exists(): 24 | try: 25 | response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json() # github api 26 | assets = [x['name'] for x in response['assets']] # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...] 
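            # the release tag read on the next line is later combined with the asset
            # name as https://github.com/{repo}/releases/download/{tag}/{name},
            # so tag and assets must describe the same GitHub release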
27 | tag = response['tag_name'] # i.e. 'v1.0' 28 | except: # fallback plan 29 | assets = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt'] 30 | tag = subprocess.check_output('git tag', shell=True).decode().split()[-1] 31 | 32 | name = file.name 33 | if name in assets: 34 | msg = f'{file} missing, try downloading from https://github.com/{repo}/releases/' 35 | redundant = False # second download option 36 | try: # GitHub 37 | url = f'https://github.com/{repo}/releases/download/{tag}/{name}' 38 | print(f'Downloading {url} to {file}...') 39 | torch.hub.download_url_to_file(url, file) 40 | assert file.exists() and file.stat().st_size > 1E6 # check 41 | except Exception as e: # GCP 42 | print(f'Download error: {e}') 43 | assert redundant, 'No secondary mirror' 44 | url = f'https://storage.googleapis.com/{repo}/ckpt/{name}' 45 | print(f'Downloading {url} to {file}...') 46 | os.system(f'curl -L {url} -o {file}') # torch.hub.download_url_to_file(url, weights) 47 | finally: 48 | if not file.exists() or file.stat().st_size < 1E6: # check 49 | file.unlink(missing_ok=True) # remove partial downloads 50 | print(f'ERROR: Download failure: {msg}') 51 | print('') 52 | return 53 | 54 | 55 | def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'): 56 | # Downloads a file from Google Drive. from yolov5.utils.google_utils import *; gdrive_download() 57 | t = time.time() 58 | file = Path(file) 59 | cookie = Path('cookie') # gdrive cookie 60 | print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='') 61 | file.unlink(missing_ok=True) # remove existing file 62 | cookie.unlink(missing_ok=True) # remove existing cookie 63 | 64 | # Attempt file download 65 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 66 | os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}') 67 | if os.path.exists('cookie'): # large file 68 | s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}' 69 | else: # small file 70 | s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"' 71 | r = os.system(s) # execute, capture return 72 | cookie.unlink(missing_ok=True) # remove existing cookie 73 | 74 | # Error check 75 | if r != 0: 76 | file.unlink(missing_ok=True) # remove partial 77 | print('Download error ') # raise Exception('Download error') 78 | return r 79 | 80 | # Unzip if archive 81 | if file.suffix == '.zip': 82 | print('unzipping... 
', end='') 83 | os.system(f'unzip -q {file}') # unzip 84 | file.unlink() # remove zip to free space 85 | 86 | print(f'Done ({time.time() - t:.1f}s)') 87 | return r 88 | 89 | 90 | def get_token(cookie="./cookie"): 91 | with open(cookie) as f: 92 | for line in f: 93 | if "download" in line: 94 | return line.split()[-1] 95 | return "" 96 | 97 | # def upload_blob(bucket_name, source_file_name, destination_blob_name): 98 | # # Uploads a file to a bucket 99 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 100 | # 101 | # storage_client = storage.Client() 102 | # bucket = storage_client.get_bucket(bucket_name) 103 | # blob = bucket.blob(destination_blob_name) 104 | # 105 | # blob.upload_from_filename(source_file_name) 106 | # 107 | # print('File {} uploaded to {}.'.format( 108 | # source_file_name, 109 | # destination_blob_name)) 110 | # 111 | # 112 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 113 | # # Uploads a blob from a bucket 114 | # storage_client = storage.Client() 115 | # bucket = storage_client.get_bucket(bucket_name) 116 | # blob = bucket.blob(source_blob_name) 117 | # 118 | # blob.download_to_filename(destination_file_name) 119 | # 120 | # print('Blob {} downloaded to {}.'.format( 121 | # source_blob_name, 122 | # destination_file_name)) 123 | -------------------------------------------------------------------------------- /utils/image_preproc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import numpy as np 4 | import cv2 5 | from cv2 import dnn_superres_DnnSuperResImpl 6 | 7 | 8 | def adjust_white_balance(image: np.ndarray) -> np.ndarray: 9 | # white balance adjustment for strong neutral white 10 | image = cv2.cvtColor(image, cv2.COLOR_BGR2LAB) 11 | avg_a = np.average(image[:, :, 1]) 12 | avg_b = np.average(image[:, :, 2]) 13 | image[:, :, 1] = image[:, :, 1] - ( 14 | (avg_a - 128) * (image[:, :, 0] / 255.0) * 1.1 15 | ) 16 | image[:, :, 2] = image[:, :, 2] - ( 17 | (avg_b - 128) * (image[:, :, 0] / 255.0) * 1.1 18 | ) 19 | image = cv2.cvtColor(image, cv2.COLOR_LAB2BGR) 20 | return image 21 | 22 | 23 | def smooth_image(image: np.ndarray) -> np.ndarray: 24 | # image smoothing for noise removal 25 | return cv2.GaussianBlur(image, (5, 5), 0) 26 | 27 | 28 | def correct_contrast(image: np.ndarray) -> np.ndarray: 29 | # contrast correction that brightens dark areas 30 | b = 10 # HEURISTIC !! 
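    # gamma = b / sqrt(mean brightness): the darker the input, the larger the
    # gamma, and the 255 * (i / 255) ** (1 / gamma) look-up table built below
    # brightens it more aggressively (an image with mean ~100 passes through
    # essentially unchanged, since gamma is then 1)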
31 | gamma = 1 / np.sqrt(image.mean()) * b 32 | g_table = np.array([ 33 | ((i / 255.0) ** (1 / gamma)) * 255 34 | for i in np.arange(0, 256) 35 | ]).astype("uint8") 36 | return cv2.LUT(image, g_table) 37 | 38 | 39 | def levelize_histogram(image: np.ndarray) -> np.ndarray: 40 | # make color distributions even 41 | for i in range(3): 42 | image[:, :, i] = cv2.equalizeHist(image[:, :, i]) 43 | return image 44 | 45 | 46 | def correct_contrast_using_lut(image: np.ndarray) -> np.ndarray: 47 | # contrast correction using look-up-table 48 | a = 10 49 | c_table = np.array([ 50 | 255.0 / (1 + np.exp(-a * (i - 128) / 255)) 51 | for i in np.arange(0, 256) 52 | ]).astype("uint8") 53 | return cv2.LUT(image, c_table) 54 | 55 | 56 | def upsample_image( 57 | image: np.ndarray, 58 | sr: dnn_superres_DnnSuperResImpl 59 | ) -> np.ndarray: 60 | # increase resolution with super-resolution to make the image clearer 61 | # and then shrink the image 62 | prev_height = image.shape[0] 63 | prev_width = image.shape[1] 64 | # sr.upsample() (super-resolution) is too slow when the image is big 65 | # so, do super-resolution after shrinking image 66 | image = cv2.resize( 67 | image, (prev_width // 2, prev_height // 2), 68 | interpolation=cv2.INTER_AREA 69 | ) 70 | image = sr.upsample(image) 71 | # restore image size 72 | image = cv2.resize( 73 | image, (prev_width, prev_height), 74 | interpolation=cv2.INTER_AREA 75 | ) 76 | return image 77 | -------------------------------------------------------------------------------- /utils/loss.py: -------------------------------------------------------------------------------- 1 | # Loss functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from utils.general import bbox_iou 7 | from utils.torch_utils import is_parallel 8 | 9 | 10 | def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 11 | # return positive, negative label smoothing BCE targets 12 | return 1.0 - 0.5 * eps, 0.5 * eps 13 | 14 | 15 | class BCEBlurWithLogitsLoss(nn.Module): 16 | # BCEwithLogitLoss() with reduced missing label effects. 17 | def __init__(self, alpha=0.05): 18 | super(BCEBlurWithLogitsLoss, self).__init__() 19 | self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss() 20 | self.alpha = alpha 21 | 22 | def forward(self, pred, true): 23 | loss = self.loss_fcn(pred, true) 24 | pred = torch.sigmoid(pred) # prob from logits 25 | dx = pred - true # reduce only missing label effects 26 | # dx = (pred - true).abs() # reduce missing label and false label effects 27 | alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4)) 28 | loss *= alpha_factor 29 | return loss.mean() 30 | 31 | 32 | class FocalLoss(nn.Module): 33 | # Wraps focal loss around existing loss_fcn(), i.e. 
criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) 34 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 35 | super(FocalLoss, self).__init__() 36 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() 37 | self.gamma = gamma 38 | self.alpha = alpha 39 | self.reduction = loss_fcn.reduction 40 | self.loss_fcn.reduction = 'none' # required to apply FL to each element 41 | 42 | def forward(self, pred, true): 43 | loss = self.loss_fcn(pred, true) 44 | # p_t = torch.exp(-loss) 45 | # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability 46 | 47 | # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py 48 | pred_prob = torch.sigmoid(pred) # prob from logits 49 | p_t = true * pred_prob + (1 - true) * (1 - pred_prob) 50 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 51 | modulating_factor = (1.0 - p_t) ** self.gamma 52 | loss *= alpha_factor * modulating_factor 53 | 54 | if self.reduction == 'mean': 55 | return loss.mean() 56 | elif self.reduction == 'sum': 57 | return loss.sum() 58 | else: # 'none' 59 | return loss 60 | 61 | 62 | class QFocalLoss(nn.Module): 63 | # Wraps Quality focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) 64 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 65 | super(QFocalLoss, self).__init__() 66 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() 67 | self.gamma = gamma 68 | self.alpha = alpha 69 | self.reduction = loss_fcn.reduction 70 | self.loss_fcn.reduction = 'none' # required to apply FL to each element 71 | 72 | def forward(self, pred, true): 73 | loss = self.loss_fcn(pred, true) 74 | 75 | pred_prob = torch.sigmoid(pred) # prob from logits 76 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 77 | modulating_factor = torch.abs(true - pred_prob) ** self.gamma 78 | loss *= alpha_factor * modulating_factor 79 | 80 | if self.reduction == 'mean': 81 | return loss.mean() 82 | elif self.reduction == 'sum': 83 | return loss.sum() 84 | else: # 'none' 85 | return loss 86 | 87 | 88 | class ComputeLoss: 89 | # Compute losses 90 | def __init__(self, model, autobalance=False): 91 | super(ComputeLoss, self).__init__() 92 | device = next(model.parameters()).device # get model device 93 | h = model.hyp # hyperparameters 94 | 95 | # Define criteria 96 | BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) 97 | BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) 98 | 99 | # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 100 | self.cp, self.cn = smooth_BCE(eps=0.0) 101 | 102 | # Focal loss 103 | g = h['fl_gamma'] # focal loss gamma 104 | if g > 0: 105 | BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) 106 | 107 | det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module 108 | self.balance = {3: [4.0, 1.0, 0.4], 4: [4.0, 1.0, 0.25, 0.06], 5: [4.0, 1.0, 0.25, 0.06, .02]}[det.nl] 109 | self.ssi = (det.stride == 16).nonzero(as_tuple=False).item() # stride 16 index 110 | self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, model.gr, h, autobalance 111 | for k in 'na', 'nc', 'nl', 'anchors': 112 | setattr(self, k, getattr(det, k)) 113 | 114 | def __call__(self, p, targets): # predictions, targets, model 115 | device = targets.device 116 | lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, 
device=device) 117 | tcls, tbox, indices, anchors = self.build_targets(p, targets) # targets 118 | 119 | # Losses 120 | for i, pi in enumerate(p): # layer index, layer predictions 121 | b, a, gj, gi = indices[i] # image, anchor, gridy, gridx 122 | tobj = torch.zeros_like(pi[..., 0], device=device) # target obj 123 | 124 | n = b.shape[0] # number of targets 125 | if n: 126 | ps = pi[b, a, gj, gi] # prediction subset corresponding to targets 127 | 128 | # Regression 129 | pxy = ps[:, :2].sigmoid() * 2. - 0.5 130 | pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] 131 | pbox = torch.cat((pxy, pwh), 1) # predicted box 132 | iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) 133 | lbox += (1.0 - iou).mean() # iou loss 134 | 135 | # Objectness 136 | tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio 137 | 138 | # Classification 139 | if self.nc > 1: # cls loss (only if multiple classes) 140 | t = torch.full_like(ps[:, 5:], self.cn, device=device) # targets 141 | t[range(n), tcls[i]] = self.cp 142 | lcls += self.BCEcls(ps[:, 5:], t) # BCE 143 | 144 | # Append targets to text file 145 | # with open('targets.txt', 'a') as file: 146 | # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] 147 | 148 | obji = self.BCEobj(pi[..., 4], tobj) 149 | lobj += obji * self.balance[i] # obj loss 150 | if self.autobalance: 151 | self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() 152 | 153 | if self.autobalance: 154 | self.balance = [x / self.balance[self.ssi] for x in self.balance] 155 | lbox *= self.hyp['box'] 156 | lobj *= self.hyp['obj'] 157 | lcls *= self.hyp['cls'] 158 | bs = tobj.shape[0] # batch size 159 | 160 | loss = lbox + lobj + lcls 161 | return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach() 162 | 163 | def build_targets(self, p, targets): 164 | # Build targets for compute_loss(), input targets(image,class,x,y,w,h) 165 | na, nt = self.na, targets.shape[0] # number of anchors, targets 166 | tcls, tbox, indices, anch = [], [], [], [] 167 | gain = torch.ones(7, device=targets.device) # normalized to gridspace gain 168 | ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) 169 | targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices 170 | 171 | g = 0.5 # bias 172 | off = torch.tensor([[0, 0], 173 | [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m 174 | # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm 175 | ], device=targets.device).float() * g # offsets 176 | 177 | for i in range(self.nl): 178 | anchors = self.anchors[i] 179 | gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain 180 | 181 | # Match targets to anchors 182 | t = targets * gain 183 | if nt: 184 | # Matches 185 | r = t[:, :, 4:6] / anchors[:, None] # wh ratio 186 | j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t'] # compare 187 | # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) 188 | t = t[j] # filter 189 | 190 | # Offsets 191 | gxy = t[:, 2:4] # grid xy 192 | gxi = gain[[2, 3]] - gxy # inverse 193 | j, k = ((gxy % 1. < g) & (gxy > 1.)).T 194 | l, m = ((gxi % 1. 
< g) & (gxi > 1.)).T 195 | j = torch.stack((torch.ones_like(j), j, k, l, m)) 196 | t = t.repeat((5, 1, 1))[j] 197 | offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] 198 | else: 199 | t = targets[0] 200 | offsets = 0 201 | 202 | # Define 203 | b, c = t[:, :2].long().T # image, class 204 | gxy = t[:, 2:4] # grid xy 205 | gwh = t[:, 4:6] # grid wh 206 | gij = (gxy - offsets).long() 207 | gi, gj = gij.T # grid xy indices 208 | 209 | # Append 210 | a = t[:, 6].long() # anchor indices 211 | indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices 212 | tbox.append(torch.cat((gxy - gij, gwh), 1)) # box 213 | anch.append(anchors[a]) # anchors 214 | tcls.append(c) # class 215 | 216 | return tcls, tbox, indices, anch 217 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | # Model validation metrics 2 | 3 | from pathlib import Path 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import torch 8 | 9 | from . import general 10 | 11 | 12 | def fitness(x): 13 | # Model fitness as a weighted combination of metrics 14 | w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] 15 | return (x[:, :4] * w).sum(1) 16 | 17 | 18 | def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=()): 19 | """ Compute the average precision, given the recall and precision curves. 20 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 21 | # Arguments 22 | tp: True positives (nparray, nx1 or nx10). 23 | conf: Objectness value from 0-1 (nparray). 24 | pred_cls: Predicted object classes (nparray). 25 | target_cls: True object classes (nparray). 26 | plot: Plot precision-recall curve at mAP@0.5 27 | save_dir: Plot save directory 28 | # Returns 29 | The average precision as computed in py-faster-rcnn. 
30 | """ 31 | 32 | # Sort by objectness 33 | i = np.argsort(-conf) 34 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 35 | 36 | # Find unique classes 37 | unique_classes = np.unique(target_cls) 38 | nc = unique_classes.shape[0] # number of classes, number of detections 39 | 40 | # Create Precision-Recall curve and compute AP for each class 41 | px, py = np.linspace(0, 1, 1000), [] # for plotting 42 | ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) 43 | for ci, c in enumerate(unique_classes): 44 | i = pred_cls == c 45 | n_l = (target_cls == c).sum() # number of labels 46 | n_p = i.sum() # number of predictions 47 | 48 | if n_p == 0 or n_l == 0: 49 | continue 50 | else: 51 | # Accumulate FPs and TPs 52 | fpc = (1 - tp[i]).cumsum(0) 53 | tpc = tp[i].cumsum(0) 54 | 55 | # Recall 56 | recall = tpc / (n_l + 1e-16) # recall curve 57 | r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases 58 | 59 | # Precision 60 | precision = tpc / (tpc + fpc) # precision curve 61 | p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score 62 | 63 | # AP from recall-precision curve 64 | for j in range(tp.shape[1]): 65 | ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) 66 | if plot and j == 0: 67 | py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 68 | 69 | # Compute F1 (harmonic mean of precision and recall) 70 | f1 = 2 * p * r / (p + r + 1e-16) 71 | if plot: 72 | plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names) 73 | plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1') 74 | plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision') 75 | plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall') 76 | 77 | i = f1.mean(0).argmax() # max F1 index 78 | return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype('int32') 79 | 80 | 81 | def compute_ap(recall, precision): 82 | """ Compute the average precision, given the recall and precision curves 83 | # Arguments 84 | recall: The recall curve (list) 85 | precision: The precision curve (list) 86 | # Returns 87 | Average precision, precision curve, recall curve 88 | """ 89 | 90 | # Append sentinel values to beginning and end 91 | mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01])) 92 | mpre = np.concatenate(([1.], precision, [0.])) 93 | 94 | # Compute the precision envelope 95 | mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) 96 | 97 | # Integrate area under curve 98 | method = 'interp' # methods: 'continuous', 'interp' 99 | if method == 'interp': 100 | x = np.linspace(0, 1, 101) # 101-point interp (COCO) 101 | ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate 102 | else: # 'continuous' 103 | i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes 104 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve 105 | 106 | return ap, mpre, mrec 107 | 108 | 109 | class ConfusionMatrix: 110 | # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix 111 | def __init__(self, nc, conf=0.25, iou_thres=0.45): 112 | self.matrix = np.zeros((nc + 1, nc + 1)) 113 | self.nc = nc # number of classes 114 | self.conf = conf 115 | self.iou_thres = iou_thres 116 | 117 | def process_batch(self, detections, labels): 118 | """ 119 | Return intersection-over-union (Jaccard index) of boxes. 120 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
121 | Arguments: 122 | detections (Array[N, 6]), x1, y1, x2, y2, conf, class 123 | labels (Array[M, 5]), class, x1, y1, x2, y2 124 | Returns: 125 | None, updates confusion matrix accordingly 126 | """ 127 | detections = detections[detections[:, 4] > self.conf] 128 | gt_classes = labels[:, 0].int() 129 | detection_classes = detections[:, 5].int() 130 | iou = general.box_iou(labels[:, 1:], detections[:, :4]) 131 | 132 | x = torch.where(iou > self.iou_thres) 133 | if x[0].shape[0]: 134 | matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() 135 | if x[0].shape[0] > 1: 136 | matches = matches[matches[:, 2].argsort()[::-1]] 137 | matches = matches[np.unique(matches[:, 1], return_index=True)[1]] 138 | matches = matches[matches[:, 2].argsort()[::-1]] 139 | matches = matches[np.unique(matches[:, 0], return_index=True)[1]] 140 | else: 141 | matches = np.zeros((0, 3)) 142 | 143 | n = matches.shape[0] > 0 144 | m0, m1, _ = matches.transpose().astype(np.int16) 145 | for i, gc in enumerate(gt_classes): 146 | j = m0 == i 147 | if n and sum(j) == 1: 148 | self.matrix[gc, detection_classes[m1[j]]] += 1 # correct 149 | else: 150 | self.matrix[gc, self.nc] += 1 # background FP 151 | 152 | if n: 153 | for i, dc in enumerate(detection_classes): 154 | if not any(m1 == i): 155 | self.matrix[self.nc, dc] += 1 # background FN 156 | 157 | def matrix(self): 158 | return self.matrix 159 | 160 | def plot(self, save_dir='', names=()): 161 | try: 162 | import seaborn as sn 163 | 164 | array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) # normalize 165 | array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) 166 | 167 | fig = plt.figure(figsize=(12, 9), tight_layout=True) 168 | sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size 169 | labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels 170 | sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True, 171 | xticklabels=names + ['background FN'] if labels else "auto", 172 | yticklabels=names + ['background FP'] if labels else "auto").set_facecolor((1, 1, 1)) 173 | fig.axes[0].set_xlabel('True') 174 | fig.axes[0].set_ylabel('Predicted') 175 | fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) 176 | except Exception as e: 177 | pass 178 | 179 | def print(self): 180 | for i in range(self.nc + 1): 181 | print(' '.join(map(str, self.matrix[i]))) 182 | 183 | 184 | # Plots ---------------------------------------------------------------------------------------------------------------- 185 | 186 | def plot_pr_curve(px, py, ap, save_dir='pr_curve.png', names=()): 187 | # Precision-recall curve 188 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) 189 | py = np.stack(py, axis=1) 190 | 191 | if 0 < len(names) < 21: # display per-class legend if < 21 classes 192 | for i, y in enumerate(py.T): 193 | ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision) 194 | else: 195 | ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision) 196 | 197 | ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) 198 | ax.set_xlabel('Recall') 199 | ax.set_ylabel('Precision') 200 | ax.set_xlim(0, 1) 201 | ax.set_ylim(0, 1) 202 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") 203 | fig.savefig(Path(save_dir), dpi=250) 204 | 205 | 206 | def plot_mc_curve(px, py, save_dir='mc_curve.png', names=(), xlabel='Confidence', 
ylabel='Metric'): 207 | # Metric-confidence curve 208 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) 209 | 210 | if 0 < len(names) < 21: # display per-class legend if < 21 classes 211 | for i, y in enumerate(py): 212 | ax.plot(px, y, linewidth=1, label=f'{names[i]}') # plot(confidence, metric) 213 | else: 214 | ax.plot(px, py.T, linewidth=1, color='grey') # plot(confidence, metric) 215 | 216 | y = py.mean(0) 217 | ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}') 218 | ax.set_xlabel(xlabel) 219 | ax.set_ylabel(ylabel) 220 | ax.set_xlim(0, 1) 221 | ax.set_ylim(0, 1) 222 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") 223 | fig.savefig(Path(save_dir), dpi=250) 224 | -------------------------------------------------------------------------------- /utils/nms.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import numpy as np 4 | 5 | 6 | # Intersection over Union 7 | # bboxesX[:4] is numpy array of xyxy (xmin, ymin, xmax, ymax) 8 | # bboxes1: the bounding box which has the highest confidence score 9 | # bboxes2: the bounding boxes of the same category except the one above 10 | def bboxes_iou( 11 | bboxes1: np.ndarray, 12 | bboxes2: np.ndarray 13 | ) -> np.ndarray: 14 | bboxes1_area = ( 15 | bboxes1[:, 2] - bboxes1[:, 0] 16 | ) * ( 17 | bboxes1[:, 3] - bboxes1[:, 1] 18 | ) 19 | bboxes2_area = ( 20 | bboxes2[:, 2] - bboxes2[:, 0] 21 | ) * ( 22 | bboxes2[:, 3] - bboxes2[:, 1] 23 | ) 24 | left_ups = np.maximum(bboxes1[:, :2], bboxes2[:, :2]) 25 | right_downs = np.minimum(bboxes1[:, 2:4], bboxes2[:, 2:4]) 26 | intersections = np.maximum(right_downs - left_ups, 0.0) 27 | inter_areas = intersections[:, 0] * intersections[:, 1] 28 | union_areas = bboxes1_area + bboxes2_area - inter_areas 29 | ious = np.maximum( 30 | 1.0 * inter_areas / union_areas, 31 | np.finfo(np.float32).eps 32 | ) 33 | return ious 34 | 35 | 36 | # filter bounding boxes using (soft) Non-Maximum Suppression 37 | # paper of soft NMS: https://arxiv.org/abs/1704.04503 38 | # bboxes is numpy array of 39 | # offset 0-3: xyxy (xmin, ymin, xmax, ymax) 40 | # offset 4: category id (int) 41 | # offset 5: confidence score 42 | def filter_bboxes( 43 | bboxes: np.ndarray, 44 | conf_threshold: float = 0.3, 45 | iou_threshold: float = 0.45, 46 | disable_soft_nms: bool = False 47 | ) -> np.ndarray: 48 | if bboxes.shape[0] == 0: 49 | return bboxes 50 | # filter by confidence threshold 51 | bboxes = bboxes[bboxes[:, 5] > conf_threshold] 52 | if bboxes.shape[0] == 0: 53 | return bboxes 54 | # confidence for soft NMS 55 | bboxes = np.insert(bboxes, 6, bboxes[:, 5], axis=1) 56 | # (soft) NMS for each class 57 | unique_category_ids = list(set(bboxes[:, 4])) 58 | best_bboxes = list() 59 | for cat in unique_category_ids: 60 | cat_bboxes = bboxes[bboxes[:, 4] == cat] 61 | while cat_bboxes.shape[0] > 0: 62 | if cat_bboxes.shape[0] == 1: 63 | best_bboxes.append(cat_bboxes) 64 | break 65 | max_conf = np.argmax(cat_bboxes[:, 6]) 66 | best_bbox = cat_bboxes[max_conf:max_conf + 1] 67 | best_bboxes.append(best_bbox) 68 | cat_bboxes = np.delete(cat_bboxes, max_conf, axis=0) 69 | ious = bboxes_iou( 70 | bboxes1=best_bbox, 71 | bboxes2=cat_bboxes 72 | ) 73 | if disable_soft_nms: 74 | cat_bboxes = cat_bboxes[ious < iou_threshold] 75 | else: 76 | iou_mask = (ious >= iou_threshold).astype(float) 77 | cat_bboxes[:, 6] = cat_bboxes[:, 6] * ( 78 | 1.0 - (ious * iou_mask) 79 | ) 80 | cat_bboxes =
cat_bboxes[cat_bboxes[:, 6] > conf_threshold] 81 | return np.concatenate(best_bboxes, axis=0)[:, :6] 82 | -------------------------------------------------------------------------------- /utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | # PyTorch utils 2 | 3 | import logging 4 | import math 5 | import os 6 | import subprocess 7 | import time 8 | from contextlib import contextmanager 9 | from copy import deepcopy 10 | from pathlib import Path 11 | 12 | import torch 13 | import torch.backends.cudnn as cudnn 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | import torchvision 17 | 18 | try: 19 | import thop # for FLOPS computation 20 | except ImportError: 21 | thop = None 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | @contextmanager 26 | def torch_distributed_zero_first(local_rank: int): 27 | """ 28 | Decorator to make all processes in distributed training wait for each local_master to do something. 29 | """ 30 | if local_rank not in [-1, 0]: 31 | torch.distributed.barrier() 32 | yield 33 | if local_rank == 0: 34 | torch.distributed.barrier() 35 | 36 | 37 | def init_torch_seeds(seed=0): 38 | # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html 39 | torch.manual_seed(seed) 40 | if seed == 0: # slower, more reproducible 41 | cudnn.benchmark, cudnn.deterministic = False, True 42 | else: # faster, less reproducible 43 | cudnn.benchmark, cudnn.deterministic = True, False 44 | 45 | 46 | def git_describe(): 47 | # return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe 48 | if Path('.git').exists(): 49 | return subprocess.check_output('git describe --tags --long --always', shell=True).decode('utf-8')[:-1] 50 | else: 51 | return '' 52 | 53 | 54 | def select_device(device='', batch_size=None): 55 | # device = 'cpu' or '0' or '0,1,2,3' 56 | s = f'YOLOv5 {git_describe()} torch {torch.__version__} ' # string 57 | cpu = device.lower() == 'cpu' 58 | if cpu: 59 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False 60 | elif device: # non-cpu device requested 61 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 62 | assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availability 63 | 64 | cuda = not cpu and torch.cuda.is_available() 65 | if cuda: 66 | n = torch.cuda.device_count() 67 | if n > 1 and batch_size: # check that batch_size is compatible with device_count 68 | assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}' 69 | space = ' ' * len(s) 70 | for i, d in enumerate(device.split(',') if device else range(n)): 71 | p = torch.cuda.get_device_properties(i) 72 | s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2}MB)\n" # bytes to MB 73 | else: 74 | s += 'CPU\n' 75 | 76 | logger.info(s) # skip a line 77 | return torch.device('cuda:0' if cuda else 'cpu') 78 | 79 | 80 | def time_synchronized(): 81 | # pytorch-accurate time 82 | if torch.cuda.is_available(): 83 | torch.cuda.synchronize() 84 | return time.time() 85 | 86 | 87 | def profile(x, ops, n=100, device=None): 88 | # profile a pytorch module or list of modules. 
Example usage: 89 | # x = torch.randn(16, 3, 640, 640) # input 90 | # m1 = lambda x: x * torch.sigmoid(x) 91 | # m2 = nn.SiLU() 92 | # profile(x, [m1, m2], n=100) # profile speed over 100 iterations 93 | 94 | device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 95 | x = x.to(device) 96 | x.requires_grad = True 97 | print(torch.__version__, device.type, torch.cuda.get_device_properties(0) if device.type == 'cuda' else '') 98 | print(f"\n{'Params':>12s}{'GFLOPS':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}") 99 | for m in ops if isinstance(ops, list) else [ops]: 100 | m = m.to(device) if hasattr(m, 'to') else m # device 101 | m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m # type 102 | dtf, dtb, t = 0., 0., [0., 0., 0.] # dt forward, backward 103 | try: 104 | flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPS 105 | except: 106 | flops = 0 107 | 108 | for _ in range(n): 109 | t[0] = time_synchronized() 110 | y = m(x) 111 | t[1] = time_synchronized() 112 | try: 113 | _ = y.sum().backward() 114 | t[2] = time_synchronized() 115 | except: # no backward method 116 | t[2] = float('nan') 117 | dtf += (t[1] - t[0]) * 1000 / n # ms per op forward 118 | dtb += (t[2] - t[1]) * 1000 / n # ms per op backward 119 | 120 | s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' 121 | s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list' 122 | p = sum(list(x.numel() for x in m.parameters())) if isinstance(m, nn.Module) else 0 # parameters 123 | print(f'{p:12.4g}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}') 124 | 125 | 126 | def is_parallel(model): 127 | return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) 128 | 129 | 130 | def intersect_dicts(da, db, exclude=()): 131 | # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values 132 | return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} 133 | 134 | 135 | def initialize_weights(model): 136 | for m in model.modules(): 137 | t = type(m) 138 | if t is nn.Conv2d: 139 | pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 140 | elif t is nn.BatchNorm2d: 141 | m.eps = 1e-3 142 | m.momentum = 0.03 143 | elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 144 | m.inplace = True 145 | 146 | 147 | def find_modules(model, mclass=nn.Conv2d): 148 | # Finds layer indices matching module class 'mclass' 149 | return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] 150 | 151 | 152 | def sparsity(model): 153 | # Return global model sparsity 154 | a, b = 0., 0. 155 | for p in model.parameters(): 156 | a += p.numel() 157 | b += (p == 0).sum() 158 | return b / a 159 | 160 | 161 | def prune(model, amount=0.3): 162 | # Prune model to requested global sparsity 163 | import torch.nn.utils.prune as prune 164 | print('Pruning model... 
', end='') 165 | for name, m in model.named_modules(): 166 | if isinstance(m, nn.Conv2d): 167 | prune.l1_unstructured(m, name='weight', amount=amount) # prune 168 | prune.remove(m, 'weight') # make permanent 169 | print(' %.3g global sparsity' % sparsity(model)) 170 | 171 | 172 | def fuse_conv_and_bn(conv, bn): 173 | # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 174 | fusedconv = nn.Conv2d(conv.in_channels, 175 | conv.out_channels, 176 | kernel_size=conv.kernel_size, 177 | stride=conv.stride, 178 | padding=conv.padding, 179 | groups=conv.groups, 180 | bias=True).requires_grad_(False).to(conv.weight.device) 181 | 182 | # prepare filters 183 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 184 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 185 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 186 | 187 | # prepare spatial bias 188 | b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias 189 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 190 | fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) 191 | 192 | return fusedconv 193 | 194 | 195 | def model_info(model, verbose=False, img_size=640): 196 | # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320] 197 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 198 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 199 | if verbose: 200 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 201 | for i, (name, p) in enumerate(model.named_parameters()): 202 | name = name.replace('module_list.', '') 203 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 204 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 205 | 206 | try: # FLOPS 207 | from thop import profile 208 | stride = int(model.stride.max()) if hasattr(model, 'stride') else 32 209 | img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device) # input 210 | flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2 # stride GFLOPS 211 | img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float 212 | fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / stride) # 640x640 GFLOPS 213 | except (ImportError, Exception): 214 | fs = '' 215 | 216 | logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") 217 | 218 | 219 | def load_classifier(name='resnet101', n=2): 220 | # Loads a pretrained model reshaped to n-class output 221 | model = torchvision.models.__dict__[name](pretrained=True) 222 | 223 | # ResNet model properties 224 | # input_size = [3, 224, 224] 225 | # input_space = 'RGB' 226 | # input_range = [0, 1] 227 | # mean = [0.485, 0.456, 0.406] 228 | # std = [0.229, 0.224, 0.225] 229 | 230 | # Reshape output to n classes 231 | filters = model.fc.weight.shape[1] 232 | model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True) 233 | model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True) 234 | model.fc.out_features = n 235 | return model 236 | 237 | 238 | def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) 239 | # scales img(bs,3,y,x) by ratio constrained to gs-multiple 240 | if ratio 
== 1.0: 241 | return img 242 | else: 243 | h, w = img.shape[2:] 244 | s = (int(h * ratio), int(w * ratio)) # new size 245 | img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize 246 | if not same_shape: # pad/crop img 247 | h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)] 248 | return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean 249 | 250 | 251 | def copy_attr(a, b, include=(), exclude=()): 252 | # Copy attributes from b to a, options to only include [...] and to exclude [...] 253 | for k, v in b.__dict__.items(): 254 | if (len(include) and k not in include) or k.startswith('_') or k in exclude: 255 | continue 256 | else: 257 | setattr(a, k, v) 258 | 259 | 260 | class ModelEMA: 261 | """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models 262 | Keep a moving average of everything in the model state_dict (parameters and buffers). 263 | This is intended to allow functionality like 264 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 265 | A smoothed version of the weights is necessary for some training schemes to perform well. 266 | This class is sensitive where it is initialized in the sequence of model init, 267 | GPU assignment and distributed training wrappers. 268 | """ 269 | 270 | def __init__(self, model, decay=0.9999, updates=0): 271 | # Create EMA 272 | self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA 273 | # if next(model.parameters()).device.type != 'cpu': 274 | # self.ema.half() # FP16 EMA 275 | self.updates = updates # number of EMA updates 276 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs) 277 | for p in self.ema.parameters(): 278 | p.requires_grad_(False) 279 | 280 | def update(self, model): 281 | # Update EMA parameters 282 | with torch.no_grad(): 283 | self.updates += 1 284 | d = self.decay(self.updates) 285 | 286 | msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict 287 | for k, v in self.ema.state_dict().items(): 288 | if v.dtype.is_floating_point: 289 | v *= d 290 | v += (1. 
- d) * msd[k].detach() 291 | 292 | def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): 293 | # Update EMA attributes 294 | copy_attr(self.ema, model, include, exclude) 295 | -------------------------------------------------------------------------------- /weights/.gitignore: -------------------------------------------------------------------------------- 1 | yolo 2 | yolov5 3 | -------------------------------------------------------------------------------- /weights/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:latest 2 | USER root 3 | RUN apt-get update && \ 4 | apt-get install -y --no-install-recommends \ 5 | curl gnupg2 ca-certificates && \ 6 | curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ 7 | echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | tee /etc/apt/sources.list.d/coral-edgetpu.list 8 | RUN apt-get update && \ 9 | apt-get install -y --no-install-recommends \ 10 | edgetpu-compiler 11 | -------------------------------------------------------------------------------- /weights/README.md: -------------------------------------------------------------------------------- 1 | # download YOLO pre-trained weights 2 | 3 | ## YOLO V3 and V4 4 | 5 | `> ./download_yolo.sh` 6 | 7 | ## YOLO V5 8 | 9 | `> ./download_yolov5.py` 10 | 11 | ## [optional] compile TFLite FlatBuffers for EdgeTPU 12 | 13 | ### setup 14 | 15 | - install docker on your PC 16 | - `> build_docker.sh` 17 | - convert pre-trained weights 18 | - go up one directory (to the repository root) 19 | - `> convert_yolo.py` 20 | - `> convert_yolov5.py` 21 | 22 | ### compile 23 | 24 | - run docker 25 | - ex.) compile yolov3-tiny for Edge TPU 26 | - `> compile_edgetpu.sh yolo/yolov3-tiny_int8.tflite` 27 | - → `yolo/yolov3-tiny_int8_edgetpu.tflite` will be created 28 | - ex.) compile yolov5s for Edge TPU 29 | - `> compile_edgetpu.sh yolov5/yolov5s_int8.tflite` 30 | - → `yolov5/yolov5s_int8_edgetpu.tflite` will be created 31 | 32 | ### notices 33 | 34 | - quantization of the compiled model must be `int8` 35 | - the filename must match `[yolo|yolov5]/*_int8.tflite` 36 | - the compiled binary runs very slowly because most of the subgraphs are not mapped onto the Edge TPU. 37 | 38 | the case of yolov3-tiny 39 | ``` 40 | > ./compile_edgetpu.sh yolo/yolov3-tiny_int8.tflite 41 | Edge TPU Compiler version 15.0.340273435 42 | 43 | Model compiled successfully in 577 ms. 44 | 45 | Input model: /home/yolo/yolov3-tiny_int8.tflite 46 | Input size: 8.58MiB 47 | Output model: /home/yolo/yolov3-tiny_int8_edgetpu.tflite 48 | Output size: 8.70MiB 49 | On-chip memory used for caching model parameters: 3.00KiB 50 | On-chip memory remaining for caching model parameters: 7.67MiB 51 | Off-chip memory used for streaming uncached model parameters: 0.00B 52 | Number of Edge TPU subgraphs: 1 53 | Total number of operations: 36 54 | Operation log: /home/yolo/yolov3-tiny_int8_edgetpu.log 55 | 56 | Model successfully compiled but not all operations are supported by the Edge TPU. A percentage of the model will instead run on the CPU, which is slower. If possible, consider updating your model to use only operations supported by the Edge TPU. For details, visit g.co/coral/model-reqs.
57 | Number of operations that will run on Edge TPU: 2 58 | Number of operations that will run on CPU: 34 59 | 60 | Operator Count Status 61 | 62 | MAX_POOL_2D 6 More than one subgraph is not supported 63 | QUANTIZE 2 Operation is otherwise supported, but not mapped due to some unspecified limitation 64 | QUANTIZE 1 Mapped to Edge TPU 65 | QUANTIZE 1 More than one subgraph is not supported 66 | CONV_2D 1 Mapped to Edge TPU 67 | CONV_2D 12 More than one subgraph is not supported 68 | RESIZE_NEAREST_NEIGHBOR 1 Operation version not supported 69 | LEAKY_RELU 11 Operation not supported 70 | CONCATENATION 1 More than one subgraph is not supported 71 | ``` 72 | 73 | the case of yolov5s 74 | ``` 75 | > ./compile_edgetpu.sh yolov5/yolov5s_int8.tflite 76 | Edge TPU Compiler version 15.0.340273435 77 | 78 | Model compiled successfully in 93 ms. 79 | 80 | Input model: /home/yolov5/yolov5s_int8.tflite 81 | Input size: 7.39MiB 82 | Output model: /home/yolov5/yolov5s_int8_edgetpu.tflite 83 | Output size: 7.34MiB 84 | On-chip memory used for caching model parameters: 0.00B 85 | On-chip memory remaining for caching model parameters: 8.05MiB 86 | Off-chip memory used for streaming uncached model parameters: 0.00B 87 | Number of Edge TPU subgraphs: 1 88 | Total number of operations: 294 89 | Operation log: /home/yolov5/yolov5s_int8_edgetpu.log 90 | 91 | Model successfully compiled but not all operations are supported by the Edge TPU. A percentage of the model will instead run on the CPU, which is slower. If possible, consider updating your model to use only operations supported by the Edge TPU. For details, visit g.co/coral/model-reqs. 92 | Number of operations that will run on Edge TPU: 1 93 | Number of operations that will run on CPU: 293 94 | 95 | Operator Count Status 96 | 97 | QUANTIZE 1 Mapped to Edge TPU 98 | QUANTIZE 1 Operation is otherwise supported, but not mapped due to some unspecified limitation 99 | QUANTIZE 24 More than one subgraph is not supported 100 | LOGISTIC 62 More than one subgraph is not supported 101 | TRANSPOSE 3 Operation not supported 102 | SUB 3 More than one subgraph is not supported 103 | CONCATENATION 1 Operation is otherwise supported, but not mapped due to some unspecified limitation 104 | CONCATENATION 17 More than one subgraph is not supported 105 | MAX_POOL_2D 3 More than one subgraph is not supported 106 | STRIDED_SLICE 9 More than one subgraph is not supported 107 | STRIDED_SLICE 4 Only Strided-Slice with unitary strides supported 108 | CONV_2D 62 More than one subgraph is not supported 109 | MUL 80 More than one subgraph is not supported 110 | RESIZE_NEAREST_NEIGHBOR 2 Operation version not supported 111 | RESHAPE 3 Operation is otherwise supported, but not mapped due to some unspecified limitation 112 | RESHAPE 3 More than one subgraph is not supported 113 | PAD 6 More than one subgraph is not supported 114 | ADD 10 More than one subgraph is not supported 115 | ``` 116 | -------------------------------------------------------------------------------- /weights/build_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | docker build -t edgetpu_env . 3 | -------------------------------------------------------------------------------- /weights/compile_edgetpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ $# != 1 ]; then 3 | echo "Usage: $0 " 4 | exit 5 | elif [ ! 
-f $1 ] || [ ${1##*_} != 'int8.tflite' ]; then 6 | echo "Usage: $0 " 7 | exit 8 | fi 9 | dir=${1%/*} 10 | docker run --rm -it --env 'TZ=Asia/Tokyo' --volume ${PWD}:/home --name edgetpu_compile edgetpu_env /usr/bin/edgetpu_compiler -s /home/$1 -o /home/${dir} 11 | -------------------------------------------------------------------------------- /weights/download_yolo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [ ! -d yolo ] ; then 4 | mkdir yolo 5 | fi 6 | if [ ! -f yolo/yolov3-tiny.weights ] ; then 7 | wget -O yolo/yolov3-tiny.weights https://pjreddie.com/media/files/yolov3-tiny.weights 8 | fi 9 | if [ ! -f yolo/yolov3.weights ] ; then 10 | wget -O yolo/yolov3.weights https://pjreddie.com/media/files/yolov3.weights 11 | fi 12 | if [ ! -f yolo/yolov3-spp.weights ] ; then 13 | wget -O yolo/yolov3-spp.weights https://pjreddie.com/media/files/yolov3-spp.weights 14 | fi 15 | if [ ! -f yolo/yolov4-tiny.weights ] ; then 16 | wget -O yolo/yolov4-tiny.weights https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights 17 | fi 18 | if [ ! -f yolo/yolov4.weights ] ; then 19 | wget -O yolo/yolov4.weights https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights 20 | fi 21 | if [ ! -f yolo/yolov4-csp.weights ] ; then 22 | wget -O yolo/yolov4-csp.weights https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.weights 23 | fi 24 | if [ ! -f yolo/yolov4x-mish.weights ] ; then 25 | wget -O yolo/yolov4x-mish.weights https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.weights 26 | fi 27 | -------------------------------------------------------------------------------- /weights/download_yolov5.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import os 4 | from pathlib import Path 5 | import requests 6 | import torch 7 | 8 | directory = 'yolov5' 9 | 10 | 11 | def download_weight( 12 | model: str, 13 | repo: str = 'ultralytics/yolov5' 14 | ) -> None: 15 | assets = [ 16 | 'yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt' 17 | ] 18 | assert model in assets, 'invalid model name' 19 | fpath = Path(os.path.join( 20 | directory, model.strip().replace("'", '').lower() 21 | )) 22 | if fpath.exists(): 23 | return 24 | try: 25 | response = requests.get( 26 | f'https://api.github.com/repos/{repo}/releases/latest' 27 | ).json() # github api 28 | tag = response['tag_name'] 29 | except Exception: 30 | print('cannot get tag name') 31 | return 32 | name = fpath.name 33 | redundant = False # second download option 34 | try: # GitHub 35 | url = f'https://github.com/{repo}/releases/download/{tag}/{name}' 36 | print(f'Downloading {model}...') 37 | torch.hub.download_url_to_file(url, fpath) 38 | # check 39 | assert fpath.exists() and fpath.stat().st_size > 1E6 40 | except Exception as e: # GCP 41 | print(f'Download error: {e}') 42 | assert redundant, 'No secondary mirror' 43 | url = f'https://storage.googleapis.com/{repo}/ckpt/{name}' 44 | print(f'Downloading {model}...') 45 | os.system(f'curl -L {url} -o {name}') 46 | finally: 47 | # check 48 | if not fpath.exists() or fpath.stat().st_size < 1E6: 49 | # remove partial downloads 50 | fpath.unlink(missing_ok=True) 51 | print('ERROR: Download failure') 52 | return 53 | 54 | 55 | if __name__ == '__main__': 56 | os.makedirs(directory, exist_ok=True) 57 | for x in ['s', 'm', 'l', 'x']: 
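        # skip weights that are already on disk, otherwise fetch the release asset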
58 | if os.path.isfile(f'{directory}/yolov5{x}.pt'): 59 | continue 60 | download_weight(model=f'yolov5{x}.pt') 61 | --------------------------------------------------------------------------------
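As a usage note for the (soft) NMS helper above: `filter_bboxes()` in `utils/nms.py` takes raw detections as an (N, 6) array — `xmin, ymin, xmax, ymax, category id, confidence` — and returns the surviving boxes in the same layout. A minimal sketch of a call (the sample boxes and scores are made-up values, and the repository root is assumed to be on `PYTHONPATH`):

```python
import numpy as np

from utils.nms import filter_bboxes

# three candidate detections for category 0:
# two heavily overlapping boxes and one separate box
detections = np.array([
    [10.0, 10.0, 110.0, 110.0, 0.0, 0.90],   # xmin, ymin, xmax, ymax, category id, confidence
    [12.0, 14.0, 108.0, 112.0, 0.0, 0.75],   # overlaps the first box -> down-weighted by soft NMS
    [200.0, 50.0, 300.0, 150.0, 0.0, 0.60],
])

kept = filter_bboxes(
    detections,
    conf_threshold=0.3,
    iou_threshold=0.45,
    disable_soft_nms=False,  # set True for classic (hard) NMS
)
print(kept)  # surviving boxes, still (M, 6) in the same column layout
```

With the default `disable_soft_nms=False`, overlapping boxes are down-weighted rather than removed outright, and any box whose decayed confidence falls below `conf_threshold` is dropped.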