├── .dockerignore ├── .github └── ISSUE_TEMPLATE │ ├── --bug-report.md │ └── --feature-request.md ├── .gitignore ├── .slurm.sh.swp ├── Dockerfile ├── README.md ├── cfg ├── yolov3-1cls.cfg ├── yolov3-spp-1cls.cfg ├── yolov3-spp-3cls.cfg ├── yolov3-spp-matrix.cfg ├── yolov3-spp-pan-scale.cfg ├── yolov3-spp.cfg ├── yolov3-spp3.cfg ├── yolov3-tiny-1cls.cfg ├── yolov3-tiny-3cls.cfg ├── yolov3-tiny-mask.cfg ├── yolov3-tiny.cfg ├── yolov3-tiny3-1cls.cfg ├── yolov3-tiny3.cfg ├── yolov3.cfg └── yolov3s.cfg ├── change_name.ipynb ├── detect.py ├── makeMain.ipynb ├── mask_on.wav ├── models.py ├── project ├── datasets.py ├── gcp.sh ├── parse_config.py ├── torch_utils.py └── utils.py ├── record.py ├── requirements.txt ├── slurm.sh ├── test.py ├── train.py ├── voc_label.ipynb └── weights └── download_yolov3_weights.sh /.dockerignore: -------------------------------------------------------------------------------- 1 | # Repo-specific DockerIgnore ------------------------------------------------------------------------------------------- 2 | # .git 3 | .cache 4 | .idea 5 | runs 6 | output 7 | coco 8 | storage.googleapis.com 9 | 10 | data/samples/* 11 | !data/samples/zidane.jpg 12 | !data/samples/bus.jpg 13 | **/results*.txt 14 | *.jpg 15 | 16 | # Neural Network weights ----------------------------------------------------------------------------------------------- 17 | **/*.weights 18 | **/*.pt 19 | **/*.onnx 20 | **/*.mlmodel 21 | **/darknet53.conv.74 22 | **/yolov3-tiny.conv.15 23 | 24 | 25 | # Below Copied From .gitignore ----------------------------------------------------------------------------------------- 26 | # Below Copied From .gitignore ----------------------------------------------------------------------------------------- 27 | 28 | 29 | # GitHub Python GitIgnore ---------------------------------------------------------------------------------------------- 30 | # Byte-compiled / optimized / DLL files 31 | __pycache__/ 32 | *.py[cod] 33 | *$py.class 34 | 35 | # C extensions 36 | *.so 37 | 38 | # Distribution / packaging 39 | .Python 40 | env/ 41 | build/ 42 | develop-eggs/ 43 | dist/ 44 | downloads/ 45 | eggs/ 46 | .eggs/ 47 | lib/ 48 | lib64/ 49 | parts/ 50 | sdist/ 51 | var/ 52 | wheels/ 53 | *.egg-info/ 54 | .installed.cfg 55 | *.egg 56 | 57 | # PyInstaller 58 | # Usually these files are written by a python script from a template 59 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
60 | *.manifest 61 | *.spec 62 | 63 | # Installer logs 64 | pip-log.txt 65 | pip-delete-this-directory.txt 66 | 67 | # Unit test / coverage reports 68 | htmlcov/ 69 | .tox/ 70 | .coverage 71 | .coverage.* 72 | .cache 73 | nosetests.xml 74 | coverage.xml 75 | *.cover 76 | .hypothesis/ 77 | 78 | # Translations 79 | *.mo 80 | *.pot 81 | 82 | # Django stuff: 83 | *.log 84 | local_settings.py 85 | 86 | # Flask stuff: 87 | instance/ 88 | .webassets-cache 89 | 90 | # Scrapy stuff: 91 | .scrapy 92 | 93 | # Sphinx documentation 94 | docs/_build/ 95 | 96 | # PyBuilder 97 | target/ 98 | 99 | # Jupyter Notebook 100 | .ipynb_checkpoints 101 | 102 | # pyenv 103 | .python-version 104 | 105 | # celery beat schedule file 106 | celerybeat-schedule 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # dotenv 112 | .env 113 | 114 | # virtualenv 115 | .venv 116 | venv/ 117 | ENV/ 118 | 119 | # Spyder project settings 120 | .spyderproject 121 | .spyproject 122 | 123 | # Rope project settings 124 | .ropeproject 125 | 126 | # mkdocs documentation 127 | /site 128 | 129 | # mypy 130 | .mypy_cache/ 131 | 132 | 133 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore ----------------------------------------------- 134 | 135 | # General 136 | .DS_Store 137 | .AppleDouble 138 | .LSOverride 139 | 140 | # Icon must end with two \r 141 | Icon 142 | Icon? 143 | 144 | # Thumbnails 145 | ._* 146 | 147 | # Files that might appear in the root of a volume 148 | .DocumentRevisions-V100 149 | .fseventsd 150 | .Spotlight-V100 151 | .TemporaryItems 152 | .Trashes 153 | .VolumeIcon.icns 154 | .com.apple.timemachine.donotpresent 155 | 156 | # Directories potentially created on remote AFP share 157 | .AppleDB 158 | .AppleDesktop 159 | Network Trash Folder 160 | Temporary Items 161 | .apdisk 162 | 163 | 164 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore 165 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 166 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 167 | 168 | # User-specific stuff: 169 | .idea/* 170 | .idea/**/workspace.xml 171 | .idea/**/tasks.xml 172 | .idea/dictionaries 173 | .html # Bokeh Plots 174 | .pg # TensorFlow Frozen Graphs 175 | .avi # videos 176 | 177 | # Sensitive or high-churn files: 178 | .idea/**/dataSources/ 179 | .idea/**/dataSources.ids 180 | .idea/**/dataSources.local.xml 181 | .idea/**/sqlDataSources.xml 182 | .idea/**/dynamic.xml 183 | .idea/**/uiDesigner.xml 184 | 185 | # Gradle: 186 | .idea/**/gradle.xml 187 | .idea/**/libraries 188 | 189 | # CMake 190 | cmake-build-debug/ 191 | cmake-build-release/ 192 | 193 | # Mongo Explorer plugin: 194 | .idea/**/mongoSettings.xml 195 | 196 | ## File-based project format: 197 | *.iws 198 | 199 | ## Plugin-specific files: 200 | 201 | # IntelliJ 202 | out/ 203 | 204 | # mpeltonen/sbt-idea plugin 205 | .idea_modules/ 206 | 207 | # JIRA plugin 208 | atlassian-ide-plugin.xml 209 | 210 | # Cursive Clojure plugin 211 | .idea/replstate.xml 212 | 213 | # Crashlytics plugin (for Android Studio and IntelliJ) 214 | com_crashlytics_export_strings.xml 215 | crashlytics.properties 216 | crashlytics-build.properties 217 | fabric.properties 218 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/--bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41BBug report" 3 | about: Create a report to help us 
improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## 🐛 Bug 11 | A clear and concise description of what the bug is. 12 | 13 | ## To Reproduce 14 | Steps to reproduce the behavior: 15 | 1. 16 | 2. 17 | 3. 18 | 19 | ## Expected behavior 20 | A clear and concise description of what you expected to happen. 21 | 22 | ## Environment 23 | If applicable, add screenshots to help explain your problem. 24 | 25 | **Desktop (please complete the following information):** 26 | - OS: [e.g. iOS] 27 | - Version [e.g. 22] 28 | 29 | **Smartphone (please complete the following information):** 30 | - Device: [e.g. iPhoneXS] 31 | - OS: [e.g. iOS8.1] 32 | - Version [e.g. 22] 33 | 34 | ## Additional context 35 | Add any other context about the problem here. 36 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/--feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F680Feature request" 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## 🚀 Feature 11 | 12 | 13 | ## Motivation 14 | 15 | 16 | 17 | ## Pitch 18 | 19 | 20 | 21 | ## Alternatives 22 | 23 | 24 | 25 | ## Additional context 26 | 27 | 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Repo-specific GitIgnore ---------------------------------------------------------------------------------------------- 2 | *.jpg 3 | *.png 4 | *.bmp 5 | *.tif 6 | *.heic 7 | *.JPG 8 | *.PNG 9 | *.TIF 10 | *.HEIC 11 | *.mp4 12 | *.mov 13 | *.MOV 14 | *.avi 15 | *.data 16 | *.json 17 | 18 | *.cfg 19 | !cfg/yolov3*.cfg 20 | 21 | storage.googleapis.com 22 | runs/* 23 | data/* 24 | !data/samples/zidane.jpg 25 | !data/samples/bus.jpg 26 | !data/coco.names 27 | !data/coco_paper.names 28 | !data/coco.data 29 | !data/coco_*.data 30 | !data/coco_*.txt 31 | !data/trainvalno5k.shapes 32 | !data/*.sh 33 | 34 | pycocotools/* 35 | results*.txt 36 | gcp_test*.sh 37 | 38 | # MATLAB GitIgnore ----------------------------------------------------------------------------------------------------- 39 | *.m~ 40 | *.mat 41 | !targets*.mat 42 | 43 | # Neural Network weights ----------------------------------------------------------------------------------------------- 44 | *.weights 45 | *.pt 46 | *.onnx 47 | *.mlmodel 48 | darknet53.conv.74 49 | yolov3-tiny.conv.15 50 | 51 | # GitHub Python GitIgnore ---------------------------------------------------------------------------------------------- 52 | # Byte-compiled / optimized / DLL files 53 | __pycache__/ 54 | *.py[cod] 55 | *$py.class 56 | 57 | # C extensions 58 | *.so 59 | 60 | # Distribution / packaging 61 | .Python 62 | env/ 63 | build/ 64 | develop-eggs/ 65 | dist/ 66 | downloads/ 67 | eggs/ 68 | .eggs/ 69 | lib/ 70 | lib64/ 71 | parts/ 72 | sdist/ 73 | var/ 74 | wheels/ 75 | *.egg-info/ 76 | .installed.cfg 77 | *.egg 78 | 79 | # PyInstaller 80 | # Usually these files are written by a python script from a template 81 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
82 | *.manifest 83 | *.spec 84 | 85 | # Installer logs 86 | pip-log.txt 87 | pip-delete-this-directory.txt 88 | 89 | # Unit test / coverage reports 90 | htmlcov/ 91 | .tox/ 92 | .coverage 93 | .coverage.* 94 | .cache 95 | nosetests.xml 96 | coverage.xml 97 | *.cover 98 | .hypothesis/ 99 | 100 | # Translations 101 | *.mo 102 | *.pot 103 | 104 | # Django stuff: 105 | *.log 106 | local_settings.py 107 | 108 | # Flask stuff: 109 | instance/ 110 | .webassets-cache 111 | 112 | # Scrapy stuff: 113 | .scrapy 114 | 115 | # Sphinx documentation 116 | docs/_build/ 117 | 118 | # PyBuilder 119 | target/ 120 | 121 | # Jupyter Notebook 122 | .ipynb_checkpoints 123 | 124 | # pyenv 125 | .python-version 126 | 127 | # celery beat schedule file 128 | celerybeat-schedule 129 | 130 | # SageMath parsed files 131 | *.sage.py 132 | 133 | # dotenv 134 | .env 135 | 136 | # virtualenv 137 | .venv 138 | venv/ 139 | ENV/ 140 | 141 | # Spyder project settings 142 | .spyderproject 143 | .spyproject 144 | 145 | # Rope project settings 146 | .ropeproject 147 | 148 | # mkdocs documentation 149 | /site 150 | 151 | # mypy 152 | .mypy_cache/ 153 | 154 | 155 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore ----------------------------------------------- 156 | 157 | # General 158 | .DS_Store 159 | .AppleDouble 160 | .LSOverride 161 | 162 | # Icon must end with two \r 163 | Icon 164 | Icon? 165 | 166 | # Thumbnails 167 | ._* 168 | 169 | # Files that might appear in the root of a volume 170 | .DocumentRevisions-V100 171 | .fseventsd 172 | .Spotlight-V100 173 | .TemporaryItems 174 | .Trashes 175 | .VolumeIcon.icns 176 | .com.apple.timemachine.donotpresent 177 | 178 | # Directories potentially created on remote AFP share 179 | .AppleDB 180 | .AppleDesktop 181 | Network Trash Folder 182 | Temporary Items 183 | .apdisk 184 | 185 | 186 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore 187 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 188 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 189 | 190 | # User-specific stuff: 191 | .idea/* 192 | .idea/**/workspace.xml 193 | .idea/**/tasks.xml 194 | .idea/dictionaries 195 | .html # Bokeh Plots 196 | .pg # TensorFlow Frozen Graphs 197 | .avi # videos 198 | 199 | # Sensitive or high-churn files: 200 | .idea/**/dataSources/ 201 | .idea/**/dataSources.ids 202 | .idea/**/dataSources.local.xml 203 | .idea/**/sqlDataSources.xml 204 | .idea/**/dynamic.xml 205 | .idea/**/uiDesigner.xml 206 | 207 | # Gradle: 208 | .idea/**/gradle.xml 209 | .idea/**/libraries 210 | 211 | # CMake 212 | cmake-build-debug/ 213 | cmake-build-release/ 214 | 215 | # Mongo Explorer plugin: 216 | .idea/**/mongoSettings.xml 217 | 218 | ## File-based project format: 219 | *.iws 220 | 221 | ## Plugin-specific files: 222 | 223 | # IntelliJ 224 | out/ 225 | 226 | # mpeltonen/sbt-idea plugin 227 | .idea_modules/ 228 | 229 | # JIRA plugin 230 | atlassian-ide-plugin.xml 231 | 232 | # Cursive Clojure plugin 233 | .idea/replstate.xml 234 | 235 | # Crashlytics plugin (for Android Studio and IntelliJ) 236 | com_crashlytics_export_strings.xml 237 | crashlytics.properties 238 | crashlytics-build.properties 239 | fabric.properties 240 | -------------------------------------------------------------------------------- /.slurm.sh.swp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zhengmingzhang/mask-detection/ff8a57b81ced6bc3fa6c1ae01f3b08cf1cb23e60/.slurm.sh.swp -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Start FROM Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch 2 | FROM nvcr.io/nvidia/pytorch:20.01-py3 3 | 4 | # Install dependencies (pip or conda) 5 | RUN pip install -U gsutil 6 | # RUN pip install -U -r requirements.txt 7 | # RUN conda update -n base -c defaults conda 8 | # RUN conda install -y -c anaconda future numpy opencv matplotlib tqdm pillow 9 | # RUN conda install -y -c conda-forge scikit-image tensorboard pycocotools 10 | 11 | ## Install OpenCV with Gstreamer support 12 | #WORKDIR /usr/src 13 | #RUN pip uninstall -y opencv-python 14 | #RUN apt-get update 15 | #RUN apt-get install -y gstreamer1.0-tools gstreamer1.0-python3-dbg-plugin-loader libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev 16 | #RUN git clone https://github.com/opencv/opencv.git && cd opencv && git checkout 4.1.1 && mkdir build 17 | #RUN git clone https://github.com/opencv/opencv_contrib.git && cd opencv_contrib && git checkout 4.1.1 18 | #RUN cd opencv/build && cmake ../ \ 19 | # -D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules \ 20 | # -D BUILD_OPENCV_PYTHON3=ON \ 21 | # -D PYTHON3_EXECUTABLE=/opt/conda/bin/python \ 22 | # -D PYTHON3_INCLUDE_PATH=/opt/conda/include/python3.6m \ 23 | # -D PYTHON3_LIBRARIES=/opt/conda/lib/python3.6/site-packages \ 24 | # -D WITH_GSTREAMER=ON \ 25 | # -D WITH_FFMPEG=OFF \ 26 | # && make && make install && ldconfig 27 | #RUN cd /usr/local/lib/python3.6/site-packages/cv2/python-3.6/ && mv cv2.cpython-36m-x86_64-linux-gnu.so cv2.so 28 | #RUN cd /opt/conda/lib/python3.6/site-packages/ && ln -s /usr/local/lib/python3.6/site-packages/cv2/python-3.6/cv2.so cv2.so 29 | #RUN python3 -c "import cv2; print(cv2.getBuildInformation())" 30 | 31 | # Create working directory 32 | RUN mkdir -p /usr/src/app 33 | WORKDIR /usr/src/app 34 | 35 | # Copy contents 36 | COPY . /usr/src/app 37 | 38 | # Copy weights 39 | #RUN python3 -c "from models import *; \ 40 | #attempt_download('weights/yolov3.pt'); \ 41 | #attempt_download('weights/yolov3-spp.pt')" 42 | 43 | 44 | # --------------------------------------------------- Extras Below --------------------------------------------------- 45 | 46 | # Build and Push 47 | # t=ultralytics/yolov3:v0 && sudo docker build -t $t . 
&& sudo docker push $t 48 | 49 | # Run 50 | # t=ultralytics/yolov3:v0 && sudo docker pull $t && sudo docker run -it $t bash 51 | 52 | # Pull and Run with local directory access 53 | # t=ultralytics/yolov3:v0 && sudo docker pull $t && sudo docker run -it -v "$(pwd)"/coco:/usr/src/coco $t bash 54 | 55 | # Kill all 56 | # sudo docker kill "$(sudo docker ps -q)" 57 | 58 | # Kill all image-based 59 | # sudo docker kill $(sudo docker ps -a -q --filter ancestor=ultralytics/yolov3:v0) 60 | 61 | # Run bash for loop 62 | # sudo docker run --gpus all --ipc=host ultralytics/yolov3:v0 while true; do python3 train.py --evolve; done 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mask-detection 2 | ## Project Overview 3 | In response to the current epidemic, major tech companies have started using technology to support prevention efforts. This project trains YOLOv3 on a face-mask dataset; the resulting model can recognize whether a person is wearing a mask. If someone is detected without a mask, a voice prompt announces "Please wear a mask"; if the person is wearing one, the system stays silent. 4 | ## Dataset 5 | The training dataset comes from the Bilibili uploader HamlinZheng; many thanks for generously sharing it. 6 | ## How to Use This Project 7 | Link: https://pan.baidu.com/s/11z6hmBitSHG4TjilDNFJfQ 8 | Extraction code: 2zvl 9 | 10 | Put best.pt into the weights folder, create a data folder and place mask.data and mask.name in it, and after setting up the project environment, run the following from the command line: 11 | 12 | python detect.py --data-cfg data/mask.data --cfg cfg/yolov3-tiny-mask.cfg --weights weights/best.pt 13 | You will then see the results. 14 | 15 | The voice prompt is one I recorded myself and it does not sound great; if you would like to record your own reminder, simply run record.py and record your own audio. 16 | 17 | The model I used is yolov3-tiny, and the training results are actually not that good; I plan to augment the data and improve the model over the next few days. 18 | ## How to Train on Your Own Dataset 19 | To retrain on your own dataset, you need to convert it to the VOC dataset format. There are many tutorials online; the one I followed is 20 | https://blog.csdn.net/qq_21578849/article/details/84980298 21 | Anyone interested can try training it themselves. 22 | -------------------------------------------------------------------------------- /cfg/yolov3-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # 
Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | 
from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 
552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=18 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=1 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=18 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=1 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | 
pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=18 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=1 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /cfg/yolov3-spp-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=100 20 | max_batches = 5000 21 | policy=steps 22 | steps=4000,4500 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 
188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | 
filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | 
activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=18 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=1 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=18 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 729 | classes=1 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=18 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 816 | classes=1 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /cfg/yolov3-spp-3cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 
12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=100 20 | max_batches = 5000 21 | policy=steps 22 | steps=4000,4500 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | 
filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | 
[shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=24 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=3 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 
677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=24 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 729 | classes=3 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=24 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 816 | classes=3 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /cfg/yolov3-spp-pan-scale.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=32 8 | width=544 9 | height=544 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | 19 | learning_rate=0.001 20 | burn_in=1000 21 | max_batches = 10000 22 | 23 | policy=steps 24 | steps=8000,9000 25 | scales=.1,.1 26 | 27 | #policy=sgdr 28 | #sgdr_cycle=1000 29 | #sgdr_mult=2 30 | #steps=4000,6000,8000,9000 31 | #scales=1, 1, 0.1, 0.1 32 | 33 | [convolutional] 34 | batch_normalize=1 35 | filters=32 36 | size=3 37 | stride=1 38 | pad=1 39 | activation=leaky 40 | 41 | # Downsample 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=64 46 | size=3 47 | stride=2 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=32 54 | size=1 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | 
[convolutional] 60 | batch_normalize=1 61 | filters=64 62 | size=3 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [shortcut] 68 | from=-3 69 | activation=linear 70 | 71 | # Downsample 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=128 76 | size=3 77 | stride=2 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=64 84 | size=1 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [convolutional] 90 | batch_normalize=1 91 | filters=128 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [shortcut] 98 | from=-3 99 | activation=linear 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=64 104 | size=1 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [convolutional] 110 | batch_normalize=1 111 | filters=128 112 | size=3 113 | stride=1 114 | pad=1 115 | activation=leaky 116 | 117 | [shortcut] 118 | from=-3 119 | activation=linear 120 | 121 | # Downsample 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=256 126 | size=3 127 | stride=2 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=128 134 | size=1 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [convolutional] 140 | batch_normalize=1 141 | filters=256 142 | size=3 143 | stride=1 144 | pad=1 145 | activation=leaky 146 | 147 | [shortcut] 148 | from=-3 149 | activation=linear 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=128 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [shortcut] 168 | from=-3 169 | activation=linear 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=128 174 | size=1 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [convolutional] 180 | batch_normalize=1 181 | filters=256 182 | size=3 183 | stride=1 184 | pad=1 185 | activation=leaky 186 | 187 | [shortcut] 188 | from=-3 189 | activation=linear 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=128 194 | size=1 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [convolutional] 200 | batch_normalize=1 201 | filters=256 202 | size=3 203 | stride=1 204 | pad=1 205 | activation=leaky 206 | 207 | [shortcut] 208 | from=-3 209 | activation=linear 210 | 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=128 215 | size=1 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [convolutional] 221 | batch_normalize=1 222 | filters=256 223 | size=3 224 | stride=1 225 | pad=1 226 | activation=leaky 227 | 228 | [shortcut] 229 | from=-3 230 | activation=linear 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=128 235 | size=1 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [convolutional] 241 | batch_normalize=1 242 | filters=256 243 | size=3 244 | stride=1 245 | pad=1 246 | activation=leaky 247 | 248 | [shortcut] 249 | from=-3 250 | activation=linear 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=128 255 | size=1 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [convolutional] 261 | batch_normalize=1 262 | filters=256 263 | size=3 264 | stride=1 265 | pad=1 266 | activation=leaky 267 | 268 | [shortcut] 269 | from=-3 270 | activation=linear 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=128 275 | size=1 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | 
[convolutional] 281 | batch_normalize=1 282 | filters=256 283 | size=3 284 | stride=1 285 | pad=1 286 | activation=leaky 287 | 288 | [shortcut] 289 | from=-3 290 | activation=linear 291 | 292 | # Downsample 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=512 297 | size=3 298 | stride=2 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=256 305 | size=1 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [convolutional] 311 | batch_normalize=1 312 | filters=512 313 | size=3 314 | stride=1 315 | pad=1 316 | activation=leaky 317 | 318 | [shortcut] 319 | from=-3 320 | activation=linear 321 | 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=256 326 | size=1 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [convolutional] 332 | batch_normalize=1 333 | filters=512 334 | size=3 335 | stride=1 336 | pad=1 337 | activation=leaky 338 | 339 | [shortcut] 340 | from=-3 341 | activation=linear 342 | 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=256 347 | size=1 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [convolutional] 353 | batch_normalize=1 354 | filters=512 355 | size=3 356 | stride=1 357 | pad=1 358 | activation=leaky 359 | 360 | [shortcut] 361 | from=-3 362 | activation=linear 363 | 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=256 368 | size=1 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [convolutional] 374 | batch_normalize=1 375 | filters=512 376 | size=3 377 | stride=1 378 | pad=1 379 | activation=leaky 380 | 381 | [shortcut] 382 | from=-3 383 | activation=linear 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=256 388 | size=1 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [convolutional] 394 | batch_normalize=1 395 | filters=512 396 | size=3 397 | stride=1 398 | pad=1 399 | activation=leaky 400 | 401 | [shortcut] 402 | from=-3 403 | activation=linear 404 | 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=256 409 | size=1 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [convolutional] 415 | batch_normalize=1 416 | filters=512 417 | size=3 418 | stride=1 419 | pad=1 420 | activation=leaky 421 | 422 | [shortcut] 423 | from=-3 424 | activation=linear 425 | 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=256 430 | size=1 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [convolutional] 436 | batch_normalize=1 437 | filters=512 438 | size=3 439 | stride=1 440 | pad=1 441 | activation=leaky 442 | 443 | [shortcut] 444 | from=-3 445 | activation=linear 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=256 450 | size=1 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [convolutional] 456 | batch_normalize=1 457 | filters=512 458 | size=3 459 | stride=1 460 | pad=1 461 | activation=leaky 462 | 463 | [shortcut] 464 | from=-3 465 | activation=linear 466 | 467 | # Downsample 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=1024 472 | size=3 473 | stride=2 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=512 480 | size=1 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [convolutional] 486 | batch_normalize=1 487 | filters=1024 488 | size=3 489 | stride=1 490 | pad=1 491 | activation=leaky 492 | 493 | [shortcut] 494 | from=-3 495 | activation=linear 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=512 500 | 
size=1 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [convolutional] 506 | batch_normalize=1 507 | filters=1024 508 | size=3 509 | stride=1 510 | pad=1 511 | activation=leaky 512 | 513 | [shortcut] 514 | from=-3 515 | activation=linear 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=512 520 | size=1 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [convolutional] 526 | batch_normalize=1 527 | filters=1024 528 | size=3 529 | stride=1 530 | pad=1 531 | activation=leaky 532 | 533 | [shortcut] 534 | from=-3 535 | activation=linear 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=512 540 | size=1 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [convolutional] 546 | batch_normalize=1 547 | filters=1024 548 | size=3 549 | stride=1 550 | pad=1 551 | activation=leaky 552 | 553 | [shortcut] 554 | from=-3 555 | activation=linear 556 | 557 | ###################### 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | filters=512 562 | size=1 563 | stride=1 564 | pad=1 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | size=3 570 | stride=1 571 | pad=1 572 | filters=1024 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | filters=512 578 | size=1 579 | stride=1 580 | pad=1 581 | activation=leaky 582 | 583 | ### SPP ### 584 | [maxpool] 585 | stride=1 586 | size=5 587 | 588 | [route] 589 | layers=-2 590 | 591 | [maxpool] 592 | stride=1 593 | size=9 594 | 595 | [route] 596 | layers=-4 597 | 598 | [maxpool] 599 | stride=1 600 | size=13 601 | 602 | [route] 603 | layers=-1,-3,-5,-6 604 | 605 | ### End SPP ### 606 | 607 | [convolutional] 608 | batch_normalize=1 609 | filters=512 610 | size=1 611 | stride=1 612 | pad=1 613 | activation=leaky 614 | 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | size=3 619 | stride=1 620 | pad=1 621 | filters=1024 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | filters=512 627 | size=1 628 | stride=1 629 | pad=1 630 | activation=leaky 631 | 632 | 633 | 634 | ########### to [yolo-3] 635 | 636 | 637 | 638 | [route] 639 | layers = -4 640 | 641 | [convolutional] 642 | batch_normalize=1 643 | filters=256 644 | size=1 645 | stride=1 646 | pad=1 647 | activation=leaky 648 | 649 | [upsample] 650 | stride=2 651 | 652 | [route] 653 | layers = -1, 61 654 | 655 | 656 | 657 | [convolutional] 658 | batch_normalize=1 659 | filters=256 660 | size=1 661 | stride=1 662 | pad=1 663 | activation=leaky 664 | 665 | [convolutional] 666 | batch_normalize=1 667 | size=3 668 | stride=1 669 | pad=1 670 | filters=512 671 | activation=leaky 672 | 673 | [convolutional] 674 | batch_normalize=1 675 | filters=256 676 | size=1 677 | stride=1 678 | pad=1 679 | activation=leaky 680 | 681 | [convolutional] 682 | batch_normalize=1 683 | size=3 684 | stride=1 685 | pad=1 686 | filters=512 687 | activation=leaky 688 | 689 | [convolutional] 690 | batch_normalize=1 691 | filters=256 692 | size=1 693 | stride=1 694 | pad=1 695 | activation=leaky 696 | 697 | 698 | ########### to [yolo-2] 699 | 700 | 701 | 702 | 703 | [route] 704 | layers = -4 705 | 706 | [convolutional] 707 | batch_normalize=1 708 | filters=128 709 | size=1 710 | stride=1 711 | pad=1 712 | activation=leaky 713 | 714 | [upsample] 715 | stride=2 716 | 717 | [route] 718 | layers = -1, 36 719 | 720 | 721 | 722 | [convolutional] 723 | batch_normalize=1 724 | filters=128 725 | size=1 726 | stride=1 727 | pad=1 728 | activation=leaky 729 | 730 | [convolutional] 731 | 
batch_normalize=1 732 | size=3 733 | stride=1 734 | pad=1 735 | filters=256 736 | activation=leaky 737 | 738 | [convolutional] 739 | batch_normalize=1 740 | filters=128 741 | size=1 742 | stride=1 743 | pad=1 744 | activation=leaky 745 | 746 | [convolutional] 747 | batch_normalize=1 748 | size=3 749 | stride=1 750 | pad=1 751 | filters=256 752 | activation=leaky 753 | 754 | [convolutional] 755 | batch_normalize=1 756 | filters=128 757 | size=1 758 | stride=1 759 | pad=1 760 | activation=leaky 761 | 762 | 763 | 764 | ########### to [yolo-1] 765 | 766 | 767 | ########### features of different layers 768 | 769 | 770 | [route] 771 | layers=1 772 | 773 | [reorg3d] 774 | stride=2 775 | 776 | [route] 777 | layers=5,-1 778 | 779 | [reorg3d] 780 | stride=2 781 | 782 | [route] 783 | layers=12,-1 784 | 785 | [reorg3d] 786 | stride=2 787 | 788 | [route] 789 | layers=37,-1 790 | 791 | [reorg3d] 792 | stride=2 793 | 794 | [route] 795 | layers=62,-1 796 | 797 | 798 | 799 | ########### [yolo-1] 800 | 801 | [convolutional] 802 | batch_normalize=1 803 | filters=128 804 | size=1 805 | stride=1 806 | pad=1 807 | activation=leaky 808 | 809 | [upsample] 810 | stride=4 811 | 812 | [route] 813 | layers = -1,-12 814 | 815 | 816 | [convolutional] 817 | batch_normalize=1 818 | size=3 819 | stride=1 820 | pad=1 821 | filters=256 822 | activation=leaky 823 | 824 | [convolutional] 825 | size=1 826 | stride=1 827 | pad=1 828 | filters=340 829 | activation=linear 830 | 831 | 832 | [yolo] 833 | mask = 0,1,2,3 834 | anchors = 8,8, 10,13, 16,30, 33,23, 32,32, 30,61, 62,45, 64,64, 59,119, 116,90, 156,198, 373,326 835 | classes=80 836 | num=12 837 | jitter=.3 838 | ignore_thresh = .7 839 | truth_thresh = 1 840 | scale_x_y = 1.05 841 | random=0 842 | 843 | 844 | 845 | 846 | ########### [yolo-2] 847 | 848 | 849 | [route] 850 | layers = -7 851 | 852 | [convolutional] 853 | batch_normalize=1 854 | filters=256 855 | size=1 856 | stride=1 857 | pad=1 858 | activation=leaky 859 | 860 | [upsample] 861 | stride=2 862 | 863 | [route] 864 | layers = -1,-28 865 | 866 | 867 | [convolutional] 868 | batch_normalize=1 869 | size=3 870 | stride=1 871 | pad=1 872 | filters=512 873 | activation=leaky 874 | 875 | [convolutional] 876 | size=1 877 | stride=1 878 | pad=1 879 | filters=340 880 | activation=linear 881 | 882 | 883 | [yolo] 884 | mask = 4,5,6,7 885 | anchors = 8,8, 10,13, 16,30, 33,23, 32,32, 30,61, 62,45, 64,64, 59,119, 116,90, 156,198, 373,326 886 | classes=80 887 | num=12 888 | jitter=.3 889 | ignore_thresh = .7 890 | truth_thresh = 1 891 | scale_x_y = 1.1 892 | random=0 893 | 894 | 895 | 896 | ########### [yolo-3] 897 | 898 | [route] 899 | layers = -14 900 | 901 | [convolutional] 902 | batch_normalize=1 903 | filters=512 904 | size=1 905 | stride=1 906 | pad=1 907 | activation=leaky 908 | 909 | [route] 910 | layers = -1,-43 911 | 912 | [convolutional] 913 | batch_normalize=1 914 | size=3 915 | stride=1 916 | pad=1 917 | filters=1024 918 | activation=leaky 919 | 920 | 921 | [convolutional] 922 | size=1 923 | stride=1 924 | pad=1 925 | filters=340 926 | activation=linear 927 | 928 | 929 | [yolo] 930 | mask = 8,9,10,11 931 | anchors = 8,8, 10,13, 16,30, 33,23, 32,32, 30,61, 62,45, 59,119, 80,80, 116,90, 156,198, 373,326 932 | classes=80 933 | num=12 934 | jitter=.3 935 | ignore_thresh = .7 936 | truth_thresh = 1 937 | scale_x_y = 1.2 938 | random=0 939 | -------------------------------------------------------------------------------- /cfg/yolov3-spp.cfg: -------------------------------------------------------------------------------- 
1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | 
[convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | 
pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=255 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=80 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 
| [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=255 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 729 | classes=80 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=255 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 816 | classes=80 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /cfg/yolov3-spp3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 120200 21 | policy=steps 22 | steps=70000,100000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | 
pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | 
activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 
491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=255 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=80 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | ### SPP ### 687 | [maxpool] 688 | stride=1 689 | size=5 690 | 691 | [route] 692 | layers=-2 693 | 694 | [maxpool] 695 | stride=1 696 | size=9 697 | 698 | [route] 699 | layers=-4 700 | 701 | [maxpool] 702 | stride=1 703 | size=13 704 | 705 | [route] 706 | layers=-1,-3,-5,-6 707 | 708 | ### End SPP ### 709 | 710 | 711 | [convolutional] 712 | batch_normalize=1 713 | filters=256 714 | size=1 715 | stride=1 716 | pad=1 717 | activation=leaky 
718 | 719 | [convolutional] 720 | batch_normalize=1 721 | size=3 722 | stride=1 723 | pad=1 724 | filters=512 725 | activation=leaky 726 | 727 | [convolutional] 728 | batch_normalize=1 729 | filters=256 730 | size=1 731 | stride=1 732 | pad=1 733 | activation=leaky 734 | 735 | [convolutional] 736 | batch_normalize=1 737 | size=3 738 | stride=1 739 | pad=1 740 | filters=512 741 | activation=leaky 742 | 743 | [convolutional] 744 | size=1 745 | stride=1 746 | pad=1 747 | filters=255 748 | activation=linear 749 | 750 | 751 | [yolo] 752 | mask = 3,4,5 753 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 754 | classes=80 755 | num=9 756 | jitter=.3 757 | ignore_thresh = .7 758 | truth_thresh = 1 759 | random=1 760 | 761 | 762 | 763 | [route] 764 | layers = -4 765 | 766 | [convolutional] 767 | batch_normalize=1 768 | filters=128 769 | size=1 770 | stride=1 771 | pad=1 772 | activation=leaky 773 | 774 | [upsample] 775 | stride=2 776 | 777 | [route] 778 | layers = -1, 36 779 | 780 | 781 | 782 | [convolutional] 783 | batch_normalize=1 784 | filters=128 785 | size=1 786 | stride=1 787 | pad=1 788 | activation=leaky 789 | 790 | [convolutional] 791 | batch_normalize=1 792 | size=3 793 | stride=1 794 | pad=1 795 | filters=256 796 | activation=leaky 797 | 798 | [convolutional] 799 | batch_normalize=1 800 | filters=128 801 | size=1 802 | stride=1 803 | pad=1 804 | activation=leaky 805 | 806 | ### SPP ### 807 | [maxpool] 808 | stride=1 809 | size=5 810 | 811 | [route] 812 | layers=-2 813 | 814 | [maxpool] 815 | stride=1 816 | size=9 817 | 818 | [route] 819 | layers=-4 820 | 821 | [maxpool] 822 | stride=1 823 | size=13 824 | 825 | [route] 826 | layers=-1,-3,-5,-6 827 | 828 | ### End SPP ### 829 | 830 | [convolutional] 831 | batch_normalize=1 832 | size=3 833 | stride=1 834 | pad=1 835 | filters=256 836 | activation=leaky 837 | 838 | [convolutional] 839 | batch_normalize=1 840 | filters=128 841 | size=1 842 | stride=1 843 | pad=1 844 | activation=leaky 845 | 846 | [convolutional] 847 | batch_normalize=1 848 | size=3 849 | stride=1 850 | pad=1 851 | filters=256 852 | activation=leaky 853 | 854 | [convolutional] 855 | size=1 856 | stride=1 857 | pad=1 858 | filters=255 859 | activation=linear 860 | 861 | 862 | [yolo] 863 | mask = 0,1,2 864 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 865 | classes=80 866 | num=9 867 | jitter=.3 868 | ignore_thresh = .7 869 | truth_thresh = 1 870 | random=1 871 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | 
size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=18 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=1 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=18 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=1 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny-3cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | 
[maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=24 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=3 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=24 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=3 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny-mask.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | 
stride=1 126 | pad=1 127 | filters=21 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=2 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=21 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 1,2,3 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=2 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | 
[route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 1,2,3 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny3-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 200000 21 | policy=steps 22 | steps=180000,190000 23 | scales=.1,.1 24 | 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [maxpool] 35 | size=2 36 | stride=2 37 | 38 | [convolutional] 39 | batch_normalize=1 40 | filters=32 41 | size=3 42 | stride=1 43 | pad=1 44 | activation=leaky 45 | 46 | [maxpool] 47 | size=2 48 | stride=2 49 | 50 | [convolutional] 51 | batch_normalize=1 52 | filters=64 53 | size=3 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [maxpool] 59 | size=2 60 | stride=2 61 | 62 | [convolutional] 63 | batch_normalize=1 64 | filters=128 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=leaky 69 | 70 | [maxpool] 71 | size=2 72 | stride=2 73 | 74 | [convolutional] 75 | batch_normalize=1 76 | filters=256 77 | size=3 78 | stride=1 79 | pad=1 80 | activation=leaky 81 | 82 | [maxpool] 83 | size=2 84 | stride=2 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=512 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [maxpool] 95 | size=2 96 | stride=1 97 | 98 | [convolutional] 99 | batch_normalize=1 100 | filters=1024 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | ########### 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | size=1 126 | stride=1 127 | pad=1 128 | filters=18 129 | activation=linear 130 | 131 | 132 | 133 | [yolo] 134 | mask = 6,7,8 135 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 136 | classes=1 137 | num=9 138 | jitter=.3 139 | ignore_thresh = .7 140 | truth_thresh = 1 141 | random=1 142 | 143 | [route] 144 | layers = -4 145 | 146 | [convolutional] 147 | batch_normalize=1 148 | filters=128 149 | size=1 150 | stride=1 151 | pad=1 152 | activation=leaky 153 | 154 | [upsample] 155 | stride=2 156 | 157 | [route] 158 | layers = -1, 8 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=256 163 | size=3 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | size=1 170 | stride=1 171 | pad=1 172 | filters=18 173 | activation=linear 174 | 175 | [yolo] 176 | mask = 3,4,5 177 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 178 | classes=1 179 | num=9 180 | jitter=.3 181 | ignore_thresh = .7 182 | 
truth_thresh = 1 183 | random=1 184 | 185 | 186 | 187 | [route] 188 | layers = -3 189 | 190 | [convolutional] 191 | batch_normalize=1 192 | filters=128 193 | size=1 194 | stride=1 195 | pad=1 196 | activation=leaky 197 | 198 | [upsample] 199 | stride=2 200 | 201 | [route] 202 | layers = -1, 6 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=3 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=18 217 | activation=linear 218 | 219 | [yolo] 220 | mask = 0,1,2 221 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 222 | classes=1 223 | num=9 224 | jitter=.3 225 | ignore_thresh = .7 226 | truth_thresh = 1 227 | random=1 228 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 200000 21 | policy=steps 22 | steps=180000,190000 23 | scales=.1,.1 24 | 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [maxpool] 35 | size=2 36 | stride=2 37 | 38 | [convolutional] 39 | batch_normalize=1 40 | filters=32 41 | size=3 42 | stride=1 43 | pad=1 44 | activation=leaky 45 | 46 | [maxpool] 47 | size=2 48 | stride=2 49 | 50 | [convolutional] 51 | batch_normalize=1 52 | filters=64 53 | size=3 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [maxpool] 59 | size=2 60 | stride=2 61 | 62 | [convolutional] 63 | batch_normalize=1 64 | filters=128 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=leaky 69 | 70 | [maxpool] 71 | size=2 72 | stride=2 73 | 74 | [convolutional] 75 | batch_normalize=1 76 | filters=256 77 | size=3 78 | stride=1 79 | pad=1 80 | activation=leaky 81 | 82 | [maxpool] 83 | size=2 84 | stride=2 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=512 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [maxpool] 95 | size=2 96 | stride=1 97 | 98 | [convolutional] 99 | batch_normalize=1 100 | filters=1024 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | ########### 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | size=1 126 | stride=1 127 | pad=1 128 | filters=255 129 | activation=linear 130 | 131 | 132 | 133 | [yolo] 134 | mask = 6,7,8 135 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 136 | classes=80 137 | num=9 138 | jitter=.3 139 | ignore_thresh = .7 140 | truth_thresh = 1 141 | random=1 142 | 143 | [route] 144 | layers = -4 145 | 146 | [convolutional] 147 | batch_normalize=1 148 | filters=128 149 | size=1 150 | stride=1 151 | pad=1 152 | activation=leaky 153 | 154 | [upsample] 155 | stride=2 156 | 157 | [route] 158 | layers = -1, 8 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=256 163 | size=3 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | 
[convolutional] 169 | size=1 170 | stride=1 171 | pad=1 172 | filters=255 173 | activation=linear 174 | 175 | [yolo] 176 | mask = 3,4,5 177 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 178 | classes=80 179 | num=9 180 | jitter=.3 181 | ignore_thresh = .7 182 | truth_thresh = 1 183 | random=1 184 | 185 | 186 | 187 | [route] 188 | layers = -3 189 | 190 | [convolutional] 191 | batch_normalize=1 192 | filters=128 193 | size=1 194 | stride=1 195 | pad=1 196 | activation=leaky 197 | 198 | [upsample] 199 | stride=2 200 | 201 | [route] 202 | layers = -1, 6 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=3 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=255 217 | activation=linear 218 | 219 | [yolo] 220 | mask = 0,1,2 221 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 222 | classes=80 223 | num=9 224 | jitter=.3 225 | ignore_thresh = .7 226 | truth_thresh = 1 227 | random=1 228 | -------------------------------------------------------------------------------- /cfg/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 
| stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 
| filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 
| filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /cfg/yolov3s.cfg: 
-------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=swish 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=swish 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=swish 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=swish 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=swish 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=swish 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=swish 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=swish 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=swish 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=swish 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=swish 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=swish 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=swish 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=swish 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=swish 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=swish 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=swish 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=swish 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=swish 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | 
activation=swish 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=swish 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=swish 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=swish 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=swish 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=swish 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=swish 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=swish 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=swish 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=swish 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=swish 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=swish 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=swish 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=swish 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=swish 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=swish 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=swish 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=swish 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=swish 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=swish 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=swish 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=swish 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | 
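# Note: yolov3s.cfg appears to mirror the yolov3-spp layout (same residual backbone plus
# an SPP block before the first [yolo] head) but with activation=swish, i.e. x * sigmoid(x),
# in place of leaky ReLU. Caution: create_modules() in models.py, as written, only attaches
# an activation module when activation=leaky, so these 'swish' entries would be silently
# skipped unless that function is extended.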
[convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=swish 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=swish 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=swish 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=swish 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=swish 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=swish 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=swish 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=swish 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=swish 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=swish 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=swish 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=swish 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=swish 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=swish 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=swish 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=swish 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=swish 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=swish 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=255 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=80 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | 
stride=1 659 | pad=1 660 | activation=swish 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=swish 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=swish 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=swish 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=swish 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=swish 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=swish 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=255 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 729 | classes=80 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=swish 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=swish 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=swish 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=swish 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=swish 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=swish 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=swish 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=255 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 816 | classes=80 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import threading 4 | from sys import platform 5 | from models import * 6 | from project.datasets import * 7 | from project.utils import * 8 | from pydub import AudioSegment 9 | 10 | from pydub.playback import play 11 | 12 | def show(weights, im0): 13 | cv2.imshow(weights, im0) 14 | def playsound(): 15 | song = AudioSegment.from_wav('mask_on.wav') 16 | play(song) 17 | def detect( 18 | cfg, 19 | data_cfg, 20 | weights, 21 | images='data/samples', # input folder 22 | output='output', # output folder 23 | fourcc='mp4v', 24 | img_size=416, 25 | conf_thres=0.5, 26 | nms_thres=0.5, 27 | save_txt=False, 28 | save_images=True, 29 | 
webcam=True 30 | ): 31 | device = torch_utils.select_device() 32 | if os.path.exists(output): 33 | shutil.rmtree(output) # delete output folder 34 | os.makedirs(output) # make new output folder 35 | 36 | # Initialize model 37 | if ONNX_EXPORT: 38 | s = (320, 192) # onnx model image size (height, width) 39 | model = Darknet(cfg, s) 40 | else: 41 | model = Darknet(cfg, img_size) 42 | 43 | # Load weights 44 | if weights.endswith('.pt'): # pytorch format 45 | model.load_state_dict(torch.load(weights, map_location=device)['model']) 46 | else: # darknet format 47 | _ = load_darknet_weights(model, weights) 48 | 49 | # Fuse Conv2d + BatchNorm2d layers 50 | model.fuse() 51 | 52 | # Eval mode 53 | model.to(device).eval() 54 | 55 | if ONNX_EXPORT: 56 | img = torch.zeros((1, 3, s[0], s[1])) 57 | torch.onnx.export(model, img, 'weights/export.onnx', verbose=True) 58 | return 59 | 60 | # Set Dataloader 61 | vid_path, vid_writer = None, None 62 | if webcam: 63 | save_images = False 64 | dataloader = LoadWebcam(img_size=img_size) 65 | else: 66 | dataloader = LoadImages(images, img_size=img_size) 67 | 68 | # Get classes and colors 69 | classes = load_classes(parse_data_cfg(data_cfg)['names']) 70 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))] 71 | res = "" 72 | times = 0 73 | for i, (path, img, im0, vid_cap) in enumerate(dataloader): 74 | t = time.time() 75 | save_path = str(Path(output) / Path(path).name) 76 | 77 | # Get detections 78 | img = torch.from_numpy(img).unsqueeze(0).to(device) 79 | pred, _ = model(img) 80 | det = non_max_suppression(pred, conf_thres, nms_thres)[0] 81 | if det is not None and len(det) > 0: 82 | # Rescale boxes from 416 to true image size 83 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 84 | 85 | # Print results to screen 86 | print('%gx%g ' % img.shape[2:], end='') # print image size 87 | for c in det[:, -1].unique(): 88 | n = (det[:, -1] == c).sum() 89 | print('%g %ss' % (n, classes[int(c)]), end=', ') 90 | 91 | # Draw bounding boxes and labels of detections 92 | for *xyxy, conf, cls_conf, cls in det: 93 | if save_txt: # Write to file 94 | with open(save_path + '.txt', 'a') as file: 95 | file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf)) 96 | 97 | # Add bbox to the image 98 | label = '%s %.2f' % (classes[int(cls)], conf) 99 | plot_one_box(xyxy, im0, label=label, color=colors[int(cls)]) 100 | res = classes[int(cls)] 101 | 102 | print('Done. 
(%.3fs)' % (time.time() - t)) 103 | if webcam: 104 | show(weights, im0) 105 | if res == "no_mask" and (times%50) == 0: 106 | threading.Thread(target=playsound).start() 107 | if save_images: # Save image with detections 108 | if dataloader.mode == 'images': 109 | cv2.imwrite(save_path, im0) 110 | else: 111 | if vid_path != save_path: # new video 112 | vid_path = save_path 113 | if isinstance(vid_writer, cv2.VideoWriter): 114 | vid_writer.release() # release previous video writer 115 | 116 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 117 | width = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 118 | height = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 119 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (width, height)) 120 | vid_writer.write(im0) 121 | times += 1 122 | 123 | if save_images: 124 | print('Results saved to %s' % os.getcwd() + os.sep + output) 125 | if platform == 'darwin': # macos 126 | os.system('open ' + output + ' ' + save_path) 127 | 128 | 129 | if __name__ == '__main__': 130 | parser = argparse.ArgumentParser() 131 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path') 132 | parser.add_argument('--data-cfg', type=str, default='data/coco.data', help='coco.data file path') 133 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') 134 | parser.add_argument('--images', type=str, default='data/samples', help='path to images') 135 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 136 | parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold') 137 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') 138 | parser.add_argument('--fourcc', type=str, default='mp4v', help='specifies the fourcc code for output video encoding (make sure ffmpeg supports specified fourcc codec)') 139 | parser.add_argument('--output', type=str, default='output',help='specifies the output path for images and videos') 140 | opt = parser.parse_args() 141 | print(opt) 142 | 143 | with torch.no_grad(): 144 | detect( 145 | opt.cfg, 146 | opt.data_cfg, 147 | opt.weights, 148 | images=opt.images, 149 | img_size=opt.img_size, 150 | conf_thres=opt.conf_thres, 151 | nms_thres=opt.nms_thres, 152 | fourcc=opt.fourcc, 153 | output=opt.output 154 | ) 155 | -------------------------------------------------------------------------------- /makeMain.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 7, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os \n", 10 | "import random \n", 11 | " \n", 12 | "trainval_percent = 0.8\n", 13 | "train_percent = 0.8 \n", 14 | "xmlfilepath = 'Annotations' \n", 15 | "txtsavepath = 'ImageSets\\Main' \n", 16 | "total_xml = os.listdir(xmlfilepath) \n", 17 | " \n", 18 | "num=len(total_xml) \n", 19 | "list=range(num) \n", 20 | "tv=int(num*trainval_percent) \n", 21 | "tr=int(tv*train_percent) \n", 22 | "trainval= random.sample(list,tv) \n", 23 | "train=random.sample(trainval,tr) \n", 24 | " \n", 25 | "ftrainval = open('ImageSets/Main/trainval.txt', 'w') \n", 26 | "ftest = open('ImageSets/Main/test.txt', 'w') \n", 27 | "ftrain = open('ImageSets/Main/train.txt', 'w') \n", 28 | "fval = open('ImageSets/Main/val.txt', 'w') \n", 29 | " \n", 30 | "for i in list: \n", 31 | " name=total_xml[i][:-4]+'\\n' \n", 32 | " if i in 
trainval: \n", 33 | " ftrainval.write(name) \n", 34 | " if i in train: \n", 35 | " ftrain.write(name) \n", 36 | " else: \n", 37 | " fval.write(name) \n", 38 | " else: \n", 39 | " ftest.write(name) \n", 40 | " \n", 41 | "ftrainval.close() \n", 42 | "ftrain.close() \n", 43 | "fval.close() \n", 44 | "ftest.close()" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [] 53 | } 54 | ], 55 | "metadata": { 56 | "kernelspec": { 57 | "display_name": "Python 3", 58 | "language": "python", 59 | "name": "python3" 60 | }, 61 | "language_info": { 62 | "codemirror_mode": { 63 | "name": "ipython", 64 | "version": 3 65 | }, 66 | "file_extension": ".py", 67 | "mimetype": "text/x-python", 68 | "name": "python", 69 | "nbconvert_exporter": "python", 70 | "pygments_lexer": "ipython3", 71 | "version": "3.7.0" 72 | } 73 | }, 74 | "nbformat": 4, 75 | "nbformat_minor": 2 76 | } 77 | -------------------------------------------------------------------------------- /mask_on.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhengmingzhang/mask-detection/ff8a57b81ced6bc3fa6c1ae01f3b08cf1cb23e60/mask_on.wav -------------------------------------------------------------------------------- /models.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch.nn.functional as F 3 | from project.parse_config import * 4 | from project.utils import * 5 | 6 | ONNX_EXPORT = False 7 | 8 | 9 | def create_modules(module_defs): 10 | """ 11 | Constructs module list of layer blocks from module configuration in module_defs 12 | """ 13 | hyperparams = module_defs.pop(0) 14 | output_filters = [int(hyperparams['channels'])] 15 | module_list = nn.ModuleList() 16 | yolo_layer_count = 0 17 | for i, module_def in enumerate(module_defs): 18 | modules = nn.Sequential() 19 | 20 | if module_def['type'] == 'convolutional': 21 | bn = int(module_def['batch_normalize']) 22 | filters = int(module_def['filters']) 23 | kernel_size = int(module_def['size']) 24 | pad = (kernel_size - 1) // 2 if int(module_def['pad']) else 0 25 | modules.add_module('conv_%d' % i, nn.Conv2d(in_channels=output_filters[-1], 26 | out_channels=filters, 27 | kernel_size=kernel_size, 28 | stride=int(module_def['stride']), 29 | padding=pad, 30 | bias=not bn)) 31 | if bn: 32 | modules.add_module('batch_norm_%d' % i, nn.BatchNorm2d(filters)) 33 | if module_def['activation'] == 'leaky': 34 | modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1, inplace=True)) 35 | 36 | elif module_def['type'] == 'maxpool': 37 | kernel_size = int(module_def['size']) 38 | stride = int(module_def['stride']) 39 | if kernel_size == 2 and stride == 1: 40 | modules.add_module('_debug_padding_%d' % i, nn.ZeroPad2d((0, 1, 0, 1))) 41 | maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2)) 42 | modules.add_module('maxpool_%d' % i, maxpool) 43 | 44 | elif module_def['type'] == 'upsample': 45 | # upsample = nn.Upsample(scale_factor=int(module_def['stride']), mode='nearest') # WARNING: deprecated 46 | upsample = Upsample(scale_factor=int(module_def['stride'])) 47 | modules.add_module('upsample_%d' % i, upsample) 48 | 49 | elif module_def['type'] == 'route': 50 | layers = [int(x) for x in module_def['layers'].split(',')] 51 | filters = sum([output_filters[i + 1 if i > 0 else i] for i in layers]) 52 | modules.add_module('route_%d' % i, EmptyLayer()) 53 | 54 | elif 
module_def['type'] == 'shortcut': 55 | filters = output_filters[int(module_def['from'])] 56 | modules.add_module('shortcut_%d' % i, EmptyLayer()) 57 | 58 | elif module_def['type'] == 'yolo': 59 | anchor_idxs = [int(x) for x in module_def['mask'].split(',')] 60 | # Extract anchors 61 | anchors = [float(x) for x in module_def['anchors'].split(',')] 62 | anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)] 63 | anchors = [anchors[i] for i in anchor_idxs] 64 | nc = int(module_def['classes']) # number of classes 65 | img_size = hyperparams['height'] 66 | # Define detection layer 67 | yolo_layer = YOLOLayer(anchors, nc, img_size, yolo_layer_count, cfg=hyperparams['cfg']) 68 | modules.add_module('yolo_%d' % i, yolo_layer) 69 | yolo_layer_count += 1 70 | 71 | # Register module list and number of output filters 72 | module_list.append(modules) 73 | output_filters.append(filters) 74 | 75 | return hyperparams, module_list 76 | 77 | 78 | class EmptyLayer(nn.Module): 79 | """Placeholder for 'route' and 'shortcut' layers""" 80 | 81 | def __init__(self): 82 | super(EmptyLayer, self).__init__() 83 | 84 | def forward(self, x): 85 | return x 86 | 87 | 88 | class Upsample(nn.Module): 89 | # Custom Upsample layer (nn.Upsample gives deprecated warning message) 90 | 91 | def __init__(self, scale_factor=1, mode='nearest'): 92 | super(Upsample, self).__init__() 93 | self.scale_factor = scale_factor 94 | self.mode = mode 95 | 96 | def forward(self, x): 97 | return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode) 98 | 99 | 100 | class YOLOLayer(nn.Module): 101 | def __init__(self, anchors, nc, img_size, yolo_layer, cfg): 102 | super(YOLOLayer, self).__init__() 103 | 104 | self.anchors = torch.Tensor(anchors) 105 | self.na = len(anchors) # number of anchors (3) 106 | self.nc = nc # number of classes (80) 107 | self.nx = 0 # initialize number of x gridpoints 108 | self.ny = 0 # initialize number of y gridpoints 109 | 110 | if ONNX_EXPORT: # grids must be computed in __init__ 111 | stride = [32, 16, 8][yolo_layer] # stride of this layer 112 | nx = int(img_size[1] / stride) # number x grid points 113 | ny = int(img_size[0] / stride) # number y grid points 114 | create_grids(self, max(img_size), (nx, ny)) 115 | 116 | def forward(self, p, img_size, var=None): 117 | if ONNX_EXPORT: 118 | bs = 1 # batch size 119 | else: 120 | bs, ny, nx = p.shape[0], p.shape[-2], p.shape[-1] 121 | if (self.nx, self.ny) != (nx, ny): 122 | create_grids(self, img_size, (nx, ny), p.device) 123 | 124 | # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, classes + xywh) 125 | p = p.view(bs, self.na, self.nc + 5, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous() # prediction 126 | 127 | if self.training: 128 | return p 129 | 130 | elif ONNX_EXPORT: 131 | # Constants CAN NOT BE BROADCAST, ensure correct shape! 
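# The .repeat() calls below materialize the grid offsets and anchor sizes to the full
# flattened prediction shape instead of relying on implicit broadcasting, which the
# exported ONNX/CoreML graph may not support (see the CoreML note a few lines down).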
132 | ngu = self.ng.repeat((1, self.na * self.nx * self.ny, 1)) 133 | grid_xy = self.grid_xy.repeat((1, self.na, 1, 1, 1)).view((1, -1, 2)) 134 | anchor_wh = self.anchor_wh.repeat((1, 1, self.nx, self.ny, 1)).view((1, -1, 2)) / ngu 135 | 136 | # p = p.view(-1, 5 + self.nc) 137 | # xy = torch.sigmoid(p[..., 0:2]) + grid_xy[0] # x, y 138 | # wh = torch.exp(p[..., 2:4]) * anchor_wh[0] # width, height 139 | # p_conf = torch.sigmoid(p[:, 4:5]) # Conf 140 | # p_cls = F.softmax(p[:, 5:85], 1) * p_conf # SSD-like conf 141 | # return torch.cat((xy / ngu[0], wh, p_conf, p_cls), 1).t() 142 | 143 | p = p.view(1, -1, 5 + self.nc) 144 | xy = torch.sigmoid(p[..., 0:2]) + grid_xy # x, y 145 | wh = torch.exp(p[..., 2:4]) * anchor_wh # width, height 146 | p_conf = torch.sigmoid(p[..., 4:5]) # Conf 147 | p_cls = p[..., 5:5 + self.nc] 148 | # Broadcasting only supported on first dimension in CoreML. See onnx-coreml/_operators.py 149 | # p_cls = F.softmax(p_cls, 2) * p_conf # SSD-like conf 150 | p_cls = torch.exp(p_cls).permute((2, 1, 0)) 151 | p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute((2, 1, 0)) # F.softmax() equivalent 152 | p_cls = p_cls.permute(2, 1, 0) 153 | return torch.cat((xy / ngu, wh, p_conf, p_cls), 2).squeeze().t() 154 | 155 | else: # inference 156 | io = p.clone() # inference output 157 | io[..., 0:2] = torch.sigmoid(io[..., 0:2]) + self.grid_xy # xy 158 | io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh # wh yolo method 159 | # io[..., 2:4] = ((torch.sigmoid(io[..., 2:4]) * 2) ** 3) * self.anchor_wh # wh power method 160 | io[..., 4:] = torch.sigmoid(io[..., 4:]) # p_conf, p_cls 161 | # io[..., 5:] = F.softmax(io[..., 5:], dim=4) # p_cls 162 | io[..., :4] *= self.stride 163 | if self.nc == 1: 164 | io[..., 5] = 1 # single-class model https://github.com/ultralytics/yolov3/issues/235 165 | 166 | # reshape from [1, 3, 13, 13, 85] to [1, 507, 85] 167 | return io.view(bs, -1, 5 + self.nc), p 168 | 169 | 170 | class Darknet(nn.Module): 171 | """YOLOv3 object detection model""" 172 | 173 | def __init__(self, cfg, img_size=(416, 416)): 174 | super(Darknet, self).__init__() 175 | 176 | self.module_defs = parse_model_cfg(cfg) 177 | self.module_defs[0]['cfg'] = cfg 178 | self.module_defs[0]['height'] = img_size 179 | self.hyperparams, self.module_list = create_modules(self.module_defs) 180 | self.yolo_layers = get_yolo_layers(self) 181 | 182 | # Darknet Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 183 | self.version = np.array([0, 2, 5], dtype=np.int32) # (int32) version info: major, minor, revision 184 | self.seen = np.array([0], dtype=np.int64) # (int64) number of images seen during training 185 | 186 | def forward(self, x, var=None): 187 | img_size = max(x.shape[-2:]) 188 | layer_outputs = [] 189 | output = [] 190 | 191 | for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): 192 | mtype = module_def['type'] 193 | if mtype in ['convolutional', 'upsample', 'maxpool']: 194 | x = module(x) 195 | elif mtype == 'route': 196 | layer_i = [int(x) for x in module_def['layers'].split(',')] 197 | if len(layer_i) == 1: 198 | x = layer_outputs[layer_i[0]] 199 | else: 200 | x = torch.cat([layer_outputs[i] for i in layer_i], 1) 201 | elif mtype == 'shortcut': 202 | layer_i = int(module_def['from']) 203 | x = layer_outputs[-1] + layer_outputs[layer_i] 204 | elif mtype == 'yolo': 205 | x = module[0](x, img_size) 206 | output.append(x) 207 | layer_outputs.append(x) 208 | 209 | if self.training: 210 | return output 211 | elif ONNX_EXPORT: 
212 | output = torch.cat(output, 1) # cat 3 layers 85 x (507, 2028, 8112) to 85 x 10647 213 | nc = self.module_list[self.yolo_layers[0]][0].nc # number of classes 214 | return output[5:5 + nc].t(), output[:4].t() # ONNX scores, boxes 215 | else: 216 | io, p = list(zip(*output)) # inference output, training output 217 | return torch.cat(io, 1), p 218 | 219 | def fuse(self): 220 | # Fuse Conv2d + BatchNorm2d layers throughout model 221 | fused_list = nn.ModuleList() 222 | for a in list(self.children())[0]: 223 | for i, b in enumerate(a): 224 | if isinstance(b, nn.modules.batchnorm.BatchNorm2d): 225 | # fuse this bn layer with the previous conv2d layer 226 | conv = a[i - 1] 227 | fused = torch_utils.fuse_conv_and_bn(conv, b) 228 | a = nn.Sequential(fused, *list(a.children())[i + 1:]) 229 | break 230 | fused_list.append(a) 231 | self.module_list = fused_list 232 | # model_info(self) # yolov3-spp reduced from 225 to 152 layers 233 | 234 | 235 | def get_yolo_layers(model): 236 | a = [module_def['type'] == 'yolo' for module_def in model.module_defs] 237 | return [i for i, x in enumerate(a) if x] # [82, 94, 106] for yolov3 238 | 239 | 240 | def create_grids(self, img_size=416, ng=(13, 13), device='cpu'): 241 | nx, ny = ng # x and y grid size 242 | self.img_size = img_size 243 | self.stride = img_size / max(ng) 244 | 245 | # build xy offsets 246 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 247 | self.grid_xy = torch.stack((xv, yv), 2).to(device).float().view((1, 1, ny, nx, 2)) 248 | 249 | # build wh gains 250 | self.anchor_vec = self.anchors.to(device) / self.stride 251 | self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2).to(device) 252 | self.ng = torch.Tensor(ng).to(device) 253 | self.nx = nx 254 | self.ny = ny 255 | 256 | 257 | def load_darknet_weights(self, weights, cutoff=-1): 258 | # Parses and loads the weights stored in 'weights' 259 | # cutoff: save layers between 0 and cutoff (if cutoff = -1 all are saved) 260 | weights_file = weights.split(os.sep)[-1] 261 | 262 | # Try to download weights if not available locally 263 | if not os.path.isfile(weights): 264 | try: 265 | os.system('wget https://pjreddie.com/media/files/' + weights_file + ' -O ' + weights) 266 | except IOError: 267 | print(weights + ' not found.\nTry https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI') 268 | 269 | # Establish cutoffs 270 | if weights_file == 'darknet53.conv.74': 271 | cutoff = 75 272 | elif weights_file == 'yolov3-tiny.conv.15': 273 | cutoff = 15 274 | 275 | # Read weights file 276 | with open(weights, 'rb') as f: 277 | # Read Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 278 | self.version = np.fromfile(f, dtype=np.int32, count=3) # (int32) version info: major, minor, revision 279 | self.seen = np.fromfile(f, dtype=np.int64, count=1) # (int64) number of images seen during training 280 | 281 | weights = np.fromfile(f, dtype=np.float32) # The rest are weights 282 | 283 | ptr = 0 284 | for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): 285 | if module_def['type'] == 'convolutional': 286 | conv_layer = module[0] 287 | if module_def['batch_normalize']: 288 | # Load BN bias, weights, running mean and running variance 289 | bn_layer = module[1] 290 | num_b = bn_layer.bias.numel() # Number of biases 291 | # Bias 292 | bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias) 293 | bn_layer.bias.data.copy_(bn_b) 294 | ptr += num_b 295 | # Weight 296 | bn_w = 
torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight) 297 | bn_layer.weight.data.copy_(bn_w) 298 | ptr += num_b 299 | # Running Mean 300 | bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean) 301 | bn_layer.running_mean.data.copy_(bn_rm) 302 | ptr += num_b 303 | # Running Var 304 | bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var) 305 | bn_layer.running_var.data.copy_(bn_rv) 306 | ptr += num_b 307 | else: 308 | # Load conv. bias 309 | num_b = conv_layer.bias.numel() 310 | conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias) 311 | conv_layer.bias.data.copy_(conv_b) 312 | ptr += num_b 313 | # Load conv. weights 314 | num_w = conv_layer.weight.numel() 315 | conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight) 316 | conv_layer.weight.data.copy_(conv_w) 317 | ptr += num_w 318 | 319 | return cutoff 320 | 321 | 322 | def save_weights(self, path='model.weights', cutoff=-1): 323 | # Converts a PyTorch model to Darket format (*.pt to *.weights) 324 | # Note: Does not work if model.fuse() is applied 325 | with open(path, 'wb') as f: 326 | # Write Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 327 | self.version.tofile(f) # (int32) version info: major, minor, revision 328 | self.seen.tofile(f) # (int64) number of images seen during training 329 | 330 | # Iterate through layers 331 | for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): 332 | if module_def['type'] == 'convolutional': 333 | conv_layer = module[0] 334 | # If batch norm, load bn first 335 | if module_def['batch_normalize']: 336 | bn_layer = module[1] 337 | bn_layer.bias.data.cpu().numpy().tofile(f) 338 | bn_layer.weight.data.cpu().numpy().tofile(f) 339 | bn_layer.running_mean.data.cpu().numpy().tofile(f) 340 | bn_layer.running_var.data.cpu().numpy().tofile(f) 341 | # Load conv bias 342 | else: 343 | conv_layer.bias.data.cpu().numpy().tofile(f) 344 | # Load conv weights 345 | conv_layer.weight.data.cpu().numpy().tofile(f) 346 | 347 | 348 | def convert(cfg='cfg/yolov3-spp.cfg', weights='weights/yolov3-spp.weights'): 349 | # Converts between PyTorch and Darknet format per extension (i.e. 
*.weights convert to *.pt and vice versa) 350 | # from models import *; convert('cfg/yolov3-spp.cfg', 'weights/yolov3-spp.weights') 351 | 352 | # Initialize model 353 | model = Darknet(cfg) 354 | 355 | # Load weights and save 356 | if weights.endswith('.pt'): # if PyTorch format 357 | model.load_state_dict(torch.load(weights, map_location='cpu')['model']) 358 | save_weights(model, path='converted.weights', cutoff=-1) 359 | print("Success: converted '%s' to 'converted.weights'" % weights) 360 | 361 | elif weights.endswith('.weights'): # darknet format 362 | _ = load_darknet_weights(model, weights) 363 | chkpt = {'epoch': -1, 'best_loss': None, 'model': model.state_dict(), 'optimizer': None} 364 | torch.save(chkpt, 'converted.pt') 365 | print("Success: converted '%s' to 'converted.pt'" % weights) 366 | 367 | else: 368 | print('Error: extension not supported.') 369 | -------------------------------------------------------------------------------- /project/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import math 3 | import os 4 | import random 5 | import shutil 6 | from pathlib import Path 7 | 8 | import cv2 9 | import numpy as np 10 | import torch 11 | from torch.utils.data import Dataset 12 | from tqdm import tqdm 13 | 14 | from project.utils import xyxy2xywh 15 | 16 | 17 | class LoadImages: # for inference 18 | def __init__(self, path, img_size=416): 19 | self.height = img_size 20 | img_formats = ['.jpg', '.jpeg', '.png', '.tif'] 21 | vid_formats = ['.mov', '.avi', '.mp4'] 22 | 23 | files = [] 24 | if os.path.isdir(path): 25 | files = sorted(glob.glob('%s/*.*' % path)) 26 | elif os.path.isfile(path): 27 | files = [path] 28 | 29 | images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats] 30 | videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats] 31 | nI, nV = len(images), len(videos) 32 | 33 | self.files = images + videos 34 | self.nF = nI + nV # number of files 35 | self.video_flag = [False] * nI + [True] * nV 36 | self.mode = 'images' 37 | if any(videos): 38 | self.new_video(videos[0]) # new video 39 | else: 40 | self.cap = None 41 | assert self.nF > 0, 'No images or videos found in ' + path 42 | 43 | def __iter__(self): 44 | self.count = 0 45 | return self 46 | 47 | def __next__(self): 48 | if self.count == self.nF: 49 | raise StopIteration 50 | path = self.files[self.count] 51 | 52 | if self.video_flag[self.count]: 53 | # Read video 54 | self.mode = 'video' 55 | ret_val, img0 = self.cap.read() 56 | if not ret_val: 57 | self.count += 1 58 | self.cap.release() 59 | if self.count == self.nF: # last video 60 | raise StopIteration 61 | else: 62 | path = self.files[self.count] 63 | self.new_video(path) 64 | ret_val, img0 = self.cap.read() 65 | 66 | self.frame += 1 67 | print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='') 68 | 69 | else: 70 | # Read image 71 | self.count += 1 72 | img0 = cv2.imread(path) # BGR 73 | assert img0 is not None, 'File Not Found ' + path 74 | print('image %g/%g %s: ' % (self.count, self.nF, path), end='') 75 | 76 | # Padded resize 77 | img, _, _, _ = letterbox(img0, new_shape=self.height) 78 | 79 | # Normalize RGB 80 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB 81 | img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 82 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 83 | 84 | # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image 85 | 
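# Yields (path, letterboxed CHW float32 image scaled to [0, 1], original BGR image,
# and the current cv2.VideoCapture handle, which is None when the input list has no videos).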
return path, img, img0, self.cap 86 | 87 | def new_video(self, path): 88 | self.frame = 0 89 | self.cap = cv2.VideoCapture(path) 90 | self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) 91 | 92 | def __len__(self): 93 | return self.nF # number of files 94 | 95 | 96 | class LoadWebcam: # for inference 97 | def __init__(self, img_size=416): 98 | self.cam = cv2.VideoCapture(0) 99 | self.height = img_size 100 | 101 | def __iter__(self): 102 | self.count = -1 103 | return self 104 | 105 | def __next__(self): 106 | self.count += 1 107 | if cv2.waitKey(1) == 27: # esc to quit 108 | cv2.destroyAllWindows() 109 | raise StopIteration 110 | 111 | # Read image 112 | ret_val, img0 = self.cam.read() 113 | assert ret_val, 'Webcam Error' 114 | img_path = 'webcam_%g.jpg' % self.count 115 | img0 = cv2.flip(img0, 1) # flip left-right 116 | print('webcam %g: ' % self.count, end='') 117 | 118 | # Padded resize 119 | img, _, _, _ = letterbox(img0, new_shape=self.height) 120 | 121 | # Normalize RGB 122 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB 123 | img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 124 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 125 | 126 | return img_path, img, img0, None 127 | 128 | def __len__(self): 129 | return 0 130 | 131 | 132 | class LoadImagesAndLabels(Dataset): # for training/testing 133 | def __init__(self, path, img_size=416, batch_size=16, augment=False, rect=True, image_weights=False): 134 | with open(path, 'r') as f: 135 | img_files = f.read().splitlines() 136 | self.img_files = list(filter(lambda x: len(x) > 0, img_files)) 137 | 138 | n = len(self.img_files) 139 | bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index 140 | nb = bi[-1] + 1 # number of batches 141 | assert n > 0, 'No images found in %s' % path 142 | 143 | self.n = n 144 | self.batch = bi # batch index of image 145 | self.img_size = img_size 146 | self.augment = augment 147 | self.image_weights = image_weights 148 | self.rect = False if image_weights else rect 149 | self.label_files = [x.replace('images', 'labels'). 150 | replace('.jpeg', '.txt'). 151 | replace('.jpg', '.txt'). 152 | replace('.bmp', '.txt'). 
153 | replace('.png', '.txt') for x in self.img_files] 154 | 155 | # Rectangular Training https://github.com/ultralytics/yolov3/issues/232 156 | if self.rect: 157 | from PIL import Image 158 | 159 | # Read image shapes 160 | sp = 'data' + os.sep + path.replace('.txt', '.shapes').split(os.sep)[-1] # shapefile path 161 | if os.path.exists(sp): # read existing shapefile 162 | with open(sp, 'r') as f: 163 | s = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) 164 | assert len(s) == n, 'Shapefile out of sync, please delete %s and rerun' % sp 165 | else: # no shapefile, so read shape using PIL and write shapefile for next time (faster) 166 | s = np.array([Image.open(f).size for f in tqdm(self.img_files, desc='Reading image shapes')]) 167 | np.savetxt(sp, s, fmt='%g') 168 | 169 | # Sort by aspect ratio 170 | ar = s[:, 1] / s[:, 0] # aspect ratio 171 | i = ar.argsort() 172 | ar = ar[i] 173 | self.img_files = [self.img_files[i] for i in i] 174 | self.label_files = [self.label_files[i] for i in i] 175 | 176 | # Set training image shapes 177 | shapes = [[1, 1]] * nb 178 | for i in range(nb): 179 | ari = ar[bi == i] 180 | mini, maxi = ari.min(), ari.max() 181 | if maxi < 1: 182 | shapes[i] = [maxi, 1] 183 | elif mini > 1: 184 | shapes[i] = [1, 1 / mini] 185 | 186 | self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32.).astype(np.int) * 32 187 | 188 | # Preload labels (required for weighted CE training) 189 | self.imgs = [None] * n 190 | self.labels = [np.zeros((0, 5))] * n 191 | iter = tqdm(self.label_files, desc='Reading labels') if n > 1000 else self.label_files 192 | for i, file in enumerate(iter): 193 | try: 194 | with open(file, 'r') as f: 195 | l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) 196 | if l.shape[0]: 197 | assert l.shape[1] == 5, '> 5 label columns: %s' % file 198 | assert (l >= 0).all(), 'negative labels: %s' % file 199 | assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file 200 | self.labels[i] = l 201 | except: 202 | pass # print('Warning: missing labels for %s' % self.img_files[i]) # missing label file 203 | assert len(np.concatenate(self.labels, 0)) > 0, 'No labels found. Incorrect label paths provided.' 
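# Usage sketch (file paths are placeholders; adjust to your dataset layout):
#   dataset = LoadImagesAndLabels('data/train.txt', img_size=416, batch_size=16, augment=True)
#   loader = torch.utils.data.DataLoader(dataset, batch_size=16,
#                                        collate_fn=LoadImagesAndLabels.collate_fn)
#   for imgs, targets, paths, shapes in loader:
#       pass  # imgs: (N, 3, H, W) in [0, 1]; targets: (M, 6) rows of [image_idx, class, x, y, w, h]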
204 | 205 | def __len__(self): 206 | return len(self.img_files) 207 | 208 | # def __iter__(self): 209 | # self.count = -1 210 | # print('ran dataset iter') 211 | # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) 212 | # return self 213 | 214 | def __getitem__(self, index): 215 | if self.image_weights: 216 | index = self.indices[index] 217 | 218 | img_path = self.img_files[index] 219 | label_path = self.label_files[index] 220 | 221 | # Load image 222 | img = self.imgs[index] 223 | if img is None: 224 | img = cv2.imread(img_path) # BGR 225 | assert img is not None, 'File Not Found ' + img_path 226 | if self.n < 1001: 227 | self.imgs[index] = img # cache image into memory 228 | 229 | # Augment colorspace 230 | augment_hsv = True 231 | if self.augment and augment_hsv: 232 | # SV augmentation by 50% 233 | fraction = 0.50 # must be < 1.0 234 | img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) # hue, sat, val 235 | S = img_hsv[:, :, 1].astype(np.float32) # saturation 236 | V = img_hsv[:, :, 2].astype(np.float32) # value 237 | 238 | a = (random.random() * 2 - 1) * fraction + 1 239 | b = (random.random() * 2 - 1) * fraction + 1 240 | S *= a 241 | V *= b 242 | 243 | img_hsv[:, :, 1] = S if a < 1 else S.clip(None, 255) 244 | img_hsv[:, :, 2] = V if b < 1 else V.clip(None, 255) 245 | cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) 246 | 247 | # Letterbox 248 | h, w, _ = img.shape 249 | if self.rect: 250 | shape = self.batch_shapes[self.batch[index]] 251 | img, ratio, padw, padh = letterbox(img, new_shape=shape, mode='rect') 252 | else: 253 | shape = self.img_size 254 | img, ratio, padw, padh = letterbox(img, new_shape=shape, mode='square') 255 | 256 | # Load labels 257 | labels = [] 258 | if os.path.isfile(label_path): 259 | # with open(label_path, 'r') as f: 260 | # x = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) 261 | x = self.labels[index] 262 | if x.size > 0: 263 | # Normalized xywh to pixel xyxy format 264 | labels = x.copy() 265 | labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw 266 | labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh 267 | labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw 268 | labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh 269 | 270 | # Augment image and labels 271 | if self.augment: 272 | img, labels = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.90, 1.10)) 273 | 274 | nL = len(labels) # number of labels 275 | if nL: 276 | # convert xyxy to xywh 277 | labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) 278 | 279 | # Normalize coordinates 0 - 1 280 | labels[:, [2, 4]] /= img.shape[0] # height 281 | labels[:, [1, 3]] /= img.shape[1] # width 282 | 283 | if self.augment: 284 | # random left-right flip 285 | lr_flip = True 286 | if lr_flip and random.random() > 0.5: 287 | img = np.fliplr(img) 288 | if nL: 289 | labels[:, 1] = 1 - labels[:, 1] 290 | 291 | # random up-down flip 292 | ud_flip = False 293 | if ud_flip and random.random() > 0.5: 294 | img = np.flipud(img) 295 | if nL: 296 | labels[:, 2] = 1 - labels[:, 2] 297 | 298 | labels_out = torch.zeros((nL, 6)) 299 | if nL: 300 | labels_out[:, 1:] = torch.from_numpy(labels) 301 | 302 | # Normalize 303 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 304 | img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 305 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 306 | 307 | return torch.from_numpy(img), labels_out, img_path, (h, w) 308 | 309 | @staticmethod 310 | def 
collate_fn(batch): 311 | img, label, path, hw = list(zip(*batch)) # transposed 312 | for i, l in enumerate(label): 313 | l[:, 0] = i # add target image index for build_targets() 314 | return torch.stack(img, 0), torch.cat(label, 0), path, hw 315 | 316 | 317 | def letterbox(img, new_shape=416, color=(127.5, 127.5, 127.5), mode='auto'): 318 | # Resize a rectangular image to a 32 pixel multiple rectangle 319 | # https://github.com/ultralytics/yolov3/issues/232 320 | shape = img.shape[:2] # current shape [height, width] 321 | if isinstance(new_shape, int): 322 | ratio = float(new_shape) / max(shape) 323 | else: 324 | ratio = max(new_shape) / max(shape) # ratio = new / old 325 | new_unpad = (int(round(shape[1] * ratio)), int(round(shape[0] * ratio))) 326 | 327 | # Compute padding https://github.com/ultralytics/yolov3/issues/232 328 | if mode is 'auto': # minimum rectangle 329 | dw = np.mod(new_shape - new_unpad[0], 32) / 2 # width padding 330 | dh = np.mod(new_shape - new_unpad[1], 32) / 2 # height padding 331 | elif mode is 'square': # square 332 | dw = (new_shape - new_unpad[0]) / 2 # width padding 333 | dh = (new_shape - new_unpad[1]) / 2 # height padding 334 | elif mode is 'rect': # square 335 | dw = (new_shape[1] - new_unpad[0]) / 2 # width padding 336 | dh = (new_shape[0] - new_unpad[1]) / 2 # height padding 337 | 338 | top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) 339 | left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) 340 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) # resized, no border 341 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded square 342 | return img, ratio, dw, dh 343 | 344 | 345 | def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2), 346 | borderValue=(127.5, 127.5, 127.5)): 347 | # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) 348 | # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4 349 | 350 | if targets is None: 351 | targets = [] 352 | border = 0 # width of added border (optional) 353 | height = img.shape[0] + border * 2 354 | width = img.shape[1] + border * 2 355 | 356 | # Rotation and Scale 357 | R = np.eye(3) 358 | a = random.random() * (degrees[1] - degrees[0]) + degrees[0] 359 | # a += random.choice([-180, -90, 0, 90]) # 90deg rotations added to small rotations 360 | s = random.random() * (scale[1] - scale[0]) + scale[0] 361 | R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s) 362 | 363 | # Translation 364 | T = np.eye(3) 365 | T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border # x translation (pixels) 366 | T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border # y translation (pixels) 367 | 368 | # Shear 369 | S = np.eye(3) 370 | S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # x shear (deg) 371 | S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # y shear (deg) 372 | 373 | M = S @ T @ R # Combined rotation matrix. ORDER IS IMPORTANT HERE!! 
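    # NOTE (editor, illustrative): M acts on homogeneous pixel coordinates, so a point
    # p = [x, y, 1]^T maps to M @ p = S @ (T @ (R @ p)): rotation/scale first, then
    # translation, then shear. Composing in a different order would produce a different
    # warp, hence the warning above. The same M is applied to the label box corners
    # below (xy @ M.T), keeping the boxes aligned with the warped image.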
374 | imw = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, 375 | borderValue=borderValue) # BGR order borderValue 376 | 377 | # Return warped points also 378 | if len(targets) > 0: 379 | n = targets.shape[0] 380 | points = targets[:, 1:5].copy() 381 | area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1]) 382 | 383 | # warp points 384 | xy = np.ones((n * 4, 3)) 385 | xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 386 | xy = (xy @ M.T)[:, :2].reshape(n, 8) 387 | 388 | # create new boxes 389 | x = xy[:, [0, 2, 4, 6]] 390 | y = xy[:, [1, 3, 5, 7]] 391 | xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T 392 | 393 | # # apply angle-based reduction of bounding boxes 394 | # radians = a * math.pi / 180 395 | # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5 396 | # x = (xy[:, 2] + xy[:, 0]) / 2 397 | # y = (xy[:, 3] + xy[:, 1]) / 2 398 | # w = (xy[:, 2] - xy[:, 0]) * reduction 399 | # h = (xy[:, 3] - xy[:, 1]) * reduction 400 | # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T 401 | 402 | # reject warped points outside of image 403 | xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) 404 | xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) 405 | w = xy[:, 2] - xy[:, 0] 406 | h = xy[:, 3] - xy[:, 1] 407 | area = w * h 408 | ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16)) 409 | i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10) 410 | 411 | targets = targets[i] 412 | targets[:, 1:5] = xy[i] 413 | 414 | return imw, targets 415 | 416 | 417 | def convert_images2bmp(): 418 | # cv2.imread() jpg at 230 img/s, *.bmp at 400 img/s 419 | for path in ['../coco/images/val2014/', '../coco/images/train2014/']: 420 | folder = os.sep + Path(path).name 421 | output = path.replace(folder, folder + 'bmp') 422 | if os.path.exists(output): 423 | shutil.rmtree(output) # delete output folder 424 | os.makedirs(output) # make new output folder 425 | 426 | for f in tqdm(glob.glob('%s*.jpg' % path)): 427 | save_name = f.replace('.jpg', '.bmp').replace(folder, folder + 'bmp') 428 | cv2.imwrite(save_name, cv2.imread(f)) 429 | 430 | for label_path in ['../coco/trainvalno5k.txt', '../coco/5k.txt']: 431 | with open(label_path, 'r') as file: 432 | lines = file.read() 433 | lines = lines.replace('2014/', '2014bmp/').replace('.jpg', '.bmp').replace( 434 | '/Users/glennjocher/PycharmProjects/', '../') 435 | with open(label_path.replace('5k', '5k_bmp'), 'w') as file: 436 | file.write(lines) 437 | -------------------------------------------------------------------------------- /project/gcp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # New VM 4 | rm -rf yolov3 weights coco 5 | git clone https://github.com/ultralytics/yolov3 6 | bash yolov3/weights/download_yolov3_weights.sh && cp -r weights yolov3 7 | bash yolov3/data/get_coco_dataset.sh 8 | git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. 
&& cp -r cocoapi/PythonAPI/pycocotools yolov3 9 | sudo shutdown 10 | 11 | # Re-clone 12 | rm -rf yolov3 # Warning: remove existing 13 | git clone https://github.com/ultralytics/yolov3 # master 14 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 test # branch 15 | cp -r cocoapi/PythonAPI/pycocotools yolov3 16 | cp -r weights yolov3 && cd yolov3 17 | 18 | # Train 19 | python3 train.py 20 | 21 | # Resume 22 | python3 train.py --resume 23 | 24 | # Detect 25 | python3 detect.py 26 | 27 | # Test 28 | python3 test.py --save-json 29 | 30 | # Git pull 31 | git pull https://github.com/ultralytics/yolov3 # master 32 | git pull https://github.com/ultralytics/yolov3 test # branch 33 | 34 | # Test Darknet training 35 | python3 test.py --weights ../darknet/backup/yolov3.backup 36 | 37 | # Copy latest.pt TO bucket 38 | gsutil cp yolov3/weights/latest1gpu.pt gs://ultralytics 39 | 40 | # Copy latest.pt FROM bucket 41 | gsutil cp gs://ultralytics/latest.pt yolov3/weights/latest.pt 42 | wget https://storage.googleapis.com/ultralytics/yolov3/latest_v1_0.pt -O weights/latest_v1_0.pt 43 | wget https://storage.googleapis.com/ultralytics/yolov3/best_v1_0.pt -O weights/best_v1_0.pt 44 | 45 | # Reproduce tutorials 46 | rm results*.txt # WARNING: removes existing results 47 | python3 train.py --nosave --data data/coco_1img.data && mv results.txt results0r_1img.txt 48 | python3 train.py --nosave --data data/coco_10img.data && mv results.txt results0r_10img.txt 49 | python3 train.py --nosave --data data/coco_100img.data && mv results.txt results0r_100img.txt 50 | #python3 train.py --nosave --data data/coco_100img.data --transfer && mv results.txt results3_100imgTL.txt 51 | python3 -c "from utils import utils; utils.plot_results()" 52 | gsutil cp results*.txt gs://ultralytics 53 | gsutil cp results.png gs://ultralytics 54 | sudo shutdown 55 | 56 | # Reproduce mAP 57 | python3 test.py --save-json --img-size 608 58 | python3 test.py --save-json --img-size 416 59 | python3 test.py --save-json --img-size 320 60 | sudo shutdown 61 | 62 | # Unit tests 63 | python3 detect.py # detect 2 persons, 1 tie 64 | python3 test.py --data data/coco_32img.data # test mAP = 0.8 65 | python3 train.py --data data/coco_32img.data --epochs 5 --nosave # train 5 epochs 66 | python3 train.py --data data/coco_1cls.data --epochs 5 --nosave # train 5 epochs 67 | python3 train.py --data data/coco_1img.data --epochs 5 --nosave # train 5 epochs 68 | 69 | # AlexyAB Darknet 70 | gsutil cp -r gs://sm6/supermarket2 . 
# dataset from bucket 71 | rm -rf darknet && git clone https://github.com/AlexeyAB/darknet && cd darknet && wget -c https://pjreddie.com/media/files/darknet53.conv.74 # sudo apt install libopencv-dev && make 72 | ./darknet detector calc_anchors data/coco_img64.data -num_of_clusters 9 -width 320 -height 320 # kmeans anchor calculation 73 | ./darknet detector train ../supermarket2/supermarket2.data ../yolo_v3_spp_pan_scale.cfg darknet53.conv.74 -map -dont_show # train spp 74 | ./darknet detector train ../yolov3/data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp coco 75 | 76 | ./darknet detector train ../supermarket2/supermarket2.data ../yolov3-spp-sm2-1cls-scalexy_variable.cfg darknet53.conv.74 -map -dont_show # train spp 77 | gsutil cp -r backup/*5000.weights gs://sm6/weights 78 | sudo shutdown 79 | 80 | 81 | ./darknet detector train ../supermarket2/supermarket2.data ../yolov3-tiny-sm2-1cls.cfg yolov3-tiny.conv.15 -map -dont_show # train tiny 82 | ./darknet detector train ../supermarket2/supermarket2.data cfg/yolov3-spp-sm2-1cls.cfg backup/yolov3-spp-sm2-1cls_last.weights # resume 83 | python3 train.py --data ../supermarket2/supermarket2.data --cfg ../yolov3-spp-sm2-1cls.cfg --epochs 100 --num-workers 8 --img-size 320 --nosave # train ultralytics 84 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls_5000.weights --cfg cfg/yolov3-spp-sm2-1cls.cfg # test 85 | gsutil cp -r backup/*.weights gs://sm6/weights # weights to bucket 86 | 87 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls_5000.weights --cfg ../yolov3-spp-sm2-1cls.cfg --img-size 320 --conf-thres 0.2 # test 88 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_125_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_125.cfg --img-size 320 --conf-thres 0.2 # test 89 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_150_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_150.cfg --img-size 320 --conf-thres 0.2 # test 90 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_200_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_200.cfg --img-size 320 --conf-thres 0.2 # test 91 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls-scalexy_variable_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_variable.cfg --img-size 320 --conf-thres 0.2 # test 92 | 93 | 94 | 95 | 96 | 97 | # Debug/Development 98 | python3 train.py --evolve --data data/coco_1k5k.data --epochs 30 --img-size 320 99 | gsutil cp evolve.txt gs://ultralytics 100 | sudo shutdown 101 | -------------------------------------------------------------------------------- /project/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_model_cfg(path): 2 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 3 | file = open(path, 'r') 4 | lines = file.read().split('\n') 5 | lines = [x for x in lines if x and not x.startswith('#')] 6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 7 | module_defs = [] 8 | for line in lines: 9 | if line.startswith('['): # This marks the start of a new block 10 | module_defs.append({}) 11 | module_defs[-1]['type'] = line[1:-1].rstrip() 12 | if module_defs[-1]['type'] == 'convolutional': 13 | 
module_defs[-1]['batch_normalize'] = 0 14 | else: 15 | key, value = line.split("=") 16 | value = value.strip() 17 | module_defs[-1][key.rstrip()] = value.strip() 18 | 19 | return module_defs 20 | 21 | 22 | def parse_data_cfg(path): 23 | """Parses the data configuration file""" 24 | options = dict() 25 | options['gpus'] = '0,1,2,3' 26 | options['num_workers'] = '10' 27 | with open(path, 'r') as fp: 28 | lines = fp.readlines() 29 | for line in lines: 30 | line = line.strip() 31 | if line == '' or line.startswith('#'): 32 | continue 33 | key, value = line.split('=') 34 | options[key.strip()] = value.strip() 35 | return options 36 | -------------------------------------------------------------------------------- /project/torch_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def init_seeds(seed=0): 5 | torch.manual_seed(seed) 6 | torch.cuda.manual_seed(seed) 7 | torch.cuda.manual_seed_all(seed) 8 | 9 | 10 | def select_device(force_cpu=False): 11 | cuda = False if force_cpu else torch.cuda.is_available() 12 | device = torch.device('cuda:0' if cuda else 'cpu') 13 | 14 | if not cuda: 15 | print('Using CPU') 16 | if cuda: 17 | c = 1024 ** 2 # bytes to MB 18 | ng = torch.cuda.device_count() 19 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 20 | print("Using CUDA device0 _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 21 | (x[0].name, x[0].total_memory / c)) 22 | if ng > 0: 23 | # torch.cuda.set_device(0) # OPTIONAL: Set GPU ID 24 | for i in range(1, ng): 25 | print(" device%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 26 | (i, x[i].name, x[i].total_memory / c)) 27 | 28 | print('') # skip a line 29 | return device 30 | 31 | 32 | def fuse_conv_and_bn(conv, bn): 33 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 34 | with torch.no_grad(): 35 | # init 36 | fusedconv = torch.nn.Conv2d( 37 | conv.in_channels, 38 | conv.out_channels, 39 | kernel_size=conv.kernel_size, 40 | stride=conv.stride, 41 | padding=conv.padding, 42 | bias=True 43 | ) 44 | 45 | # prepare filters 46 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 47 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 48 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 49 | 50 | # prepare spatial bias 51 | if conv.bias is not None: 52 | b_conv = conv.bias 53 | else: 54 | b_conv = torch.zeros(conv.weight.size(0)) 55 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 56 | fusedconv.bias.copy_(b_conv + b_bn) 57 | 58 | return fusedconv 59 | -------------------------------------------------------------------------------- /record.py: -------------------------------------------------------------------------------- 1 | import wave 2 | import pyaudio 3 | import matplotlib.pyplot as plt 4 | import time 5 | 6 | CHUNK = 1024 7 | FORMAT = pyaudio.paInt16 8 | CHANNELS = 2 9 | RATE = 44100 10 | RECORD_SECONDS = 2 11 | 12 | 13 | def record(filename='output.wav'): 14 | """官方录音教程 15 | """ 16 | 17 | p = pyaudio.PyAudio() 18 | 19 | stream = p.open(format=FORMAT, 20 | channels=CHANNELS, 21 | rate=RATE, 22 | input=True, 23 | frames_per_buffer=CHUNK) 24 | 25 | print("* recording") 26 | 27 | frames = [] 28 | 29 | for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)): 30 | data = stream.read(CHUNK) 31 | frames.append(data) 32 | 33 | print("* done recording") 34 | 35 | stream.stop_stream() 36 | stream.close() 37 | p.terminate() 38 | 39 | wf = wave.open(filename, 
'wb') 40 | wf.setnchannels(CHANNELS) 41 | wf.setsampwidth(p.get_sample_size(FORMAT)) 42 | wf.setframerate(RATE) 43 | wf.writeframes(b''.join(frames)) 44 | wf.close() 45 | 46 | 47 | def multi_record(num=3): 48 | """implement 多次录音""" 49 | for i in range(1,num+1): 50 | print('第{}次录音准备'.format(i)) 51 | filename = 'record_{}.wav'.format(i) 52 | record(filename) 53 | time.sleep(second) 54 | _ = input('进行下一次录音?') 55 | 56 | 57 | def main(): 58 | record(filename='mask_on.wav') 59 | 60 | if __name__ == '__main__': 61 | main() 62 | 63 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # pip install -U -r requirements.txt 2 | numpy 3 | opencv-python >= 4.1 4 | torch >= 1.4 5 | matplotlib 6 | pycocotools 7 | tqdm 8 | pillow 9 | 10 | # Nvidia Apex (optional) for mixed precision training -------------------------- 11 | # git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex 12 | 13 | # Tensorboard (optional) pip requirements -------------------------------------- 14 | # tb-nightly 15 | # future 16 | 17 | # Conda commands (in place of pip) --------------------------------------------- 18 | # conda update -yn base -c defaults conda 19 | # conda install -yc anaconda numpy opencv matplotlib tqdm pillow ipython future 20 | # conda install -yc conda-forge scikit-image pycocotools tensorboard 21 | # conda install -yc spyder-ide spyder-line-profiler 22 | # conda install -yc pytorch pytorch torchvision 23 | # conda install -yc conda-forge protobuf numpy && pip install onnx # https://github.com/onnx/onnx#linux-and-macos 24 | -------------------------------------------------------------------------------- /slurm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | mkdir -p log 3 | now=$(date +"%Y%m%d_%H%M%S") 4 | srun --partition=Data --gres=gpu:4 --ntasks-per-node=1 --job-name=maskdetect python train.py --data data/mask.data --cfg cfg/yolov3-tiny-mask.cfg --epochs 100 2>&1|tee log/train-$now.log 5 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | 4 | from torch.utils.data import DataLoader 5 | 6 | from models import * 7 | from project.datasets import * 8 | from project.utils import * 9 | 10 | 11 | def test( 12 | cfg, 13 | data_cfg, 14 | weights=None, 15 | batch_size=16, 16 | img_size=416, 17 | iou_thres=0.5, 18 | conf_thres=0.001, 19 | nms_thres=0.5, 20 | save_json=False, 21 | model=None 22 | ): 23 | if model is None: 24 | device = torch_utils.select_device() 25 | 26 | # Initialize model 27 | model = Darknet(cfg, img_size).to(device) 28 | 29 | # Load weights 30 | if weights.endswith('.pt'): # pytorch format 31 | model.load_state_dict(torch.load(weights, map_location=device)['model']) 32 | else: # darknet format 33 | _ = load_darknet_weights(model, weights) 34 | 35 | if torch.cuda.device_count() > 1: 36 | model = nn.DataParallel(model) 37 | else: 38 | device = next(model.parameters()).device # get model device 39 | 40 | # Configure run 41 | data_cfg = parse_data_cfg(data_cfg) 42 | nc = int(data_cfg['classes']) # number of classes 43 | test_path = data_cfg['valid'] # path to test images 44 | names = load_classes(data_cfg['names']) # class names 
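    # NOTE (editor, assumption): parse_data_cfg() simply returns the key=value pairs of the
    # *.data file as a dict, so the fields used above are expected to look roughly like
    # classes=80, valid=../coco/5k.txt, names=data/coco.names for the default coco.data
    # (exact keys and paths depend on the dataset file actually supplied via --data-cfg).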
45 | 46 | # Dataloader 47 | dataset = LoadImagesAndLabels(test_path, img_size, batch_size) 48 | dataloader = DataLoader(dataset, 49 | batch_size=batch_size, 50 | num_workers=4, 51 | pin_memory=True, 52 | collate_fn=dataset.collate_fn) 53 | 54 | seen = 0 55 | model.eval() 56 | coco91class = coco80_to_coco91_class() 57 | print(('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1')) 58 | loss, p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0., 0. 59 | jdict, stats, ap, ap_class = [], [], [], [] 60 | for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc='Computing mAP')): 61 | targets = targets.to(device) 62 | imgs = imgs.to(device) 63 | _, _, height, width = imgs.shape # batch size, channels, height, width 64 | 65 | # Plot images with bounding boxes 66 | if batch_i == 0 and not os.path.exists('test_batch0.jpg'): 67 | plot_images(imgs=imgs, targets=targets, fname='test_batch0.jpg') 68 | 69 | # Run model 70 | inf_out, train_out = model(imgs) # inference and training outputs 71 | 72 | # Compute loss 73 | if hasattr(model, 'hyp'): # if model has loss hyperparameters 74 | loss_i, _ = compute_loss(train_out, targets, model) 75 | loss += loss_i.item() 76 | 77 | # Run NMS 78 | output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres) 79 | 80 | # Statistics per image 81 | for si, pred in enumerate(output): 82 | labels = targets[targets[:, 0] == si, 1:] 83 | nl = len(labels) 84 | tcls = labels[:, 0].tolist() if nl else [] # target class 85 | seen += 1 86 | 87 | if pred is None: 88 | if nl: 89 | stats.append(([], torch.Tensor(), torch.Tensor(), tcls)) 90 | continue 91 | 92 | # Append to text file 93 | # with open('test.txt', 'a') as file: 94 | # [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred] 95 | 96 | # Append to pycocotools JSON dictionary 97 | if save_json: 98 | # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 
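                # NOTE (editor): pycocotools expects each detection as
                # [x_min, y_min, width, height] in the original image's pixel coordinates,
                # so the code below rescales the letterboxed xyxy predictions back to the
                # original image shape, converts to centre-xywh, then shifts the centre to
                # the top-left corner before writing the JSON entries.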
99 | image_id = int(Path(paths[si]).stem.split('_')[-1]) 100 | box = pred[:, :4].clone() # xyxy 101 | scale_coords(imgs[si].shape[1:], box, shapes[si]) # to original shape 102 | box = xyxy2xywh(box) # xywh 103 | box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner 104 | for di, d in enumerate(pred): 105 | jdict.append({ 106 | 'image_id': image_id, 107 | 'category_id': coco91class[int(d[6])], 108 | 'bbox': [float3(x) for x in box[di]], 109 | 'score': float(d[4]) 110 | }) 111 | 112 | # Assign all predictions as incorrect 113 | correct = [0] * len(pred) 114 | if nl: 115 | detected = [] 116 | tcls_tensor = labels[:, 0] 117 | 118 | # target boxes 119 | tbox = xywh2xyxy(labels[:, 1:5]) 120 | tbox[:, [0, 2]] *= width 121 | tbox[:, [1, 3]] *= height 122 | 123 | # Search for correct predictions 124 | for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred): 125 | 126 | # Break if all targets already located in image 127 | if len(detected) == nl: 128 | break 129 | 130 | # Continue if predicted class not among image classes 131 | if pcls.item() not in tcls: 132 | continue 133 | 134 | # Best iou, index between pred and targets 135 | m = (pcls == tcls_tensor).nonzero().view(-1) 136 | iou, bi = bbox_iou(pbox, tbox[m]).max(0) 137 | 138 | # If iou > threshold and class is correct mark as correct 139 | if iou > iou_thres and m[bi] not in detected: # and pcls == tcls[bi]: 140 | correct[i] = 1 141 | detected.append(m[bi]) 142 | 143 | # Append statistics (correct, conf, pcls, tcls) 144 | stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls)) 145 | 146 | # Compute statistics 147 | stats = [np.concatenate(x, 0) for x in list(zip(*stats))] # to numpy 148 | nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class 149 | if len(stats): 150 | p, r, ap, f1, ap_class = ap_per_class(*stats) 151 | mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean() 152 | 153 | # Print results 154 | pf = '%20s' + '%10.3g' * 6 # print format 155 | print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1)) 156 | 157 | # Print results per class 158 | if nc > 1 and len(stats): 159 | for i, c in enumerate(ap_class): 160 | print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i])) 161 | 162 | # Save JSON 163 | if save_json and map and len(jdict): 164 | imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataset.img_files] 165 | with open('results.json', 'w') as file: 166 | json.dump(jdict, file) 167 | 168 | from pycocotools.coco import COCO 169 | from pycocotools.cocoeval import COCOeval 170 | 171 | # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb 172 | cocoGt = COCO('../coco/annotations/instances_val2014.json') # initialize COCO ground truth api 173 | cocoDt = cocoGt.loadRes('results.json') # initialize COCO pred api 174 | 175 | cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') 176 | cocoEval.params.imgIds = imgIds # [:32] # only evaluate these images 177 | cocoEval.evaluate() 178 | cocoEval.accumulate() 179 | cocoEval.summarize() 180 | map = cocoEval.stats[1] # update mAP to pycocotools mAP 181 | 182 | # Return results 183 | maps = np.zeros(nc) + map 184 | for i, c in enumerate(ap_class): 185 | maps[c] = ap[i] 186 | return (mp, mr, map, mf1, loss / len(dataloader)), maps 187 | 188 | 189 | if __name__ == '__main__': 190 | parser = argparse.ArgumentParser(prog='test.py') 191 | parser.add_argument('--batch-size', type=int, default=16, help='size of each image batch') 192 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file 
path') 193 | parser.add_argument('--data-cfg', type=str, default='data/coco.data', help='coco.data file path') 194 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') 195 | parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected') 196 | parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold') 197 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') 198 | parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file') 199 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 200 | opt = parser.parse_args() 201 | print(opt) 202 | 203 | with torch.no_grad(): 204 | mAP = test( 205 | opt.cfg, 206 | opt.data_cfg, 207 | opt.weights, 208 | opt.batch_size, 209 | opt.img_size, 210 | opt.iou_thres, 211 | opt.conf_thres, 212 | opt.nms_thres, 213 | opt.save_json 214 | ) 215 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | 4 | import torch.distributed as dist 5 | import torch.optim as optim 6 | import torch.optim.lr_scheduler as lr_scheduler 7 | from torch.utils.data import DataLoader 8 | 9 | import test # Import test.py to get mAP after each epoch 10 | from models import * 11 | from project.datasets import * 12 | from project.utils import * 13 | 14 | # Hyperparameters: train.py --evolve --epochs 2 --img-size 320, Metrics: 0.204 0.302 0.175 0.234 (square smart) 15 | hyp = {'xy': 0.1, # xy loss gain (giou is about 0.02) 16 | 'wh': 0.1, # wh loss gain 17 | 'cls': 0.04, # cls loss gain 18 | 'conf': 4.5, # conf loss gain 19 | 'iou_t': 0.5, # iou target-anchor training threshold 20 | 'lr0': 0.001, # initial learning rate 21 | 'lrf': -4., # final learning rate = lr0 * (10 ** lrf) 22 | 'momentum': 0.90, # SGD momentum 23 | 'weight_decay': 0.0005} # optimizer weight decay 24 | 25 | 26 | # Hyperparameters: Original, Metrics: 0.172 0.304 0.156 0.205 (square) 27 | # hyp = {'xy': 0.5, # xy loss gain 28 | # 'wh': 0.0625, # wh loss gain 29 | # 'cls': 0.0625, # cls loss gain 30 | # 'conf': 4, # conf loss gain 31 | # 'iou_t': 0.1, # iou target-anchor training threshold 32 | # 'lr0': 0.001, # initial learning rate 33 | # 'lrf': -5., # final learning rate = lr0 * (10 ** lrf) 34 | # 'momentum': 0.9, # SGD momentum 35 | # 'weight_decay': 0.0005} # optimizer weight decay 36 | 37 | # Hyperparameters: train.py --evolve --epochs 2 --img-size 320, Metrics: 0.225 0.251 0.145 0.218 (rect) 38 | # hyp = {'xy': 0.4499, # xy loss gain 39 | # 'wh': 0.05121, # wh loss gain 40 | # 'cls': 0.04207, # cls loss gain 41 | # 'conf': 2.853, # conf loss gain 42 | # 'iou_t': 0.2487, # iou target-anchor training threshold 43 | # 'lr0': 0.0005301, # initial learning rate 44 | # 'lrf': -5., # final learning rate = lr0 * (10 ** lrf) 45 | # 'momentum': 0.8823, # SGD momentum 46 | # 'weight_decay': 0.0004149} # optimizer weight decay 47 | 48 | # Hyperparameters: train.py --evolve --epochs 2 --img-size 320, Metrics: 0.178 0.313 0.167 0.212 (square) 49 | # hyp = {'xy': 0.4664, # xy loss gain 50 | # 'wh': 0.08437, # wh loss gain 51 | # 'cls': 0.05145, # cls loss gain 52 | # 'conf': 4.244, # conf loss gain 53 | # 'iou_t': 0.09121, # iou target-anchor training threshold 54 | # 'lr0': 0.0004938, # 
initial learning rate 55 | # 'lrf': -5., # final learning rate = lr0 * (10 ** lrf) 56 | # 'momentum': 0.9025, # SGD momentum 57 | # 'weight_decay': 0.0005417} # optimizer weight decay 58 | 59 | def train( 60 | cfg, 61 | data_cfg, 62 | img_size=416, 63 | resume=False, 64 | epochs=100, # 500200 batches at bs 4, 117263 images = 68 epochs 65 | batch_size=16, 66 | accumulate=4, # effective bs = 64 = batch_size * accumulate 67 | freeze_backbone=False, 68 | transfer=False # Transfer learning (train only YOLO layers) 69 | ): 70 | init_seeds() 71 | weights = 'weights' + os.sep 72 | latest = weights + 'latest.pt' 73 | best = weights + 'best.pt' 74 | device = torch_utils.select_device() 75 | torch.backends.cudnn.benchmark = True # possibly unsuitable for multiscale 76 | img_size_test = img_size # image size for testing 77 | 78 | if opt.multi_scale: 79 | img_size_min = round(img_size / 32 / 1.5) 80 | img_size_max = round(img_size / 32 * 1.5) 81 | img_size = img_size_max * 32 # initiate with maximum multi_scale size 82 | 83 | # Configure run 84 | data_dict = parse_data_cfg(data_cfg) 85 | train_path = data_dict['train'] 86 | nc = int(data_dict['classes']) # number of classes 87 | 88 | # Initialize model 89 | model = Darknet(cfg).to(device) 90 | 91 | # Optimizer 92 | optimizer = optim.SGD(model.parameters(), lr=hyp['lr0'], momentum=hyp['momentum'], weight_decay=hyp['weight_decay']) 93 | 94 | cutoff = -1 # backbone reaches to cutoff layer 95 | start_epoch = 0 96 | best_loss = float('inf') 97 | nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters']) # yolo layer size (i.e. 255) 98 | if resume: # Load previously saved model 99 | if transfer: # Transfer learning 100 | chkpt = torch.load(weights + 'yolov3-spp.pt', map_location=device) 101 | model.load_state_dict({k: v for k, v in chkpt['model'].items() if v.numel() > 1 and v.shape[0] != 255}, 102 | strict=False) 103 | for p in model.parameters(): 104 | p.requires_grad = True if p.shape[0] == nf else False 105 | 106 | else: # resume from latest.pt 107 | chkpt = torch.load(latest, map_location=device) # load checkpoint 108 | model.load_state_dict(chkpt['model']) 109 | 110 | start_epoch = chkpt['epoch'] + 1 111 | if chkpt['optimizer'] is not None: 112 | optimizer.load_state_dict(chkpt['optimizer']) 113 | best_loss = chkpt['best_loss'] 114 | del chkpt 115 | 116 | else: # Initialize model with backbone (optional) 117 | if '-tiny' in cfg: 118 | cutoff = load_darknet_weights(model, weights + 'yolov3-tiny.conv.15') 119 | else: 120 | cutoff = load_darknet_weights(model, weights + 'darknet53.conv.74') 121 | 122 | # Scheduler https://github.com/ultralytics/yolov3/issues/238 123 | # lf = lambda x: 1 - x / epochs # linear ramp to zero 124 | # lf = lambda x: 10 ** (hyp['lrf'] * x / epochs) # exp ramp 125 | # lf = lambda x: 1 - 10 ** (hyp['lrf'] * (1 - x / epochs)) # inverse exp ramp 126 | # scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) 127 | scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(opt.epochs * x) for x in (0.8, 0.9)], gamma=0.1) 128 | scheduler.last_epoch = start_epoch - 1 129 | 130 | # # Plot lr schedule 131 | # y = [] 132 | # for _ in range(epochs): 133 | # scheduler.step() 134 | # y.append(optimizer.param_groups[0]['lr']) 135 | # plt.plot(y, label='LambdaLR') 136 | # plt.xlabel('epoch') 137 | # plt.xlabel('LR') 138 | # plt.tight_layout() 139 | # plt.savefig('LR.png', dpi=300) 140 | 141 | # Dataset 142 | dataset = LoadImagesAndLabels(train_path, 143 | img_size, 144 | batch_size, 145 | augment=True, 146 | rect=False) 147 | 
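    # NOTE (editor): the effective batch size is batch_size * accumulate (64 with either the
    # function defaults 16*4 or the argparse defaults 8*8), since gradients from `accumulate`
    # consecutive batches are summed before optimizer.step() is called further below.
    # rect=False here because the DataLoader below shuffles, and rectangular batches rely on
    # images staying grouped by aspect ratio.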
148 | # Initialize distributed training 149 | if torch.cuda.device_count() > 1: 150 | dist.init_process_group(backend=opt.backend, init_method=opt.dist_url, world_size=opt.world_size, rank=opt.rank) 151 | model = torch.nn.parallel.DistributedDataParallel(model) 152 | # sampler = torch.project.data.distributed.DistributedSampler(dataset) 153 | 154 | # Dataloader 155 | dataloader = DataLoader(dataset, 156 | batch_size=batch_size, 157 | num_workers=opt.num_workers, 158 | shuffle=True, # disable rectangular training if True 159 | pin_memory=True, 160 | collate_fn=dataset.collate_fn) 161 | 162 | # Mixed precision training https://github.com/NVIDIA/apex 163 | # install help: https://github.com/NVIDIA/apex/issues/259 164 | mixed_precision = False 165 | if mixed_precision: 166 | from apex import amp 167 | model, optimizer = amp.initialize(model, optimizer, opt_level='O1') 168 | 169 | # Remove old results 170 | for f in glob.glob('*_batch*.jpg') + glob.glob('results.txt'): 171 | os.remove(f) 172 | 173 | # Start training 174 | model.hyp = hyp # attach hyperparameters to model 175 | model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights 176 | model_info(model) 177 | nb = len(dataloader) 178 | maps = np.zeros(nc) # mAP per class 179 | results = (0, 0, 0, 0, 0) # P, R, mAP, F1, test_loss 180 | n_burnin = min(round(nb / 5 + 1), 1000) # burn-in batches 181 | t, t0 = time.time(), time.time() 182 | for epoch in range(start_epoch, epochs): 183 | print(epoch) 184 | model.train() 185 | print(('\n%8s%12s' + '%10s' * 7) % ('Epoch', 'Batch', 'xy', 'wh', 'conf', 'cls', 'total', 'targets', 'time')) 186 | 187 | # Update scheduler 188 | scheduler.step() 189 | 190 | # Freeze backbone at epoch 0, unfreeze at epoch 1 (optional) 191 | if freeze_backbone and epoch < 2: 192 | for name, p in model.named_parameters(): 193 | if int(name.split('.')[1]) < cutoff: # if layer < 75 194 | p.requires_grad = False if epoch == 0 else True 195 | 196 | # # Update image weights (optional) 197 | # w = model.class_weights.cpu().numpy() * (1 - maps) # class weights 198 | # image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w) 199 | # dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n) # random weighted index 200 | 201 | mloss = torch.zeros(5).to(device) # mean losses 202 | for i, (imgs, targets, _, _) in enumerate(dataloader): 203 | imgs = imgs.to(device) 204 | targets = targets.to(device) 205 | 206 | # Multi-Scale training 207 | if opt.multi_scale: 208 | if (i + 1 + nb * epoch) % 10 == 0: #  adjust (67% - 150%) every 10 batches 209 | img_size = random.choice(range(img_size_min, img_size_max + 1)) * 32 210 | print('img_size = %g' % img_size) 211 | scale_factor = img_size / max(imgs.shape[-2:]) 212 | imgs = F.interpolate(imgs, scale_factor=scale_factor, mode='bilinear', align_corners=False) 213 | 214 | # Plot images with bounding boxes 215 | if epoch == 0 and i == 0: 216 | plot_images(imgs=imgs, targets=targets, fname='train_batch%g.jpg' % i) 217 | 218 | # SGD burn-in 219 | if epoch == 0 and i <= n_burnin: 220 | lr = hyp['lr0'] * (i / n_burnin) ** 4 221 | for x in optimizer.param_groups: 222 | x['lr'] = lr 223 | 224 | # Run model 225 | pred = model(imgs) 226 | 227 | # Compute loss 228 | loss, loss_items = compute_loss(pred, targets, model) 229 | if torch.isnan(loss): 230 | print('WARNING: nan loss detected, ending training') 231 | return results 232 | 233 | # Compute gradient 234 | if mixed_precision: 235 | with amp.scale_loss(loss, 
optimizer) as scaled_loss: 236 | scaled_loss.backward() 237 | else: 238 | loss.backward() 239 | 240 | # Accumulate gradient for x batches before optimizing 241 | if (i + 1) % accumulate == 0 or (i + 1) == nb: 242 | optimizer.step() 243 | optimizer.zero_grad() 244 | 245 | # Print batch results 246 | mloss = (mloss * i + loss_items) / (i + 1) # update mean losses 247 | s = ('%8s%12s' + '%10.3g' * 7) % ( 248 | '%g/%g' % (epoch, epochs - 1), 249 | '%g/%g' % (i, nb - 1), *mloss, len(targets), time.time() - t) 250 | t = time.time() 251 | print(s) 252 | 253 | # Calculate mAP (always test final epoch, skip first 5 if opt.nosave) 254 | if not (opt.notest or (opt.nosave and epoch < 10)) or epoch == epochs - 1: 255 | with torch.no_grad(): 256 | results, maps = test.test(cfg, data_cfg, batch_size=batch_size, img_size=img_size_test, model=model, 257 | conf_thres=0.1) 258 | 259 | # Write epoch results 260 | with open('results.txt', 'a') as file: 261 | file.write(s + '%11.3g' * 5 % results + '\n') # P, R, mAP, F1, test_loss 262 | 263 | # Update best loss 264 | test_loss = results[4] 265 | if test_loss < best_loss: 266 | best_loss = test_loss 267 | 268 | # Save training results 269 | save = (not opt.nosave) or (epoch == epochs - 1) 270 | if save: 271 | # Create checkpoint 272 | chkpt = {'epoch': epoch, 273 | 'best_loss': best_loss, 274 | 'model': model.module.state_dict() if type( 275 | model) is nn.parallel.DistributedDataParallel else model.state_dict(), 276 | 'optimizer': optimizer.state_dict()} 277 | 278 | # Save latest checkpoint 279 | torch.save(chkpt, latest) 280 | 281 | # Save best checkpoint 282 | if best_loss == test_loss: 283 | torch.save(chkpt, best) 284 | 285 | # Save backup every 10 epochs (optional) 286 | if epoch > 0 and epoch % 10 == 0: 287 | torch.save(chkpt, weights + 'backup%g.pt' % epoch) 288 | 289 | # Delete checkpoint 290 | del chkpt 291 | 292 | dt = (time.time() - t0) / 3600 293 | print('%g epochs completed in %.3f hours.' 
% (epoch - start_epoch + 1, dt)) 294 | return results 295 | 296 | 297 | def print_mutation(hyp, results): 298 | # Write mutation results 299 | a = '%11s' * len(hyp) % tuple(hyp.keys()) # hyperparam keys 300 | b = '%11.4g' * len(hyp) % tuple(hyp.values()) # hyperparam values 301 | c = '%11.3g' * len(results) % results # results (P, R, mAP, F1, test_loss) 302 | print('\n%s\n%s\nEvolved fitness: %s\n' % (a, b, c)) 303 | with open('evolve.txt', 'a') as f: 304 | f.write(c + b + '\n') 305 | 306 | 307 | if __name__ == '__main__': 308 | parser = argparse.ArgumentParser() 309 | parser.add_argument('--epochs', type=int, default=68, help='number of epochs') 310 | parser.add_argument('--batch-size', type=int, default=8, help='batch size') 311 | parser.add_argument('--accumulate', type=int, default=8, help='number of batches to accumulate before optimizing') 312 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path') 313 | parser.add_argument('--data-cfg', type=str, default='data/coco_64img.data', help='coco.data file path') 314 | parser.add_argument('--multi-scale', action='store_true', help='random image sizes per batch 320 - 608') 315 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 316 | parser.add_argument('--resume', action='store_true', help='resume training flag') 317 | parser.add_argument('--transfer', action='store_true', help='transfer learning flag') 318 | parser.add_argument('--num-workers', type=int, default=4, help='number of Pytorch DataLoader workers') 319 | parser.add_argument('--dist-url', default='tcp://127.0.0.1:9999', type=str, help='distributed training init method') 320 | parser.add_argument('--rank', default=0, type=int, help='distributed training node rank') 321 | parser.add_argument('--world-size', default=1, type=int, help='number of nodes for distributed training') 322 | parser.add_argument('--backend', default='nccl', type=str, help='distributed backend') 323 | parser.add_argument('--nosave', action='store_true', help='do not save training results') 324 | parser.add_argument('--notest', action='store_true', help='only test final epoch') 325 | parser.add_argument('--evolve', action='store_true', help='run hyperparameter evolution') 326 | parser.add_argument('--var', default=0, type=int, help='debug variable') 327 | opt = parser.parse_args() 328 | print(opt) 329 | 330 | if opt.evolve: 331 | opt.notest = True # save time by only testing final epoch 332 | opt.nosave = True # do not save checkpoints 333 | 334 | # Train 335 | results = train( 336 | opt.cfg, 337 | opt.data_cfg, 338 | img_size=opt.img_size, 339 | resume=opt.resume or opt.transfer, 340 | transfer=opt.transfer, 341 | epochs=opt.epochs, 342 | batch_size=opt.batch_size, 343 | accumulate=opt.accumulate, 344 | ) 345 | 346 | # Evolve hyperparameters (optional) 347 | if opt.evolve: 348 | best_fitness = results[2] # use mAP for fitness 349 | 350 | # Write mutation results 351 | print_mutation(hyp, results) 352 | 353 | gen = 1000 # generations to evolve 354 | for _ in range(gen): 355 | 356 | # Mutate hyperparameters 357 | old_hyp = hyp.copy() 358 | init_seeds(seed=int(time.time())) 359 | s = [.3, .3, .3, .3, .3, .3, .3, .03, .3] # xy, wh, cls, conf, iou_t, lr0, lrf, momentum, weight_decay 360 | for i, k in enumerate(hyp.keys()): 361 | x = (np.random.randn(1) * s[i] + 1) ** 1.1 # plt.hist(x.ravel(), 100) 362 | hyp[k] = hyp[k] * float(x) # vary by about 30% 1sigma 363 | 364 | # Clip to limits 365 | keys = ['lr0', 'iou_t', 'momentum', 
'weight_decay'] 366 | limits = [(1e-4, 1e-2), (0, 0.90), (0.70, 0.99), (0, 0.01)] 367 | for k, v in zip(keys, limits): 368 | hyp[k] = np.clip(hyp[k], v[0], v[1]) 369 | 370 | # Determine mutation fitness 371 | results = train( 372 | opt.cfg, 373 | opt.data_cfg, 374 | img_size=opt.img_size, 375 | resume=opt.resume or opt.transfer, 376 | transfer=opt.transfer, 377 | epochs=opt.epochs, 378 | batch_size=opt.batch_size, 379 | accumulate=opt.accumulate, 380 | ) 381 | mutation_fitness = results[2] 382 | 383 | # Write mutation results 384 | print_mutation(hyp, results) 385 | 386 | # Update hyperparameters if fitness improved 387 | if mutation_fitness > best_fitness: 388 | # Fitness improved! 389 | print('Fitness improved!') 390 | best_fitness = mutation_fitness 391 | else: 392 | hyp = old_hyp.copy() # reset hyp to 393 | 394 | # # Plot results 395 | # import numpy as np 396 | # import matplotlib.pyplot as plt 397 | # a = np.loadtxt('evolve_1000val.txt') 398 | # x = a[:, 2] * a[:, 3] # metric = mAP * F1 399 | # weights = (x - x.min()) ** 2 400 | # fig = plt.figure(figsize=(14, 7)) 401 | # for i in range(len(hyp)): 402 | # y = a[:, i + 5] 403 | # mu = (y * weights).sum() / weights.sum() 404 | # plt.subplot(2, 5, i+1) 405 | # plt.plot(x.max(), mu, 'o') 406 | # plt.plot(x, y, '.') 407 | # print(list(hyp.keys())[i],'%.4g' % mu) 408 | 409 | -------------------------------------------------------------------------------- /voc_label.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "/home/zhangzhengming/下载/yolov3/data\n" 13 | ] 14 | }, 15 | { 16 | "ename": "FileNotFoundError", 17 | "evalue": "[Errno 2] No such file or directory: 'data/ImageSets/train.txt'", 18 | "output_type": "error", 19 | "traceback": [ 20 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 21 | "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", 22 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data/labels/'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmakedirs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data/labels/'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 53\u001b[0;31m \u001b[0mimage_ids\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data/ImageSets/%s.txt'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mimage_set\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 54\u001b[0m \u001b[0mlist_file\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data/%s.txt'\u001b[0m \u001b[0;34m%\u001b[0m 
\u001b[0;34m(\u001b[0m\u001b[0mimage_set\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'w'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mimage_id\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mimage_ids\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 23 | "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'data/ImageSets/train.txt'" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "import xml.etree.ElementTree as ET\n", 29 | "import pickle\n", 30 | "import os\n", 31 | "from os import listdir, getcwd\n", 32 | "from os.path import join\n", 33 | " \n", 34 | "sets = ['train', 'test','val']\n", 35 | " \n", 36 | "classes = [\"collector\"]\n", 37 | " \n", 38 | " \n", 39 | "def convert(size, box):\n", 40 | " dw = 1. / size[0]\n", 41 | " dh = 1. / size[1]\n", 42 | " x = (box[0] + box[1]) / 2.0\n", 43 | " y = (box[2] + box[3]) / 2.0\n", 44 | " w = box[1] - box[0]\n", 45 | " h = box[3] - box[2]\n", 46 | " x = x * dw\n", 47 | " w = w * dw\n", 48 | " y = y * dh\n", 49 | " h = h * dh\n", 50 | " return (x, y, w, h)\n", 51 | " \n", 52 | " \n", 53 | "def convert_annotation(image_id):\n", 54 | " in_file = open('Annotations/%s.xml' % (image_id))\n", 55 | " out_file = open('data/labels/%s.txt' % (image_id), 'w')\n", 56 | " tree = ET.parse(in_file)\n", 57 | " root = tree.getroot()\n", 58 | " size = root.find('size')\n", 59 | " w = int(size.find('width').text)\n", 60 | " h = int(size.find('height').text)\n", 61 | " \n", 62 | " for obj in root.iter('object'):\n", 63 | " difficult = obj.find('Difficult').text\n", 64 | " cls = obj.find('name').text\n", 65 | " if cls not in classes or int(difficult) == 1:\n", 66 | " continue\n", 67 | " cls_id = classes.index(cls)\n", 68 | " xmlbox = obj.find('bndbox')\n", 69 | " b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),\n", 70 | " float(xmlbox.find('ymax').text))\n", 71 | " bb = convert((w, h), b)\n", 72 | " out_file.write(str(cls_id) + \" \" + \" \".join([str(a) for a in bb]) + '\\n')\n", 73 | " \n", 74 | " \n", 75 | "wd = getcwd()\n", 76 | "print(wd)\n", 77 | "for image_set in sets:\n", 78 | " if not os.path.exists('data/labels/'):\n", 79 | " os.makedirs('data/labels/')\n", 80 | " image_ids = open('data/ImageSets/%s.txt' % (image_set)).read().strip().split()\n", 81 | " list_file = open('data/%s.txt' % (image_set), 'w')\n", 82 | " for image_id in image_ids:\n", 83 | " list_file.write('images/%s.jpg\\n' % (image_id))\n", 84 | " convert_annotation(image_id)\n", 85 | " list_file.close()\n" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [] 94 | } 95 | ], 96 | "metadata": { 97 | "kernelspec": { 98 | "display_name": "Python 3", 99 | "language": "python", 100 | "name": "python3" 101 | }, 102 | "language_info": { 103 | "codemirror_mode": { 104 | "name": "ipython", 105 | "version": 3 106 | }, 107 | "file_extension": ".py", 108 | "mimetype": "text/x-python", 109 | "name": "python", 110 | "nbconvert_exporter": "python", 111 | "pygments_lexer": "ipython3", 112 | "version": "3.7.0" 113 | } 114 | }, 115 | "nbformat": 4, 116 | "nbformat_minor": 2 117 | } 118 | -------------------------------------------------------------------------------- /weights/download_yolov3_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # make '/weights' directory if it does not 
exist and cd into it 4 | # mkdir -p weights && cd weights 5 | 6 | # download darknet weight files ('-c' resumes a partially downloaded file) 7 | # wget -c https://pjreddie.com/media/files/yolov3.weights 8 | # wget -c https://pjreddie.com/media/files/yolov3-tiny.weights 9 | # wget -c https://pjreddie.com/media/files/yolov3-spp.weights 10 | 11 | # yolov3 pytorch weights 12 | # download from Google Drive: https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI 13 | 14 | # darknet53 weights (first 75 layers only) 15 | # wget -c https://pjreddie.com/media/files/darknet53.conv.74 16 | 17 | # yolov3-tiny weights from darknet (first 16 layers only) 18 | # ./darknet partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15 19 | # mv yolov3-tiny.conv.15 ../ 20 | 21 | # new method 22 | python3 -c "from models import *; 23 | attempt_download('weights/yolov3.pt'); 24 | attempt_download('weights/yolov3-spp.pt')" 25 | --------------------------------------------------------------------------------