├── Dockerfile ├── README.md ├── cfg ├── oxfordhand.data ├── yolov3-1cls.cfg ├── yolov3-hand.cfg ├── yolov3-quantize-hand.cfg ├── yolov3-spp-1cls.cfg ├── yolov3-spp-pan-scale.cfg ├── yolov3-spp.cfg ├── yolov3-tiny-1cls.cfg ├── yolov3-tiny-hand.cfg ├── yolov3-tiny.cfg ├── yolov3.cfg ├── yolov3s-18a320.cfg ├── yolov3s-30a320.cfg ├── yolov3s-3a320.cfg └── yolov3s-9a320.cfg ├── data ├── 5k.shapes ├── 5k.txt ├── coco.data ├── coco.names ├── coco_1000img.data ├── coco_1000img.txt ├── coco_1000val.data ├── coco_1000val.txt ├── coco_16img.data ├── coco_16img.txt ├── coco_1cls.data ├── coco_1cls.txt ├── coco_1img.data ├── coco_1img.txt ├── coco_1k5k.data ├── coco_32img.data ├── coco_32img.txt ├── coco_500img.txt ├── coco_500val.data ├── coco_500val.txt ├── coco_64img.data ├── coco_64img.txt ├── coco_paper.names ├── converter.py ├── get_coco_dataset.sh ├── get_coco_dataset_gdrive.sh ├── oxfordhand.data ├── oxfordhand.names ├── samples │ ├── bus.jpg │ └── zidane.jpg └── trainvalno5k.shapes ├── detect.py ├── examples.ipynb ├── github_files └── 64067835-51d5b500-cc2f-11e9-982e-843f7f9a6ea2.jpg ├── models.py ├── normal_prune.py ├── prune_tiny_yolo.py ├── quant_dorefa.py ├── regular_prune.py ├── requirements.txt ├── shortcut_prune.py ├── test.py ├── train.py ├── utils ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── datasets.cpython-37.pyc │ ├── google_utils.cpython-37.pyc │ ├── parse_config.cpython-37.pyc │ ├── prune_utils.cpython-37.pyc │ ├── torch_utils.cpython-37.pyc │ └── utils.cpython-37.pyc ├── adabound.py ├── datasets.py ├── gcp.sh ├── google_utils.py ├── parse_config.py ├── prune_utils.py ├── tiny_prune_utils.py ├── torch_utils.py └── utils.py └── weights └── download_yolov3_weights.sh /Dockerfile: -------------------------------------------------------------------------------- 1 | # Start from Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch 2 | FROM nvcr.io/nvidia/pytorch:19.08-py3 3 | 4 | # Install dependencies (pip or conda) 5 | RUN pip install -U gsutil 6 | # RUN pip install -U -r requirements.txt 7 | # RUN conda update -n base -c defaults conda 8 | # RUN conda install -y -c anaconda future numpy opencv matplotlib tqdm pillow 9 | # RUN conda install -y -c conda-forge scikit-image tensorboard pycocotools 10 | 11 | ## Install OpenCV with Gstreamer support 12 | #WORKDIR /usr/src 13 | #RUN pip uninstall -y opencv-python 14 | #RUN apt-get update 15 | #RUN apt-get install -y gstreamer1.0-tools gstreamer1.0-python3-dbg-plugin-loader libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev 16 | #RUN git clone https://github.com/opencv/opencv.git && cd opencv && git checkout 4.1.1 && mkdir build 17 | #RUN git clone https://github.com/opencv/opencv_contrib.git && cd opencv_contrib && git checkout 4.1.1 18 | #RUN cd opencv/build && cmake ../ \ 19 | # -D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules \ 20 | # -D BUILD_OPENCV_PYTHON3=ON \ 21 | # -D PYTHON3_EXECUTABLE=/opt/conda/bin/python \ 22 | # -D PYTHON3_INCLUDE_PATH=/opt/conda/include/python3.6m \ 23 | # -D PYTHON3_LIBRARIES=/opt/conda/lib/python3.6/site-packages \ 24 | # -D WITH_GSTREAMER=ON \ 25 | # -D WITH_FFMPEG=OFF \ 26 | # && make && make install && ldconfig 27 | #RUN cd /usr/local/lib/python3.6/site-packages/cv2/python-3.6/ && mv cv2.cpython-36m-x86_64-linux-gnu.so cv2.so 28 | #RUN cd /opt/conda/lib/python3.6/site-packages/ && ln -s /usr/local/lib/python3.6/site-packages/cv2/python-3.6/cv2.so cv2.so 29 | #RUN python3 -c "import cv2; print(cv2.getBuildInformation())" 30 | 31 | # Create 
working directory 32 | RUN mkdir -p /usr/src/app 33 | WORKDIR /usr/src/app 34 | 35 | # Copy contents 36 | COPY . /usr/src/app 37 | 38 | # Copy weights 39 | #RUN python3 -c "from utils.google_utils import *; \ 40 | # gdrive_download(id='18xqvs_uwAqfTXp-LJCYLYNHBOcrwbrp0', name='weights/darknet53.conv.74'); \ 41 | # gdrive_download(id='1oPCHKsM2JpM-zgyepQciGli9X0MTsJCO', name='weights/yolov3-spp.weights'); \ 42 | # gdrive_download(id='1vFlbJ_dXPvtwaLLOu-twnjK4exdFiQ73', name='weights/yolov3-spp.pt')" 43 | 44 | 45 | # --------------------------------------------------- Extras Below --------------------------------------------------- 46 | 47 | # Build 48 | # rm -rf yolov3 # Warning: remove existing 49 | # git clone https://github.com/ultralytics/yolov3 && cd yolov3 && python3 detect.py 50 | # sudo docker image prune -af && sudo docker build -t ultralytics/yolov3:v0 . 51 | 52 | # Run 53 | # sudo nvidia-docker run --ipc=host ultralytics/yolov3:v0 python3 detect.py 54 | 55 | # Run with local directory access 56 | # sudo nvidia-docker run --ipc=host --mount type=bind,source="$(pwd)"/coco,target=/usr/src/coco ultralytics/yolov3:v0 python3 train.py 57 | 58 | # Pull and Run with local directory access 59 | # export tag=ultralytics/yolov3:v0 && sudo docker pull $tag && sudo nvidia-docker run --ipc=host --mount type=bind,source="$(pwd)"/coco,target=/usr/src/coco $tag python3 train.py 60 | 61 | # Build and Push 62 | # export tag=ultralytics/yolov3:v0 && sudo docker build -t $tag . && docker push $tag 63 | 64 | # Kill all 65 | # sudo docker kill $(sudo docker ps -q) 66 | 67 | # Run bash for loop 68 | # sudo nvidia-docker run --ipc=host ultralytics/yolov3:v0 while true; do python3 train.py --evolve; done 69 | -------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------

# YOLOv3-complete-pruning

This project takes [ultralytics/yolov3](https://github.com/ultralytics/yolov3) as its PyTorch implementation of YOLOv3 and, building on the pruning work in [YOLOv3-model-pruning](https://github.com/Lam1360/YOLOv3-model-pruning), provides four pruned variants of YOLOv3. (Many thanks to both authors.)

In addition, 1-bit, 4-bit, 8-bit, and 16-bit quantization of YOLO has recently been added.

| Pruning method | Advantages | Disadvantages |
| --- | --- | --- |
| Normal pruning | Does not prune shortcut layers; gives a considerable and stable compression ratio with no fine-tuning required. | Compression ratio is not pushed to the limit. |
| Extreme pruning | Very high compression ratio. | Requires fine-tuning. |
| Regular pruning | Designed for hardware deployment; the number of filters after pruning is always a multiple of 8; no fine-tuning required. | Sacrifices some compression ratio for regularity. |
| Tiny pruning | Stable compression ratio. | Since Tiny is already small, the compression ratio is only moderate. |

## Project features

1. The YOLOv3 implementation used here is fairly accurate, with a relatively high mAP:

Model | 320 | 416 | 608
--- | --- | --- | ---
`YOLOv3` | 51.8 (51.5) | 55.4 (55.3) | 58.2 (57.9)
`YOLOv3-tiny` | 29.0 | 32.9 (33.1) | 35.5

2. Multiple pruned and quantized versions of YOLOv3 and YOLOv3-tiny are provided to suit different needs.

3. Pruned models are saved in .weights format, so they can be further trained, used for inference, or visualized on images and video in any framework (see the loading sketch after this list).

4. Current support status:

| Pruning / training mode | Single GPU | Multi GPU |
| --- | --- | --- |
| Normal training | | |
| Sparsity training | | |
| Normal pruning | | |
| Regular pruning | | |
| Extreme pruning (shortcut) | | |
| Tiny pruning | | |

| Binary quantization | 8-bit quantization | 16-bit quantization | Mixed quantization | Arbitrary-bit quantization |
| --- | --- | --- | --- | --- |
| | | | | |
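
Point 3 above means a pruned network is just another cfg/.weights pair. The snippet below is a minimal sketch of loading one, assuming the ultralytics-style `Darknet` class and `load_darknet_weights()` helper from this repo's models.py (exact signatures may differ between versions); the file names are taken from the detection example later in this README.

```python
# Minimal sketch: rebuild a pruned model from its cfg/.weights pair.
# Assumes the ultralytics-style Darknet class and load_darknet_weights()
# helper defined in models.py; treat the call signatures as illustrative.
import torch
from models import Darknet, load_darknet_weights

cfg = 'cfg/prune_0.8_yolov3-hand.cfg'                       # cfg emitted by the prune script
weights = 'weights/yolov3_hand_pruning_percent0.8.weights'  # pruned darknet-format weights

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Darknet(cfg).to(device)       # the pruned cfg describes the smaller network
load_darknet_weights(model, weights)  # copy the pruned weights into it
model.eval()                          # ready for inference, export, or further training
```

The same cfg/.weights pair can equally be passed to detect.py, test.py, or train.py through `--cfg` and `--weights`.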

## Latest updates

- 2019-12-26: arbitrary-bit quantization supported.
- 2019-11-06: extreme pruning no longer requires fine-tuning.
- Many thanks to [tanluren](https://github.com/tanluren) for pointing out numerous issues and for supporting this project ^_^.

## Environment setup

1. Since the YOLO implementation comes from [ultralytics/yolov3](https://github.com/ultralytics/yolov3), follow that repository for environment setup. The requirements are repeated here:

- `numpy`
- `torch >= 1.1.0`
- `opencv-python`
- `tqdm`

You can simply run `pip3 install -U -r requirements.txt`, or build an equivalent conda environment from the same requirements file.

## Getting the data

As in the upstream pruning project, the Oxford Hand dataset is used.

1. Download the [dataset](http://www.robots.ox.ac.uk/~vgg/data/hands/downloads/hand_dataset.tar.gz) and extract it into the /data directory, which gives you the hand_dataset folder.

2. Run `python converter.py` to generate the images and labels folders and the train.txt and valid.txt files.

3. Get the YOLO pretrained weights: run `bash download_yolov3_weights.sh` inside the /weights folder, or download them yourself.

4. Data preparation is now complete.

## Pruning

1. Normal training

```bash
python3 train.py --data data/oxfordhand.data --batch-size 32 --accumulate 1 --weights weights/yolov3.weights --cfg cfg/yolov3-hand.cfg
```

2. Sparsity training

`-sr` enables sparsity training, `--s` sets the sparsity factor, and `--prune` selects the sparsity type (a sketch of the penalty this adds is given after this section):

`--prune 0`: sparsity for normal and regular pruning

`--prune 1`: sparsity for extreme pruning

`--prune 2`: sparsity for Tiny pruning

```bash
python3 train.py --data data/oxfordhand.data --batch-size 32 --accumulate 1 --weights weights/yolov3.weights --cfg cfg/yolov3-hand.cfg -sr --s 0.001 --prune 0
```

3. Model pruning

- Normal pruning
```bash
python3 normal_prune.py
```
- Regular pruning
```bash
python3 regular_prune.py
```
- Extreme pruning
```bash
python3 shortcut_prune.py
```
- Tiny pruning
```bash
python3 prune_tiny_yolo.py
```

Note that inside each of these .py files you must point the cfg and weights variables in opt to the cfg and weights files produced by the sparsity training in step 2.
You can also increase the value of percent in the code to obtain a higher compression ratio. (If sparsity training was insufficient and percent is set too high, the script will raise an error.)
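
For reference, the sketch below shows the kind of L1 penalty that sparsity training pushes onto the BatchNorm scale factors (gamma); the pruning scripts then rank channels by these gammas and drop the smallest ones up to percent. This is a simplified illustration of what utils/prune_utils.py does rather than the repo's exact code: it applies the sub-gradient to every BatchNorm layer, while the repo restricts it to the prunable layers selected by `--prune`.

```python
# Simplified sketch of the extra L1 (sparsity) term added to BatchNorm scales
# during `-sr` training. Call it after loss.backward() and before optimizer.step().
import torch
import torch.nn as nn

def update_bn_grads(model: nn.Module, s: float = 0.001) -> None:
    """Add the sub-gradient of s * |gamma| to every BatchNorm2d scale factor."""
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d):
            # d/d(gamma) of s * |gamma| is s * sign(gamma)
            m.weight.grad.data.add_(s * torch.sign(m.weight.data))
```

Raising `--s` strengthens this penalty and drives more gammas toward zero, which is why the Q&A at the end of this README suggests a larger `--s` (and more rounds of sparsity training) for a higher compression ratio.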

## Quantization

1. Choose the layers to quantize

Open any working cfg file, for example yolov3-hand.cfg, and change the layers you want to quantize from convolutional to quantize_convolutional.

2. Choose the quantization scheme

Set W_bit and A_bit in models.py to choose the bit width for weights and activations respectively. (The current default is 16-bit quantization; a sketch of the underlying DoReFa-style quantizer is given at the end of this README.)

3. Quantization-aware training

```bash
python3 train.py --data data/oxfordhand.data --batch-size 32 --accumulate 1 --weights weights/yolov3.weights --cfg cfg/yolov3-quantize-hand.cfg
```

This is the same as normal training, except that the cfg must point to the modified file.

## Inference demo

You can run inference not only with the original YOLOv3 but also with any of the pruned models (just point cfg and weights at the corresponding files).

```bash
python3 detect.py --source ...
```

- Image: `--source file.jpg`
- Video: `--source file.mp4`
- Directory: `--source dir/`
- Webcam: `--source 0`
- RTSP stream: `--source rtsp://170.93.143.139/rtplive/470011e600ef003a004ee33696235daa`
- HTTP stream: `--source http://wmccpinetop.axiscam.net/mjpg/video.mjpg`

For example:
```bash
python3 detect.py --cfg cfg/prune_0.8_yolov3-hand.cfg --weights weights/yolov3_hand_pruning_percent0.8.weights --data data/oxfordhand.data --source test.jpg
```

## Pruning results

Except for extreme pruning, none of the numbers below involve fine-tuning.

### YOLOv3 pruning

| Model | Parameters | Model size | Compression | Inference time | mAP |
| --- | --- | --- | --- | --- | --- |
| Baseline (416) | 61.5M | 246.4MB | 0% | 11.7ms | 0.7924 |
| Normal pruning | 10.9M | 43.9MB | 82.2% | 5.92ms | 0.7712 |
| Regular pruning | 15.31M | 61.4MB | 75.1% | 6.01ms | 0.7832 |
| Extreme pruning | 7.13M | 28.6MB | 88.4% | 5.90ms | 0.7382 |

### YOLOv3-tiny pruning

| Model | Parameters | Model size | Compression | Inference time | mAP |
| --- | --- | --- | --- | --- | --- |
| Baseline (416) | 8.7M | 33.1MB | 0% | 2.2ms | 0.6378 |
| Tiny pruning | 4.4M | 16.8MB | 40.1% | 2.0ms | 0.6132 |

## Quantization results (data still being updated)

In all of the quantization results below, both weights and activations are quantized.

### YOLOv3 quantization

| Model | mAP |
| --- | --- |
| Baseline (416) | 0.8246 |
| First layer float32 + middle layers 8-bit + last layer float32 | 0.8174 |
| Full 16-bit quantization | 0.8132 |
| Full 8-bit quantization | 0.8024 |

## Core ideas

The pruning method comes from [Learning Efficient Convolutional Networks through Network Slimming](https://arxiv.org/abs/1708.06519), and the fine-tuning-free pruning comes from [Rethinking the Smaller-Norm-Less-Informative Assumption in Channel Pruning of Convolution Layers](https://arxiv.org/abs/1802.00124?context=cs).

The quantization method comes from [DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients](https://arxiv.org/abs/1606.06160).

The actual implementation makes some improvements over the authors' original code.

## Q&A

### 1. How do I get a higher compression ratio?

The key is sparsity training: increase the value of `--s` and run several rounds of sparsity training.

### 2. My compression ratio is higher than the one in the table!

The numbers above come from fewer than 20 runs; if you get a higher compression ratio, please share it in the comments.

### 3. What if the program throws an error?

#### YOLOv3 errors

Since [ultralytics/yolov3](https://github.com/ultralytics/yolov3) is used as the PyTorch implementation of YOLOv3, questions of this kind can be asked there.

#### Pruning or quantization errors

Please leave a message in the comment section and I will fix it as soon as possible.
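
To make the quantization section above concrete, here is a minimal sketch of DoReFa-style uniform k-bit weight quantization, the scheme quant_dorefa.py is based on. It illustrates the formula from the paper rather than the repo's exact code; in real quantization-aware training the rounding is wrapped in a straight-through estimator so gradients can flow through it.

```python
# Minimal sketch of DoReFa-style k-bit weight quantization (per the paper);
# the repo's quant_dorefa.py additionally uses a straight-through estimator
# for round() and handles activations separately.
import torch

def quantize_k(x: torch.Tensor, k: int) -> torch.Tensor:
    """Uniformly quantize values in [0, 1] onto a grid of 2^k levels."""
    n = float(2 ** k - 1)
    return torch.round(x * n) / n

def quantize_weights(w: torch.Tensor, w_bit: int) -> torch.Tensor:
    """DoReFa weights: squash to [0, 1] with tanh, quantize, rescale to [-1, 1]."""
    if w_bit == 32:                            # keep full precision
        return w
    t = torch.tanh(w)
    t = t / (2 * torch.abs(t).max()) + 0.5     # map to [0, 1]
    return 2 * quantize_k(t, w_bit) - 1        # map back to [-1, 1]
```

Choosing W_bit and A_bit in models.py effectively selects the k used for weights and activations; the DoReFa paper handles 1-bit weights with a separate sign-based rule.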
229 | 230 | 231 | 232 | -------------------------------------------------------------------------------- /cfg/oxfordhand.data: -------------------------------------------------------------------------------- 1 | classes= 1 2 | train=data/train.txt 3 | valid=data/valid.txt 4 | names=data/oxfordhand.names 5 | -------------------------------------------------------------------------------- /cfg/yolov3-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 
193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | 
[shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=18 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=1 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | 
[upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=18 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=1 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=18 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=1 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /cfg/yolov3-hand.cfg: -------------------------------------------------------------------------------- 1 | 2 | [net] 3 | # Testing 4 | #batch=1 5 | #subdivisions=1 6 | # Training 7 | batch=16 8 | subdivisions=1 9 | width=416 10 | height=416 11 | channels=3 12 | momentum=0.9 13 | decay=0.0005 14 | angle=0 15 | saturation = 1.5 16 | exposure = 1.5 17 | hue=.1 18 | 19 | learning_rate=0.001 20 | burn_in=1000 21 | max_batches = 500200 22 | policy=steps 23 | steps=400000,450000 24 | scales=.1,.1 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=32 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | # Downsample 35 | 36 | [convolutional] 37 | batch_normalize=1 38 | filters=64 39 | size=3 40 | stride=2 41 | pad=1 42 | activation=leaky 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | 
filters=32 47 | size=1 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [convolutional] 53 | batch_normalize=1 54 | filters=64 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [shortcut] 61 | from=-3 62 | activation=linear 63 | 64 | # Downsample 65 | 66 | [convolutional] 67 | batch_normalize=1 68 | filters=128 69 | size=3 70 | stride=2 71 | pad=1 72 | activation=leaky 73 | 74 | [convolutional] 75 | batch_normalize=1 76 | filters=64 77 | size=1 78 | stride=1 79 | pad=1 80 | activation=leaky 81 | 82 | [convolutional] 83 | batch_normalize=1 84 | filters=128 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [shortcut] 91 | from=-3 92 | activation=linear 93 | 94 | [convolutional] 95 | batch_normalize=1 96 | filters=64 97 | size=1 98 | stride=1 99 | pad=1 100 | activation=leaky 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | filters=128 105 | size=3 106 | stride=1 107 | pad=1 108 | activation=leaky 109 | 110 | [shortcut] 111 | from=-3 112 | activation=linear 113 | 114 | # Downsample 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=256 119 | size=3 120 | stride=2 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | batch_normalize=1 126 | filters=128 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | batch_normalize=1 134 | filters=256 135 | size=3 136 | stride=1 137 | pad=1 138 | activation=leaky 139 | 140 | [shortcut] 141 | from=-3 142 | activation=linear 143 | 144 | [convolutional] 145 | batch_normalize=1 146 | filters=128 147 | size=1 148 | stride=1 149 | pad=1 150 | activation=leaky 151 | 152 | [convolutional] 153 | batch_normalize=1 154 | filters=256 155 | size=3 156 | stride=1 157 | pad=1 158 | activation=leaky 159 | 160 | [shortcut] 161 | from=-3 162 | activation=linear 163 | 164 | [convolutional] 165 | batch_normalize=1 166 | filters=128 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | [convolutional] 173 | batch_normalize=1 174 | filters=256 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | [shortcut] 181 | from=-3 182 | activation=linear 183 | 184 | [convolutional] 185 | batch_normalize=1 186 | filters=128 187 | size=1 188 | stride=1 189 | pad=1 190 | activation=leaky 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | filters=256 195 | size=3 196 | stride=1 197 | pad=1 198 | activation=leaky 199 | 200 | [shortcut] 201 | from=-3 202 | activation=linear 203 | 204 | 205 | [convolutional] 206 | batch_normalize=1 207 | filters=128 208 | size=1 209 | stride=1 210 | pad=1 211 | activation=leaky 212 | 213 | [convolutional] 214 | batch_normalize=1 215 | filters=256 216 | size=3 217 | stride=1 218 | pad=1 219 | activation=leaky 220 | 221 | [shortcut] 222 | from=-3 223 | activation=linear 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | filters=128 228 | size=1 229 | stride=1 230 | pad=1 231 | activation=leaky 232 | 233 | [convolutional] 234 | batch_normalize=1 235 | filters=256 236 | size=3 237 | stride=1 238 | pad=1 239 | activation=leaky 240 | 241 | [shortcut] 242 | from=-3 243 | activation=linear 244 | 245 | [convolutional] 246 | batch_normalize=1 247 | filters=128 248 | size=1 249 | stride=1 250 | pad=1 251 | activation=leaky 252 | 253 | [convolutional] 254 | batch_normalize=1 255 | filters=256 256 | size=3 257 | stride=1 258 | pad=1 259 | activation=leaky 260 | 261 | [shortcut] 262 | from=-3 263 | activation=linear 264 | 265 | [convolutional] 266 | batch_normalize=1 267 | filters=128 268 | 
size=1 269 | stride=1 270 | pad=1 271 | activation=leaky 272 | 273 | [convolutional] 274 | batch_normalize=1 275 | filters=256 276 | size=3 277 | stride=1 278 | pad=1 279 | activation=leaky 280 | 281 | [shortcut] 282 | from=-3 283 | activation=linear 284 | 285 | # Downsample 286 | 287 | [convolutional] 288 | batch_normalize=1 289 | filters=512 290 | size=3 291 | stride=2 292 | pad=1 293 | activation=leaky 294 | 295 | [convolutional] 296 | batch_normalize=1 297 | filters=256 298 | size=1 299 | stride=1 300 | pad=1 301 | activation=leaky 302 | 303 | [convolutional] 304 | batch_normalize=1 305 | filters=512 306 | size=3 307 | stride=1 308 | pad=1 309 | activation=leaky 310 | 311 | [shortcut] 312 | from=-3 313 | activation=linear 314 | 315 | 316 | [convolutional] 317 | batch_normalize=1 318 | filters=256 319 | size=1 320 | stride=1 321 | pad=1 322 | activation=leaky 323 | 324 | [convolutional] 325 | batch_normalize=1 326 | filters=512 327 | size=3 328 | stride=1 329 | pad=1 330 | activation=leaky 331 | 332 | [shortcut] 333 | from=-3 334 | activation=linear 335 | 336 | 337 | [convolutional] 338 | batch_normalize=1 339 | filters=256 340 | size=1 341 | stride=1 342 | pad=1 343 | activation=leaky 344 | 345 | [convolutional] 346 | batch_normalize=1 347 | filters=512 348 | size=3 349 | stride=1 350 | pad=1 351 | activation=leaky 352 | 353 | [shortcut] 354 | from=-3 355 | activation=linear 356 | 357 | 358 | [convolutional] 359 | batch_normalize=1 360 | filters=256 361 | size=1 362 | stride=1 363 | pad=1 364 | activation=leaky 365 | 366 | [convolutional] 367 | batch_normalize=1 368 | filters=512 369 | size=3 370 | stride=1 371 | pad=1 372 | activation=leaky 373 | 374 | [shortcut] 375 | from=-3 376 | activation=linear 377 | 378 | [convolutional] 379 | batch_normalize=1 380 | filters=256 381 | size=1 382 | stride=1 383 | pad=1 384 | activation=leaky 385 | 386 | [convolutional] 387 | batch_normalize=1 388 | filters=512 389 | size=3 390 | stride=1 391 | pad=1 392 | activation=leaky 393 | 394 | [shortcut] 395 | from=-3 396 | activation=linear 397 | 398 | 399 | [convolutional] 400 | batch_normalize=1 401 | filters=256 402 | size=1 403 | stride=1 404 | pad=1 405 | activation=leaky 406 | 407 | [convolutional] 408 | batch_normalize=1 409 | filters=512 410 | size=3 411 | stride=1 412 | pad=1 413 | activation=leaky 414 | 415 | [shortcut] 416 | from=-3 417 | activation=linear 418 | 419 | 420 | [convolutional] 421 | batch_normalize=1 422 | filters=256 423 | size=1 424 | stride=1 425 | pad=1 426 | activation=leaky 427 | 428 | [convolutional] 429 | batch_normalize=1 430 | filters=512 431 | size=3 432 | stride=1 433 | pad=1 434 | activation=leaky 435 | 436 | [shortcut] 437 | from=-3 438 | activation=linear 439 | 440 | [convolutional] 441 | batch_normalize=1 442 | filters=256 443 | size=1 444 | stride=1 445 | pad=1 446 | activation=leaky 447 | 448 | [convolutional] 449 | batch_normalize=1 450 | filters=512 451 | size=3 452 | stride=1 453 | pad=1 454 | activation=leaky 455 | 456 | [shortcut] 457 | from=-3 458 | activation=linear 459 | 460 | # Downsample 461 | 462 | [convolutional] 463 | batch_normalize=1 464 | filters=1024 465 | size=3 466 | stride=2 467 | pad=1 468 | activation=leaky 469 | 470 | [convolutional] 471 | batch_normalize=1 472 | filters=512 473 | size=1 474 | stride=1 475 | pad=1 476 | activation=leaky 477 | 478 | [convolutional] 479 | batch_normalize=1 480 | filters=1024 481 | size=3 482 | stride=1 483 | pad=1 484 | activation=leaky 485 | 486 | [shortcut] 487 | from=-3 488 | activation=linear 489 | 490 | 
[convolutional] 491 | batch_normalize=1 492 | filters=512 493 | size=1 494 | stride=1 495 | pad=1 496 | activation=leaky 497 | 498 | [convolutional] 499 | batch_normalize=1 500 | filters=1024 501 | size=3 502 | stride=1 503 | pad=1 504 | activation=leaky 505 | 506 | [shortcut] 507 | from=-3 508 | activation=linear 509 | 510 | [convolutional] 511 | batch_normalize=1 512 | filters=512 513 | size=1 514 | stride=1 515 | pad=1 516 | activation=leaky 517 | 518 | [convolutional] 519 | batch_normalize=1 520 | filters=1024 521 | size=3 522 | stride=1 523 | pad=1 524 | activation=leaky 525 | 526 | [shortcut] 527 | from=-3 528 | activation=linear 529 | 530 | [convolutional] 531 | batch_normalize=1 532 | filters=512 533 | size=1 534 | stride=1 535 | pad=1 536 | activation=leaky 537 | 538 | [convolutional] 539 | batch_normalize=1 540 | filters=1024 541 | size=3 542 | stride=1 543 | pad=1 544 | activation=leaky 545 | 546 | [shortcut] 547 | from=-3 548 | activation=linear 549 | 550 | ###################### 551 | 552 | [convolutional] 553 | batch_normalize=1 554 | filters=512 555 | size=1 556 | stride=1 557 | pad=1 558 | activation=leaky 559 | 560 | [convolutional] 561 | batch_normalize=1 562 | size=3 563 | stride=1 564 | pad=1 565 | filters=1024 566 | activation=leaky 567 | 568 | [convolutional] 569 | batch_normalize=1 570 | filters=512 571 | size=1 572 | stride=1 573 | pad=1 574 | activation=leaky 575 | 576 | [convolutional] 577 | batch_normalize=1 578 | size=3 579 | stride=1 580 | pad=1 581 | filters=1024 582 | activation=leaky 583 | 584 | [convolutional] 585 | batch_normalize=1 586 | filters=512 587 | size=1 588 | stride=1 589 | pad=1 590 | activation=leaky 591 | 592 | [convolutional] 593 | batch_normalize=1 594 | size=3 595 | stride=1 596 | pad=1 597 | filters=1024 598 | activation=leaky 599 | 600 | [convolutional] 601 | size=1 602 | stride=1 603 | pad=1 604 | filters=18 605 | activation=linear 606 | 607 | 608 | [yolo] 609 | mask = 6,7,8 610 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 611 | classes=1 612 | num=9 613 | jitter=.3 614 | ignore_thresh = .7 615 | truth_thresh = 1 616 | random=1 617 | 618 | 619 | [route] 620 | layers = -4 621 | 622 | [convolutional] 623 | batch_normalize=1 624 | filters=256 625 | size=1 626 | stride=1 627 | pad=1 628 | activation=leaky 629 | 630 | [upsample] 631 | stride=2 632 | 633 | [route] 634 | layers = -1, 61 635 | 636 | 637 | 638 | [convolutional] 639 | batch_normalize=1 640 | filters=256 641 | size=1 642 | stride=1 643 | pad=1 644 | activation=leaky 645 | 646 | [convolutional] 647 | batch_normalize=1 648 | size=3 649 | stride=1 650 | pad=1 651 | filters=512 652 | activation=leaky 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [convolutional] 663 | batch_normalize=1 664 | size=3 665 | stride=1 666 | pad=1 667 | filters=512 668 | activation=leaky 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | size=1 688 | stride=1 689 | pad=1 690 | filters=18 691 | activation=linear 692 | 693 | 694 | [yolo] 695 | mask = 3,4,5 696 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 697 | classes=1 698 | num=9 699 | jitter=.3 700 | ignore_thresh = .7 701 | truth_thresh = 1 702 | 
random=1 703 | 704 | 705 | 706 | [route] 707 | layers = -4 708 | 709 | [convolutional] 710 | batch_normalize=1 711 | filters=128 712 | size=1 713 | stride=1 714 | pad=1 715 | activation=leaky 716 | 717 | [upsample] 718 | stride=2 719 | 720 | [route] 721 | layers = -1, 36 722 | 723 | 724 | 725 | [convolutional] 726 | batch_normalize=1 727 | filters=128 728 | size=1 729 | stride=1 730 | pad=1 731 | activation=leaky 732 | 733 | [convolutional] 734 | batch_normalize=1 735 | size=3 736 | stride=1 737 | pad=1 738 | filters=256 739 | activation=leaky 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [convolutional] 750 | batch_normalize=1 751 | size=3 752 | stride=1 753 | pad=1 754 | filters=256 755 | activation=leaky 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | size=1 775 | stride=1 776 | pad=1 777 | filters=18 778 | activation=linear 779 | 780 | 781 | [yolo] 782 | mask = 0,1,2 783 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 784 | classes=1 785 | num=9 786 | jitter=.3 787 | ignore_thresh = .7 788 | truth_thresh = 1 789 | random=1 790 | 791 | -------------------------------------------------------------------------------- /cfg/yolov3-spp-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=100 20 | max_batches = 5000 21 | policy=steps 22 | steps=4000,4500 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | 
batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 
| [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | 
stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=18 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=1 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=18 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 729 | classes=1 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | 
stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=18 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 816 | classes=1 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /cfg/yolov3-spp.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 
| [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 
391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | 
activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=255 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=80 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=255 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 729 | classes=80 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=255 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 816 | classes=80 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- 
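# Note on the detection heads in these cfg files (editorial sketch, not a repository file): the cfgs above and below differ mainly in their [yolo] blocks. For each [yolo] layer, the 1x1 [convolutional] layer immediately before it must have filters = len(mask) * (5 + classes), which is why the COCO cfgs use 255, the 1-class hand cfgs use 18, and the single-anchor-per-scale yolov3s-3a320.cfg uses 85. A minimal Python sketch of that relation, with the helper name `yolo_head_filters` chosen here for illustration:

```python
# Illustrative only: shows how the head width in the cfgs is derived,
# i.e. filters = anchors_per_scale * (x, y, w, h, objectness + class scores).

def yolo_head_filters(num_classes: int, anchors_per_scale: int = 3) -> int:
    """Filters of the 1x1 conv feeding a [yolo] layer."""
    return anchors_per_scale * (5 + num_classes)

if __name__ == "__main__":
    assert yolo_head_filters(80, 3) == 255  # yolov3.cfg, yolov3-spp.cfg (classes=80, mask of 3)
    assert yolo_head_filters(1, 3) == 18    # *-hand / *-1cls cfgs (classes=1, mask of 3)
    assert yolo_head_filters(80, 1) == 85   # yolov3s-3a320.cfg heads (classes=80, mask of 1)
    print(yolo_head_filters(80, 3), yolo_head_filters(1, 3), yolo_head_filters(80, 1))
```

If a cfg is edited for a custom dataset, both `classes` in every [yolo] block and `filters` in the preceding [convolutional] block have to be updated together, as the check above makes explicit.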
/cfg/yolov3-tiny-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=18 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=1 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=18 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=1 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny-hand.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=15,25,60,99,150,160,180 23 | 
scales=0.5,0.5,0.1,0.5,0.5,0.1,0.1 24 | 25 | # 0 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | # 1 35 | [maxpool] 36 | size=2 37 | stride=2 38 | 39 | # 2 40 | [convolutional] 41 | batch_normalize=1 42 | filters=32 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | # 3 49 | [maxpool] 50 | size=2 51 | stride=2 52 | 53 | # 4 54 | [convolutional] 55 | batch_normalize=1 56 | filters=64 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | # 5 63 | [maxpool] 64 | size=2 65 | stride=2 66 | 67 | # 6 68 | [convolutional] 69 | batch_normalize=1 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | # 7 77 | [maxpool] 78 | size=2 79 | stride=2 80 | 81 | # 8 82 | [convolutional] 83 | batch_normalize=1 84 | filters=256 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | # 9 91 | [maxpool] 92 | size=2 93 | stride=2 94 | 95 | # 10 96 | [convolutional] 97 | batch_normalize=1 98 | filters=512 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | # 11 105 | [maxpool] 106 | size=2 107 | stride=1 108 | 109 | # 12 110 | [convolutional] 111 | batch_normalize=1 112 | filters=1024 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | ########### 119 | 120 | # 13 121 | [convolutional] 122 | batch_normalize=1 123 | filters=256 124 | size=1 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | # 14 130 | [convolutional] 131 | batch_normalize=1 132 | filters=512 133 | size=3 134 | stride=1 135 | pad=1 136 | activation=leaky 137 | 138 | # 15 139 | [convolutional] 140 | size=1 141 | stride=1 142 | pad=1 143 | filters=18 144 | activation=linear 145 | 146 | 147 | 148 | # 16 149 | [yolo] 150 | mask = 3,4,5 151 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 152 | classes=1 153 | num=6 154 | jitter=.3 155 | ignore_thresh = .7 156 | truth_thresh = 1 157 | random=1 158 | 159 | # 17 160 | [route] 161 | layers = -4 162 | 163 | # 18 164 | [convolutional] 165 | batch_normalize=1 166 | filters=128 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | # 19 173 | [upsample] 174 | stride=2 175 | 176 | # 20 177 | [route] 178 | layers = -1, 8 179 | 180 | # 21 181 | [convolutional] 182 | batch_normalize=1 183 | filters=256 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | # 22 190 | [convolutional] 191 | size=1 192 | stride=1 193 | pad=1 194 | filters=18 195 | activation=linear 196 | 197 | # 23 198 | [yolo] 199 | mask = 1,2,3 200 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 201 | classes=1 202 | num=6 203 | jitter=.3 204 | ignore_thresh = .7 205 | truth_thresh = 1 206 | random=1 207 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 
39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 1,2,3 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /cfg/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 
73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | 
activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | 
size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | 
pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /cfg/yolov3s-3a320.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 
148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | 
filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 
589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=85 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 2 642 | anchors = 16,30, 62,45, 156,198 643 | classes=80 644 | num=3 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=85 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 1 728 | anchors = 16,30, 62,45, 156,198 729 | classes=80 730 | num=3 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=85 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0 815 | 
anchors = 16,30, 62,45, 156,198 816 | classes=80 817 | num=3 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /cfg/yolov3s-9a320.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | 
[convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | 
pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=255 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 9,11, 25,27, 33,63, 71,43, 62,120, 135,86, 123,199, 257,100, 264,223 643 | 
classes=80 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=255 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 9,11, 25,27, 33,63, 71,43, 62,120, 135,86, 123,199, 257,100, 264,223 729 | classes=80 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=255 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 9,11, 25,27, 33,63, 71,43, 62,120, 135,86, 123,199, 257,100, 264,223 816 | classes=80 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /data/coco.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=../coco/trainvalno5k.txt 3 | valid=../coco/5k.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 
11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | couch 59 | potted plant 60 | bed 61 | dining table 62 | toilet 63 | tv 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /data/coco_1000img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_1000img.txt 3 | valid=./data/coco_1000img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_1000val.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_1000img.txt 3 | valid=./data/coco_1000val.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_16img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_16img.txt 3 | valid=./data/coco_16img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_16img.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg 2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg 3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg 4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg 5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg 6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg 7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg 8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg 9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg 10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg 11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg 12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg 13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg 14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg 15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg 16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg 17 | -------------------------------------------------------------------------------- /data/coco_1cls.data: -------------------------------------------------------------------------------- 1 | classes=1 2 | train=./data/coco_1cls.txt 3 | valid=./data/coco_1cls.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_1cls.txt: 
-------------------------------------------------------------------------------- 1 | ../coco/images/val2014/COCO_val2014_000000013992.jpg 2 | ../coco/images/val2014/COCO_val2014_000000047226.jpg 3 | ../coco/images/val2014/COCO_val2014_000000050324.jpg 4 | ../coco/images/val2014/COCO_val2014_000000121497.jpg 5 | ../coco/images/val2014/COCO_val2014_000000001464.jpg 6 | -------------------------------------------------------------------------------- /data/coco_1img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_1img.txt 3 | valid=./data/coco_1img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_1img.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/val2014/COCO_val2014_000000581886.jpg 2 | -------------------------------------------------------------------------------- /data/coco_1k5k.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_1000img.txt 3 | valid=./data/5k.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_32img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_32img.txt 3 | valid=./data/coco_32img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_32img.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg 2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg 3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg 4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg 5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg 6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg 7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg 8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg 9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg 10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg 11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg 12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg 13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg 14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg 15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg 16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg 17 | ../coco/images/train2014/COCO_train2014_000000000094.jpg 18 | ../coco/images/train2014/COCO_train2014_000000000109.jpg 19 | ../coco/images/train2014/COCO_train2014_000000000110.jpg 20 | ../coco/images/train2014/COCO_train2014_000000000113.jpg 21 | ../coco/images/train2014/COCO_train2014_000000000127.jpg 22 | ../coco/images/train2014/COCO_train2014_000000000138.jpg 23 | ../coco/images/train2014/COCO_train2014_000000000142.jpg 24 | ../coco/images/train2014/COCO_train2014_000000000144.jpg 25 | ../coco/images/train2014/COCO_train2014_000000000149.jpg 26 | ../coco/images/train2014/COCO_train2014_000000000151.jpg 27 | ../coco/images/train2014/COCO_train2014_000000000154.jpg 28 | ../coco/images/train2014/COCO_train2014_000000000165.jpg 29 | 
../coco/images/train2014/COCO_train2014_000000000194.jpg 30 | ../coco/images/train2014/COCO_train2014_000000000201.jpg 31 | ../coco/images/train2014/COCO_train2014_000000000247.jpg 32 | ../coco/images/train2014/COCO_train2014_000000000260.jpg 33 | -------------------------------------------------------------------------------- /data/coco_500val.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_500img.txt 3 | valid=./data/coco_500val.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_64img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_64img.txt 3 | valid=./data/coco_64img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_64img.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg 2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg 3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg 4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg 5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg 6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg 7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg 8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg 9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg 10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg 11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg 12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg 13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg 14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg 15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg 16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg 17 | ../coco/images/train2014/COCO_train2014_000000000094.jpg 18 | ../coco/images/train2014/COCO_train2014_000000000109.jpg 19 | ../coco/images/train2014/COCO_train2014_000000000110.jpg 20 | ../coco/images/train2014/COCO_train2014_000000000113.jpg 21 | ../coco/images/train2014/COCO_train2014_000000000127.jpg 22 | ../coco/images/train2014/COCO_train2014_000000000138.jpg 23 | ../coco/images/train2014/COCO_train2014_000000000142.jpg 24 | ../coco/images/train2014/COCO_train2014_000000000144.jpg 25 | ../coco/images/train2014/COCO_train2014_000000000149.jpg 26 | ../coco/images/train2014/COCO_train2014_000000000151.jpg 27 | ../coco/images/train2014/COCO_train2014_000000000154.jpg 28 | ../coco/images/train2014/COCO_train2014_000000000165.jpg 29 | ../coco/images/train2014/COCO_train2014_000000000194.jpg 30 | ../coco/images/train2014/COCO_train2014_000000000201.jpg 31 | ../coco/images/train2014/COCO_train2014_000000000247.jpg 32 | ../coco/images/train2014/COCO_train2014_000000000260.jpg 33 | ../coco/images/train2014/COCO_train2014_000000000263.jpg 34 | ../coco/images/train2014/COCO_train2014_000000000307.jpg 35 | ../coco/images/train2014/COCO_train2014_000000000308.jpg 36 | ../coco/images/train2014/COCO_train2014_000000000309.jpg 37 | ../coco/images/train2014/COCO_train2014_000000000312.jpg 38 | ../coco/images/train2014/COCO_train2014_000000000315.jpg 39 | ../coco/images/train2014/COCO_train2014_000000000321.jpg 40 | 
../coco/images/train2014/COCO_train2014_000000000322.jpg 41 | ../coco/images/train2014/COCO_train2014_000000000326.jpg 42 | ../coco/images/train2014/COCO_train2014_000000000332.jpg 43 | ../coco/images/train2014/COCO_train2014_000000000349.jpg 44 | ../coco/images/train2014/COCO_train2014_000000000368.jpg 45 | ../coco/images/train2014/COCO_train2014_000000000370.jpg 46 | ../coco/images/train2014/COCO_train2014_000000000382.jpg 47 | ../coco/images/train2014/COCO_train2014_000000000384.jpg 48 | ../coco/images/train2014/COCO_train2014_000000000389.jpg 49 | ../coco/images/train2014/COCO_train2014_000000000394.jpg 50 | ../coco/images/train2014/COCO_train2014_000000000404.jpg 51 | ../coco/images/train2014/COCO_train2014_000000000419.jpg 52 | ../coco/images/train2014/COCO_train2014_000000000431.jpg 53 | ../coco/images/train2014/COCO_train2014_000000000436.jpg 54 | ../coco/images/train2014/COCO_train2014_000000000438.jpg 55 | ../coco/images/train2014/COCO_train2014_000000000443.jpg 56 | ../coco/images/train2014/COCO_train2014_000000000446.jpg 57 | ../coco/images/train2014/COCO_train2014_000000000450.jpg 58 | ../coco/images/train2014/COCO_train2014_000000000471.jpg 59 | ../coco/images/train2014/COCO_train2014_000000000490.jpg 60 | ../coco/images/train2014/COCO_train2014_000000000491.jpg 61 | ../coco/images/train2014/COCO_train2014_000000000510.jpg 62 | ../coco/images/train2014/COCO_train2014_000000000514.jpg 63 | ../coco/images/train2014/COCO_train2014_000000000529.jpg 64 | ../coco/images/train2014/COCO_train2014_000000000531.jpg 65 | -------------------------------------------------------------------------------- /data/coco_paper.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | street sign 13 | stop sign 14 | parking meter 15 | bench 16 | bird 17 | cat 18 | dog 19 | horse 20 | sheep 21 | cow 22 | elephant 23 | bear 24 | zebra 25 | giraffe 26 | hat 27 | backpack 28 | umbrella 29 | shoe 30 | eye glasses 31 | handbag 32 | tie 33 | suitcase 34 | frisbee 35 | skis 36 | snowboard 37 | sports ball 38 | kite 39 | baseball bat 40 | baseball glove 41 | skateboard 42 | surfboard 43 | tennis racket 44 | bottle 45 | plate 46 | wine glass 47 | cup 48 | fork 49 | knife 50 | spoon 51 | bowl 52 | banana 53 | apple 54 | sandwich 55 | orange 56 | broccoli 57 | carrot 58 | hot dog 59 | pizza 60 | donut 61 | cake 62 | chair 63 | couch 64 | potted plant 65 | bed 66 | mirror 67 | dining table 68 | window 69 | desk 70 | toilet 71 | door 72 | tv 73 | laptop 74 | mouse 75 | remote 76 | keyboard 77 | cell phone 78 | microwave 79 | oven 80 | toaster 81 | sink 82 | refrigerator 83 | blender 84 | book 85 | clock 86 | vase 87 | scissors 88 | teddy bear 89 | hair drier 90 | toothbrush 91 | hair brush -------------------------------------------------------------------------------- /data/converter.py: -------------------------------------------------------------------------------- 1 | import scipy.io as sio 2 | from PIL import Image 3 | import os, glob 4 | import datetime 5 | import shutil 6 | 7 | running_from_path = os.getcwd() 8 | created_images_dir = 'images' 9 | created_labels_dir = 'labels' 10 | data_dir = 'data' # data_dir为脚本所在的文件夹 11 | 12 | def hms_string(sec_elapsed): # 格式化显示已消耗时间 13 | h = int(sec_elapsed / (60 * 60)) 14 | m = int((sec_elapsed % (60 * 60)) / 60) 15 | s = sec_elapsed % 60. 
16 | return "{}:{:>02}:{:>05.2f}".format(h, m, s) 17 | 18 | def generate_dir(set_name, root_path): # 往images和labels文件夹下生成相应的文件夹 19 | images_dir = os.path.join(root_path, 'images') 20 | annotation_dir = os.path.join(root_path, 'annotations') 21 | 22 | new_images_dir = os.path.join(created_images_dir, set_name) # 将图片从原来的文件夹复制到该文件夹下 23 | new_annotation_dir = os.path.join(created_labels_dir, set_name) 24 | 25 | if not os.path.exists(new_images_dir): 26 | os.makedirs(new_images_dir) 27 | 28 | if not os.path.exists(new_annotation_dir): 29 | os.makedirs(new_annotation_dir) 30 | 31 | for img in glob.glob(os.path.join(images_dir, "*.jpg")): # 将图片从原来的文件夹复制到新文件夹下 32 | shutil.copy(img, new_images_dir) 33 | 34 | os.chdir(annotation_dir) # 切换到annotation的路径下 35 | matlab_annotations = glob.glob("*.mat") # 仅仅包含文件名,不包含路径 36 | os.chdir(running_from_path) # 切换回原来的路径 37 | 38 | for matfile in matlab_annotations: 39 | filename = matfile.split(".")[0] 40 | 41 | pil_image = Image.open(os.path.join(images_dir, filename+".jpg")) 42 | 43 | content = sio.loadmat(os.path.join(annotation_dir, matfile), matlab_compatible=False) 44 | 45 | boxes = content["boxes"] 46 | 47 | width, height = pil_image.size 48 | 49 | with open(os.path.join(new_annotation_dir, filename+".txt"), "w") as hs: 50 | for box_idx, box in enumerate(boxes.T): 51 | a = box[0][0][0][0] 52 | b = box[0][0][0][1] 53 | c = box[0][0][0][2] 54 | d = box[0][0][0][3] 55 | 56 | aXY = (a[0][1], a[0][0]) 57 | bXY = (b[0][1], b[0][0]) 58 | cXY = (c[0][1], c[0][0]) 59 | dXY = (d[0][1], d[0][0]) 60 | 61 | maxX = max(aXY[0], bXY[0], cXY[0], dXY[0]) 62 | minX = min(aXY[0], bXY[0], cXY[0], dXY[0]) 63 | maxY = max(aXY[1], bXY[1], cXY[1], dXY[1]) 64 | minY = min(aXY[1], bXY[1], cXY[1], dXY[1]) 65 | 66 | # clip,防止超出边界 67 | maxX = min(maxX, width-1) 68 | minX = max(minX, 0) 69 | maxY = min(maxY, height-1) 70 | minY = max(minY, 0) 71 | 72 | # ( / ) 73 | norm_width = (maxX - minX) / width 74 | 75 | # ( / ) 76 | norm_height = (maxY - minY) / height 77 | 78 | center_x, center_y = (maxX + minX) / 2, (maxY + minY) / 2 79 | 80 | norm_center_x = center_x / width 81 | norm_center_y = center_y / height 82 | 83 | if box_idx != 0: 84 | hs.write("\n") 85 | 86 | hs.write("0 %f %f %f %f" % (norm_center_x, norm_center_y, norm_width, norm_height)) # 0表示类别 87 | 88 | def create_txt(dirlist, filename): 89 | with open(filename, "w") as txtfile: # 在data文件夹下生成txt文件 90 | imglist = [] 91 | 92 | for dir in dirlist: # dir='images/test' 93 | imglist.extend(glob.glob(os.path.join(dir, "*.jpg"))) # img='images/test/abc.jpg' 94 | 95 | for idx, img in enumerate(imglist): 96 | if idx != 0: 97 | txtfile.write("\n") 98 | txtfile.write(os.path.join(data_dir, img)) # 加上前缀data 99 | 100 | if __name__ == '__main__': 101 | start_time = datetime.datetime.now() 102 | 103 | generate_dir("train", "hand_dataset/training_dataset/training_data") # 第一个参数表示生成的文件夹的名称 104 | generate_dir("test", "hand_dataset/test_dataset/test_data") 105 | generate_dir("validation", "hand_dataset/validation_dataset/validation_data") 106 | 107 | create_txt((os.path.join(created_images_dir, 'train'), # 将train和validation文件夹下的图片合并成train 108 | os.path.join(created_images_dir, 'validation')), 109 | 'train.txt') 110 | create_txt((os.path.join(created_images_dir, 'test'), ), 111 | 'valid.txt') 112 | 113 | end_time = datetime.datetime.now() 114 | seconds_elapsed = (end_time - start_time).total_seconds() 115 | print("It took {} to execute this".format(hms_string(seconds_elapsed))) 
-------------------------------------------------------------------------------- /data/get_coco_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh 3 | 4 | # Clone COCO API 5 | git clone https://github.com/pdollar/coco && cd coco 6 | 7 | # Download Images 8 | mkdir images && cd images 9 | wget -c https://pjreddie.com/media/files/train2014.zip 10 | wget -c https://pjreddie.com/media/files/val2014.zip 11 | 12 | # Unzip 13 | unzip -q train2014.zip 14 | unzip -q val2014.zip 15 | 16 | # (optional) Delete zip files 17 | rm -rf *.zip 18 | 19 | cd .. 20 | 21 | # Download COCO Metadata 22 | wget -c https://pjreddie.com/media/files/instances_train-val2014.zip 23 | wget -c https://pjreddie.com/media/files/coco/5k.part 24 | wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part 25 | wget -c https://pjreddie.com/media/files/coco/labels.tgz 26 | tar xzf labels.tgz 27 | unzip -q instances_train-val2014.zip 28 | 29 | # Set Up Image Lists 30 | paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt 31 | paste <(awk "{print \"$PWD\"}" trainvalno5k.txt 32 | 33 | # get xview training data 34 | # wget -O train_images.tgz 'https://d307kc0mrhucc3.cloudfront.net/train_images.tgz?Expires=1530124049&Signature=JrQoxipmsETvb7eQHCfDFUO-QEHJGAayUv0i-ParmS-1hn7hl9D~bzGuHWG82imEbZSLUARTtm0wOJ7EmYMGmG5PtLKz9H5qi6DjoSUuFc13NQ-~6yUhE~NfPaTnehUdUMCa3On2wl1h1ZtRG~0Jq1P-AJbpe~oQxbyBrs1KccaMa7FK4F4oMM6sMnNgoXx8-3O77kYw~uOpTMFmTaQdHln6EztW0Lx17i57kK3ogbSUpXgaUTqjHCRA1dWIl7PY1ngQnLslkLhZqmKcaL-BvWf0ZGjHxCDQBpnUjIlvMu5NasegkwD9Jjc0ClgTxsttSkmbapVqaVC8peR0pO619Q__&Key-Pair-Id=APKAIKGDJB5C3XUL2DXQ' 35 | # tar -xvzf train_images.tgz 36 | # sudo rm -rf train_images/._* 37 | # lastly convert each .tif to a .bmp for faster loading in cv2 38 | 39 | # ./coco/images/train2014/COCO_train2014_000000167126.jpg # corrupted image 40 | -------------------------------------------------------------------------------- /data/get_coco_dataset_gdrive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # https://stackoverflow.com/questions/48133080/how-to-download-a-google-drive-url-via-curl-or-wget/48133859 3 | 4 | # Zip coco folder 5 | # zip -r coco.zip coco 6 | # tar -czvf coco.tar.gz coco 7 | 8 | # Set fileid and filename 9 | filename="coco.zip" 10 | fileid="1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO" # coco.zip 11 | 12 | # Download from Google Drive, accepting presented query 13 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 14 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 15 | rm ./cookie 16 | 17 | # Unzip 18 | unzip -q ${filename} # for coco.zip 19 | # tar -xzf ${filename} # for coco.tar.gz 20 | -------------------------------------------------------------------------------- /data/oxfordhand.data: -------------------------------------------------------------------------------- 1 | classes= 1 2 | train=data/train.txt 3 | valid=data/valid.txt 4 | names=data/oxfordhand.names 5 | -------------------------------------------------------------------------------- /data/oxfordhand.names: -------------------------------------------------------------------------------- 1 | hand 2 | 3 | -------------------------------------------------------------------------------- /data/samples/bus.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/data/samples/bus.jpg -------------------------------------------------------------------------------- /data/samples/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/data/samples/zidane.jpg -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from sys import platform 3 | 4 | from models import * # set ONNX_EXPORT in models.py 5 | from utils.datasets import * 6 | from utils.utils import * 7 | 8 | 9 | def detect(save_txt=False, save_img=False): 10 | img_size = (320, 192) if ONNX_EXPORT else opt.img_size # (320, 192) or (416, 256) or (608, 352) for (height, width) 11 | out, source, weights, half, view_img = opt.output, opt.source, opt.weights, opt.half, opt.view_img 12 | webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt') 13 | 14 | # Initialize 15 | device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device) 16 | if os.path.exists(out): 17 | shutil.rmtree(out) # delete output folder 18 | os.makedirs(out) # make new output folder 19 | 20 | # Initialize model 21 | model = Darknet(opt.cfg, img_size) 22 | 23 | # Load weights 24 | attempt_download(weights) 25 | if weights.endswith('.pt'): # pytorch format 26 | model.load_state_dict(torch.load(weights, map_location=device)['model']) 27 | else: # darknet format 28 | _ = load_darknet_weights(model, weights) 29 | 30 | # Second-stage classifier 31 | classify = False 32 | if classify: 33 | modelc = torch_utils.load_classifier(name='resnet101', n=2) # initialize 34 | modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights 35 | modelc.to(device).eval() 36 | 37 | # Fuse Conv2d + BatchNorm2d layers 38 | # model.fuse() 39 | 40 | # Eval mode 41 | model.to(device).eval() 42 | 43 | # Export mode 44 | if ONNX_EXPORT: 45 | img = torch.zeros((1, 3) + img_size) # (1, 3, 320, 192) 46 | torch.onnx.export(model, img, 'weights/export.onnx', verbose=True) 47 | return 48 | 49 | # Half precision 50 | half = half and device.type != 'cpu' # half precision only supported on CUDA 51 | if half: 52 | model.half() 53 | 54 | # Set Dataloader 55 | vid_path, vid_writer = None, None 56 | if webcam: 57 | view_img = True 58 | torch.backends.cudnn.benchmark = True # set True to speed up constant image size inference 59 | dataset = LoadStreams(source, img_size=img_size, half=half) 60 | else: 61 | save_img = True 62 | dataset = LoadImages(source, img_size=img_size, half=half) 63 | 64 | # Get classes and colors 65 | classes = load_classes(parse_data_cfg(opt.data)['names']) 66 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))] 67 | 68 | # Run inference 69 | t0 = time.time() 70 | for path, img, im0s, vid_cap in dataset: 71 | t = time.time() 72 | 73 | # Get detections 74 | img = torch.from_numpy(img).to(device) 75 | if img.ndimension() == 3: 76 | img = img.unsqueeze(0) 77 | pred = model(img)[0] 78 | 79 | if opt.half: 80 | pred = pred.float() 81 | 82 | # Apply NMS 83 | pred = non_max_suppression(pred, opt.conf_thres, opt.nms_thres) 84 | 85 | # Apply 86 | if 
classify: 87 | pred = apply_classifier(pred, modelc, img, im0s) 88 | 89 | # Process detections 90 | for i, det in enumerate(pred): # detections per image 91 | if webcam: # batch_size >= 1 92 | p, s, im0 = path[i], '%g: ' % i, im0s[i] 93 | else: 94 | p, s, im0 = path, '', im0s 95 | 96 | save_path = str(Path(out) / Path(p).name) 97 | s += '%gx%g ' % img.shape[2:] # print string 98 | if det is not None and len(det): 99 | # Rescale boxes from img_size to im0 size 100 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 101 | 102 | # Print results 103 | for c in det[:, -1].unique(): 104 | n = (det[:, -1] == c).sum() # detections per class 105 | s += '%g %ss, ' % (n, classes[int(c)]) # add to string 106 | 107 | # Write results 108 | for *xyxy, conf, _, cls in det: 109 | if save_txt: # Write to file 110 | with open(save_path + '.txt', 'a') as file: 111 | file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf)) 112 | 113 | if save_img or view_img: # Add bbox to image 114 | label = '%s %.2f' % (classes[int(cls)], conf) 115 | #plot_one_box(xyxy, im0, label=label, color=colors[int(cls)]) 116 | plot_one_box(xyxy, im0, label=None, color=colors[int(cls)]) 117 | 118 | print('%sDone. (%.3fs)' % (s, time.time() - t)) 119 | 120 | # Stream results 121 | if view_img: 122 | cv2.imshow(p, im0) 123 | 124 | # Save results (image with detections) 125 | if save_img: 126 | if dataset.mode == 'images': 127 | cv2.imwrite(save_path, im0) 128 | else: 129 | if vid_path != save_path: # new video 130 | vid_path = save_path 131 | if isinstance(vid_writer, cv2.VideoWriter): 132 | vid_writer.release() # release previous video writer 133 | 134 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 135 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 136 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 137 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) 138 | vid_writer.write(im0) 139 | 140 | if save_txt or save_img: 141 | print('Results saved to %s' % os.getcwd() + os.sep + out) 142 | if platform == 'darwin': # MacOS 143 | os.system('open ' + out + ' ' + save_path) 144 | 145 | print('Done. (%.3fs)' % (time.time() - t0)) 146 | 147 | 148 | if __name__ == '__main__': 149 | parser = argparse.ArgumentParser() 150 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path') 151 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path') 152 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') 153 | parser.add_argument('--source', type=str, default='data/samples', help='source') # input file/folder, 0 for webcam 154 | parser.add_argument('--output', type=str, default='output', help='output folder') # output folder 155 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 156 | parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold') 157 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') 158 | parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)') 159 | parser.add_argument('--half', action='store_true', help='half precision FP16 inference') 160 | parser.add_argument('--device', default='', help='device id (i.e. 
0 or 0,1) or cpu') 161 | parser.add_argument('--view-img', action='store_true', help='display results') 162 | opt = parser.parse_args() 163 | print(opt) 164 | 165 | with torch.no_grad(): 166 | detect() 167 | -------------------------------------------------------------------------------- /github_files/64067835-51d5b500-cc2f-11e9-982e-843f7f9a6ea2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/github_files/64067835-51d5b500-cc2f-11e9-982e-843f7f9a6ea2.jpg -------------------------------------------------------------------------------- /normal_prune.py: -------------------------------------------------------------------------------- 1 | from models import * 2 | from utils.utils import * 3 | import torch 4 | import numpy as np 5 | from copy import deepcopy 6 | from test import test 7 | from terminaltables import AsciiTable 8 | import time 9 | from utils.utils import * 10 | from utils.prune_utils import * 11 | import os 12 | 13 | 14 | class opt(): 15 | model_def = "cfg/yolov3-hand.cfg" 16 | data_config = "cfg/oxfordhand.data" 17 | model = 'weights/last.pt' 18 | 19 | #指定GPU 20 | #torch.cuda.set_device(2) 21 | percent = 0.5 22 | 23 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 24 | model = Darknet(opt.model_def).to(device) 25 | 26 | if opt.model: 27 | if opt.model.endswith(".pt"): 28 | model.load_state_dict(torch.load(opt.model, map_location=device)['model']) 29 | else: 30 | _ = load_darknet_weights(model, opt.model) 31 | 32 | 33 | data_config = parse_data_cfg(opt.data_config) 34 | 35 | valid_path = data_config["valid"] 36 | class_names = load_classes(data_config["names"]) 37 | 38 | 39 | eval_model = lambda model:test(model=model,cfg=opt.model_def, data=opt.data_config) 40 | 41 | 42 | obtain_num_parameters = lambda model:sum([param.nelement() for param in model.parameters()]) 43 | 44 | #这个不应该注释掉,等会要恢复 45 | with torch.no_grad(): 46 | origin_model_metric = eval_model(model) 47 | origin_nparameters = obtain_num_parameters(model) 48 | 49 | CBL_idx, Conv_idx, prune_idx= parse_module_defs(model.module_defs) 50 | 51 | 52 | #将所有要剪枝的BN层的α参数,拷贝到bn_weights列表 53 | bn_weights = gather_bn_weights(model.module_list, prune_idx) 54 | 55 | #torch.sort返回二维列表,第一维是排序后的值列表,第二维是排序后的值列表对应的索引 56 | sorted_bn = torch.sort(bn_weights)[0] 57 | 58 | 59 | #避免剪掉所有channel的最高阈值(每个BN层的gamma的最大值的最小值即为阈值上限) 60 | highest_thre = [] 61 | for idx in prune_idx: 62 | #.item()可以得到张量里的元素值 63 | highest_thre.append(model.module_list[idx][1].weight.data.abs().max().item()) 64 | highest_thre = min(highest_thre) 65 | 66 | # 找到highest_thre对应的下标对应的百分比 67 | percent_limit = (sorted_bn==highest_thre).nonzero().item()/len(bn_weights) 68 | 69 | print(f'Threshold should be less than {highest_thre:.4f}.') 70 | print(f'The corresponding prune ratio is {percent_limit:.3f}.') 71 | 72 | 73 | # 该函数有很重要的意义: 74 | # ①先用深拷贝将原始模型拷贝下来,得到model_copy 75 | # ②将model_copy中,BN层中低于阈值的α参数赋值为0 76 | # ③在BN层中,输出y=α*x+β,由于α参数的值被赋值为0,因此输入仅加了一个偏置β 77 | # ④很神奇的是,network slimming中是将α参数和β参数都置0,该处只将α参数置0,但效果却很好:其实在另外一篇论文中,已经提到,可以先将β参数的效果移到 78 | # 下一层卷积层,再去剪掉本层的α参数 79 | 80 | # 该函数用最简单的方法,让我们看到了,如何快速看到剪枝后的效果 81 | 82 | 83 | 84 | def prune_and_eval(model, sorted_bn, percent=.0): 85 | model_copy = deepcopy(model) 86 | thre_index = int(len(sorted_bn) * percent) 87 | #获得α参数的阈值,小于该值的α参数对应的通道,全部裁剪掉 88 | thre = sorted_bn[thre_index] 89 | 90 | print(f'Channels with Gamma value less than {thre:.4f} are pruned!') 91 | 92 | 
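# (note) `thre` is simply the gamma value at the requested percentile of the globally
# sorted BN weights: with, say, 10000 prunable gammas and percent=0.5, thre_index = 5000
# and every channel whose |gamma| falls below sorted_bn[5000] is zeroed out by the loop
# below. Only gamma is zeroed here (beta is left in place), which is enough for a quick
# mAP estimate before the real structural pruning performed later in this script.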
remain_num = 0 93 | for idx in prune_idx: 94 | 95 | bn_module = model_copy.module_list[idx][1] 96 | 97 | mask = obtain_bn_mask(bn_module, thre) 98 | 99 | remain_num += int(mask.sum()) 100 | bn_module.weight.data.mul_(mask) 101 | with torch.no_grad(): 102 | mAP = eval_model(model_copy)[1].mean() 103 | 104 | print(f'Number of channels has been reduced from {len(sorted_bn)} to {remain_num}') 105 | print(f'Prune ratio: {1-remain_num/len(sorted_bn):.3f}') 106 | print(f'mAP of the pruned model is {mAP:.4f}') 107 | 108 | return thre 109 | 110 | 111 | threshold = prune_and_eval(model, sorted_bn, percent) 112 | 113 | 114 | 115 | #**************************************************************** 116 | #虽然上面已经能看到剪枝后的效果,但是没有生成剪枝后的模型结构,因此下面的代码是为了生成新的模型结构并拷贝旧模型参数到新模型 117 | 118 | 119 | #%% 120 | def obtain_filters_mask(model, thre, CBL_idx, prune_idx): 121 | 122 | pruned = 0 123 | total = 0 124 | num_filters = [] 125 | filters_mask = [] 126 | #CBL_idx存储的是所有带BN的卷积层(YOLO层的前一层卷积层是不带BN的) 127 | for idx in CBL_idx: 128 | bn_module = model.module_list[idx][1] 129 | if idx in prune_idx: 130 | 131 | mask = obtain_bn_mask(bn_module, thre).cpu().numpy() 132 | remain = int(mask.sum()) 133 | pruned = pruned + mask.shape[0] - remain 134 | 135 | if remain == 0: 136 | print("Channels would be all pruned!") 137 | raise Exception 138 | 139 | print(f'layer index: {idx:>3d} \t total channel: {mask.shape[0]:>4d} \t ' 140 | f'remaining channel: {remain:>4d}') 141 | else: 142 | mask = np.ones(bn_module.weight.data.shape) 143 | remain = mask.shape[0] 144 | 145 | total += mask.shape[0] 146 | num_filters.append(remain) 147 | filters_mask.append(mask.copy()) 148 | 149 | #因此,这里求出的prune_ratio,需要裁剪的α参数/cbl_idx中所有的α参数 150 | prune_ratio = pruned / total 151 | print(f'Prune channels: {pruned}\tPrune ratio: {prune_ratio:.3f}') 152 | 153 | return num_filters, filters_mask 154 | 155 | num_filters, filters_mask = obtain_filters_mask(model, threshold, CBL_idx, prune_idx) 156 | 157 | 158 | #CBLidx2mask存储CBL_idx中,每一层BN层对应的mask 159 | CBLidx2mask = {idx: mask for idx, mask in zip(CBL_idx, filters_mask)} 160 | 161 | pruned_model = prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask) 162 | 163 | 164 | 165 | 166 | with torch.no_grad(): 167 | mAP = eval_model(pruned_model)[1].mean() 168 | print('after prune_model_keep_size map is {}'.format(mAP)) 169 | 170 | 171 | #获得原始模型的module_defs,并修改该defs中的卷积核数量 172 | compact_module_defs = deepcopy(model.module_defs) 173 | for idx, num in zip(CBL_idx, num_filters): 174 | assert compact_module_defs[idx]['type'] == 'convolutional' 175 | compact_module_defs[idx]['filters'] = str(num) 176 | 177 | 178 | 179 | compact_model = Darknet([model.hyperparams.copy()] + compact_module_defs).to(device) 180 | compact_nparameters = obtain_num_parameters(compact_model) 181 | 182 | init_weights_from_loose_model(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask) 183 | 184 | 185 | random_input = torch.rand((16, 3, 416, 416)).to(device) 186 | 187 | def obtain_avg_forward_time(input, model, repeat=200): 188 | 189 | model.eval() 190 | start = time.time() 191 | with torch.no_grad(): 192 | for i in range(repeat): 193 | output = model(input) 194 | avg_infer_time = (time.time() - start) / repeat 195 | 196 | return avg_infer_time, output 197 | 198 | pruned_forward_time, pruned_output = obtain_avg_forward_time(random_input, pruned_model) 199 | compact_forward_time, compact_output = obtain_avg_forward_time(random_input, compact_model) 200 | 201 | 202 | 203 | # 在测试集上测试剪枝后的模型, 并统计模型的参数数量 204 | with torch.no_grad(): 205 | 
compact_model_metric = eval_model(compact_model) 206 | 207 | 208 | # 比较剪枝前后参数数量的变化、指标性能的变化 209 | metric_table = [ 210 | ["Metric", "Before", "After"], 211 | ["mAP", f'{origin_model_metric[1].mean():.6f}', f'{compact_model_metric[1].mean():.6f}'], 212 | ["Parameters", f"{origin_nparameters}", f"{compact_nparameters}"], 213 | ["Inference", f'{pruned_forward_time:.4f}', f'{compact_forward_time:.4f}'] 214 | ] 215 | print(AsciiTable(metric_table).table) 216 | 217 | 218 | 219 | # 生成剪枝后的cfg文件并保存模型 220 | pruned_cfg_name = opt.model_def.replace('/', f'/prune_{percent}_') 221 | 222 | #由于原始的compact_module_defs将anchor从字符串变为了数组,因此这里将anchors重新变为字符串 223 | 224 | for item in compact_module_defs: 225 | if item['type']=='yolo': 226 | item['anchors']='10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326' 227 | 228 | pruned_cfg_file = write_cfg(pruned_cfg_name, [model.hyperparams.copy()] + compact_module_defs) 229 | print(f'Config file has been saved: {pruned_cfg_file}') 230 | 231 | #compact_model_name = opt.model.replace('/', f'/prune_{percent}_') 232 | compact_model_name = 'weights/yolov3_hand_normal_pruning_'+str(percent)+'percent.weights' 233 | 234 | save_weights(compact_model, path=compact_model_name) 235 | print(f'Compact model has been saved: {compact_model_name}') 236 | 237 | 238 | 239 | -------------------------------------------------------------------------------- /prune_tiny_yolo.py: -------------------------------------------------------------------------------- 1 | from models import * 2 | from utils.utils import * 3 | import torch 4 | import numpy as np 5 | from copy import deepcopy 6 | from test import test 7 | from terminaltables import AsciiTable 8 | import time 9 | import os 10 | from utils.tiny_prune_utils import * 11 | 12 | class opt(): 13 | model_def = "cfg/yolov3-tiny-hand.cfg" 14 | data_config = "cfg/oxfordhand.data" 15 | model = 'weights/last.pt' 16 | 17 | percent = 0.3 18 | 19 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 20 | model = Darknet(opt.model_def).to(device) 21 | 22 | if opt.model: 23 | if opt.model.endswith(".pt"): 24 | model.load_state_dict(torch.load(opt.model, map_location=device)['model']) 25 | else: 26 | _ = load_darknet_weights(model, opt.model) 27 | 28 | data_config = parse_data_cfg(opt.data_config) 29 | 30 | valid_path = data_config["valid"] 31 | class_names = load_classes(data_config["names"]) 32 | 33 | eval_model = lambda model:test(model=model,cfg=opt.model_def, data=opt.data_config) 34 | obtain_num_parameters = lambda model:sum([param.nelement() for param in model.parameters()]) 35 | 36 | #这个不应该注释掉,等会要恢复 37 | with torch.no_grad(): 38 | origin_model_metric = eval_model(model) 39 | origin_nparameters = obtain_num_parameters(model) 40 | 41 | 42 | CBL_idx, Conv_idx, prune_idx= parse_module_defs(model.module_defs) 43 | 44 | 45 | 46 | #将所有要剪枝的BN层的α参数,拷贝到bn_weights列表 47 | bn_weights = gather_bn_weights(model.module_list, prune_idx) 48 | 49 | 50 | #torch.sort返回二维列表,第一维是排序后的值列表,第二维是排序后的值列表对应的索引 51 | sorted_bn = torch.sort(bn_weights)[0] 52 | 53 | 54 | 55 | #避免剪掉所有channel的最高阈值(每个BN层的gamma的最大值的最小值即为阈值上限) 56 | highest_thre = [] 57 | for idx in prune_idx: 58 | #.item()可以得到张量里的元素值 59 | highest_thre.append(model.module_list[idx][1].weight.data.abs().max().item()) 60 | highest_thre = min(highest_thre) 61 | 62 | # 找到highest_thre对应的下标对应的百分比 63 | percent_limit = (sorted_bn==highest_thre).nonzero().item()/len(bn_weights) 64 | 65 | print(f'Threshold should be less than {highest_thre:.4f}.') 66 | print(f'The corresponding prune ratio is 
{percent_limit:.3f}.') 67 | 68 | 69 | 70 | 71 | # 该函数有很重要的意义: 72 | # ①先用深拷贝将原始模型拷贝下来,得到model_copy 73 | # ②将model_copy中,BN层中低于阈值的α参数赋值为0 74 | # ③在BN层中,输出y=α*x+β,由于α参数的值被赋值为0,因此输入仅加了一个偏置β 75 | # ④很神奇的是,network slimming中是将α参数和β参数都置0,该处只将α参数置0,但效果却很好:其实在另外一篇论文中,已经提到,可以先将β参数的效果移到 76 | # 下一层卷积层,再去剪掉本层的α参数 77 | 78 | # 该函数用最简单的方法,让我们看到了,如何快速看到剪枝后的效果 79 | 80 | 81 | 82 | def prune_and_eval(model, sorted_bn, percent=.0): 83 | model_copy = deepcopy(model) 84 | thre_index = int(len(sorted_bn) * percent) 85 | #获得α参数的阈值,小于该值的α参数对应的通道,全部裁剪掉 86 | thre = sorted_bn[thre_index] 87 | 88 | print(f'Channels with Gamma value less than {thre:.4f} are pruned!') 89 | 90 | remain_num = 0 91 | for idx in prune_idx: 92 | 93 | bn_module = model_copy.module_list[idx][1] 94 | 95 | mask = obtain_bn_mask(bn_module, thre) 96 | 97 | remain_num += int(mask.sum()) 98 | bn_module.weight.data.mul_(mask) 99 | 100 | with torch.no_grad(): 101 | mAP = eval_model(model_copy)[1].mean() 102 | 103 | print(f'Number of channels has been reduced from {len(sorted_bn)} to {remain_num}') 104 | print(f'Prune ratio: {1-remain_num/len(sorted_bn):.3f}') 105 | print(f'mAP of the pruned model is {mAP:.4f}') 106 | 107 | return thre 108 | 109 | 110 | threshold = prune_and_eval(model, sorted_bn, percent) 111 | 112 | 113 | 114 | # **************************************************************** 115 | # 虽然上面已经能看到剪枝后的效果,但是没有生成剪枝后的模型结构,因此下面的代码是为了生成新的模型结构并拷贝旧模型参数到新模型 116 | 117 | 118 | 119 | 120 | 121 | 122 | #%% 123 | def obtain_filters_mask(model, thre, CBL_idx, prune_idx): 124 | 125 | pruned = 0 126 | total = 0 127 | num_filters = [] 128 | filters_mask = [] 129 | #CBL_idx存储的是所有带BN的卷积层(YOLO层的前一层卷积层是不带BN的) 130 | for idx in CBL_idx: 131 | bn_module = model.module_list[idx][1] 132 | if idx in prune_idx: 133 | 134 | mask = obtain_bn_mask(bn_module, thre).cpu().numpy() 135 | remain = int(mask.sum()) 136 | pruned = pruned + mask.shape[0] - remain 137 | 138 | if remain == 0: 139 | print("Channels would be all pruned!") 140 | raise Exception 141 | 142 | # print(f'layer index: {idx:>3d} \t total channel: {mask.shape[0]:>4d} \t ' 143 | # f'remaining channel: {remain:>4d}') 144 | else: 145 | mask = np.ones(bn_module.weight.data.shape) 146 | remain = mask.shape[0] 147 | 148 | total += mask.shape[0] 149 | num_filters.append(remain) 150 | filters_mask.append(mask.copy()) 151 | 152 | #因此,这里求出的prune_ratio,需要裁剪的α参数/cbl_idx中所有的α参数 153 | prune_ratio = pruned / total 154 | print(f'Prune channels: {pruned}\tPrune ratio: {prune_ratio:.3f}') 155 | 156 | return num_filters, filters_mask 157 | 158 | num_filters, filters_mask = obtain_filters_mask(model, threshold, CBL_idx, prune_idx) 159 | 160 | 161 | #CBLidx2mask存储CBL_idx中,每一层BN层对应的mask 162 | CBLidx2mask = {idx: mask for idx, mask in zip(CBL_idx, filters_mask)} 163 | 164 | 165 | 166 | pruned_model = prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask) 167 | 168 | 169 | 170 | 171 | with torch.no_grad(): 172 | mAP = eval_model(pruned_model)[1].mean() 173 | print('after prune_model_keep_size map is {}'.format(mAP)) 174 | 175 | 176 | 177 | 178 | #%% 179 | 180 | #获得原始模型的module_defs,并修改该defs中的卷积核数量 181 | compact_module_defs = deepcopy(model.module_defs) 182 | for idx, num in zip(CBL_idx, num_filters): 183 | assert compact_module_defs[idx]['type'] == 'convolutional' 184 | compact_module_defs[idx]['filters'] = str(num) 185 | 186 | 187 | 188 | compact_model = Darknet([model.hyperparams.copy()] + compact_module_defs).to(device) 189 | compact_nparameters = obtain_num_parameters(compact_model) 190 | 191 | 192 | def 
get_input_mask2(module_defs, idx, CBLidx2mask): 193 | 194 | if idx == 0: 195 | #如果是一层卷积层,它的上一通道mask为3(因为图像为三通道) 196 | return np.ones(3) 197 | if idx<=12: 198 | if module_defs[idx - 2]['type'] == 'convolutional': 199 | return CBLidx2mask[idx - 2] 200 | 201 | else: 202 | if module_defs[idx - 1]['type'] == 'convolutional': 203 | return CBLidx2mask[idx - 1] 204 | elif module_defs[idx - 1]['type'] == 'shortcut': 205 | return CBLidx2mask[idx - 2] 206 | elif module_defs[idx - 1]['type'] == 'route': 207 | route_in_idxs = [] 208 | for layer_i in module_defs[idx - 1]['layers'].split(","): 209 | if int(layer_i) < 0: 210 | route_in_idxs.append(idx - 1 + int(layer_i)) 211 | else: 212 | route_in_idxs.append(int(layer_i)) 213 | if len(route_in_idxs) == 1: 214 | return CBLidx2mask[route_in_idxs[0]] 215 | 216 | elif len(route_in_idxs)==2: 217 | return np.concatenate([CBLidx2mask[route_in_idxs[0]-1],CBLidx2mask[route_in_idxs[1]]]) 218 | 219 | else: 220 | print("Something wrong with route module!") 221 | raise Exception 222 | 223 | def init_weights_from_loose_model2(compact_model, loose_model, CBL_idx, Conv_idx, CBLidx2mask): 224 | #compact_model新模型,loose_model旧模型 225 | for idx in CBL_idx: 226 | compact_CBL = compact_model.module_list[idx] 227 | loose_CBL = loose_model.module_list[idx] 228 | #np.argwhere返回非零元素的索引 229 | 230 | #[3, 4, 7, 8, 9, 10, 11, 12, 13, 16, 19, 22, 23, 24, 26, 30, 31] 231 | out_channel_idx = np.argwhere(CBLidx2mask[idx])[:, 0].tolist() 232 | 233 | 234 | compact_bn, loose_bn = compact_CBL[1], loose_CBL[1] 235 | compact_bn.weight.data = loose_bn.weight.data[out_channel_idx].clone() 236 | compact_bn.bias.data = loose_bn.bias.data[out_channel_idx].clone() 237 | compact_bn.running_mean.data = loose_bn.running_mean.data[out_channel_idx].clone() 238 | compact_bn.running_var.data = loose_bn.running_var.data[out_channel_idx].clone() 239 | 240 | 241 | input_mask = get_input_mask2(loose_model.module_defs, idx, CBLidx2mask) 242 | 243 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 244 | 245 | 246 | compact_conv, loose_conv = compact_CBL[0], loose_CBL[0] 247 | tmp = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 248 | compact_conv.weight.data = tmp[out_channel_idx, :, :, :].clone() 249 | 250 | for idx in Conv_idx: 251 | compact_conv = compact_model.module_list[idx][0] 252 | loose_conv = loose_model.module_list[idx][0] 253 | 254 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 255 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 256 | compact_conv.weight.data = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 257 | compact_conv.bias.data = loose_conv.bias.data.clone() 258 | 259 | 260 | init_weights_from_loose_model2(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask) 261 | 262 | 263 | random_input = torch.rand((1, 3, 416, 416)).to(device) 264 | 265 | def obtain_avg_forward_time(input, model, repeat=200): 266 | 267 | model.eval() 268 | start = time.time() 269 | with torch.no_grad(): 270 | for i in range(repeat): 271 | output = model(input) 272 | avg_infer_time = (time.time() - start) / repeat 273 | 274 | return avg_infer_time, output 275 | 276 | 277 | 278 | 279 | pruned_forward_time, pruned_output = obtain_avg_forward_time(random_input, pruned_model) 280 | 281 | compact_forward_time, compact_output = obtain_avg_forward_time(random_input, compact_model) 282 | 283 | 284 | 285 | # 在测试集上测试剪枝后的模型, 并统计模型的参数数量 286 | with torch.no_grad(): 287 | compact_model_metric = eval_model(compact_model) 288 | 289 | #%% 290 | # 比较剪枝前后参数数量的变化、指标性能的变化 291 | 
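# i.e. compare parameter count, mAP and average forward time of the original model
# against the pruned compact model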
metric_table = [ 292 | ["Metric", "Before", "After"], 293 | ["mAP", f'{origin_model_metric[1].mean():.6f}', f'{compact_model_metric[1].mean():.6f}'], 294 | ["Parameters", f"{origin_nparameters}", f"{compact_nparameters}"], 295 | ["Inference", f'{pruned_forward_time:.4f}', f'{compact_forward_time:.4f}'] 296 | ] 297 | print(AsciiTable(metric_table).table) 298 | 299 | #%% 300 | # 生成剪枝后的cfg文件并保存模型 301 | pruned_cfg_name = opt.model_def.replace('/', f'/prune_{percent}_') 302 | #由于原始的compact_module_defs将anchor从字符串变为了数组,因此这里将anchors重新变为字符串 303 | for item in compact_module_defs: 304 | if item['type']=='yolo': 305 | item['anchors']='10,14, 23,27, 37,58, 81,82, 135,169, 344,319' 306 | 307 | pruned_cfg_file = write_cfg(pruned_cfg_name, [model.hyperparams.copy()] + compact_module_defs) 308 | print(f'Config file has been saved: {pruned_cfg_file}') 309 | 310 | 311 | compact_model_name = 'weights/yolov3_tiny_hand_pruning_'+str(percent)+'percent.weights' 312 | 313 | save_weights(compact_model, path=compact_model_name) 314 | print(f'Compact model has been saved: {compact_model_name}') 315 | 316 | -------------------------------------------------------------------------------- /quant_dorefa.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | import torch 4 | import torch.nn as nn 5 | import numpy as np 6 | from torch.autograd import Function 7 | import torch.nn.functional as F 8 | 9 | 10 | 11 | 12 | class ScaleSigner(Function): 13 | """take a real value x, output sign(x)*E(|x|)""" 14 | @staticmethod 15 | def forward(ctx, input): 16 | return torch.sign(input) * torch.mean(torch.abs(input)) 17 | 18 | @staticmethod 19 | def backward(ctx, grad_output): 20 | return grad_output 21 | 22 | 23 | def scale_sign(input): 24 | return ScaleSigner.apply(input) 25 | 26 | 27 | #真正起作用的量化函数 28 | class Quantizer(Function): 29 | @staticmethod 30 | def forward(ctx, input, nbit): 31 | scale = 2 ** nbit - 1 32 | return torch.round(input * scale) / scale 33 | 34 | @staticmethod 35 | def backward(ctx, grad_output): 36 | return grad_output, None 37 | 38 | 39 | def quantize(input, nbit): 40 | return Quantizer.apply(input, nbit) 41 | 42 | 43 | def dorefa_w(w, nbit_w): 44 | if nbit_w == 1: 45 | w = scale_sign(w) 46 | else: 47 | w = torch.tanh(w) 48 | #将权重限制在[0,1]之间 49 | w = w / (2 * torch.max(torch.abs(w))) + 0.5 50 | #权重量化 51 | w = 2 * quantize(w, nbit_w) - 1 52 | 53 | return w 54 | 55 | 56 | def dorefa_a(input, nbit_a): 57 | return quantize(torch.clamp(0.1 * input, 0, 1), nbit_a) 58 | 59 | 60 | class QuanConv(nn.Conv2d): 61 | """docstring for QuanConv""" 62 | def __init__(self, in_channels, out_channels, kernel_size, quan_name_w='dorefa', quan_name_a='dorefa', nbit_w=32, 63 | nbit_a=32, stride=1, 64 | padding=0, dilation=1, groups=1, 65 | bias=True): 66 | super(QuanConv, self).__init__( 67 | in_channels, out_channels, kernel_size, stride, padding, dilation, 68 | groups, bias) 69 | self.nbit_w = nbit_w 70 | self.nbit_a = nbit_a 71 | name_w_dict = {'dorefa': dorefa_w} 72 | name_a_dict = {'dorefa': dorefa_a} 73 | self.quan_w = name_w_dict[quan_name_w] 74 | self.quan_a = name_a_dict[quan_name_a] 75 | 76 | # @weak_script_method 77 | def forward(self, input): 78 | if self.nbit_w <=32: 79 | #量化卷积 80 | w = self.quan_w(self.weight, self.nbit_w) 81 | else: 82 | #卷积保持不变 83 | w = self.weight 84 | 85 | if self.nbit_a <=32: 86 | #量化激活 87 | x = self.quan_a(input, self.nbit_a) 88 | else: 89 | #激活保持不变 90 | x = input 91 | # print('x unique',np.unique(x.detach().numpy()).shape) 92 | # 
print('w unique',np.unique(w.detach().numpy()).shape) 93 | 94 | #做真正的卷积运算 95 | 96 | output = F.conv2d(x, w, self.bias, self.stride, self.padding, self.dilation, self.groups) 97 | 98 | return output 99 | 100 | class Linear_Q(nn.Linear): 101 | def __init__(self, in_features, out_features, bias=True, quan_name_w='dorefa', quan_name_a='dorefa', nbit_w=32, nbit_a=32): 102 | super(Linear_Q, self).__init__(in_features, out_features, bias) 103 | self.nbit_w = nbit_w 104 | self.nbit_a = nbit_a 105 | name_w_dict = {'dorefa': dorefa_w} 106 | name_a_dict = {'dorefa': dorefa_a} 107 | self.quan_w = name_w_dict[quan_name_w] 108 | self.quan_a = name_a_dict[quan_name_a] 109 | 110 | # @weak_script_method 111 | def forward(self, input): 112 | if self.nbit_w < 32: 113 | w = self.quan_w(self.weight, self.nbit_w) 114 | else: 115 | w = self.weight 116 | 117 | if self.nbit_a < 32: 118 | x = self.quan_a(input, self.nbit_a) 119 | else: 120 | x = input 121 | 122 | # print('x unique',np.unique(x.detach().numpy())) 123 | # print('w unique',np.unique(w.detach().numpy())) 124 | 125 | output = F.linear(x, w, self.bias) 126 | 127 | return output 128 | 129 | 130 | -------------------------------------------------------------------------------- /regular_prune.py: -------------------------------------------------------------------------------- 1 | from models import * 2 | from utils.utils import * 3 | import torch 4 | import numpy as np 5 | from copy import deepcopy 6 | from test import test 7 | from terminaltables import AsciiTable 8 | import time 9 | from utils.utils import * 10 | from utils.prune_utils import * 11 | import os 12 | 13 | #规整剪枝 14 | class opt(): 15 | model_def = "cfg/yolov3-hand.cfg" 16 | data_config = "cfg/oxfordhand.data" 17 | model = 'weights/last.pt' 18 | 19 | #指定GPU 20 | # torch.cuda.set_device(2) 21 | percent = 0.5 22 | filter_switch=[8,16,32,64,128,256,512,1024] 23 | 24 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 25 | model = Darknet(opt.model_def).to(device) 26 | 27 | if opt.model: 28 | if opt.model.endswith(".pt"): 29 | model.load_state_dict(torch.load(opt.model, map_location=device)['model']) 30 | else: 31 | _ = load_darknet_weights(model, opt.model) 32 | 33 | 34 | 35 | data_config = parse_data_cfg(opt.data_config) 36 | 37 | valid_path = data_config["valid"] 38 | class_names = load_classes(data_config["names"]) 39 | 40 | eval_model = lambda model:test(model=model,cfg=opt.model_def, data=opt.data_config) 41 | 42 | 43 | obtain_num_parameters = lambda model:sum([param.nelement() for param in model.parameters()]) 44 | 45 | #这个不应该注释掉,等会要恢复 46 | with torch.no_grad(): 47 | origin_model_metric = eval_model(model) 48 | origin_nparameters = obtain_num_parameters(model) 49 | 50 | 51 | CBL_idx, Conv_idx, prune_idx= parse_module_defs(model.module_defs) 52 | 53 | 54 | 55 | 56 | 57 | #将所有要剪枝的BN层的α参数,拷贝到bn_weights列表 58 | bn_weights = gather_bn_weights(model.module_list, prune_idx) 59 | 60 | #torch.sort返回二维列表,第一维是排序后的值列表,第二维是排序后的值列表对应的索引 61 | sorted_bn = torch.sort(bn_weights)[0] 62 | 63 | 64 | #避免剪掉所有channel的最高阈值(每个BN层的gamma的最大值的最小值即为阈值上限) 65 | highest_thre = [] 66 | for idx in prune_idx: 67 | #.item()可以得到张量里的元素值 68 | highest_thre.append(model.module_list[idx][1].weight.data.abs().max().item()) 69 | highest_thre = min(highest_thre) 70 | 71 | # 找到highest_thre对应的下标对应的百分比 72 | percent_limit = (sorted_bn==highest_thre).nonzero().item()/len(bn_weights) 73 | 74 | print(f'Threshold should be less than {highest_thre:.4f}.') 75 | print(f'The corresponding prune ratio is {percent_limit:.3f}.') 76 
| 77 | 78 | 79 | 80 | 81 | # 该函数有很重要的意义: 82 | # ①先用深拷贝将原始模型拷贝下来,得到model_copy 83 | # ②将model_copy中,BN层中低于阈值的α参数赋值为0 84 | # ③在BN层中,输出y=α*x+β,由于α参数的值被赋值为0,因此输入仅加了一个偏置β 85 | # ④很神奇的是,network slimming中是将α参数和β参数都置0,该处只将α参数置0,但效果却很好:其实在另外一篇论文中,已经提到,可以先将β参数的效果移到 86 | # 下一层卷积层,再去剪掉本层的α参数 87 | 88 | # 该函数用最简单的方法,让我们看到了,如何快速看到剪枝后的效果 89 | 90 | 91 | 92 | def prune_and_eval(model, sorted_bn, percent=.0): 93 | model_copy = deepcopy(model) 94 | thre_index = int(len(sorted_bn) * percent) 95 | #获得α参数的阈值,小于该值的α参数对应的通道,全部裁剪掉 96 | thre = sorted_bn[thre_index] 97 | 98 | print(f'Channels with Gamma value less than {thre:.4f} are pruned!') 99 | 100 | remain_num = 0 101 | for idx in prune_idx: 102 | 103 | bn_module = model_copy.module_list[idx][1] 104 | 105 | mask = obtain_bn_mask(bn_module, thre) 106 | mask_cnt=int(mask.sum()) 107 | if mask_cnt==0: 108 | this_layer_sort_bn=bn_module.weight.data.abs().clone() 109 | sort_bn_values= torch.sort(this_layer_sort_bn)[0] 110 | bn_cnt=bn_module.weight.shape[0] 111 | this_layer_thre=sort_bn_values[bn_cnt-8] 112 | mask = obtain_bn_mask(bn_module, this_layer_thre) 113 | else: 114 | for i in range(len(filter_switch)): 115 | if mask_cnt<=filter_switch[i]: 116 | mask_cnt=filter_switch[i] 117 | break 118 | this_layer_sort_bn=bn_module.weight.data.abs().clone() 119 | sort_bn_values= torch.sort(this_layer_sort_bn)[0] 120 | bn_cnt=bn_module.weight.shape[0] 121 | this_layer_thre=sort_bn_values[bn_cnt-mask_cnt] 122 | mask = obtain_bn_mask(bn_module, this_layer_thre) 123 | 124 | 125 | remain_num += int(mask.sum()) 126 | bn_module.weight.data.mul_(mask) 127 | 128 | with torch.no_grad(): 129 | mAP = eval_model(model_copy)[1].mean() 130 | 131 | print(f'Number of channels has been reduced from {len(sorted_bn)} to {remain_num}') 132 | print(f'Prune ratio: {1-remain_num/len(sorted_bn):.3f}') 133 | print(f'mAP of the pruned model is {mAP:.4f}') 134 | 135 | return thre 136 | 137 | 138 | threshold = prune_and_eval(model, sorted_bn, percent) 139 | 140 | 141 | 142 | #**************************************************************** 143 | #虽然上面已经能看到剪枝后的效果,但是没有生成剪枝后的模型结构,因此下面的代码是为了生成新的模型结构并拷贝旧模型参数到新模型 144 | 145 | 146 | 147 | 148 | 149 | 150 | #%% 151 | def obtain_filters_mask(model, thre, CBL_idx, prune_idx): 152 | 153 | pruned = 0 154 | total = 0 155 | num_filters = [] 156 | filters_mask = [] 157 | #CBL_idx存储的是所有带BN的卷积层(YOLO层的前一层卷积层是不带BN的) 158 | for idx in CBL_idx: 159 | bn_module = model.module_list[idx][1] 160 | if idx in prune_idx: 161 | 162 | mask = obtain_bn_mask(bn_module, thre).cpu().numpy() 163 | 164 | mask_cnt=int(mask.sum()) 165 | 166 | if mask_cnt==0: 167 | this_layer_sort_bn=bn_module.weight.data.abs().clone() 168 | sort_bn_values= torch.sort(this_layer_sort_bn)[0] 169 | bn_cnt=bn_module.weight.shape[0] 170 | this_layer_thre=sort_bn_values[bn_cnt-8] 171 | mask = obtain_bn_mask(bn_module, this_layer_thre).cpu().numpy() 172 | 173 | else: 174 | for i in range(len(filter_switch)): 175 | if mask_cnt<=filter_switch[i]: 176 | mask_cnt=filter_switch[i] 177 | break 178 | this_layer_sort_bn=bn_module.weight.data.abs().clone() 179 | sort_bn_values= torch.sort(this_layer_sort_bn)[0] 180 | bn_cnt=bn_module.weight.shape[0] 181 | this_layer_thre=sort_bn_values[bn_cnt-mask_cnt] 182 | mask = obtain_bn_mask(bn_module, this_layer_thre).cpu().numpy() 183 | 184 | remain = int(mask.sum()) 185 | pruned = pruned + mask.shape[0] - remain 186 | 187 | 188 | if remain == 0: 189 | print("Channels would be all pruned!") 190 | raise Exception 191 | 192 | print(f'layer index: {idx:>3d} \t total channel: 
{mask.shape[0]:>4d} \t ' 193 | f'remaining channel: {remain:>4d}') 194 | else: 195 | mask = np.ones(bn_module.weight.data.shape) 196 | remain = mask.shape[0] 197 | 198 | total += mask.shape[0] 199 | num_filters.append(remain) 200 | filters_mask.append(mask.copy()) 201 | 202 | #因此,这里求出的prune_ratio,需要裁剪的α参数/cbl_idx中所有的α参数 203 | prune_ratio = pruned / total 204 | print(f'Prune channels: {pruned}\tPrune ratio: {prune_ratio:.3f}') 205 | 206 | return num_filters, filters_mask 207 | 208 | num_filters, filters_mask = obtain_filters_mask(model, threshold, CBL_idx, prune_idx) 209 | 210 | 211 | #CBLidx2mask存储CBL_idx中,每一层BN层对应的mask 212 | CBLidx2mask = {idx: mask for idx, mask in zip(CBL_idx, filters_mask)} 213 | 214 | pruned_model = prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask) 215 | 216 | 217 | 218 | with torch.no_grad(): 219 | mAP = eval_model(pruned_model)[1].mean() 220 | print('after prune_model_keep_size map is {}'.format(mAP)) 221 | 222 | 223 | #获得原始模型的module_defs,并修改该defs中的卷积核数量 224 | compact_module_defs = deepcopy(model.module_defs) 225 | for idx, num in zip(CBL_idx, num_filters): 226 | assert compact_module_defs[idx]['type'] == 'convolutional' 227 | compact_module_defs[idx]['filters'] = str(num) 228 | 229 | 230 | 231 | compact_model = Darknet([model.hyperparams.copy()] + compact_module_defs).to(device) 232 | compact_nparameters = obtain_num_parameters(compact_model) 233 | 234 | init_weights_from_loose_model(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask) 235 | 236 | 237 | random_input = torch.rand((16, 3, 416, 416)).to(device) 238 | 239 | def obtain_avg_forward_time(input, model, repeat=200): 240 | 241 | model.eval() 242 | start = time.time() 243 | with torch.no_grad(): 244 | for i in range(repeat): 245 | output = model(input) 246 | avg_infer_time = (time.time() - start) / repeat 247 | 248 | return avg_infer_time, output 249 | 250 | pruned_forward_time, pruned_output = obtain_avg_forward_time(random_input, pruned_model) 251 | compact_forward_time, compact_output = obtain_avg_forward_time(random_input, compact_model) 252 | 253 | 254 | 255 | # 在测试集上测试剪枝后的模型, 并统计模型的参数数量 256 | with torch.no_grad(): 257 | compact_model_metric = eval_model(compact_model) 258 | 259 | 260 | # 比较剪枝前后参数数量的变化、指标性能的变化 261 | metric_table = [ 262 | ["Metric", "Before", "After"], 263 | ["mAP", f'{origin_model_metric[1].mean():.6f}', f'{compact_model_metric[1].mean():.6f}'], 264 | ["Parameters", f"{origin_nparameters}", f"{compact_nparameters}"], 265 | ["Inference", f'{pruned_forward_time:.4f}', f'{compact_forward_time:.4f}'] 266 | ] 267 | print(AsciiTable(metric_table).table) 268 | 269 | 270 | 271 | # 生成剪枝后的cfg文件并保存模型 272 | pruned_cfg_name = opt.model_def.replace('/', f'/prune_{percent}_') 273 | for item in compact_module_defs: 274 | if item['type']=='yolo': 275 | item['anchors']='10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326' 276 | 277 | pruned_cfg_file = write_cfg(pruned_cfg_name, [model.hyperparams.copy()] + compact_module_defs) 278 | print(f'Config file has been saved: {pruned_cfg_file}') 279 | 280 | compact_model_name = 'weights/yolov3_hand_regular_pruning_'+str(percent)+'percent.weights' 281 | 282 | save_weights(compact_model, path=compact_model_name) 283 | print(f'Compact model has been saved: {compact_model_name}') 284 | 285 | 286 | 287 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # pip3 install -U -r requirements.txt 2 | 
numpy 3 | opencv-python 4 | torch >= 1.2 5 | matplotlib 6 | pycocotools 7 | tqdm 8 | tb-nightly 9 | future 10 | Pillow 11 | 12 | # Equivalent conda commands ---------------------------------------------------- 13 | # conda update -n base -c defaults conda 14 | # conda install -yc anaconda future numpy opencv matplotlib tqdm pillow 15 | # conda install -yc conda-forge scikit-image tensorboard pycocotools 16 | # conda install -yc spyder-ide spyder-line-profiler 17 | # conda install -yc pytorch pytorch torchvision 18 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | 4 | from torch.utils.data import DataLoader 5 | 6 | from models import * 7 | from utils.datasets import * 8 | from utils.utils import * 9 | 10 | 11 | 12 | def test(cfg, 13 | data, 14 | weights=None, 15 | batch_size=16, 16 | img_size=416, 17 | iou_thres=0.5, 18 | conf_thres=0.001, 19 | nms_thres=0.5, 20 | save_json=False, 21 | model=None): 22 | # Initialize/load model and set device 23 | if model is None: 24 | device = torch_utils.select_device(opt.device) 25 | verbose = True 26 | 27 | # Initialize model 28 | model = Darknet(cfg, img_size).to(device) 29 | #print(model) 30 | # Load weights 31 | #本身有,被我去掉了 32 | attempt_download(weights) 33 | if weights.endswith('.pt'): # pytorch format 34 | print('.pth is reading') 35 | model.load_state_dict(torch.load(weights, map_location=device)['model']) 36 | 37 | 38 | else: # darknet format 39 | print('darknet weights is reading') 40 | _ = load_darknet_weights(model, weights) 41 | 42 | if torch.cuda.device_count() > 1: 43 | model = nn.DataParallel(model) 44 | else: 45 | device = next(model.parameters()).device # get model device 46 | verbose = False 47 | 48 | # Configure run 49 | data = parse_data_cfg(data) 50 | nc = int(data['classes']) # number of classes 51 | test_path = data['valid'] # path to test images 52 | names = load_classes(data['names']) # class names 53 | 54 | # Dataloader 55 | dataset = LoadImagesAndLabels(test_path, img_size, batch_size) 56 | dataloader = DataLoader(dataset, 57 | batch_size=batch_size, 58 | num_workers=min([os.cpu_count(), batch_size, 16]), 59 | pin_memory=True, 60 | collate_fn=dataset.collate_fn) 61 | 62 | seen = 0 63 | model.eval() 64 | coco91class = coco80_to_coco91_class() 65 | s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1') 66 | p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0. 
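# The evaluation loop below accumulates, per image: the NMS-filtered predictions,
# a 'correct' flag for each prediction (predicted class present among the image's
# targets, best same-class IoU above iou_thres, each target matched at most once),
# plus the confidence, predicted class and target classes. ap_per_class() then
# turns these (correct, conf, pcls, tcls) tuples into P, R, mAP and F1.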
67 | loss = torch.zeros(3) 68 | jdict, stats, ap, ap_class = [], [], [], [] 69 | for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): 70 | targets = targets.to(device) 71 | imgs = imgs.to(device) 72 | _, _, height, width = imgs.shape # batch size, channels, height, width 73 | 74 | # Plot images with bounding boxes 75 | if batch_i == 0 and not os.path.exists('test_batch0.jpg'): 76 | plot_images(imgs=imgs, targets=targets, paths=paths, fname='test_batch0.jpg') 77 | 78 | # Run model 79 | inf_out, train_out = model(imgs) # inference and training outputs 80 | 81 | # Compute loss 82 | if hasattr(model, 'hyp'): # if model has loss hyperparameters 83 | loss += compute_loss(train_out, targets, model)[1][:3].cpu() # GIoU, obj, cls 84 | 85 | # Run NMS 86 | output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres) 87 | 88 | # Statistics per image 89 | for si, pred in enumerate(output): 90 | labels = targets[targets[:, 0] == si, 1:] 91 | nl = len(labels) 92 | tcls = labels[:, 0].tolist() if nl else [] # target class 93 | seen += 1 94 | 95 | if pred is None: 96 | if nl: 97 | stats.append(([], torch.Tensor(), torch.Tensor(), tcls)) 98 | continue 99 | 100 | # Append to text file 101 | # with open('test.txt', 'a') as file: 102 | # [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred] 103 | 104 | # Append to pycocotools JSON dictionary 105 | if save_json: 106 | # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 107 | image_id = int(Path(paths[si]).stem.split('_')[-1]) 108 | box = pred[:, :4].clone() # xyxy 109 | scale_coords(imgs[si].shape[1:], box, shapes[si]) # to original shape 110 | box = xyxy2xywh(box) # xywh 111 | box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner 112 | for di, d in enumerate(pred): 113 | jdict.append({'image_id': image_id, 114 | 'category_id': coco91class[int(d[6])], 115 | 'bbox': [floatn(x, 3) for x in box[di]], 116 | 'score': floatn(d[4], 5)}) 117 | 118 | # Clip boxes to image bounds 119 | clip_coords(pred, (height, width)) 120 | 121 | # Assign all predictions as incorrect 122 | correct = [0] * len(pred) 123 | if nl: 124 | detected = [] 125 | tcls_tensor = labels[:, 0] 126 | 127 | # target boxes 128 | tbox = xywh2xyxy(labels[:, 1:5]) 129 | tbox[:, [0, 2]] *= width 130 | tbox[:, [1, 3]] *= height 131 | 132 | # Search for correct predictions 133 | for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred): 134 | 135 | # Break if all targets already located in image 136 | if len(detected) == nl: 137 | break 138 | 139 | # Continue if predicted class not among image classes 140 | if pcls.item() not in tcls: 141 | continue 142 | 143 | # Best iou, index between pred and targets 144 | m = (pcls == tcls_tensor).nonzero().view(-1) 145 | iou, bi = bbox_iou(pbox, tbox[m]).max(0) 146 | 147 | # If iou > threshold and class is correct mark as correct 148 | if iou > iou_thres and m[bi] not in detected: # and pcls == tcls[bi]: 149 | correct[i] = 1 150 | detected.append(m[bi]) 151 | 152 | # Append statistics (correct, conf, pcls, tcls) 153 | stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls)) 154 | 155 | # Compute statistics 156 | stats = [np.concatenate(x, 0) for x in list(zip(*stats))] # to numpy 157 | if len(stats): 158 | p, r, ap, f1, ap_class = ap_per_class(*stats) 159 | mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean() 160 | nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class 161 | else: 162 | nt = torch.zeros(1) 163 | 
164 | # Print results 165 | pf = '%20s' + '%10.3g' * 6 # print format 166 | print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1)) 167 | 168 | # Print results per class 169 | if verbose and nc > 1 and len(stats): 170 | for i, c in enumerate(ap_class): 171 | print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i])) 172 | 173 | # Save JSON 174 | if save_json and map and len(jdict): 175 | try: 176 | imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataset.img_files] 177 | with open('results.json', 'w') as file: 178 | json.dump(jdict, file) 179 | 180 | from pycocotools.coco import COCO 181 | from pycocotools.cocoeval import COCOeval 182 | 183 | # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb 184 | cocoGt = COCO('../coco/annotations/instances_val2014.json') # initialize COCO ground truth api 185 | cocoDt = cocoGt.loadRes('results.json') # initialize COCO pred api 186 | 187 | cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') 188 | cocoEval.params.imgIds = imgIds # [:32] # only evaluate these images 189 | cocoEval.evaluate() 190 | cocoEval.accumulate() 191 | cocoEval.summarize() 192 | map = cocoEval.stats[1] # update mAP to pycocotools mAP 193 | except: 194 | print('WARNING: missing dependency pycocotools from requirements.txt. Can not compute official COCO mAP.') 195 | 196 | # Return results 197 | maps = np.zeros(nc) + map 198 | for i, c in enumerate(ap_class): 199 | maps[c] = ap[i] 200 | return (mp, mr, map, mf1, *(loss / len(dataloader)).tolist()), maps 201 | 202 | 203 | if __name__ == '__main__': 204 | parser = argparse.ArgumentParser(prog='test.py') 205 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path') 206 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path') 207 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') 208 | parser.add_argument('--batch-size', type=int, default=16, help='size of each image batch') 209 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 210 | parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected') 211 | parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold') 212 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') 213 | parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file') 214 | parser.add_argument('--device', default='', help='device id (i.e. 
0 or 0,1) or cpu') 215 | opt = parser.parse_args() 216 | print(opt) 217 | 218 | with torch.no_grad(): 219 | test(opt.cfg, 220 | opt.data, 221 | opt.weights, 222 | opt.batch_size, 223 | opt.img_size, 224 | opt.iou_thres, 225 | opt.conf_thres, 226 | opt.nms_thres, 227 | opt.save_json) 228 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/utils/__init__.py -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/datasets.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/utils/__pycache__/datasets.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/google_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/utils/__pycache__/google_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/parse_config.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/utils/__pycache__/parse_config.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/prune_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/utils/__pycache__/prune_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/torch_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/utils/__pycache__/torch_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/utils/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/adabound.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.optim import Optimizer 5 | 6 | 7 | class AdaBound(Optimizer): 8 | """Implements AdaBound algorithm. 9 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 
10 | Arguments: 11 | params (iterable): iterable of parameters to optimize or dicts defining 12 | parameter groups 13 | lr (float, optional): Adam learning rate (default: 1e-3) 14 | betas (Tuple[float, float], optional): coefficients used for computing 15 | running averages of gradient and its square (default: (0.9, 0.999)) 16 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 17 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 18 | eps (float, optional): term added to the denominator to improve 19 | numerical stability (default: 1e-8) 20 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 21 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 22 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 23 | https://openreview.net/forum?id=Bkg3g2R9FX 24 | """ 25 | 26 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 27 | eps=1e-8, weight_decay=0, amsbound=False): 28 | if not 0.0 <= lr: 29 | raise ValueError("Invalid learning rate: {}".format(lr)) 30 | if not 0.0 <= eps: 31 | raise ValueError("Invalid epsilon value: {}".format(eps)) 32 | if not 0.0 <= betas[0] < 1.0: 33 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 34 | if not 0.0 <= betas[1] < 1.0: 35 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 36 | if not 0.0 <= final_lr: 37 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 38 | if not 0.0 <= gamma < 1.0: 39 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 40 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 41 | weight_decay=weight_decay, amsbound=amsbound) 42 | super(AdaBound, self).__init__(params, defaults) 43 | 44 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 45 | 46 | def __setstate__(self, state): 47 | super(AdaBound, self).__setstate__(state) 48 | for group in self.param_groups: 49 | group.setdefault('amsbound', False) 50 | 51 | def step(self, closure=None): 52 | """Performs a single optimization step. 53 | Arguments: 54 | closure (callable, optional): A closure that reevaluates the model 55 | and returns the loss. 56 | """ 57 | loss = None 58 | if closure is not None: 59 | loss = closure() 60 | 61 | for group, base_lr in zip(self.param_groups, self.base_lrs): 62 | for p in group['params']: 63 | if p.grad is None: 64 | continue 65 | grad = p.grad.data 66 | if grad.is_sparse: 67 | raise RuntimeError( 68 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 69 | amsbound = group['amsbound'] 70 | 71 | state = self.state[p] 72 | 73 | # State initialization 74 | if len(state) == 0: 75 | state['step'] = 0 76 | # Exponential moving average of gradient values 77 | state['exp_avg'] = torch.zeros_like(p.data) 78 | # Exponential moving average of squared gradient values 79 | state['exp_avg_sq'] = torch.zeros_like(p.data) 80 | if amsbound: 81 | # Maintains max of all exp. moving avg. of sq. grad. 
values 82 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 83 | 84 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 85 | if amsbound: 86 | max_exp_avg_sq = state['max_exp_avg_sq'] 87 | beta1, beta2 = group['betas'] 88 | 89 | state['step'] += 1 90 | 91 | if group['weight_decay'] != 0: 92 | grad = grad.add(group['weight_decay'], p.data) 93 | 94 | # Decay the first and second moment running average coefficient 95 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 96 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 97 | if amsbound: 98 | # Maintains the maximum of all 2nd moment running avg. till now 99 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 100 | # Use the max. for normalizing running avg. of gradient 101 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 102 | else: 103 | denom = exp_avg_sq.sqrt().add_(group['eps']) 104 | 105 | bias_correction1 = 1 - beta1 ** state['step'] 106 | bias_correction2 = 1 - beta2 ** state['step'] 107 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 108 | 109 | # Applies bounds on actual learning rate 110 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 111 | final_lr = group['final_lr'] * group['lr'] / base_lr 112 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 113 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 114 | step_size = torch.full_like(denom, step_size) 115 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 116 | 117 | p.data.add_(-step_size) 118 | 119 | return loss 120 | 121 | 122 | class AdaBoundW(Optimizer): 123 | """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101) 124 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 125 | Arguments: 126 | params (iterable): iterable of parameters to optimize or dicts defining 127 | parameter groups 128 | lr (float, optional): Adam learning rate (default: 1e-3) 129 | betas (Tuple[float, float], optional): coefficients used for computing 130 | running averages of gradient and its square (default: (0.9, 0.999)) 131 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 132 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 133 | eps (float, optional): term added to the denominator to improve 134 | numerical stability (default: 1e-8) 135 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 136 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 137 | .. 
Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 138 | https://openreview.net/forum?id=Bkg3g2R9FX 139 | """ 140 | 141 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 142 | eps=1e-8, weight_decay=0, amsbound=False): 143 | if not 0.0 <= lr: 144 | raise ValueError("Invalid learning rate: {}".format(lr)) 145 | if not 0.0 <= eps: 146 | raise ValueError("Invalid epsilon value: {}".format(eps)) 147 | if not 0.0 <= betas[0] < 1.0: 148 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 149 | if not 0.0 <= betas[1] < 1.0: 150 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 151 | if not 0.0 <= final_lr: 152 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 153 | if not 0.0 <= gamma < 1.0: 154 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 155 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 156 | weight_decay=weight_decay, amsbound=amsbound) 157 | super(AdaBoundW, self).__init__(params, defaults) 158 | 159 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 160 | 161 | def __setstate__(self, state): 162 | super(AdaBoundW, self).__setstate__(state) 163 | for group in self.param_groups: 164 | group.setdefault('amsbound', False) 165 | 166 | def step(self, closure=None): 167 | """Performs a single optimization step. 168 | Arguments: 169 | closure (callable, optional): A closure that reevaluates the model 170 | and returns the loss. 171 | """ 172 | loss = None 173 | if closure is not None: 174 | loss = closure() 175 | 176 | for group, base_lr in zip(self.param_groups, self.base_lrs): 177 | for p in group['params']: 178 | if p.grad is None: 179 | continue 180 | grad = p.grad.data 181 | if grad.is_sparse: 182 | raise RuntimeError( 183 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 184 | amsbound = group['amsbound'] 185 | 186 | state = self.state[p] 187 | 188 | # State initialization 189 | if len(state) == 0: 190 | state['step'] = 0 191 | # Exponential moving average of gradient values 192 | state['exp_avg'] = torch.zeros_like(p.data) 193 | # Exponential moving average of squared gradient values 194 | state['exp_avg_sq'] = torch.zeros_like(p.data) 195 | if amsbound: 196 | # Maintains max of all exp. moving avg. of sq. grad. values 197 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 198 | 199 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 200 | if amsbound: 201 | max_exp_avg_sq = state['max_exp_avg_sq'] 202 | beta1, beta2 = group['betas'] 203 | 204 | state['step'] += 1 205 | 206 | # Decay the first and second moment running average coefficient 207 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 208 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 209 | if amsbound: 210 | # Maintains the maximum of all 2nd moment running avg. till now 211 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 212 | # Use the max. for normalizing running avg. 
of gradient 213 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 214 | else: 215 | denom = exp_avg_sq.sqrt().add_(group['eps']) 216 | 217 | bias_correction1 = 1 - beta1 ** state['step'] 218 | bias_correction2 = 1 - beta2 ** state['step'] 219 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 220 | 221 | # Applies bounds on actual learning rate 222 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 223 | final_lr = group['final_lr'] * group['lr'] / base_lr 224 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 225 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 226 | step_size = torch.full_like(denom, step_size) 227 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 228 | 229 | if group['weight_decay'] != 0: 230 | decayed_weights = torch.mul(p.data, group['weight_decay']) 231 | p.data.add_(-step_size) 232 | p.data.sub_(decayed_weights) 233 | else: 234 | p.data.add_(-step_size) 235 | 236 | return loss 237 | -------------------------------------------------------------------------------- /utils/gcp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # New VM 4 | rm -rf sample_data yolov3 darknet apex coco cocoapi knife knifec 5 | git clone https://github.com/ultralytics/yolov3 6 | # git clone https://github.com/AlexeyAB/darknet && cd darknet && make GPU=1 CUDNN=1 CUDNN_HALF=1 OPENCV=0 && wget -c https://pjreddie.com/media/files/darknet53.conv.74 && cd .. 7 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex 8 | # git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. 
&& cp -r cocoapi/PythonAPI/pycocotools yolov3 9 | sudo conda install -y -c conda-forge scikit-image tensorboard pycocotools 10 | python3 -c " 11 | from yolov3.utils.google_utils import gdrive_download 12 | gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip')" 13 | sudo shutdown 14 | 15 | # Re-clone 16 | rm -rf yolov3 # Warning: remove existing 17 | git clone https://github.com/ultralytics/yolov3 && cd yolov3 # master 18 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 test # branch 19 | python3 train.py --img-size 320 --weights weights/darknet53.conv.74 --epochs 27 --batch-size 64 --accumulate 1 20 | 21 | # Train 22 | python3 train.py 23 | 24 | # Resume 25 | python3 train.py --resume 26 | 27 | # Detect 28 | python3 detect.py 29 | 30 | # Test 31 | python3 test.py --save-json 32 | 33 | # Evolve 34 | for i in {0..500} 35 | do 36 | python3 train.py --data data/coco.data --img-size 320 --epochs 1 --batch-size 64 --accumulate 1 --evolve --bucket yolov4 37 | done 38 | 39 | # Git pull 40 | git pull https://github.com/ultralytics/yolov3 # master 41 | git pull https://github.com/ultralytics/yolov3 test # branch 42 | 43 | # Test Darknet training 44 | python3 test.py --weights ../darknet/backup/yolov3.backup 45 | 46 | # Copy last.pt TO bucket 47 | gsutil cp yolov3/weights/last1gpu.pt gs://ultralytics 48 | 49 | # Copy last.pt FROM bucket 50 | gsutil cp gs://ultralytics/last.pt yolov3/weights/last.pt 51 | wget https://storage.googleapis.com/ultralytics/yolov3/last_v1_0.pt -O weights/last_v1_0.pt 52 | wget https://storage.googleapis.com/ultralytics/yolov3/best_v1_0.pt -O weights/best_v1_0.pt 53 | 54 | # Reproduce tutorials 55 | rm results*.txt # WARNING: removes existing results 56 | python3 train.py --nosave --data data/coco_1img.data && mv results.txt results0r_1img.txt 57 | python3 train.py --nosave --data data/coco_10img.data && mv results.txt results0r_10img.txt 58 | python3 train.py --nosave --data data/coco_100img.data && mv results.txt results0r_100img.txt 59 | # python3 train.py --nosave --data data/coco_100img.data --transfer && mv results.txt results3_100imgTL.txt 60 | python3 -c "from utils import utils; utils.plot_results()" 61 | # gsutil cp results*.txt gs://ultralytics 62 | gsutil cp results.png gs://ultralytics 63 | sudo shutdown 64 | 65 | # Reproduce mAP 66 | python3 test.py --save-json --img-size 608 67 | python3 test.py --save-json --img-size 416 68 | python3 test.py --save-json --img-size 320 69 | sudo shutdown 70 | 71 | # Benchmark script 72 | git clone https://github.com/ultralytics/yolov3 # clone our repo 73 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex # install nvidia apex 74 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip')" # download coco dataset (20GB) 75 | cd yolov3 && clear && python3 train.py --epochs 1 # run benchmark (~30 min) 76 | 77 | # Unit tests 78 | python3 detect.py # detect 2 persons, 1 tie 79 | python3 test.py --data data/coco_32img.data # test mAP = 0.8 80 | python3 train.py --data data/coco_32img.data --epochs 5 --nosave # train 5 epochs 81 | python3 train.py --data data/coco_1cls.data --epochs 5 --nosave # train 5 epochs 82 | python3 train.py --data data/coco_1img.data --epochs 5 --nosave # train 5 epochs 83 | 84 | # AlexyAB Darknet 85 | gsutil cp -r gs://sm6/supermarket2 . 
# dataset from bucket 86 | rm -rf darknet && git clone https://github.com/AlexeyAB/darknet && cd darknet && wget -c https://pjreddie.com/media/files/darknet53.conv.74 # sudo apt install libopencv-dev && make 87 | ./darknet detector calc_anchors data/coco_img64.data -num_of_clusters 9 -width 320 -height 320 # kmeans anchor calculation 88 | ./darknet detector train ../supermarket2/supermarket2.data ../yolo_v3_spp_pan_scale.cfg darknet53.conv.74 -map -dont_show # train spp 89 | ./darknet detector train ../yolov3/data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp coco 90 | 91 | ./darknet detector train data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp 92 | gsutil cp -r backup/*5000.weights gs://sm6/weights 93 | sudo shutdown 94 | 95 | 96 | ./darknet detector train ../supermarket2/supermarket2.data ../yolov3-tiny-sm2-1cls.cfg yolov3-tiny.conv.15 -map -dont_show # train tiny 97 | ./darknet detector train ../supermarket2/supermarket2.data cfg/yolov3-spp-sm2-1cls.cfg backup/yolov3-spp-sm2-1cls_last.weights # resume 98 | python3 train.py --data ../supermarket2/supermarket2.data --cfg ../yolov3-spp-sm2-1cls.cfg --epochs 100 --num-workers 8 --img-size 320 --nosave # train ultralytics 99 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls_5000.weights --cfg cfg/yolov3-spp-sm2-1cls.cfg # test 100 | gsutil cp -r backup/*.weights gs://sm6/weights # weights to bucket 101 | 102 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls_5000.weights --cfg ../yolov3-spp-sm2-1cls.cfg --img-size 320 --conf-thres 0.2 # test 103 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_125_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_125.cfg --img-size 320 --conf-thres 0.2 # test 104 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_150_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_150.cfg --img-size 320 --conf-thres 0.2 # test 105 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_200_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_200.cfg --img-size 320 --conf-thres 0.2 # test 106 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls-scalexy_variable_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_variable.cfg --img-size 320 --conf-thres 0.2 # test 107 | 108 | python3 train.py --img-size 320 --epochs 27 --batch-size 64 --accumulate 1 --nosave --notest && python3 test.py --weights weights/last.pt --img-size 320 --save-json && sudo shutdown 109 | 110 | # Debug/Development 111 | python3 train.py --data data/coco.data --img-size 320 --single-scale --batch-size 64 --accumulate 1 --epochs 1 --evolve --giou 112 | python3 test.py --weights weights/last.pt --cfg cfg/yolov3-spp.cfg --img-size 320 113 | 114 | gsutil cp evolve.txt gs://ultralytics 115 | sudo shutdown 116 | 117 | #Docker 118 | sudo docker kill $(sudo docker ps -q) 119 | sudo docker pull ultralytics/yolov3:v1 120 | sudo nvidia-docker run -it --ipc=host --mount type=bind,source="$(pwd)"/coco,target=/usr/src/coco ultralytics/yolov3:v1 121 | 122 | clear 123 | while true 124 | do 125 | python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --evolve --epochs 1 --adam --bucket yolov4/adamdefaultpw_coco_1e --device 1 126 | done 127 | 128 | python3 train.py --data 
data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --epochs 1 --adam --device 1 --prebias 129 | while true; do python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --evolve --epochs 1 --adam --bucket yolov4/adamdefaultpw_coco_1e; done 130 | -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | # pip install --upgrade google-cloud-storage 3 | 4 | import os 5 | import time 6 | 7 | 8 | # from google.cloud import storage 9 | 10 | 11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'): 12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f 13 | # Downloads a file from Google Drive, accepting presented query 14 | # from utils.google_utils import *; gdrive_download() 15 | t = time.time() 16 | 17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='') 18 | if os.path.exists(name): # remove existing 19 | os.remove(name) 20 | 21 | # Attempt large file download 22 | s = ["curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id, 23 | "curl -Lb ./cookie -s \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % ( 24 | id, name), 25 | 'rm ./cookie'] 26 | [os.system(x) for x in s] # run commands 27 | 28 | # Attempt small file download 29 | if not os.path.exists(name): # file size < 40MB 30 | s = 'curl -f -L -o %s https://drive.google.com/uc?export=download&id=%s' % (name, id) 31 | os.system(s) 32 | 33 | # Unzip if archive 34 | if name.endswith('.zip'): 35 | print('unzipping... 
', end='') 36 | os.system('unzip -q %s' % name) # unzip 37 | os.remove(name) # remove zip to free space 38 | 39 | print('Done (%.1fs)' % (time.time() - t)) 40 | 41 | 42 | def upload_blob(bucket_name, source_file_name, destination_blob_name): 43 | # Uploads a file to a bucket 44 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 45 | 46 | storage_client = storage.Client() 47 | bucket = storage_client.get_bucket(bucket_name) 48 | blob = bucket.blob(destination_blob_name) 49 | 50 | blob.upload_from_filename(source_file_name) 51 | 52 | print('File {} uploaded to {}.'.format( 53 | source_file_name, 54 | destination_blob_name)) 55 | 56 | 57 | def download_blob(bucket_name, source_blob_name, destination_file_name): 58 | # Uploads a blob from a bucket 59 | storage_client = storage.Client() 60 | bucket = storage_client.get_bucket(bucket_name) 61 | blob = bucket.blob(source_blob_name) 62 | 63 | blob.download_to_filename(destination_file_name) 64 | 65 | print('Blob {} downloaded to {}.'.format( 66 | source_blob_name, 67 | destination_file_name)) 68 | -------------------------------------------------------------------------------- /utils/parse_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def parse_model_cfg(path): 5 | # Parses the yolo-v3 layer configuration file and returns module definitions 6 | file = open(path, 'r') 7 | lines = file.read().split('\n') 8 | lines = [x for x in lines if x and not x.startswith('#')] 9 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 10 | mdefs = [] # module definitions 11 | for line in lines: 12 | if line.startswith('['): # This marks the start of a new block 13 | mdefs.append({}) 14 | mdefs[-1]['type'] = line[1:-1].rstrip() 15 | if mdefs[-1]['type'] == 'convolutional': 16 | mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later) 17 | else: 18 | key, val = line.split("=") 19 | key = key.rstrip() 20 | 21 | if 'anchors' in key: 22 | mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors 23 | else: 24 | mdefs[-1][key] = val.strip() 25 | 26 | return mdefs 27 | 28 | 29 | def parse_data_cfg(path): 30 | # Parses the data configuration file 31 | options = dict() 32 | with open(path, 'r') as fp: 33 | lines = fp.readlines() 34 | 35 | for line in lines: 36 | line = line.strip() 37 | if line == '' or line.startswith('#'): 38 | continue 39 | key, val = line.split('=') 40 | options[key.strip()] = val.strip() 41 | 42 | return options 43 | -------------------------------------------------------------------------------- /utils/prune_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from terminaltables import AsciiTable 3 | from copy import deepcopy 4 | import numpy as np 5 | import torch.nn.functional as F 6 | 7 | 8 | def get_sr_flag(epoch, sr): 9 | # return epoch >= 5 and sr 10 | return sr 11 | 12 | def parse_module_defs3(module_defs): 13 | 14 | CBL_idx = [] 15 | Conv_idx = [] 16 | for i, module_def in enumerate(module_defs): 17 | if module_def['type'] == 'convolutional': 18 | if module_def['batch_normalize'] == '1': 19 | CBL_idx.append(i) 20 | else: 21 | Conv_idx.append(i) 22 | 23 | ignore_idx = set() 24 | 25 | ignore_idx.add(18) 26 | 27 | 28 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 29 | 30 | return CBL_idx, Conv_idx, prune_idx 31 | 32 | def parse_module_defs2(module_defs): 33 | 34 | 
CBL_idx = [] 35 | Conv_idx = [] 36 | shortcut_idx=dict() 37 | shortcut_all=set() 38 | for i, module_def in enumerate(module_defs): 39 | if module_def['type'] == 'convolutional': 40 | if module_def['batch_normalize'] == '1': 41 | CBL_idx.append(i) 42 | else: 43 | Conv_idx.append(i) 44 | 45 | ignore_idx = set() 46 | for i, module_def in enumerate(module_defs): 47 | if module_def['type'] == 'shortcut': 48 | identity_idx = (i + int(module_def['from'])) 49 | if module_defs[identity_idx]['type'] == 'convolutional': 50 | 51 | #ignore_idx.add(identity_idx) 52 | shortcut_idx[i-1]=identity_idx 53 | shortcut_all.add(identity_idx) 54 | elif module_defs[identity_idx]['type'] == 'shortcut': 55 | 56 | #ignore_idx.add(identity_idx - 1) 57 | shortcut_idx[i-1]=identity_idx-1 58 | shortcut_all.add(identity_idx-1) 59 | shortcut_all.add(i-1) 60 | # do not prune the conv layers directly before the upsample layers 61 | ignore_idx.add(84) 62 | ignore_idx.add(96) 63 | 64 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 65 | 66 | return CBL_idx, Conv_idx, prune_idx,shortcut_idx,shortcut_all 67 | 68 | def parse_module_defs(module_defs): 69 | 70 | CBL_idx = [] 71 | Conv_idx = [] 72 | for i, module_def in enumerate(module_defs): 73 | if module_def['type'] == 'convolutional': 74 | if module_def['batch_normalize'] == '1': 75 | CBL_idx.append(i) 76 | else: 77 | Conv_idx.append(i) 78 | ignore_idx = set() 79 | for i, module_def in enumerate(module_defs): 80 | if module_def['type'] == 'shortcut': 81 | ignore_idx.add(i-1) 82 | identity_idx = (i + int(module_def['from'])) 83 | if module_defs[identity_idx]['type'] == 'convolutional': 84 | ignore_idx.add(identity_idx) 85 | elif module_defs[identity_idx]['type'] == 'shortcut': 86 | ignore_idx.add(identity_idx - 1) 87 | # do not prune the conv layers directly before the upsample layers 88 | ignore_idx.add(84) 89 | ignore_idx.add(96) 90 | 91 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 92 | 93 | return CBL_idx, Conv_idx, prune_idx 94 | 95 | 96 | def gather_bn_weights(module_list, prune_idx): 97 | 98 | size_list = [module_list[idx][1].weight.data.shape[0] for idx in prune_idx] 99 | 100 | bn_weights = torch.zeros(sum(size_list)) 101 | index = 0 102 | for idx, size in zip(prune_idx, size_list): 103 | bn_weights[index:(index + size)] = module_list[idx][1].weight.data.abs().clone() 104 | index += size 105 | 106 | return bn_weights 107 | 108 | 109 | def write_cfg(cfg_file, module_defs): 110 | 111 | with open(cfg_file, 'w') as f: 112 | for module_def in module_defs: 113 | f.write(f"[{module_def['type']}]\n") 114 | for key, value in module_def.items(): 115 | if key != 'type': 116 | f.write(f"{key}={value}\n") 117 | f.write("\n") 118 | return cfg_file 119 | 120 | 121 | class BNOptimizer(): 122 | 123 | @staticmethod 124 | def updateBN(sr_flag, module_list, s, prune_idx): 125 | if sr_flag: 126 | for idx in prune_idx: 127 | # Sequential(Conv, BN, LeakyReLU) 128 | bn_module = module_list[idx][1] 129 | bn_module.weight.grad.data.add_(s * torch.sign(bn_module.weight.data)) # L1 130 | 131 | 132 | def obtain_quantiles(bn_weights, num_quantile=5): 133 | 134 | sorted_bn_weights, i = torch.sort(bn_weights) 135 | total = sorted_bn_weights.shape[0] 136 | quantiles = sorted_bn_weights.tolist()[-1::-total//num_quantile][::-1] 137 | print("\nBN weights quantile:") 138 | quantile_table = [ 139 | [f'{i}/{num_quantile}' for i in range(1, num_quantile+1)], 140 | ["%.3f" % quantile for quantile in quantiles] 141 | ] 142 | print(AsciiTable(quantile_table).table) 143 | 144 | return quantiles 145 | 146 | 147 | def get_input_mask(module_defs, idx, CBLidx2mask): 148 | 149 | if idx == 0: 150
| return np.ones(3) 151 | 152 | if module_defs[idx - 1]['type'] == 'convolutional': 153 | return CBLidx2mask[idx - 1] 154 | elif module_defs[idx - 1]['type'] == 'shortcut': 155 | return CBLidx2mask[idx - 2] 156 | elif module_defs[idx - 1]['type'] == 'route': 157 | route_in_idxs = [] 158 | for layer_i in module_defs[idx - 1]['layers'].split(","): 159 | if int(layer_i) < 0: 160 | route_in_idxs.append(idx - 1 + int(layer_i)) 161 | else: 162 | route_in_idxs.append(int(layer_i)) 163 | if len(route_in_idxs) == 1: 164 | return CBLidx2mask[route_in_idxs[0]] 165 | elif len(route_in_idxs) == 2: 166 | return np.concatenate([CBLidx2mask[in_idx - 1] for in_idx in route_in_idxs]) 167 | else: 168 | print("Something wrong with route module!") 169 | raise Exception 170 | 171 | 172 | def init_weights_from_loose_model(compact_model, loose_model, CBL_idx, Conv_idx, CBLidx2mask): 173 | 174 | for idx in CBL_idx: 175 | compact_CBL = compact_model.module_list[idx] 176 | loose_CBL = loose_model.module_list[idx] 177 | out_channel_idx = np.argwhere(CBLidx2mask[idx])[:, 0].tolist() 178 | 179 | compact_bn, loose_bn = compact_CBL[1], loose_CBL[1] 180 | compact_bn.weight.data = loose_bn.weight.data[out_channel_idx].clone() 181 | compact_bn.bias.data = loose_bn.bias.data[out_channel_idx].clone() 182 | compact_bn.running_mean.data = loose_bn.running_mean.data[out_channel_idx].clone() 183 | compact_bn.running_var.data = loose_bn.running_var.data[out_channel_idx].clone() 184 | 185 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 186 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 187 | compact_conv, loose_conv = compact_CBL[0], loose_CBL[0] 188 | tmp = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 189 | compact_conv.weight.data = tmp[out_channel_idx, :, :, :].clone() 190 | 191 | for idx in Conv_idx: 192 | compact_conv = compact_model.module_list[idx][0] 193 | loose_conv = loose_model.module_list[idx][0] 194 | 195 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 196 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 197 | compact_conv.weight.data = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 198 | compact_conv.bias.data = loose_conv.bias.data.clone() 199 | 200 | 201 | def prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask): 202 | 203 | pruned_model = deepcopy(model) 204 | for idx in prune_idx: 205 | mask = torch.from_numpy(CBLidx2mask[idx]).cuda() 206 | bn_module = pruned_model.module_list[idx][1] 207 | 208 | bn_module.weight.data.mul_(mask) 209 | 210 | activation = F.leaky_relu((1 - mask) * bn_module.bias.data, 0.1) 211 | 212 | # also propagate the offset to the conv layers before the two upsample layers 213 | next_idx_list = [idx + 1] 214 | if idx == 79: 215 | next_idx_list.append(84) 216 | elif idx == 91: 217 | next_idx_list.append(96) 218 | 219 | for next_idx in next_idx_list: 220 | next_conv = pruned_model.module_list[next_idx][0] 221 | conv_sum = next_conv.weight.data.sum(dim=(2, 3)) 222 | offset = conv_sum.matmul(activation.reshape(-1, 1)).reshape(-1) 223 | if next_idx in CBL_idx: 224 | next_bn = pruned_model.module_list[next_idx][1] 225 | next_bn.running_mean.data.sub_(offset) 226 | else: 227 | # Note: a convolutional layer followed by BN has no bias; one without BN does use a bias 228 | next_conv.bias.data.add_(offset) 229 | 230 | bn_module.bias.data.mul_(mask) 231 | 232 | return pruned_model 233 | 234 | 235 | def obtain_bn_mask(bn_module, thre): 236 | 237 | thre = thre.cuda() 238 | mask = bn_module.weight.data.abs().ge(thre).float() 239 | 240 | return mask 241 |
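The helpers above are the building blocks of the prune scripts (e.g. the regular pruning flow shown earlier). As a rough orientation, here is a minimal sketch of how they are typically chained; `model` (a Darknet instance already loaded on GPU) and `percent` (the requested pruning ratio) are assumed to be supplied by the calling script, and the real scripts additionally build all-ones masks for the ignored CBL layers (see obtain_filters_mask above), so the exact flow there differs slightly.

import torch
from utils.prune_utils import parse_module_defs, gather_bn_weights, obtain_bn_mask

# split layers into conv+BN (prunable) and plain conv (detection heads)
CBL_idx, Conv_idx, prune_idx = parse_module_defs(model.module_defs)

# collect |gamma| of every prunable BN channel and turn the ratio into a global threshold
bn_weights = gather_bn_weights(model.module_list, prune_idx)
sorted_bn = torch.sort(bn_weights)[0]
threshold = sorted_bn[int(len(sorted_bn) * percent)]

# per-layer 0/1 channel masks; channels whose |gamma| falls below the threshold get pruned
CBLidx2mask = {idx: obtain_bn_mask(model.module_list[idx][1], threshold).cpu().numpy()
               for idx in prune_idx}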
-------------------------------------------------------------------------------- /utils/tiny_prune_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from terminaltables import AsciiTable 3 | from copy import deepcopy 4 | import numpy as np 5 | import torch.nn.functional as F 6 | 7 | 8 | def get_sr_flag(epoch, sr): 9 | # return epoch >= 5 and sr 10 | return sr 11 | 12 | 13 | def parse_module_defs(module_defs): 14 | 15 | CBL_idx = [] 16 | Conv_idx = [] 17 | for i, module_def in enumerate(module_defs): 18 | if module_def['type'] == 'convolutional': 19 | if module_def['batch_normalize'] == '1': 20 | CBL_idx.append(i) 21 | else: 22 | Conv_idx.append(i) 23 | 24 | ignore_idx = set() 25 | 26 | ignore_idx.add(18) 27 | 28 | 29 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 30 | 31 | return CBL_idx, Conv_idx, prune_idx 32 | 33 | 34 | def gather_bn_weights(module_list, prune_idx): 35 | 36 | size_list = [module_list[idx][1].weight.data.shape[0] for idx in prune_idx] 37 | 38 | bn_weights = torch.zeros(sum(size_list)) 39 | index = 0 40 | for idx, size in zip(prune_idx, size_list): 41 | bn_weights[index:(index + size)] = module_list[idx][1].weight.data.abs().clone() 42 | index += size 43 | 44 | return bn_weights 45 | 46 | 47 | def write_cfg(cfg_file, module_defs): 48 | 49 | with open(cfg_file, 'w') as f: 50 | for module_def in module_defs: 51 | f.write(f"[{module_def['type']}]\n") 52 | for key, value in module_def.items(): 53 | if key != 'type': 54 | f.write(f"{key}={value}\n") 55 | f.write("\n") 56 | return cfg_file 57 | 58 | 59 | class BNOptimizer(): 60 | 61 | @staticmethod 62 | def updateBN(sr_flag, module_list, s, prune_idx): 63 | if sr_flag: 64 | for idx in prune_idx: 65 | # Squential(Conv, BN, Lrelu) 66 | bn_module = module_list[idx][1] 67 | bn_module.weight.grad.data.add_(s * torch.sign(bn_module.weight.data)) # L1 68 | 69 | 70 | def obtain_quantiles(bn_weights, num_quantile=5): 71 | 72 | sorted_bn_weights, i = torch.sort(bn_weights) 73 | total = sorted_bn_weights.shape[0] 74 | quantiles = sorted_bn_weights.tolist()[-1::-total//num_quantile][::-1] 75 | print("\nBN weights quantile:") 76 | quantile_table = [ 77 | [f'{i}/{num_quantile}' for i in range(1, num_quantile+1)], 78 | ["%.3f" % quantile for quantile in quantiles] 79 | ] 80 | print(AsciiTable(quantile_table).table) 81 | 82 | return quantiles 83 | 84 | 85 | def get_input_mask(module_defs, idx, CBLidx2mask): 86 | 87 | if idx == 0: 88 | return np.ones(3) 89 | 90 | if module_defs[idx - 1]['type'] == 'convolutional': 91 | return CBLidx2mask[idx - 1] 92 | elif module_defs[idx - 1]['type'] == 'shortcut': 93 | return CBLidx2mask[idx - 2] 94 | elif module_defs[idx - 1]['type'] == 'route': 95 | route_in_idxs = [] 96 | for layer_i in module_defs[idx - 1]['layers'].split(","): 97 | if int(layer_i) < 0: 98 | route_in_idxs.append(idx - 1 + int(layer_i)) 99 | else: 100 | route_in_idxs.append(int(layer_i)) 101 | if len(route_in_idxs) == 1: 102 | return CBLidx2mask[route_in_idxs[0]] 103 | elif len(route_in_idxs) == 2: 104 | return np.concatenate([CBLidx2mask[in_idx - 1] for in_idx in route_in_idxs]) 105 | else: 106 | print("Something wrong with route module!") 107 | raise Exception 108 | 109 | 110 | def init_weights_from_loose_model(compact_model, loose_model, CBL_idx, Conv_idx, CBLidx2mask): 111 | 112 | for idx in CBL_idx: 113 | compact_CBL = compact_model.module_list[idx] 114 | loose_CBL = loose_model.module_list[idx] 115 | out_channel_idx = np.argwhere(CBLidx2mask[idx])[:, 
0].tolist() 116 | 117 | compact_bn, loose_bn = compact_CBL[1], loose_CBL[1] 118 | compact_bn.weight.data = loose_bn.weight.data[out_channel_idx].clone() 119 | compact_bn.bias.data = loose_bn.bias.data[out_channel_idx].clone() 120 | compact_bn.running_mean.data = loose_bn.running_mean.data[out_channel_idx].clone() 121 | compact_bn.running_var.data = loose_bn.running_var.data[out_channel_idx].clone() 122 | 123 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 124 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 125 | compact_conv, loose_conv = compact_CBL[0], loose_CBL[0] 126 | tmp = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 127 | compact_conv.weight.data = tmp[out_channel_idx, :, :, :].clone() 128 | 129 | for idx in Conv_idx: 130 | compact_conv = compact_model.module_list[idx][0] 131 | loose_conv = loose_model.module_list[idx][0] 132 | 133 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 134 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 135 | compact_conv.weight.data = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 136 | compact_conv.bias.data = loose_conv.bias.data.clone() 137 | 138 | 139 | def prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask): 140 | 141 | pruned_model = deepcopy(model) 142 | for idx in prune_idx: 143 | mask = torch.from_numpy(CBLidx2mask[idx]).cuda() 144 | bn_module = pruned_model.module_list[idx][1] 145 | 146 | bn_module.weight.data.mul_(mask) 147 | 148 | activation = F.leaky_relu((1 - mask) * bn_module.bias.data, 0.1) 149 | 150 | 151 | if idx<12: 152 | next_idx_list = [idx + 2] 153 | else: 154 | next_idx_list = [idx + 1] 155 | 156 | 157 | #next_idx_list = [idx + 1] 158 | if idx == 13: 159 | next_idx_list.append(18) 160 | 161 | 162 | 163 | for next_idx in next_idx_list: 164 | next_conv = pruned_model.module_list[next_idx][0] 165 | conv_sum = next_conv.weight.data.sum(dim=(2, 3)) 166 | offset = conv_sum.matmul(activation.reshape(-1, 1)).reshape(-1) 167 | 168 | if next_idx in CBL_idx: 169 | next_bn = pruned_model.module_list[next_idx][1] 170 | next_bn.running_mean.data.sub_(offset) 171 | else: 172 | #这里需要注意的是,对于convolutionnal,如果有BN,则该层卷积层不使用bias,如果无BN,则使用bias 173 | next_conv.bias.data.add_(offset) 174 | 175 | 176 | bn_module.bias.data.mul_(mask) 177 | 178 | return pruned_model 179 | 180 | 181 | def obtain_bn_mask(bn_module, thre): 182 | 183 | thre = thre.cuda() 184 | mask = bn_module.weight.data.abs().ge(thre).float() 185 | 186 | return mask 187 | -------------------------------------------------------------------------------- /utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | 5 | 6 | def init_seeds(seed=0): 7 | torch.manual_seed(seed) 8 | torch.cuda.manual_seed(seed) 9 | torch.cuda.manual_seed_all(seed) 10 | 11 | # Remove randomness (may be slower on Tesla GPUs) # https://pytorch.org/docs/stable/notes/randomness.html 12 | if seed == 0: 13 | torch.backends.cudnn.deterministic = True 14 | torch.backends.cudnn.benchmark = False 15 | 16 | 17 | def select_device(device='', apex=False): 18 | # device = 'cpu' or '0' or '0,1,2,3' 19 | cpu_request = device.lower() == 'cpu' 20 | if device and not cpu_request: # if device requested other than 'cpu' 21 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 22 | assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity 23 | 24 | cuda = False if cpu_request else 
torch.cuda.is_available() 25 | if cuda: 26 | c = 1024 ** 2 # bytes to MB 27 | ng = torch.cuda.device_count() 28 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 29 | cuda_str = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex 30 | for i in range(0, ng): 31 | if i == 1: 32 | cuda_str = ' ' * len(cuda_str) 33 | print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 34 | (cuda_str, i, x[i].name, x[i].total_memory / c)) 35 | else: 36 | print('Using CPU') 37 | 38 | print('') # skip a line 39 | return torch.device('cuda:0' if cuda else 'cpu') 40 | 41 | 42 | def fuse_conv_and_bn(conv, bn): 43 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 44 | with torch.no_grad(): 45 | # init 46 | fusedconv = torch.nn.Conv2d(conv.in_channels, 47 | conv.out_channels, 48 | kernel_size=conv.kernel_size, 49 | stride=conv.stride, 50 | padding=conv.padding, 51 | bias=True) 52 | 53 | # prepare filters 54 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 55 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 56 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 57 | 58 | # prepare spatial bias 59 | if conv.bias is not None: 60 | b_conv = conv.bias 61 | else: 62 | b_conv = torch.zeros(conv.weight.size(0)).cuda() 63 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 64 | fusedconv.bias.copy_(b_conv + b_bn) 65 | 66 | return fusedconv 67 | 68 | 69 | def model_info(model, report='summary'): 70 | # Plots a line-by-line description of a PyTorch model 71 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 72 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 73 | if report is 'full': 74 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 75 | for i, (name, p) in enumerate(model.named_parameters()): 76 | name = name.replace('module_list.', '') 77 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 78 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 79 | print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g)) 80 | 81 | 82 | def load_classifier(name='resnet101', n=2): 83 | # Loads a pretrained model reshaped to n-class output 84 | import pretrainedmodels # https://github.com/Cadene/pretrained-models.pytorch#torchvision 85 | model = pretrainedmodels.__dict__[name](num_classes=1000, pretrained='imagenet') 86 | 87 | # Display model properties 88 | for x in ['model.input_size', 'model.input_space', 'model.input_range', 'model.mean', 'model.std']: 89 | print(x + ' =', eval(x)) 90 | 91 | # Reshape output to n classes 92 | filters = model.last_linear.weight.shape[1] 93 | model.last_linear.bias = torch.nn.Parameter(torch.zeros(n)) 94 | model.last_linear.weight = torch.nn.Parameter(torch.zeros(n, filters)) 95 | model.last_linear.out_features = n 96 | return model 97 | -------------------------------------------------------------------------------- /weights/download_yolov3_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # make '/weights' directory if it does not exist and cd into it 4 | mkdir -p weights && cd weights 5 | 6 | # copy darknet weight files, continue '-c' if partially downloaded 7 | wget -c https://pjreddie.com/media/files/yolov3.weights 8 | wget -c 
https://pjreddie.com/media/files/yolov3-tiny.weights 9 | wget -c https://pjreddie.com/media/files/yolov3-spp.weights 10 | 11 | # yolov3 pytorch weights 12 | # download from Google Drive: https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI 13 | 14 | # darknet53 weights (first 75 layers only) 15 | wget -c https://pjreddie.com/media/files/darknet53.conv.74 16 | 17 | # yolov3-tiny weights from darknet (first 16 layers only) 18 | # ./darknet partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15 19 | # mv yolov3-tiny.conv.15 ../ 20 | 21 | --------------------------------------------------------------------------------
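For the PyTorch (.pt) weights mentioned in the comments above, the repository's own gdrive_download() helper in utils/google_utils.py can be used instead of a manual browser download. A minimal sketch — the Drive file id below is a placeholder, not a real id, and the target file name is only illustrative:

# Hypothetical usage of gdrive_download() (defined in utils/google_utils.py; it shells out to curl).
# Replace '<drive-file-id>' with the id of a file inside the Google Drive folder linked above.
from utils.google_utils import gdrive_download

gdrive_download(id='<drive-file-id>', name='weights/yolov3-spp.pt')

For the darknet-format weights, the wget commands in the script above remain the simplest route.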