├── .gitattributes
├── .gitignore
├── .ipynb_checkpoints
    ├── data_process-checkpoint.ipynb
    └── data_vision-checkpoint.ipynb
├── LICENSE
├── README.md
├── __pycache__
    ├── cosine_annealing.cpython-36.pyc
    ├── swa.cpython-36.pyc
    └── train.cpython-36.pyc
├── convert.py
├── cosine_annealing.py
├── data_augmention
    ├── .ipynb_checkpoints
    │   └── 未命名-checkpoint.ipynb
    ├── ColorCorrect.py
    ├── DepthMap_RTM.py
    ├── GuidedFilter.py
    ├── Saturation.py
    ├── Saturation_Max.py
    ├── __pycache__
    │   ├── ColorCorrect.cpython-36.pyc
    │   ├── ColorCorrect.cpython-37.pyc
    │   ├── DepthMap_RTM.cpython-36.pyc
    │   ├── DepthMap_RTM.cpython-37.pyc
    │   ├── GuidedFilter.cpython-36.pyc
    │   ├── GuidedFilter.cpython-37.pyc
    │   ├── Saturation_Max.cpython-36.pyc
    │   ├── Saturation_Max.cpython-37.pyc
    │   ├── augment.cpython-36.pyc
    │   ├── depthMapEstimation.cpython-36.pyc
    │   ├── depthMapEstimation.cpython-37.pyc
    │   ├── depthMin.cpython-36.pyc
    │   ├── depthMin.cpython-37.pyc
    │   ├── getGBTransmission.cpython-36.pyc
    │   ├── getGBTransmission.cpython-37.pyc
    │   ├── getRefinedTransmission.cpython-36.pyc
    │   ├── getRefinedTransmission.cpython-37.pyc
    │   ├── getTransmissionMap.cpython-36.pyc
    │   ├── getTransmissionMap.cpython-37.pyc
    │   ├── global_histogram_stretching.cpython-36.pyc
    │   ├── global_histogram_stretching.cpython-37.pyc
    │   ├── sceneRadiance.cpython-36.pyc
    │   └── sceneRadiance.cpython-37.pyc
    ├── augment.py
    ├── depthMapEstimation.py
    ├── depthMin.py
    ├── getGBTransmission.py
    ├── getRefinedTransmission.py
    ├── getTransmissionMap.py
    ├── global_histogram_stretching.py
    └── sceneRadiance.py
├── data_process.ipynb
├── data_process.py
├── data_vision.ipynb
├── image_aug.py
├── kmeans.py
├── predict.py
├── swa.py
├── train.py
├── yolo3
    ├── __init__.py
    ├── __pycache__
    │   ├── __init__.cpython-36.pyc
    │   ├── model.cpython-36.pyc
    │   └── utils.cpython-36.pyc
    ├── model.py
    └── utils.py
└── yolov3.cfg


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.ipynb linguist-language=python
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | data/
2 | *pre_train/
3 | *.h5
4 | *models/
5 | 


--------------------------------------------------------------------------------
/.ipynb_checkpoints/data_process-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from bs4 import BeautifulSoup\n",
 10 |     "import os\n",
 11 |     "import cv2"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "markdown",
 16 |    "metadata": {},
 17 |    "source": [
 18 |     "# 导入数据"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 2,
 24 |    "metadata": {},
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "class_label = ['holothurian', 'echinus', 'scallop', 'starfish', 'waterweeds']\n",
 28 |     "file_path = 'data/train/box/'\n",
 29 |     "image_path = 'data/train/augment/'\n",
 30 |     "all_name = os.listdir(file_path)"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {},
 36 |    "source": [
 37 |     "# 数据处理"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 3,
 43 |    "metadata": {},
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "# train_result用于后续yolo训练,k_mean_result用于计算anchor boxes\n",
 47 |     "train_result = []\n",
 48 |     "k_mean_result = []\n",
 49 |     "box_size = {}\n",
 50 |     "label_number = {}\n",
 51 |     "del_number = 0\n",
 52 |     "\n",
 53 |     "# 遍历图片\n",
 54 |     "for i in all_name:\n",
 55 |     "    image_name = image_path+i.rstrip('xml')+'jpg'\n",
 56 |     "    img_name_append = False\n",
 57 |     "    soup = BeautifulSoup(open(file_path+i), 'lxml')\n",
 58 |     "    bbx = soup.find_all('object')\n",
 59 |     "    img = cv2.imread(image_name)\n",
 60 |     "    height, width, _ = img.shape\n",
 61 |     "    \n",
 62 |     "    # 对各尺寸数量进行统计\n",
 63 |     "    height, width, _ = img.shape\n",
 64 |     "    if box_size.get((height, width), False):\n",
 65 |     "        box_size[(height, width)] += 1\n",
 66 |     "    else:\n",
 67 |     "        box_size[(height, width)] = 1\n",
 68 |     "    \n",
 69 |     "    # 遍历候选框\n",
 70 |     "    for j in bbx:\n",
 71 |     "        # 同时对范围进行修正，像素起点改为0\n",
 72 |     "        name = str(j.contents[1].string)\n",
 73 |     "        xmin = int(j.xmin.string) - 1\n",
 74 |     "        ymin = int(j.ymin.string) - 1\n",
 75 |     "        xmax = int(j.xmax.string) - 1\n",
 76 |     "        ymax = int(j.ymax.string) - 1\n",
 77 |     "        index = str(class_label.index(name))\n",
 78 |     "\n",
 79 |     "        # 防止大小混淆\n",
 80 |     "        if xmin > xmax:\n",
 81 |     "            xmax, xmin = xmin, xmax\n",
 82 |     "        if ymin > ymax:\n",
 83 |     "            ymax, ymin = ymin, ymax\n",
 84 |     "            \n",
 85 |     "        # 判断ground truth是否超出图像范围\n",
 86 |     "        if xmin > width - 1 or xmax < 0:\n",
 87 |     "            continue\n",
 88 |     "        if ymin > height - 1 or ymax < 0:\n",
 89 |     "            continue\n",
 90 |     "\n",
 91 |     "        # 处于图像边缘候选框的处理\n",
 92 |     "        if xmin < 0:\n",
 93 |     "            xmin = 0\n",
 94 |     "        if ymin < 0:\n",
 95 |     "            ymin = 0\n",
 96 |     "        if xmax > width - 1:\n",
 97 |     "            xmax = width - 1\n",
 98 |     "        if ymax > height - 1:\n",
 99 |     "            ymax = height - 1\n",
100 |     "            \n",
101 |     "        # 判断是否为无效框\n",
102 |     "        x_distance = xmax - xmin\n",
103 |     "        y_distance = ymax - ymin\n",
104 |     "        if x_distance == 0 or y_distance == 0:\n",
105 |     "            continue\n",
106 |     "            \n",
107 |     "        # 对于过小的图片进行删除\n",
108 |     "        square = (xmax - xmin) * (ymax - ymin)\n",
109 |     "        if square < 120:\n",
110 |     "            del_number += 1\n",
111 |     "            continue\n",
112 |     "        \n",
113 |     "        # 防止有图片没有任何候选框\n",
114 |     "        if img_name_append == False:\n",
115 |     "            train_result.append(image_name)\n",
116 |     "            img_name_append = True\n",
117 |     "\n",
118 |     "        # 判断是否增加kmean缩放候选框\n",
119 |     "        scale = min(448/width, 448/height)\n",
120 |     "        square = ((xmax - xmin)*scale) * ((ymax - ymin)*scale)\n",
121 |     "        if square < 120:\n",
122 |     "            kmean_scale = False\n",
123 |     "        else:\n",
124 |     "            kmean_scale = True\n",
125 |     "        \n",
126 |     "        xmin, xmax, ymin, ymax = str(xmin), str(xmax), str(ymin), str(ymax)\n",
127 |     "        train_result.append(' ')\n",
128 |     "        for x in [xmin, ymin, xmax, ymax]:\n",
129 |     "            train_result.append(x)\n",
130 |     "            train_result.append(',')\n",
131 |     "            if x is ymax:\n",
132 |     "                train_result.append(index)\n",
133 |     "        \n",
134 |     "        # kmean添加缩放候选框\n",
135 |     "        if kmean_scale == True:\n",
136 |     "            for x in [xmin, ymin, xmax, ymax]:\n",
137 |     "                k_mean_result.append(int(round(int(x)*scale)))\n",
138 |     "                if x is not ymax:\n",
139 |     "                    k_mean_result.append(',')\n",
140 |     "                else:\n",
141 |     "                    k_mean_result.append('\\n')\n",
142 |     "        \n",
143 |     "        # 对各label数量统计\n",
144 |     "        if label_number.get(name, False):\n",
145 |     "            label_number[name] += 1\n",
146 |     "        else:\n",
147 |     "            label_number[name] = 1\n",
148 |     "    if img_name_append == True:\n",
149 |     "        train_result.append('\\n')"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "markdown",
154 |    "metadata": {},
155 |    "source": [
156 |     "# 查看统计结果"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "code",
161 |    "execution_count": 4,
162 |    "metadata": {},
163 |    "outputs": [
164 |     {
165 |      "data": {
166 |       "text/plain": [
167 |        "10"
168 |       ]
169 |      },
170 |      "execution_count": 4,
171 |      "metadata": {},
172 |      "output_type": "execute_result"
173 |     }
174 |    ],
175 |    "source": [
176 |     "del_number"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": 5,
182 |    "metadata": {},
183 |    "outputs": [
184 |     {
185 |      "data": {
186 |       "text/plain": [
187 |        "{'echinus': 22098,\n",
188 |        " 'scallop': 6694,\n",
189 |        " 'starfish': 6827,\n",
190 |        " 'holothurian': 5503,\n",
191 |        " 'waterweeds': 82}"
192 |       ]
193 |      },
194 |      "execution_count": 5,
195 |      "metadata": {},
196 |      "output_type": "execute_result"
197 |     }
198 |    ],
199 |    "source": [
200 |     "label_number"
201 |    ]
202 |   },
203 |   {
204 |    "cell_type": "code",
205 |    "execution_count": 6,
206 |    "metadata": {},
207 |    "outputs": [
208 |     {
209 |      "data": {
210 |       "text/plain": [
211 |        "{(405, 720): 3066,\n",
212 |        " (2160, 3840): 1644,\n",
213 |        " (1080, 1920): 595,\n",
214 |        " (576, 704): 38,\n",
215 |        " (480, 586): 44}"
216 |       ]
217 |      },
218 |      "execution_count": 6,
219 |      "metadata": {},
220 |      "output_type": "execute_result"
221 |     }
222 |    ],
223 |    "source": [
224 |     "box_size"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "markdown",
229 |    "metadata": {},
230 |    "source": [
231 |     "# 保存"
232 |    ]
233 |   },
234 |   {
235 |    "cell_type": "code",
236 |    "execution_count": 7,
237 |    "metadata": {},
238 |    "outputs": [],
239 |    "source": [
240 |     "with open('data/train_data.txt', 'w') as f:\n",
241 |     "    f.writelines(train_result)\n",
242 |     "with open('data/k_mean_data.txt', 'w') as f:\n",
243 |     "    f.writelines(k_mean_result)"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "code",
248 |    "execution_count": null,
249 |    "metadata": {},
250 |    "outputs": [],
251 |    "source": []
252 |   }
253 |  ],
254 |  "metadata": {
255 |   "kernelspec": {
256 |    "display_name": "Python 3",
257 |    "language": "python",
258 |    "name": "python3"
259 |   },
260 |   "language_info": {
261 |    "codemirror_mode": {
262 |     "name": "ipython",
263 |     "version": 3
264 |    },
265 |    "file_extension": ".py",
266 |    "mimetype": "text/x-python",
267 |    "name": "python",
268 |    "nbconvert_exporter": "python",
269 |    "pygments_lexer": "ipython3",
270 |    "version": "3.6.9"
271 |   }
272 |  },
273 |  "nbformat": 4,
274 |  "nbformat_minor": 2
275 | }
276 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Fieldhunter
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest
 2 | 
 3 | [![](https://img.shields.io/badge/license-MIT-green)](https://github.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/blob/master/LICENSE)
 4 | [![](https://img.shields.io/badge/author-Fieldhunter-blue)](https://github.com/Fieldhunter)
 5 | ![](https://img.shields.io/badge/frame-keras-yellow)
 6 | 
 7 | 2020湛江水下目标检测算法赛
 8 | 
 9 | ## 前言
10 | 此次比赛分数不是很理想，A榜20.14，B榜19.63。我认为的原因应该有很多，比如说模型，水下图像增强算法，trick，
11 | 又或者是评价指标比较苛刻（赛事采用的是COCO mAP[@0.5:0.05:0.95] 指标）等等，具体的原因会在之后的分部分详细解释。当然，这也是我第一次参加目标检测的比赛，
12 | 所以过程中难免会有疏漏。而因为此次比赛，让我对keras非常失望，
13 | 具体原因可见我的这一个blog:[Choice keras or pytorch?](https://fieldhunter.github.io/talking_about_keras_and_pytorch/)。虽然说分数不理想，
14 | 但也是好歹做了出来，自己也有自己的idea在里面，只不过应该是中途哪里出了问题，我没发现罢了（但我真的看了源码很久觉得应该没问题），
15 | 可能换了一些策略或者单换个PyTorch实现的模型之后分数就会大幅上涨，毕竟大佬提供的PyTorch的baseline能达到挺高的分数。但肯定的是自己的idea是挺有效的，
16 | **从裸模型+图像增强的10分提高到了20分**。（所以我就怀疑是yolo的问题）所以在此将它放入我的github中，对今后参加类似的比赛或者项目肯定有借鉴意义。
17 | (租GPU就花了500块，真的吐血)
18 | 
19 | ## 说明
20 | 没有上传的文件夹有data、models和pre_train，即文件太大，不宜上传。原data目录当中，test目录下是测试集，包括图像增强后的图像，trian目录下是训练集，
21 | 也包括图像增强后的图像。data目录中还有yolo模型所需的classes.txt、yolo_anchors.txt和train_data.txt。在pre_train目录当中，有yolo的预训练模型yolov3.weight，
22 | 以及用convert.py转换的yolo_weights.h5。在models目录中，存储训练结束的模型和Tensorboard记录。
23 | 
24 | ## 图像增强
25 | 首先，对图像采取增强算法增强(image_aug.py)后进行存储。对于水下图像识别这个领域，我也是参加这个比赛才接触的，同样是第一次参加目标检测的比赛，
26 | 所以着重的去对模型算法做更改而不是针对数据，所以图像增强的算法只是照搬了赛事群里的一个大佬的开源，可视化出来后看着效果是挺不错的，
27 | 但具体怎么样我也不敢下定论，所以可能水下图像增强算法的效果不是很好，导致分数不是很高。具体的算法在data_augmention目录里。为了满足后续的数据增强需求，
28 | **对训练数据进行原图尺寸增强**，但会使得处理速度很慢。为了解决这个问题，尝试采用gpu版的numpy:cupy来调用gpu加速处理，
29 | 但增强算法中的有些numpy函数cupy尚未支持，所以只能用numpy，之后采用了10个进程的多进程方式来尽量的加速处理。**对于测试集，直接缩放至目标尺寸**来增强，
30 | 这样处理速度就会非常快，避免大尺寸图片耗时的问题，之后再进行输入处理，以满足模型输入需求。具体的处理方式在[数据增强](#数据增强)部分介绍。
31 | 
32 | 另外，在使用大佬的开源增强算法之前，使用过两个UnderWaterGAN，但是他们的模型输出都是256\*256像素，整张图片和目标都扭曲的不成样子，效果还不是特别的好，其中一个还非常的差。所以就放弃使用对抗生成网络。（可能图像的尺寸和GAN网络本身训练的不符，强行将几千的像素压缩到256，因为看那些README效果都很不错）
33 | 
34 | ## 数据预处理
35 | data_process.py中，对所有图像进行统计，对每一个候选框，进行细致的处理，代码中给出了详细的注释，这里不再阐述。
36 | 候选框处理时**删除那些面积小于120的过小候选框**。之后再靠kmeans.py来生成yolo所需的anchors，最后得到data目录中的train_data.txt和yolo_anchors.txt。
37 | 
38 | 另外，赛事的目标种类只有4种，但数据集里有5种，多了一个水草。观察数据集后发现，水草和海胆在某些情况下十分相似，为了使模型增强对水草的区分能力，
39 | **在训练时用5个类别去训练，在预测的时候把水草类丢弃即可**。同时对所有的5k多张训练集人眼过了一遍，**删去了那些过分模糊的图片**，约为300张，以提高数据集质量。
40 | 
41 | ## 数据增强
42 | 数据增强代码在yolo3目录的utils.py中。对于模型的输入尺寸，**采用480*480**，原yolo尺寸是416\*416。经过实验验证，**从416到448再到480，分数都是稳步提高的**。
43 | 由于时间和成本的关系，并没有尝试512\*512尺寸，这或许会进一步提高。针对目标检测的task，并不希望把任意尺寸的图片强行resize，策略是选出高与宽较长的一边，
44 | 计算这条边缩放到480的比例，然后按照这个比例对原图进行**等比例缩放**。等比缩放后，长的一边是480，短的一边则不是。
45 | 之后在短边上**用灰色像素来填充**以满足480\*480尺寸。统计过数据集的尺寸，都是长大于高，所以短边都是高这条边。灰色像素数量在缩放后的原图上下两侧均分，
46 | 这样使得处理后的图像**正中间是缩放后的训练图像，上下两侧是灰色像素**。[图像增强](#图像增强)部分的测试集处理也是如此。ps:此处尝试用base64来导入图片演示，
47 | 但根本加载不出来，不知道为什么，只能这样靠着文字和代码来理解一下。
48 | 
49 | 具体步骤：对于训练数据，首先按照7:3的可能性进行**直接缩放或者随机裁剪**。直接缩放过程中删除缩放后面积小于120的过小候选框，
50 | 随机裁剪出来的尺寸也与按照原图直接缩放策略缩放后的尺寸相同。之后都是按照1:1的可能性**按顺序进行二次的轻度图像增强，水平翻转和垂直翻转**。对于测试集，
51 | 直接进行缩放即可。之后按照上述所说的策略填充灰色像素。最后进行模型输入格式的处理，**像素归一化和候选框处理**。同样，代码中有详细的注释。
52 | 
53 | ## 模型训练
54 | 在train.py中，输入尺寸为480*480，训练集和测试集按照9:1划分。训练过程分为两个阶段来训练，第一阶段**冻结预训练所有层**，采用RAdam(最小值设为1e-5)，
55 | warm_up策略，batch_size设为32，训练100轮，同时做Tensorboard记录。在第二阶段**打开全部网络层来训练**，采用RAdam(最小值设为1e-6)，warm_up策略，
56 | **swa算法，cosine-annealing学习率策略（范围1e-2到1e-6），batch_size设为8（主要原因是显存限制）**，训练200轮，同时做Tensorboard记录，
57 | 通过ModelCheckpoint策略对每一轮按照val_loss来决定是否保存模型，最终选用val_loss最小的模型来做预测。即将最好的模型更名为trained_weights_final.h5。
58 | 
59 | 在模型设计上(yolo3目录中的model.py)，对class_loss进行修改，采用**label_smoothing策略**。由于在预测的可视化结果中看到候选框的及分类的结果都挺不错，
60 | 但是一些很明显且已经识别出来的目标的置顶度却很低，一些显然不是目标的置信度相对的有点高，所以想对confidence_loss对更改，
61 | 即给它整体或者正样本部分(confidence_loss分为正样本和负样本两部分)乘上一个值，让模型更注重置信度的损失，简单粗暴的想法。但结果并不是很好，
62 | 在训练过程中模型的loss也相对于之前高了几十个点并且下不去，最终的分数也比之前下降了一些。加上COCO mAP[@0.5:0.05:0.95] 这个评价指标候选框越多越好，
63 | 所以最终放弃了对confidence_loss的修改。
64 | 
65 | 对于其他模型，比如Faster-RCNN，我也有考虑去用过。并且听赛事群里说比赛基本没人用单阶段模型，想想也是，二阶段模型肯定准确率会高一些。但是......算了，
66 | 不想吐槽keras了，至于为什么没有用除了yolo以外的模型，还是看看[前言](#前言)里我提到的我写的blog吧。
67 | 
68 | ## 预测
69 | 在predict.py中，对预测出来的候选框进行**WBF**，相对于之前使用的NMS，WBF策略**提升了不少分数**。之后就是一系列的后处理操作，代码中的注释很详细，
70 | 这里不再阐述。另外，**赛事数据集的候选框坐标起点是1，在训练前已经更改为0，所以在后处理中需要再次更改为1**。
71 | 
72 | 在参数上，**score阈值设为0.001，iou阈值（包括模型阈值和WBF阈值）设为0.25**能得到最高的分数。
73 | 
74 | ## ps
75 | **数据预处理和数据可视化**都有Jupyter版本。分别进行了**训练集的统计信息**工作和**各阶段数据（训练集，数据增强，预测结果）可视化**工作。同样也有详细的注释。
76 | 
77 | 最终训练出来最好的模型和Tensorboard记录我会存放在releases当中。
78 | 
79 | yolov3部分的代码是基于[qqwweee/keras-yolo3](https://github.com/qqwweee/keras-yolo3)进行更改的。
80 | 


--------------------------------------------------------------------------------
/__pycache__/cosine_annealing.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/__pycache__/cosine_annealing.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/swa.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/__pycache__/swa.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/train.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/__pycache__/train.cpython-36.pyc


--------------------------------------------------------------------------------
/convert.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | """
  3 | Reads Darknet config and weights and creates Keras model with TF backend.
  4 | 
  5 | """
  6 | 
  7 | import argparse
  8 | import configparser
  9 | import io
 10 | import os
 11 | from collections import defaultdict
 12 | 
 13 | import numpy as np
 14 | from keras import backend as K
 15 | from keras.layers import (Conv2D, Input, ZeroPadding2D, Add,
 16 |                           UpSampling2D, MaxPooling2D, Concatenate)
 17 | from keras.layers.advanced_activations import LeakyReLU
 18 | from keras.layers.normalization import BatchNormalization
 19 | from keras.models import Model
 20 | from keras.regularizers import l2
 21 | from keras.utils.vis_utils import plot_model as plot
 22 | 
 23 | 
 24 | parser = argparse.ArgumentParser(description='Darknet To Keras Converter.')
 25 | parser.add_argument('config_path', help='Path to Darknet cfg file.')
 26 | parser.add_argument('weights_path', help='Path to Darknet weights file.')
 27 | parser.add_argument('output_path', help='Path to output Keras model file.')
 28 | parser.add_argument(
 29 |     '-p',
 30 |     '--plot_model',
 31 |     help='Plot generated Keras model and save as image.',
 32 |     action='store_true')
 33 | parser.add_argument(
 34 |     '-w',
 35 |     '--weights_only',
 36 |     help='Save as Keras weights file instead of model file.',
 37 |     action='store_true')
 38 | 
 39 | def unique_config_sections(config_file):
 40 |     """Convert all config sections to have unique names.
 41 | 
 42 |     Adds unique suffixes to config sections for compability with configparser.
 43 |     """
 44 |     section_counters = defaultdict(int)
 45 |     output_stream = io.StringIO()
 46 |     with open(config_file) as fin:
 47 |         for line in fin:
 48 |             if line.startswith('['):
 49 |                 section = line.strip().strip('[]')
 50 |                 _section = section + '_' + str(section_counters[section])
 51 |                 section_counters[section] += 1
 52 |                 line = line.replace(section, _section)
 53 |             output_stream.write(line)
 54 |     output_stream.seek(0)
 55 |     return output_stream
 56 | 
 57 | # %%
 58 | def _main(args):
 59 |     config_path = os.path.expanduser(args.config_path)
 60 |     weights_path = os.path.expanduser(args.weights_path)
 61 |     assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
 62 |         config_path)
 63 |     assert weights_path.endswith(
 64 |         '.weights'), '{} is not a .weights file'.format(weights_path)
 65 | 
 66 |     output_path = os.path.expanduser(args.output_path)
 67 |     assert output_path.endswith(
 68 |         '.h5'), 'output path {} is not a .h5 file'.format(output_path)
 69 |     output_root = os.path.splitext(output_path)[0]
 70 | 
 71 |     # Load weights and config.
 72 |     print('Loading weights.')
 73 |     weights_file = open(weights_path, 'rb')
 74 |     major, minor, revision = np.ndarray(
 75 |         shape=(3, ), dtype='int32', buffer=weights_file.read(12))
 76 |     if (major*10+minor)>=2 and major<1000 and minor<1000:
 77 |         seen = np.ndarray(shape=(1,), dtype='int64', buffer=weights_file.read(8))
 78 |     else:
 79 |         seen = np.ndarray(shape=(1,), dtype='int32', buffer=weights_file.read(4))
 80 |     print('Weights Header: ', major, minor, revision, seen)
 81 | 
 82 |     print('Parsing Darknet config.')
 83 |     unique_config_file = unique_config_sections(config_path)
 84 |     cfg_parser = configparser.ConfigParser()
 85 |     cfg_parser.read_file(unique_config_file)
 86 | 
 87 |     print('Creating Keras model.')
 88 |     input_layer = Input(shape=(None, None, 3))
 89 |     prev_layer = input_layer
 90 |     all_layers = []
 91 | 
 92 |     weight_decay = float(cfg_parser['net_0']['decay']
 93 |                          ) if 'net_0' in cfg_parser.sections() else 5e-4
 94 |     count = 0
 95 |     out_index = []
 96 |     for section in cfg_parser.sections():
 97 |         print('Parsing section {}'.format(section))
 98 |         if section.startswith('convolutional'):
 99 |             filters = int(cfg_parser[section]['filters'])
100 |             size = int(cfg_parser[section]['size'])
101 |             stride = int(cfg_parser[section]['stride'])
102 |             pad = int(cfg_parser[section]['pad'])
103 |             activation = cfg_parser[section]['activation']
104 |             batch_normalize = 'batch_normalize' in cfg_parser[section]
105 | 
106 |             padding = 'same' if pad == 1 and stride == 1 else 'valid'
107 | 
108 |             # Setting weights.
109 |             # Darknet serializes convolutional weights as:
110 |             # [bias/beta, [gamma, mean, variance], conv_weights]
111 |             prev_layer_shape = K.int_shape(prev_layer)
112 | 
113 |             weights_shape = (size, size, prev_layer_shape[-1], filters)
114 |             darknet_w_shape = (filters, weights_shape[2], size, size)
115 |             weights_size = np.product(weights_shape)
116 | 
117 |             print('conv2d', 'bn'
118 |                   if batch_normalize else '  ', activation, weights_shape)
119 | 
120 |             conv_bias = np.ndarray(
121 |                 shape=(filters, ),
122 |                 dtype='float32',
123 |                 buffer=weights_file.read(filters * 4))
124 |             count += filters
125 | 
126 |             if batch_normalize:
127 |                 bn_weights = np.ndarray(
128 |                     shape=(3, filters),
129 |                     dtype='float32',
130 |                     buffer=weights_file.read(filters * 12))
131 |                 count += 3 * filters
132 | 
133 |                 bn_weight_list = [
134 |                     bn_weights[0],  # scale gamma
135 |                     conv_bias,  # shift beta
136 |                     bn_weights[1],  # running mean
137 |                     bn_weights[2]  # running var
138 |                 ]
139 | 
140 |             conv_weights = np.ndarray(
141 |                 shape=darknet_w_shape,
142 |                 dtype='float32',
143 |                 buffer=weights_file.read(weights_size * 4))
144 |             count += weights_size
145 | 
146 |             # DarkNet conv_weights are serialized Caffe-style:
147 |             # (out_dim, in_dim, height, width)
148 |             # We would like to set these to Tensorflow order:
149 |             # (height, width, in_dim, out_dim)
150 |             conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
151 |             conv_weights = [conv_weights] if batch_normalize else [
152 |                 conv_weights, conv_bias
153 |             ]
154 | 
155 |             # Handle activation.
156 |             act_fn = None
157 |             if activation == 'leaky':
158 |                 pass  # Add advanced activation later.
159 |             elif activation != 'linear':
160 |                 raise ValueError(
161 |                     'Unknown activation function `{}` in section {}'.format(
162 |                         activation, section))
163 | 
164 |             # Create Conv2D layer
165 |             if stride>1:
166 |                 # Darknet uses left and top padding instead of 'same' mode
167 |                 prev_layer = ZeroPadding2D(((1,0),(1,0)))(prev_layer)
168 |             conv_layer = (Conv2D(
169 |                 filters, (size, size),
170 |                 strides=(stride, stride),
171 |                 kernel_regularizer=l2(weight_decay),
172 |                 use_bias=not batch_normalize,
173 |                 weights=conv_weights,
174 |                 activation=act_fn,
175 |                 padding=padding))(prev_layer)
176 | 
177 |             if batch_normalize:
178 |                 conv_layer = (BatchNormalization(
179 |                     weights=bn_weight_list))(conv_layer)
180 |             prev_layer = conv_layer
181 | 
182 |             if activation == 'linear':
183 |                 all_layers.append(prev_layer)
184 |             elif activation == 'leaky':
185 |                 act_layer = LeakyReLU(alpha=0.1)(prev_layer)
186 |                 prev_layer = act_layer
187 |                 all_layers.append(act_layer)
188 | 
189 |         elif section.startswith('route'):
190 |             ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
191 |             layers = [all_layers[i] for i in ids]
192 |             if len(layers) > 1:
193 |                 print('Concatenating route layers:', layers)
194 |                 concatenate_layer = Concatenate()(layers)
195 |                 all_layers.append(concatenate_layer)
196 |                 prev_layer = concatenate_layer
197 |             else:
198 |                 skip_layer = layers[0]  # only one layer to route
199 |                 all_layers.append(skip_layer)
200 |                 prev_layer = skip_layer
201 | 
202 |         elif section.startswith('maxpool'):
203 |             size = int(cfg_parser[section]['size'])
204 |             stride = int(cfg_parser[section]['stride'])
205 |             all_layers.append(
206 |                 MaxPooling2D(
207 |                     pool_size=(size, size),
208 |                     strides=(stride, stride),
209 |                     padding='same')(prev_layer))
210 |             prev_layer = all_layers[-1]
211 | 
212 |         elif section.startswith('shortcut'):
213 |             index = int(cfg_parser[section]['from'])
214 |             activation = cfg_parser[section]['activation']
215 |             assert activation == 'linear', 'Only linear activation supported.'
216 |             all_layers.append(Add()([all_layers[index], prev_layer]))
217 |             prev_layer = all_layers[-1]
218 | 
219 |         elif section.startswith('upsample'):
220 |             stride = int(cfg_parser[section]['stride'])
221 |             assert stride == 2, 'Only stride=2 supported.'
222 |             all_layers.append(UpSampling2D(stride)(prev_layer))
223 |             prev_layer = all_layers[-1]
224 | 
225 |         elif section.startswith('yolo'):
226 |             out_index.append(len(all_layers)-1)
227 |             all_layers.append(None)
228 |             prev_layer = all_layers[-1]
229 | 
230 |         elif section.startswith('net'):
231 |             pass
232 | 
233 |         else:
234 |             raise ValueError(
235 |                 'Unsupported section header type: {}'.format(section))
236 | 
237 |     # Create and save model.
238 |     if len(out_index)==0: out_index.append(len(all_layers)-1)
239 |     model = Model(inputs=input_layer, outputs=[all_layers[i] for i in out_index])
240 |     print(model.summary())
241 |     if args.weights_only:
242 |         model.save_weights('{}'.format(output_path))
243 |         print('Saved Keras weights to {}'.format(output_path))
244 |     else:
245 |         model.save('{}'.format(output_path))
246 |         print('Saved Keras model to {}'.format(output_path))
247 | 
248 |     # Check to see if all weights have been read.
249 |     remaining_weights = len(weights_file.read()) / 4
250 |     weights_file.close()
251 |     print('Read {} of {} from Darknet weights.'.format(count, count +
252 |                                                        remaining_weights))
253 |     if remaining_weights > 0:
254 |         print('Warning: {} unused weights'.format(remaining_weights))
255 | 
256 |     if args.plot_model:
257 |         plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
258 |         print('Saved model plot to {}.png'.format(output_root))
259 | 
260 | 
261 | if __name__ == '__main__':
262 |     _main(parser.parse_args())
263 | 


--------------------------------------------------------------------------------
/cosine_annealing.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | from keras.callbacks import Callback
 3 | from keras import backend as K
 4 | 
 5 | 
 6 | class CosineAnnealingScheduler(Callback):
 7 |     """Cosine annealing scheduler.
 8 |     """
 9 | 
10 |     def __init__(self, init_epoch, T_max, eta_max, eta_min=0, verbose=0):
11 |         super(CosineAnnealingScheduler, self).__init__()
12 |         self.init_epoch = init_epoch
13 |         self.T_max = T_max
14 |         self.eta_max = eta_max
15 |         self.eta_min = eta_min
16 |         self.verbose = verbose
17 | 
18 |     def on_epoch_begin(self, epoch, logs=None):
19 |         if not hasattr(self.model.optimizer, 'lr'):
20 |             raise ValueError('Optimizer must have a "lr" attribute.')
21 |         lr = self.eta_min + (self.eta_max - self.eta_min) * (1 + math.cos(math.pi * (epoch - self.init_epoch) / self.T_max)) / 2
22 |         K.set_value(self.model.optimizer.lr, lr)
23 |         if self.verbose > 0:
24 |             print('\nEpoch %05d: CosineAnnealingScheduler setting learning '
25 |                   'rate to %s.' % (epoch + 1, lr))
26 | 
27 |     def on_epoch_end(self, epoch, logs=None):
28 |         logs = logs or {}
29 |         logs['lr'] = K.get_value(self.model.optimizer.lr)
30 | 


--------------------------------------------------------------------------------
/data_augmention/.ipynb_checkpoints/未命名-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [],
3 |  "metadata": {},
4 |  "nbformat": 4,
5 |  "nbformat_minor": 2
6 | }
7 | 


--------------------------------------------------------------------------------
/data_augmention/ColorCorrect.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy as np
 3 | import datetime
 4 | import cv2
 5 | import natsort
 6 | 
 7 | np.seterr(over='ignore')
 8 | if __name__ == '__main__':
 9 |     pass
10 | 
11 | def color_correction(r,u_r,u_ref,L2):
12 |     L1 = np.max(r)
13 |     gainFactor = L1 * (u_r/ u_ref) +L2
14 |     Out = r / gainFactor
15 |     return Out
16 | 
17 | def OptimalParameter(sceneRadiance):
18 |     img = np.float64(sceneRadiance / 255)
19 |     b, g, r = cv2.split(img)
20 | 
21 |     u_r = np.sum(r)
22 |     u_g = np.sum(g)
23 |     u_b = np.sum(b)
24 |     u_ref = (u_r ** 2 + u_g ** 2 + u_b ** 2) ** 0.5
25 |     L2 = 0.25
26 |     r = color_correction(r, u_r, u_ref, L2)
27 |     g = color_correction(g, u_g, u_ref, L2)
28 |     b = color_correction(b, u_b, u_ref, L2)
29 | 
30 |     sceneRadiance = np.zeros((img.shape), 'float64')
31 |     sceneRadiance[:, :, 0] = b
32 |     sceneRadiance[:, :, 1] = g
33 |     sceneRadiance[:, :, 2] = r
34 |     sceneRadiance = sceneRadiance * 255
35 |     sceneRadiance = np.clip(sceneRadiance,0, 255)
36 |     sceneRadiance = np.uint8(sceneRadiance)
37 |     return sceneRadiance
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 
45 | 
46 | 
47 | 
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 
54 | 
55 | 
56 | 
57 | 


--------------------------------------------------------------------------------
/data_augmention/DepthMap_RTM.py:
--------------------------------------------------------------------------------
 1 | from depthMapEstimation import depthMap
 2 | from depthMin import minDepth
 3 | 
 4 | 
 5 | def Depth_TM(img, AtomsphericLight):
 6 | 
 7 |     DepthMap = depthMap(img)
 8 |     t0, t1 = 0.05, 0.95
 9 |     DepthMap = DepthMap.clip(t0, t1)
10 |     d_0 = minDepth(img, AtomsphericLight)
11 | 
12 |     d_f = 8 * (DepthMap + d_0)
13 |     TM_R_modified = 0.85 ** d_f
14 |     return TM_R_modified


--------------------------------------------------------------------------------
/data_augmention/GuidedFilter.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import numpy as np
  3 | import cv2
  4 | 
  5 | class GuidedFilter:
  6 |     
  7 |     # def __init__(self, I, radius=5, epsilon=0.4):
  8 |     def __init__(self, I, radius, epsilon):
  9 | 
 10 |         self._radius = 2 * radius + 1
 11 |         self._epsilon = epsilon
 12 |         self._I = self._toFloatImg(I)
 13 |         self._initFilter()
 14 | 
 15 |         # print('radius',self._radius)
 16 |         # print('epsilon',self._epsilon)
 17 | 
 18 |     def _toFloatImg(self, img):
 19 |         if img.dtype == np.float32:
 20 |             return img
 21 |         return ( 1.0 / 255.0 ) * np.float32(img)
 22 | 
 23 |     def _initFilter(self):
 24 |         I = self._I
 25 |         r = self._radius
 26 |         eps = self._epsilon
 27 | 
 28 |         Ir, Ig, Ib = I[:, :, 0], I[:, :, 1], I[:, :, 2]
 29 | 
 30 |         # self._Ir_mean = cv2.blur(Ir, (r, r))
 31 |         # self._Ig_mean = cv2.blur(Ig, (r, r))
 32 |         # self._Ib_mean = cv2.blur(Ib, (r, r))
 33 |         #
 34 |         # Irr_var = cv2.blur(Ir ** 2, (r, r)) - self._Ir_mean ** 2 + eps
 35 |         # Irg_var = cv2.blur(Ir * Ig, (r, r)) - self._Ir_mean * self._Ig_mean
 36 |         # Irb_var = cv2.blur(Ir * Ib, (r, r)) - self._Ir_mean * self._Ib_mean
 37 |         # Igg_var = cv2.blur(Ig * Ig, (r, r)) - self._Ig_mean * self._Ig_mean + eps
 38 |         # Igb_var = cv2.blur(Ig * Ib, (r, r)) - self._Ig_mean * self._Ib_mean
 39 |         # Ibb_var = cv2.blur(Ib * Ib, (r, r)) - self._Ib_mean * self._Ib_mean + eps
 40 | 
 41 | 
 42 |         self._Ir_mean = cv2.blur(Ir, (r, r))
 43 |         self._Ig_mean = cv2.blur(Ig, (r, r))
 44 |         self._Ib_mean = cv2.blur(Ib, (r, r))
 45 | 
 46 |         Irr_var = cv2.blur(Ir ** 2, (r, r)) - self._Ir_mean ** 2 + eps                                       
 47 |         Irg_var = cv2.blur(Ir * Ig, (r, r)) - self._Ir_mean * self._Ig_mean                                  
 48 |         Irb_var = cv2.blur(Ir * Ib, (r, r)) - self._Ir_mean * self._Ib_mean                                  
 49 |         Igg_var = cv2.blur(Ig * Ig, (r, r)) - self._Ig_mean * self._Ig_mean + eps                            
 50 |         Igb_var = cv2.blur(Ig * Ib, (r, r)) - self._Ig_mean * self._Ib_mean                                  
 51 |         Ibb_var = cv2.blur(Ib * Ib, (r, r)) - self._Ib_mean * self._Ib_mean + eps                                                       
 52 | 
 53 | 
 54 |         Irr_inv = Igg_var * Ibb_var - Igb_var * Igb_var                                                      
 55 |         Irg_inv = Igb_var * Irb_var - Irg_var * Ibb_var                                                      
 56 |         Irb_inv = Irg_var * Igb_var - Igg_var * Irb_var                                                      
 57 |         Igg_inv = Irr_var * Ibb_var - Irb_var * Irb_var                                                      
 58 |         Igb_inv = Irb_var * Irg_var - Irr_var * Igb_var                                                      
 59 |         Ibb_inv = Irr_var * Igg_var - Irg_var * Irg_var                                                      
 60 |         
 61 |         I_cov = Irr_inv * Irr_var + Irg_inv * Irg_var + Irb_inv * Irb_var                                    
 62 |         Irr_inv /= I_cov                                                                                     
 63 |         Irg_inv /= I_cov                                                                                     
 64 |         Irb_inv /= I_cov                                                                                     
 65 |         Igg_inv /= I_cov                                                                                     
 66 |         Igb_inv /= I_cov                                                                                     
 67 |         Ibb_inv /= I_cov                                                                                     
 68 |         
 69 |         self._Irr_inv = Irr_inv                                                                              
 70 |         self._Irg_inv = Irg_inv                                                                              
 71 |         self._Irb_inv = Irb_inv                                                                              
 72 |         self._Igg_inv = Igg_inv                                                                              
 73 |         self._Igb_inv = Igb_inv                                                                              
 74 |         self._Ibb_inv = Ibb_inv                  
 75 | 
 76 |     def _computeCoefficients(self, p):
 77 |         r = self._radius                                                             
 78 |         I = self._I                                                                 
 79 |         Ir, Ig, Ib = I[:, :, 0], I[:, :, 1], I[:, :, 2]                                                          
 80 |         
 81 | 
 82 |         p_mean = cv2.blur(p, (r, r))                             
 83 |         Ipr_mean = cv2.blur(Ir * p, (r, r))                                                         
 84 |         Ipg_mean = cv2.blur(Ig * p, (r, r))                                                    
 85 |         Ipb_mean = cv2.blur(Ib * p, (r, r))             
 86 | 
 87 | 
 88 | 
 89 |         Ipr_cov = Ipr_mean - self._Ir_mean * p_mean                                                 
 90 |         Ipg_cov = Ipg_mean - self._Ig_mean * p_mean                                                     
 91 |         Ipb_cov = Ipb_mean - self._Ib_mean * p_mean                                                       
 92 |                                                                                                                  
 93 |         ar = self._Irr_inv * Ipr_cov + self._Irg_inv * Ipg_cov + self._Irb_inv * Ipb_cov                 
 94 |         ag = self._Irg_inv * Ipr_cov + self._Igg_inv * Ipg_cov + self._Igb_inv * Ipb_cov                
 95 |         ab = self._Irb_inv * Ipr_cov + self._Igb_inv * Ipg_cov + self._Ibb_inv * Ipb_cov    
 96 | 
 97 |         b = p_mean - ar * self._Ir_mean - ag * self._Ig_mean - ab * self._Ib_mean                                                                                                                                         
 98 | 
 99 |         ar_mean = cv2.blur(ar, (r, r))          
100 |         ag_mean = cv2.blur(ag, (r, r))                                                                   
101 |         ab_mean = cv2.blur(ab, (r, r))                                                                      
102 |         b_mean = cv2.blur(b, (r, r))                                                                                                                                              
103 | 
104 |         return ar_mean, ag_mean, ab_mean, b_mean            
105 | 
106 |     def _computeOutput(self, ab, I):
107 |     
108 |         ar_mean, ag_mean, ab_mean, b_mean = ab
109 |         Ir, Ig, Ib = I[:, :, 0], I[:, :, 1], I[:, :, 2]
110 |         q = ar_mean * Ir + ag_mean * Ig + ab_mean * Ib + b_mean
111 |         return q
112 | 
113 |     def filter(self, p):
114 | 
115 |         p_32F = self._toFloatImg(p)
116 | 
117 |         ab = self._computeCoefficients(p)
118 |         return self._computeOutput(ab, self._I)
119 | 
120 | 
121 | 


--------------------------------------------------------------------------------
/data_augmention/Saturation.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy as np
 3 | import datetime
 4 | import cv2
 5 | import natsort
 6 | from skimage.color import rgb2lab, lab2rgb
 7 | import matplotlib.pyplot as plt
 8 | 
 9 | 
10 | np.seterr(over='ignore')
11 | if __name__ == '__main__':
12 |     pass
13 | 
14 | path = "F:/PaperExperiments/ACMMM2018/OptimalTM_BLs_Restor/InputImages"
15 | # path = "F:/PaperExperiments/ACMMM2018/OptimalTM_BLs_Restor/Temps"
16 | files = os.listdir(path)
17 | files =  natsort.natsorted(files)
18 | starttime = datetime.datetime.now()
19 | 
20 | # BLs = read_xls_file()
21 | # print('BLs',BLs)
22 | 
23 | for i in range(len(files)):
24 |     file = files[i]
25 |     Num  = file.split('.')[0]
26 |     filepath = path + "/" + file
27 |     # BL = BLs[i]
28 |     print('********    file   ********', file)
29 |     img = cv2.imread('InputImages/' + file)
30 |     # img = cv2.imread('Temps/' + file)
31 |     height = len(img)
32 |     width = len(img[0])
33 |     # print('img[0,0,:]',img[0,0,:])
34 |     Sat = np.zeros((height,width ))
35 |     for i in range(0, img.shape[0]):
36 |         for j in range(0, img.shape[1]):
37 |             if(np.max(img[i,j,:]) == 0):
38 |                 Sat[i,j] = 1
39 |             else:
40 |                 Sat[i, j] = (np.max(img[i,j,:]) - np.min(img[i,j,:]))/np.max(img[i,j,:])
41 |     # print('Sat',Sat)
42 |     Sat = 1- Sat
43 |     lamba = 1 - np.mean(Sat)
44 |     print('lamba',lamba)
45 | 
46 |     #
47 |     # cv2.imwrite('Results_Saturation/' + Num + 'Sat_TM_lamba.jpg', np.uint8((Sat * lamba) * 255))
48 | 
49 | 


--------------------------------------------------------------------------------
/data_augmention/Saturation_Max.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def Sat_max(img):
 5 |     height = len(img)
 6 |     width = len(img[0])
 7 |     # print('img[0,0,:]',img[0,0,:])
 8 |     Sat = np.zeros((height,width ))
 9 |     for i in range(0, img.shape[0]):
10 |         for j in range(0, img.shape[1]):
11 |             if(np.max(img[i,j,:]) == 0):
12 |                 Sat[i,j] = 1
13 |             else:
14 |                 Sat[i, j] = (np.max(img[i,j,:]) - np.min(img[i,j,:]))/np.max(img[i,j,:])
15 |     Sat = 1 - Sat
16 | 
17 |     # lamba = 1 - np.mean(Sat)
18 |     lamba = 1
19 | 
20 |     Sat = Sat * lamba
21 |     return Sat
22 | 
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/data_augmention/__pycache__/ColorCorrect.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/ColorCorrect.cpython-36.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/ColorCorrect.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/ColorCorrect.cpython-37.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/DepthMap_RTM.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/DepthMap_RTM.cpython-36.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/DepthMap_RTM.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/DepthMap_RTM.cpython-37.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/GuidedFilter.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/GuidedFilter.cpython-36.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/GuidedFilter.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/GuidedFilter.cpython-37.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/Saturation_Max.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/Saturation_Max.cpython-36.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/Saturation_Max.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/Saturation_Max.cpython-37.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/augment.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/augment.cpython-36.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/depthMapEstimation.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/depthMapEstimation.cpython-36.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/depthMapEstimation.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/depthMapEstimation.cpython-37.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/depthMin.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/depthMin.cpython-36.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/depthMin.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/depthMin.cpython-37.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/getGBTransmission.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/getGBTransmission.cpython-36.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/getGBTransmission.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/getGBTransmission.cpython-37.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/getRefinedTransmission.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/getRefinedTransmission.cpython-36.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/getRefinedTransmission.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/getRefinedTransmission.cpython-37.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/getTransmissionMap.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/getTransmissionMap.cpython-36.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/getTransmissionMap.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/getTransmissionMap.cpython-37.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/global_histogram_stretching.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/global_histogram_stretching.cpython-36.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/global_histogram_stretching.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/global_histogram_stretching.cpython-37.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/sceneRadiance.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/sceneRadiance.cpython-36.pyc


--------------------------------------------------------------------------------
/data_augmention/__pycache__/sceneRadiance.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/sceneRadiance.cpython-37.pyc


--------------------------------------------------------------------------------
/data_augmention/augment.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from ColorCorrect import OptimalParameter
 3 | from DepthMap_RTM import Depth_TM
 4 | from Saturation_Max import Sat_max
 5 | from getGBTransmission import getGBTransmissionESt
 6 | from getRefinedTransmission import Refinedtransmission
 7 | from getTransmissionMap import getTransmission
 8 | from global_histogram_stretching import stretching
 9 | from sceneRadiance import sceneRadianceRGB
10 | 
11 | def augment(img): 
12 |     np.seterr(over='ignore')
13 |     
14 |     blockSize = 9
15 |     height = len(img)
16 |     width = len(img[0])
17 |     gimfiltR = 50  # Radius size of guided filter
18 |     eps = 10 ** -3  # Epsilon value of guided filter
19 |     Nrer = [0.95, 0.93, 0.85] # Normalized residual energy ratio of G-B-R channels
20 | 
21 |     AtomsphericLight = np.zeros(3)
22 |     AtomsphericLight[0] = (1.13 * np.mean(img[:, :, 0])) + 1.11 * np.std(img[:, :, 0]) - 25.6
23 |     AtomsphericLight[1] = (1.13 * np.mean(img[:, :, 1])) + 1.11 * np.std(img[:, :, 1]) - 25.6
24 |     AtomsphericLight[2] = 140 / (1 + 14.4 * np.exp(-0.034 * np.median(img[:, :, 2])))
25 |     AtomsphericLight = np.clip(AtomsphericLight, 5, 250)
26 |     transmissionR = getTransmission(img, AtomsphericLight, blockSize)
27 |     TM_R_modified = Depth_TM(img, AtomsphericLight)
28 |     TM_R_modified_Art = Sat_max(img)
29 |     transmissionR_new = np.copy(transmissionR)
30 |     for i in range(0, img.shape[0]):
31 |         for j in range(0, img.shape[1]):
32 |             if(transmissionR_new[i, j] > TM_R_modified[i, j]):
33 |                 transmissionR_new[i, j] = TM_R_modified[i, j]
34 |             if(transmissionR_new[i, j] < TM_R_modified_Art[i, j]):
35 |                 transmissionR_new[i, j] = TM_R_modified_Art[i, j]
36 | 
37 |     transmissionR_Stretched = stretching(transmissionR_new, height, width)
38 |     transmissionB, transmissionG, depth_map = getGBTransmissionESt(transmissionR_Stretched, AtomsphericLight)
39 |     transmission = Refinedtransmission(transmissionB, transmissionG, transmissionR_Stretched, img)
40 |     sceneRadiance = sceneRadianceRGB(img, transmission, AtomsphericLight)
41 |     sceneRadiance = OptimalParameter(sceneRadiance)
42 | 
43 |     return sceneRadiance
44 | 


--------------------------------------------------------------------------------
/data_augmention/depthMapEstimation.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import numpy as np
 3 | 
 4 | def depthMap(img):
 5 | 
 6 |     theta_0 = 0.51157954
 7 |     theta_1 = 0.50516165
 8 |     theta_2 = -0.90511117
 9 |     img = img / 255.0
10 |     b = img[:, :, 0]
11 |     g = img[:, :, 1]
12 |     r = img[:, :, 2]
13 |     x_1 = np.maximum(g, b)
14 |     x_2 = r
15 | 
16 |     Deptmap = theta_0 + theta_1 * x_1 + theta_2 * x_2
17 | 
18 | 
19 |     return Deptmap
20 | 
21 | 
22 | 
23 | 


--------------------------------------------------------------------------------
/data_augmention/depthMin.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import math
 3 | 
 4 | 
 5 | def  minDepth(img, BL):
 6 |     img = img/255.0
 7 |     BL = BL/255.0
 8 |     Max = []
 9 |     img = np.float32(img)
10 |     for i in range(0,3):
11 |         Max_Abs =  np.absolute(img[i] - BL[i])
12 |         Max_I = np.max(Max_Abs)
13 |         Max_B = np.max([BL[i],(1 -BL[i])])
14 |         temp  = Max_I / Max_B
15 |         Max.append(temp)
16 |     K_b = np.max(Max)
17 |     min_depth = 1 - K_b
18 | 
19 |     return min_depth
20 | 
21 | 
22 | 
23 | 


--------------------------------------------------------------------------------
/data_augmention/getGBTransmission.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import math
 3 | 
 4 | 
 5 | def getGBTransmissionESt(transmissionR,AtomsphericLightTM):
 6 |     depth_map = np.zeros(transmissionR.shape)
 7 |     for i in range(0,transmissionR.shape[0]):
 8 |         for j in range(0, transmissionR.shape[1]):
 9 |             depth_map[i,j]  = math.log(transmissionR[i,j],0.82)
10 |             # if(depth_map[i,j]>15):
11 |             #     depth_map[i, j] = 15
12 |             # if (depth_map[i, j] < 1):
13 |             #     depth_map[i, j] = 1
14 | 
15 | 
16 | 
17 |     transmissionG = 0.93 ** depth_map
18 |     transmissionB = 0.95 ** depth_map
19 |     # transmissionG = np.zeros(transmissionR.shape)
20 | 
21 |     # transmissionB = np.zeros(transmissionR.shape)
22 |     # ratioB = (AtomsphericLightTM[2] * (-0.00113 * 450 + 1.62517) )/(AtomsphericLightTM[0] * (-0.00113 * 620 + 1.62517))*1.3
23 |     # ratioG = (AtomsphericLightTM[2] * (-0.00113 * 540 + 1.62517) )/(AtomsphericLightTM[1] * (-0.00113 * 620 + 1.62517))*1.2
24 |     # print('ratioB',ratioB)
25 |     # print('ratioG',ratioG)
26 |     # transmissionG = transmissionR ** ratioG
27 |     # transmissionB = transmissionR ** ratioB
28 |     # print('getGBTransmissionESttransmissionB',transmissionB)
29 |     return transmissionB,transmissionG,depth_map
30 | 
31 | 
32 | 
33 | 
34 | # def getGBTransmissionESt(transmissionR,AtomsphericLightTM):
35 | #     # transmissionG = np.zeros(transmissionR.shape)
36 | #     # transmissionB = np.zeros(transmissionR.shape)
37 | #     ratioB = (AtomsphericLightTM[2] * (-0.00113 * 450 + 1.62517) )/(AtomsphericLightTM[0] * (-0.00113 * 620 + 1.62517))
38 | #     ratioG = (AtomsphericLightTM[2] * (-0.00113 * 540 + 1.62517) )/(AtomsphericLightTM[1] * (-0.00113 * 620 + 1.62517))
39 | #     print('ratioB',ratioB)
40 | #     print('ratioG',ratioG)
41 | #     transmissionG = transmissionR ** ratioG
42 | #     transmissionB = transmissionR ** ratioB
43 | #     print('getGBTransmissionESttransmissionB',transmissionB)
44 | #
45 | #     return transmissionB,transmissionG
46 | 
47 | 
48 | 


--------------------------------------------------------------------------------
/data_augmention/getRefinedTransmission.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | 
 4 | from GuidedFilter import GuidedFilter
 5 | 
 6 | 
 7 | def  Refinedtransmission(transmissionB,transmissionG,transmissionR_Stretched,img):
 8 | 
 9 | 
10 |     gimfiltR = 50  # 引导滤波时半径的大小
11 |     eps = 10 ** -3  # 引导滤波时epsilon的值
12 | 
13 |     # gimfiltR = 5  # 引导滤波时半径的大小
14 |     # eps = 0.4  # 引导滤波时epsilon的值
15 | 
16 |     guided_filter = GuidedFilter(img, gimfiltR, eps)
17 |     transmissionR_Stretched = guided_filter.filter(transmissionR_Stretched)
18 |     transmissionG = guided_filter.filter(transmissionG)
19 |     transmissionB = guided_filter.filter(transmissionB)
20 | 
21 |     transmission = np.zeros(img.shape)
22 |     transmission[:, :, 0] = transmissionB
23 |     transmission[:, :, 1] = transmissionG
24 |     transmission[:, :, 2] = transmissionR_Stretched
25 |     transmission = np.clip(transmission,0.05, 0.95)
26 | 
27 |     return transmission
28 | 
29 | 
30 | 
31 | 
32 | 
33 |     #
34 |     # transmissionB = FilterTran(transmissionB,0.1,0.9)
35 |     # transmissionG = FilterTran(transmissionG,0.25,0.95)
36 |     # transmissionR = FilterTran(transmissionR,0.35,0.975)
37 |     # transmissionB = FilterTran(transmissionB, 0.2, 0.9, 15, 95)
38 |     # transmissionG = FilterTran(transmissionG, 0.25, 0.95, 15, 95)
39 |     # transmissionR = FilterTran(transmissionR, 0.35, 0.975, 15, 95)
40 |     # print('transmissionB',transmissionB)


--------------------------------------------------------------------------------
/data_augmention/getTransmissionMap.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | def getMinChannel(img,AtomsphericLight):
 3 |     imgGrayNormalization = np.zeros((img.shape[0], img.shape[1]), dtype=np.float16)
 4 |     for i in range(0, img.shape[0]):
 5 |         for j in range(0, img.shape[1]):
 6 |             localMin = 1
 7 |             for k in range(0, 3):
 8 |                 # print('AtomsphericLight[k]',AtomsphericLight[k])
 9 |                 imgNormalization = img.item((i, j, k)) / AtomsphericLight[k]
10 |                 if imgNormalization < localMin:
11 |                     localMin = imgNormalization
12 |             imgGrayNormalization[i, j] = localMin
13 |     # print('imgGrayNormalization',imgGrayNormalization)
14 |     # print('np.max(imgGrayNormalization)',np.max(imgGrayNormalization))
15 |     return imgGrayNormalization
16 | 
17 | def getTransmission(img,AtomsphericLight ,blockSize):
18 |     img = np.float16(img)
19 |     img = getMinChannel(img,AtomsphericLight)
20 |     AtomsphericLight = AtomsphericLight / 255.0
21 |     addSize = int((blockSize - 1) / 2)
22 |     newHeight = img.shape[0] + blockSize - 1
23 |     newWidth = img.shape[1] + blockSize - 1
24 |     # 中间结果
25 |     imgMiddle = np.zeros((newHeight, newWidth))
26 |     imgMiddle[:, :] = 1
27 |     imgMiddle[addSize:newHeight - addSize, addSize:newWidth - addSize] = img
28 |     # print('imgMiddle',imgMiddle)
29 |     imgDark = np.zeros((img.shape[0], img.shape[1]))
30 |     localMin = 1
31 |     for i in range(addSize, newHeight - addSize):
32 |         for j in range(addSize, newWidth - addSize):
33 |             localMin = 1
34 |             for k in range(i - addSize, i + addSize + 1):
35 |                 for l in range(j - addSize, j + addSize + 1):
36 |                     if imgMiddle.item((k, l)) < localMin:
37 |                         localMin = imgMiddle.item((k, l))
38 |             imgDark[i - addSize, j - addSize] = localMin
39 |         transmission = (1 - imgDark) / (1 - 0.1 / np.max(AtomsphericLight))
40 |     transmission = np.clip(transmission, 0.1, 0.9)
41 |     # for i in range(0, transmission.shape[0]):
42 |     #     for j in range(0, transmission.shape[1]):
43 |     #         if transmission[i, j] < 0.01:
44 |     #             transmission[i, j] = 0.01
45 |     #         if transmission[i, j] > 0.99:
46 |     #             transmission[i, j] = 0.99
47 | 
48 |     return transmission


--------------------------------------------------------------------------------
/data_augmention/global_histogram_stretching.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import datetime
 4 | import math
 5 | 
 6 | 
 7 | import numpy as np
 8 | from scipy import stats
 9 | import cv2
10 | 
11 | pi = math.pi
12 | e = math.e
13 | 
14 | 
15 | 
16 | 
17 | 
18 | 
19 | def global_stretching_depth(img_L):
20 |     height = len(img_L)
21 |     width = len(img_L[0])
22 |     length = height * width
23 |     R_rray = []
24 |     for i in range(height):
25 |         for j in range(width):
26 |             R_rray.append(img_L[i][j])
27 |     R_rray.sort()
28 |     I_min = R_rray[int(length / 1000)]
29 |     I_max = R_rray[-int(length / 1000)]
30 | 
31 |     array_Global_histogram_stretching_L = np.zeros((height, width))
32 |     for i in range(0, height):
33 |         for j in range(0, width):
34 |             if img_L[i][j] < I_min:
35 |                 p_out = img_L[i][j]
36 |                 array_Global_histogram_stretching_L[i][j] = 0.2
37 |             elif (img_L[i][j] > I_max):
38 |                 p_out = img_L[i][j]
39 |                 array_Global_histogram_stretching_L[i][j] = 0.9
40 |             else:
41 |                 p_out = (img_L[i][j] - I_min) * ((0.9-0.2) / (I_max - I_min))+ 0.2
42 |                 array_Global_histogram_stretching_L[i][j] = p_out
43 |     return (array_Global_histogram_stretching_L)
44 | 
45 | 
46 | 
47 | def stretching(sceneRadiance,height, width):
48 | 
49 |     r_array_Global_histogram_stretching = global_stretching_depth(sceneRadiance)
50 |     sceneRadiance = r_array_Global_histogram_stretching
51 | 
52 | 
53 |     return sceneRadiance
54 | 


--------------------------------------------------------------------------------
/data_augmention/sceneRadiance.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | def sceneRadianceRGB(img, transmission, AtomsphericLight):
 4 |     sceneRadiance = np.zeros(img.shape)
 5 |     img = np.float32(img)
 6 |     for i in range(0, 3):
 7 |         sceneRadiance[:, :, i] = (img[:, :, i] - AtomsphericLight[i]) / transmission[:, :, i]  + AtomsphericLight[i]
 8 |         # 限制透射率 在0～255
 9 | 
10 | 
11 |     sceneRadiance = np.clip(sceneRadiance, 0, 255)
12 |     sceneRadiance = np.uint8(sceneRadiance)
13 |     return sceneRadiance
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/data_process.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from bs4 import BeautifulSoup\n",
 10 |     "import os\n",
 11 |     "import cv2"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "markdown",
 16 |    "metadata": {},
 17 |    "source": [
 18 |     "# 导入数据"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 2,
 24 |    "metadata": {},
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "class_label = ['holothurian', 'echinus', 'scallop', 'starfish', 'waterweeds']\n",
 28 |     "file_path = 'data/train/box/'\n",
 29 |     "image_path = 'data/train/augment/'\n",
 30 |     "all_name = os.listdir(file_path)"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {},
 36 |    "source": [
 37 |     "# 数据处理"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 3,
 43 |    "metadata": {},
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "# train_result用于后续yolo训练,k_mean_result用于计算anchor boxes\n",
 47 |     "train_result = []\n",
 48 |     "k_mean_result = []\n",
 49 |     "box_size = {}\n",
 50 |     "label_number = {}\n",
 51 |     "del_number = 0\n",
 52 |     "\n",
 53 |     "# 遍历图片\n",
 54 |     "for i in all_name:\n",
 55 |     "    image_name = image_path+i.rstrip('xml')+'jpg'\n",
 56 |     "    img_name_append = False\n",
 57 |     "    soup = BeautifulSoup(open(file_path+i), 'lxml')\n",
 58 |     "    bbx = soup.find_all('object')\n",
 59 |     "    img = cv2.imread(image_name)\n",
 60 |     "    height, width, _ = img.shape\n",
 61 |     "    \n",
 62 |     "    # 对各尺寸数量进行统计\n",
 63 |     "    height, width, _ = img.shape\n",
 64 |     "    if box_size.get((height, width), False):\n",
 65 |     "        box_size[(height, width)] += 1\n",
 66 |     "    else:\n",
 67 |     "        box_size[(height, width)] = 1\n",
 68 |     "    \n",
 69 |     "    # 遍历候选框\n",
 70 |     "    for j in bbx:\n",
 71 |     "        # 同时对范围进行修正，像素起点改为0\n",
 72 |     "        name = str(j.contents[1].string)\n",
 73 |     "        xmin = int(j.xmin.string) - 1\n",
 74 |     "        ymin = int(j.ymin.string) - 1\n",
 75 |     "        xmax = int(j.xmax.string) - 1\n",
 76 |     "        ymax = int(j.ymax.string) - 1\n",
 77 |     "        index = str(class_label.index(name))\n",
 78 |     "\n",
 79 |     "        # 防止大小混淆\n",
 80 |     "        if xmin > xmax:\n",
 81 |     "            xmax, xmin = xmin, xmax\n",
 82 |     "        if ymin > ymax:\n",
 83 |     "            ymax, ymin = ymin, ymax\n",
 84 |     "            \n",
 85 |     "        # 判断ground truth是否超出图像范围\n",
 86 |     "        if xmin > width - 1 or xmax < 0:\n",
 87 |     "            continue\n",
 88 |     "        if ymin > height - 1 or ymax < 0:\n",
 89 |     "            continue\n",
 90 |     "\n",
 91 |     "        # 处于图像边缘候选框的处理\n",
 92 |     "        if xmin < 0:\n",
 93 |     "            xmin = 0\n",
 94 |     "        if ymin < 0:\n",
 95 |     "            ymin = 0\n",
 96 |     "        if xmax > width - 1:\n",
 97 |     "            xmax = width - 1\n",
 98 |     "        if ymax > height - 1:\n",
 99 |     "            ymax = height - 1\n",
100 |     "            \n",
101 |     "        # 判断是否为无效框\n",
102 |     "        x_distance = xmax - xmin\n",
103 |     "        y_distance = ymax - ymin\n",
104 |     "        if x_distance == 0 or y_distance == 0:\n",
105 |     "            continue\n",
106 |     "            \n",
107 |     "        # 对于过小的图片进行删除\n",
108 |     "        square = (xmax - xmin) * (ymax - ymin)\n",
109 |     "        if square < 120:\n",
110 |     "            del_number += 1\n",
111 |     "            continue\n",
112 |     "        \n",
113 |     "        # 防止有图片没有任何候选框\n",
114 |     "        if img_name_append == False:\n",
115 |     "            train_result.append(image_name)\n",
116 |     "            img_name_append = True\n",
117 |     "\n",
118 |     "        # 判断是否增加kmean缩放候选框\n",
119 |     "        scale = min(448/width, 448/height)\n",
120 |     "        square = ((xmax - xmin)*scale) * ((ymax - ymin)*scale)\n",
121 |     "        if square < 120:\n",
122 |     "            kmean_scale = False\n",
123 |     "        else:\n",
124 |     "            kmean_scale = True\n",
125 |     "        \n",
126 |     "        xmin, xmax, ymin, ymax = str(xmin), str(xmax), str(ymin), str(ymax)\n",
127 |     "        train_result.append(' ')\n",
128 |     "        for x in [xmin, ymin, xmax, ymax]:\n",
129 |     "            train_result.append(x)\n",
130 |     "            train_result.append(',')\n",
131 |     "            if x is ymax:\n",
132 |     "                train_result.append(index)\n",
133 |     "        \n",
134 |     "        # kmean添加缩放候选框\n",
135 |     "        if kmean_scale == True:\n",
136 |     "            for x in [xmin, ymin, xmax, ymax]:\n",
137 |     "                k_mean_result.append(int(round(int(x)*scale)))\n",
138 |     "                if x is not ymax:\n",
139 |     "                    k_mean_result.append(',')\n",
140 |     "                else:\n",
141 |     "                    k_mean_result.append('\\n')\n",
142 |     "        \n",
143 |     "        # 对各label数量统计\n",
144 |     "        if label_number.get(name, False):\n",
145 |     "            label_number[name] += 1\n",
146 |     "        else:\n",
147 |     "            label_number[name] = 1\n",
148 |     "    if img_name_append == True:\n",
149 |     "        train_result.append('\\n')"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "markdown",
154 |    "metadata": {},
155 |    "source": [
156 |     "# 查看统计结果"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "code",
161 |    "execution_count": 4,
162 |    "metadata": {},
163 |    "outputs": [
164 |     {
165 |      "data": {
166 |       "text/plain": [
167 |        "10"
168 |       ]
169 |      },
170 |      "execution_count": 4,
171 |      "metadata": {},
172 |      "output_type": "execute_result"
173 |     }
174 |    ],
175 |    "source": [
176 |     "del_number"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": 5,
182 |    "metadata": {},
183 |    "outputs": [
184 |     {
185 |      "data": {
186 |       "text/plain": [
187 |        "{'echinus': 22098,\n",
188 |        " 'scallop': 6694,\n",
189 |        " 'starfish': 6827,\n",
190 |        " 'holothurian': 5503,\n",
191 |        " 'waterweeds': 82}"
192 |       ]
193 |      },
194 |      "execution_count": 5,
195 |      "metadata": {},
196 |      "output_type": "execute_result"
197 |     }
198 |    ],
199 |    "source": [
200 |     "label_number"
201 |    ]
202 |   },
203 |   {
204 |    "cell_type": "code",
205 |    "execution_count": 6,
206 |    "metadata": {},
207 |    "outputs": [
208 |     {
209 |      "data": {
210 |       "text/plain": [
211 |        "{(405, 720): 3066,\n",
212 |        " (2160, 3840): 1644,\n",
213 |        " (1080, 1920): 595,\n",
214 |        " (576, 704): 38,\n",
215 |        " (480, 586): 44}"
216 |       ]
217 |      },
218 |      "execution_count": 6,
219 |      "metadata": {},
220 |      "output_type": "execute_result"
221 |     }
222 |    ],
223 |    "source": [
224 |     "box_size"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "markdown",
229 |    "metadata": {},
230 |    "source": [
231 |     "# 保存"
232 |    ]
233 |   },
234 |   {
235 |    "cell_type": "code",
236 |    "execution_count": 7,
237 |    "metadata": {},
238 |    "outputs": [],
239 |    "source": [
240 |     "with open('data/train_data.txt', 'w') as f:\n",
241 |     "    f.writelines(train_result)\n",
242 |     "with open('data/k_mean_data.txt', 'w') as f:\n",
243 |     "    f.writelines(k_mean_result)"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "code",
248 |    "execution_count": null,
249 |    "metadata": {},
250 |    "outputs": [],
251 |    "source": []
252 |   }
253 |  ],
254 |  "metadata": {
255 |   "kernelspec": {
256 |    "display_name": "Python 3",
257 |    "language": "python",
258 |    "name": "python3"
259 |   },
260 |   "language_info": {
261 |    "codemirror_mode": {
262 |     "name": "ipython",
263 |     "version": 3
264 |    },
265 |    "file_extension": ".py",
266 |    "mimetype": "text/x-python",
267 |    "name": "python",
268 |    "nbconvert_exporter": "python",
269 |    "pygments_lexer": "ipython3",
270 |    "version": "3.6.9"
271 |   }
272 |  },
273 |  "nbformat": 4,
274 |  "nbformat_minor": 2
275 | }
276 | 


--------------------------------------------------------------------------------
/data_process.py:
--------------------------------------------------------------------------------
  1 | from bs4 import BeautifulSoup
  2 | import os
  3 | import cv2
  4 | from tqdm import tqdm
  5 | 
  6 | 
  7 | def process():
  8 |     # train_result用于后续yolo训练,k_mean_result用于计算anchor boxes
  9 |     train_result = []
 10 |     k_mean_result = []
 11 | 
 12 |     # 遍历图片
 13 |     for i in tqdm(all_name):
 14 |         image_name = image_path+i.rstrip('xml')+'jpg'
 15 |         img_name_append = False
 16 |         soup = BeautifulSoup(open(file_path+i), 'lxml')
 17 |         bbx = soup.find_all('object')
 18 |         img = cv2.imread(image_name)
 19 |         height, width, _ = img.shape
 20 | 
 21 |         # 遍历候选框
 22 |         for j in bbx:
 23 |             # 同时对范围进行修正，像素起点改为0
 24 |             name = str(j.contents[1].string)
 25 |             xmin = int(j.xmin.string) - 1
 26 |             ymin = int(j.ymin.string) - 1
 27 |             xmax = int(j.xmax.string) - 1
 28 |             ymax = int(j.ymax.string) - 1
 29 |             index = str(class_label.index(name))
 30 | 
 31 |             # 防止大小混淆
 32 |             if xmin > xmax:
 33 |                 xmax, xmin = xmin, xmax
 34 |             if ymin > ymax:
 35 |                 ymax, ymin = ymin, ymax
 36 | 
 37 |             # 判断ground truth是否超出图像范围
 38 |             if xmin > width - 1 or xmax < 0:
 39 |                 continue
 40 |             if ymin > height - 1 or ymax < 0:
 41 |                 continue
 42 | 
 43 |             # 处于图像边缘候选框的处理
 44 |             if xmin < 0:
 45 |                 xmin = 0
 46 |             if ymin < 0:
 47 |                 ymin = 0
 48 |             if xmax > width - 1:
 49 |                 xmax = width - 1
 50 |             if ymax > height - 1:
 51 |                 ymax = height - 1
 52 | 
 53 |             # 判断是否为无效框
 54 |             x_distance = xmax - xmin
 55 |             y_distance = ymax - ymin
 56 |             if x_distance == 0 or y_distance == 0:
 57 |                 continue
 58 | 
 59 |             # 对于过小的候选框进行删除
 60 |             square = (xmax - xmin) * (ymax - ymin)
 61 |             if square < 120:
 62 |                 continue
 63 | 
 64 |             # 防止有图片没有任何候选框
 65 |             if img_name_append == False:
 66 |                 train_result.append(image_name)
 67 |                 img_name_append = True
 68 | 
 69 |              # 判断是否增加kmean缩放候选框
 70 |             scale = min(input_shape/width, input_shape/height)
 71 |             square = ((xmax - xmin)*scale) * ((ymax - ymin)*scale)
 72 |             if square < 120:
 73 |                 kmean_scale = False
 74 |             else:
 75 |                 kmean_scale = True
 76 | 
 77 |             xmin, xmax, ymin, ymax = str(xmin), str(xmax), str(ymin), str(ymax)
 78 |             train_result.append(' ')
 79 |             for x in [xmin, ymin, xmax, ymax]:
 80 |                 train_result.append(x)
 81 |                 train_result.append(',')
 82 |                 if x is ymax:
 83 |                     train_result.append(index)
 84 | 
 85 |             # kmean添加缩放候选框
 86 |             if kmean_scale == True:
 87 |                 for x in [xmin, ymin, xmax, ymax]:
 88 |                     k_mean_result.append(str(int(round(int(x)*scale))))
 89 |                     if x is not ymax:
 90 |                         k_mean_result.append(',')
 91 |                     else:
 92 |                         k_mean_result.append('\n')
 93 | 
 94 |         if img_name_append == True:
 95 |             train_result.append('\n')
 96 | 
 97 |     return train_result, k_mean_result
 98 | 
 99 | 
100 | def save_file(train_result, k_mean_result):
101 |     with open('data/train_data.txt', 'w') as f:
102 |         f.writelines(train_result)
103 |     with open('data/k_mean_data.txt', 'w') as f:
104 |         f.writelines(k_mean_result)
105 | 
106 | 
107 | if __name__ == '__main__':
108 |     input_shape = 480
109 |     class_label = ['holothurian', 'echinus', 'scallop', 'starfish', 'waterweeds']
110 |     file_path = 'data/train/box/'
111 |     image_path = 'data/train/augment/'
112 |     all_name = os.listdir(file_path)
113 | 
114 |     train_result, k_mean_result = process()
115 |     save_file(train_result, k_mean_result)
116 | 


--------------------------------------------------------------------------------
/image_aug.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.insert(0,'data_augmention/')
 3 | from augment import augment
 4 | import os
 5 | import cv2
 6 | import numpy as np
 7 | from multiprocessing import Pool
 8 | 
 9 | number = 0
10 | def img_aug(name, train=False, input_shape=(480,480)):
11 |     global number
12 | 
13 |     img = cv2.imread(image_path+name)
14 |     if train == False:
15 |         h, w = input_shape
16 |         ih, iw = img.shape[:2]
17 |         scale = min(w/iw, h/ih)
18 |         nw = int(iw*scale)
19 |         nh = int(ih*scale)
20 |         img = cv2.resize(img,(nw,nh))
21 |         img = augment(img)
22 | 
23 |         # 用灰色像素块来做背景扩充图片满足输入尺寸需求
24 |         dx = (w-nw) // 2
25 |         dy = (h-nh) // 2
26 |         img = np.pad(img, ((dy, dy), (dx, dx), (0, 0)),
27 |                      'constant', constant_values=128)
28 |         if tuple(img.shape[:2]) != input_shape:
29 |             img = np.pad(img, ((0, input_shape[0]-img.shape[0]), 
30 |                     (0, input_shape[1]-img.shape[1]), (0, 0)),
31 |                     'constant', constant_values=128)
32 |     else:
33 |         img = augment(img)
34 | 
35 |     cv2.imwrite(result_path+name, img)
36 |     number += 1
37 |     print('{}. {} is finish!'.format(number, name))
38 | 
39 | 
40 | if __name__ == '__main__':
41 |     image_path = 'data/test/test-B-image/'
42 |     result_path = 'data/test/test_B_augment/'
43 |     all_name = os.listdir(image_path)
44 | 
45 |     # 使用多进程来加速处理
46 |     p = Pool(10)
47 |     for i in all_name:
48 |         p.apply_async(img_aug, args=(i,))
49 |     p.close()
50 |     p.join()
51 | 


--------------------------------------------------------------------------------
/kmeans.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | class YOLO_Kmeans:
 5 | 
 6 |     def __init__(self, cluster_number, filename):
 7 |         self.cluster_number = cluster_number
 8 |         self.filename = filename
 9 | 
10 |     def iou(self, boxes, clusters):  # 1 box -> k clusters
11 |         n = boxes.shape[0]
12 |         k = self.cluster_number
13 | 
14 |         box_area = boxes[:, 0] * boxes[:, 1]
15 |         box_area = box_area.repeat(k)
16 |         box_area = np.reshape(box_area, (n, k))
17 | 
18 |         cluster_area = clusters[:, 0] * clusters[:, 1]
19 |         cluster_area = np.tile(cluster_area, [1, n])
20 |         cluster_area = np.reshape(cluster_area, (n, k))
21 | 
22 |         box_w_matrix = np.reshape(boxes[:, 0].repeat(k), (n, k))
23 |         cluster_w_matrix = np.reshape(np.tile(clusters[:, 0], (1, n)), (n, k))
24 |         min_w_matrix = np.minimum(cluster_w_matrix, box_w_matrix)
25 | 
26 |         box_h_matrix = np.reshape(boxes[:, 1].repeat(k), (n, k))
27 |         cluster_h_matrix = np.reshape(np.tile(clusters[:, 1], (1, n)), (n, k))
28 |         min_h_matrix = np.minimum(cluster_h_matrix, box_h_matrix)
29 |         inter_area = np.multiply(min_w_matrix, min_h_matrix)
30 | 
31 |         result = inter_area / (box_area + cluster_area - inter_area)
32 |         return result
33 | 
34 |     def avg_iou(self, boxes, clusters):
35 |         accuracy = np.mean([np.max(self.iou(boxes, clusters), axis=1)])
36 |         return accuracy
37 | 
38 |     def kmeans(self, boxes, k, dist=np.median):
39 |         box_number = boxes.shape[0]
40 |         distances = np.empty((box_number, k))
41 |         last_nearest = np.zeros((box_number,))
42 |         np.random.seed()
43 |         clusters = boxes[np.random.choice(
44 |             box_number, k, replace=False)]  # init k clusters
45 |         epoch = 0
46 | 
47 |         while True:
48 |             distances = 1 - self.iou(boxes, clusters)
49 | 
50 |             current_nearest = np.argmin(distances, axis=1)
51 |             if (last_nearest == current_nearest).all():
52 |                 break  # clusters won't change
53 |             for cluster in range(k):
54 |                 clusters[cluster] = dist(  # update clusters
55 |                     boxes[current_nearest == cluster], axis=0)
56 | 
57 |             last_nearest = current_nearest
58 | 
59 |         return clusters
60 | 
61 |     def result2txt(self, data):
62 |         f = open("data/yolo_anchors.txt", 'w')
63 |         row = np.shape(data)[0]
64 |         for i in range(row):
65 |             if i == 0:
66 |                 x_y = "%d,%d" % (data[i][0], data[i][1])
67 |             else:
68 |                 x_y = ", %d,%d" % (data[i][0], data[i][1])
69 |             f.write(x_y)
70 |         f.close()
71 | 
72 |     def txt2boxes(self):
73 |         f = open(self.filename, 'r')
74 |         dataSet = []
75 |         for line in f:
76 |             infos = line.split(",")
77 |             width = int(infos[2]) - int(infos[0])
78 |             height = int(infos[3]) - int(infos[1])
79 |             dataSet.append([width, height])
80 |         result = np.array(dataSet)
81 |         f.close()
82 |         return result
83 | 
84 |     def txt2clusters(self):
85 |         all_boxes = self.txt2boxes()
86 |         result = self.kmeans(all_boxes, k=self.cluster_number)
87 |         result = result[np.lexsort(result.T[0, None])]
88 |         self.result2txt(result)
89 |         print("K anchors:\n {}".format(result))
90 |         print("Accuracy: {:.2f}%".format(
91 |             self.avg_iou(all_boxes, result) * 100))
92 | 
93 | 
94 | if __name__ == "__main__":
95 |     cluster_number = 9
96 |     filename = "data/k_mean_data.txt"
97 |     kmeans = YOLO_Kmeans(cluster_number, filename)
98 |     kmeans.txt2clusters()
99 | 


--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
  1 | import colorsys
  2 | import os
  3 | import numpy as np
  4 | from keras import backend as K
  5 | from keras.models import load_model
  6 | from keras.layers import Input
  7 | import cv2
  8 | from yolo3.model import yolo_eval, yolo_body
  9 | from keras.utils import multi_gpu_model
 10 | import pandas as pd
 11 | from tqdm import tqdm
 12 | from ensemble_boxes import *
 13 | import glob
 14 | 
 15 | 
 16 | def create_model(input_shape, num_anchors, num_classes, weights_path):
 17 |     '''create the training model'''
 18 |     image_input = Input(shape=(None, None, 3))
 19 |     h, w = input_shape
 20 | 
 21 |     model_body = yolo_body(image_input, num_anchors//3, num_classes)
 22 |     model_body.load_weights(weights_path)
 23 | 
 24 |     return model_body
 25 | 
 26 | class YOLO(object):
 27 |     _defaults = {
 28 |         "model_path": 'models/trained_weights_final.h5',
 29 |         "anchors_path": 'data/yolo_anchors.txt',
 30 |         "classes_path": 'data/classes.txt',
 31 |         "score" : 0.001,
 32 |         "iou" : 0.25,
 33 |         "model_image_size" : (480, 480),
 34 |         "gpu_num" : 1,
 35 |     }
 36 | 
 37 |     @classmethod
 38 |     def get_defaults(cls, n):
 39 |         if n in cls._defaults:
 40 |             return cls._defaults[n]
 41 |         else:
 42 |             return "Unrecognized attribute name '" + n + "'"
 43 | 
 44 |     def __init__(self):
 45 |         self.__dict__.update(self._defaults) # set up default values
 46 |         self.class_names = self._get_class()
 47 |         self.anchors = self._get_anchors()
 48 |         self.sess = K.get_session()
 49 |         self.boxes, self.scores, self.classes = self.generate()
 50 | 
 51 |     def _get_class(self):
 52 |         classes_path = os.path.expanduser(self.classes_path)
 53 |         with open(classes_path) as f:
 54 |             class_names = f.readlines()
 55 |         class_names = [c.strip() for c in class_names]
 56 |         return class_names
 57 | 
 58 |     def _get_anchors(self):
 59 |         anchors_path = os.path.expanduser(self.anchors_path)
 60 |         with open(anchors_path) as f:
 61 |             anchors = f.readline()
 62 |         anchors = [float(x) for x in anchors.split(',')]
 63 |         return np.array(anchors).reshape(-1, 2)
 64 | 
 65 |     def generate(self):
 66 |         model_path = os.path.expanduser(self.model_path)
 67 |         assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
 68 | 
 69 |         # Load model, or construct model and load weights.
 70 |         num_anchors = len(self.anchors)
 71 |         num_classes = len(self.class_names)
 72 |         self.yolo_model = create_model(self.model_image_size, num_anchors, num_classes, self.model_path)
 73 | 
 74 |         print('{} model, anchors, and classes loaded.'.format(model_path))
 75 | 
 76 |         # Generate colors for drawing bounding boxes.
 77 |         hsv_tuples = [(x / len(self.class_names), 1., 1.)
 78 |                       for x in range(len(self.class_names))]
 79 |         self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
 80 |         self.colors = list(
 81 |             map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
 82 |                 self.colors))
 83 |         np.random.seed(10101)  # Fixed seed for consistent colors across runs.
 84 |         np.random.shuffle(self.colors)  # Shuffle colors to decorrelate adjacent classes.
 85 |         np.random.seed(None)  # Reset seed to default.
 86 | 
 87 |         # Generate output tensor targets for filtered bounding boxes.
 88 |         self.input_image_shape = K.placeholder(shape=(2, ))
 89 |         if self.gpu_num>=2:
 90 |             self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)
 91 |         boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
 92 |                 len(self.class_names), self.input_image_shape,
 93 |                 score_threshold=self.score, iou_threshold=self.iou)
 94 |         return boxes, scores, classes
 95 | 
 96 |     def detect_image(self, image):
 97 |         assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required'
 98 |         assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required'
 99 | 
100 |         image_data = np.array(image, dtype='float32')
101 |         image_data /= 255.
102 |         image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
103 | 
104 |         out_boxes, out_scores, out_classes = self.sess.run(
105 |             [self.boxes, self.scores, self.classes],
106 |             feed_dict={
107 |                 self.yolo_model.input: image_data,
108 |                 self.input_image_shape: [image.shape[1], image.shape[0]],
109 |                 K.learning_phase(): 0
110 |             })
111 | 
112 |         return out_classes, out_scores, out_boxes
113 | 
114 | 
115 | def detect_img(yolo, test, input_shape):
116 |     class_label = ['holothurian', 'echinus', 'scallop', 'starfish']
117 |     name, image_id, confidence, xmin, ymin, xmax, ymax = \
118 |         [], [], [], [], [], [], []
119 | 
120 |     for img in tqdm(test):
121 |         image = cv2.imread(img)
122 |         height, width, _ = cv2.imread(img.replace('test_B_augment', 'test-B-image')).shape
123 |         scale = min(input_shape/width, input_shape/height)
124 |         nw = int(width*scale)
125 |         nh = int(height*scale)
126 |         dx = (input_shape-nw) // 2
127 |         dy = (input_shape-nh) // 2
128 |         out_classes, out_scores, out_boxes = yolo.detect_image(image)
129 | 
130 |         # 采用WBF代替NMS
131 |         out_boxes, out_scores, out_classes = weighted_boxes_fusion([out_boxes], [out_scores], [out_classes], weights=None, iou_thr=0.25, skip_box_thr=0.0)
132 |         out_boxes = out_boxes.tolist()
133 |         out_scores = out_scores.tolist()
134 |         out_classes = out_classes.tolist()
135 | 
136 |         for v, i in enumerate(out_boxes):
137 |             # 水草一类删除
138 |             if int(out_classes[v]) == 4:
139 |                 continue
140 | 
141 |             ym, xm, ya, xa = i
142 |             # 范围修正
143 |             xm, ym, xa, ya = (xm-dx)/scale+1, (ym-dy)/scale+1, (xa-dx)/scale+1, (ya-dy)/scale+1
144 | 
145 |             # 防止大小混淆
146 |             if xm > xa:
147 |                 xa, xm = xm, xa
148 |             if ym > ya:
149 |                 ya, ym = ym, ya
150 | 
151 |             # 判断ground truth是否超出图像范围
152 |             if xm > width or xa < 1:
153 |                 continue
154 |             if ym > height or ya < 1:
155 |                 continue
156 | 
157 |             # 处于图像边缘候选框的处理
158 |             if xm < 1:
159 |                 xm = 1
160 |             if ym < 1:
161 |                 ym = 1
162 |             if xa > width:
163 |                 xa = width
164 |             if ya > height:
165 |                 ya = height
166 | 
167 |             # 判断是否为无效框
168 |             x_distance = xa - xm
169 |             y_distance = ya - ym
170 |             if x_distance == 0 or y_distance == 0:
171 |                 continue
172 | 
173 |             # 四舍五入转换为int
174 |             xm = int(round(xm))
175 |             ym = int(round(ym))
176 |             xa = int(round(xa))
177 |             ya = int(round(ya))
178 | 
179 |             xmin.append(xm)
180 |             ymin.append(ym)
181 |             xmax.append(xa)
182 |             ymax.append(ya)
183 | 
184 |             name.append(class_label[int(out_classes[v])])
185 |             confidence.append(out_scores[v])
186 |             image_id.append(img.replace('jpg', 'xml').lstrip(TEST_PATH))
187 | 
188 |     save_csv(name, image_id, confidence, xmin, ymin, xmax, ymax)
189 | 
190 | 
191 | def save_csv(name, image_id, confidence, xmin, ymin, xmax, ymax):
192 |     result_table = pd.DataFrame({"name": name,
193 |                                  "image_id": image_id, 
194 |                                  "confidence":confidence, 
195 |                                  "xmin":xmin, 
196 |                                  "ymin":ymin, 
197 |                                  "xmax":xmax, 
198 |                                  "ymax":ymax})
199 |     result_table.to_csv("predict.csv", index=False)
200 | 
201 | 
202 | if __name__ == '__main__':
203 |     input_shape = 480
204 |     TEST_PATH = "data/test/test_B_augment/"
205 |     TEST_NAME = glob.glob(TEST_PATH + "*.jpg")
206 |     yolo = YOLO()
207 | 
208 |     detect_img(yolo, TEST_NAME, input_shape)
209 | 


--------------------------------------------------------------------------------
/swa.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | paper implementaton:
 5 | Stochastic Weight Averaging: https://arxiv.org/abs/1803.05407
 6 | 
 7 | """
 8 | 
 9 | import keras as K
10 | 
11 | 
12 | class SWA(K.callbacks.Callback):
13 | 
14 |     def __init__(self, filepath, SWA_START):
15 |         super(SWA, self).__init__()
16 |         self.filepath = filepath
17 |         self.SWA_START = SWA_START
18 | 
19 |     def on_train_begin(self, logs=None):
20 |         self.nb_epoch = self.params['epochs']
21 |         print('Stochastic weight averaging selected for last {} epochs.'
22 |               .format(self.nb_epoch - self.SWA_START))
23 | 
24 |     def on_epoch_begin(self, epoch, logs=None):
25 |         lr = float(K.backend.get_value(self.model.optimizer.lr))
26 |         print('learning rate of current epoch is : {}'.format(lr))
27 | 
28 |     def on_epoch_end(self, epoch, logs=None):
29 |         if epoch == self.SWA_START:
30 |             self.swa_weights = self.model.get_weights()
31 | 
32 |         elif epoch > self.SWA_START:
33 |             for i, layer in enumerate(self.model.layers):
34 |                 self.swa_weights[i] = (self.swa_weights[i] *
35 |                                        (epoch - self.SWA_START) + self.model.get_weights()[i]) / (
36 |                                               (epoch - self.SWA_START) + 1)
37 |         else:
38 |             pass
39 | 
40 |     def on_train_end(self, logs=None):
41 |         self.model.set_weights(self.swa_weights)
42 |         print('set stochastic weight average as final model parameters [FINISH].')
43 |         # self.model.save_weights(self.filepath)
44 |         # print('save final stochastic averaged weights model to file [FINISH].')
45 |         
46 |         
47 | class LearningRateDisplay(K.callbacks.Callback):
48 |     def on_epoch_begin(self, epoch, logs=None):
49 |         lr = float(K.backend.get_value(self.model.optimizer.lr))
50 |         print('learning rate of current epoch is : {}'.format(lr))


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Retrain the YOLO model for your own dataset.
  3 | """
  4 | 
  5 | import numpy as np
  6 | import keras.backend as K
  7 | from keras.layers import Input, Lambda
  8 | from keras.models import Model
  9 | from keras_radam import RAdam
 10 | from keras.callbacks import TensorBoard, ModelCheckpoint
 11 | from cosine_annealing import CosineAnnealingScheduler
 12 | import tensorflow as tf
 13 | from swa import SWA
 14 | 
 15 | from yolo3.model import preprocess_true_boxes, yolo_body, yolo_loss
 16 | from yolo3.utils import get_random_data
 17 | 
 18 | 
 19 | def _main():
 20 |     annotation_path = 'data/train_data.txt'
 21 |     log_dir = 'models/'
 22 |     classes_path = 'data/classes.txt'
 23 |     anchors_path = 'data/yolo_anchors.txt'
 24 |     class_names = get_classes(classes_path)
 25 |     num_classes = len(class_names)
 26 |     anchors = get_anchors(anchors_path)
 27 | 
 28 |     input_shape = (480,480) # multiple of 32, hw
 29 | 
 30 |     model = create_model(input_shape, anchors, num_classes,
 31 |         freeze_body=2, weights_path='pre_train/yolo_weights.h5') # make sure you know what you freeze
 32 |     logging = TensorBoard(log_dir=log_dir)
 33 |     checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5', monitor='val_loss', save_weights_only=True, save_best_only=True, period=1)
 34 | 
 35 |     # use swa
 36 |     swa_start = 100
 37 |     swa_obj = SWA('',swa_start)
 38 | 
 39 |     # use cosine
 40 |     cosine = CosineAnnealingScheduler(init_epoch=100, T_max=200, eta_max=1e-2, eta_min=1e-6)
 41 | 
 42 |     val_split = 0.1
 43 |     with open(annotation_path) as f:
 44 |         lines = f.readlines()
 45 |     np.random.seed(10101)
 46 |     np.random.shuffle(lines)
 47 |     np.random.seed(None)
 48 |     num_val = int(len(lines)*val_split)
 49 |     num_train = len(lines) - num_val
 50 | 
 51 |     # Train with frozen layers first, to get a stable loss.
 52 |     # Adjust num epochs to your dataset. This step is enough to obtain a not bad model.
 53 |     if True:
 54 |         model.compile(optimizer=RAdam(warmup_proportion=0.1, min_lr=1e-5), loss={
 55 |             # use custom yolo_loss Lambda layer.
 56 |             'yolo_loss': lambda y_true, y_pred: y_pred})
 57 | 
 58 |         batch_size = 32
 59 |         print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
 60 |         model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes, trainable=True),
 61 |                 steps_per_epoch=max(1, num_train//batch_size),
 62 |                 validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes, trainable=False),
 63 |                 validation_steps=max(1, num_val//batch_size),
 64 |                 epochs=100,
 65 |                 initial_epoch=0,
 66 |                 callbacks=[logging])
 67 |         model.save_weights(log_dir + 'trained_weights_stage_1.h5')
 68 | 
 69 |     # Unfreeze and continue training, to fine-tune.
 70 |     # Train longer if the result is not good.
 71 | 
 72 |     if True:
 73 |         for i in range(len(model.layers)):
 74 |             model.layers[i].trainable = True
 75 |         model.compile(optimizer=RAdam(warmup_proportion=0.1, min_lr=1e-6), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the change
 76 |         print('Unfreeze all of the layers.')
 77 | 
 78 |         batch_size = 8 # note that more GPU memory is required after unfreezing the body
 79 |         print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
 80 |         model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes, trainable=True),
 81 |             steps_per_epoch=max(1, num_train//batch_size),
 82 |             validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes, trainable=False),
 83 |             validation_steps=max(1, num_val//batch_size),
 84 |             epochs=300,
 85 |             initial_epoch=100,
 86 |             callbacks=[logging, cosine, swa_obj, checkpoint])
 87 |         model.save_weights(log_dir + 'trained_weights_final.h5')
 88 | 
 89 |     # Further training if needed.
 90 | 
 91 | 
 92 | def get_classes(classes_path):
 93 |     '''loads the classes'''
 94 |     with open(classes_path) as f:
 95 |         class_names = f.readlines()
 96 |     class_names = [c.strip() for c in class_names]
 97 |     return class_names
 98 | 
 99 | def get_anchors(anchors_path):
100 |     '''loads the anchors from a file'''
101 |     with open(anchors_path) as f:
102 |         anchors = f.readline()
103 |     anchors = [float(x) for x in anchors.split(',')]
104 |     return np.array(anchors).reshape(-1, 2)
105 | 
106 | 
107 | def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
108 |             weights_path='model_data/yolo_weights.h5'):
109 |     '''create the training model'''
110 |     K.clear_session() # get a new session
111 |     image_input = Input(shape=(None, None, 3))
112 |     h, w = input_shape
113 |     num_anchors = len(anchors)
114 | 
115 |     y_true = [Input(shape=(h//{0:32, 1:16, 2:8}[l], w//{0:32, 1:16, 2:8}[l], \
116 |         num_anchors//3, num_classes+5)) for l in range(3)]
117 | 
118 |     model_body = yolo_body(image_input, num_anchors//3, num_classes)
119 |     print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
120 | 
121 |     if load_pretrained:
122 |         model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
123 |         print('Load weights {}.'.format(weights_path))
124 |         if freeze_body in [1, 2]:
125 |             # Freeze darknet53 body or freeze all but 3 output layers.
126 |             num = (185, len(model_body.layers)-3)[freeze_body-1]
127 |             for i in range(num): model_body.layers[i].trainable = False
128 |             print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))
129 | 
130 |     model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
131 |         arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})(
132 |         [*model_body.output, *y_true])
133 |     model = Model([model_body.input, *y_true], model_loss)
134 | 
135 |     return model
136 | 
137 | def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, trainable):
138 |     '''data generator for fit_generator'''
139 |     n = len(annotation_lines)
140 |     i = 0
141 |     while True:
142 |         image_data = []
143 |         box_data = []
144 |         for b in range(batch_size):
145 |             if i==0:
146 |                 np.random.shuffle(annotation_lines)
147 |             image, box = get_random_data(annotation_lines[i], input_shape, trainable=trainable)
148 |             while type(image) == type(None):
149 |                 i = (i+1) % n
150 |                 if i==0:
151 |                     np.random.shuffle(annotation_lines)
152 |                 image, box = get_random_data(annotation_lines[i], input_shape, trainable=trainable)
153 |             image_data.append(image)
154 |             box_data.append(box)
155 |             i = (i+1) % n
156 |         image_data = np.array(image_data)
157 |         box_data = np.array(box_data)
158 |         y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
159 |         yield [image_data, *y_true], np.zeros(batch_size)
160 | 
161 | def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes, trainable):
162 |     n = len(annotation_lines)
163 |     if n==0 or batch_size<=0: return None
164 |     return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, trainable)
165 | 
166 | if __name__ == '__main__':
167 |     _main()
168 | 


--------------------------------------------------------------------------------
/yolo3/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/yolo3/__init__.py


--------------------------------------------------------------------------------
/yolo3/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/yolo3/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/yolo3/__pycache__/model.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/yolo3/__pycache__/model.cpython-36.pyc


--------------------------------------------------------------------------------
/yolo3/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/yolo3/__pycache__/utils.cpython-36.pyc


--------------------------------------------------------------------------------
/yolo3/model.py:
--------------------------------------------------------------------------------
  1 | """YOLO_v3 Model Defined in Keras."""
  2 | 
  3 | from functools import wraps
  4 | 
  5 | import numpy as np
  6 | import tensorflow as tf
  7 | from keras import backend as K
  8 | from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D
  9 | from keras.layers.advanced_activations import LeakyReLU
 10 | from keras.layers.normalization import BatchNormalization
 11 | from keras.models import Model
 12 | from keras.regularizers import l2
 13 | 
 14 | from yolo3.utils import compose
 15 | 
 16 | 
 17 | @wraps(Conv2D)
 18 | def DarknetConv2D(*args, **kwargs):
 19 |     """Wrapper to set Darknet parameters for Convolution2D."""
 20 |     darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
 21 |     darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
 22 |     darknet_conv_kwargs.update(kwargs)
 23 |     return Conv2D(*args, **darknet_conv_kwargs)
 24 | 
 25 | def DarknetConv2D_BN_Leaky(*args, **kwargs):
 26 |     """Darknet Convolution2D followed by BatchNormalization and LeakyReLU."""
 27 |     no_bias_kwargs = {'use_bias': False}
 28 |     no_bias_kwargs.update(kwargs)
 29 |     return compose(
 30 |         DarknetConv2D(*args, **no_bias_kwargs),
 31 |         BatchNormalization(),
 32 |         LeakyReLU(alpha=0.1))
 33 | 
 34 | def resblock_body(x, num_filters, num_blocks):
 35 |     '''A series of resblocks starting with a downsampling Convolution2D'''
 36 |     # Darknet uses left and top padding instead of 'same' mode
 37 |     x = ZeroPadding2D(((1,0),(1,0)))(x)
 38 |     x = DarknetConv2D_BN_Leaky(num_filters, (3,3), strides=(2,2))(x)
 39 |     for i in range(num_blocks):
 40 |         y = compose(
 41 |                 DarknetConv2D_BN_Leaky(num_filters//2, (1,1)),
 42 |                 DarknetConv2D_BN_Leaky(num_filters, (3,3)))(x)
 43 |         x = Add()([x,y])
 44 |     return x
 45 | 
 46 | def darknet_body(x):
 47 |     '''Darknent body having 52 Convolution2D layers'''
 48 |     x = DarknetConv2D_BN_Leaky(32, (3,3))(x)
 49 |     x = resblock_body(x, 64, 1)
 50 |     x = resblock_body(x, 128, 2)
 51 |     x = resblock_body(x, 256, 8)
 52 |     x = resblock_body(x, 512, 8)
 53 |     x = resblock_body(x, 1024, 4)
 54 |     return x
 55 | 
 56 | def make_last_layers(x, num_filters, out_filters):
 57 |     '''6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer'''
 58 |     x = compose(
 59 |             DarknetConv2D_BN_Leaky(num_filters, (1,1)),
 60 |             DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
 61 |             DarknetConv2D_BN_Leaky(num_filters, (1,1)),
 62 |             DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
 63 |             DarknetConv2D_BN_Leaky(num_filters, (1,1)))(x)
 64 |     y = compose(
 65 |             DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
 66 |             DarknetConv2D(out_filters, (1,1)))(x)
 67 |     return x, y
 68 | 
 69 | 
 70 | def yolo_body(inputs, num_anchors, num_classes):
 71 |     """Create YOLO_V3 model CNN body in Keras."""
 72 |     darknet = Model(inputs, darknet_body(inputs))
 73 |     x, y1 = make_last_layers(darknet.output, 512, num_anchors*(num_classes+5))
 74 | 
 75 |     x = compose(
 76 |             DarknetConv2D_BN_Leaky(256, (1,1)),
 77 |             UpSampling2D(2))(x)
 78 |     x = Concatenate()([x,darknet.layers[152].output])
 79 |     x, y2 = make_last_layers(x, 256, num_anchors*(num_classes+5))
 80 | 
 81 |     x = compose(
 82 |             DarknetConv2D_BN_Leaky(128, (1,1)),
 83 |             UpSampling2D(2))(x)
 84 |     x = Concatenate()([x,darknet.layers[92].output])
 85 |     x, y3 = make_last_layers(x, 128, num_anchors*(num_classes+5))
 86 | 
 87 |     return Model(inputs, [y1,y2,y3])
 88 | 
 89 | 
 90 | def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
 91 |     """Convert final layer features to bounding box parameters."""
 92 |     num_anchors = len(anchors)
 93 |     # Reshape to batch, height, width, num_anchors, box_params.
 94 |     anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
 95 | 
 96 |     grid_shape = K.shape(feats)[1:3] # height, width
 97 |     grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
 98 |         [1, grid_shape[1], 1, 1])
 99 |     grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
100 |         [grid_shape[0], 1, 1, 1])
101 |     grid = K.concatenate([grid_x, grid_y])
102 |     grid = K.cast(grid, K.dtype(feats))
103 | 
104 |     feats = K.reshape(
105 |         feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
106 | 
107 |     # Adjust preditions to each spatial grid point and anchor size.
108 |     box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
109 |     box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
110 |     box_confidence = K.sigmoid(feats[..., 4:5])
111 |     box_class_probs = K.sigmoid(feats[..., 5:])
112 | 
113 |     if calc_loss == True:
114 |         return grid, feats, box_xy, box_wh
115 |     return box_xy, box_wh, box_confidence, box_class_probs
116 | 
117 | 
118 | def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
119 |     '''Get corrected boxes'''
120 |     box_yx = box_xy[..., ::-1]
121 |     box_hw = box_wh[..., ::-1]
122 |     input_shape = K.cast(input_shape, K.dtype(box_yx))
123 |     image_shape = K.cast(image_shape, K.dtype(box_yx))
124 |     new_shape = K.round(image_shape * K.min(input_shape/image_shape))
125 |     offset = (input_shape-new_shape)/2./input_shape
126 |     scale = input_shape/new_shape
127 |     box_yx = (box_yx - offset) * scale
128 |     box_hw *= scale
129 | 
130 |     box_mins = box_yx - (box_hw / 2.)
131 |     box_maxes = box_yx + (box_hw / 2.)
132 |     boxes =  K.concatenate([
133 |         box_mins[..., 0:1],  # y_min
134 |         box_mins[..., 1:2],  # x_min
135 |         box_maxes[..., 0:1],  # y_max
136 |         box_maxes[..., 1:2]  # x_max
137 |     ])
138 | 
139 |     # Scale boxes back to original image shape.
140 |     boxes *= K.concatenate([image_shape, image_shape])
141 |     return boxes
142 | 
143 | 
144 | def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
145 |     '''Process Conv layer output'''
146 |     box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats,
147 |         anchors, num_classes, input_shape)
148 |     boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
149 |     boxes = K.reshape(boxes, [-1, 4])
150 |     box_scores = box_confidence * box_class_probs
151 |     box_scores = K.reshape(box_scores, [-1, num_classes])
152 |     return boxes, box_scores
153 | 
154 | 
155 | def yolo_eval(yolo_outputs,
156 |               anchors,
157 |               num_classes,
158 |               image_shape,
159 |               max_boxes=20,
160 |               score_threshold=.6,
161 |               iou_threshold=.5):
162 |     """Evaluate YOLO model on given input and return filtered boxes."""
163 |     num_layers = len(yolo_outputs)
164 |     anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] # default setting
165 |     input_shape = K.shape(yolo_outputs[0])[1:3] * 32
166 |     boxes = []
167 |     box_scores = []
168 |     for l in range(num_layers):
169 |         _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
170 |             anchors[anchor_mask[l]], num_classes, input_shape, image_shape)
171 |         boxes.append(_boxes)
172 |         box_scores.append(_box_scores)
173 |     boxes = K.concatenate(boxes, axis=0)
174 |     box_scores = K.concatenate(box_scores, axis=0)
175 | 
176 |     mask = box_scores >= score_threshold
177 |     max_boxes_tensor = K.constant(max_boxes, dtype='int32')
178 |     boxes_ = []
179 |     scores_ = []
180 |     classes_ = []
181 |     for c in range(num_classes):
182 |         # TODO: use keras backend instead of tf.
183 |         class_boxes = tf.boolean_mask(boxes, mask[:, c])
184 |         class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
185 |         # nms_index = tf.image.non_max_suppression(
186 |         #     class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
187 |         # class_boxes = K.gather(class_boxes, nms_index)
188 |         # class_box_scores = K.gather(class_box_scores, nms_index)
189 |         classes = K.ones_like(class_box_scores, 'int32') * c
190 |         boxes_.append(class_boxes)
191 |         scores_.append(class_box_scores)
192 |         classes_.append(classes)
193 |     boxes_ = K.concatenate(boxes_, axis=0)
194 |     scores_ = K.concatenate(scores_, axis=0)
195 |     classes_ = K.concatenate(classes_, axis=0)
196 | 
197 |     return boxes_, scores_, classes_
198 | 
199 | 
200 | def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
201 |     '''Preprocess true boxes to training input format
202 | 
203 |     Parameters
204 |     ----------
205 |     true_boxes: array, shape=(m, T, 5)
206 |         Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape.
207 |     input_shape: array-like, hw, multiples of 32
208 |     anchors: array, shape=(N, 2), wh
209 |     num_classes: integer
210 | 
211 |     Returns
212 |     -------
213 |     y_true: list of array, shape like yolo_outputs, xywh are reletive value
214 | 
215 |     '''
216 |     assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes'
217 |     num_layers = len(anchors)//3 # default setting
218 |     anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
219 | 
220 |     true_boxes = np.array(true_boxes, dtype='float32')
221 |     input_shape = np.array(input_shape, dtype='int32')
222 |     boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
223 |     boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
224 |     true_boxes[..., 0:2] = boxes_xy/input_shape[::-1]
225 |     true_boxes[..., 2:4] = boxes_wh/input_shape[::-1]
226 | 
227 |     m = true_boxes.shape[0]
228 |     grid_shapes = [input_shape//{0:32, 1:16, 2:8}[l] for l in range(num_layers)]
229 |     y_true = [np.zeros((m,grid_shapes[l][0],grid_shapes[l][1],len(anchor_mask[l]),5+num_classes),
230 |         dtype='float32') for l in range(num_layers)]
231 | 
232 |     # Expand dim to apply broadcasting.
233 |     anchors = np.expand_dims(anchors, 0)
234 |     anchor_maxes = anchors / 2.
235 |     anchor_mins = -anchor_maxes
236 |     valid_mask = boxes_wh[..., 0]>0
237 | 
238 |     for b in range(m):
239 |         # Discard zero rows.
240 |         wh = boxes_wh[b, valid_mask[b]]
241 |         if len(wh)==0: continue
242 |         # Expand dim to apply broadcasting.
243 |         wh = np.expand_dims(wh, -2)
244 |         box_maxes = wh / 2.
245 |         box_mins = -box_maxes
246 | 
247 |         intersect_mins = np.maximum(box_mins, anchor_mins)
248 |         intersect_maxes = np.minimum(box_maxes, anchor_maxes)
249 |         intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
250 |         intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
251 |         box_area = wh[..., 0] * wh[..., 1]
252 |         anchor_area = anchors[..., 0] * anchors[..., 1]
253 |         iou = intersect_area / (box_area + anchor_area - intersect_area)
254 | 
255 |         # Find best anchor for each true box
256 |         best_anchor = np.argmax(iou, axis=-1)
257 | 
258 |         for t, n in enumerate(best_anchor):
259 |             for l in range(num_layers):
260 |                 if n in anchor_mask[l]:
261 |                     i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32')
262 |                     j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32')
263 |                     k = anchor_mask[l].index(n)
264 |                     c = true_boxes[b,t, 4].astype('int32')
265 |                     y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4]
266 |                     y_true[l][b, j, i, k, 4] = 1
267 |                     y_true[l][b, j, i, k, 5+c] = 1
268 | 
269 |     return y_true
270 | 
271 | 
272 | def box_iou(b1, b2):
273 |     '''Return iou tensor
274 | 
275 |     Parameters
276 |     ----------
277 |     b1: tensor, shape=(i1,...,iN, 4), xywh
278 |     b2: tensor, shape=(j, 4), xywh
279 | 
280 |     Returns
281 |     -------
282 |     iou: tensor, shape=(i1,...,iN, j)
283 | 
284 |     '''
285 | 
286 |     # Expand dim to apply broadcasting.
287 |     b1 = K.expand_dims(b1, -2)
288 |     b1_xy = b1[..., :2]
289 |     b1_wh = b1[..., 2:4]
290 |     b1_wh_half = b1_wh/2.
291 |     b1_mins = b1_xy - b1_wh_half
292 |     b1_maxes = b1_xy + b1_wh_half
293 | 
294 |     # Expand dim to apply broadcasting.
295 |     b2 = K.expand_dims(b2, 0)
296 |     b2_xy = b2[..., :2]
297 |     b2_wh = b2[..., 2:4]
298 |     b2_wh_half = b2_wh/2.
299 |     b2_mins = b2_xy - b2_wh_half
300 |     b2_maxes = b2_xy + b2_wh_half
301 | 
302 |     intersect_mins = K.maximum(b1_mins, b2_mins)
303 |     intersect_maxes = K.minimum(b1_maxes, b2_maxes)
304 |     intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
305 |     intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
306 |     b1_area = b1_wh[..., 0] * b1_wh[..., 1]
307 |     b2_area = b2_wh[..., 0] * b2_wh[..., 1]
308 |     iou = intersect_area / (b1_area + b2_area - intersect_area)
309 | 
310 |     return iou
311 | 
312 | 
313 | def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
314 |     '''Return yolo_loss tensor
315 | 
316 |     Parameters
317 |     ----------
318 |     yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
319 |     y_true: list of array, the output of preprocess_true_boxes
320 |     anchors: array, shape=(N, 2), wh
321 |     num_classes: integer
322 |     ignore_thresh: float, the iou threshold whether to ignore object confidence loss
323 | 
324 |     Returns
325 |     -------
326 |     loss: tensor, shape=(1,)
327 | 
328 |     '''
329 |     num_layers = len(anchors)//3 # default setting
330 |     yolo_outputs = args[:num_layers]
331 |     y_true = args[num_layers:]
332 |     anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
333 |     input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
334 |     grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
335 |     loss = 0
336 |     m = K.shape(yolo_outputs[0])[0] # batch size, tensor
337 |     mf = K.cast(m, K.dtype(yolo_outputs[0]))
338 | 
339 |     for l in range(num_layers):
340 |         object_mask = y_true[l][..., 4:5]
341 |         true_class_probs = y_true[l][..., 5:]
342 | 
343 |         grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
344 |              anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
345 |         pred_box = K.concatenate([pred_xy, pred_wh])
346 | 
347 |         # Darknet raw box to calculate loss.
348 |         raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid
349 |         raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
350 |         raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf
351 |         box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]
352 | 
353 |         # Find ignore mask, iterate over each of batch.
354 |         ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
355 |         object_mask_bool = K.cast(object_mask, 'bool')
356 |         def loop_body(b, ignore_mask):
357 |             true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
358 |             iou = box_iou(pred_box[b], true_box)
359 |             best_iou = K.max(iou, axis=-1)
360 |             ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
361 |             return b+1, ignore_mask
362 |         _, ignore_mask = K.control_flow_ops.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
363 |         ignore_mask = ignore_mask.stack()
364 |         ignore_mask = K.expand_dims(ignore_mask, -1)
365 | 
366 |         # K.binary_crossentropy is helpful to avoid exp overflow.
367 |         xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[...,0:2], from_logits=True)
368 |         wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh-raw_pred[...,2:4])
369 |         confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
370 |             (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
371 |         # 标签平滑正则化
372 |         label_smoothing = 0.1
373 |         true_class_probs = true_class_probs * (1 - label_smoothing) + label_smoothing / num_classes
374 |         
375 |         class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)
376 | 
377 |         xy_loss = K.sum(xy_loss) / mf
378 |         wh_loss = K.sum(wh_loss) / mf
379 |         confidence_loss = K.sum(confidence_loss) / mf
380 |         class_loss = K.sum(class_loss) / mf
381 |         loss += xy_loss + wh_loss + confidence_loss + class_loss
382 |         if print_loss:
383 |             loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)], message='loss: ')
384 |     return loss
385 | 


--------------------------------------------------------------------------------
/yolo3/utils.py:
--------------------------------------------------------------------------------
  1 | """Miscellaneous utility functions."""
  2 | 
  3 | from functools import reduce
  4 | import numpy as np
  5 | import cv2
  6 | import random
  7 | import albumentations as A
  8 | from albumentations import (
  9 |     HorizontalFlip,
 10 |     VerticalFlip,
 11 |     RandomCrop,
 12 |     Resize,
 13 |     Compose
 14 | )
 15 | 
 16 | 
 17 | def compose(*funcs):
 18 |     """Compose arbitrarily many functions, evaluated left to right.
 19 | 
 20 |     Reference: https://mathieularose.com/function-composition-in-python/
 21 |     """
 22 |     # return lambda x: reduce(lambda v, f: f(v), funcs, x)
 23 |     if funcs:
 24 |         return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs)
 25 |     else:
 26 |         raise ValueError('Composition of empty sequence not supported.')
 27 | 
 28 | def rand(a=0, b=1):
 29 |     return np.random.rand()*(b-a) + a
 30 | 
 31 | def get_aug(aug, min_area=0., min_visibility=0.):
 32 |     return Compose(aug, bbox_params={'format': 'pascal_voc', 
 33 |                                      'min_area': min_area, 
 34 |                                      'min_visibility': min_visibility,
 35 |                                      'label_fields': ['category_id']})
 36 | 
 37 | def get_random_data(annotation_line, input_shape, max_boxes=100,trainable=True):
 38 |     # 读入图片，候选框和尺寸信息
 39 |     line = annotation_line.split()
 40 |     image = cv2.imread(line[0])
 41 |     ih, iw = image.shape[:2]
 42 |     h, w = input_shape
 43 |     box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
 44 | 
 45 |     # 候选框和标签分离
 46 |     bboxes = box.tolist()
 47 |     category_id = []
 48 |     for n in range(len(bboxes)):
 49 |         category_id.append(bboxes[n][-1])
 50 |         del bboxes[n][-1]
 51 | 
 52 |     # 直接缩放或者随机裁剪来缩放图片，目的是保持图片原有的长宽比
 53 |     scale = min(w/iw, h/ih)
 54 |     nw = int(iw*scale)
 55 |     nh = int(ih*scale)
 56 | 
 57 |     if trainable:
 58 |         # 直接缩放
 59 |         if rand() < .7:
 60 |             annotations = {'image': image,
 61 |                            'bboxes':bboxes,
 62 |                            'category_id': category_id}
 63 |             aug = get_aug([Resize(p=1, height=nh, width=nw)])
 64 |             augmented = aug(**annotations)
 65 | 
 66 |             # 删除面积过小的候选框
 67 |             bboxes = augmented['bboxes']
 68 |             category_id = augmented['category_id']
 69 |             for n in range(len(bboxes)):
 70 |                 if n >= len(bboxes):
 71 |                     break
 72 | 
 73 |                 i = bboxes[n]
 74 |                 if (i[2]-i[0]) * (i[3]-i[1]) < 120:
 75 |                     del bboxes[n]
 76 |                     del category_id[n]
 77 |             if len(bboxes) == 0:
 78 |                 return None, None
 79 | 
 80 |             augmented['bboxes'] = bboxes
 81 |             augmented['category_id'] = category_id
 82 |         # 随机剪裁
 83 |         else:
 84 |             annotations = {'image': image,
 85 |                            'bboxes':bboxes,
 86 |                            'category_id': category_id}
 87 |             aug = get_aug([RandomCrop(p=1, height=nh, width=nw)], 
 88 |                            min_visibility=0.3)
 89 |             augmented = aug(**annotations)
 90 |             if len(augmented['bboxes']) == 0:
 91 |                 return None, None
 92 | 
 93 |         # 图像轻度增强
 94 |         if rand() < .5:
 95 |             aug = get_aug([ A.Compose([
 96 |                             A.RandomBrightness(p=1),
 97 |                             A.RandomContrast(p=1),
 98 |                             A.RandomGamma(p=1),
 99 |                             A.CLAHE(p=1),
100 |                         ], p=1)])
101 |             augmented = aug(**augmented)
102 | 
103 |         # 水平和垂直翻转
104 |         if rand() < .5:
105 |             aug = get_aug([VerticalFlip(p=1)])
106 |             augmented = aug(**augmented)
107 |         if rand() < .5:
108 |             aug = get_aug([HorizontalFlip(p=1)])
109 |             augmented = aug(**augmented)
110 |     else:
111 |         # 测试集直接缩放
112 |         annotations = {'image': image,
113 |                        'bboxes':bboxes,
114 |                        'category_id': category_id}
115 |         aug = get_aug([Resize(p=1, height=nh, width=nw)])
116 |         augmented = aug(**annotations)
117 | 
118 |     # 标签数组和候选框数组合并
119 |     image = augmented['image']
120 |     box = augmented['bboxes']
121 |     category_id = augmented['category_id']
122 |     for n in range(len(box)):
123 |         box[n] = list(map(lambda x : round(x), box[n]))
124 |         box[n].append(category_id[n])
125 |     
126 |     # 用灰色像素块来做背景扩充图片满足输入尺寸需求
127 |     dx = (w-nw) // 2
128 |     dy = (h-nh) // 2
129 |     image = np.pad(image, ((dy, dy), (dx, dx), (0, 0)),
130 |                    'constant', constant_values=128)
131 |     if tuple(image.shape[:2]) != input_shape:
132 |         image = np.pad(image, ((0, input_shape[0]-image.shape[0]), 
133 |                 (0, input_shape[1]-image.shape[1]), (0, 0)),
134 |                 'constant', constant_values=128)
135 | 
136 |     # 模型输入格式处理
137 |     image = image / 255.
138 |     box_data = np.zeros((max_boxes,5))
139 |     box = np.array([np.array(box[i]) for i in range(len(box))])
140 |     box[:, [0,2]] = box[:, [0,2]] + dx
141 |     box[:, [1,3]] = box[:, [1,3]] + dy
142 |     box_data[:len(box)] = box
143 | 
144 |     return image, box_data
145 | 


--------------------------------------------------------------------------------
/yolov3.cfg:
--------------------------------------------------------------------------------
  1 | [net]
  2 | # Testing
  3 | batch=1
  4 | subdivisions=1
  5 | # Training
  6 | # batch=64
  7 | # subdivisions=16
  8 | width=416
  9 | height=416
 10 | channels=3
 11 | momentum=0.9
 12 | decay=0.0005
 13 | angle=0
 14 | saturation = 1.5
 15 | exposure = 1.5
 16 | hue=.1
 17 | 
 18 | learning_rate=0.001
 19 | burn_in=1000
 20 | max_batches = 500200
 21 | policy=steps
 22 | steps=400000,450000
 23 | scales=.1,.1
 24 | 
 25 | [convolutional]
 26 | batch_normalize=1
 27 | filters=32
 28 | size=3
 29 | stride=1
 30 | pad=1
 31 | activation=leaky
 32 | 
 33 | # Downsample
 34 | 
 35 | [convolutional]
 36 | batch_normalize=1
 37 | filters=64
 38 | size=3
 39 | stride=2
 40 | pad=1
 41 | activation=leaky
 42 | 
 43 | [convolutional]
 44 | batch_normalize=1
 45 | filters=32
 46 | size=1
 47 | stride=1
 48 | pad=1
 49 | activation=leaky
 50 | 
 51 | [convolutional]
 52 | batch_normalize=1
 53 | filters=64
 54 | size=3
 55 | stride=1
 56 | pad=1
 57 | activation=leaky
 58 | 
 59 | [shortcut]
 60 | from=-3
 61 | activation=linear
 62 | 
 63 | # Downsample
 64 | 
 65 | [convolutional]
 66 | batch_normalize=1
 67 | filters=128
 68 | size=3
 69 | stride=2
 70 | pad=1
 71 | activation=leaky
 72 | 
 73 | [convolutional]
 74 | batch_normalize=1
 75 | filters=64
 76 | size=1
 77 | stride=1
 78 | pad=1
 79 | activation=leaky
 80 | 
 81 | [convolutional]
 82 | batch_normalize=1
 83 | filters=128
 84 | size=3
 85 | stride=1
 86 | pad=1
 87 | activation=leaky
 88 | 
 89 | [shortcut]
 90 | from=-3
 91 | activation=linear
 92 | 
 93 | [convolutional]
 94 | batch_normalize=1
 95 | filters=64
 96 | size=1
 97 | stride=1
 98 | pad=1
 99 | activation=leaky
100 | 
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 | 
109 | [shortcut]
110 | from=-3
111 | activation=linear
112 | 
113 | # Downsample
114 | 
115 | [convolutional]
116 | batch_normalize=1
117 | filters=256
118 | size=3
119 | stride=2
120 | pad=1
121 | activation=leaky
122 | 
123 | [convolutional]
124 | batch_normalize=1
125 | filters=128
126 | size=1
127 | stride=1
128 | pad=1
129 | activation=leaky
130 | 
131 | [convolutional]
132 | batch_normalize=1
133 | filters=256
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 | 
139 | [shortcut]
140 | from=-3
141 | activation=linear
142 | 
143 | [convolutional]
144 | batch_normalize=1
145 | filters=128
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 | 
151 | [convolutional]
152 | batch_normalize=1
153 | filters=256
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 | 
159 | [shortcut]
160 | from=-3
161 | activation=linear
162 | 
163 | [convolutional]
164 | batch_normalize=1
165 | filters=128
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 | 
171 | [convolutional]
172 | batch_normalize=1
173 | filters=256
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 | 
179 | [shortcut]
180 | from=-3
181 | activation=linear
182 | 
183 | [convolutional]
184 | batch_normalize=1
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=1
189 | activation=leaky
190 | 
191 | [convolutional]
192 | batch_normalize=1
193 | filters=256
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 | 
199 | [shortcut]
200 | from=-3
201 | activation=linear
202 | 
203 | 
204 | [convolutional]
205 | batch_normalize=1
206 | filters=128
207 | size=1
208 | stride=1
209 | pad=1
210 | activation=leaky
211 | 
212 | [convolutional]
213 | batch_normalize=1
214 | filters=256
215 | size=3
216 | stride=1
217 | pad=1
218 | activation=leaky
219 | 
220 | [shortcut]
221 | from=-3
222 | activation=linear
223 | 
224 | [convolutional]
225 | batch_normalize=1
226 | filters=128
227 | size=1
228 | stride=1
229 | pad=1
230 | activation=leaky
231 | 
232 | [convolutional]
233 | batch_normalize=1
234 | filters=256
235 | size=3
236 | stride=1
237 | pad=1
238 | activation=leaky
239 | 
240 | [shortcut]
241 | from=-3
242 | activation=linear
243 | 
244 | [convolutional]
245 | batch_normalize=1
246 | filters=128
247 | size=1
248 | stride=1
249 | pad=1
250 | activation=leaky
251 | 
252 | [convolutional]
253 | batch_normalize=1
254 | filters=256
255 | size=3
256 | stride=1
257 | pad=1
258 | activation=leaky
259 | 
260 | [shortcut]
261 | from=-3
262 | activation=linear
263 | 
264 | [convolutional]
265 | batch_normalize=1
266 | filters=128
267 | size=1
268 | stride=1
269 | pad=1
270 | activation=leaky
271 | 
272 | [convolutional]
273 | batch_normalize=1
274 | filters=256
275 | size=3
276 | stride=1
277 | pad=1
278 | activation=leaky
279 | 
280 | [shortcut]
281 | from=-3
282 | activation=linear
283 | 
284 | # Downsample
285 | 
286 | [convolutional]
287 | batch_normalize=1
288 | filters=512
289 | size=3
290 | stride=2
291 | pad=1
292 | activation=leaky
293 | 
294 | [convolutional]
295 | batch_normalize=1
296 | filters=256
297 | size=1
298 | stride=1
299 | pad=1
300 | activation=leaky
301 | 
302 | [convolutional]
303 | batch_normalize=1
304 | filters=512
305 | size=3
306 | stride=1
307 | pad=1
308 | activation=leaky
309 | 
310 | [shortcut]
311 | from=-3
312 | activation=linear
313 | 
314 | 
315 | [convolutional]
316 | batch_normalize=1
317 | filters=256
318 | size=1
319 | stride=1
320 | pad=1
321 | activation=leaky
322 | 
323 | [convolutional]
324 | batch_normalize=1
325 | filters=512
326 | size=3
327 | stride=1
328 | pad=1
329 | activation=leaky
330 | 
331 | [shortcut]
332 | from=-3
333 | activation=linear
334 | 
335 | 
336 | [convolutional]
337 | batch_normalize=1
338 | filters=256
339 | size=1
340 | stride=1
341 | pad=1
342 | activation=leaky
343 | 
344 | [convolutional]
345 | batch_normalize=1
346 | filters=512
347 | size=3
348 | stride=1
349 | pad=1
350 | activation=leaky
351 | 
352 | [shortcut]
353 | from=-3
354 | activation=linear
355 | 
356 | 
357 | [convolutional]
358 | batch_normalize=1
359 | filters=256
360 | size=1
361 | stride=1
362 | pad=1
363 | activation=leaky
364 | 
365 | [convolutional]
366 | batch_normalize=1
367 | filters=512
368 | size=3
369 | stride=1
370 | pad=1
371 | activation=leaky
372 | 
373 | [shortcut]
374 | from=-3
375 | activation=linear
376 | 
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=1
381 | stride=1
382 | pad=1
383 | activation=leaky
384 | 
385 | [convolutional]
386 | batch_normalize=1
387 | filters=512
388 | size=3
389 | stride=1
390 | pad=1
391 | activation=leaky
392 | 
393 | [shortcut]
394 | from=-3
395 | activation=linear
396 | 
397 | 
398 | [convolutional]
399 | batch_normalize=1
400 | filters=256
401 | size=1
402 | stride=1
403 | pad=1
404 | activation=leaky
405 | 
406 | [convolutional]
407 | batch_normalize=1
408 | filters=512
409 | size=3
410 | stride=1
411 | pad=1
412 | activation=leaky
413 | 
414 | [shortcut]
415 | from=-3
416 | activation=linear
417 | 
418 | 
419 | [convolutional]
420 | batch_normalize=1
421 | filters=256
422 | size=1
423 | stride=1
424 | pad=1
425 | activation=leaky
426 | 
427 | [convolutional]
428 | batch_normalize=1
429 | filters=512
430 | size=3
431 | stride=1
432 | pad=1
433 | activation=leaky
434 | 
435 | [shortcut]
436 | from=-3
437 | activation=linear
438 | 
439 | [convolutional]
440 | batch_normalize=1
441 | filters=256
442 | size=1
443 | stride=1
444 | pad=1
445 | activation=leaky
446 | 
447 | [convolutional]
448 | batch_normalize=1
449 | filters=512
450 | size=3
451 | stride=1
452 | pad=1
453 | activation=leaky
454 | 
455 | [shortcut]
456 | from=-3
457 | activation=linear
458 | 
459 | # Downsample
460 | 
461 | [convolutional]
462 | batch_normalize=1
463 | filters=1024
464 | size=3
465 | stride=2
466 | pad=1
467 | activation=leaky
468 | 
469 | [convolutional]
470 | batch_normalize=1
471 | filters=512
472 | size=1
473 | stride=1
474 | pad=1
475 | activation=leaky
476 | 
477 | [convolutional]
478 | batch_normalize=1
479 | filters=1024
480 | size=3
481 | stride=1
482 | pad=1
483 | activation=leaky
484 | 
485 | [shortcut]
486 | from=-3
487 | activation=linear
488 | 
489 | [convolutional]
490 | batch_normalize=1
491 | filters=512
492 | size=1
493 | stride=1
494 | pad=1
495 | activation=leaky
496 | 
497 | [convolutional]
498 | batch_normalize=1
499 | filters=1024
500 | size=3
501 | stride=1
502 | pad=1
503 | activation=leaky
504 | 
505 | [shortcut]
506 | from=-3
507 | activation=linear
508 | 
509 | [convolutional]
510 | batch_normalize=1
511 | filters=512
512 | size=1
513 | stride=1
514 | pad=1
515 | activation=leaky
516 | 
517 | [convolutional]
518 | batch_normalize=1
519 | filters=1024
520 | size=3
521 | stride=1
522 | pad=1
523 | activation=leaky
524 | 
525 | [shortcut]
526 | from=-3
527 | activation=linear
528 | 
529 | [convolutional]
530 | batch_normalize=1
531 | filters=512
532 | size=1
533 | stride=1
534 | pad=1
535 | activation=leaky
536 | 
537 | [convolutional]
538 | batch_normalize=1
539 | filters=1024
540 | size=3
541 | stride=1
542 | pad=1
543 | activation=leaky
544 | 
545 | [shortcut]
546 | from=-3
547 | activation=linear
548 | 
549 | ######################
550 | 
551 | [convolutional]
552 | batch_normalize=1
553 | filters=512
554 | size=1
555 | stride=1
556 | pad=1
557 | activation=leaky
558 | 
559 | [convolutional]
560 | batch_normalize=1
561 | size=3
562 | stride=1
563 | pad=1
564 | filters=1024
565 | activation=leaky
566 | 
567 | [convolutional]
568 | batch_normalize=1
569 | filters=512
570 | size=1
571 | stride=1
572 | pad=1
573 | activation=leaky
574 | 
575 | [convolutional]
576 | batch_normalize=1
577 | size=3
578 | stride=1
579 | pad=1
580 | filters=1024
581 | activation=leaky
582 | 
583 | [convolutional]
584 | batch_normalize=1
585 | filters=512
586 | size=1
587 | stride=1
588 | pad=1
589 | activation=leaky
590 | 
591 | [convolutional]
592 | batch_normalize=1
593 | size=3
594 | stride=1
595 | pad=1
596 | filters=1024
597 | activation=leaky
598 | 
599 | [convolutional]
600 | size=1
601 | stride=1
602 | pad=1
603 | filters=255
604 | activation=linear
605 | 
606 | 
607 | [yolo]
608 | mask = 6,7,8
609 | anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
610 | classes=80
611 | num=9
612 | jitter=.3
613 | ignore_thresh = .5
614 | truth_thresh = 1
615 | random=1
616 | 
617 | 
618 | [route]
619 | layers = -4
620 | 
621 | [convolutional]
622 | batch_normalize=1
623 | filters=256
624 | size=1
625 | stride=1
626 | pad=1
627 | activation=leaky
628 | 
629 | [upsample]
630 | stride=2
631 | 
632 | [route]
633 | layers = -1, 61
634 | 
635 | 
636 | 
637 | [convolutional]
638 | batch_normalize=1
639 | filters=256
640 | size=1
641 | stride=1
642 | pad=1
643 | activation=leaky
644 | 
645 | [convolutional]
646 | batch_normalize=1
647 | size=3
648 | stride=1
649 | pad=1
650 | filters=512
651 | activation=leaky
652 | 
653 | [convolutional]
654 | batch_normalize=1
655 | filters=256
656 | size=1
657 | stride=1
658 | pad=1
659 | activation=leaky
660 | 
661 | [convolutional]
662 | batch_normalize=1
663 | size=3
664 | stride=1
665 | pad=1
666 | filters=512
667 | activation=leaky
668 | 
669 | [convolutional]
670 | batch_normalize=1
671 | filters=256
672 | size=1
673 | stride=1
674 | pad=1
675 | activation=leaky
676 | 
677 | [convolutional]
678 | batch_normalize=1
679 | size=3
680 | stride=1
681 | pad=1
682 | filters=512
683 | activation=leaky
684 | 
685 | [convolutional]
686 | size=1
687 | stride=1
688 | pad=1
689 | filters=255
690 | activation=linear
691 | 
692 | 
693 | [yolo]
694 | mask = 3,4,5
695 | anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
696 | classes=80
697 | num=9
698 | jitter=.3
699 | ignore_thresh = .5
700 | truth_thresh = 1
701 | random=1
702 | 
703 | 
704 | 
705 | [route]
706 | layers = -4
707 | 
708 | [convolutional]
709 | batch_normalize=1
710 | filters=128
711 | size=1
712 | stride=1
713 | pad=1
714 | activation=leaky
715 | 
716 | [upsample]
717 | stride=2
718 | 
719 | [route]
720 | layers = -1, 36
721 | 
722 | 
723 | 
724 | [convolutional]
725 | batch_normalize=1
726 | filters=128
727 | size=1
728 | stride=1
729 | pad=1
730 | activation=leaky
731 | 
732 | [convolutional]
733 | batch_normalize=1
734 | size=3
735 | stride=1
736 | pad=1
737 | filters=256
738 | activation=leaky
739 | 
740 | [convolutional]
741 | batch_normalize=1
742 | filters=128
743 | size=1
744 | stride=1
745 | pad=1
746 | activation=leaky
747 | 
748 | [convolutional]
749 | batch_normalize=1
750 | size=3
751 | stride=1
752 | pad=1
753 | filters=256
754 | activation=leaky
755 | 
756 | [convolutional]
757 | batch_normalize=1
758 | filters=128
759 | size=1
760 | stride=1
761 | pad=1
762 | activation=leaky
763 | 
764 | [convolutional]
765 | batch_normalize=1
766 | size=3
767 | stride=1
768 | pad=1
769 | filters=256
770 | activation=leaky
771 | 
772 | [convolutional]
773 | size=1
774 | stride=1
775 | pad=1
776 | filters=255
777 | activation=linear
778 | 
779 | 
780 | [yolo]
781 | mask = 0,1,2
782 | anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
783 | classes=80
784 | num=9
785 | jitter=.3
786 | ignore_thresh = .5
787 | truth_thresh = 1
788 | random=1
789 | 
790 | 


--------------------------------------------------------------------------------