├── .gitattributes
├── .gitignore
├── .ipynb_checkpoints
│   ├── data_process-checkpoint.ipynb
│   └── data_vision-checkpoint.ipynb
├── LICENSE
├── README.md
├── __pycache__
│   ├── cosine_annealing.cpython-36.pyc
│   ├── swa.cpython-36.pyc
│   └── train.cpython-36.pyc
├── convert.py
├── cosine_annealing.py
├── data_augmention
│   ├── .ipynb_checkpoints
│   │   └── 未命名-checkpoint.ipynb
│   ├── ColorCorrect.py
│   ├── DepthMap_RTM.py
│   ├── GuidedFilter.py
│   ├── Saturation.py
│   ├── Saturation_Max.py
│   ├── __pycache__
│   │   ├── ColorCorrect.cpython-36.pyc
│   │   ├── ColorCorrect.cpython-37.pyc
│   │   ├── DepthMap_RTM.cpython-36.pyc
│   │   ├── DepthMap_RTM.cpython-37.pyc
│   │   ├── GuidedFilter.cpython-36.pyc
│   │   ├── GuidedFilter.cpython-37.pyc
│   │   ├── Saturation_Max.cpython-36.pyc
│   │   ├── Saturation_Max.cpython-37.pyc
│   │   ├── augment.cpython-36.pyc
│   │   ├── depthMapEstimation.cpython-36.pyc
│   │   ├── depthMapEstimation.cpython-37.pyc
│   │   ├── depthMin.cpython-36.pyc
│   │   ├── depthMin.cpython-37.pyc
│   │   ├── getGBTransmission.cpython-36.pyc
│   │   ├── getGBTransmission.cpython-37.pyc
│   │   ├── getRefinedTransmission.cpython-36.pyc
│   │   ├── getRefinedTransmission.cpython-37.pyc
│   │   ├── getTransmissionMap.cpython-36.pyc
│   │   ├── getTransmissionMap.cpython-37.pyc
│   │   ├── global_histogram_stretching.cpython-36.pyc
│   │   ├── global_histogram_stretching.cpython-37.pyc
│   │   ├── sceneRadiance.cpython-36.pyc
│   │   └── sceneRadiance.cpython-37.pyc
│   ├── augment.py
│   ├── depthMapEstimation.py
│   ├── depthMin.py
│   ├── getGBTransmission.py
│   ├── getRefinedTransmission.py
│   ├── getTransmissionMap.py
│   ├── global_histogram_stretching.py
│   └── sceneRadiance.py
├── data_process.ipynb
├── data_process.py
├── data_vision.ipynb
├── image_aug.py
├── kmeans.py
├── predict.py
├── swa.py
├── train.py
├── yolo3
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-36.pyc
│   │   ├── model.cpython-36.pyc
│   │   └── utils.cpython-36.pyc
│   ├── model.py
│   └── utils.py
└── yolov3.cfg

--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
 1 | *.ipynb linguist-language=python
 2 | 

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | data/
 2 | *pre_train/
 3 | *.h5
 4 | *models/
 5 | 

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Fieldhunter
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# 2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest

[![](https://img.shields.io/badge/license-MIT-green)](https://github.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/blob/master/LICENSE)
[![](https://img.shields.io/badge/author-Fieldhunter-blue)](https://github.com/Fieldhunter)
![](https://img.shields.io/badge/frame-keras-yellow)

2020 Zhanjiang Underwater Object Detection Algorithm Contest

## Preface
My score in this contest was not great: 20.14 on leaderboard A and 19.63 on leaderboard B. There are probably many reasons — the model, the underwater image enhancement algorithm, the tricks, or the rather demanding evaluation metric (the contest uses COCO mAP[@0.5:0.05:0.95]) — and the likely causes are discussed in the sections below. This was also my first object detection competition, so some oversights were inevitable. The contest also left me very disappointed with Keras; the details are in my blog post [Choice keras or pytorch?](https://fieldhunter.github.io/talking_about_keras_and_pytorch/). Even though the score is not ideal, the pipeline works end to end and contains my own ideas; most likely something went wrong somewhere along the way that I never found (although I read the source for a long time and believe it is correct). Swapping in a few different strategies, or simply replacing the model with a PyTorch implementation, might raise the score considerably — the PyTorch baseline shared by an experienced participant reaches a much higher score. What is certain is that my own ideas are effective: **they raised the score from about 10 (bare model + image enhancement) to about 20** (which is why I suspect the YOLO implementation itself). I am publishing it here because it should be a useful reference for similar competitions or projects in the future. (Renting GPUs alone cost 500 RMB — painful.)

## Notes
The data, models, and pre_train folders are not uploaded because the files are too large. In the original data directory, test/ holds the test set (including the enhanced images) and train/ holds the training set (also including the enhanced images). The data directory also contains the classes.txt, yolo_anchors.txt, and train_data.txt required by the YOLO model. The pre_train directory holds the YOLO pre-trained weights yolov3.weights and the yolo_weights.h5 converted from them with convert.py. The models directory stores the trained models and the TensorBoard logs.

## Image enhancement
First, every image is enhanced (image_aug.py) and the result is saved to disk. Underwater image enhancement is a field I only encountered through this contest, and since this was also my first detection competition I focused on changing the model rather than the data, so the enhancement algorithm is simply an open-source implementation shared by an experienced participant in the contest group. The visualized results look quite good, but I cannot say for sure how effective it really is, so a weak underwater enhancement algorithm may be one reason the score is not higher. The algorithm itself lives in the data_augmention directory. To support the later data augmentation, **the training images are enhanced at their original resolution**, which makes processing very slow. To speed it up I first tried CuPy (the GPU version of NumPy), but some of the NumPy functions used by the algorithm are not yet supported by CuPy, so I stayed with NumPy and instead ran the work in a 10-process multiprocessing pool. **The test images are resized directly to the target size** before enhancement, which makes processing very fast and avoids the cost of very large images; they are then converted into the model input format. The exact resizing strategy is described in the [Data augmentation](#data-augmentation) section.

Before settling on that open-source algorithm I also tried two UnderWaterGAN models, but both output 256\*256 images in which the whole picture and the targets were distorted beyond recognition; the results were not particularly good, and one of them was very poor. So I gave up on GAN-based enhancement. (The image size probably does not match what the GANs were trained on — forcing images several thousand pixels wide down to 256 — since the results in their READMEs look quite good.)

## Data preprocessing
data_process.py collects statistics over all images and processes every bounding box carefully; the code is thoroughly commented, so the details are not repeated here. During box processing, **boxes with an area smaller than 120 pixels are removed**. kmeans.py then generates the anchors YOLO needs, finally producing train_data.txt and yolo_anchors.txt in the data directory.

The contest scores only 4 classes, but the dataset contains 5 — the extra one is waterweeds. Inspecting the data shows that waterweeds and echinus (sea urchins) can look very similar, so to strengthen the model's ability to tell them apart, **training uses all 5 classes and the waterweeds class is simply discarded at prediction time**. I also went through all 5k+ training images by eye and **removed the excessively blurry ones**, roughly 300 images, to improve dataset quality.

## Data augmentation
The augmentation code is in utils.py under the yolo3 directory. The model input size is **480\*480**, whereas the original YOLO size is 416\*416. Experiments showed that **the score improves steadily from 416 to 448 to 480**. Due to time and cost I did not try 512\*512, which might improve it further. For a detection task I did not want to brutally resize images of arbitrary size, so the strategy is: take the longer of height and width, compute the ratio that scales that side to 480, and **scale the whole image by that ratio**. After this aspect-preserving resize the long side is 480 and the short side is not; the short side is then **padded with gray pixels** to reach 480\*480. Dataset statistics show the width is always larger than the height, so the short side is always the height. The gray padding is split evenly above and below the resized image, so the processed image has **the resized training image in the middle and gray bands on the top and bottom**. The test images in the [Image enhancement](#image-enhancement) section are handled the same way; a minimal sketch of this letterboxing is shown below. PS: I tried to embed a base64 demo image here but it would not load for some reason, so text and code will have to do.

Concretely: training images are first either **directly resized or randomly cropped**, with probability 7:3. During direct resizing, boxes whose area falls below 120 after scaling are dropped, and the random crops have the same size as a directly resized image. Then, each with probability 1:1 and in order, **a second light photometric augmentation, a horizontal flip, and a vertical flip** are applied. Test images are simply resized. Gray padding is then added as described above. Finally the input formatting is done: **pixel normalization and box processing**. Again, the code is commented in detail.
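The following is a minimal sketch of the letterbox preprocessing described above, written independently of the repository's yolo3/utils.py: scale the longer side to the target size, pad the short side with gray so the result is 480\*480 with the image centered, and shift the boxes by the same scale and offset. The function names, the gray fill value of 128, and the box layout (xmin, ymin, xmax, ymax) are my own assumptions for illustration, not code taken from the repo.

```python
import numpy as np
import cv2

def letterbox(image, target=480, pad_value=128):
    """Aspect-preserving resize to target x target with gray padding."""
    h, w = image.shape[:2]
    scale = target / max(h, w)                 # ratio that maps the long side to `target`
    new_w, new_h = int(round(w * scale)), int(round(h * scale))
    resized = cv2.resize(image, (new_w, new_h))

    canvas = np.full((target, target, 3), pad_value, dtype=resized.dtype)
    top = (target - new_h) // 2                # padding split evenly (top/bottom for wide images)
    left = (target - new_w) // 2
    canvas[top:top + new_h, left:left + new_w] = resized
    return canvas, scale, left, top            # offsets are needed to shift the boxes the same way

def letterbox_boxes(boxes, scale, left, top):
    """Apply the same scale/offset to (xmin, ymin, xmax, ymax) boxes."""
    boxes = np.asarray(boxes, dtype=np.float32)
    boxes[:, [0, 2]] = boxes[:, [0, 2]] * scale + left
    boxes[:, [1, 3]] = boxes[:, [1, 3]] * scale + top
    return boxes
```

Per the description above, pixel normalization (dividing by 255) and dropping boxes whose scaled area falls below 120 would happen after this step.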
## Training
In train.py the input size is 480\*480 and the data are split 9:1 into training and validation sets. Training runs in two stages. In the first stage **all pre-trained layers are frozen**; RAdam is used (minimum learning rate 1e-5) with a warm-up schedule, batch size 32, for 100 epochs, with TensorBoard logging. In the second stage **all layers are unfrozen**; RAdam (minimum 1e-6) with warm-up, **the SWA algorithm, a cosine-annealing learning-rate schedule (from 1e-2 down to 1e-6), and batch size 8 (mainly because of GPU memory limits)** are used for 200 epochs, again with TensorBoard logging. A ModelCheckpoint callback decides after every epoch whether to save the model based on val_loss, and the model with the lowest val_loss is used for prediction, i.e. the best model is renamed to trained_weights_final.h5.

On the model side (model.py in the yolo3 directory), the class loss is modified to use **label smoothing**. In the visualized predictions the boxes and classifications looked quite good, but some obvious, correctly detected targets had very low confidence while some clear non-targets had relatively high confidence. So I tried modifying the confidence loss — multiplying either the whole loss or just its positive part (the confidence loss has a positive-sample part and a negative-sample part) by a factor so that the model pays more attention to confidence; a simple, blunt idea. The results were not good: the training loss was tens of points higher than before and would not come down, and the final score dropped a little. Given that the COCO mAP[@0.5:0.05:0.95] metric also rewards keeping more boxes, I abandoned the confidence-loss change.

I also considered other models such as Faster R-CNN. People in the contest group said almost nobody used single-stage models, and indeed a two-stage model would probably be more accurate. But... I will not rant about Keras again; for why I used nothing other than YOLO, see the blog post mentioned in the [Preface](#preface).

## Prediction
In predict.py, the predicted boxes are merged with **WBF (weighted boxes fusion)**; compared with the NMS used before, the WBF strategy **improved the score noticeably**. A series of post-processing steps follows; the code comments cover them in detail, so they are not repeated here. Also note that **the contest dataset's box coordinates start at 1; they were shifted to start at 0 before training, so post-processing must shift them back to 1**.

For the parameters, **a score threshold of 0.001 and an IoU threshold of 0.25 (both the model threshold and the WBF threshold)** gave the highest score.

## PS
Both **data preprocessing and data visualization** have Jupyter versions, covering **training-set statistics** and **visualization of each stage (training set, augmented data, prediction results)**; they are also commented in detail.

The best trained model and the TensorBoard logs are published in the releases.

The YOLOv3 code is modified from [qqwweee/keras-yolo3](https://github.com/qqwweee/keras-yolo3).

--------------------------------------------------------------------------------
/__pycache__/cosine_annealing.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/__pycache__/cosine_annealing.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/swa.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/__pycache__/swa.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/train.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/__pycache__/train.cpython-36.pyc
--------------------------------------------------------------------------------
/convert.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | """
 3 | Reads Darknet config and weights and creates Keras model with TF backend.
4 | 5 | """ 6 | 7 | import argparse 8 | import configparser 9 | import io 10 | import os 11 | from collections import defaultdict 12 | 13 | import numpy as np 14 | from keras import backend as K 15 | from keras.layers import (Conv2D, Input, ZeroPadding2D, Add, 16 | UpSampling2D, MaxPooling2D, Concatenate) 17 | from keras.layers.advanced_activations import LeakyReLU 18 | from keras.layers.normalization import BatchNormalization 19 | from keras.models import Model 20 | from keras.regularizers import l2 21 | from keras.utils.vis_utils import plot_model as plot 22 | 23 | 24 | parser = argparse.ArgumentParser(description='Darknet To Keras Converter.') 25 | parser.add_argument('config_path', help='Path to Darknet cfg file.') 26 | parser.add_argument('weights_path', help='Path to Darknet weights file.') 27 | parser.add_argument('output_path', help='Path to output Keras model file.') 28 | parser.add_argument( 29 | '-p', 30 | '--plot_model', 31 | help='Plot generated Keras model and save as image.', 32 | action='store_true') 33 | parser.add_argument( 34 | '-w', 35 | '--weights_only', 36 | help='Save as Keras weights file instead of model file.', 37 | action='store_true') 38 | 39 | def unique_config_sections(config_file): 40 | """Convert all config sections to have unique names. 41 | 42 | Adds unique suffixes to config sections for compability with configparser. 43 | """ 44 | section_counters = defaultdict(int) 45 | output_stream = io.StringIO() 46 | with open(config_file) as fin: 47 | for line in fin: 48 | if line.startswith('['): 49 | section = line.strip().strip('[]') 50 | _section = section + '_' + str(section_counters[section]) 51 | section_counters[section] += 1 52 | line = line.replace(section, _section) 53 | output_stream.write(line) 54 | output_stream.seek(0) 55 | return output_stream 56 | 57 | # %% 58 | def _main(args): 59 | config_path = os.path.expanduser(args.config_path) 60 | weights_path = os.path.expanduser(args.weights_path) 61 | assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format( 62 | config_path) 63 | assert weights_path.endswith( 64 | '.weights'), '{} is not a .weights file'.format(weights_path) 65 | 66 | output_path = os.path.expanduser(args.output_path) 67 | assert output_path.endswith( 68 | '.h5'), 'output path {} is not a .h5 file'.format(output_path) 69 | output_root = os.path.splitext(output_path)[0] 70 | 71 | # Load weights and config. 
72 | print('Loading weights.') 73 | weights_file = open(weights_path, 'rb') 74 | major, minor, revision = np.ndarray( 75 | shape=(3, ), dtype='int32', buffer=weights_file.read(12)) 76 | if (major*10+minor)>=2 and major<1000 and minor<1000: 77 | seen = np.ndarray(shape=(1,), dtype='int64', buffer=weights_file.read(8)) 78 | else: 79 | seen = np.ndarray(shape=(1,), dtype='int32', buffer=weights_file.read(4)) 80 | print('Weights Header: ', major, minor, revision, seen) 81 | 82 | print('Parsing Darknet config.') 83 | unique_config_file = unique_config_sections(config_path) 84 | cfg_parser = configparser.ConfigParser() 85 | cfg_parser.read_file(unique_config_file) 86 | 87 | print('Creating Keras model.') 88 | input_layer = Input(shape=(None, None, 3)) 89 | prev_layer = input_layer 90 | all_layers = [] 91 | 92 | weight_decay = float(cfg_parser['net_0']['decay'] 93 | ) if 'net_0' in cfg_parser.sections() else 5e-4 94 | count = 0 95 | out_index = [] 96 | for section in cfg_parser.sections(): 97 | print('Parsing section {}'.format(section)) 98 | if section.startswith('convolutional'): 99 | filters = int(cfg_parser[section]['filters']) 100 | size = int(cfg_parser[section]['size']) 101 | stride = int(cfg_parser[section]['stride']) 102 | pad = int(cfg_parser[section]['pad']) 103 | activation = cfg_parser[section]['activation'] 104 | batch_normalize = 'batch_normalize' in cfg_parser[section] 105 | 106 | padding = 'same' if pad == 1 and stride == 1 else 'valid' 107 | 108 | # Setting weights. 109 | # Darknet serializes convolutional weights as: 110 | # [bias/beta, [gamma, mean, variance], conv_weights] 111 | prev_layer_shape = K.int_shape(prev_layer) 112 | 113 | weights_shape = (size, size, prev_layer_shape[-1], filters) 114 | darknet_w_shape = (filters, weights_shape[2], size, size) 115 | weights_size = np.product(weights_shape) 116 | 117 | print('conv2d', 'bn' 118 | if batch_normalize else ' ', activation, weights_shape) 119 | 120 | conv_bias = np.ndarray( 121 | shape=(filters, ), 122 | dtype='float32', 123 | buffer=weights_file.read(filters * 4)) 124 | count += filters 125 | 126 | if batch_normalize: 127 | bn_weights = np.ndarray( 128 | shape=(3, filters), 129 | dtype='float32', 130 | buffer=weights_file.read(filters * 12)) 131 | count += 3 * filters 132 | 133 | bn_weight_list = [ 134 | bn_weights[0], # scale gamma 135 | conv_bias, # shift beta 136 | bn_weights[1], # running mean 137 | bn_weights[2] # running var 138 | ] 139 | 140 | conv_weights = np.ndarray( 141 | shape=darknet_w_shape, 142 | dtype='float32', 143 | buffer=weights_file.read(weights_size * 4)) 144 | count += weights_size 145 | 146 | # DarkNet conv_weights are serialized Caffe-style: 147 | # (out_dim, in_dim, height, width) 148 | # We would like to set these to Tensorflow order: 149 | # (height, width, in_dim, out_dim) 150 | conv_weights = np.transpose(conv_weights, [2, 3, 1, 0]) 151 | conv_weights = [conv_weights] if batch_normalize else [ 152 | conv_weights, conv_bias 153 | ] 154 | 155 | # Handle activation. 156 | act_fn = None 157 | if activation == 'leaky': 158 | pass # Add advanced activation later. 
159 | elif activation != 'linear': 160 | raise ValueError( 161 | 'Unknown activation function `{}` in section {}'.format( 162 | activation, section)) 163 | 164 | # Create Conv2D layer 165 | if stride>1: 166 | # Darknet uses left and top padding instead of 'same' mode 167 | prev_layer = ZeroPadding2D(((1,0),(1,0)))(prev_layer) 168 | conv_layer = (Conv2D( 169 | filters, (size, size), 170 | strides=(stride, stride), 171 | kernel_regularizer=l2(weight_decay), 172 | use_bias=not batch_normalize, 173 | weights=conv_weights, 174 | activation=act_fn, 175 | padding=padding))(prev_layer) 176 | 177 | if batch_normalize: 178 | conv_layer = (BatchNormalization( 179 | weights=bn_weight_list))(conv_layer) 180 | prev_layer = conv_layer 181 | 182 | if activation == 'linear': 183 | all_layers.append(prev_layer) 184 | elif activation == 'leaky': 185 | act_layer = LeakyReLU(alpha=0.1)(prev_layer) 186 | prev_layer = act_layer 187 | all_layers.append(act_layer) 188 | 189 | elif section.startswith('route'): 190 | ids = [int(i) for i in cfg_parser[section]['layers'].split(',')] 191 | layers = [all_layers[i] for i in ids] 192 | if len(layers) > 1: 193 | print('Concatenating route layers:', layers) 194 | concatenate_layer = Concatenate()(layers) 195 | all_layers.append(concatenate_layer) 196 | prev_layer = concatenate_layer 197 | else: 198 | skip_layer = layers[0] # only one layer to route 199 | all_layers.append(skip_layer) 200 | prev_layer = skip_layer 201 | 202 | elif section.startswith('maxpool'): 203 | size = int(cfg_parser[section]['size']) 204 | stride = int(cfg_parser[section]['stride']) 205 | all_layers.append( 206 | MaxPooling2D( 207 | pool_size=(size, size), 208 | strides=(stride, stride), 209 | padding='same')(prev_layer)) 210 | prev_layer = all_layers[-1] 211 | 212 | elif section.startswith('shortcut'): 213 | index = int(cfg_parser[section]['from']) 214 | activation = cfg_parser[section]['activation'] 215 | assert activation == 'linear', 'Only linear activation supported.' 216 | all_layers.append(Add()([all_layers[index], prev_layer])) 217 | prev_layer = all_layers[-1] 218 | 219 | elif section.startswith('upsample'): 220 | stride = int(cfg_parser[section]['stride']) 221 | assert stride == 2, 'Only stride=2 supported.' 222 | all_layers.append(UpSampling2D(stride)(prev_layer)) 223 | prev_layer = all_layers[-1] 224 | 225 | elif section.startswith('yolo'): 226 | out_index.append(len(all_layers)-1) 227 | all_layers.append(None) 228 | prev_layer = all_layers[-1] 229 | 230 | elif section.startswith('net'): 231 | pass 232 | 233 | else: 234 | raise ValueError( 235 | 'Unsupported section header type: {}'.format(section)) 236 | 237 | # Create and save model. 238 | if len(out_index)==0: out_index.append(len(all_layers)-1) 239 | model = Model(inputs=input_layer, outputs=[all_layers[i] for i in out_index]) 240 | print(model.summary()) 241 | if args.weights_only: 242 | model.save_weights('{}'.format(output_path)) 243 | print('Saved Keras weights to {}'.format(output_path)) 244 | else: 245 | model.save('{}'.format(output_path)) 246 | print('Saved Keras model to {}'.format(output_path)) 247 | 248 | # Check to see if all weights have been read. 
249 | remaining_weights = len(weights_file.read()) / 4 250 | weights_file.close() 251 | print('Read {} of {} from Darknet weights.'.format(count, count + 252 | remaining_weights)) 253 | if remaining_weights > 0: 254 | print('Warning: {} unused weights'.format(remaining_weights)) 255 | 256 | if args.plot_model: 257 | plot(model, to_file='{}.png'.format(output_root), show_shapes=True) 258 | print('Saved model plot to {}.png'.format(output_root)) 259 | 260 | 261 | if __name__ == '__main__': 262 | _main(parser.parse_args()) 263 | -------------------------------------------------------------------------------- /cosine_annealing.py: -------------------------------------------------------------------------------- 1 | import math 2 | from keras.callbacks import Callback 3 | from keras import backend as K 4 | 5 | 6 | class CosineAnnealingScheduler(Callback): 7 | """Cosine annealing scheduler. 8 | """ 9 | 10 | def __init__(self, init_epoch, T_max, eta_max, eta_min=0, verbose=0): 11 | super(CosineAnnealingScheduler, self).__init__() 12 | self.init_epoch = init_epoch 13 | self.T_max = T_max 14 | self.eta_max = eta_max 15 | self.eta_min = eta_min 16 | self.verbose = verbose 17 | 18 | def on_epoch_begin(self, epoch, logs=None): 19 | if not hasattr(self.model.optimizer, 'lr'): 20 | raise ValueError('Optimizer must have a "lr" attribute.') 21 | lr = self.eta_min + (self.eta_max - self.eta_min) * (1 + math.cos(math.pi * (epoch - self.init_epoch) / self.T_max)) / 2 22 | K.set_value(self.model.optimizer.lr, lr) 23 | if self.verbose > 0: 24 | print('\nEpoch %05d: CosineAnnealingScheduler setting learning ' 25 | 'rate to %s.' % (epoch + 1, lr)) 26 | 27 | def on_epoch_end(self, epoch, logs=None): 28 | logs = logs or {} 29 | logs['lr'] = K.get_value(self.model.optimizer.lr) 30 | -------------------------------------------------------------------------------- /data_augmention/.ipynb_checkpoints/未命名-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- /data_augmention/ColorCorrect.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import datetime 4 | import cv2 5 | import natsort 6 | 7 | np.seterr(over='ignore') 8 | if __name__ == '__main__': 9 | pass 10 | 11 | def color_correction(r,u_r,u_ref,L2): 12 | L1 = np.max(r) 13 | gainFactor = L1 * (u_r/ u_ref) +L2 14 | Out = r / gainFactor 15 | return Out 16 | 17 | def OptimalParameter(sceneRadiance): 18 | img = np.float64(sceneRadiance / 255) 19 | b, g, r = cv2.split(img) 20 | 21 | u_r = np.sum(r) 22 | u_g = np.sum(g) 23 | u_b = np.sum(b) 24 | u_ref = (u_r ** 2 + u_g ** 2 + u_b ** 2) ** 0.5 25 | L2 = 0.25 26 | r = color_correction(r, u_r, u_ref, L2) 27 | g = color_correction(g, u_g, u_ref, L2) 28 | b = color_correction(b, u_b, u_ref, L2) 29 | 30 | sceneRadiance = np.zeros((img.shape), 'float64') 31 | sceneRadiance[:, :, 0] = b 32 | sceneRadiance[:, :, 1] = g 33 | sceneRadiance[:, :, 2] = r 34 | sceneRadiance = sceneRadiance * 255 35 | sceneRadiance = np.clip(sceneRadiance,0, 255) 36 | sceneRadiance = np.uint8(sceneRadiance) 37 | return sceneRadiance 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /data_augmention/DepthMap_RTM.py: 
-------------------------------------------------------------------------------- 1 | from depthMapEstimation import depthMap 2 | from depthMin import minDepth 3 | 4 | 5 | def Depth_TM(img, AtomsphericLight): 6 | 7 | DepthMap = depthMap(img) 8 | t0, t1 = 0.05, 0.95 9 | DepthMap = DepthMap.clip(t0, t1) 10 | d_0 = minDepth(img, AtomsphericLight) 11 | 12 | d_f = 8 * (DepthMap + d_0) 13 | TM_R_modified = 0.85 ** d_f 14 | return TM_R_modified -------------------------------------------------------------------------------- /data_augmention/GuidedFilter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | import cv2 4 | 5 | class GuidedFilter: 6 | 7 | # def __init__(self, I, radius=5, epsilon=0.4): 8 | def __init__(self, I, radius, epsilon): 9 | 10 | self._radius = 2 * radius + 1 11 | self._epsilon = epsilon 12 | self._I = self._toFloatImg(I) 13 | self._initFilter() 14 | 15 | # print('radius',self._radius) 16 | # print('epsilon',self._epsilon) 17 | 18 | def _toFloatImg(self, img): 19 | if img.dtype == np.float32: 20 | return img 21 | return ( 1.0 / 255.0 ) * np.float32(img) 22 | 23 | def _initFilter(self): 24 | I = self._I 25 | r = self._radius 26 | eps = self._epsilon 27 | 28 | Ir, Ig, Ib = I[:, :, 0], I[:, :, 1], I[:, :, 2] 29 | 30 | # self._Ir_mean = cv2.blur(Ir, (r, r)) 31 | # self._Ig_mean = cv2.blur(Ig, (r, r)) 32 | # self._Ib_mean = cv2.blur(Ib, (r, r)) 33 | # 34 | # Irr_var = cv2.blur(Ir ** 2, (r, r)) - self._Ir_mean ** 2 + eps 35 | # Irg_var = cv2.blur(Ir * Ig, (r, r)) - self._Ir_mean * self._Ig_mean 36 | # Irb_var = cv2.blur(Ir * Ib, (r, r)) - self._Ir_mean * self._Ib_mean 37 | # Igg_var = cv2.blur(Ig * Ig, (r, r)) - self._Ig_mean * self._Ig_mean + eps 38 | # Igb_var = cv2.blur(Ig * Ib, (r, r)) - self._Ig_mean * self._Ib_mean 39 | # Ibb_var = cv2.blur(Ib * Ib, (r, r)) - self._Ib_mean * self._Ib_mean + eps 40 | 41 | 42 | self._Ir_mean = cv2.blur(Ir, (r, r)) 43 | self._Ig_mean = cv2.blur(Ig, (r, r)) 44 | self._Ib_mean = cv2.blur(Ib, (r, r)) 45 | 46 | Irr_var = cv2.blur(Ir ** 2, (r, r)) - self._Ir_mean ** 2 + eps 47 | Irg_var = cv2.blur(Ir * Ig, (r, r)) - self._Ir_mean * self._Ig_mean 48 | Irb_var = cv2.blur(Ir * Ib, (r, r)) - self._Ir_mean * self._Ib_mean 49 | Igg_var = cv2.blur(Ig * Ig, (r, r)) - self._Ig_mean * self._Ig_mean + eps 50 | Igb_var = cv2.blur(Ig * Ib, (r, r)) - self._Ig_mean * self._Ib_mean 51 | Ibb_var = cv2.blur(Ib * Ib, (r, r)) - self._Ib_mean * self._Ib_mean + eps 52 | 53 | 54 | Irr_inv = Igg_var * Ibb_var - Igb_var * Igb_var 55 | Irg_inv = Igb_var * Irb_var - Irg_var * Ibb_var 56 | Irb_inv = Irg_var * Igb_var - Igg_var * Irb_var 57 | Igg_inv = Irr_var * Ibb_var - Irb_var * Irb_var 58 | Igb_inv = Irb_var * Irg_var - Irr_var * Igb_var 59 | Ibb_inv = Irr_var * Igg_var - Irg_var * Irg_var 60 | 61 | I_cov = Irr_inv * Irr_var + Irg_inv * Irg_var + Irb_inv * Irb_var 62 | Irr_inv /= I_cov 63 | Irg_inv /= I_cov 64 | Irb_inv /= I_cov 65 | Igg_inv /= I_cov 66 | Igb_inv /= I_cov 67 | Ibb_inv /= I_cov 68 | 69 | self._Irr_inv = Irr_inv 70 | self._Irg_inv = Irg_inv 71 | self._Irb_inv = Irb_inv 72 | self._Igg_inv = Igg_inv 73 | self._Igb_inv = Igb_inv 74 | self._Ibb_inv = Ibb_inv 75 | 76 | def _computeCoefficients(self, p): 77 | r = self._radius 78 | I = self._I 79 | Ir, Ig, Ib = I[:, :, 0], I[:, :, 1], I[:, :, 2] 80 | 81 | 82 | p_mean = cv2.blur(p, (r, r)) 83 | Ipr_mean = cv2.blur(Ir * p, (r, r)) 84 | Ipg_mean = cv2.blur(Ig * p, (r, r)) 85 | Ipb_mean = cv2.blur(Ib * p, (r, r)) 86 | 87 | 88 | 89 | 
Ipr_cov = Ipr_mean - self._Ir_mean * p_mean 90 | Ipg_cov = Ipg_mean - self._Ig_mean * p_mean 91 | Ipb_cov = Ipb_mean - self._Ib_mean * p_mean 92 | 93 | ar = self._Irr_inv * Ipr_cov + self._Irg_inv * Ipg_cov + self._Irb_inv * Ipb_cov 94 | ag = self._Irg_inv * Ipr_cov + self._Igg_inv * Ipg_cov + self._Igb_inv * Ipb_cov 95 | ab = self._Irb_inv * Ipr_cov + self._Igb_inv * Ipg_cov + self._Ibb_inv * Ipb_cov 96 | 97 | b = p_mean - ar * self._Ir_mean - ag * self._Ig_mean - ab * self._Ib_mean 98 | 99 | ar_mean = cv2.blur(ar, (r, r)) 100 | ag_mean = cv2.blur(ag, (r, r)) 101 | ab_mean = cv2.blur(ab, (r, r)) 102 | b_mean = cv2.blur(b, (r, r)) 103 | 104 | return ar_mean, ag_mean, ab_mean, b_mean 105 | 106 | def _computeOutput(self, ab, I): 107 | 108 | ar_mean, ag_mean, ab_mean, b_mean = ab 109 | Ir, Ig, Ib = I[:, :, 0], I[:, :, 1], I[:, :, 2] 110 | q = ar_mean * Ir + ag_mean * Ig + ab_mean * Ib + b_mean 111 | return q 112 | 113 | def filter(self, p): 114 | 115 | p_32F = self._toFloatImg(p) 116 | 117 | ab = self._computeCoefficients(p) 118 | return self._computeOutput(ab, self._I) 119 | 120 | 121 | -------------------------------------------------------------------------------- /data_augmention/Saturation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import datetime 4 | import cv2 5 | import natsort 6 | from skimage.color import rgb2lab, lab2rgb 7 | import matplotlib.pyplot as plt 8 | 9 | 10 | np.seterr(over='ignore') 11 | if __name__ == '__main__': 12 | pass 13 | 14 | path = "F:/PaperExperiments/ACMMM2018/OptimalTM_BLs_Restor/InputImages" 15 | # path = "F:/PaperExperiments/ACMMM2018/OptimalTM_BLs_Restor/Temps" 16 | files = os.listdir(path) 17 | files = natsort.natsorted(files) 18 | starttime = datetime.datetime.now() 19 | 20 | # BLs = read_xls_file() 21 | # print('BLs',BLs) 22 | 23 | for i in range(len(files)): 24 | file = files[i] 25 | Num = file.split('.')[0] 26 | filepath = path + "/" + file 27 | # BL = BLs[i] 28 | print('******** file ********', file) 29 | img = cv2.imread('InputImages/' + file) 30 | # img = cv2.imread('Temps/' + file) 31 | height = len(img) 32 | width = len(img[0]) 33 | # print('img[0,0,:]',img[0,0,:]) 34 | Sat = np.zeros((height,width )) 35 | for i in range(0, img.shape[0]): 36 | for j in range(0, img.shape[1]): 37 | if(np.max(img[i,j,:]) == 0): 38 | Sat[i,j] = 1 39 | else: 40 | Sat[i, j] = (np.max(img[i,j,:]) - np.min(img[i,j,:]))/np.max(img[i,j,:]) 41 | # print('Sat',Sat) 42 | Sat = 1- Sat 43 | lamba = 1 - np.mean(Sat) 44 | print('lamba',lamba) 45 | 46 | # 47 | # cv2.imwrite('Results_Saturation/' + Num + 'Sat_TM_lamba.jpg', np.uint8((Sat * lamba) * 255)) 48 | 49 | -------------------------------------------------------------------------------- /data_augmention/Saturation_Max.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def Sat_max(img): 5 | height = len(img) 6 | width = len(img[0]) 7 | # print('img[0,0,:]',img[0,0,:]) 8 | Sat = np.zeros((height,width )) 9 | for i in range(0, img.shape[0]): 10 | for j in range(0, img.shape[1]): 11 | if(np.max(img[i,j,:]) == 0): 12 | Sat[i,j] = 1 13 | else: 14 | Sat[i, j] = (np.max(img[i,j,:]) - np.min(img[i,j,:]))/np.max(img[i,j,:]) 15 | Sat = 1 - Sat 16 | 17 | # lamba = 1 - np.mean(Sat) 18 | lamba = 1 19 | 20 | Sat = Sat * lamba 21 | return Sat 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- 
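Note: the driver script image_aug.py is not included in this part of the dump. The sketch below only illustrates the approach described in the README — running the slow, full-resolution enhancement implemented by augment() (see data_augmention/augment.py, later in this directory) over the training images with a 10-process pool. The source and output directory names are assumptions for illustration (the notebook later in this repo reads enhanced images from data/train/augment/); this is not the repository's actual image_aug.py.

import os
from multiprocessing import Pool

import cv2
from augment import augment  # data_augmention/augment.py; run from inside that directory

SRC_DIR = 'data/train/image'     # assumed location of the raw training images
DST_DIR = 'data/train/augment'   # enhanced copies are stored here per the README/notebook

def enhance_one(name):
    # Read one image, enhance it at its original resolution, and save the result.
    img = cv2.imread(os.path.join(SRC_DIR, name))
    if img is None:
        return
    cv2.imwrite(os.path.join(DST_DIR, name), augment(img))

if __name__ == '__main__':
    os.makedirs(DST_DIR, exist_ok=True)
    names = [n for n in os.listdir(SRC_DIR) if n.endswith('.jpg')]
    with Pool(10) as pool:       # 10 worker processes, as described in the README
        pool.map(enhance_one, names)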
/data_augmention/__pycache__/ColorCorrect.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/ColorCorrect.cpython-36.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/ColorCorrect.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/ColorCorrect.cpython-37.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/DepthMap_RTM.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/DepthMap_RTM.cpython-36.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/DepthMap_RTM.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/DepthMap_RTM.cpython-37.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/GuidedFilter.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/GuidedFilter.cpython-36.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/GuidedFilter.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/GuidedFilter.cpython-37.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/Saturation_Max.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/Saturation_Max.cpython-36.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/Saturation_Max.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/Saturation_Max.cpython-37.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/augment.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/augment.cpython-36.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/depthMapEstimation.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/depthMapEstimation.cpython-36.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/depthMapEstimation.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/depthMapEstimation.cpython-37.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/depthMin.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/depthMin.cpython-36.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/depthMin.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/depthMin.cpython-37.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/getGBTransmission.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/getGBTransmission.cpython-36.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/getGBTransmission.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/getGBTransmission.cpython-37.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/getRefinedTransmission.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/getRefinedTransmission.cpython-36.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/getRefinedTransmission.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/getRefinedTransmission.cpython-37.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/getTransmissionMap.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/getTransmissionMap.cpython-36.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/getTransmissionMap.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/getTransmissionMap.cpython-37.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/global_histogram_stretching.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/global_histogram_stretching.cpython-36.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/global_histogram_stretching.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/global_histogram_stretching.cpython-37.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/sceneRadiance.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/sceneRadiance.cpython-36.pyc -------------------------------------------------------------------------------- /data_augmention/__pycache__/sceneRadiance.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/data_augmention/__pycache__/sceneRadiance.cpython-37.pyc -------------------------------------------------------------------------------- /data_augmention/augment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ColorCorrect import OptimalParameter 3 | from DepthMap_RTM import Depth_TM 4 | from Saturation_Max import Sat_max 5 | from getGBTransmission import getGBTransmissionESt 6 | from getRefinedTransmission import Refinedtransmission 7 | from getTransmissionMap import getTransmission 8 | from global_histogram_stretching import stretching 9 | from sceneRadiance import sceneRadianceRGB 10 | 11 | def augment(img): 12 | np.seterr(over='ignore') 13 | 14 | blockSize = 9 15 | height = len(img) 
16 | width = len(img[0]) 17 | gimfiltR = 50 # Radius size of guided filter 18 | eps = 10 ** -3 # Epsilon value of guided filter 19 | Nrer = [0.95, 0.93, 0.85] # Normalized residual energy ratio of G-B-R channels 20 | 21 | AtomsphericLight = np.zeros(3) 22 | AtomsphericLight[0] = (1.13 * np.mean(img[:, :, 0])) + 1.11 * np.std(img[:, :, 0]) - 25.6 23 | AtomsphericLight[1] = (1.13 * np.mean(img[:, :, 1])) + 1.11 * np.std(img[:, :, 1]) - 25.6 24 | AtomsphericLight[2] = 140 / (1 + 14.4 * np.exp(-0.034 * np.median(img[:, :, 2]))) 25 | AtomsphericLight = np.clip(AtomsphericLight, 5, 250) 26 | transmissionR = getTransmission(img, AtomsphericLight, blockSize) 27 | TM_R_modified = Depth_TM(img, AtomsphericLight) 28 | TM_R_modified_Art = Sat_max(img) 29 | transmissionR_new = np.copy(transmissionR) 30 | for i in range(0, img.shape[0]): 31 | for j in range(0, img.shape[1]): 32 | if(transmissionR_new[i, j] > TM_R_modified[i, j]): 33 | transmissionR_new[i, j] = TM_R_modified[i, j] 34 | if(transmissionR_new[i, j] < TM_R_modified_Art[i, j]): 35 | transmissionR_new[i, j] = TM_R_modified_Art[i, j] 36 | 37 | transmissionR_Stretched = stretching(transmissionR_new, height, width) 38 | transmissionB, transmissionG, depth_map = getGBTransmissionESt(transmissionR_Stretched, AtomsphericLight) 39 | transmission = Refinedtransmission(transmissionB, transmissionG, transmissionR_Stretched, img) 40 | sceneRadiance = sceneRadianceRGB(img, transmission, AtomsphericLight) 41 | sceneRadiance = OptimalParameter(sceneRadiance) 42 | 43 | return sceneRadiance 44 | -------------------------------------------------------------------------------- /data_augmention/depthMapEstimation.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | def depthMap(img): 5 | 6 | theta_0 = 0.51157954 7 | theta_1 = 0.50516165 8 | theta_2 = -0.90511117 9 | img = img / 255.0 10 | b = img[:, :, 0] 11 | g = img[:, :, 1] 12 | r = img[:, :, 2] 13 | x_1 = np.maximum(g, b) 14 | x_2 = r 15 | 16 | Deptmap = theta_0 + theta_1 * x_1 + theta_2 * x_2 17 | 18 | 19 | return Deptmap 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /data_augmention/depthMin.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | 5 | def minDepth(img, BL): 6 | img = img/255.0 7 | BL = BL/255.0 8 | Max = [] 9 | img = np.float32(img) 10 | for i in range(0,3): 11 | Max_Abs = np.absolute(img[i] - BL[i]) 12 | Max_I = np.max(Max_Abs) 13 | Max_B = np.max([BL[i],(1 -BL[i])]) 14 | temp = Max_I / Max_B 15 | Max.append(temp) 16 | K_b = np.max(Max) 17 | min_depth = 1 - K_b 18 | 19 | return min_depth 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /data_augmention/getGBTransmission.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | 5 | def getGBTransmissionESt(transmissionR,AtomsphericLightTM): 6 | depth_map = np.zeros(transmissionR.shape) 7 | for i in range(0,transmissionR.shape[0]): 8 | for j in range(0, transmissionR.shape[1]): 9 | depth_map[i,j] = math.log(transmissionR[i,j],0.82) 10 | # if(depth_map[i,j]>15): 11 | # depth_map[i, j] = 15 12 | # if (depth_map[i, j] < 1): 13 | # depth_map[i, j] = 1 14 | 15 | 16 | 17 | transmissionG = 0.93 ** depth_map 18 | transmissionB = 0.95 ** depth_map 19 | # transmissionG = np.zeros(transmissionR.shape) 20 | 21 | # transmissionB = 
np.zeros(transmissionR.shape) 22 | # ratioB = (AtomsphericLightTM[2] * (-0.00113 * 450 + 1.62517) )/(AtomsphericLightTM[0] * (-0.00113 * 620 + 1.62517))*1.3 23 | # ratioG = (AtomsphericLightTM[2] * (-0.00113 * 540 + 1.62517) )/(AtomsphericLightTM[1] * (-0.00113 * 620 + 1.62517))*1.2 24 | # print('ratioB',ratioB) 25 | # print('ratioG',ratioG) 26 | # transmissionG = transmissionR ** ratioG 27 | # transmissionB = transmissionR ** ratioB 28 | # print('getGBTransmissionESttransmissionB',transmissionB) 29 | return transmissionB,transmissionG,depth_map 30 | 31 | 32 | 33 | 34 | # def getGBTransmissionESt(transmissionR,AtomsphericLightTM): 35 | # # transmissionG = np.zeros(transmissionR.shape) 36 | # # transmissionB = np.zeros(transmissionR.shape) 37 | # ratioB = (AtomsphericLightTM[2] * (-0.00113 * 450 + 1.62517) )/(AtomsphericLightTM[0] * (-0.00113 * 620 + 1.62517)) 38 | # ratioG = (AtomsphericLightTM[2] * (-0.00113 * 540 + 1.62517) )/(AtomsphericLightTM[1] * (-0.00113 * 620 + 1.62517)) 39 | # print('ratioB',ratioB) 40 | # print('ratioG',ratioG) 41 | # transmissionG = transmissionR ** ratioG 42 | # transmissionB = transmissionR ** ratioB 43 | # print('getGBTransmissionESttransmissionB',transmissionB) 44 | # 45 | # return transmissionB,transmissionG 46 | 47 | 48 | -------------------------------------------------------------------------------- /data_augmention/getRefinedTransmission.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | from GuidedFilter import GuidedFilter 5 | 6 | 7 | def Refinedtransmission(transmissionB,transmissionG,transmissionR_Stretched,img): 8 | 9 | 10 | gimfiltR = 50 # 引导滤波时半径的大小 11 | eps = 10 ** -3 # 引导滤波时epsilon的值 12 | 13 | # gimfiltR = 5 # 引导滤波时半径的大小 14 | # eps = 0.4 # 引导滤波时epsilon的值 15 | 16 | guided_filter = GuidedFilter(img, gimfiltR, eps) 17 | transmissionR_Stretched = guided_filter.filter(transmissionR_Stretched) 18 | transmissionG = guided_filter.filter(transmissionG) 19 | transmissionB = guided_filter.filter(transmissionB) 20 | 21 | transmission = np.zeros(img.shape) 22 | transmission[:, :, 0] = transmissionB 23 | transmission[:, :, 1] = transmissionG 24 | transmission[:, :, 2] = transmissionR_Stretched 25 | transmission = np.clip(transmission,0.05, 0.95) 26 | 27 | return transmission 28 | 29 | 30 | 31 | 32 | 33 | # 34 | # transmissionB = FilterTran(transmissionB,0.1,0.9) 35 | # transmissionG = FilterTran(transmissionG,0.25,0.95) 36 | # transmissionR = FilterTran(transmissionR,0.35,0.975) 37 | # transmissionB = FilterTran(transmissionB, 0.2, 0.9, 15, 95) 38 | # transmissionG = FilterTran(transmissionG, 0.25, 0.95, 15, 95) 39 | # transmissionR = FilterTran(transmissionR, 0.35, 0.975, 15, 95) 40 | # print('transmissionB',transmissionB) -------------------------------------------------------------------------------- /data_augmention/getTransmissionMap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | def getMinChannel(img,AtomsphericLight): 3 | imgGrayNormalization = np.zeros((img.shape[0], img.shape[1]), dtype=np.float16) 4 | for i in range(0, img.shape[0]): 5 | for j in range(0, img.shape[1]): 6 | localMin = 1 7 | for k in range(0, 3): 8 | # print('AtomsphericLight[k]',AtomsphericLight[k]) 9 | imgNormalization = img.item((i, j, k)) / AtomsphericLight[k] 10 | if imgNormalization < localMin: 11 | localMin = imgNormalization 12 | imgGrayNormalization[i, j] = localMin 13 | # print('imgGrayNormalization',imgGrayNormalization) 14 | # 
print('np.max(imgGrayNormalization)',np.max(imgGrayNormalization)) 15 | return imgGrayNormalization 16 | 17 | def getTransmission(img,AtomsphericLight ,blockSize): 18 | img = np.float16(img) 19 | img = getMinChannel(img,AtomsphericLight) 20 | AtomsphericLight = AtomsphericLight / 255.0 21 | addSize = int((blockSize - 1) / 2) 22 | newHeight = img.shape[0] + blockSize - 1 23 | newWidth = img.shape[1] + blockSize - 1 24 | # 中间结果 25 | imgMiddle = np.zeros((newHeight, newWidth)) 26 | imgMiddle[:, :] = 1 27 | imgMiddle[addSize:newHeight - addSize, addSize:newWidth - addSize] = img 28 | # print('imgMiddle',imgMiddle) 29 | imgDark = np.zeros((img.shape[0], img.shape[1])) 30 | localMin = 1 31 | for i in range(addSize, newHeight - addSize): 32 | for j in range(addSize, newWidth - addSize): 33 | localMin = 1 34 | for k in range(i - addSize, i + addSize + 1): 35 | for l in range(j - addSize, j + addSize + 1): 36 | if imgMiddle.item((k, l)) < localMin: 37 | localMin = imgMiddle.item((k, l)) 38 | imgDark[i - addSize, j - addSize] = localMin 39 | transmission = (1 - imgDark) / (1 - 0.1 / np.max(AtomsphericLight)) 40 | transmission = np.clip(transmission, 0.1, 0.9) 41 | # for i in range(0, transmission.shape[0]): 42 | # for j in range(0, transmission.shape[1]): 43 | # if transmission[i, j] < 0.01: 44 | # transmission[i, j] = 0.01 45 | # if transmission[i, j] > 0.99: 46 | # transmission[i, j] = 0.99 47 | 48 | return transmission -------------------------------------------------------------------------------- /data_augmention/global_histogram_stretching.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import datetime 4 | import math 5 | 6 | 7 | import numpy as np 8 | from scipy import stats 9 | import cv2 10 | 11 | pi = math.pi 12 | e = math.e 13 | 14 | 15 | 16 | 17 | 18 | 19 | def global_stretching_depth(img_L): 20 | height = len(img_L) 21 | width = len(img_L[0]) 22 | length = height * width 23 | R_rray = [] 24 | for i in range(height): 25 | for j in range(width): 26 | R_rray.append(img_L[i][j]) 27 | R_rray.sort() 28 | I_min = R_rray[int(length / 1000)] 29 | I_max = R_rray[-int(length / 1000)] 30 | 31 | array_Global_histogram_stretching_L = np.zeros((height, width)) 32 | for i in range(0, height): 33 | for j in range(0, width): 34 | if img_L[i][j] < I_min: 35 | p_out = img_L[i][j] 36 | array_Global_histogram_stretching_L[i][j] = 0.2 37 | elif (img_L[i][j] > I_max): 38 | p_out = img_L[i][j] 39 | array_Global_histogram_stretching_L[i][j] = 0.9 40 | else: 41 | p_out = (img_L[i][j] - I_min) * ((0.9-0.2) / (I_max - I_min))+ 0.2 42 | array_Global_histogram_stretching_L[i][j] = p_out 43 | return (array_Global_histogram_stretching_L) 44 | 45 | 46 | 47 | def stretching(sceneRadiance,height, width): 48 | 49 | r_array_Global_histogram_stretching = global_stretching_depth(sceneRadiance) 50 | sceneRadiance = r_array_Global_histogram_stretching 51 | 52 | 53 | return sceneRadiance 54 | -------------------------------------------------------------------------------- /data_augmention/sceneRadiance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def sceneRadianceRGB(img, transmission, AtomsphericLight): 4 | sceneRadiance = np.zeros(img.shape) 5 | img = np.float32(img) 6 | for i in range(0, 3): 7 | sceneRadiance[:, :, i] = (img[:, :, i] - AtomsphericLight[i]) / transmission[:, :, i] + AtomsphericLight[i] 8 | # 限制透射率 在0~255 9 | 10 | 11 | sceneRadiance = np.clip(sceneRadiance, 0, 255) 12 | sceneRadiance 
= np.uint8(sceneRadiance) 13 | return sceneRadiance 14 | 15 | 16 | -------------------------------------------------------------------------------- /data_process.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from bs4 import BeautifulSoup\n", 10 | "import os\n", 11 | "import cv2" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# 导入数据" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "class_label = ['holothurian', 'echinus', 'scallop', 'starfish', 'waterweeds']\n", 28 | "file_path = 'data/train/box/'\n", 29 | "image_path = 'data/train/augment/'\n", 30 | "all_name = os.listdir(file_path)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "# 数据处理" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# train_result用于后续yolo训练,k_mean_result用于计算anchor boxes\n", 47 | "train_result = []\n", 48 | "k_mean_result = []\n", 49 | "box_size = {}\n", 50 | "label_number = {}\n", 51 | "del_number = 0\n", 52 | "\n", 53 | "# 遍历图片\n", 54 | "for i in all_name:\n", 55 | " image_name = image_path+i.rstrip('xml')+'jpg'\n", 56 | " img_name_append = False\n", 57 | " soup = BeautifulSoup(open(file_path+i), 'lxml')\n", 58 | " bbx = soup.find_all('object')\n", 59 | " img = cv2.imread(image_name)\n", 60 | " height, width, _ = img.shape\n", 61 | " \n", 62 | " # 对各尺寸数量进行统计\n", 63 | " height, width, _ = img.shape\n", 64 | " if box_size.get((height, width), False):\n", 65 | " box_size[(height, width)] += 1\n", 66 | " else:\n", 67 | " box_size[(height, width)] = 1\n", 68 | " \n", 69 | " # 遍历候选框\n", 70 | " for j in bbx:\n", 71 | " # 同时对范围进行修正,像素起点改为0\n", 72 | " name = str(j.contents[1].string)\n", 73 | " xmin = int(j.xmin.string) - 1\n", 74 | " ymin = int(j.ymin.string) - 1\n", 75 | " xmax = int(j.xmax.string) - 1\n", 76 | " ymax = int(j.ymax.string) - 1\n", 77 | " index = str(class_label.index(name))\n", 78 | "\n", 79 | " # 防止大小混淆\n", 80 | " if xmin > xmax:\n", 81 | " xmax, xmin = xmin, xmax\n", 82 | " if ymin > ymax:\n", 83 | " ymax, ymin = ymin, ymax\n", 84 | " \n", 85 | " # 判断ground truth是否超出图像范围\n", 86 | " if xmin > width - 1 or xmax < 0:\n", 87 | " continue\n", 88 | " if ymin > height - 1 or ymax < 0:\n", 89 | " continue\n", 90 | "\n", 91 | " # 处于图像边缘候选框的处理\n", 92 | " if xmin < 0:\n", 93 | " xmin = 0\n", 94 | " if ymin < 0:\n", 95 | " ymin = 0\n", 96 | " if xmax > width - 1:\n", 97 | " xmax = width - 1\n", 98 | " if ymax > height - 1:\n", 99 | " ymax = height - 1\n", 100 | " \n", 101 | " # 判断是否为无效框\n", 102 | " x_distance = xmax - xmin\n", 103 | " y_distance = ymax - ymin\n", 104 | " if x_distance == 0 or y_distance == 0:\n", 105 | " continue\n", 106 | " \n", 107 | " # 对于过小的图片进行删除\n", 108 | " square = (xmax - xmin) * (ymax - ymin)\n", 109 | " if square < 120:\n", 110 | " del_number += 1\n", 111 | " continue\n", 112 | " \n", 113 | " # 防止有图片没有任何候选框\n", 114 | " if img_name_append == False:\n", 115 | " train_result.append(image_name)\n", 116 | " img_name_append = True\n", 117 | "\n", 118 | " # 判断是否增加kmean缩放候选框\n", 119 | " scale = min(448/width, 448/height)\n", 120 | " square = ((xmax - xmin)*scale) * ((ymax - ymin)*scale)\n", 121 | " if square < 120:\n", 122 | " kmean_scale = False\n", 
123 | " else:\n", 124 | " kmean_scale = True\n", 125 | " \n", 126 | " xmin, xmax, ymin, ymax = str(xmin), str(xmax), str(ymin), str(ymax)\n", 127 | " train_result.append(' ')\n", 128 | " for x in [xmin, ymin, xmax, ymax]:\n", 129 | " train_result.append(x)\n", 130 | " train_result.append(',')\n", 131 | " if x is ymax:\n", 132 | " train_result.append(index)\n", 133 | " \n", 134 | " # kmean添加缩放候选框\n", 135 | " if kmean_scale == True:\n", 136 | " for x in [xmin, ymin, xmax, ymax]:\n", 137 | " k_mean_result.append(int(round(int(x)*scale)))\n", 138 | " if x is not ymax:\n", 139 | " k_mean_result.append(',')\n", 140 | " else:\n", 141 | " k_mean_result.append('\\n')\n", 142 | " \n", 143 | " # 对各label数量统计\n", 144 | " if label_number.get(name, False):\n", 145 | " label_number[name] += 1\n", 146 | " else:\n", 147 | " label_number[name] = 1\n", 148 | " if img_name_append == True:\n", 149 | " train_result.append('\\n')" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "# 查看统计结果" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 4, 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "data": { 166 | "text/plain": [ 167 | "10" 168 | ] 169 | }, 170 | "execution_count": 4, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "del_number" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 5, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "data": { 186 | "text/plain": [ 187 | "{'echinus': 22098,\n", 188 | " 'scallop': 6694,\n", 189 | " 'starfish': 6827,\n", 190 | " 'holothurian': 5503,\n", 191 | " 'waterweeds': 82}" 192 | ] 193 | }, 194 | "execution_count": 5, 195 | "metadata": {}, 196 | "output_type": "execute_result" 197 | } 198 | ], 199 | "source": [ 200 | "label_number" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 6, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "data": { 210 | "text/plain": [ 211 | "{(405, 720): 3066,\n", 212 | " (2160, 3840): 1644,\n", 213 | " (1080, 1920): 595,\n", 214 | " (576, 704): 38,\n", 215 | " (480, 586): 44}" 216 | ] 217 | }, 218 | "execution_count": 6, 219 | "metadata": {}, 220 | "output_type": "execute_result" 221 | } 222 | ], 223 | "source": [ 224 | "box_size" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "# 保存" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 7, 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [ 240 | "with open('data/train_data.txt', 'w') as f:\n", 241 | " f.writelines(train_result)\n", 242 | "with open('data/k_mean_data.txt', 'w') as f:\n", 243 | " f.writelines(k_mean_result)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [] 252 | } 253 | ], 254 | "metadata": { 255 | "kernelspec": { 256 | "display_name": "Python 3", 257 | "language": "python", 258 | "name": "python3" 259 | }, 260 | "language_info": { 261 | "codemirror_mode": { 262 | "name": "ipython", 263 | "version": 3 264 | }, 265 | "file_extension": ".py", 266 | "mimetype": "text/x-python", 267 | "name": "python", 268 | "nbconvert_exporter": "python", 269 | "pygments_lexer": "ipython3", 270 | "version": "3.6.9" 271 | } 272 | }, 273 | "nbformat": 4, 274 | "nbformat_minor": 2 275 | } 276 | -------------------------------------------------------------------------------- /data_process.py: 
-------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | import os 3 | import cv2 4 | from tqdm import tqdm 5 | 6 | 7 | def process(): 8 | # train_result用于后续yolo训练,k_mean_result用于计算anchor boxes 9 | train_result = [] 10 | k_mean_result = [] 11 | 12 | # 遍历图片 13 | for i in tqdm(all_name): 14 | image_name = image_path+i.rstrip('xml')+'jpg' 15 | img_name_append = False 16 | soup = BeautifulSoup(open(file_path+i), 'lxml') 17 | bbx = soup.find_all('object') 18 | img = cv2.imread(image_name) 19 | height, width, _ = img.shape 20 | 21 | # 遍历候选框 22 | for j in bbx: 23 | # 同时对范围进行修正,像素起点改为0 24 | name = str(j.contents[1].string) 25 | xmin = int(j.xmin.string) - 1 26 | ymin = int(j.ymin.string) - 1 27 | xmax = int(j.xmax.string) - 1 28 | ymax = int(j.ymax.string) - 1 29 | index = str(class_label.index(name)) 30 | 31 | # 防止大小混淆 32 | if xmin > xmax: 33 | xmax, xmin = xmin, xmax 34 | if ymin > ymax: 35 | ymax, ymin = ymin, ymax 36 | 37 | # 判断ground truth是否超出图像范围 38 | if xmin > width - 1 or xmax < 0: 39 | continue 40 | if ymin > height - 1 or ymax < 0: 41 | continue 42 | 43 | # 处于图像边缘候选框的处理 44 | if xmin < 0: 45 | xmin = 0 46 | if ymin < 0: 47 | ymin = 0 48 | if xmax > width - 1: 49 | xmax = width - 1 50 | if ymax > height - 1: 51 | ymax = height - 1 52 | 53 | # 判断是否为无效框 54 | x_distance = xmax - xmin 55 | y_distance = ymax - ymin 56 | if x_distance == 0 or y_distance == 0: 57 | continue 58 | 59 | # 对于过小的候选框进行删除 60 | square = (xmax - xmin) * (ymax - ymin) 61 | if square < 120: 62 | continue 63 | 64 | # 防止有图片没有任何候选框 65 | if img_name_append == False: 66 | train_result.append(image_name) 67 | img_name_append = True 68 | 69 | # 判断是否增加kmean缩放候选框 70 | scale = min(input_shape/width, input_shape/height) 71 | square = ((xmax - xmin)*scale) * ((ymax - ymin)*scale) 72 | if square < 120: 73 | kmean_scale = False 74 | else: 75 | kmean_scale = True 76 | 77 | xmin, xmax, ymin, ymax = str(xmin), str(xmax), str(ymin), str(ymax) 78 | train_result.append(' ') 79 | for x in [xmin, ymin, xmax, ymax]: 80 | train_result.append(x) 81 | train_result.append(',') 82 | if x is ymax: 83 | train_result.append(index) 84 | 85 | # kmean添加缩放候选框 86 | if kmean_scale == True: 87 | for x in [xmin, ymin, xmax, ymax]: 88 | k_mean_result.append(str(int(round(int(x)*scale)))) 89 | if x is not ymax: 90 | k_mean_result.append(',') 91 | else: 92 | k_mean_result.append('\n') 93 | 94 | if img_name_append == True: 95 | train_result.append('\n') 96 | 97 | return train_result, k_mean_result 98 | 99 | 100 | def save_file(train_result, k_mean_result): 101 | with open('data/train_data.txt', 'w') as f: 102 | f.writelines(train_result) 103 | with open('data/k_mean_data.txt', 'w') as f: 104 | f.writelines(k_mean_result) 105 | 106 | 107 | if __name__ == '__main__': 108 | input_shape = 480 109 | class_label = ['holothurian', 'echinus', 'scallop', 'starfish', 'waterweeds'] 110 | file_path = 'data/train/box/' 111 | image_path = 'data/train/augment/' 112 | all_name = os.listdir(file_path) 113 | 114 | train_result, k_mean_result = process() 115 | save_file(train_result, k_mean_result) 116 | -------------------------------------------------------------------------------- /image_aug.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0,'data_augmention/') 3 | from augment import augment 4 | import os 5 | import cv2 6 | import numpy as np 7 | from multiprocessing import Pool 8 | 9 | number = 0 10 | def img_aug(name, train=False, 
input_shape=(480,480)): 11 | global number 12 | 13 | img = cv2.imread(image_path+name) 14 | if train == False: 15 | h, w = input_shape 16 | ih, iw = img.shape[:2] 17 | scale = min(w/iw, h/ih) 18 | nw = int(iw*scale) 19 | nh = int(ih*scale) 20 | img = cv2.resize(img,(nw,nh)) 21 | img = augment(img) 22 | 23 | # 用灰色像素块来做背景扩充图片满足输入尺寸需求 24 | dx = (w-nw) // 2 25 | dy = (h-nh) // 2 26 | img = np.pad(img, ((dy, dy), (dx, dx), (0, 0)), 27 | 'constant', constant_values=128) 28 | if tuple(img.shape[:2]) != input_shape: 29 | img = np.pad(img, ((0, input_shape[0]-img.shape[0]), 30 | (0, input_shape[1]-img.shape[1]), (0, 0)), 31 | 'constant', constant_values=128) 32 | else: 33 | img = augment(img) 34 | 35 | cv2.imwrite(result_path+name, img) 36 | number += 1 37 | print('{}. {} is finish!'.format(number, name)) 38 | 39 | 40 | if __name__ == '__main__': 41 | image_path = 'data/test/test-B-image/' 42 | result_path = 'data/test/test_B_augment/' 43 | all_name = os.listdir(image_path) 44 | 45 | # 使用多进程来加速处理 46 | p = Pool(10) 47 | for i in all_name: 48 | p.apply_async(img_aug, args=(i,)) 49 | p.close() 50 | p.join() 51 | -------------------------------------------------------------------------------- /kmeans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class YOLO_Kmeans: 5 | 6 | def __init__(self, cluster_number, filename): 7 | self.cluster_number = cluster_number 8 | self.filename = filename 9 | 10 | def iou(self, boxes, clusters): # 1 box -> k clusters 11 | n = boxes.shape[0] 12 | k = self.cluster_number 13 | 14 | box_area = boxes[:, 0] * boxes[:, 1] 15 | box_area = box_area.repeat(k) 16 | box_area = np.reshape(box_area, (n, k)) 17 | 18 | cluster_area = clusters[:, 0] * clusters[:, 1] 19 | cluster_area = np.tile(cluster_area, [1, n]) 20 | cluster_area = np.reshape(cluster_area, (n, k)) 21 | 22 | box_w_matrix = np.reshape(boxes[:, 0].repeat(k), (n, k)) 23 | cluster_w_matrix = np.reshape(np.tile(clusters[:, 0], (1, n)), (n, k)) 24 | min_w_matrix = np.minimum(cluster_w_matrix, box_w_matrix) 25 | 26 | box_h_matrix = np.reshape(boxes[:, 1].repeat(k), (n, k)) 27 | cluster_h_matrix = np.reshape(np.tile(clusters[:, 1], (1, n)), (n, k)) 28 | min_h_matrix = np.minimum(cluster_h_matrix, box_h_matrix) 29 | inter_area = np.multiply(min_w_matrix, min_h_matrix) 30 | 31 | result = inter_area / (box_area + cluster_area - inter_area) 32 | return result 33 | 34 | def avg_iou(self, boxes, clusters): 35 | accuracy = np.mean([np.max(self.iou(boxes, clusters), axis=1)]) 36 | return accuracy 37 | 38 | def kmeans(self, boxes, k, dist=np.median): 39 | box_number = boxes.shape[0] 40 | distances = np.empty((box_number, k)) 41 | last_nearest = np.zeros((box_number,)) 42 | np.random.seed() 43 | clusters = boxes[np.random.choice( 44 | box_number, k, replace=False)] # init k clusters 45 | epoch = 0 46 | 47 | while True: 48 | distances = 1 - self.iou(boxes, clusters) 49 | 50 | current_nearest = np.argmin(distances, axis=1) 51 | if (last_nearest == current_nearest).all(): 52 | break # clusters won't change 53 | for cluster in range(k): 54 | clusters[cluster] = dist( # update clusters 55 | boxes[current_nearest == cluster], axis=0) 56 | 57 | last_nearest = current_nearest 58 | 59 | return clusters 60 | 61 | def result2txt(self, data): 62 | f = open("data/yolo_anchors.txt", 'w') 63 | row = np.shape(data)[0] 64 | for i in range(row): 65 | if i == 0: 66 | x_y = "%d,%d" % (data[i][0], data[i][1]) 67 | else: 68 | x_y = ", %d,%d" % (data[i][0], data[i][1]) 69 | f.write(x_y) 
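        # All k anchors end up on a single line as comma-separated "w,h" pairs;
        # get_anchors() in train.py (and _get_anchors() in predict.py) reads that
        # one line back, splits it on ',' and reshapes it to (-1, 2).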
70 | f.close() 71 | 72 | def txt2boxes(self): 73 | f = open(self.filename, 'r') 74 | dataSet = [] 75 | for line in f: 76 | infos = line.split(",") 77 | width = int(infos[2]) - int(infos[0]) 78 | height = int(infos[3]) - int(infos[1]) 79 | dataSet.append([width, height]) 80 | result = np.array(dataSet) 81 | f.close() 82 | return result 83 | 84 | def txt2clusters(self): 85 | all_boxes = self.txt2boxes() 86 | result = self.kmeans(all_boxes, k=self.cluster_number) 87 | result = result[np.lexsort(result.T[0, None])] 88 | self.result2txt(result) 89 | print("K anchors:\n {}".format(result)) 90 | print("Accuracy: {:.2f}%".format( 91 | self.avg_iou(all_boxes, result) * 100)) 92 | 93 | 94 | if __name__ == "__main__": 95 | cluster_number = 9 96 | filename = "data/k_mean_data.txt" 97 | kmeans = YOLO_Kmeans(cluster_number, filename) 98 | kmeans.txt2clusters() 99 | -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | import colorsys 2 | import os 3 | import numpy as np 4 | from keras import backend as K 5 | from keras.models import load_model 6 | from keras.layers import Input 7 | import cv2 8 | from yolo3.model import yolo_eval, yolo_body 9 | from keras.utils import multi_gpu_model 10 | import pandas as pd 11 | from tqdm import tqdm 12 | from ensemble_boxes import * 13 | import glob 14 | 15 | 16 | def create_model(input_shape, num_anchors, num_classes, weights_path): 17 | '''create the training model''' 18 | image_input = Input(shape=(None, None, 3)) 19 | h, w = input_shape 20 | 21 | model_body = yolo_body(image_input, num_anchors//3, num_classes) 22 | model_body.load_weights(weights_path) 23 | 24 | return model_body 25 | 26 | class YOLO(object): 27 | _defaults = { 28 | "model_path": 'models/trained_weights_final.h5', 29 | "anchors_path": 'data/yolo_anchors.txt', 30 | "classes_path": 'data/classes.txt', 31 | "score" : 0.001, 32 | "iou" : 0.25, 33 | "model_image_size" : (480, 480), 34 | "gpu_num" : 1, 35 | } 36 | 37 | @classmethod 38 | def get_defaults(cls, n): 39 | if n in cls._defaults: 40 | return cls._defaults[n] 41 | else: 42 | return "Unrecognized attribute name '" + n + "'" 43 | 44 | def __init__(self): 45 | self.__dict__.update(self._defaults) # set up default values 46 | self.class_names = self._get_class() 47 | self.anchors = self._get_anchors() 48 | self.sess = K.get_session() 49 | self.boxes, self.scores, self.classes = self.generate() 50 | 51 | def _get_class(self): 52 | classes_path = os.path.expanduser(self.classes_path) 53 | with open(classes_path) as f: 54 | class_names = f.readlines() 55 | class_names = [c.strip() for c in class_names] 56 | return class_names 57 | 58 | def _get_anchors(self): 59 | anchors_path = os.path.expanduser(self.anchors_path) 60 | with open(anchors_path) as f: 61 | anchors = f.readline() 62 | anchors = [float(x) for x in anchors.split(',')] 63 | return np.array(anchors).reshape(-1, 2) 64 | 65 | def generate(self): 66 | model_path = os.path.expanduser(self.model_path) 67 | assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.' 68 | 69 | # Load model, or construct model and load weights. 70 | num_anchors = len(self.anchors) 71 | num_classes = len(self.class_names) 72 | self.yolo_model = create_model(self.model_image_size, num_anchors, num_classes, self.model_path) 73 | 74 | print('{} model, anchors, and classes loaded.'.format(model_path)) 75 | 76 | # Generate colors for drawing bounding boxes. 
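        # One evenly spaced HSV hue per class, converted to 0-255 RGB and shuffled
        # with a fixed seed so the class-to-color mapping is stable across runs.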
77 | hsv_tuples = [(x / len(self.class_names), 1., 1.) 78 | for x in range(len(self.class_names))] 79 | self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) 80 | self.colors = list( 81 | map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), 82 | self.colors)) 83 | np.random.seed(10101) # Fixed seed for consistent colors across runs. 84 | np.random.shuffle(self.colors) # Shuffle colors to decorrelate adjacent classes. 85 | np.random.seed(None) # Reset seed to default. 86 | 87 | # Generate output tensor targets for filtered bounding boxes. 88 | self.input_image_shape = K.placeholder(shape=(2, )) 89 | if self.gpu_num>=2: 90 | self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num) 91 | boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors, 92 | len(self.class_names), self.input_image_shape, 93 | score_threshold=self.score, iou_threshold=self.iou) 94 | return boxes, scores, classes 95 | 96 | def detect_image(self, image): 97 | assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required' 98 | assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required' 99 | 100 | image_data = np.array(image, dtype='float32') 101 | image_data /= 255. 102 | image_data = np.expand_dims(image_data, 0) # Add batch dimension. 103 | 104 | out_boxes, out_scores, out_classes = self.sess.run( 105 | [self.boxes, self.scores, self.classes], 106 | feed_dict={ 107 | self.yolo_model.input: image_data, 108 | self.input_image_shape: [image.shape[1], image.shape[0]], 109 | K.learning_phase(): 0 110 | }) 111 | 112 | return out_classes, out_scores, out_boxes 113 | 114 | 115 | def detect_img(yolo, test, input_shape): 116 | class_label = ['holothurian', 'echinus', 'scallop', 'starfish'] 117 | name, image_id, confidence, xmin, ymin, xmax, ymax = \ 118 | [], [], [], [], [], [], [] 119 | 120 | for img in tqdm(test): 121 | image = cv2.imread(img) 122 | height, width, _ = cv2.imread(img.replace('test_B_augment', 'test-B-image')).shape 123 | scale = min(input_shape/width, input_shape/height) 124 | nw = int(width*scale) 125 | nh = int(height*scale) 126 | dx = (input_shape-nw) // 2 127 | dy = (input_shape-nh) // 2 128 | out_classes, out_scores, out_boxes = yolo.detect_image(image) 129 | 130 | # 采用WBF代替NMS 131 | out_boxes, out_scores, out_classes = weighted_boxes_fusion([out_boxes], [out_scores], [out_classes], weights=None, iou_thr=0.25, skip_box_thr=0.0) 132 | out_boxes = out_boxes.tolist() 133 | out_scores = out_scores.tolist() 134 | out_classes = out_classes.tolist() 135 | 136 | for v, i in enumerate(out_boxes): 137 | # 水草一类删除 138 | if int(out_classes[v]) == 4: 139 | continue 140 | 141 | ym, xm, ya, xa = i 142 | # 范围修正 143 | xm, ym, xa, ya = (xm-dx)/scale+1, (ym-dy)/scale+1, (xa-dx)/scale+1, (ya-dy)/scale+1 144 | 145 | # 防止大小混淆 146 | if xm > xa: 147 | xa, xm = xm, xa 148 | if ym > ya: 149 | ya, ym = ym, ya 150 | 151 | # 判断ground truth是否超出图像范围 152 | if xm > width or xa < 1: 153 | continue 154 | if ym > height or ya < 1: 155 | continue 156 | 157 | # 处于图像边缘候选框的处理 158 | if xm < 1: 159 | xm = 1 160 | if ym < 1: 161 | ym = 1 162 | if xa > width: 163 | xa = width 164 | if ya > height: 165 | ya = height 166 | 167 | # 判断是否为无效框 168 | x_distance = xa - xm 169 | y_distance = ya - ym 170 | if x_distance == 0 or y_distance == 0: 171 | continue 172 | 173 | # 四舍五入转换为int 174 | xm = int(round(xm)) 175 | ym = int(round(ym)) 176 | xa = int(round(xa)) 177 | ya = int(round(ya)) 178 | 179 | xmin.append(xm) 180 | ymin.append(ym) 181 | xmax.append(xa) 182 | ymax.append(ya) 183 | 184 | 
name.append(class_label[int(out_classes[v])]) 185 | confidence.append(out_scores[v]) 186 | image_id.append(img.replace('jpg', 'xml').lstrip(TEST_PATH)) 187 | 188 | save_csv(name, image_id, confidence, xmin, ymin, xmax, ymax) 189 | 190 | 191 | def save_csv(name, image_id, confidence, xmin, ymin, xmax, ymax): 192 | result_table = pd.DataFrame({"name": name, 193 | "image_id": image_id, 194 | "confidence":confidence, 195 | "xmin":xmin, 196 | "ymin":ymin, 197 | "xmax":xmax, 198 | "ymax":ymax}) 199 | result_table.to_csv("predict.csv", index=False) 200 | 201 | 202 | if __name__ == '__main__': 203 | input_shape = 480 204 | TEST_PATH = "data/test/test_B_augment/" 205 | TEST_NAME = glob.glob(TEST_PATH + "*.jpg") 206 | yolo = YOLO() 207 | 208 | detect_img(yolo, TEST_NAME, input_shape) 209 | -------------------------------------------------------------------------------- /swa.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | paper implementaton: 5 | Stochastic Weight Averaging: https://arxiv.org/abs/1803.05407 6 | 7 | """ 8 | 9 | import keras as K 10 | 11 | 12 | class SWA(K.callbacks.Callback): 13 | 14 | def __init__(self, filepath, SWA_START): 15 | super(SWA, self).__init__() 16 | self.filepath = filepath 17 | self.SWA_START = SWA_START 18 | 19 | def on_train_begin(self, logs=None): 20 | self.nb_epoch = self.params['epochs'] 21 | print('Stochastic weight averaging selected for last {} epochs.' 22 | .format(self.nb_epoch - self.SWA_START)) 23 | 24 | def on_epoch_begin(self, epoch, logs=None): 25 | lr = float(K.backend.get_value(self.model.optimizer.lr)) 26 | print('learning rate of current epoch is : {}'.format(lr)) 27 | 28 | def on_epoch_end(self, epoch, logs=None): 29 | if epoch == self.SWA_START: 30 | self.swa_weights = self.model.get_weights() 31 | 32 | elif epoch > self.SWA_START: 33 | for i, layer in enumerate(self.model.layers): 34 | self.swa_weights[i] = (self.swa_weights[i] * 35 | (epoch - self.SWA_START) + self.model.get_weights()[i]) / ( 36 | (epoch - self.SWA_START) + 1) 37 | else: 38 | pass 39 | 40 | def on_train_end(self, logs=None): 41 | self.model.set_weights(self.swa_weights) 42 | print('set stochastic weight average as final model parameters [FINISH].') 43 | # self.model.save_weights(self.filepath) 44 | # print('save final stochastic averaged weights model to file [FINISH].') 45 | 46 | 47 | class LearningRateDisplay(K.callbacks.Callback): 48 | def on_epoch_begin(self, epoch, logs=None): 49 | lr = float(K.backend.get_value(self.model.optimizer.lr)) 50 | print('learning rate of current epoch is : {}'.format(lr)) -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Retrain the YOLO model for your own dataset. 
3 | """ 4 | 5 | import numpy as np 6 | import keras.backend as K 7 | from keras.layers import Input, Lambda 8 | from keras.models import Model 9 | from keras_radam import RAdam 10 | from keras.callbacks import TensorBoard, ModelCheckpoint 11 | from cosine_annealing import CosineAnnealingScheduler 12 | import tensorflow as tf 13 | from swa import SWA 14 | 15 | from yolo3.model import preprocess_true_boxes, yolo_body, yolo_loss 16 | from yolo3.utils import get_random_data 17 | 18 | 19 | def _main(): 20 | annotation_path = 'data/train_data.txt' 21 | log_dir = 'models/' 22 | classes_path = 'data/classes.txt' 23 | anchors_path = 'data/yolo_anchors.txt' 24 | class_names = get_classes(classes_path) 25 | num_classes = len(class_names) 26 | anchors = get_anchors(anchors_path) 27 | 28 | input_shape = (480,480) # multiple of 32, hw 29 | 30 | model = create_model(input_shape, anchors, num_classes, 31 | freeze_body=2, weights_path='pre_train/yolo_weights.h5') # make sure you know what you freeze 32 | logging = TensorBoard(log_dir=log_dir) 33 | checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5', monitor='val_loss', save_weights_only=True, save_best_only=True, period=1) 34 | 35 | # use swa 36 | swa_start = 100 37 | swa_obj = SWA('',swa_start) 38 | 39 | # use cosine 40 | cosine = CosineAnnealingScheduler(init_epoch=100, T_max=200, eta_max=1e-2, eta_min=1e-6) 41 | 42 | val_split = 0.1 43 | with open(annotation_path) as f: 44 | lines = f.readlines() 45 | np.random.seed(10101) 46 | np.random.shuffle(lines) 47 | np.random.seed(None) 48 | num_val = int(len(lines)*val_split) 49 | num_train = len(lines) - num_val 50 | 51 | # Train with frozen layers first, to get a stable loss. 52 | # Adjust num epochs to your dataset. This step is enough to obtain a not bad model. 53 | if True: 54 | model.compile(optimizer=RAdam(warmup_proportion=0.1, min_lr=1e-5), loss={ 55 | # use custom yolo_loss Lambda layer. 56 | 'yolo_loss': lambda y_true, y_pred: y_pred}) 57 | 58 | batch_size = 32 59 | print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size)) 60 | model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes, trainable=True), 61 | steps_per_epoch=max(1, num_train//batch_size), 62 | validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes, trainable=False), 63 | validation_steps=max(1, num_val//batch_size), 64 | epochs=100, 65 | initial_epoch=0, 66 | callbacks=[logging]) 67 | model.save_weights(log_dir + 'trained_weights_stage_1.h5') 68 | 69 | # Unfreeze and continue training, to fine-tune. 70 | # Train longer if the result is not good. 
71 | 72 | if True: 73 | for i in range(len(model.layers)): 74 | model.layers[i].trainable = True 75 | model.compile(optimizer=RAdam(warmup_proportion=0.1, min_lr=1e-6), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the change 76 | print('Unfreeze all of the layers.') 77 | 78 | batch_size = 8 # note that more GPU memory is required after unfreezing the body 79 | print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size)) 80 | model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes, trainable=True), 81 | steps_per_epoch=max(1, num_train//batch_size), 82 | validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes, trainable=False), 83 | validation_steps=max(1, num_val//batch_size), 84 | epochs=300, 85 | initial_epoch=100, 86 | callbacks=[logging, cosine, swa_obj, checkpoint]) 87 | model.save_weights(log_dir + 'trained_weights_final.h5') 88 | 89 | # Further training if needed. 90 | 91 | 92 | def get_classes(classes_path): 93 | '''loads the classes''' 94 | with open(classes_path) as f: 95 | class_names = f.readlines() 96 | class_names = [c.strip() for c in class_names] 97 | return class_names 98 | 99 | def get_anchors(anchors_path): 100 | '''loads the anchors from a file''' 101 | with open(anchors_path) as f: 102 | anchors = f.readline() 103 | anchors = [float(x) for x in anchors.split(',')] 104 | return np.array(anchors).reshape(-1, 2) 105 | 106 | 107 | def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2, 108 | weights_path='model_data/yolo_weights.h5'): 109 | '''create the training model''' 110 | K.clear_session() # get a new session 111 | image_input = Input(shape=(None, None, 3)) 112 | h, w = input_shape 113 | num_anchors = len(anchors) 114 | 115 | y_true = [Input(shape=(h//{0:32, 1:16, 2:8}[l], w//{0:32, 1:16, 2:8}[l], \ 116 | num_anchors//3, num_classes+5)) for l in range(3)] 117 | 118 | model_body = yolo_body(image_input, num_anchors//3, num_classes) 119 | print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes)) 120 | 121 | if load_pretrained: 122 | model_body.load_weights(weights_path, by_name=True, skip_mismatch=True) 123 | print('Load weights {}.'.format(weights_path)) 124 | if freeze_body in [1, 2]: 125 | # Freeze darknet53 body or freeze all but 3 output layers. 
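            # freeze_body=1 freezes the first 185 layers (the Darknet-53 backbone);
            # freeze_body=2 freezes everything except the last three output
            # convolutions, which is what _main() passes for stage-1 training.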
126 | num = (185, len(model_body.layers)-3)[freeze_body-1] 127 | for i in range(num): model_body.layers[i].trainable = False 128 | print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers))) 129 | 130 | model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', 131 | arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})( 132 | [*model_body.output, *y_true]) 133 | model = Model([model_body.input, *y_true], model_loss) 134 | 135 | return model 136 | 137 | def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, trainable): 138 | '''data generator for fit_generator''' 139 | n = len(annotation_lines) 140 | i = 0 141 | while True: 142 | image_data = [] 143 | box_data = [] 144 | for b in range(batch_size): 145 | if i==0: 146 | np.random.shuffle(annotation_lines) 147 | image, box = get_random_data(annotation_lines[i], input_shape, trainable=trainable) 148 | while type(image) == type(None): 149 | i = (i+1) % n 150 | if i==0: 151 | np.random.shuffle(annotation_lines) 152 | image, box = get_random_data(annotation_lines[i], input_shape, trainable=trainable) 153 | image_data.append(image) 154 | box_data.append(box) 155 | i = (i+1) % n 156 | image_data = np.array(image_data) 157 | box_data = np.array(box_data) 158 | y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes) 159 | yield [image_data, *y_true], np.zeros(batch_size) 160 | 161 | def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes, trainable): 162 | n = len(annotation_lines) 163 | if n==0 or batch_size<=0: return None 164 | return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, trainable) 165 | 166 | if __name__ == '__main__': 167 | _main() 168 | -------------------------------------------------------------------------------- /yolo3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/yolo3/__init__.py -------------------------------------------------------------------------------- /yolo3/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/yolo3/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /yolo3/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/yolo3/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /yolo3/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fieldhunter/2020-ZhanJiang-Underwater-Object-Detection-Algorithm-Contest/b3d5e756766cff352acd2a0636e167f09f225514/yolo3/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /yolo3/model.py: -------------------------------------------------------------------------------- 1 | """YOLO_v3 Model Defined in Keras.""" 2 | 
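# This module defines the Darknet-53 backbone and three-scale detection head
# (yolo_body), feature-map decoding (yolo_head), box post-processing (yolo_eval),
# ground-truth encoding (preprocess_true_boxes) and the training loss (yolo_loss).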
3 | from functools import wraps 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D 9 | from keras.layers.advanced_activations import LeakyReLU 10 | from keras.layers.normalization import BatchNormalization 11 | from keras.models import Model 12 | from keras.regularizers import l2 13 | 14 | from yolo3.utils import compose 15 | 16 | 17 | @wraps(Conv2D) 18 | def DarknetConv2D(*args, **kwargs): 19 | """Wrapper to set Darknet parameters for Convolution2D.""" 20 | darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)} 21 | darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same' 22 | darknet_conv_kwargs.update(kwargs) 23 | return Conv2D(*args, **darknet_conv_kwargs) 24 | 25 | def DarknetConv2D_BN_Leaky(*args, **kwargs): 26 | """Darknet Convolution2D followed by BatchNormalization and LeakyReLU.""" 27 | no_bias_kwargs = {'use_bias': False} 28 | no_bias_kwargs.update(kwargs) 29 | return compose( 30 | DarknetConv2D(*args, **no_bias_kwargs), 31 | BatchNormalization(), 32 | LeakyReLU(alpha=0.1)) 33 | 34 | def resblock_body(x, num_filters, num_blocks): 35 | '''A series of resblocks starting with a downsampling Convolution2D''' 36 | # Darknet uses left and top padding instead of 'same' mode 37 | x = ZeroPadding2D(((1,0),(1,0)))(x) 38 | x = DarknetConv2D_BN_Leaky(num_filters, (3,3), strides=(2,2))(x) 39 | for i in range(num_blocks): 40 | y = compose( 41 | DarknetConv2D_BN_Leaky(num_filters//2, (1,1)), 42 | DarknetConv2D_BN_Leaky(num_filters, (3,3)))(x) 43 | x = Add()([x,y]) 44 | return x 45 | 46 | def darknet_body(x): 47 | '''Darknent body having 52 Convolution2D layers''' 48 | x = DarknetConv2D_BN_Leaky(32, (3,3))(x) 49 | x = resblock_body(x, 64, 1) 50 | x = resblock_body(x, 128, 2) 51 | x = resblock_body(x, 256, 8) 52 | x = resblock_body(x, 512, 8) 53 | x = resblock_body(x, 1024, 4) 54 | return x 55 | 56 | def make_last_layers(x, num_filters, out_filters): 57 | '''6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer''' 58 | x = compose( 59 | DarknetConv2D_BN_Leaky(num_filters, (1,1)), 60 | DarknetConv2D_BN_Leaky(num_filters*2, (3,3)), 61 | DarknetConv2D_BN_Leaky(num_filters, (1,1)), 62 | DarknetConv2D_BN_Leaky(num_filters*2, (3,3)), 63 | DarknetConv2D_BN_Leaky(num_filters, (1,1)))(x) 64 | y = compose( 65 | DarknetConv2D_BN_Leaky(num_filters*2, (3,3)), 66 | DarknetConv2D(out_filters, (1,1)))(x) 67 | return x, y 68 | 69 | 70 | def yolo_body(inputs, num_anchors, num_classes): 71 | """Create YOLO_V3 model CNN body in Keras.""" 72 | darknet = Model(inputs, darknet_body(inputs)) 73 | x, y1 = make_last_layers(darknet.output, 512, num_anchors*(num_classes+5)) 74 | 75 | x = compose( 76 | DarknetConv2D_BN_Leaky(256, (1,1)), 77 | UpSampling2D(2))(x) 78 | x = Concatenate()([x,darknet.layers[152].output]) 79 | x, y2 = make_last_layers(x, 256, num_anchors*(num_classes+5)) 80 | 81 | x = compose( 82 | DarknetConv2D_BN_Leaky(128, (1,1)), 83 | UpSampling2D(2))(x) 84 | x = Concatenate()([x,darknet.layers[92].output]) 85 | x, y3 = make_last_layers(x, 128, num_anchors*(num_classes+5)) 86 | 87 | return Model(inputs, [y1,y2,y3]) 88 | 89 | 90 | def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False): 91 | """Convert final layer features to bounding box parameters.""" 92 | num_anchors = len(anchors) 93 | # Reshape to batch, height, width, num_anchors, box_params. 
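    # YOLOv3 decoding: box centres are sigmoid(tx, ty) plus the grid-cell offset,
    # normalised by the grid shape; widths/heights are exp(tw, th) scaled by the
    # anchors and normalised by the input shape; objectness and class scores each
    # pass through a sigmoid.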
94 | anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2]) 95 | 96 | grid_shape = K.shape(feats)[1:3] # height, width 97 | grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]), 98 | [1, grid_shape[1], 1, 1]) 99 | grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]), 100 | [grid_shape[0], 1, 1, 1]) 101 | grid = K.concatenate([grid_x, grid_y]) 102 | grid = K.cast(grid, K.dtype(feats)) 103 | 104 | feats = K.reshape( 105 | feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5]) 106 | 107 | # Adjust preditions to each spatial grid point and anchor size. 108 | box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats)) 109 | box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats)) 110 | box_confidence = K.sigmoid(feats[..., 4:5]) 111 | box_class_probs = K.sigmoid(feats[..., 5:]) 112 | 113 | if calc_loss == True: 114 | return grid, feats, box_xy, box_wh 115 | return box_xy, box_wh, box_confidence, box_class_probs 116 | 117 | 118 | def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape): 119 | '''Get corrected boxes''' 120 | box_yx = box_xy[..., ::-1] 121 | box_hw = box_wh[..., ::-1] 122 | input_shape = K.cast(input_shape, K.dtype(box_yx)) 123 | image_shape = K.cast(image_shape, K.dtype(box_yx)) 124 | new_shape = K.round(image_shape * K.min(input_shape/image_shape)) 125 | offset = (input_shape-new_shape)/2./input_shape 126 | scale = input_shape/new_shape 127 | box_yx = (box_yx - offset) * scale 128 | box_hw *= scale 129 | 130 | box_mins = box_yx - (box_hw / 2.) 131 | box_maxes = box_yx + (box_hw / 2.) 132 | boxes = K.concatenate([ 133 | box_mins[..., 0:1], # y_min 134 | box_mins[..., 1:2], # x_min 135 | box_maxes[..., 0:1], # y_max 136 | box_maxes[..., 1:2] # x_max 137 | ]) 138 | 139 | # Scale boxes back to original image shape. 140 | boxes *= K.concatenate([image_shape, image_shape]) 141 | return boxes 142 | 143 | 144 | def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape): 145 | '''Process Conv layer output''' 146 | box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats, 147 | anchors, num_classes, input_shape) 148 | boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape) 149 | boxes = K.reshape(boxes, [-1, 4]) 150 | box_scores = box_confidence * box_class_probs 151 | box_scores = K.reshape(box_scores, [-1, num_classes]) 152 | return boxes, box_scores 153 | 154 | 155 | def yolo_eval(yolo_outputs, 156 | anchors, 157 | num_classes, 158 | image_shape, 159 | max_boxes=20, 160 | score_threshold=.6, 161 | iou_threshold=.5): 162 | """Evaluate YOLO model on given input and return filtered boxes.""" 163 | num_layers = len(yolo_outputs) 164 | anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] # default setting 165 | input_shape = K.shape(yolo_outputs[0])[1:3] * 32 166 | boxes = [] 167 | box_scores = [] 168 | for l in range(num_layers): 169 | _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l], 170 | anchors[anchor_mask[l]], num_classes, input_shape, image_shape) 171 | boxes.append(_boxes) 172 | box_scores.append(_box_scores) 173 | boxes = K.concatenate(boxes, axis=0) 174 | box_scores = K.concatenate(box_scores, axis=0) 175 | 176 | mask = box_scores >= score_threshold 177 | max_boxes_tensor = K.constant(max_boxes, dtype='int32') 178 | boxes_ = [] 179 | scores_ = [] 180 | classes_ = [] 181 | for c in range(num_classes): 182 | # TODO: use keras backend instead of tf. 
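        # Per-class NMS is intentionally left commented out below: every box above
        # score_threshold is returned, and duplicates are merged later with
        # weighted_boxes_fusion() in predict.py.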
183 | class_boxes = tf.boolean_mask(boxes, mask[:, c]) 184 | class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c]) 185 | # nms_index = tf.image.non_max_suppression( 186 | # class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold) 187 | # class_boxes = K.gather(class_boxes, nms_index) 188 | # class_box_scores = K.gather(class_box_scores, nms_index) 189 | classes = K.ones_like(class_box_scores, 'int32') * c 190 | boxes_.append(class_boxes) 191 | scores_.append(class_box_scores) 192 | classes_.append(classes) 193 | boxes_ = K.concatenate(boxes_, axis=0) 194 | scores_ = K.concatenate(scores_, axis=0) 195 | classes_ = K.concatenate(classes_, axis=0) 196 | 197 | return boxes_, scores_, classes_ 198 | 199 | 200 | def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes): 201 | '''Preprocess true boxes to training input format 202 | 203 | Parameters 204 | ---------- 205 | true_boxes: array, shape=(m, T, 5) 206 | Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape. 207 | input_shape: array-like, hw, multiples of 32 208 | anchors: array, shape=(N, 2), wh 209 | num_classes: integer 210 | 211 | Returns 212 | ------- 213 | y_true: list of array, shape like yolo_outputs, xywh are reletive value 214 | 215 | ''' 216 | assert (true_boxes[..., 4]0 237 | 238 | for b in range(m): 239 | # Discard zero rows. 240 | wh = boxes_wh[b, valid_mask[b]] 241 | if len(wh)==0: continue 242 | # Expand dim to apply broadcasting. 243 | wh = np.expand_dims(wh, -2) 244 | box_maxes = wh / 2. 245 | box_mins = -box_maxes 246 | 247 | intersect_mins = np.maximum(box_mins, anchor_mins) 248 | intersect_maxes = np.minimum(box_maxes, anchor_maxes) 249 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.) 250 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 251 | box_area = wh[..., 0] * wh[..., 1] 252 | anchor_area = anchors[..., 0] * anchors[..., 1] 253 | iou = intersect_area / (box_area + anchor_area - intersect_area) 254 | 255 | # Find best anchor for each true box 256 | best_anchor = np.argmax(iou, axis=-1) 257 | 258 | for t, n in enumerate(best_anchor): 259 | for l in range(num_layers): 260 | if n in anchor_mask[l]: 261 | i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32') 262 | j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32') 263 | k = anchor_mask[l].index(n) 264 | c = true_boxes[b,t, 4].astype('int32') 265 | y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4] 266 | y_true[l][b, j, i, k, 4] = 1 267 | y_true[l][b, j, i, k, 5+c] = 1 268 | 269 | return y_true 270 | 271 | 272 | def box_iou(b1, b2): 273 | '''Return iou tensor 274 | 275 | Parameters 276 | ---------- 277 | b1: tensor, shape=(i1,...,iN, 4), xywh 278 | b2: tensor, shape=(j, 4), xywh 279 | 280 | Returns 281 | ------- 282 | iou: tensor, shape=(i1,...,iN, j) 283 | 284 | ''' 285 | 286 | # Expand dim to apply broadcasting. 287 | b1 = K.expand_dims(b1, -2) 288 | b1_xy = b1[..., :2] 289 | b1_wh = b1[..., 2:4] 290 | b1_wh_half = b1_wh/2. 291 | b1_mins = b1_xy - b1_wh_half 292 | b1_maxes = b1_xy + b1_wh_half 293 | 294 | # Expand dim to apply broadcasting. 295 | b2 = K.expand_dims(b2, 0) 296 | b2_xy = b2[..., :2] 297 | b2_wh = b2[..., 2:4] 298 | b2_wh_half = b2_wh/2. 299 | b2_mins = b2_xy - b2_wh_half 300 | b2_maxes = b2_xy + b2_wh_half 301 | 302 | intersect_mins = K.maximum(b1_mins, b2_mins) 303 | intersect_maxes = K.minimum(b1_maxes, b2_maxes) 304 | intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.) 
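    # The broadcasted result holds one IoU per (b1 box, b2 box) pair; yolo_loss
    # takes the max over the true boxes to find each prediction's best match.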
305 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 306 | b1_area = b1_wh[..., 0] * b1_wh[..., 1] 307 | b2_area = b2_wh[..., 0] * b2_wh[..., 1] 308 | iou = intersect_area / (b1_area + b2_area - intersect_area) 309 | 310 | return iou 311 | 312 | 313 | def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False): 314 | '''Return yolo_loss tensor 315 | 316 | Parameters 317 | ---------- 318 | yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body 319 | y_true: list of array, the output of preprocess_true_boxes 320 | anchors: array, shape=(N, 2), wh 321 | num_classes: integer 322 | ignore_thresh: float, the iou threshold whether to ignore object confidence loss 323 | 324 | Returns 325 | ------- 326 | loss: tensor, shape=(1,) 327 | 328 | ''' 329 | num_layers = len(anchors)//3 # default setting 330 | yolo_outputs = args[:num_layers] 331 | y_true = args[num_layers:] 332 | anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] 333 | input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0])) 334 | grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)] 335 | loss = 0 336 | m = K.shape(yolo_outputs[0])[0] # batch size, tensor 337 | mf = K.cast(m, K.dtype(yolo_outputs[0])) 338 | 339 | for l in range(num_layers): 340 | object_mask = y_true[l][..., 4:5] 341 | true_class_probs = y_true[l][..., 5:] 342 | 343 | grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l], 344 | anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True) 345 | pred_box = K.concatenate([pred_xy, pred_wh]) 346 | 347 | # Darknet raw box to calculate loss. 348 | raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid 349 | raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1]) 350 | raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf 351 | box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4] 352 | 353 | # Find ignore mask, iterate over each of batch. 
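        # Each image's predicted boxes are compared (by IoU) with all of its
        # ground-truth boxes; predictions whose best IoU is above ignore_thresh
        # are masked out of the no-object confidence loss so near-misses are not
        # penalised as false positives.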
354 | ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True) 355 | object_mask_bool = K.cast(object_mask, 'bool') 356 | def loop_body(b, ignore_mask): 357 | true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0]) 358 | iou = box_iou(pred_box[b], true_box) 359 | best_iou = K.max(iou, axis=-1) 360 | ignore_mask = ignore_mask.write(b, K.cast(best_iou= len(bboxes): 71 | break 72 | 73 | i = bboxes[n] 74 | if (i[2]-i[0]) * (i[3]-i[1]) < 120: 75 | del bboxes[n] 76 | del category_id[n] 77 | if len(bboxes) == 0: 78 | return None, None 79 | 80 | augmented['bboxes'] = bboxes 81 | augmented['category_id'] = category_id 82 | # 随机剪裁 83 | else: 84 | annotations = {'image': image, 85 | 'bboxes':bboxes, 86 | 'category_id': category_id} 87 | aug = get_aug([RandomCrop(p=1, height=nh, width=nw)], 88 | min_visibility=0.3) 89 | augmented = aug(**annotations) 90 | if len(augmented['bboxes']) == 0: 91 | return None, None 92 | 93 | # 图像轻度增强 94 | if rand() < .5: 95 | aug = get_aug([ A.Compose([ 96 | A.RandomBrightness(p=1), 97 | A.RandomContrast(p=1), 98 | A.RandomGamma(p=1), 99 | A.CLAHE(p=1), 100 | ], p=1)]) 101 | augmented = aug(**augmented) 102 | 103 | # 水平和垂直翻转 104 | if rand() < .5: 105 | aug = get_aug([VerticalFlip(p=1)]) 106 | augmented = aug(**augmented) 107 | if rand() < .5: 108 | aug = get_aug([HorizontalFlip(p=1)]) 109 | augmented = aug(**augmented) 110 | else: 111 | # 测试集直接缩放 112 | annotations = {'image': image, 113 | 'bboxes':bboxes, 114 | 'category_id': category_id} 115 | aug = get_aug([Resize(p=1, height=nh, width=nw)]) 116 | augmented = aug(**annotations) 117 | 118 | # 标签数组和候选框数组合并 119 | image = augmented['image'] 120 | box = augmented['bboxes'] 121 | category_id = augmented['category_id'] 122 | for n in range(len(box)): 123 | box[n] = list(map(lambda x : round(x), box[n])) 124 | box[n].append(category_id[n]) 125 | 126 | # 用灰色像素块来做背景扩充图片满足输入尺寸需求 127 | dx = (w-nw) // 2 128 | dy = (h-nh) // 2 129 | image = np.pad(image, ((dy, dy), (dx, dx), (0, 0)), 130 | 'constant', constant_values=128) 131 | if tuple(image.shape[:2]) != input_shape: 132 | image = np.pad(image, ((0, input_shape[0]-image.shape[0]), 133 | (0, input_shape[1]-image.shape[1]), (0, 0)), 134 | 'constant', constant_values=128) 135 | 136 | # 模型输入格式处理 137 | image = image / 255. 
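    # Boxes go into a fixed-size (max_boxes, 5) array of
    # [xmin, ymin, xmax, ymax, class_id]; the dx/dy padding offsets applied to the
    # image are added to the coordinates so they stay aligned, and unused rows
    # stay zero.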
138 | box_data = np.zeros((max_boxes,5)) 139 | box = np.array([np.array(box[i]) for i in range(len(box))]) 140 | box[:, [0,2]] = box[:, [0,2]] + dx 141 | box[:, [1,3]] = box[:, [1,3]] + dy 142 | box_data[:len(box)] = box 143 | 144 | return image, box_data 145 | -------------------------------------------------------------------------------- /yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=16 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | 
pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 
418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .5 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers 
= -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .5 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .5 787 | truth_thresh = 1 788 | random=1 789 | 790 | --------------------------------------------------------------------------------