├── README.md
├── code
│   ├── DataAugmentForTrain.py
│   ├── DataAugmentForValid.py
│   ├── del_copy_for_train.py
│   ├── extract_xml.py
│   ├── main.py
│   ├── merge.py
│   ├── split_good_bad.py
│   └── xml_helper.py
├── data
│   └── 存放原始数据 (placeholder: put the raw data here)
├── submit
│   └── 最终提交文件 (placeholder: the final submission files)
└── version.txt

/README.md:
--------------------------------------------------------------------------------

## Competition
- The task is to build a model that, given images of fabric samples, identifies the type of defect by analyzing its shape, length, area and position. The goal is to make fabric-defect inspection more accurate and efficient and to reduce its dependence on manual labor.
- [Competition link](https://tianchi.aliyun.com/competition/information.htm?spm=5176.100067.5678.2.70731756uJzvoZ&raceId=231666)

## Files
- code: all project code
    - main.py: entry point; runs model training and prediction and produces the final result
    - split_good_bad.py: splits the raw images into `good` and `bad` folders according to whether they contain defects
    - extract_xml.py: collects all xml annotation files into the `xml` folder
    - DataAugmentForTrain.py: offline augmentation of the training data
    - DataAugmentForValid.py: builds the validation set from augmented data
    - del_copy_for_train.py: copies both the augmented and the original data into the `data_for_train` folder used for training
    - merge.py: merges the individual result files into the final submission
- data: raw data files, [official download](https://tianchi.aliyun.com/competition/information.htm?spm=5176.11165261.5678.2.164f419dba7Pjd&raceId=231666)
- submit: submission files

## Usage
- step 1: unpack the raw data archives by hand, and make sure the extracted file names are not garbled!
- step 2: **manually patch the Keras sources to add an anti-aliased interpolation mode; Keras' default resize interpolation produces ripple artifacts**:
    - after line 33 of ~/anaconda2/lib/python2.7/site-packages/keras_preprocessing/image.py, add `'antialias': pil_image.ANTIALIAS`, so that the dict becomes:

    ```python
    _PIL_INTERPOLATION_METHODS = {
        'nearest': pil_image.NEAREST,
        'bilinear': pil_image.BILINEAR,
        'bicubic': pil_image.BICUBIC,
        'antialias': pil_image.ANTIALIAS,  # added by mao
    }
    ```

    A quick way to confirm the patch took effect is shown in the appendix at the end of this README.
- step 3: run main.py

## Approach
- The team worked along two branches: mine uses Keras, my teammate's uses PyTorch, so there are two train/predict stages, part1 and part2 (see the comments in main.py).
- part1:
    - offline augmentation: every image is expanded into two, using cropping, brightness changes, added noise, cutout, and so on; the xml annotations are used so that the bounding boxes are transformed together with the image
    - online augmentation: Keras' built-in augmentation with rotation, mirroring, shear, etc.
    - model: DenseNet
    - modified loss ([reference](https://spaces.ac.cn/archives/4493); see the comment block next to `mycrossentropy` in main.py)
- part2:
    - augmentation: online only, using PyTorch's built-in transforms
    - model: ResNet-152
- A single model scores around 92 online; an ensemble of three DenseNet models reaches about 93.8% online.
- The final submission fuses three DenseNets with one ResNet (trained by my teammate in PyTorch) and reached **94.9% online, rank 9/2403**.
- **Note: the models are initialized from ImageNet pre-trained weights, which are downloaded automatically before training starts.**

## Randomness
- Both the offline and the online augmentations are random, so results may fluctuate around the best online score.

## Misc
- Related scripts for object-detection data augmentation and preprocessing: [Data_Preprocess_For_CV](https://github.com/maozezhong/Data_Preprocess_For_CV)
- part2 was produced by my teammate with PyTorch: a finetuned ResNet-152 trained on the original data with online mirror augmentation and inputs resized to 800.
- The uploaded code does not include my teammate's part.
- To reach the 94.9 score, change the model in main.py to ResNet-152, train one more model, and fuse it in.
- Training and fusing a few more models would probably push the online score up further.
- [Semi-final code (updated)](https://github.com/maozezhong/TIANCHI_XUELANG_AI_2)
- Competition data
    - [Preliminary round, part1](https://pan.baidu.com/s/1KoZcXKCCaWLWfGc5Q4gCjg), password: 2qdn
    - [Preliminary round, part2](https://pan.baidu.com/s/1c0o7WKm-ETPcIyF6JPS3Wg), password: jq9a
    - [Semi-final](https://pan.baidu.com/s/1wuA0VT7E7SBtkrvarfPCcw), password: vyj9
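
## Appendix: checking the Keras patch
The snippet below is a small, optional sanity check for step 2 of the usage instructions. It is a minimal sketch that only assumes `keras_preprocessing` is importable and that the patch added an `'antialias'` entry to `_PIL_INTERPOLATION_METHODS` as shown above.

```python
# Sanity check for the manual Keras patch (run once after editing image.py).
from keras_preprocessing import image as kp_image

assert 'antialias' in kp_image._PIL_INTERPOLATION_METHODS, \
    "patch missing: add the 'antialias' entry to _PIL_INTERPOLATION_METHODS"
print('antialias interpolation available')
```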
--------------------------------------------------------------------------------
/code/DataAugmentForTrain.py:
--------------------------------------------------------------------------------

# -*- coding=utf-8 -*-
##############################################################
# description:
#     data augmentation for object detection
# author:
#     maozezhong 2018-6-27
##############################################################

# Included transforms:
#     1. crop (bboxes must be updated)
#     2. shift (bboxes must be updated)
#     3. change brightness
#     4. add noise
#     5. rotate (bboxes must be updated)
#     6. mirror / flip (bboxes must be updated)
#     7. cutout
# Note:
#     random.seed(): the same seed always produces the same random sequence!

import time
import random
import cv2
import os
import math
import numpy as np
from skimage.util import random_noise
from skimage import exposure

def show_pic(img, bboxes=None):
    '''
    Inputs:
        img: image array
        bboxes: all bounding boxes of the image, a list of [x_min, y_min, x_max, y_max]
    '''
    cv2.imwrite('./1.jpg', img)
    img = cv2.imread('./1.jpg')
    for i in range(len(bboxes)):
        bbox = bboxes[i]
        x_min = bbox[0]
        y_min = bbox[1]
        x_max = bbox[2]
        y_max = bbox[3]
        cv2.rectangle(img, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
    cv2.namedWindow('pic', 0)            # 0: resizable window (1 would keep the original size)
    cv2.moveWindow('pic', 0, 0)
    cv2.resizeWindow('pic', 1200, 800)   # display size
    cv2.imshow('pic', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    os.remove('./1.jpg')

# all images are read with cv2 (numpy arrays, BGR)
class DataAugmentForObjectDetection():
    def __init__(self, rotation_rate=0.5, max_rotation_angle=5,
                 crop_rate=0.5, shift_rate=0.5, change_light_rate=0.5,
                 add_noise_rate=0.5, flip_rate=0.5, cutout_rate=0.5):
        self.rotation_rate = rotation_rate
        self.max_rotation_angle = max_rotation_angle
        self.crop_rate = crop_rate
        self.shift_rate = shift_rate
        self.change_light_rate = change_light_rate
        self.add_noise_rate = add_noise_rate
        self.flip_rate = flip_rate
        self.cutout_rate = cutout_rate

    # add noise
    def _addNoise(self, img):
        '''
        Input:
            img: image array
        Output:
            image array with noise added; random_noise returns pixels in
            [0, 1], so the result has to be scaled back by 255
        '''
        # random.seed(int(time.time()))
        # return random_noise(img, mode='gaussian', seed=int(time.time()), clip=True)*255
        return random_noise(img, mode='gaussian', clip=True)*255

    # adjust brightness
    def _changeLight(self, img):
        # random.seed(int(time.time()))
        flag = random.uniform(0.5, 1.5)  # gamma > 1 darkens the image, gamma < 1 brightens it
        return exposure.adjust_gamma(img, flag)

    # cutout
    def _cutout(self, img, bboxes, length=100, n_holes=1, threshold=0.5):
        '''
        Original implementation: https://github.com/uoguelph-mlrg/Cutout/blob/master/util/cutout.py
        Randomly mask out one or more patches from an image.
        Args:
            img : a 3D numpy array, (h, w, c)
            bboxes : bounding box coordinates
            n_holes (int): Number of patches to cut out of each image.
            length (int): The length (in pixels) of each square patch.
        '''

        def cal_iou(boxA, boxB):
            '''
            Returns the overlap of boxA and boxB normalized by the area of
            boxB (boxB is the ground-truth bounding box), i.e. not the
            standard IoU.
            '''
            # determine the (x, y)-coordinates of the intersection rectangle
            xA = max(boxA[0], boxB[0])
            yA = max(boxA[1], boxB[1])
            xB = min(boxA[2], boxB[2])
            yB = min(boxA[3], boxB[3])

            if xB <= xA or yB <= yA:
                return 0.0

            # compute the area of the intersection rectangle
            interArea = (xB - xA + 1) * (yB - yA + 1)

            # compute the area of both rectangles
            boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
            boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

            # standard IoU would divide by the union area; here the
            # intersection is divided by the ground-truth area only
            # iou = interArea / float(boxAArea + boxBArea - interArea)
            iou = interArea / float(boxBArea)

            return iou

        # get h and w
        if img.ndim == 3:
            h, w, c = img.shape
        else:
            _, h, w, c = img.shape

        mask = np.ones((h, w, c), np.float32)

        for n in range(n_holes):

            chongdie = True  # whether the cut region overlaps a box too much

            while chongdie:
                y = np.random.randint(h)
                x = np.random.randint(w)

                y1 = np.clip(y - length // 2, 0, h)  # np.clip(a, a_min, a_max) limits the values of a to [a_min, a_max]
                y2 = np.clip(y + length // 2, 0, h)
                x1 = np.clip(x - length // 2, 0, w)
                x2 = np.clip(x + length // 2, 0, w)

                chongdie = False
                for box in bboxes:
                    if cal_iou([x1, y1, x2, y2], box) > threshold:
                        chongdie = True
                        break

            mask[y1: y2, x1: x2, :] = 0.

        # mask = np.expand_dims(mask, axis=0)
        img = img * mask

        return img

    # rotate
    def _rotate_img_bbox(self, img, bboxes, angle=5, scale=1.):
        '''
        Reference: https://blog.csdn.net/u014540717/article/details/53301195
        Inputs:
            img: image array, (h, w, c)
            bboxes: all bounding boxes of the image, a list whose elements are
                    [x_min, y_min, x_max, y_max]; values must be numeric
            angle: rotation angle
            scale: defaults to 1
        Outputs:
            rot_img: rotated image array
            rot_bboxes: list of rotated bounding box coordinates
        '''
        #---------------------- rotate the image ----------------------
        w = img.shape[1]
        h = img.shape[0]
        # degrees to radians
        rangle = np.deg2rad(angle)  # angle in radians
        # now calculate new image width and height
        nw = (abs(np.sin(rangle)*h) + abs(np.cos(rangle)*w))*scale
        nh = (abs(np.cos(rangle)*h) + abs(np.sin(rangle)*w))*scale
        # ask OpenCV for the rotation matrix
        rot_mat = cv2.getRotationMatrix2D((nw*0.5, nh*0.5), angle, scale)
        # calculate the move from the old center to the new center combined
        # with the rotation
        rot_move = np.dot(rot_mat, np.array([(nw-w)*0.5, (nh-h)*0.5, 0]))
        # the move only affects the translation, so update the translation
        # part of the transform
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        # affine transform
        rot_img = cv2.warpAffine(img, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))), flags=cv2.INTER_LANCZOS4)

        #---------------------- correct the bbox coordinates ----------------------
        # rot_mat is the final rotation matrix
        # take the midpoints of the four bbox edges and map them into the rotated frame
        rot_bboxes = list()
        for bbox in bboxes:
            xmin = bbox[0]
            ymin = bbox[1]
            xmax = bbox[2]
            ymax = bbox[3]
            point1 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymin, 1]))
            point2 = np.dot(rot_mat, np.array([xmax, (ymin+ymax)/2, 1]))
            point3 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymax, 1]))
            point4 = np.dot(rot_mat, np.array([xmin, (ymin+ymax)/2, 1]))
            # stack into one array
            concat = np.vstack((point1, point2, point3, point4))
            # convert the dtype
            concat = concat.astype(np.int32)
            # bounding rectangle of the rotated points
            rx, ry, rw, rh = cv2.boundingRect(concat)
            rx_min = rx
            ry_min = ry
            rx_max = rx+rw
            ry_max = ry+rh
            # append to the list
            rot_bboxes.append([rx_min, ry_min, rx_max, ry_max])

        return rot_img, rot_bboxes

    # crop
    def _crop_img_bboxes(self, img, bboxes):
        '''
        The cropped image must still contain all the boxes.
        Inputs:
            img: image array
            bboxes: all bounding boxes of the image, a list whose elements are
                    [x_min, y_min, x_max, y_max]; values must be numeric
        Outputs:
            crop_img: cropped image array
            crop_bboxes: list of cropped bounding box coordinates
        '''
        #---------------------- crop the image ----------------------
        w = img.shape[1]
        h = img.shape[0]
        x_min = w  # smallest rectangle containing all target boxes
        x_max = 0
        y_min = h
        y_max = 0
        for bbox in bboxes:
            x_min = min(x_min, bbox[0])
            y_min = min(y_min, bbox[1])
            x_max = max(x_max, bbox[2])
            y_max = max(y_max, bbox[3])

        d_to_left = x_min        # distance from that rectangle to the left edge
        d_to_right = w - x_max   # distance to the right edge
        d_to_top = y_min         # distance to the top edge
        d_to_bottom = h - y_max  # distance to the bottom edge

        # randomly expand the minimal rectangle
        crop_x_min = int(x_min - random.uniform(0, d_to_left))
        crop_y_min = int(y_min - random.uniform(0, d_to_top))
        crop_x_max = int(x_max + random.uniform(0, d_to_right))
        crop_y_max = int(y_max + random.uniform(0, d_to_bottom))

        # randomly expand the minimal rectangle, but keep the crop from getting too small
        # crop_x_min = int(x_min - random.uniform(d_to_left//2, d_to_left))
        # crop_y_min = int(y_min - random.uniform(d_to_top//2, d_to_top))
        # crop_x_max = int(x_max + random.uniform(d_to_right//2, d_to_right))
        # crop_y_max = int(y_max + random.uniform(d_to_bottom//2, d_to_bottom))

        # make sure we stay inside the image
        crop_x_min = max(0, crop_x_min)
        crop_y_min = max(0, crop_y_min)
        crop_x_max = min(w, crop_x_max)
        crop_y_max = min(h, crop_y_max)

        crop_img = img[crop_y_min:crop_y_max, crop_x_min:crop_x_max]

        #---------------------- crop the bounding boxes ----------------------
        # recompute the box coordinates in the cropped frame
        crop_bboxes = list()
        for bbox in bboxes:
            crop_bboxes.append([bbox[0]-crop_x_min, bbox[1]-crop_y_min, bbox[2]-crop_x_min, bbox[3]-crop_y_min])

        return crop_img, crop_bboxes

    # shift
    def _shift_pic_bboxes(self, img, bboxes):
        '''
        Reference: https://blog.csdn.net/sty945/article/details/79387054
        The shifted image must still contain all the boxes.
        Inputs:
            img: image array
            bboxes: all bounding boxes of the image, a list whose elements are
                    [x_min, y_min, x_max, y_max]; values must be numeric
        Outputs:
            shift_img: shifted image array
            shift_bboxes: list of shifted bounding box coordinates
        '''
        #---------------------- shift the image ----------------------
        w = img.shape[1]
        h = img.shape[0]
        x_min = w  # smallest rectangle containing all target boxes
        x_max = 0
        y_min = h
        y_max = 0
        for bbox in bboxes:
            x_min = min(x_min, bbox[0])
            y_min = min(y_min, bbox[1])
            x_max = max(x_max, bbox[2])
            y_max = max(y_max, bbox[3])

        d_to_left = x_min        # maximum shift to the left that keeps all boxes inside
        d_to_right = w - x_max   # maximum shift to the right
        d_to_top = y_min         # maximum shift up
        d_to_bottom = h - y_max  # maximum shift down

        x = random.uniform(-(d_to_left-1) / 3, (d_to_right-1) / 3)
        y = random.uniform(-(d_to_top-1) / 3, (d_to_bottom-1) / 3)

        M = np.float32([[1, 0, x], [0, 1, y]])  # x: horizontal shift in pixels (positive = right); y: vertical shift (positive = down)
        shift_img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))

        #---------------------- shift the bounding boxes ----------------------
        shift_bboxes = list()
        for bbox in bboxes:
            shift_bboxes.append([bbox[0]+x, bbox[1]+y, bbox[2]+x, bbox[3]+y])

        return shift_img, shift_bboxes

    # mirror
    def _filp_pic_bboxes(self, img, bboxes):
        '''
        Reference: https://blog.csdn.net/jningwei/article/details/78753607
        The flipped image must still contain all the boxes.
        Inputs:
            img: image array
            bboxes: all bounding boxes of the image, a list whose elements are
                    [x_min, y_min, x_max, y_max]; values must be numeric
        Outputs:
            flip_img: flipped image array
            flip_bboxes: list of flipped bounding box coordinates
        '''
        # ---------------------- flip the image ----------------------
        import copy
        flip_img = copy.deepcopy(img)
        if random.random() < 0.5:  # flip horizontally with probability 0.5, otherwise vertically
            horizon = True
        else:
            horizon = False
        h, w, _ = img.shape
        if horizon:  # horizontal flip: cv2.flip with flipCode=1 mirrors around the y axis
            flip_img = cv2.flip(flip_img, 1)
        else:        # vertical flip: flipCode=0 mirrors around the x axis
            flip_img = cv2.flip(flip_img, 0)

        # ---------------------- adjust the bounding boxes ----------------------
        flip_bboxes = list()
        for box in bboxes:
            x_min = box[0]
            y_min = box[1]
            x_max = box[2]
            y_max = box[3]
            if horizon:
                flip_bboxes.append([w-x_max, y_min, w-x_min, y_max])
            else:
                flip_bboxes.append([x_min, h-y_max, x_max, h-y_min])

        return flip_img, flip_bboxes

    def dataAugment(self, img, bboxes):
        '''
        Apply the augmentations.
        Inputs:
            img: image array
            bboxes: all box coordinates of the image
        Outputs:
            img: augmented image
            bboxes: boxes matching the augmented image
        '''
        change_num = 0  # number of transforms applied
        # print('------')
        while change_num < 1:  # make sure at least one augmentation fires
            if random.random() < 0.8:  # self.crop_rate: # crop
                # print('crop')
                change_num += 1
                img, bboxes = self._crop_img_bboxes(img, bboxes)

            # if random.random() < self.rotation_rate:  # rotate
            #     print('rotate')
            #     change_num += 1
            #     # angle = random.uniform(-self.max_rotation_angle, self.max_rotation_angle)
            #     angle = random.sample([90, 180, 270], 1)[0]
            #     scale = random.uniform(0.7, 0.8)
            #     img, bboxes = self._rotate_img_bbox(img, bboxes, angle, scale)

            # if random.random() < self.shift_rate:  # shift
            #     # print('shift')
            #     change_num += 1
            #     img, bboxes = self._shift_pic_bboxes(img, bboxes)

            if random.random() < self.change_light_rate:  # change brightness
                # print('brightness')
                change_num += 1
                img = self._changeLight(img)

            if random.random() < self.add_noise_rate:  # add noise
                # print('noise')
                change_num += 1
                img = self._addNoise(img)

            if random.random() < self.cutout_rate:  # cutout
                # print('cutout')
                change_num += 1
                img = self._cutout(img, bboxes, length=200, n_holes=3, threshold=0.5)

            # if random.random() < self.flip_rate:  # flip
            #     print('flip')
            #     change_num += 1
            #     img, bboxes = self._filp_pic_bboxes(img, bboxes)
        # print('------')
        return img, bboxes


if __name__ == '__main__':
    import shutil
    from xml_helper import *
    from tqdm import tqdm

    dataAug = DataAugmentForObjectDetection()

    source_pic_root_path = '../data/data_split'
    source_xml_root_path = '../data/xml'
    target_pic_root_path = '../data/data_augment'
    if os.path.exists(target_pic_root_path):
        shutil.rmtree(target_pic_root_path)
    os.makedirs(target_pic_root_path)

    for parent, _, files in os.walk(source_pic_root_path):
        for file in tqdm(files):
            if parent.split('/')[-1] == 'bad':
                need_aug_num = 2  # how many augmented copies per image
                # print('bad')
            if parent.split('/')[-1] == 'good':
                need_aug_num = 2  # how many augmented copies per image
                # print('good')
            auged_num = 0  # number of copies generated so far
            while auged_num < need_aug_num:
                auged_num += 1
                pic_path = os.path.join(parent, file)
                xml_path = os.path.join(source_xml_root_path, file[:-4]+'.xml')

                img = cv2.imread(pic_path)
                h, w, _ = img.shape

                if os.path.exists(xml_path):
                    coords = parse_xml(xml_path)  # box info, format [[x_min, y_min, x_max, y_max, name]]
                    coords = [coord[:4] for coord in coords]
                else:
                    coords = [[0, 0, w, h]]  # defect-free images have no boxes; give them one full-image box so they are effectively not cropped

                # show_pic(img, coords)  # original image

                auged_img, auged_bboxes = dataAug.dataAugment(img, coords)

                # show_pic(auged_img, auged_bboxes)  # augmented image

                temp_target_pic_root_path = os.path.join(target_pic_root_path, parent.split('/')[-1])
                if not os.path.exists(temp_target_pic_root_path):
                    os.mkdir(temp_target_pic_root_path)
                target_pic_path = os.path.join(temp_target_pic_root_path, file[:-4]+'_aug'+str(auged_num)+'.jpg')
                cv2.imwrite(target_pic_path, auged_img)  # write the augmented image

--------------------------------------------------------------------------------
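Because every later step depends on the boxes staying aligned with the augmented images, a quick self-check of the geometry helpers can save debugging time. The following is a minimal sketch against the class above; the image size and box values are made up for illustration.

```python
# Minimal sanity check (hypothetical values): after a crop, every box must
# still lie inside the cropped image.
import numpy as np
from DataAugmentForTrain import DataAugmentForObjectDetection

aug = DataAugmentForObjectDetection()
img = np.zeros((600, 800, 3), dtype=np.uint8)         # dummy 800x600 image
boxes = [[100, 120, 300, 260], [400, 300, 550, 480]]  # made-up defect boxes
crop_img, crop_boxes = aug._crop_img_bboxes(img, boxes)
for x1, y1, x2, y2 in crop_boxes:
    assert 0 <= x1 < x2 <= crop_img.shape[1]
    assert 0 <= y1 < y2 <= crop_img.shape[0]
print('crop kept all boxes inside the image')
```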
/code/DataAugmentForValid.py:
--------------------------------------------------------------------------------

# -*- coding=utf-8 -*-
##############################################################
# description:
#     data augmentation for object detection
# author:
#     maozezhong 2018-6-27
##############################################################

# Included transforms:
#     1. crop (bboxes must be updated)
#     2. shift (bboxes must be updated)
#     3. change brightness
#     4. add noise
#     5. rotate (bboxes must be updated)
#     6. mirror / flip (bboxes must be updated)
# Note:
#     random.seed(): the same seed always produces the same random sequence!

import time
import random
import cv2
import os
import math
import numpy as np
from skimage.util import random_noise
from skimage import exposure

def show_pic(img, bboxes=None):
    '''
    Inputs:
        img: image array
        bboxes: all bounding boxes of the image, a list of [x_min, y_min, x_max, y_max]
    '''
    cv2.imwrite('./1.jpg', img)
    img = cv2.imread('./1.jpg')
    for i in range(len(bboxes)):
        bbox = bboxes[i]
        x_min = bbox[0]
        y_min = bbox[1]
        x_max = bbox[2]
        y_max = bbox[3]
        cv2.rectangle(img, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
    cv2.namedWindow('pic', 0)            # 0: resizable window (1 would keep the original size)
    cv2.moveWindow('pic', 0, 0)
    cv2.resizeWindow('pic', 1200, 800)   # display size
    cv2.imshow('pic', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    os.remove('./1.jpg')

# all images are read with cv2 (numpy arrays, BGR)
class DataAugmentForObjectDetection():
    def __init__(self, rotation_rate=0.5, max_rotation_angle=5,
                 crop_rate=0.5, shift_rate=0.5, change_light_rate=0.5,
                 add_noise_rate=0.5, flip_rate=0.5):
        self.rotation_rate = rotation_rate
        self.max_rotation_angle = max_rotation_angle
        self.crop_rate = crop_rate
        self.shift_rate = shift_rate
        self.change_light_rate = change_light_rate
        self.add_noise_rate = add_noise_rate
        self.flip_rate = flip_rate

    # add noise
    def _addNoise(self, img):
        '''
        Input:
            img: image array
        Output:
            image array with noise added; random_noise returns pixels in
            [0, 1], so the result has to be scaled back by 255
        '''
        # random.seed(int(time.time()))
        # return random_noise(img, mode='gaussian', seed=int(time.time()), clip=True)*255
        return random_noise(img, mode='gaussian', clip=True)*255

    # adjust brightness
    def _changeLight(self, img):
        # random.seed(int(time.time()))
        flag = random.uniform(0.5, 1.5)  # gamma > 1 darkens the image, gamma < 1 brightens it
        return exposure.adjust_gamma(img, flag)

    # rotate
    def _rotate_img_bbox(self, img, bboxes, angle=5, scale=1.):
        '''
        Reference: https://blog.csdn.net/u014540717/article/details/53301195
        Inputs:
            img: image array, (h, w, c)
            bboxes: all bounding boxes of the image, a list whose elements are
                    [x_min, y_min, x_max, y_max]; values must be numeric
            angle: rotation angle
            scale: defaults to 1
        Outputs:
            rot_img: rotated image array
            rot_bboxes: list of rotated bounding box coordinates
        '''
        #---------------------- rotate the image ----------------------
        w = img.shape[1]
        h = img.shape[0]
        # degrees to radians
        rangle = np.deg2rad(angle)  # angle in radians
        # now calculate new image width and height
        nw = (abs(np.sin(rangle)*h) + abs(np.cos(rangle)*w))*scale
        nh = (abs(np.cos(rangle)*h) + abs(np.sin(rangle)*w))*scale
        # ask OpenCV for the rotation matrix
        rot_mat = cv2.getRotationMatrix2D((nw*0.5, nh*0.5), angle, scale)
        # calculate the move from the old center to the new center combined
        # with the rotation
        rot_move = np.dot(rot_mat, np.array([(nw-w)*0.5, (nh-h)*0.5, 0]))
        # the move only affects the translation, so update the translation
        # part of the transform
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        # affine transform
        rot_img = cv2.warpAffine(img, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))), flags=cv2.INTER_LANCZOS4)

        #---------------------- correct the bbox coordinates ----------------------
        # rot_mat is the final rotation matrix
        # take the midpoints of the four bbox edges and map them into the rotated frame
        rot_bboxes = list()
        for bbox in bboxes:
            xmin = bbox[0]
            ymin = bbox[1]
            xmax = bbox[2]
            ymax = bbox[3]
            point1 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymin, 1]))
            point2 = np.dot(rot_mat, np.array([xmax, (ymin+ymax)/2, 1]))
            point3 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymax, 1]))
            point4 = np.dot(rot_mat, np.array([xmin, (ymin+ymax)/2, 1]))
            # stack into one array
            concat = np.vstack((point1, point2, point3, point4))
            # convert the dtype
            concat = concat.astype(np.int32)
            # bounding rectangle of the rotated points
            rx, ry, rw, rh = cv2.boundingRect(concat)
            rx_min = rx
            ry_min = ry
            rx_max = rx+rw
            ry_max = ry+rh
            # append to the list
            rot_bboxes.append([rx_min, ry_min, rx_max, ry_max])

        return rot_img, rot_bboxes

    # crop
    def _crop_img_bboxes(self, img, bboxes):
        '''
        The cropped image must still contain all the boxes.
        Inputs:
            img: image array
            bboxes: all bounding boxes of the image, a list whose elements are
                    [x_min, y_min, x_max, y_max]; values must be numeric
        Outputs:
            crop_img: cropped image array
            crop_bboxes: list of cropped bounding box coordinates
        '''
        #---------------------- crop the image ----------------------
        w = img.shape[1]
        h = img.shape[0]
        x_min = w  # smallest rectangle containing all target boxes
        x_max = 0
        y_min = h
        y_max = 0
        for bbox in bboxes:
            x_min = min(x_min, bbox[0])
            y_min = min(y_min, bbox[1])
            x_max = max(x_max, bbox[2])
            y_max = max(y_max, bbox[3])

        d_to_left = x_min        # distance from that rectangle to the left edge
        d_to_right = w - x_max   # distance to the right edge
        d_to_top = y_min         # distance to the top edge
        d_to_bottom = h - y_max  # distance to the bottom edge

        # randomly expand the minimal rectangle
        # crop_x_min = int(x_min - random.uniform(0, d_to_left))
        # crop_y_min = int(y_min - random.uniform(0, d_to_top))
        # crop_x_max = int(x_max + random.uniform(0, d_to_right))
        # crop_y_max = int(y_max + random.uniform(0, d_to_bottom))

        # randomly expand the minimal rectangle, but keep the crop from getting too small
        crop_x_min = int(x_min - random.uniform(d_to_left//2, d_to_left))
        crop_y_min = int(y_min - random.uniform(d_to_top//2, d_to_top))
        crop_x_max = int(x_max + random.uniform(d_to_right//2, d_to_right))
        crop_y_max = int(y_max + random.uniform(d_to_bottom//2, d_to_bottom))

        # make sure we stay inside the image
        crop_x_min = max(0, crop_x_min)
        crop_y_min = max(0, crop_y_min)
        crop_x_max = min(w, crop_x_max)
        crop_y_max = min(h, crop_y_max)

        crop_img = img[crop_y_min:crop_y_max, crop_x_min:crop_x_max]

        #---------------------- crop the bounding boxes ----------------------
        # recompute the box coordinates in the cropped frame
        crop_bboxes = list()
        for bbox in bboxes:
            crop_bboxes.append([bbox[0]-crop_x_min, bbox[1]-crop_y_min, bbox[2]-crop_x_min, bbox[3]-crop_y_min])

        return crop_img, crop_bboxes

    # shift
    def _shift_pic_bboxes(self, img, bboxes):
        '''
        Reference: https://blog.csdn.net/sty945/article/details/79387054
        The shifted image must still contain all the boxes.
        Inputs:
            img: image array
            bboxes: all bounding boxes of the image, a list whose elements are
                    [x_min, y_min, x_max, y_max]; values must be numeric
        Outputs:
            shift_img: shifted image array
            shift_bboxes: list of shifted bounding box coordinates
        '''
        #---------------------- shift the image ----------------------
        w = img.shape[1]
        h = img.shape[0]
        x_min = w  # smallest rectangle containing all target boxes
        x_max = 0
        y_min = h
        y_max = 0
        for bbox in bboxes:
            x_min = min(x_min, bbox[0])
            y_min = min(y_min, bbox[1])
            x_max = max(x_max, bbox[2])
            y_max = max(y_max, bbox[3])

        d_to_left = x_min        # maximum shift to the left that keeps all boxes inside
        d_to_right = w - x_max   # maximum shift to the right
        d_to_top = y_min         # maximum shift up
        d_to_bottom = h - y_max  # maximum shift down

        x = random.uniform(-(d_to_left-1) / 3, (d_to_right-1) / 3)
        y = random.uniform(-(d_to_top-1) / 3, (d_to_bottom-1) / 3)

        M = np.float32([[1, 0, x], [0, 1, y]])  # x: horizontal shift in pixels (positive = right); y: vertical shift (positive = down)
        shift_img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))

        #---------------------- shift the bounding boxes ----------------------
        shift_bboxes = list()
        for bbox in bboxes:
            shift_bboxes.append([bbox[0]+x, bbox[1]+y, bbox[2]+x, bbox[3]+y])

        return shift_img, shift_bboxes

    # mirror
    def _filp_pic_bboxes(self, img, bboxes):
        '''
        Reference: https://blog.csdn.net/jningwei/article/details/78753607
        The flipped image must still contain all the boxes.
        Inputs:
            img: image array
            bboxes: all bounding boxes of the image, a list whose elements are
                    [x_min, y_min, x_max, y_max]; values must be numeric
        Outputs:
            flip_img: flipped image array
            flip_bboxes: list of flipped bounding box coordinates
        '''
        # ---------------------- flip the image ----------------------
        import copy
        flip_img = copy.deepcopy(img)
        if random.random() < 0.5:  # flip horizontally with probability 0.5, otherwise vertically
            horizon = True
        else:
            horizon = False
        h, w, _ = img.shape
        if horizon:  # horizontal flip: cv2.flip with flipCode=1 mirrors around the y axis
            flip_img = cv2.flip(flip_img, 1)
        else:        # vertical flip: flipCode=0 mirrors around the x axis
            flip_img = cv2.flip(flip_img, 0)

        # ---------------------- adjust the bounding boxes ----------------------
        flip_bboxes = list()
        for box in bboxes:
            x_min = box[0]
            y_min = box[1]
            x_max = box[2]
            y_max = box[3]
            if horizon:
                flip_bboxes.append([w-x_max, y_min, w-x_min, y_max])
            else:
                flip_bboxes.append([x_min, h-y_max, x_max, h-y_min])

        return flip_img, flip_bboxes

    def dataAugment(self, img, bboxes):
        '''
        Apply the augmentations.
        Inputs:
            img: image array
            bboxes: all box coordinates of the image
        Outputs:
            img: augmented image
            bboxes: boxes matching the augmented image
        '''
        change_num = 0  # number of transforms applied
        # print('------')
        while change_num < 1:  # make sure at least one augmentation fires
            if random.random() < 0.8:  # self.crop_rate: # crop
                print('crop')
                change_num += 1
                img, bboxes = self._crop_img_bboxes(img, bboxes)

            # if random.random() < self.rotation_rate:  # rotate
            #     print('rotate')
            #     change_num += 1
            #     # angle = random.uniform(-self.max_rotation_angle, self.max_rotation_angle)
            #     angle = random.sample([90, 180, 270], 1)[0]
            #     scale = random.uniform(0.7, 0.8)
            #     img, bboxes = self._rotate_img_bbox(img, bboxes, angle, scale)

            # if random.random() < self.shift_rate:  # shift
            #     # print('shift')
            #     change_num += 1
            #     img, bboxes = self._shift_pic_bboxes(img, bboxes)

            if random.random() < self.change_light_rate:  # change brightness
                print('brightness')
                change_num += 1
                img = self._changeLight(img)

            if random.random() < self.add_noise_rate:  # add noise
                print('noise')
                change_num += 1
                img = self._addNoise(img)

            if random.random() < self.flip_rate:  # flip
                print('flip')
                change_num += 1
                img, bboxes = self._filp_pic_bboxes(img, bboxes)
        print('\n')
        # print('------')
        return img, bboxes


if __name__ == '__main__':
    import shutil
    from xml_helper import *
    from tqdm import tqdm

    dataAug = DataAugmentForObjectDetection()

    source_pic_root_path = '../data/data_split'
    source_xml_root_path = '../data/xml'
    target_pic_root_path = '../data/data_for_valid'
    if os.path.exists(target_pic_root_path):
        shutil.rmtree(target_pic_root_path)
    os.makedirs(target_pic_root_path)

    bad_num = 0
    good_num = 0
    bad_top_num = 500000   # caps on how many validation images to generate per class
    good_top_num = 500000
    for parent, _, files in os.walk(source_pic_root_path):
        random.shuffle(files)
        for file in tqdm(files):
            if parent.split('/')[-1] == 'bad':
                need_aug_num = 1  # how many augmented copies per image
                if bad_num >= bad_top_num:
                    break
                bad_num += need_aug_num
                # print('bad '+str(bad_num))
            if parent.split('/')[-1] == 'good':
                need_aug_num = 1  # how many augmented copies per image
                if good_num >= good_top_num:
                    break
                good_num += need_aug_num
                # print('good '+str(good_num))
            auged_num = 0  # number of copies generated so far
            while auged_num < need_aug_num:
                auged_num += 1
                pic_path = os.path.join(parent, file)
                xml_path = os.path.join(source_xml_root_path, file[:-4]+'.xml')

                img = cv2.imread(pic_path)
                h, w, _ = img.shape

                if os.path.exists(xml_path):  # only 'bad' images have xml annotations
                    coords = parse_xml(xml_path)  # box info, format [[x_min, y_min, x_max, y_max, name]]
                    coords = [coord[:4] for coord in coords]
                else:
                    coords = [[0, 0, w, h]]  # defect-free images have no boxes; give them one full-image box so they are effectively not cropped

                # show_pic(img, coords)  # original image

                auged_img, auged_bboxes = dataAug.dataAugment(img, coords)

                # show_pic(auged_img, auged_bboxes)  # augmented image

                temp_target_pic_root_path = os.path.join(target_pic_root_path, parent.split('/')[-1])
                if not os.path.exists(temp_target_pic_root_path):
                    os.mkdir(temp_target_pic_root_path)
                target_pic_path = os.path.join(temp_target_pic_root_path, file[:-4]+'_aug'+str(auged_num)+'.jpg')
                cv2.imwrite(target_pic_path, auged_img)  # write the augmented image

--------------------------------------------------------------------------------
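Taken together, split_good_bad.py, extract_xml.py, the two augmentation scripts, and del_copy_for_train.py (next file) populate the data folder roughly as follows. This sketch is assembled from the paths used in the scripts, not from an actual listing:

```
data/
├── xml/              # all annotation files (extract_xml.py)
├── data_split/
│   ├── good/         # defect-free originals (split_good_bad.py)
│   └── bad/          # defective originals
├── data_augment/
│   ├── good/         # offline-augmented copies (DataAugmentForTrain.py)
│   └── bad/
├── data_for_train/
│   ├── good/         # originals + augmented copies (del_copy_for_train.py)
│   └── bad/
└── data_for_valid/
    ├── good/         # augmented validation copies (DataAugmentForValid.py)
    └── bad/
```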
/code/del_copy_for_train.py:
--------------------------------------------------------------------------------

# -*- coding=utf-8 -*-

import os
import shutil

source_split_bad_pic_path = '../data/data_split/bad'
source_split_good_pic_path = '../data/data_split/good'
source_aug_bad_pic_path = '../data/data_augment/bad'
source_aug_good_pic_path = '../data/data_augment/good'
target_bad_pic_path = '../data/data_for_train/bad'
target_good_pic_path = '../data/data_for_train/good'
if os.path.exists(target_bad_pic_path):
    shutil.rmtree(target_bad_pic_path)
if os.path.exists(target_good_pic_path):
    shutil.rmtree(target_good_pic_path)
os.makedirs(target_bad_pic_path)
os.makedirs(target_good_pic_path)

#1# copy the original images into the target folders
# copy images into the target bad folder
for parent, _, files in os.walk(source_split_bad_pic_path):
    for file in files:
        # print(os.path.join(parent, file))
        shutil.copyfile(os.path.join(source_split_bad_pic_path, file), os.path.join(target_bad_pic_path, file))

# copy images into the target good folder
for parent, _, files in os.walk(source_split_good_pic_path):
    for file in files:
        # print(os.path.join(parent, file))
        shutil.copyfile(os.path.join(source_split_good_pic_path, file), os.path.join(target_good_pic_path, file))

#2# copy the augmented images into the target folders
# copy images into the target bad folder
for parent, _, files in os.walk(source_aug_bad_pic_path):
    for file in files:
        # print(os.path.join(parent, file))
        shutil.copyfile(os.path.join(source_aug_bad_pic_path, file), os.path.join(target_bad_pic_path, file))

# copy images into the target good folder
for parent, _, files in os.walk(source_aug_good_pic_path):
    for file in files:
        # print(os.path.join(parent, file))
        shutil.copyfile(os.path.join(source_aug_good_pic_path, file), os.path.join(target_good_pic_path, file))

--------------------------------------------------------------------------------
/code/extract_xml.py:
--------------------------------------------------------------------------------

# -*- coding=utf-8 -*-
'''
Copy all xml annotation files into the ../data/xml folder.
'''
import os
import shutil
from tqdm import tqdm

ori_data_root = '../data'
target_xml_path = '../data/xml'
if os.path.exists(target_xml_path):
    shutil.rmtree(target_xml_path)
os.makedirs(target_xml_path)

for parent, _, files in os.walk(ori_data_root):
    # skip the test data and the defect-free ('正常') data
    if 'test' in parent.split('_') or '正常' in parent.split('/') or 'xml' in parent.split('/'):
        continue
    for file in tqdm(files):
        file_name = os.path.join(parent, file)
        if file_name[-3:] == 'xml':
            shutil.copyfile(file_name, os.path.join(target_xml_path, file))

--------------------------------------------------------------------------------
/code/main.py:
--------------------------------------------------------------------------------

# coding=utf-8
#################################################
# finetune
#################################################
import datetime
import pandas as pd
import numpy as np
import os
import imageio
import random
import cv2

from PIL import Image as pil_image
from skimage.transform import resize as imresize
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from keras import initializers
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Activation
from keras.layers import Dropout
from keras.layers import Maximum
from keras.layers import ZeroPadding2D
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D, GlobalAveragePooling2D
from keras.layers.merge import concatenate
from keras import regularizers
from keras.layers import BatchNormalization
from keras.optimizers import Adam, SGD
from keras.layers.advanced_activations import LeakyReLU

from keras.applications.densenet import DenseNet169
from keras.applications.densenet import preprocess_input

#######################################
# set the learning phase to 1 (training mode) while training
from keras import backend as K
K.set_learning_phase(1)
#######################################

EPOCHS = 50
RANDOM_STATE = 2018
learning_rate = 0.003

TRAIN_DIR = '../data/data_for_train'
VALID_DIR = '../data/data_for_valid'

def get_callbacks(filepath, patience=2):
    lr_reduce = ReduceLROnPlateau(monitor='val_acc', factor=0.1, epsilon=1e-5, patience=patience, verbose=1, min_lr=0.00001)
    msave = ModelCheckpoint(filepath, monitor='val_acc', save_best_only=True)  # saves the best model (by val_acc) to filepath after each epoch
    earlystop = EarlyStopping(monitor='val_acc', min_delta=0, patience=patience*3+2, verbose=1, mode='auto')
    return [lr_reduce, msave, earlystop]

def add_new_last_layer(base_model, nb_classes, drop_rate=0.):
    """Add a new last layer to the convnet
    Args:
        base_model: keras model excluding top
        nb_classes: # of classes
    Returns:
        new keras model with last layer
    """
    x = base_model.output
    x = Dropout(0.5)(x)
    x = GlobalAveragePooling2D()(x)
    predictions = Dense(nb_classes, activation='softmax')(x)  # new softmax layer
    model = Model(inputs=base_model.input, outputs=predictions)
    return model

# a cross-entropy variant that helps against overfitting
def mycrossentropy(e=0.1, nb_classes=2):
    '''
    https://spaces.ac.cn/archives/4493
    '''
    def mycrossentropy_fixed(y_true, y_pred):
        return (1-e)*K.categorical_crossentropy(y_true, y_pred) + e*K.categorical_crossentropy(K.ones_like(y_pred)/nb_classes, y_pred)
    return mycrossentropy_fixed
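
# In math terms (e is the smoothing factor and N the number of classes, so
# here e = 0.1 and N = 2), the loss above is
#
#     L = (1 - e) * H(y_true, y_pred) + e * H(u, y_pred)
#
# where H is the cross-entropy and u is the uniform distribution over the N
# classes. This is the label-smoothing trick from the reference cited in the
# docstring: the target becomes a mixture of the true label and the uniform
# distribution, which penalizes over-confident predictions.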

def get_model(in_width, in_height):
    '''
    Build and compile the model for the given input size.
    '''
    base_model = DenseNet169(include_top=False, weights='imagenet', input_shape=(in_width, in_height, 3))

    model = add_new_last_layer(base_model, 2)
    model.compile(optimizer=SGD(lr=learning_rate, momentum=0.9), loss=[mycrossentropy()], metrics=['accuracy'])
    model.summary()

    return model

def train_model(save_model_path, BATCH_SIZE, IN_SIZE):

    IN_WIDTH = IN_SIZE
    IN_HEIGHT = IN_SIZE

    callbacks = get_callbacks(filepath=save_model_path, patience=3)
    model = get_model(IN_WIDTH, IN_HEIGHT)

    train_datagen = ImageDataGenerator(
        preprocessing_function=preprocess_input,
        horizontal_flip=True,
        vertical_flip=True,
        rotation_range=30,
        shear_range=0.1
    )

    valid_datagen = ImageDataGenerator(
        preprocessing_function=preprocess_input
    )

    train_generator = train_datagen.flow_from_directory(
        directory=TRAIN_DIR,
        target_size=(IN_WIDTH, IN_HEIGHT),
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        seed=2018,
        interpolation='antialias',  # PIL's default interpolation blurs on downsampling; requires the manual patch from the README
    )

    valid_generator = valid_datagen.flow_from_directory(
        directory=VALID_DIR,
        target_size=(IN_WIDTH, IN_HEIGHT),
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        seed=2018,
        interpolation='antialias',  # PIL's default interpolation blurs on downsampling; requires the manual patch from the README
    )

    model.fit_generator(
        train_generator,
        steps_per_epoch=1*(train_generator.samples // BATCH_SIZE + 1),
        epochs=EPOCHS,
        max_queue_size=1000,
        workers=1,
        verbose=1,
        validation_data=valid_generator,
        validation_steps=valid_generator.samples // BATCH_SIZE,  # alternatively valid_generator.samples // BATCH_SIZE + 1
        callbacks=callbacks
    )

def predict(weights_path, IN_SIZE):
    '''
    Predict on the test data.
    '''
    IN_WIDTH = IN_SIZE
    IN_HEIGHT = IN_SIZE
    K.set_learning_phase(0)

    test_pic_root_path = '../data/xuelang_round1_test_b'

    filename = []
    probability = []

    #1# build the model
    model = get_model(IN_WIDTH, IN_HEIGHT)
    model.load_weights(weights_path)

    #2# predict
    for parent, _, files in os.walk(test_pic_root_path):
        for line in files:

            pic_path = os.path.join(test_pic_root_path, line.strip())

            img = load_img(pic_path, target_size=(IN_HEIGHT, IN_WIDTH), interpolation='antialias')
            img = img_to_array(img)
            img = preprocess_input(img)
            img = np.expand_dims(img, axis=0)

            prediction = model.predict(img)[0]
            pro = list(prediction)[0]  # probability of 'bad', which sits at index 0
            pro = round(pro, 5)
            if pro == 0:
                pro = 0.00001
            if pro == 1:
                pro = 0.99999

            # collect the results
            filename.append(line.strip())
            probability.append(pro)

    #3# write the csv
    res_path = "../submit/submit_" + datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + ".csv"
    dataframe = pd.DataFrame({'filename': filename, 'probability': probability})
    dataframe.to_csv(res_path, index=False, header=True)

def main():

    #~~~~~~~~~~~~~~~~~ data preparation ~~~~~~~~~~~~~~~~~
    #0# unpack the raw data archives
    # sorry, they are password protected and have to be unpacked by hand

    #1# data conversion
    ##1.1## split the images into the good and bad classes
    os.system("python split_good_bad.py")
    ##1.2## collect all xml files into data/xml
    os.system("python extract_xml.py")

    #2# offline augmentation of the training data
    os.system("python DataAugmentForTrain.py")

    #3# offline-augmented data used as validation data
    os.system("python DataAugmentForValid.py")

    #4# copy all training images into data/data_for_train
    os.system("python del_copy_for_train.py")

    #~~~~~~~~~~~~~~~~~ model training and prediction, part1 ~~~~~~~~~~~~~~~~~
    weights_path = './model_weight1.hdf5'
    # train_model(save_model_path=weights_path, BATCH_SIZE=6, IN_SIZE=500)  # 0.9157 online
    predict(weights_path=weights_path, IN_SIZE=500)

    weights_path = './model_weight2.hdf5'
    # train_model(save_model_path=weights_path, BATCH_SIZE=4, IN_SIZE=550)  # 0.918 online
    predict(weights_path=weights_path, IN_SIZE=550)

    weights_path = './model_weight3.hdf5'
    # train_model(save_model_path=weights_path, BATCH_SIZE=4, IN_SIZE=600)  # 0.914 online
    predict(weights_path=weights_path, IN_SIZE=600)

    # #~~~~~~~~~~~~~~~~~ model training and prediction, part2 ~~~~~~~~~~~~~~~~~
    # # note: the team worked along two branches; part2 uses pytorch and its
    # # data has to be prepared by hand, see README.md
    # os.system("mv ../data/data_split/good ../data/data_split/0")
    # os.system("mv ../data/data_split/bad ../data/data_split/1")
    # os.system("python xuelangzsp.py")

    #~~~~~~~~~~~~~~~~~ merge the models into the final result ~~~~~~~~~~~~~~~~~
    os.system("python merge.py")

if __name__=='__main__':
    main()

--------------------------------------------------------------------------------
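predict() above assumes that the defect class 'bad' ends up at index 0 of the softmax output. That holds because flow_from_directory assigns class indices in alphabetical order of the folder names ('bad' sorts before 'good'). A minimal sketch to double-check the mapping on the actual data, assuming the folders produced by the scripts above exist:

```python
from keras.preprocessing.image import ImageDataGenerator

gen = ImageDataGenerator().flow_from_directory('../data/data_for_valid',
                                               target_size=(500, 500))
print(gen.class_indices)  # expected: {'bad': 0, 'good': 1}
```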
/code/merge.py:
--------------------------------------------------------------------------------

# -*- coding=utf-8 -*-

import pandas as pd
import os

path = '../submit'
file_dict = dict()
final_res_path = '../submit/res_merge.csv'
cnt = 0
# weights for the four result files, normalized to sum to 1 (the first three
# match the online scores noted in main.py; the fourth is presumably the
# teammate's ResNet); assumes ../submit holds exactly one csv per model
w_list = [0.9157, 0.918, 0.914, 0.905]
w_list = [w/sum(w_list) for w in w_list]
for parent, _, files in os.walk(path):
    for file in files:
        data_path = os.path.join(parent, file)
        data = pd.read_csv(data_path)
        for i in range(len(data['filename'])):
            file_name = data['filename'][i]
            pro = data['probability'][i]*w_list[cnt]
            if file_name in file_dict.keys():
                file_dict[file_name] += pro
            else:
                file_dict[file_name] = pro
        cnt += 1

file_name_list = list()
pro_list = list()
for key in file_dict.keys():
    file_name_list.append(key)
    pro = file_dict[key]
    pro = round(pro, 6)
    pro = max(0.000001, pro)
    pro = min(0.999999, pro)
    pro_list.append(pro)

dataframe = pd.DataFrame({'filename': file_name_list, 'probability': pro_list})
dataframe.to_csv(final_res_path, index=False, header=True)

--------------------------------------------------------------------------------
/code/split_good_bad.py:
--------------------------------------------------------------------------------

# -*- coding=utf-8 -*-
'''
From the raw data in data, build the classified folder data_split.
data_split contains two subfolders:
    - good: defect-free images
    - bad: defective images
'''

import os
import shutil
from tqdm import tqdm

ori_data_root = '../data'
target_bad_root_path = '../data/data_split/bad'    # root folder for defective images
target_good_root_path = '../data/data_split/good'  # root folder for defect-free images
if os.path.exists(target_bad_root_path):
    shutil.rmtree(target_bad_root_path)
if os.path.exists(target_good_root_path):
    shutil.rmtree(target_good_root_path)
os.makedirs(target_bad_root_path)
os.makedirs(target_good_root_path)

for parent, _, files in os.walk(ori_data_root):
    # skip the test data
    if 'test' in parent.split('_') or 'data_split' in parent.split('/'):
        continue
    print(parent)
    for file in tqdm(files):
        file_name = os.path.join(parent, file)
        if file_name[-3:] == 'jpg':  # copy images only
            temp_name = file_name.split('/')[-2]  # the parent folder name, e.g. '正常' (normal)
            if temp_name == '正常':
                target_pic_path = os.path.join(target_good_root_path, file)
            else:
                target_pic_path = os.path.join(target_bad_root_path, file)
            shutil.copyfile(file_name, target_pic_path)

--------------------------------------------------------------------------------
/code/xml_helper.py:
--------------------------------------------------------------------------------

# -*- coding=utf-8 -*-
import os
import xml.etree.ElementTree as ET
import xml.dom.minidom as DOC

# extract the bounding box info from an xml file, format [[x_min, y_min, x_max, y_max, name]]
def parse_xml(xml_path):
    '''
    Input:
        xml_path: path of the xml file
    Output:
        bounding box info extracted from the xml file, format [[x_min, y_min, x_max, y_max, name]]
    '''
    tree = ET.parse(xml_path)
    root = tree.getroot()
    objs = root.findall('object')
    coords = list()
    for obj in objs:
        name = obj.find('name').text
        box = obj.find('bndbox')
        x_min = int(box.find('xmin').text)
        y_min = int(box.find('ymin').text)
        x_max = int(box.find('xmax').text)
        y_max = int(box.find('ymax').text)
        coords.append([x_min, y_min, x_max, y_max, name])
    return coords

# write bounding box info into an xml file; box format is [[x_min, y_min, x_max, y_max, name]]
def generate_xml(img_name, coords, img_size, out_root_path):
    '''
    Inputs:
        img_name: image name, e.g. a.jpg
        coords: coordinate list, format [[x_min, y_min, x_max, y_max, name]], where name is the annotation label
        img_size: image size, format [h, w, c]
        out_root_path: root path for the output xml file
    '''
    doc = DOC.Document()  # create the DOM document object

    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    title = doc.createElement('folder')
    title_text = doc.createTextNode('Tianchi')
    title.appendChild(title_text)
    annotation.appendChild(title)

    title = doc.createElement('filename')
    title_text = doc.createTextNode(img_name)
    title.appendChild(title_text)
    annotation.appendChild(title)

    source = doc.createElement('source')
    annotation.appendChild(source)

    title = doc.createElement('database')
    title_text = doc.createTextNode('The Tianchi Database')
    title.appendChild(title_text)
    source.appendChild(title)

    title = doc.createElement('annotation')
    title_text = doc.createTextNode('Tianchi')
    title.appendChild(title_text)
    source.appendChild(title)

    size = doc.createElement('size')
    annotation.appendChild(size)

    title = doc.createElement('width')
    title_text = doc.createTextNode(str(img_size[1]))
    title.appendChild(title_text)
    size.appendChild(title)

    title = doc.createElement('height')
    title_text = doc.createTextNode(str(img_size[0]))
    title.appendChild(title_text)
    size.appendChild(title)

    title = doc.createElement('depth')
    title_text = doc.createTextNode(str(img_size[2]))
    title.appendChild(title_text)
    size.appendChild(title)

    for coord in coords:

        object = doc.createElement('object')
        annotation.appendChild(object)

        title = doc.createElement('name')
        title_text = doc.createTextNode(coord[4])
        title.appendChild(title_text)
        object.appendChild(title)

        pose = doc.createElement('pose')
        pose.appendChild(doc.createTextNode('Unspecified'))
        object.appendChild(pose)
        truncated = doc.createElement('truncated')
        truncated.appendChild(doc.createTextNode('1'))
        object.appendChild(truncated)
        difficult = doc.createElement('difficult')
        difficult.appendChild(doc.createTextNode('0'))
        object.appendChild(difficult)

        bndbox = doc.createElement('bndbox')
        object.appendChild(bndbox)
        title = doc.createElement('xmin')
        title_text = doc.createTextNode(str(int(float(coord[0]))))
        title.appendChild(title_text)
        bndbox.appendChild(title)
        title = doc.createElement('ymin')
        title_text = doc.createTextNode(str(int(float(coord[1]))))
        title.appendChild(title_text)
        bndbox.appendChild(title)
        title = doc.createElement('xmax')
        title_text = doc.createTextNode(str(int(float(coord[2]))))
        title.appendChild(title_text)
        bndbox.appendChild(title)
        title = doc.createElement('ymax')
        title_text = doc.createTextNode(str(int(float(coord[3]))))
        title.appendChild(title_text)
        bndbox.appendChild(title)

    # write the DOM object to a file
    f = open(os.path.join(out_root_path, img_name[:-4]+'.xml'), 'w')
    f.write(doc.toprettyxml(indent=''))
    f.close()

--------------------------------------------------------------------------------
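As a usage sketch (hedged: the file names and paths below are hypothetical), the two helpers round-trip as follows. parse_xml returns a list of [x_min, y_min, x_max, y_max, name] entries, and generate_xml writes such a list back out in Pascal-VOC-like form:

```python
from xml_helper import parse_xml, generate_xml

coords = parse_xml('../data/xml/some_image.xml')  # hypothetical annotation file
print(coords)  # e.g. [[x_min, y_min, x_max, y_max, name], ...]
# write the same boxes back out for an image of known size [h, w, c];
# assumes the output folder ../data/xml_out already exists
generate_xml('some_image.jpg', coords, [1920, 2560, 3], '../data/xml_out')
```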
/data/存放原始数据:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maozezhong/TIANCHI_XUELANG_AI/cfec006032e264cc77a56776b0ed237e6d07fe48/data/存放原始数据

--------------------------------------------------------------------------------
/submit/最终提交文件:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maozezhong/TIANCHI_XUELANG_AI/cfec006032e264cc77a56776b0ed237e6d07fe48/submit/最终提交文件

--------------------------------------------------------------------------------
/version.txt:
--------------------------------------------------------------------------------

_tflow_180_select==1.0
absl-py==0.2.2
astor==0.6.2
blas==1.0
bleach==1.5.0
ca-certificates==2018.03.07
certifi==2018.4.16
cloudpickle==0.5.3
cudatoolkit==9.0
cudnn==7.1.2
cupti==9.0.176
cycler==0.10.0
dask-core==0.18.1
dbus==1.13.2
decorator==4.3.0
expat==2.2.5
fontconfig==2.13.0
freetype==2.9.1
gast==0.2.0
glib==2.56.1
grpcio==1.12.1
gst-plugins-base==1.14.0
gstreamer==1.14.0
h5py==2.8.0
hdf5==1.10.2
html5lib==0.9999999
icu==58.2
imageio==2.3.0
intel-openmp==2018.0.3
jpeg==9b
keras==2.2.0
keras-applications==1.0.2
keras-base==2.2.0
keras-preprocessing==1.0.1
kiwisolver==1.0.1
libedit==3.1.20170329
libffi==3.2.1
libgcc-ng==7.2.0
libgfortran-ng==7.2.0
libpng==1.6.34
libprotobuf==3.5.2
libstdcxx-ng==7.2.0
libtiff==4.0.9
libuuid==1.0.3
libxcb==1.13
libxml2==2.9.8
markdown==2.6.11
matplotlib==2.2.2
mkl==2018.0.3
mkl_fft==1.0.2
mkl_random==1.0.1
ncurses==6.1
networkx==2.1
numpy==1.14.5
numpy-base==1.14.5
olefile==0.45.1
opencv-python==3.4.1.15
openssl==1.0.2o
pandas==0.23.3
pcre==8.42
pillow==5.1.0
pip==10.0.1
protobuf==3.5.2
pyparsing==2.2.0
pyqt==5.9.2
python==3.6.6
python-dateutil==2.7.3
pytz==2018.5
pywavelets==0.5.2
pyyaml==3.12
qt==5.9.6
readline==7.0
scikit-image==0.14.0
scipy==1.1.0
setuptools==39.2.0
sip==4.19.8
six==1.11.0
sqlite==3.24.0
tensorboard==1.8.0
tensorflow==1.8.0
tensorflow-base==1.8.0
tensorflow-gpu==1.8.0
termcolor==1.1.0
tk==8.6.7
toolz==0.9.0
tornado==5.0.2
tqdm==4.24.0
werkzeug==0.14.1
wheel==0.31.1
xz==5.2.4
yaml==0.1.7
zlib==1.2.11

absl-py==0.2.0
alabaster==0.7.10
anaconda-client==1.6.14
anaconda-navigator==1.8.3
anaconda-project==0.8.2
argcomplete==1.9.4
asn1crypto==0.24.0
astor==0.6.2
astroid==1.6.1
astropy==2.0.3
atomicwrites==1.1.5
attrdict==2.0.0
attrs==17.4.0
autopep8==1.3.5
Babel==2.5.3
backcall==0.1.0
backports.shutil-get-terminal-size==1.0.0
bayesian-optimization==0.6.0
beautifulsoup4==4.6.0
bitarray==0.8.1
bkcharts==0.2
blaze==0.11.3
bleach==1.5.0
bokeh==0.12.13
boto==2.48.0
Bottleneck==1.2.1
catboost==0.6.3
category-encoders==1.2.8
certifi==2018.4.16
cffi==1.11.4
chardet==3.0.4
click==6.7
cloudpickle==0.5.2
clyent==1.2.2
cnn-finetune==0.5
colorama==0.3.9
conda==4.5.4
conda-build==3.4.1
conda-verify==2.0.0
contextlib2==0.5.5
cryptography==2.1.4
cvxpy==1.0.6
cycler==0.10.0
Cython==0.27.3
cytoolz==0.9.0
dask==0.16.1
datashape==0.5.4
decorator==4.2.1
dill==0.2.8.2
distributed==1.20.2
docopt==0.6.2
docutils==0.14
ecos==2.0.5
entrypoints==0.2.3
enum34==1.1.6
et-xmlfile==1.0.1
fancyimpute==0.3.2
fastcache==1.0.2
ffm===7e8621d
filelock==2.0.13
Flask==0.12
Flask-Cors==3.0.3
future==0.16.0
gast==0.2.0
gevent==1.2.2
gitdb2==2.0.4
GitPython==2.1.11
glob2==0.6
gmpy2==2.0.8
greenlet==0.4.12
grpcio==1.11.0
h5py==2.7.1
heapdict==1.0.0
html5lib==0.9999999
humanize==0.5.1
idna==2.6
imageio==2.2.0
imagesize==0.7.1
imgaug==0.2.6
ipykernel==4.8.0
ipython==6.4.0
ipython-genutils==0.2.0
ipywidgets==7.1.1
isort==4.2.15
itsdangerous==0.24
jdcal==1.3
jedi==0.11.1
Jinja2==2.10
jsonschema==2.6.0
jupyter==1.0.0
jupyter-client==5.2.2
jupyter-console==5.2.0
jupyter-contrib-core==0.3.3
jupyter-contrib-nbextensions==0.5.0
jupyter-core==4.4.0
jupyter-highlight-selected-word==0.2.0
jupyter-latex-envs==1.4.4
jupyter-nbextensions-configurator==0.4.0
jupyterlab==0.31.5
jupyterlab-launcher==0.10.2
kaggle==1.3.8
Keras==2.1.5
kitchen==1.2.5
knnimpute==0.1.0
lazy-object-proxy==1.3.1
lightgbm==2.1.0
llvmlite==0.21.0
locket==0.2.0
lxml==4.1.1
Markdown==2.6.11
MarkupSafe==1.0
matplotlib==2.1.2
mccabe==0.6.1
mistune==0.8.3
MLFeatureSelection==0.0.2.2
more-itertools==4.2.0
mpmath==1.0.0
msgpack-python==0.5.1
multipledispatch==0.4.9
multiprocess==0.70.6.1
munch==2.3.2
mysql-connector-python==8.0.6
navigator-updater==0.1.0
nbconvert==5.3.1
nbformat==4.4.0
neptune-cli==2.8.6
networkx==2.1
nltk==3.2.5
nose==1.3.7
notebook==5.4.0
np-utils==0.5.5.0
numba==0.36.2
numexpr==2.6.4
numpy==1.14.5
numpydoc==0.7.0
oauthlib==2.1.0
odo==0.5.1
olefile==0.45.1
opencv-python==3.4.1.15
openpyxl==2.4.10
osqp==0.3.1
packaging==16.8
pandas==0.23.3
pandocfilters==1.4.2
parso==0.1.1
partd==0.3.8
path.py==10.5
pathlib2==2.3.0
patsy==0.5.0
pep8==1.7.1
pexpect==4.3.1
pickleshare==0.7.4
Pillow==5.0.0
pkginfo==1.4.1
pluggy==0.6.0
ply==3.10
pretrainedmodels==0.7.0
prompt-toolkit==1.0.15
psutil==5.4.3
ptyprocess==0.5.2
py==1.5.2
pycodestyle==2.3.1
pycosat==0.6.3
pycparser==2.18
pycrypto==2.6.1
pycurl==7.43.0.1
pydot-ng==1.0.0
pyflakes==1.6.0
Pygments==2.2.0
PyJWT==1.6.4
pykwalify==1.5.2
pylint==1.8.2
pyodbc==4.0.22
pyOpenSSL==17.5.0
pyparsing==2.2.0
PySocks==1.6.7
pytest==3.6.3
python-dateutil==2.6.1
pytz==2017.3
PyWavelets==0.5.2
PyYAML==3.12
pyzmq==16.0.3
QtAwesome==0.4.4
qtconsole==4.3.1
QtPy==1.3.1
raven==6.9.0
requests==2.18.4
requests-oauthlib==1.0.0
Requires==0.0.3
rope==0.10.7
ruamel-yaml==0.15.35
scikit-image==0.13.1
scikit-learn==0.19.1
scipy==1.0.0
scs==2.0.2
seaborn==0.8.1
Send2Trash==1.4.2
simplegeneric==0.8.1
singledispatch==3.4.0.3
six==1.11.0
smmap2==2.0.4
snowballstemmer==1.2.1
sortedcollections==0.5.3
sortedcontainers==1.5.9
Sphinx==1.6.6
sphinxcontrib-websupport==1.0.1
spyder==3.2.6
SQLAlchemy==1.2.1
statsmodels==0.8.0
steppy==0.1.5
steppy-toolkit==0.1.5
swifter==0.153
sympy==1.1.1
tables==3.4.2
tb-nightly==1.8.0a20180420
tblib==1.3.2
tensorboard==1.6.0
termcolor==1.1.0
terminado==0.8.1
terminaltables==2.1.0
testpath==0.3.1
tf-nightly-gpu==1.8.0.dev20180416
toolz==0.9.0
torch==0.4.0
torchvision==0.2.1
tornado==4.5.3
tqdm==4.19.8
traitlets==4.3.2
typing==3.6.4
unicodecsv==0.14.1
urllib3==1.22
virtualenv==15.2.0
voluptuous==0.11.1
wcwidth==0.1.7
webencodings==0.5.1
websocket-client==0.48.0
Werkzeug==0.14.1
widgetsnbextension==3.1.0
word2vec==0.9.2
wrapt==1.10.11
xgboost==0.7.post3
xlearn==0.30a1
xlrd==1.1.0
XlsxWriter==1.0.2
xlwt==1.3.0
zict==0.1.3

--------------------------------------------------------------------------------