├── .gitignore ├── README.md ├── code ├── ocr │ ├── dataloader.py │ ├── densenet.py │ ├── main.py │ ├── resnet.py │ └── tools │ │ ├── __init__.py │ │ ├── measures.py │ │ ├── parse.py │ │ ├── plot.py │ │ ├── py_op.py │ │ ├── segmentation.py │ │ └── utils.py └── preprocessing │ ├── analysis_dataset.py │ ├── map_word_to_index.py │ └── show_black.py ├── files ├── alphabet_count_dict.json ├── alphabet_index_dict.json ├── black.json ├── image_hw_ratio_dict.json ├── src │ ├── A81.png │ └── B1000_0.png ├── train.csv ├── train_alphabet.json └── ttf │ └── simsun.ttf └── requirement.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | data/ 106 | result/ 107 | results/ 108 | tmp.py 109 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OCR 2 | [第一届西安交通大学人工智能实践大赛(2018AI实践大赛--图片文字识别)](http://competition.heils.cn/main.html)冠军 3 | 4 | 5 | # 模型结果 6 | 该比赛计算每一个条目的f1score,取所有条目的平均,具体计算方式在[这里](http://competition.heils.cn/main.html)。这里的计算方式不对一句话里的相同文字重复计算,故f1score比提交的最终结果低: 7 | 8 | | - | train | val | 9 | | :----------------: | :----------------: | :----------------: | 10 | | f1score | 0.9911 | 0.9582 | 11 | | recall | 0.9943 | 0.9574 | 12 | | precision | 0.9894 | 0.9637 | 13 | 14 | # 模型说明 15 | 1. 模型 16 | 17 | 采用densenet结构,模型输入为(64×512)的图片,输出为(8×64×2159)的概率。 18 | 19 | 将图片划分为多个(8×8)的方格,在每个方格预测2159个字符的概率。 20 | 21 | 2. Loss 22 | 23 | 将(8×64×2159)的概率沿着长宽方向取最大值,得到(2159)的概率,表示这张图片里有对应字符的概率。 24 | 25 | balance: 对正例和负例分别计算loss,使得正例loss权重之和与负例loss权重之和相等,解决数据不平衡的问题。 26 | 27 | hard-mining 28 | 29 | 3. 
文字检测 30 | 将(8×64×2159)的概率沿着宽方向取最大值,得到(64×2159)的概率。 31 | 沿着长方向一个个方格预测文字,然后连起来可得到一句完整的语句。 32 | 33 | 存在问题:两个连续的文字无法重复检测 34 | 35 | 下图是一个文字识别正确的示例:的长为半径作圆 36 | 37 | 38 | 39 | 下图是一个文字识别错误的示例:为10元;经粗加工后销售,每 40 | 41 | 42 | 43 | 44 | # 文件目录 45 | ocr 46 | | 47 | |--code 48 | | 49 | |--files 50 | | | 51 | | |--train.csv 52 | | 53 | |--data 54 | | 55 | |--dataset 56 | | | 57 | | |--train 58 | | | 59 | | |--test 60 | | 61 | |--result 62 | | | 63 | | |--test_result.csv 64 | | 65 | |--images 此文件夹放置任何图片均可,我放的celebA数据集用作pretrain 66 | 67 | # 运行环境 68 | Ubuntu16.04, python2.7, CUDA9.0 69 | 70 | 安装[pytorch](https://pytorch.org/), 推荐版本: 0.2.0_3 71 | ``` 72 | pip install -r requirement.txt 73 | ``` 74 | 75 | # 下载数据 76 | 从[这里](https://pan.baidu.com/s/1w0iEE7q84IolmZXwttOxVw)下载初赛、复赛数据、模型,合并训练集、测试集。 77 | 78 | 79 | # 预处理 80 | 如果不更换数据集,不需要执行这一步。 81 | 82 | 如果更换其他数据集,一并更换 files/train.csv 83 | ``` 84 | cd code/preprocessing 85 | python map_word_to_index.py 86 | python analysis_dataset.py 87 | ``` 88 | 89 | # 训练 90 | ``` 91 | cd code/ocr 92 | python main.py 93 | ``` 94 | 95 | # 测试 96 | f1score在0.9以下,lr=0.001,不使用hard-mining; 97 | 98 | f1score在0.9以上,lr=0.0001,使用hard-mining; 99 | 100 | 生成的model保存在不同的文件夹里。 101 | ``` 102 | cd code/ocr 103 | python main.py --phase test --resume ../../data/models-small/densenet/eval-16-1/best_f1score.ckpt 104 | ``` 105 | -------------------------------------------------------------------------------- /code/ocr/dataloader.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | """ 4 | Read images and corresponding labels. 5 | """ 6 | 7 | import numpy as np 8 | import os 9 | import json 10 | # import skimage 11 | # from skimage import io 12 | from PIL import Image,ImageDraw,ImageFont,ImageFilter 13 | from torch.utils.data import Dataset 14 | import time 15 | 16 | filters = [ 17 | ImageFilter.SMOOTH, # 平滑,大于16可以用 18 | ImageFilter.SMOOTH_MORE, # 平滑,大于16可以用 19 | ImageFilter.GaussianBlur(radius=1), # 大于16可以用 20 | 21 | ImageFilter.GaussianBlur(radius=2), # 大于32可以用 22 | ImageFilter.BLUR, # 大于32可以用 23 | ] 24 | 25 | def histeq (im,nbr_bins =256): 26 | # 对一副灰度图像进行直方图均衡化 27 | #该函数有两个输入参数,一个是灰度图像,一个是直方图中使用小区间的数目 28 | #函数返回直方图均衡化后的图像,以及用来做像素值映射的累计分布函数 29 | # 计算图像的直方图 30 | imhist,bins =np.histogram(im.flatten(),nbr_bins,normed=True) 31 | cdf =imhist.cumsum() #cumulative distribution function 32 | cdf =255*cdf/cdf[-1] #归一化,函数中使用累计分布函数的最后一个元素(下标为-1,目标是 33 | # 将其归一化到0-1范围 ) 34 | # 使用累计分布函数的线性插值,计算新的像素值 35 | im2=np.interp(im.flatten(),bins[:-1],cdf) # im2 is an array 36 | return im2.reshape(im.shape),cdf 37 | 38 | 39 | class DataSet(Dataset): 40 | def __init__(self, 41 | image_names, 42 | image_label_dict, 43 | class_num, 44 | transform=None, 45 | image_size=None, # 最后生成的图片大小 46 | word_index_dict=None, # 字符与index的对应 47 | phase='train', # phase 48 | args=None, # 全局参数 49 | font_range=None, # 生成字符大小范围 50 | rotate_range=None, # 图片旋转范围 51 | margin=None # 图片边缘不覆盖字符,以免旋转时候丢失 52 | ): 53 | 54 | self.font_range = font_range 55 | self.rotate_range = rotate_range 56 | self.margin = margin 57 | self.image_names = image_names 58 | self.image_label_dict = image_label_dict 59 | self.transform = transform 60 | self.phase = phase 61 | self.class_num = class_num 62 | self.word_labels = { } 63 | self.image_size = image_size 64 | self.word_index_dict = word_index_dict 65 | self.args = args 66 | if self.phase != 'pretrain': 67 | for image_name in image_names: 68 | image_name = image_name.split('/')[-1] 69 | if image_name not in image_label_dict: 70 | try: 71 | 
image_label_dict[image_name] = image_label_dict[image_name.replace('seg.','').split('.png')[0]+'.png'] 72 | except: 73 | image_label_dict[image_name] = '' 74 | word_label = np.zeros(class_num) 75 | label = image_label_dict[image_name] 76 | for l in label.split(): 77 | word_label[int(l)] = 1 78 | self.word_labels[image_name] = word_label.astype(np.float32) 79 | 80 | def __getitem__(self, index): 81 | image_name = self.image_names[index] 82 | # print self.image_size 83 | if self.phase == 'pretrain': 84 | image = Image.open(image_name).convert('RGB') 85 | # 改变灰度 86 | image = np.array(image) 87 | r = get_random(index) 88 | # 通常背景为高亮度颜色 89 | if r < 0.3: 90 | min_rgb = 192. 91 | elif r < 0.7: 92 | min_rgb = 128. 93 | else: 94 | min_rgb = 64. 95 | if self.args.model == 'resnet': 96 | pass 97 | elif index % 2 == 0: 98 | image = image / (255. - min_rgb) + min_rgb 99 | else: 100 | image[image 0.5 and self.args.epoch > 35: 133 | noise_level = 10 134 | noise = np.random.random(image.shape) * noise_level - noise_level / 2. 135 | image = image + noise 136 | ''' 137 | image = (image / 128. - 1).astype(np.float32) 138 | 139 | if font_size > 32: 140 | size_label = 1 141 | elif font_size < 16: 142 | size_label = 0 143 | else: 144 | size_label = 11 145 | size_label = np.array([size_label]).astype(np.float32) 146 | 147 | return image_name, image.astype(np.float32), label, bbox_label, seg_label, size_label 148 | 149 | elif self.phase == 'seg': 150 | # 保持和原图相同的分辨率 151 | image = Image.open(image_name).convert('RGB') 152 | # image_name = image_name.split('/')[-1] 153 | # image = image.resize(self.image_size) 154 | image = np.transpose(np.array(image), [2,0,1]).astype(np.float32) 155 | min_size = 32 156 | shape = (np.array(image.shape).astype(np.int32) / min_size) * min_size + min_size # * 2 157 | new_image = np.zeros([3, shape[1], shape[2]], dtype=np.float32) 158 | ''' 159 | for i in range(3): 160 | gray = sorted(image[i].reshape(-1)) 161 | gray = gray[len(gray)/2] 162 | new_image[i] = gray 163 | ''' 164 | # new_image[:, min_size/2:image.shape[1]+min_size/2, min_size/2:image.shape[2]+min_size/2] = image 165 | new_image[:, :image.shape[1], :image.shape[2]] = image 166 | image = new_image 167 | # word_label = self.word_labels[image_name] 168 | image = (image / 128. - 1).astype(np.float32) 169 | return image_name, image, np.zeros(self.class_num, dtype=np.float32) 170 | else: 171 | seg_name = image_name.replace('train','seg.train').replace('test','seg.test') + '.seg.crop.png' 172 | no_aug = self.args.no_aug 173 | if os.path.exists(seg_name): 174 | # image, word_label = random_crop_image(seg_name, self.image_label_dict[image_name.split('/')[-1]], self.image_size, self.class_num, self.phase, index, no_aug) 175 | image, word_label = random_crop_image(image_name, self.image_label_dict[image_name.split('/')[-1]], self.image_size, self.class_num, self.phase, index, no_aug, self.args) 176 | else: 177 | image, word_label = random_crop_image(image_name, self.image_label_dict[image_name.split('/')[-1]], self.image_size, self.class_num, self.phase, index, no_aug, self.args) 178 | 179 | # 灰度反向翻转,变成黑底,白字 180 | if self.phase == 'train': 181 | r = get_random(index+111) 182 | if r < 0.1: 183 | image[0,:,:] = 255 - image[0,:,:] 184 | elif r < 0.2: 185 | image[1,:,:] = 255 - image[1,:,:] 186 | elif r < 0.3: 187 | image[2,:,:] = 255 - image[2,:,:] 188 | if get_random(index+112) < 0.2: 189 | image = 255. - image 190 | 191 | image = (image / 128. 
- 1).astype(np.float32) 192 | return image_name, image, word_label 193 | 194 | def __len__(self): 195 | return len(self.image_names) 196 | 197 | last_random = 10 198 | def get_random(idx): 199 | global last_random 200 | if last_random < 1: 201 | np.random.seed(int(last_random * 1000000 + time.time()) + idx) 202 | else: 203 | np.random.seed(int((time.time()))) 204 | x = np.random.random() 205 | while np.abs(last_random - x) < 0.1: 206 | x = np.random.random() 207 | last_random = x 208 | return x 209 | 210 | def comput_iou(font, proposal): 211 | fx,fy,fh,fw = font 212 | px,py,pd = proposal 213 | overlap_x = max(min(pd, fh) - np.abs(fx - px), 0) 214 | overlap_y = max(min(pd, fw) - np.abs(fy - py), 0) 215 | # 面积 216 | sf = fh * fw 217 | sp = pd * pd 218 | so = overlap_x * overlap_y 219 | iou = float(so) / (sf + sp - so) 220 | return iou 221 | 222 | def generate_bbox_label(image, font_place, font_size, font_num, args, image_size): 223 | imgh,imgw = image.size 224 | seg_label = np.zeros((image_size[0]/2, image_size[1]/2), dtype=np.float32) 225 | sx = float(font_place[0]) / image.size[0] * image_size[0] 226 | ex = sx + float(font_size) / image.size[0] * image_size[0] * font_num 227 | sy = float(font_place[1]) / image.size[1] * image_size[1] 228 | ey = sy + float(font_size) / image.size[1] * image_size[1] 229 | seg_label[int(sx)/2:int(ex)/2, int(sy)/2:int(ey)/2] = 1 230 | seg_label = seg_label.transpose((1,0)) 231 | 232 | bbox_label = np.zeros(( 233 | image_size[0]/args.stride, # 16 234 | image_size[1]/args.stride, # 16 235 | len(args.anchors), # 4 236 | 4 # dx,dy,dd,c 237 | ), dtype=np.float32) 238 | fonts= [] 239 | for i in range(font_num): 240 | x = font_place[0] + font_size/2. + i * font_size 241 | y = font_place[1] + font_size/2. 242 | h = font_size 243 | w = font_size 244 | 245 | x = float(x) * image_size[0] / imgh 246 | h = float(h) * image_size[0] / imgh 247 | y = float(y) * image_size[1] / imgw 248 | w = float(w) * image_size[1] / imgw 249 | fonts.append([x,y,h,w]) 250 | 251 | # print bbox_label.shape 252 | for ix in range(bbox_label.shape[0]): 253 | for iy in range(bbox_label.shape[1]): 254 | for ia in range(bbox_label.shape[2]): 255 | proposal = [ix*args.stride + args.stride/2, iy*args.stride + args.stride/2, args.anchors[ia]] 256 | iou_fi = [] 257 | for fi, font in enumerate(fonts): 258 | iou = comput_iou(font, proposal) 259 | iou_fi.append((iou, fi)) 260 | max_iou, max_fi = sorted(iou_fi)[-1] 261 | if max_iou > 0.5: 262 | # 正例 263 | dx = (font[0] - proposal[0]) / float(proposal[2]) 264 | dy = (font[1] - proposal[1]) / float(proposal[2]) 265 | fd = max(font[2:]) 266 | dd = np.log(fd / float(proposal[2])) 267 | # bbox_label[ix,iy,ia] = [dx, dy, dd, 1] 268 | bbox_label[ix,iy,ia] = [dx, dy, dd, 1] 269 | elif max_iou > 0.25: 270 | # 忽略 271 | bbox_label[ix,iy,ia,3] = 0 272 | else: 273 | # 负例 274 | bbox_label[ix,iy,ia,3] = -1 275 | # 这里有一个transpose操作 276 | bbox_label = bbox_label.transpose((1,0,2,3)) 277 | 278 | 279 | # 计算anchor信息 280 | return bbox_label, seg_label 281 | 282 | def get_resize_para(size, idx): 283 | if size > 48: 284 | rh, rw = 4,4 285 | elif size > 32: 286 | if idx % 2: 287 | rh, rw = 2,4 288 | else: 289 | rh, rw = 4,2 290 | elif size > 16: 291 | if idx % 2: 292 | rh, rw = 1,2 293 | else: 294 | rh, rw = 2,1 295 | else: 296 | return 1,1 297 | 298 | rhs = range(rh) 299 | np.random.seed(int(time.time()) + idx + 1) 300 | np.random.shuffle(rhs) 301 | rh = rhs[0] + 1 302 | 303 | rws = range(rw) 304 | np.random.seed(int(time.time()) + idx + 2) 305 | np.random.shuffle(rws) 306 | rw = 
rws[0] + 1 307 | 308 | return rh, rw 309 | 310 | # def generate_image(idx, image, word_index_dict, class_num, args, image_size, no_aug, epoch): 311 | def generate_image( idx, image, no_aug, dataset): 312 | ''' 313 | args.model == 'resnet' 的时候只是用于训练分割网络,大部分augmentation都不用 314 | 这里的注释,默认参数是 315 | image_size [512, 64] 316 | rotate_range [-5, 5] 317 | font_range [8,32] 318 | ''' 319 | 320 | word_index_dict = dataset.word_index_dict 321 | class_num = dataset.class_num 322 | args = dataset.args 323 | image_size = dataset.image_size 324 | font_range = dataset.font_range 325 | rotate_range = dataset.rotate_range 326 | epoch = args.epoch 327 | margin = dataset.margin 328 | 329 | # 选择文字背景 330 | image = image.resize((1024,1024)) 331 | h,w = image.size 332 | # 随机crop一个部分,resize成固定大小,会对文字有一定的水平竖直方向拉伸 333 | h_crop = int(get_random(idx + 10) * image_size[0] * 2 / 8) + image_size[0] * 6 / 8 # 长度范围 [374, 512] 334 | w_crop = int(get_random(idx + 11) * image_size[1] * 2 / 8) + image_size[1] * 6 / 8 # 宽度范围 [48, 64] 335 | if args.model == 'resnet' or no_aug or epoch < 60: 336 | # resnet: 分割网络采用固定大小crop 337 | # epoch<60: 网络训练初期采用固定大小,加速收敛 338 | h_crop = image_size[0] 339 | w_crop = image_size[1] 340 | # 选择文字背景,随机选择crop起始位置 341 | x = int(get_random(idx+12) * (h - h_crop)) 342 | y = int(get_random(idx+13) * (w - w_crop)) 343 | image = image.crop((x,y,x+h_crop,y+w_crop)) 344 | 345 | 346 | # 字体大小是最容易引起错误的变量,字体大小不能超出图片中心区域大小 347 | size = font_range[0] + int(get_random(idx+20) * (font_range[1] - font_range[0])) 348 | size = min(size, h_crop - 2*margin - 2, w_crop - 2*margin - 2) 349 | 350 | # 字体数量,超过可容纳数量的一半以上,至少包含一个字符 351 | large_num = max(0, (h_crop - 2 * margin)/ size - 1) 352 | word_num = int(min(large_num / 2, 5) + get_random(idx+21) * large_num / 2) + 1 353 | # word_num = int(large_num / 2 + get_random(idx+21) * large_num / 2) + 1 354 | word_num = max(1, word_num) 355 | 356 | # 添加字体位置,并生成label信息 357 | place_x = int(get_random(idx+22) * (h_crop - word_num * size - margin)) + margin 358 | if margin == 0: 359 | # 用于添加两排文字 360 | place_y = int(get_random(idx+23) * (w_crop/2 - size - margin)) + margin 361 | else: 362 | place_y = int(get_random(idx+23) * (w_crop - size - margin)) + margin 363 | place = (place_x, place_y) 364 | label = np.zeros(class_num).astype(np.float32) 365 | 366 | text = u'' 367 | words = word_index_dict.keys() 368 | 369 | if margin == 0: 370 | # 两排文字 371 | word_num *= 2 372 | while len(text) < word_num: 373 | np.random.shuffle(words) 374 | w = words[len(text)] 375 | if w in u'"(),': 376 | # 部分字符不建议生成 377 | continue 378 | text = text + w 379 | index = word_index_dict[w] 380 | label[index] = 1 381 | 382 | # 得到bbox_label 383 | if args.model == 'resnet': 384 | bbox_label, seg_label = generate_bbox_label(image, place, size, word_num, args, image_size) 385 | else: 386 | bbox_label, seg_label = 0,0 387 | 388 | # 字体,可以添加其他字体 389 | fonts = ['../../files/ttf/simsun.ttf'] 390 | np.random.shuffle(fonts) 391 | font = fonts[0] 392 | 393 | # 颜色 394 | r = get_random(idx+24) 395 | if no_aug or r < 0.7: 396 | # 选择不同程度的黑色 397 | if r < 0.3: 398 | c = int(get_random(idx + 25) * 64) 399 | color = (c,c,c) 400 | else: 401 | rgb = 64 402 | r = int(get_random(idx + 27) * rgb) 403 | g = int(get_random(idx + 28) * rgb) 404 | b = int(get_random(idx + 29) * rgb) 405 | color = (r,g,b) 406 | else: 407 | # 随机颜色,但是选择较暗的颜色 408 | rgb = 256 409 | r = int(get_random(idx + 27) * rgb) 410 | g = int(get_random(idx + 28) * rgb) 411 | b = int(get_random(idx + 29) * rgb) 412 | ra = get_random(idx + 30) 413 | if ra < 0.5: 414 | ra = 
int(1000 * ra) % 3 415 | if ra == 0: 416 | r = 0 417 | elif ra == 1: 418 | g = 0 419 | else: 420 | b = 0 421 | color = (r,g,b) 422 | 423 | # 增加文字到图片 424 | if margin == 0: 425 | image = add_text_to_img(image, text[:word_num/2], size, font, color, place) 426 | image = add_text_to_img(image, text[word_num/2:], size, font, color, (place[0], place[1]+image_size[1]/2)) 427 | else: 428 | image = add_text_to_img(image, text, size, font, color, place) 429 | 430 | ''' 431 | # 随机翻转,增加泛化程度 432 | if args.model != 'resnet': 433 | if get_random(idx+130) < 0.3: 434 | image = image.transpose(Image.FLIP_LEFT_RIGHT) 435 | if get_random(idx+131) < 0.3: 436 | image = image.transpose(Image.FLIP_TOP_BOTTOM) 437 | 438 | # 先做旋转,然后在拉伸图片 439 | h,w = image.size 440 | max_hw, min_hw = float(max(h,w)), float(min(h,w)) 441 | if max_hw / min_hw >= 5: 442 | rotate_size = 5 443 | elif max_hw / min_hw >= 3: 444 | rotate_size = 10 445 | elif max_hw / min_hw >= 1.5: 446 | rotate_size = 30 447 | else: 448 | rotate_size = 50 449 | if args.model != 'resnet' and not no_aug and epoch>70 and get_random(idx+50) < 0.8: 450 | theta = int(rotate_size * 2 * get_random(idx+32)) - rotate_size 451 | image = image.rotate(theta) 452 | else: 453 | theta = 0 454 | ''' 455 | 456 | 457 | # 还原成 [512, 64] 的大小 458 | image = image.resize(image_size) 459 | 460 | 461 | # 最后生成图片后再一次旋转,图片模糊化 462 | if args.model == 'resnet' or (get_random(idx+50) < 0.8 and not no_aug): 463 | 464 | # 旋转 465 | if args.model == 'resnet' : 466 | rotate_size = 10 467 | else: 468 | rotate_size = rotate_range[0] + int(get_random(idx+32) * (rotate_range[1] - rotate_range[0])) 469 | theta = int(rotate_size * 2 * get_random(idx+33)) - rotate_size 470 | image = image.rotate(theta) 471 | if args.model == 'resnet': 472 | # 作分割的时候,标签信息也需要一起旋转 473 | seg_label = np.array([seg_label, seg_label, seg_label]) * 255 474 | seg_label = np.array(Image.fromarray(seg_label.transpose([1,2,0]).astype(np.uint8)).rotate(theta)) 475 | seg_label = (seg_label[:,:,0] > 128).astype(np.float32) 476 | 477 | filters = [ 478 | ImageFilter.SMOOTH, # 平滑,大于16可以用 479 | ImageFilter.SMOOTH_MORE, # 平滑,大于16可以用 480 | ImageFilter.GaussianBlur(radius=1), # 大于16可以用 481 | 482 | ImageFilter.GaussianBlur(radius=2), # 大于32可以用 483 | ImageFilter.BLUR, # 大于32可以用 484 | ImageFilter.GaussianBlur(radius=2), # 多来两次 485 | ImageFilter.BLUR, # 多来两次 486 | ] 487 | 488 | # 当文字比较大的时候,增加一些模糊 489 | if size > 16: 490 | if size < 32: 491 | filters = filters[:3] 492 | np.random.shuffle(filters) 493 | image = image.filter(filters[idx % len(filters)]) 494 | 495 | if args.model == 'resnet': 496 | # add noise 497 | noise_level = 32 498 | image = np.array(image) 499 | noise = np.random.random(image.shape) * noise_level - noise_level / 2. 
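            # The noise above is sampled uniformly from [-noise_level/2, +noise_level/2),
            # i.e. roughly [-16, 16), and is added to the raw uint8 pixel values on the
            # next line; values pushed outside [0, 255] are not clipped by the later
            # astype(np.uint8) cast, so this is a quite harsh augmentation that is only
            # applied on the resnet (segmentation) branch.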
500 | image = image + noise 501 | image = image.astype(np.uint8) 502 | image = Image.fromarray(image) 503 | 504 | 505 | # 有时候需要低分辨率的图片 506 | resize_0, resize_1 = get_resize_para(size, idx) 507 | image = image.resize([image_size[0]/resize_0, image_size[1]/resize_1]) 508 | 509 | # 还原成 [512, 64] 的大小 510 | image = image.resize(image_size) 511 | 512 | return image, label, bbox_label, seg_label, size 513 | 514 | def add_text_to_img(img, text, size, font, color, place): 515 | imgdraw = ImageDraw.Draw(img) 516 | imgfont = ImageFont.truetype(font,size=size) 517 | imgdraw.text(place, text, fill=color, font=imgfont) 518 | return img 519 | 520 | def random_crop_image(image_name, text, image_size, class_num, phase, idx, no_aug, args): 521 | # label 522 | text = text.split() 523 | word_label = np.zeros(class_num, dtype=np.float32) 524 | 525 | 526 | if args.hist: 527 | if get_random(idx+34) < 0.4 and phase == 'train': 528 | image = Image.open(image_name).convert('RGB') 529 | else: 530 | # 直方图均衡化 531 | image = Image.open(image_name).convert('YCbCr') 532 | image = np.array(image) 533 | imy = image[:,:,0] 534 | imy,_ = histeq(imy) 535 | image[:,:,0] = imy 536 | image = Image.fromarray(image, mode='YCbCr').convert('RGB') 537 | else: 538 | image = Image.open(image_name).convert('RGB') 539 | x = np.array(image) 540 | assert x.min() >= 0 541 | assert x.max() < 256 542 | 543 | if phase == 'train' and not no_aug: 544 | # 旋转 545 | if get_random(idx+11) < 0.8: 546 | theta = int(6 * get_random(idx+1)) - 3 547 | image = image.rotate(theta) 548 | 549 | # 模糊处理 550 | if get_random(idx+2) < 0.3: 551 | np.random.shuffle(filters) 552 | image = image.filter(filters[0]) 553 | 554 | # 短边小于64, 直接填0 555 | h,w = image.size 556 | if w < image_size[1] and h > 64: 557 | if get_random(idx+3) < 0.3: 558 | image = np.array(image) 559 | start_index = (image_size[1] - w)/2 560 | new_image = np.zeros((image_size[1], h, 3), dtype=np.uint8) 561 | new_image[start_index:start_index+w, :, :] = image 562 | image = Image.fromarray(new_image) 563 | 564 | 565 | # 先处理成 X * 64 的图片 566 | h,w = image.size 567 | h = int(float(h) * image_size[1] / w) 568 | image = image.resize((h, image_size[1])) 569 | 570 | if phase == 'train' and not no_aug: 571 | 572 | # 放缩 0.8~1.2 573 | h,w = image.size 574 | r = get_random(idx+4) / 4. 
+ 0.8 575 | image = image.resize((int(h*r), int(w*r))) 576 | 577 | # crop 578 | if min(h,w) > 32: 579 | crop_size = 20 580 | x = int((crop_size * get_random(idx+5) - crop_size/2) * r) 581 | y = int((crop_size * get_random(idx+6) - crop_size/2) * r) 582 | image = image.crop((max(0,x),max(0,y),min(0,x)+h,min(0,y)+w)) 583 | 584 | # 有时需要生成一些低分辨率的图片 585 | h,w = image.size 586 | r = get_random(idx+7) 587 | 588 | ''' 589 | if r < 0.01 and min(h,w) > 64: 590 | image = image.resize((h/8, w/8)) 591 | elif r < 0.1 and min(h,w) > 64: 592 | image = image.resize((h/4, w/4)) 593 | elif r < 0.3 and min(h,w) > 32: 594 | image = image.resize((h/2, w/2)) 595 | ''' 596 | 597 | # 从新变为 X * 64 的图片 598 | h = int(float(h) * image_size[1] / w) 599 | image = image.resize((h, image_size[1])) 600 | 601 | # 填充成固定大小 602 | image = np.transpose(np.array(image), [2,0,1]).astype(np.float32) 603 | if image.shape[2] < image_size[0]: 604 | # 长宽比例小于8(16),直接填充 605 | if phase == 'test': 606 | # 正中间 607 | start = np.abs(image_size[0] - image.shape[2])/2 608 | else: 609 | start = int(np.random.random() * np.abs(image_size[0] - image.shape[2])) 610 | new_image = np.zeros((3, image_size[1], image_size[0]), dtype=np.float32) 611 | new_image[:,:,start:start+image.shape[2]] = image 612 | if phase == 'test': 613 | new_image = np.array([new_image]).astype(np.float32) 614 | for w in text: 615 | word_label[int(w)] = 1 616 | else: 617 | # 长宽比例大于16,随机截取 618 | if phase == 'test': 619 | # 测试阶段直接合并 620 | crop_num = image.shape[2] * 2 / image_size[0] + 1 621 | new_image = np.zeros((crop_num, 3, image_size[1], image_size[0]), dtype=np.float32) 622 | for i in range(crop_num): 623 | start_index = i * image_size[0] / 2 624 | end_index = start_index + image_size[0] 625 | if end_index > image.shape[2]: 626 | new_image[i,:,:,:image.shape[2] - start_index] = image[:,:,start_index:end_index] 627 | else: 628 | new_image[i] = image[:,:,start_index:end_index] 629 | for w in text: 630 | word_label[int(w)] = 1 631 | else: 632 | # 训练阶段不算负例loss 633 | start = int(np.random.random() * np.abs(image_size[0] - image.shape[2])) 634 | new_image = image[:,:,start:start+image_size[0]] 635 | for w in text: 636 | word_label[int(w)] = -1 637 | 638 | image = new_image 639 | if phase == 'train': 640 | image = image.astype(np.float32) 641 | ''' 642 | # 每一列灰度有所改变 643 | if get_random(idx+9) < 0.3: 644 | change_level = 256. / image.shape[1] 645 | gray_change = 0 646 | for j in range(image.shape[1]): 647 | gray_change += change_level * get_random(j+idx) - change_level / 2 648 | image[:,j,:] += gray_change 649 | # 每一行灰度有所改变 650 | if get_random(idx+10) < 0.3: 651 | change_level = 256. / image.shape[2] 652 | gray_change = 0 653 | for k in range(image.shape[2]): 654 | gray_change += change_level * get_random(10+k+idx) - change_level / 2 655 | image[:,:,k] += gray_change 656 | ''' 657 | # 增加噪声 658 | if get_random(idx+8) < 0.1: 659 | noise_level = 64 660 | noise = np.random.random(image.shape) * noise_level - noise_level / 2. 661 | image = image + noise 662 | # noise = np.random.random(image.shape[1:]) * noise_level - noise_level / 2. 
663 | # image = image + np.array([noise, noise, noise]) 664 | image = image.astype(np.float32) 665 | 666 | return image, word_label 667 | -------------------------------------------------------------------------------- /code/ocr/densenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.utils.model_zoo as model_zoo 5 | from collections import OrderedDict 6 | 7 | __all__ = ['DenseNet', 'densenet121', 'densenet169', 'densenet201', 'densenet161'] 8 | 9 | 10 | model_urls = { 11 | 'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth', 12 | 'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth', 13 | 'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth', 14 | 'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth', 15 | } 16 | 17 | 18 | def densenet121(pretrained=False, small=0,**kwargs): 19 | r"""Densenet-121 model from 20 | `"Densely Connected Convolutional Networks" `_ 21 | 22 | Args: 23 | pretrained (bool): If True, returns a model pre-trained on ImageNet 24 | """ 25 | model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16), small=small, 26 | **kwargs) 27 | if pretrained: 28 | model.load_state_dict(model_zoo.load_url(model_urls['densenet121'])) 29 | return model 30 | 31 | 32 | def densenet169(pretrained=False, **kwargs): 33 | r"""Densenet-169 model from 34 | `"Densely Connected Convolutional Networks" `_ 35 | 36 | Args: 37 | pretrained (bool): If True, returns a model pre-trained on ImageNet 38 | """ 39 | model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 32, 32), 40 | **kwargs) 41 | if pretrained: 42 | model.load_state_dict(model_zoo.load_url(model_urls['densenet169'])) 43 | return model 44 | 45 | 46 | def densenet201(pretrained=False, **kwargs): 47 | r"""Densenet-201 model from 48 | `"Densely Connected Convolutional Networks" `_ 49 | 50 | Args: 51 | pretrained (bool): If True, returns a model pre-trained on ImageNet 52 | """ 53 | model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 48, 32), 54 | **kwargs) 55 | if pretrained: 56 | model.load_state_dict(model_zoo.load_url(model_urls['densenet201'])) 57 | return model 58 | 59 | 60 | def densenet161(pretrained=False, **kwargs): 61 | r"""Densenet-161 model from 62 | `"Densely Connected Convolutional Networks" `_ 63 | 64 | Args: 65 | pretrained (bool): If True, returns a model pre-trained on ImageNet 66 | """ 67 | model = DenseNet(num_init_features=96, growth_rate=48, block_config=(6, 12, 36, 24), 68 | **kwargs) 69 | if pretrained: 70 | model.load_state_dict(model_zoo.load_url(model_urls['densenet161'])) 71 | return model 72 | 73 | 74 | class _DenseLayer(nn.Sequential): 75 | def __init__(self, num_input_features, growth_rate, bn_size, drop_rate): 76 | super(_DenseLayer, self).__init__() 77 | self.add_module('norm.1', nn.BatchNorm2d(num_input_features)), 78 | self.add_module('relu.1', nn.ReLU(inplace=True)), 79 | self.add_module('conv.1', nn.Conv2d(num_input_features, bn_size * 80 | growth_rate, kernel_size=1, stride=1, bias=False)), 81 | self.add_module('norm.2', nn.BatchNorm2d(bn_size * growth_rate)), 82 | self.add_module('relu.2', nn.ReLU(inplace=True)), 83 | self.add_module('conv.2', nn.Conv2d(bn_size * growth_rate, growth_rate, 84 | kernel_size=3, stride=1, padding=1, bias=False)), 85 | self.drop_rate = drop_rate 86 | 87 | def forward(self, x): 88 
| new_features = super(_DenseLayer, self).forward(x) 89 | if self.drop_rate > 0: 90 | new_features = F.dropout(new_features, p=self.drop_rate, training=self.training) 91 | return torch.cat([x, new_features], 1) 92 | 93 | 94 | class _DenseBlock(nn.Sequential): 95 | def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate): 96 | super(_DenseBlock, self).__init__() 97 | for i in range(num_layers): 98 | layer = _DenseLayer(num_input_features + i * growth_rate, growth_rate, bn_size, drop_rate) 99 | self.add_module('denselayer%d' % (i + 1), layer) 100 | 101 | 102 | class _Transition(nn.Sequential): 103 | def __init__(self, num_input_features, num_output_features, use_pool): 104 | super(_Transition, self).__init__() 105 | self.add_module('norm', nn.BatchNorm2d(num_input_features)) 106 | self.add_module('relu', nn.ReLU(inplace=True)) 107 | self.add_module('conv', nn.Conv2d(num_input_features, num_output_features, 108 | kernel_size=1, stride=1, bias=False)) 109 | if use_pool: 110 | self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2)) 111 | 112 | 113 | class DenseNet(nn.Module): 114 | r"""Densenet-BC model class, based on 115 | `"Densely Connected Convolutional Networks" `_ 116 | 117 | Args: 118 | growth_rate (int) - how many filters to add each layer (`k` in paper) 119 | block_config (list of 4 ints) - how many layers in each pooling block 120 | num_init_features (int) - the number of filters to learn in the first convolution layer 121 | bn_size (int) - multiplicative factor for number of bottle neck layers 122 | (i.e. bn_size * k features in the bottleneck layer) 123 | drop_rate (float) - dropout rate after each dense layer 124 | num_classes (int) - number of classification classes 125 | """ 126 | def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), small=0, 127 | num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000): 128 | 129 | super(DenseNet, self).__init__() 130 | 131 | # First convolution 132 | self.features = nn.Sequential(OrderedDict([ 133 | ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)), 134 | ('norm0', nn.BatchNorm2d(num_init_features)), 135 | ('relu0', nn.ReLU(inplace=True)), 136 | ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)), 137 | ])) 138 | 139 | # Each denseblock 140 | num_features = num_init_features 141 | for i, num_layers in enumerate(block_config): 142 | block = _DenseBlock(num_layers=num_layers, num_input_features=num_features, 143 | bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate) 144 | self.features.add_module('denseblock%d' % (i + 1), block) 145 | num_features = num_features + num_layers * growth_rate 146 | if i != len(block_config) - 1: 147 | if small and i > 0: 148 | use_pool = 0 149 | else: 150 | use_pool = 1 151 | trans = _Transition(num_input_features=num_features, num_output_features=num_features // 2, use_pool=use_pool) 152 | self.features.add_module('transition%d' % (i + 1), trans) 153 | num_features = num_features // 2 154 | 155 | # Final batch norm 156 | self.features.add_module('norm5', nn.BatchNorm2d(num_features)) 157 | 158 | # Linear layer 159 | self.classifier = nn.Linear(num_features, num_classes) 160 | 161 | def forward(self, x): 162 | features = self.features(x) 163 | return features 164 | att_feats = features 165 | out = F.relu(features, inplace=True) 166 | out = F.avg_pool2d(out, kernel_size=7, stride=1).view(features.size(0), -1) 167 | # out = F.avg_pool2d(out, kernel_size=3, stride=1).view(features.size(0), -1) 168 | fc_feats 
= out 169 | out = self.classifier(out) 170 | return att_feats, fc_feats, out 171 | -------------------------------------------------------------------------------- /code/ocr/main.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | 17 | """ResNet Train/Eval module. 18 | """ 19 | import time 20 | import sys 21 | import os 22 | 23 | import numpy as np 24 | import dataloader 25 | import json 26 | from tqdm import tqdm 27 | 28 | import densenet 29 | import resnet 30 | from PIL import Image 31 | 32 | import torchvision 33 | 34 | import torch 35 | import torch.nn as nn 36 | import torch.backends.cudnn as cudnn 37 | from torch.autograd import Variable 38 | from torch.utils.data import DataLoader 39 | import torch.nn.functional as F 40 | 41 | from sklearn.metrics import roc_auc_score 42 | 43 | from tools import parse 44 | from glob import glob 45 | from skimage import measure 46 | import sys 47 | reload(sys) 48 | sys.setdefaultencoding('utf8') 49 | import traceback 50 | 51 | args = parse.args 52 | # anchor大小 53 | args.anchors = [8, 12, 18, 27, 40, 60] 54 | args.stride = 8 55 | args.image_size = [512,64] 56 | 57 | 58 | class DenseNet121(nn.Module): 59 | """Model modified. 60 | 61 | The architecture of our model is the same as standard DenseNet121 62 | except the classifier layer which has an additional sigmoid function. 
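    Concretely, the classifier here is a 1x1 convolution (classifier_font) applied
    to the DenseNet feature map, so every spatial cell gets its own vector of
    per-character scores. During training these cell-wise scores are collapsed by a
    global max-pool into one multi-label prediction per image (see forward below);
    at test time the un-pooled per-cell scores are returned so characters can be
    located along the text line.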
63 | 64 | """ 65 | def __init__(self, out_size): 66 | super(DenseNet121, self).__init__() 67 | self.inplanes = 1024 68 | self.densenet121 = densenet.densenet121(pretrained=True, small=args.small) 69 | num_ftrs = self.densenet121.classifier.in_features 70 | self.classifier_font = nn.Sequential( 71 | # 这里可以用fc做分类 72 | # nn.Linear(num_ftrs, out_size) 73 | # 这里可以用1×1卷积做分类 74 | nn.Conv2d(num_ftrs, out_size, kernel_size=1, bias=False) 75 | ) 76 | self.train_params = [] 77 | self.unpool = nn.MaxUnpool2d(kernel_size=2, stride=2) 78 | 79 | def _make_layer(self, block, planes, blocks, stride=1): 80 | downsample = None 81 | if stride != 1 or self.inplanes != planes * block.expansion: 82 | downsample = nn.Sequential( 83 | nn.Conv2d(self.inplanes, planes * block.expansion, 84 | kernel_size=1, stride=stride, bias=False), 85 | nn.BatchNorm2d(planes * block.expansion), 86 | ) 87 | 88 | layers = [] 89 | layers.append(block(self.inplanes, planes, stride, downsample)) 90 | self.inplanes = planes * block.expansion 91 | for i in range(1, blocks): 92 | layers.append(block(self.inplanes, planes)) 93 | 94 | return nn.Sequential(*layers) 95 | 96 | def forward(self, x, phase='train'): 97 | feats = self.densenet121(x) # (32, 1024, 2, 16) 98 | if not args.small: 99 | feats = F.max_pool2d(feats, kernel_size=2, stride=2) # (32, 1024, 1, 8) 100 | out = self.classifier_font(feats) # (32, 1824, 1, 8) 101 | out_size = out.size() 102 | # print out.size() 103 | out = out.view(out.size(0),out.size(1),-1) # (32, 1824, 8) 104 | # print out.size() 105 | if phase == 'train': 106 | out = F.adaptive_max_pool1d(out, output_size=(1)).view(out.size(0),-1) # (32, 1824) 107 | return out 108 | else: 109 | out = out.transpose(1,2).contiguous() 110 | out = out.view(out_size[0],out_size[2], out_size[3], out_size[1]) # (32, 1, 8, 1824) 111 | return out, feats 112 | 113 | class Loss(nn.Module): 114 | def __init__(self): 115 | super(Loss, self).__init__() 116 | self.classify_loss = nn.BCELoss() 117 | self.sigmoid = nn.Sigmoid() 118 | self.regress_loss = nn.SmoothL1Loss() 119 | 120 | def forward(self, font_output, font_target, weight=None, use_hard_mining=False): 121 | font_output = self.sigmoid(font_output) 122 | font_loss = F.binary_cross_entropy(font_output, font_target, weight) 123 | 124 | # hard_mining 125 | if use_hard_mining: 126 | font_output = font_output.view(-1) 127 | font_target = font_target.view(-1) 128 | pos_index = font_target > 0.5 129 | neg_index = font_target == 0 130 | 131 | # pos 132 | pos_output = font_output[pos_index] 133 | pos_target = font_target[pos_index] 134 | num_hard_pos = max(len(pos_output)/4, min(5, len(pos_output))) 135 | if len(pos_output) > 5: 136 | pos_output, pos_target = hard_mining(pos_output, pos_target, num_hard_pos, largest=False) 137 | pos_loss = self.classify_loss(pos_output, pos_target) * 0.5 138 | 139 | 140 | # neg 141 | num_hard_neg = len(pos_output) * 2 142 | neg_output = font_output[neg_index] 143 | neg_target = font_target[neg_index] 144 | neg_output, neg_target = hard_mining(neg_output, neg_target, num_hard_neg, largest=True) 145 | neg_loss = self.classify_loss(neg_output, neg_target) * 0.5 146 | 147 | font_loss += pos_loss + neg_loss 148 | 149 | else: 150 | pos_loss, neg_loss = font_loss, font_loss 151 | return [font_loss, pos_loss, neg_loss] 152 | 153 | def _forward(self, font_output, font_target, weight, bbox_output=None, bbox_label=None, seg_output=None, seg_labels=None): 154 | font_output = self.sigmoid(font_output) 155 | font_loss = F.binary_cross_entropy(font_output, font_target, 
weight) 156 | 157 | acc = [] 158 | if bbox_output is not None: 159 | # bbox_loss = 0 160 | bbox_output = bbox_output.view((-1, 4)) 161 | bbox_label = bbox_label.view((-1, 4)) 162 | pos_index = bbox_label[:,-1] >= 0.5 163 | pos_index = pos_index.unsqueeze(1).expand(pos_index.size(0), 4) 164 | neg_index = bbox_label[:,-1] <= -0.5 165 | neg_index = neg_index.unsqueeze(1).expand(neg_index.size(0), 4) 166 | 167 | # 正例 168 | pos_label = bbox_label[pos_index].view((-1,4)) 169 | pos_output = bbox_output[pos_index].view((-1,4)) 170 | lx,ly,ld,lc = pos_label[:,0],pos_label[:,1],pos_label[:,2],pos_label[:,3] 171 | ox,oy,od,oc = pos_output[:,0],pos_output[:,1],pos_output[:,2],pos_output[:,3] 172 | regress_loss = [ 173 | self.regress_loss(ox, lx), 174 | self.regress_loss(oy, ly), 175 | self.regress_loss(od, ld), 176 | ] 177 | pc = self.sigmoid(oc) 178 | acc.append((pc>=0.5).data.cpu().numpy().astype(np.float32).sum()) 179 | acc.append(len(pc)) 180 | # print pc.size(), lc.size() 181 | classify_loss = self.classify_loss(pc, lc) * 0.5 182 | 183 | # 负例 184 | neg_label = bbox_label[neg_index].view((-1,4)) 185 | neg_output = bbox_output[neg_index].view((-1,4)) 186 | lc = neg_label[:, 3] 187 | oc = neg_output[:, 3] 188 | pc = self.sigmoid(oc) 189 | acc.append((pc<=0.5).data.cpu().numpy().astype(np.float32).sum()) 190 | acc.append(len(pc)) 191 | # print pc.size(), lc.size() 192 | classify_loss += self.classify_loss(pc, lc+1) * 0.5 193 | 194 | # seg_loss 195 | seg_output = seg_output.view(-1) 196 | seg_labels = seg_labels.view(-1) 197 | pos_index = seg_labels > 0.5 198 | neg_index = seg_labels < 0.5 199 | seg_loss = 0.5 * self.classify_loss(seg_output[pos_index], seg_labels[pos_index]) + \ 200 | 0.5 * self.classify_loss(seg_output[neg_index], seg_labels[neg_index]) 201 | seg_tpr = (seg_output[pos_index] > 0.5).data.cpu().numpy().astype(np.float32).sum() / len(seg_labels[pos_index]) 202 | seg_tnr = (seg_output[neg_index] < 0.5).data.cpu().numpy().astype(np.float32).sum() / len(seg_labels[neg_index]) 203 | # print seg_output[neg_index] 204 | # print seg_labels[neg_index] 205 | 206 | 207 | 208 | 209 | else: 210 | return font_loss 211 | 212 | if args.model == 'resnet': 213 | loss = font_loss + classify_loss + seg_loss 214 | else: 215 | loss = font_loss + classify_loss + seg_loss 216 | for reg in regress_loss: 217 | loss += reg 218 | # if args.model == 'resnet': 219 | # loss = seg_loss 220 | 221 | return [loss, font_loss, seg_loss, classify_loss] + regress_loss + acc + [seg_tpr, seg_tnr] 222 | 223 | font_num = font_target.sum(0).data.cpu().numpy() 224 | font_loss = 0 225 | for di in range(font_num.shape[0]): 226 | if font_num[di] > 0: 227 | font_output_i = font_output[:,di] 228 | font_target_i = font_target[:,di] 229 | pos_font_index = font_target_i > 0.5 230 | font_loss += 0.5 * self.classify_loss(font_output_i[pos_font_index], font_target_i[pos_font_index]) 231 | neg_font_index = font_target_i < 0.5 232 | if len(font_target_i[neg_font_index]) > 0: 233 | font_loss += 0.5 * self.classify_loss(font_output_i[neg_font_index], font_target_i[neg_font_index]) 234 | font_loss = font_loss / (font_num>0).sum() 235 | 236 | return font_loss 237 | # ''' 238 | 239 | def hard_mining(neg_output, neg_labels, num_hard, largest=True): 240 | num_hard = min(max(num_hard, 10), len(neg_output)) 241 | _, idcs = torch.topk(neg_output, min(num_hard, len(neg_output)), largest=largest) 242 | neg_output = torch.index_select(neg_output, 0, idcs) 243 | neg_labels = torch.index_select(neg_labels, 0, idcs) 244 | return neg_output, neg_labels 245 | 
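# Usage sketch for hard_mining() as called from Loss.forward above (the tensors in
# this comment are made up purely for illustration):
#
#   probs  = torch.sigmoid(font_output).view(-1)    # per-class probabilities
#   labels = font_target.view(-1)                   # 1 = character present, 0 = absent
#   pos_p, pos_l = hard_mining(probs[labels > 0.5], labels[labels > 0.5],
#                              num_hard_pos, largest=False)    # hardest positives = lowest prob
#   neg_p, neg_l = hard_mining(probs[labels == 0], labels[labels == 0],
#                              2 * len(pos_p), largest=True)   # hardest negatives = highest prob
#
# BCE on these two mined subsets (each weighted 0.5) is added to the plain weighted
# BCE term; this is the "hard-mining" step mentioned in the README, while the
# per-class weights produced by get_weight() below implement the "balance" step.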
246 | def save_model(save_dir, phase, name, epoch, f1score, model): 247 | if not os.path.exists(save_dir): 248 | os.mkdir(save_dir) 249 | save_dir = os.path.join(save_dir, args.model) 250 | if not os.path.exists(save_dir): 251 | os.mkdir(save_dir) 252 | save_dir = os.path.join(save_dir, phase) 253 | if not os.path.exists(save_dir): 254 | os.mkdir(save_dir) 255 | state_dict = model.state_dict() 256 | for key in state_dict.keys(): 257 | state_dict[key] = state_dict[key].cpu() 258 | state_dict_all = { 259 | 'state_dict': state_dict, 260 | 'epoch': epoch, 261 | 'f1score': f1score, 262 | } 263 | torch.save( state_dict_all , os.path.join(save_dir, '{:s}.ckpt'.format(name))) 264 | if 'best' in name and f1score > 0.3: 265 | torch.save( state_dict_all , os.path.join(save_dir, '{:s}_{:s}.ckpt'.format(name, str(epoch)))) 266 | 267 | def mkdir(path): 268 | if not os.path.exists(path): 269 | os.mkdir(path) 270 | 271 | def test(epoch, model, train_loader, phase='test'): 272 | print '\ntest {:s}_files, epoch: {:d}'.format(phase, epoch) 273 | mkdir('../../data/result') 274 | model.eval() 275 | f1score_list = [] 276 | recall_list = [] 277 | precision_list = [] 278 | word_index_dict = json.load(open(args.word_index_json)) 279 | index_word_dict = { v:k for k,v in word_index_dict.items() } 280 | result_file = open('../../data/result/{:d}_{:s}_result.csv'.format(epoch, phase), 'w') 281 | result_file.write('name,content\n') 282 | name_f1score_dict = dict() 283 | 284 | # 保存densenet生成的feature 285 | feat_dir = args.data_dir.replace('dataset', 'feats') 286 | mkdir(feat_dir) 287 | feat_dir = os.path.join(feat_dir, phase) 288 | print feat_dir 289 | mkdir(feat_dir) 290 | 291 | names = [] 292 | if phase != 'test': 293 | gt_file = open('../../data/result/{:d}_{:s}_gt.csv'.format(epoch, phase), 'w') 294 | gt_file.write('name,content\n') 295 | analysis_file = open('../../data/result/{:s}_{:s}_gt.csv'.format('analysis', phase), 'w') 296 | os.system('rm -r ../../data/analysis/{:s}'.format(phase)) 297 | labels_all = [] 298 | probs_all = [] 299 | for i,data in enumerate(tqdm(train_loader)): 300 | name = data[0][0].split('/')[-1].split('.seg')[0] 301 | names.append(name) 302 | images, labels = [Variable(x.cuda(async=True)) for x in data[1:3]] 303 | if len(images.size()) == 5: 304 | images = images[0] 305 | 306 | probs, feats = model(images, 'test') 307 | probs_all.append(probs.data.cpu().numpy().max(2).max(1).max(0)) 308 | 309 | preds = probs.data.cpu().numpy() > 0.5 # (-1, 8, 1824) 310 | 311 | # result_file.write(name+',') 312 | result = u'' 313 | last_set = set() 314 | all_set = set() 315 | 316 | if args.feat: 317 | # 保存所有的feat 318 | feats = feats.data.cpu().numpy() 319 | if i == 0: 320 | print feats.shape 321 | np.save(os.path.join(feat_dir, name.replace('.png','.npy')), feats) 322 | if len(feats) > 1: # feats: [-1, 1024, 1, 8] 323 | # 多个patch 324 | new_feats = [] 325 | for i,feat in enumerate(feats): 326 | if i == 0: 327 | # 第一个patch,保存前6个 328 | new_feats.append(feat[:,:,:6]) 329 | elif i == len(feats) - 1: 330 | # 最后一个patch,保存后6个 331 | new_feats.append(feat[:,:,2:]) 332 | else: 333 | # 保存中间4个 334 | new_feats.append(feat[:,:,2:6]) 335 | feats = np.concatenate(new_feats, 2) 336 | 337 | # 这种方法用于检测不同区域的同一个字,当同一个字同一个区域出现时,可能检测不到多次 338 | preds = preds.max(1) # 沿着竖直方向pooling 339 | # if len(preds) > 1: 340 | # print name 341 | for patch_i, patch_pred in enumerate(preds): 342 | for part_i, part_pred in enumerate(patch_pred): 343 | new_set = set() 344 | for idx,p in enumerate(part_pred): 345 | if p: 346 | # 出现了这个字 347 | w = 
index_word_dict[idx] 348 | new_set.add(w) 349 | if w not in all_set: 350 | # 从没见过的字 351 | all_set.add(w) 352 | result += w 353 | elif w not in last_set: 354 | # 以前出现过 355 | if patch_i == 0: 356 | # 第一个patch # 上一个部分没有这个字 357 | result += w 358 | elif part_i >= preds.shape[1]/2 : 359 | # 后续patch的后一半,不写 # 上一个部分没有这个字 360 | result += w 361 | last_set = new_set 362 | # if len(result) > len(set(result)): 363 | # print name 364 | 365 | 366 | 367 | 368 | ''' 369 | for idx,p in enumerate(preds.reshape(-1)): 370 | if p: 371 | # result_file.write(index_word_dict[idx]) 372 | result = result + index_word_dict[idx] 373 | ''' 374 | 375 | result = result.replace(u'"', u'') 376 | if u',' in result: 377 | result = '"' + result + '"' 378 | if len(result) == 0: 379 | global_prob = probs.data.cpu().numpy().max(0).max(0).max(0) 380 | max_index = global_prob.argmax() 381 | result = index_word_dict[max_index] 382 | print name 383 | 384 | result_file.write(name+','+result+'\n') 385 | # result_file.write('\n') 386 | 387 | if phase == 'test': 388 | continue 389 | labels = labels.data.cpu().numpy() 390 | gt_file.write(name+',') 391 | gt = u'' 392 | for idx,l in enumerate(labels.reshape(-1)): 393 | if l: 394 | gt = gt + index_word_dict[idx] 395 | gt_file.write(index_word_dict[idx]) 396 | gt_file.write('\n') 397 | 398 | 399 | labels_all.append(labels[0]) 400 | # 全局pooling 401 | preds = np.array([preds.max(1).max(0)]) 402 | # print preds.shape 403 | for pred, label in zip(preds, labels): 404 | tp = (pred + label == 2).sum() 405 | tn = (pred + label == 0).sum() 406 | fp = (pred - label == 1).sum() 407 | fn = (pred - label ==-1).sum() 408 | precision = 1.0 * tp / max(tp + fp , 10e-20) 409 | recall = 1.0 * tp / max(tp + fn , 10e-20) 410 | f1score = 2. * precision * recall / max(precision + recall , 10e-20) 411 | precision_list.append(precision) 412 | recall_list.append(recall) 413 | f1score_list.append(f1score) 414 | name_f1score_dict[name] = f1score 415 | 416 | # 分析不好的结果 417 | if phase == 'train_val': 418 | th = 0.8 419 | elif phase == 'train': 420 | th = 0.95 421 | else: 422 | th = 0.6 423 | if f1score < th: 424 | save_dir = '../../data/analysis' 425 | if not os.path.exists(save_dir): 426 | os.mkdir(save_dir) 427 | save_dir = os.path.join(save_dir, phase) 428 | if not os.path.exists(save_dir): 429 | os.mkdir(save_dir) 430 | os.system('cp ../../data/dataset/train/{:s} {:s}/{:d}_{:s}'.format(name, save_dir, 100000+i, name)) 431 | analysis_file.write(name+'\t\t') 432 | gt = set(gt) 433 | result = set(result.strip('"')) 434 | analysis_file.write(''.join(sorted(gt - result))+'\t\t') 435 | analysis_file.write(''.join(sorted(result - gt))+'\t\n') 436 | 437 | 438 | 439 | if phase != 'test': 440 | # f1score = np.mean(f1score_list) 441 | # print 'f1score all', f1score 442 | # f1score_list = sorted(f1score_list)[500:] 443 | f1score = np.mean(f1score_list) 444 | recall = np.mean(recall_list) 445 | precision = np.mean(precision_list) 446 | print 'f1score', f1score 447 | print 'recall', recall 448 | print 'precision', precision 449 | gt_file.write('f1score,' + str(f1score)) 450 | gt_file.write('recall,' + str(recall)) 451 | gt_file.write('precision,' + str(precision)) 452 | gt_file.close() 453 | result_file.write('f1score,' + str(f1score)) 454 | result_file.write('recall,' + str(recall)) 455 | result_file.write('precision,' + str(precision)) 456 | with open('../../data/result/name_f1score_dict.json','w') as f: 457 | f.write(json.dumps(name_f1score_dict, indent=4)) 458 | np.save('../../data/result/{:d}_{:s}_labels.npy'.format(epoch, 
phase), labels_all) 459 | result_file.close() 460 | os.system('cp ../../data/result/{:d}_{:s}_result.csv ../../data/result/{:s}_result.csv'.format(epoch, phase, phase)) 461 | 462 | np.save('../../data/result/{:d}_{:s}_probs.npy'.format(epoch, phase), probs_all) 463 | with open('../../data/result/{:s}_names.json'.format(phase), 'w') as f: 464 | f.write(json.dumps(names, indent=4)) 465 | 466 | def get_weight(labels): 467 | labels = labels.data.cpu().numpy() 468 | weights = np.zeros_like(labels) 469 | # weight_false = 1.0 / ((labels<0.5).sum() + 10e-20) 470 | # weight_true = 1.0 / ((labels>0.5).sum() + 10e-20) 471 | weight_false = 1.0 / ((labels<0.5).sum(0) + 10e-20) 472 | label_true = (labels>0.5).sum(0) 473 | for i in range(labels.shape[1]): 474 | label_i = labels[:,i] 475 | weight_i = np.ones(labels.shape[0]) * weight_false[i] 476 | # weight_i = np.ones(labels.shape[0]) * weight_false 477 | if label_true[i] > 0: 478 | weight_i[label_i>0.5] = 1.0 / label_true[i] 479 | weights[:,i] = weight_i 480 | weights *= np.ones_like(labels).sum() / (weights.sum() + 10e-20) 481 | weights[labels<-0.5] = 0 482 | return weights 483 | 484 | def train_eval(epoch, model, train_loader, loss, optimizer, best_f1score=0, phase='train'): 485 | print '\n',epoch, phase 486 | if 'train' in phase: 487 | model.train() 488 | else: 489 | model.eval() 490 | loss_list = [] 491 | f1score_list = [] 492 | recall_list = [] 493 | precision_list = [] 494 | for i,data in enumerate(tqdm(train_loader)): 495 | images, labels = [Variable(x.cuda(async=True)) for x in data[1:3]] 496 | weights = torch.from_numpy(get_weight(labels)).cuda(async=True) 497 | probs = model(images) 498 | 499 | # 训练阶段 500 | if 'train' in phase: 501 | loss_output = loss(probs, labels, weights, args.hard_mining) 502 | try: 503 | optimizer.zero_grad() 504 | loss_output[0].backward() 505 | optimizer.step() 506 | loss_list.append([x.data.cpu().numpy()[0] for x in loss_output]) 507 | except: 508 | # pass 509 | traceback.print_exc() 510 | 511 | 512 | # 计算 f1score, recall, precision 513 | ''' 514 | x = probs.data.cpu().numpy() 515 | l = labels.data.cpu().numpy() 516 | print (get_weight(labels) * l).sum() 517 | l = 1 - l 518 | print (get_weight(labels) * l).sum() 519 | print x.max() 520 | print x.min() 521 | print x.mean() 522 | print 523 | # ''' 524 | preds = probs.data.cpu().numpy() > 0 525 | labels = labels.data.cpu().numpy() 526 | for pred, label in zip(preds, labels): 527 | pred[label<0] = -1 528 | if label.sum() < 0.5: 529 | continue 530 | tp = (pred + label == 2).sum() 531 | tn = (pred + label == 0).sum() 532 | fp = (pred - label == 1).sum() 533 | fn = (pred - label ==-1).sum() 534 | precision = 1.0 * tp / (tp + fp + 10e-20) 535 | recall = 1.0 * tp / (tp + fn + 10e-20) 536 | f1score = 2. 
* precision * recall / (precision + recall + 10e-20) 537 | precision_list.append(precision) 538 | recall_list.append(recall) 539 | f1score_list.append(f1score) 540 | 541 | 542 | # 保存中间结果到 data/middle_result,用于分析 543 | if i == 0: 544 | images = images.data.cpu().numpy() * 128 + 128 545 | if phase == 'pretrain': 546 | bbox_labels = bbox_labels.data.cpu().numpy() 547 | seg_labels = seg_labels.data.cpu().numpy() 548 | seg_output = seg_output.data.cpu().numpy() 549 | for ii in range(len(images)): 550 | middle_dir = os.path.join(args.save_dir, 'middle_result') 551 | if not os.path.exists(middle_dir): 552 | os.mkdir(middle_dir) 553 | middle_dir = os.path.join(middle_dir, phase) 554 | if not os.path.exists(middle_dir): 555 | os.mkdir(middle_dir) 556 | Image.fromarray(images[ii].astype(np.uint8).transpose(1,2,0)).save(os.path.join(middle_dir, str(ii)+'.image.png')) 557 | if phase == 'pretrain': 558 | segi = seg_labels[ii] 559 | _segi = np.array([segi, segi, segi]) * 255 560 | segi = np.zeros([3, _segi.shape[1]*2, _segi.shape[2]*2]) 561 | for si in range(segi.shape[1]): 562 | for sj in range(segi.shape[2]): 563 | segi[:,si,sj] = _segi[:,si/2,sj/2] 564 | Image.fromarray(segi.transpose(1,2,0).astype(np.uint8)).save(os.path.join(middle_dir, str(ii)+'.seg.png')) 565 | segi = seg_output[ii] 566 | _segi = np.array([segi, segi, segi]) * 255 567 | segi = np.zeros([3, _segi.shape[1]*2, _segi.shape[2]*2]) 568 | for si in range(segi.shape[1]): 569 | for sj in range(segi.shape[2]): 570 | segi[:,si,sj] = _segi[:,si/2,sj/2] 571 | Image.fromarray(segi.transpose(1,2,0).astype(np.uint8)).save(os.path.join(middle_dir, str(ii)+'.seg.out.png')) 572 | 573 | f1score = np.mean(f1score_list) 574 | print 'f1score', f1score 575 | print 'recall', np.mean(recall_list) 576 | print 'precision', np.mean(precision_list) 577 | if 'train' in phase: 578 | loss_mean = np.array(loss_list).mean(0) 579 | print 'loss: {:3.4f} pos loss: {:3.4f} neg loss: {:3.4f}'.format(loss_mean[0], loss_mean[1], loss_mean[2]) 580 | 581 | # 保存模型 582 | if ('eval' in phase or 'pretrain' in phase)and best_f1score < 2: 583 | if args.small: 584 | save_dir = os.path.join(args.save_dir, 'models-small') 585 | else: 586 | save_dir = os.path.join(args.save_dir, 'models') 587 | if not os.path.exists(save_dir): 588 | os.mkdir(save_dir) 589 | if epoch % 5 == 0: 590 | save_model(save_dir, phase, str(epoch), epoch, f1score, model) 591 | if f1score > best_f1score: 592 | save_model(save_dir, phase, 'best_f1score', epoch, f1score, model) 593 | if args.model == 'resnet': 594 | tpnr = loss[11] + loss[12] 595 | # 这里用 best_f1score 也当tpnr好了,懒得改 596 | if tpnr > best_f1score: 597 | best_f1score = tpnr 598 | save_model(save_dir, phase, 'best_tpnr', epoch, f1score, model) 599 | print 'best tpnr', best_f1score 600 | else: 601 | best_f1score = max(best_f1score, f1score) 602 | if best_f1score < 1: 603 | print '\n\t{:s}\tbest f1score {:3.4f}\n'.format(phase, best_f1score) 604 | return best_f1score 605 | 606 | 607 | def main(): 608 | word_index_dict = json.load(open(args.word_index_json)) 609 | num_classes = len(word_index_dict) 610 | image_label_dict = json.load(open(args.image_label_json)) 611 | 612 | cudnn.benchmark = True 613 | if args.model == 'densenet': 614 | # 两千多种字符,multi-label分类 615 | model = DenseNet121(num_classes).cuda() 616 | elif args.model == 'resnet': 617 | # resnet主要用于文字区域的segmentation以及object detection操作 618 | model = resnet.ResNet(num_classes=num_classes, args=args).cuda() 619 | else: 620 | return 621 | optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) 622 
| # model = torch.nn.DataParallel(model).cuda() 623 | loss = Loss().cuda() 624 | 625 | if args.resume: 626 | state_dict = torch.load(args.resume) 627 | model.load_state_dict(state_dict['state_dict']) 628 | best_f1score = state_dict['f1score'] 629 | start_epoch = state_dict['epoch'] + 1 630 | else: 631 | best_f1score = 0 632 | if args.model == 'resnet': 633 | start_epoch = 100 634 | else: 635 | start_epoch = 1 636 | args.epoch = start_epoch 637 | print 'best_f1score', best_f1score 638 | 639 | 640 | # 划分数据集 641 | test_filelist = sorted(glob(os.path.join(args.data_dir,'test','*'))) 642 | trainval_filelist = sorted(glob(os.path.join(args.data_dir,'train','*'))) 643 | 644 | # 两种输入size训练 645 | # train_filelist1: 长宽比小于8:1的图片,经过padding后变成 64*512 的输入 646 | # train_filelist2: 长宽比大于8:1的图片,经过padding,crop后变成 64*1024的输入 647 | train_filelist1, train_filelist2 = [],[] 648 | 649 | # 黑名单,这些图片的label是有问题的 650 | black_list = set(json.load(open(args.black_json))['black_list']) 651 | image_hw_ratio_dict = json.load(open(args.image_hw_ratio_json)) 652 | for f in trainval_filelist: 653 | image = f.split('/')[-1] 654 | if image in black_list: 655 | continue 656 | r = image_hw_ratio_dict[image] 657 | if r == 0: 658 | train_filelist1.append(f) 659 | else: 660 | train_filelist2.append(f) 661 | train_val_filelist = train_filelist1 + train_filelist2 662 | val_filelist = train_filelist1[-2048:] 663 | train_filelist1 = train_filelist1[:-2048] 664 | 665 | train_filelist2 = train_filelist2 666 | image_size = [512, 64] 667 | 668 | if args.phase in ['test', 'val', 'train_val']: 669 | # 测试输出文字检测结果 670 | test_dataset = dataloader.DataSet( 671 | test_filelist, 672 | image_label_dict, 673 | num_classes, 674 | # transform=train_transform, 675 | args=args, 676 | image_size=image_size, 677 | phase='test') 678 | test_loader = DataLoader( 679 | dataset=test_dataset, 680 | batch_size=1, 681 | shuffle=False, 682 | num_workers=8, 683 | pin_memory=True) 684 | train_filelist = train_filelist1[-2048:] 685 | train_dataset = dataloader.DataSet( 686 | train_filelist, 687 | image_label_dict, 688 | num_classes, 689 | image_size=image_size, 690 | args=args, 691 | phase='test') 692 | train_loader = DataLoader( 693 | dataset=train_dataset, 694 | batch_size=1, 695 | shuffle=False, 696 | num_workers=8, 697 | pin_memory=True) 698 | 699 | val_dataset = dataloader.DataSet( 700 | val_filelist, 701 | image_label_dict, 702 | num_classes, 703 | image_size=image_size, 704 | args=args, 705 | phase='test') 706 | val_loader = DataLoader( 707 | dataset=val_dataset, 708 | batch_size=1, 709 | shuffle=False, 710 | num_workers=8, 711 | pin_memory=True) 712 | 713 | train_val_dataset = dataloader.DataSet( 714 | train_val_filelist, 715 | image_label_dict, 716 | num_classes, 717 | image_size=image_size, 718 | args=args, 719 | phase='test') 720 | train_val_loader= DataLoader( 721 | dataset=train_val_dataset, 722 | batch_size=1, 723 | shuffle=False, 724 | num_workers=8, 725 | pin_memory=True) 726 | 727 | if args.phase == 'test': 728 | test(start_epoch - 1, model, val_loader, 'val') 729 | test(start_epoch - 1, model, test_loader, 'test') 730 | # test(start_epoch - 1, model, train_val_loader, 'train_val') 731 | elif args.phase == 'val': 732 | test(start_epoch - 1, model, train_loader, 'train') 733 | test(start_epoch - 1, model, val_loader, 'val') 734 | elif args.phase == 'train_val': 735 | test(start_epoch - 1, model, train_val_loader, 'train_val') 736 | return 737 | 738 | elif args.phase == 'train': 739 | 740 | train_dataset1 = dataloader.DataSet( 741 | train_filelist1, 
742 | image_label_dict, 743 | num_classes, 744 | image_size=image_size, 745 | args=args, 746 | phase='train') 747 | train_loader1 = DataLoader( 748 | dataset=train_dataset1, 749 | batch_size=args.batch_size, 750 | shuffle=True, 751 | num_workers=8, 752 | pin_memory=True) 753 | train_dataset2 = dataloader.DataSet( 754 | train_filelist2, 755 | image_label_dict, 756 | num_classes, 757 | image_size=(1024,64), 758 | args=args, 759 | phase='train') 760 | train_loader2 = DataLoader( 761 | dataset=train_dataset2, 762 | batch_size=args.batch_size / 2, 763 | shuffle=True, 764 | num_workers=8, 765 | pin_memory=True) 766 | val_dataset = dataloader.DataSet( 767 | val_filelist, 768 | image_label_dict, 769 | num_classes, 770 | image_size=image_size, 771 | args=args, 772 | phase='val') 773 | val_loader = DataLoader( 774 | dataset=val_dataset, 775 | batch_size=min(8,args.batch_size), 776 | shuffle=False, 777 | num_workers=8, 778 | pin_memory=True) 779 | filelist = glob(os.path.join(args.bg_dir,'*')) 780 | pretrain_dataset1 = dataloader.DataSet( 781 | filelist, 782 | image_label_dict, 783 | num_classes, 784 | image_size=args.image_size, 785 | word_index_dict = word_index_dict, 786 | args=args, 787 | font_range=[8,32], 788 | margin=10, 789 | rotate_range=[-10., 10. ], 790 | phase='pretrain') 791 | pretrain_loader1 = DataLoader( 792 | dataset=pretrain_dataset1, 793 | batch_size=args.batch_size, 794 | shuffle=True, 795 | num_workers=8, 796 | pin_memory=True) 797 | pretrain_dataset2 = dataloader.DataSet( 798 | filelist, 799 | image_label_dict, 800 | num_classes, 801 | image_size=(256, 128), 802 | word_index_dict = word_index_dict, 803 | args=args, 804 | font_range=[24,64], 805 | margin=20, 806 | rotate_range=[-20., 20.], 807 | phase='pretrain') 808 | pretrain_loader2 = DataLoader( 809 | dataset=pretrain_dataset2, 810 | batch_size=args.batch_size, 811 | shuffle=True, 812 | num_workers=8, 813 | pin_memory=True) 814 | 815 | best_f1score = 0 816 | # eval_mode = 'pretrain-2' 817 | eval_mode = 'eval' 818 | for epoch in range(start_epoch, args.epochs): 819 | 820 | args.epoch = epoch 821 | 822 | if eval_mode == 'eval': 823 | if best_f1score > 0.9: 824 | args.lr = 0.0001 825 | if best_f1score > 0.9: 826 | args.hard_mining = 1 827 | 828 | for param_group in optimizer.param_groups: 829 | param_group['lr'] = args.lr 830 | 831 | train_eval(epoch, model, train_loader1, loss, optimizer, 2., 'train-1') 832 | if best_f1score > 0.9: 833 | train_eval(epoch, model, train_loader2, loss, optimizer, 2., 'train-2') 834 | best_f1score = train_eval(epoch, model, val_loader, loss, optimizer, best_f1score, 'eval-{:d}-{:d}'.format(args.batch_size, args.hard_mining)) 835 | continue 836 | ''' 837 | 838 | if eval_mode == 'pretrain-2': 839 | args.epoch = 1 840 | best_f1score = train_eval(epoch, model, pretrain_loader2, loss, optimizer, best_f1score, 'pretrain-2') 841 | if best_f1score > 0.8: 842 | eval_mode = 'pretrain-1' 843 | best_f1score = 0 844 | elif eval_mode == 'pretrain-1': 845 | args.epoch = max(100, epoch) 846 | train_eval(epoch, model, pretrain_loader2, loss, optimizer, 2.0 , 'pretrain-2') 847 | best_f1score = train_eval(epoch, model, pretrain_loader1, loss, optimizer, best_f1score, 'pretrain-1') 848 | if best_f1score > 0.5: 849 | eval_mode = 'eval' 850 | best_f1score = 0 851 | else: 852 | train_eval(epoch, model, train_loader1, loss, optimizer, 2., 'train-1') 853 | train_eval(epoch, model, train_loader2, loss, optimizer, 2., 'train-2') 854 | best_f1score = train_eval(epoch, model, val_loader, loss, optimizer, best_f1score, 
'eval-{:d}-{:d}'.format(args.batch_size, args.hard_mining)) 855 | 856 | ''' 857 | 858 | 859 | 860 | 861 | 862 | 863 | if __name__ == '__main__': 864 | main() 865 | -------------------------------------------------------------------------------- /code/ocr/resnet.py: -------------------------------------------------------------------------------- 1 | # Implementation of https://arxiv.org/pdf/1512.03385.pdf. 2 | # See section 4.2 for model architecture on CIFAR-10. 3 | # Some part of the code was referenced below. 4 | # https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py 5 | import torch 6 | import torch.nn as nn 7 | import torchvision.datasets as dsets 8 | import torchvision.transforms as transforms 9 | from torch.autograd import Variable 10 | import torch.nn.functional as F 11 | 12 | # 3x3 Convolution 13 | def conv3x3(in_channels, out_channels, stride=1): 14 | return nn.Conv2d(in_channels, out_channels, kernel_size=3, 15 | stride=stride, padding=1, bias=False) 16 | 17 | # Residual Block 18 | class ResidualBlock(nn.Module): 19 | def __init__(self, in_channels, out_channels, stride=1, downsample=None): 20 | super(ResidualBlock, self).__init__() 21 | self.conv1 = conv3x3(in_channels, out_channels, stride) 22 | self.bn1 = nn.BatchNorm2d(out_channels) 23 | self.relu = nn.ReLU(inplace=True) 24 | self.conv2 = conv3x3(out_channels, out_channels) 25 | self.bn2 = nn.BatchNorm2d(out_channels) 26 | self.downsample = downsample 27 | 28 | def forward(self, x): 29 | residual = x 30 | out = self.conv1(x) 31 | out = self.bn1(out) 32 | out = self.relu(out) 33 | out = self.conv2(out) 34 | out = self.bn2(out) 35 | if self.downsample: 36 | residual = self.downsample(x) 37 | out += residual 38 | out = self.relu(out) 39 | return out 40 | 41 | # ResNet Module 42 | class ResNet(nn.Module): 43 | def __init__(self, block=ResidualBlock, layers=[2,3], num_classes=10, args=None): 44 | super(ResNet, self).__init__() 45 | self.in_channels = 16 46 | self.conv = conv3x3(3, 16) 47 | self.bn = nn.BatchNorm2d(16) 48 | self.relu = nn.ReLU(inplace=True) 49 | self.layer1 = self.make_layer(block, 32, layers[0], 2) 50 | self.layer2 = self.make_layer(block, 64, layers[0], 2) 51 | self.layer3 = self.make_layer(block, 128, layers[0], 2) 52 | self.layer4 = self.make_layer(block, 128, layers[0], 2) 53 | self.layer5 = self.make_layer(block, 128, layers[0], 2) 54 | self.fc = nn.Linear(128, num_classes) 55 | 56 | # detect 57 | self.convt1 = nn.Sequential( 58 | nn.ConvTranspose2d(128,128,kernel_size=2, stride=2), 59 | nn.BatchNorm2d(128), 60 | nn.ReLU(inplace=True)) 61 | self.convt2 = nn.Sequential( 62 | nn.ConvTranspose2d(128,128,kernel_size=2, stride=2), 63 | nn.BatchNorm2d(128), 64 | nn.ReLU(inplace=True)) 65 | self.convt3 = nn.Sequential( 66 | nn.ConvTranspose2d(128,128,kernel_size=2, stride=2), 67 | nn.BatchNorm2d(128), 68 | nn.ReLU(inplace=True)) 69 | self.convt4 = nn.Sequential( 70 | nn.ConvTranspose2d(128,128,kernel_size=2, stride=2), 71 | nn.BatchNorm2d(128), 72 | nn.ReLU(inplace=True)) 73 | self.in_channels = 256 74 | self.dec1 = self.make_layer(block, 128, layers[0]) 75 | self.in_channels = 256 76 | self.dec2 = self.make_layer(block, 128, layers[0]) 77 | self.in_channels = 192 78 | self.dec3 = self.make_layer(block, 128, layers[0]) 79 | self.in_channels = 160 80 | # self.dec4 = self.make_layer(block, 1, layers[0]) 81 | self.dec4 = nn.Sequential( 82 | nn.Conv2d(160, 256, kernel_size=3, padding=1), 83 | nn.BatchNorm2d(256), 84 | nn.ReLU(inplace=True), 85 | nn.Conv2d(256, 1, kernel_size=1, bias=True) 86 | ) 87 
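        # NOTE (editorial comment, not in the original source): convt1-convt4
        # upsample by 2x and, in forward(), are concatenated with the matching
        # encoder features (out4, out3, out2, out1); dec1-dec3 fuse those
        # concatenations and dec4 maps the 160-channel fusion to a 1-channel
        # sigmoid map used as the text-region segmentation output, while the
        # bbox head defined below takes cat2 and predicts 4 offsets per anchor.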
| self.in_channels = 256 88 | # self.dec2 = self.make_layer(block, 256, layers[0]) 89 | # self.output = conv3x3(256, 4 * len(args.anchors)) 90 | self.bbox = nn.Sequential( 91 | nn.Conv2d(256, 256, kernel_size=3, padding=1), 92 | nn.BatchNorm2d(256), 93 | nn.ReLU(inplace=True), 94 | nn.Conv2d(256, 4 * len(args.anchors), kernel_size=1, bias=True) 95 | ) 96 | self.sigmoid = nn.Sigmoid() 97 | 98 | 99 | def make_layer(self, block, out_channels, blocks, stride=1): 100 | downsample = None 101 | if (stride != 1) or (self.in_channels != out_channels): 102 | downsample = nn.Sequential( 103 | conv3x3(self.in_channels, out_channels, stride=stride), 104 | nn.BatchNorm2d(out_channels)) 105 | layers = [] 106 | layers.append(block(self.in_channels, out_channels, stride, downsample)) 107 | self.in_channels = out_channels 108 | for i in range(1, blocks): 109 | layers.append(block(out_channels, out_channels)) 110 | return nn.Sequential(*layers) 111 | 112 | def forward(self, x, phase='train'): 113 | out = self.conv(x) 114 | # print out.size() 115 | out = self.bn(out) 116 | # print out.size() 117 | out = self.relu(out) 118 | # print out.size() 119 | out1 = self.layer1(out) # 64 120 | # print out1.size() 121 | out2 = self.layer2(out1) # 32 122 | # print out2.size() 123 | out3 = self.layer3(out2) # 16 124 | # print out3.size() 125 | out4 = self.layer4(out3) # 8 126 | # print out4.size() 127 | out5 = self.layer5(out4) # 4 128 | # print out5.size() 129 | 130 | # out = F.adaptive_max_pool2d(out5, output_size=(1,1)).view(out.size(0), -1) # 128 131 | # out = out.view(out.size(0), -1) 132 | 133 | if phase == 'seg': 134 | out = F.adaptive_max_pool2d(out5, output_size=(1,1)).view(out.size(0), -1) # 128 135 | out = self.fc(out) 136 | out = out.view(out.size(0), -1) 137 | else: 138 | out = F.max_pool2d(out5, 2) 139 | out_size = out.size() 140 | # out = out.view(out_size[0],out_size[1],out_size[3]).transpose(1,2).contiguous().view(-1, out_size[1]) 141 | out = out.view(out_size[0],out_size[1],out_size[2] * out_size[3]).transpose(1,2).contiguous().view(-1, out_size[1]) 142 | out = self.fc(out) 143 | out = out.view(out_size[0], out_size[2] * out_size[3], -1).transpose(1,2).contiguous() 144 | out = F.adaptive_max_pool1d(out, output_size=(1)).view(out_size[0], -1) 145 | 146 | # print out.size() 147 | if phase not in ['seg', 'pretrain', 'pretrain2']: 148 | return out 149 | 150 | # detect 151 | cat1 = torch.cat([self.convt1(out5), out4], 1) 152 | # print cat1.size() 153 | dec1 = self.dec1(cat1) 154 | # print dec1.size() 155 | # print out3.size() 156 | cat2 = torch.cat([self.convt2(dec1), out3], 1) 157 | # print cat2.size() 158 | dec2 = self.dec2(cat2) 159 | cat3 = torch.cat([self.convt3(dec2), out2], 1) 160 | dec3 = self.dec3(cat3) 161 | cat4 = torch.cat([self.convt4(dec3), out1], 1) 162 | seg = self.dec4(cat4) 163 | seg = seg.view((seg.size(0), seg.size(2), seg.size(3))) 164 | seg = self.sigmoid(seg) 165 | 166 | bbox = self.bbox(cat2) 167 | # dec2 = self.output(dec2) 168 | # print dec2.size() 169 | size = bbox.size() 170 | bbox = bbox.view((size[0], size[1], -1)).transpose(1,2).contiguous() 171 | bbox = bbox.view((size[0], size[2],size[3],-1, 4)) 172 | 173 | return out, bbox, seg 174 | 175 | # resnet = ResNet(ResidualBlock, [2, 2, 2, 2]) 176 | -------------------------------------------------------------------------------- /code/ocr/tools/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yinchangchang/ocr_densenet/a31f57e006f73b52b3881fd4a771320f02df2147/code/ocr/tools/__init__.py -------------------------------------------------------------------------------- /code/ocr/tools/measures.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | import os 3 | import numpy as np 4 | from sklearn import metrics 5 | from PIL import Image 6 | import traceback 7 | 8 | def stati_class_number_true_flase(label, pred): 9 | label = np.array(label) 10 | pred = np.array(pred) 11 | 12 | cls_list = set(label) | set(pred) 13 | d = dict() 14 | for cls in cls_list: 15 | d[cls] = dict() 16 | d[cls]['number'] = np.sum(label==cls) 17 | d[cls]['true'] = np.sum(label[label==cls]==pred[label==cls]) 18 | d[cls]['pred'] = np.sum(pred==cls) 19 | return d 20 | 21 | def stati_class_number_true_flase_multi_label_margin(labels, preds): 22 | 23 | d = dict() 24 | for label, pred in zip(labels, preds): 25 | label = set(label[label>=0]) 26 | for cls in range(len(pred)): 27 | if cls not in d: 28 | d[cls] = dict() 29 | d[cls]['number'] = 0 30 | d[cls]['true'] = 0 31 | d[cls]['pred'] = 0 32 | if cls in label: 33 | d[cls]['number'] += 1 34 | if pred[cls] > 0.5: 35 | d[cls]['true'] += 1 36 | if pred[cls] > 0.5: 37 | d[cls]['pred'] += 1 38 | return d 39 | 40 | def stati_class_number_true_flase_bce(labels, preds): 41 | d = dict() 42 | labels = labels.astype(np.int64).reshape(-1) 43 | preds = preds.reshape(-1) > 0 44 | index = labels >= 0 45 | labels = labels[index] 46 | preds = preds[index] 47 | 48 | preds_num = preds.sum(0) 49 | true_num = (labels+preds==2).sum(0) 50 | for cls in range(2): 51 | d[cls] = dict() 52 | d[cls]['number'] = (labels==cls).sum() 53 | d[cls]['true'] = (labels+preds==2*cls).sum() 54 | d[cls]['pred'] = (labels==cls).sum() 55 | return d 56 | 57 | def measures(d_list): 58 | # 合并每一个预测的结果 59 | d_all = dict() 60 | for d in d_list: 61 | for cls in d.keys(): 62 | if cls not in d_all: 63 | d_all[cls] = dict() 64 | for k in d[cls].keys(): 65 | if k not in d_all[cls]: 66 | d_all[cls][k] = 0 67 | d_all[cls][k] += d[cls][k] 68 | m = dict() 69 | number = sum([d_all[cls]['number'] for cls in d_all.keys()]) 70 | for cls in d_all: 71 | m[cls] = dict() 72 | m[cls]['number'] = d_all[cls]['number'] 73 | m[cls]['true'] = d_all[cls]['true'] 74 | m[cls]['pred'] = d_all[cls]['pred'] 75 | m[cls]['ratio'] = d_all[cls]['number'] / (float(number) + 10e-10) 76 | m[cls]['accuracy'] = d_all[cls]['true'] / (float(d_all[cls]['number']) + 10e-10) 77 | m[cls]['precision'] = d_all[cls]['true'] /(float(d_all[cls]['pred']) + 10e-10) 78 | return m 79 | 80 | def print_measures(m, s = 'measures'): 81 | print s 82 | accuracy = 0 83 | for cls in sorted(m.keys()): 84 | print '\tclass: {:d}\taccuracy:{:.6f}\tprecision:{:.6f}\tratio:{:.6f}\t\tN/T/P:{:d}/{:d}/{:d}\ 85 | '.format(cls, m[cls]['accuracy'],m[cls]['precision'],m[cls]['ratio'],m[cls]['number'],m[cls]['true'],m[cls]['pred']) 86 | accuracy += m[cls]['accuracy'] * m[cls]['ratio'] 87 | print '\tacc:{:.6f}'.format(accuracy) 88 | return accuracy 89 | 90 | def mse(pred_image, image): 91 | pred_image = pred_image.reshape(-1).astype(np.float32) 92 | image = image.reshape(-1).astype(np.float32) 93 | mse_err = metrics.mean_squared_error(pred_image,image) 94 | return mse_err 95 | 96 | def psnr(pred_image, image): 97 | return 10 * np.log10(255*255/mse(pred_image,image)) 98 | 99 | 100 | def psnr_pred(stain_vis=20, end= 10000): 101 | clean_dir = '../../data/AI/testB/' 102 | psnr_list = [] 103 | f = 
open('../../data/result.csv','w') 104 | for i,clean in enumerate(os.listdir(clean_dir)): 105 | clean = os.path.join(clean_dir, clean) 106 | clean_file = clean 107 | pred = clean.replace('.jpg','.png').replace('data','data/test_clean') 108 | stain = clean.replace('trainB','trainA').replace('testB','testA').replace('.jpg','_.jpg') 109 | 110 | try: 111 | pred = np.array(Image.open(pred).resize((250,250))).astype(np.float32) 112 | clean = np.array(Image.open(clean).resize((250,250))).astype(np.float32) 113 | stain = np.array(Image.open(stain).resize((250,250))).astype(np.float32) 114 | 115 | # diff = np.abs(stain - pred) 116 | # vis = 20 117 | # pred[diffgray_vis] = stain[stain>gray_vis] 121 | 122 | if end < 1000: 123 | diff = np.abs(clean - stain) 124 | # stain[diff>stain_vis] = pred[diff>stain_vis] 125 | stain[diff>stain_vis] = clean[diff>stain_vis] 126 | 127 | psnr_pred = psnr(clean, pred) 128 | psnr_stain = psnr(clean, stain) 129 | psnr_list.append([psnr_stain, psnr_pred]) 130 | except: 131 | continue 132 | if i>end: 133 | break 134 | print i, min(end, 1000) 135 | 136 | f.write(clean_file.split('/')[-1].split('.')[0]) 137 | f.write(',') 138 | f.write(str(psnr_stain)) 139 | f.write(',') 140 | f.write(str(psnr_pred)) 141 | f.write(',') 142 | f.write(str(psnr_pred/psnr_stain - 1)) 143 | f.write('\n') 144 | # print '预测',np.mean(psnr_list) 145 | psnr_list = np.array(psnr_list) 146 | psnr_mean = ((psnr_list[:,1] - psnr_list[:,0]) / psnr_list[:,0]).mean() 147 | if end > 1000: 148 | print '网纹图PSNR', psnr_list[:,0].mean() 149 | print '预测图PSNR', psnr_list[:,1].mean() 150 | print '增益率', psnr_mean 151 | f.write(str(psnr_mean)) 152 | f.close() 153 | return psnr_list[:,0].mean() 154 | 155 | def main(): 156 | pmax = [0.,0.] 157 | for vis in range(1, 30): 158 | p = psnr_pred(vis, 10) 159 | print vis, p 160 | if p > pmax[1]: 161 | pmax = [vis, p] 162 | print '...' 
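    # NOTE (editorial comment, not in the original source): the loop above is a
    # brute-force search over the stain_vis threshold on a small subset
    # (end=10); psnr_pred() copies pixels whose |clean - stain| exceeds the
    # threshold back from the clean image and returns the mean PSNR of that
    # masked stain image, so pmax ends up as [best threshold, best PSNR].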
163 | # print 256,psnr_pred(256) 164 | print pmax 165 | # print 10 * np.log10(255*255/metrics.mean_squared_error([3],[9])) 166 | 167 | 168 | if __name__ == '__main__': 169 | psnr_pred(4000) 170 | # main() 171 | # for v in range(1,10): 172 | # print v, 10 * np.log10(255*255/v/v) 173 | -------------------------------------------------------------------------------- /code/ocr/tools/parse.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | 3 | import argparse 4 | 5 | parser = argparse.ArgumentParser(description='medical caption GAN') 6 | 7 | parser.add_argument( 8 | '--model', 9 | '-m', 10 | type=str, 11 | default='densenet', 12 | help='model' 13 | ) 14 | parser.add_argument( 15 | '--data-dir', 16 | '-d', 17 | type=str, 18 | default='../../data/dataset/', 19 | help='data directory' 20 | ) 21 | parser.add_argument( 22 | '--bg-dir', 23 | type=str, 24 | default='../../data/images', 25 | help='back groud images directory' 26 | ) 27 | parser.add_argument( 28 | '--hard-mining', 29 | type=int, 30 | default=0, 31 | help='use hard mining' 32 | ) 33 | parser.add_argument('--phase', 34 | default='train', 35 | type=str, 36 | metavar='S', 37 | help='pretrain/train/test phase') 38 | parser.add_argument( 39 | '--batch-size', 40 | '-b', 41 | metavar='BATCH SIZE', 42 | type=int, 43 | default=16, 44 | help='batch size' 45 | ) 46 | parser.add_argument('--save-dir', 47 | default='../../data', 48 | type=str, 49 | metavar='S', 50 | help='save dir') 51 | parser.add_argument('--word-index-json', 52 | default='../../files/alphabet_index_dict.json', 53 | type=str, 54 | metavar='S', 55 | help='save dir') 56 | parser.add_argument('--black-json', 57 | default='../../files/black.json', 58 | type=str, 59 | metavar='S', 60 | help='black_list json') 61 | parser.add_argument('--image-hw-ratio-json', 62 | default='../../files/image_hw_ratio_dict.json', 63 | type=str, 64 | metavar='S', 65 | help='image h:w ratio dict') 66 | parser.add_argument('--word-count-json', 67 | default='../../files/alphabet_count_dict.json', 68 | type=str, 69 | metavar='S', 70 | help='word count file') 71 | parser.add_argument('--image-label-json', 72 | default='../../files/train_alphabet.json', 73 | type=str, 74 | metavar='S', 75 | help='image label json') 76 | parser.add_argument('--resume', 77 | default='', 78 | type=str, 79 | metavar='S', 80 | help='start from checkpoints') 81 | parser.add_argument('--no-aug', 82 | default=0, 83 | type=int, 84 | metavar='S', 85 | help='no augmentation') 86 | parser.add_argument('--small', 87 | default=1, 88 | type=int, 89 | metavar='S', 90 | help='small fonts') 91 | parser.add_argument('--difficult', 92 | default=0, 93 | type=int, 94 | metavar='S', 95 | help='只计算比较难的图片') 96 | parser.add_argument('--hist', 97 | default=0, 98 | type=int, 99 | metavar='S', 100 | help='采用直方图均衡化') 101 | parser.add_argument('--feat', 102 | default=0, 103 | type=int, 104 | metavar='S', 105 | help='生成LSTM的feature') 106 | 107 | ##### 108 | parser.add_argument('-j', 109 | '--workers', 110 | default=8, 111 | type=int, 112 | metavar='N', 113 | help='number of data loading workers (default: 32)') 114 | parser.add_argument('--lr', 115 | '--learning-rate', 116 | default=0.001, 117 | type=float, 118 | metavar='LR', 119 | help='initial learning rate') 120 | parser.add_argument('--epochs', 121 | default=10000, 122 | type=int, 123 | metavar='N', 124 | help='number of total epochs to run') 125 | parser.add_argument('--save-freq', 126 | default='5', 127 | type=int, 128 | metavar='S', 129 | help='save 
frequency') 130 | parser.add_argument('--save-pred-freq', 131 | default='10', 132 | type=int, 133 | metavar='S', 134 | help='save pred clean frequency') 135 | parser.add_argument('--val-freq', 136 | default='5', 137 | type=int, 138 | metavar='S', 139 | help='val frequency') 140 | parser.add_argument('--debug', 141 | default=0, 142 | type=int, 143 | metavar='S', 144 | help='debug') 145 | parser.add_argument('--input-filter', 146 | default=7, 147 | type=int, 148 | metavar='S', 149 | help='val frequency') 150 | parser.add_argument('--use-gan', 151 | default=0, 152 | type=int, 153 | metavar='S', 154 | help='use GAN') 155 | parser.add_argument('--write-pred', 156 | default=0, 157 | type=int, 158 | metavar='S', 159 | help='writ predictions') 160 | parser.add_argument( 161 | '--result-file', 162 | '-r', 163 | type=str, 164 | default='../../data/result/test_result.csv', 165 | help='result file' 166 | ) 167 | parser.add_argument( 168 | '--output-file', 169 | '-o', 170 | type=str, 171 | default='../../data/result/test.csv', 172 | help='output file' 173 | ) 174 | args = parser.parse_args() 175 | -------------------------------------------------------------------------------- /code/ocr/tools/plot.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | def plot_multi_graph(image_list, name_list, save_path=None, show=False): 6 | graph_place = int(np.sqrt(len(name_list) - 1)) + 1 7 | for i, (image, name) in enumerate(zip(image_list, name_list)): 8 | ax1 = plt.subplot(graph_place,graph_place,i+1) 9 | ax1.set_title(name) 10 | # plt.imshow(image,cmap='gray') 11 | plt.imshow(image) 12 | plt.axis('off') 13 | if save_path: 14 | plt.savefig(save_path) 15 | pass 16 | if show: 17 | plt.show() 18 | 19 | def plot_multi_line(x_list, y_list, name_list, save_path=None, show=False): 20 | graph_place = int(np.sqrt(len(name_list) - 1)) + 1 21 | for i, (x, y, name) in enumerate(zip(x_list, y_list, name_list)): 22 | ax1 = plt.subplot(graph_place,graph_place,i+1) 23 | ax1.set_title(name) 24 | plt.plot(x,y) 25 | # plt.imshow(image,cmap='gray') 26 | if save_path: 27 | plt.savefig(save_path) 28 | if show: 29 | plt.show() 30 | 31 | 32 | -------------------------------------------------------------------------------- /code/ocr/tools/py_op.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 此文件用于常用python函数的使用 4 | """ 5 | import os 6 | import json 7 | import traceback 8 | from collections import OrderedDict 9 | import random 10 | from fuzzywuzzy import fuzz 11 | 12 | import sys 13 | reload(sys) 14 | sys.setdefaultencoding('utf-8') 15 | 16 | ################################################################################ 17 | ### pre define variables 18 | #:: enumerate 19 | #:: raw_input 20 | #:: listdir 21 | #:: sorted 22 | ### pre define function 23 | def mywritejson(save_path,content): 24 | content = json.dumps(content,indent=4,ensure_ascii=False) 25 | with open(save_path,'w') as f: 26 | f.write(content) 27 | 28 | def myreadjson(load_path): 29 | with open(load_path,'r') as f: 30 | return json.loads(f.read()) 31 | 32 | def mywritefile(save_path,content): 33 | with open(save_path,'w') as f: 34 | f.write(content) 35 | 36 | def myreadfile(load_path): 37 | with open(load_path,'r') as f: 38 | return f.read() 39 | 40 | def myprint(content): 41 | print json.dumps(content,indent=4,ensure_ascii=False) 42 | 43 | def rm(fi): 44 | os.system('rm ' + fi) 45 | 
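# NOTE (editorial example, not in the original file): a typical round trip with
# the JSON helpers above, assuming a writable /tmp directory:
#     mywritejson('/tmp/count.json', {'a': 1, 'b': 2})
#     count = myreadjson('/tmp/count.json')
#     myprint(count)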
46 | def mystrip(s): 47 | return ''.join(s.split()) 48 | 49 | def mysorteddict(d,key = lambda s:s, reverse=False): 50 | dordered = OrderedDict() 51 | for k in sorted(d.keys(),key = key,reverse=reverse): 52 | dordered[k] = d[k] 53 | return dordered 54 | 55 | def mysorteddictfile(src,obj): 56 | mywritejson(obj,mysorteddict(myreadjson(src))) 57 | 58 | def myfuzzymatch(srcs,objs,grade=80): 59 | matchDict = OrderedDict() 60 | for src in srcs: 61 | for obj in objs: 62 | value = fuzz.partial_ratio(src,obj) 63 | if value > grade: 64 | try: 65 | matchDict[src].append(obj) 66 | except: 67 | matchDict[src] = [obj] 68 | return matchDict 69 | 70 | def mydumps(x): 71 | return json.dumps(content,indent=4,ensure_ascii=False) 72 | 73 | def get_random_list(l,num=-1,isunique=0): 74 | if isunique: 75 | l = set(l) 76 | if num < 0: 77 | num = len(l) 78 | if isunique and num > len(l): 79 | return 80 | lnew = [] 81 | l = list(l) 82 | while(num>len(lnew)): 83 | x = l[int(random.random()*len(l))] 84 | if isunique and x in lnew: 85 | continue 86 | lnew.append(x) 87 | return lnew 88 | 89 | def fuzz_list(node1_list,node2_list,score_baseline=66,proposal_num=10,string_map=None): 90 | node_dict = { } 91 | for i,node1 in enumerate(node1_list): 92 | match_score_dict = { } 93 | for node2 in node2_list: 94 | if node1 != node2: 95 | if string_map is not None: 96 | n1 = string_map(node1) 97 | n2 = string_map(node2) 98 | score = fuzz.partial_ratio(n1,n2) 99 | if n1 == n2: 100 | node2_list.remove(node2) 101 | else: 102 | score = fuzz.partial_ratio(node1,node2) 103 | if score > score_baseline: 104 | match_score_dict[node2] = score 105 | else: 106 | node2_list.remove(node2) 107 | node2_sort = sorted(match_score_dict.keys(), key=lambda k:match_score_dict[k],reverse=True) 108 | node_dict[node1] = [[n,match_score_dict[n]] for n in node2_sort[:proposal_num]] 109 | print i,len(node1_list) 110 | return node_dict, node2_list 111 | 112 | def swap(a,b): 113 | return b, a 114 | 115 | def mkdir(d): 116 | path = d.split('/') 117 | for i in range(len(path)): 118 | d = '/'.join(path[:i+1]) 119 | if not os.path.exists(d): 120 | os.mkdir(d) 121 | 122 | -------------------------------------------------------------------------------- /code/ocr/tools/segmentation.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | import matplotlib.pyplot as plt 3 | from scipy import ndimage as ndi 4 | from skimage import morphology,color,data 5 | from skimage import filters 6 | import numpy as np 7 | import skimage 8 | import os 9 | from skimage import measure 10 | 11 | 12 | 13 | def watershed(image, label=None): 14 | denoised = filters.rank.median(image, morphology.disk(2)) #过滤噪声 15 | #将梯度值低于10的作为开始标记点 16 | markers = filters.rank.gradient(denoised, morphology.disk(5)) < 10 17 | markers = ndi.label(markers)[0] 18 | 19 | gradient = filters.rank.gradient(denoised, morphology.disk(2)) #计算梯度 20 | labels =morphology.watershed(gradient, markers, mask=image) #基于梯度的分水岭算法 21 | 22 | fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(6, 6)) 23 | axes = axes.ravel() 24 | ax0, ax1, ax2, ax3 = axes 25 | 26 | ax0.imshow(image, cmap=plt.cm.gray, interpolation='nearest') 27 | ax0.set_title("Original") 28 | # ax1.imshow(gradient, cmap=plt.cm.spectral, interpolation='nearest') 29 | ax1.imshow(gradient, cmap=plt.cm.gray, interpolation='nearest') 30 | ax1.set_title("Gradient") 31 | if label is not None: 32 | # ax2.imshow(markers, cmap=plt.cm.spectral, interpolation='nearest') 33 | ax2.imshow(label, cmap=plt.cm.gray, 
interpolation='nearest') 34 | else: 35 | ax2.imshow(markers, cmap=plt.cm.spectral, interpolation='nearest') 36 | ax2.set_title("Markers") 37 | ax3.imshow(labels, cmap=plt.cm.spectral, interpolation='nearest') 38 | ax3.set_title("Segmented") 39 | 40 | for ax in axes: 41 | ax.axis('off') 42 | 43 | fig.tight_layout() 44 | plt.show() 45 | 46 | def plot_4(image, gradient,label,segmentation, save_path=None): 47 | fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(6, 6)) 48 | axes = axes.ravel() 49 | ax0, ax1, ax2, ax3 = axes 50 | ax0.imshow(image, cmap=plt.cm.gray, interpolation='nearest') 51 | ax0.set_title("Original") 52 | ax1.imshow(gradient, cmap=plt.cm.gray, interpolation='nearest') 53 | ax1.set_title("Gradient") 54 | ax2.imshow(label, cmap=plt.cm.gray, interpolation='nearest') 55 | ax2.set_title("label") 56 | ax3.imshow(segmentation, cmap=plt.cm.spectral, interpolation='nearest') 57 | ax3.set_title("Segmented") 58 | 59 | for ax in axes: 60 | ax.axis('off') 61 | 62 | fig.tight_layout() 63 | if save_path: 64 | print save_path 65 | plt.savefig(save_path) 66 | else: 67 | plt.show() 68 | 69 | def fill(image): 70 | ''' 71 | 填充图片内部空白 72 | 临时写的函数 73 | 建议后期替换 74 | ''' 75 | label_img = measure.label(image, background=1) 76 | props = measure.regionprops(label_img) 77 | max_area = np.array([p.area for p in props]).max() 78 | for i,prop in enumerate(props): 79 | if prop.area < max_area: 80 | image[prop.coords[:,0],prop.coords[:,1]] = 1 81 | return image 82 | 83 | 84 | 85 | def my_watershed(image, label=None, min_gray=480, max_gray=708, min_gradient=5, show=False, save_path='/tmp/x.jpg'): 86 | image = image - min_gray 87 | image[image>max_gray] = 0 88 | image[image< 10] = 0 89 | image = image * 5 90 | 91 | denoised = filters.rank.median(image, morphology.disk(2)) #过滤噪声 92 | #将梯度值低于10的作为开始标记点 93 | markers = filters.rank.gradient(denoised, morphology.disk(5)) < 10 94 | markers = ndi.label(markers)[0] 95 | 96 | gradient = filters.rank.gradient(denoised, morphology.disk(2)) #计算梯度 97 | labels = gradient > min_gradient 98 | 99 | mask = gradient > min_gradient 100 | label_img = measure.label(mask, background=0) 101 | props = measure.regionprops(label_img) 102 | pred = np.zeros_like(gradient) 103 | for i,prop in enumerate(props): 104 | if prop.area > 50: 105 | region = np.array(prop.coords) 106 | vx,vy = region.var(0) 107 | v = vx + vy 108 | if v < 200: 109 | pred[prop.coords[:,0],prop.coords[:,1]] = 1 110 | 111 | # 填充边缘内部空白 112 | pred = fill(pred) 113 | 114 | if show: 115 | plot_4(image, gradient, label, pred) 116 | else: 117 | plot_4(image, gradient, label, pred, save_path) 118 | 119 | return pred 120 | 121 | def segmentation(image_npy, label_npy,save_path): 122 | print image_npy 123 | image = np.load(image_npy) 124 | label = np.load(label_npy) 125 | if np.sum(label) == 0: 126 | return 127 | min_gray,max_gray = 480, 708 128 | my_watershed(image,label,min_gray, max_gray,show=False, save_path=save_path) 129 | 130 | def main(): 131 | data_dir = '/home/yin/all/PVL_DATA/preprocessed/2D/' 132 | save_dir = '/home/yin/all/PVL_DATA/tool_result/' 133 | os.system('rm -r ' + save_dir) 134 | os.system('mkdir ' + save_dir) 135 | for patient in os.listdir(data_dir): 136 | patient_dir = os.path.join(data_dir, patient) 137 | for f in os.listdir(patient_dir): 138 | if 'roi.npy' in f: 139 | label_npy = os.path.join(patient_dir,f) 140 | image_npy = label_npy.replace('.roi.npy','.npy') 141 | segmentation(image_npy,label_npy, os.path.join(save_dir,label_npy.strip('/').replace('/','.').replace('npy','jpg'))) 142 | 143 | if 
__name__ == '__main__': 144 | # image =color.rgb2gray(data.camera()) 145 | # watershed(image) 146 | main() 147 | image_npy = '/home/yin/all/PVL_DATA/preprocessed/2D/JD_chen_xi/23.npy' 148 | image_npy = '/home/yin/all/PVL_DATA/preprocessed/2D/JD_chen_xi/14.npy' 149 | image_npy = '/home/yin/all/PVL_DATA/preprocessed/2D/JD_zhang_yu_chen/23.npy' 150 | label_npy = image_npy.replace('.npy','.roi.npy') 151 | segmentation(image_npy,label_npy) 152 | 153 | 154 | -------------------------------------------------------------------------------- /code/ocr/tools/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (c) 2017 www.drcubic.com, Inc. All Rights Reserved 5 | # 6 | """ 7 | File: utils.py 8 | Author: shileicao(shileicao@stu.xjtu.edu.cn) 9 | Date: 2017-06-20 14:56:54 10 | 11 | **Note.** This code absorb some code from following source. 12 | 1. [DSB2017](https://github.com/lfz/DSB2017) 13 | """ 14 | 15 | import os 16 | import sys 17 | 18 | import numpy as np 19 | import torch 20 | 21 | 22 | def getFreeId(): 23 | import pynvml 24 | 25 | pynvml.nvmlInit() 26 | 27 | def getFreeRatio(id): 28 | handle = pynvml.nvmlDeviceGetHandleByIndex(id) 29 | use = pynvml.nvmlDeviceGetUtilizationRates(handle) 30 | ratio = 0.5 * (float(use.gpu + float(use.memory))) 31 | return ratio 32 | 33 | deviceCount = pynvml.nvmlDeviceGetCount() 34 | available = [] 35 | for i in range(deviceCount): 36 | if getFreeRatio(i) < 70: 37 | available.append(i) 38 | gpus = '' 39 | for g in available: 40 | gpus = gpus + str(g) + ',' 41 | gpus = gpus[:-1] 42 | return gpus 43 | 44 | 45 | def setgpu(gpuinput): 46 | freeids = getFreeId() 47 | if gpuinput == 'all': 48 | gpus = freeids 49 | else: 50 | gpus = gpuinput 51 | busy_gpu = [g not in freeids for g in gpus.split(',')] 52 | if any(busy_gpu): 53 | raise ValueError('gpu' + ' '.join(busy_gpu) + 'is being used') 54 | print('using gpu ' + gpus) 55 | os.environ['CUDA_VISIBLE_DEVICES'] = gpus 56 | return len(gpus.split(',')) 57 | 58 | 59 | def error_mask_stats(labels, filenames): 60 | error_f = [] 61 | for i, f in enumerate(filenames): 62 | # if not np.all(labels[i] > 0): 63 | # error_f.append(f) 64 | for bbox_i in range(labels[i].shape[0]): 65 | imgs = np.load(f) 66 | if not np.all( 67 | np.array(imgs.shape[1:]) - labels[i][bbox_i][:-1] > 0): 68 | error_f.append(f) 69 | error_f = list(set(error_f)) 70 | fileid_list = [os.path.split(filename)[1].split('_')[0] 71 | for filename in error_f] 72 | print("','".join(fileid_list)) 73 | return error_f 74 | 75 | 76 | class Logger(object): 77 | def __init__(self, logfile): 78 | self.terminal = sys.stdout 79 | self.log = open(logfile, "a") 80 | 81 | def write(self, message): 82 | self.terminal.write(message) 83 | self.log.write(message) 84 | 85 | def flush(self): 86 | #this flush method is needed for python 3 compatibility. 87 | #this handles the flush command by doing nothing. 88 | #you might want to specify some extra behavior here. 
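        # NOTE (editorial comment, not in the original source): Logger mirrors
        # everything written to stdout into a log file; a typical hook-up,
        # with save_dir as a placeholder, would be
        #     sys.stdout = Logger(os.path.join(save_dir, 'log.txt'))
        # The code included here is not shown doing so, so treat the line
        # above as an illustration only.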
89 | pass 90 | 91 | 92 | def split4(data, max_stride, margin): 93 | splits = [] 94 | data = torch.Tensor.numpy(data) 95 | _, c, z, h, w = data.shape 96 | 97 | w_width = np.ceil(float(w / 2 + margin) / 98 | max_stride).astype('int') * max_stride 99 | h_width = np.ceil(float(h / 2 + margin) / 100 | max_stride).astype('int') * max_stride 101 | pad = int(np.ceil(float(z) / max_stride) * max_stride) - z 102 | leftpad = pad / 2 103 | pad = [[0, 0], [0, 0], [leftpad, pad - leftpad], [0, 0], [0, 0]] 104 | data = np.pad(data, pad, 'constant', constant_values=-1) 105 | data = torch.from_numpy(data) 106 | splits.append(data[:, :, :, :h_width, :w_width]) 107 | splits.append(data[:, :, :, :h_width, -w_width:]) 108 | splits.append(data[:, :, :, -h_width:, :w_width]) 109 | splits.append(data[:, :, :, -h_width:, -w_width:]) 110 | 111 | return torch.cat(splits, 0) 112 | 113 | 114 | def combine4(output, h, w): 115 | splits = [] 116 | for i in range(len(output)): 117 | splits.append(output[i]) 118 | 119 | output = np.zeros( 120 | (splits[0].shape[0], h, w, splits[0].shape[3], 121 | splits[0].shape[4]), np.float32) 122 | 123 | h0 = output.shape[1] / 2 124 | h1 = output.shape[1] - h0 125 | w0 = output.shape[2] / 2 126 | w1 = output.shape[2] - w0 127 | 128 | splits[0] = splits[0][:, :h0, :w0, :, :] 129 | output[:, :h0, :w0, :, :] = splits[0] 130 | 131 | splits[1] = splits[1][:, :h0, -w1:, :, :] 132 | output[:, :h0, -w1:, :, :] = splits[1] 133 | 134 | splits[2] = splits[2][:, -h1:, :w0, :, :] 135 | output[:, -h1:, :w0, :, :] = splits[2] 136 | 137 | splits[3] = splits[3][:, -h1:, -w1:, :, :] 138 | output[:, -h1:, -w1:, :, :] = splits[3] 139 | 140 | return output 141 | 142 | 143 | def split8(data, max_stride, margin): 144 | splits = [] 145 | if isinstance(data, np.ndarray): 146 | c, z, h, w = data.shape 147 | else: 148 | _, c, z, h, w = data.size() 149 | 150 | z_width = np.ceil(float(z / 2 + margin) / 151 | max_stride).astype('int') * max_stride 152 | w_width = np.ceil(float(w / 2 + margin) / 153 | max_stride).astype('int') * max_stride 154 | h_width = np.ceil(float(h / 2 + margin) / 155 | max_stride).astype('int') * max_stride 156 | for zz in [[0, z_width], [-z_width, None]]: 157 | for hh in [[0, h_width], [-h_width, None]]: 158 | for ww in [[0, w_width], [-w_width, None]]: 159 | if isinstance(data, np.ndarray): 160 | splits.append(data[np.newaxis, :, zz[0]:zz[1], hh[0]:hh[1], 161 | ww[0]:ww[1]]) 162 | else: 163 | splits.append(data[:, :, zz[0]:zz[1], hh[0]:hh[1], ww[0]: 164 | ww[1]]) 165 | 166 | if isinstance(data, np.ndarray): 167 | return np.concatenate(splits, 0) 168 | else: 169 | return torch.cat(splits, 0) 170 | 171 | 172 | def combine8(output, z, h, w): 173 | splits = [] 174 | for i in range(len(output)): 175 | splits.append(output[i]) 176 | 177 | output = np.zeros( 178 | (z, h, w, splits[0].shape[3], splits[0].shape[4]), np.float32) 179 | 180 | z_width = z / 2 181 | h_width = h / 2 182 | w_width = w / 2 183 | i = 0 184 | for zz in [[0, z_width], [z_width - z, None]]: 185 | for hh in [[0, h_width], [h_width - h, None]]: 186 | for ww in [[0, w_width], [w_width - w, None]]: 187 | output[zz[0]:zz[1], hh[0]:hh[1], ww[0]:ww[1], :, :] = splits[ 188 | i][zz[0]:zz[1], hh[0]:hh[1], ww[0]:ww[1], :, :] 189 | i = i + 1 190 | 191 | return output 192 | 193 | 194 | def split16(data, max_stride, margin): 195 | splits = [] 196 | _, c, z, h, w = data.size() 197 | 198 | z_width = np.ceil(float(z / 4 + margin) / 199 | max_stride).astype('int') * max_stride 200 | z_pos = [z * 3 / 8 - z_width / 2, z * 5 / 8 - z_width / 2] 201 | 
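    # NOTE (editorial comment, not in the original source): z_pos holds the
    # start indices of the two interior z windows (centred at 3/8 and 5/8 of
    # the depth); together with [0, z_width] and [-z_width, None] below this
    # gives 4 overlapping z slabs x 2 h x 2 w = 16 patches, whose overlaps
    # combine16() crops away when stitching the outputs back together.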
h_width = np.ceil(float(h / 2 + margin) / 202 | max_stride).astype('int') * max_stride 203 | w_width = np.ceil(float(w / 2 + margin) / 204 | max_stride).astype('int') * max_stride 205 | for zz in [[0, z_width], [z_pos[0], z_pos[0] + z_width], 206 | [z_pos[1], z_pos[1] + z_width], [-z_width, None]]: 207 | for hh in [[0, h_width], [-h_width, None]]: 208 | for ww in [[0, w_width], [-w_width, None]]: 209 | splits.append(data[:, :, zz[0]:zz[1], hh[0]:hh[1], ww[0]:ww[ 210 | 1]]) 211 | 212 | return torch.cat(splits, 0) 213 | 214 | 215 | def combine16(output, z, h, w): 216 | splits = [] 217 | for i in range(len(output)): 218 | splits.append(output[i]) 219 | 220 | output = np.zeros( 221 | (z, h, w, splits[0].shape[3], splits[0].shape[4]), np.float32) 222 | 223 | z_width = z / 4 224 | h_width = h / 2 225 | w_width = w / 2 226 | splitzstart = splits[0].shape[0] / 2 - z_width / 2 227 | z_pos = [z * 3 / 8 - z_width / 2, z * 5 / 8 - z_width / 2] 228 | i = 0 229 | for zz, zz2 in zip( 230 | [[0, z_width], [z_width, z_width * 2], [z_width * 2, z_width * 3], 231 | [z_width * 3 - z, None]], 232 | [[0, z_width], [splitzstart, z_width + splitzstart], 233 | [splitzstart, z_width + splitzstart], [z_width * 3 - z, None]]): 234 | for hh in [[0, h_width], [h_width - h, None]]: 235 | for ww in [[0, w_width], [w_width - w, None]]: 236 | output[zz[0]:zz[1], hh[0]:hh[1], ww[0]:ww[1], :, :] = splits[ 237 | i][zz2[0]:zz2[1], hh[0]:hh[1], ww[0]:ww[1], :, :] 238 | i = i + 1 239 | 240 | return output 241 | 242 | 243 | def split32(data, max_stride, margin): 244 | splits = [] 245 | _, c, z, h, w = data.size() 246 | 247 | z_width = np.ceil(float(z / 2 + margin) / 248 | max_stride).astype('int') * max_stride 249 | w_width = np.ceil(float(w / 4 + margin) / 250 | max_stride).astype('int') * max_stride 251 | h_width = np.ceil(float(h / 4 + margin) / 252 | max_stride).astype('int') * max_stride 253 | 254 | w_pos = [w * 3 / 8 - w_width / 2, w * 5 / 8 - w_width / 2] 255 | h_pos = [h * 3 / 8 - h_width / 2, h * 5 / 8 - h_width / 2] 256 | 257 | for zz in [[0, z_width], [-z_width, None]]: 258 | for hh in [[0, h_width], [h_pos[0], h_pos[0] + h_width], 259 | [h_pos[1], h_pos[1] + h_width], [-h_width, None]]: 260 | for ww in [[0, w_width], [w_pos[0], w_pos[0] + w_width], 261 | [w_pos[1], w_pos[1] + w_width], [-w_width, None]]: 262 | splits.append(data[:, :, zz[0]:zz[1], hh[0]:hh[1], ww[0]:ww[ 263 | 1]]) 264 | 265 | return torch.cat(splits, 0) 266 | 267 | 268 | def combine32(splits, z, h, w): 269 | 270 | output = np.zeros( 271 | (z, h, w, splits[0].shape[3], splits[0].shape[4]), np.float32) 272 | 273 | z_width = int(np.ceil(float(z) / 2)) 274 | h_width = int(np.ceil(float(h) / 4)) 275 | w_width = int(np.ceil(float(w) / 4)) 276 | splithstart = splits[0].shape[1] / 2 - h_width / 2 277 | splitwstart = splits[0].shape[2] / 2 - w_width / 2 278 | 279 | i = 0 280 | for zz in [[0, z_width], [z_width - z, None]]: 281 | 282 | for hh, hh2 in zip( 283 | [[0, h_width], [h_width, h_width * 2], [h_width * 2, h_width * 3], 284 | [h_width * 3 - h, None]], 285 | [[0, h_width], [splithstart, h_width + splithstart], 286 | [splithstart, h_width + splithstart], [h_width * 3 - h, None]]): 287 | 288 | for ww, ww2 in zip( 289 | [[0, w_width], [w_width, w_width * 2], 290 | [w_width * 2, w_width * 3], [w_width * 3 - w, None]], 291 | [[0, w_width], [splitwstart, w_width + splitwstart], 292 | [splitwstart, w_width + splitwstart], 293 | [w_width * 3 - w, None]]): 294 | 295 | output[zz[0]:zz[1], hh[0]:hh[1], ww[0]:ww[1], :, :] = splits[ 296 | i][zz[0]:zz[1], 
hh2[0]:hh2[1], ww2[0]:ww2[1], :, :] 297 | i = i + 1 298 | 299 | return output 300 | 301 | 302 | def split64(data, max_stride, margin): 303 | splits = [] 304 | _, c, z, h, w = data.size() 305 | 306 | z_width = np.ceil(float(z / 4 + margin) / 307 | max_stride).astype('int') * max_stride 308 | w_width = np.ceil(float(w / 4 + margin) / 309 | max_stride).astype('int') * max_stride 310 | h_width = np.ceil(float(h / 4 + margin) / 311 | max_stride).astype('int') * max_stride 312 | 313 | z_pos = [z * 3 / 8 - z_width / 2, z * 5 / 8 - z_width / 2] 314 | w_pos = [w * 3 / 8 - w_width / 2, w * 5 / 8 - w_width / 2] 315 | h_pos = [h * 3 / 8 - h_width / 2, h * 5 / 8 - h_width / 2] 316 | 317 | for zz in [[0, z_width], [z_pos[0], z_pos[0] + z_width], 318 | [z_pos[1], z_pos[1] + z_width], [-z_width, None]]: 319 | for hh in [[0, h_width], [h_pos[0], h_pos[0] + h_width], 320 | [h_pos[1], h_pos[1] + h_width], [-h_width, None]]: 321 | for ww in [[0, w_width], [w_pos[0], w_pos[0] + w_width], 322 | [w_pos[1], w_pos[1] + w_width], [-w_width, None]]: 323 | splits.append(data[:, :, zz[0]:zz[1], hh[0]:hh[1], ww[0]:ww[ 324 | 1]]) 325 | 326 | return torch.cat(splits, 0) 327 | 328 | 329 | def combine64(output, z, h, w): 330 | splits = [] 331 | for i in range(len(output)): 332 | splits.append(output[i]) 333 | 334 | output = np.zeros( 335 | (z, h, w, splits[0].shape[3], splits[0].shape[4]), np.float32) 336 | 337 | z_width = int(np.ceil(float(z) / 4)) 338 | h_width = int(np.ceil(float(h) / 4)) 339 | w_width = int(np.ceil(float(w) / 4)) 340 | splitzstart = splits[0].shape[0] / 2 - z_width / 2 341 | splithstart = splits[0].shape[1] / 2 - h_width / 2 342 | splitwstart = splits[0].shape[2] / 2 - w_width / 2 343 | 344 | i = 0 345 | for zz, zz2 in zip( 346 | [[0, z_width], [z_width, z_width * 2], [z_width * 2, z_width * 3], 347 | [z_width * 3 - z, None]], 348 | [[0, z_width], [splitzstart, z_width + splitzstart], 349 | [splitzstart, z_width + splitzstart], [z_width * 3 - z, None]]): 350 | 351 | for hh, hh2 in zip( 352 | [[0, h_width], [h_width, h_width * 2], [h_width * 2, h_width * 3], 353 | [h_width * 3 - h, None]], 354 | [[0, h_width], [splithstart, h_width + splithstart], 355 | [splithstart, h_width + splithstart], [h_width * 3 - h, None]]): 356 | 357 | for ww, ww2 in zip( 358 | [[0, w_width], [w_width, w_width * 2], 359 | [w_width * 2, w_width * 3], [w_width * 3 - w, None]], 360 | [[0, w_width], [splitwstart, w_width + splitwstart], 361 | [splitwstart, w_width + splitwstart], 362 | [w_width * 3 - w, None]]): 363 | 364 | output[zz[0]:zz[1], hh[0]:hh[1], ww[0]:ww[1], :, :] = splits[ 365 | i][zz2[0]:zz2[1], hh2[0]:hh2[1], ww2[0]:ww2[1], :, :] 366 | i = i + 1 367 | 368 | return output 369 | -------------------------------------------------------------------------------- /code/preprocessing/analysis_dataset.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | ######################################################################### 3 | # File Name: analysis_dataset.py 4 | # Author: ccyin 5 | # mail: ccyin04@gmail.com 6 | # Created Time: Fri 18 May 2018 04:19:58 PM CST 7 | ######################################################################### 8 | ''' 9 | 此文件用于分析原有数据集信息 10 | stati_image_size: 统计图片大小信息 11 | stati_label_length: 统计文字长度信息 12 | ''' 13 | 14 | import os 15 | import json 16 | from PIL import Image 17 | import numpy as np 18 | from tqdm import tqdm 19 | import sys 20 | sys.path.append('../ocr') 21 | from tools import plot 22 | 23 | def stati_image_size(image_dir, 
save_dir, big_w_dir): 24 | if not os.path.exists(big_w_dir): 25 | os.mkdir(big_w_dir) 26 | if not os.path.exists(save_dir): 27 | os.mkdir(save_dir) 28 | h_count_dict, w_count_dict, r_count_dict = { }, { }, { } 29 | image_hw_ratio_dict = { } 30 | for image in os.listdir(image_dir): 31 | h,w = Image.open(os.path.join(image_dir, image)).size 32 | if w > 80: 33 | cmd = 'cp ../../data/dataset/train/{:s} {:s}'.format(image, big_w_dir) 34 | # os.system(cmd) 35 | 36 | r = int(h / 8. / w) 37 | h = h / 10 38 | w = w / 10 39 | r_count_dict[r] = r_count_dict.get(r, 0) + 1 40 | h_count_dict[h] = h_count_dict.get(h, 0) + 1 41 | w_count_dict[w] = w_count_dict.get(w, 0) + 1 42 | image_hw_ratio_dict[image] = r 43 | 44 | with open(os.path.join(save_dir, 'image_hw_ratio_dict.json'), 'w') as f: 45 | f.write(json.dumps(image_hw_ratio_dict, indent=4)) 46 | 47 | x = range(max(h_count_dict.keys())+1) 48 | y = [0 for _ in x] 49 | for h in sorted(h_count_dict.keys()): 50 | print '图片长度:{:d}~{:d},有{:d}张图'.format(10*h, 10*h+10, h_count_dict[h]) 51 | y[h] = h_count_dict[h] 52 | plot.plot_multi_line([x], [y], ['Length'], save_path='../../data/length.png', show=True) 53 | 54 | x = range(max(w_count_dict.keys())+1) 55 | y = [0 for _ in x] 56 | for w in sorted(w_count_dict.keys()): 57 | print '图片宽度:{:d}~{:d},有{:d}张图'.format(10*w, 10*w+10, w_count_dict[w]) 58 | y[w] = w_count_dict[w] 59 | plot.plot_multi_line([x], [y], ['Width'], save_path='../../data/width.png', show=True) 60 | 61 | x = range(max(r_count_dict.keys())+1) 62 | y = [0 for _ in x] 63 | for r in sorted(r_count_dict.keys()): 64 | print '图片比例:{:d}~{:d},有{:d}张图'.format(8*r, 8*r+8, r_count_dict[r]) 65 | y[r] = r_count_dict[r] 66 | x = [8*(_+1) for _ in x] 67 | plot.plot_multi_line([x], [y], ['L/W'], save_path='../../data/ratio.png', show=True) 68 | 69 | print '\n最多的长\n', sorted(h_count_dict.keys(), key=lambda h:h_count_dict[h])[-1] * 10 70 | print '\n最多的宽\n', sorted(w_count_dict.keys(), key=lambda w:w_count_dict[w])[-1] * 10 71 | 72 | print '建议使用 64 * 512 的输入' 73 | print ' 部分使用 64 * 1024 的输入' 74 | print ' 剩下的忽略' 75 | print '建议使用FCN来做,全局取最大值得到最终结果' 76 | 77 | def stati_label_length(label_json, long_text_dir): 78 | if not os.path.exists(long_text_dir): 79 | os.mkdir(long_text_dir) 80 | image_label_json = json.load(open(label_json)) 81 | l_count_dict = { } 82 | for image, label in image_label_json.items(): 83 | l = len(label.split()) 84 | l_count_dict[l] = l_count_dict.get(l, 0) + 1 85 | if l > 25: 86 | cmd = 'cp ../../data/dataset/train/{:s} {:s}'.format(image, long_text_dir) 87 | # os.system(cmd) 88 | 89 | word_num = 0. 
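    # NOTE (editorial comment, not in the original source): l_count_dict maps
    # sentence length (number of space-separated label indices per image) to
    # the number of images with that length; the block below plots the length
    # distribution and prints the average number of characters per image.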
90 | x = range(max(l_count_dict.keys())+1) 91 | y = [0 for _ in x] 92 | for l in sorted(l_count_dict.keys()): 93 | word_num += l * l_count_dict[l] 94 | print '文字长度:{:d},有{:d}张图'.format(l, l_count_dict[l]) 95 | y[l] = l_count_dict[l] 96 | plot.plot_multi_line([x], [y], ['Word Number'], save_path='../../data/word_num.png', show=True) 97 | print '平均每张图片{:3.4f}个字'.format(word_num / sum(l_count_dict.values())) 98 | 99 | def stati_image_gray(image_dir): 100 | print 'eval train image gray' 101 | for image in tqdm(os.listdir(image_dir)): 102 | image = Image.open(os.path.join(image_dir, image)).convert('RGB') 103 | image = np.array(image) 104 | mi,ma = image.min(), image.max() 105 | assert mi >= 0 106 | assert ma < 256 107 | 108 | print 'eval test image gray' 109 | image_dir = image_dir.replace('train', 'test') 110 | for image in tqdm(os.listdir(image_dir)): 111 | image = Image.open(os.path.join(image_dir, image)).convert('RGB') 112 | image = np.array(image) 113 | mi,ma = image.min(), image.max() 114 | assert mi >= 0 115 | assert ma < 256 116 | 117 | 118 | 119 | def main(): 120 | image_dir = '../../data/dataset/train' 121 | save_dir = '../../files/' 122 | big_w_dir = '../../data/big_w_dir' 123 | stati_image_size(image_dir, save_dir, big_w_dir) 124 | 125 | train_label_json = '../../files/train_alphabet.json' 126 | long_text_dir = '../../data/long_text_dir' 127 | stati_label_length(train_label_json, long_text_dir) 128 | # stati_image_gray(image_dir) 129 | 130 | if __name__ == '__main__': 131 | main() 132 | -------------------------------------------------------------------------------- /code/preprocessing/map_word_to_index.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | ######################################################################### 3 | # File Name: map_word_to_index.py 4 | # Author: ccyin 5 | # mail: ccyin04@gmail.com 6 | # Created Time: Fri 18 May 2018 03:30:26 PM CST 7 | ######################################################################### 8 | ''' 9 | 此代码用于将所有文字映射到index上,有两种方式 10 | 1. 映射每一个英文单词为一个index 11 | 2. 
映射每一个英文字母为一个index 12 | ''' 13 | 14 | import os 15 | import sys 16 | reload(sys) 17 | sys.setdefaultencoding('utf8') 18 | import json 19 | from collections import OrderedDict 20 | 21 | def map_word_to_index(train_word_file, word_index_json, word_count_json, index_label_json, alphabet_to_index=True): 22 | with open(train_word_file, 'r') as f: 23 | labels = f.read().strip().decode('utf8') 24 | word_count_dict = { } 25 | for line in labels.split('\n')[1:]: 26 | line = line.strip() 27 | image, sentence = line.strip().split('.png,') 28 | sentence = sentence.strip('"') 29 | for w in sentence: 30 | word_count_dict[w] = word_count_dict.get(w,0) + 1 31 | print '一共有{:d}种字符,共{:d}个'.format(len(word_count_dict), sum(word_count_dict.values())) 32 | word_sorted = sorted(word_count_dict.keys(), key=lambda k:word_count_dict[k], reverse=True) 33 | # word_index_dict = { w:i for i,w in enumerate(word_sorted) } 34 | word_index_dict = json.load(open(word_index_json)) 35 | 36 | with open(word_count_json, 'w') as f: 37 | f.write(json.dumps(word_count_dict, indent=4, ensure_ascii=False)) 38 | # with open(word_index_json, 'w') as f: 39 | # f.write(json.dumps(word_index_dict, indent=4, ensure_ascii=False)) 40 | 41 | image_label_dict = OrderedDict() 42 | for line in labels.split('\n')[1:]: 43 | line = line.strip() 44 | image, sentence = line.strip().split('.png,') 45 | sentence = sentence.strip('"') 46 | 47 | # 换掉部分相似符号 48 | for c in u"  ": 49 | sentence = sentence.replace(c, '') 50 | replace_words = [ 51 | u'((', 52 | u'))', 53 | u',,', 54 | u"´'′", 55 | u"″"“", 56 | u"..", 57 | u"—-" 58 | ] 59 | for words in replace_words: 60 | for w in words[:-1]: 61 | sentence = sentence.replace(w, words[-1]) 62 | 63 | index_list = [] 64 | for w in sentence: 65 | index_list.append(str(word_index_dict[w])) 66 | image_label_dict[image + '.png'] = ' '.join(index_list) 67 | with open(index_label_json, 'w') as f: 68 | f.write(json.dumps(image_label_dict, indent=4)) 69 | 70 | 71 | def main(): 72 | 73 | # 映射字母为index 74 | train_word_file = '../../files/train.csv' 75 | word_index_json = '../../files/alphabet_index_dict.json' 76 | word_count_json = '../../files/alphabet_count_dict.json' 77 | index_label_json = '../../files/train_alphabet.json' 78 | map_word_to_index(train_word_file, word_index_json, word_count_json, index_label_json, True) 79 | 80 | if __name__ == '__main__': 81 | main() 82 | -------------------------------------------------------------------------------- /code/preprocessing/show_black.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | ######################################################################### 3 | # File Name: show_black.py 4 | # Author: ccyin 5 | # mail: ccyin04@gmail.com 6 | # Created Time: 2018年06月07日 星期四 01时06分22秒 7 | ######################################################################### 8 | 9 | import os 10 | import sys 11 | import json 12 | sys.path.append('../ocr') 13 | from tools import parse, py_op 14 | args = parse.args 15 | 16 | def cp_black_list(black_json, black_dir): 17 | word_index_dict = json.load(open(args.word_index_json)) 18 | index_word_dict = { v:k for k,v in word_index_dict.items() } 19 | train_word_dict = json.load(open(args.image_label_json)) 20 | train_word_dict = { k:''.join([index_word_dict[int(i)] for i in v.split()]) for k,v in train_word_dict.items() } 21 | 22 | py_op.mkdir(black_dir) 23 | black_list = json.load(open(black_json))['black_list'] 24 | for i,name in enumerate(black_list): 25 | cmd = 'cp {:s} 
{:s}'.format(os.path.join(args.data_dir, 'train', name), black_dir) 26 | if train_word_dict[name] in ['Err:501', '#NAME?', '###']: 27 | continue 28 | print name 29 | print train_word_dict[name] 30 | os.system(cmd) 31 | if i > 30: 32 | break 33 | 34 | if __name__ == '__main__': 35 | black_dir = os.path.join(args.save_dir, 'black') 36 | cp_black_list(args.black_json, black_dir) 37 | -------------------------------------------------------------------------------- /files/alphabet_count_dict.json: -------------------------------------------------------------------------------- 1 | { 2 | "挂": 9, 3 | "退": 5, 4 | "谈": 4, 5 | "随": 139, 6 | "抗": 4, 7 | "料": 95, 8 | "微": 7, 9 | "洞": 9, 10 | "造": 61, 11 | "般": 10, 12 | "潜": 3, 13 | "河": 48, 14 | "欲": 2, 15 | "侵": 3, 16 | "临": 5, 17 | "然": 113, 18 | "吸": 7, 19 | "场": 194, 20 | "宽": 89, 21 | "线": 4480, 22 | "@": 2, 23 | "反": 244, 24 | "牌": 60, 25 | "盏": 7, 26 | "科": 26, 27 | "筒": 8, 28 | "苗": 22, 29 | "摘": 16, 30 | "话": 18, 31 | "赞": 3, 32 | "凡": 3, 33 | "知": 1291, 34 | "除": 68, 35 | "揭": 3, 36 | "扬": 6, 37 | "泳": 5, 38 | "其": 520, 39 | "闹": 1, 40 | "绿": 42, 41 | "渔": 11, 42 | "覆": 5, 43 | "沈": 4, 44 | "》": 5, 45 | "引": 14, 46 | "应": 333, 47 | "枚": 18, 48 | "灵": 2, 49 | "滤": 1, 50 | "假": 53, 51 | "鲨": 1, 52 | "+": 2226, 53 | "循": 12, 54 | "抬": 2, 55 | "是": 3714, 56 | "械": 4, 57 | "讲": 3, 58 | "刷": 23, 59 | "冶": 1, 60 | "咸": 2, 61 | "胀": 1, 62 | "视": 135, 63 | "俊": 1, 64 | "抱": 1, 65 | "契": 4, 66 | "寒": 1, 67 | "录": 13, 68 | "酸": 7, 69 | "教": 48, 70 | "也": 85, 71 | "囤": 1, 72 | "秦": 2, 73 | "峨": 1, 74 | "k": 518, 75 | "括": 30, 76 | "景": 22, 77 | "滴": 3, 78 | "铸": 2, 79 | "须": 18, 80 | "基": 28, 81 | "广": 47, 82 | "₁": 176, 83 | "暅": 2, 84 | "上": 2968, 85 | "后": 592, 86 | "频": 26, 87 | "餐": 19, 88 | "暂": 4, 89 | "底": 192, 90 | "蒙": 1, 91 | "辟": 2, 92 | "足": 436, 93 | "伴": 4, 94 | "馈": 1, 95 | "甸": 1, 96 | "离": 525, 97 | "笼": 3, 98 | "尾": 33, 99 | "框": 31, 100 | "泉": 8, 101 | "绕": 178, 102 | "V": 20, 103 | "虚": 23, 104 | "迟": 3, 105 | "郡": 1, 106 | "牢": 1, 107 | "柯": 1, 108 | "棱": 155, 109 | "跳": 23, 110 | "轴": 1757, 111 | "号": 159, 112 | "偶": 46, 113 | "啸": 1, 114 | "移": 348, 115 | "态": 15, 116 | "节": 59, 117 | "★": 8, 118 | "构": 61, 119 | "消": 18, 120 | "肖": 1, 121 | "伟": 2, 122 | "倡": 2, 123 | "冠": 4, 124 | "纪": 6, 125 | "术": 22, 126 | "精": 34, 127 | "A": 9419, 128 | "柄": 2, 129 | "汉": 13, 130 | "克": 110, 131 | "今": 34, 132 | "前": 219, 133 | "双": 169, 134 | "坏": 1, 135 | "塑": 10, 136 | "姐": 1, 137 | "幕": 2, 138 | "胖": 1, 139 | "几": 207, 140 | "巨": 1, 141 | "杯": 29, 142 | "卷": 33, 143 | "馨": 1, 144 | "固": 20, 145 | "导": 35, 146 | "齿": 7, 147 | "∀": 2, 148 | "辉": 4, 149 | "丈": 5, 150 | "再": 148, 151 | "咏": 1, 152 | "库": 35, 153 | "尔": 7, 154 | "挖": 11, 155 | "炮": 5, 156 | "沟": 1, 157 | "伞": 4, 158 | "㎡": 1, 159 | "符": 49, 160 | "爆": 3, 161 | ",": 316, 162 | "水": 414, 163 | "ρ": 10, 164 | "所": 1053, 165 | "旅": 30, 166 | "摄": 3, 167 | "么": 311, 168 | "重": 280, 169 | "灌": 5, 170 | "坎": 1, 171 | "结": 509, 172 | "×": 54, 173 | "学": 631, 174 | "臭": 2, 175 | "l": 460, 176 | "倒": 24, 177 | "践": 15, 178 | "培": 6, 179 | "持": 49, 180 | "技": 23, 181 | "标": 1525, 182 | "予": 8, 183 | "越": 18, 184 | "馒": 2, 185 | "耗": 11, 186 | "辞": 1, 187 | "加": 244, 188 | "锥": 118, 189 | "缩": 7, 190 | "悬": 3, 191 | "贵": 4, 192 | "臂": 1, 193 | "故": 10, 194 | "蓄": 10, 195 | "识": 34, 196 | "免": 12, 197 | "侣": 3, 198 | "城": 62, 199 | "筑": 20, 200 | "秒": 190, 201 | "W": 15, 202 | "蝙": 1, 203 | "江": 49, 204 | "连": 542, 205 | "卡": 61, 206 | "狠": 1, 207 | "略": 19, 208 | "彩": 15, 209 | "扫": 13, 
210 | "赵": 7, 211 | "叶": 5, 212 | "相": 1124, 213 | "好": 220, 214 | "屿": 1, 215 | "争": 4, 216 | "压": 17, 217 | "谊": 1, 218 | "吃": 8, 219 | "疏": 2, 220 | "骑": 33, 221 | "或": 125, 222 | "蜘": 1, 223 | "趟": 2, 224 | "务": 40, 225 | "匠": 1, 226 | "垣": 1, 227 | "钥": 1, 228 | "斯": 2, 229 | "2": 6759, 230 | "贴": 12, 231 | "冷": 1, 232 | "制": 75, 233 | "霸": 1, 234 | "礼": 2, 235 | "B": 8729, 236 | "送": 27, 237 | "友": 15, 238 | "筐": 8, 239 | "糕": 3, 240 | "蛙": 3, 241 | "毛": 16, 242 | "玻": 13, 243 | "跟": 5, 244 | "占": 20, 245 | "啤": 1, 246 | "绩": 24, 247 | "普": 19, 248 | "泵": 1, 249 | "浴": 1, 250 | "寻": 2, 251 | "饼": 7, 252 | "₂": 116, 253 | "搅": 2, 254 | "薄": 4, 255 | "三": 1668, 256 | "倍": 92, 257 | "册": 5, 258 | "鼓": 6, 259 | "榜": 1, 260 | "负": 79, 261 | "圣": 4, 262 | "ア": 292, 263 | "大": 855, 264 | "-": 2268, 265 | "支": 130, 266 | "樱": 9, 267 | "记": 113, 268 | "状": 66, 269 | "扁": 1, 270 | "π": 68, 271 | "权": 4, 272 | "义": 108, 273 | "银": 10, 274 | "遍": 3, 275 | "里": 77, 276 | "雎": 1, 277 | "当": 817, 278 | "展": 69, 279 | "拖": 2, 280 | "牡": 1, 281 | "珠": 12, 282 | "荐": 1, 283 | "荫": 1, 284 | "m": 1596, 285 | "佳": 9, 286 | "恒": 59, 287 | "具": 70, 288 | "铺": 21, 289 | "肃": 2, 290 | "蜂": 2, 291 | "暗": 4, 292 | "依": 70, 293 | "东": 97, 294 | "那": 247, 295 | "判": 170, 296 | "级": 89, 297 | "梳": 1, 298 | "段": 802, 299 | "区": 224, 300 | "徽": 9, 301 | "社": 41, 302 | "旁": 21, 303 | "杂": 5, 304 | "υ": 1, 305 | "爷": 7, 306 | "觉": 3, 307 | "案": 102, 308 | "归": 16, 309 | "X": 8, 310 | "泛": 1, 311 | "保": 142, 312 | "面": 1955, 313 | "句": 8, 314 | "继": 23, 315 | "秩": 1, 316 | "深": 18, 317 | "汰": 3, 318 | "镶": 2, 319 | "凹": 5, 320 | "系": 912, 321 | "忽": 17, 322 | "幼": 4, 323 | "竿": 2, 324 | "考": 149, 325 | "抄": 1, 326 | "万": 84, 327 | "殊": 8, 328 | "徒": 6, 329 | "锡": 3, 330 | "英": 4, 331 | "天": 346, 332 | "墨": 4, 333 | "音": 7, 334 | "]": 1, 335 | "稿": 2, 336 | "C": 7685, 337 | "驶": 110, 338 | "野": 7, 339 | "汛": 1, 340 | "勤": 8, 341 | "左": 200, 342 | "葫": 1, 343 | "杭": 8, 344 | "良": 5, 345 | "泰": 9, 346 | "兹": 1, 347 | "类": 51, 348 | "综": 14, 349 | "穿": 3, 350 | "攀": 6, 351 | "茄": 5, 352 | "笔": 70, 353 | "林": 27, 354 | "√": 180, 355 | "孝": 3, 356 | "本": 247, 357 | ".": 5096, 358 | "串": 1, 359 | "点": 8894, 360 | "纽": 2, 361 | "宿": 5, 362 | "担": 5, 363 | "过": 1283, 364 | "豆": 7, 365 | "棉": 10, 366 | "姓": 2, 367 | "答": 138, 368 | "券": 18, 369 | "作": 791, 370 | "∑": 1, 371 | "院": 16, 372 | "票": 58, 373 | "n": 1062, 374 | "走": 91, 375 | "典": 17, 376 | "彼": 2, 377 | "顾": 20, 378 | "艇": 10, 379 | "℃": 9, 380 | "蚂": 9, 381 | "炎": 1, 382 | "少": 639, 383 | "蜗": 2, 384 | "洛": 3, 385 | "抚": 1, 386 | "丝": 23, 387 | "弧": 102, 388 | "思": 25, 389 | "振": 2, 390 | "亲": 8, 391 | "帽": 2, 392 | "览": 2, 393 | "降": 55, 394 | "协": 2, 395 | "ノ": 137, 396 | "Y": 6, 397 | "赛": 104, 398 | "九": 43, 399 | "远": 26, 400 | "团": 24, 401 | "古": 15, 402 | "姨": 1, 403 | "药": 35, 404 | "说": 368, 405 | "瓶": 25, 406 | "凸": 6, 407 | "极": 140, 408 | "漆": 4, 409 | "皋": 1, 410 | "同": 829, 411 | "帐": 4, 412 | "研": 41, 413 | "托": 5, 414 | "战": 12, 415 | "些": 70, 416 | "脚": 10, 417 | "劣": 10, 418 | "弦": 188, 419 | "太": 17, 420 | "断": 177, 421 | "搬": 10, 422 | "丰": 2, 423 | "洲": 8, 424 | "爸": 41, 425 | "麻": 1, 426 | "尼": 1, 427 | "D": 4649, 428 | "但": 49, 429 | "诉": 2, 430 | "像": 176, 431 | "华": 29, 432 | "塔": 45, 433 | "艘": 26, 434 | "距": 515, 435 | "d": 95, 436 | "碑": 1, 437 | "耽": 1, 438 | "据": 221, 439 | "买": 202, 440 | "瓷": 4, 441 | "靶": 2, 442 | "鱼": 17, 443 | "签": 6, 444 | "蚀": 1, 445 | "∃": 4, 446 | "辆": 105, 447 | "和": 977, 448 | "透": 12, 449 | "夕": 1, 450 | 
"折": 287, 451 | "簧": 3, 452 | "骨": 2, 453 | "/": 374, 454 | "液": 12, 455 | "宾": 8, 456 | "汇": 1, 457 | "偏": 51, 458 | "网": 75, 459 | "麦": 4, 460 | "著": 15, 461 | "诞": 2, 462 | "坡": 88, 463 | "因": 54, 464 | "入": 131, 465 | "孩": 6, 466 | "虫": 4, 467 | "息": 61, 468 | "仰": 27, 469 | "韶": 2, 470 | "出": 1592, 471 | "蚁": 9, 472 | "最": 929, 473 | "踪": 3, 474 | "鞋": 4, 475 | "锌": 1, 476 | "熏": 1, 477 | "斗": 2, 478 | "⊙": 487, 479 | "供": 49, 480 | "眠": 1, 481 | "带": 56, 482 | "播": 5, 483 | "蔬": 29, 484 | "估": 31, 485 | "喷": 12, 486 | "阶": 17, 487 | "债": 5, 488 | "粽": 2, 489 | "情": 90, 490 | "掷": 22, 491 | "淇": 2, 492 | "响": 18, 493 | "界": 16, 494 | "减": 79, 495 | "黑": 30, 496 | "罐": 4, 497 | "寓": 4, 498 | "奔": 1, 499 | "旗": 21, 500 | "进": 331, 501 | "Z": 9, 502 | "靠": 15, 503 | "口": 75, 504 | "呢": 5, 505 | "內": 2, 506 | "巧": 12, 507 | "苦": 3, 508 | "敬": 2, 509 | "棵": 38, 510 | "陶": 2, 511 | "卸": 10, 512 | "翻": 53, 513 | "姿": 1, 514 | "驾": 8, 515 | "范": 415, 516 | "者": 42, 517 | "了": 514, 518 | "看": 62, 519 | "名": 156, 520 | "徐": 8, 521 | "粒": 3, 522 | "掘": 2, 523 | "肚": 1, 524 | "钢": 24, 525 | "紧": 12, 526 | "约": 58, 527 | "末": 38, 528 | "搭": 21, 529 | "男": 21, 530 | "刹": 4, 531 | "尽": 9, 532 | "E": 3118, 533 | "浇": 4, 534 | "委": 6, 535 | "佛": 2, 536 | "陡": 2, 537 | "健": 18, 538 | "凤": 2, 539 | "乱": 1, 540 | "述": 59, 541 | "零": 141, 542 | "特": 19, 543 | "司": 88, 544 | "雾": 3, 545 | "要": 357, 546 | "辅": 8, 547 | "序": 76, 548 | "【": 44, 549 | "钓": 1, 550 | """: 2, 551 | "会": 97, 552 | "晚": 11, 553 | "焦": 170, 554 | "吨": 47, 555 | "被": 77, 556 | "训": 11, 557 | "躯": 1, 558 | "贮": 1, 559 | "0": 4856, 560 | "邻": 45, 561 | "强": 38, 562 | "弓": 1, 563 | "见": 17, 564 | "血": 2, 565 | "迅": 1, 566 | "经": 401, 567 | "金": 93, 568 | "周": 240, 569 | "坪": 4, 570 | "语": 8, 571 | "浮": 5, 572 | "p": 114, 573 | "奶": 16, 574 | "调": 223, 575 | "验": 52, 576 | "香": 8, 577 | "隔": 36, 578 | "芜": 2, 579 | "星": 46, 580 | "颠": 1, 581 | "客": 92, 582 | "饰": 4, 583 | "咨": 1, 584 | "港": 30, 585 | "脱": 1, 586 | "°": 992, 587 | "β": 56, 588 | "岸": 34, 589 | "逻": 2, 590 | "阿": 2, 591 | "检": 38, 592 | "扇": 80, 593 | "蕉": 5, 594 | "恶": 1, 595 | "鹏": 2, 596 | "浙": 12, 597 | "[": 146, 598 | "牧": 9, 599 | "哨": 1, 600 | "衫": 18, 601 | "淮": 2, 602 | "胰": 1, 603 | "更": 29, 604 | "穷": 7, 605 | "怨": 1, 606 | "黏": 1, 607 | "老": 54, 608 | "划": 99, 609 | "栖": 4, 610 | "沙": 9, 611 | "业": 51, 612 | "茜": 1, 613 | "搞": 2, 614 | "钩": 2, 615 | "用": 804, 616 | "侯": 2, 617 | "±": 6, 618 | "△": 1760, 619 | "裁": 22, 620 | "睡": 2, 621 | "仅": 27, 622 | "F": 1991, 623 | "效": 24, 624 | "始": 125, 625 | "郑": 11, 626 | "雕": 4, 627 | "诗": 1, 628 | "酥": 1, 629 | "²": 310, 630 | "奠": 1, 631 | "绥": 1, 632 | "噪": 5, 633 | "譬": 1, 634 | "俯": 34, 635 | "腰": 360, 636 | "色": 82, 637 | "篷": 3, 638 | "顶": 631, 639 | "击": 4, 640 | "矿": 5, 641 | "清": 16, 642 | "澄": 1, 643 | "指": 68, 644 | "完": 218, 645 | "式": 956, 646 | "】": 49, 647 | "o": 126, 648 | "袖": 2, 649 | "亚": 4, 650 | "期": 151, 651 | "撞": 1, 652 | "群": 12, 653 | "伯": 2, 654 | "1": 6605, 655 | "弟": 5, 656 | "爽": 3, 657 | "久": 2, 658 | "俄": 1, 659 | "奋": 1, 660 | "富": 3, 661 | "勒": 1, 662 | "陕": 6, 663 | "淘": 3, 664 | "滚": 17, 665 | "菜": 44, 666 | "束": 15, 667 | "卧": 2, 668 | "门": 65, 669 | "雪": 4, 670 | "软": 1, 671 | "q": 45, 672 | "柴": 4, 673 | "壶": 1, 674 | "养": 17, 675 | "建": 135, 676 | "链": 5, 677 | "言": 2, 678 | "超": 179, 679 | "砌": 2, 680 | ".": 27, 681 | "丘": 1, 682 | "抛": 957, 683 | "“": 280, 684 | "管": 38, 685 | "莱": 2, 686 | "舰": 7, 687 | "羹": 1, 688 | "常": 75, 689 | "值": 1567, 690 | "圾": 11, 691 | "证": 541, 
692 | "捆": 6, 693 | "立": 234, 694 | "蕊": 2, 695 | "种": 520, 696 | "发": 495, 697 | "酒": 12, 698 | "痕": 49, 699 | "这": 713, 700 | "乘": 60, 701 | "招": 6, 702 | "赚": 9, 703 | "工": 298, 704 | "烧": 5, 705 | "矩": 291, 706 | "器": 104, 707 | "衬": 9, 708 | "轮": 48, 709 | "菱": 101, 710 | "幸": 2, 711 | "函": 1404, 712 | "七": 33, 713 | "概": 90, 714 | "有": 1547, 715 | "程": 854, 716 | "复": 43, 717 | "小": 1400, 718 | "美": 16, 719 | "殖": 5, 720 | "厦": 9, 721 | "椭": 231, 722 | "氯": 2, 723 | "家": 238, 724 | "弹": 11, 725 | "纸": 202, 726 | "刻": 27, 727 | "炼": 11, 728 | "观": 96, 729 | "浅": 1, 730 | "G": 427, 731 | "φ": 5, 732 | "屏": 4, 733 | "街": 5, 734 | "余": 108, 735 | "付": 73, 736 | "胜": 34, 737 | "章": 6, 738 | "另": 141, 739 | "革": 1, 740 | "雨": 6, 741 | "鸦": 3, 742 | "修": 62, 743 | "顷": 5, 744 | "较": 59, 745 | "月": 218, 746 | "宋": 3, 747 | "递": 61, 748 | "优": 82, 749 | "窄": 3, 750 | "骡": 1, 751 | "在": 3928, 752 | "袭": 3, 753 | "花": 82, 754 | "′": 390, 755 | "缸": 1, 756 | "去": 160, 757 | "尺": 71, 758 | "炽": 2, 759 | "瘾": 1, 760 | "促": 24, 761 | "桂": 3, 762 | "资": 64, 763 | "摊": 1, 764 | "仙": 1, 765 | "虞": 2, 766 | "毫": 8, 767 | "路": 350, 768 | "拱": 22, 769 | "柳": 2, 770 | "r": 174, 771 | "整": 273, 772 | "究": 96, 773 | "都": 309, 774 | "百": 39, 775 | "霓": 1, 776 | "吕": 1, 777 | "丙": 45, 778 | "”": 247, 779 | "亮": 21, 780 | "舱": 1, 781 | "育": 30, 782 | "医": 27, 783 | "难": 4, 784 | "裂": 1, 785 | "淄": 2, 786 | "拆": 10, 787 | "黎": 1, 788 | "量": 499, 789 | "乙": 491, 790 | "还": 86, 791 | "]": 150, 792 | "仑": 2, 793 | "履": 1, 794 | "照": 48, 795 | "齐": 7, 796 | "险": 21, 797 | "哪": 74, 798 | "轿": 4, 799 | "长": 1774, 800 | "图": 3271, 801 | "漂": 3, 802 | "希": 4, 803 | "疑": 1, 804 | "枝": 3, 805 | "唱": 3, 806 | "聚": 2, 807 | "隧": 28, 808 | "锦": 2, 809 | "″": 1, 810 | "▱": 8, 811 | "³": 26, 812 | "✲": 14, 813 | "∴": 10, 814 | "康": 5, 815 | "妹": 2, 816 | "势": 3, 817 | "蛇": 4, 818 | "H": 286, 819 | "青": 15, 820 | "拟": 48, 821 | "煤": 6, 822 | "巡": 6, 823 | "形": 2838, 824 | "麓": 1, 825 | "旱": 3, 826 | "想": 98, 827 | "椒": 1, 828 | "绷": 1, 829 | "勿": 1, 830 | "洁": 2, 831 | "赁": 8, 832 | "朝": 14, 833 | "舟": 5, 834 | "骤": 10, 835 | "储": 15, 836 | "京": 25, 837 | "3": 2839, 838 | "夹": 47, 839 | "流": 44, 840 | "桃": 16, 841 | "珊": 1, 842 | "潍": 4, 843 | "屋": 2, 844 | "道": 189, 845 | "单": 510, 846 | "¬": 2, 847 | "畜": 2, 848 | "部": 466, 849 | "只": 193, 850 | "们": 200, 851 | "s": 321, 852 | "拴": 1, 853 | "她": 19, 854 | "寺": 3, 855 | "悉": 2, 856 | "戊": 1, 857 | ",": 19839, 858 | "宏": 1, 859 | "锐": 62, 860 | "蜜": 1, 861 | "素": 22, 862 | "否": 379, 863 | "亭": 10, 864 | "讯": 8, 865 | "抵": 8, 866 | "德": 7, 867 | "守": 2, 868 | "眼": 10, 869 | "县": 16, 870 | "倾": 17, 871 | "摆": 28, 872 | "慈": 2, 873 | "绍": 1, 874 | "档": 3, 875 | "峡": 2, 876 | "幢": 7, 877 | "童": 9, 878 | "孤": 2, 879 | "韧": 2, 880 | "全": 232, 881 | "剪": 91, 882 | "转": 337, 883 | "误": 35, 884 | "数": 3880, 885 | "至": 145, 886 | "闻": 2, 887 | "空": 104, 888 | "国": 91, 889 | "逼": 1, 890 | "灾": 15, 891 | "谁": 17, 892 | "菁": 1, 893 | "把": 195, 894 | "碎": 5, 895 | "向": 715, 896 | "众": 18, 897 | "果": 546, 898 | "红": 64, 899 | "室": 21, 900 | "贫": 2, 901 | "中": 2874, 902 | "新": 97, 903 | "∵": 5, 904 | "画": 157, 905 | "汁": 3, 906 | "评": 5, 907 | "I": 27, 908 | "魏": 1, 909 | "风": 49, 910 | "盒": 76, 911 | "叔": 2, 912 | "%": 122, 913 | "柜": 13, 914 | "损": 13, 915 | "胞": 1, 916 | "瓦": 4, 917 | "度": 1032, 918 | "廓": 2, 919 | "森": 3, 920 | "曲": 245, 921 | "援": 9, 922 | "项": 209, 923 | "白": 55, 924 | "一": 3859, 925 | "⑩": 2, 926 | "掉": 14, 927 | "∈": 124, 928 | "阐": 1, 929 | "途": 21, 930 | "搜": 1, 931 
| "御": 1, 932 | "索": 20, 933 | "堤": 4, 934 | "玩": 34, 935 | "享": 15, 936 | "梯": 84, 937 | "肉": 1, 938 | "芳": 1, 939 | "4": 1979, 940 | "容": 24, 941 | "示": 692, 942 | "匾": 1, 943 | "迁": 2, 944 | "杆": 24, 945 | "ω": 17, 946 | "≈": 46, 947 | "腊": 1, 948 | "巍": 5, 949 | "限": 196, 950 | "慢": 19, 951 | "燕": 2, 952 | "橘": 1, 953 | "蛛": 1, 954 | "竣": 1, 955 | "姥": 2, 956 | "拥": 6, 957 | "警": 4, 958 | "柱": 57, 959 | "晰": 1, 960 | "敲": 2, 961 | "t": 511, 962 | "对": 1036, 963 | "⊕": 1, 964 | "企": 20, 965 | "涂": 25, 966 | "椅": 7, 967 | "按": 267, 968 | "隐": 1, 969 | "蒜": 3, 970 | "莞": 1, 971 | "吧": 1, 972 | "动": 1346, 973 | "购": 291, 974 | "议": 4, 975 | "▲": 2, 976 | "´": 5, 977 | "阻": 8, 978 | "什": 70, 979 | "摇": 4, 980 | "捉": 2, 981 | "盐": 3, 982 | "丢": 1, 983 | "巢": 3, 984 | "祥": 2, 985 | "扳": 2, 986 | "筹": 8, 987 | "登": 5, 988 | "避": 5, 989 | "截": 102, 990 | "梅": 8, 991 | "文": 51, 992 | "昆": 3, 993 | "律": 92, 994 | "福": 13, 995 | "税": 12, 996 | "世": 6, 997 | "∞": 49, 998 | "张": 197, 999 | "宣": 1, 1000 | "助": 27, 1001 | "γ": 4, 1002 | "仁": 1, 1003 | "求": 2444, 1004 | "装": 127, 1005 | "襄": 1, 1006 | "拉": 11, 1007 | "聊": 3, 1008 | "嵌": 2, 1009 | "监": 9, 1010 | "坐": 1446, 1011 | "兔": 3, 1012 | "湖": 33, 1013 | "遥": 1, 1014 | "蓝": 11, 1015 | "杰": 5, 1016 | "并": 521, 1017 | "竹": 14, 1018 | "顺": 148, 1019 | "丁": 8, 1020 | "沂": 3, 1021 | "栅": 1, 1022 | "授": 2, 1023 | "夏": 9, 1024 | "熔": 1, 1025 | "洗": 12, 1026 | "望": 12, 1027 | "萝": 4, 1028 | "斜": 184, 1029 | "感": 11, 1030 | "鸡": 10, 1031 | "利": 279, 1032 | "厨": 5, 1033 | "漫": 6, 1034 | "5": 1720, 1035 | "桥": 38, 1036 | "儿": 7, 1037 | "激": 8, 1038 | "规": 177, 1039 | "籍": 2, 1040 | "他": 184, 1041 | "橙": 2, 1042 | "棚": 4, 1043 | "季": 27, 1044 | "剩": 44, 1045 | "u": 4, 1046 | "致": 25, 1047 | "延": 310, 1048 | "寸": 2, 1049 | "命": 85, 1050 | "勾": 12, 1051 | " ": 10, 1052 | "璃": 13, 1053 | "阄": 2, 1054 | "且": 965, 1055 | "宝": 4, 1056 | "废": 1, 1057 | " ": 16, 1058 | "爬": 25, 1059 | "售": 409, 1060 | "堰": 4, 1061 | "方": 1883, 1062 | "机": 177, 1063 | "亿": 4, 1064 | "派": 11, 1065 | "附": 9, 1066 | "≌": 36, 1067 | "狗": 2, 1068 | "员": 68, 1069 | "坚": 1, 1070 | "统": 46, 1071 | "换": 43, 1072 | "查": 87, 1073 | "晤": 4, 1074 | "逐": 4, 1075 | "班": 66, 1076 | "念": 9, 1077 | "年": 281, 1078 | "摸": 43, 1079 | "仿": 7, 1080 | "球": 314, 1081 | "厂": 74, 1082 | "的": 15843, 1083 | "鲁": 7, 1084 | "馆": 29, 1085 | "羊": 4, 1086 | "谓": 1, 1087 | "五": 80, 1088 | "列": 627, 1089 | "错": 49, 1090 | "鸟": 2, 1091 | "探": 105, 1092 | "失": 7, 1093 | "户": 57, 1094 | "改": 73, 1095 | "螺": 35, 1096 | "丿": 1, 1097 | "允": 3, 1098 | "参": 135, 1099 | "雄": 3, 1100 | "配": 51, 1101 | "K": 61, 1102 | "拌": 1, 1103 | "毒": 4, 1104 | "何": 219, 1105 | "仔": 5, 1106 | "留": 60, 1107 | "筝": 5, 1108 | "州": 80, 1109 | "Ⅱ": 36, 1110 | "惠": 64, 1111 | "④": 84, 1112 | "王": 53, 1113 | "来": 151, 1114 | "触": 6, 1115 | "注": 27, 1116 | "火": 50, 1117 | "独": 68, 1118 | "问": 275, 1119 | "声": 3, 1120 | "米": 415, 1121 | "维": 8, 1122 | "湿": 1, 1123 | "宇": 1, 1124 | "堆": 18, 1125 | "粉": 14, 1126 | "井": 8, 1127 | "演": 9, 1128 | "甘": 3, 1129 | "股": 17, 1130 | "质": 104, 1131 | "个": 2737, 1132 | "碱": 1, 1133 | "纵": 40, 1134 | "缴": 14, 1135 | "获": 112, 1136 | "6": 1348, 1137 | "便": 13, 1138 | "嘴": 1, 1139 | "剂": 1, 1140 | "雅": 2, 1141 | "拍": 9, 1142 | "受": 37, 1143 | "属": 25, 1144 | "胡": 1, 1145 | "Ⅰ": 38, 1146 | "缺": 5, 1147 | "狭": 1, 1148 | "罩": 2, 1149 | "绵": 1, 1150 | "v": 15, 1151 | "智": 11, 1152 | "汾": 1, 1153 | "、": 2143, 1154 | "冀": 2, 1155 | "阅": 40, 1156 | "领": 21, 1157 | "床": 3, 1158 | "舍": 20, 1159 | "树": 117, 1160 | "北": 86, 1161 
| "宜": 2, 1162 | "?": 723, 1163 | "!": 7, 1164 | "抢": 1, 1165 | "春": 13, 1166 | "欧": 4, 1167 | "梦": 5, 1168 | "涨": 19, 1169 | "溪": 3, 1170 | "净": 18, 1171 | "际": 37, 1172 | "屉": 3, 1173 | "才": 60, 1174 | "菌": 1, 1175 | "槐": 1, 1176 | "浓": 8, 1177 | "返": 50, 1178 | "南": 76, 1179 | "a": 2002, 1180 | "散": 4, 1181 | "跨": 2, 1182 | "八": 46, 1183 | "仪": 10, 1184 | "召": 6, 1185 | "坯": 1, 1186 | "称": 391, 1187 | "荷": 3, 1188 | "胶": 2, 1189 | "使": 507, 1190 | "刀": 8, 1191 | "各": 215, 1192 | "笆": 7, 1193 | "师": 75, 1194 | "祖": 4, 1195 | "而": 122, 1196 | "适": 23, 1197 | "气": 59, 1198 | "化": 216, 1199 | ">": 305, 1200 | "∠": 1603, 1201 | "斥": 3, 1202 | "丨": 14, 1203 | "洪": 4, 1204 | "箱": 37, 1205 | "纳": 17, 1206 | "·": 189, 1207 | "茶": 9, 1208 | "格": 214, 1209 | "沿": 352, 1210 | "病": 5, 1211 | "嵊": 1, 1212 | "L": 22, 1213 | "李": 40, 1214 | "译": 1, 1215 | "仓": 15, 1216 | "卖": 43, 1217 | "锻": 10, 1218 | "~": 5, 1219 | "握": 3, 1220 | "≠": 114, 1221 | "Ⅲ": 4, 1222 | "日": 69, 1223 | "⑤": 22, 1224 | "武": 18, 1225 | "往": 69, 1226 | "型": 133, 1227 | "浪": 1, 1228 | "凭": 2, 1229 | "篱": 7, 1230 | "绳": 29, 1231 | "饲": 3, 1232 | "÷": 4, 1233 | "锅": 1, 1234 | ")": 6138, 1235 | "专": 8, 1236 | "逗": 1, 1237 | "颜": 28, 1238 | "挡": 4, 1239 | "撤": 1, 1240 | "恰": 184, 1241 | "别": 1036, 1242 | "启": 2, 1243 | "骰": 7, 1244 | "7": 678, 1245 | "达": 282, 1246 | "十": 59, 1247 | "畅": 2, 1248 | "密": 10, 1249 | "终": 91, 1250 | "手": 61, 1251 | "背": 17, 1252 | "乓": 9, 1253 | "角": 2523, 1254 | "遗": 1, 1255 | "取": 655, 1256 | "静": 11, 1257 | "盘": 23, 1258 | "祝": 1, 1259 | "详": 1, 1260 | "轩": 1, 1261 | "滨": 7, 1262 | "六": 73, 1263 | "矮": 3, 1264 | "平": 1616, 1265 | "w": 15, 1266 | "旺": 5, 1267 | "匀": 109, 1268 | "垃": 11, 1269 | "。": 205, 1270 | "(": 6089, 1271 | "明": 650, 1272 | "输": 57, 1273 | "隙": 3, 1274 | "额": 44, 1275 | "夜": 1, 1276 | "枣": 3, 1277 | "•": 117, 1278 | "亩": 2, 1279 | "舶": 2, 1280 | "携": 6, 1281 | "举": 19, 1282 | "污": 33, 1283 | "遂": 1, 1284 | "铅": 16, 1285 | "父": 9, 1286 | "藏": 2, 1287 | "虎": 1, 1288 | "法": 264, 1289 | "赔": 6, 1290 | "b": 1082, 1291 | "轨": 56, 1292 | "叫": 26, 1293 | "豪": 5, 1294 | "热": 19, 1295 | "公": 344, 1296 | "闯": 2, 1297 | "扶": 7, 1298 | "读": 39, 1299 | "楼": 65, 1300 | "湾": 2, 1301 | "圃": 7, 1302 | "善": 8, 1303 | "备": 47, 1304 | "很": 14, 1305 | "营": 40, 1306 | "温": 41, 1307 | "辨": 2, 1308 | "沪": 2, 1309 | "冬": 2, 1310 | "唯": 11, 1311 | "皮": 30, 1312 | "娱": 1, 1313 | "栽": 20, 1314 | "届": 7, 1315 | "奇": 36, 1316 | "M": 1415, 1317 | "敏": 6, 1318 | "体": 379, 1319 | "棒": 12, 1320 | "汕": 2, 1321 | "族": 1, 1322 | "硝": 1, 1323 | "⑥": 6, 1324 | "子": 255, 1325 | "卫": 5, 1326 | "灭": 2, 1327 | "目": 45, 1328 | "巴": 4, 1329 | "虹": 1, 1330 | "厅": 5, 1331 | "昌": 7, 1332 | "谐": 3, 1333 | "写": 419, 1334 | "岛": 24, 1335 | "□": 6, 1336 | "患": 3, 1337 | "接": 755, 1338 | "产": 255, 1339 | "账": 1, 1340 | "碳": 6, 1341 | "莲": 3, 1342 | "收": 71, 1343 | "8": 887, 1344 | "治": 3, 1345 | "辽": 6, 1346 | "织": 18, 1347 | "秉": 1, 1348 | "苏": 20, 1349 | "慎": 1, 1350 | "跑": 58, 1351 | "恢": 1, 1352 | "毕": 10, 1353 | "博": 9, 1354 | "※": 1, 1355 | "神": 3, 1356 | "①": 282, 1357 | "≤": 153, 1358 | "表": 614, 1359 | "短": 58, 1360 | "己": 10, 1361 | "驴": 3, 1362 | "x": 4672, 1363 | "追": 24, 1364 | "锁": 6, 1365 | "垂": 322, 1366 | "包": 72, 1367 | "岗": 8, 1368 | "服": 45, 1369 | "戏": 27, 1370 | "酬": 9, 1371 | "栓": 7, 1372 | "厚": 18, 1373 | "缝": 1, 1374 | "府": 10, 1375 | "娟": 1, 1376 | "黄": 40, 1377 | "#": 167, 1378 | "挥": 2, 1379 | "护": 18, 1380 | "润": 138, 1381 | "梨": 3, 1382 | "含": 98, 1383 | "赴": 1, 1384 | "哀": 1, 1385 | "奉": 1, 1386 | "络": 
4, 1387 | "c": 968, 1388 | "副": 15, 1389 | "峰": 4, 1390 | "时": 2209, 1391 | "载": 10, 1392 | "身": 55, 1393 | "销": 329, 1394 | "将": 505, 1395 | "怎": 45, 1396 | "刚": 45, 1397 | "<": 349, 1398 | "报": 42, 1399 | "挤": 3, 1400 | "澧": 2, 1401 | "横": 128, 1402 | "环": 64, 1403 | "脸": 1, 1404 | "似": 146, 1405 | "访": 3, 1406 | "铁": 74, 1407 | "燃": 12, 1408 | "祈": 1, 1409 | "旋": 280, 1410 | "描": 8, 1411 | "N": 781, 1412 | "乒": 10, 1413 | "绝": 13, 1414 | "察": 56, 1415 | "令": 11, 1416 | "夺": 1, 1417 | "若": 1714, 1418 | "剧": 1, 1419 | "园": 39, 1420 | "缓": 4, 1421 | "可": 457, 1422 | "遮": 1, 1423 | "干": 57, 1424 | "添": 19, 1425 | "艺": 7, 1426 | "份": 74, 1427 | "轼": 1, 1428 | "玄": 2, 1429 | "妈": 34, 1430 | "民": 61, 1431 | "互": 101, 1432 | "候": 5, 1433 | "增": 165, 1434 | "■": 1, 1435 | "∥": 204, 1436 | "瞬": 3, 1437 | "邮": 15, 1438 | "就": 79, 1439 | "9": 792, 1440 | "活": 74, 1441 | "丽": 17, 1442 | "围": 474, 1443 | "元": 702, 1444 | "婷": 1, 1445 | "俩": 2, 1446 | "比": 590, 1447 | "块": 111, 1448 | "飞": 41, 1449 | "②": 265, 1450 | "无": 115, 1451 | "Ⅳ": 1, 1452 | "≥": 66, 1453 | "桩": 2, 1454 | "山": 88, 1455 | "踩": 3, 1456 | "女": 25, 1457 | "侨": 1, 1458 | "y": 2232, 1459 | "T": 61, 1460 | "内": 538, 1461 | "缆": 5, 1462 | "安": 70, 1463 | "萌": 2, 1464 | "茎": 5, 1465 | "辑": 1, 1466 | "渐": 29, 1467 | "锯": 2, 1468 | "刘": 8, 1469 | "盛": 3, 1470 | "映": 10, 1471 | "伦": 1, 1472 | "让": 18, 1473 | "听": 9, 1474 | "破": 7, 1475 | "款": 71, 1476 | "陀": 2, 1477 | "蛋": 15, 1478 | "近": 71, 1479 | "乐": 5, 1480 | "靖": 1, 1481 | "叙": 4, 1482 | "川": 11, 1483 | "食": 11, 1484 | "恤": 11, 1485 | "车": 557, 1486 | "捷": 2, 1487 | "闭": 17, 1488 | "任": 279, 1489 | "省": 67, 1490 | "喂": 1, 1491 | "宗": 2, 1492 | "簇": 1, 1493 | "砖": 12, 1494 | "休": 12, 1495 | "源": 17, 1496 | "烂": 1, 1497 | "吗": 80, 1498 | "匙": 2, 1499 | "鲜": 3, 1500 | "实": 413, 1501 | "蜡": 24, 1502 | "∣": 342, 1503 | "氧": 2, 1504 | "侦": 2, 1505 | "弱": 1, 1506 | "稳": 16, 1507 | "架": 31, 1508 | "主": 33, 1509 | "沼": 9, 1510 | "设": 545, 1511 | "雀": 2, 1512 | "充": 34, 1513 | "等": 1164, 1514 | "晋": 1, 1515 | "O": 2430, 1516 | "凌": 1, 1517 | "条": 735, 1518 | "扣": 14, 1519 | "既": 20, 1520 | "&": 3, 1521 | "灯": 73, 1522 | "影": 183, 1523 | "绰": 1, 1524 | "已": 1220, 1525 | "课": 44, 1526 | "需": 230, 1527 | "傅": 18, 1528 | "云": 5, 1529 | "疗": 21, 1530 | "通": 179, 1531 | "肥": 5, 1532 | "涧": 1, 1533 | "垫": 2, 1534 | "株": 6, 1535 | "头": 72, 1536 | ":": 79, 1537 | "着": 98, 1538 | "叠": 116, 1539 | "操": 38, 1540 | "敌": 6, 1541 | "扎": 3, 1542 | "滑": 38, 1543 | "裕": 1, 1544 | "做": 135, 1545 | "租": 63, 1546 | "蝠": 1, 1547 | "③": 124, 1548 | "书": 123, 1549 | "芽": 2, 1550 | "矫": 4, 1551 | "凯": 1, 1552 | "彰": 2, 1553 | "奴": 1, 1554 | "苹": 31, 1555 | "z": 31, 1556 | "磁": 11, 1557 | "熄": 2, 1558 | "崇": 1, 1559 | "庆": 10, 1560 | "遭": 2, 1561 | "朋": 8, 1562 | "意": 263, 1563 | "耻": 1, 1564 | "则": 1518, 1565 | "厘": 34, 1566 | ";": 1368, 1567 | "尝": 9, 1568 | "够": 29, 1569 | "急": 9, 1570 | "货": 91, 1571 | "讨": 21, 1572 | "借": 10, 1573 | "船": 93, 1574 | "θ": 50, 1575 | "º": 6, 1576 | "秋": 4, 1577 | "千": 207, 1578 | "细": 26, 1579 | "运": 696, 1580 | "盖": 18, 1581 | "变": 342, 1582 | "竞": 26, 1583 | "荣": 1, 1584 | "e": 113, 1585 | "页": 15, 1586 | "批": 121, 1587 | "轻": 7, 1588 | "汽": 91, 1589 | "西": 76, 1590 | "待": 10, 1591 | "合": 355, 1592 | "袋": 25, 1593 | "₃": 44, 1594 | "亏": 14, 1595 | "⌒": 70, 1596 | "窗": 7, 1597 | "外": 223, 1598 | ":": 1311, 1599 | "欢": 4, 1600 | "两": 1925, 1601 | "未": 14, 1602 | "箭": 4, 1603 | "纯": 6, 1604 | "钳": 3, 1605 | "阴": 107, 1606 | "永": 4, 1607 | "放": 148, 1608 | "私": 3, 1609 | "础": 12, 1610 
| "必": 65, 1611 | "勇": 1, 1612 | "呈": 11, 1613 | "棋": 11, 1614 | "坊": 4, 1615 | "母": 53, 1616 | "济": 10, 1617 | "P": 2875, 1618 | "荒": 2, 1619 | "奖": 48, 1620 | "烛": 25, 1621 | "罚": 5, 1622 | "盟": 4, 1623 | "止": 111, 1624 | "拧": 1, 1625 | "早": 14, 1626 | "士": 9, 1627 | "马": 20, 1628 | "灰": 2, 1629 | "职": 8, 1630 | "蕨": 1, 1631 | "淹": 1, 1632 | "替": 4, 1633 | "礁": 3, 1634 | "陪": 1, 1635 | "庄": 10, 1636 | "切": 308, 1637 | "针": 201, 1638 | "栋": 5, 1639 | "脑": 19, 1640 | "排": 110, 1641 | "首": 19, 1642 | "粘": 4, 1643 | "匝": 4, 1644 | "队": 140, 1645 | "订": 7, 1646 | "交": 1788, 1647 | "∧": 6, 1648 | "慰": 1, 1649 | "草": 14, 1650 | "费": 212, 1651 | "总": 172, 1652 | "寂": 2, 1653 | "组": 295, 1654 | "片": 156, 1655 | "准": 74, 1656 | "哈": 4, 1657 | "插": 4, 1658 | "询": 1, 1659 | "座": 41, 1660 | "执": 13, 1661 | "填": 58, 1662 | "差": 94, 1663 | "共": 397, 1664 | "味": 5, 1665 | "史": 2, 1666 | "直": 2859, 1667 | "泸": 1, 1668 | "{": 174, 1669 | "拼": 44, 1670 | "弄": 1, 1671 | "率": 287, 1672 | "逆": 78, 1673 | "钉": 2, 1674 | "锈": 2, 1675 | "脐": 1, 1676 | "戒": 1, 1677 | "算": 219, 1678 | "高": 369, 1679 | "军": 27, 1680 | "猜": 53, 1681 | "龄": 18, 1682 | "趣": 21, 1683 | "控": 7, 1684 | "…": 68, 1685 | "第": 593, 1686 | "铜": 6, 1687 | "焰": 1, 1688 | "油": 22, 1689 | "贸": 1, 1690 | "为": 5153, 1691 | "抽": 75, 1692 | "壁": 8, 1693 | "罄": 1, 1694 | "歌": 7, 1695 | "漏": 1, 1696 | "糙": 2, 1697 | "奥": 7, 1698 | "卜": 5, 1699 | "域": 94, 1700 | "裤": 7, 1701 | "以": 894, 1702 | "f": 567, 1703 | "羽": 2, 1704 | "兰": 7, 1705 | "割": 31, 1706 | "请": 603, 1707 | "桶": 23, 1708 | "浸": 1, 1709 | "臻": 1, 1710 | "柿": 4, 1711 | "码": 26, 1712 | "径": 496, 1713 | "吉": 7, 1714 | "伏": 5, 1715 | "←": 1, 1716 | "颖": 5, 1717 | "庚": 1, 1718 | "初": 31, 1719 | "真": 33, 1720 | "校": 243, 1721 | "责": 7, 1722 | "严": 3, 1723 | "悦": 1, 1724 | "夫": 2, 1725 | "弯": 9, 1726 | "粮": 4, 1727 | "冰": 15, 1728 | "申": 2, 1729 | "劲": 1, 1730 | "阵": 4, 1731 | "土": 26, 1732 | "人": 566, 1733 | "∽": 35, 1734 | "秀": 8, 1735 | "幅": 13, 1736 | "升": 32, 1737 | "益": 6, 1738 | "住": 45, 1739 | "Q": 737, 1740 | "字": 158, 1741 | "衡": 10, 1742 | "摩": 6, 1743 | "凰": 2, 1744 | "偿": 3, 1745 | "试": 276, 1746 | "板": 105, 1747 | "宁": 18, 1748 | "稀": 2, 1749 | "射": 215, 1750 | "成": 812, 1751 | "碗": 3, 1752 | "聘": 2, 1753 | "龙": 10, 1754 | "力": 49, 1755 | "钟": 97, 1756 | "计": 340, 1757 | "昨": 1, 1758 | "☆": 3, 1759 | "到": 1039, 1760 | "吴": 5, 1761 | "边": 1956, 1762 | "游": 75, 1763 | "贺": 3, 1764 | "<": 50, 1765 | "练": 24, 1766 | "陈": 2, 1767 | "行": 781, 1768 | "低": 75, 1769 | "摔": 2, 1770 | "策": 5, 1771 | "忙": 4, 1772 | "停": 133, 1773 | "非": 35, 1774 | "代": 145, 1775 | "汤": 1, 1776 | "晨": 4, 1777 | "硬": 29, 1778 | "置": 247, 1779 | "桨": 1, 1780 | "迹": 59, 1781 | "|": 83, 1782 | "松": 6, 1783 | "布": 45, 1784 | "如": 2274, 1785 | "历": 6, 1786 | "植": 35, 1787 | "挑": 4, 1788 | "易": 16, 1789 | "→": 171, 1790 | "鹅": 1, 1791 | "创": 12, 1792 | "次": 892, 1793 | "渣": 1, 1794 | "津": 3, 1795 | "性": 101, 1796 | "Φ": 5, 1797 | "暨": 1, 1798 | "薪": 2, 1799 | "甜": 2, 1800 | "阳": 36, 1801 | "冲": 6, 1802 | "电": 186, 1803 | "样": 207, 1804 | "舞": 3, 1805 | "髀": 1, 1806 | "遇": 62, 1807 | "鼠": 12, 1808 | "盈": 23, 1809 | "迎": 7, 1810 | "拐": 14, 1811 | "⇒": 1, 1812 | "绘": 9, 1813 | "牛": 23, 1814 | "g": 172, 1815 | "饭": 7, 1816 | "危": 6, 1817 | "突": 7, 1818 | "堂": 2, 1819 | "分": 2482, 1820 | "圈": 27, 1821 | "残": 3, 1822 | "与": 2046, 1823 | "抑": 1, 1824 | "撕": 1, 1825 | "缘": 6, 1826 | "尚": 6, 1827 | "辣": 1, 1828 | "木": 38, 1829 | "宫": 3, 1830 | "帮": 29, 1831 | "励": 7, 1832 | "伸": 11, 1833 | "λ": 16, 1834 | "显": 6, 1835 | "心": 
383, 1836 | "采": 49, 1837 | "告": 12, 1838 | "棍": 1, 1839 | "捐": 24, 1840 | "染": 3, 1841 | "R": 288, 1842 | "∏": 4, 1843 | "给": 117, 1844 | "衢": 4, 1845 | "泥": 5, 1846 | "赤": 1, 1847 | "杨": 11, 1848 | "印": 16, 1849 | "石": 14, 1850 | "能": 425, 1851 | "威": 4, 1852 | "简": 36, 1853 | "嘉": 6, 1854 | "于": 2477, 1855 | "我": 118, 1856 | "农": 34, 1857 | "生": 424, 1858 | "模": 89, 1859 | "梢": 1, 1860 | "芦": 1, 1861 | "累": 5, 1862 | "爱": 11, 1863 | "现": 277, 1864 | "圳": 5, 1865 | "丹": 3, 1866 | "=": 5025, 1867 | "政": 18, 1868 | "J": 5, 1869 | "居": 29, 1870 | "钠": 3, 1871 | "从": 584, 1872 | "扑": 4, 1873 | "提": 96, 1874 | "套": 42, 1875 | "竖": 22, 1876 | "忘": 6, 1877 | "回": 114, 1878 | "橡": 3, 1879 | "佣": 2, 1880 | "慧": 9, 1881 | "拦": 2, 1882 | "台": 123, 1883 | "间": 882, 1884 | "}": 173, 1885 | "曾": 4, 1886 | "境": 9, 1887 | "异": 39, 1888 | "宅": 2, 1889 | "娄": 2, 1890 | "斑": 3, 1891 | "店": 114, 1892 | "崖": 1, 1893 | "妙": 2, 1894 | "功": 8, 1895 | "鄞": 1, 1896 | "(": 75, 1897 | "央": 3, 1898 | "露": 5, 1899 | "核": 7, 1900 | "⊿": 1, 1901 | "局": 14, 1902 | "雇": 12, 1903 | "商": 264, 1904 | "凉": 5, 1905 | "半": 542, 1906 | "乌": 8, 1907 | "扔": 2, 1908 | "塘": 4, 1909 | "淡": 2, 1910 | "池": 38, 1911 | "该": 518, 1912 | "h": 137, 1913 | "端": 90, 1914 | "饮": 22, 1915 | "价": 499, 1916 | "赶": 12, 1917 | "拿": 9, 1918 | "腾": 1, 1919 | "j": 5, 1920 | "颗": 10, 1921 | "疆": 1, 1922 | "伍": 10, 1923 | "二": 499, 1924 | "题": 545, 1925 | "定": 544, 1926 | "障": 3, 1927 | "贡": 2, 1928 | "您": 1, 1929 | "措": 2, 1930 | "由": 408, 1931 | "论": 273, 1932 | "房": 50, 1933 | "诊": 3, 1934 | "魅": 1, 1935 | "光": 83, 1936 | "烈": 1, 1937 | "及": 172, 1938 | "位": 803, 1939 | "救": 9, 1940 | "S": 403, 1941 | "捕": 1, 1942 | "拔": 8, 1943 | "坝": 10, 1944 | "漠": 1, 1945 | "衣": 23, 1946 | "步": 73, 1947 | "▪": 20, 1948 | "旦": 8, 1949 | "承": 9, 1950 | "岁": 21, 1951 | "市": 259, 1952 | "理": 382, 1953 | "炉": 2, 1954 | "刊": 1, 1955 | "撑": 2, 1956 | "投": 67, 1957 | "得": 775, 1958 | "尖": 4, 1959 | "墙": 37, 1960 | "原": 407, 1961 | "没": 62, 1962 | "α": 151, 1963 | "踢": 4, 1964 | "肩": 2, 1965 | "航": 68, 1966 | "庭": 20, 1967 | "钱": 54, 1968 | "田": 3, 1969 | "决": 84, 1970 | "防": 11, 1971 | "猴": 5, 1972 | ">": 35, 1973 | "幂": 2, 1974 | "先": 111, 1975 | "削": 2, 1976 | "仍": 39, 1977 | "每": 928, 1978 | "志": 10, 1979 | "泡": 1, 1980 | "赠": 7, 1981 | "解": 543, 1982 | "此": 306, 1983 | "⑦": 1, 1984 | "续": 61, 1985 | "彬": 1, 1986 | "确": 283, 1987 | "却": 1, 1988 | "迷": 1, 1989 | "汶": 1, 1990 | "霾": 3, 1991 | "封": 15, 1992 | "震": 9, 1993 | "选": 139, 1994 | "冈": 5, 1995 | "洋": 3, 1996 | "不": 1149, 1997 | "斐": 4, 1998 | "媚": 1, 1999 | "溢": 4, 2000 | "认": 26, 2001 | ")": 75, 2002 | "膨": 1, 2003 | "紫": 7, 2004 | "荆": 1, 2005 | "鸭": 2, 2006 | "根": 324, 2007 | "攻": 1, 2008 | "寄": 3, 2009 | "均": 254, 2010 | "测": 186, 2011 | "桌": 35, 2012 | "罗": 1, 2013 | "糖": 19, 2014 | "秘": 3, 2015 | "象": 686, 2016 | "擦": 2, 2017 | "i": 151, 2018 | "关": 794, 2019 | "盲": 3, 2020 | "牵": 3, 2021 | "∪": 4, 2022 | "征": 7, 2023 | "圆": 1087, 2024 | "玉": 7, 2025 | "事": 21, 2026 | "抓": 5, 2027 | "—": 5, 2028 | "粗": 22, 2029 | "编": 25, 2030 | "多": 786, 2031 | "喝": 2, 2032 | "澡": 1, 2033 | "渠": 6, 2034 | "财": 6, 2035 | "Ω": 2, 2036 | "∨": 5, 2037 | "劳": 6, 2038 | "甲": 490, 2039 | "况": 74, 2040 | "施": 20, 2041 | "叉": 3, 2042 | "版": 3, 2043 | "介": 2, 2044 | "拓": 11, 2045 | "联": 33, 2046 | "四": 784, 2047 | "烟": 6, 2048 | "信": 60, 2049 | "习": 46, 2050 | "赢": 5, 2051 | "僧": 2, 2052 | "⊂": 3, 2053 | "韩": 2, 2054 | "!": 1, 2055 | "繁": 4, 2056 | "预": 22, 2057 | "下": 880, 2058 | "栏": 9, 2059 | "潮": 8, 2060 | "速": 531, 2061 | 
"萧": 1, 2062 | "盆": 2, 2063 | "谷": 5, 2064 | "?": 18, 2065 | "卉": 1, 2066 | "之": 518, 2067 | "释": 13, 2068 | "村": 30, 2069 | "打": 87, 2070 | "孙": 1, 2071 | "⊥": 481, 2072 | "姚": 2, 2073 | "瓜": 4, 2074 | "侧": 176, 2075 | "拨": 5, 2076 | "自": 177, 2077 | "篮": 26, 2078 | "困": 2, 2079 | "闲": 2, 2080 | "兵": 6, 2081 | "郴": 1, 2082 | "开": 235, 2083 | "它": 260, 2084 | "墅": 5, 2085 | "处": 318, 2086 | "例": 307, 2087 | "《": 4, 2088 | "暑": 5, 2089 | "Γ": 5, 2090 | "钝": 11, 2091 | "睛": 4, 2092 | "熟": 1, 2093 | "传": 35, 2094 | "谢": 2, 2095 | "*": 11, 2096 | "岭": 2, 2097 | "地": 484, 2098 | "层": 29, 2099 | "陆": 6, 2100 | "午": 22, 2101 | "赌": 3, 2102 | "材": 39, 2103 | "站": 46, 2104 | "存": 535, 2105 | "铝": 2, 2106 | "镜": 25, 2107 | "满": 384, 2108 | "你": 278, 2109 | "旧": 8, 2110 | "择": 46, 2111 | "快": 50, 2112 | "聪": 14, 2113 | "积": 860, 2114 | "驮": 3, 2115 | "物": 1081, 2116 | "右": 256, 2117 | "兴": 26, 2118 | "混": 16, 2119 | "找": 45, 2120 | "币": 19, 2121 | "品": 319, 2122 | "皆": 1, 2123 | "咱": 1, 2124 | "析": 259, 2125 | "官": 1, 2126 | "喜": 5, 2127 | "办": 13, 2128 | "渡": 1, 2129 | "欣": 2, 2130 | "∩": 8, 2131 | "推": 28, 2132 | "溶": 12, 2133 | "许": 7, 2134 | "落": 152, 2135 | "○": 13, 2136 | "愿": 5, 2137 | "﹁": 3, 2138 | "海": 72, 2139 | "镇": 13, 2140 | "集": 67, 2141 | "又": 78, 2142 | "郊": 4, 2143 | "跌": 1, 2144 | "虑": 15, 2145 | "某": 559, 2146 | "U": 1, 2147 | "坛": 12, 2148 | "乡": 3, 2149 | "正": 1218, 2150 | "波": 15, 2151 | "补": 38, 2152 | "'": 17, 2153 | "扩": 5, 2154 | "即": 84, 2155 | "遵": 1, 2156 | "起": 66, 2157 | "件": 471, 2158 | "诸": 2, 2159 | "腿": 4, 2160 | "孔": 5 2161 | } -------------------------------------------------------------------------------- /files/black.json: -------------------------------------------------------------------------------- 1 | { 2 | "white_list": [ 3 | "A4758.png", 4 | "A3905.png", 5 | "T359_0.png", 6 | "T898_10.png", 7 | "T1091_4.png", 8 | "B2258_1.png", 9 | "A2114.png", 10 | "A9393.png", 11 | "T40_11.png", 12 | "T411_4.png", 13 | "A2754.png", 14 | "A14295.png", 15 | "A14349.png", 16 | "A14364.png", 17 | "A15101.png", 18 | "A15241.png", 19 | "A15933.png", 20 | "A16323.png", 21 | "A1595.png", 22 | "A16092.png", 23 | "A17063.png", 24 | "A16761.png", 25 | "A17141.png", 26 | "A16560.png", 27 | "A16953.png", 28 | "A1745.png", 29 | "A17010.png", 30 | "A16435.png", 31 | "A17281.png", 32 | "A17980.png", 33 | "A1875.png", 34 | "A18451.png", 35 | "A18969.png", 36 | "A19152.png", 37 | "A19424.png", 38 | "A17989.png", 39 | "A18139.png", 40 | "A18891.png", 41 | "A19260.png", 42 | "A18701.png", 43 | "A19584.png", 44 | "A19012.png", 45 | "A215.png", 46 | "A20801.png", 47 | "A22541.png", 48 | "A22740.png", 49 | "A22290.png", 50 | "A21194.png", 51 | "A22899.png", 52 | "A22381.png", 53 | "A19650.png", 54 | "A26050.png", 55 | "A24554.png", 56 | "A309.png", 57 | "A26100.png", 58 | "A24084.png", 59 | "A2508.png", 60 | "A25709.png", 61 | "A26620.png", 62 | "A2594.png", 63 | "A24920.png", 64 | "A27604.png", 65 | "A27933.png", 66 | "A25019.png", 67 | "A26091.png", 68 | "A2432.png", 69 | "A2493.png", 70 | "A2755.png", 71 | "A23883.png", 72 | "A3437.png", 73 | "A27102.png", 74 | "A314.png", 75 | "A25250.png", 76 | "A25612.png", 77 | "A24169.png", 78 | "A28034.png", 79 | "A3210.png", 80 | "A26760.png", 81 | "A2538.png", 82 | "A2974.png", 83 | "A24609.png", 84 | "A20.png", 85 | "A24062.png", 86 | "A3223.png", 87 | "A24311.png", 88 | "A26699.png", 89 | "A23992.png", 90 | "A26012.png", 91 | "A2175.png", 92 | "A24603.png", 93 | "A27172.png", 94 | "A2004.png", 95 | "A20100.png", 96 | "A3808.png", 97 | 
"A4943.png", 98 | "A441.png", 99 | "A7366.png", 100 | "A7242.png", 101 | "A7697.png", 102 | "A5932.png", 103 | "A5298.png", 104 | "A5823.png", 105 | "A3937.png", 106 | "A6152.png", 107 | "A4141.png", 108 | "A4141.png", 109 | "A7360.png", 110 | "A5923.png", 111 | "A3945.png", 112 | "A6821.png", 113 | "A3852.png", 114 | "A6252.png", 115 | "A4188.png", 116 | "A3681.png", 117 | "A4947.png", 118 | "A6011.png", 119 | "A5304.png", 120 | "A5304.png", 121 | "A4979.png", 122 | "A7861.png", 123 | "A5400.png", 124 | "A4611.png", 125 | "A4883.png", 126 | "A368.png", 127 | "A4347.png", 128 | "A7100.png", 129 | "A4956.png", 130 | "A7133.png", 131 | "A6237.png", 132 | "A2923.png", 133 | "A3235.png", 134 | "A26992.png", 135 | "A4642.png", 136 | "A27272.png", 137 | "A5920.png", 138 | "A21381.png", 139 | "A6608.png", 140 | "A4419.png", 141 | "A23613.png", 142 | "A27283.png", 143 | "A24464.png", 144 | "A25601.png", 145 | "A7814.png", 146 | "A24509.png", 147 | "A24304.png", 148 | "A6120.png", 149 | "A3495.png", 150 | "A3949.png", 151 | "A24379.png", 152 | "A20111.png", 153 | "A22962.png", 154 | "A8877.png", 155 | "A8936.png", 156 | "A9761.png", 157 | "A8741.png", 158 | "A9064.png", 159 | "A8370.png", 160 | "A9829.png", 161 | "A9018.png", 162 | "B1879_1.png", 163 | "A9354.png", 164 | "A8201.png", 165 | "B1813_3.png", 166 | "A8350.png", 167 | "A8353.png", 168 | "A9446.png", 169 | "B1879_0.png", 170 | "A8674.png", 171 | "A9219.png", 172 | "B2404_2.png", 173 | "T1175_12.png", 174 | "B935_8.png", 175 | "T1140_5.png", 176 | "B523_1.png", 177 | "B523_5.png", 178 | "T1175_15.png", 179 | "B523_0.png", 180 | "T1058_17.png", 181 | "T1089_3.png", 182 | "B2741_0.png", 183 | "T1036_1.png", 184 | "T1184_4.png", 185 | "T129_18.png", 186 | "T134_7.png", 187 | "T142_4.png", 188 | "T144_4.png", 189 | "T169_10.png", 190 | "T169_6.png", 191 | "T174_0.png", 192 | "T200_0.png", 193 | "T20_8.png", 194 | "T217_6.png", 195 | "T217_8.png", 196 | "T230_6.png", 197 | "T235_12.png", 198 | "T23_4.png", 199 | "T244_5.png", 200 | "T247_8.png", 201 | "T261_8.png", 202 | "T270_12.png", 203 | "T294_4.png", 204 | "T300_12.png", 205 | "T302_1.png", 206 | "T311_11.png", 207 | "T321_11.png", 208 | "T321_3.png", 209 | "T321_9.png", 210 | "T324_2.png", 211 | "T327_5.png", 212 | "T328_7.png", 213 | "T329_1.png", 214 | "T331_0.png", 215 | "T340_1.png", 216 | "T352_4.png", 217 | "T352_8.png", 218 | "T356_5.png", 219 | "T358_7.png", 220 | "T359_6.png", 221 | "T366_12.png", 222 | "T372_4.png", 223 | "T374_5.png", 224 | "T374_6.png", 225 | "T381_2.png", 226 | "T381_4.png", 227 | "T381_6.png", 228 | "T382_3.png", 229 | "T387_1.png", 230 | "T389_14.png", 231 | "T38_1.png", 232 | "T38_4.png", 233 | "T396_6.png", 234 | "T3_8.png", 235 | "T403_9.png", 236 | "T409_0.png", 237 | "T40_14.png", 238 | "T40_15.png", 239 | "T40_2.png", 240 | "T40_6.png", 241 | "T411_0.png", 242 | "T411_5.png", 243 | "T41_1.png", 244 | "T50_16.png", 245 | "T50_19.png", 246 | "T53_13.png", 247 | "T53_9.png", 248 | "T580_4.png", 249 | "T580_5.png", 250 | "T582_3.png", 251 | "T582_5.png", 252 | "T583_4.png", 253 | "T586_1.png", 254 | "T58_0.png", 255 | "T58_3.png", 256 | "T58_7.png", 257 | "T597_0.png", 258 | "T602_7.png", 259 | "T602_9.png", 260 | "T607_0.png", 261 | "T619_12.png", 262 | "T619_6.png", 263 | "T619_7.png", 264 | "T636_0.png", 265 | "T636_4.png", 266 | "T642_1.png", 267 | "T647_18.png", 268 | "T647_2.png", 269 | "T647_20.png", 270 | "T64_0.png", 271 | "T64_1.png", 272 | "T658_11.png", 273 | "T658_6.png", 274 | "T663_11.png", 275 | "T66_3.png", 276 | "T677_6.png", 277 | 
"T693_0.png", 278 | "T693_9.png", 279 | "T695_0.png", 280 | "T710_14.png", 281 | "T711_5.png", 282 | "T712_4.png", 283 | "T71_0.png", 284 | "T71_1.png", 285 | "T71_2.png", 286 | "T71_3.png", 287 | "T71_4.png", 288 | "T71_5.png", 289 | "T71_6.png", 290 | "T71_7.png", 291 | "T71_8.png", 292 | "T71_9.png", 293 | "T724_10.png", 294 | "T725_11.png", 295 | "T726_1.png", 296 | "T734_2.png", 297 | "T736_15.png", 298 | "T736_2.png", 299 | "T736_5.png", 300 | "T740_2.png", 301 | "T745_1.png", 302 | "T756_7.png", 303 | "T757_4.png", 304 | "T762_0.png", 305 | "T767_1.png", 306 | "T767_6.png", 307 | "T770_0.png", 308 | "T770_10.png", 309 | "T770_11.png", 310 | "T770_6.png", 311 | "T772_11.png", 312 | "T775_9.png", 313 | "T77_1.png", 314 | "T795_1.png", 315 | "T795_11.png", 316 | "T7_7.png", 317 | "T7_9.png", 318 | "T803_6.png", 319 | "T803_7.png", 320 | "T810_5.png", 321 | "T810_6.png", 322 | "T810_7.png", 323 | "T813_5.png", 324 | "T823_4.png", 325 | "T823_5.png", 326 | "T840_6.png", 327 | "T844_9.png", 328 | "T848_1.png", 329 | "T855_2.png", 330 | "T856_18.png", 331 | "T856_2.png", 332 | "T856_4.png", 333 | "T865_6.png", 334 | "T86_1.png", 335 | "T86_6.png", 336 | "T879_6.png", 337 | "T884_4.png", 338 | "T886_1.png", 339 | "T898_8.png", 340 | "T913_14.png", 341 | "T915_4.png", 342 | "T919_1.png", 343 | "T932_3.png", 344 | "T945_12.png", 345 | "T945_13.png", 346 | "T945_15.png", 347 | "T945_16.png", 348 | "T945_17.png", 349 | "T945_18.png", 350 | "T945_8.png", 351 | "T963_1.png", 352 | "T96_5.png", 353 | "T96_6.png", 354 | "T972_6.png", 355 | "T979_13.png", 356 | "T994_4.png", 357 | "T997_10.png", 358 | "T999_7.png", 359 | "T106_1.png", 360 | "T188_11.png", 361 | "T763_1.png", 362 | "T763_2.png", 363 | "T865_0.png", 364 | "T876_9.png", 365 | "T999_3.png", 366 | "A1007.png", 367 | "A1264.png", 368 | "A14912.png", 369 | "A15901.png", 370 | "A17682.png", 371 | "A20064.png", 372 | "A24631.png", 373 | "A2751.png", 374 | "A4189.png", 375 | "A9707.png", 376 | "B2436_1.png", 377 | "B2861_1.png", 378 | "T1027_2.png", 379 | "T151_8.png", 380 | "T165_6.png", 381 | "T207_12.png", 382 | "T217_1.png", 383 | "T217_3.png", 384 | "T261_1.png", 385 | "T261_2.png", 386 | "T311_7.png", 387 | "T320_5.png", 388 | "T325_1.png", 389 | "T329_8.png", 390 | "T333_6.png", 391 | "T342_5.png", 392 | "T350_10.png", 393 | "T350_2.png", 394 | "T387_2.png", 395 | "T387_4.png", 396 | "T389_10.png", 397 | "T3_7.png", 398 | "T4_7.png", 399 | "T58_8.png", 400 | "T597_9.png", 401 | "T59_6.png", 402 | "T5_0.png", 403 | "T5_1.png", 404 | "T619_13.png", 405 | "T624_2.png", 406 | "T636_1.png", 407 | "T647_21.png", 408 | "T658_13.png", 409 | "T663_3.png", 410 | "T667_2.png", 411 | "T684_5.png", 412 | "T684_8.png", 413 | "T687_2.png", 414 | "T6_11.png", 415 | "T731_5.png", 416 | "T735_1.png", 417 | "T756_12.png", 418 | "T757_3.png", 419 | "T795_13.png", 420 | "T838_9.png", 421 | "T856_19.png", 422 | "T857_0.png", 423 | "T86_7.png", 424 | "T886_0.png", 425 | "T898_9.png", 426 | "T933_5.png", 427 | "T997_7.png", 428 | "T374_0.png", 429 | "A24374.png", 430 | "T799_1.png", 431 | "T398_7.png", 432 | "T949_8.png", 433 | "T789_1.png", 434 | "T1009_1.png", 435 | "T4_5.png", 436 | "T816_0.png", 437 | "T159_6.png", 438 | "A14243.png", 439 | "A1610.png", 440 | "A6442.png", 441 | "A3354.png", 442 | "T690_4.png", 443 | "A4609.png", 444 | "T1077_7.png", 445 | "A5355.png", 446 | "T73_3.png", 447 | "A3692.png", 448 | "A9506.png", 449 | "A14812.png", 450 | "T936_8.png", 451 | "T1071_5.png", 452 | "T216_7.png", 453 | "T1105_2.png", 454 | "T261_3.png", 455 | 
"A21449.png", 456 | "T124_13.png", 457 | "T583_6.png", 458 | "T942_7.png", 459 | "B1442_9.png", 460 | "B968_3.png", 461 | "T401_6.png", 462 | "T230_10.png", 463 | "A2143.png", 464 | "A2143.png", 465 | "A9643.png", 466 | "T587_1.png", 467 | "A24620.png", 468 | "T934_0.png", 469 | "A2433.png", 470 | "T881_5.png", 471 | "T931_24.png", 472 | "B858_2.png", 473 | "T1009_0.png", 474 | "T270_14.png", 475 | "T181_18.png", 476 | "T1071_6.png", 477 | "A4674.png", 478 | "A16263.png", 479 | "A6368.png", 480 | "T1134_7.png", 481 | "A7325.png", 482 | "T174_5.png", 483 | "B685_0.png", 484 | "T285_2.png", 485 | "A20784.png", 486 | "A19004.png", 487 | "A2612.png", 488 | "T374_8.png", 489 | "B2681_2.png", 490 | "A26479.png", 491 | "B1958_0.png", 492 | "T312_1.png", 493 | "A1268.png", 494 | "A798.png", 495 | "A7143.png", 496 | "B121_0.png", 497 | "A20795.png", 498 | "A21802.png", 499 | "A2295.png", 500 | "A4076.png", 501 | "A3121.png", 502 | "A27044.png", 503 | "T684_6.png", 504 | "A6189.png", 505 | "T723_3.png", 506 | "T218_9.png", 507 | "T279_5.png", 508 | "A4335.png", 509 | "T634_7.png", 510 | "T870_2.png", 511 | "A4889.png" 512 | ], 513 | "black_list": [ 514 | "A14430.png", 515 | "A1315.png", 516 | "A1573.png", 517 | "A16342.png", 518 | "A18403.png", 519 | "A18610.png", 520 | "A19289.png", 521 | "A1945.png", 522 | "A19462.png", 523 | "A19233.png", 524 | "A23543.png", 525 | "A22742.png", 526 | "A22689.png", 527 | "A20253.png", 528 | "A19845.png", 529 | "A20654.png", 530 | "A475.png", 531 | "B1339_5.png", 532 | "B1462_3.png", 533 | "B1339_4.png", 534 | "B1014_0.png", 535 | "B1610_7.png", 536 | "B1864_0.png", 537 | "B1864_1.png", 538 | "B1141_13.png", 539 | "B1884_0.png", 540 | "B1141_3.png", 541 | "B1721_0.png", 542 | "B1252_1.png", 543 | "B1877_3.png", 544 | "B1801_3.png", 545 | "B1422_1.png", 546 | "B1387_2.png", 547 | "B1339_3.png", 548 | "B1007_0.png", 549 | "B1131_0.png", 550 | "B1252_0.png", 551 | "B1141_2.png", 552 | "B1141_14.png", 553 | "B1652_1.png", 554 | "B1422_3.png", 555 | "B1141_8.png", 556 | "B1652_2.png", 557 | "B1052_2.png", 558 | "B1141_4.png", 559 | "B1141_6.png", 560 | "B1422_5.png", 561 | "B1339_6.png", 562 | "B1462_2.png", 563 | "B1410_0.png", 564 | "B1422_4.png", 565 | "B1339_7.png", 566 | "B1864_3.png", 567 | "B1387_1.png", 568 | "B1864_4.png", 569 | "B1864_2.png", 570 | "B1339_2.png", 571 | "B1801_2.png", 572 | "B1877_2.png", 573 | "B1052_1.png", 574 | "B1462_1.png", 575 | "B1877_1.png", 576 | "B1387_0.png", 577 | "B1387_3.png", 578 | "B1566_0.png", 579 | "B1141_12.png", 580 | "B2756_5.png", 581 | "B245_0.png", 582 | "B2530_5.png", 583 | "B2999_3.png", 584 | "B227_1.png", 585 | "B2411_0.png", 586 | "B364_0.png", 587 | "B2530_8.png", 588 | "B552_0.png", 589 | "B535_4.png", 590 | "B2756_7.png", 591 | "B2999_6.png", 592 | "B631_0.png", 593 | "B245_3.png", 594 | "B2043_0.png", 595 | "B2513_0.png", 596 | "B2557_4.png", 597 | "B3090_4.png", 598 | "B2982_16.png", 599 | "B2267_0.png", 600 | "B2092_3.png", 601 | "B364_1.png", 602 | "B2999_5.png", 603 | "T103_5.png", 604 | "B759_4.png", 605 | "T1070_1.png", 606 | "T1095_4.png", 607 | "B866_10.png", 608 | "B762_4.png", 609 | "T1135_6.png", 610 | "T1108_9.png", 611 | "T1056_4.png", 612 | "T1061_2.png", 613 | "T105_9.png", 614 | "T1135_7.png", 615 | "T1174_7.png", 616 | "B866_6.png", 617 | "T1070_4.png", 618 | "B866_1.png", 619 | "B866_4.png", 620 | "B2999_2.png", 621 | "T107_3.png", 622 | "B227_0.png", 623 | "B866_0.png", 624 | "B2412_0.png", 625 | "B2982_6.png", 626 | "B2999_11.png", 627 | "B2993_6.png", 628 | "T1070_2.png", 629 | 
"T1070_12.png", 630 | "B245_1.png", 631 | "B3007_0.png", 632 | "B2267_2.png", 633 | "T1112_10.png", 634 | "B535_3.png", 635 | "B552_1.png", 636 | "B2092_4.png", 637 | "B245_2.png", 638 | "T1070_3.png", 639 | "B421_0.png", 640 | "B2557_6.png", 641 | "T1070_0.png", 642 | "B2530_3.png", 643 | "B2092_2.png", 644 | "B762_5.png", 645 | "B2557_5.png", 646 | "B2511_2.png", 647 | "B759_3.png", 648 | "B2982_14.png", 649 | "T1135_4.png", 650 | "B2530_6.png", 651 | "B227_2.png", 652 | "B2215_0.png", 653 | "B2530_9.png", 654 | "B2982_7.png", 655 | "T1070_8.png", 656 | "B2176_0.png", 657 | "B759_2.png", 658 | "B2999_0.png", 659 | "B762_6.png", 660 | "B2982_5.png", 661 | "T1052_4.png", 662 | "B2412_2.png", 663 | "B634_0.png", 664 | "B552_2.png", 665 | "B2999_10.png", 666 | "B762_0.png", 667 | "B2982_4.png", 668 | "B2999_4.png", 669 | "B2452_0.png", 670 | "B866_3.png", 671 | "B2567_0.png", 672 | "B2703_2.png", 673 | "B364_2.png", 674 | "B2557_7.png", 675 | "T1155_3.png", 676 | "B2252_0.png", 677 | "B2999_1.png", 678 | "T1046_3.png", 679 | "T1135_0.png", 680 | "B2530_14.png", 681 | "B227_3.png", 682 | "B552_3.png", 683 | "B866_2.png", 684 | "B2557_3.png", 685 | "B216_0.png", 686 | "B2412_1.png", 687 | "B2530_13.png", 688 | "B2649_0.png", 689 | "B2748_0.png", 690 | "B2748_1.png", 691 | "B2756_4.png", 692 | "B2982_2.png", 693 | "B2982_3.png", 694 | "B2993_1.png", 695 | "B2993_5.png", 696 | "B3084_0.png", 697 | "B3090_0.png", 698 | "B535_8.png", 699 | "B699_0.png", 700 | "B803_0.png", 701 | "T1071_1.png", 702 | "T1112_9.png", 703 | "T1135_8.png", 704 | "T117_2.png", 705 | "T121_2.png", 706 | "T127_4.png", 707 | "T129_1.png", 708 | "T129_13.png", 709 | "T150_8.png", 710 | "T169_12.png", 711 | "T182_12.png", 712 | "T183_5.png", 713 | "T188_6.png", 714 | "T19_2.png", 715 | "T206_13.png", 716 | "T206_8.png", 717 | "T216_9.png", 718 | "T230_7.png", 719 | "T233_11.png", 720 | "T258_3.png", 721 | "T260_3.png", 722 | "T288_0.png", 723 | "T288_11.png", 724 | "T288_9.png", 725 | "T294_9.png", 726 | "T300_11.png", 727 | "T319_8.png", 728 | "T32_0.png", 729 | "T32_1.png", 730 | "T32_2.png", 731 | "T32_4.png", 732 | "T32_5.png", 733 | "T32_6.png", 734 | "T32_9.png", 735 | "T330_8.png", 736 | "T340_10.png", 737 | "T344_3.png", 738 | "T357_5.png", 739 | "T377_3.png", 740 | "T389_6.png", 741 | "T38_2.png", 742 | "T390_8.png", 743 | "T398_9.png", 744 | "T3_0.png", 745 | "T3_1.png", 746 | "T3_6.png", 747 | "T4_4.png", 748 | "T50_7.png", 749 | "T53_11.png", 750 | "T53_14.png", 751 | "T593_5.png", 752 | "T597_8.png", 753 | "T59_7.png", 754 | "T608_4.png", 755 | "T648_4.png", 756 | "T659_7.png", 757 | "T668_4.png", 758 | "T668_5.png", 759 | "T668_6.png", 760 | "T668_7.png", 761 | "T668_8.png", 762 | "T688_8.png", 763 | "T691_4.png", 764 | "T701_11.png", 765 | "T710_7.png", 766 | "T747_0.png", 767 | "T747_1.png", 768 | "T747_10.png", 769 | "T749_1.png", 770 | "T74_1.png", 771 | "T74_7.png", 772 | "T760_6.png", 773 | "T765_1.png", 774 | "T767_9.png", 775 | "T781_1.png", 776 | "T781_11.png", 777 | "T781_12.png", 778 | "T781_13.png", 779 | "T781_14.png", 780 | "T781_15.png", 781 | "T781_16.png", 782 | "T781_17.png", 783 | "T781_2.png", 784 | "T781_21.png", 785 | "T781_3.png", 786 | "T781_4.png", 787 | "T781_5.png", 788 | "T781_6.png", 789 | "T781_7.png", 790 | "T792_1.png", 791 | "T792_2.png", 792 | "T792_3.png", 793 | "T792_4.png", 794 | "T792_6.png", 795 | "T792_7.png", 796 | "T794_3.png", 797 | "T803_8.png", 798 | "T813_4.png", 799 | "T817_11.png", 800 | "T817_13.png", 801 | "T826_1.png", 802 | "T84_0.png", 803 | "T84_10.png", 804 
| "T84_12.png", 805 | "T84_13.png", 806 | "T84_14.png", 807 | "T84_15.png", 808 | "T84_4.png", 809 | "T84_5.png", 810 | "T85_13.png", 811 | "T865_5.png", 812 | "T86_2.png", 813 | "T881_8.png", 814 | "T886_11.png", 815 | "T914_11.png", 816 | "T931_12.png", 817 | "T931_13.png", 818 | "T949_3.png", 819 | "T96_9.png", 820 | "T997_5.png", 821 | "T997_9.png", 822 | "B1052_0.png", 823 | "B1141_15.png", 824 | "B1141_5.png", 825 | "B1141_7.png", 826 | "B1422_2.png", 827 | "B1652_0.png", 828 | "B2166_0.png", 829 | "B2267_1.png", 830 | "B2530_15.png", 831 | "B2530_16.png", 832 | "B2530_17.png", 833 | "B2530_4.png", 834 | "B2530_7.png", 835 | "B2649_1.png", 836 | "B2756_3.png", 837 | "B2756_6.png", 838 | "B2865_0.png", 839 | "B2865_1.png", 840 | "B2982_12.png", 841 | "B2982_13.png", 842 | "B2982_15.png", 843 | "B2982_8.png", 844 | "B2993_0.png", 845 | "B3064_0.png", 846 | "B3126_0.png", 847 | "B421_1.png", 848 | "B535_0.png", 849 | "B535_1.png", 850 | "B535_2.png", 851 | "B866_5.png", 852 | "T1031_2.png", 853 | "T106_5.png", 854 | "T1070_10.png", 855 | "T1070_11.png", 856 | "T1070_9.png", 857 | "T1135_5.png", 858 | "T1164_6.png", 859 | "T143_2.png", 860 | "T15_4.png", 861 | "T221_4.png", 862 | "T279_1.png", 863 | "T288_1.png", 864 | "T288_10.png", 865 | "T288_12.png", 866 | "T288_13.png", 867 | "T288_14.png", 868 | "T288_15.png", 869 | "T288_16.png", 870 | "T288_17.png", 871 | "T288_2.png", 872 | "T288_3.png", 873 | "T288_4.png", 874 | "T288_5.png", 875 | "T288_6.png", 876 | "T288_7.png", 877 | "T288_8.png", 878 | "T321_10.png", 879 | "T321_6.png", 880 | "T32_3.png", 881 | "T32_7.png", 882 | "T32_8.png", 883 | "T352_6.png", 884 | "T354_4.png", 885 | "T41_2.png", 886 | "T53_0.png", 887 | "T668_2.png", 888 | "T668_3.png", 889 | "T66_1.png", 890 | "T747_11.png", 891 | "T747_5.png", 892 | "T747_6.png", 893 | "T747_7.png", 894 | "T747_8.png", 895 | "T747_9.png", 896 | "T773_0.png", 897 | "T792_5.png", 898 | "T83_2.png", 899 | "T84_1.png", 900 | "T84_11.png", 901 | "T84_2.png", 902 | "T84_3.png", 903 | "T84_6.png", 904 | "T84_7.png", 905 | "T84_8.png", 906 | "T84_9.png", 907 | "T95_2.png", 908 | "T302_3.png", 909 | "T404_3.png", 910 | "T714_3.png", 911 | "T723_4.png", 912 | "T931_9.png", 913 | "T825_1.png", 914 | "T285_3.png", 915 | "T835_23.png", 916 | "T295_3.png", 917 | "T302_10.png", 918 | "B2069_1.png", 919 | "T76_5.png", 920 | "T1172_8.png", 921 | "T1134_17.png", 922 | "T295_4.png", 923 | "A9514.png", 924 | "T765_9.png", 925 | "T1020_0.png", 926 | "T933_8.png", 927 | "T329_0.png", 928 | "T346_4.png", 929 | "T820_0.png", 930 | "T761_1.png", 931 | "T783_0.png", 932 | "T917_0.png", 933 | "T238_8.png", 934 | "T216_17.png", 935 | "T931_0.png", 936 | "T374_9.png", 937 | "T1026_13.png", 938 | "T183_7.png", 939 | "T769_2.png", 940 | "T176_1.png", 941 | "T945_22.png", 942 | "T247_0.png", 943 | "T981_3.png", 944 | "T322_4.png", 945 | "T137_0.png", 946 | "T35_3.png", 947 | "T355_4.png", 948 | "T877_0.png", 949 | "T776_0.png", 950 | "T388_8.png", 951 | "T207_16.png", 952 | "T196_8.png", 953 | "T929_10.png", 954 | "T1076_10.png", 955 | "T229_0.png", 956 | "T927_5.png", 957 | "T760_0.png", 958 | "T18_0.png", 959 | "T928_9.png", 960 | "T230_12.png", 961 | "T807_5.png", 962 | "T129_0.png", 963 | "T164_9.png", 964 | "T240_0.png", 965 | "T354_3.png", 966 | "T912_0.png", 967 | "T366_6.png", 968 | "T231_5.png", 969 | "T179_0.png", 970 | "T82_2.png", 971 | "T191_7.png", 972 | "T243_1.png", 973 | "T207_9.png", 974 | "T1007_4.png", 975 | "A25503.png", 976 | "T834_2.png", 977 | "T371_0.png", 978 | "T770_8.png", 979 | 
"T349_3.png", 980 | "T995_9.png", 981 | "T1016_2.png", 982 | "T227_8.png", 983 | "T771_3.png", 984 | "T1034_5.png", 985 | "T755_2.png", 986 | "A24951.png", 987 | "T207_14.png", 988 | "T935_7.png", 989 | "A8455.png", 990 | "T684_9.png", 991 | "T989_5.png", 992 | "A6750.png", 993 | "A631.png", 994 | "A8794.png", 995 | "A25351.png", 996 | "A7508.png", 997 | "A2718.png", 998 | "A26059.png", 999 | "T1134_4.png", 1000 | "A254.png", 1001 | "A9840.png", 1002 | "A511.png", 1003 | "A20920.png", 1004 | "A15739.png", 1005 | "A874.png", 1006 | "A21434.png", 1007 | "A22940.png", 1008 | "A26491.png", 1009 | "A5271.png", 1010 | "A19883.png", 1011 | "A7578.png", 1012 | "T124_8.png", 1013 | "T919_2.png", 1014 | "A25805.png", 1015 | "A5626.png", 1016 | "A25754.png", 1017 | "A8109.png", 1018 | "A20859.png", 1019 | "A5111.png", 1020 | "A5019.png", 1021 | "A26234.png", 1022 | "A20231.png", 1023 | "A26382.png", 1024 | "A5864.png", 1025 | "A22103.png", 1026 | "A26384.png", 1027 | "A3577.png", 1028 | "T684_10.png", 1029 | "A22322.png", 1030 | "A3374.png", 1031 | "T1037_4.png", 1032 | "A4999.png", 1033 | "A5769.png", 1034 | "A27040.png", 1035 | "T234_7.png", 1036 | "T725_10.png", 1037 | "T302_15.png", 1038 | "T688_9.png", 1039 | "T6_6.png", 1040 | "A296.png", 1041 | "A21322.png", 1042 | "T251_4.png", 1043 | "A8899.png" 1044 | ] 1045 | } 1046 | -------------------------------------------------------------------------------- /files/src/A81.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yinchangchang/ocr_densenet/a31f57e006f73b52b3881fd4a771320f02df2147/files/src/A81.png -------------------------------------------------------------------------------- /files/src/B1000_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yinchangchang/ocr_densenet/a31f57e006f73b52b3881fd4a771320f02df2147/files/src/B1000_0.png -------------------------------------------------------------------------------- /files/ttf/simsun.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yinchangchang/ocr_densenet/a31f57e006f73b52b3881fd4a771320f02df2147/files/ttf/simsun.ttf -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | Pillow 2 | fuzzywuzzy 3 | numpy==1.14.2 4 | tqdm==4.19.4 5 | scikit-image==0.13.0 6 | scikit-learn==0.19.1 7 | torchvision==0.2.0 8 | scipy==0.19.0 9 | matplotlib==2.0.2 10 | --------------------------------------------------------------------------------