├── .gitignore
├── README.md
├── code
│   ├── ocr
│   │   ├── dataloader.py
│   │   ├── densenet.py
│   │   ├── main.py
│   │   ├── resnet.py
│   │   └── tools
│   │       ├── __init__.py
│   │       ├── measures.py
│   │       ├── parse.py
│   │       ├── plot.py
│   │       ├── py_op.py
│   │       ├── segmentation.py
│   │       └── utils.py
│   └── preprocessing
│       ├── analysis_dataset.py
│       ├── map_word_to_index.py
│       └── show_black.py
├── files
│   ├── alphabet_count_dict.json
│   ├── alphabet_index_dict.json
│   ├── black.json
│   ├── image_hw_ratio_dict.json
│   ├── src
│   │   ├── A81.png
│   │   └── B1000_0.png
│   ├── train.csv
│   ├── train_alphabet.json
│   └── ttf
│       └── simsun.ttf
└── requirement.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 | data/
106 | result/
107 | results/
108 | tmp.py
109 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # OCR
2 | Winner of the [1st Xi'an Jiaotong University AI Practice Competition (2018 AI Practice Competition -- Image Text Recognition)](http://competition.heils.cn/main.html)
3 |
4 |
5 | # Results
6 | The competition computes an f1score for each entry and averages it over all entries; the exact metric is described [here](http://competition.heils.cn/main.html). The computation here does not count repeated characters within a sentence more than once, so the f1score is lower than the final submitted score:
7 |
8 | | - | train | val |
9 | | :----------------: | :----------------: | :----------------: |
10 | | f1score | 0.9911 | 0.9582 |
11 | | recall | 0.9943 | 0.9574 |
12 | | precision | 0.9894 | 0.9637 |
13 |
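For reference, a minimal sketch of the set-based per-entry metric described above (my reading of the scoring rule, not the official scorer):

```
def entry_scores(pred, gt):
    # repeated characters within one entry are counted only once
    pred, gt = set(pred), set(gt)
    tp = len(pred & gt)
    precision = float(tp) / max(len(pred), 1)
    recall = float(tp) / max(len(gt), 1)
    f1score = 2 * precision * recall / max(precision + recall, 1e-20)
    return f1score, recall, precision
```
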
14 | # Model description
15 | 1. Model
16 |
17 | A densenet backbone is used. The model takes a (64×512) image as input and outputs an (8×64×2159) probability map.
18 |
19 | The image is divided into (8×8) cells, and in each cell the probabilities of the 2159 characters are predicted.
20 |
21 | 2. Loss
22 |
23 | The (8×64×2159) probabilities are max-pooled along the length and width directions to get a (2159) vector: the probability that each character appears somewhere in the image (see the sketch at the end of this section).
24 |
25 | balance: the loss is computed separately for positive and negative examples so that the total positive weight equals the total negative weight, which handles the class imbalance.
26 |
27 | hard-mining: an additional loss term on the hardest positive and negative predictions (see Loss in code/ocr/main.py).
28 |
29 | 3. Text detection
30 | The (8×64×2159) probabilities are max-pooled along the width (the 64-pixel side) to get (64×2159) probabilities.
31 | Characters are then predicted cell by cell along the length direction and concatenated into the full sentence.
32 |
33 | Known issue: two consecutive occurrences of the same character cannot be detected twice.
34 |
35 | Below is an example where the text is recognized correctly: 的长为半径作圆
36 |
37 |
38 |
39 | Below is an example where the text is recognized incorrectly: 为10元;经粗加工后销售,每
40 |
41 |
42 |
43 |
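The loss and the per-cell decoding can be summarized with a minimal sketch (simplified from code/ocr/main.py; the per-class balancing weights, hard-mining and the extra rules for repeated characters and overlapping patches used in the real code are omitted, and the shapes are illustrative defaults for a 64×512 input):

```
import numpy as np
import torch.nn.functional as F

def image_level_loss(cell_probs, target):
    # cell_probs: (N, 2159, 8, 64) per-cell character probabilities after the sigmoid
    # target:     (N, 2159) multi-label ground truth
    probs = cell_probs.view(cell_probs.size(0), cell_probs.size(1), -1).max(2)[0]
    return F.binary_cross_entropy(probs, target)

def decode(cell_probs, index_word_dict, th=0.5):
    # cell_probs: numpy array of shape (8, 64, 2159)
    col_probs = cell_probs.max(axis=0)          # pool over the 8 vertical cells -> (64, 2159)
    result, last = u'', set()
    for col in col_probs:                       # read the 64 columns left to right
        new = set(index_word_dict[int(i)] for i in np.where(col > th)[0])
        result += u''.join(w for w in new if w not in last)
        last = new
    return result
```
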
44 | # Directory layout
45 | ocr
46 | |
47 | |--code
48 | |
49 | |--files
50 | | |
51 | | |--train.csv
52 | |
53 | |--data
54 | |
55 | |--dataset
56 | | |
57 | | |--train
58 | | |
59 | | |--test
60 | |
61 | |--result
62 | | |
63 | | |--test_result.csv
64 | |
65 | |--images  any images can go in this folder; I used the celebA dataset for pretraining
66 |
67 | # Environment
68 | Ubuntu16.04, python2.7, CUDA9.0
69 |
70 | Install [pytorch](https://pytorch.org/), recommended version: 0.2.0_3
71 | ```
72 | pip install -r requirement.txt
73 | ```
74 |
75 | # Download the data
76 | Download the preliminary-round and final-round data and the models from [here](https://pan.baidu.com/s/1w0iEE7q84IolmZXwttOxVw), then merge the training sets and the test sets.
77 |
78 |
79 | # Preprocessing
80 | If you keep the provided dataset, this step is not needed.
81 |
82 | If you switch to a different dataset, replace files/train.csv as well (a rough sketch of the mapping step follows the commands below).
83 | ```
84 | cd code/preprocessing
85 | python map_word_to_index.py
86 | python analysis_dataset.py
87 | ```
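For a new dataset, the character-to-index mapping amounts to something like the rough sketch below (hypothetical: it assumes train.csv rows look like `name,content` with UTF-8 text; check code/preprocessing/map_word_to_index.py for the actual logic and output files):

```
import csv, json

counts = {}
with open('../../files/train.csv') as f:
    for row in csv.reader(f):
        for ch in row[1].decode('utf8'):      # Python 2.7, as used by the rest of the repo
            counts[ch] = counts.get(ch, 0) + 1

word_index = {ch: i for i, ch in enumerate(sorted(counts))}
with open('../../files/alphabet_index_dict.json', 'w') as f:
    json.dump(word_index, f)
with open('../../files/alphabet_count_dict.json', 'w') as f:
    json.dump(counts, f)
```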
88 |
89 | # Training
90 | ```
91 | cd code/ocr
92 | python main.py
93 | ```
94 |
95 | # Testing
96 | While the f1score is below 0.9, train with lr=0.001 and without hard-mining;
97 |
98 | once the f1score is above 0.9, train with lr=0.0001 and with hard-mining.
99 |
100 | The resulting models are saved in different folders.
101 | ```
102 | cd code/ocr
103 | python main.py --phase test --resume ../../data/models-small/densenet/eval-16-1/best_f1score.ckpt
104 | ```
105 |
--------------------------------------------------------------------------------
/code/ocr/dataloader.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 |
3 | """
4 | Read images and corresponding labels.
5 | """
6 |
7 | import numpy as np
8 | import os
9 | import json
10 | # import skimage
11 | # from skimage import io
12 | from PIL import Image,ImageDraw,ImageFont,ImageFilter
13 | from torch.utils.data import Dataset
14 | import time
15 |
16 | filters = [
17 | ImageFilter.SMOOTH, # smoothing, usable when the font size is above 16
18 | ImageFilter.SMOOTH_MORE, # smoothing, usable when the font size is above 16
19 | ImageFilter.GaussianBlur(radius=1), # usable when the font size is above 16
20 |
21 | ImageFilter.GaussianBlur(radius=2), # usable when the font size is above 32
22 | ImageFilter.BLUR, # usable when the font size is above 32
23 | ]
24 |
25 | def histeq (im,nbr_bins =256):
26 | # histogram equalization of a grayscale image
27 | # inputs: a grayscale image and the number of histogram bins
28 | # returns the equalized image and the cumulative distribution function used for the pixel mapping
29 | # compute the image histogram
30 | imhist,bins =np.histogram(im.flatten(),nbr_bins,normed=True)
31 | cdf =imhist.cumsum() #cumulative distribution function
32 | cdf =255*cdf/cdf[-1] # normalize with the last element of the cdf (index -1)
33 | # so the mapping covers the 0-255 range
34 | # use linear interpolation of the cdf to compute the new pixel values
35 | im2=np.interp(im.flatten(),bins[:-1],cdf) # im2 is an array
36 | return im2.reshape(im.shape),cdf
37 |
38 |
39 | class DataSet(Dataset):
40 | def __init__(self,
41 | image_names,
42 | image_label_dict,
43 | class_num,
44 | transform=None,
45 | image_size=None, # final generated image size
46 | word_index_dict=None, # mapping from character to index
47 | phase='train', # phase
48 | args=None, # global arguments
49 | font_range=None, # range of generated font sizes
50 | rotate_range=None, # image rotation range
51 | margin=None # keep characters away from the image border so they are not lost when rotating
52 | ):
53 |
54 | self.font_range = font_range
55 | self.rotate_range = rotate_range
56 | self.margin = margin
57 | self.image_names = image_names
58 | self.image_label_dict = image_label_dict
59 | self.transform = transform
60 | self.phase = phase
61 | self.class_num = class_num
62 | self.word_labels = { }
63 | self.image_size = image_size
64 | self.word_index_dict = word_index_dict
65 | self.args = args
66 | if self.phase != 'pretrain':
67 | for image_name in image_names:
68 | image_name = image_name.split('/')[-1]
69 | if image_name not in image_label_dict:
70 | try:
71 | image_label_dict[image_name] = image_label_dict[image_name.replace('seg.','').split('.png')[0]+'.png']
72 | except:
73 | image_label_dict[image_name] = ''
74 | word_label = np.zeros(class_num)
75 | label = image_label_dict[image_name]
76 | for l in label.split():
77 | word_label[int(l)] = 1
78 | self.word_labels[image_name] = word_label.astype(np.float32)
79 |
80 | def __getitem__(self, index):
81 | image_name = self.image_names[index]
82 | # print self.image_size
83 | if self.phase == 'pretrain':
84 | image = Image.open(image_name).convert('RGB')
85 | # change the gray level
86 | image = np.array(image)
87 | r = get_random(index)
88 | # the background is usually a bright color
89 | if r < 0.3:
90 | min_rgb = 192.
91 | elif r < 0.7:
92 | min_rgb = 128.
93 | else:
94 | min_rgb = 64.
95 | if self.args.model == 'resnet':
96 | pass
97 | elif index % 2 == 0:
98 | image = image / (255. - min_rgb) + min_rgb
99 | else:
100 | image[image 0.5 and self.args.epoch > 35:
133 | noise_level = 10
134 | noise = np.random.random(image.shape) * noise_level - noise_level / 2.
135 | image = image + noise
136 | '''
137 | image = (image / 128. - 1).astype(np.float32)
138 |
139 | if font_size > 32:
140 | size_label = 1
141 | elif font_size < 16:
142 | size_label = 0
143 | else:
144 | size_label = 11
145 | size_label = np.array([size_label]).astype(np.float32)
146 |
147 | return image_name, image.astype(np.float32), label, bbox_label, seg_label, size_label
148 |
149 | elif self.phase == 'seg':
150 | # keep the same resolution as the original image
151 | image = Image.open(image_name).convert('RGB')
152 | # image_name = image_name.split('/')[-1]
153 | # image = image.resize(self.image_size)
154 | image = np.transpose(np.array(image), [2,0,1]).astype(np.float32)
155 | min_size = 32
156 | shape = (np.array(image.shape).astype(np.int32) / min_size) * min_size + min_size # * 2
157 | new_image = np.zeros([3, shape[1], shape[2]], dtype=np.float32)
158 | '''
159 | for i in range(3):
160 | gray = sorted(image[i].reshape(-1))
161 | gray = gray[len(gray)/2]
162 | new_image[i] = gray
163 | '''
164 | # new_image[:, min_size/2:image.shape[1]+min_size/2, min_size/2:image.shape[2]+min_size/2] = image
165 | new_image[:, :image.shape[1], :image.shape[2]] = image
166 | image = new_image
167 | # word_label = self.word_labels[image_name]
168 | image = (image / 128. - 1).astype(np.float32)
169 | return image_name, image, np.zeros(self.class_num, dtype=np.float32)
170 | else:
171 | seg_name = image_name.replace('train','seg.train').replace('test','seg.test') + '.seg.crop.png'
172 | no_aug = self.args.no_aug
173 | if os.path.exists(seg_name):
174 | # image, word_label = random_crop_image(seg_name, self.image_label_dict[image_name.split('/')[-1]], self.image_size, self.class_num, self.phase, index, no_aug)
175 | image, word_label = random_crop_image(image_name, self.image_label_dict[image_name.split('/')[-1]], self.image_size, self.class_num, self.phase, index, no_aug, self.args)
176 | else:
177 | image, word_label = random_crop_image(image_name, self.image_label_dict[image_name.split('/')[-1]], self.image_size, self.class_num, self.phase, index, no_aug, self.args)
178 |
179 | # invert the gray levels: black background, white text
180 | if self.phase == 'train':
181 | r = get_random(index+111)
182 | if r < 0.1:
183 | image[0,:,:] = 255 - image[0,:,:]
184 | elif r < 0.2:
185 | image[1,:,:] = 255 - image[1,:,:]
186 | elif r < 0.3:
187 | image[2,:,:] = 255 - image[2,:,:]
188 | if get_random(index+112) < 0.2:
189 | image = 255. - image
190 |
191 | image = (image / 128. - 1).astype(np.float32)
192 | return image_name, image, word_label
193 |
194 | def __len__(self):
195 | return len(self.image_names)
196 |
197 | last_random = 10
198 | def get_random(idx):
199 | global last_random
200 | if last_random < 1:
201 | np.random.seed(int(last_random * 1000000 + time.time()) + idx)
202 | else:
203 | np.random.seed(int((time.time())))
204 | x = np.random.random()
205 | while np.abs(last_random - x) < 0.1:
206 | x = np.random.random()
207 | last_random = x
208 | return x
209 |
210 | def comput_iou(font, proposal):
211 | fx,fy,fh,fw = font
212 | px,py,pd = proposal
213 | overlap_x = max(min(pd, fh) - np.abs(fx - px), 0)
214 | overlap_y = max(min(pd, fw) - np.abs(fy - py), 0)
215 | # areas
216 | sf = fh * fw
217 | sp = pd * pd
218 | so = overlap_x * overlap_y
219 | iou = float(so) / (sf + sp - so)
220 | return iou
221 |
222 | def generate_bbox_label(image, font_place, font_size, font_num, args, image_size):
223 | imgh,imgw = image.size
224 | seg_label = np.zeros((image_size[0]/2, image_size[1]/2), dtype=np.float32)
225 | sx = float(font_place[0]) / image.size[0] * image_size[0]
226 | ex = sx + float(font_size) / image.size[0] * image_size[0] * font_num
227 | sy = float(font_place[1]) / image.size[1] * image_size[1]
228 | ey = sy + float(font_size) / image.size[1] * image_size[1]
229 | seg_label[int(sx)/2:int(ex)/2, int(sy)/2:int(ey)/2] = 1
230 | seg_label = seg_label.transpose((1,0))
231 |
232 | bbox_label = np.zeros((
233 | image_size[0]/args.stride, # 16
234 | image_size[1]/args.stride, # 16
235 | len(args.anchors), # 4
236 | 4 # dx,dy,dd,c
237 | ), dtype=np.float32)
238 | fonts= []
239 | for i in range(font_num):
240 | x = font_place[0] + font_size/2. + i * font_size
241 | y = font_place[1] + font_size/2.
242 | h = font_size
243 | w = font_size
244 |
245 | x = float(x) * image_size[0] / imgh
246 | h = float(h) * image_size[0] / imgh
247 | y = float(y) * image_size[1] / imgw
248 | w = float(w) * image_size[1] / imgw
249 | fonts.append([x,y,h,w])
250 |
251 | # print bbox_label.shape
252 | for ix in range(bbox_label.shape[0]):
253 | for iy in range(bbox_label.shape[1]):
254 | for ia in range(bbox_label.shape[2]):
255 | proposal = [ix*args.stride + args.stride/2, iy*args.stride + args.stride/2, args.anchors[ia]]
256 | iou_fi = []
257 | for fi, font in enumerate(fonts):
258 | iou = comput_iou(font, proposal)
259 | iou_fi.append((iou, fi))
260 | max_iou, max_fi = sorted(iou_fi)[-1]
261 | if max_iou > 0.5:
262 | # positive example
263 | dx = (font[0] - proposal[0]) / float(proposal[2])
264 | dy = (font[1] - proposal[1]) / float(proposal[2])
265 | fd = max(font[2:])
266 | dd = np.log(fd / float(proposal[2]))
267 | # bbox_label[ix,iy,ia] = [dx, dy, dd, 1]
268 | bbox_label[ix,iy,ia] = [dx, dy, dd, 1]
269 | elif max_iou > 0.25:
270 | # ignore
271 | bbox_label[ix,iy,ia,3] = 0
272 | else:
273 | # negative example
274 | bbox_label[ix,iy,ia,3] = -1
275 | # note the transpose here
276 | bbox_label = bbox_label.transpose((1,0,2,3))
277 |
278 |
279 | # compute the anchor information
280 | return bbox_label, seg_label
281 |
282 | def get_resize_para(size, idx):
283 | if size > 48:
284 | rh, rw = 4,4
285 | elif size > 32:
286 | if idx % 2:
287 | rh, rw = 2,4
288 | else:
289 | rh, rw = 4,2
290 | elif size > 16:
291 | if idx % 2:
292 | rh, rw = 1,2
293 | else:
294 | rh, rw = 2,1
295 | else:
296 | return 1,1
297 |
298 | rhs = range(rh)
299 | np.random.seed(int(time.time()) + idx + 1)
300 | np.random.shuffle(rhs)
301 | rh = rhs[0] + 1
302 |
303 | rws = range(rw)
304 | np.random.seed(int(time.time()) + idx + 2)
305 | np.random.shuffle(rws)
306 | rw = rws[0] + 1
307 |
308 | return rh, rw
309 |
310 | # def generate_image(idx, image, word_index_dict, class_num, args, image_size, no_aug, epoch):
311 | def generate_image( idx, image, no_aug, dataset):
312 | '''
313 | When args.model == 'resnet' this is only used to train the segmentation network, and most of the augmentation is skipped.
314 | The comments below assume the default parameters:
315 | image_size [512, 64]
316 | rotate_range [-5, 5]
317 | font_range [8,32]
318 | '''
319 |
320 | word_index_dict = dataset.word_index_dict
321 | class_num = dataset.class_num
322 | args = dataset.args
323 | image_size = dataset.image_size
324 | font_range = dataset.font_range
325 | rotate_range = dataset.rotate_range
326 | epoch = args.epoch
327 | margin = dataset.margin
328 |
329 | # choose the text background
330 | image = image.resize((1024,1024))
331 | h,w = image.size
332 | # randomly crop a region and resize it to a fixed size, which stretches the text a bit horizontally/vertically
333 | h_crop = int(get_random(idx + 10) * image_size[0] * 2 / 8) + image_size[0] * 6 / 8 # length range [384, 512]
334 | w_crop = int(get_random(idx + 11) * image_size[1] * 2 / 8) + image_size[1] * 6 / 8 # width range [48, 64]
335 | if args.model == 'resnet' or no_aug or epoch < 60:
336 | # resnet: the segmentation network uses a fixed-size crop
337 | # epoch<60: use a fixed size early in training to speed up convergence
338 | h_crop = image_size[0]
339 | w_crop = image_size[1]
340 | # choose the text background: pick a random crop start position
341 | x = int(get_random(idx+12) * (h - h_crop))
342 | y = int(get_random(idx+13) * (w - w_crop))
343 | image = image.crop((x,y,x+h_crop,y+w_crop))
344 |
345 |
346 | # the font size is the variable most likely to cause errors; it must not exceed the central region of the image
347 | size = font_range[0] + int(get_random(idx+20) * (font_range[1] - font_range[0]))
348 | size = min(size, h_crop - 2*margin - 2, w_crop - 2*margin - 2)
349 |
350 | # number of characters: more than half of what fits, and at least one character
351 | large_num = max(0, (h_crop - 2 * margin)/ size - 1)
352 | word_num = int(min(large_num / 2, 5) + get_random(idx+21) * large_num / 2) + 1
353 | # word_num = int(large_num / 2 + get_random(idx+21) * large_num / 2) + 1
354 | word_num = max(1, word_num)
355 |
356 | # choose where to place the text and generate the label information
357 | place_x = int(get_random(idx+22) * (h_crop - word_num * size - margin)) + margin
358 | if margin == 0:
359 | # used to add two rows of text
360 | place_y = int(get_random(idx+23) * (w_crop/2 - size - margin)) + margin
361 | else:
362 | place_y = int(get_random(idx+23) * (w_crop - size - margin)) + margin
363 | place = (place_x, place_y)
364 | label = np.zeros(class_num).astype(np.float32)
365 |
366 | text = u''
367 | words = word_index_dict.keys()
368 |
369 | if margin == 0:
370 | # two rows of text
371 | word_num *= 2
372 | while len(text) < word_num:
373 | np.random.shuffle(words)
374 | w = words[len(text)]
375 | if w in u'"(),':
376 | # some characters are better not generated
377 | continue
378 | text = text + w
379 | index = word_index_dict[w]
380 | label[index] = 1
381 |
382 | # build the bbox_label
383 | if args.model == 'resnet':
384 | bbox_label, seg_label = generate_bbox_label(image, place, size, word_num, args, image_size)
385 | else:
386 | bbox_label, seg_label = 0,0
387 |
388 | # font; other fonts can be added here
389 | fonts = ['../../files/ttf/simsun.ttf']
390 | np.random.shuffle(fonts)
391 | font = fonts[0]
392 |
393 | # color
394 | r = get_random(idx+24)
395 | if no_aug or r < 0.7:
396 | # choose different shades of black
397 | if r < 0.3:
398 | c = int(get_random(idx + 25) * 64)
399 | color = (c,c,c)
400 | else:
401 | rgb = 64
402 | r = int(get_random(idx + 27) * rgb)
403 | g = int(get_random(idx + 28) * rgb)
404 | b = int(get_random(idx + 29) * rgb)
405 | color = (r,g,b)
406 | else:
407 | # random color, but prefer darker colors
408 | rgb = 256
409 | r = int(get_random(idx + 27) * rgb)
410 | g = int(get_random(idx + 28) * rgb)
411 | b = int(get_random(idx + 29) * rgb)
412 | ra = get_random(idx + 30)
413 | if ra < 0.5:
414 | ra = int(1000 * ra) % 3
415 | if ra == 0:
416 | r = 0
417 | elif ra == 1:
418 | g = 0
419 | else:
420 | b = 0
421 | color = (r,g,b)
422 |
423 | # draw the text onto the image
424 | if margin == 0:
425 | image = add_text_to_img(image, text[:word_num/2], size, font, color, place)
426 | image = add_text_to_img(image, text[word_num/2:], size, font, color, (place[0], place[1]+image_size[1]/2))
427 | else:
428 | image = add_text_to_img(image, text, size, font, color, place)
429 |
430 | '''
431 | # random flips to improve generalization
432 | if args.model != 'resnet':
433 | if get_random(idx+130) < 0.3:
434 | image = image.transpose(Image.FLIP_LEFT_RIGHT)
435 | if get_random(idx+131) < 0.3:
436 | image = image.transpose(Image.FLIP_TOP_BOTTOM)
437 |
438 | # rotate first, then stretch the image
439 | h,w = image.size
440 | max_hw, min_hw = float(max(h,w)), float(min(h,w))
441 | if max_hw / min_hw >= 5:
442 | rotate_size = 5
443 | elif max_hw / min_hw >= 3:
444 | rotate_size = 10
445 | elif max_hw / min_hw >= 1.5:
446 | rotate_size = 30
447 | else:
448 | rotate_size = 50
449 | if args.model != 'resnet' and not no_aug and epoch>70 and get_random(idx+50) < 0.8:
450 | theta = int(rotate_size * 2 * get_random(idx+32)) - rotate_size
451 | image = image.rotate(theta)
452 | else:
453 | theta = 0
454 | '''
455 |
456 |
457 | # resize back to [512, 64]
458 | image = image.resize(image_size)
459 |
460 |
461 | # after the image is generated, rotate it once more and blur it
462 | if args.model == 'resnet' or (get_random(idx+50) < 0.8 and not no_aug):
463 |
464 | # rotation
465 | if args.model == 'resnet' :
466 | rotate_size = 10
467 | else:
468 | rotate_size = rotate_range[0] + int(get_random(idx+32) * (rotate_range[1] - rotate_range[0]))
469 | theta = int(rotate_size * 2 * get_random(idx+33)) - rotate_size
470 | image = image.rotate(theta)
471 | if args.model == 'resnet':
472 | # for segmentation, the label must be rotated together with the image
473 | seg_label = np.array([seg_label, seg_label, seg_label]) * 255
474 | seg_label = np.array(Image.fromarray(seg_label.transpose([1,2,0]).astype(np.uint8)).rotate(theta))
475 | seg_label = (seg_label[:,:,0] > 128).astype(np.float32)
476 |
477 | filters = [
478 | ImageFilter.SMOOTH, # smoothing, usable when the font size is above 16
479 | ImageFilter.SMOOTH_MORE, # smoothing, usable when the font size is above 16
480 | ImageFilter.GaussianBlur(radius=1), # usable when the font size is above 16
481 |
482 | ImageFilter.GaussianBlur(radius=2), # usable when the font size is above 32
483 | ImageFilter.BLUR, # usable when the font size is above 32
484 | ImageFilter.GaussianBlur(radius=2), # add these two once more
485 | ImageFilter.BLUR, # add these two once more
486 | ]
487 |
488 | # add some blur when the text is fairly large
489 | if size > 16:
490 | if size < 32:
491 | filters = filters[:3]
492 | np.random.shuffle(filters)
493 | image = image.filter(filters[idx % len(filters)])
494 |
495 | if args.model == 'resnet':
496 | # add noise
497 | noise_level = 32
498 | image = np.array(image)
499 | noise = np.random.random(image.shape) * noise_level - noise_level / 2.
500 | image = image + noise
501 | image = image.astype(np.uint8)
502 | image = Image.fromarray(image)
503 |
504 |
505 | # sometimes low-resolution images are needed
506 | resize_0, resize_1 = get_resize_para(size, idx)
507 | image = image.resize([image_size[0]/resize_0, image_size[1]/resize_1])
508 |
509 | # resize back to [512, 64]
510 | image = image.resize(image_size)
511 |
512 | return image, label, bbox_label, seg_label, size
513 |
514 | def add_text_to_img(img, text, size, font, color, place):
515 | imgdraw = ImageDraw.Draw(img)
516 | imgfont = ImageFont.truetype(font,size=size)
517 | imgdraw.text(place, text, fill=color, font=imgfont)
518 | return img
519 |
520 | def random_crop_image(image_name, text, image_size, class_num, phase, idx, no_aug, args):
521 | # label
522 | text = text.split()
523 | word_label = np.zeros(class_num, dtype=np.float32)
524 |
525 |
526 | if args.hist:
527 | if get_random(idx+34) < 0.4 and phase == 'train':
528 | image = Image.open(image_name).convert('RGB')
529 | else:
530 | # histogram equalization
531 | image = Image.open(image_name).convert('YCbCr')
532 | image = np.array(image)
533 | imy = image[:,:,0]
534 | imy,_ = histeq(imy)
535 | image[:,:,0] = imy
536 | image = Image.fromarray(image, mode='YCbCr').convert('RGB')
537 | else:
538 | image = Image.open(image_name).convert('RGB')
539 | x = np.array(image)
540 | assert x.min() >= 0
541 | assert x.max() < 256
542 |
543 | if phase == 'train' and not no_aug:
544 | # rotation
545 | if get_random(idx+11) < 0.8:
546 | theta = int(6 * get_random(idx+1)) - 3
547 | image = image.rotate(theta)
548 |
549 | # blurring
550 | if get_random(idx+2) < 0.3:
551 | np.random.shuffle(filters)
552 | image = image.filter(filters[0])
553 |
554 | # if the short side is below 64, just zero-pad
555 | h,w = image.size
556 | if w < image_size[1] and h > 64:
557 | if get_random(idx+3) < 0.3:
558 | image = np.array(image)
559 | start_index = (image_size[1] - w)/2
560 | new_image = np.zeros((image_size[1], h, 3), dtype=np.uint8)
561 | new_image[start_index:start_index+w, :, :] = image
562 | image = Image.fromarray(new_image)
563 |
564 |
565 | # first resize to an X * 64 image
566 | h,w = image.size
567 | h = int(float(h) * image_size[1] / w)
568 | image = image.resize((h, image_size[1]))
569 |
570 | if phase == 'train' and not no_aug:
571 |
572 | # rescale by 0.8~1.2
573 | h,w = image.size
574 | r = get_random(idx+4) / 4. + 0.8
575 | image = image.resize((int(h*r), int(w*r)))
576 |
577 | # crop
578 | if min(h,w) > 32:
579 | crop_size = 20
580 | x = int((crop_size * get_random(idx+5) - crop_size/2) * r)
581 | y = int((crop_size * get_random(idx+6) - crop_size/2) * r)
582 | image = image.crop((max(0,x),max(0,y),min(0,x)+h,min(0,y)+w))
583 |
584 | # occasionally generate some low-resolution images
585 | h,w = image.size
586 | r = get_random(idx+7)
587 |
588 | '''
589 | if r < 0.01 and min(h,w) > 64:
590 | image = image.resize((h/8, w/8))
591 | elif r < 0.1 and min(h,w) > 64:
592 | image = image.resize((h/4, w/4))
593 | elif r < 0.3 and min(h,w) > 32:
594 | image = image.resize((h/2, w/2))
595 | '''
596 |
597 | # resize back to an X * 64 image
598 | h = int(float(h) * image_size[1] / w)
599 | image = image.resize((h, image_size[1]))
600 |
601 | # pad to a fixed size
602 | image = np.transpose(np.array(image), [2,0,1]).astype(np.float32)
603 | if image.shape[2] < image_size[0]:
604 | # aspect ratio below 8 (16): just pad
605 | if phase == 'test':
606 | # place in the exact center
607 | start = np.abs(image_size[0] - image.shape[2])/2
608 | else:
609 | start = int(np.random.random() * np.abs(image_size[0] - image.shape[2]))
610 | new_image = np.zeros((3, image_size[1], image_size[0]), dtype=np.float32)
611 | new_image[:,:,start:start+image.shape[2]] = image
612 | if phase == 'test':
613 | new_image = np.array([new_image]).astype(np.float32)
614 | for w in text:
615 | word_label[int(w)] = 1
616 | else:
617 | # aspect ratio above 16: take a random crop
618 | if phase == 'test':
619 | # at test time, split into patches and merge directly
620 | crop_num = image.shape[2] * 2 / image_size[0] + 1
621 | new_image = np.zeros((crop_num, 3, image_size[1], image_size[0]), dtype=np.float32)
622 | for i in range(crop_num):
623 | start_index = i * image_size[0] / 2
624 | end_index = start_index + image_size[0]
625 | if end_index > image.shape[2]:
626 | new_image[i,:,:,:image.shape[2] - start_index] = image[:,:,start_index:end_index]
627 | else:
628 | new_image[i] = image[:,:,start_index:end_index]
629 | for w in text:
630 | word_label[int(w)] = 1
631 | else:
632 | # during training, do not count the negative-example loss
633 | start = int(np.random.random() * np.abs(image_size[0] - image.shape[2]))
634 | new_image = image[:,:,start:start+image_size[0]]
635 | for w in text:
636 | word_label[int(w)] = -1
637 |
638 | image = new_image
639 | if phase == 'train':
640 | image = image.astype(np.float32)
641 | '''
642 | # vary the gray level of each column
643 | if get_random(idx+9) < 0.3:
644 | change_level = 256. / image.shape[1]
645 | gray_change = 0
646 | for j in range(image.shape[1]):
647 | gray_change += change_level * get_random(j+idx) - change_level / 2
648 | image[:,j,:] += gray_change
649 | # vary the gray level of each row
650 | if get_random(idx+10) < 0.3:
651 | change_level = 256. / image.shape[2]
652 | gray_change = 0
653 | for k in range(image.shape[2]):
654 | gray_change += change_level * get_random(10+k+idx) - change_level / 2
655 | image[:,:,k] += gray_change
656 | '''
657 | # add noise
658 | if get_random(idx+8) < 0.1:
659 | noise_level = 64
660 | noise = np.random.random(image.shape) * noise_level - noise_level / 2.
661 | image = image + noise
662 | # noise = np.random.random(image.shape[1:]) * noise_level - noise_level / 2.
663 | # image = image + np.array([noise, noise, noise])
664 | image = image.astype(np.float32)
665 |
666 | return image, word_label
667 |
--------------------------------------------------------------------------------
/code/ocr/densenet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.utils.model_zoo as model_zoo
5 | from collections import OrderedDict
6 |
7 | __all__ = ['DenseNet', 'densenet121', 'densenet169', 'densenet201', 'densenet161']
8 |
9 |
10 | model_urls = {
11 | 'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth',
12 | 'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth',
13 | 'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth',
14 | 'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth',
15 | }
16 |
17 |
18 | def densenet121(pretrained=False, small=0,**kwargs):
19 | r"""Densenet-121 model from
20 | `"Densely Connected Convolutional Networks" `_
21 |
22 | Args:
23 | pretrained (bool): If True, returns a model pre-trained on ImageNet
24 | """
25 | model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16), small=small,
26 | **kwargs)
27 | if pretrained:
28 | model.load_state_dict(model_zoo.load_url(model_urls['densenet121']))
29 | return model
30 |
31 |
32 | def densenet169(pretrained=False, **kwargs):
33 | r"""Densenet-169 model from
34 | `"Densely Connected Convolutional Networks" `_
35 |
36 | Args:
37 | pretrained (bool): If True, returns a model pre-trained on ImageNet
38 | """
39 | model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 32, 32),
40 | **kwargs)
41 | if pretrained:
42 | model.load_state_dict(model_zoo.load_url(model_urls['densenet169']))
43 | return model
44 |
45 |
46 | def densenet201(pretrained=False, **kwargs):
47 | r"""Densenet-201 model from
48 | `"Densely Connected Convolutional Networks" `_
49 |
50 | Args:
51 | pretrained (bool): If True, returns a model pre-trained on ImageNet
52 | """
53 | model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 48, 32),
54 | **kwargs)
55 | if pretrained:
56 | model.load_state_dict(model_zoo.load_url(model_urls['densenet201']))
57 | return model
58 |
59 |
60 | def densenet161(pretrained=False, **kwargs):
61 | r"""Densenet-161 model from
62 | `"Densely Connected Convolutional Networks" `_
63 |
64 | Args:
65 | pretrained (bool): If True, returns a model pre-trained on ImageNet
66 | """
67 | model = DenseNet(num_init_features=96, growth_rate=48, block_config=(6, 12, 36, 24),
68 | **kwargs)
69 | if pretrained:
70 | model.load_state_dict(model_zoo.load_url(model_urls['densenet161']))
71 | return model
72 |
73 |
74 | class _DenseLayer(nn.Sequential):
75 | def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
76 | super(_DenseLayer, self).__init__()
77 | self.add_module('norm.1', nn.BatchNorm2d(num_input_features)),
78 | self.add_module('relu.1', nn.ReLU(inplace=True)),
79 | self.add_module('conv.1', nn.Conv2d(num_input_features, bn_size *
80 | growth_rate, kernel_size=1, stride=1, bias=False)),
81 | self.add_module('norm.2', nn.BatchNorm2d(bn_size * growth_rate)),
82 | self.add_module('relu.2', nn.ReLU(inplace=True)),
83 | self.add_module('conv.2', nn.Conv2d(bn_size * growth_rate, growth_rate,
84 | kernel_size=3, stride=1, padding=1, bias=False)),
85 | self.drop_rate = drop_rate
86 |
87 | def forward(self, x):
88 | new_features = super(_DenseLayer, self).forward(x)
89 | if self.drop_rate > 0:
90 | new_features = F.dropout(new_features, p=self.drop_rate, training=self.training)
91 | return torch.cat([x, new_features], 1)
92 |
93 |
94 | class _DenseBlock(nn.Sequential):
95 | def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate):
96 | super(_DenseBlock, self).__init__()
97 | for i in range(num_layers):
98 | layer = _DenseLayer(num_input_features + i * growth_rate, growth_rate, bn_size, drop_rate)
99 | self.add_module('denselayer%d' % (i + 1), layer)
100 |
101 |
102 | class _Transition(nn.Sequential):
103 | def __init__(self, num_input_features, num_output_features, use_pool):
104 | super(_Transition, self).__init__()
105 | self.add_module('norm', nn.BatchNorm2d(num_input_features))
106 | self.add_module('relu', nn.ReLU(inplace=True))
107 | self.add_module('conv', nn.Conv2d(num_input_features, num_output_features,
108 | kernel_size=1, stride=1, bias=False))
109 | if use_pool:
110 | self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))
111 |
112 |
113 | class DenseNet(nn.Module):
114 | r"""Densenet-BC model class, based on
115 | `"Densely Connected Convolutional Networks" `_
116 |
117 | Args:
118 | growth_rate (int) - how many filters to add each layer (`k` in paper)
119 | block_config (list of 4 ints) - how many layers in each pooling block
120 | num_init_features (int) - the number of filters to learn in the first convolution layer
121 | bn_size (int) - multiplicative factor for number of bottle neck layers
122 | (i.e. bn_size * k features in the bottleneck layer)
123 | drop_rate (float) - dropout rate after each dense layer
124 | num_classes (int) - number of classification classes
125 | """
126 | def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), small=0,
127 | num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000):
128 |
129 | super(DenseNet, self).__init__()
130 |
131 | # First convolution
132 | self.features = nn.Sequential(OrderedDict([
133 | ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)),
134 | ('norm0', nn.BatchNorm2d(num_init_features)),
135 | ('relu0', nn.ReLU(inplace=True)),
136 | ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
137 | ]))
138 |
139 | # Each denseblock
140 | num_features = num_init_features
141 | for i, num_layers in enumerate(block_config):
142 | block = _DenseBlock(num_layers=num_layers, num_input_features=num_features,
143 | bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate)
144 | self.features.add_module('denseblock%d' % (i + 1), block)
145 | num_features = num_features + num_layers * growth_rate
146 | if i != len(block_config) - 1:
147 | if small and i > 0:
148 | use_pool = 0
149 | else:
150 | use_pool = 1
151 | trans = _Transition(num_input_features=num_features, num_output_features=num_features // 2, use_pool=use_pool)
152 | self.features.add_module('transition%d' % (i + 1), trans)
153 | num_features = num_features // 2
154 |
155 | # Final batch norm
156 | self.features.add_module('norm5', nn.BatchNorm2d(num_features))
157 |
158 | # Linear layer
159 | self.classifier = nn.Linear(num_features, num_classes)
160 |
161 | def forward(self, x):
162 | features = self.features(x)
163 | return features
164 | att_feats = features
165 | out = F.relu(features, inplace=True)
166 | out = F.avg_pool2d(out, kernel_size=7, stride=1).view(features.size(0), -1)
167 | # out = F.avg_pool2d(out, kernel_size=3, stride=1).view(features.size(0), -1)
168 | fc_feats = out
169 | out = self.classifier(out)
170 | return att_feats, fc_feats, out
171 |
--------------------------------------------------------------------------------
/code/ocr/main.py:
--------------------------------------------------------------------------------
1 | # coding=utf8
2 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ==============================================================================
16 |
17 | """ResNet Train/Eval module.
18 | """
19 | import time
20 | import sys
21 | import os
22 |
23 | import numpy as np
24 | import dataloader
25 | import json
26 | from tqdm import tqdm
27 |
28 | import densenet
29 | import resnet
30 | from PIL import Image
31 |
32 | import torchvision
33 |
34 | import torch
35 | import torch.nn as nn
36 | import torch.backends.cudnn as cudnn
37 | from torch.autograd import Variable
38 | from torch.utils.data import DataLoader
39 | import torch.nn.functional as F
40 |
41 | from sklearn.metrics import roc_auc_score
42 |
43 | from tools import parse
44 | from glob import glob
45 | from skimage import measure
46 | import sys
47 | reload(sys)
48 | sys.setdefaultencoding('utf8')
49 | import traceback
50 |
51 | args = parse.args
52 | # anchor sizes
53 | args.anchors = [8, 12, 18, 27, 40, 60]
54 | args.stride = 8
55 | args.image_size = [512,64]
56 |
57 |
58 | class DenseNet121(nn.Module):
59 | """Model modified.
60 |
61 | The architecture of our model is the same as standard DenseNet121
62 | except the classifier layer which has an additional sigmoid function.
63 |
64 | """
65 | def __init__(self, out_size):
66 | super(DenseNet121, self).__init__()
67 | self.inplanes = 1024
68 | self.densenet121 = densenet.densenet121(pretrained=True, small=args.small)
69 | num_ftrs = self.densenet121.classifier.in_features
70 | self.classifier_font = nn.Sequential(
71 | # a fully connected layer could be used for classification here
72 | # nn.Linear(num_ftrs, out_size)
73 | # or a 1×1 convolution, as used below
74 | nn.Conv2d(num_ftrs, out_size, kernel_size=1, bias=False)
75 | )
76 | self.train_params = []
77 | self.unpool = nn.MaxUnpool2d(kernel_size=2, stride=2)
78 |
79 | def _make_layer(self, block, planes, blocks, stride=1):
80 | downsample = None
81 | if stride != 1 or self.inplanes != planes * block.expansion:
82 | downsample = nn.Sequential(
83 | nn.Conv2d(self.inplanes, planes * block.expansion,
84 | kernel_size=1, stride=stride, bias=False),
85 | nn.BatchNorm2d(planes * block.expansion),
86 | )
87 |
88 | layers = []
89 | layers.append(block(self.inplanes, planes, stride, downsample))
90 | self.inplanes = planes * block.expansion
91 | for i in range(1, blocks):
92 | layers.append(block(self.inplanes, planes))
93 |
94 | return nn.Sequential(*layers)
95 |
96 | def forward(self, x, phase='train'):
97 | feats = self.densenet121(x) # (32, 1024, 2, 16)
98 | if not args.small:
99 | feats = F.max_pool2d(feats, kernel_size=2, stride=2) # (32, 1024, 1, 8)
100 | out = self.classifier_font(feats) # (32, 1824, 1, 8)
101 | out_size = out.size()
102 | # print out.size()
103 | out = out.view(out.size(0),out.size(1),-1) # (32, 1824, 8)
104 | # print out.size()
105 | if phase == 'train':
106 | out = F.adaptive_max_pool1d(out, output_size=(1)).view(out.size(0),-1) # (32, 1824)
107 | return out
108 | else:
109 | out = out.transpose(1,2).contiguous()
110 | out = out.view(out_size[0],out_size[2], out_size[3], out_size[1]) # (32, 1, 8, 1824)
111 | return out, feats
112 |
113 | class Loss(nn.Module):
114 | def __init__(self):
115 | super(Loss, self).__init__()
116 | self.classify_loss = nn.BCELoss()
117 | self.sigmoid = nn.Sigmoid()
118 | self.regress_loss = nn.SmoothL1Loss()
119 |
120 | def forward(self, font_output, font_target, weight=None, use_hard_mining=False):
121 | font_output = self.sigmoid(font_output)
122 | font_loss = F.binary_cross_entropy(font_output, font_target, weight)
123 |
124 | # hard_mining
125 | if use_hard_mining:
126 | font_output = font_output.view(-1)
127 | font_target = font_target.view(-1)
128 | pos_index = font_target > 0.5
129 | neg_index = font_target == 0
130 |
131 | # pos
132 | pos_output = font_output[pos_index]
133 | pos_target = font_target[pos_index]
134 | num_hard_pos = max(len(pos_output)/4, min(5, len(pos_output)))
135 | if len(pos_output) > 5:
136 | pos_output, pos_target = hard_mining(pos_output, pos_target, num_hard_pos, largest=False)
137 | pos_loss = self.classify_loss(pos_output, pos_target) * 0.5
138 |
139 |
140 | # neg
141 | num_hard_neg = len(pos_output) * 2
142 | neg_output = font_output[neg_index]
143 | neg_target = font_target[neg_index]
144 | neg_output, neg_target = hard_mining(neg_output, neg_target, num_hard_neg, largest=True)
145 | neg_loss = self.classify_loss(neg_output, neg_target) * 0.5
146 |
147 | font_loss += pos_loss + neg_loss
148 |
149 | else:
150 | pos_loss, neg_loss = font_loss, font_loss
151 | return [font_loss, pos_loss, neg_loss]
152 |
153 | def _forward(self, font_output, font_target, weight, bbox_output=None, bbox_label=None, seg_output=None, seg_labels=None):
154 | font_output = self.sigmoid(font_output)
155 | font_loss = F.binary_cross_entropy(font_output, font_target, weight)
156 |
157 | acc = []
158 | if bbox_output is not None:
159 | # bbox_loss = 0
160 | bbox_output = bbox_output.view((-1, 4))
161 | bbox_label = bbox_label.view((-1, 4))
162 | pos_index = bbox_label[:,-1] >= 0.5
163 | pos_index = pos_index.unsqueeze(1).expand(pos_index.size(0), 4)
164 | neg_index = bbox_label[:,-1] <= -0.5
165 | neg_index = neg_index.unsqueeze(1).expand(neg_index.size(0), 4)
166 |
167 | # positive examples
168 | pos_label = bbox_label[pos_index].view((-1,4))
169 | pos_output = bbox_output[pos_index].view((-1,4))
170 | lx,ly,ld,lc = pos_label[:,0],pos_label[:,1],pos_label[:,2],pos_label[:,3]
171 | ox,oy,od,oc = pos_output[:,0],pos_output[:,1],pos_output[:,2],pos_output[:,3]
172 | regress_loss = [
173 | self.regress_loss(ox, lx),
174 | self.regress_loss(oy, ly),
175 | self.regress_loss(od, ld),
176 | ]
177 | pc = self.sigmoid(oc)
178 | acc.append((pc>=0.5).data.cpu().numpy().astype(np.float32).sum())
179 | acc.append(len(pc))
180 | # print pc.size(), lc.size()
181 | classify_loss = self.classify_loss(pc, lc) * 0.5
182 |
183 | # negative examples
184 | neg_label = bbox_label[neg_index].view((-1,4))
185 | neg_output = bbox_output[neg_index].view((-1,4))
186 | lc = neg_label[:, 3]
187 | oc = neg_output[:, 3]
188 | pc = self.sigmoid(oc)
189 | acc.append((pc<=0.5).data.cpu().numpy().astype(np.float32).sum())
190 | acc.append(len(pc))
191 | # print pc.size(), lc.size()
192 | classify_loss += self.classify_loss(pc, lc+1) * 0.5
193 |
194 | # seg_loss
195 | seg_output = seg_output.view(-1)
196 | seg_labels = seg_labels.view(-1)
197 | pos_index = seg_labels > 0.5
198 | neg_index = seg_labels < 0.5
199 | seg_loss = 0.5 * self.classify_loss(seg_output[pos_index], seg_labels[pos_index]) + \
200 | 0.5 * self.classify_loss(seg_output[neg_index], seg_labels[neg_index])
201 | seg_tpr = (seg_output[pos_index] > 0.5).data.cpu().numpy().astype(np.float32).sum() / len(seg_labels[pos_index])
202 | seg_tnr = (seg_output[neg_index] < 0.5).data.cpu().numpy().astype(np.float32).sum() / len(seg_labels[neg_index])
203 | # print seg_output[neg_index]
204 | # print seg_labels[neg_index]
205 |
206 |
207 |
208 |
209 | else:
210 | return font_loss
211 |
212 | if args.model == 'resnet':
213 | loss = font_loss + classify_loss + seg_loss
214 | else:
215 | loss = font_loss + classify_loss + seg_loss
216 | for reg in regress_loss:
217 | loss += reg
218 | # if args.model == 'resnet':
219 | # loss = seg_loss
220 |
221 | return [loss, font_loss, seg_loss, classify_loss] + regress_loss + acc + [seg_tpr, seg_tnr]
222 |
223 | font_num = font_target.sum(0).data.cpu().numpy()
224 | font_loss = 0
225 | for di in range(font_num.shape[0]):
226 | if font_num[di] > 0:
227 | font_output_i = font_output[:,di]
228 | font_target_i = font_target[:,di]
229 | pos_font_index = font_target_i > 0.5
230 | font_loss += 0.5 * self.classify_loss(font_output_i[pos_font_index], font_target_i[pos_font_index])
231 | neg_font_index = font_target_i < 0.5
232 | if len(font_target_i[neg_font_index]) > 0:
233 | font_loss += 0.5 * self.classify_loss(font_output_i[neg_font_index], font_target_i[neg_font_index])
234 | font_loss = font_loss / (font_num>0).sum()
235 |
236 | return font_loss
237 | # '''
238 |
239 | def hard_mining(neg_output, neg_labels, num_hard, largest=True):
240 | num_hard = min(max(num_hard, 10), len(neg_output))
241 | _, idcs = torch.topk(neg_output, min(num_hard, len(neg_output)), largest=largest)
242 | neg_output = torch.index_select(neg_output, 0, idcs)
243 | neg_labels = torch.index_select(neg_labels, 0, idcs)
244 | return neg_output, neg_labels
245 |
246 | def save_model(save_dir, phase, name, epoch, f1score, model):
247 | if not os.path.exists(save_dir):
248 | os.mkdir(save_dir)
249 | save_dir = os.path.join(save_dir, args.model)
250 | if not os.path.exists(save_dir):
251 | os.mkdir(save_dir)
252 | save_dir = os.path.join(save_dir, phase)
253 | if not os.path.exists(save_dir):
254 | os.mkdir(save_dir)
255 | state_dict = model.state_dict()
256 | for key in state_dict.keys():
257 | state_dict[key] = state_dict[key].cpu()
258 | state_dict_all = {
259 | 'state_dict': state_dict,
260 | 'epoch': epoch,
261 | 'f1score': f1score,
262 | }
263 | torch.save( state_dict_all , os.path.join(save_dir, '{:s}.ckpt'.format(name)))
264 | if 'best' in name and f1score > 0.3:
265 | torch.save( state_dict_all , os.path.join(save_dir, '{:s}_{:s}.ckpt'.format(name, str(epoch))))
266 |
267 | def mkdir(path):
268 | if not os.path.exists(path):
269 | os.mkdir(path)
270 |
271 | def test(epoch, model, train_loader, phase='test'):
272 | print '\ntest {:s}_files, epoch: {:d}'.format(phase, epoch)
273 | mkdir('../../data/result')
274 | model.eval()
275 | f1score_list = []
276 | recall_list = []
277 | precision_list = []
278 | word_index_dict = json.load(open(args.word_index_json))
279 | index_word_dict = { v:k for k,v in word_index_dict.items() }
280 | result_file = open('../../data/result/{:d}_{:s}_result.csv'.format(epoch, phase), 'w')
281 | result_file.write('name,content\n')
282 | name_f1score_dict = dict()
283 |
284 | # save the features produced by densenet
285 | feat_dir = args.data_dir.replace('dataset', 'feats')
286 | mkdir(feat_dir)
287 | feat_dir = os.path.join(feat_dir, phase)
288 | print feat_dir
289 | mkdir(feat_dir)
290 |
291 | names = []
292 | if phase != 'test':
293 | gt_file = open('../../data/result/{:d}_{:s}_gt.csv'.format(epoch, phase), 'w')
294 | gt_file.write('name,content\n')
295 | analysis_file = open('../../data/result/{:s}_{:s}_gt.csv'.format('analysis', phase), 'w')
296 | os.system('rm -r ../../data/analysis/{:s}'.format(phase))
297 | labels_all = []
298 | probs_all = []
299 | for i,data in enumerate(tqdm(train_loader)):
300 | name = data[0][0].split('/')[-1].split('.seg')[0]
301 | names.append(name)
302 | images, labels = [Variable(x.cuda(async=True)) for x in data[1:3]]
303 | if len(images.size()) == 5:
304 | images = images[0]
305 |
306 | probs, feats = model(images, 'test')
307 | probs_all.append(probs.data.cpu().numpy().max(2).max(1).max(0))
308 |
309 | preds = probs.data.cpu().numpy() > 0.5 # (-1, 8, 1824)
310 |
311 | # result_file.write(name+',')
312 | result = u''
313 | last_set = set()
314 | all_set = set()
315 |
316 | if args.feat:
317 | # save all the feats
318 | feats = feats.data.cpu().numpy()
319 | if i == 0:
320 | print feats.shape
321 | np.save(os.path.join(feat_dir, name.replace('.png','.npy')), feats)
322 | if len(feats) > 1: # feats: [-1, 1024, 1, 8]
323 | # multiple patches
324 | new_feats = []
325 | for i,feat in enumerate(feats):
326 | if i == 0:
327 | # first patch: keep the first 6
328 | new_feats.append(feat[:,:,:6])
329 | elif i == len(feats) - 1:
330 | # last patch: keep the last 6
331 | new_feats.append(feat[:,:,2:])
332 | else:
333 | # keep the middle 4
334 | new_feats.append(feat[:,:,2:6])
335 | feats = np.concatenate(new_feats, 2)
336 |
337 | # this detects the same character across different regions; if the same character appears more than once within the same region, it may not be detected multiple times
338 | preds = preds.max(1) # pool along the vertical direction
339 | # if len(preds) > 1:
340 | # print name
341 | for patch_i, patch_pred in enumerate(preds):
342 | for part_i, part_pred in enumerate(patch_pred):
343 | new_set = set()
344 | for idx,p in enumerate(part_pred):
345 | if p:
346 | # this character is present
347 | w = index_word_dict[idx]
348 | new_set.add(w)
349 | if w not in all_set:
350 | # a character not seen before
351 | all_set.add(w)
352 | result += w
353 | elif w not in last_set:
354 | # has appeared before
355 | if patch_i == 0:
356 | # first patch # the previous part did not contain this character
357 | result += w
358 | elif part_i >= preds.shape[1]/2 :
359 | # latter half of a later patch # the previous part did not contain this character
360 | result += w
361 | last_set = new_set
362 | # if len(result) > len(set(result)):
363 | # print name
364 |
365 |
366 |
367 |
368 | '''
369 | for idx,p in enumerate(preds.reshape(-1)):
370 | if p:
371 | # result_file.write(index_word_dict[idx])
372 | result = result + index_word_dict[idx]
373 | '''
374 |
375 | result = result.replace(u'"', u'')
376 | if u',' in result:
377 | result = '"' + result + '"'
378 | if len(result) == 0:
379 | global_prob = probs.data.cpu().numpy().max(0).max(0).max(0)
380 | max_index = global_prob.argmax()
381 | result = index_word_dict[max_index]
382 | print name
383 |
384 | result_file.write(name+','+result+'\n')
385 | # result_file.write('\n')
386 |
387 | if phase == 'test':
388 | continue
389 | labels = labels.data.cpu().numpy()
390 | gt_file.write(name+',')
391 | gt = u''
392 | for idx,l in enumerate(labels.reshape(-1)):
393 | if l:
394 | gt = gt + index_word_dict[idx]
395 | gt_file.write(index_word_dict[idx])
396 | gt_file.write('\n')
397 |
398 |
399 | labels_all.append(labels[0])
400 | # global pooling
401 | preds = np.array([preds.max(1).max(0)])
402 | # print preds.shape
403 | for pred, label in zip(preds, labels):
404 | tp = (pred + label == 2).sum()
405 | tn = (pred + label == 0).sum()
406 | fp = (pred - label == 1).sum()
407 | fn = (pred - label ==-1).sum()
408 | precision = 1.0 * tp / max(tp + fp , 10e-20)
409 | recall = 1.0 * tp / max(tp + fn , 10e-20)
410 | f1score = 2. * precision * recall / max(precision + recall , 10e-20)
411 | precision_list.append(precision)
412 | recall_list.append(recall)
413 | f1score_list.append(f1score)
414 | name_f1score_dict[name] = f1score
415 |
416 | # analyze the poor results
417 | if phase == 'train_val':
418 | th = 0.8
419 | elif phase == 'train':
420 | th = 0.95
421 | else:
422 | th = 0.6
423 | if f1score < th:
424 | save_dir = '../../data/analysis'
425 | if not os.path.exists(save_dir):
426 | os.mkdir(save_dir)
427 | save_dir = os.path.join(save_dir, phase)
428 | if not os.path.exists(save_dir):
429 | os.mkdir(save_dir)
430 | os.system('cp ../../data/dataset/train/{:s} {:s}/{:d}_{:s}'.format(name, save_dir, 100000+i, name))
431 | analysis_file.write(name+'\t\t')
432 | gt = set(gt)
433 | result = set(result.strip('"'))
434 | analysis_file.write(''.join(sorted(gt - result))+'\t\t')
435 | analysis_file.write(''.join(sorted(result - gt))+'\t\n')
436 |
437 |
438 |
439 | if phase != 'test':
440 | # f1score = np.mean(f1score_list)
441 | # print 'f1score all', f1score
442 | # f1score_list = sorted(f1score_list)[500:]
443 | f1score = np.mean(f1score_list)
444 | recall = np.mean(recall_list)
445 | precision = np.mean(precision_list)
446 | print 'f1score', f1score
447 | print 'recall', recall
448 | print 'precision', precision
449 | gt_file.write('f1score,' + str(f1score))
450 | gt_file.write('recall,' + str(recall))
451 | gt_file.write('precision,' + str(precision))
452 | gt_file.close()
453 | result_file.write('f1score,' + str(f1score))
454 | result_file.write('recall,' + str(recall))
455 | result_file.write('precision,' + str(precision))
456 | with open('../../data/result/name_f1score_dict.json','w') as f:
457 | f.write(json.dumps(name_f1score_dict, indent=4))
458 | np.save('../../data/result/{:d}_{:s}_labels.npy'.format(epoch, phase), labels_all)
459 | result_file.close()
460 | os.system('cp ../../data/result/{:d}_{:s}_result.csv ../../data/result/{:s}_result.csv'.format(epoch, phase, phase))
461 |
462 | np.save('../../data/result/{:d}_{:s}_probs.npy'.format(epoch, phase), probs_all)
463 | with open('../../data/result/{:s}_names.json'.format(phase), 'w') as f:
464 | f.write(json.dumps(names, indent=4))
465 |
466 | def get_weight(labels):
467 | labels = labels.data.cpu().numpy()
468 | weights = np.zeros_like(labels)
469 | # weight_false = 1.0 / ((labels<0.5).sum() + 10e-20)
470 | # weight_true = 1.0 / ((labels>0.5).sum() + 10e-20)
471 | weight_false = 1.0 / ((labels<0.5).sum(0) + 10e-20)
472 | label_true = (labels>0.5).sum(0)
473 | for i in range(labels.shape[1]):
474 | label_i = labels[:,i]
475 | weight_i = np.ones(labels.shape[0]) * weight_false[i]
476 | # weight_i = np.ones(labels.shape[0]) * weight_false
477 | if label_true[i] > 0:
478 | weight_i[label_i>0.5] = 1.0 / label_true[i]
479 | weights[:,i] = weight_i
480 | weights *= np.ones_like(labels).sum() / (weights.sum() + 10e-20)
481 | weights[labels<-0.5] = 0
482 | return weights
483 |
484 | def train_eval(epoch, model, train_loader, loss, optimizer, best_f1score=0, phase='train'):
485 | print '\n',epoch, phase
486 | if 'train' in phase:
487 | model.train()
488 | else:
489 | model.eval()
490 | loss_list = []
491 | f1score_list = []
492 | recall_list = []
493 | precision_list = []
494 | for i,data in enumerate(tqdm(train_loader)):
495 | images, labels = [Variable(x.cuda(async=True)) for x in data[1:3]]
496 | weights = torch.from_numpy(get_weight(labels)).cuda(async=True)
497 | probs = model(images)
498 |
499 | # training phase
500 | if 'train' in phase:
501 | loss_output = loss(probs, labels, weights, args.hard_mining)
502 | try:
503 | optimizer.zero_grad()
504 | loss_output[0].backward()
505 | optimizer.step()
506 | loss_list.append([x.data.cpu().numpy()[0] for x in loss_output])
507 | except:
508 | # pass
509 | traceback.print_exc()
510 |
511 |
512 | # compute f1score, recall, precision
513 | '''
514 | x = probs.data.cpu().numpy()
515 | l = labels.data.cpu().numpy()
516 | print (get_weight(labels) * l).sum()
517 | l = 1 - l
518 | print (get_weight(labels) * l).sum()
519 | print x.max()
520 | print x.min()
521 | print x.mean()
522 | print
523 | # '''
524 | preds = probs.data.cpu().numpy() > 0
525 | labels = labels.data.cpu().numpy()
526 | for pred, label in zip(preds, labels):
527 | pred[label<0] = -1
528 | if label.sum() < 0.5:
529 | continue
530 | tp = (pred + label == 2).sum()
531 | tn = (pred + label == 0).sum()
532 | fp = (pred - label == 1).sum()
533 | fn = (pred - label ==-1).sum()
534 | precision = 1.0 * tp / (tp + fp + 10e-20)
535 | recall = 1.0 * tp / (tp + fn + 10e-20)
536 | f1score = 2. * precision * recall / (precision + recall + 10e-20)
537 | precision_list.append(precision)
538 | recall_list.append(recall)
539 | f1score_list.append(f1score)
540 |
541 |
542 | # save intermediate results to data/middle_result for analysis
543 | if i == 0:
544 | images = images.data.cpu().numpy() * 128 + 128
545 | if phase == 'pretrain':
546 | bbox_labels = bbox_labels.data.cpu().numpy()
547 | seg_labels = seg_labels.data.cpu().numpy()
548 | seg_output = seg_output.data.cpu().numpy()
549 | for ii in range(len(images)):
550 | middle_dir = os.path.join(args.save_dir, 'middle_result')
551 | if not os.path.exists(middle_dir):
552 | os.mkdir(middle_dir)
553 | middle_dir = os.path.join(middle_dir, phase)
554 | if not os.path.exists(middle_dir):
555 | os.mkdir(middle_dir)
556 | Image.fromarray(images[ii].astype(np.uint8).transpose(1,2,0)).save(os.path.join(middle_dir, str(ii)+'.image.png'))
557 | if phase == 'pretrain':
558 | segi = seg_labels[ii]
559 | _segi = np.array([segi, segi, segi]) * 255
560 | segi = np.zeros([3, _segi.shape[1]*2, _segi.shape[2]*2])
561 | for si in range(segi.shape[1]):
562 | for sj in range(segi.shape[2]):
563 | segi[:,si,sj] = _segi[:,si/2,sj/2]
564 | Image.fromarray(segi.transpose(1,2,0).astype(np.uint8)).save(os.path.join(middle_dir, str(ii)+'.seg.png'))
565 | segi = seg_output[ii]
566 | _segi = np.array([segi, segi, segi]) * 255
567 | segi = np.zeros([3, _segi.shape[1]*2, _segi.shape[2]*2])
568 | for si in range(segi.shape[1]):
569 | for sj in range(segi.shape[2]):
570 | segi[:,si,sj] = _segi[:,si/2,sj/2]
571 | Image.fromarray(segi.transpose(1,2,0).astype(np.uint8)).save(os.path.join(middle_dir, str(ii)+'.seg.out.png'))
572 |
573 | f1score = np.mean(f1score_list)
574 | print 'f1score', f1score
575 | print 'recall', np.mean(recall_list)
576 | print 'precision', np.mean(precision_list)
577 | if 'train' in phase:
578 | loss_mean = np.array(loss_list).mean(0)
579 | print 'loss: {:3.4f} pos loss: {:3.4f} neg loss: {:3.4f}'.format(loss_mean[0], loss_mean[1], loss_mean[2])
580 |
581 | # save the model
582 | if ('eval' in phase or 'pretrain' in phase)and best_f1score < 2:
583 | if args.small:
584 | save_dir = os.path.join(args.save_dir, 'models-small')
585 | else:
586 | save_dir = os.path.join(args.save_dir, 'models')
587 | if not os.path.exists(save_dir):
588 | os.mkdir(save_dir)
589 | if epoch % 5 == 0:
590 | save_model(save_dir, phase, str(epoch), epoch, f1score, model)
591 | if f1score > best_f1score:
592 | save_model(save_dir, phase, 'best_f1score', epoch, f1score, model)
593 | if args.model == 'resnet':
594 | tpnr = loss[11] + loss[12]
595 | # reuse best_f1score as the tpnr here; too lazy to rename it
596 | if tpnr > best_f1score:
597 | best_f1score = tpnr
598 | save_model(save_dir, phase, 'best_tpnr', epoch, f1score, model)
599 | print 'best tpnr', best_f1score
600 | else:
601 | best_f1score = max(best_f1score, f1score)
602 | if best_f1score < 1:
603 | print '\n\t{:s}\tbest f1score {:3.4f}\n'.format(phase, best_f1score)
604 | return best_f1score
605 |
606 |
607 | def main():
608 | word_index_dict = json.load(open(args.word_index_json))
609 | num_classes = len(word_index_dict)
610 | image_label_dict = json.load(open(args.image_label_json))
611 |
612 | cudnn.benchmark = True
613 | if args.model == 'densenet':
614 | # 2000+ character classes, multi-label classification
615 | model = DenseNet121(num_classes).cuda()
616 | elif args.model == 'resnet':
617 | # resnet is mainly used for text-region segmentation and object detection
618 | model = resnet.ResNet(num_classes=num_classes, args=args).cuda()
619 | else:
620 | return
621 | optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
622 | # model = torch.nn.DataParallel(model).cuda()
623 | loss = Loss().cuda()
624 |
625 | if args.resume:
626 | state_dict = torch.load(args.resume)
627 | model.load_state_dict(state_dict['state_dict'])
628 | best_f1score = state_dict['f1score']
629 | start_epoch = state_dict['epoch'] + 1
630 | else:
631 | best_f1score = 0
632 | if args.model == 'resnet':
633 | start_epoch = 100
634 | else:
635 | start_epoch = 1
636 | args.epoch = start_epoch
637 | print 'best_f1score', best_f1score
638 |
639 |
640 | # split the dataset
641 | test_filelist = sorted(glob(os.path.join(args.data_dir,'test','*')))
642 | trainval_filelist = sorted(glob(os.path.join(args.data_dir,'train','*')))
643 |
644 | # train with two input sizes
645 | # train_filelist1: images with aspect ratio below 8:1, padded to 64*512 inputs
646 | # train_filelist2: images with aspect ratio above 8:1, padded and cropped to 64*1024 inputs
647 | train_filelist1, train_filelist2 = [],[]
648 |
649 | # blacklist: these images have problematic labels
650 | black_list = set(json.load(open(args.black_json))['black_list'])
651 | image_hw_ratio_dict = json.load(open(args.image_hw_ratio_json))
652 | for f in trainval_filelist:
653 | image = f.split('/')[-1]
654 | if image in black_list:
655 | continue
656 | r = image_hw_ratio_dict[image]
657 | if r == 0:
658 | train_filelist1.append(f)
659 | else:
660 | train_filelist2.append(f)
661 | train_val_filelist = train_filelist1 + train_filelist2
662 | val_filelist = train_filelist1[-2048:]
663 | train_filelist1 = train_filelist1[:-2048]
664 |
665 | train_filelist2 = train_filelist2
666 | image_size = [512, 64]
667 |
668 | if args.phase in ['test', 'val', 'train_val']:
669 |         # test: output the text recognition results
670 | test_dataset = dataloader.DataSet(
671 | test_filelist,
672 | image_label_dict,
673 | num_classes,
674 | # transform=train_transform,
675 | args=args,
676 | image_size=image_size,
677 | phase='test')
678 | test_loader = DataLoader(
679 | dataset=test_dataset,
680 | batch_size=1,
681 | shuffle=False,
682 | num_workers=8,
683 | pin_memory=True)
684 | train_filelist = train_filelist1[-2048:]
685 | train_dataset = dataloader.DataSet(
686 | train_filelist,
687 | image_label_dict,
688 | num_classes,
689 | image_size=image_size,
690 | args=args,
691 | phase='test')
692 | train_loader = DataLoader(
693 | dataset=train_dataset,
694 | batch_size=1,
695 | shuffle=False,
696 | num_workers=8,
697 | pin_memory=True)
698 |
699 | val_dataset = dataloader.DataSet(
700 | val_filelist,
701 | image_label_dict,
702 | num_classes,
703 | image_size=image_size,
704 | args=args,
705 | phase='test')
706 | val_loader = DataLoader(
707 | dataset=val_dataset,
708 | batch_size=1,
709 | shuffle=False,
710 | num_workers=8,
711 | pin_memory=True)
712 |
713 | train_val_dataset = dataloader.DataSet(
714 | train_val_filelist,
715 | image_label_dict,
716 | num_classes,
717 | image_size=image_size,
718 | args=args,
719 | phase='test')
720 |         train_val_loader = DataLoader(
721 | dataset=train_val_dataset,
722 | batch_size=1,
723 | shuffle=False,
724 | num_workers=8,
725 | pin_memory=True)
726 |
727 | if args.phase == 'test':
728 | test(start_epoch - 1, model, val_loader, 'val')
729 | test(start_epoch - 1, model, test_loader, 'test')
730 | # test(start_epoch - 1, model, train_val_loader, 'train_val')
731 | elif args.phase == 'val':
732 | test(start_epoch - 1, model, train_loader, 'train')
733 | test(start_epoch - 1, model, val_loader, 'val')
734 | elif args.phase == 'train_val':
735 | test(start_epoch - 1, model, train_val_loader, 'train_val')
736 | return
737 |
738 | elif args.phase == 'train':
739 |
740 | train_dataset1 = dataloader.DataSet(
741 | train_filelist1,
742 | image_label_dict,
743 | num_classes,
744 | image_size=image_size,
745 | args=args,
746 | phase='train')
747 | train_loader1 = DataLoader(
748 | dataset=train_dataset1,
749 | batch_size=args.batch_size,
750 | shuffle=True,
751 | num_workers=8,
752 | pin_memory=True)
753 | train_dataset2 = dataloader.DataSet(
754 | train_filelist2,
755 | image_label_dict,
756 | num_classes,
757 | image_size=(1024,64),
758 | args=args,
759 | phase='train')
760 | train_loader2 = DataLoader(
761 | dataset=train_dataset2,
762 | batch_size=args.batch_size / 2,
763 | shuffle=True,
764 | num_workers=8,
765 | pin_memory=True)
766 | val_dataset = dataloader.DataSet(
767 | val_filelist,
768 | image_label_dict,
769 | num_classes,
770 | image_size=image_size,
771 | args=args,
772 | phase='val')
773 | val_loader = DataLoader(
774 | dataset=val_dataset,
775 | batch_size=min(8,args.batch_size),
776 | shuffle=False,
777 | num_workers=8,
778 | pin_memory=True)
779 | filelist = glob(os.path.join(args.bg_dir,'*'))
780 | pretrain_dataset1 = dataloader.DataSet(
781 | filelist,
782 | image_label_dict,
783 | num_classes,
784 | image_size=args.image_size,
785 | word_index_dict = word_index_dict,
786 | args=args,
787 | font_range=[8,32],
788 | margin=10,
789 | rotate_range=[-10., 10. ],
790 | phase='pretrain')
791 | pretrain_loader1 = DataLoader(
792 | dataset=pretrain_dataset1,
793 | batch_size=args.batch_size,
794 | shuffle=True,
795 | num_workers=8,
796 | pin_memory=True)
797 | pretrain_dataset2 = dataloader.DataSet(
798 | filelist,
799 | image_label_dict,
800 | num_classes,
801 | image_size=(256, 128),
802 | word_index_dict = word_index_dict,
803 | args=args,
804 | font_range=[24,64],
805 | margin=20,
806 | rotate_range=[-20., 20.],
807 | phase='pretrain')
808 | pretrain_loader2 = DataLoader(
809 | dataset=pretrain_dataset2,
810 | batch_size=args.batch_size,
811 | shuffle=True,
812 | num_workers=8,
813 | pin_memory=True)
814 |
815 | best_f1score = 0
816 | # eval_mode = 'pretrain-2'
817 | eval_mode = 'eval'
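    |         # training schedule: with eval_mode = 'eval' each epoch trains on train_loader1
    |         # (plus train_loader2 and hard mining once the val f1score exceeds 0.9, with the
    |         # learning rate dropped to 1e-4) and then evaluates on val_loader; the quoted-out
    |         # block further down is the synthetic pretraining curriculum (pretrain-2 on
    |         # 256x128 crops with large fonts, then pretrain-1 at the normal input size).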
818 | for epoch in range(start_epoch, args.epochs):
819 |
820 | args.epoch = epoch
821 |
822 | if eval_mode == 'eval':
823 | if best_f1score > 0.9:
824 | args.lr = 0.0001
825 | if best_f1score > 0.9:
826 | args.hard_mining = 1
827 |
828 | for param_group in optimizer.param_groups:
829 | param_group['lr'] = args.lr
830 |
831 | train_eval(epoch, model, train_loader1, loss, optimizer, 2., 'train-1')
832 | if best_f1score > 0.9:
833 | train_eval(epoch, model, train_loader2, loss, optimizer, 2., 'train-2')
834 | best_f1score = train_eval(epoch, model, val_loader, loss, optimizer, best_f1score, 'eval-{:d}-{:d}'.format(args.batch_size, args.hard_mining))
835 | continue
836 | '''
837 |
838 | if eval_mode == 'pretrain-2':
839 | args.epoch = 1
840 | best_f1score = train_eval(epoch, model, pretrain_loader2, loss, optimizer, best_f1score, 'pretrain-2')
841 | if best_f1score > 0.8:
842 | eval_mode = 'pretrain-1'
843 | best_f1score = 0
844 | elif eval_mode == 'pretrain-1':
845 | args.epoch = max(100, epoch)
846 | train_eval(epoch, model, pretrain_loader2, loss, optimizer, 2.0 , 'pretrain-2')
847 | best_f1score = train_eval(epoch, model, pretrain_loader1, loss, optimizer, best_f1score, 'pretrain-1')
848 | if best_f1score > 0.5:
849 | eval_mode = 'eval'
850 | best_f1score = 0
851 | else:
852 | train_eval(epoch, model, train_loader1, loss, optimizer, 2., 'train-1')
853 | train_eval(epoch, model, train_loader2, loss, optimizer, 2., 'train-2')
854 | best_f1score = train_eval(epoch, model, val_loader, loss, optimizer, best_f1score, 'eval-{:d}-{:d}'.format(args.batch_size, args.hard_mining))
855 |
856 | '''
857 |
858 |
859 |
860 |
861 |
862 |
863 | if __name__ == '__main__':
864 | main()
865 |
--------------------------------------------------------------------------------
/code/ocr/resnet.py:
--------------------------------------------------------------------------------
1 | # Implementation of https://arxiv.org/pdf/1512.03385.pdf.
2 | # See section 4.2 for model architecture on CIFAR-10.
3 | # Some part of the code was referenced below.
4 | # https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
5 | import torch
6 | import torch.nn as nn
7 | import torchvision.datasets as dsets
8 | import torchvision.transforms as transforms
9 | from torch.autograd import Variable
10 | import torch.nn.functional as F
11 |
12 | # 3x3 Convolution
13 | def conv3x3(in_channels, out_channels, stride=1):
14 | return nn.Conv2d(in_channels, out_channels, kernel_size=3,
15 | stride=stride, padding=1, bias=False)
16 |
17 | # Residual Block
18 | class ResidualBlock(nn.Module):
19 | def __init__(self, in_channels, out_channels, stride=1, downsample=None):
20 | super(ResidualBlock, self).__init__()
21 | self.conv1 = conv3x3(in_channels, out_channels, stride)
22 | self.bn1 = nn.BatchNorm2d(out_channels)
23 | self.relu = nn.ReLU(inplace=True)
24 | self.conv2 = conv3x3(out_channels, out_channels)
25 | self.bn2 = nn.BatchNorm2d(out_channels)
26 | self.downsample = downsample
27 |
28 | def forward(self, x):
29 | residual = x
30 | out = self.conv1(x)
31 | out = self.bn1(out)
32 | out = self.relu(out)
33 | out = self.conv2(out)
34 | out = self.bn2(out)
35 | if self.downsample:
36 | residual = self.downsample(x)
37 | out += residual
38 | out = self.relu(out)
39 | return out
40 |
41 | # ResNet Module
42 | class ResNet(nn.Module):
43 | def __init__(self, block=ResidualBlock, layers=[2,3], num_classes=10, args=None):
44 | super(ResNet, self).__init__()
45 | self.in_channels = 16
46 | self.conv = conv3x3(3, 16)
47 | self.bn = nn.BatchNorm2d(16)
48 | self.relu = nn.ReLU(inplace=True)
49 | self.layer1 = self.make_layer(block, 32, layers[0], 2)
50 | self.layer2 = self.make_layer(block, 64, layers[0], 2)
51 | self.layer3 = self.make_layer(block, 128, layers[0], 2)
52 | self.layer4 = self.make_layer(block, 128, layers[0], 2)
53 | self.layer5 = self.make_layer(block, 128, layers[0], 2)
54 | self.fc = nn.Linear(128, num_classes)
55 |
56 | # detect
57 | self.convt1 = nn.Sequential(
58 | nn.ConvTranspose2d(128,128,kernel_size=2, stride=2),
59 | nn.BatchNorm2d(128),
60 | nn.ReLU(inplace=True))
61 | self.convt2 = nn.Sequential(
62 | nn.ConvTranspose2d(128,128,kernel_size=2, stride=2),
63 | nn.BatchNorm2d(128),
64 | nn.ReLU(inplace=True))
65 | self.convt3 = nn.Sequential(
66 | nn.ConvTranspose2d(128,128,kernel_size=2, stride=2),
67 | nn.BatchNorm2d(128),
68 | nn.ReLU(inplace=True))
69 | self.convt4 = nn.Sequential(
70 | nn.ConvTranspose2d(128,128,kernel_size=2, stride=2),
71 | nn.BatchNorm2d(128),
72 | nn.ReLU(inplace=True))
73 | self.in_channels = 256
74 | self.dec1 = self.make_layer(block, 128, layers[0])
75 | self.in_channels = 256
76 | self.dec2 = self.make_layer(block, 128, layers[0])
77 | self.in_channels = 192
78 | self.dec3 = self.make_layer(block, 128, layers[0])
79 | self.in_channels = 160
80 | # self.dec4 = self.make_layer(block, 1, layers[0])
81 | self.dec4 = nn.Sequential(
82 | nn.Conv2d(160, 256, kernel_size=3, padding=1),
83 | nn.BatchNorm2d(256),
84 | nn.ReLU(inplace=True),
85 | nn.Conv2d(256, 1, kernel_size=1, bias=True)
86 | )
87 | self.in_channels = 256
88 | # self.dec2 = self.make_layer(block, 256, layers[0])
89 | # self.output = conv3x3(256, 4 * len(args.anchors))
90 | self.bbox = nn.Sequential(
91 | nn.Conv2d(256, 256, kernel_size=3, padding=1),
92 | nn.BatchNorm2d(256),
93 | nn.ReLU(inplace=True),
94 | nn.Conv2d(256, 4 * len(args.anchors), kernel_size=1, bias=True)
95 | )
96 | self.sigmoid = nn.Sigmoid()
97 |
98 |
99 | def make_layer(self, block, out_channels, blocks, stride=1):
100 | downsample = None
101 | if (stride != 1) or (self.in_channels != out_channels):
102 | downsample = nn.Sequential(
103 | conv3x3(self.in_channels, out_channels, stride=stride),
104 | nn.BatchNorm2d(out_channels))
105 | layers = []
106 | layers.append(block(self.in_channels, out_channels, stride, downsample))
107 | self.in_channels = out_channels
108 | for i in range(1, blocks):
109 | layers.append(block(out_channels, out_channels))
110 | return nn.Sequential(*layers)
111 |
112 | def forward(self, x, phase='train'):
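    |         # phase == 'seg': global max-pool of the deepest feature map, then a single fc
    |         #                 -> per-image character probabilities.
    |         # other phases:   apply the fc at every spatial position of the pooled feature
    |         #                 map and take the max over positions (multi-label output).
    |         # for phase in ('seg', 'pretrain', 'pretrain2') the decoder branch below also
    |         # returns a bbox regression map and a sigmoid segmentation map.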
113 | out = self.conv(x)
114 | # print out.size()
115 | out = self.bn(out)
116 | # print out.size()
117 | out = self.relu(out)
118 | # print out.size()
119 | out1 = self.layer1(out) # 64
120 | # print out1.size()
121 | out2 = self.layer2(out1) # 32
122 | # print out2.size()
123 | out3 = self.layer3(out2) # 16
124 | # print out3.size()
125 | out4 = self.layer4(out3) # 8
126 | # print out4.size()
127 | out5 = self.layer5(out4) # 4
128 | # print out5.size()
129 |
130 | # out = F.adaptive_max_pool2d(out5, output_size=(1,1)).view(out.size(0), -1) # 128
131 | # out = out.view(out.size(0), -1)
132 |
133 | if phase == 'seg':
134 | out = F.adaptive_max_pool2d(out5, output_size=(1,1)).view(out.size(0), -1) # 128
135 | out = self.fc(out)
136 | out = out.view(out.size(0), -1)
137 | else:
138 | out = F.max_pool2d(out5, 2)
139 | out_size = out.size()
140 | # out = out.view(out_size[0],out_size[1],out_size[3]).transpose(1,2).contiguous().view(-1, out_size[1])
141 | out = out.view(out_size[0],out_size[1],out_size[2] * out_size[3]).transpose(1,2).contiguous().view(-1, out_size[1])
142 | out = self.fc(out)
143 | out = out.view(out_size[0], out_size[2] * out_size[3], -1).transpose(1,2).contiguous()
144 | out = F.adaptive_max_pool1d(out, output_size=(1)).view(out_size[0], -1)
145 |
146 | # print out.size()
147 | if phase not in ['seg', 'pretrain', 'pretrain2']:
148 | return out
149 |
150 | # detect
151 | cat1 = torch.cat([self.convt1(out5), out4], 1)
152 | # print cat1.size()
153 | dec1 = self.dec1(cat1)
154 | # print dec1.size()
155 | # print out3.size()
156 | cat2 = torch.cat([self.convt2(dec1), out3], 1)
157 | # print cat2.size()
158 | dec2 = self.dec2(cat2)
159 | cat3 = torch.cat([self.convt3(dec2), out2], 1)
160 | dec3 = self.dec3(cat3)
161 | cat4 = torch.cat([self.convt4(dec3), out1], 1)
162 | seg = self.dec4(cat4)
163 | seg = seg.view((seg.size(0), seg.size(2), seg.size(3)))
164 | seg = self.sigmoid(seg)
165 |
166 | bbox = self.bbox(cat2)
167 | # dec2 = self.output(dec2)
168 | # print dec2.size()
169 | size = bbox.size()
170 | bbox = bbox.view((size[0], size[1], -1)).transpose(1,2).contiguous()
171 | bbox = bbox.view((size[0], size[2],size[3],-1, 4))
172 |
173 | return out, bbox, seg
174 |
175 | # resnet = ResNet(ResidualBlock, [2, 2, 2, 2])
176 |
--------------------------------------------------------------------------------
/code/ocr/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yinchangchang/ocr_densenet/a31f57e006f73b52b3881fd4a771320f02df2147/code/ocr/tools/__init__.py
--------------------------------------------------------------------------------
/code/ocr/tools/measures.py:
--------------------------------------------------------------------------------
1 | # coding=utf8
2 | import os
3 | import numpy as np
4 | from sklearn import metrics
5 | from PIL import Image
6 | import traceback
7 |
8 | def stati_class_number_true_flase(label, pred):
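  |     # per-class counts: 'number' = ground-truth occurrences of the class,
  |     # 'true' = samples of that class predicted correctly, 'pred' = predicted occurrences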
9 | label = np.array(label)
10 | pred = np.array(pred)
11 |
12 | cls_list = set(label) | set(pred)
13 | d = dict()
14 | for cls in cls_list:
15 | d[cls] = dict()
16 | d[cls]['number'] = np.sum(label==cls)
17 | d[cls]['true'] = np.sum(label[label==cls]==pred[label==cls])
18 | d[cls]['pred'] = np.sum(pred==cls)
19 | return d
20 |
21 | def stati_class_number_true_flase_multi_label_margin(labels, preds):
22 |
23 | d = dict()
24 | for label, pred in zip(labels, preds):
25 | label = set(label[label>=0])
26 | for cls in range(len(pred)):
27 | if cls not in d:
28 | d[cls] = dict()
29 | d[cls]['number'] = 0
30 | d[cls]['true'] = 0
31 | d[cls]['pred'] = 0
32 | if cls in label:
33 | d[cls]['number'] += 1
34 | if pred[cls] > 0.5:
35 | d[cls]['true'] += 1
36 | if pred[cls] > 0.5:
37 | d[cls]['pred'] += 1
38 | return d
39 |
40 | def stati_class_number_true_flase_bce(labels, preds):
41 | d = dict()
42 | labels = labels.astype(np.int64).reshape(-1)
43 | preds = preds.reshape(-1) > 0
44 | index = labels >= 0
45 | labels = labels[index]
46 | preds = preds[index]
47 |
48 | preds_num = preds.sum(0)
49 | true_num = (labels+preds==2).sum(0)
50 | for cls in range(2):
51 | d[cls] = dict()
52 | d[cls]['number'] = (labels==cls).sum()
53 | d[cls]['true'] = (labels+preds==2*cls).sum()
54 |         d[cls]['pred'] = (preds==cls).sum()
55 | return d
56 |
57 | def measures(d_list):
58 |     # merge the statistics of every prediction batch
59 | d_all = dict()
60 | for d in d_list:
61 | for cls in d.keys():
62 | if cls not in d_all:
63 | d_all[cls] = dict()
64 | for k in d[cls].keys():
65 | if k not in d_all[cls]:
66 | d_all[cls][k] = 0
67 | d_all[cls][k] += d[cls][k]
68 | m = dict()
69 | number = sum([d_all[cls]['number'] for cls in d_all.keys()])
70 | for cls in d_all:
71 | m[cls] = dict()
72 | m[cls]['number'] = d_all[cls]['number']
73 | m[cls]['true'] = d_all[cls]['true']
74 | m[cls]['pred'] = d_all[cls]['pred']
75 | m[cls]['ratio'] = d_all[cls]['number'] / (float(number) + 10e-10)
76 | m[cls]['accuracy'] = d_all[cls]['true'] / (float(d_all[cls]['number']) + 10e-10)
77 | m[cls]['precision'] = d_all[cls]['true'] /(float(d_all[cls]['pred']) + 10e-10)
78 | return m
79 |
80 | def print_measures(m, s = 'measures'):
81 | print s
82 | accuracy = 0
83 | for cls in sorted(m.keys()):
84 | print '\tclass: {:d}\taccuracy:{:.6f}\tprecision:{:.6f}\tratio:{:.6f}\t\tN/T/P:{:d}/{:d}/{:d}\
85 | '.format(cls, m[cls]['accuracy'],m[cls]['precision'],m[cls]['ratio'],m[cls]['number'],m[cls]['true'],m[cls]['pred'])
86 | accuracy += m[cls]['accuracy'] * m[cls]['ratio']
87 | print '\tacc:{:.6f}'.format(accuracy)
88 | return accuracy
89 |
90 | def mse(pred_image, image):
91 | pred_image = pred_image.reshape(-1).astype(np.float32)
92 | image = image.reshape(-1).astype(np.float32)
93 | mse_err = metrics.mean_squared_error(pred_image,image)
94 | return mse_err
95 |
96 | def psnr(pred_image, image):
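   |     # PSNR = 10 * log10(255^2 / MSE); e.g. an MSE of 1.0 gives 10 * log10(65025) ~= 48.13 dB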
97 | return 10 * np.log10(255*255/mse(pred_image,image))
98 |
99 |
100 | def psnr_pred(stain_vis=20, end= 10000):
101 | clean_dir = '../../data/AI/testB/'
102 | psnr_list = []
103 | f = open('../../data/result.csv','w')
104 | for i,clean in enumerate(os.listdir(clean_dir)):
105 | clean = os.path.join(clean_dir, clean)
106 | clean_file = clean
107 | pred = clean.replace('.jpg','.png').replace('data','data/test_clean')
108 | stain = clean.replace('trainB','trainA').replace('testB','testA').replace('.jpg','_.jpg')
109 |
110 | try:
111 | pred = np.array(Image.open(pred).resize((250,250))).astype(np.float32)
112 | clean = np.array(Image.open(clean).resize((250,250))).astype(np.float32)
113 | stain = np.array(Image.open(stain).resize((250,250))).astype(np.float32)
114 |
115 | # diff = np.abs(stain - pred)
116 | # vis = 20
117 | # pred[diffgray_vis] = stain[stain>gray_vis]
121 |
122 | if end < 1000:
123 | diff = np.abs(clean - stain)
124 | # stain[diff>stain_vis] = pred[diff>stain_vis]
125 | stain[diff>stain_vis] = clean[diff>stain_vis]
126 |
127 | psnr_pred = psnr(clean, pred)
128 | psnr_stain = psnr(clean, stain)
129 | psnr_list.append([psnr_stain, psnr_pred])
130 | except:
131 | continue
132 | if i>end:
133 | break
134 | print i, min(end, 1000)
135 |
136 | f.write(clean_file.split('/')[-1].split('.')[0])
137 | f.write(',')
138 | f.write(str(psnr_stain))
139 | f.write(',')
140 | f.write(str(psnr_pred))
141 | f.write(',')
142 | f.write(str(psnr_pred/psnr_stain - 1))
143 | f.write('\n')
144 |     # print 'prediction', np.mean(psnr_list)
145 | psnr_list = np.array(psnr_list)
146 | psnr_mean = ((psnr_list[:,1] - psnr_list[:,0]) / psnr_list[:,0]).mean()
147 | if end > 1000:
148 |         print 'stained-image PSNR', psnr_list[:,0].mean()
149 |         print 'predicted-image PSNR', psnr_list[:,1].mean()
150 |         print 'gain ratio', psnr_mean
151 | f.write(str(psnr_mean))
152 | f.close()
153 | return psnr_list[:,0].mean()
154 |
155 | def main():
156 | pmax = [0.,0.]
157 | for vis in range(1, 30):
158 | p = psnr_pred(vis, 10)
159 | print vis, p
160 | if p > pmax[1]:
161 | pmax = [vis, p]
162 | print '...'
163 | # print 256,psnr_pred(256)
164 | print pmax
165 | # print 10 * np.log10(255*255/metrics.mean_squared_error([3],[9]))
166 |
167 |
168 | if __name__ == '__main__':
169 | psnr_pred(4000)
170 | # main()
171 | # for v in range(1,10):
172 | # print v, 10 * np.log10(255*255/v/v)
173 |
--------------------------------------------------------------------------------
/code/ocr/tools/parse.py:
--------------------------------------------------------------------------------
1 | # coding=utf8
2 |
3 | import argparse
4 |
5 | parser = argparse.ArgumentParser(description='OCR')
6 |
7 | parser.add_argument(
8 | '--model',
9 | '-m',
10 | type=str,
11 | default='densenet',
12 | help='model'
13 | )
14 | parser.add_argument(
15 | '--data-dir',
16 | '-d',
17 | type=str,
18 | default='../../data/dataset/',
19 | help='data directory'
20 | )
21 | parser.add_argument(
22 | '--bg-dir',
23 | type=str,
24 | default='../../data/images',
25 |     help='background images directory'
26 | )
27 | parser.add_argument(
28 | '--hard-mining',
29 | type=int,
30 | default=0,
31 | help='use hard mining'
32 | )
33 | parser.add_argument('--phase',
34 | default='train',
35 | type=str,
36 | metavar='S',
37 | help='pretrain/train/test phase')
38 | parser.add_argument(
39 | '--batch-size',
40 | '-b',
41 | metavar='BATCH SIZE',
42 | type=int,
43 | default=16,
44 | help='batch size'
45 | )
46 | parser.add_argument('--save-dir',
47 | default='../../data',
48 | type=str,
49 | metavar='S',
50 | help='save dir')
51 | parser.add_argument('--word-index-json',
52 | default='../../files/alphabet_index_dict.json',
53 | type=str,
54 | metavar='S',
55 |                     help='word-to-index dict json')
56 | parser.add_argument('--black-json',
57 | default='../../files/black.json',
58 | type=str,
59 | metavar='S',
60 | help='black_list json')
61 | parser.add_argument('--image-hw-ratio-json',
62 | default='../../files/image_hw_ratio_dict.json',
63 | type=str,
64 | metavar='S',
65 | help='image h:w ratio dict')
66 | parser.add_argument('--word-count-json',
67 | default='../../files/alphabet_count_dict.json',
68 | type=str,
69 | metavar='S',
70 | help='word count file')
71 | parser.add_argument('--image-label-json',
72 | default='../../files/train_alphabet.json',
73 | type=str,
74 | metavar='S',
75 | help='image label json')
76 | parser.add_argument('--resume',
77 | default='',
78 | type=str,
79 | metavar='S',
80 | help='start from checkpoints')
81 | parser.add_argument('--no-aug',
82 | default=0,
83 | type=int,
84 | metavar='S',
85 | help='no augmentation')
86 | parser.add_argument('--small',
87 | default=1,
88 | type=int,
89 | metavar='S',
90 | help='small fonts')
91 | parser.add_argument('--difficult',
92 | default=0,
93 | type=int,
94 | metavar='S',
95 |                     help='only evaluate the harder images')
96 | parser.add_argument('--hist',
97 | default=0,
98 | type=int,
99 | metavar='S',
100 |                     help='use histogram equalization')
101 | parser.add_argument('--feat',
102 | default=0,
103 | type=int,
104 | metavar='S',
105 |                     help='generate features for an LSTM')
106 |
107 | #####
108 | parser.add_argument('-j',
109 | '--workers',
110 | default=8,
111 | type=int,
112 | metavar='N',
113 |                     help='number of data loading workers (default: 8)')
114 | parser.add_argument('--lr',
115 | '--learning-rate',
116 | default=0.001,
117 | type=float,
118 | metavar='LR',
119 | help='initial learning rate')
120 | parser.add_argument('--epochs',
121 | default=10000,
122 | type=int,
123 | metavar='N',
124 | help='number of total epochs to run')
125 | parser.add_argument('--save-freq',
126 | default='5',
127 | type=int,
128 | metavar='S',
129 | help='save frequency')
130 | parser.add_argument('--save-pred-freq',
131 | default='10',
132 | type=int,
133 | metavar='S',
134 | help='save pred clean frequency')
135 | parser.add_argument('--val-freq',
136 | default='5',
137 | type=int,
138 | metavar='S',
139 | help='val frequency')
140 | parser.add_argument('--debug',
141 | default=0,
142 | type=int,
143 | metavar='S',
144 | help='debug')
145 | parser.add_argument('--input-filter',
146 | default=7,
147 | type=int,
148 | metavar='S',
149 |                     help='input filter')
150 | parser.add_argument('--use-gan',
151 | default=0,
152 | type=int,
153 | metavar='S',
154 | help='use GAN')
155 | parser.add_argument('--write-pred',
156 | default=0,
157 | type=int,
158 | metavar='S',
159 |                     help='write predictions')
160 | parser.add_argument(
161 | '--result-file',
162 | '-r',
163 | type=str,
164 | default='../../data/result/test_result.csv',
165 | help='result file'
166 | )
167 | parser.add_argument(
168 | '--output-file',
169 | '-o',
170 | type=str,
171 | default='../../data/result/test.csv',
172 | help='output file'
173 | )
174 | args = parser.parse_args()
175 |
--------------------------------------------------------------------------------
/code/ocr/tools/plot.py:
--------------------------------------------------------------------------------
1 | # coding=utf8
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 |
5 | def plot_multi_graph(image_list, name_list, save_path=None, show=False):
6 | graph_place = int(np.sqrt(len(name_list) - 1)) + 1
7 | for i, (image, name) in enumerate(zip(image_list, name_list)):
8 | ax1 = plt.subplot(graph_place,graph_place,i+1)
9 | ax1.set_title(name)
10 | # plt.imshow(image,cmap='gray')
11 | plt.imshow(image)
12 | plt.axis('off')
13 | if save_path:
14 | plt.savefig(save_path)
15 | pass
16 | if show:
17 | plt.show()
18 |
19 | def plot_multi_line(x_list, y_list, name_list, save_path=None, show=False):
20 | graph_place = int(np.sqrt(len(name_list) - 1)) + 1
21 | for i, (x, y, name) in enumerate(zip(x_list, y_list, name_list)):
22 | ax1 = plt.subplot(graph_place,graph_place,i+1)
23 | ax1.set_title(name)
24 | plt.plot(x,y)
25 | # plt.imshow(image,cmap='gray')
26 | if save_path:
27 | plt.savefig(save_path)
28 | if show:
29 | plt.show()
30 |
31 |
32 |
--------------------------------------------------------------------------------
/code/ocr/tools/py_op.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Commonly used Python helper functions.
4 | """
5 | import os
6 | import json
7 | import traceback
8 | from collections import OrderedDict
9 | import random
10 | from fuzzywuzzy import fuzz
11 |
12 | import sys
13 | reload(sys)
14 | sys.setdefaultencoding('utf-8')
15 |
16 | ################################################################################
17 | ### pre define variables
18 | #:: enumerate
19 | #:: raw_input
20 | #:: listdir
21 | #:: sorted
22 | ### pre define function
23 | def mywritejson(save_path,content):
24 | content = json.dumps(content,indent=4,ensure_ascii=False)
25 | with open(save_path,'w') as f:
26 | f.write(content)
27 |
28 | def myreadjson(load_path):
29 | with open(load_path,'r') as f:
30 | return json.loads(f.read())
31 |
32 | def mywritefile(save_path,content):
33 | with open(save_path,'w') as f:
34 | f.write(content)
35 |
36 | def myreadfile(load_path):
37 | with open(load_path,'r') as f:
38 | return f.read()
39 |
40 | def myprint(content):
41 | print json.dumps(content,indent=4,ensure_ascii=False)
42 |
43 | def rm(fi):
44 | os.system('rm ' + fi)
45 |
46 | def mystrip(s):
47 | return ''.join(s.split())
48 |
49 | def mysorteddict(d,key = lambda s:s, reverse=False):
50 | dordered = OrderedDict()
51 | for k in sorted(d.keys(),key = key,reverse=reverse):
52 | dordered[k] = d[k]
53 | return dordered
54 |
55 | def mysorteddictfile(src,obj):
56 | mywritejson(obj,mysorteddict(myreadjson(src)))
57 |
58 | def myfuzzymatch(srcs,objs,grade=80):
59 | matchDict = OrderedDict()
60 | for src in srcs:
61 | for obj in objs:
62 | value = fuzz.partial_ratio(src,obj)
63 | if value > grade:
64 | try:
65 | matchDict[src].append(obj)
66 | except:
67 | matchDict[src] = [obj]
68 | return matchDict
69 |
70 | def mydumps(x):
71 |     return json.dumps(x,indent=4,ensure_ascii=False)
72 |
73 | def get_random_list(l,num=-1,isunique=0):
74 | if isunique:
75 | l = set(l)
76 | if num < 0:
77 | num = len(l)
78 | if isunique and num > len(l):
79 | return
80 | lnew = []
81 | l = list(l)
82 | while(num>len(lnew)):
83 | x = l[int(random.random()*len(l))]
84 | if isunique and x in lnew:
85 | continue
86 | lnew.append(x)
87 | return lnew
88 |
89 | def fuzz_list(node1_list,node2_list,score_baseline=66,proposal_num=10,string_map=None):
90 | node_dict = { }
91 | for i,node1 in enumerate(node1_list):
92 | match_score_dict = { }
93 | for node2 in node2_list:
94 | if node1 != node2:
95 | if string_map is not None:
96 | n1 = string_map(node1)
97 | n2 = string_map(node2)
98 | score = fuzz.partial_ratio(n1,n2)
99 | if n1 == n2:
100 | node2_list.remove(node2)
101 | else:
102 | score = fuzz.partial_ratio(node1,node2)
103 | if score > score_baseline:
104 | match_score_dict[node2] = score
105 | else:
106 | node2_list.remove(node2)
107 | node2_sort = sorted(match_score_dict.keys(), key=lambda k:match_score_dict[k],reverse=True)
108 | node_dict[node1] = [[n,match_score_dict[n]] for n in node2_sort[:proposal_num]]
109 | print i,len(node1_list)
110 | return node_dict, node2_list
111 |
112 | def swap(a,b):
113 | return b, a
114 |
115 | def mkdir(d):
116 | path = d.split('/')
117 | for i in range(len(path)):
118 | d = '/'.join(path[:i+1])
119 | if not os.path.exists(d):
120 | os.mkdir(d)
121 |
122 |
--------------------------------------------------------------------------------
/code/ocr/tools/segmentation.py:
--------------------------------------------------------------------------------
1 | # coding=utf8
2 | import matplotlib.pyplot as plt
3 | from scipy import ndimage as ndi
4 | from skimage import morphology,color,data
5 | from skimage import filters
6 | import numpy as np
7 | import skimage
8 | import os
9 | from skimage import measure
10 |
11 |
12 |
13 | def watershed(image, label=None):
14 |     denoised = filters.rank.median(image, morphology.disk(2)) # remove noise
15 |     # use pixels whose gradient is below 10 as the initial markers
16 |     markers = filters.rank.gradient(denoised, morphology.disk(5)) < 10
17 |     markers = ndi.label(markers)[0]
18 |
19 |     gradient = filters.rank.gradient(denoised, morphology.disk(2)) # compute the gradient
20 |     labels = morphology.watershed(gradient, markers, mask=image) # gradient-based watershed
21 |
22 | fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(6, 6))
23 | axes = axes.ravel()
24 | ax0, ax1, ax2, ax3 = axes
25 |
26 | ax0.imshow(image, cmap=plt.cm.gray, interpolation='nearest')
27 | ax0.set_title("Original")
28 | # ax1.imshow(gradient, cmap=plt.cm.spectral, interpolation='nearest')
29 | ax1.imshow(gradient, cmap=plt.cm.gray, interpolation='nearest')
30 | ax1.set_title("Gradient")
31 | if label is not None:
32 | # ax2.imshow(markers, cmap=plt.cm.spectral, interpolation='nearest')
33 | ax2.imshow(label, cmap=plt.cm.gray, interpolation='nearest')
34 | else:
35 | ax2.imshow(markers, cmap=plt.cm.spectral, interpolation='nearest')
36 | ax2.set_title("Markers")
37 | ax3.imshow(labels, cmap=plt.cm.spectral, interpolation='nearest')
38 | ax3.set_title("Segmented")
39 |
40 | for ax in axes:
41 | ax.axis('off')
42 |
43 | fig.tight_layout()
44 | plt.show()
45 |
46 | def plot_4(image, gradient,label,segmentation, save_path=None):
47 | fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(6, 6))
48 | axes = axes.ravel()
49 | ax0, ax1, ax2, ax3 = axes
50 | ax0.imshow(image, cmap=plt.cm.gray, interpolation='nearest')
51 | ax0.set_title("Original")
52 | ax1.imshow(gradient, cmap=plt.cm.gray, interpolation='nearest')
53 | ax1.set_title("Gradient")
54 | ax2.imshow(label, cmap=plt.cm.gray, interpolation='nearest')
55 | ax2.set_title("label")
56 | ax3.imshow(segmentation, cmap=plt.cm.spectral, interpolation='nearest')
57 | ax3.set_title("Segmented")
58 |
59 | for ax in axes:
60 | ax.axis('off')
61 |
62 | fig.tight_layout()
63 | if save_path:
64 | print save_path
65 | plt.savefig(save_path)
66 | else:
67 | plt.show()
68 |
69 | def fill(image):
70 | '''
71 |     Fill the holes inside the mask.
72 |     Quick temporary implementation;
73 |     consider replacing it later.
74 | '''
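   |     # label the connected components of the zero-valued (non-mask) pixels (background=1)
   |     # and flip every component except the largest one to 1, which fills enclosed holes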
75 | label_img = measure.label(image, background=1)
76 | props = measure.regionprops(label_img)
77 | max_area = np.array([p.area for p in props]).max()
78 | for i,prop in enumerate(props):
79 | if prop.area < max_area:
80 | image[prop.coords[:,0],prop.coords[:,1]] = 1
81 | return image
82 |
83 |
84 |
85 | def my_watershed(image, label=None, min_gray=480, max_gray=708, min_gradient=5, show=False, save_path='/tmp/x.jpg'):
86 | image = image - min_gray
87 | image[image>max_gray] = 0
88 | image[image< 10] = 0
89 | image = image * 5
90 |
91 |     denoised = filters.rank.median(image, morphology.disk(2)) # remove noise
92 |     # use pixels whose gradient is below 10 as the initial markers
93 |     markers = filters.rank.gradient(denoised, morphology.disk(5)) < 10
94 |     markers = ndi.label(markers)[0]
95 |
96 |     gradient = filters.rank.gradient(denoised, morphology.disk(2)) # compute the gradient
97 | labels = gradient > min_gradient
98 |
99 | mask = gradient > min_gradient
100 | label_img = measure.label(mask, background=0)
101 | props = measure.regionprops(label_img)
102 | pred = np.zeros_like(gradient)
103 | for i,prop in enumerate(props):
104 | if prop.area > 50:
105 | region = np.array(prop.coords)
106 | vx,vy = region.var(0)
107 | v = vx + vy
108 | if v < 200:
109 | pred[prop.coords[:,0],prop.coords[:,1]] = 1
110 |
111 |     # fill the holes enclosed by the detected edges
112 | pred = fill(pred)
113 |
114 | if show:
115 | plot_4(image, gradient, label, pred)
116 | else:
117 | plot_4(image, gradient, label, pred, save_path)
118 |
119 | return pred
120 |
121 | def segmentation(image_npy, label_npy, save_path='/tmp/x.jpg'):
122 | print image_npy
123 | image = np.load(image_npy)
124 | label = np.load(label_npy)
125 | if np.sum(label) == 0:
126 | return
127 | min_gray,max_gray = 480, 708
128 | my_watershed(image,label,min_gray, max_gray,show=False, save_path=save_path)
129 |
130 | def main():
131 | data_dir = '/home/yin/all/PVL_DATA/preprocessed/2D/'
132 | save_dir = '/home/yin/all/PVL_DATA/tool_result/'
133 | os.system('rm -r ' + save_dir)
134 | os.system('mkdir ' + save_dir)
135 | for patient in os.listdir(data_dir):
136 | patient_dir = os.path.join(data_dir, patient)
137 | for f in os.listdir(patient_dir):
138 | if 'roi.npy' in f:
139 | label_npy = os.path.join(patient_dir,f)
140 | image_npy = label_npy.replace('.roi.npy','.npy')
141 | segmentation(image_npy,label_npy, os.path.join(save_dir,label_npy.strip('/').replace('/','.').replace('npy','jpg')))
142 |
143 | if __name__ == '__main__':
144 | # image =color.rgb2gray(data.camera())
145 | # watershed(image)
146 | main()
147 | image_npy = '/home/yin/all/PVL_DATA/preprocessed/2D/JD_chen_xi/23.npy'
148 | image_npy = '/home/yin/all/PVL_DATA/preprocessed/2D/JD_chen_xi/14.npy'
149 | image_npy = '/home/yin/all/PVL_DATA/preprocessed/2D/JD_zhang_yu_chen/23.npy'
150 | label_npy = image_npy.replace('.npy','.roi.npy')
151 | segmentation(image_npy,label_npy)
152 |
153 |
154 |
--------------------------------------------------------------------------------
/code/ocr/tools/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Copyright (c) 2017 www.drcubic.com, Inc. All Rights Reserved
5 | #
6 | """
7 | File: utils.py
8 | Author: shileicao(shileicao@stu.xjtu.edu.cn)
9 | Date: 2017-06-20 14:56:54
10 |
11 | **Note.** This code absorb some code from following source.
12 | 1. [DSB2017](https://github.com/lfz/DSB2017)
13 | """
14 |
15 | import os
16 | import sys
17 |
18 | import numpy as np
19 | import torch
20 |
21 |
22 | def getFreeId():
23 | import pynvml
24 |
25 | pynvml.nvmlInit()
26 |
27 | def getFreeRatio(id):
28 | handle = pynvml.nvmlDeviceGetHandleByIndex(id)
29 | use = pynvml.nvmlDeviceGetUtilizationRates(handle)
30 |         ratio = 0.5 * (float(use.gpu) + float(use.memory))
31 | return ratio
32 |
33 | deviceCount = pynvml.nvmlDeviceGetCount()
34 | available = []
35 | for i in range(deviceCount):
36 | if getFreeRatio(i) < 70:
37 | available.append(i)
38 | gpus = ''
39 | for g in available:
40 | gpus = gpus + str(g) + ','
41 | gpus = gpus[:-1]
42 | return gpus
43 |
44 |
45 | def setgpu(gpuinput):
46 | freeids = getFreeId()
47 | if gpuinput == 'all':
48 | gpus = freeids
49 | else:
50 | gpus = gpuinput
51 |         busy_gpu = [g for g in gpus.split(',') if g not in freeids]
52 |         if len(busy_gpu) > 0:
53 |             raise ValueError('gpu ' + ' '.join(busy_gpu) + ' is being used')
54 | print('using gpu ' + gpus)
55 | os.environ['CUDA_VISIBLE_DEVICES'] = gpus
56 | return len(gpus.split(','))
57 |
58 |
59 | def error_mask_stats(labels, filenames):
60 | error_f = []
61 | for i, f in enumerate(filenames):
62 | # if not np.all(labels[i] > 0):
63 | # error_f.append(f)
64 | for bbox_i in range(labels[i].shape[0]):
65 | imgs = np.load(f)
66 | if not np.all(
67 | np.array(imgs.shape[1:]) - labels[i][bbox_i][:-1] > 0):
68 | error_f.append(f)
69 | error_f = list(set(error_f))
70 | fileid_list = [os.path.split(filename)[1].split('_')[0]
71 | for filename in error_f]
72 | print("','".join(fileid_list))
73 | return error_f
74 |
75 |
76 | class Logger(object):
77 | def __init__(self, logfile):
78 | self.terminal = sys.stdout
79 | self.log = open(logfile, "a")
80 |
81 | def write(self, message):
82 | self.terminal.write(message)
83 | self.log.write(message)
84 |
85 | def flush(self):
86 | #this flush method is needed for python 3 compatibility.
87 | #this handles the flush command by doing nothing.
88 | #you might want to specify some extra behavior here.
89 | pass
90 |
91 |
92 | def split4(data, max_stride, margin):
93 | splits = []
94 | data = torch.Tensor.numpy(data)
95 | _, c, z, h, w = data.shape
96 |
97 | w_width = np.ceil(float(w / 2 + margin) /
98 | max_stride).astype('int') * max_stride
99 | h_width = np.ceil(float(h / 2 + margin) /
100 | max_stride).astype('int') * max_stride
101 | pad = int(np.ceil(float(z) / max_stride) * max_stride) - z
102 | leftpad = pad / 2
103 | pad = [[0, 0], [0, 0], [leftpad, pad - leftpad], [0, 0], [0, 0]]
104 | data = np.pad(data, pad, 'constant', constant_values=-1)
105 | data = torch.from_numpy(data)
106 | splits.append(data[:, :, :, :h_width, :w_width])
107 | splits.append(data[:, :, :, :h_width, -w_width:])
108 | splits.append(data[:, :, :, -h_width:, :w_width])
109 | splits.append(data[:, :, :, -h_width:, -w_width:])
110 |
111 | return torch.cat(splits, 0)
112 |
113 |
114 | def combine4(output, h, w):
115 | splits = []
116 | for i in range(len(output)):
117 | splits.append(output[i])
118 |
119 | output = np.zeros(
120 | (splits[0].shape[0], h, w, splits[0].shape[3],
121 | splits[0].shape[4]), np.float32)
122 |
123 | h0 = output.shape[1] / 2
124 | h1 = output.shape[1] - h0
125 | w0 = output.shape[2] / 2
126 | w1 = output.shape[2] - w0
127 |
128 | splits[0] = splits[0][:, :h0, :w0, :, :]
129 | output[:, :h0, :w0, :, :] = splits[0]
130 |
131 | splits[1] = splits[1][:, :h0, -w1:, :, :]
132 | output[:, :h0, -w1:, :, :] = splits[1]
133 |
134 | splits[2] = splits[2][:, -h1:, :w0, :, :]
135 | output[:, -h1:, :w0, :, :] = splits[2]
136 |
137 | splits[3] = splits[3][:, -h1:, -w1:, :, :]
138 | output[:, -h1:, -w1:, :, :] = splits[3]
139 |
140 | return output
141 |
142 |
143 | def split8(data, max_stride, margin):
144 | splits = []
145 | if isinstance(data, np.ndarray):
146 | c, z, h, w = data.shape
147 | else:
148 | _, c, z, h, w = data.size()
149 |
150 | z_width = np.ceil(float(z / 2 + margin) /
151 | max_stride).astype('int') * max_stride
152 | w_width = np.ceil(float(w / 2 + margin) /
153 | max_stride).astype('int') * max_stride
154 | h_width = np.ceil(float(h / 2 + margin) /
155 | max_stride).astype('int') * max_stride
156 | for zz in [[0, z_width], [-z_width, None]]:
157 | for hh in [[0, h_width], [-h_width, None]]:
158 | for ww in [[0, w_width], [-w_width, None]]:
159 | if isinstance(data, np.ndarray):
160 | splits.append(data[np.newaxis, :, zz[0]:zz[1], hh[0]:hh[1],
161 | ww[0]:ww[1]])
162 | else:
163 | splits.append(data[:, :, zz[0]:zz[1], hh[0]:hh[1], ww[0]:
164 | ww[1]])
165 |
166 | if isinstance(data, np.ndarray):
167 | return np.concatenate(splits, 0)
168 | else:
169 | return torch.cat(splits, 0)
170 |
171 |
172 | def combine8(output, z, h, w):
173 | splits = []
174 | for i in range(len(output)):
175 | splits.append(output[i])
176 |
177 | output = np.zeros(
178 | (z, h, w, splits[0].shape[3], splits[0].shape[4]), np.float32)
179 |
180 | z_width = z / 2
181 | h_width = h / 2
182 | w_width = w / 2
183 | i = 0
184 | for zz in [[0, z_width], [z_width - z, None]]:
185 | for hh in [[0, h_width], [h_width - h, None]]:
186 | for ww in [[0, w_width], [w_width - w, None]]:
187 | output[zz[0]:zz[1], hh[0]:hh[1], ww[0]:ww[1], :, :] = splits[
188 | i][zz[0]:zz[1], hh[0]:hh[1], ww[0]:ww[1], :, :]
189 | i = i + 1
190 |
191 | return output
192 |
193 |
194 | def split16(data, max_stride, margin):
195 | splits = []
196 | _, c, z, h, w = data.size()
197 |
198 | z_width = np.ceil(float(z / 4 + margin) /
199 | max_stride).astype('int') * max_stride
200 | z_pos = [z * 3 / 8 - z_width / 2, z * 5 / 8 - z_width / 2]
201 | h_width = np.ceil(float(h / 2 + margin) /
202 | max_stride).astype('int') * max_stride
203 | w_width = np.ceil(float(w / 2 + margin) /
204 | max_stride).astype('int') * max_stride
205 | for zz in [[0, z_width], [z_pos[0], z_pos[0] + z_width],
206 | [z_pos[1], z_pos[1] + z_width], [-z_width, None]]:
207 | for hh in [[0, h_width], [-h_width, None]]:
208 | for ww in [[0, w_width], [-w_width, None]]:
209 | splits.append(data[:, :, zz[0]:zz[1], hh[0]:hh[1], ww[0]:ww[
210 | 1]])
211 |
212 | return torch.cat(splits, 0)
213 |
214 |
215 | def combine16(output, z, h, w):
216 | splits = []
217 | for i in range(len(output)):
218 | splits.append(output[i])
219 |
220 | output = np.zeros(
221 | (z, h, w, splits[0].shape[3], splits[0].shape[4]), np.float32)
222 |
223 | z_width = z / 4
224 | h_width = h / 2
225 | w_width = w / 2
226 | splitzstart = splits[0].shape[0] / 2 - z_width / 2
227 | z_pos = [z * 3 / 8 - z_width / 2, z * 5 / 8 - z_width / 2]
228 | i = 0
229 | for zz, zz2 in zip(
230 | [[0, z_width], [z_width, z_width * 2], [z_width * 2, z_width * 3],
231 | [z_width * 3 - z, None]],
232 | [[0, z_width], [splitzstart, z_width + splitzstart],
233 | [splitzstart, z_width + splitzstart], [z_width * 3 - z, None]]):
234 | for hh in [[0, h_width], [h_width - h, None]]:
235 | for ww in [[0, w_width], [w_width - w, None]]:
236 | output[zz[0]:zz[1], hh[0]:hh[1], ww[0]:ww[1], :, :] = splits[
237 | i][zz2[0]:zz2[1], hh[0]:hh[1], ww[0]:ww[1], :, :]
238 | i = i + 1
239 |
240 | return output
241 |
242 |
243 | def split32(data, max_stride, margin):
244 | splits = []
245 | _, c, z, h, w = data.size()
246 |
247 | z_width = np.ceil(float(z / 2 + margin) /
248 | max_stride).astype('int') * max_stride
249 | w_width = np.ceil(float(w / 4 + margin) /
250 | max_stride).astype('int') * max_stride
251 | h_width = np.ceil(float(h / 4 + margin) /
252 | max_stride).astype('int') * max_stride
253 |
254 | w_pos = [w * 3 / 8 - w_width / 2, w * 5 / 8 - w_width / 2]
255 | h_pos = [h * 3 / 8 - h_width / 2, h * 5 / 8 - h_width / 2]
256 |
257 | for zz in [[0, z_width], [-z_width, None]]:
258 | for hh in [[0, h_width], [h_pos[0], h_pos[0] + h_width],
259 | [h_pos[1], h_pos[1] + h_width], [-h_width, None]]:
260 | for ww in [[0, w_width], [w_pos[0], w_pos[0] + w_width],
261 | [w_pos[1], w_pos[1] + w_width], [-w_width, None]]:
262 | splits.append(data[:, :, zz[0]:zz[1], hh[0]:hh[1], ww[0]:ww[
263 | 1]])
264 |
265 | return torch.cat(splits, 0)
266 |
267 |
268 | def combine32(splits, z, h, w):
269 |
270 | output = np.zeros(
271 | (z, h, w, splits[0].shape[3], splits[0].shape[4]), np.float32)
272 |
273 | z_width = int(np.ceil(float(z) / 2))
274 | h_width = int(np.ceil(float(h) / 4))
275 | w_width = int(np.ceil(float(w) / 4))
276 | splithstart = splits[0].shape[1] / 2 - h_width / 2
277 | splitwstart = splits[0].shape[2] / 2 - w_width / 2
278 |
279 | i = 0
280 | for zz in [[0, z_width], [z_width - z, None]]:
281 |
282 | for hh, hh2 in zip(
283 | [[0, h_width], [h_width, h_width * 2], [h_width * 2, h_width * 3],
284 | [h_width * 3 - h, None]],
285 | [[0, h_width], [splithstart, h_width + splithstart],
286 | [splithstart, h_width + splithstart], [h_width * 3 - h, None]]):
287 |
288 | for ww, ww2 in zip(
289 | [[0, w_width], [w_width, w_width * 2],
290 | [w_width * 2, w_width * 3], [w_width * 3 - w, None]],
291 | [[0, w_width], [splitwstart, w_width + splitwstart],
292 | [splitwstart, w_width + splitwstart],
293 | [w_width * 3 - w, None]]):
294 |
295 | output[zz[0]:zz[1], hh[0]:hh[1], ww[0]:ww[1], :, :] = splits[
296 | i][zz[0]:zz[1], hh2[0]:hh2[1], ww2[0]:ww2[1], :, :]
297 | i = i + 1
298 |
299 | return output
300 |
301 |
302 | def split64(data, max_stride, margin):
303 | splits = []
304 | _, c, z, h, w = data.size()
305 |
306 | z_width = np.ceil(float(z / 4 + margin) /
307 | max_stride).astype('int') * max_stride
308 | w_width = np.ceil(float(w / 4 + margin) /
309 | max_stride).astype('int') * max_stride
310 | h_width = np.ceil(float(h / 4 + margin) /
311 | max_stride).astype('int') * max_stride
312 |
313 | z_pos = [z * 3 / 8 - z_width / 2, z * 5 / 8 - z_width / 2]
314 | w_pos = [w * 3 / 8 - w_width / 2, w * 5 / 8 - w_width / 2]
315 | h_pos = [h * 3 / 8 - h_width / 2, h * 5 / 8 - h_width / 2]
316 |
317 | for zz in [[0, z_width], [z_pos[0], z_pos[0] + z_width],
318 | [z_pos[1], z_pos[1] + z_width], [-z_width, None]]:
319 | for hh in [[0, h_width], [h_pos[0], h_pos[0] + h_width],
320 | [h_pos[1], h_pos[1] + h_width], [-h_width, None]]:
321 | for ww in [[0, w_width], [w_pos[0], w_pos[0] + w_width],
322 | [w_pos[1], w_pos[1] + w_width], [-w_width, None]]:
323 | splits.append(data[:, :, zz[0]:zz[1], hh[0]:hh[1], ww[0]:ww[
324 | 1]])
325 |
326 | return torch.cat(splits, 0)
327 |
328 |
329 | def combine64(output, z, h, w):
330 | splits = []
331 | for i in range(len(output)):
332 | splits.append(output[i])
333 |
334 | output = np.zeros(
335 | (z, h, w, splits[0].shape[3], splits[0].shape[4]), np.float32)
336 |
337 | z_width = int(np.ceil(float(z) / 4))
338 | h_width = int(np.ceil(float(h) / 4))
339 | w_width = int(np.ceil(float(w) / 4))
340 | splitzstart = splits[0].shape[0] / 2 - z_width / 2
341 | splithstart = splits[0].shape[1] / 2 - h_width / 2
342 | splitwstart = splits[0].shape[2] / 2 - w_width / 2
343 |
344 | i = 0
345 | for zz, zz2 in zip(
346 | [[0, z_width], [z_width, z_width * 2], [z_width * 2, z_width * 3],
347 | [z_width * 3 - z, None]],
348 | [[0, z_width], [splitzstart, z_width + splitzstart],
349 | [splitzstart, z_width + splitzstart], [z_width * 3 - z, None]]):
350 |
351 | for hh, hh2 in zip(
352 | [[0, h_width], [h_width, h_width * 2], [h_width * 2, h_width * 3],
353 | [h_width * 3 - h, None]],
354 | [[0, h_width], [splithstart, h_width + splithstart],
355 | [splithstart, h_width + splithstart], [h_width * 3 - h, None]]):
356 |
357 | for ww, ww2 in zip(
358 | [[0, w_width], [w_width, w_width * 2],
359 | [w_width * 2, w_width * 3], [w_width * 3 - w, None]],
360 | [[0, w_width], [splitwstart, w_width + splitwstart],
361 | [splitwstart, w_width + splitwstart],
362 | [w_width * 3 - w, None]]):
363 |
364 | output[zz[0]:zz[1], hh[0]:hh[1], ww[0]:ww[1], :, :] = splits[
365 | i][zz2[0]:zz2[1], hh2[0]:hh2[1], ww2[0]:ww2[1], :, :]
366 | i = i + 1
367 |
368 | return output
369 |
--------------------------------------------------------------------------------
/code/preprocessing/analysis_dataset.py:
--------------------------------------------------------------------------------
1 | # coding=utf8
2 | #########################################################################
3 | # File Name: analysis_dataset.py
4 | # Author: ccyin
5 | # mail: ccyin04@gmail.com
6 | # Created Time: Fri 18 May 2018 04:19:58 PM CST
7 | #########################################################################
8 | '''
9 | This script analyzes the original dataset:
10 | stati_image_size: statistics on image sizes
11 | stati_label_length: statistics on label (text) lengths
12 | '''
13 |
14 | import os
15 | import json
16 | from PIL import Image
17 | import numpy as np
18 | from tqdm import tqdm
19 | import sys
20 | sys.path.append('../ocr')
21 | from tools import plot
22 |
23 | def stati_image_size(image_dir, save_dir, big_w_dir):
24 | if not os.path.exists(big_w_dir):
25 | os.mkdir(big_w_dir)
26 | if not os.path.exists(save_dir):
27 | os.mkdir(save_dir)
28 | h_count_dict, w_count_dict, r_count_dict = { }, { }, { }
29 | image_hw_ratio_dict = { }
30 | for image in os.listdir(image_dir):
31 | h,w = Image.open(os.path.join(image_dir, image)).size
32 | if w > 80:
33 | cmd = 'cp ../../data/dataset/train/{:s} {:s}'.format(image, big_w_dir)
34 | # os.system(cmd)
35 |
36 | r = int(h / 8. / w)
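   |         # note: PIL's .size is (width, height), so h above is actually the width and w the height;
   |         # e.g. a 600x60 image (hypothetical size) gives r = int(600 / 8. / 60) = 1 and goes to
   |         # the 64*1024 group, while a 300x60 image gives r = 0 and stays in the 64*512 group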
37 | h = h / 10
38 | w = w / 10
39 | r_count_dict[r] = r_count_dict.get(r, 0) + 1
40 | h_count_dict[h] = h_count_dict.get(h, 0) + 1
41 | w_count_dict[w] = w_count_dict.get(w, 0) + 1
42 | image_hw_ratio_dict[image] = r
43 |
44 | with open(os.path.join(save_dir, 'image_hw_ratio_dict.json'), 'w') as f:
45 | f.write(json.dumps(image_hw_ratio_dict, indent=4))
46 |
47 | x = range(max(h_count_dict.keys())+1)
48 | y = [0 for _ in x]
49 | for h in sorted(h_count_dict.keys()):
50 |         print 'image length {:d}~{:d}: {:d} images'.format(10*h, 10*h+10, h_count_dict[h])
51 | y[h] = h_count_dict[h]
52 | plot.plot_multi_line([x], [y], ['Length'], save_path='../../data/length.png', show=True)
53 |
54 | x = range(max(w_count_dict.keys())+1)
55 | y = [0 for _ in x]
56 | for w in sorted(w_count_dict.keys()):
57 |         print 'image width {:d}~{:d}: {:d} images'.format(10*w, 10*w+10, w_count_dict[w])
58 | y[w] = w_count_dict[w]
59 | plot.plot_multi_line([x], [y], ['Width'], save_path='../../data/width.png', show=True)
60 |
61 | x = range(max(r_count_dict.keys())+1)
62 | y = [0 for _ in x]
63 | for r in sorted(r_count_dict.keys()):
64 |         print 'aspect ratio {:d}~{:d}: {:d} images'.format(8*r, 8*r+8, r_count_dict[r])
65 | y[r] = r_count_dict[r]
66 | x = [8*(_+1) for _ in x]
67 | plot.plot_multi_line([x], [y], ['L/W'], save_path='../../data/ratio.png', show=True)
68 |
69 |     print '\nmost common length\n', sorted(h_count_dict.keys(), key=lambda h:h_count_dict[h])[-1] * 10
70 |     print '\nmost common width\n', sorted(w_count_dict.keys(), key=lambda w:w_count_dict[w])[-1] * 10
71 |
72 |     print 'suggestion: use 64 * 512 inputs'
73 |     print '            use 64 * 1024 inputs for part of the images'
74 |     print '            ignore the rest'
75 |     print 'suggestion: use an FCN and take a global max to get the final result'
76 |
77 | def stati_label_length(label_json, long_text_dir):
78 | if not os.path.exists(long_text_dir):
79 | os.mkdir(long_text_dir)
80 | image_label_json = json.load(open(label_json))
81 | l_count_dict = { }
82 | for image, label in image_label_json.items():
83 | l = len(label.split())
84 | l_count_dict[l] = l_count_dict.get(l, 0) + 1
85 | if l > 25:
86 | cmd = 'cp ../../data/dataset/train/{:s} {:s}'.format(image, long_text_dir)
87 | # os.system(cmd)
88 |
89 | word_num = 0.
90 | x = range(max(l_count_dict.keys())+1)
91 | y = [0 for _ in x]
92 | for l in sorted(l_count_dict.keys()):
93 | word_num += l * l_count_dict[l]
94 |         print 'text length {:d}: {:d} images'.format(l, l_count_dict[l])
95 | y[l] = l_count_dict[l]
96 | plot.plot_multi_line([x], [y], ['Word Number'], save_path='../../data/word_num.png', show=True)
97 |     print 'on average {:3.4f} characters per image'.format(word_num / sum(l_count_dict.values()))
98 |
99 | def stati_image_gray(image_dir):
100 | print 'eval train image gray'
101 | for image in tqdm(os.listdir(image_dir)):
102 | image = Image.open(os.path.join(image_dir, image)).convert('RGB')
103 | image = np.array(image)
104 | mi,ma = image.min(), image.max()
105 | assert mi >= 0
106 | assert ma < 256
107 |
108 | print 'eval test image gray'
109 | image_dir = image_dir.replace('train', 'test')
110 | for image in tqdm(os.listdir(image_dir)):
111 | image = Image.open(os.path.join(image_dir, image)).convert('RGB')
112 | image = np.array(image)
113 | mi,ma = image.min(), image.max()
114 | assert mi >= 0
115 | assert ma < 256
116 |
117 |
118 |
119 | def main():
120 | image_dir = '../../data/dataset/train'
121 | save_dir = '../../files/'
122 | big_w_dir = '../../data/big_w_dir'
123 | stati_image_size(image_dir, save_dir, big_w_dir)
124 |
125 | train_label_json = '../../files/train_alphabet.json'
126 | long_text_dir = '../../data/long_text_dir'
127 | stati_label_length(train_label_json, long_text_dir)
128 | # stati_image_gray(image_dir)
129 |
130 | if __name__ == '__main__':
131 | main()
132 |
--------------------------------------------------------------------------------
/code/preprocessing/map_word_to_index.py:
--------------------------------------------------------------------------------
1 | # coding=utf8
2 | #########################################################################
3 | # File Name: map_word_to_index.py
4 | # Author: ccyin
5 | # mail: ccyin04@gmail.com
6 | # Created Time: Fri 18 May 2018 03:30:26 PM CST
7 | #########################################################################
8 | '''
9 | Map every character to an index. Two possible schemes:
10 | 1. map each English word to an index
11 | 2. map each English letter to an index
12 | '''
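   | # hypothetical example: with word_index_dict = {u'的': 0, u'长': 1, u'为': 2},
   | # the sentence u'的长为' would be stored as the index string "0 1 2"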
13 |
14 | import os
15 | import sys
16 | reload(sys)
17 | sys.setdefaultencoding('utf8')
18 | import json
19 | from collections import OrderedDict
20 |
21 | def map_word_to_index(train_word_file, word_index_json, word_count_json, index_label_json, alphabet_to_index=True):
22 | with open(train_word_file, 'r') as f:
23 | labels = f.read().strip().decode('utf8')
24 | word_count_dict = { }
25 | for line in labels.split('\n')[1:]:
26 | line = line.strip()
27 | image, sentence = line.strip().split('.png,')
28 | sentence = sentence.strip('"')
29 | for w in sentence:
30 | word_count_dict[w] = word_count_dict.get(w,0) + 1
31 |     print 'there are {:d} distinct characters, {:d} in total'.format(len(word_count_dict), sum(word_count_dict.values()))
32 | word_sorted = sorted(word_count_dict.keys(), key=lambda k:word_count_dict[k], reverse=True)
33 | # word_index_dict = { w:i for i,w in enumerate(word_sorted) }
34 | word_index_dict = json.load(open(word_index_json))
35 |
36 | with open(word_count_json, 'w') as f:
37 | f.write(json.dumps(word_count_dict, indent=4, ensure_ascii=False))
38 | # with open(word_index_json, 'w') as f:
39 | # f.write(json.dumps(word_index_dict, indent=4, ensure_ascii=False))
40 |
41 | image_label_dict = OrderedDict()
42 | for line in labels.split('\n')[1:]:
43 | line = line.strip()
44 | image, sentence = line.strip().split('.png,')
45 | sentence = sentence.strip('"')
46 |
47 |         # replace some visually similar symbols
48 | for c in u" ":
49 | sentence = sentence.replace(c, '')
50 | replace_words = [
51 | u'((',
52 | u'))',
53 | u',,',
54 | u"´'′",
55 |             u"″\"“",
56 | u"..",
57 | u"—-"
58 | ]
59 | for words in replace_words:
60 | for w in words[:-1]:
61 | sentence = sentence.replace(w, words[-1])
62 |
63 | index_list = []
64 | for w in sentence:
65 | index_list.append(str(word_index_dict[w]))
66 | image_label_dict[image + '.png'] = ' '.join(index_list)
67 | with open(index_label_json, 'w') as f:
68 | f.write(json.dumps(image_label_dict, indent=4))
69 |
70 |
71 | def main():
72 |
73 | # 映射字母为index
74 | train_word_file = '../../files/train.csv'
75 | word_index_json = '../../files/alphabet_index_dict.json'
76 | word_count_json = '../../files/alphabet_count_dict.json'
77 | index_label_json = '../../files/train_alphabet.json'
78 | map_word_to_index(train_word_file, word_index_json, word_count_json, index_label_json, True)
79 |
80 | if __name__ == '__main__':
81 | main()
82 |
--------------------------------------------------------------------------------
/code/preprocessing/show_black.py:
--------------------------------------------------------------------------------
1 | # coding=utf8
2 | #########################################################################
3 | # File Name: show_black.py
4 | # Author: ccyin
5 | # mail: ccyin04@gmail.com
6 | # Created Time: Thu 07 Jun 2018 01:06:22
7 | #########################################################################
8 |
9 | import os
10 | import sys
11 | import json
12 | sys.path.append('../ocr')
13 | from tools import parse, py_op
14 | args = parse.args
15 |
16 | def cp_black_list(black_json, black_dir):
17 | word_index_dict = json.load(open(args.word_index_json))
18 | index_word_dict = { v:k for k,v in word_index_dict.items() }
19 | train_word_dict = json.load(open(args.image_label_json))
20 | train_word_dict = { k:''.join([index_word_dict[int(i)] for i in v.split()]) for k,v in train_word_dict.items() }
21 |
22 | py_op.mkdir(black_dir)
23 | black_list = json.load(open(black_json))['black_list']
24 | for i,name in enumerate(black_list):
25 | cmd = 'cp {:s} {:s}'.format(os.path.join(args.data_dir, 'train', name), black_dir)
26 | if train_word_dict[name] in ['Err:501', '#NAME?', '###']:
27 | continue
28 | print name
29 | print train_word_dict[name]
30 | os.system(cmd)
31 | if i > 30:
32 | break
33 |
34 | if __name__ == '__main__':
35 | black_dir = os.path.join(args.save_dir, 'black')
36 | cp_black_list(args.black_json, black_dir)
37 |
--------------------------------------------------------------------------------
/files/alphabet_count_dict.json:
--------------------------------------------------------------------------------
1 | {
2 | "挂": 9,
3 | "退": 5,
4 | "谈": 4,
5 | "随": 139,
6 | "抗": 4,
7 | "料": 95,
8 | "微": 7,
9 | "洞": 9,
10 | "造": 61,
11 | "般": 10,
12 | "潜": 3,
13 | "河": 48,
14 | "欲": 2,
15 | "侵": 3,
16 | "临": 5,
17 | "然": 113,
18 | "吸": 7,
19 | "场": 194,
20 | "宽": 89,
21 | "线": 4480,
22 | "@": 2,
23 | "反": 244,
24 | "牌": 60,
25 | "盏": 7,
26 | "科": 26,
27 | "筒": 8,
28 | "苗": 22,
29 | "摘": 16,
30 | "话": 18,
31 | "赞": 3,
32 | "凡": 3,
33 | "知": 1291,
34 | "除": 68,
35 | "揭": 3,
36 | "扬": 6,
37 | "泳": 5,
38 | "其": 520,
39 | "闹": 1,
40 | "绿": 42,
41 | "渔": 11,
42 | "覆": 5,
43 | "沈": 4,
44 | "》": 5,
45 | "引": 14,
46 | "应": 333,
47 | "枚": 18,
48 | "灵": 2,
49 | "滤": 1,
50 | "假": 53,
51 | "鲨": 1,
52 | "+": 2226,
53 | "循": 12,
54 | "抬": 2,
55 | "是": 3714,
56 | "械": 4,
57 | "讲": 3,
58 | "刷": 23,
59 | "冶": 1,
60 | "咸": 2,
61 | "胀": 1,
62 | "视": 135,
63 | "俊": 1,
64 | "抱": 1,
65 | "契": 4,
66 | "寒": 1,
67 | "录": 13,
68 | "酸": 7,
69 | "教": 48,
70 | "也": 85,
71 | "囤": 1,
72 | "秦": 2,
73 | "峨": 1,
74 | "k": 518,
75 | "括": 30,
76 | "景": 22,
77 | "滴": 3,
78 | "铸": 2,
79 | "须": 18,
80 | "基": 28,
81 | "广": 47,
82 | "₁": 176,
83 | "暅": 2,
84 | "上": 2968,
85 | "后": 592,
86 | "频": 26,
87 | "餐": 19,
88 | "暂": 4,
89 | "底": 192,
90 | "蒙": 1,
91 | "辟": 2,
92 | "足": 436,
93 | "伴": 4,
94 | "馈": 1,
95 | "甸": 1,
96 | "离": 525,
97 | "笼": 3,
98 | "尾": 33,
99 | "框": 31,
100 | "泉": 8,
101 | "绕": 178,
102 | "V": 20,
103 | "虚": 23,
104 | "迟": 3,
105 | "郡": 1,
106 | "牢": 1,
107 | "柯": 1,
108 | "棱": 155,
109 | "跳": 23,
110 | "轴": 1757,
111 | "号": 159,
112 | "偶": 46,
113 | "啸": 1,
114 | "移": 348,
115 | "态": 15,
116 | "节": 59,
117 | "★": 8,
118 | "构": 61,
119 | "消": 18,
120 | "肖": 1,
121 | "伟": 2,
122 | "倡": 2,
123 | "冠": 4,
124 | "纪": 6,
125 | "术": 22,
126 | "精": 34,
127 | "A": 9419,
128 | "柄": 2,
129 | "汉": 13,
130 | "克": 110,
131 | "今": 34,
132 | "前": 219,
133 | "双": 169,
134 | "坏": 1,
135 | "塑": 10,
136 | "姐": 1,
137 | "幕": 2,
138 | "胖": 1,
139 | "几": 207,
140 | "巨": 1,
141 | "杯": 29,
142 | "卷": 33,
143 | "馨": 1,
144 | "固": 20,
145 | "导": 35,
146 | "齿": 7,
147 | "∀": 2,
148 | "辉": 4,
149 | "丈": 5,
150 | "再": 148,
151 | "咏": 1,
152 | "库": 35,
153 | "尔": 7,
154 | "挖": 11,
155 | "炮": 5,
156 | "沟": 1,
157 | "伞": 4,
158 | "㎡": 1,
159 | "符": 49,
160 | "爆": 3,
161 | ",": 316,
162 | "水": 414,
163 | "ρ": 10,
164 | "所": 1053,
165 | "旅": 30,
166 | "摄": 3,
167 | "么": 311,
168 | "重": 280,
169 | "灌": 5,
170 | "坎": 1,
171 | "结": 509,
172 | "×": 54,
173 | "学": 631,
174 | "臭": 2,
175 | "l": 460,
176 | "倒": 24,
177 | "践": 15,
178 | "培": 6,
179 | "持": 49,
180 | "技": 23,
181 | "标": 1525,
182 | "予": 8,
183 | "越": 18,
184 | "馒": 2,
185 | "耗": 11,
186 | "辞": 1,
187 | "加": 244,
188 | "锥": 118,
189 | "缩": 7,
190 | "悬": 3,
191 | "贵": 4,
192 | "臂": 1,
193 | "故": 10,
194 | "蓄": 10,
195 | "识": 34,
196 | "免": 12,
197 | "侣": 3,
198 | "城": 62,
199 | "筑": 20,
200 | "秒": 190,
201 | "W": 15,
202 | "蝙": 1,
203 | "江": 49,
204 | "连": 542,
205 | "卡": 61,
206 | "狠": 1,
207 | "略": 19,
208 | "彩": 15,
209 | "扫": 13,
210 | "赵": 7,
211 | "叶": 5,
212 | "相": 1124,
213 | "好": 220,
214 | "屿": 1,
215 | "争": 4,
216 | "压": 17,
217 | "谊": 1,
218 | "吃": 8,
219 | "疏": 2,
220 | "骑": 33,
221 | "或": 125,
222 | "蜘": 1,
223 | "趟": 2,
224 | "务": 40,
225 | "匠": 1,
226 | "垣": 1,
227 | "钥": 1,
228 | "斯": 2,
229 | "2": 6759,
230 | "贴": 12,
231 | "冷": 1,
232 | "制": 75,
233 | "霸": 1,
234 | "礼": 2,
235 | "B": 8729,
236 | "送": 27,
237 | "友": 15,
238 | "筐": 8,
239 | "糕": 3,
240 | "蛙": 3,
241 | "毛": 16,
242 | "玻": 13,
243 | "跟": 5,
244 | "占": 20,
245 | "啤": 1,
246 | "绩": 24,
247 | "普": 19,
248 | "泵": 1,
249 | "浴": 1,
250 | "寻": 2,
251 | "饼": 7,
252 | "₂": 116,
253 | "搅": 2,
254 | "薄": 4,
255 | "三": 1668,
256 | "倍": 92,
257 | "册": 5,
258 | "鼓": 6,
259 | "榜": 1,
260 | "负": 79,
261 | "圣": 4,
262 | "ア": 292,
263 | "大": 855,
264 | "-": 2268,
265 | "支": 130,
266 | "樱": 9,
267 | "记": 113,
268 | "状": 66,
269 | "扁": 1,
270 | "π": 68,
271 | "权": 4,
272 | "义": 108,
273 | "银": 10,
274 | "遍": 3,
275 | "里": 77,
276 | "雎": 1,
277 | "当": 817,
278 | "展": 69,
279 | "拖": 2,
280 | "牡": 1,
281 | "珠": 12,
282 | "荐": 1,
283 | "荫": 1,
284 | "m": 1596,
285 | "佳": 9,
286 | "恒": 59,
287 | "具": 70,
288 | "铺": 21,
289 | "肃": 2,
290 | "蜂": 2,
291 | "暗": 4,
292 | "依": 70,
293 | "东": 97,
294 | "那": 247,
295 | "判": 170,
296 | "级": 89,
297 | "梳": 1,
298 | "段": 802,
299 | "区": 224,
300 | "徽": 9,
301 | "社": 41,
302 | "旁": 21,
303 | "杂": 5,
304 | "υ": 1,
305 | "爷": 7,
306 | "觉": 3,
307 | "案": 102,
308 | "归": 16,
309 | "X": 8,
310 | "泛": 1,
311 | "保": 142,
312 | "面": 1955,
313 | "句": 8,
314 | "继": 23,
315 | "秩": 1,
316 | "深": 18,
317 | "汰": 3,
318 | "镶": 2,
319 | "凹": 5,
320 | "系": 912,
321 | "忽": 17,
322 | "幼": 4,
323 | "竿": 2,
324 | "考": 149,
325 | "抄": 1,
326 | "万": 84,
327 | "殊": 8,
328 | "徒": 6,
329 | "锡": 3,
330 | "英": 4,
331 | "天": 346,
332 | "墨": 4,
333 | "音": 7,
334 | "]": 1,
335 | "稿": 2,
336 | "C": 7685,
337 | "驶": 110,
338 | "野": 7,
339 | "汛": 1,
340 | "勤": 8,
341 | "左": 200,
342 | "葫": 1,
343 | "杭": 8,
344 | "良": 5,
345 | "泰": 9,
346 | "兹": 1,
347 | "类": 51,
348 | "综": 14,
349 | "穿": 3,
350 | "攀": 6,
351 | "茄": 5,
352 | "笔": 70,
353 | "林": 27,
354 | "√": 180,
355 | "孝": 3,
356 | "本": 247,
357 | ".": 5096,
358 | "串": 1,
359 | "点": 8894,
360 | "纽": 2,
361 | "宿": 5,
362 | "担": 5,
363 | "过": 1283,
364 | "豆": 7,
365 | "棉": 10,
366 | "姓": 2,
367 | "答": 138,
368 | "券": 18,
369 | "作": 791,
370 | "∑": 1,
371 | "院": 16,
372 | "票": 58,
373 | "n": 1062,
374 | "走": 91,
375 | "典": 17,
376 | "彼": 2,
377 | "顾": 20,
378 | "艇": 10,
379 | "℃": 9,
380 | "蚂": 9,
381 | "炎": 1,
382 | "少": 639,
383 | "蜗": 2,
384 | "洛": 3,
385 | "抚": 1,
386 | "丝": 23,
387 | "弧": 102,
388 | "思": 25,
389 | "振": 2,
390 | "亲": 8,
391 | "帽": 2,
392 | "览": 2,
393 | "降": 55,
394 | "协": 2,
395 | "ノ": 137,
396 | "Y": 6,
397 | "赛": 104,
398 | "九": 43,
399 | "远": 26,
400 | "团": 24,
401 | "古": 15,
402 | "姨": 1,
403 | "药": 35,
404 | "说": 368,
405 | "瓶": 25,
406 | "凸": 6,
407 | "极": 140,
408 | "漆": 4,
409 | "皋": 1,
410 | "同": 829,
411 | "帐": 4,
412 | "研": 41,
413 | "托": 5,
414 | "战": 12,
415 | "些": 70,
416 | "脚": 10,
417 | "劣": 10,
418 | "弦": 188,
419 | "太": 17,
420 | "断": 177,
421 | "搬": 10,
422 | "丰": 2,
423 | "洲": 8,
424 | "爸": 41,
425 | "麻": 1,
426 | "尼": 1,
427 | "D": 4649,
428 | "但": 49,
429 | "诉": 2,
430 | "像": 176,
431 | "华": 29,
432 | "塔": 45,
433 | "艘": 26,
434 | "距": 515,
435 | "d": 95,
436 | "碑": 1,
437 | "耽": 1,
438 | "据": 221,
439 | "买": 202,
440 | "瓷": 4,
441 | "靶": 2,
442 | "鱼": 17,
443 | "签": 6,
444 | "蚀": 1,
445 | "∃": 4,
446 | "辆": 105,
447 | "和": 977,
448 | "透": 12,
449 | "夕": 1,
450 | "折": 287,
451 | "簧": 3,
452 | "骨": 2,
453 | "/": 374,
454 | "液": 12,
455 | "宾": 8,
456 | "汇": 1,
457 | "偏": 51,
458 | "网": 75,
459 | "麦": 4,
460 | "著": 15,
461 | "诞": 2,
462 | "坡": 88,
463 | "因": 54,
464 | "入": 131,
465 | "孩": 6,
466 | "虫": 4,
467 | "息": 61,
468 | "仰": 27,
469 | "韶": 2,
470 | "出": 1592,
471 | "蚁": 9,
472 | "最": 929,
473 | "踪": 3,
474 | "鞋": 4,
475 | "锌": 1,
476 | "熏": 1,
477 | "斗": 2,
478 | "⊙": 487,
479 | "供": 49,
480 | "眠": 1,
481 | "带": 56,
482 | "播": 5,
483 | "蔬": 29,
484 | "估": 31,
485 | "喷": 12,
486 | "阶": 17,
487 | "债": 5,
488 | "粽": 2,
489 | "情": 90,
490 | "掷": 22,
491 | "淇": 2,
492 | "响": 18,
493 | "界": 16,
494 | "减": 79,
495 | "黑": 30,
496 | "罐": 4,
497 | "寓": 4,
498 | "奔": 1,
499 | "旗": 21,
500 | "进": 331,
501 | "Z": 9,
502 | "靠": 15,
503 | "口": 75,
504 | "呢": 5,
505 | "內": 2,
506 | "巧": 12,
507 | "苦": 3,
508 | "敬": 2,
509 | "棵": 38,
510 | "陶": 2,
511 | "卸": 10,
512 | "翻": 53,
513 | "姿": 1,
514 | "驾": 8,
515 | "范": 415,
516 | "者": 42,
517 | "了": 514,
518 | "看": 62,
519 | "名": 156,
520 | "徐": 8,
521 | "粒": 3,
522 | "掘": 2,
523 | "肚": 1,
524 | "钢": 24,
525 | "紧": 12,
526 | "约": 58,
527 | "末": 38,
528 | "搭": 21,
529 | "男": 21,
530 | "刹": 4,
531 | "尽": 9,
532 | "E": 3118,
533 | "浇": 4,
534 | "委": 6,
535 | "佛": 2,
536 | "陡": 2,
537 | "健": 18,
538 | "凤": 2,
539 | "乱": 1,
540 | "述": 59,
541 | "零": 141,
542 | "特": 19,
543 | "司": 88,
544 | "雾": 3,
545 | "要": 357,
546 | "辅": 8,
547 | "序": 76,
548 | "【": 44,
549 | "钓": 1,
550 | """: 2,
551 | "会": 97,
552 | "晚": 11,
553 | "焦": 170,
554 | "吨": 47,
555 | "被": 77,
556 | "训": 11,
557 | "躯": 1,
558 | "贮": 1,
559 | "0": 4856,
560 | "邻": 45,
561 | "强": 38,
562 | "弓": 1,
563 | "见": 17,
564 | "血": 2,
565 | "迅": 1,
566 | "经": 401,
567 | "金": 93,
568 | "周": 240,
569 | "坪": 4,
570 | "语": 8,
571 | "浮": 5,
572 | "p": 114,
573 | "奶": 16,
574 | "调": 223,
575 | "验": 52,
576 | "香": 8,
577 | "隔": 36,
578 | "芜": 2,
579 | "星": 46,
580 | "颠": 1,
581 | "客": 92,
582 | "饰": 4,
583 | "咨": 1,
584 | "港": 30,
585 | "脱": 1,
586 | "°": 992,
587 | "β": 56,
588 | "岸": 34,
589 | "逻": 2,
590 | "阿": 2,
591 | "检": 38,
592 | "扇": 80,
593 | "蕉": 5,
594 | "恶": 1,
595 | "鹏": 2,
596 | "浙": 12,
597 | "[": 146,
598 | "牧": 9,
599 | "哨": 1,
600 | "衫": 18,
601 | "淮": 2,
602 | "胰": 1,
603 | "更": 29,
604 | "穷": 7,
605 | "怨": 1,
606 | "黏": 1,
607 | "老": 54,
608 | "划": 99,
609 | "栖": 4,
610 | "沙": 9,
611 | "业": 51,
612 | "茜": 1,
613 | "搞": 2,
614 | "钩": 2,
615 | "用": 804,
616 | "侯": 2,
617 | "±": 6,
618 | "△": 1760,
619 | "裁": 22,
620 | "睡": 2,
621 | "仅": 27,
622 | "F": 1991,
623 | "效": 24,
624 | "始": 125,
625 | "郑": 11,
626 | "雕": 4,
627 | "诗": 1,
628 | "酥": 1,
629 | "²": 310,
630 | "奠": 1,
631 | "绥": 1,
632 | "噪": 5,
633 | "譬": 1,
634 | "俯": 34,
635 | "腰": 360,
636 | "色": 82,
637 | "篷": 3,
638 | "顶": 631,
639 | "击": 4,
640 | "矿": 5,
641 | "清": 16,
642 | "澄": 1,
643 | "指": 68,
644 | "完": 218,
645 | "式": 956,
646 | "】": 49,
647 | "o": 126,
648 | "袖": 2,
649 | "亚": 4,
650 | "期": 151,
651 | "撞": 1,
652 | "群": 12,
653 | "伯": 2,
654 | "1": 6605,
655 | "弟": 5,
656 | "爽": 3,
657 | "久": 2,
658 | "俄": 1,
659 | "奋": 1,
660 | "富": 3,
661 | "勒": 1,
662 | "陕": 6,
663 | "淘": 3,
664 | "滚": 17,
665 | "菜": 44,
666 | "束": 15,
667 | "卧": 2,
668 | "门": 65,
669 | "雪": 4,
670 | "软": 1,
671 | "q": 45,
672 | "柴": 4,
673 | "壶": 1,
674 | "养": 17,
675 | "建": 135,
676 | "链": 5,
677 | "言": 2,
678 | "超": 179,
679 | "砌": 2,
680 | ".": 27,
681 | "丘": 1,
682 | "抛": 957,
683 | "“": 280,
684 | "管": 38,
685 | "莱": 2,
686 | "舰": 7,
687 | "羹": 1,
688 | "常": 75,
689 | "值": 1567,
690 | "圾": 11,
691 | "证": 541,
692 | "捆": 6,
693 | "立": 234,
694 | "蕊": 2,
695 | "种": 520,
696 | "发": 495,
697 | "酒": 12,
698 | "痕": 49,
699 | "这": 713,
700 | "乘": 60,
701 | "招": 6,
702 | "赚": 9,
703 | "工": 298,
704 | "烧": 5,
705 | "矩": 291,
706 | "器": 104,
707 | "衬": 9,
708 | "轮": 48,
709 | "菱": 101,
710 | "幸": 2,
711 | "函": 1404,
712 | "七": 33,
713 | "概": 90,
714 | "有": 1547,
715 | "程": 854,
716 | "复": 43,
717 | "小": 1400,
718 | "美": 16,
719 | "殖": 5,
720 | "厦": 9,
721 | "椭": 231,
722 | "氯": 2,
723 | "家": 238,
724 | "弹": 11,
725 | "纸": 202,
726 | "刻": 27,
727 | "炼": 11,
728 | "观": 96,
729 | "浅": 1,
730 | "G": 427,
731 | "φ": 5,
732 | "屏": 4,
733 | "街": 5,
734 | "余": 108,
735 | "付": 73,
736 | "胜": 34,
737 | "章": 6,
738 | "另": 141,
739 | "革": 1,
740 | "雨": 6,
741 | "鸦": 3,
742 | "修": 62,
743 | "顷": 5,
744 | "较": 59,
745 | "月": 218,
746 | "宋": 3,
747 | "递": 61,
748 | "优": 82,
749 | "窄": 3,
750 | "骡": 1,
751 | "在": 3928,
752 | "袭": 3,
753 | "花": 82,
754 | "′": 390,
755 | "缸": 1,
756 | "去": 160,
757 | "尺": 71,
758 | "炽": 2,
759 | "瘾": 1,
760 | "促": 24,
761 | "桂": 3,
762 | "资": 64,
763 | "摊": 1,
764 | "仙": 1,
765 | "虞": 2,
766 | "毫": 8,
767 | "路": 350,
768 | "拱": 22,
769 | "柳": 2,
770 | "r": 174,
771 | "整": 273,
772 | "究": 96,
773 | "都": 309,
774 | "百": 39,
775 | "霓": 1,
776 | "吕": 1,
777 | "丙": 45,
778 | "”": 247,
779 | "亮": 21,
780 | "舱": 1,
781 | "育": 30,
782 | "医": 27,
783 | "难": 4,
784 | "裂": 1,
785 | "淄": 2,
786 | "拆": 10,
787 | "黎": 1,
788 | "量": 499,
789 | "乙": 491,
790 | "还": 86,
791 | "]": 150,
792 | "仑": 2,
793 | "履": 1,
794 | "照": 48,
795 | "齐": 7,
796 | "险": 21,
797 | "哪": 74,
798 | "轿": 4,
799 | "长": 1774,
800 | "图": 3271,
801 | "漂": 3,
802 | "希": 4,
803 | "疑": 1,
804 | "枝": 3,
805 | "唱": 3,
806 | "聚": 2,
807 | "隧": 28,
808 | "锦": 2,
809 | "″": 1,
810 | "▱": 8,
811 | "³": 26,
812 | "✲": 14,
813 | "∴": 10,
814 | "康": 5,
815 | "妹": 2,
816 | "势": 3,
817 | "蛇": 4,
818 | "H": 286,
819 | "青": 15,
820 | "拟": 48,
821 | "煤": 6,
822 | "巡": 6,
823 | "形": 2838,
824 | "麓": 1,
825 | "旱": 3,
826 | "想": 98,
827 | "椒": 1,
828 | "绷": 1,
829 | "勿": 1,
830 | "洁": 2,
831 | "赁": 8,
832 | "朝": 14,
833 | "舟": 5,
834 | "骤": 10,
835 | "储": 15,
836 | "京": 25,
837 | "3": 2839,
838 | "夹": 47,
839 | "流": 44,
840 | "桃": 16,
841 | "珊": 1,
842 | "潍": 4,
843 | "屋": 2,
844 | "道": 189,
845 | "单": 510,
846 | "¬": 2,
847 | "畜": 2,
848 | "部": 466,
849 | "只": 193,
850 | "们": 200,
851 | "s": 321,
852 | "拴": 1,
853 | "她": 19,
854 | "寺": 3,
855 | "悉": 2,
856 | "戊": 1,
857 | ",": 19839,
858 | "宏": 1,
859 | "锐": 62,
860 | "蜜": 1,
861 | "素": 22,
862 | "否": 379,
863 | "亭": 10,
864 | "讯": 8,
865 | "抵": 8,
866 | "德": 7,
867 | "守": 2,
868 | "眼": 10,
869 | "县": 16,
870 | "倾": 17,
871 | "摆": 28,
872 | "慈": 2,
873 | "绍": 1,
874 | "档": 3,
875 | "峡": 2,
876 | "幢": 7,
877 | "童": 9,
878 | "孤": 2,
879 | "韧": 2,
880 | "全": 232,
881 | "剪": 91,
882 | "转": 337,
883 | "误": 35,
884 | "数": 3880,
885 | "至": 145,
886 | "闻": 2,
887 | "空": 104,
888 | "国": 91,
889 | "逼": 1,
890 | "灾": 15,
891 | "谁": 17,
892 | "菁": 1,
893 | "把": 195,
894 | "碎": 5,
895 | "向": 715,
896 | "众": 18,
897 | "果": 546,
898 | "红": 64,
899 | "室": 21,
900 | "贫": 2,
901 | "中": 2874,
902 | "新": 97,
903 | "∵": 5,
904 | "画": 157,
905 | "汁": 3,
906 | "评": 5,
907 | "I": 27,
908 | "魏": 1,
909 | "风": 49,
910 | "盒": 76,
911 | "叔": 2,
912 | "%": 122,
913 | "柜": 13,
914 | "损": 13,
915 | "胞": 1,
916 | "瓦": 4,
917 | "度": 1032,
918 | "廓": 2,
919 | "森": 3,
920 | "曲": 245,
921 | "援": 9,
922 | "项": 209,
923 | "白": 55,
924 | "一": 3859,
925 | "⑩": 2,
926 | "掉": 14,
927 | "∈": 124,
928 | "阐": 1,
929 | "途": 21,
930 | "搜": 1,
931 | "御": 1,
932 | "索": 20,
933 | "堤": 4,
934 | "玩": 34,
935 | "享": 15,
936 | "梯": 84,
937 | "肉": 1,
938 | "芳": 1,
939 | "4": 1979,
940 | "容": 24,
941 | "示": 692,
942 | "匾": 1,
943 | "迁": 2,
944 | "杆": 24,
945 | "ω": 17,
946 | "≈": 46,
947 | "腊": 1,
948 | "巍": 5,
949 | "限": 196,
950 | "慢": 19,
951 | "燕": 2,
952 | "橘": 1,
953 | "蛛": 1,
954 | "竣": 1,
955 | "姥": 2,
956 | "拥": 6,
957 | "警": 4,
958 | "柱": 57,
959 | "晰": 1,
960 | "敲": 2,
961 | "t": 511,
962 | "对": 1036,
963 | "⊕": 1,
964 | "企": 20,
965 | "涂": 25,
966 | "椅": 7,
967 | "按": 267,
968 | "隐": 1,
969 | "蒜": 3,
970 | "莞": 1,
971 | "吧": 1,
972 | "动": 1346,
973 | "购": 291,
974 | "议": 4,
975 | "▲": 2,
976 | "´": 5,
977 | "阻": 8,
978 | "什": 70,
979 | "摇": 4,
980 | "捉": 2,
981 | "盐": 3,
982 | "丢": 1,
983 | "巢": 3,
984 | "祥": 2,
985 | "扳": 2,
986 | "筹": 8,
987 | "登": 5,
988 | "避": 5,
989 | "截": 102,
990 | "梅": 8,
991 | "文": 51,
992 | "昆": 3,
993 | "律": 92,
994 | "福": 13,
995 | "税": 12,
996 | "世": 6,
997 | "∞": 49,
998 | "张": 197,
999 | "宣": 1,
1000 | "助": 27,
1001 | "γ": 4,
1002 | "仁": 1,
1003 | "求": 2444,
1004 | "装": 127,
1005 | "襄": 1,
1006 | "拉": 11,
1007 | "聊": 3,
1008 | "嵌": 2,
1009 | "监": 9,
1010 | "坐": 1446,
1011 | "兔": 3,
1012 | "湖": 33,
1013 | "遥": 1,
1014 | "蓝": 11,
1015 | "杰": 5,
1016 | "并": 521,
1017 | "竹": 14,
1018 | "顺": 148,
1019 | "丁": 8,
1020 | "沂": 3,
1021 | "栅": 1,
1022 | "授": 2,
1023 | "夏": 9,
1024 | "熔": 1,
1025 | "洗": 12,
1026 | "望": 12,
1027 | "萝": 4,
1028 | "斜": 184,
1029 | "感": 11,
1030 | "鸡": 10,
1031 | "利": 279,
1032 | "厨": 5,
1033 | "漫": 6,
1034 | "5": 1720,
1035 | "桥": 38,
1036 | "儿": 7,
1037 | "激": 8,
1038 | "规": 177,
1039 | "籍": 2,
1040 | "他": 184,
1041 | "橙": 2,
1042 | "棚": 4,
1043 | "季": 27,
1044 | "剩": 44,
1045 | "u": 4,
1046 | "致": 25,
1047 | "延": 310,
1048 | "寸": 2,
1049 | "命": 85,
1050 | "勾": 12,
1051 | " ": 10,
1052 | "璃": 13,
1053 | "阄": 2,
1054 | "且": 965,
1055 | "宝": 4,
1056 | "废": 1,
1057 | " ": 16,
1058 | "爬": 25,
1059 | "售": 409,
1060 | "堰": 4,
1061 | "方": 1883,
1062 | "机": 177,
1063 | "亿": 4,
1064 | "派": 11,
1065 | "附": 9,
1066 | "≌": 36,
1067 | "狗": 2,
1068 | "员": 68,
1069 | "坚": 1,
1070 | "统": 46,
1071 | "换": 43,
1072 | "查": 87,
1073 | "晤": 4,
1074 | "逐": 4,
1075 | "班": 66,
1076 | "念": 9,
1077 | "年": 281,
1078 | "摸": 43,
1079 | "仿": 7,
1080 | "球": 314,
1081 | "厂": 74,
1082 | "的": 15843,
1083 | "鲁": 7,
1084 | "馆": 29,
1085 | "羊": 4,
1086 | "谓": 1,
1087 | "五": 80,
1088 | "列": 627,
1089 | "错": 49,
1090 | "鸟": 2,
1091 | "探": 105,
1092 | "失": 7,
1093 | "户": 57,
1094 | "改": 73,
1095 | "螺": 35,
1096 | "丿": 1,
1097 | "允": 3,
1098 | "参": 135,
1099 | "雄": 3,
1100 | "配": 51,
1101 | "K": 61,
1102 | "拌": 1,
1103 | "毒": 4,
1104 | "何": 219,
1105 | "仔": 5,
1106 | "留": 60,
1107 | "筝": 5,
1108 | "州": 80,
1109 | "Ⅱ": 36,
1110 | "惠": 64,
1111 | "④": 84,
1112 | "王": 53,
1113 | "来": 151,
1114 | "触": 6,
1115 | "注": 27,
1116 | "火": 50,
1117 | "独": 68,
1118 | "问": 275,
1119 | "声": 3,
1120 | "米": 415,
1121 | "维": 8,
1122 | "湿": 1,
1123 | "宇": 1,
1124 | "堆": 18,
1125 | "粉": 14,
1126 | "井": 8,
1127 | "演": 9,
1128 | "甘": 3,
1129 | "股": 17,
1130 | "质": 104,
1131 | "个": 2737,
1132 | "碱": 1,
1133 | "纵": 40,
1134 | "缴": 14,
1135 | "获": 112,
1136 | "6": 1348,
1137 | "便": 13,
1138 | "嘴": 1,
1139 | "剂": 1,
1140 | "雅": 2,
1141 | "拍": 9,
1142 | "受": 37,
1143 | "属": 25,
1144 | "胡": 1,
1145 | "Ⅰ": 38,
1146 | "缺": 5,
1147 | "狭": 1,
1148 | "罩": 2,
1149 | "绵": 1,
1150 | "v": 15,
1151 | "智": 11,
1152 | "汾": 1,
1153 | "、": 2143,
1154 | "冀": 2,
1155 | "阅": 40,
1156 | "领": 21,
1157 | "床": 3,
1158 | "舍": 20,
1159 | "树": 117,
1160 | "北": 86,
1161 | "宜": 2,
1162 | "?": 723,
1163 | "!": 7,
1164 | "抢": 1,
1165 | "春": 13,
1166 | "欧": 4,
1167 | "梦": 5,
1168 | "涨": 19,
1169 | "溪": 3,
1170 | "净": 18,
1171 | "际": 37,
1172 | "屉": 3,
1173 | "才": 60,
1174 | "菌": 1,
1175 | "槐": 1,
1176 | "浓": 8,
1177 | "返": 50,
1178 | "南": 76,
1179 | "a": 2002,
1180 | "散": 4,
1181 | "跨": 2,
1182 | "八": 46,
1183 | "仪": 10,
1184 | "召": 6,
1185 | "坯": 1,
1186 | "称": 391,
1187 | "荷": 3,
1188 | "胶": 2,
1189 | "使": 507,
1190 | "刀": 8,
1191 | "各": 215,
1192 | "笆": 7,
1193 | "师": 75,
1194 | "祖": 4,
1195 | "而": 122,
1196 | "适": 23,
1197 | "气": 59,
1198 | "化": 216,
1199 | ">": 305,
1200 | "∠": 1603,
1201 | "斥": 3,
1202 | "丨": 14,
1203 | "洪": 4,
1204 | "箱": 37,
1205 | "纳": 17,
1206 | "·": 189,
1207 | "茶": 9,
1208 | "格": 214,
1209 | "沿": 352,
1210 | "病": 5,
1211 | "嵊": 1,
1212 | "L": 22,
1213 | "李": 40,
1214 | "译": 1,
1215 | "仓": 15,
1216 | "卖": 43,
1217 | "锻": 10,
1218 | "~": 5,
1219 | "握": 3,
1220 | "≠": 114,
1221 | "Ⅲ": 4,
1222 | "日": 69,
1223 | "⑤": 22,
1224 | "武": 18,
1225 | "往": 69,
1226 | "型": 133,
1227 | "浪": 1,
1228 | "凭": 2,
1229 | "篱": 7,
1230 | "绳": 29,
1231 | "饲": 3,
1232 | "÷": 4,
1233 | "锅": 1,
1234 | ")": 6138,
1235 | "专": 8,
1236 | "逗": 1,
1237 | "颜": 28,
1238 | "挡": 4,
1239 | "撤": 1,
1240 | "恰": 184,
1241 | "别": 1036,
1242 | "启": 2,
1243 | "骰": 7,
1244 | "7": 678,
1245 | "达": 282,
1246 | "十": 59,
1247 | "畅": 2,
1248 | "密": 10,
1249 | "终": 91,
1250 | "手": 61,
1251 | "背": 17,
1252 | "乓": 9,
1253 | "角": 2523,
1254 | "遗": 1,
1255 | "取": 655,
1256 | "静": 11,
1257 | "盘": 23,
1258 | "祝": 1,
1259 | "详": 1,
1260 | "轩": 1,
1261 | "滨": 7,
1262 | "六": 73,
1263 | "矮": 3,
1264 | "平": 1616,
1265 | "w": 15,
1266 | "旺": 5,
1267 | "匀": 109,
1268 | "垃": 11,
1269 | "。": 205,
1270 | "(": 6089,
1271 | "明": 650,
1272 | "输": 57,
1273 | "隙": 3,
1274 | "额": 44,
1275 | "夜": 1,
1276 | "枣": 3,
1277 | "•": 117,
1278 | "亩": 2,
1279 | "舶": 2,
1280 | "携": 6,
1281 | "举": 19,
1282 | "污": 33,
1283 | "遂": 1,
1284 | "铅": 16,
1285 | "父": 9,
1286 | "藏": 2,
1287 | "虎": 1,
1288 | "法": 264,
1289 | "赔": 6,
1290 | "b": 1082,
1291 | "轨": 56,
1292 | "叫": 26,
1293 | "豪": 5,
1294 | "热": 19,
1295 | "公": 344,
1296 | "闯": 2,
1297 | "扶": 7,
1298 | "读": 39,
1299 | "楼": 65,
1300 | "湾": 2,
1301 | "圃": 7,
1302 | "善": 8,
1303 | "备": 47,
1304 | "很": 14,
1305 | "营": 40,
1306 | "温": 41,
1307 | "辨": 2,
1308 | "沪": 2,
1309 | "冬": 2,
1310 | "唯": 11,
1311 | "皮": 30,
1312 | "娱": 1,
1313 | "栽": 20,
1314 | "届": 7,
1315 | "奇": 36,
1316 | "M": 1415,
1317 | "敏": 6,
1318 | "体": 379,
1319 | "棒": 12,
1320 | "汕": 2,
1321 | "族": 1,
1322 | "硝": 1,
1323 | "⑥": 6,
1324 | "子": 255,
1325 | "卫": 5,
1326 | "灭": 2,
1327 | "目": 45,
1328 | "巴": 4,
1329 | "虹": 1,
1330 | "厅": 5,
1331 | "昌": 7,
1332 | "谐": 3,
1333 | "写": 419,
1334 | "岛": 24,
1335 | "□": 6,
1336 | "患": 3,
1337 | "接": 755,
1338 | "产": 255,
1339 | "账": 1,
1340 | "碳": 6,
1341 | "莲": 3,
1342 | "收": 71,
1343 | "8": 887,
1344 | "治": 3,
1345 | "辽": 6,
1346 | "织": 18,
1347 | "秉": 1,
1348 | "苏": 20,
1349 | "慎": 1,
1350 | "跑": 58,
1351 | "恢": 1,
1352 | "毕": 10,
1353 | "博": 9,
1354 | "※": 1,
1355 | "神": 3,
1356 | "①": 282,
1357 | "≤": 153,
1358 | "表": 614,
1359 | "短": 58,
1360 | "己": 10,
1361 | "驴": 3,
1362 | "x": 4672,
1363 | "追": 24,
1364 | "锁": 6,
1365 | "垂": 322,
1366 | "包": 72,
1367 | "岗": 8,
1368 | "服": 45,
1369 | "戏": 27,
1370 | "酬": 9,
1371 | "栓": 7,
1372 | "厚": 18,
1373 | "缝": 1,
1374 | "府": 10,
1375 | "娟": 1,
1376 | "黄": 40,
1377 | "#": 167,
1378 | "挥": 2,
1379 | "护": 18,
1380 | "润": 138,
1381 | "梨": 3,
1382 | "含": 98,
1383 | "赴": 1,
1384 | "哀": 1,
1385 | "奉": 1,
1386 | "络": 4,
1387 | "c": 968,
1388 | "副": 15,
1389 | "峰": 4,
1390 | "时": 2209,
1391 | "载": 10,
1392 | "身": 55,
1393 | "销": 329,
1394 | "将": 505,
1395 | "怎": 45,
1396 | "刚": 45,
1397 | "<": 349,
1398 | "报": 42,
1399 | "挤": 3,
1400 | "澧": 2,
1401 | "横": 128,
1402 | "环": 64,
1403 | "脸": 1,
1404 | "似": 146,
1405 | "访": 3,
1406 | "铁": 74,
1407 | "燃": 12,
1408 | "祈": 1,
1409 | "旋": 280,
1410 | "描": 8,
1411 | "N": 781,
1412 | "乒": 10,
1413 | "绝": 13,
1414 | "察": 56,
1415 | "令": 11,
1416 | "夺": 1,
1417 | "若": 1714,
1418 | "剧": 1,
1419 | "园": 39,
1420 | "缓": 4,
1421 | "可": 457,
1422 | "遮": 1,
1423 | "干": 57,
1424 | "添": 19,
1425 | "艺": 7,
1426 | "份": 74,
1427 | "轼": 1,
1428 | "玄": 2,
1429 | "妈": 34,
1430 | "民": 61,
1431 | "互": 101,
1432 | "候": 5,
1433 | "增": 165,
1434 | "■": 1,
1435 | "∥": 204,
1436 | "瞬": 3,
1437 | "邮": 15,
1438 | "就": 79,
1439 | "9": 792,
1440 | "活": 74,
1441 | "丽": 17,
1442 | "围": 474,
1443 | "元": 702,
1444 | "婷": 1,
1445 | "俩": 2,
1446 | "比": 590,
1447 | "块": 111,
1448 | "飞": 41,
1449 | "②": 265,
1450 | "无": 115,
1451 | "Ⅳ": 1,
1452 | "≥": 66,
1453 | "桩": 2,
1454 | "山": 88,
1455 | "踩": 3,
1456 | "女": 25,
1457 | "侨": 1,
1458 | "y": 2232,
1459 | "T": 61,
1460 | "内": 538,
1461 | "缆": 5,
1462 | "安": 70,
1463 | "萌": 2,
1464 | "茎": 5,
1465 | "辑": 1,
1466 | "渐": 29,
1467 | "锯": 2,
1468 | "刘": 8,
1469 | "盛": 3,
1470 | "映": 10,
1471 | "伦": 1,
1472 | "让": 18,
1473 | "听": 9,
1474 | "破": 7,
1475 | "款": 71,
1476 | "陀": 2,
1477 | "蛋": 15,
1478 | "近": 71,
1479 | "乐": 5,
1480 | "靖": 1,
1481 | "叙": 4,
1482 | "川": 11,
1483 | "食": 11,
1484 | "恤": 11,
1485 | "车": 557,
1486 | "捷": 2,
1487 | "闭": 17,
1488 | "任": 279,
1489 | "省": 67,
1490 | "喂": 1,
1491 | "宗": 2,
1492 | "簇": 1,
1493 | "砖": 12,
1494 | "休": 12,
1495 | "源": 17,
1496 | "烂": 1,
1497 | "吗": 80,
1498 | "匙": 2,
1499 | "鲜": 3,
1500 | "实": 413,
1501 | "蜡": 24,
1502 | "∣": 342,
1503 | "氧": 2,
1504 | "侦": 2,
1505 | "弱": 1,
1506 | "稳": 16,
1507 | "架": 31,
1508 | "主": 33,
1509 | "沼": 9,
1510 | "设": 545,
1511 | "雀": 2,
1512 | "充": 34,
1513 | "等": 1164,
1514 | "晋": 1,
1515 | "O": 2430,
1516 | "凌": 1,
1517 | "条": 735,
1518 | "扣": 14,
1519 | "既": 20,
1520 | "&": 3,
1521 | "灯": 73,
1522 | "影": 183,
1523 | "绰": 1,
1524 | "已": 1220,
1525 | "课": 44,
1526 | "需": 230,
1527 | "傅": 18,
1528 | "云": 5,
1529 | "疗": 21,
1530 | "通": 179,
1531 | "肥": 5,
1532 | "涧": 1,
1533 | "垫": 2,
1534 | "株": 6,
1535 | "头": 72,
1536 | ":": 79,
1537 | "着": 98,
1538 | "叠": 116,
1539 | "操": 38,
1540 | "敌": 6,
1541 | "扎": 3,
1542 | "滑": 38,
1543 | "裕": 1,
1544 | "做": 135,
1545 | "租": 63,
1546 | "蝠": 1,
1547 | "③": 124,
1548 | "书": 123,
1549 | "芽": 2,
1550 | "矫": 4,
1551 | "凯": 1,
1552 | "彰": 2,
1553 | "奴": 1,
1554 | "苹": 31,
1555 | "z": 31,
1556 | "磁": 11,
1557 | "熄": 2,
1558 | "崇": 1,
1559 | "庆": 10,
1560 | "遭": 2,
1561 | "朋": 8,
1562 | "意": 263,
1563 | "耻": 1,
1564 | "则": 1518,
1565 | "厘": 34,
1566 | ";": 1368,
1567 | "尝": 9,
1568 | "够": 29,
1569 | "急": 9,
1570 | "货": 91,
1571 | "讨": 21,
1572 | "借": 10,
1573 | "船": 93,
1574 | "θ": 50,
1575 | "º": 6,
1576 | "秋": 4,
1577 | "千": 207,
1578 | "细": 26,
1579 | "运": 696,
1580 | "盖": 18,
1581 | "变": 342,
1582 | "竞": 26,
1583 | "荣": 1,
1584 | "e": 113,
1585 | "页": 15,
1586 | "批": 121,
1587 | "轻": 7,
1588 | "汽": 91,
1589 | "西": 76,
1590 | "待": 10,
1591 | "合": 355,
1592 | "袋": 25,
1593 | "₃": 44,
1594 | "亏": 14,
1595 | "⌒": 70,
1596 | "窗": 7,
1597 | "外": 223,
1598 | ":": 1311,
1599 | "欢": 4,
1600 | "两": 1925,
1601 | "未": 14,
1602 | "箭": 4,
1603 | "纯": 6,
1604 | "钳": 3,
1605 | "阴": 107,
1606 | "永": 4,
1607 | "放": 148,
1608 | "私": 3,
1609 | "础": 12,
1610 | "必": 65,
1611 | "勇": 1,
1612 | "呈": 11,
1613 | "棋": 11,
1614 | "坊": 4,
1615 | "母": 53,
1616 | "济": 10,
1617 | "P": 2875,
1618 | "荒": 2,
1619 | "奖": 48,
1620 | "烛": 25,
1621 | "罚": 5,
1622 | "盟": 4,
1623 | "止": 111,
1624 | "拧": 1,
1625 | "早": 14,
1626 | "士": 9,
1627 | "马": 20,
1628 | "灰": 2,
1629 | "职": 8,
1630 | "蕨": 1,
1631 | "淹": 1,
1632 | "替": 4,
1633 | "礁": 3,
1634 | "陪": 1,
1635 | "庄": 10,
1636 | "切": 308,
1637 | "针": 201,
1638 | "栋": 5,
1639 | "脑": 19,
1640 | "排": 110,
1641 | "首": 19,
1642 | "粘": 4,
1643 | "匝": 4,
1644 | "队": 140,
1645 | "订": 7,
1646 | "交": 1788,
1647 | "∧": 6,
1648 | "慰": 1,
1649 | "草": 14,
1650 | "费": 212,
1651 | "总": 172,
1652 | "寂": 2,
1653 | "组": 295,
1654 | "片": 156,
1655 | "准": 74,
1656 | "哈": 4,
1657 | "插": 4,
1658 | "询": 1,
1659 | "座": 41,
1660 | "执": 13,
1661 | "填": 58,
1662 | "差": 94,
1663 | "共": 397,
1664 | "味": 5,
1665 | "史": 2,
1666 | "直": 2859,
1667 | "泸": 1,
1668 | "{": 174,
1669 | "拼": 44,
1670 | "弄": 1,
1671 | "率": 287,
1672 | "逆": 78,
1673 | "钉": 2,
1674 | "锈": 2,
1675 | "脐": 1,
1676 | "戒": 1,
1677 | "算": 219,
1678 | "高": 369,
1679 | "军": 27,
1680 | "猜": 53,
1681 | "龄": 18,
1682 | "趣": 21,
1683 | "控": 7,
1684 | "…": 68,
1685 | "第": 593,
1686 | "铜": 6,
1687 | "焰": 1,
1688 | "油": 22,
1689 | "贸": 1,
1690 | "为": 5153,
1691 | "抽": 75,
1692 | "壁": 8,
1693 | "罄": 1,
1694 | "歌": 7,
1695 | "漏": 1,
1696 | "糙": 2,
1697 | "奥": 7,
1698 | "卜": 5,
1699 | "域": 94,
1700 | "裤": 7,
1701 | "以": 894,
1702 | "f": 567,
1703 | "羽": 2,
1704 | "兰": 7,
1705 | "割": 31,
1706 | "请": 603,
1707 | "桶": 23,
1708 | "浸": 1,
1709 | "臻": 1,
1710 | "柿": 4,
1711 | "码": 26,
1712 | "径": 496,
1713 | "吉": 7,
1714 | "伏": 5,
1715 | "←": 1,
1716 | "颖": 5,
1717 | "庚": 1,
1718 | "初": 31,
1719 | "真": 33,
1720 | "校": 243,
1721 | "责": 7,
1722 | "严": 3,
1723 | "悦": 1,
1724 | "夫": 2,
1725 | "弯": 9,
1726 | "粮": 4,
1727 | "冰": 15,
1728 | "申": 2,
1729 | "劲": 1,
1730 | "阵": 4,
1731 | "土": 26,
1732 | "人": 566,
1733 | "∽": 35,
1734 | "秀": 8,
1735 | "幅": 13,
1736 | "升": 32,
1737 | "益": 6,
1738 | "住": 45,
1739 | "Q": 737,
1740 | "字": 158,
1741 | "衡": 10,
1742 | "摩": 6,
1743 | "凰": 2,
1744 | "偿": 3,
1745 | "试": 276,
1746 | "板": 105,
1747 | "宁": 18,
1748 | "稀": 2,
1749 | "射": 215,
1750 | "成": 812,
1751 | "碗": 3,
1752 | "聘": 2,
1753 | "龙": 10,
1754 | "力": 49,
1755 | "钟": 97,
1756 | "计": 340,
1757 | "昨": 1,
1758 | "☆": 3,
1759 | "到": 1039,
1760 | "吴": 5,
1761 | "边": 1956,
1762 | "游": 75,
1763 | "贺": 3,
1764 | "<": 50,
1765 | "练": 24,
1766 | "陈": 2,
1767 | "行": 781,
1768 | "低": 75,
1769 | "摔": 2,
1770 | "策": 5,
1771 | "忙": 4,
1772 | "停": 133,
1773 | "非": 35,
1774 | "代": 145,
1775 | "汤": 1,
1776 | "晨": 4,
1777 | "硬": 29,
1778 | "置": 247,
1779 | "桨": 1,
1780 | "迹": 59,
1781 | "|": 83,
1782 | "松": 6,
1783 | "布": 45,
1784 | "如": 2274,
1785 | "历": 6,
1786 | "植": 35,
1787 | "挑": 4,
1788 | "易": 16,
1789 | "→": 171,
1790 | "鹅": 1,
1791 | "创": 12,
1792 | "次": 892,
1793 | "渣": 1,
1794 | "津": 3,
1795 | "性": 101,
1796 | "Φ": 5,
1797 | "暨": 1,
1798 | "薪": 2,
1799 | "甜": 2,
1800 | "阳": 36,
1801 | "冲": 6,
1802 | "电": 186,
1803 | "样": 207,
1804 | "舞": 3,
1805 | "髀": 1,
1806 | "遇": 62,
1807 | "鼠": 12,
1808 | "盈": 23,
1809 | "迎": 7,
1810 | "拐": 14,
1811 | "⇒": 1,
1812 | "绘": 9,
1813 | "牛": 23,
1814 | "g": 172,
1815 | "饭": 7,
1816 | "危": 6,
1817 | "突": 7,
1818 | "堂": 2,
1819 | "分": 2482,
1820 | "圈": 27,
1821 | "残": 3,
1822 | "与": 2046,
1823 | "抑": 1,
1824 | "撕": 1,
1825 | "缘": 6,
1826 | "尚": 6,
1827 | "辣": 1,
1828 | "木": 38,
1829 | "宫": 3,
1830 | "帮": 29,
1831 | "励": 7,
1832 | "伸": 11,
1833 | "λ": 16,
1834 | "显": 6,
1835 | "心": 383,
1836 | "采": 49,
1837 | "告": 12,
1838 | "棍": 1,
1839 | "捐": 24,
1840 | "染": 3,
1841 | "R": 288,
1842 | "∏": 4,
1843 | "给": 117,
1844 | "衢": 4,
1845 | "泥": 5,
1846 | "赤": 1,
1847 | "杨": 11,
1848 | "印": 16,
1849 | "石": 14,
1850 | "能": 425,
1851 | "威": 4,
1852 | "简": 36,
1853 | "嘉": 6,
1854 | "于": 2477,
1855 | "我": 118,
1856 | "农": 34,
1857 | "生": 424,
1858 | "模": 89,
1859 | "梢": 1,
1860 | "芦": 1,
1861 | "累": 5,
1862 | "爱": 11,
1863 | "现": 277,
1864 | "圳": 5,
1865 | "丹": 3,
1866 | "=": 5025,
1867 | "政": 18,
1868 | "J": 5,
1869 | "居": 29,
1870 | "钠": 3,
1871 | "从": 584,
1872 | "扑": 4,
1873 | "提": 96,
1874 | "套": 42,
1875 | "竖": 22,
1876 | "忘": 6,
1877 | "回": 114,
1878 | "橡": 3,
1879 | "佣": 2,
1880 | "慧": 9,
1881 | "拦": 2,
1882 | "台": 123,
1883 | "间": 882,
1884 | "}": 173,
1885 | "曾": 4,
1886 | "境": 9,
1887 | "异": 39,
1888 | "宅": 2,
1889 | "娄": 2,
1890 | "斑": 3,
1891 | "店": 114,
1892 | "崖": 1,
1893 | "妙": 2,
1894 | "功": 8,
1895 | "鄞": 1,
1896 | "(": 75,
1897 | "央": 3,
1898 | "露": 5,
1899 | "核": 7,
1900 | "⊿": 1,
1901 | "局": 14,
1902 | "雇": 12,
1903 | "商": 264,
1904 | "凉": 5,
1905 | "半": 542,
1906 | "乌": 8,
1907 | "扔": 2,
1908 | "塘": 4,
1909 | "淡": 2,
1910 | "池": 38,
1911 | "该": 518,
1912 | "h": 137,
1913 | "端": 90,
1914 | "饮": 22,
1915 | "价": 499,
1916 | "赶": 12,
1917 | "拿": 9,
1918 | "腾": 1,
1919 | "j": 5,
1920 | "颗": 10,
1921 | "疆": 1,
1922 | "伍": 10,
1923 | "二": 499,
1924 | "题": 545,
1925 | "定": 544,
1926 | "障": 3,
1927 | "贡": 2,
1928 | "您": 1,
1929 | "措": 2,
1930 | "由": 408,
1931 | "论": 273,
1932 | "房": 50,
1933 | "诊": 3,
1934 | "魅": 1,
1935 | "光": 83,
1936 | "烈": 1,
1937 | "及": 172,
1938 | "位": 803,
1939 | "救": 9,
1940 | "S": 403,
1941 | "捕": 1,
1942 | "拔": 8,
1943 | "坝": 10,
1944 | "漠": 1,
1945 | "衣": 23,
1946 | "步": 73,
1947 | "▪": 20,
1948 | "旦": 8,
1949 | "承": 9,
1950 | "岁": 21,
1951 | "市": 259,
1952 | "理": 382,
1953 | "炉": 2,
1954 | "刊": 1,
1955 | "撑": 2,
1956 | "投": 67,
1957 | "得": 775,
1958 | "尖": 4,
1959 | "墙": 37,
1960 | "原": 407,
1961 | "没": 62,
1962 | "α": 151,
1963 | "踢": 4,
1964 | "肩": 2,
1965 | "航": 68,
1966 | "庭": 20,
1967 | "钱": 54,
1968 | "田": 3,
1969 | "决": 84,
1970 | "防": 11,
1971 | "猴": 5,
1972 | ">": 35,
1973 | "幂": 2,
1974 | "先": 111,
1975 | "削": 2,
1976 | "仍": 39,
1977 | "每": 928,
1978 | "志": 10,
1979 | "泡": 1,
1980 | "赠": 7,
1981 | "解": 543,
1982 | "此": 306,
1983 | "⑦": 1,
1984 | "续": 61,
1985 | "彬": 1,
1986 | "确": 283,
1987 | "却": 1,
1988 | "迷": 1,
1989 | "汶": 1,
1990 | "霾": 3,
1991 | "封": 15,
1992 | "震": 9,
1993 | "选": 139,
1994 | "冈": 5,
1995 | "洋": 3,
1996 | "不": 1149,
1997 | "斐": 4,
1998 | "媚": 1,
1999 | "溢": 4,
2000 | "认": 26,
2001 | ")": 75,
2002 | "膨": 1,
2003 | "紫": 7,
2004 | "荆": 1,
2005 | "鸭": 2,
2006 | "根": 324,
2007 | "攻": 1,
2008 | "寄": 3,
2009 | "均": 254,
2010 | "测": 186,
2011 | "桌": 35,
2012 | "罗": 1,
2013 | "糖": 19,
2014 | "秘": 3,
2015 | "象": 686,
2016 | "擦": 2,
2017 | "i": 151,
2018 | "关": 794,
2019 | "盲": 3,
2020 | "牵": 3,
2021 | "∪": 4,
2022 | "征": 7,
2023 | "圆": 1087,
2024 | "玉": 7,
2025 | "事": 21,
2026 | "抓": 5,
2027 | "—": 5,
2028 | "粗": 22,
2029 | "编": 25,
2030 | "多": 786,
2031 | "喝": 2,
2032 | "澡": 1,
2033 | "渠": 6,
2034 | "财": 6,
2035 | "Ω": 2,
2036 | "∨": 5,
2037 | "劳": 6,
2038 | "甲": 490,
2039 | "况": 74,
2040 | "施": 20,
2041 | "叉": 3,
2042 | "版": 3,
2043 | "介": 2,
2044 | "拓": 11,
2045 | "联": 33,
2046 | "四": 784,
2047 | "烟": 6,
2048 | "信": 60,
2049 | "习": 46,
2050 | "赢": 5,
2051 | "僧": 2,
2052 | "⊂": 3,
2053 | "韩": 2,
2054 | "!": 1,
2055 | "繁": 4,
2056 | "预": 22,
2057 | "下": 880,
2058 | "栏": 9,
2059 | "潮": 8,
2060 | "速": 531,
2061 | "萧": 1,
2062 | "盆": 2,
2063 | "谷": 5,
2064 | "?": 18,
2065 | "卉": 1,
2066 | "之": 518,
2067 | "释": 13,
2068 | "村": 30,
2069 | "打": 87,
2070 | "孙": 1,
2071 | "⊥": 481,
2072 | "姚": 2,
2073 | "瓜": 4,
2074 | "侧": 176,
2075 | "拨": 5,
2076 | "自": 177,
2077 | "篮": 26,
2078 | "困": 2,
2079 | "闲": 2,
2080 | "兵": 6,
2081 | "郴": 1,
2082 | "开": 235,
2083 | "它": 260,
2084 | "墅": 5,
2085 | "处": 318,
2086 | "例": 307,
2087 | "《": 4,
2088 | "暑": 5,
2089 | "Γ": 5,
2090 | "钝": 11,
2091 | "睛": 4,
2092 | "熟": 1,
2093 | "传": 35,
2094 | "谢": 2,
2095 | "*": 11,
2096 | "岭": 2,
2097 | "地": 484,
2098 | "层": 29,
2099 | "陆": 6,
2100 | "午": 22,
2101 | "赌": 3,
2102 | "材": 39,
2103 | "站": 46,
2104 | "存": 535,
2105 | "铝": 2,
2106 | "镜": 25,
2107 | "满": 384,
2108 | "你": 278,
2109 | "旧": 8,
2110 | "择": 46,
2111 | "快": 50,
2112 | "聪": 14,
2113 | "积": 860,
2114 | "驮": 3,
2115 | "物": 1081,
2116 | "右": 256,
2117 | "兴": 26,
2118 | "混": 16,
2119 | "找": 45,
2120 | "币": 19,
2121 | "品": 319,
2122 | "皆": 1,
2123 | "咱": 1,
2124 | "析": 259,
2125 | "官": 1,
2126 | "喜": 5,
2127 | "办": 13,
2128 | "渡": 1,
2129 | "欣": 2,
2130 | "∩": 8,
2131 | "推": 28,
2132 | "溶": 12,
2133 | "许": 7,
2134 | "落": 152,
2135 | "○": 13,
2136 | "愿": 5,
2137 | "﹁": 3,
2138 | "海": 72,
2139 | "镇": 13,
2140 | "集": 67,
2141 | "又": 78,
2142 | "郊": 4,
2143 | "跌": 1,
2144 | "虑": 15,
2145 | "某": 559,
2146 | "U": 1,
2147 | "坛": 12,
2148 | "乡": 3,
2149 | "正": 1218,
2150 | "波": 15,
2151 | "补": 38,
2152 | "'": 17,
2153 | "扩": 5,
2154 | "即": 84,
2155 | "遵": 1,
2156 | "起": 66,
2157 | "件": 471,
2158 | "诸": 2,
2159 | "腿": 4,
2160 | "孔": 5
2161 | }
--------------------------------------------------------------------------------
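The dictionary above appears to map each character to its occurrence count in the training labels. A char-to-index mapping of the kind loaded via `args.word_index_json` in the script earlier could plausibly be derived from it; the sketch below is only an illustration (the frequency-based ordering and the output filename are assumptions, not the repo's actual preprocessing):

```python
import json

# Illustrative sketch: derive a char -> index mapping from a {char: count} dictionary
# shaped like files/alphabet_count_dict.json. The repo's own preprocessing may order
# or filter characters differently; the output filename below is hypothetical.
with open('files/alphabet_count_dict.json') as f:
    count_dict = json.load(f)

# Assign consecutive indices, most frequent characters first.
alphabet = sorted(count_dict, key=count_dict.get, reverse=True)
word_index_dict = {ch: i for i, ch in enumerate(alphabet)}

with open('word_index_dict.json', 'w') as f:
    json.dump(word_index_dict, f, ensure_ascii=False)
```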
/files/black.json:
--------------------------------------------------------------------------------
1 | {
2 | "white_list": [
3 | "A4758.png",
4 | "A3905.png",
5 | "T359_0.png",
6 | "T898_10.png",
7 | "T1091_4.png",
8 | "B2258_1.png",
9 | "A2114.png",
10 | "A9393.png",
11 | "T40_11.png",
12 | "T411_4.png",
13 | "A2754.png",
14 | "A14295.png",
15 | "A14349.png",
16 | "A14364.png",
17 | "A15101.png",
18 | "A15241.png",
19 | "A15933.png",
20 | "A16323.png",
21 | "A1595.png",
22 | "A16092.png",
23 | "A17063.png",
24 | "A16761.png",
25 | "A17141.png",
26 | "A16560.png",
27 | "A16953.png",
28 | "A1745.png",
29 | "A17010.png",
30 | "A16435.png",
31 | "A17281.png",
32 | "A17980.png",
33 | "A1875.png",
34 | "A18451.png",
35 | "A18969.png",
36 | "A19152.png",
37 | "A19424.png",
38 | "A17989.png",
39 | "A18139.png",
40 | "A18891.png",
41 | "A19260.png",
42 | "A18701.png",
43 | "A19584.png",
44 | "A19012.png",
45 | "A215.png",
46 | "A20801.png",
47 | "A22541.png",
48 | "A22740.png",
49 | "A22290.png",
50 | "A21194.png",
51 | "A22899.png",
52 | "A22381.png",
53 | "A19650.png",
54 | "A26050.png",
55 | "A24554.png",
56 | "A309.png",
57 | "A26100.png",
58 | "A24084.png",
59 | "A2508.png",
60 | "A25709.png",
61 | "A26620.png",
62 | "A2594.png",
63 | "A24920.png",
64 | "A27604.png",
65 | "A27933.png",
66 | "A25019.png",
67 | "A26091.png",
68 | "A2432.png",
69 | "A2493.png",
70 | "A2755.png",
71 | "A23883.png",
72 | "A3437.png",
73 | "A27102.png",
74 | "A314.png",
75 | "A25250.png",
76 | "A25612.png",
77 | "A24169.png",
78 | "A28034.png",
79 | "A3210.png",
80 | "A26760.png",
81 | "A2538.png",
82 | "A2974.png",
83 | "A24609.png",
84 | "A20.png",
85 | "A24062.png",
86 | "A3223.png",
87 | "A24311.png",
88 | "A26699.png",
89 | "A23992.png",
90 | "A26012.png",
91 | "A2175.png",
92 | "A24603.png",
93 | "A27172.png",
94 | "A2004.png",
95 | "A20100.png",
96 | "A3808.png",
97 | "A4943.png",
98 | "A441.png",
99 | "A7366.png",
100 | "A7242.png",
101 | "A7697.png",
102 | "A5932.png",
103 | "A5298.png",
104 | "A5823.png",
105 | "A3937.png",
106 | "A6152.png",
107 | "A4141.png",
108 | "A4141.png",
109 | "A7360.png",
110 | "A5923.png",
111 | "A3945.png",
112 | "A6821.png",
113 | "A3852.png",
114 | "A6252.png",
115 | "A4188.png",
116 | "A3681.png",
117 | "A4947.png",
118 | "A6011.png",
119 | "A5304.png",
120 | "A5304.png",
121 | "A4979.png",
122 | "A7861.png",
123 | "A5400.png",
124 | "A4611.png",
125 | "A4883.png",
126 | "A368.png",
127 | "A4347.png",
128 | "A7100.png",
129 | "A4956.png",
130 | "A7133.png",
131 | "A6237.png",
132 | "A2923.png",
133 | "A3235.png",
134 | "A26992.png",
135 | "A4642.png",
136 | "A27272.png",
137 | "A5920.png",
138 | "A21381.png",
139 | "A6608.png",
140 | "A4419.png",
141 | "A23613.png",
142 | "A27283.png",
143 | "A24464.png",
144 | "A25601.png",
145 | "A7814.png",
146 | "A24509.png",
147 | "A24304.png",
148 | "A6120.png",
149 | "A3495.png",
150 | "A3949.png",
151 | "A24379.png",
152 | "A20111.png",
153 | "A22962.png",
154 | "A8877.png",
155 | "A8936.png",
156 | "A9761.png",
157 | "A8741.png",
158 | "A9064.png",
159 | "A8370.png",
160 | "A9829.png",
161 | "A9018.png",
162 | "B1879_1.png",
163 | "A9354.png",
164 | "A8201.png",
165 | "B1813_3.png",
166 | "A8350.png",
167 | "A8353.png",
168 | "A9446.png",
169 | "B1879_0.png",
170 | "A8674.png",
171 | "A9219.png",
172 | "B2404_2.png",
173 | "T1175_12.png",
174 | "B935_8.png",
175 | "T1140_5.png",
176 | "B523_1.png",
177 | "B523_5.png",
178 | "T1175_15.png",
179 | "B523_0.png",
180 | "T1058_17.png",
181 | "T1089_3.png",
182 | "B2741_0.png",
183 | "T1036_1.png",
184 | "T1184_4.png",
185 | "T129_18.png",
186 | "T134_7.png",
187 | "T142_4.png",
188 | "T144_4.png",
189 | "T169_10.png",
190 | "T169_6.png",
191 | "T174_0.png",
192 | "T200_0.png",
193 | "T20_8.png",
194 | "T217_6.png",
195 | "T217_8.png",
196 | "T230_6.png",
197 | "T235_12.png",
198 | "T23_4.png",
199 | "T244_5.png",
200 | "T247_8.png",
201 | "T261_8.png",
202 | "T270_12.png",
203 | "T294_4.png",
204 | "T300_12.png",
205 | "T302_1.png",
206 | "T311_11.png",
207 | "T321_11.png",
208 | "T321_3.png",
209 | "T321_9.png",
210 | "T324_2.png",
211 | "T327_5.png",
212 | "T328_7.png",
213 | "T329_1.png",
214 | "T331_0.png",
215 | "T340_1.png",
216 | "T352_4.png",
217 | "T352_8.png",
218 | "T356_5.png",
219 | "T358_7.png",
220 | "T359_6.png",
221 | "T366_12.png",
222 | "T372_4.png",
223 | "T374_5.png",
224 | "T374_6.png",
225 | "T381_2.png",
226 | "T381_4.png",
227 | "T381_6.png",
228 | "T382_3.png",
229 | "T387_1.png",
230 | "T389_14.png",
231 | "T38_1.png",
232 | "T38_4.png",
233 | "T396_6.png",
234 | "T3_8.png",
235 | "T403_9.png",
236 | "T409_0.png",
237 | "T40_14.png",
238 | "T40_15.png",
239 | "T40_2.png",
240 | "T40_6.png",
241 | "T411_0.png",
242 | "T411_5.png",
243 | "T41_1.png",
244 | "T50_16.png",
245 | "T50_19.png",
246 | "T53_13.png",
247 | "T53_9.png",
248 | "T580_4.png",
249 | "T580_5.png",
250 | "T582_3.png",
251 | "T582_5.png",
252 | "T583_4.png",
253 | "T586_1.png",
254 | "T58_0.png",
255 | "T58_3.png",
256 | "T58_7.png",
257 | "T597_0.png",
258 | "T602_7.png",
259 | "T602_9.png",
260 | "T607_0.png",
261 | "T619_12.png",
262 | "T619_6.png",
263 | "T619_7.png",
264 | "T636_0.png",
265 | "T636_4.png",
266 | "T642_1.png",
267 | "T647_18.png",
268 | "T647_2.png",
269 | "T647_20.png",
270 | "T64_0.png",
271 | "T64_1.png",
272 | "T658_11.png",
273 | "T658_6.png",
274 | "T663_11.png",
275 | "T66_3.png",
276 | "T677_6.png",
277 | "T693_0.png",
278 | "T693_9.png",
279 | "T695_0.png",
280 | "T710_14.png",
281 | "T711_5.png",
282 | "T712_4.png",
283 | "T71_0.png",
284 | "T71_1.png",
285 | "T71_2.png",
286 | "T71_3.png",
287 | "T71_4.png",
288 | "T71_5.png",
289 | "T71_6.png",
290 | "T71_7.png",
291 | "T71_8.png",
292 | "T71_9.png",
293 | "T724_10.png",
294 | "T725_11.png",
295 | "T726_1.png",
296 | "T734_2.png",
297 | "T736_15.png",
298 | "T736_2.png",
299 | "T736_5.png",
300 | "T740_2.png",
301 | "T745_1.png",
302 | "T756_7.png",
303 | "T757_4.png",
304 | "T762_0.png",
305 | "T767_1.png",
306 | "T767_6.png",
307 | "T770_0.png",
308 | "T770_10.png",
309 | "T770_11.png",
310 | "T770_6.png",
311 | "T772_11.png",
312 | "T775_9.png",
313 | "T77_1.png",
314 | "T795_1.png",
315 | "T795_11.png",
316 | "T7_7.png",
317 | "T7_9.png",
318 | "T803_6.png",
319 | "T803_7.png",
320 | "T810_5.png",
321 | "T810_6.png",
322 | "T810_7.png",
323 | "T813_5.png",
324 | "T823_4.png",
325 | "T823_5.png",
326 | "T840_6.png",
327 | "T844_9.png",
328 | "T848_1.png",
329 | "T855_2.png",
330 | "T856_18.png",
331 | "T856_2.png",
332 | "T856_4.png",
333 | "T865_6.png",
334 | "T86_1.png",
335 | "T86_6.png",
336 | "T879_6.png",
337 | "T884_4.png",
338 | "T886_1.png",
339 | "T898_8.png",
340 | "T913_14.png",
341 | "T915_4.png",
342 | "T919_1.png",
343 | "T932_3.png",
344 | "T945_12.png",
345 | "T945_13.png",
346 | "T945_15.png",
347 | "T945_16.png",
348 | "T945_17.png",
349 | "T945_18.png",
350 | "T945_8.png",
351 | "T963_1.png",
352 | "T96_5.png",
353 | "T96_6.png",
354 | "T972_6.png",
355 | "T979_13.png",
356 | "T994_4.png",
357 | "T997_10.png",
358 | "T999_7.png",
359 | "T106_1.png",
360 | "T188_11.png",
361 | "T763_1.png",
362 | "T763_2.png",
363 | "T865_0.png",
364 | "T876_9.png",
365 | "T999_3.png",
366 | "A1007.png",
367 | "A1264.png",
368 | "A14912.png",
369 | "A15901.png",
370 | "A17682.png",
371 | "A20064.png",
372 | "A24631.png",
373 | "A2751.png",
374 | "A4189.png",
375 | "A9707.png",
376 | "B2436_1.png",
377 | "B2861_1.png",
378 | "T1027_2.png",
379 | "T151_8.png",
380 | "T165_6.png",
381 | "T207_12.png",
382 | "T217_1.png",
383 | "T217_3.png",
384 | "T261_1.png",
385 | "T261_2.png",
386 | "T311_7.png",
387 | "T320_5.png",
388 | "T325_1.png",
389 | "T329_8.png",
390 | "T333_6.png",
391 | "T342_5.png",
392 | "T350_10.png",
393 | "T350_2.png",
394 | "T387_2.png",
395 | "T387_4.png",
396 | "T389_10.png",
397 | "T3_7.png",
398 | "T4_7.png",
399 | "T58_8.png",
400 | "T597_9.png",
401 | "T59_6.png",
402 | "T5_0.png",
403 | "T5_1.png",
404 | "T619_13.png",
405 | "T624_2.png",
406 | "T636_1.png",
407 | "T647_21.png",
408 | "T658_13.png",
409 | "T663_3.png",
410 | "T667_2.png",
411 | "T684_5.png",
412 | "T684_8.png",
413 | "T687_2.png",
414 | "T6_11.png",
415 | "T731_5.png",
416 | "T735_1.png",
417 | "T756_12.png",
418 | "T757_3.png",
419 | "T795_13.png",
420 | "T838_9.png",
421 | "T856_19.png",
422 | "T857_0.png",
423 | "T86_7.png",
424 | "T886_0.png",
425 | "T898_9.png",
426 | "T933_5.png",
427 | "T997_7.png",
428 | "T374_0.png",
429 | "A24374.png",
430 | "T799_1.png",
431 | "T398_7.png",
432 | "T949_8.png",
433 | "T789_1.png",
434 | "T1009_1.png",
435 | "T4_5.png",
436 | "T816_0.png",
437 | "T159_6.png",
438 | "A14243.png",
439 | "A1610.png",
440 | "A6442.png",
441 | "A3354.png",
442 | "T690_4.png",
443 | "A4609.png",
444 | "T1077_7.png",
445 | "A5355.png",
446 | "T73_3.png",
447 | "A3692.png",
448 | "A9506.png",
449 | "A14812.png",
450 | "T936_8.png",
451 | "T1071_5.png",
452 | "T216_7.png",
453 | "T1105_2.png",
454 | "T261_3.png",
455 | "A21449.png",
456 | "T124_13.png",
457 | "T583_6.png",
458 | "T942_7.png",
459 | "B1442_9.png",
460 | "B968_3.png",
461 | "T401_6.png",
462 | "T230_10.png",
463 | "A2143.png",
464 | "A2143.png",
465 | "A9643.png",
466 | "T587_1.png",
467 | "A24620.png",
468 | "T934_0.png",
469 | "A2433.png",
470 | "T881_5.png",
471 | "T931_24.png",
472 | "B858_2.png",
473 | "T1009_0.png",
474 | "T270_14.png",
475 | "T181_18.png",
476 | "T1071_6.png",
477 | "A4674.png",
478 | "A16263.png",
479 | "A6368.png",
480 | "T1134_7.png",
481 | "A7325.png",
482 | "T174_5.png",
483 | "B685_0.png",
484 | "T285_2.png",
485 | "A20784.png",
486 | "A19004.png",
487 | "A2612.png",
488 | "T374_8.png",
489 | "B2681_2.png",
490 | "A26479.png",
491 | "B1958_0.png",
492 | "T312_1.png",
493 | "A1268.png",
494 | "A798.png",
495 | "A7143.png",
496 | "B121_0.png",
497 | "A20795.png",
498 | "A21802.png",
499 | "A2295.png",
500 | "A4076.png",
501 | "A3121.png",
502 | "A27044.png",
503 | "T684_6.png",
504 | "A6189.png",
505 | "T723_3.png",
506 | "T218_9.png",
507 | "T279_5.png",
508 | "A4335.png",
509 | "T634_7.png",
510 | "T870_2.png",
511 | "A4889.png"
512 | ],
513 | "black_list": [
514 | "A14430.png",
515 | "A1315.png",
516 | "A1573.png",
517 | "A16342.png",
518 | "A18403.png",
519 | "A18610.png",
520 | "A19289.png",
521 | "A1945.png",
522 | "A19462.png",
523 | "A19233.png",
524 | "A23543.png",
525 | "A22742.png",
526 | "A22689.png",
527 | "A20253.png",
528 | "A19845.png",
529 | "A20654.png",
530 | "A475.png",
531 | "B1339_5.png",
532 | "B1462_3.png",
533 | "B1339_4.png",
534 | "B1014_0.png",
535 | "B1610_7.png",
536 | "B1864_0.png",
537 | "B1864_1.png",
538 | "B1141_13.png",
539 | "B1884_0.png",
540 | "B1141_3.png",
541 | "B1721_0.png",
542 | "B1252_1.png",
543 | "B1877_3.png",
544 | "B1801_3.png",
545 | "B1422_1.png",
546 | "B1387_2.png",
547 | "B1339_3.png",
548 | "B1007_0.png",
549 | "B1131_0.png",
550 | "B1252_0.png",
551 | "B1141_2.png",
552 | "B1141_14.png",
553 | "B1652_1.png",
554 | "B1422_3.png",
555 | "B1141_8.png",
556 | "B1652_2.png",
557 | "B1052_2.png",
558 | "B1141_4.png",
559 | "B1141_6.png",
560 | "B1422_5.png",
561 | "B1339_6.png",
562 | "B1462_2.png",
563 | "B1410_0.png",
564 | "B1422_4.png",
565 | "B1339_7.png",
566 | "B1864_3.png",
567 | "B1387_1.png",
568 | "B1864_4.png",
569 | "B1864_2.png",
570 | "B1339_2.png",
571 | "B1801_2.png",
572 | "B1877_2.png",
573 | "B1052_1.png",
574 | "B1462_1.png",
575 | "B1877_1.png",
576 | "B1387_0.png",
577 | "B1387_3.png",
578 | "B1566_0.png",
579 | "B1141_12.png",
580 | "B2756_5.png",
581 | "B245_0.png",
582 | "B2530_5.png",
583 | "B2999_3.png",
584 | "B227_1.png",
585 | "B2411_0.png",
586 | "B364_0.png",
587 | "B2530_8.png",
588 | "B552_0.png",
589 | "B535_4.png",
590 | "B2756_7.png",
591 | "B2999_6.png",
592 | "B631_0.png",
593 | "B245_3.png",
594 | "B2043_0.png",
595 | "B2513_0.png",
596 | "B2557_4.png",
597 | "B3090_4.png",
598 | "B2982_16.png",
599 | "B2267_0.png",
600 | "B2092_3.png",
601 | "B364_1.png",
602 | "B2999_5.png",
603 | "T103_5.png",
604 | "B759_4.png",
605 | "T1070_1.png",
606 | "T1095_4.png",
607 | "B866_10.png",
608 | "B762_4.png",
609 | "T1135_6.png",
610 | "T1108_9.png",
611 | "T1056_4.png",
612 | "T1061_2.png",
613 | "T105_9.png",
614 | "T1135_7.png",
615 | "T1174_7.png",
616 | "B866_6.png",
617 | "T1070_4.png",
618 | "B866_1.png",
619 | "B866_4.png",
620 | "B2999_2.png",
621 | "T107_3.png",
622 | "B227_0.png",
623 | "B866_0.png",
624 | "B2412_0.png",
625 | "B2982_6.png",
626 | "B2999_11.png",
627 | "B2993_6.png",
628 | "T1070_2.png",
629 | "T1070_12.png",
630 | "B245_1.png",
631 | "B3007_0.png",
632 | "B2267_2.png",
633 | "T1112_10.png",
634 | "B535_3.png",
635 | "B552_1.png",
636 | "B2092_4.png",
637 | "B245_2.png",
638 | "T1070_3.png",
639 | "B421_0.png",
640 | "B2557_6.png",
641 | "T1070_0.png",
642 | "B2530_3.png",
643 | "B2092_2.png",
644 | "B762_5.png",
645 | "B2557_5.png",
646 | "B2511_2.png",
647 | "B759_3.png",
648 | "B2982_14.png",
649 | "T1135_4.png",
650 | "B2530_6.png",
651 | "B227_2.png",
652 | "B2215_0.png",
653 | "B2530_9.png",
654 | "B2982_7.png",
655 | "T1070_8.png",
656 | "B2176_0.png",
657 | "B759_2.png",
658 | "B2999_0.png",
659 | "B762_6.png",
660 | "B2982_5.png",
661 | "T1052_4.png",
662 | "B2412_2.png",
663 | "B634_0.png",
664 | "B552_2.png",
665 | "B2999_10.png",
666 | "B762_0.png",
667 | "B2982_4.png",
668 | "B2999_4.png",
669 | "B2452_0.png",
670 | "B866_3.png",
671 | "B2567_0.png",
672 | "B2703_2.png",
673 | "B364_2.png",
674 | "B2557_7.png",
675 | "T1155_3.png",
676 | "B2252_0.png",
677 | "B2999_1.png",
678 | "T1046_3.png",
679 | "T1135_0.png",
680 | "B2530_14.png",
681 | "B227_3.png",
682 | "B552_3.png",
683 | "B866_2.png",
684 | "B2557_3.png",
685 | "B216_0.png",
686 | "B2412_1.png",
687 | "B2530_13.png",
688 | "B2649_0.png",
689 | "B2748_0.png",
690 | "B2748_1.png",
691 | "B2756_4.png",
692 | "B2982_2.png",
693 | "B2982_3.png",
694 | "B2993_1.png",
695 | "B2993_5.png",
696 | "B3084_0.png",
697 | "B3090_0.png",
698 | "B535_8.png",
699 | "B699_0.png",
700 | "B803_0.png",
701 | "T1071_1.png",
702 | "T1112_9.png",
703 | "T1135_8.png",
704 | "T117_2.png",
705 | "T121_2.png",
706 | "T127_4.png",
707 | "T129_1.png",
708 | "T129_13.png",
709 | "T150_8.png",
710 | "T169_12.png",
711 | "T182_12.png",
712 | "T183_5.png",
713 | "T188_6.png",
714 | "T19_2.png",
715 | "T206_13.png",
716 | "T206_8.png",
717 | "T216_9.png",
718 | "T230_7.png",
719 | "T233_11.png",
720 | "T258_3.png",
721 | "T260_3.png",
722 | "T288_0.png",
723 | "T288_11.png",
724 | "T288_9.png",
725 | "T294_9.png",
726 | "T300_11.png",
727 | "T319_8.png",
728 | "T32_0.png",
729 | "T32_1.png",
730 | "T32_2.png",
731 | "T32_4.png",
732 | "T32_5.png",
733 | "T32_6.png",
734 | "T32_9.png",
735 | "T330_8.png",
736 | "T340_10.png",
737 | "T344_3.png",
738 | "T357_5.png",
739 | "T377_3.png",
740 | "T389_6.png",
741 | "T38_2.png",
742 | "T390_8.png",
743 | "T398_9.png",
744 | "T3_0.png",
745 | "T3_1.png",
746 | "T3_6.png",
747 | "T4_4.png",
748 | "T50_7.png",
749 | "T53_11.png",
750 | "T53_14.png",
751 | "T593_5.png",
752 | "T597_8.png",
753 | "T59_7.png",
754 | "T608_4.png",
755 | "T648_4.png",
756 | "T659_7.png",
757 | "T668_4.png",
758 | "T668_5.png",
759 | "T668_6.png",
760 | "T668_7.png",
761 | "T668_8.png",
762 | "T688_8.png",
763 | "T691_4.png",
764 | "T701_11.png",
765 | "T710_7.png",
766 | "T747_0.png",
767 | "T747_1.png",
768 | "T747_10.png",
769 | "T749_1.png",
770 | "T74_1.png",
771 | "T74_7.png",
772 | "T760_6.png",
773 | "T765_1.png",
774 | "T767_9.png",
775 | "T781_1.png",
776 | "T781_11.png",
777 | "T781_12.png",
778 | "T781_13.png",
779 | "T781_14.png",
780 | "T781_15.png",
781 | "T781_16.png",
782 | "T781_17.png",
783 | "T781_2.png",
784 | "T781_21.png",
785 | "T781_3.png",
786 | "T781_4.png",
787 | "T781_5.png",
788 | "T781_6.png",
789 | "T781_7.png",
790 | "T792_1.png",
791 | "T792_2.png",
792 | "T792_3.png",
793 | "T792_4.png",
794 | "T792_6.png",
795 | "T792_7.png",
796 | "T794_3.png",
797 | "T803_8.png",
798 | "T813_4.png",
799 | "T817_11.png",
800 | "T817_13.png",
801 | "T826_1.png",
802 | "T84_0.png",
803 | "T84_10.png",
804 | "T84_12.png",
805 | "T84_13.png",
806 | "T84_14.png",
807 | "T84_15.png",
808 | "T84_4.png",
809 | "T84_5.png",
810 | "T85_13.png",
811 | "T865_5.png",
812 | "T86_2.png",
813 | "T881_8.png",
814 | "T886_11.png",
815 | "T914_11.png",
816 | "T931_12.png",
817 | "T931_13.png",
818 | "T949_3.png",
819 | "T96_9.png",
820 | "T997_5.png",
821 | "T997_9.png",
822 | "B1052_0.png",
823 | "B1141_15.png",
824 | "B1141_5.png",
825 | "B1141_7.png",
826 | "B1422_2.png",
827 | "B1652_0.png",
828 | "B2166_0.png",
829 | "B2267_1.png",
830 | "B2530_15.png",
831 | "B2530_16.png",
832 | "B2530_17.png",
833 | "B2530_4.png",
834 | "B2530_7.png",
835 | "B2649_1.png",
836 | "B2756_3.png",
837 | "B2756_6.png",
838 | "B2865_0.png",
839 | "B2865_1.png",
840 | "B2982_12.png",
841 | "B2982_13.png",
842 | "B2982_15.png",
843 | "B2982_8.png",
844 | "B2993_0.png",
845 | "B3064_0.png",
846 | "B3126_0.png",
847 | "B421_1.png",
848 | "B535_0.png",
849 | "B535_1.png",
850 | "B535_2.png",
851 | "B866_5.png",
852 | "T1031_2.png",
853 | "T106_5.png",
854 | "T1070_10.png",
855 | "T1070_11.png",
856 | "T1070_9.png",
857 | "T1135_5.png",
858 | "T1164_6.png",
859 | "T143_2.png",
860 | "T15_4.png",
861 | "T221_4.png",
862 | "T279_1.png",
863 | "T288_1.png",
864 | "T288_10.png",
865 | "T288_12.png",
866 | "T288_13.png",
867 | "T288_14.png",
868 | "T288_15.png",
869 | "T288_16.png",
870 | "T288_17.png",
871 | "T288_2.png",
872 | "T288_3.png",
873 | "T288_4.png",
874 | "T288_5.png",
875 | "T288_6.png",
876 | "T288_7.png",
877 | "T288_8.png",
878 | "T321_10.png",
879 | "T321_6.png",
880 | "T32_3.png",
881 | "T32_7.png",
882 | "T32_8.png",
883 | "T352_6.png",
884 | "T354_4.png",
885 | "T41_2.png",
886 | "T53_0.png",
887 | "T668_2.png",
888 | "T668_3.png",
889 | "T66_1.png",
890 | "T747_11.png",
891 | "T747_5.png",
892 | "T747_6.png",
893 | "T747_7.png",
894 | "T747_8.png",
895 | "T747_9.png",
896 | "T773_0.png",
897 | "T792_5.png",
898 | "T83_2.png",
899 | "T84_1.png",
900 | "T84_11.png",
901 | "T84_2.png",
902 | "T84_3.png",
903 | "T84_6.png",
904 | "T84_7.png",
905 | "T84_8.png",
906 | "T84_9.png",
907 | "T95_2.png",
908 | "T302_3.png",
909 | "T404_3.png",
910 | "T714_3.png",
911 | "T723_4.png",
912 | "T931_9.png",
913 | "T825_1.png",
914 | "T285_3.png",
915 | "T835_23.png",
916 | "T295_3.png",
917 | "T302_10.png",
918 | "B2069_1.png",
919 | "T76_5.png",
920 | "T1172_8.png",
921 | "T1134_17.png",
922 | "T295_4.png",
923 | "A9514.png",
924 | "T765_9.png",
925 | "T1020_0.png",
926 | "T933_8.png",
927 | "T329_0.png",
928 | "T346_4.png",
929 | "T820_0.png",
930 | "T761_1.png",
931 | "T783_0.png",
932 | "T917_0.png",
933 | "T238_8.png",
934 | "T216_17.png",
935 | "T931_0.png",
936 | "T374_9.png",
937 | "T1026_13.png",
938 | "T183_7.png",
939 | "T769_2.png",
940 | "T176_1.png",
941 | "T945_22.png",
942 | "T247_0.png",
943 | "T981_3.png",
944 | "T322_4.png",
945 | "T137_0.png",
946 | "T35_3.png",
947 | "T355_4.png",
948 | "T877_0.png",
949 | "T776_0.png",
950 | "T388_8.png",
951 | "T207_16.png",
952 | "T196_8.png",
953 | "T929_10.png",
954 | "T1076_10.png",
955 | "T229_0.png",
956 | "T927_5.png",
957 | "T760_0.png",
958 | "T18_0.png",
959 | "T928_9.png",
960 | "T230_12.png",
961 | "T807_5.png",
962 | "T129_0.png",
963 | "T164_9.png",
964 | "T240_0.png",
965 | "T354_3.png",
966 | "T912_0.png",
967 | "T366_6.png",
968 | "T231_5.png",
969 | "T179_0.png",
970 | "T82_2.png",
971 | "T191_7.png",
972 | "T243_1.png",
973 | "T207_9.png",
974 | "T1007_4.png",
975 | "A25503.png",
976 | "T834_2.png",
977 | "T371_0.png",
978 | "T770_8.png",
979 | "T349_3.png",
980 | "T995_9.png",
981 | "T1016_2.png",
982 | "T227_8.png",
983 | "T771_3.png",
984 | "T1034_5.png",
985 | "T755_2.png",
986 | "A24951.png",
987 | "T207_14.png",
988 | "T935_7.png",
989 | "A8455.png",
990 | "T684_9.png",
991 | "T989_5.png",
992 | "A6750.png",
993 | "A631.png",
994 | "A8794.png",
995 | "A25351.png",
996 | "A7508.png",
997 | "A2718.png",
998 | "A26059.png",
999 | "T1134_4.png",
1000 | "A254.png",
1001 | "A9840.png",
1002 | "A511.png",
1003 | "A20920.png",
1004 | "A15739.png",
1005 | "A874.png",
1006 | "A21434.png",
1007 | "A22940.png",
1008 | "A26491.png",
1009 | "A5271.png",
1010 | "A19883.png",
1011 | "A7578.png",
1012 | "T124_8.png",
1013 | "T919_2.png",
1014 | "A25805.png",
1015 | "A5626.png",
1016 | "A25754.png",
1017 | "A8109.png",
1018 | "A20859.png",
1019 | "A5111.png",
1020 | "A5019.png",
1021 | "A26234.png",
1022 | "A20231.png",
1023 | "A26382.png",
1024 | "A5864.png",
1025 | "A22103.png",
1026 | "A26384.png",
1027 | "A3577.png",
1028 | "T684_10.png",
1029 | "A22322.png",
1030 | "A3374.png",
1031 | "T1037_4.png",
1032 | "A4999.png",
1033 | "A5769.png",
1034 | "A27040.png",
1035 | "T234_7.png",
1036 | "T725_10.png",
1037 | "T302_15.png",
1038 | "T688_9.png",
1039 | "T6_6.png",
1040 | "A296.png",
1041 | "A21322.png",
1042 | "T251_4.png",
1043 | "A8899.png"
1044 | ]
1045 | }
1046 |
--------------------------------------------------------------------------------
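`files/black.json` above holds a `white_list` and a `black_list` of image filenames, and the script at the top of this section copies a sample of the black-listed images for visual inspection. One plausible downstream use is excluding those images from training; the sketch below illustrates that idea only under that assumption (the training-image directory path is an assumption, not taken from the repo):

```python
import json
import os

# Illustrative sketch: drop black-listed images from the set of training files.
# How black.json is actually consumed by the training pipeline is not shown here;
# data/train below is an assumed location for the training images.
with open('files/black.json') as f:
    black_set = set(json.load(f)['black_list'])

train_dir = os.path.join('data', 'train')
train_files = [name for name in os.listdir(train_dir) if name not in black_set]
print('kept %d images after removing black-listed names' % len(train_files))
```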
/files/src/A81.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yinchangchang/ocr_densenet/a31f57e006f73b52b3881fd4a771320f02df2147/files/src/A81.png
--------------------------------------------------------------------------------
/files/src/B1000_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yinchangchang/ocr_densenet/a31f57e006f73b52b3881fd4a771320f02df2147/files/src/B1000_0.png
--------------------------------------------------------------------------------
/files/ttf/simsun.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yinchangchang/ocr_densenet/a31f57e006f73b52b3881fd4a771320f02df2147/files/ttf/simsun.ttf
--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
1 | Pillow
2 | fuzzywuzzy
3 | numpy==1.14.2
4 | tqdm==4.19.4
5 | scikit-image==0.13.0
6 | scikit-learn==0.19.1
7 | torchvision==0.2.0
8 | scipy==0.19.0
9 | matplotlib==2.0.2
10 |
--------------------------------------------------------------------------------