├── .gitignore ├── README.md ├── bin ├── deep_ocr_id_card_reco ├── deep_ocr_id_card_segmentation ├── deep_ocr_make_caffe_dataset └── deep_ocr_reco_captcha ├── chinese_fonts ├── DroidSansFallbackFull.ttf ├── NotoSansHans-Black.otf ├── NotoSansHans-Bold.otf ├── NotoSansHans-DemiLight.otf ├── NotoSansHans-Light.otf ├── NotoSansHans-Medium.otf ├── NotoSansHans-Regular.otf ├── NotoSansHans-Thin-Windows.otf ├── fangzheng_fangsong.ttf ├── fangzheng_heiti.TTF ├── fangzheng_jieti.TTF ├── fangzheng_shusong.ttf └── mingliu.ttc ├── deep_idocr.py ├── deploy_lenet_train_test.prototxt ├── id_num_fonts ├── OCR-B.ttf ├── huawenxihei.ttf ├── msyh.ttf ├── simhei.ttf └── simkai.ttf ├── lenet_solver.prototxt ├── lenet_train_test.prototxt ├── lenet_train_test.prototxt.bak ├── python ├── .gitignore ├── deep_ocr │ ├── __init__.py │ ├── caffe_clf.py │ ├── captcha │ │ ├── __init__.py │ │ ├── char_segmentation.py │ │ ├── rm_noise.py │ │ └── search_best_segmentation.py │ ├── cv2_img_proc.py │ ├── id_cards │ │ ├── __init__.py │ │ ├── char_set.py │ │ └── segmentation.py │ ├── lang_aux.py │ ├── langs │ │ ├── __init__.py │ │ ├── chi_sim.py │ │ ├── chi_tra.py │ │ ├── digits.py │ │ ├── eng.py │ │ ├── id_num.py │ │ ├── lower_eng.py │ │ ├── test.py │ │ └── upper_eng.py │ ├── reco_text_line.py │ └── utils.py ├── deep_ocr_id_card_reco ├── deep_ocr_id_card_reco.py ├── deep_ocr_make_caffe_dataset ├── deep_ocr_reco_captcha ├── get_dataset.sh ├── make_caffe_dataset.py ├── reco_chars.py ├── test_id_card_reco.py ├── test_model.py └── test_reco.py ├── reco_chars.py └── solver.prototxt /.gitignore: -------------------------------------------------------------------------------- 1 | ./.idea 2 | python/.idea 3 | *.pyc 4 | .idea/* 5 | .idea -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # deepLearning_OCR 2 | Deep learning for Chinese ID card (身份证) recognition 3 | ## Get the dataset 4 | >deep_ocr_make_caffe_dataset --out_caffe_dir /workspace/caffe_dataset_lower_eng \ 5 | --font_dir /opt/deep_ocr/data/fonts/chinese_fonts \ 6 | --width 28 --height 28 --margin 4 --langs lower_eng 7 | ## Trained model 8 | Link: https://pan.baidu.com/s/1YCGVZENzlubH6G1mXEElOw 9 | extraction code (提取码): baoo 10 | ## References 11 | For details, please refer to http://www.cnblogs.com/ygh1229/p/7224940.html 12 | Some of the work references http://chongdata.com/articles/?p=5 13 | If any of this content infringes your rights, please contact the author. 14 | 15 | ## Newer methods 16 | - CTPN: https://github.com/tianzhi0549/CTPN 17 |
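18 | ## Usage 19 | With a trained model unpacked into the workspace, recognition can be run with the bundled CLI. The command below is taken verbatim from the docstring of bin/deep_ocr_id_card_reco; adjust WORKSPACE and the classifier dirs to your own setup: 20 | >deep_ocr_id_card_reco --img $DEEP_OCR_ROOT/data/id_card_img.jpg \ 21 | --debug_path /tmp/debug \ 22 | --cls_sim ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 \ 23 | --cls_ua ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 -------------------------------------------------------------------------------- /bin/deep_ocr_id_card_reco: -------------------------------------------------------------------------------- 1 | #! 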
/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | 6 | import argparse 7 | from argparse import RawTextHelpFormatter 8 | import os 9 | import shutil 10 | import cv2 11 | from deep_ocr.caffe_clf import CaffeClsBuilder 12 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatio 13 | from deep_ocr.cv2_img_proc import PreprocessBackgroundMask 14 | from deep_ocr.id_cards.segmentation import Segmentation 15 | from deep_ocr.id_cards.char_set import CharSet 16 | from deep_ocr.reco_text_line import RecoTextLine 17 | from deep_ocr.reco_text_line import RectImageClassifier 18 | 19 | 20 | 21 | if __name__ == "__main__": 22 | 23 | description = ''' 24 | # Docker config 25 | CAFFE_MODEL=/opt/deep_ocr/data/trained_models/mnist_model 26 | DEEP_OCR_ROOT=/opt/deep_ocr 27 | WORKSPACE=/workspace 28 | 29 | # PC 30 | CAFFE_MODEL=/root/data/deep_ocr_trained_models/mnist_model 31 | DEEP_OCR_ROOT=/root/workspace/deep_ocr 32 | WORKSPACE=/root/data/deep_ocr_workspace 33 | 34 | deep_ocr_id_card_reco --img $DEEP_OCR_ROOT/data/id_card_img.jpg \ 35 | --debug_path /tmp/debug \ 36 | --cls_sim ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 \ 37 | --cls_ua ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 38 | 39 | deep_ocr_id_card_reco --img ~/data/id_card_front \ 40 | --debug_path /tmp/debug \ 41 | --cls_sim ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 \ 42 | --cls_ua ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 43 | ''' 44 | 45 | parser = argparse.ArgumentParser( 46 | description=description, formatter_class=RawTextHelpFormatter) 47 | parser.add_argument('--img', dest='img', 48 | default=None, required=True, 49 | help='id card image to recognize') 50 | parser.add_argument('--debug_path', dest='debug_path', 51 | default=None, required=False, 52 | help='debug path') 53 | parser.add_argument('--cls_sim', dest='cls_sim', 54 | default=None, required=True, 55 | help='dir of the simplified-Chinese + digits classifier') 56 | parser.add_argument('--cls_ua', dest='cls_ua', 57 | default=None, required=True, 58 | help='dir of the upper-case letters + digits classifier') 59 | 60 | options = parser.parse_args() 61 | path_img = os.path.expanduser(options.img) 62 | debug_path = options.debug_path and os.path.expanduser(options.debug_path) 63 | if debug_path is not None: 64 | if os.path.isdir(debug_path): 65 | shutil.rmtree(debug_path) 66 | os.makedirs(debug_path) 67 | 68 | cls_dir_sim = os.path.expanduser(options.cls_sim) 69 | cls_dir_ua = os.path.expanduser(options.cls_ua) 70 | 71 | caffe_cls_builder = CaffeClsBuilder() 72 | cls_sim = caffe_cls_builder.build(cls_dir=cls_dir_sim,) 73 | cls_ua = caffe_cls_builder.build(cls_dir=cls_dir_ua,) 74 | caffe_classifiers = {"sim": cls_sim, "ua": cls_ua} 75 | 76 | seg_norm_width = 600 77 | seg_norm_height = 600 78 | preprocess_resize = PreprocessResizeKeepRatio( 79 | seg_norm_width, seg_norm_height) 80 | id_card_img = cv2.imread(path_img) 81 | id_card_img = preprocess_resize.do(id_card_img) 82 | segmentation = Segmentation(debug_path) 83 | key_to_segmentation = segmentation.do(id_card_img) 84 | 85 | boundaries = [ 86 | ((0, 0, 0), (100, 100, 100)), 87 | ] 88 | boundary2binimgs = [] 89 | for boundary in boundaries: 90 | preprocess_bg_mask = PreprocessBackgroundMask(boundary) 91 | id_card_img_mask = preprocess_bg_mask.do(id_card_img) 92 | boundary2binimgs.append((boundary, id_card_img_mask)) 93 | 94 | char_set = CharSet() 95 | char_set_data = char_set.get() 96 | 97 | rect_img_clf = RectImageClassifier( 98 | None, 99 | None, 100 | char_set, 101 | caffe_cls_width=64, 102 | caffe_cls_height=64) 103 | 104 | reco_text_line = 
RecoTextLine(rect_img_clf) 105 | 106 | key_ocr_res = {} 107 | for key in key_to_segmentation: 108 | key_ocr_res[key] = [] 109 | print("="*64) 110 | print(key) 111 | for i, segment in enumerate(key_to_segmentation[key]): 112 | if debug_path is not None: 113 | line_debug_path = "key_%s_%i" % (key, i) 114 | line_debug_path = os.path.join(debug_path, line_debug_path) 115 | reco_text_line.debug_path = line_debug_path 116 | reco_text_line.char_set = char_set_data[key] 117 | caffe_cls = caffe_classifiers[ 118 | char_set_data[key]["caffe_cls"]] 119 | ocr_res = reco_text_line.do(boundary2binimgs, segment, caffe_cls) 120 | key_ocr_res[key].append(ocr_res) 121 | print("ocr res:") 122 | for key in key_ocr_res: 123 | print("="*60) 124 | print(key) 125 | for res_i in key_ocr_res[key]: 126 | print(res_i.encode("utf-8")) 127 | 128 | if debug_path is not None: 129 | path_debug_image_mask = os.path.join( 130 | debug_path, "reco_debug_01_image_mask.jpg") 131 | cv2.imwrite(path_debug_image_mask, id_card_img_mask) 132 | -------------------------------------------------------------------------------- /bin/deep_ocr_id_card_segmentation: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | 6 | 7 | import argparse 8 | from argparse import RawTextHelpFormatter 9 | import os 10 | import shutil 11 | import cv2 12 | import numpy as np 13 | 14 | from deep_ocr.utils import extract_peek_ranges_from_array 15 | from deep_ocr.utils import median_split_ranges 16 | from deep_ocr.utils import merge_chars_into_line_segments 17 | 18 | from deep_ocr.cv2_img_proc import PreprocessBackgroundMask 19 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatio 20 | 21 | from deep_ocr.id_cards.segmentation import Segmentation 22 | 23 | 24 | 25 | if __name__ == "__main__": 26 | 27 | description = ''' 28 | # Docker config 29 | CAFFE_MODEL=/opt/deep_ocr/data/trained_models/mnist_model 30 | DEEP_OCR_ROOT=/opt/deep_ocr 31 | 32 | # PC 33 | CAFFE_MODEL=/root/data/deep_ocr_trained_models/mnist_model 34 | DEEP_OCR_ROOT=/root/workspace/deep_ocr 35 | 36 | deep_ocr_id_card_segmentation --img $DEEP_OCR_ROOT/data/id_card_img.jpg \ 37 | --debug_path /tmp/debug 38 | 39 | deep_ocr_id_card_segmentation --img ~/data/id_card_front \ 40 | --debug_path /tmp/debug 41 | ''' 42 | 43 | parser = argparse.ArgumentParser( 44 | description=description, formatter_class=RawTextHelpFormatter) 45 | parser.add_argument('--img', dest='img', 46 | default=None, required=True, 47 | help='id card image, or a directory of .jpg images, to segment') 48 | parser.add_argument('--debug_path', dest='debug_path', 49 | default=None, required=False, 50 | help='debug path') 51 | options = parser.parse_args() 52 | 53 | path_img = os.path.expanduser(options.img) 54 | debug_path = options.debug_path and os.path.expanduser(options.debug_path) 55 | if debug_path is not None: 56 | if os.path.isdir(debug_path): 57 | shutil.rmtree(debug_path) 58 | os.makedirs(debug_path) 59 | norm_width = 600 60 | norm_height = 600 61 | 62 | if os.path.isfile(path_img): 63 | id_card_img = cv2.imread(path_img) 64 | preprocess_resize = PreprocessResizeKeepRatio(norm_width, norm_height) 65 | id_card_img = preprocess_resize.do(id_card_img) 66 | segmentation = Segmentation(debug_path) 67 | segmentation.do(id_card_img) 68 | elif os.path.isdir(path_img): 69 | filenames = os.listdir(path_img) 70 | for filename in filenames: 71 | if filename.lower().endswith(".jpg"): 72 | print("process filename=", filename) 73 | debug_path_filename = None 74 
| if debug_path is not None: 75 | debug_path_filename = os.path.join(debug_path, filename) 76 | each_path_img = os.path.join(path_img, filename) 77 | id_card_img = cv2.imread(each_path_img) 78 | preprocess_resize = PreprocessResizeKeepRatio(norm_width, norm_height) 79 | id_card_img = preprocess_resize.do(id_card_img) 80 | segmentation = Segmentation(debug_path_filename) 81 | segmentation.do(id_card_img) -------------------------------------------------------------------------------- /bin/deep_ocr_make_caffe_dataset: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | 6 | 7 | import argparse 8 | from argparse import RawTextHelpFormatter 9 | import fnmatch 10 | import os 11 | import cv2 12 | import json 13 | import random 14 | import numpy as np 15 | import shutil 16 | from deep_ocr.lang_aux import LangCharsGenerate 17 | from deep_ocr.lang_aux import FontCheck 18 | from deep_ocr.lang_aux import Font2Image 19 | 20 | 21 | 22 | if __name__ == "__main__": 23 | 24 | description = ''' 25 | deep_ocr_make_caffe_dataset --out_caffe_dir /root/data/caffe_dataset \ 26 | --font_dir /root/workspace/deep_ocr_fonts/chinese_fonts \ 27 | --width 30 --height 30 --margin 4 --langs lower_eng 28 | ''' 29 | 30 | parser = argparse.ArgumentParser( 31 | description=description, formatter_class=RawTextHelpFormatter) 32 | parser.add_argument('--out_caffe_dir', dest='out_caffe_dir', 33 | default=None, required=True, 34 | help='output dir for the caffe dataset') 35 | parser.add_argument('--font_dir', dest='font_dir', 36 | default=None, required=True, 37 | help='font dir used to produce images') 38 | parser.add_argument('--test_ratio', dest='test_ratio', 39 | default=0.3, required=False, 40 | help='fraction of fonts held out for the test set') 41 | parser.add_argument('--width', dest='width', 42 | default=None, required=True, 43 | help='width') 44 | parser.add_argument('--height', dest='height', 45 | default=None, required=True, 46 | help='height') 47 | parser.add_argument('--no_crop', dest='no_crop', 48 | default=False, required=False, 49 | help='do not tightly crop rendered characters', action='store_true') 50 | parser.add_argument('--margin', dest='margin', 51 | default=0, required=False, 52 | help='margin (pixels) around each rendered character', ) 53 | parser.add_argument('--langs', dest='langs', 54 | default="chi_sim", required=True, 55 | help='deep_ocr.langs.*, e.g. 
chi_sim, chi_tra, digits...') 56 | options = parser.parse_args() 57 | 58 | out_caffe_dir = os.path.expanduser(options.out_caffe_dir) 59 | font_dir = os.path.expanduser(options.font_dir) 60 | test_ratio = float(options.test_ratio) 61 | width = int(options.width) 62 | height = int(options.height) 63 | need_crop = not options.no_crop 64 | margin = int(options.margin) 65 | langs = options.langs 66 | 67 | image_dir_name = "images" 68 | 69 | images_dir = os.path.join(out_caffe_dir, image_dir_name) 70 | if os.path.isdir(images_dir): 71 | shutil.rmtree(images_dir) 72 | os.makedirs(images_dir) 73 | 74 | lang_chars_gen = LangCharsGenerate(langs) 75 | lang_chars = lang_chars_gen.do() 76 | font_check = FontCheck(lang_chars) 77 | 78 | y_to_tag = {} 79 | y_tag_json_file = os.path.join(out_caffe_dir, "y_tag.json") 80 | y_tag_text_file = os.path.join(out_caffe_dir, "y_tag.txt") 81 | path_train = os.path.join(out_caffe_dir, "train.txt") 82 | path_test = os.path.join(out_caffe_dir, "test.txt") 83 | 84 | 85 | verified_font_paths = [] 86 | ## search the font dir for usable font files 87 | for font_name in os.listdir(font_dir): 88 | path_font_file = os.path.join(font_dir, font_name) 89 | if font_check.do(path_font_file): 90 | verified_font_paths.append(path_font_file) 91 | 92 | train_list = [] 93 | test_list = [] 94 | max_train_i = int(len(verified_font_paths) * (1.0 - test_ratio)) 95 | 96 | font2image = Font2Image(width, height, need_crop, margin) 97 | 98 | for i, verified_font_path in enumerate(verified_font_paths): 99 | is_train = True 100 | if i >= max_train_i: 101 | is_train = False 102 | for j, char in enumerate(lang_chars): 103 | if j not in y_to_tag: 104 | y_to_tag[j] = char 105 | char_dir = os.path.join(images_dir, "%d" % j) 106 | if not os.path.isdir(char_dir): 107 | os.makedirs(char_dir) 108 | path_image = os.path.join( 109 | char_dir, 110 | "%d_%s.jpg" % (i, os.path.basename(verified_font_path))) 111 | relative_path_image = os.path.join( 112 | image_dir_name, "%d"%j, 113 | "%d_%s.jpg" % (i, os.path.basename(verified_font_path)) 114 | ) 115 | font2image.do(verified_font_path, char, path_image) 116 | if is_train: 117 | train_list.append((relative_path_image, j)) 118 | else: 119 | test_list.append((relative_path_image, j)) 120 | 121 | h_y_tag_json_file = open(y_tag_json_file, "w+") 122 | json.dump(y_to_tag, h_y_tag_json_file) 123 | h_y_tag_json_file.close() 124 | 125 | h_y_tag_text_file = open(y_tag_text_file, "w+") 126 | for key in y_to_tag: 127 | h_y_tag_text_file.write("%d %s\n" % (key, y_to_tag[key].encode("utf-8"))) 128 | h_y_tag_text_file.close() 129 | 130 | fout = open(path_train, "w+") 131 | for item in train_list: 132 | fout.write("%s %d\n" % (item[0], item[1])) 133 | fout.close() 134 | 135 | fout = open(path_test, "w+") 136 | for item in test_list: 137 | fout.write("%s %d\n" % (item[0], item[1])) 138 | fout.close() 139 |
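The train.txt and test.txt written above use Caffe's standard "relative_path label" image-list format. A typical next step (a sketch, assuming a stock Caffe checkout at $CAFFE_ROOT; convert_imageset is Caffe's bundled tool) converts the lists into the LMDBs that lenet_train_test.prototxt reads, resizing to the 64x64 input used by the models in this repo: >$CAFFE_ROOT/build/tools/convert_imageset --gray --shuffle --resize_height=64 --resize_width=64 /root/data/caffe_dataset/ /root/data/caffe_dataset/train.txt /root/data/caffe_dataset/train_lmdb -------------------------------------------------------------------------------- /bin/deep_ocr_reco_captcha: -------------------------------------------------------------------------------- 1 | #! 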
/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | 6 | 7 | import argparse 8 | from argparse import RawTextHelpFormatter 9 | import os 10 | import shutil 11 | import cv2 12 | 13 | from deep_ocr.captcha.char_segmentation import CharSegmentation 14 | from deep_ocr.captcha.search_best_segmentation import SearchBestSegmentation 15 | from deep_ocr.caffe_clf import CaffeCls 16 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatio 17 | 18 | 19 | if __name__ == "__main__": 20 | 21 | description = ''' 22 | # Docker config 23 | CAFFE_MODEL=/opt/deep_ocr/data/trained_models/mnist_model 24 | DEEP_OCR_ROOT=/opt/deep_ocr 25 | 26 | # PC 27 | CAFFE_MODEL=/root/data/deep_ocr_trained_models/mnist_model 28 | DEEP_OCR_ROOT=/root/workspace/deep_ocr 29 | 30 | deep_ocr_reco_captcha --captcha_img $DEEP_OCR_ROOT/data/captcha/captcha.png \ 31 | --num_char 5 \ 32 | --caffe_model $CAFFE_MODEL/lenet_iter_10000.caffemodel \ 33 | --caffe_network $CAFFE_MODEL/lenet.prototxt \ 34 | --y_tag $CAFFE_MODEL/deep_ocr_network.y_tag.json \ 35 | --caffe_img_w 28 --caffe_img_h 28 \ 36 | --debug_path /tmp/debug_captcha 37 | 38 | deep_ocr_reco_captcha --captcha_img $DEEP_OCR_ROOT/data/captcha/simple.png \ 39 | --num_char 5 \ 40 | --caffe_model $CAFFE_MODEL/lenet_iter_10000.caffemodel \ 41 | --caffe_network $CAFFE_MODEL/lenet.prototxt \ 42 | --y_tag $CAFFE_MODEL/deep_ocr_network.y_tag.json \ 43 | --caffe_img_w 28 --caffe_img_h 28 \ 44 | --debug_path /tmp/debug_captcha 45 | 46 | ''' 47 | 48 | parser = argparse.ArgumentParser( 49 | description=description, formatter_class=RawTextHelpFormatter) 50 | parser.add_argument('--captcha_img', dest='captcha_img', 51 | default=None, required=True, 52 | help='captcha image to reco') 53 | parser.add_argument('--num_char', dest='num_char', 54 | default=None, required=True, 55 | help='m_char') 56 | parser.add_argument('--caffe_model', dest='caffe_model', 57 | default=None, required=True, 58 | help='trained caffe model') 59 | parser.add_argument('--caffe_network', dest='caffe_network', 60 | default=None, required=True, 61 | help='caffe network') 62 | parser.add_argument('--y_tag', dest='y_tag', 63 | default=None, required=True, 64 | help='y_tag') 65 | parser.add_argument('--caffe_img_w', dest='caffe_img_w', 66 | default=None, required=True, 67 | help='caffe_img_w') 68 | parser.add_argument('--caffe_img_h', dest='caffe_img_h', 69 | default=None, required=True, 70 | help='caffe_img_h') 71 | parser.add_argument('--debug_path', dest='debug_path', 72 | default=None, required=False, 73 | help='debug path') 74 | options = parser.parse_args() 75 | 76 | captcha_img = os.path.expanduser(options.captcha_img) 77 | num_char = int(options.num_char) 78 | caffe_model = os.path.expanduser(options.caffe_model) 79 | caffe_network = os.path.expanduser(options.caffe_network) 80 | y_tag = os.path.expanduser(options.y_tag) 81 | caffe_img_w = int(options.caffe_img_w) 82 | caffe_img_h = int(options.caffe_img_h) 83 | norm_width = 200 84 | norm_height = 200 85 | 86 | debug_path = None 87 | if options.debug_path is not None: 88 | debug_path = os.path.expanduser(options.debug_path) 89 | if os.path.isdir(debug_path): 90 | shutil.rmtree(debug_path) 91 | os.makedirs(debug_path) 92 | 93 | image = cv2.imread(captcha_img) 94 | 95 | proc_keep_ratio = PreprocessResizeKeepRatio( 96 | width=norm_width, height=norm_height) 97 | image = proc_keep_ratio.do(image) 98 | 99 | char_segmentation = CharSegmentation( 100 | num_char=num_char, 101 | debug_path=debug_path) 102 | segmentations = 
char_segmentation.do(image) 103 | 104 | caffe_cls = CaffeCls(caffe_network, caffe_model, y_tag, 105 | width=caffe_img_w, height=caffe_img_h) 106 | 107 | search_best_segmentation = SearchBestSegmentation( 108 | caffe_cls, char_segmentation.bin_img, 109 | debug_path) 110 | eval_segmentations = search_best_segmentation.do(segmentations) 111 | 112 | n_top = 100 113 | for i, eval_segmentation in enumerate(eval_segmentations): 114 | if i > n_top: 115 | break 116 | print(eval_segmentation) -------------------------------------------------------------------------------- /chinese_fonts/DroidSansFallbackFull.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/DroidSansFallbackFull.ttf -------------------------------------------------------------------------------- /chinese_fonts/NotoSansHans-Black.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/NotoSansHans-Black.otf -------------------------------------------------------------------------------- /chinese_fonts/NotoSansHans-Bold.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/NotoSansHans-Bold.otf -------------------------------------------------------------------------------- /chinese_fonts/NotoSansHans-DemiLight.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/NotoSansHans-DemiLight.otf -------------------------------------------------------------------------------- /chinese_fonts/NotoSansHans-Light.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/NotoSansHans-Light.otf -------------------------------------------------------------------------------- /chinese_fonts/NotoSansHans-Medium.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/NotoSansHans-Medium.otf -------------------------------------------------------------------------------- /chinese_fonts/NotoSansHans-Regular.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/NotoSansHans-Regular.otf -------------------------------------------------------------------------------- /chinese_fonts/NotoSansHans-Thin-Windows.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/NotoSansHans-Thin-Windows.otf -------------------------------------------------------------------------------- /chinese_fonts/fangzheng_fangsong.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/fangzheng_fangsong.ttf -------------------------------------------------------------------------------- /chinese_fonts/fangzheng_heiti.TTF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/fangzheng_heiti.TTF -------------------------------------------------------------------------------- /chinese_fonts/fangzheng_jieti.TTF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/fangzheng_jieti.TTF -------------------------------------------------------------------------------- /chinese_fonts/fangzheng_shusong.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/fangzheng_shusong.ttf -------------------------------------------------------------------------------- /chinese_fonts/mingliu.ttc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/mingliu.ttc -------------------------------------------------------------------------------- /deep_idocr.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import print_function 4 | 5 | import argparse 6 | from argparse import RawTextHelpFormatter 7 | import os 8 | import shutil 9 | import cv2 10 | from deep_ocr.caffe_clf import CaffeClsBuilder 11 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatio 12 | from deep_ocr.cv2_img_proc import PreprocessBackgroundMask 13 | from deep_ocr.id_cards.segmentation import Segmentation 14 | from deep_ocr.id_cards.char_set import CharSet 15 | from deep_ocr.reco_text_line import RecoTextLine 16 | from deep_ocr.reco_text_line import RectImageClassifier 17 | 18 | import json 19 | from flask import Flask 20 | from flask import request 21 | from flask import redirect 22 | from flask import jsonify 23 | app = Flask(__name__) 24 | 25 | @app.route('/upload', methods=['GET', 'POST']) 26 | def upload_file(): 27 | if request.method == 'POST': 28 | f = request.files['file'] 29 | f.save('/home/ygh/flask/id_card_img.jpg') 30 | 31 | ## path_img = os.path.expanduser("/home/ygh/deep_ocr/data/id_card_img.jpg") 32 | path_img = os.path.expanduser("/home/ygh/flask/id_card_img.jpg") 33 | debug_path = os.path.expanduser("/home/ygh/deep_ocr_workspace/debug") 34 | if debug_path is not None: 35 | if os.path.isdir(debug_path): 36 | shutil.rmtree(debug_path) 37 | os.makedirs(debug_path) 38 | 39 | cls_dir_sim = os.path.expanduser("/home/ygh/deep_ocr_workspace/data/chongdata_caffe_cn_sim_digits_64_64") 40 | cls_dir_ua = os.path.expanduser("/home/ygh/deep_ocr_workspace/data/chongdata_train_ualpha_digits_64_64") 41 | 42 | caffe_cls_builder = CaffeClsBuilder() 43 | cls_sim = caffe_cls_builder.build(cls_dir=cls_dir_sim,) 44 | cls_ua = caffe_cls_builder.build(cls_dir=cls_dir_ua,) 45 | caffe_classifiers = {"sim": cls_sim, "ua": cls_ua} 46 | 47 | seg_norm_width = 600 48 | seg_norm_height = 600 49 | preprocess_resize = PreprocessResizeKeepRatio( 50 | seg_norm_width, 
seg_norm_height) 51 | id_card_img = cv2.imread(path_img) 52 | id_card_img = preprocess_resize.do(id_card_img) 53 | segmentation = Segmentation(debug_path) 54 | key_to_segmentation = segmentation.do(id_card_img) 55 | 56 | boundaries = [ 57 | ((0, 0, 0), (100, 100, 100)), 58 | ] 59 | boundary2binimgs = [] 60 | for boundary in boundaries: 61 | preprocess_bg_mask = PreprocessBackgroundMask(boundary) 62 | id_card_img_mask = preprocess_bg_mask.do(id_card_img) 63 | boundary2binimgs.append((boundary, id_card_img_mask)) 64 | 65 | char_set = CharSet() 66 | char_set_data = char_set.get() 67 | 68 | rect_img_clf = RectImageClassifier( 69 | None, 70 | None, 71 | char_set, 72 | caffe_cls_width=64, 73 | caffe_cls_height=64) 74 | 75 | reco_text_line = RecoTextLine(rect_img_clf) 76 | 77 | key_ocr_res = {} 78 | for key in key_to_segmentation: 79 | key_ocr_res[key] = [] 80 | print("="*64) 81 | print(key) 82 | for i, segment in enumerate(key_to_segmentation[key]): 83 | if debug_path is not None: 84 | line_debug_path = "key_%s_%i" % (key, i) 85 | line_debug_path = os.path.join(debug_path, line_debug_path) 86 | reco_text_line.debug_path = line_debug_path 87 | reco_text_line.char_set = char_set_data[key] 88 | ## initialize the classifier for this field 89 | caffe_cls = caffe_classifiers[ 90 | char_set_data[key]["caffe_cls"]] 91 | ## feed the text line into the model for recognition 92 | ocr_res = reco_text_line.do(boundary2binimgs, segment, caffe_cls) 93 | ## append the result to the output list 94 | key_ocr_res[key].append(ocr_res) 95 | print("ocr res:") 96 | for key in key_ocr_res: 97 | print("="*60) 98 | print(key) 99 | for res_i in key_ocr_res[key]: 100 | print(res_i.encode("utf-8")) 101 | if debug_path is not None: 102 | path_debug_image_mask = os.path.join( 103 | debug_path, "reco_debug_01_image_mask.jpg") 104 | cv2.imwrite(path_debug_image_mask, id_card_img_mask) 105 | 106 | ## return the result, packaged as JSON key-value pairs 107 | data = [{"result":"success","response":{"name":key_ocr_res["name"],"address":key_ocr_res["address"],"month":key_ocr_res["month"],"minzu":key_ocr_res["minzu"],"year":key_ocr_res["year"],"sex":key_ocr_res["sex"],"id":key_ocr_res["id"],"day":key_ocr_res["day"]}}] 108 | ## data = '{"result":"success"} 109 | ## result = json.loads(data) 110 | return json.dumps(data,skipkeys=True,ensure_ascii=False,encoding="utf-8") 111 | else: 112 | data2 = [{"result":"error"}] 113 | ## result2 = json.loads(data2) 114 | return json.dumps(data2) 115 | ## return "error" 116 | 117 | 118 | 119 | 120 | if __name__ == '__main__': 121 | app.run(host='0.0.0.0',port=8880) 122 |
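A minimal client sketch for the Flask service above (illustration only, assuming the server is running locally and the third-party requests package is installed; the multipart field name "file" matches request.files['file'] in upload_file):

import requests

# POST an ID card image to the /upload endpoint served on port 8880
with open("id_card_img.jpg", "rb") as f:
    resp = requests.post("http://127.0.0.1:8880/upload", files={"file": f})
# on success the body is a JSON list whose "response" object carries the
# name/address/year/month/day/sex/minzu/id fields recognized above
print(resp.text)

-------------------------------------------------------------------------------- /deploy_lenet_train_test.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { shape: { dim:1 dim: 1 dim: 64 dim: 64 } } 7 | } 8 | layer { 9 | name: "conv11" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv11" 13 | param { 14 | lr_mult: 1 15 | } 16 | param { 17 | lr_mult: 2 18 | } 19 | convolution_param { 20 | num_output: 64 21 | pad: 5 22 | kernel_size: 11 23 | stride: 1 24 | weight_filler { 25 | type: "xavier" 26 | } 27 | bias_filler { 28 | type: "constant" 29 | } 30 | } 31 | } 32 | layer { 33 | name: "pool11" 34 | type: "Pooling" 35 | bottom: "conv11" 36 | top: "pool11" 37 | pooling_param { 38 | pool: MAX 39 | kernel_size: 2 40 | stride: 2 41 | } 42 | } 43 | layer { 44 | name: "conv7" 45 | type: "Convolution" 46 | bottom: "pool11" 47 | top: "conv7" 48 | param { 49 | lr_mult: 1 50 | } 51 | param { 52 | lr_mult: 2 53 | } 54 | 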
convolution_param { 55 | num_output: 128 56 | pad: 3 57 | kernel_size: 7 58 | stride: 1 59 | weight_filler { 60 | type: "xavier" 61 | } 62 | bias_filler { 63 | type: "constant" 64 | } 65 | } 66 | } 67 | layer { 68 | name: "pool7" 69 | type: "Pooling" 70 | bottom: "conv7" 71 | top: "pool7" 72 | pooling_param { 73 | pool: MAX 74 | kernel_size: 2 75 | stride: 2 76 | } 77 | } 78 | layer { 79 | name: "conv5" 80 | type: "Convolution" 81 | bottom: "pool7" 82 | top: "conv5" 83 | param { 84 | lr_mult: 1 85 | } 86 | param { 87 | lr_mult: 2 88 | } 89 | convolution_param { 90 | num_output: 256 91 | pad: 2 92 | kernel_size: 5 93 | stride: 1 94 | weight_filler { 95 | type: "xavier" 96 | } 97 | bias_filler { 98 | type: "constant" 99 | } 100 | } 101 | } 102 | layer { 103 | name: "pool5" 104 | type: "Pooling" 105 | bottom: "conv5" 106 | top: "pool5" 107 | pooling_param { 108 | pool: MAX 109 | kernel_size: 2 110 | stride: 2 111 | } 112 | } 113 | layer { 114 | name: "conv3" 115 | type: "Convolution" 116 | bottom: "pool5" 117 | top: "conv3" 118 | param { 119 | lr_mult: 1 120 | } 121 | param { 122 | lr_mult: 2 123 | } 124 | convolution_param { 125 | num_output: 512 126 | pad: 1 127 | kernel_size: 3 128 | stride: 1 129 | weight_filler { 130 | type: "xavier" 131 | } 132 | bias_filler { 133 | type: "constant" 134 | } 135 | } 136 | } 137 | layer { 138 | name: "pool3" 139 | type: "Pooling" 140 | bottom: "conv3" 141 | top: "pool3" 142 | pooling_param { 143 | pool: MAX 144 | kernel_size: 2 145 | stride: 2 146 | } 147 | } 148 | layer { 149 | name: "fc10000" 150 | type: "InnerProduct" 151 | # learning rate and decay multipliers for the weights 152 | param { lr_mult: 1 } 153 | # learning rate and decay multipliers for the biases 154 | param { lr_mult: 2 } 155 | inner_product_param { 156 | num_output: 10000 157 | weight_filler { 158 | type: "gaussian" 159 | std: 0.01 160 | } 161 | bias_filler { 162 | type: "constant" 163 | value: 0 164 | } 165 | } 166 | bottom: "pool3" 167 | top: "fc10000" 168 | } 169 | layer { 170 | name: "relu1" 171 | type: "ReLU" 172 | bottom: "fc10000" 173 | top: "fc10000" 174 | } 175 | layer { 176 | name: "fc6503" 177 | type: "InnerProduct" 178 | # learning rate and decay multipliers for the weights 179 | param { lr_mult: 1 } 180 | # learning rate and decay multipliers for the biases 181 | param { lr_mult: 2 } 182 | inner_product_param { 183 | num_output: 6503 184 | weight_filler { 185 | type: "gaussian" 186 | std: 0.01 187 | } 188 | bias_filler { 189 | type: "constant" 190 | value: 0 191 | } 192 | } 193 | bottom: "fc10000" 194 | top: "fc6503" 195 | } 196 | layer { 197 | name: "prob" 198 | type: "Softmax" 199 | bottom: "fc6503" 200 | top: "prob" 201 | } 202 | -------------------------------------------------------------------------------- /id_num_fonts/OCR-B.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/id_num_fonts/OCR-B.ttf -------------------------------------------------------------------------------- /id_num_fonts/huawenxihei.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/id_num_fonts/huawenxihei.ttf -------------------------------------------------------------------------------- /id_num_fonts/msyh.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/id_num_fonts/msyh.ttf -------------------------------------------------------------------------------- /id_num_fonts/simhei.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/id_num_fonts/simhei.ttf -------------------------------------------------------------------------------- /id_num_fonts/simkai.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/id_num_fonts/simkai.ttf -------------------------------------------------------------------------------- /lenet_solver.prototxt: -------------------------------------------------------------------------------- 1 | # The train/test net protocol buffer definition 2 | net: "examples/dunhe_train_char_cn_sim_digits_64_64/lenet_train_test.prototxt" 3 | # test_iter specifies how many forward passes the test should carry out. 4 | # In the case of MNIST, we have test batch size 100 and 100 test iterations, 5 | # covering the full 10,000 testing images. 6 | test_iter: 100 7 | # Carry out testing every 500 training iterations. 8 | test_interval: 500 9 | # The base learning rate, momentum and the weight decay of the network. 10 | base_lr: 0.01 11 | momentum: 0.9 12 | weight_decay: 0.0005 13 | # The learning rate policy 14 | lr_policy: "inv" 15 | gamma: 0.0001 16 | power: 0.75 17 | # Display every 100 iterations 18 | display: 100 19 | # The maximum number of iterations 20 | max_iter: 50000 21 | # snapshot intermediate results 22 | snapshot: 5000 23 | snapshot_prefix: "examples/dunhe_train_char_cn_sim_digits_64_64/lenet" 24 | # solver mode: CPU or GPU 25 | solver_mode: GPU 26 | -------------------------------------------------------------------------------- /lenet_train_test.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "mnist" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | scale: 0.00390625 12 | } 13 | data_param { 14 | source: "/home/user/Projects/data/caffe_dataset_cn_sim/train_lmdb" 15 | batch_size: 64 16 | backend: LMDB 17 | } 18 | } 19 | layer { 20 | name: "mnist" 21 | type: "Data" 22 | top: "data" 23 | top: "label" 24 | include { 25 | phase: TEST 26 | } 27 | transform_param { 28 | scale: 0.00390625 29 | } 30 | data_param { 31 | source: "/home/user/Projects/data/caffe_dataset_cn_sim/val_lmdb" 32 | batch_size: 10 33 | backend: LMDB 34 | } 35 | } 36 | layer { 37 | name: "conv11" 38 | type: "Convolution" 39 | bottom: "data" 40 | top: "conv11" 41 | param { 42 | lr_mult: 1 43 | } 44 | param { 45 | lr_mult: 2 46 | } 47 | convolution_param { 48 | num_output: 64 49 | pad: 5 50 | kernel_size: 11 51 | stride: 1 52 | weight_filler { 53 | type: "xavier" 54 | } 55 | bias_filler { 56 | type: "constant" 57 | } 58 | } 59 | } 60 | layer { 61 | name: "pool11" 62 | type: "Pooling" 63 | bottom: "conv11" 64 | top: "pool11" 65 | pooling_param { 66 | pool: MAX 67 | kernel_size: 2 68 | stride: 2 69 | } 70 | } 71 | layer { 72 | name: "conv7" 73 | type: "Convolution" 74 | bottom: "pool11" 75 | top: "conv7" 76 | param { 77 | lr_mult: 1 78 | } 79 | param { 80 | lr_mult: 2 81 | } 82 | convolution_param { 83 | num_output: 128 84 | pad: 3 85 | kernel_size: 7 86 | 
stride: 1 87 | weight_filler { 88 | type: "xavier" 89 | } 90 | bias_filler { 91 | type: "constant" 92 | } 93 | } 94 | } 95 | layer { 96 | name: "pool7" 97 | type: "Pooling" 98 | bottom: "conv7" 99 | top: "pool7" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 2 103 | stride: 2 104 | } 105 | } 106 | layer { 107 | name: "conv5" 108 | type: "Convolution" 109 | bottom: "pool7" 110 | top: "conv5" 111 | param { 112 | lr_mult: 1 113 | } 114 | param { 115 | lr_mult: 2 116 | } 117 | convolution_param { 118 | num_output: 256 119 | pad: 2 120 | kernel_size: 5 121 | stride: 1 122 | weight_filler { 123 | type: "xavier" 124 | } 125 | bias_filler { 126 | type: "constant" 127 | } 128 | } 129 | } 130 | layer { 131 | name: "pool5" 132 | type: "Pooling" 133 | bottom: "conv5" 134 | top: "pool5" 135 | pooling_param { 136 | pool: MAX 137 | kernel_size: 2 138 | stride: 2 139 | } 140 | } 141 | layer { 142 | name: "conv3" 143 | type: "Convolution" 144 | bottom: "pool5" 145 | top: "conv3" 146 | param { 147 | lr_mult: 1 148 | } 149 | param { 150 | lr_mult: 2 151 | } 152 | convolution_param { 153 | num_output: 512 154 | pad: 1 155 | kernel_size: 3 156 | stride: 1 157 | weight_filler { 158 | type: "xavier" 159 | } 160 | bias_filler { 161 | type: "constant" 162 | } 163 | } 164 | } 165 | layer { 166 | name: "pool3" 167 | type: "Pooling" 168 | bottom: "conv3" 169 | top: "pool3" 170 | pooling_param { 171 | pool: MAX 172 | kernel_size: 2 173 | stride: 2 174 | } 175 | } 176 | layer { 177 | name: "fc10000" 178 | type: "InnerProduct" 179 | # learning rate and decay multipliers for the weights 180 | param { lr_mult: 1 } 181 | # learning rate and decay multipliers for the biases 182 | param { lr_mult: 2 } 183 | inner_product_param { 184 | num_output: 10000 185 | weight_filler { 186 | type: "gaussian" 187 | std: 0.01 188 | } 189 | bias_filler { 190 | type: "constant" 191 | value: 0 192 | } 193 | } 194 | bottom: "pool3" 195 | top: "fc10000" 196 | } 197 | layer { 198 | name: "relu1" 199 | type: "ReLU" 200 | bottom: "fc10000" 201 | top: "fc10000" 202 | } 203 | layer { 204 | name: "fc6492" 205 | type: "InnerProduct" 206 | # learning rate and decay multipliers for the weights 207 | param { lr_mult: 1 } 208 | # learning rate and decay multipliers for the biases 209 | param { lr_mult: 2 } 210 | inner_product_param { 211 | num_output: 6492 212 | weight_filler { 213 | type: "gaussian" 214 | std: 0.01 215 | } 216 | bias_filler { 217 | type: "constant" 218 | value: 0 219 | } 220 | } 221 | bottom: "fc10000" 222 | top: "fc6492" 223 | } 224 | layer { 225 | name: "accuracy" 226 | type: "Accuracy" 227 | bottom: "fc6492" 228 | bottom: "label" 229 | top: "accuracy" 230 | include { 231 | phase: TEST 232 | } 233 | } 234 | layer { 235 | name: "loss" 236 | type: "SoftmaxWithLoss" 237 | bottom: "fc6492" 238 | bottom: "label" 239 | top: "loss" 240 | } 241 | -------------------------------------------------------------------------------- /lenet_train_test.prototxt.bak: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "mnist" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | scale: 0.00390625 12 | } 13 | data_param { 14 | source: "/home/user/Projects/data/caffe_dataset_cn_sim/train_lmdb" 15 | batch_size: 64 16 | backend: LMDB 17 | } 18 | } 19 | layer { 20 | name: "mnist" 21 | type: "Data" 22 | top: "data" 23 | top: "label" 24 | include { 25 | phase: TEST 26 | } 27 | transform_param { 28 | scale: 0.00390625 29 | 
} 30 | data_param { 31 | source: "/home/user/Projects/data/caffe_dataset_cn_sim/val_lmdb" 32 | batch_size: 10 33 | backend: LMDB 34 | } 35 | } 36 | layer { 37 | name: "conv11" 38 | type: "Convolution" 39 | bottom: "data" 40 | top: "conv11" 41 | param { 42 | lr_mult: 1 43 | } 44 | param { 45 | lr_mult: 2 46 | } 47 | convolution_param { 48 | num_output: 64 49 | pad: 5 50 | kernel_size: 11 51 | stride: 1 52 | weight_filler { 53 | type: "xavier" 54 | } 55 | bias_filler { 56 | type: "constant" 57 | } 58 | } 59 | } 60 | layer { 61 | name: "pool11" 62 | type: "Pooling" 63 | bottom: "conv11" 64 | top: "pool11" 65 | pooling_param { 66 | pool: MAX 67 | kernel_size: 2 68 | stride: 2 69 | } 70 | } 71 | layer { 72 | name: "conv7" 73 | type: "Convolution" 74 | bottom: "pool11" 75 | top: "conv7" 76 | param { 77 | lr_mult: 1 78 | } 79 | param { 80 | lr_mult: 2 81 | } 82 | convolution_param { 83 | num_output: 128 84 | pad: 3 85 | kernel_size: 7 86 | stride: 1 87 | weight_filler { 88 | type: "xavier" 89 | } 90 | bias_filler { 91 | type: "constant" 92 | } 93 | } 94 | } 95 | layer { 96 | name: "pool7" 97 | type: "Pooling" 98 | bottom: "conv7" 99 | top: "pool7" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 2 103 | stride: 2 104 | } 105 | } 106 | layer { 107 | name: "conv5" 108 | type: "Convolution" 109 | bottom: "pool7" 110 | top: "conv5" 111 | param { 112 | lr_mult: 1 113 | } 114 | param { 115 | lr_mult: 2 116 | } 117 | convolution_param { 118 | num_output: 256 119 | pad: 2 120 | kernel_size: 5 121 | stride: 1 122 | weight_filler { 123 | type: "xavier" 124 | } 125 | bias_filler { 126 | type: "constant" 127 | } 128 | } 129 | } 130 | layer { 131 | name: "pool5" 132 | type: "Pooling" 133 | bottom: "conv5" 134 | top: "pool5" 135 | pooling_param { 136 | pool: MAX 137 | kernel_size: 2 138 | stride: 2 139 | } 140 | } 141 | layer { 142 | name: "conv3" 143 | type: "Convolution" 144 | bottom: "pool5" 145 | top: "conv3" 146 | param { 147 | lr_mult: 1 148 | } 149 | param { 150 | lr_mult: 2 151 | } 152 | convolution_param { 153 | num_output: 512 154 | pad: 1 155 | kernel_size: 3 156 | stride: 1 157 | weight_filler { 158 | type: "xavier" 159 | } 160 | bias_filler { 161 | type: "constant" 162 | } 163 | } 164 | } 165 | layer { 166 | name: "pool3" 167 | type: "Pooling" 168 | bottom: "conv3" 169 | top: "pool3" 170 | pooling_param { 171 | pool: MAX 172 | kernel_size: 2 173 | stride: 2 174 | } 175 | } 176 | layer { 177 | name: "fc10000" 178 | type: "InnerProduct" 179 | # learning rate and decay multipliers for the weights 180 | param { lr_mult: 1 } 181 | # learning rate and decay multipliers for the biases 182 | param { lr_mult: 2 } 183 | inner_product_param { 184 | num_output: 10000 185 | weight_filler { 186 | type: "gaussian" 187 | std: 0.01 188 | } 189 | bias_filler { 190 | type: "constant" 191 | value: 0 192 | } 193 | } 194 | bottom: "pool3" 195 | top: "fc10000" 196 | } 197 | layer { 198 | name: "relu1" 199 | type: "ReLU" 200 | bottom: "fc10000" 201 | top: "fc10000" 202 | } 203 | layer { 204 | name: "fc6503" 205 | type: "InnerProduct" 206 | # learning rate and decay multipliers for the weights 207 | param { lr_mult: 1 } 208 | # learning rate and decay multipliers for the biases 209 | param { lr_mult: 2 } 210 | inner_product_param { 211 | num_output: 6503 212 | weight_filler { 213 | type: "gaussian" 214 | std: 0.01 215 | } 216 | bias_filler { 217 | type: "constant" 218 | value: 0 219 | } 220 | } 221 | bottom: "fc10000" 222 | top: "fc6503" 223 | } 224 | layer { 225 | name: "accuracy" 226 | type: "Accuracy" 227 | 
bottom: "fc6503" 228 | bottom: "label" 229 | top: "accuracy" 230 | include { 231 | phase: TEST 232 | } 233 | } 234 | layer { 235 | name: "loss" 236 | type: "SoftmaxWithLoss" 237 | bottom: "fc6503" 238 | bottom: "label" 239 | top: "loss" 240 | } 241 | -------------------------------------------------------------------------------- /python/.gitignore: -------------------------------------------------------------------------------- 1 | ./.idea 2 | -------------------------------------------------------------------------------- /python/deep_ocr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/python/deep_ocr/__init__.py -------------------------------------------------------------------------------- /python/deep_ocr/caffe_clf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import caffe 3 | import json 4 | import numpy as np 5 | import os 6 | import cv2 7 | import shutil 8 | import copy 9 | 10 | class CaffeCls(object): 11 | def __init__(self, 12 | model_def, 13 | model_weights, 14 | y_tag_json_path, 15 | is_mode_cpu=True, 16 | width=64, 17 | height=64): 18 | self.net = caffe.Net(model_def, 19 | model_weights, 20 | caffe.TEST) 21 | if is_mode_cpu: 22 | caffe.set_mode_cpu() 23 | self.y_tag_json = json.load(open(y_tag_json_path, "r")) 24 | self.width = width 25 | self.height = height 26 | 27 | def predict_cv2_img(self, cv2_img): 28 | shape = cv2_img.shape 29 | cv2_imgs = cv2_img.reshape((1, shape[0], shape[1])) 30 | return self.predict_cv2_imgs(cv2_imgs)[0] 31 | 32 | 33 | def _predict_cv2_imgs_sub(self, cv2_imgs, pos_start, pos_end): 34 | cv2_imgs_sub = cv2_imgs[pos_start: pos_end] 35 | 36 | self.net.blobs['data'].reshape(cv2_imgs_sub.shape[0], 1, 37 | self.width, self.height) 38 | self.net.blobs['data'].data[...] 
= cv2_imgs_sub.reshape( 39 | (cv2_imgs_sub.shape[0], 1, self.width, self.height)) 40 | output = self.net.forward() 41 | 42 | output_tag_to_max_proba = [] 43 | 44 | num_sample = cv2_imgs_sub.shape[0] 45 | for i in range(num_sample): 46 | output_prob = output['prob'][i] 47 | output_prob_index = sorted( 48 | range(len(output_prob)), 49 | key=lambda x:output_prob[x], 50 | reverse=True) 51 | output_tag_to_probas = [] 52 | for index in output_prob_index: 53 | item = (self.y_tag_json[str(index)], 54 | output_prob[index]) 55 | output_tag_to_probas.append(item) 56 | # output_tag_to_probas = output_tag_to_probas[:2] 57 | output_tag_to_max_proba.append(output_tag_to_probas) 58 | return output_tag_to_max_proba 59 | 60 | def predict_cv2_imgs(self, cv2_imgs, step=50): 61 | output_tag_to_max_proba = [] 62 | num_sample = cv2_imgs.shape[0] 63 | for i in range(0, num_sample, step): 64 | pos_end = min(num_sample, (i + step)) 65 | output_tag_to_max_proba += \ 66 | self._predict_cv2_imgs_sub(cv2_imgs, i, pos_end) 67 | return output_tag_to_max_proba 68 | 69 | 70 | class CaffeClsBuilder(object): 71 | 72 | def __init__(self,): 73 | pass 74 | 75 | def build(self, 76 | cls_dir, 77 | is_mode_cpu=True, 78 | width=64, 79 | height=64): 80 | model_def = os.path.join(cls_dir, "model_def.prototxt") 81 | model_weights = os.path.join(cls_dir, "model_weights.caffemodel") 82 | y_tag_json_path = os.path.join(cls_dir, "y_tag.json") 83 | return CaffeCls( 84 | model_def=model_def, 85 | model_weights=model_weights, 86 | y_tag_json_path=y_tag_json_path, 87 | is_mode_cpu=is_mode_cpu, 88 | width=width, 89 | height=height) 90 | -------------------------------------------------------------------------------- /python/deep_ocr/captcha/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /python/deep_ocr/captcha/char_segmentation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import cv2 4 | import os 5 | import numpy as np 6 | from scipy.signal import savgol_filter 7 | from scipy.signal import argrelextrema 8 | import itertools 9 | 10 | class CharSegmentation(object): 11 | 12 | def __init__(self, num_char, debug_path = None): 13 | self.num_char = num_char 14 | self.debug_path = debug_path 15 | self.bin_img = None 16 | 17 | def find_lowest_nonzero_curve(self, bin_img): 18 | #cv2.imshow('bin_img', bin_img) 19 | #cv2.waitKey(0) 20 | #cv2.destroyAllWindows() 21 | height, width = bin_img.shape 22 | nonzero_curve = [] 23 | for i in range(width): 24 | is_found = False 25 | for j in range(height): 26 | if bin_img[j, i] != 0: 27 | nonzero_curve.append(j) 28 | is_found = True 29 | break 30 | if not is_found: 31 | nonzero_curve.append(height-1) 32 | return np.asarray(nonzero_curve) 33 | 34 | def merge_closest_points(self, min_x_coordinates, min_diff_x=10): 35 | ret = [] 36 | n = len(min_x_coordinates) 37 | taken = [False] * n 38 | for i in range(n): 39 | if not taken[i]: 40 | count = 1 41 | point = min_x_coordinates[i] 42 | taken[i] = True 43 | for j in range(i+1, n): 44 | if abs(min_x_coordinates[i] - min_x_coordinates[j]) < min_diff_x: 45 | point += min_x_coordinates[j] 46 | count+=1 47 | taken[j] = True 48 | point /= count 49 | ret.append(point) 50 | return ret 51 | 52 | def remove_noise_by_contours(self, bin_img): 53 | c_bin_img = np.copy(bin_img) 54 | min_area = 100 55 | max_area = bin_img.shape[0] * bin_img.shape[1] 56 
| min_w = 10 57 | min_h = 10 58 | if cv2.__version__[0] == "2": 59 | contours, hierarchy = cv2.findContours( 60 | c_bin_img, 61 | cv2.RETR_TREE, 62 | cv2.CHAIN_APPROX_SIMPLE) 63 | else: 64 | _, contours, hierarchy = cv2.findContours( 65 | c_bin_img, 66 | cv2.RETR_TREE, 67 | cv2.CHAIN_APPROX_SIMPLE) 68 | 69 | filtered_contours = [] 70 | for cnt in contours: 71 | x, y, w, h = cv2.boundingRect(cnt) 72 | if w * h >= min_area and (h >= min_h \ 73 | or w >= min_w) and w * h <= max_area: 74 | filtered_contours.append(cnt) 75 | else: 76 | bin_img[y:y+h, x:x+w] = 0 77 | contours = filtered_contours 78 | return bin_img 79 | 80 | 81 | def do(self, cv2_color_img): 82 | # return all the possible segmentations 83 | cv_grey_img = cv2.cvtColor(cv2_color_img, cv2.COLOR_BGR2GRAY) 84 | height, width = cv_grey_img.shape 85 | adaptive_threshold = cv2.adaptiveThreshold( 86 | cv_grey_img, 87 | 255, 88 | cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\ 89 | cv2.THRESH_BINARY_INV, 11, 2) 90 | 91 | adaptive_threshold = self.remove_noise_by_contours(adaptive_threshold) 92 | self.bin_img = adaptive_threshold 93 | 94 | nonzero_curve = self.find_lowest_nonzero_curve(adaptive_threshold) 95 | nonzero_curve = savgol_filter(nonzero_curve, 15, 2) 96 | min_points = argrelextrema(nonzero_curve, np.greater) 97 | min_points = min_points[0] 98 | min_points = [i for i in min_points] 99 | 100 | #min_points.append(width-1) 101 | #min_points = [0,] + min_points 102 | 103 | min_points = self.merge_closest_points(min_points, width * 0.02) 104 | 105 | print("min_points=", min_points) 106 | segmentations = [] 107 | for selected_min_points in itertools.combinations(min_points, 108 | self.num_char+1): 109 | print("selected_min_points=", selected_min_points) 110 | one_segmentation = [] 111 | prev_min_point = selected_min_points[0] 112 | for i, selected_min_point in enumerate(selected_min_points): 113 | if i != 0: 114 | one_segmentation.append( 115 | (prev_min_point, 0, 116 | selected_min_point - prev_min_point, height)) 117 | prev_min_point = selected_min_point 118 | segmentations.append(one_segmentation) 119 | 120 | if self.debug_path is not None: 121 | import matplotlib.pyplot as plt 122 | path_cv2_color_img = os.path.join(self.debug_path, 123 | "cv2_color_img.jpg") 124 | path_cv_grey_img = os.path.join(self.debug_path, 125 | "cv2_grey_img.jpg") 126 | path_adaptive_threshold = os.path.join(self.debug_path, 127 | "adaptive_threshold.jpg") 128 | ## draw possible segmentation on image 129 | for min_point in min_points: 130 | cv2.line(cv2_color_img, (min_point, 0), 131 | (min_point, height), (255, 0, 0)) 132 | 133 | cv2.imwrite(path_cv2_color_img, cv2_color_img) 134 | cv2.imwrite(path_cv_grey_img, cv_grey_img) 135 | cv2.imwrite(path_adaptive_threshold, adaptive_threshold) 136 | 137 | min_point_vals = [nonzero_curve[i] for i in min_points] 138 | #plt.plot(range(nonzero_curve.shape[0]), nonzero_curve) 139 | #plt.plot(min_points, min_point_vals, 'ro') 140 | #plt.gca().invert_yaxis() 141 | #plt.show() 142 | return segmentations 143 | -------------------------------------------------------------------------------- /python/deep_ocr/captcha/rm_noise.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import PIL.Image 4 | import sys 5 | 6 | # http://stackoverflow.com/questions/11253899/removing-the-background-noise-of-a-captcha-image-by-replicating-the-chopping-fil 7 | 8 | class RMNoise(object): 9 | def __init__(self): 10 | pass 11 | 12 | def do_path(self, image_path, out_path, chop=2): 13 | # 
python chop.py [chop-factor] [in-file] [out-file] 14 | 15 | image = PIL.Image.open(image_path).convert('1') 16 | width, height = image.size 17 | data = image.load() 18 | 19 | # Iterate through the rows. 20 | for y in range(height): 21 | for x in range(width): 22 | # Make sure we're on a dark pixel. 23 | if data[x, y] > 128: 24 | continue 25 | # Keep a total of non-white contiguous pixels. 26 | total = 0 27 | # Check a sequence ranging from x to image.width. 28 | for c in range(x, width): 29 | # If the pixel is dark, add it to the total. 30 | if data[c, y] < 128: 31 | total += 1 32 | # If the pixel is light, stop the sequence. 33 | else: 34 | break 35 | # If the total is less than the chop, replace everything with white. 36 | if total <= chop: 37 | for c in range(total): 38 | data[x + c, y] = 255 39 | # Skip this sequence we just altered. 40 | x += total 41 | 42 | # Iterate through the columns. 43 | for x in range(width): 44 | for y in range(height): 45 | # Make sure we're on a dark pixel. 46 | if data[x, y] > 128: 47 | continue 48 | # Keep a total of non-white contiguous pixels. 49 | total = 0 50 | # Check a sequence ranging from y to image.height. 51 | for c in range(y, height): 52 | # If the pixel is dark, add it to the total. 53 | if data[x, c] < 128: 54 | total += 1 55 | # If the pixel is light, stop the sequence. 56 | else: 57 | break 58 | # If the total is less than the chop, replace everything with white. 59 | if total <= chop: 60 | for c in range(total): 61 | data[x, y + c] = 255 62 | # Skip this sequence we just altered. 63 | y += total 64 | 65 | image.save(out_path) -------------------------------------------------------------------------------- /python/deep_ocr/captcha/search_best_segmentation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatioFillBG 4 | from deep_ocr.cv2_img_proc import PreprocessCropZeros 5 | 6 | import numpy as np 7 | import os 8 | import cv2 9 | 10 | class SearchBestSegmentation(object): 11 | 12 | def __init__(self, cls, cv2_grey_img, debug_path=None): 13 | self.cls = cls 14 | self.cv2_img = cv2_grey_img 15 | self.debug_path = debug_path 16 | 17 | def _extract_sub_img(self, cv2_img, rect): 18 | x, y, w, h = rect 19 | return cv2_img[y: y+h-1, x: x+w-1] 20 | 21 | def _extract_sub_imgs(self, cv2_img, segmentation): 22 | char_w = self.cls.width 23 | char_h = self.cls.height 24 | proc_resize = PreprocessResizeKeepRatioFillBG( 25 | width=char_w, height=char_h, 26 | auto_avoid_fill_bg=False, 27 | fill_bg=True, margin=2) 28 | crop_zeros = PreprocessCropZeros() 29 | sub_imgs = [] 30 | for rect in segmentation: 31 | sub_img = self._extract_sub_img(cv2_img, rect) 32 | sub_img = crop_zeros.do(sub_img) 33 | sub_imgs.append(proc_resize.do(sub_img)) 34 | return np.asarray(sub_imgs)/255.0 35 | 36 | def eval_segmentation(self, cv2_img, segmentation): 37 | sub_imgs = self._extract_sub_imgs(cv2_img, segmentation) 38 | tag_to_probas = self.cls.predict_cv2_imgs(sub_imgs) 39 | #compute the proba 40 | accumulate_proba = 1.0 41 | tags = [] 42 | for tag_to_proba in tag_to_probas: 43 | tag = tag_to_proba[0][0] 44 | proba = tag_to_proba[0][1] 45 | accumulate_proba *= proba 46 | tags.append(tag) 47 | 48 | if self.debug_path is not None: 49 | import uuid 50 | sub_imgs_dir = os.path.join(self.debug_path, str(uuid.uuid1())) 51 | os.makedirs(sub_imgs_dir) 52 | for i, sub_img in enumerate(sub_imgs): 53 | image_path = os.path.join(sub_imgs_dir, "%d.jpg" % i) 54 | 
cv2.imwrite(image_path, sub_img*255.0) 55 | stat_path = os.path.join(sub_imgs_dir, "stat.txt") 56 | f_stat_path = open(stat_path, "w+") 57 | f_stat_path.write("".join(tags)) 58 | f_stat_path.write("\n") 59 | f_stat_path.write("%f" % accumulate_proba) 60 | f_stat_path.write("\n") 61 | f_stat_path.close() 62 | sub_imgs_dir_pic = sub_imgs_dir + ".jpg" 63 | cv2_img_copy = np.copy(cv2_img) 64 | for one_segmentation in segmentation: 65 | left_x = one_segmentation[0] 66 | cv2.line(cv2_img_copy, (left_x, 0), (left_x, cv2_img_copy.shape[0]), (255, 0, 0))  # draw each cut as a vertical line 67 | cv2.imwrite(sub_imgs_dir_pic, cv2_img_copy) 68 | return accumulate_proba, tags 69 | 70 | 71 | def do(self, segmentations): 72 | eval_segmentations = [] 73 | for segmentation in segmentations: 74 | accumulate_proba, tags = \ 75 | self.eval_segmentation(self.cv2_img, segmentation) 76 | # print("=" * 60) 77 | # print("accumulate_proba=", accumulate_proba) 78 | # print("tags=", tags) 79 | eval_segmentations.append((tags, accumulate_proba)) 80 | eval_segmentations = sorted(eval_segmentations, key=lambda x:x[1], reverse=True) 81 | return eval_segmentations 82 | -------------------------------------------------------------------------------- /python/deep_ocr/cv2_img_proc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import cv2 4 | import numpy as np 5 | from PIL import Image 6 | from PIL import ImageFont 7 | from PIL import ImageDraw 8 | import pickle 9 | import argparse 10 | from argparse import RawTextHelpFormatter 11 | import fnmatch 12 | import os 13 | import cv2 14 | import json 15 | import random 16 | import shutil 17 | import traceback 18 | import copy 19 | 20 | 21 | 22 | 23 | class PreprocessRemoveNonCharNoise(object): 24 | 25 | def __init__(self, char_width): 26 | self.min_w = char_width * 0.1 27 | self.min_h = char_width * 0.1 28 | 29 | self.min_area = char_width * char_width * 0.05 30 | 31 | self.max_area = char_width * char_width * 2.0 32 | 33 | def do(self, bin_img): 34 | 35 | tmp_bin_img = np.copy(bin_img) 36 | 37 | if cv2.__version__[0] == "2": 38 | contours, hierarchy = cv2.findContours( 39 | tmp_bin_img, 40 | cv2.RETR_TREE, 41 | cv2.CHAIN_APPROX_SIMPLE) 42 | else: 43 | _, contours, hierarchy = cv2.findContours( 44 | tmp_bin_img, 45 | cv2.RETR_CCOMP, 46 | cv2.CHAIN_APPROX_SIMPLE) 47 | 48 | filtered_contours = [] 49 | for cnt in contours: 50 | x, y, w, h = cv2.boundingRect(cnt) 51 | if w * h > self.max_area or w * h < self.min_area: 52 | bin_img[y:y+h, x:x+w] = 0 53 | contours = filtered_contours 54 | 55 | 56 | class PreprocessBackgroundMask(): 57 | 58 | def __init__(self, boundary): 59 | self.boundary = boundary 60 | 61 | def do(self, image): 62 | (lower, upper) = self.boundary 63 | lower = np.array(lower, dtype = "uint8") 64 | upper = np.array(upper, dtype = "uint8") 65 | mask = cv2.inRange(image, lower, upper) 66 | return mask 67 | 68 | 69 | class PreprocessCropZeros(object): 70 | 71 | def __init__(self): 72 | pass 73 | 74 | def do(self, cv2_gray_img): 75 | height = cv2_gray_img.shape[0] 76 | width = cv2_gray_img.shape[1] 77 | 78 | v_sum = np.sum(cv2_gray_img, axis=0) 79 | h_sum = np.sum(cv2_gray_img, axis=1) 80 | left = 0 81 | right = width - 1 82 | top = 0 83 | low = height - 1 84 | 85 | for i in range(width): 86 | if v_sum[i] > 0: 87 | left = i 88 | break 89 | 90 | for i in range(width - 1, -1, -1): 91 | if v_sum[i] > 0: 92 | right = i 93 | break 94 | 95 | for i in range(height): 96 | if h_sum[i] > 0: 97 | top = i 98 | break 99 | 100 | for i in range(height - 1, -1, -1): 101 | if h_sum[i] > 0: 102 | low = i 103 | break 104 | if not (top < low and right > 
-------------------------------------------------------------------------------- /python/deep_ocr/cv2_img_proc.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import cv2
4 | import numpy as np
5 | from PIL import Image
6 | from PIL import ImageFont
7 | from PIL import ImageDraw
8 | import pickle
9 | import argparse
10 | from argparse import RawTextHelpFormatter
11 | import fnmatch
12 | import os
13 | 
14 | import json
15 | import random
16 | import shutil
17 | import traceback
18 | import copy
19 | 
20 | 
21 | 
22 | 
23 | class PreprocessRemoveNonCharNoise(object):
24 | 
25 | def __init__(self, char_width):
26 | self.min_w = char_width * 0.1
27 | self.min_h = char_width * 0.1
28 | 
29 | self.min_area = char_width * char_width * 0.05
30 | 
31 | self.max_area = char_width * char_width * 2.0
32 | 
33 | def do(self, bin_img):
34 | 
35 | tmp_bin_img = np.copy(bin_img)
36 | 
37 | if cv2.__version__[0] == "3":
38 | _, contours, hierarchy = cv2.findContours(
39 | tmp_bin_img,
40 | cv2.RETR_CCOMP,
41 | cv2.CHAIN_APPROX_SIMPLE)
42 | else:
43 | contours, hierarchy = cv2.findContours(
44 | tmp_bin_img,
45 | cv2.RETR_CCOMP,
46 | cv2.CHAIN_APPROX_SIMPLE)
47 | 
48 | # blank out connected components whose area is implausible for a
49 | # character; bin_img is modified in place
50 | for cnt in contours:
51 | x, y, w, h = cv2.boundingRect(cnt)
52 | if w * h > self.max_area or w * h < self.min_area:
53 | bin_img[y:y+h, x:x+w] = 0
54 | 
55 | 
56 | class PreprocessBackgroundMask():
57 | 
58 | def __init__(self, boundary):
59 | self.boundary = boundary
60 | 
61 | def do(self, image):
62 | (lower, upper) = self.boundary
63 | lower = np.array(lower, dtype = "uint8")
64 | upper = np.array(upper, dtype = "uint8")
65 | mask = cv2.inRange(image, lower, upper)
66 | return mask
67 | 
68 | 
69 | class PreprocessCropZeros(object):
70 | 
71 | def __init__(self):
72 | pass
73 | 
74 | def do(self, cv2_gray_img):
75 | height = cv2_gray_img.shape[0]
76 | width = cv2_gray_img.shape[1]
77 | 
78 | v_sum = np.sum(cv2_gray_img, axis=0)
79 | h_sum = np.sum(cv2_gray_img, axis=1)
80 | left = 0
81 | right = width - 1
82 | top = 0
83 | low = height - 1
84 | 
85 | for i in range(width):
86 | if v_sum[i] > 0:
87 | left = i
88 | break
89 | 
90 | for i in range(width - 1, -1, -1):
91 | if v_sum[i] > 0:
92 | right = i
93 | break
94 | 
95 | for i in range(height):
96 | if h_sum[i] > 0:
97 | top = i
98 | break
99 | 
100 | for i in range(height - 1, -1, -1):
101 | if h_sum[i] > 0:
102 | low = i
103 | break
104 | if not (top < low and right > left):
105 | return cv2_gray_img
106 | 
107 | return cv2_gray_img[top: low+1, left: right+1]
108 | 
109 | 
110 | 
111 | class FindImageBBox(object):
112 | def __init__(self, ):
113 | pass
114 | 
115 | def do(self, img):
116 | height = img.shape[0]
117 | width = img.shape[1]
118 | v_sum = np.sum(img, axis=0)
119 | h_sum = np.sum(img, axis=1)
120 | left = 0
121 | right = width - 1
122 | top = 0
123 | low = height - 1
124 | for i in range(width):
125 | if v_sum[i] > 0:
126 | left = i
127 | break
128 | for i in range(width - 1, -1, -1):
129 | if v_sum[i] > 0:
130 | right = i
131 | break
132 | for i in range(height):
133 | if h_sum[i] > 0:
134 | top = i
135 | break
136 | for i in range(height - 1, -1, -1):
137 | if h_sum[i] > 0:
138 | low = i
139 | break
140 | return (left, top, right, low)
141 | 
142 | 
143 | class PreprocessResizeKeepRatio(object):
144 | 
145 | def __init__(self, width, height):
146 | self.width = width
147 | self.height = height
148 | 
149 | def do(self, cv2_img):
150 | max_width = self.width
151 | max_height = self.height
152 | 
153 | cur_height, cur_width = cv2_img.shape[:2]
154 | 
155 | ratio_w = float(max_width)/float(cur_width)
156 | ratio_h = float(max_height)/float(cur_height)
157 | ratio = min(ratio_w, ratio_h)
158 | 
159 | new_size = (min(int(cur_width*ratio), max_width),
160 | min(int(cur_height*ratio), max_height))
161 | 
162 | new_size = (max(new_size[0], 1),
163 | max(new_size[1], 1),)
164 | 
165 | resized_img = cv2.resize(cv2_img, new_size)
166 | return resized_img
167 | 
168 | 
169 | class PreprocessResizeKeepRatioFillBG(object):
170 | 
171 | def __init__(self, width, height,
172 | fill_bg=False,
173 | auto_avoid_fill_bg=True,
174 | margin=None):
175 | self.width = width
176 | self.height = height
177 | self.fill_bg = fill_bg
178 | self.auto_avoid_fill_bg = auto_avoid_fill_bg
179 | self.margin = margin
180 | 
181 | @classmethod
182 | def is_need_fill_bg(cls, cv2_img, th=0.5, max_val=255):
183 | image_shape = cv2_img.shape
184 | height, width = image_shape[:2]
185 | if height * 3 < width:
186 | return True
187 | if width * 3 < height:
188 | return True
189 | return False
190 | 
191 | @classmethod
192 | def put_img_into_center(cls, img_large, img_small, ):
193 | width_large = img_large.shape[1]
194 | height_large = img_large.shape[0]
195 | 
196 | width_small = img_small.shape[1]
197 | height_small = img_small.shape[0]
198 | 
199 | if width_large < width_small:
200 | raise ValueError("width_large < width_small")
201 | if height_large < height_small:
202 | raise ValueError("height_large < height_small")
203 | 
204 | start_width = (width_large - width_small) // 2
205 | start_height = (height_large - height_small) // 2
206 | 
207 | img_large[int(start_height):int(start_height + height_small), int(start_width):int(start_width + width_small)] = img_small
208 | return img_large
209 | 
210 | def do(self, cv2_img):
211 | 
212 | if self.margin is not None:
213 | width_minus_margin = max(2, self.width - self.margin)
214 | height_minus_margin = max(2, self.height - self.margin)
215 | else:
216 | width_minus_margin = self.width
217 | height_minus_margin = self.height
218 | 
219 | cur_height, cur_width = cv2_img.shape[:2]
220 | if len(cv2_img.shape) > 2:
221 | pix_dim = cv2_img.shape[2]
222 | else:
223 | pix_dim = None
224 | 
225 | preprocess_resize_keep_ratio = PreprocessResizeKeepRatio(
226 | width_minus_margin,
227 | height_minus_margin)
228 | resized_cv2_img = preprocess_resize_keep_ratio.do(cv2_img)
229 | 
230 | if self.auto_avoid_fill_bg:
231 | need_fill_bg = self.is_need_fill_bg(cv2_img)
232 | if not 
need_fill_bg: 233 | self.fill_bg = False 234 | else: 235 | self.fill_bg = True 236 | 237 | ## should skip horizontal stroke 238 | if not self.fill_bg: 239 | ret_img = cv2.resize(resized_cv2_img, (width_minus_margin, 240 | height_minus_margin)) 241 | else: 242 | if pix_dim is not None: 243 | norm_img = np.zeros((height_minus_margin, 244 | width_minus_margin, 245 | pix_dim), 246 | np.uint8) 247 | else: 248 | norm_img = np.zeros((height_minus_margin, 249 | width_minus_margin), 250 | np.uint8) 251 | ret_img = self.put_img_into_center(norm_img, resized_cv2_img) 252 | 253 | if self.margin is not None: 254 | if pix_dim is not None: 255 | norm_img = np.zeros((self.height, 256 | self.width, 257 | pix_dim), 258 | np.uint8) 259 | else: 260 | norm_img = np.zeros((self.height, 261 | self.width), 262 | np.uint8) 263 | ret_img = self.put_img_into_center(norm_img, ret_img) 264 | return ret_img -------------------------------------------------------------------------------- /python/deep_ocr/id_cards/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | -------------------------------------------------------------------------------- /python/deep_ocr/id_cards/char_set.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.langs.digits import data as digit_data 4 | from deep_ocr.langs.chi_sim import data as sim_data 5 | 6 | 7 | class CharSet(object): 8 | def __init__(self): 9 | self.data = {} 10 | self.data["name"] = { 11 | "set": set(sim_data) - set(digit_data), 12 | "width": 0.030, 13 | "caffe_cls": "sim", 14 | } 15 | self.data["sex"] = { 16 | "set": set(u"男女"), 17 | "width": 0.030, 18 | "caffe_cls": "sim", 19 | } 20 | self.data["minzu"] = { 21 | "set": set( 22 | u"汉蒙古回藏维吾尔苗彝壮布依朝鲜满侗瑶白土家哈尼哈萨克"\ 23 | u"傣黎傈僳佤畲拉祜水东乡纳西景颇柯尔克孜"\ 24 | u"土达斡尔仫佬羌布朗撒拉毛南仡佬锡伯阿昌"\ 25 | u"普米塔吉克怒俄罗斯鄂温克德昂保安裕固京"\ 26 | u"塔塔尔独龙鄂伦春赫哲乌孜别克门巴珞巴"\ 27 | u"基诺高山穿青人"), 28 | "width": 0.030, 29 | "caffe_cls": "sim", 30 | } 31 | self.data["year"] = { 32 | "set": set("0123456789"), 33 | "width": 0.015, 34 | "caffe_cls": "ua", 35 | } 36 | self.data["month"] = { 37 | "set": set("0123456789"), 38 | "width": 0.015, 39 | "caffe_cls": "ua", 40 | } 41 | self.data["day"] = { 42 | "set": set("0123456789"), 43 | "width": 0.015, 44 | "caffe_cls": "ua", 45 | } 46 | self.data["address"] = { 47 | "set": set(sim_data).union(digit_data), 48 | "width": 0.030, 49 | "caffe_cls": "sim", 50 | } 51 | self.data["id"] = { 52 | "set": set(u"0123456789X"), 53 | "width": 0.02, 54 | "caffe_cls": "ua", 55 | } 56 | 57 | def get(self): 58 | return self.data -------------------------------------------------------------------------------- /python/deep_ocr/id_cards/segmentation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import cv2 4 | from deep_ocr.cv2_img_proc import PreprocessBackgroundMask 5 | from deep_ocr.cv2_img_proc import PreprocessRemoveNonCharNoise 6 | 7 | import numpy as np 8 | from deep_ocr.utils import extract_peek_ranges_from_array 9 | from deep_ocr.utils import merge_chars_into_line_segments 10 | import os 11 | import shutil 12 | import sys, traceback 13 | 14 | class Segmentation(object): 15 | 16 | def __init__(self, debug_path=None): 17 | self.debug_path = debug_path 18 | self.boundaries = [ 19 | ([0, 0, 0], [100, 100, 100]), 20 | ([0, 0, 0], [150, 150, 150]), 21 | ([0, 0, 0], [200, 200, 200]), 22 | ] 23 | 24 | def check_if_good_boundary(self, 
boundary, norm_height, norm_width, color_img): 25 | preprocess_bg_mask = PreprocessBackgroundMask(boundary) 26 | char_w = norm_width / 20 27 | remove_noise = PreprocessRemoveNonCharNoise(char_w) 28 | 29 | id_card_img_mask = preprocess_bg_mask.do(color_img) 30 | id_card_img_mask[0:int(norm_height*0.05), :] = 0 31 | id_card_img_mask[int(norm_height*0.95):, :] = 0 32 | id_card_img_mask[:, 0:int(norm_width*0.05)] = 0 33 | id_card_img_mask[:, int(norm_width*0.95):] = 0 34 | 35 | remove_noise.do(id_card_img_mask) 36 | 37 | # se1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5)) 38 | # se2 = cv2.getStructuringElement(cv2.MORPH_RECT, (2,2)) 39 | # mask = cv2.morphologyEx(id_card_img_mask, cv2.MORPH_CLOSE, se1) 40 | # id_card_img_mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, se2) 41 | 42 | ## remove right head profile 43 | left_half_id_card_img_mask = np.copy(id_card_img_mask) 44 | left_half_id_card_img_mask[:, int(norm_width/2):] = 0 45 | 46 | ## Try to find text lines and chars 47 | horizontal_sum = np.sum(left_half_id_card_img_mask, axis=1) 48 | line_ranges = extract_peek_ranges_from_array(horizontal_sum) 49 | 50 | return len(line_ranges) >= 5 and len(line_ranges) <= 7 51 | 52 | 53 | def do(self, color_img): 54 | 55 | shape = color_img.shape 56 | 57 | norm_height = shape[0] 58 | norm_width = shape[1] 59 | 60 | gray_id_card_img = cv2.cvtColor(color_img, cv2.COLOR_BGR2GRAY) 61 | # 62 | clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) 63 | gray_id_card_img = clahe.apply(gray_id_card_img) 64 | 65 | gray_id_card_img = 255 - gray_id_card_img 66 | 67 | best_boundary = None 68 | for boundary in self.boundaries: 69 | if self.check_if_good_boundary( 70 | boundary, 71 | norm_height, norm_width, 72 | color_img): 73 | best_boundary = boundary 74 | break 75 | if best_boundary is None: 76 | return {} 77 | 78 | boundary = best_boundary 79 | ## boundary = ([0, 0, 0], [100, 100, 100]) 80 | preprocess_bg_mask = PreprocessBackgroundMask(boundary) 81 | id_card_img_mask = preprocess_bg_mask.do(color_img) 82 | id_card_img_mask[0:int(norm_height*0.05), :] = 0 83 | id_card_img_mask[int(norm_height*0.95):, :] = 0 84 | id_card_img_mask[:, 0:int(norm_width*0.05)] = 0 85 | id_card_img_mask[:, int(norm_width*0.95):] = 0 86 | 87 | # se1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5)) 88 | # se2 = cv2.getStructuringElement(cv2.MORPH_RECT, (2,2)) 89 | # mask = cv2.morphologyEx(id_card_img_mask, cv2.MORPH_CLOSE, se1) 90 | # id_card_img_mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, se2) 91 | 92 | ## remove right head profile 93 | left_half_id_card_img_mask = np.copy(id_card_img_mask) 94 | left_half_id_card_img_mask[:, int(norm_width/2):] = 0 95 | 96 | ## Try to find text lines and chars 97 | vertical_peek_ranges2d = [] 98 | horizontal_sum = np.sum(left_half_id_card_img_mask, axis=1) 99 | line_ranges = extract_peek_ranges_from_array(horizontal_sum) 100 | 101 | ## char extraction 102 | for line_range in line_ranges: 103 | start_y, end_y = line_range 104 | end_y += 1 105 | line_img = id_card_img_mask[start_y: end_y] 106 | vertical_sum = np.sum(line_img, axis=0) 107 | vertical_peek_ranges = extract_peek_ranges_from_array(vertical_sum, minimun_val=40, minimun_range=1) 108 | vertical_peek_ranges2d.append(vertical_peek_ranges) 109 | 110 | vertical_peek_ranges2d = merge_chars_into_line_segments(vertical_peek_ranges2d) 111 | img_gray_texts = cv2.bitwise_and(gray_id_card_img, gray_id_card_img, mask=id_card_img_mask) 112 | 113 | key_to_segmentation = {} 114 | try: 115 | ## name extraction 116 | range_y = line_ranges[0] 117 | 
range_x = vertical_peek_ranges2d[0][0] 118 | start_x, end_x = range_x 119 | start_y, end_y = range_y 120 | w = end_x - start_x 121 | h = end_y - start_y 122 | key_to_segmentation["name"] = [(start_x, start_y, w, h), ] 123 | ## sex extraction 124 | range_y = line_ranges[1] 125 | range_x = vertical_peek_ranges2d[1][0] 126 | start_x, end_x = range_x 127 | start_y, end_y = range_y 128 | w = end_x - start_x 129 | h = end_y - start_y 130 | key_to_segmentation["sex"] = [(start_x, start_y, w, h), ] 131 | ## minzu extraction 132 | range_y = line_ranges[1] 133 | range_x = vertical_peek_ranges2d[1][1] 134 | start_x, end_x = range_x 135 | start_y, end_y = range_y 136 | w = end_x - start_x 137 | h = end_y - start_y 138 | key_to_segmentation["minzu"] = [(start_x, start_y, w, h), ] 139 | ## year extraction 140 | range_y = line_ranges[2] 141 | range_x = vertical_peek_ranges2d[2][0] 142 | start_x, end_x = range_x 143 | start_y, end_y = range_y 144 | w = end_x - start_x 145 | h = end_y - start_y 146 | key_to_segmentation["year"] = [(start_x, start_y, w, h), ] 147 | ## month extraction 148 | range_y = line_ranges[2] 149 | range_x = vertical_peek_ranges2d[2][1] 150 | start_x, end_x = range_x 151 | start_y, end_y = range_y 152 | w = end_x - start_x 153 | h = end_y - start_y 154 | key_to_segmentation["month"] = [(start_x, start_y, w, h), ] 155 | ## day extraction 156 | range_y = line_ranges[2] 157 | range_x = vertical_peek_ranges2d[2][2] 158 | start_x, end_x = range_x 159 | start_y, end_y = range_y 160 | w = end_x - start_x 161 | h = end_y - start_y 162 | key_to_segmentation["day"] = [(start_x, start_y, w, h), ] 163 | ## address extraction 164 | key_to_segmentation["address"] = [] 165 | first_line = line_ranges[3][0] 166 | first_line_range_x = vertical_peek_ranges2d[3][0] 167 | first_line_start_x = first_line_range_x[0] 168 | first_line_w = first_line_range_x[1] - first_line_start_x 169 | for i, line_range in enumerate(line_ranges): 170 | if i >= 3: 171 | range_y = line_range 172 | range_x = vertical_peek_ranges2d[i][0] 173 | start_x, end_x = range_x 174 | start_y, end_y = range_y 175 | if abs(first_line_start_x - start_x)> int(first_line_w * 0.05): 176 | break 177 | w = end_x - start_x 178 | h = end_y - start_y 179 | key_to_segmentation["address"].append((start_x, start_y, w, h)) 180 | 181 | ## id extraction 182 | range_y = line_ranges[-1] 183 | range_x = vertical_peek_ranges2d[-1][0] 184 | start_x, end_x = range_x 185 | start_y, end_y = range_y 186 | w = end_x - start_x 187 | h = end_y - start_y 188 | key_to_segmentation["id"] = [(start_x, start_y, w, h), ] 189 | except: 190 | print("Exception in user code:") 191 | print('-' * 60) 192 | traceback.print_exc(file=sys.stdout) 193 | print('-' * 60) 194 | key_to_segmentation = {} 195 | 196 | debug_path = self.debug_path 197 | if debug_path is not None: 198 | import random 199 | 200 | if os.path.isdir(debug_path): 201 | shutil.rmtree(debug_path) 202 | os.makedirs(debug_path) 203 | 204 | debug_image_path = os.path.join(debug_path, "01_origin_img.jpg") 205 | debug_gray_image_path = os.path.join(debug_path, "01_gray_img.jpg") 206 | debug_image_mask_path = os.path.join(debug_path, "02_mask.jpg") 207 | debug_image_mask_text_lines_path = os.path.join(debug_path, "03_mask_text_lines.jpg") 208 | debug_image_left_mask_path = os.path.join(debug_path, "04_left_mask.jpg") 209 | debug_image_gray_texts_path = os.path.join(debug_path, "05_gray_texts.jpg") 210 | debug_image_chars_path = os.path.join(debug_path, "06_origin_img_chars.jpg") 211 | debug_image_key_to_segments_path = 
os.path.join(debug_path, "07_origin_img_key_to_segments.jpg") 212 | 213 | cv2.imwrite(debug_image_path, color_img) 214 | cv2.imwrite(debug_gray_image_path, 255 - gray_id_card_img) 215 | id_card_img_chars = np.copy(color_img) 216 | cv2.imwrite(debug_image_mask_path, id_card_img_mask) 217 | id_card_img_mask_text_lines = np.copy(id_card_img_mask) 218 | 219 | for i, line_range in enumerate(line_ranges): 220 | start_y, end_y = line_range 221 | id_card_img_mask_text_lines[start_y, :] = 255 222 | id_card_img_mask_text_lines[end_y, :] = 255 223 | 224 | color = (255, 0, 0) 225 | for i, line_range in enumerate(line_ranges): 226 | start_y, end_y = line_range 227 | for vertical_peek_range in vertical_peek_ranges2d[i]: 228 | start_x, end_x = vertical_peek_range 229 | cv2.rectangle(id_card_img_chars, 230 | (start_x, start_y), 231 | (end_x+1, end_y+1), 232 | color) 233 | 234 | key_to_segments_img = np.copy(color_img) 235 | for key in key_to_segmentation: 236 | color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) 237 | rects = key_to_segmentation[key] 238 | for rect in rects: 239 | pt1 = (rect[0], rect[1]) 240 | pt2 = (rect[0] + rect[2], rect[1] + rect[3]) 241 | cv2.rectangle(key_to_segments_img, pt1, pt2, color) 242 | 243 | cv2.imwrite(debug_image_mask_text_lines_path, id_card_img_mask_text_lines) 244 | 245 | cv2.imwrite(debug_image_left_mask_path, left_half_id_card_img_mask) 246 | cv2.imwrite(debug_image_gray_texts_path, img_gray_texts) 247 | cv2.imwrite(debug_image_chars_path, id_card_img_chars) 248 | cv2.imwrite(debug_image_key_to_segments_path, key_to_segments_img) 249 | return key_to_segmentation 250 | 251 | -------------------------------------------------------------------------------- /python/deep_ocr/lang_aux.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | from PIL import Image 5 | from PIL import ImageFont 6 | from PIL import ImageDraw 7 | import traceback 8 | import sys 9 | import os 10 | import numpy as np 11 | import cv2 12 | import copy 13 | import random 14 | 15 | from deep_ocr.utils import trim_string 16 | from deep_ocr.cv2_img_proc import FindImageBBox 17 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatioFillBG 18 | 19 | 20 | class DataAugmentation(object): 21 | def __init__(self, noise=True, dilate=True, erode=True): 22 | self.noise = noise 23 | self.dilate = dilate 24 | self.erode = erode 25 | 26 | @classmethod 27 | def add_noise(cls, img): 28 | # add some noise 29 | for i in range(20): 30 | temp_x = np.random.randint(0, img.shape[0]) 31 | temp_y = np.random.randint(0, img.shape[1]) 32 | img[temp_x][temp_y] = 255 33 | return img 34 | 35 | @classmethod 36 | def add_erode(cls, img): 37 | kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)) 38 | img = cv2.erode(img,kernel) 39 | return img 40 | 41 | @classmethod 42 | def add_dilate(cls, img): 43 | kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)) 44 | img = cv2.dilate(img, kernel) 45 | return img 46 | 47 | def do(self, img): 48 | if self.noise and random.random()<0.5: 49 | img = self.add_noise(img) 50 | if self.dilate and random.random()<0.5: 51 | img = self.add_dilate(img) 52 | elif self.erode: 53 | img = self.add_erode(img) 54 | return img 55 | 56 | 57 | class LangCharsGenerate(object): 58 | def __init__(self, langs): 59 | self.langs = langs 60 | 61 | def do(self, ): 62 | lang_list = self.langs.split("+") 63 | lang_chars = "" 64 | for lang in lang_list: 65 | lang_module = "deep_ocr.langs.%s" % lang 66 | 
lang_module_data = __import__(lang_module, fromlist=['']) 67 | lang_chars += lang_module_data.data 68 | trim_string(lang_chars) 69 | return lang_chars 70 | 71 | 72 | class FontCheck(object): 73 | 74 | def __init__(self, lang_chars, width=32, height=32): 75 | self.lang_chars = lang_chars 76 | self.width = width 77 | self.height = height 78 | 79 | def do(self, font_path): 80 | width = self.width 81 | height = self.height 82 | try: 83 | for i, char in enumerate(self.lang_chars): 84 | img = Image.new("RGB", (width, height), "black") 85 | draw = ImageDraw.Draw(img) 86 | font = ImageFont.truetype(font_path, int(width * 0.9),) 87 | draw.text((0, 0), char, (255, 255, 255), font=font) 88 | data = list(img.getdata()) 89 | sum_val = 0 90 | for i_data in data: 91 | sum_val += sum(i_data) 92 | if sum_val < 2: 93 | return False 94 | except: 95 | print("fail to load:%s" % font_path) 96 | traceback.print_exc(file=sys.stdout) 97 | return False 98 | return True 99 | 100 | 101 | class Font2Image(object): 102 | 103 | def __init__(self, 104 | width, height, 105 | need_crop, margin): 106 | self.width = width 107 | self.height = height 108 | self.need_crop = need_crop 109 | self.margin = margin 110 | 111 | def do(self, font_path, char, path_img="", rotate=0, need_aug=True): 112 | find_image_bbox = FindImageBBox() 113 | img = Image.new("RGB", (self.width, self.height), "black") 114 | draw = ImageDraw.Draw(img) 115 | font = ImageFont.truetype(font_path, int(self.width * 0.7),) 116 | draw.text((0, 0), char, (255, 255, 255), font=font) 117 | 118 | ## rotate 119 | if rotate != 0: 120 | img = img.rotate(rotate) 121 | 122 | data = list(img.getdata()) 123 | sum_val = 0 124 | for i_data in data: 125 | sum_val += sum(i_data) 126 | if sum_val > 2: 127 | np_img = np.asarray(data, dtype='uint8') 128 | np_img = np_img[:, 0] 129 | np_img = np_img.reshape((self.height, self.width)) 130 | cropped_box = find_image_bbox.do(np_img) 131 | left, upper, right, lower = cropped_box 132 | np_img = np_img[upper: lower + 1, left: right + 1] 133 | if not self.need_crop: 134 | preprocess_resize_keep_ratio_fill_bg = \ 135 | PreprocessResizeKeepRatioFillBG(self.width, self.height, 136 | fill_bg=False, 137 | margin=self.margin) 138 | np_img = preprocess_resize_keep_ratio_fill_bg.do(np_img) 139 | 140 | ## noise 141 | if need_aug: 142 | data_aug = DataAugmentation() 143 | np_img = data_aug.do(np_img) 144 | 145 | cv2.imwrite(path_img, np_img) 146 | 147 | else: 148 | print("%s doesn't exist." 
% path_img) 149 | 150 | 151 | if __name__ == "__main__": 152 | lang_chars_gen = LangCharsGenerate("digits+eng") 153 | lang_chars = lang_chars_gen.do() 154 | font_check = FontCheck(lang_chars) 155 | 156 | font_dir = "/root/workspace/deep_ocr_fonts/chinese_fonts/" 157 | for font_name in os.listdir(font_dir): 158 | font_path = os.path.join(font_dir, font_name) 159 | print("font_path:", font_path) 160 | lang_chars_gen = LangCharsGenerate("chi_sim") 161 | lang_chars = lang_chars_gen.do() 162 | print("char len=", len(lang_chars)) 163 | #print(lang_chars.encode("utf-8")) 164 | font_check = FontCheck(lang_chars) 165 | print("can cover all the chars?:", font_check.do(font_path)) 166 | 167 | -------------------------------------------------------------------------------- /python/deep_ocr/langs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/python/deep_ocr/langs/__init__.py -------------------------------------------------------------------------------- /python/deep_ocr/langs/chi_sim.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | from deep_ocr.utils import trim_string 6 | 7 | # I take characters from http://hanyu.iciba.com/zt/3500.html 8 | data = u''' 9 | 一乙二十丁厂七卜八人入儿匕几九刁了刀力乃又三干于亏工土士才下寸大丈与万上小口山巾千乞川亿个夕久么勺 10 | 凡丸及广亡门丫义之尸己已巳弓子卫也女刃飞习叉马乡丰王开井天夫元无云专丐扎艺木五支厅不犬太区历歹友尤 11 | 匹车巨牙屯戈比互切瓦止少曰日中贝冈内水见午牛手气毛壬升夭长仁什片仆化仇币仍仅斤爪反介父从仑今凶分乏 12 | 公仓月氏勿欠风丹匀乌勾凤六文亢方火为斗忆计订户认冗讥心尺引丑巴孔队办以允予邓劝双书幻玉刊未末示击打 13 | 巧正扑卉扒功扔去甘世艾古节本术可丙左厉石右布夯戊龙平灭轧东卡北占凸卢业旧帅归旦目且叶甲申叮电号田由 14 | 只叭史央兄叽叼叫叩叨另叹冉皿凹囚四生矢失乍禾丘付仗代仙们仪白仔他斥瓜乎丛令用甩印尔乐句匆册卯犯外处 15 | 冬鸟务包饥主市立冯玄闪兰半汁汇头汉宁穴它讨写让礼训议必讯记永司尼民弗弘出辽奶奴召加皮边孕发圣对台矛 16 | 纠母幼丝邦式迂刑戎动扛寺吉扣考托老巩圾执扩扫地场扬耳芋共芒亚芝朽朴机权过臣吏再协西压厌戌在百有存而 17 | 页匠夸夺灰达列死成夹夷轨邪尧划迈毕至此贞师尘尖劣光当早吁吐吓虫曲团吕同吊吃因吸吗吆屿屹岁帆回岂则刚 18 | 网肉年朱先丢廷舌竹迁乔迄伟传乒乓休伍伏优臼伐延仲件任伤价伦份华仰仿伙伪自伊血向似后行舟全会杀合兆企 19 | 众爷伞创肌肋朵杂危旬旨旭负匈名各多争色壮冲妆冰庄庆亦刘齐交衣次产决亥充妄闭问闯羊并关米灯州汗污江汛 20 | 池汝汤忙兴宇守宅字安讲讳军讶许讹论讼农讽设访诀寻那迅尽导异弛孙阵阳收阶阴防奸如妇妃好她妈戏羽观欢买 21 | 红驮纤驯约级纪驰纫巡寿弄麦玖玛形进戒吞远违韧运扶抚坛技坏抠扰扼拒找批址扯走抄贡汞坝攻赤折抓扳抡扮抢 22 | 孝坎均抑抛投坟坑抗坊抖护壳志块扭声把报拟却抒劫芙芜苇芽花芹芥芬苍芳严芦芯劳克芭苏杆杠杜材村杖杏杉巫 23 | 极李杨求甫匣更束吾豆两酉丽医辰励否还尬歼来连轩步卤坚肖旱盯呈时吴助县里呆吱吠呕园旷围呀吨足邮男困吵 24 | 串员呐听吟吩呛吻吹呜吭吧邑吼囤别吮岖岗帐财针钉牡告我乱利秃秀私每兵估体何佐佑但伸佃作伯伶佣低你住位 25 | 伴身皂伺佛囱近彻役返余希坐谷妥含邻岔肝肛肚肘肠龟甸免狂犹狈角删条彤卵灸岛刨迎饭饮系言冻状亩况床库庇 26 | 疗吝应这冷庐序辛弃冶忘闰闲间闷判兑灶灿灼弟汪沐沛汰沥沙汽沃沦汹泛沧没沟沪沈沉沁怀忧忱快完宋宏牢究穷 27 | 灾良证启评补初社祀识诈诉罕诊词译君灵即层屁尿尾迟局改张忌际陆阿陈阻附坠妓妙妖姊妨妒努忍劲矣鸡纬驱纯 28 | 纱纲纳驳纵纷纸纹纺驴纽奉玩环武青责现玫表规抹卦坷坯拓拢拔坪拣坦担坤押抽拐拖者拍顶拆拎拥抵拘势抱拄垃 29 | 拉拦幸拌拧拂拙招坡披拨择抬拇拗其取茉苦昔苛若茂苹苗英苟苑苞范直茁茄茎苔茅枉林枝杯枢柜枚析板松枪枫构 30 | 杭杰述枕丧或画卧事刺枣雨卖郁矾矿码厕奈奔奇奋态欧殴垄妻轰顷转斩轮软到非叔歧肯齿些卓虎虏肾贤尚旺具味 31 | 果昆国哎咕昌呵畅明易咙昂迪典固忠呻咒咋咐呼鸣咏呢咄咖岸岩帖罗帜帕岭凯败账贩贬购贮图钓制知迭氛垂牧物 32 | 乖刮秆和季委秉佳侍岳供使例侠侥版侄侦侣侧凭侨佩货侈依卑的迫质欣征往爬彼径所舍金刹命肴斧爸采觅受乳贪 33 | 念贫忿肤肺肢肿胀朋股肮肪肥服胁周昏鱼兔狐忽狗狞备饰饱饲变京享庞店夜庙府底疟疙疚剂卒郊庚废净盲放刻育 34 | 氓闸闹郑券卷单炬炒炊炕炎炉沫浅法泄沽河沾泪沮油泊沿泡注泣泞泻泌泳泥沸沼波泼泽治怔怯怖性怕怜怪怡学宝 35 | 宗定宠宜审宙官空帘宛实试郎诗肩房诚衬衫视祈话诞诡询该详建肃录隶帚屉居届刷屈弧弥弦承孟陋陌孤陕降函限 36 | 妹姑姐姓妮始姆迢驾叁参艰线练组绅细驶织驹终驻绊驼绍绎经贯契贰奏春帮玷珍玲珊玻毒型拭挂封持拷拱项垮挎 37 | 城挟挠政赴赵挡拽哉挺括垢拴拾挑垛指垫挣挤拼挖按挥挪拯某甚荆茸革茬荐巷带草茧茵茶荒茫荡荣荤荧故胡荫荔 38 | 南药标栈柑枯柄栋相查柏栅柳柱柿栏柠树勃要柬咸威歪研砖厘厚砌砂泵砚砍面耐耍牵鸥残殃轴轻鸦皆韭背战点虐 39 | 临览竖省削尝昧盹是盼眨哇哄哑显冒映星昨咧昭畏趴胃贵界虹虾蚁思蚂虽品咽骂勋哗咱响哈哆咬咳咪哪哟炭峡罚 40 | 贱贴贻骨幽钙钝钞钟钢钠钥钦钧钩钮卸缸拜看矩毡氢怎牲选适秒香种秋科重复竿段便俩贷顺修俏保促俄俐侮俭俗 41 | 俘信皇泉鬼侵禹侯追俊盾待徊衍律很须叙剑逃食盆胚胧胆胜胞胖脉胎勉狭狮独狰狡狱狠贸怨急饵饶蚀饺饼峦弯将 42 | 奖哀亭亮度迹庭疮疯疫疤咨姿亲音帝施闺闻闽阀阁差养美姜叛送类迷籽娄前首逆兹总炼炸烁炮炫烂剃洼洁洪洒柒 43 | 浇浊洞测洗活派洽染洛浏济洋洲浑浓津恃恒恢恍恬恤恰恼恨举觉宣宦室宫宪突穿窃客诫冠诬语扁袄祖神祝祠误诱 44 | 诲说诵垦退既屋昼屏屎费陡逊眉孩陨除险院娃姥姨姻娇姚娜怒架贺盈勇怠癸蚤柔垒绑绒结绕骄绘给绚骆络绝绞骇 45 | 统耕耘耗耙艳泰秦珠班素匿蚕顽盏匪捞栽捕埂捂振载赶起盐捎捍捏埋捉捆捐损袁捌都哲逝捡挫换挽挚热恐捣壶捅 46 | 埃挨耻耿耽聂恭莽莱莲莫莉荷获晋恶莹莺真框梆桂桔栖档桐株桥桦栓桃格桩校核样根索哥速逗栗贾酌配翅辱唇夏 47 | 砸砰砾础破原套逐烈殊殉顾轿较顿毙致柴桌虑监紧党逞晒眠晓哮唠鸭晃哺晌剔晕蚌畔蚣蚊蚪蚓哨哩圃哭哦恩鸯唤 
48 | 唁哼唧啊唉唆罢峭峨峰圆峻贼贿赂赃钱钳钻钾铁铃铅缺氧氨特牺造乘敌秤租积秧秩称秘透笔笑笋债借值倚俺倾倒 49 | 倘俱倡候赁俯倍倦健臭射躬息倔徒徐殷舰舱般航途拿耸爹舀爱豺豹颁颂翁胰脆脂胸胳脏脐胶脑脓逛狸狼卿逢鸵留 50 | 鸳皱饿馁凌凄恋桨浆衰衷高郭席准座症病疾斋疹疼疲脊效离紊唐瓷资凉站剖竞部旁旅畜阅羞羔瓶拳粉料益兼烤烘 51 | 烦烧烛烟烙递涛浙涝浦酒涉消涡浩海涂浴浮涣涤流润涧涕浪浸涨烫涩涌悖悟悄悍悔悯悦害宽家宵宴宾窍窄容宰案 52 | 请朗诸诺读扇诽袜袖袍被祥课冥谁调冤谅谆谈谊剥恳展剧屑弱陵祟陶陷陪娱娟恕娥娘通能难预桑绢绣验继骏球琐 53 | 理琉琅捧堵措描域捺掩捷排焉掉捶赦堆推埠掀授捻教掏掐掠掂培接掷控探据掘掺职基聆勘聊娶著菱勒黄菲萌萝菌 54 | 萎菜萄菊菩萍菠萤营乾萧萨菇械彬梦婪梗梧梢梅检梳梯桶梭救曹副票酝酗厢戚硅硕奢盔爽聋袭盛匾雪辅辆颅虚彪 55 | 雀堂常眶匙晨睁眯眼悬野啪啦曼晦晚啄啡距趾啃跃略蚯蛀蛇唬累鄂唱患啰唾唯啤啥啸崖崎崭逻崔帷崩崇崛婴圈铐 56 | 铛铝铜铭铲银矫甜秸梨犁秽移笨笼笛笙符第敏做袋悠偿偶偎偷您售停偏躯兜假衅徘徙得衔盘舶船舵斜盒鸽敛悉欲 57 | 彩领脚脖脯豚脸脱象够逸猜猪猎猫凰猖猛祭馅馆凑减毫烹庶麻庵痊痒痕廊康庸鹿盗章竟商族旋望率阎阐着羚盖眷 58 | 粘粗粒断剪兽焊焕清添鸿淋涯淹渠渐淑淌混淮淆渊淫渔淘淳液淤淡淀深涮涵婆梁渗情惜惭悼惧惕惟惊惦悴惋惨惯 59 | 寇寅寄寂宿窒窑密谋谍谎谐袱祷祸谓谚谜逮敢尉屠弹隋堕随蛋隅隆隐婚婶婉颇颈绩绪续骑绰绳维绵绷绸综绽绿缀 60 | 巢琴琳琢琼斑替揍款堪塔搭堰揩越趁趋超揽堤提博揭喜彭揣插揪搜煮援搀裁搁搓搂搅壹握搔揉斯期欺联葫散惹葬 61 | 募葛董葡敬葱蒋蒂落韩朝辜葵棒棱棋椰植森焚椅椒棵棍椎棉棚棕棺榔椭惠惑逼粟棘酣酥厨厦硬硝确硫雁殖裂雄颊 62 | 雳暂雅翘辈悲紫凿辉敞棠赏掌晴睐暑最晰量鼎喷喳晶喇遇喊遏晾景畴践跋跌跑跛遗蛙蛛蜓蜒蛤喝鹃喂喘喉喻啼喧 63 | 嵌幅帽赋赌赎赐赔黑铸铺链销锁锄锅锈锋锌锐甥掰短智氮毯氯鹅剩稍程稀税筐等筑策筛筒筏答筋筝傲傅牌堡集焦 64 | 傍储皓皖粤奥街惩御循艇舒逾番释禽腊脾腋腔腕鲁猩猬猾猴惫然馈馋装蛮就敦斌痘痢痪痛童竣阔善翔羡普粪尊奠 65 | 道遂曾焰港滞湖湘渣渤渺湿温渴溃溅滑湃渝湾渡游滋渲溉愤慌惰愕愣惶愧愉慨割寒富寓窜窝窖窗窘遍雇裕裤裙禅 66 | 禄谢谣谤谦犀属屡强粥疏隔隙隘媒絮嫂媚婿登缅缆缉缎缓缔缕骗编骚缘瑟鹉瑞瑰瑙魂肆摄摸填搏塌鼓摆携搬摇搞 67 | 塘摊聘斟蒜勤靴靶鹊蓝墓幕蓬蓄蒲蓉蒙蒸献椿禁楚楷榄想槐榆楼概赖酪酬感碍碘碑碎碰碗碌尴雷零雾雹辐辑输督 68 | 频龄鉴睛睹睦瞄睫睡睬嗜鄙嗦愚暖盟歇暗暇照畸跨跷跳跺跪路跤跟遣蜈蜗蛾蜂蜕嗅嗡嗓署置罪罩蜀幌错锚锡锣锤 69 | 锥锦键锯锰矮辞稚稠颓愁筹签简筷毁舅鼠催傻像躲魁衙微愈遥腻腰腥腮腹腺鹏腾腿鲍猿颖触解煞雏馍馏酱禀痹廓 70 | 痴痰廉靖新韵意誊粮数煎塑慈煤煌满漠滇源滤滥滔溪溜漓滚溢溯滨溶溺粱滩慎誉塞寞窥窟寝谨褂裸福谬群殿辟障 71 | 媳嫉嫌嫁叠缚缝缠缤剿静碧璃赘熬墙墟嘉摧赫截誓境摘摔撇聚慕暮摹蔓蔑蔡蔗蔽蔼熙蔚兢模槛榴榜榨榕歌遭酵酷 72 | 酿酸碟碱碳磁愿需辖辗雌裳颗瞅墅嗽踊蜻蜡蝇蜘蝉嘛嘀赚锹锻镀舞舔稳熏箕算箩管箫舆僚僧鼻魄魅貌膜膊膀鲜疑 73 | 孵馒裹敲豪膏遮腐瘩瘟瘦辣彰竭端旗精粹歉弊熄熔煽潇漆漱漂漫滴漾演漏慢慷寨赛寡察蜜寥谭肇褐褪谱隧嫩翠熊 74 | 凳骡缩慧撵撕撒撩趣趟撑撮撬播擒墩撞撤增撰聪鞋鞍蕉蕊蔬蕴横槽樱橡樟橄敷豌飘醋醇醉磕磊磅碾震霄霉瞒题暴 75 | 瞎嘻嘶嘲嘹影踢踏踩踪蝶蝴蝠蝎蝌蝗蝙嘿嘱幢墨镇镐镑靠稽稻黎稿稼箱篓箭篇僵躺僻德艘膝膛鲤鲫熟摩褒瘪瘤瘫 76 | 凛颜毅糊遵憋潜澎潮潭鲨澳潘澈澜澄懂憔懊憎额翩褥谴鹤憨慰劈履豫缭撼擂操擅燕蕾薯薛薇擎薪薄颠翰噩橱橙橘 77 | 整融瓢醒霍霎辙冀餐嘴踱蹄蹂蟆螃器噪鹦赠默黔镜赞穆篮篡篷篱儒邀衡膨雕鲸磨瘾瘸凝辨辩糙糖糕燃濒澡激懒憾 78 | 懈窿壁避缰缴戴擦藉鞠藏藐檬檐檀礁磷霜霞瞭瞧瞬瞳瞩瞪曙蹋蹈螺蟋蟀嚎赡穗魏簧簇繁徽爵朦臊鳄癌辫赢糟糠燥 79 | 懦豁臀臂翼骤藕鞭藤覆瞻蹦嚣镰翻鳍鹰瀑襟璧戳孽警蘑藻攀曝蹲蹭蹬巅簸簿蟹颤靡癣瓣羹鳖爆疆鬓壤馨耀躁蠕嚼 80 | 嚷巍籍鳞魔糯灌譬蠢霸露霹躏黯髓赣囊镶瓤罐矗 81 | 82 | 83 | 84 | 乂乜兀弋孑孓幺亓韦廿卅仄厄仃仉仂兮刈爻卞闩讣尹夬爿毋邗邛艽艿札叵匝丕匜劢卟叱叻仨仕仟仡仫仞卮氐犰 85 | 刍邝邙汀讦讧讪讫尻阡尕弁驭匡耒玎玑邢圩圬圭扦圪圳圹扪圮圯芊芍芄芨芑芎芗亘厍夼戍尥乩旯曳岌屺凼囡钇缶 86 | 氘氖牝伎伛伢佤仵伥伧伉伫囟汆刖夙旮刎犷犸舛凫邬饧汕汔汐汲汜汊忖忏讴讵祁讷聿艮阱阮阪丞妁牟纡纣纥纨 87 | 玕玙抟抔圻坂坍坞抃抉芫邯芸芾苈苣芷芮苋芼苌苁芩芪芡芟苄苎苡杌杓杞杈忑孛邴邳矶奁豕忒欤轫迓邶忐卣邺 88 | 旰呋呒呓呔呖呃吡町虬呗吽吣吲帏岐岈岘岑岚兕囵囫钊钋钌迕氙氚牤佞邱攸佚佝佟佗伽彷佘佥孚豸坌肟邸奂劬 89 | 狄狁鸠邹饨饩饪饫饬亨庑庋疔疖肓闱闳闵羌炀沣沅沔沤沌沏沚汩汨沂汾沨汴汶沆沩泐怃怄忡忤忾怅忻忪怆忭忸诂 90 | 诃诅诋诌诏诒孜陇陀陂陉妍妩妪妣妊妗妫妞姒妤邵劭刭甬邰纭纰纴纶纾玮玡玭玠玢玥玦盂忝匦坩抨拤坫拈垆抻劼 91 | 拃拊坼坻㧟坨坭抿坳耶苷苯苤茏苫苜苴苒苘茌苻苓茚茆茑茓茔茕茀苕枥枇杪杳枧杵枨枞枋杻杷杼矸砀刳奄瓯殁郏 92 | 轭郅鸢盱昊昙杲昃咂呸昕昀旻昉炅咔畀虮咀呷黾呱呤咚咆咛呶呣呦咝岢岿岬岫帙岣峁刿迥岷剀帔峄沓囹罔钍钎钏 93 | 钒钕钗邾迮牦竺迤佶佬佰侑侉臾岱侗侃侏侩佻佾侪佼佯侬帛阜侔徂刽郄怂籴瓮戗肼䏝肽肱肫剁迩郇狙狎狍狒咎炙 94 | 枭饯饴冽冼庖疠疝疡兖妾劾炜炖炘炝炔泔沭泷泸泱泅泗泠泺泖泫泮沱泯泓泾怙怵怦怛怏怍㤘怩怫怿宕穹宓诓诔诖 95 | 诘戾诙戽郓衩祆祎祉祇诛诜诟诠诣诤诧诨诩戕孢亟陔妲妯姗帑弩孥驽虱迦迨绀绁绂驷驸绉绌驿骀甾珏珐珂珑玳珀 96 | 顸珉珈拮垭挝垣挞垤赳贲垱垌郝垧垓挦垠茜荚荑贳荜莒茼茴茱莛荞茯荏荇荃荟荀茗荠茭茨垩荥荦荨荩剋荪茹荬荮 97 | 柰栉柯柘栊柩枰栌柙枵柚枳柞柝栀柢栎枸柈柁枷柽剌酊郦甭砗砘砒斫砭砜奎耷虺殂殇殄殆轱轲轳轶轸虿毖觇尜哐 98 | 眄眍郢眇眊眈禺哂咴曷昴昱昵咦哓哔畎毗呲胄畋畈虼虻盅咣哕剐郧咻囿咿哌哙哚咯咩咤哝哏哞峙峣罘帧峒峤峋峥 99 | 贶钚钛钡钣钤钨钫钯氡氟牯郜秕秭竽笈笃俦俨俅俪叟垡牮俣俚皈俑俟逅徇徉舢俞郗俎郤爰郛瓴胨胪胛胂胙胍胗胝 100 | 朐胫鸨匍狨狯飑狩狲訇逄昝饷饸饹胤孪娈弈奕庥疬疣疥疭庠竑彦飒闼闾闿阂羑迸籼酋炳炻炽炯烀炷烃洱洹洧洌浃 101 | 洇洄洙涎洎洫浍洮洵浒浔浕洳恸恓恹恫恺恻恂恪恽宥扃衲衽衿袂祛祜祓祚诮祗祢诰诳鸩昶郡咫弭牁胥陛陟娅姮娆 102 | 姝姣姘姹怼羿炱矜绔骁骅绗绛骈耖挈珥珙顼珰珩珧珣珞琤珲敖恚埔埕埘埙埚挹耆耄埒捋贽垸捃盍荸莆莳莴莪莠莓 103 | 莜莅荼莩荽莸荻莘莎莞莨鸪莼栲栳郴桓桡桎桢桤梃栝桕桁桧桅栟桉栩逑逋彧鬲豇酐逦厝孬砝砹砺砧砷砟砼砥砣剞 104 | 砻轼轾辂鸫趸龀鸬虔逍眬唛晟眩眙哧哽唔晁晏鸮趵趿畛蚨蚜蚍蚋蚬蚝蚧唢圄唣唏盎唑崂崃罡罟峪觊赅钰钲钴钵钹 105 | 钺钽钼钿铀铂铄铆铈铉铊铋铌铍䥽铎氩氤氦毪舐秣秫盉笄笕笊笏笆俸倩俵偌俳俶倬倏恁倭倪俾倜隼隽倌倥臬皋郫 106 | 倨衄颀徕舫釜奚衾胯胱胴胭脍胼朕脒胺鸱玺鸲狷猁狳猃狺逖桀袅饽凇栾挛亳疳疴疸疽痈疱痂痉衮凋颃恣旆旄旃阃 107 | 阄訚阆恙粑朔郸烜烨烩烊剡郯烬涑浯涞涟娑涅涠浞涓浥涔浜浠浣浚悚悭悝悒悌悛宸窈剜诹冢诼袒袢祯诿谀谂谄谇 108 | 屐屙陬勐奘牂蚩陲姬娠娌娉娲娩娴娣娓婀畚逡绠骊绡骋绥绦绨骎邕鸶彗耜焘舂琏琇麸揶埴埯捯掳掴埸埵赧埤捭逵 109 | 埝堋堍掬鸷掖捽掊堉掸捩掮悫埭埽掇掼聃菁萁菘堇萘萋菽菖萜萸萑棻菔菟萏萃菏菹菪菅菀萦菰菡梵梿梏觋桴桷梓 110 | 棁桫棂啬郾匮敕豉鄄酞酚戛硎硭硒硖硗硐硇硌鸸瓠匏厩龚殒殓殍赉雩辄堑眭眦啧晡晤眺眵眸圊喏喵啉勖晞唵晗冕 111 | 啭畦趺啮跄蚶蛄蛎蛆蚰蛊圉蚱蛉蛏蚴啁啕唿啐唼唷啖啵啶啷唳唰啜帻崚崦帼崮崤崆赇赈赊铑铒铗铙铟铠铡铢铣铤 112 | 铧铨铩铪铫铬铮铯铰铱铳铵铷氪牾鸹秾逶笺筇笸笪笮笠笥笤笳笾笞偾偃偕偈傀偬偻皑皎鸻徜舸舻舴舷龛翎脬脘脲 113 | 匐猗猡猞猝斛猕馗馃馄鸾孰庹庾痔痍疵翊旌旎袤阇阈阉阊阋阍阏羟粝粕敝焐烯焓烽焖烷焗渍渚淇淅淞渎涿淖挲淠 114 | 涸渑淦淝淬涪淙涫渌淄惬悻悱惝惘悸惆惚惇惮窕谌谏扈皲谑裆袷裉谒谔谕谖谗谙谛谝逯郿隈粜隍隗婧婊婕娼婢婵 115 | 
胬袈翌恿欸绫骐绮绯绱骒绲骓绶绺绻绾骖缁耠琫琵琶琪瑛琦琥琨靓琰琮琯琬琛琚辇鼋揳堞搽揸揠堙趄揖颉塄揿耋 116 | 揄蛩蛰塆摒揆掾聒葑葚靰靸葳葺葸萼葆葩葶蒌萱戟葭楮棼椟棹椤棰赍椋椁椪棣椐鹁覃酤酢酡鹂厥殚殛雯雱辊辋椠 117 | 辍辎斐睄睑睇睃戢喋嗒喃喱喹晷喈跖跗跞跚跎跏跆蛱蛲蛭蛳蛐蛔蛞蛴蛟蛘喁喟啾嗖喑嗟喽嗞喀喔喙嵘嵖崴遄詈嵎 118 | 崽嵬嵛嵯嵝嵫幄嵋赕铻铼铿锃锂锆锇锉锏锑锒锔锕掣矬氰毳毽犊犄犋鹄犍嵇黍稃稂筚筵筌傣傈舄牍傥傧遑傩遁徨 119 | 媭畲弑颌翕釉鹆舜貂腈腌腓腆腴腑腚腱鱿鲀鲂颍猢猹猥飓觞觚猱颎飧馇馊亵脔裒痣痨痦痞痤痫痧赓竦瓿啻颏鹇阑 120 | 阒阕粞遒孳焯焜焙焱鹈湛渫湮湎湜渭湍湫溲湟溆湲湔湉渥湄滁愠惺愦惴愀愎愔喾寐谟扉裢裎裥祾祺谠幂谡谥谧遐 121 | 孱弼巽骘媪媛婷巯翚皴婺骛缂缃缄彘缇缈缌缑缒缗飨耢瑚瑁瑜瑗瑄瑕遨骜韫髡塬鄢趔趑摅摁蜇搋搪搐搛搠摈彀毂 122 | 搦搡蓁戡蓍鄞靳蓐蓦鹋蒽蓓蓖蓊蒯蓟蓑蒿蒺蓠蒟蒡蒹蒴蒗蓥颐楔楠楂楝楫楸椴槌楯皙榈槎榉楦楣楹椽裘剽甄酮酰 123 | 酯酩蜃碛碓硼碉碚碇碜鹌辏龃龅訾粲虞睚嗪韪嗷嗉睨睢雎睥嘟嗑嗫嗬嗔嗝戥嗄煦暄遢暌跬跶跸跐跣跹跻蛸蜊蜍蜉 124 | 蜣畹蛹嗣嗯嗥嗲嗳嗌嗍嗨嗐嗤嗵罨嵊嵩嵴骰锗锛锜锝锞锟锢锨锩锭锱雉氲犏歃稞稗稔筠筢筮筲筱牒煲敫徭愆艄觎 125 | 毹貊貅貉颔腠腩腼腭腧塍媵詹鲅鲆鲇鲈稣鲋鲐肄鹐飕觥遛馐鹑亶瘃痱痼痿瘐瘁瘆麂裔歆旒雍阖阗阙羧豢粳猷煳煜 126 | 煨煅煊煸煺滟溱溘漭滢溥溧溽裟溻溷滗滫溴滏滃滦溏滂滓溟滪愫慑慊鲎骞窦窠窣裱褚裨裾裰禊谩谪媾嫫媲嫒嫔媸 127 | 缙缜缛辔骝缟缡缢缣骟耥璈瑶瑭獒觏慝嫠韬叆髦摽墁撂摞撄翥踅摭墉墒榖綦蔫蔷靺靼鞅靿甍蔸蔟蔺戬蕖蔻蓿斡鹕 128 | 蓼榛榧榻榫榭槔榱槁槟槠榷僰酽酶酹厮碡碴碣碲磋臧豨殡霆霁辕蜚裴翡龇龈睿䁖睽嘞嘈嘌嘁嘎暧暝踌踉蜞蜥蜮蝈 129 | 蜴蜱蜩蜷蜿螂蜢嘘嘡鹗嘣嘤嘚嗾嘧罴罱幔嶂幛赙罂骷骶鹘锲锴锶锷锸锵镁镂犒箐箦箧箍箸箬箅箪箔箜箢箓毓僖儆 130 | 僳僭劁僮魃魆睾艋鄱膈膑鲑鲔鲚鲛鲟獐觫雒夤馑銮塾麽瘌瘊瘘瘙廖韶旖膂阚鄯鲞粿粼粽糁槊鹚熘熥潢漕滹漯漶潋 131 | 潴漪漉漳漩澉潍慵搴窨寤綮谮褡褙褓褛褊谯谰谲暨屣鹛嫣嫱嫖嫦嫚嫘嫡鼐翟瞀鹜骠缥缦缧缨骢缪缫耦耧瑾璜璀璎 132 | 璁璋璇奭髯髫撷撅赭撸鋆撙撺墀聩觐鞑蕙鞒蕈蕨蕤蕞蕺瞢蕃蕲赜槿樯槭樗樘樊槲醌醅靥魇餍磔磙霈辘龉龊觑瞌瞋 133 | 瞑嘭噎噶颙暹噘踔踝踟踒踬踮踯踺踞蝽蝾蝻蝰蝮螋蝓蝣蝼噗嘬颚噍噢噙噜噌噔颛幞幡嶙嶝骺骼骸镊镉镌镍镏镒镓 134 | 镔稷箴篑篁篌篆牖儋徵磐虢鹞膘滕鲠鲡鲢鲣鲥鲧鲩獗獠觯馓馔麾廛瘛瘼瘢瘠齑羯羰遴糌糍糅熜熵熠澍澌潸潦潲鋈 135 | 潟潼潺憬憧寮窳谳褴褟褫谵熨屦嬉勰戮蝥缬缮缯骣畿耩耨耪璞璟靛璠璘聱螯髻髭髹擀熹甏擞縠磬颞蕻鞘颟薤薨檠 136 | 薏薮薜薅樾橛橇樵檎橹樽樨橼墼橐翮醛醐醍醚磲赝飙殪霖霏霓錾辚臻遽氅瞟瞠瞰嚄嚆噤暾蹀踹踵踽蹉蹁螨蟒螈螅 137 | 螭螠螟噱噬噫噻噼罹圜䦃镖镗镘镚镛镝镞镠氇氆憩穑篝篥篦篪篙盥劓翱魉魈徼歙膳膦膙鲮鲱鲲鲳鲴鲵鲷鲻獴獭獬 138 | 邂鹧廨赟瘰廪瘿瘵瘴癃瘳斓麇麈嬴壅羲糗瞥甑燎燠燔燧濑濉潞澧澹澥澶濂褰寰窸褶禧嬖犟隰嬗颡缱缲缳璨璩璐璪 139 | 螫擤壕觳罄擢薹鞡鞬薷薰藓藁檄檩懋醢翳礅磴鹩龋龌豳壑黻嚏嚅蹑蹒蹊蟥螬螵疃螳蟑嚓羁罽罾嶷黜黝髁髀镡镢镣 140 | 镦镧镩镪镫罅黏簌篾篼簖簋鼢黛儡鹪鼾皤魍龠繇貘邈貔臌膻臆臃鲼鲽鳀鳃鳅鳇鳊螽燮鹫襄糜縻膺癍麋懑濡濮濞濠 141 | 濯蹇謇邃襁檗擘孺隳嬷蟊鹬鍪鏊鳌鬈鬃瞽鞯鞨鞫鞧鞣藜藠藩醪蹙礓燹餮瞿曛颢曜躇蹚鹭蟛蟪蟠蟮鹮黠黟髅髂镬镭 142 | 镯馥簟簪鼬雠艟鳎鳏鳐癞癔癜癖糨蹩鎏懵彝邋鬏攉攒鞲鞴藿蘧蘅麓醮醯酃霪霭霨黼嚯蹰蹶蹽蹼蹴蹾蹿蠖蠓蟾蠊黢 143 | 髋髌镲籀籁齁魑艨鳓鳔鳕鳗鳙麒鏖羸㸆瀚瀣瀛襦谶襞骥缵瓒攘蘩蘖醴霰酆矍曦躅鼍巉黩黥黪镳镴黧纂璺鼯臜鳜鳝 144 | 鳟獾孀骧瓘鼙醺礴颦曩鳢癫麝夔爝灏禳鐾羼蠡耱懿蘸鹳霾氍饕躐髑镵穰饔鬻鬟趱攫攥颧躜鼹癯麟蠲蠹躞衢鑫灞襻 145 | 纛鬣攮囔馕戆爨齉 146 | 147 | 壹 贰 叁 肆 伍 陆 柒 捌 玖 零 拾 佰 仟 万 亿 圆 148 | 149 | ''' 150 | 151 | data = trim_string(data) 152 | 153 | -------------------------------------------------------------------------------- /python/deep_ocr/langs/chi_tra.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.utils import trim_string 4 | 5 | # I take characters from http://hanyu.iciba.com/zt/3500.html 6 | data = u''' 7 | 一 乙 8 | 9 | 二 十 丁 廠 七 蔔 人 入 八 九 幾 兒 了 力 乃 刀 又 10 | 11 | 三 於 幹 虧 士 工 土 才 寸 下 大 丈 與 萬 上 小 口 巾 山 千 乞 川 億 個 勺 久 凡 及 夕 丸 麽 廣 12 | 亡 門 義 之 屍 弓 己 已 子 衛 也 女 飛 刃 習 叉 馬 鄉 13 | 14 | 豐 王 井 開 夫 天 無 元 專 雲 紮 藝 木 五 支 廳 不 太 犬 區 歷 尤 友 匹 車 巨 牙 屯 比 互 切 瓦 15 | 止 少 日 中 岡 貝 內 水 見 午 牛 手 毛 氣 升 長 仁 什 片 仆 化 仇 幣 仍 僅 斤 爪 反 介 父 從 今 16 | 兇 分 乏 公 倉 月 氏 勿 欠 風 丹 勻 烏 鳳 勾 文 六 方 火 為 鬥 憶 訂 計 戶 認 心 尺 引 醜 巴 孔 17 | 隊 辦 以 允 予 勸 雙 書 幻 18 | 19 | 玉 刊 示 末 未 擊 打 巧 正 撲 扒 功 扔 去 甘 世 古 節 本 術 可 丙 左 厲 右 石 布 龍 平 滅 軋 東 20 | 卡 北 占 業 舊 帥 歸 且 旦 目 葉 甲 申 叮 電 號 田 由 史 只 央 兄 叼 叫 另 叨 嘆 四 生 失 禾 丘 21 | 付 仗 代 仙 們 儀 白 仔 他 斥 瓜 乎 叢 令 用 甩 印 樂 句 匆 冊 犯 外 處 冬 鳥 務 包 饑 主 市 立 22 | 閃 蘭 半 汁 匯 頭 漢 寧 穴 它 討 寫 讓 禮 訓 必 議 訊 記 永 司 尼 民 出 遼 奶 奴 加 召 皮 邊 發 23 | 孕 聖 對 臺 矛 糾 母 幼 絲 24 | 25 | 式 刑 動 扛 寺 吉 扣 考 托 老 執 鞏 圾 擴 掃 地 揚 場 耳 共 芒 亞 芝 朽 樸 機 權 過 臣 再 協 西 26 | 壓 厭 在 有 百 存 而 頁 匠 誇 奪 灰 達 列 死 成 夾 軌 邪 劃 邁 畢 至 此 貞 師 塵 尖 劣 光 當 早 27 | 吐 嚇 蟲 曲 團 同 吊 吃 因 吸 嗎 嶼 帆 歲 回 豈 剛 則 肉 網 年 朱 先 丟 舌 竹 遷 喬 偉 傳 乒 乓 28 | 休 伍 伏 優 伐 延 件 任 傷 價 份 華 仰 仿 夥 偽 自 血 向 似 後 行 舟 全 會 殺 合 兆 企 眾 爺 傘 29 | 創 肌 朵 雜 危 旬 旨 負 各 名 多 爭 色 壯 沖 冰 莊 慶 亦 劉 齊 交 次 衣 產 決 充 妄 閉 問 闖 羊 30 | 並 關 米 燈 州 汗 汙 江 池 湯 忙 興 宇 守 宅 字 安 講 軍 許 論 農 諷 設 訪 尋 那 迅 盡 導 異 孫 31 | 陣 陽 收 階 陰 防 奸 如 婦 好 她 媽 戲 羽 觀 歡 買 紅 纖 級 約 紀 馳 巡 32 | 33 | 壽 弄 麥 形 進 戒 吞 遠 違 運 扶 撫 壇 技 壞 擾 拒 找 批 扯 址 走 抄 壩 貢 攻 赤 折 抓 扮 搶 孝 34 | 均 拋 投 墳 抗 坑 坊 抖 護 殼 誌 扭 塊 聲 把 報 卻 劫 芽 花 芹 芬 蒼 芳 嚴 蘆 勞 克 蘇 桿 杠 杜 35 | 材 村 杏 極 李 楊 求 更 束 豆 兩 麗 醫 辰 勵 否 還 殲 來 連 步 堅 旱 盯 呈 時 吳 助 縣 裏 呆 園 36 | 曠 圍 呀 
噸 足 郵 男 困 吵 串 員 聽 吩 吹 嗚 吧 吼 別 崗 帳 財 針 釘 告 我 亂 利 禿 秀 私 每 兵 37 | 估 體 何 但 伸 作 伯 伶 傭 低 你 住 位 伴 身 皂 佛 近 徹 役 返 余 希 坐 谷 妥 含 鄰 岔 肝 肚 腸 38 | 龜 免 狂 猶 角 刪 條 卵 島 迎 飯 飲 系 言 凍 狀 畝 況 床 庫 療 應 冷 這 序 辛 棄 冶 忘 閑 間 悶 39 | 判 竈 燦 弟 汪 沙 汽 沃 泛 溝 沒 沈 沈 懷 憂 快 完 宋 宏 牢 究 窮 災 良 證 啟 評 補 初 社 識 訴 40 | 診 詞 譯 君 靈 即 層 尿 尾 遲 局 改 張 忌 際 陸 阿 陳 阻 附 妙 妖 妨 努 忍 勁 雞 驅 純 紗 納 綱 41 | 駁 縱 紛 紙 紋 紡 驢 紐 42 | 43 | 奉 玩 環 武 青 責 現 表 規 抹 攏 拔 揀 擔 坦 押 抽 拐 拖 拍 者 頂 拆 擁 抵 拘 勢 抱 垃 拉 攔 拌 44 | 幸 招 坡 披 撥 擇 擡 其 取 苦 若 茂 蘋 苗 英 範 直 茄 莖 茅 林 枝 杯 櫃 析 板 松 槍 構 傑 述 枕 45 | 喪 或 畫 臥 事 刺 棗 雨 賣 礦 碼 廁 奔 奇 奮 態 歐 壟 妻 轟 頃 轉 斬 輪 軟 到 非 叔 肯 齒 些 虎 46 | 虜 腎 賢 尚 旺 具 果 味 昆 國 昌 暢 明 易 昂 典 固 忠 咐 呼 鳴 詠 呢 岸 巖 帖 羅 幟 嶺 凱 敗 販 47 | 購 圖 釣 制 知 垂 牧 物 乖 刮 稈 和 季 委 佳 侍 供 使 例 版 侄 偵 側 憑 僑 佩 貨 依 的 迫 質 欣 48 | 征 往 爬 彼 徑 所 舍 金 命 斧 爸 采 受 乳 貪 念 貧 膚 肺 肢 腫 脹 朋 股 肥 服 脅 周 昏 魚 兔 狐 49 | 忽 狗 備 飾 飽 飼 變 京 享 店 夜 廟 府 底 劑 郊 廢 凈 盲 放 刻 育 閘 鬧 鄭 券 卷 單 炒 炊 炕 炎 50 | 爐 沫 淺 法 泄 河 沾 淚 油 泊 沿 泡 註 瀉 泳 泥 沸 波 潑 澤 治 怖 性 怕 憐 怪 學 寶 宗 定 宜 審 51 | 宙 官 空 簾 實 試 郎 詩 肩 房 誠 襯 衫 視 話 誕 詢 該 詳 建 肅 錄 隸 居 屆 刷 屈 弦 承 孟 孤 陜 52 | 降 限 妹 姑 姐 姓 始 駕 參 艱 線 練 組 細 駛 織 終 駐 駝 紹 經 貫 53 | 54 | 奏 春 幫 珍 玻 毒 型 掛 封 持 項 垮 挎 城 撓 政 赴 趙 擋 挺 括 拴 拾 挑 指 墊 掙 擠 拼 挖 按 揮 55 | 挪 某 甚 革 薦 巷 帶 草 繭 茶 荒 茫 蕩 榮 故 胡 南 藥 標 枯 柄 棟 相 查 柏 柳 柱 柿 欄 樹 要 鹹 56 | 威 歪 研 磚 厘 厚 砌 砍 面 耐 耍 牽 殘 殃 輕 鴉 皆 背 戰 點 臨 覽 豎 省 削 嘗 是 盼 眨 哄 顯 啞 57 | 冒 映 星 昨 畏 趴 胃 貴 界 虹 蝦 蟻 思 螞 雖 品 咽 罵 嘩 咱 響 哈 咬 咳 哪 炭 峽 罰 賤 貼 骨 鈔 58 | 鐘 鋼 鑰 鉤 卸 缸 拜 看 矩 怎 牲 選 適 秒 香 種 秋 科 重 復 竿 段 便 倆 貸 順 修 保 促 侮 儉 俗 59 | 俘 信 皇 泉 鬼 侵 追 俊 盾 待 律 很 須 敘 劍 逃 食 盆 膽 勝 胞 胖 脈 勉 狹 獅 獨 狡 獄 狠 貿 怨 60 | 急 饒 蝕 餃 餅 彎 將 獎 哀 亭 亮 度 跡 庭 瘡 瘋 疫 疤 姿 親 音 帝 施 聞 閥 閣 差 養 美 姜 叛 送 61 | 類 迷 前 首 逆 總 煉 炸 炮 爛 剃 潔 洪 灑 澆 濁 洞 測 洗 活 派 洽 染 濟 洋 洲 渾 濃 津 恒 恢 恰 62 | 惱 恨 舉 覺 宣 室 宮 憲 突 穿 竊 客 冠 語 扁 襖 祖 神 祝 誤 誘 說 誦 墾 退 既 屋 晝 費 陡 眉 孩 63 | 除 險 院 娃 姥 姨 姻 嬌 怒 架 賀 盈 勇 怠 柔 壘 綁 絨 結 繞 驕 繪 給 絡 駱 絕 絞 統 64 | 65 | 耕 耗 艷 泰 珠 班 素 蠶 頑 盞 匪 撈 栽 捕 振 載 趕 起 鹽 捎 捏 埋 捉 捆 捐 損 都 哲 逝 撿 換 挽 66 | 熱 恐 壺 挨 恥 耽 恭 蓮 莫 荷 獲 晉 惡 真 框 桂 檔 桐 株 橋 桃 格 校 核 樣 根 索 哥 速 逗 栗 配 67 | 翅 辱 唇 夏 礎 破 原 套 逐 烈 殊 顧 轎 較 頓 斃 致 柴 桌 慮 監 緊 黨 曬 眠 曉 鴨 晃 晌 暈 蚊 哨 68 | 哭 恩 喚 啊 唉 罷 峰 圓 賊 賄 錢 鉗 鉆 鐵 鈴 鉛 缺 氧 特 犧 造 乘 敵 秤 租 積 秧 秩 稱 秘 透 筆 69 | 笑 筍 債 借 值 倚 傾 倒 倘 俱 倡 候 俯 倍 倦 健 臭 射 躬 息 徒 徐 艦 艙 般 航 途 拿 爹 愛 頌 翁 70 | 脆 脂 胸 胳 臟 膠 腦 貍 狼 逢 留 皺 餓 戀 槳 漿 衰 高 席 準 座 脊 癥 病 疾 疼 疲 效 離 唐 資 涼 71 | 站 剖 競 部 旁 旅 畜 閱 羞 瓶 拳 粉 料 益 兼 烤 烘 煩 燒 燭 煙 遞 濤 浙 澇 酒 涉 消 浩 海 塗 浴 72 | 浮 流 潤 浪 浸 漲 燙 湧 悟 悄 悔 悅 害 寬 家 宵 宴 賓 窄 容 宰 案 請 朗 諸 讀 扇 襪 袖 袍 被 祥 73 | 課 誰 調 冤 諒 談 誼 剝 懇 展 劇 屑 弱 陵 陶 陷 陪 娛 娘 通 能 難 預 桑 絹 繡 驗 繼 74 | 75 | 球 理 捧 堵 描 域 掩 捷 排 掉 堆 推 掀 授 教 掏 掠 培 接 控 探 據 掘 職 基 著 勒 黃 萌 蘿 菌 菜 76 | 萄 菊 萍 菠 營 械 夢 梢 梅 檢 梳 梯 桶 救 副 票 戚 爽 聾 襲 盛 雪 輔 輛 虛 雀 堂 常 匙 晨 睜 瞇 77 | 眼 懸 野 啦 晚 啄 距 躍 略 蛇 累 唱 患 唯 崖 嶄 崇 圈 銅 鏟 銀 甜 梨 犁 移 笨 籠 笛 符 第 敏 做 78 | 袋 悠 償 偶 偷 您 售 停 偏 假 得 銜 盤 船 斜 盒 鴿 悉 欲 彩 領 腳 脖 臉 脫 象 夠 猜 豬 獵 貓 猛 79 | 餡 館 湊 減 毫 麻 癢 痕 廊 康 庸 鹿 盜 章 竟 商 族 旋 望 率 著 蓋 粘 粗 粒 斷 剪 獸 清 添 淋 淹 80 | 渠 漸 混 漁 淘 液 淡 深 婆 梁 滲 情 惜 慚 悼 懼 惕 驚 慘 慣 寇 寄 宿 窯 密 謀 謊 禍 謎 逮 敢 屠 81 | 彈 隨 蛋 隆 隱 婚 嬸 頸 績 緒 續 騎 繩 維 綿 綢 綠 82 | 83 | 琴 斑 替 款 堪 搭 塔 越 趁 趨 超 提 堤 博 揭 喜 插 揪 搜 煮 援 裁 擱 摟 攪 握 揉 斯 期 欺 聯 散 84 | 惹 葬 葛 董 葡 敬 蔥 落 朝 辜 葵 棒 棋 植 森 椅 椒 棵 棍 棉 棚 棕 惠 惑 逼 廚 廈 硬 確 雁 殖 裂 85 | 雄 暫 雅 輩 悲 紫 輝 敞 賞 掌 晴 暑 最 量 噴 晶 喇 遇 喊 景 踐 跌 跑 遺 蛙 蛛 蜓 喝 餵 喘 喉 幅 86 | 帽 賭 賠 黑 鑄 鋪 鏈 銷 鎖 鋤 鍋 銹 鋒 銳 短 智 毯 鵝 剩 稍 程 稀 稅 筐 等 築 策 篩 筒 答 筋 箏 87 | 傲 傅 牌 堡 集 焦 傍 儲 奧 街 懲 禦 循 艇 舒 番 釋 禽 臘 脾 腔 魯 猾 猴 然 饞 裝 蠻 就 痛 童 闊 88 | 善 羨 普 糞 尊 道 曾 焰 港 湖 渣 濕 溫 渴 滑 灣 渡 遊 滋 溉 憤 慌 惰 愧 愉 慨 割 寒 富 竄 窩 窗 89 | 遍 裕 褲 裙 謝 謠 謙 屬 屢 強 粥 疏 隔 隙 絮 嫂 登 緞 緩 編 騙 緣 90 | 91 | 瑞 魂 肆 攝 摸 填 搏 塌 鼓 擺 攜 搬 搖 搞 塘 攤 蒜 勤 鵲 藍 墓 幕 蓬 蓄 蒙 蒸 獻 禁 楚 想 槐 榆 92 | 樓 概 賴 酬 感 礙 碑 碎 碰 碗 碌 雷 零 霧 雹 輸 督 齡 鑒 睛 睡 睬 鄙 愚 暖 盟 歇 暗 照 跨 跳 跪 93 | 路 跟 遣 蛾 蜂 嗓 置 罪 罩 錯 錫 鑼 錘 錦 鍵 鋸 矮 辭 稠 愁 籌 簽 簡 毀 舅 鼠 催 傻 像 躲 微 愈 94 | 遙 腰 腥 腹 騰 腿 觸 解 醬 痰 廉 新 韻 意 糧 數 煎 塑 慈 煤 煌 滿 漠 源 濾 濫 滔 溪 溜 滾 濱 粱 95 | 灘 慎 譽 塞 
謹 福 群 殿 辟 障 嫌 嫁 疊 縫 纏 96 | 97 | 靜 碧 璃 墻 撇 嘉 摧 截 誓 境 摘 摔 聚 蔽 慕 暮 蔑 模 榴 榜 榨 歌 遭 酷 釀 酸 磁 願 需 弊 裳 顆 98 | 嗽 蜻 蠟 蠅 蜘 賺 鍬 鍛 舞 穩 算 籮 管 僚 鼻 魄 貌 膜 膊 膀 鮮 疑 饅 裹 敲 豪 膏 遮 腐 瘦 辣 竭 99 | 端 旗 精 歉 熄 熔 漆 漂 漫 滴 演 漏 慢 寨 賽 察 蜜 譜 嫩 翠 熊 凳 騾 縮 100 | 101 | 慧 撕 撒 趣 趟 撐 播 撞 撤 增 聰 鞋 蕉 蔬 橫 槽 櫻 橡 飄 醋 醉 震 黴 瞞 題 暴 瞎 影 踢 踏 踩 蹤 102 | 蝶 蝴 囑 墨 鎮 靠 稻 黎 稿 稼 箱 箭 篇 僵 躺 僻 德 艘 膝 膛 熟 摩 顏 毅 糊 遵 潛 潮 懂 額 慰 劈 103 | 104 | 操 燕 薯 薪 薄 顛 橘 整 融 醒 餐 嘴 蹄 器 贈 默 鏡 贊 籃 邀 衡 膨 雕 磨 凝 辨 辯 糖 糕 燃 澡 激 105 | 懶 壁 避 繳 106 | 107 | 戴 擦 鞠 藏 霜 霞 瞧 蹈 螺 穗 繁 辮 贏 糟 糠 燥 臂 翼 驟 108 | 109 | 鞭 覆 蹦 鐮 翻 鷹 110 | 111 | 警 攀 蹲 顫 瓣 爆 疆 112 | 113 | 壤 耀 躁 嚼 嚷 籍 魔 灌 114 | 115 | 蠢 霸 露 116 | 117 | 囊 118 | 119 | 罐 120 | ''' 121 | 122 | data = trim_string(data) 123 | 124 | -------------------------------------------------------------------------------- /python/deep_ocr/langs/digits.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.utils import trim_string 4 | 5 | data = u''' 6 | 0123456789 7 | ''' 8 | 9 | data = trim_string(data) 10 | -------------------------------------------------------------------------------- /python/deep_ocr/langs/eng.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.utils import trim_string 4 | 5 | data = u''' 6 | abcdefghijklmnopqrstuvwxyz 7 | ABCDEFGHIJKLMNOPQRSTUVWXYZ 8 | ''' 9 | 10 | data = trim_string(data) 11 | -------------------------------------------------------------------------------- /python/deep_ocr/langs/id_num.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.utils import trim_string 4 | 5 | data = u''' 6 | 0123456789X 7 | ''' 8 | 9 | data = trim_string(data) -------------------------------------------------------------------------------- /python/deep_ocr/langs/lower_eng.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.utils import trim_string 4 | 5 | data = u''' 6 | abcdefghijklmnopqrstuvwxyz 7 | ''' 8 | 9 | data = trim_string(data) 10 | -------------------------------------------------------------------------------- /python/deep_ocr/langs/test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.utils import trim_string 4 | 5 | data = u''' 6 | 一二三四 7 | ''' 8 | 9 | data = trim_string(data) 10 | -------------------------------------------------------------------------------- /python/deep_ocr/langs/upper_eng.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.utils import trim_string 4 | 5 | data = u''' 6 | abcdefghijklmnopqrstuvwxyz 7 | ABCDEFGHIJKLMNOPQRSTUVWXYZ 8 | ''' 9 | 10 | data = trim_string(data) 11 | -------------------------------------------------------------------------------- /python/deep_ocr/reco_text_line.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import cv2 5 | from deep_ocr.cv2_img_proc import PreprocessCropZeros 6 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatioFillBG 7 | from deep_ocr.utils import extract_peek_ranges_from_array 8 | from deep_ocr.utils import merge_peek_ranges_mini_non_digits 9 | 10 | 11 | class RectImageClassifier(object): 12 | def __init__(self, caffe_cls, bin_image, char_set, 13 | caffe_cls_width=64, 
caffe_cls_height=64):
14 | self.caffe_cls = caffe_cls
15 | self.bin_image = bin_image
16 | self.cache_res = {}
17 | self.char_set = char_set
18 | self.caffe_cls_width = caffe_cls_width
19 | self.caffe_cls_height = caffe_cls_height
20 | 
21 | def _do(self, rects, boundary):
22 | rects_to_reco = []
23 | for rect in rects:
24 | key = (rect, boundary)
25 | if key not in self.cache_res:
26 | rects_to_reco.append(rect)
27 | image = self.bin_image
28 | char_imgs = []
29 | crop_zeros = PreprocessCropZeros()
30 | resize_keep_ratio = PreprocessResizeKeepRatioFillBG(
31 | self.caffe_cls_width, self.caffe_cls_height,
32 | fill_bg=False, margin=4)
33 | for i, rect in enumerate(rects_to_reco):
34 | x, y, w, h = rect
35 | char_img = image[y:y+h+1, x:x+w+1]
36 | char_img = crop_zeros.do(char_img)
37 | char_img = resize_keep_ratio.do(char_img)
38 | char_imgs.append(char_img)
39 | np_char_imgs = np.asarray(char_imgs)
40 | output_tag_to_max_proba = self.caffe_cls.predict_cv2_imgs(np_char_imgs)
41 | for i, item in enumerate(output_tag_to_max_proba):
42 | cur_rect = rects_to_reco[i]
43 | key = (cur_rect, boundary)
44 | if len(self.char_set["set"]) > 0:
45 | for char_p in item:
46 | if char_p[0] in self.char_set["set"]:
47 | self.cache_res[key] = char_p
48 | break
49 | else:  # for-else: no allowed char predicted, fall back to the raw best guess
50 | self.cache_res[key] = item[0]
51 | else:
52 | self.cache_res[key] = item[0]
53 | 
54 | def do(self, rects, boundary):
55 | self._do(rects, boundary)
56 | ocr_res = ""
57 | for rect in rects:
58 | key = (rect, boundary)
59 | ocr_res += self.cache_res[key][0]
60 | return ocr_res
61 | 
62 | def do_images_maxproba(self, rects, boundaries, bin_images):
63 | size = len(boundaries)
64 | ## generate cache
65 | for i in range(size):
66 | boundary = boundaries[i]
67 | bin_image = bin_images[i]
68 | self.bin_image = bin_image
69 | self._do(rects, boundary)
70 | 
71 | mat_proba = []
72 | for rect in rects:
73 | row_probabilities = []
74 | for i in range(size):
75 | boundary = boundaries[i]
76 | key = (rect, boundary)
77 | row_probabilities.append(self.cache_res[key])
78 | mat_proba.append(row_probabilities)
79 | 
80 | ocr_res = ""
81 | n = len(mat_proba)
82 | for i in range(n):
83 | mat_proba[i] = sorted(mat_proba[i], key=lambda x: -x[1])
84 | ocr_res += mat_proba[i][0][0]
85 | return ocr_res
86 | 
87 | 
88 | class RecoTextLine(object):
89 | def __init__(self, rect_img_clf,
90 | char_set=None,
91 | debug_path=None):
92 | self.char_set = char_set
93 | self.debug_path = debug_path
94 | self.rect_img_clf = rect_img_clf
95 | 
96 | def convert_peek_ranges_into_rects(self, peek_ranges, line_rect):
97 | base_x, base_y, base_w, base_h = line_rect
98 | rects = []
99 | for peek_range in peek_ranges:
100 | x = base_x + peek_range[0]
101 | y = base_y
102 | w = peek_range[1] - peek_range[0]
103 | h = base_h
104 | rect = (x, y, w, h)
105 | rects.append(rect)
106 | return rects
107 | 
108 | def do(self, boundary2binimgs, line_rect, caffe_cls):
109 | boundaries, bin_images = [], []
110 | for boundary, bin_image in boundary2binimgs:
111 | boundaries.append(boundary)
112 | bin_images.append(bin_image)
113 | 
114 | bin_image = bin_images[-1]
115 | self.rect_img_clf.caffe_cls = caffe_cls
116 | self.rect_img_clf.bin_image = None
117 | x, y, w, h = line_rect
118 | page_w = bin_image.shape[1]
119 | img_line = bin_image[y: y + h, x: x + w]
120 | char_w = page_w * self.char_set["width"]
121 | ocr_res = None
122 | ## first segmentation
123 | vertical_sum = np.sum(img_line, axis=0)
124 | peek_ranges = extract_peek_ranges_from_array(vertical_sum, minimun_val=10, minimun_range=2)
125 | 
126 | rects = self.convert_peek_ranges_into_rects(peek_ranges, line_rect)
127 | self.rect_img_clf.char_set = self.char_set
128 | ocr_res = self.rect_img_clf.do_images_maxproba(rects, boundaries, bin_images)
129 | if ocr_res is not None:
130 | print("before merge...")
131 | #print(ocr_res.encode("utf-8"))
132 | print(ocr_res)
133 | peek_ranges = merge_peek_ranges_mini_non_digits(peek_ranges, char_w, ocr_res)
134 | rects = self.convert_peek_ranges_into_rects(peek_ranges, line_rect)
135 | ocr_res = self.rect_img_clf.do_images_maxproba(rects, boundaries, bin_images)
136 | print("after merge...")
137 | #print(ocr_res.encode("utf-8"))
138 | print(ocr_res)
139 | 
140 | # ## end segmentation
141 | # if self.debug_path is not None:
142 | # path_debug_image_line = self.debug_path+"_line.jpg"
143 | # debug_img_line = np.copy(bin_image)
144 | # for rect in rects:
145 | # x = rect[0]
146 | # y = rect[1]
147 | # w = rect[2]
148 | # h = rect[3]
149 | # cv2.rectangle(debug_img_line,
150 | # (x, y),
151 | # (x + w, y + h),
152 | # (255,255,255))
153 | # cv2.imwrite(path_debug_image_line, debug_img_line)
154 | return ocr_res
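RecoTextLine cuts a line into characters with a vertical projection profile: the binary line image is summed down each column, and runs of columns whose sum clears a threshold become character ranges (extract_peek_ranges_from_array in utils.py below), which convert_peek_ranges_into_rects then turns into rectangles. A toy, self-contained illustration of that projection idea, with made-up values:

import numpy as np

# toy binary line: two "characters" occupy columns 0-2 and 5-6
line = np.array([[1, 1, 0, 0, 0, 1, 1, 0],
                 [1, 1, 1, 0, 0, 1, 1, 0]])
vertical_sum = np.sum(line, axis=0)  # -> [2 2 1 0 0 2 2 0]
# extract_peek_ranges_from_array(vertical_sum, minimun_val=0,
# minimun_range=2) yields [(0, 3), (5, 7)]: one range per character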
-------------------------------------------------------------------------------- /python/deep_ocr/utils.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import numpy as np
4 | 
5 | 
6 | def trim_string(string_data):
7 | string_data = string_data.replace(" ", "")
8 | string_data = string_data.replace(" ", "")
9 | string_data = string_data.replace("\n", "")
10 | ### dedupe characters (note: set() does not preserve their order)
11 | string_data = "".join(list(set(string_data)))
12 | return string_data
13 | 
14 | 
15 | def merge_peek_ranges(peek_ranges, char_w):
16 | new_peek_ranges = []
17 | l = len(peek_ranges)
18 | cur_range = peek_ranges[0]
19 | for i in range(1, l):
20 | if char_w > cur_range[1] - cur_range[0]:
21 | cur_range = (cur_range[0], peek_ranges[i][1])
22 | else:
23 | new_peek_ranges.append(cur_range)
24 | cur_range = peek_ranges[i]
25 | new_peek_ranges.append(cur_range)
26 | return new_peek_ranges
27 | 
28 | 
29 | def merge_peek_ranges_mini_non_digits(peek_ranges, char_w, ocr_res):
30 | digits = u"0123456789"
31 | i = 0
32 | n = len(peek_ranges)
33 | new_peek_ranges = []
34 | while i < n:
35 | peek_range = peek_ranges[i]
36 | x = peek_range[0]
37 | w = peek_range[1] - x
38 | j = 1
39 | while w < char_w and (i + j) < n and \
40 | (ocr_res[i+j-1] not in digits) and \
41 | (ocr_res[i+j] not in digits):
42 | w = peek_ranges[i+j][1] - x
43 | j += 1
44 | new_peek_ranges.append((x, x+w))
45 | i += j
46 | return new_peek_ranges
47 | 
48 | 
49 | def extract_peek_ranges_from_array(array_vals, minimun_val=10, minimun_range=2):
50 | start_i = None
51 | end_i = None
52 | peek_ranges = []
53 | for i, val in enumerate(array_vals):
54 | if val > minimun_val and start_i is None:
55 | start_i = i
56 | elif val > minimun_val and i == (len(array_vals) - 1) \
57 | and start_i is not None:
58 | end_i = i
59 | if end_i - start_i >= minimun_range:
60 | peek_ranges.append((start_i, end_i))
61 | start_i = None
62 | end_i = None
63 | elif val > minimun_val and start_i is not None:
64 | pass
65 | elif val <= minimun_val and start_i is not None:
66 | end_i = i
67 | if end_i - start_i >= minimun_range:
68 | peek_ranges.append((start_i, end_i))
69 | start_i = None
70 | end_i = None
71 | elif val <= minimun_val and start_i is None:
72 | pass
73 | else:
74 | raise ValueError("cannot parse this case...")
75 | return peek_ranges
76 | 
77 | def compute_median_w_from_ranges(peek_ranges):
78 | widthes = []
79 | 
for peek_range in peek_ranges: 80 | w = peek_range[1] - peek_range[0] + 1 81 | widthes.append(w) 82 | widthes = np.asarray(widthes) 83 | median_w = np.median(widthes) 84 | return median_w 85 | 86 | def median_split_ranges(peek_ranges): 87 | new_peek_ranges = [] 88 | widthes = [] 89 | for peek_range in peek_ranges: 90 | w = peek_range[1] - peek_range[0] + 1 91 | widthes.append(w) 92 | widthes = np.asarray(widthes) 93 | median_w = np.median(widthes) 94 | for i, peek_range in enumerate(peek_ranges): 95 | num_char = int(round(widthes[i]/median_w, 0)) 96 | if num_char > 1: 97 | char_w = float(widthes[i] / num_char) 98 | for i in range(num_char): 99 | start_point = peek_range[0] + int(i * char_w) 100 | end_point = peek_range[0] + int((i + 1) * char_w) 101 | new_peek_ranges.append((start_point, end_point)) 102 | else: 103 | new_peek_ranges.append(peek_range) 104 | return new_peek_ranges 105 | 106 | def merge_chars_into_line_segments(ranges2d): 107 | for i, ranges in enumerate(ranges2d): 108 | m_w = compute_median_w_from_ranges(ranges) 109 | new_ranges = [] 110 | for j, range_pair in enumerate(ranges): 111 | if len(new_ranges) == 0: 112 | new_ranges.append(range_pair) 113 | else: 114 | start_x, end_x = range_pair 115 | pre_start_x, pre_end_x = new_ranges[-1] 116 | if start_x > pre_start_x: 117 | if start_x - pre_end_x < m_w *2: 118 | new_ranges[-1] = (pre_start_x, end_x) 119 | else: 120 | new_ranges.append(range_pair) 121 | ranges2d[i] = new_ranges 122 | return ranges2d 123 | -------------------------------------------------------------------------------- /python/deep_ocr_id_card_reco: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | 6 | import argparse 7 | from argparse import RawTextHelpFormatter 8 | import os 9 | import shutil 10 | import cv2 11 | from deep_ocr.caffe_clf import CaffeClsBuilder 12 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatio 13 | from deep_ocr.cv2_img_proc import PreprocessBackgroundMask 14 | from deep_ocr.id_cards.segmentation import Segmentation 15 | from deep_ocr.id_cards.char_set import CharSet 16 | from deep_ocr.reco_text_line import RecoTextLine 17 | from deep_ocr.reco_text_line import RectImageClassifier 18 | 19 | 20 | 21 | if __name__ == "__main__": 22 | 23 | description = ''' 24 | # Docker config 25 | CAFFE_MODEL=/opt/deep_ocr/data/trained_models/mnist_model 26 | DEEP_OCR_ROOT=/opt/deep_ocr 27 | WORKSPACE=/workspace 28 | 29 | # PC 30 | CAFFE_MODEL=/root/data/deep_ocr_trained_models/mnist_model 31 | DEEP_OCR_ROOT=/root/workspace/deep_ocr 32 | WORKSPACE=/root/data/deep_ocr_workspace 33 | 34 | deep_ocr_id_card_reco --img $DEEP_OCR_ROOT/data/id_card_img.jpg \ 35 | --debug_path /tmp/debug \ 36 | --cls_sim ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 \ 37 | --cls_ua ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 38 | 39 | deep_ocr_id_card_reco --img ~/data/id_card_front \ 40 | --debug_path /tmp/debug \ 41 | --cls_sim ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 \ 42 | --cls_ua ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 43 | ''' 44 | 45 | parser = argparse.ArgumentParser( 46 | description=description, formatter_class=RawTextHelpFormatter) 47 | parser.add_argument('--img', dest='img', 48 | default=None, required=True, 49 | help='id card image to reco') 50 | parser.add_argument('--debug_path', dest='debug_path', 51 | default=None, required=False, 52 | help='debug path') 53 | 
parser.add_argument('--cls_sim', dest='cls_sim', 54 | default=None, required=False, 55 | help='cls sim') 56 | parser.add_argument('--cls_ua', dest='cls_ua', 57 | default=None, required=False, 58 | help='cls ua') 59 | 60 | options = parser.parse_args() 61 | path_img = os.path.expanduser(options.img) 62 | debug_path = os.path.expanduser(options.debug_path) 63 | if debug_path is not None: 64 | if os.path.isdir(debug_path): 65 | shutil.rmtree(debug_path) 66 | os.makedirs(debug_path) 67 | 68 | cls_dir_sim = os.path.expanduser(options.cls_sim) 69 | cls_dir_ua = os.path.expanduser(options.cls_ua) 70 | 71 | caffe_cls_builder = CaffeClsBuilder() 72 | cls_sim = caffe_cls_builder.build(cls_dir=cls_dir_sim,) 73 | cls_ua = caffe_cls_builder.build(cls_dir=cls_dir_ua,) 74 | caffe_classifiers = {"sim": cls_sim, "ua": cls_ua} 75 | 76 | seg_norm_width = 600 77 | seg_norm_height = 600 78 | preprocess_resize = PreprocessResizeKeepRatio( 79 | seg_norm_width, seg_norm_height) 80 | id_card_img = cv2.imread(path_img) 81 | id_card_img = preprocess_resize.do(id_card_img) 82 | segmentation = Segmentation(debug_path) 83 | key_to_segmentation = segmentation.do(id_card_img) 84 | 85 | boundaries = [ 86 | ((0, 0, 0), (100, 100, 100)), 87 | ] 88 | boundary2binimgs = [] 89 | for boundary in boundaries: 90 | preprocess_bg_mask = PreprocessBackgroundMask(boundary) 91 | id_card_img_mask = preprocess_bg_mask.do(id_card_img) 92 | boundary2binimgs.append((boundary, id_card_img_mask)) 93 | 94 | char_set = CharSet() 95 | char_set_data = char_set.get() 96 | 97 | rect_img_clf = RectImageClassifier( 98 | None, 99 | None, 100 | char_set, 101 | caffe_cls_width=64, 102 | caffe_cls_height=64) 103 | 104 | reco_text_line = RecoTextLine(rect_img_clf) 105 | 106 | key_ocr_res = {} 107 | for key in key_to_segmentation: 108 | key_ocr_res[key] = [] 109 | print("="*64) 110 | print(key) 111 | for i, segment in enumerate(key_to_segmentation[key]): 112 | if debug_path is not None: 113 | line_debug_path = "key_%s_%i" % (key, i) 114 | line_debug_path = os.path.join(debug_path, line_debug_path) 115 | reco_text_line.debug_path = line_debug_path 116 | reco_text_line.char_set = char_set_data[key] 117 | caffe_cls = caffe_classifiers[ 118 | char_set_data[key]["caffe_cls"]] 119 | ocr_res = reco_text_line.do(boundary2binimgs, segment, caffe_cls) 120 | key_ocr_res[key].append(ocr_res) 121 | print("ocr res:") 122 | for key in key_ocr_res: 123 | print("="*60) 124 | print(key) 125 | for res_i in key_ocr_res[key]: 126 | print(res_i.encode("utf-8")) 127 | 128 | if debug_path is not None: 129 | path_debug_image_mask = os.path.join( 130 | debug_path, "reco_debug_01_image_mask.jpg") 131 | cv2.imwrite(path_debug_image_mask, id_card_img_mask) 132 | -------------------------------------------------------------------------------- /python/deep_ocr_id_card_reco.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import print_function 4 | 5 | import argparse 6 | from argparse import RawTextHelpFormatter 7 | import os 8 | import shutil 9 | import cv2 10 | from deep_ocr.caffe_clf import CaffeClsBuilder 11 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatio 12 | from deep_ocr.cv2_img_proc import PreprocessBackgroundMask 13 | from deep_ocr.id_cards.segmentation import Segmentation 14 | from deep_ocr.id_cards.char_set import CharSet 15 | from deep_ocr.reco_text_line import RecoTextLine 16 | from deep_ocr.reco_text_line import RectImageClassifier 17 | 18 | if __name__ == "__main__": 19 | 
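# Same pipeline as bin/deep_ocr_id_card_reco, but with the CLI options
# replaced by hardcoded local paths (adjust them to your machine):
# build the two caffe classifiers, normalize the card image to 600x600,
# segment it into labeled fields, then recognize each field line by line.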
20 | path_img = os.path.expanduser("/home/user/Projects/data/test_id_card/hehe4.jpg") 21 | debug_path = os.path.expanduser("/home/user/Projects/data/debug") 22 | if debug_path is not None: 23 | if os.path.isdir(debug_path): 24 | shutil.rmtree(debug_path) 25 | os.makedirs(debug_path) 26 | 27 | cls_dir_sim = os.path.expanduser("/home/user/Projects/deep_ocr_workspace/data/chongdata_caffe_cn_sim_digits_64_64") 28 | #cls_dir_ua = os.path.expanduser("/home/user/Projects/deep_ocr_workspace/data/chongdata_caffe_cn_sim_digits_64_64") 29 | cls_dir_ua = os.path.expanduser("/home/user/Projects/data/caffe_dataset_id_num") 30 | 31 | caffe_cls_builder = CaffeClsBuilder() 32 | cls_sim = caffe_cls_builder.build(cls_dir=cls_dir_sim,) 33 | cls_ua = caffe_cls_builder.build(cls_dir=cls_dir_ua,) 34 | caffe_classifiers = {"sim": cls_sim, "ua": cls_ua} 35 | 36 | seg_norm_width = 600 37 | seg_norm_height = 600 38 | preprocess_resize = PreprocessResizeKeepRatio(seg_norm_width, seg_norm_height) 39 | id_card_img = cv2.imread(path_img) 40 | id_card_img = preprocess_resize.do(id_card_img) 41 | segmentation = Segmentation(debug_path) 42 | key_to_segmentation = segmentation.do(id_card_img) 43 | 44 | boundaries = [ 45 | ((0, 0, 0), (100, 100, 100)), 46 | ] 47 | boundary2binimgs = [] 48 | for boundary in boundaries: 49 | preprocess_bg_mask = PreprocessBackgroundMask(boundary) 50 | id_card_img_mask = preprocess_bg_mask.do(id_card_img) 51 | boundary2binimgs.append((boundary, id_card_img_mask)) 52 | 53 | char_set = CharSet() 54 | char_set_data = char_set.get() 55 | 56 | rect_img_clf = RectImageClassifier( 57 | None, 58 | None, 59 | char_set, 60 | caffe_cls_width=64, 61 | caffe_cls_height=64) 62 | 63 | reco_text_line = RecoTextLine(rect_img_clf) 64 | 65 | key_ocr_res = {} 66 | for key in key_to_segmentation: 67 | key_ocr_res[key] = [] 68 | # ============== divider 69 | print("="*64) 70 | print(key) 71 | for i, segment in enumerate(key_to_segmentation[key]): 72 | if debug_path is not None: 73 | line_debug_path = "key_%s_%i" % (key, i) 74 | line_debug_path = os.path.join(debug_path, line_debug_path) 75 | reco_text_line.debug_path = line_debug_path 76 | reco_text_line.char_set = char_set_data[key] 77 | caffe_cls = caffe_classifiers[char_set_data[key]["caffe_cls"]] 78 | ocr_res = reco_text_line.do(boundary2binimgs, segment, caffe_cls) 79 | key_ocr_res[key].append(ocr_res) 80 | print("ocr res:") 81 | for key in key_ocr_res: 82 | print("="*60) 83 | print(key) 84 | for res_i in key_ocr_res[key]: 85 | #print(type(res_i)) 86 | print(res_i) 87 | #print(res_i.encode("utf-8")) 88 | 89 | 90 | if debug_path is not None: 91 | path_debug_image_mask = os.path.join(debug_path, "reco_debug_01_image_mask.jpg") 92 | cv2.imwrite(path_debug_image_mask, id_card_img_mask) 93 | -------------------------------------------------------------------------------- /python/deep_ocr_make_caffe_dataset: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | from __future__ import print_function
5 | 
6 | 
7 | import argparse
8 | from argparse import RawTextHelpFormatter
9 | import fnmatch
10 | import os
11 | import cv2
12 | import json
13 | import random
14 | import numpy as np
15 | import shutil
16 | from deep_ocr.lang_aux import LangCharsGenerate
17 | from deep_ocr.lang_aux import FontCheck
18 | from deep_ocr.lang_aux import Font2Image
19 | 
20 | 
21 | 
22 | if __name__ == "__main__":
23 | 
24 | description = '''
25 | deep_ocr_make_caffe_dataset --out_caffe_dir /root/data/caffe_dataset \
26 | --font_dir /root/workspace/deep_ocr_fonts/chinese_fonts \
27 | --width 30 --height 30 --margin 4 --langs lower_eng
28 | '''
29 | 
30 | parser = argparse.ArgumentParser(
31 | description=description, formatter_class=RawTextHelpFormatter)
32 | parser.add_argument('--out_caffe_dir', dest='out_caffe_dir',
33 | default=None, required=True,
34 | help='write a caffe dir')
35 | parser.add_argument('--font_dir', dest='font_dir',
36 | default=None, required=True,
37 | help='font dir to produce images')
38 | parser.add_argument('--test_ratio', dest='test_ratio',
39 | default=0.3, required=False,
40 | help='fraction of fonts reserved for the test set')
41 | parser.add_argument('--width', dest='width',
42 | default=None, required=True,
43 | help='width')
44 | parser.add_argument('--height', dest='height',
45 | default=None, required=True,
46 | help='height')
47 | parser.add_argument('--no_crop', dest='no_crop',
48 | default=False, required=False,
49 | help='do not crop glyphs to their bounding box', action='store_true')
50 | parser.add_argument('--margin', dest='margin',
51 | default=0, required=False,
52 | help='margin (in pixels) around each glyph', )
53 | parser.add_argument('--langs', dest='langs',
54 | default="chi_sim", required=True,
55 | help='deep_ocr.langs.*, e.g. chi_sim, chi_tra, digits...')
56 | options = parser.parse_args()
57 | 
58 | out_caffe_dir = os.path.expanduser(options.out_caffe_dir)
59 | font_dir = os.path.expanduser(options.font_dir)
60 | test_ratio = float(options.test_ratio)
61 | width = int(options.width)
62 | height = int(options.height)
63 | need_crop = not options.no_crop
64 | margin = int(options.margin)
65 | langs = options.langs
66 | 
67 | image_dir_name = "images"
68 | 
69 | images_dir = os.path.join(out_caffe_dir, image_dir_name)
70 | if os.path.isdir(images_dir):
71 | shutil.rmtree(images_dir)
72 | os.makedirs(images_dir)
73 | 
74 | lang_chars_gen = LangCharsGenerate(langs)
75 | lang_chars = lang_chars_gen.do()
76 | font_check = FontCheck(lang_chars)
77 | 
78 | y_to_tag = {}
79 | y_tag_json_file = os.path.join(out_caffe_dir, "y_tag.json")
80 | y_tag_text_file = os.path.join(out_caffe_dir, "y_tag.txt")
81 | path_train = os.path.join(out_caffe_dir, "train.txt")
82 | path_test = os.path.join(out_caffe_dir, "test.txt")
83 | 
84 | 
85 | verified_font_paths = []
86 | ## search for file fonts
87 | for font_name in os.listdir(font_dir):
88 | path_font_file = os.path.join(font_dir, font_name)
89 | if font_check.do(path_font_file):
90 | verified_font_paths.append(path_font_file)
91 | 
92 | train_list = []
93 | test_list = []
94 | max_train_i = int(len(verified_font_paths) * (1.0 - test_ratio))
95 | 
96 | font2image = Font2Image(width, height, need_crop, margin)
97 | 
98 | for i, verified_font_path in enumerate(verified_font_paths):
99 | is_train = True
100 | if i >= max_train_i:
101 | is_train = False
102 | for j, char in enumerate(lang_chars):
103 | if j not in y_to_tag:
104 | y_to_tag[j] = char
105 | char_dir = os.path.join(images_dir, "%d" % j)
106 | if not os.path.isdir(char_dir):
107 | 
98 |     for i, verified_font_path in enumerate(verified_font_paths):
99 |         is_train = True
100 |         if i >= max_train_i:
101 |             is_train = False
102 |         for j, char in enumerate(lang_chars):
103 |             if j not in y_to_tag:
104 |                 y_to_tag[j] = char
105 |             char_dir = os.path.join(images_dir, "%d" % j)
106 |             if not os.path.isdir(char_dir):
107 |                 os.makedirs(char_dir)
108 |             path_image = os.path.join(
109 |                 char_dir,
110 |                 "%d_%s.jpg" % (i, os.path.basename(verified_font_path)))
111 |             relative_path_image = os.path.join(
112 |                 image_dir_name, "%d" % j,
113 |                 "%d_%s.jpg" % (i, os.path.basename(verified_font_path))
114 |             )
115 |             font2image.do(verified_font_path, char, path_image)
116 |             if is_train:
117 |                 train_list.append((relative_path_image, j))
118 |             else:
119 |                 test_list.append((relative_path_image, j))
120 | 
121 |     h_y_tag_json_file = open(y_tag_json_file, "w+")
122 |     json.dump(y_to_tag, h_y_tag_json_file)
123 |     h_y_tag_json_file.close()
124 | 
125 |     h_y_tag_text_file = open(y_tag_text_file, "w+")
126 |     for key in y_to_tag:
127 |         h_y_tag_text_file.write("%d %s\n" % (key, y_to_tag[key].encode("utf-8")))
128 |     h_y_tag_text_file.close()
129 | 
130 |     fout = open(path_train, "w+")
131 |     for item in train_list:
132 |         fout.write("%s %d\n" % (item[0], item[1]))
133 |     fout.close()
134 | 
135 |     fout = open(path_test, "w+")
136 |     for item in test_list:
137 |         fout.write("%s %d\n" % (item[0], item[1]))
138 |     fout.close()
139 | 
--------------------------------------------------------------------------------
/python/deep_ocr_reco_captcha:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | from __future__ import print_function
5 | 
6 | 
7 | import argparse
8 | from argparse import RawTextHelpFormatter
9 | import os
10 | import shutil
11 | import cv2
12 | 
13 | from deep_ocr.captcha.char_segmentation import CharSegmentation
14 | from deep_ocr.captcha.search_best_segmentation import SearchBestSegmentation
15 | from deep_ocr.caffe_clf import CaffeCls
16 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatio
17 | 
18 | 
19 | if __name__ == "__main__":
20 | 
21 |     description = '''
22 | # Docker config
23 | CAFFE_MODEL=/opt/deep_ocr/data/trained_models/mnist_model
24 | DEEP_OCR_ROOT=/opt/deep_ocr
25 | 
26 | # PC
27 | CAFFE_MODEL=/root/data/deep_ocr_trained_models/mnist_model
28 | DEEP_OCR_ROOT=/root/workspace/deep_ocr
29 | 
30 | deep_ocr_reco_captcha --captcha_img $DEEP_OCR_ROOT/data/captcha/captcha.png \
31 |     --num_char 5 \
32 |     --caffe_model $CAFFE_MODEL/lenet_iter_10000.caffemodel \
33 |     --caffe_network $CAFFE_MODEL/lenet.prototxt \
34 |     --y_tag $CAFFE_MODEL/deep_ocr_network.y_tag.json \
35 |     --caffe_img_w 28 --caffe_img_h 28 \
36 |     --debug_path /tmp/debug_captcha
37 | 
38 | deep_ocr_reco_captcha --captcha_img $DEEP_OCR_ROOT/data/captcha/simple.png \
39 |     --num_char 5 \
40 |     --caffe_model $CAFFE_MODEL/lenet_iter_10000.caffemodel \
41 |     --caffe_network $CAFFE_MODEL/lenet.prototxt \
42 |     --y_tag $CAFFE_MODEL/deep_ocr_network.y_tag.json \
43 |     --caffe_img_w 28 --caffe_img_h 28 \
44 |     --debug_path /tmp/debug_captcha
45 | 
46 | '''
47 | 
48 |     parser = argparse.ArgumentParser(
49 |         description=description, formatter_class=RawTextHelpFormatter)
50 |     parser.add_argument('--captcha_img', dest='captcha_img',
51 |                         default=None, required=True,
52 |                         help='captcha image to reco')
53 |     parser.add_argument('--num_char', dest='num_char',
54 |                         default=None, required=True,
55 |                         help='number of characters in the captcha')
56 |     parser.add_argument('--caffe_model', dest='caffe_model',
57 |                         default=None, required=True,
58 |                         help='trained caffe model')
59 |     parser.add_argument('--caffe_network', dest='caffe_network',
60 |                         default=None, required=True,
61 |                         help='caffe network')
62 |     parser.add_argument('--y_tag', dest='y_tag',
63 |                         default=None, required=True,
64 |                         help='path to the y_tag json produced by the dataset tool')
65 |     parser.add_argument('--caffe_img_w', dest='caffe_img_w',
66 |                         default=None, required=True,
67 |                         help='input image width expected by the caffe network')
68 |     parser.add_argument('--caffe_img_h', dest='caffe_img_h',
69 |                         default=None, required=True,
70 |                         help='input image height expected by the caffe network')
71 |     parser.add_argument('--debug_path', dest='debug_path',
72 |                         default=None, required=False,
73 |                         help='debug path')
74 |     options = parser.parse_args()
75 | 
76 |     captcha_img = os.path.expanduser(options.captcha_img)
77 |     num_char = int(options.num_char)
78 |     caffe_model = os.path.expanduser(options.caffe_model)
79 |     caffe_network = os.path.expanduser(options.caffe_network)
80 |     y_tag = os.path.expanduser(options.y_tag)
81 |     caffe_img_w = int(options.caffe_img_w)
82 |     caffe_img_h = int(options.caffe_img_h)
83 |     norm_width = 200
84 |     norm_height = 200
85 | 
86 |     debug_path = None
87 |     if options.debug_path is not None:
88 |         debug_path = os.path.expanduser(options.debug_path)
89 |         if os.path.isdir(debug_path):
90 |             shutil.rmtree(debug_path)
91 |         os.makedirs(debug_path)
92 | 
93 |     image = cv2.imread(captcha_img)
94 | 
95 |     proc_keep_ratio = PreprocessResizeKeepRatio(
96 |         width=norm_width, height=norm_height)
97 |     image = proc_keep_ratio.do(image)
98 | 
99 |     char_segmentation = CharSegmentation(
100 |         num_char=num_char,
101 |         debug_path=debug_path)
102 |     segmentations = char_segmentation.do(image)
103 | 
104 |     caffe_cls = CaffeCls(caffe_network, caffe_model, y_tag,
105 |                          width=caffe_img_w, height=caffe_img_h)
106 | 
107 |     search_best_segmentation = SearchBestSegmentation(
108 |         caffe_cls, char_segmentation.bin_img,
109 |         debug_path)
110 |     eval_segmentations = search_best_segmentation.do(segmentations)
111 | 
112 |     n_top = 100
113 |     for i, eval_segmentation in enumerate(eval_segmentations):
114 |         if i >= n_top:
115 |             break
116 |         print(eval_segmentation)
--------------------------------------------------------------------------------
/python/get_dataset.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | ./deep_ocr_make_caffe_dataset --out_caffe_dir ~/Projects/data/caffe_dataset_eng/ --font_dir ~/Projects/deepLearning_OCR/chinese_fonts/ --width 64 --height 64 --margin 4 --langs eng
4 | 
5 | 
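get_dataset.sh drives the deep_ocr_make_caffe_dataset tool above, which renders one image per (font, char) pair through Font2Image from deep_ocr/lang_aux.py; that module is not included in this listing. A minimal sketch of such a rendering step, assuming a PIL-based implementation (render_char is an illustrative name, not the project's API):

# Illustrative sketch only: the real Font2Image lives in deep_ocr/lang_aux.py.
import numpy as np
from PIL import Image, ImageDraw, ImageFont

def render_char(path_font, char, width=64, height=64, margin=4):
    font_size = min(width, height) - 2 * margin
    font = ImageFont.truetype(path_font, font_size)
    img = Image.new("L", (width, height), 0)   # black background, white glyph
    draw = ImageDraw.Draw(img)
    draw.text((margin, margin), char, fill=255, font=font)
    return np.asarray(img)                     # height x width uint8 array

cv2.imwrite(path_image, ...) on the returned array would then store the glyph the way the generated train.txt/test.txt lists expect.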
--------------------------------------------------------------------------------
/python/make_caffe_dataset.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | from __future__ import print_function
5 | 
6 | import argparse
7 | from argparse import RawTextHelpFormatter
8 | import fnmatch
9 | import os
10 | import cv2
11 | import json
12 | import random
13 | import numpy as np
14 | import shutil
15 | from deep_ocr.lang_aux import LangCharsGenerate
16 | from deep_ocr.lang_aux import FontCheck
17 | from deep_ocr.lang_aux import Font2Image
18 | from deep_ocr.lang_aux import DataAugmentation
19 | 
20 | if __name__ == "__main__":
21 | 
22 |     out_caffe_dir = os.path.expanduser("/home/user/Projects/data/caffe_dataset_id_num/")
23 |     #font_dir = os.path.expanduser("/home/user/Projects/deepLearning_OCR/chinese_fonts/")
24 |     font_dir = os.path.expanduser("/home/user/Projects/deepLearning_OCR/id_num_fonts/")
25 |     test_ratio = 0.3
26 |     width = 64
27 |     height = 64
28 |     need_crop = False
29 |     margin = 4
30 |     langs = "id_num"
31 |     rotate = 10
32 |     rotate_step = 1
33 | 
34 |     image_dir_name = "images"
35 | 
36 |     images_dir = os.path.join(out_caffe_dir, image_dir_name)
37 |     if os.path.isdir(images_dir):
38 |         shutil.rmtree(images_dir)
39 |     os.makedirs(images_dir)
40 | 
41 |     lang_chars_gen = LangCharsGenerate(langs)
42 |     lang_chars = lang_chars_gen.do()
43 |     font_check = FontCheck(lang_chars)
44 | 
45 |     y_to_tag = {}
46 |     y_tag_json_file = os.path.join(out_caffe_dir, "y_tag.json")
47 |     y_tag_text_file = os.path.join(out_caffe_dir, "y_tag.txt")
48 |     path_train = os.path.join(out_caffe_dir, "train.txt")
49 |     path_test = os.path.join(out_caffe_dir, "test.txt")
50 | 
51 |     ## rotate
52 |     if rotate < 0:
53 |         rotate = -rotate
54 | 
55 |     all_rotate_angles = []
56 |     if rotate > 0 and rotate <= 45:
57 |         for i in range(0, rotate + 1, rotate_step):
58 |             all_rotate_angles.append(i)
59 |         for i in range(-rotate, 0, rotate_step):
60 |             all_rotate_angles.append(i)
61 |         #print(all_rotate_angles)
62 | 
63 |     verified_font_paths = []
64 |     ## search for font files
65 |     for font_name in os.listdir(font_dir):
66 |         path_font_file = os.path.join(font_dir, font_name)
67 |         if font_check.do(path_font_file):
68 |             verified_font_paths.append(path_font_file)
69 | 
70 |     train_list = []
71 |     test_list = []
72 |     max_train_i = int(len(verified_font_paths) * (1.0 - test_ratio))
73 | 
74 |     font2image = Font2Image(width, height, need_crop, margin)
75 | 
76 |     # loop over fonts
77 |     for i, verified_font_path in enumerate(verified_font_paths):
78 |         is_train = True
79 |         if i >= max_train_i:
80 |             is_train = False
81 |         # loop over chars
82 |         for j, char in enumerate(lang_chars):
83 |             if j not in y_to_tag:
84 |                 y_to_tag[j] = char
85 |             char_dir = os.path.join(images_dir, "%d" % j)
86 |             if not os.path.isdir(char_dir):
87 |                 os.makedirs(char_dir)
88 |             if rotate == 0:
89 |                 relative_path_image = os.path.join(image_dir_name, "%d" % j, "%d_%s.jpg" % (i, os.path.basename(verified_font_path)))
90 |                 path_image = os.path.join(char_dir, "%d_%s.jpg" % (i, os.path.basename(verified_font_path)))
91 |                 font2image.do(verified_font_path, char, path_image)
92 |                 if is_train:
93 |                     train_list.append((relative_path_image, j))
94 |                 else:
95 |                     test_list.append((relative_path_image, j))
96 |             else:
97 |                 for k in all_rotate_angles:
98 |                     relative_path_image = os.path.join(image_dir_name, "%d" % j, "%d_%s_%d.jpg" % (i, os.path.basename(verified_font_path), k))
99 |                     path_image = os.path.join(char_dir, "%d_%s_%d.jpg" % (i, os.path.basename(verified_font_path), k))
100 |                     font2image.do(verified_font_path, char, path_image, rotate=k)
101 |                     #font2image.do(verified_font_path, char, path_image)
102 |                     if is_train:
103 |                         train_list.append((relative_path_image, j))
104 |                     else:
105 |                         test_list.append((relative_path_image, j))
106 | 
107 | 
108 | 
109 | 
110 |     h_y_tag_json_file = open(y_tag_json_file, "w+")
111 |     json.dump(y_to_tag, h_y_tag_json_file)
112 |     h_y_tag_json_file.close()
113 | 
114 |     h_y_tag_text_file = open(y_tag_text_file, "w+")
115 |     for key in y_to_tag:
116 |         h_y_tag_text_file.write("%d %s\n" % (key, y_to_tag[key].encode("utf-8")))
117 |     h_y_tag_text_file.close()
118 | 
119 |     fout = open(path_train, "w+")
120 |     for item in train_list:
121 |         fout.write("%s %d\n" % (item[0], item[1]))
122 |     fout.close()
123 | 
124 |     fout = open(path_test, "w+")
125 |     for item in test_list:
126 |         fout.write("%s %d\n" % (item[0], item[1]))
127 |     fout.close()
128 | 
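In the rotate branch above, every character is written once per angle in all_rotate_angles (with rotate = 10 and rotate_step = 1 that is 21 images per font and char, from -10 to +10 degrees). Font2Image's rotation handling is not shown in this listing; a plausible cv2-based sketch of the operation (rotate_glyph is an illustrative name):

# Illustrative sketch only: the real rotation lives in deep_ocr/lang_aux.py.
import cv2

def rotate_glyph(gray_img, angle_deg):
    h, w = gray_img.shape[:2]
    center = (w / 2.0, h / 2.0)
    mat = cv2.getRotationMatrix2D(center, angle_deg, 1.0)  # scale = 1.0
    # borderValue=0 keeps the padded corners black, matching the background
    return cv2.warpAffine(gray_img, mat, (w, h), borderValue=0)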
--------------------------------------------------------------------------------
/python/reco_chars.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | sys.path.append('/usr/local/lib/python2.7/site-packages')
4 | import caffe
5 | import json
6 | import numpy as np
7 | import os
8 | import cv2
9 | import shutil
10 | import copy
11 | 
12 | 
13 | class CaffeCls(object):
14 |     def __init__(self,
15 |                  model_def,
16 |                  model_weights,
17 |                  y_tag_json_path,
18 |                  is_mode_cpu=True,
19 |                  width=64,
20 |                  height=64):
21 |         self.net = caffe.Net(model_def, model_weights, caffe.TEST)
22 |         if is_mode_cpu:
23 |             caffe.set_mode_cpu()
24 |         self.y_tag_json = json.load(open(y_tag_json_path, "r"))
25 |         self.width = width
26 |         self.height = height
27 | 
28 |     def predict_cv2_img(self, cv2_img):
29 |         shape = cv2_img.shape
30 |         cv2_imgs = cv2_img.reshape((1, shape[0], shape[1]))
31 |         return self.predict_cv2_imgs(cv2_imgs)[0]
32 | 
33 |     def _predict_cv2_imgs_sub(self, cv2_imgs, pos_start, pos_end):
34 |         cv2_imgs_sub = cv2_imgs[pos_start: pos_end]
35 | 
36 |         #print(cv2_imgs_sub)
37 |         self.net.blobs['data'].reshape(cv2_imgs_sub.shape[0], 1, self.width, self.height)
38 | 
39 |         self.net.blobs['data'].data[...] 
= cv2_imgs_sub.reshape((cv2_imgs_sub.shape[0], 1, self.width, self.height)) 40 | output = self.net.forward() 41 | #print(self.net.blobs['data'].data) 42 | 43 | output_tag_to_max_proba = [] 44 | 45 | num_sample = cv2_imgs_sub.shape[0] 46 | for i in range(num_sample): 47 | output_prob = output['prob'][i] 48 | output_prob_index = sorted( 49 | range(len(output_prob)), 50 | key=lambda x:output_prob[x], 51 | reverse=True) 52 | output_tag_to_probas = [] 53 | for index in output_prob_index: 54 | item = (self.y_tag_json[str(index)], 55 | output_prob[index]) 56 | output_tag_to_probas.append(item) 57 | # output_tag_to_probas = output_tag_to_probas[:2] 58 | output_tag_to_max_proba.append(output_tag_to_probas) 59 | return output_tag_to_max_proba 60 | 61 | def predict_cv2_imgs(self, cv2_imgs, step=50): 62 | output_tag_to_max_proba = [] 63 | num_sample = cv2_imgs.shape[0] 64 | for i in range(0, num_sample, step): 65 | pos_end = min(num_sample, (i + step)) 66 | output_tag_to_max_proba += \ 67 | self._predict_cv2_imgs_sub(cv2_imgs, i, pos_end) 68 | return output_tag_to_max_proba 69 | 70 | 71 | class PreprocessCropZeros(object): 72 | 73 | def __init__(self): 74 | pass 75 | 76 | def do(self, cv2_gray_img): 77 | height = cv2_gray_img.shape[0] 78 | width = cv2_gray_img.shape[1] 79 | 80 | v_sum = np.sum(cv2_gray_img, axis=0) 81 | h_sum = np.sum(cv2_gray_img, axis=1) 82 | left = 0 83 | right = width - 1 84 | top = 0 85 | low = height - 1 86 | 87 | for i in range(width): 88 | if v_sum[i] > 0: 89 | left = i 90 | break 91 | 92 | for i in range(width - 1, -1, -1): 93 | if v_sum[i] > 0: 94 | right = i 95 | break 96 | 97 | for i in range(height): 98 | if h_sum[i] > 0: 99 | top = i 100 | break 101 | 102 | for i in range(height - 1, -1, -1): 103 | if h_sum[i] > 0: 104 | low = i 105 | break 106 | if not (top < low and right > left): 107 | return cv2_gray_img 108 | 109 | return cv2_gray_img[top: low+1, left: right+1] 110 | 111 | 112 | class PreprocessResizeKeepRatio(object): 113 | 114 | def __init__(self, width, height): 115 | self.width = width 116 | self.height = height 117 | 118 | def do(self, cv2_img): 119 | max_width = self.width 120 | max_height = self.height 121 | 122 | cur_height, cur_width = cv2_img.shape[:2] 123 | 124 | ratio_w = float(max_width)/float(cur_width) 125 | ratio_h = float(max_height)/float(cur_height) 126 | ratio = min(ratio_w, ratio_h) 127 | 128 | new_size = (min(int(cur_width*ratio), max_width), 129 | min(int(cur_height*ratio), max_height)) 130 | 131 | new_size = (max(new_size[0], 1), 132 | max(new_size[1], 1),) 133 | 134 | resized_img = cv2.resize(cv2_img, new_size) 135 | return resized_img 136 | 137 | 138 | class PreprocessResizeKeepRatioFillBG(object): 139 | 140 | def __init__(self, width, height, fill_bg=False, 141 | auto_avoid_fill_bg=True, margin=None): 142 | self.width = width 143 | self.height = height 144 | self.fill_bg = fill_bg 145 | self.auto_avoid_fill_bg = auto_avoid_fill_bg 146 | self.margin = margin 147 | 148 | @classmethod 149 | def is_need_fill_bg(cls, cv2_img, th=0.5, max_val=255): 150 | image_shape = cv2_img.shape 151 | height, width = image_shape 152 | if height * 3 < width: 153 | return True 154 | if width * 3 < height: 155 | return True 156 | return False 157 | 158 | @classmethod 159 | def put_img_into_center(cls, img_large, img_small, ): 160 | width_large = img_large.shape[1] 161 | height_large = img_large.shape[0] 162 | 163 | width_small = img_small.shape[1] 164 | height_small = img_small.shape[0] 165 | 166 | if width_large < width_small: 167 | raise 
ValueError("width_large <= width_small") 168 | if height_large < height_small: 169 | raise ValueError("height_large <= height_small") 170 | 171 | start_width = (width_large - width_small) / 2 172 | start_height = (height_large - height_small) / 2 173 | 174 | img_large[int(start_height):int(start_height) + int(height_small), 175 | int(start_width):int(start_width) + int(width_small)] = img_small 176 | return img_large 177 | 178 | def do(self, cv2_img): 179 | 180 | if self.margin is not None: 181 | width_minus_margin = max(2, self.width - self.margin) 182 | height_minus_margin = max(2, self.height - self.margin) 183 | else: 184 | width_minus_margin = self.width 185 | height_minus_margin = self.height 186 | 187 | cur_height, cur_width = cv2_img.shape[:2] 188 | if len(cv2_img.shape) > 2: 189 | pix_dim = cv2_img.shape[2] 190 | else: 191 | pix_dim = None 192 | 193 | preprocess_resize_keep_ratio = PreprocessResizeKeepRatio( 194 | width_minus_margin, 195 | height_minus_margin) 196 | resized_cv2_img = preprocess_resize_keep_ratio.do(cv2_img) 197 | 198 | if self.auto_avoid_fill_bg: 199 | need_fill_bg = self.is_need_fill_bg(cv2_img) 200 | if not need_fill_bg: 201 | self.fill_bg = False 202 | else: 203 | self.fill_bg = True 204 | 205 | ## should skip horizontal stroke 206 | if not self.fill_bg: 207 | ret_img = cv2.resize(resized_cv2_img, (width_minus_margin, 208 | height_minus_margin)) 209 | else: 210 | if pix_dim is not None: 211 | norm_img = np.zeros((height_minus_margin, 212 | width_minus_margin, 213 | pix_dim), 214 | np.uint8) 215 | else: 216 | norm_img = np.zeros((height_minus_margin, 217 | width_minus_margin), 218 | np.uint8) 219 | ret_img = self.put_img_into_center(norm_img, resized_cv2_img) 220 | 221 | if self.margin is not None: 222 | if pix_dim is not None: 223 | norm_img = np.zeros((self.height, 224 | self.width, 225 | pix_dim), 226 | np.uint8) 227 | else: 228 | norm_img = np.zeros((self.height, 229 | self.width), 230 | np.uint8) 231 | ret_img = self.put_img_into_center(norm_img, ret_img) 232 | return ret_img 233 | 234 | 235 | def extract_peek_ranges_from_array(array_vals, minimun_val=10, minimun_range=2): 236 | start_i = None 237 | end_i = None 238 | peek_ranges = [] 239 | for i, val in enumerate(array_vals): 240 | if val > minimun_val and start_i is None: 241 | start_i = i 242 | elif val > minimun_val and start_i is not None: 243 | pass 244 | elif val < minimun_val and start_i is not None: 245 | end_i = i 246 | if end_i - start_i >= minimun_range: 247 | peek_ranges.append((start_i, end_i)) 248 | start_i = None 249 | end_i = None 250 | elif val < minimun_val and start_i is None: 251 | pass 252 | else: 253 | raise ValueError("cannot parse this case...") 254 | return peek_ranges 255 | 256 | def compute_median_w_from_ranges(peek_ranges): 257 | widthes = [] 258 | for peek_range in peek_ranges: 259 | w = peek_range[1] - peek_range[0] + 1 260 | widthes.append(w) 261 | widthes = np.asarray(widthes) 262 | median_w = np.median(widthes) 263 | return median_w 264 | 265 | def median_split_ranges(peek_ranges): 266 | new_peek_ranges = [] 267 | widthes = [] 268 | for peek_range in peek_ranges: 269 | w = peek_range[1] - peek_range[0] + 1 270 | widthes.append(w) 271 | widthes = np.asarray(widthes) 272 | median_w = np.median(widthes) 273 | for i, peek_range in enumerate(peek_ranges): 274 | num_char = int(round(widthes[i]/median_w, 0)) 275 | if num_char > 1: 276 | char_w = float(widthes[i] / num_char) 277 | for i in range(num_char): 278 | start_point = peek_range[0] + int(i * char_w) 279 | end_point = 
peek_range[0] + int((i + 1) * char_w) 280 | new_peek_ranges.append((start_point, end_point)) 281 | else: 282 | new_peek_ranges.append(peek_range) 283 | return new_peek_ranges 284 | 285 | 286 | if __name__ == "__main__": 287 | 288 | norm_width = 64 289 | norm_height = 64 290 | #norm_width = 28 291 | #norm_height = 28 292 | 293 | #base_dir = "/workspace/data/chongdata_caffe_cn_sim_digits_64_64" 294 | #base_dir = "/home/user/Projects/data/caffe_dataset_cn_sim" 295 | base_dir = "/home/user/Projects/deep_ocr_workspace/data/chongdata_caffe_cn_sim_digits_64_64" 296 | model_def = os.path.join(base_dir, "deploy_lenet_train_test.prototxt") 297 | model_weights = os.path.join(base_dir, "lenet_iter_50000.caffemodel") 298 | y_tag_json_path = os.path.join(base_dir, "y_tag.json") 299 | caffe_cls = CaffeCls(model_def, model_weights, y_tag_json_path) 300 | 301 | test_image = "/home/user/Projects/data/test_data.png" 302 | 303 | debug_dir = "/home/user/Projects/data/caffe_dataset_cn_sim/debug_dir" 304 | if debug_dir is not None: 305 | if os.path.isdir(debug_dir): 306 | shutil.rmtree(debug_dir) 307 | os.makedirs(debug_dir) 308 | 309 | cv2_color_img = cv2.imread(test_image) 310 | 311 | resize_keep_ratio = PreprocessResizeKeepRatio(1024, 1024) 312 | cv2_color_img = resize_keep_ratio.do(cv2_color_img) 313 | 314 | cv2_img = cv2.cvtColor(cv2_color_img, cv2.COLOR_RGB2GRAY) 315 | height, width = cv2_img.shape 316 | 317 | adaptive_threshold = cv2.adaptiveThreshold( 318 | cv2_img, 319 | 255, 320 | cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\ 321 | cv2.THRESH_BINARY, 11, 2) 322 | adaptive_threshold = 255 - adaptive_threshold 323 | 324 | ## Try to find text lines and chars 325 | horizontal_sum = np.sum(adaptive_threshold, axis=1) 326 | peek_ranges = extract_peek_ranges_from_array(horizontal_sum) 327 | vertical_peek_ranges2d = [] 328 | for peek_range in peek_ranges: 329 | start_y = peek_range[0] 330 | end_y = peek_range[1] 331 | line_img = adaptive_threshold[start_y:end_y, :] 332 | vertical_sum = np.sum(line_img, axis=0) 333 | vertical_peek_ranges = extract_peek_ranges_from_array( 334 | vertical_sum, 335 | minimun_val=40, 336 | minimun_range=1) 337 | vertical_peek_ranges = median_split_ranges(vertical_peek_ranges) 338 | vertical_peek_ranges2d.append(vertical_peek_ranges) 339 | 340 | ## remove noise such as comma 341 | filtered_vertical_peek_ranges2d = [] 342 | for i, peek_range in enumerate(peek_ranges): 343 | new_peek_range = [] 344 | median_w = compute_median_w_from_ranges(vertical_peek_ranges2d[i]) 345 | for vertical_range in vertical_peek_ranges2d[i]: 346 | if vertical_range[1] - vertical_range[0] > median_w*0.7: 347 | new_peek_range.append(vertical_range) 348 | filtered_vertical_peek_ranges2d.append(new_peek_range) 349 | vertical_peek_ranges2d = filtered_vertical_peek_ranges2d 350 | 351 | 352 | char_imgs = [] 353 | crop_zeros = PreprocessCropZeros() 354 | resize_keep_ratio = PreprocessResizeKeepRatioFillBG( 355 | norm_width, norm_height, fill_bg=False, margin=4) 356 | for i, peek_range in enumerate(peek_ranges): 357 | for vertical_range in vertical_peek_ranges2d[i]: 358 | x = vertical_range[0] 359 | y = peek_range[0] 360 | w = vertical_range[1] - x 361 | h = peek_range[1] - y 362 | char_img = adaptive_threshold[y:y+h+1, x:x+w+1] 363 | char_img = crop_zeros.do(char_img) 364 | char_img = resize_keep_ratio.do(char_img) 365 | char_imgs.append(char_img) 366 | 367 | np_char_imgs = np.asarray(char_imgs) 368 | 369 | #print(np_char_imgs) 370 | 371 | output_tag_to_max_proba = caffe_cls.predict_cv2_imgs(np_char_imgs) 372 | 373 | ocr_res = 
"" 374 | for item in output_tag_to_max_proba: 375 | #print(item[0][0]) 376 | ocr_res += item[0][0] 377 | #print(ocr_res.encode("utf-8")) 378 | print(ocr_res) 379 | 380 | if debug_dir is not None: 381 | path_adaptive_threshold = os.path.join(debug_dir, 382 | "adaptive_threshold.jpg") 383 | cv2.imwrite(path_adaptive_threshold, adaptive_threshold) 384 | seg_adaptive_threshold = cv2_color_img 385 | 386 | # color = (255, 0, 0) 387 | # for rect in rects: 388 | # x, y, w, h = rect 389 | # pt1 = (x, y) 390 | # pt2 = (x + w, y + h) 391 | # cv2.rectangle(seg_adaptive_threshold, pt1, pt2, color) 392 | 393 | color = (0, 255, 0) 394 | for i, peek_range in enumerate(peek_ranges): 395 | for vertical_range in vertical_peek_ranges2d[i]: 396 | x = vertical_range[0] 397 | y = peek_range[0] 398 | w = vertical_range[1] - x 399 | h = peek_range[1] - y 400 | pt1 = (x, y) 401 | pt2 = (x + w, y + h) 402 | cv2.rectangle(seg_adaptive_threshold, pt1, pt2, color) 403 | 404 | path_seg_adaptive_threshold = os.path.join(debug_dir, 405 | "seg_adaptive_threshold.jpg") 406 | cv2.imwrite(path_seg_adaptive_threshold, seg_adaptive_threshold) 407 | 408 | debug_dir_chars = os.path.join(debug_dir, "chars") 409 | os.makedirs(debug_dir_chars) 410 | for i, char_img in enumerate(char_imgs): 411 | path_char = os.path.join(debug_dir_chars, "%d.jpg" % i) 412 | cv2.imwrite(path_char, char_img) 413 | 414 | 415 | -------------------------------------------------------------------------------- /python/test_id_card_reco.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import print_function 4 | 5 | import argparse 6 | from argparse import RawTextHelpFormatter 7 | import os 8 | import shutil 9 | import cv2 10 | from deep_ocr.caffe_clf import CaffeClsBuilder 11 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatio 12 | from deep_ocr.cv2_img_proc import PreprocessBackgroundMask 13 | from deep_ocr.id_cards.segmentation import Segmentation 14 | from deep_ocr.id_cards.char_set import CharSet 15 | from deep_ocr.reco_text_line import RecoTextLine 16 | from deep_ocr.reco_text_line import RectImageClassifier 17 | 18 | 19 | if __name__ == "__main__": 20 | 21 | path_img = os.path.expanduser("/home/user/Projects/data/test_id_card/hehe4.jpg") 22 | debug_path = os.path.expanduser("/home/user/Projects/data/debug") 23 | if debug_path is not None: 24 | if os.path.isdir(debug_path): 25 | shutil.rmtree(debug_path) 26 | os.makedirs(debug_path) 27 | 28 | cls_dir_ua = os.path.expanduser("/home/user/Projects/data/caffe_dataset_id_num") 29 | caffe_cls_builder = CaffeClsBuilder() 30 | cls_ua = caffe_cls_builder.build(cls_dir=cls_dir_ua, ) 31 | 32 | seg_norm_width = 600 33 | seg_norm_height = 600 34 | preprocess_resize = PreprocessResizeKeepRatio(seg_norm_width, seg_norm_height) 35 | id_card_img = cv2.imread(path_img) 36 | id_card_img = preprocess_resize.do(id_card_img) 37 | segmentation = Segmentation(debug_path) 38 | key_to_segmentation = segmentation.do(id_card_img) 39 | 40 | boundaries = [ 41 | ((0, 0, 0), (100, 100, 100)), 42 | ] 43 | boundary2binimgs = [] 44 | for boundary in boundaries: 45 | preprocess_bg_mask = PreprocessBackgroundMask(boundary) 46 | id_card_img_mask = preprocess_bg_mask.do(id_card_img) 47 | boundary2binimgs.append((boundary, id_card_img_mask)) 48 | 49 | char_set = CharSet() 50 | char_set_data = char_set.get() 51 | 52 | rect_img_clf = RectImageClassifier(None, None, char_set, caffe_cls_width=64, caffe_cls_height=64) 53 | 54 | reco_text_line = 
RecoTextLine(rect_img_clf)
55 | 
56 | 
57 |     ## just test the id card number field
58 |     for i, segment in enumerate(key_to_segmentation["id"]):
59 |         if debug_path is not None:
60 |             line_debug_path = "key_%s_%i" % ("id", i)
61 |             line_debug_path = os.path.join(debug_path, line_debug_path)
62 |             reco_text_line.debug_path = line_debug_path
63 |         reco_text_line.char_set = char_set_data["id"]
64 |         caffe_cls = cls_ua
65 |         ocr_res = reco_text_line.do(boundary2binimgs, segment, caffe_cls)
66 |         print("=" * 64)
67 |         print(ocr_res)
68 | 
69 | 
70 |     if debug_path is not None:
71 |         path_debug_image_mask = os.path.join(debug_path, "reco_debug_01_image_mask.jpg")
72 |         cv2.imwrite(path_debug_image_mask, id_card_img_mask)
73 | 
74 | 
--------------------------------------------------------------------------------
/python/test_model.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | 
3 | if __name__ == "__main__":
4 |     # path where the trained model files are stored
5 |     root = '/home/user/Projects/data/caffe_dataset_cn_sim/'
6 |     caffe.set_mode_cpu()
7 |     net = caffe.Net('/home/user/Projects/deepLearning_OCR/lenet_train_test.prototxt', root + 'lenet_iter_50000.caffemodel', caffe.TEST)
8 |     conv1_w = net.params['conv11'][0].data
9 |     conv1_b = net.params['conv11'][1].data
10 |     print(conv1_w, conv1_b)
11 |     print(conv1_w.size, conv1_b.size)
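test_model.py reads the conv11 weights directly; the same pycaffe handles expose every blob and parameter, which is a quick way to confirm that a prototxt matches the trained weights before running recognition. A short check using only the standard pycaffe API (paths reused from test_model.py above):

import caffe

net = caffe.Net('/home/user/Projects/deepLearning_OCR/lenet_train_test.prototxt',
                '/home/user/Projects/data/caffe_dataset_cn_sim/lenet_iter_50000.caffemodel',
                caffe.TEST)
for name, blob in net.blobs.items():
    print(name, blob.data.shape)                   # activation shapes
for name, params in net.params.items():
    print(name, [p.data.shape for p in params])    # weight/bias shapes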
--------------------------------------------------------------------------------
/python/test_reco.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | sys.path.append('/usr/local/lib/python2.7/site-packages')
4 | import caffe
5 | import json
6 | import numpy as np
7 | import os
8 | import cv2
9 | import shutil
10 | import copy
11 | 
12 | 
13 | 
14 | 
15 | if __name__ == "__main__":
16 |     #base_dir = "/home/user/Projects/data/caffe_dataset_cn_sim"
17 |     base_dir = "/home/user/Projects/data/caffe_dataset_id_num"
18 |     #base_dir = "/home/user/Projects/deep_ocr_workspace/data/chongdata_train_ualpha_digits_64_64"
19 | 
20 |     model_def = os.path.join(base_dir, "deploy_lenet_train_test.prototxt")
21 |     model_weights = os.path.join(base_dir, "lenet_iter_50000.caffemodel")
22 |     y_tag_json_path = os.path.join(base_dir, "y_tag.json")
23 | 
24 |     net = caffe.Net(model_def, model_weights, caffe.TEST)
25 |     cv2_color_img = cv2.imread('/home/user/Projects/data/2.jpg')
26 |     cv2_img = cv2.cvtColor(cv2_color_img, cv2.COLOR_RGB2GRAY)
27 |     cv2_img = cv2_img.reshape((1, 1, 64, 64))
28 |     print(cv2_img.shape)
29 |     #np_img = np.asarray(cv2_img)
30 | 
31 |     #print(net.blobs['data'].data.shape)
32 | 
33 |     #transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
34 |     #transformer.set_transpose('data', (2, 0, 1))
35 |     #transformer.set_raw_scale('data', 255)  # scale to [0, 255]
36 |     #transformer.set_channel_swap('data', (2, 1, 0))
37 |     #net.blobs['data'].reshape(1, 1, 64, 64)
38 |     #im=caffe.io.load_image('/home/user/Projects/data/0.jpg')
39 | 
40 |     #net.blobs['data'].data[...] = transformer.preprocess('data', cv2_img)
41 |     net.blobs['data'].data[...] = cv2_img
42 |     out = net.forward()
43 |     #print(out)
44 | 
45 |     predicts = out['prob']
46 |     print(max(predicts))
47 | 
48 |     #print([(k, v.data.shape) for k, v in net.blobs.items()])
49 |     #print(net.params['conv11'][0].data)
50 |     test = net.params['conv11'][0].data
51 |     #print(net.blobs['data'].data)
52 | 
--------------------------------------------------------------------------------
/reco_chars.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | sys.path.append('/usr/local/lib/python2.7/site-packages')
4 | import caffe
5 | import json
6 | import numpy as np
7 | import os
8 | import cv2
9 | import shutil
10 | import copy
11 | 
12 | class CaffeCls(object):
13 |     def __init__(self,
14 |                  model_def,
15 |                  model_weights,
16 |                  y_tag_json_path,
17 |                  is_mode_cpu=True,
18 |                  width=64,
19 |                  height=64):
20 |         self.net = caffe.Net(model_def,
21 |                              model_weights,
22 |                              caffe.TEST)
23 |         if is_mode_cpu:
24 |             caffe.set_mode_cpu()
25 |         self.y_tag_json = json.load(open(y_tag_json_path, "r"))
26 |         self.width = width
27 |         self.height = height
28 | 
29 |     def predict_cv2_img(self, cv2_img):
30 |         shape = cv2_img.shape
31 |         cv2_imgs = cv2_img.reshape((1, shape[0], shape[1]))
32 |         return self.predict_cv2_imgs(cv2_imgs)[0]
33 | 
34 | 
35 |     def _predict_cv2_imgs_sub(self, cv2_imgs, pos_start, pos_end):
36 |         cv2_imgs_sub = cv2_imgs[pos_start: pos_end]
37 | 
38 |         self.net.blobs['data'].reshape(cv2_imgs_sub.shape[0], 1,
39 |                                        self.width, self.height)
40 |         self.net.blobs['data'].data[...] = cv2_imgs_sub.reshape(
41 |             (cv2_imgs_sub.shape[0], 1, self.width, self.height))
42 |         output = self.net.forward()
43 | 
44 |         output_tag_to_max_proba = []
45 | 
46 |         num_sample = cv2_imgs_sub.shape[0]
47 |         for i in range(num_sample):
48 |             output_prob = output['prob'][i]
49 |             output_prob_index = sorted(
50 |                 range(len(output_prob)),
51 |                 key=lambda x: output_prob[x],
52 |                 reverse=True)
53 |             output_tag_to_probas = []
54 |             for index in output_prob_index:
55 |                 item = (self.y_tag_json[str(index)],
56 |                         output_prob[index])
57 |                 output_tag_to_probas.append(item)
58 |             # output_tag_to_probas = output_tag_to_probas[:2]
59 |             output_tag_to_max_proba.append(output_tag_to_probas)
60 |         return output_tag_to_max_proba
61 | 
62 |     def predict_cv2_imgs(self, cv2_imgs, step=50):
63 |         output_tag_to_max_proba = []
64 |         num_sample = cv2_imgs.shape[0]
65 |         for i in range(0, num_sample, step):
66 |             pos_end = min(num_sample, (i + step))
67 |             output_tag_to_max_proba += \
68 |                 self._predict_cv2_imgs_sub(cv2_imgs, i, pos_end)
69 |         return output_tag_to_max_proba
70 | 
71 | class PreprocessCropZeros(object):
72 | 
73 |     def __init__(self):
74 |         pass
75 | 
76 |     def do(self, cv2_gray_img):
77 |         height = cv2_gray_img.shape[0]
78 |         width = cv2_gray_img.shape[1]
79 | 
80 |         v_sum = np.sum(cv2_gray_img, axis=0)
81 |         h_sum = np.sum(cv2_gray_img, axis=1)
82 |         left = 0
83 |         right = width - 1
84 |         top = 0
85 |         low = height - 1
86 | 
87 |         for i in range(width):
88 |             if v_sum[i] > 0:
89 |                 left = i
90 |                 break
91 | 
92 |         for i in range(width - 1, -1, -1):
93 |             if v_sum[i] > 0:
94 |                 right = i
95 |                 break
96 | 
97 |         for i in range(height):
98 |             if h_sum[i] > 0:
99 |                 top = i
100 |                 break
101 | 
102 |         for i in range(height - 1, -1, -1):
103 |             if h_sum[i] > 0:
104 |                 low = i
105 |                 break
106 |         if not (top < low and right > left):
107 |             return cv2_gray_img
108 | 
109 |         return cv2_gray_img[top: low+1, left: right+1]
110 | 
111 | 
112 | 
113 | 
114 | class PreprocessResizeKeepRatio(object):
115 | 
116 |     def __init__(self, width, height):
117 | 
self.width = width 118 | self.height = height 119 | 120 | def do(self, cv2_img): 121 | max_width = self.width 122 | max_height = self.height 123 | 124 | cur_height, cur_width = cv2_img.shape[:2] 125 | 126 | ratio_w = float(max_width)/float(cur_width) 127 | ratio_h = float(max_height)/float(cur_height) 128 | ratio = min(ratio_w, ratio_h) 129 | 130 | new_size = (min(int(cur_width*ratio), max_width), 131 | min(int(cur_height*ratio), max_height)) 132 | 133 | new_size = (max(new_size[0], 1), 134 | max(new_size[1], 1),) 135 | 136 | resized_img = cv2.resize(cv2_img, new_size) 137 | return resized_img 138 | 139 | 140 | class PreprocessResizeKeepRatioFillBG(object): 141 | 142 | def __init__(self, width, height, fill_bg=False, 143 | auto_avoid_fill_bg=True, margin=None): 144 | self.width = width 145 | self.height = height 146 | self.fill_bg = fill_bg 147 | self.auto_avoid_fill_bg = auto_avoid_fill_bg 148 | self.margin = margin 149 | 150 | @classmethod 151 | def is_need_fill_bg(cls, cv2_img, th=0.5, max_val=255): 152 | image_shape = cv2_img.shape 153 | height, width = image_shape 154 | if height * 3 < width: 155 | return True 156 | if width * 3 < height: 157 | return True 158 | return False 159 | 160 | @classmethod 161 | def put_img_into_center(cls, img_large, img_small, ): 162 | width_large = img_large.shape[1] 163 | height_large = img_large.shape[0] 164 | 165 | width_small = img_small.shape[1] 166 | height_small = img_small.shape[0] 167 | 168 | if width_large < width_small: 169 | raise ValueError("width_large <= width_small") 170 | if height_large < height_small: 171 | raise ValueError("height_large <= height_small") 172 | 173 | start_width = (width_large - width_small) / 2 174 | start_height = (height_large - height_small) / 2 175 | 176 | img_large[start_height:start_height + height_small, 177 | start_width:start_width + width_small] = img_small 178 | return img_large 179 | 180 | def do(self, cv2_img): 181 | 182 | if self.margin is not None: 183 | width_minus_margin = max(2, self.width - self.margin) 184 | height_minus_margin = max(2, self.height - self.margin) 185 | else: 186 | width_minus_margin = self.width 187 | height_minus_margin = self.height 188 | 189 | cur_height, cur_width = cv2_img.shape[:2] 190 | if len(cv2_img.shape) > 2: 191 | pix_dim = cv2_img.shape[2] 192 | else: 193 | pix_dim = None 194 | 195 | preprocess_resize_keep_ratio = PreprocessResizeKeepRatio( 196 | width_minus_margin, 197 | height_minus_margin) 198 | resized_cv2_img = preprocess_resize_keep_ratio.do(cv2_img) 199 | 200 | if self.auto_avoid_fill_bg: 201 | need_fill_bg = self.is_need_fill_bg(cv2_img) 202 | if not need_fill_bg: 203 | self.fill_bg = False 204 | else: 205 | self.fill_bg = True 206 | 207 | ## should skip horizontal stroke 208 | if not self.fill_bg: 209 | ret_img = cv2.resize(resized_cv2_img, (width_minus_margin, 210 | height_minus_margin)) 211 | else: 212 | if pix_dim is not None: 213 | norm_img = np.zeros((height_minus_margin, 214 | width_minus_margin, 215 | pix_dim), 216 | np.uint8) 217 | else: 218 | norm_img = np.zeros((height_minus_margin, 219 | width_minus_margin), 220 | np.uint8) 221 | ret_img = self.put_img_into_center(norm_img, resized_cv2_img) 222 | 223 | if self.margin is not None: 224 | if pix_dim is not None: 225 | norm_img = np.zeros((self.height, 226 | self.width, 227 | pix_dim), 228 | np.uint8) 229 | else: 230 | norm_img = np.zeros((self.height, 231 | self.width), 232 | np.uint8) 233 | ret_img = self.put_img_into_center(norm_img, ret_img) 234 | return ret_img 235 | 236 | def 
extract_peek_ranges_from_array(array_vals, minimun_val=10, minimun_range=2): 237 | start_i = None 238 | end_i = None 239 | peek_ranges = [] 240 | for i, val in enumerate(array_vals): 241 | if val > minimun_val and start_i is None: 242 | start_i = i 243 | elif val > minimun_val and start_i is not None: 244 | pass 245 | elif val < minimun_val and start_i is not None: 246 | end_i = i 247 | if end_i - start_i >= minimun_range: 248 | peek_ranges.append((start_i, end_i)) 249 | start_i = None 250 | end_i = None 251 | elif val < minimun_val and start_i is None: 252 | pass 253 | else: 254 | raise ValueError("cannot parse this case...") 255 | return peek_ranges 256 | 257 | def compute_median_w_from_ranges(peek_ranges): 258 | widthes = [] 259 | for peek_range in peek_ranges: 260 | w = peek_range[1] - peek_range[0] + 1 261 | widthes.append(w) 262 | widthes = np.asarray(widthes) 263 | median_w = np.median(widthes) 264 | return median_w 265 | 266 | def median_split_ranges(peek_ranges): 267 | new_peek_ranges = [] 268 | widthes = [] 269 | for peek_range in peek_ranges: 270 | w = peek_range[1] - peek_range[0] + 1 271 | widthes.append(w) 272 | widthes = np.asarray(widthes) 273 | median_w = np.median(widthes) 274 | for i, peek_range in enumerate(peek_ranges): 275 | num_char = int(round(widthes[i]/median_w, 0)) 276 | if num_char > 1: 277 | char_w = float(widthes[i] / num_char) 278 | for i in range(num_char): 279 | start_point = peek_range[0] + int(i * char_w) 280 | end_point = peek_range[0] + int((i + 1) * char_w) 281 | new_peek_ranges.append((start_point, end_point)) 282 | else: 283 | new_peek_ranges.append(peek_range) 284 | return new_peek_ranges 285 | 286 | 287 | if __name__ == "__main__": 288 | 289 | norm_width = 64 290 | norm_height = 64 291 | 292 | base_dir = "/workspace/data/chongdata_caffe_cn_sim_digits_64_64" 293 | model_def = os.path.join(base_dir, "deploy_lenet_train_test.prototxt") 294 | model_weights = os.path.join(base_dir, "lenet_iter_50000.caffemodel") 295 | y_tag_json_path = os.path.join(base_dir, "y_tag.json") 296 | caffe_cls = CaffeCls(model_def, model_weights, y_tag_json_path) 297 | 298 | test_image = "/opt/deep_ocr/test_data.png" 299 | 300 | debug_dir = "/tmp/debug_dir" 301 | if debug_dir is not None: 302 | if os.path.isdir(debug_dir): 303 | shutil.rmtree(debug_dir) 304 | os.makedirs(debug_dir) 305 | 306 | cv2_color_img = cv2.imread(test_image) 307 | 308 | resize_keep_ratio = PreprocessResizeKeepRatio(1024, 1024) 309 | cv2_color_img = resize_keep_ratio.do(cv2_color_img) 310 | 311 | cv2_img = cv2.cvtColor(cv2_color_img, cv2.COLOR_RGB2GRAY) 312 | height, width = cv2_img.shape 313 | 314 | adaptive_threshold = cv2.adaptiveThreshold( 315 | cv2_img, 316 | 255, 317 | cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\ 318 | cv2.THRESH_BINARY, 11, 2) 319 | adaptive_threshold = 255 - adaptive_threshold 320 | 321 | ## Try to find text lines and chars 322 | horizontal_sum = np.sum(adaptive_threshold, axis=1) 323 | peek_ranges = extract_peek_ranges_from_array(horizontal_sum) 324 | vertical_peek_ranges2d = [] 325 | for peek_range in peek_ranges: 326 | start_y = peek_range[0] 327 | end_y = peek_range[1] 328 | line_img = adaptive_threshold[start_y:end_y, :] 329 | vertical_sum = np.sum(line_img, axis=0) 330 | vertical_peek_ranges = extract_peek_ranges_from_array( 331 | vertical_sum, 332 | minimun_val=40, 333 | minimun_range=1) 334 | vertical_peek_ranges = median_split_ranges(vertical_peek_ranges) 335 | vertical_peek_ranges2d.append(vertical_peek_ranges) 336 | 337 | ## remove noise such as comma 338 | 
filtered_vertical_peek_ranges2d = [] 339 | for i, peek_range in enumerate(peek_ranges): 340 | new_peek_range = [] 341 | median_w = compute_median_w_from_ranges(vertical_peek_ranges2d[i]) 342 | for vertical_range in vertical_peek_ranges2d[i]: 343 | if vertical_range[1] - vertical_range[0] > median_w*0.7: 344 | new_peek_range.append(vertical_range) 345 | filtered_vertical_peek_ranges2d.append(new_peek_range) 346 | vertical_peek_ranges2d = filtered_vertical_peek_ranges2d 347 | 348 | 349 | char_imgs = [] 350 | crop_zeros = PreprocessCropZeros() 351 | resize_keep_ratio = PreprocessResizeKeepRatioFillBG( 352 | norm_width, norm_height, fill_bg=False, margin=4) 353 | for i, peek_range in enumerate(peek_ranges): 354 | for vertical_range in vertical_peek_ranges2d[i]: 355 | x = vertical_range[0] 356 | y = peek_range[0] 357 | w = vertical_range[1] - x 358 | h = peek_range[1] - y 359 | char_img = adaptive_threshold[y:y+h+1, x:x+w+1] 360 | char_img = crop_zeros.do(char_img) 361 | char_img = resize_keep_ratio.do(char_img) 362 | char_imgs.append(char_img) 363 | 364 | np_char_imgs = np.asarray(char_imgs) 365 | 366 | output_tag_to_max_proba = caffe_cls.predict_cv2_imgs(np_char_imgs) 367 | 368 | ocr_res = "" 369 | for item in output_tag_to_max_proba: 370 | ocr_res += item[0][0] 371 | print(ocr_res.encode("utf-8")) 372 | 373 | if debug_dir is not None: 374 | path_adaptive_threshold = os.path.join(debug_dir, 375 | "adaptive_threshold.jpg") 376 | cv2.imwrite(path_adaptive_threshold, adaptive_threshold) 377 | seg_adaptive_threshold = cv2_color_img 378 | 379 | # color = (255, 0, 0) 380 | # for rect in rects: 381 | # x, y, w, h = rect 382 | # pt1 = (x, y) 383 | # pt2 = (x + w, y + h) 384 | # cv2.rectangle(seg_adaptive_threshold, pt1, pt2, color) 385 | 386 | color = (0, 255, 0) 387 | for i, peek_range in enumerate(peek_ranges): 388 | for vertical_range in vertical_peek_ranges2d[i]: 389 | x = vertical_range[0] 390 | y = peek_range[0] 391 | w = vertical_range[1] - x 392 | h = peek_range[1] - y 393 | pt1 = (x, y) 394 | pt2 = (x + w, y + h) 395 | cv2.rectangle(seg_adaptive_threshold, pt1, pt2, color) 396 | 397 | path_seg_adaptive_threshold = os.path.join(debug_dir, 398 | "seg_adaptive_threshold.jpg") 399 | cv2.imwrite(path_seg_adaptive_threshold, seg_adaptive_threshold) 400 | 401 | debug_dir_chars = os.path.join(debug_dir, "chars") 402 | os.makedirs(debug_dir_chars) 403 | for i, char_img in enumerate(char_imgs): 404 | path_char = os.path.join(debug_dir_chars, "%d.jpg" % i) 405 | cv2.imwrite(path_char, char_img) 406 | 407 | 408 | -------------------------------------------------------------------------------- /solver.prototxt: -------------------------------------------------------------------------------- 1 | # The train/test net protocol buffer definition 2 | net: "./lenet_train_test.prototxt" 3 | # test_iter specifies how many forward passes the test should carry out. 4 | # In the case of MNIST, we have test batch size 100 and 100 test iterations, 5 | # covering the full 10,000 testing images. 6 | test_iter: 100 7 | # Carry out testing every 500 training iterations. 8 | test_interval: 500 9 | # The base learning rate, momentum and the weight decay of the network. 
10 | base_lr: 0.01 11 | momentum: 0.9 12 | weight_decay: 0.0005 13 | # The learning rate policy 14 | lr_policy: "inv" 15 | gamma: 0.0001 16 | power: 0.75 17 | # Display every 100 iterations 18 | display: 100 19 | # The maximum number of iterations 20 | max_iter: 50000 21 | # snapshot intermediate results 22 | snapshot: 5000 23 | snapshot_prefix: "./lenet" 24 | # solver mode: CPU or GPU 25 | solver_mode: GPU 26 | --------------------------------------------------------------------------------
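The solver above can be driven from Python as well as from the caffe command-line tool; a minimal sketch using the pycaffe wrapper, assuming the data sources referenced by lenet_train_test.prototxt already exist:

# Minimal training driver for the solver above (pycaffe API).
import caffe

caffe.set_mode_gpu()                      # matches solver_mode: GPU above
solver = caffe.SGDSolver('./solver.prototxt')
solver.solve()                            # runs max_iter iterations, writing
                                          # snapshots ./lenet_iter_*.caffemodel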