├── .gitignore ├── README.md ├── bin ├── deep_ocr_id_card_reco ├── deep_ocr_id_card_segmentation ├── deep_ocr_make_caffe_dataset └── deep_ocr_reco_captcha ├── chinese_fonts ├── DroidSansFallbackFull.ttf ├── NotoSansHans-Black.otf ├── NotoSansHans-Bold.otf ├── NotoSansHans-DemiLight.otf ├── NotoSansHans-Light.otf ├── NotoSansHans-Medium.otf ├── NotoSansHans-Regular.otf ├── NotoSansHans-Thin-Windows.otf ├── fangzheng_fangsong.ttf ├── fangzheng_heiti.TTF ├── fangzheng_jieti.TTF ├── fangzheng_shusong.ttf └── mingliu.ttc ├── deep_idocr.py ├── deploy_lenet_train_test.prototxt ├── id_num_fonts ├── OCR-B.ttf ├── huawenxihei.ttf ├── msyh.ttf ├── simhei.ttf └── simkai.ttf ├── lenet_solver.prototxt ├── lenet_train_test.prototxt ├── lenet_train_test.prototxt.bak ├── python ├── .gitignore ├── deep_ocr │ ├── __init__.py │ ├── caffe_clf.py │ ├── captcha │ │ ├── __init__.py │ │ ├── char_segmentation.py │ │ ├── rm_noise.py │ │ └── search_best_segmentation.py │ ├── cv2_img_proc.py │ ├── id_cards │ │ ├── __init__.py │ │ ├── char_set.py │ │ └── segmentation.py │ ├── lang_aux.py │ ├── langs │ │ ├── __init__.py │ │ ├── chi_sim.py │ │ ├── chi_tra.py │ │ ├── digits.py │ │ ├── eng.py │ │ ├── id_num.py │ │ ├── lower_eng.py │ │ ├── test.py │ │ └── upper_eng.py │ ├── reco_text_line.py │ └── utils.py ├── deep_ocr_id_card_reco ├── deep_ocr_id_card_reco.py ├── deep_ocr_make_caffe_dataset ├── deep_ocr_reco_captcha ├── get_dataset.sh ├── make_caffe_dataset.py ├── reco_chars.py ├── test_id_card_reco.py ├── test_model.py └── test_reco.py ├── reco_chars.py └── solver.prototxt /.gitignore: -------------------------------------------------------------------------------- 1 | ./.idea 2 | python/.idea 3 | *.pyc 4 | .idea/* 5 | .idea -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # deepLearning_OCR 2 | Deep learning for Chinese ID card (身份证) recognition 3 | ## Get the dataset 4 | >deep_ocr_make_caffe_dataset --out_caffe_dir /workspace/caffe_dataset_lower_eng \ 5 | --font_dir /opt/deep_ocr/data/fonts/chinese_fonts \ 6 | --width 28 --height 28 --margin 4 --langs lower_eng 7 | ## Trained model 8 | Link: https://pan.baidu.com/s/1YCGVZENzlubH6G1mXEElOw 9 | extraction code (提取码): baoo 10 | ## References 11 | For details, please refer to http://www.cnblogs.com/ygh1229/p/7224940.html 12 | Some of the work references http://chongdata.com/articles/?p=5 13 | If any of this content infringes your rights, please contact the author. 14 | 15 | ## Newer methods 16 | - CTPN: https://github.com/tianzhi0549/CTPN 17 |
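18 | ## Usage 19 | With a trained model unpacked into the workspace, recognition can be run with the bundled CLI. The command below is taken verbatim from the docstring of bin/deep_ocr_id_card_reco; adjust WORKSPACE and the classifier dirs to your own setup: 20 | >deep_ocr_id_card_reco --img $DEEP_OCR_ROOT/data/id_card_img.jpg \ 21 | --debug_path /tmp/debug \ 22 | --cls_sim ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 \ 23 | --cls_ua ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 -------------------------------------------------------------------------------- /bin/deep_ocr_id_card_reco: -------------------------------------------------------------------------------- 1 | #! 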
/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | 6 | import argparse 7 | from argparse import RawTextHelpFormatter 8 | import os 9 | import shutil 10 | import cv2 11 | from deep_ocr.caffe_clf import CaffeClsBuilder 12 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatio 13 | from deep_ocr.cv2_img_proc import PreprocessBackgroundMask 14 | from deep_ocr.id_cards.segmentation import Segmentation 15 | from deep_ocr.id_cards.char_set import CharSet 16 | from deep_ocr.reco_text_line import RecoTextLine 17 | from deep_ocr.reco_text_line import RectImageClassifier 18 | 19 | 20 | 21 | if __name__ == "__main__": 22 | 23 | description = ''' 24 | # Docker config 25 | CAFFE_MODEL=/opt/deep_ocr/data/trained_models/mnist_model 26 | DEEP_OCR_ROOT=/opt/deep_ocr 27 | WORKSPACE=/workspace 28 | 29 | # PC 30 | CAFFE_MODEL=/root/data/deep_ocr_trained_models/mnist_model 31 | DEEP_OCR_ROOT=/root/workspace/deep_ocr 32 | WORKSPACE=/root/data/deep_ocr_workspace 33 | 34 | deep_ocr_id_card_reco --img $DEEP_OCR_ROOT/data/id_card_img.jpg \ 35 | --debug_path /tmp/debug \ 36 | --cls_sim ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 \ 37 | --cls_ua ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 38 | 39 | deep_ocr_id_card_reco --img ~/data/id_card_front \ 40 | --debug_path /tmp/debug \ 41 | --cls_sim ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 \ 42 | --cls_ua ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 43 | ''' 44 | 45 | parser = argparse.ArgumentParser( 46 | description=description, formatter_class=RawTextHelpFormatter) 47 | parser.add_argument('--img', dest='img', 48 | default=None, required=True, 49 | help='id card image to recognize') 50 | parser.add_argument('--debug_path', dest='debug_path', 51 | default=None, required=False, 52 | help='debug path') 53 | parser.add_argument('--cls_sim', dest='cls_sim', 54 | default=None, required=True, 55 | help='dir of the simplified-Chinese + digits classifier') 56 | parser.add_argument('--cls_ua', dest='cls_ua', 57 | default=None, required=True, 58 | help='dir of the upper-case letters + digits classifier') 59 | 60 | options = parser.parse_args() 61 | path_img = os.path.expanduser(options.img) 62 | debug_path = options.debug_path and os.path.expanduser(options.debug_path) 63 | if debug_path is not None: 64 | if os.path.isdir(debug_path): 65 | shutil.rmtree(debug_path) 66 | os.makedirs(debug_path) 67 | 68 | cls_dir_sim = os.path.expanduser(options.cls_sim) 69 | cls_dir_ua = os.path.expanduser(options.cls_ua) 70 | 71 | caffe_cls_builder = CaffeClsBuilder() 72 | cls_sim = caffe_cls_builder.build(cls_dir=cls_dir_sim,) 73 | cls_ua = caffe_cls_builder.build(cls_dir=cls_dir_ua,) 74 | caffe_classifiers = {"sim": cls_sim, "ua": cls_ua} 75 | 76 | seg_norm_width = 600 77 | seg_norm_height = 600 78 | preprocess_resize = PreprocessResizeKeepRatio( 79 | seg_norm_width, seg_norm_height) 80 | id_card_img = cv2.imread(path_img) 81 | id_card_img = preprocess_resize.do(id_card_img) 82 | segmentation = Segmentation(debug_path) 83 | key_to_segmentation = segmentation.do(id_card_img) 84 | 85 | boundaries = [ 86 | ((0, 0, 0), (100, 100, 100)), 87 | ] 88 | boundary2binimgs = [] 89 | for boundary in boundaries: 90 | preprocess_bg_mask = PreprocessBackgroundMask(boundary) 91 | id_card_img_mask = preprocess_bg_mask.do(id_card_img) 92 | boundary2binimgs.append((boundary, id_card_img_mask)) 93 | 94 | char_set = CharSet() 95 | char_set_data = char_set.get() 96 | 97 | rect_img_clf = RectImageClassifier( 98 | None, 99 | None, 100 | char_set, 101 | caffe_cls_width=64, 102 | caffe_cls_height=64) 103 | 104 | reco_text_line = 
RecoTextLine(rect_img_clf) 105 | 106 | key_ocr_res = {} 107 | for key in key_to_segmentation: 108 | key_ocr_res[key] = [] 109 | print("="*64) 110 | print(key) 111 | for i, segment in enumerate(key_to_segmentation[key]): 112 | if debug_path is not None: 113 | line_debug_path = "key_%s_%i" % (key, i) 114 | line_debug_path = os.path.join(debug_path, line_debug_path) 115 | reco_text_line.debug_path = line_debug_path 116 | reco_text_line.char_set = char_set_data[key] 117 | caffe_cls = caffe_classifiers[ 118 | char_set_data[key]["caffe_cls"]] 119 | ocr_res = reco_text_line.do(boundary2binimgs, segment, caffe_cls) 120 | key_ocr_res[key].append(ocr_res) 121 | print("ocr res:") 122 | for key in key_ocr_res: 123 | print("="*60) 124 | print(key) 125 | for res_i in key_ocr_res[key]: 126 | print(res_i.encode("utf-8")) 127 | 128 | if debug_path is not None: 129 | path_debug_image_mask = os.path.join( 130 | debug_path, "reco_debug_01_image_mask.jpg") 131 | cv2.imwrite(path_debug_image_mask, id_card_img_mask) 132 | -------------------------------------------------------------------------------- /bin/deep_ocr_id_card_segmentation: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | 6 | 7 | import argparse 8 | from argparse import RawTextHelpFormatter 9 | import os 10 | import shutil 11 | import cv2 12 | import numpy as np 13 | 14 | from deep_ocr.utils import extract_peek_ranges_from_array 15 | from deep_ocr.utils import median_split_ranges 16 | from deep_ocr.utils import merge_chars_into_line_segments 17 | 18 | from deep_ocr.cv2_img_proc import PreprocessBackgroundMask 19 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatio 20 | 21 | from deep_ocr.id_cards.segmentation import Segmentation 22 | 23 | 24 | 25 | if __name__ == "__main__": 26 | 27 | description = ''' 28 | # Docker config 29 | CAFFE_MODEL=/opt/deep_ocr/data/trained_models/mnist_model 30 | DEEP_OCR_ROOT=/opt/deep_ocr 31 | 32 | # PC 33 | CAFFE_MODEL=/root/data/deep_ocr_trained_models/mnist_model 34 | DEEP_OCR_ROOT=/root/workspace/deep_ocr 35 | 36 | deep_ocr_id_card_segmentation --img $DEEP_OCR_ROOT/data/id_card_img.jpg \ 37 | --debug_path /tmp/debug 38 | 39 | deep_ocr_id_card_segmentation --img ~/data/id_card_front \ 40 | --debug_path /tmp/debug 41 | ''' 42 | 43 | parser = argparse.ArgumentParser( 44 | description=description, formatter_class=RawTextHelpFormatter) 45 | parser.add_argument('--img', dest='img', 46 | default=None, required=True, 47 | help='id card image, or a directory of .jpg images, to segment') 48 | parser.add_argument('--debug_path', dest='debug_path', 49 | default=None, required=False, 50 | help='debug path') 51 | options = parser.parse_args() 52 | 53 | path_img = os.path.expanduser(options.img) 54 | debug_path = options.debug_path and os.path.expanduser(options.debug_path) 55 | if debug_path is not None: 56 | if os.path.isdir(debug_path): 57 | shutil.rmtree(debug_path) 58 | os.makedirs(debug_path) 59 | norm_width = 600 60 | norm_height = 600 61 | 62 | if os.path.isfile(path_img): 63 | id_card_img = cv2.imread(path_img) 64 | preprocess_resize = PreprocessResizeKeepRatio(norm_width, norm_height) 65 | id_card_img = preprocess_resize.do(id_card_img) 66 | segmentation = Segmentation(debug_path) 67 | segmentation.do(id_card_img) 68 | elif os.path.isdir(path_img): 69 | filenames = os.listdir(path_img) 70 | for filename in filenames: 71 | if filename.lower().endswith(".jpg"): 72 | print("process filename=", filename) 73 | debug_path_filename = None 74 
| if debug_path is not None: 75 | debug_path_filename = os.path.join(debug_path, filename) 76 | each_path_img = os.path.join(path_img, filename) 77 | id_card_img = cv2.imread(each_path_img) 78 | preprocess_resize = PreprocessResizeKeepRatio(norm_width, norm_height) 79 | id_card_img = preprocess_resize.do(id_card_img) 80 | segmentation = Segmentation(debug_path_filename) 81 | segmentation.do(id_card_img) -------------------------------------------------------------------------------- /bin/deep_ocr_make_caffe_dataset: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | 6 | 7 | import argparse 8 | from argparse import RawTextHelpFormatter 9 | import fnmatch 10 | import os 11 | import cv2 12 | import json 13 | import random 14 | import numpy as np 15 | import shutil 16 | from deep_ocr.lang_aux import LangCharsGenerate 17 | from deep_ocr.lang_aux import FontCheck 18 | from deep_ocr.lang_aux import Font2Image 19 | 20 | 21 | 22 | if __name__ == "__main__": 23 | 24 | description = ''' 25 | deep_ocr_make_caffe_dataset --out_caffe_dir /root/data/caffe_dataset \ 26 | --font_dir /root/workspace/deep_ocr_fonts/chinese_fonts \ 27 | --width 30 --height 30 --margin 4 --langs lower_eng 28 | ''' 29 | 30 | parser = argparse.ArgumentParser( 31 | description=description, formatter_class=RawTextHelpFormatter) 32 | parser.add_argument('--out_caffe_dir', dest='out_caffe_dir', 33 | default=None, required=True, 34 | help='output dir for the caffe dataset') 35 | parser.add_argument('--font_dir', dest='font_dir', 36 | default=None, required=True, 37 | help='font dir used to produce images') 38 | parser.add_argument('--test_ratio', dest='test_ratio', 39 | default=0.3, required=False, 40 | help='fraction of fonts held out for the test set') 41 | parser.add_argument('--width', dest='width', 42 | default=None, required=True, 43 | help='width') 44 | parser.add_argument('--height', dest='height', 45 | default=None, required=True, 46 | help='height') 47 | parser.add_argument('--no_crop', dest='no_crop', 48 | default=False, required=False, 49 | help='do not tightly crop rendered characters', action='store_true') 50 | parser.add_argument('--margin', dest='margin', 51 | default=0, required=False, 52 | help='margin (pixels) around each rendered character', ) 53 | parser.add_argument('--langs', dest='langs', 54 | default="chi_sim", required=True, 55 | help='deep_ocr.langs.*, e.g. 
chi_sim, chi_tra, digits...') 56 | options = parser.parse_args() 57 | 58 | out_caffe_dir = os.path.expanduser(options.out_caffe_dir) 59 | font_dir = os.path.expanduser(options.font_dir) 60 | test_ratio = float(options.test_ratio) 61 | width = int(options.width) 62 | height = int(options.height) 63 | need_crop = not options.no_crop 64 | margin = int(options.margin) 65 | langs = options.langs 66 | 67 | image_dir_name = "images" 68 | 69 | images_dir = os.path.join(out_caffe_dir, image_dir_name) 70 | if os.path.isdir(images_dir): 71 | shutil.rmtree(images_dir) 72 | os.makedirs(images_dir) 73 | 74 | lang_chars_gen = LangCharsGenerate(langs) 75 | lang_chars = lang_chars_gen.do() 76 | font_check = FontCheck(lang_chars) 77 | 78 | y_to_tag = {} 79 | y_tag_json_file = os.path.join(out_caffe_dir, "y_tag.json") 80 | y_tag_text_file = os.path.join(out_caffe_dir, "y_tag.txt") 81 | path_train = os.path.join(out_caffe_dir, "train.txt") 82 | path_test = os.path.join(out_caffe_dir, "test.txt") 83 | 84 | 85 | verified_font_paths = [] 86 | ## search the font dir for usable font files 87 | for font_name in os.listdir(font_dir): 88 | path_font_file = os.path.join(font_dir, font_name) 89 | if font_check.do(path_font_file): 90 | verified_font_paths.append(path_font_file) 91 | 92 | train_list = [] 93 | test_list = [] 94 | max_train_i = int(len(verified_font_paths) * (1.0 - test_ratio)) 95 | 96 | font2image = Font2Image(width, height, need_crop, margin) 97 | 98 | for i, verified_font_path in enumerate(verified_font_paths): 99 | is_train = True 100 | if i >= max_train_i: 101 | is_train = False 102 | for j, char in enumerate(lang_chars): 103 | if j not in y_to_tag: 104 | y_to_tag[j] = char 105 | char_dir = os.path.join(images_dir, "%d" % j) 106 | if not os.path.isdir(char_dir): 107 | os.makedirs(char_dir) 108 | path_image = os.path.join( 109 | char_dir, 110 | "%d_%s.jpg" % (i, os.path.basename(verified_font_path))) 111 | relative_path_image = os.path.join( 112 | image_dir_name, "%d"%j, 113 | "%d_%s.jpg" % (i, os.path.basename(verified_font_path)) 114 | ) 115 | font2image.do(verified_font_path, char, path_image) 116 | if is_train: 117 | train_list.append((relative_path_image, j)) 118 | else: 119 | test_list.append((relative_path_image, j)) 120 | 121 | h_y_tag_json_file = open(y_tag_json_file, "w+") 122 | json.dump(y_to_tag, h_y_tag_json_file) 123 | h_y_tag_json_file.close() 124 | 125 | h_y_tag_text_file = open(y_tag_text_file, "w+") 126 | for key in y_to_tag: 127 | h_y_tag_text_file.write("%d %s\n" % (key, y_to_tag[key].encode("utf-8"))) 128 | h_y_tag_text_file.close() 129 | 130 | fout = open(path_train, "w+") 131 | for item in train_list: 132 | fout.write("%s %d\n" % (item[0], item[1])) 133 | fout.close() 134 | 135 | fout = open(path_test, "w+") 136 | for item in test_list: 137 | fout.write("%s %d\n" % (item[0], item[1])) 138 | fout.close() 139 |
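The train.txt and test.txt written above use Caffe's standard "relative_path label" image-list format. A typical next step (a sketch, assuming a stock Caffe checkout at $CAFFE_ROOT; convert_imageset is Caffe's bundled tool) converts the lists into the LMDBs that lenet_train_test.prototxt reads, resizing to the 64x64 input used by the models in this repo: >$CAFFE_ROOT/build/tools/convert_imageset --gray --shuffle --resize_height=64 --resize_width=64 /root/data/caffe_dataset/ /root/data/caffe_dataset/train.txt /root/data/caffe_dataset/train_lmdb -------------------------------------------------------------------------------- /bin/deep_ocr_reco_captcha: -------------------------------------------------------------------------------- 1 | #! 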
/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | 6 | 7 | import argparse 8 | from argparse import RawTextHelpFormatter 9 | import os 10 | import shutil 11 | import cv2 12 | 13 | from deep_ocr.captcha.char_segmentation import CharSegmentation 14 | from deep_ocr.captcha.search_best_segmentation import SearchBestSegmentation 15 | from deep_ocr.caffe_clf import CaffeCls 16 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatio 17 | 18 | 19 | if __name__ == "__main__": 20 | 21 | description = ''' 22 | # Docker config 23 | CAFFE_MODEL=/opt/deep_ocr/data/trained_models/mnist_model 24 | DEEP_OCR_ROOT=/opt/deep_ocr 25 | 26 | # PC 27 | CAFFE_MODEL=/root/data/deep_ocr_trained_models/mnist_model 28 | DEEP_OCR_ROOT=/root/workspace/deep_ocr 29 | 30 | deep_ocr_reco_captcha --captcha_img $DEEP_OCR_ROOT/data/captcha/captcha.png \ 31 | --num_char 5 \ 32 | --caffe_model $CAFFE_MODEL/lenet_iter_10000.caffemodel \ 33 | --caffe_network $CAFFE_MODEL/lenet.prototxt \ 34 | --y_tag $CAFFE_MODEL/deep_ocr_network.y_tag.json \ 35 | --caffe_img_w 28 --caffe_img_h 28 \ 36 | --debug_path /tmp/debug_captcha 37 | 38 | deep_ocr_reco_captcha --captcha_img $DEEP_OCR_ROOT/data/captcha/simple.png \ 39 | --num_char 5 \ 40 | --caffe_model $CAFFE_MODEL/lenet_iter_10000.caffemodel \ 41 | --caffe_network $CAFFE_MODEL/lenet.prototxt \ 42 | --y_tag $CAFFE_MODEL/deep_ocr_network.y_tag.json \ 43 | --caffe_img_w 28 --caffe_img_h 28 \ 44 | --debug_path /tmp/debug_captcha 45 | 46 | ''' 47 | 48 | parser = argparse.ArgumentParser( 49 | description=description, formatter_class=RawTextHelpFormatter) 50 | parser.add_argument('--captcha_img', dest='captcha_img', 51 | default=None, required=True, 52 | help='captcha image to reco') 53 | parser.add_argument('--num_char', dest='num_char', 54 | default=None, required=True, 55 | help='m_char') 56 | parser.add_argument('--caffe_model', dest='caffe_model', 57 | default=None, required=True, 58 | help='trained caffe model') 59 | parser.add_argument('--caffe_network', dest='caffe_network', 60 | default=None, required=True, 61 | help='caffe network') 62 | parser.add_argument('--y_tag', dest='y_tag', 63 | default=None, required=True, 64 | help='y_tag') 65 | parser.add_argument('--caffe_img_w', dest='caffe_img_w', 66 | default=None, required=True, 67 | help='caffe_img_w') 68 | parser.add_argument('--caffe_img_h', dest='caffe_img_h', 69 | default=None, required=True, 70 | help='caffe_img_h') 71 | parser.add_argument('--debug_path', dest='debug_path', 72 | default=None, required=False, 73 | help='debug path') 74 | options = parser.parse_args() 75 | 76 | captcha_img = os.path.expanduser(options.captcha_img) 77 | num_char = int(options.num_char) 78 | caffe_model = os.path.expanduser(options.caffe_model) 79 | caffe_network = os.path.expanduser(options.caffe_network) 80 | y_tag = os.path.expanduser(options.y_tag) 81 | caffe_img_w = int(options.caffe_img_w) 82 | caffe_img_h = int(options.caffe_img_h) 83 | norm_width = 200 84 | norm_height = 200 85 | 86 | debug_path = None 87 | if options.debug_path is not None: 88 | debug_path = os.path.expanduser(options.debug_path) 89 | if os.path.isdir(debug_path): 90 | shutil.rmtree(debug_path) 91 | os.makedirs(debug_path) 92 | 93 | image = cv2.imread(captcha_img) 94 | 95 | proc_keep_ratio = PreprocessResizeKeepRatio( 96 | width=norm_width, height=norm_height) 97 | image = proc_keep_ratio.do(image) 98 | 99 | char_segmentation = CharSegmentation( 100 | num_char=num_char, 101 | debug_path=debug_path) 102 | segmentations = 
char_segmentation.do(image) 103 | 104 | caffe_cls = CaffeCls(caffe_network, caffe_model, y_tag, 105 | width=caffe_img_w, height=caffe_img_h) 106 | 107 | search_best_segmentation = SearchBestSegmentation( 108 | caffe_cls, char_segmentation.bin_img, 109 | debug_path) 110 | eval_segmentations = search_best_segmentation.do(segmentations) 111 | 112 | n_top = 100 113 | for i, eval_segmentation in enumerate(eval_segmentations): 114 | if i > n_top: 115 | break 116 | print(eval_segmentation) -------------------------------------------------------------------------------- /chinese_fonts/DroidSansFallbackFull.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/DroidSansFallbackFull.ttf -------------------------------------------------------------------------------- /chinese_fonts/NotoSansHans-Black.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/NotoSansHans-Black.otf -------------------------------------------------------------------------------- /chinese_fonts/NotoSansHans-Bold.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/NotoSansHans-Bold.otf -------------------------------------------------------------------------------- /chinese_fonts/NotoSansHans-DemiLight.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/NotoSansHans-DemiLight.otf -------------------------------------------------------------------------------- /chinese_fonts/NotoSansHans-Light.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/NotoSansHans-Light.otf -------------------------------------------------------------------------------- /chinese_fonts/NotoSansHans-Medium.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/NotoSansHans-Medium.otf -------------------------------------------------------------------------------- /chinese_fonts/NotoSansHans-Regular.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/NotoSansHans-Regular.otf -------------------------------------------------------------------------------- /chinese_fonts/NotoSansHans-Thin-Windows.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/NotoSansHans-Thin-Windows.otf -------------------------------------------------------------------------------- /chinese_fonts/fangzheng_fangsong.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/fangzheng_fangsong.ttf -------------------------------------------------------------------------------- /chinese_fonts/fangzheng_heiti.TTF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/fangzheng_heiti.TTF -------------------------------------------------------------------------------- /chinese_fonts/fangzheng_jieti.TTF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/fangzheng_jieti.TTF -------------------------------------------------------------------------------- /chinese_fonts/fangzheng_shusong.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/fangzheng_shusong.ttf -------------------------------------------------------------------------------- /chinese_fonts/mingliu.ttc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/chinese_fonts/mingliu.ttc -------------------------------------------------------------------------------- /deep_idocr.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import print_function 4 | 5 | import argparse 6 | from argparse import RawTextHelpFormatter 7 | import os 8 | import shutil 9 | import cv2 10 | from deep_ocr.caffe_clf import CaffeClsBuilder 11 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatio 12 | from deep_ocr.cv2_img_proc import PreprocessBackgroundMask 13 | from deep_ocr.id_cards.segmentation import Segmentation 14 | from deep_ocr.id_cards.char_set import CharSet 15 | from deep_ocr.reco_text_line import RecoTextLine 16 | from deep_ocr.reco_text_line import RectImageClassifier 17 | 18 | import json 19 | from flask import Flask 20 | from flask import request 21 | from flask import redirect 22 | from flask import jsonify 23 | app = Flask(__name__) 24 | 25 | @app.route('/upload', methods=['GET', 'POST']) 26 | def upload_file(): 27 | if request.method == 'POST': 28 | f = request.files['file'] 29 | f.save('/home/ygh/flask/id_card_img.jpg') 30 | 31 | ## path_img = os.path.expanduser("/home/ygh/deep_ocr/data/id_card_img.jpg") 32 | path_img = os.path.expanduser("/home/ygh/flask/id_card_img.jpg") 33 | debug_path = os.path.expanduser("/home/ygh/deep_ocr_workspace/debug") 34 | if debug_path is not None: 35 | if os.path.isdir(debug_path): 36 | shutil.rmtree(debug_path) 37 | os.makedirs(debug_path) 38 | 39 | cls_dir_sim = os.path.expanduser("/home/ygh/deep_ocr_workspace/data/chongdata_caffe_cn_sim_digits_64_64") 40 | cls_dir_ua = os.path.expanduser("/home/ygh/deep_ocr_workspace/data/chongdata_train_ualpha_digits_64_64") 41 | 42 | caffe_cls_builder = CaffeClsBuilder() 43 | cls_sim = caffe_cls_builder.build(cls_dir=cls_dir_sim,) 44 | cls_ua = caffe_cls_builder.build(cls_dir=cls_dir_ua,) 45 | caffe_classifiers = {"sim": cls_sim, "ua": cls_ua} 46 | 47 | seg_norm_width = 600 48 | seg_norm_height = 600 49 | preprocess_resize = PreprocessResizeKeepRatio( 50 | seg_norm_width, 
seg_norm_height) 51 | id_card_img = cv2.imread(path_img) 52 | id_card_img = preprocess_resize.do(id_card_img) 53 | segmentation = Segmentation(debug_path) 54 | key_to_segmentation = segmentation.do(id_card_img) 55 | 56 | boundaries = [ 57 | ((0, 0, 0), (100, 100, 100)), 58 | ] 59 | boundary2binimgs = [] 60 | for boundary in boundaries: 61 | preprocess_bg_mask = PreprocessBackgroundMask(boundary) 62 | id_card_img_mask = preprocess_bg_mask.do(id_card_img) 63 | boundary2binimgs.append((boundary, id_card_img_mask)) 64 | 65 | char_set = CharSet() 66 | char_set_data = char_set.get() 67 | 68 | rect_img_clf = RectImageClassifier( 69 | None, 70 | None, 71 | char_set, 72 | caffe_cls_width=64, 73 | caffe_cls_height=64) 74 | 75 | reco_text_line = RecoTextLine(rect_img_clf) 76 | 77 | key_ocr_res = {} 78 | for key in key_to_segmentation: 79 | key_ocr_res[key] = [] 80 | print("="*64) 81 | print(key) 82 | for i, segment in enumerate(key_to_segmentation[key]): 83 | if debug_path is not None: 84 | line_debug_path = "key_%s_%i" % (key, i) 85 | line_debug_path = os.path.join(debug_path, line_debug_path) 86 | reco_text_line.debug_path = line_debug_path 87 | reco_text_line.char_set = char_set_data[key] 88 | ## initialize the classifier for this field 89 | caffe_cls = caffe_classifiers[ 90 | char_set_data[key]["caffe_cls"]] 91 | ## feed the text line into the model for recognition 92 | ocr_res = reco_text_line.do(boundary2binimgs, segment, caffe_cls) 93 | ## append the result to the output list 94 | key_ocr_res[key].append(ocr_res) 95 | print("ocr res:") 96 | for key in key_ocr_res: 97 | print("="*60) 98 | print(key) 99 | for res_i in key_ocr_res[key]: 100 | print(res_i.encode("utf-8")) 101 | if debug_path is not None: 102 | path_debug_image_mask = os.path.join( 103 | debug_path, "reco_debug_01_image_mask.jpg") 104 | cv2.imwrite(path_debug_image_mask, id_card_img_mask) 105 | 106 | ## return the result, packaged as JSON key-value pairs 107 | data = [{"result":"success","response":{"name":key_ocr_res["name"],"address":key_ocr_res["address"],"month":key_ocr_res["month"],"minzu":key_ocr_res["minzu"],"year":key_ocr_res["year"],"sex":key_ocr_res["sex"],"id":key_ocr_res["id"],"day":key_ocr_res["day"]}}] 108 | ## data = '{"result":"success"} 109 | ## result = json.loads(data) 110 | return json.dumps(data,skipkeys=True,ensure_ascii=False,encoding="utf-8") 111 | else: 112 | data2 = [{"result":"error"}] 113 | ## result2 = json.loads(data2) 114 | return json.dumps(data2) 115 | ## return "error" 116 | 117 | 118 | 119 | 120 | if __name__ == '__main__': 121 | app.run(host='0.0.0.0',port=8880) 122 |
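A minimal client sketch for the Flask service above (illustration only, assuming the server is running locally and the third-party requests package is installed; the multipart field name "file" matches request.files['file'] in upload_file):

import requests

# POST an ID card image to the /upload endpoint served on port 8880
with open("id_card_img.jpg", "rb") as f:
    resp = requests.post("http://127.0.0.1:8880/upload", files={"file": f})
# on success the body is a JSON list whose "response" object carries the
# name/address/year/month/day/sex/minzu/id fields recognized above
print(resp.text)

-------------------------------------------------------------------------------- /deploy_lenet_train_test.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { shape: { dim:1 dim: 1 dim: 64 dim: 64 } } 7 | } 8 | layer { 9 | name: "conv11" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv11" 13 | param { 14 | lr_mult: 1 15 | } 16 | param { 17 | lr_mult: 2 18 | } 19 | convolution_param { 20 | num_output: 64 21 | pad: 5 22 | kernel_size: 11 23 | stride: 1 24 | weight_filler { 25 | type: "xavier" 26 | } 27 | bias_filler { 28 | type: "constant" 29 | } 30 | } 31 | } 32 | layer { 33 | name: "pool11" 34 | type: "Pooling" 35 | bottom: "conv11" 36 | top: "pool11" 37 | pooling_param { 38 | pool: MAX 39 | kernel_size: 2 40 | stride: 2 41 | } 42 | } 43 | layer { 44 | name: "conv7" 45 | type: "Convolution" 46 | bottom: "pool11" 47 | top: "conv7" 48 | param { 49 | lr_mult: 1 50 | } 51 | param { 52 | lr_mult: 2 53 | } 54 | 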
convolution_param { 55 | num_output: 128 56 | pad: 3 57 | kernel_size: 7 58 | stride: 1 59 | weight_filler { 60 | type: "xavier" 61 | } 62 | bias_filler { 63 | type: "constant" 64 | } 65 | } 66 | } 67 | layer { 68 | name: "pool7" 69 | type: "Pooling" 70 | bottom: "conv7" 71 | top: "pool7" 72 | pooling_param { 73 | pool: MAX 74 | kernel_size: 2 75 | stride: 2 76 | } 77 | } 78 | layer { 79 | name: "conv5" 80 | type: "Convolution" 81 | bottom: "pool7" 82 | top: "conv5" 83 | param { 84 | lr_mult: 1 85 | } 86 | param { 87 | lr_mult: 2 88 | } 89 | convolution_param { 90 | num_output: 256 91 | pad: 2 92 | kernel_size: 5 93 | stride: 1 94 | weight_filler { 95 | type: "xavier" 96 | } 97 | bias_filler { 98 | type: "constant" 99 | } 100 | } 101 | } 102 | layer { 103 | name: "pool5" 104 | type: "Pooling" 105 | bottom: "conv5" 106 | top: "pool5" 107 | pooling_param { 108 | pool: MAX 109 | kernel_size: 2 110 | stride: 2 111 | } 112 | } 113 | layer { 114 | name: "conv3" 115 | type: "Convolution" 116 | bottom: "pool5" 117 | top: "conv3" 118 | param { 119 | lr_mult: 1 120 | } 121 | param { 122 | lr_mult: 2 123 | } 124 | convolution_param { 125 | num_output: 512 126 | pad: 1 127 | kernel_size: 3 128 | stride: 1 129 | weight_filler { 130 | type: "xavier" 131 | } 132 | bias_filler { 133 | type: "constant" 134 | } 135 | } 136 | } 137 | layer { 138 | name: "pool3" 139 | type: "Pooling" 140 | bottom: "conv3" 141 | top: "pool3" 142 | pooling_param { 143 | pool: MAX 144 | kernel_size: 2 145 | stride: 2 146 | } 147 | } 148 | layer { 149 | name: "fc10000" 150 | type: "InnerProduct" 151 | # learning rate and decay multipliers for the weights 152 | param { lr_mult: 1 } 153 | # learning rate and decay multipliers for the biases 154 | param { lr_mult: 2 } 155 | inner_product_param { 156 | num_output: 10000 157 | weight_filler { 158 | type: "gaussian" 159 | std: 0.01 160 | } 161 | bias_filler { 162 | type: "constant" 163 | value: 0 164 | } 165 | } 166 | bottom: "pool3" 167 | top: "fc10000" 168 | } 169 | layer { 170 | name: "relu1" 171 | type: "ReLU" 172 | bottom: "fc10000" 173 | top: "fc10000" 174 | } 175 | layer { 176 | name: "fc6503" 177 | type: "InnerProduct" 178 | # learning rate and decay multipliers for the weights 179 | param { lr_mult: 1 } 180 | # learning rate and decay multipliers for the biases 181 | param { lr_mult: 2 } 182 | inner_product_param { 183 | num_output: 6503 184 | weight_filler { 185 | type: "gaussian" 186 | std: 0.01 187 | } 188 | bias_filler { 189 | type: "constant" 190 | value: 0 191 | } 192 | } 193 | bottom: "fc10000" 194 | top: "fc6503" 195 | } 196 | layer { 197 | name: "prob" 198 | type: "Softmax" 199 | bottom: "fc6503" 200 | top: "prob" 201 | } 202 | -------------------------------------------------------------------------------- /id_num_fonts/OCR-B.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/id_num_fonts/OCR-B.ttf -------------------------------------------------------------------------------- /id_num_fonts/huawenxihei.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/id_num_fonts/huawenxihei.ttf -------------------------------------------------------------------------------- /id_num_fonts/msyh.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/id_num_fonts/msyh.ttf -------------------------------------------------------------------------------- /id_num_fonts/simhei.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/id_num_fonts/simhei.ttf -------------------------------------------------------------------------------- /id_num_fonts/simkai.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/id_num_fonts/simkai.ttf -------------------------------------------------------------------------------- /lenet_solver.prototxt: -------------------------------------------------------------------------------- 1 | # The train/test net protocol buffer definition 2 | net: "examples/dunhe_train_char_cn_sim_digits_64_64/lenet_train_test.prototxt" 3 | # test_iter specifies how many forward passes the test should carry out. 4 | # In the case of MNIST, we have test batch size 100 and 100 test iterations, 5 | # covering the full 10,000 testing images. 6 | test_iter: 100 7 | # Carry out testing every 500 training iterations. 8 | test_interval: 500 9 | # The base learning rate, momentum and the weight decay of the network. 10 | base_lr: 0.01 11 | momentum: 0.9 12 | weight_decay: 0.0005 13 | # The learning rate policy 14 | lr_policy: "inv" 15 | gamma: 0.0001 16 | power: 0.75 17 | # Display every 100 iterations 18 | display: 100 19 | # The maximum number of iterations 20 | max_iter: 50000 21 | # snapshot intermediate results 22 | snapshot: 5000 23 | snapshot_prefix: "examples/dunhe_train_char_cn_sim_digits_64_64/lenet" 24 | # solver mode: CPU or GPU 25 | solver_mode: GPU 26 | -------------------------------------------------------------------------------- /lenet_train_test.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "mnist" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | scale: 0.00390625 12 | } 13 | data_param { 14 | source: "/home/user/Projects/data/caffe_dataset_cn_sim/train_lmdb" 15 | batch_size: 64 16 | backend: LMDB 17 | } 18 | } 19 | layer { 20 | name: "mnist" 21 | type: "Data" 22 | top: "data" 23 | top: "label" 24 | include { 25 | phase: TEST 26 | } 27 | transform_param { 28 | scale: 0.00390625 29 | } 30 | data_param { 31 | source: "/home/user/Projects/data/caffe_dataset_cn_sim/val_lmdb" 32 | batch_size: 10 33 | backend: LMDB 34 | } 35 | } 36 | layer { 37 | name: "conv11" 38 | type: "Convolution" 39 | bottom: "data" 40 | top: "conv11" 41 | param { 42 | lr_mult: 1 43 | } 44 | param { 45 | lr_mult: 2 46 | } 47 | convolution_param { 48 | num_output: 64 49 | pad: 5 50 | kernel_size: 11 51 | stride: 1 52 | weight_filler { 53 | type: "xavier" 54 | } 55 | bias_filler { 56 | type: "constant" 57 | } 58 | } 59 | } 60 | layer { 61 | name: "pool11" 62 | type: "Pooling" 63 | bottom: "conv11" 64 | top: "pool11" 65 | pooling_param { 66 | pool: MAX 67 | kernel_size: 2 68 | stride: 2 69 | } 70 | } 71 | layer { 72 | name: "conv7" 73 | type: "Convolution" 74 | bottom: "pool11" 75 | top: "conv7" 76 | param { 77 | lr_mult: 1 78 | } 79 | param { 80 | lr_mult: 2 81 | } 82 | convolution_param { 83 | num_output: 128 84 | pad: 3 85 | kernel_size: 7 86 | 
stride: 1 87 | weight_filler { 88 | type: "xavier" 89 | } 90 | bias_filler { 91 | type: "constant" 92 | } 93 | } 94 | } 95 | layer { 96 | name: "pool7" 97 | type: "Pooling" 98 | bottom: "conv7" 99 | top: "pool7" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 2 103 | stride: 2 104 | } 105 | } 106 | layer { 107 | name: "conv5" 108 | type: "Convolution" 109 | bottom: "pool7" 110 | top: "conv5" 111 | param { 112 | lr_mult: 1 113 | } 114 | param { 115 | lr_mult: 2 116 | } 117 | convolution_param { 118 | num_output: 256 119 | pad: 2 120 | kernel_size: 5 121 | stride: 1 122 | weight_filler { 123 | type: "xavier" 124 | } 125 | bias_filler { 126 | type: "constant" 127 | } 128 | } 129 | } 130 | layer { 131 | name: "pool5" 132 | type: "Pooling" 133 | bottom: "conv5" 134 | top: "pool5" 135 | pooling_param { 136 | pool: MAX 137 | kernel_size: 2 138 | stride: 2 139 | } 140 | } 141 | layer { 142 | name: "conv3" 143 | type: "Convolution" 144 | bottom: "pool5" 145 | top: "conv3" 146 | param { 147 | lr_mult: 1 148 | } 149 | param { 150 | lr_mult: 2 151 | } 152 | convolution_param { 153 | num_output: 512 154 | pad: 1 155 | kernel_size: 3 156 | stride: 1 157 | weight_filler { 158 | type: "xavier" 159 | } 160 | bias_filler { 161 | type: "constant" 162 | } 163 | } 164 | } 165 | layer { 166 | name: "pool3" 167 | type: "Pooling" 168 | bottom: "conv3" 169 | top: "pool3" 170 | pooling_param { 171 | pool: MAX 172 | kernel_size: 2 173 | stride: 2 174 | } 175 | } 176 | layer { 177 | name: "fc10000" 178 | type: "InnerProduct" 179 | # learning rate and decay multipliers for the weights 180 | param { lr_mult: 1 } 181 | # learning rate and decay multipliers for the biases 182 | param { lr_mult: 2 } 183 | inner_product_param { 184 | num_output: 10000 185 | weight_filler { 186 | type: "gaussian" 187 | std: 0.01 188 | } 189 | bias_filler { 190 | type: "constant" 191 | value: 0 192 | } 193 | } 194 | bottom: "pool3" 195 | top: "fc10000" 196 | } 197 | layer { 198 | name: "relu1" 199 | type: "ReLU" 200 | bottom: "fc10000" 201 | top: "fc10000" 202 | } 203 | layer { 204 | name: "fc6492" 205 | type: "InnerProduct" 206 | # learning rate and decay multipliers for the weights 207 | param { lr_mult: 1 } 208 | # learning rate and decay multipliers for the biases 209 | param { lr_mult: 2 } 210 | inner_product_param { 211 | num_output: 6492 212 | weight_filler { 213 | type: "gaussian" 214 | std: 0.01 215 | } 216 | bias_filler { 217 | type: "constant" 218 | value: 0 219 | } 220 | } 221 | bottom: "fc10000" 222 | top: "fc6492" 223 | } 224 | layer { 225 | name: "accuracy" 226 | type: "Accuracy" 227 | bottom: "fc6492" 228 | bottom: "label" 229 | top: "accuracy" 230 | include { 231 | phase: TEST 232 | } 233 | } 234 | layer { 235 | name: "loss" 236 | type: "SoftmaxWithLoss" 237 | bottom: "fc6492" 238 | bottom: "label" 239 | top: "loss" 240 | } 241 | -------------------------------------------------------------------------------- /lenet_train_test.prototxt.bak: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "mnist" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | scale: 0.00390625 12 | } 13 | data_param { 14 | source: "/home/user/Projects/data/caffe_dataset_cn_sim/train_lmdb" 15 | batch_size: 64 16 | backend: LMDB 17 | } 18 | } 19 | layer { 20 | name: "mnist" 21 | type: "Data" 22 | top: "data" 23 | top: "label" 24 | include { 25 | phase: TEST 26 | } 27 | transform_param { 28 | scale: 0.00390625 29 | 
} 30 | data_param { 31 | source: "/home/user/Projects/data/caffe_dataset_cn_sim/val_lmdb" 32 | batch_size: 10 33 | backend: LMDB 34 | } 35 | } 36 | layer { 37 | name: "conv11" 38 | type: "Convolution" 39 | bottom: "data" 40 | top: "conv11" 41 | param { 42 | lr_mult: 1 43 | } 44 | param { 45 | lr_mult: 2 46 | } 47 | convolution_param { 48 | num_output: 64 49 | pad: 5 50 | kernel_size: 11 51 | stride: 1 52 | weight_filler { 53 | type: "xavier" 54 | } 55 | bias_filler { 56 | type: "constant" 57 | } 58 | } 59 | } 60 | layer { 61 | name: "pool11" 62 | type: "Pooling" 63 | bottom: "conv11" 64 | top: "pool11" 65 | pooling_param { 66 | pool: MAX 67 | kernel_size: 2 68 | stride: 2 69 | } 70 | } 71 | layer { 72 | name: "conv7" 73 | type: "Convolution" 74 | bottom: "pool11" 75 | top: "conv7" 76 | param { 77 | lr_mult: 1 78 | } 79 | param { 80 | lr_mult: 2 81 | } 82 | convolution_param { 83 | num_output: 128 84 | pad: 3 85 | kernel_size: 7 86 | stride: 1 87 | weight_filler { 88 | type: "xavier" 89 | } 90 | bias_filler { 91 | type: "constant" 92 | } 93 | } 94 | } 95 | layer { 96 | name: "pool7" 97 | type: "Pooling" 98 | bottom: "conv7" 99 | top: "pool7" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 2 103 | stride: 2 104 | } 105 | } 106 | layer { 107 | name: "conv5" 108 | type: "Convolution" 109 | bottom: "pool7" 110 | top: "conv5" 111 | param { 112 | lr_mult: 1 113 | } 114 | param { 115 | lr_mult: 2 116 | } 117 | convolution_param { 118 | num_output: 256 119 | pad: 2 120 | kernel_size: 5 121 | stride: 1 122 | weight_filler { 123 | type: "xavier" 124 | } 125 | bias_filler { 126 | type: "constant" 127 | } 128 | } 129 | } 130 | layer { 131 | name: "pool5" 132 | type: "Pooling" 133 | bottom: "conv5" 134 | top: "pool5" 135 | pooling_param { 136 | pool: MAX 137 | kernel_size: 2 138 | stride: 2 139 | } 140 | } 141 | layer { 142 | name: "conv3" 143 | type: "Convolution" 144 | bottom: "pool5" 145 | top: "conv3" 146 | param { 147 | lr_mult: 1 148 | } 149 | param { 150 | lr_mult: 2 151 | } 152 | convolution_param { 153 | num_output: 512 154 | pad: 1 155 | kernel_size: 3 156 | stride: 1 157 | weight_filler { 158 | type: "xavier" 159 | } 160 | bias_filler { 161 | type: "constant" 162 | } 163 | } 164 | } 165 | layer { 166 | name: "pool3" 167 | type: "Pooling" 168 | bottom: "conv3" 169 | top: "pool3" 170 | pooling_param { 171 | pool: MAX 172 | kernel_size: 2 173 | stride: 2 174 | } 175 | } 176 | layer { 177 | name: "fc10000" 178 | type: "InnerProduct" 179 | # learning rate and decay multipliers for the weights 180 | param { lr_mult: 1 } 181 | # learning rate and decay multipliers for the biases 182 | param { lr_mult: 2 } 183 | inner_product_param { 184 | num_output: 10000 185 | weight_filler { 186 | type: "gaussian" 187 | std: 0.01 188 | } 189 | bias_filler { 190 | type: "constant" 191 | value: 0 192 | } 193 | } 194 | bottom: "pool3" 195 | top: "fc10000" 196 | } 197 | layer { 198 | name: "relu1" 199 | type: "ReLU" 200 | bottom: "fc10000" 201 | top: "fc10000" 202 | } 203 | layer { 204 | name: "fc6503" 205 | type: "InnerProduct" 206 | # learning rate and decay multipliers for the weights 207 | param { lr_mult: 1 } 208 | # learning rate and decay multipliers for the biases 209 | param { lr_mult: 2 } 210 | inner_product_param { 211 | num_output: 6503 212 | weight_filler { 213 | type: "gaussian" 214 | std: 0.01 215 | } 216 | bias_filler { 217 | type: "constant" 218 | value: 0 219 | } 220 | } 221 | bottom: "fc10000" 222 | top: "fc6503" 223 | } 224 | layer { 225 | name: "accuracy" 226 | type: "Accuracy" 227 | 
bottom: "fc6503" 228 | bottom: "label" 229 | top: "accuracy" 230 | include { 231 | phase: TEST 232 | } 233 | } 234 | layer { 235 | name: "loss" 236 | type: "SoftmaxWithLoss" 237 | bottom: "fc6503" 238 | bottom: "label" 239 | top: "loss" 240 | } 241 | -------------------------------------------------------------------------------- /python/.gitignore: -------------------------------------------------------------------------------- 1 | ./.idea 2 | -------------------------------------------------------------------------------- /python/deep_ocr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/python/deep_ocr/__init__.py -------------------------------------------------------------------------------- /python/deep_ocr/caffe_clf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import caffe 3 | import json 4 | import numpy as np 5 | import os 6 | import cv2 7 | import shutil 8 | import copy 9 | 10 | class CaffeCls(object): 11 | def __init__(self, 12 | model_def, 13 | model_weights, 14 | y_tag_json_path, 15 | is_mode_cpu=True, 16 | width=64, 17 | height=64): 18 | self.net = caffe.Net(model_def, 19 | model_weights, 20 | caffe.TEST) 21 | if is_mode_cpu: 22 | caffe.set_mode_cpu() 23 | self.y_tag_json = json.load(open(y_tag_json_path, "r")) 24 | self.width = width 25 | self.height = height 26 | 27 | def predict_cv2_img(self, cv2_img): 28 | shape = cv2_img.shape 29 | cv2_imgs = cv2_img.reshape((1, shape[0], shape[1])) 30 | return self.predict_cv2_imgs(cv2_imgs)[0] 31 | 32 | 33 | def _predict_cv2_imgs_sub(self, cv2_imgs, pos_start, pos_end): 34 | cv2_imgs_sub = cv2_imgs[pos_start: pos_end] 35 | 36 | self.net.blobs['data'].reshape(cv2_imgs_sub.shape[0], 1, 37 | self.width, self.height) 38 | self.net.blobs['data'].data[...] 
= cv2_imgs_sub.reshape( 39 | (cv2_imgs_sub.shape[0], 1, self.width, self.height)) 40 | output = self.net.forward() 41 | 42 | output_tag_to_max_proba = [] 43 | 44 | num_sample = cv2_imgs_sub.shape[0] 45 | for i in range(num_sample): 46 | output_prob = output['prob'][i] 47 | output_prob_index = sorted( 48 | range(len(output_prob)), 49 | key=lambda x:output_prob[x], 50 | reverse=True) 51 | output_tag_to_probas = [] 52 | for index in output_prob_index: 53 | item = (self.y_tag_json[str(index)], 54 | output_prob[index]) 55 | output_tag_to_probas.append(item) 56 | # output_tag_to_probas = output_tag_to_probas[:2] 57 | output_tag_to_max_proba.append(output_tag_to_probas) 58 | return output_tag_to_max_proba 59 | 60 | def predict_cv2_imgs(self, cv2_imgs, step=50): 61 | output_tag_to_max_proba = [] 62 | num_sample = cv2_imgs.shape[0] 63 | for i in range(0, num_sample, step): 64 | pos_end = min(num_sample, (i + step)) 65 | output_tag_to_max_proba += \ 66 | self._predict_cv2_imgs_sub(cv2_imgs, i, pos_end) 67 | return output_tag_to_max_proba 68 | 69 | 70 | class CaffeClsBuilder(object): 71 | 72 | def __init__(self,): 73 | pass 74 | 75 | def build(self, 76 | cls_dir, 77 | is_mode_cpu=True, 78 | width=64, 79 | height=64): 80 | model_def = os.path.join(cls_dir, "model_def.prototxt") 81 | model_weights = os.path.join(cls_dir, "model_weights.caffemodel") 82 | y_tag_json_path = os.path.join(cls_dir, "y_tag.json") 83 | return CaffeCls( 84 | model_def=model_def, 85 | model_weights=model_weights, 86 | y_tag_json_path=y_tag_json_path, 87 | is_mode_cpu=is_mode_cpu, 88 | width=width, 89 | height=height) 90 | -------------------------------------------------------------------------------- /python/deep_ocr/captcha/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /python/deep_ocr/captcha/char_segmentation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import cv2 4 | import os 5 | import numpy as np 6 | from scipy.signal import savgol_filter 7 | from scipy.signal import argrelextrema 8 | import itertools 9 | 10 | class CharSegmentation(object): 11 | 12 | def __init__(self, num_char, debug_path = None): 13 | self.num_char = num_char 14 | self.debug_path = debug_path 15 | self.bin_img = None 16 | 17 | def find_lowest_nonzero_curve(self, bin_img): 18 | #cv2.imshow('bin_img', bin_img) 19 | #cv2.waitKey(0) 20 | #cv2.destroyAllWindows() 21 | height, width = bin_img.shape 22 | nonzero_curve = [] 23 | for i in range(width): 24 | is_found = False 25 | for j in range(height): 26 | if bin_img[j, i] != 0: 27 | nonzero_curve.append(j) 28 | is_found = True 29 | break 30 | if not is_found: 31 | nonzero_curve.append(height-1) 32 | return np.asarray(nonzero_curve) 33 | 34 | def merge_closest_points(self, min_x_coordinates, min_diff_x=10): 35 | ret = [] 36 | n = len(min_x_coordinates) 37 | taken = [False] * n 38 | for i in range(n): 39 | if not taken[i]: 40 | count = 1 41 | point = min_x_coordinates[i] 42 | taken[i] = True 43 | for j in range(i+1, n): 44 | if abs(min_x_coordinates[i] - min_x_coordinates[j]) < min_diff_x: 45 | point += min_x_coordinates[j] 46 | count+=1 47 | taken[j] = True 48 | point /= count 49 | ret.append(point) 50 | return ret 51 | 52 | def remove_noise_by_contours(self, bin_img): 53 | c_bin_img = np.copy(bin_img) 54 | min_area = 100 55 | max_area = bin_img.shape[0] * bin_img.shape[1] 56 
| min_w = 10 57 | min_h = 10 58 | if cv2.__version__[0] == "2": 59 | contours, hierarchy = cv2.findContours( 60 | c_bin_img, 61 | cv2.RETR_TREE, 62 | cv2.CHAIN_APPROX_SIMPLE) 63 | else: 64 | _, contours, hierarchy = cv2.findContours( 65 | c_bin_img, 66 | cv2.RETR_TREE, 67 | cv2.CHAIN_APPROX_SIMPLE) 68 | 69 | filtered_contours = [] 70 | for cnt in contours: 71 | x, y, w, h = cv2.boundingRect(cnt) 72 | if w * h >= min_area and (h >= min_h \ 73 | or w >= min_w) and w * h <= max_area: 74 | filtered_contours.append(cnt) 75 | else: 76 | bin_img[y:y+h, x:x+w] = 0 77 | contours = filtered_contours 78 | return bin_img 79 | 80 | 81 | def do(self, cv2_color_img): 82 | # return all the possible segmentations 83 | cv_grey_img = cv2.cvtColor(cv2_color_img, cv2.COLOR_BGR2GRAY) 84 | height, width = cv_grey_img.shape 85 | adaptive_threshold = cv2.adaptiveThreshold( 86 | cv_grey_img, 87 | 255, 88 | cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\ 89 | cv2.THRESH_BINARY_INV, 11, 2) 90 | 91 | adaptive_threshold = self.remove_noise_by_contours(adaptive_threshold) 92 | self.bin_img = adaptive_threshold 93 | 94 | nonzero_curve = self.find_lowest_nonzero_curve(adaptive_threshold) 95 | nonzero_curve = savgol_filter(nonzero_curve, 15, 2) 96 | min_points = argrelextrema(nonzero_curve, np.greater) 97 | min_points = min_points[0] 98 | min_points = [i for i in min_points] 99 | 100 | #min_points.append(width-1) 101 | #min_points = [0,] + min_points 102 | 103 | min_points = self.merge_closest_points(min_points, width * 0.02) 104 | 105 | print("min_points=", min_points) 106 | segmentations = [] 107 | for selected_min_points in itertools.combinations(min_points, 108 | self.num_char+1): 109 | print("selected_min_points=", selected_min_points) 110 | one_segmentation = [] 111 | prev_min_point = selected_min_points[0] 112 | for i, selected_min_point in enumerate(selected_min_points): 113 | if i != 0: 114 | one_segmentation.append( 115 | (prev_min_point, 0, 116 | selected_min_point - prev_min_point, height)) 117 | prev_min_point = selected_min_point 118 | segmentations.append(one_segmentation) 119 | 120 | if self.debug_path is not None: 121 | import matplotlib.pyplot as plt 122 | path_cv2_color_img = os.path.join(self.debug_path, 123 | "cv2_color_img.jpg") 124 | path_cv_grey_img = os.path.join(self.debug_path, 125 | "cv2_grey_img.jpg") 126 | path_adaptive_threshold = os.path.join(self.debug_path, 127 | "adaptive_threshold.jpg") 128 | ## draw possible segmentation on image 129 | for min_point in min_points: 130 | cv2.line(cv2_color_img, (min_point, 0), 131 | (min_point, height), (255, 0, 0)) 132 | 133 | cv2.imwrite(path_cv2_color_img, cv2_color_img) 134 | cv2.imwrite(path_cv_grey_img, cv_grey_img) 135 | cv2.imwrite(path_adaptive_threshold, adaptive_threshold) 136 | 137 | min_point_vals = [nonzero_curve[i] for i in min_points] 138 | #plt.plot(range(nonzero_curve.shape[0]), nonzero_curve) 139 | #plt.plot(min_points, min_point_vals, 'ro') 140 | #plt.gca().invert_yaxis() 141 | #plt.show() 142 | return segmentations 143 | -------------------------------------------------------------------------------- /python/deep_ocr/captcha/rm_noise.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import PIL.Image 4 | import sys 5 | 6 | # http://stackoverflow.com/questions/11253899/removing-the-background-noise-of-a-captcha-image-by-replicating-the-chopping-fil 7 | 8 | class RMNoise(object): 9 | def __init__(self): 10 | pass 11 | 12 | def do_path(self, image_path, out_path, chop=2): 13 | # 
python chop.py [chop-factor] [in-file] [out-file] 14 | 15 | image = PIL.Image.open(image_path).convert('1') 16 | width, height = image.size 17 | data = image.load() 18 | 19 | # Iterate through the rows. 20 | for y in range(height): 21 | for x in range(width): 22 | # Make sure we're on a dark pixel. 23 | if data[x, y] > 128: 24 | continue 25 | # Keep a total of non-white contiguous pixels. 26 | total = 0 27 | # Check a sequence ranging from x to image.width. 28 | for c in range(x, width): 29 | # If the pixel is dark, add it to the total. 30 | if data[c, y] < 128: 31 | total += 1 32 | # If the pixel is light, stop the sequence. 33 | else: 34 | break 35 | # If the total is less than the chop, replace everything with white. 36 | if total <= chop: 37 | for c in range(total): 38 | data[x + c, y] = 255 39 | # Skip this sequence we just altered. 40 | x += total 41 | 42 | # Iterate through the columns. 43 | for x in range(width): 44 | for y in range(height): 45 | # Make sure we're on a dark pixel. 46 | if data[x, y] > 128: 47 | continue 48 | # Keep a total of non-white contiguous pixels. 49 | total = 0 50 | # Check a sequence ranging from y to image.height. 51 | for c in range(y, height): 52 | # If the pixel is dark, add it to the total. 53 | if data[x, c] < 128: 54 | total += 1 55 | # If the pixel is light, stop the sequence. 56 | else: 57 | break 58 | # If the total is less than the chop, replace everything with white. 59 | if total <= chop: 60 | for c in range(total): 61 | data[x, y + c] = 255 62 | # Skip this sequence we just altered. 63 | y += total 64 | 65 | image.save(out_path) -------------------------------------------------------------------------------- /python/deep_ocr/captcha/search_best_segmentation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatioFillBG 4 | from deep_ocr.cv2_img_proc import PreprocessCropZeros 5 | 6 | import numpy as np 7 | import os 8 | import cv2 9 | 10 | class SearchBestSegmentation(object): 11 | 12 | def __init__(self, cls, cv2_grey_img, debug_path=None): 13 | self.cls = cls 14 | self.cv2_img = cv2_grey_img 15 | self.debug_path = debug_path 16 | 17 | def _extract_sub_img(self, cv2_img, rect): 18 | x, y, w, h = rect 19 | return cv2_img[y: y+h-1, x: x+w-1] 20 | 21 | def _extract_sub_imgs(self, cv2_img, segmentation): 22 | char_w = self.cls.width 23 | char_h = self.cls.height 24 | proc_resize = PreprocessResizeKeepRatioFillBG( 25 | width=char_w, height=char_h, 26 | auto_avoid_fill_bg=False, 27 | fill_bg=True, margin=2) 28 | crop_zeros = PreprocessCropZeros() 29 | sub_imgs = [] 30 | for rect in segmentation: 31 | sub_img = self._extract_sub_img(cv2_img, rect) 32 | sub_img = crop_zeros.do(sub_img) 33 | sub_imgs.append(proc_resize.do(sub_img)) 34 | return np.asarray(sub_imgs)/255.0 35 | 36 | def eval_segmentation(self, cv2_img, segmentation): 37 | sub_imgs = self._extract_sub_imgs(cv2_img, segmentation) 38 | tag_to_probas = self.cls.predict_cv2_imgs(sub_imgs) 39 | #compute the proba 40 | accumulate_proba = 1.0 41 | tags = [] 42 | for tag_to_proba in tag_to_probas: 43 | tag = tag_to_proba[0][0] 44 | proba = tag_to_proba[0][1] 45 | accumulate_proba *= proba 46 | tags.append(tag) 47 | 48 | if self.debug_path is not None: 49 | import uuid 50 | sub_imgs_dir = os.path.join(self.debug_path, str(uuid.uuid1())) 51 | os.makedirs(sub_imgs_dir) 52 | for i, sub_img in enumerate(sub_imgs): 53 | image_path = os.path.join(sub_imgs_dir, "%d.jpg" % i) 54 | 
cv2.imwrite(image_path, sub_img*255.0) 55 | stat_path = os.path.join(sub_imgs_dir, "stat.txt") 56 | f_stat_path = open(stat_path, "w+") 57 | f_stat_path.write("".join(tags)) 58 | f_stat_path.write("\n") 59 | f_stat_path.write("%f" % accumulate_proba) 60 | f_stat_path.write("\n") 61 | f_stat_path.close() 62 | sub_imgs_dir_pic = sub_imgs_dir + ".jpg" 63 | cv2_img_copy = np.copy(cv2_img) 64 | for one_segmentation in segmentation: 65 | left_x = one_segmentation[0] 66 | cv2.line(cv2_img_copy, (left_x, 0), (left_x, cv2_img_copy.shape[0]), (255, 0, 0))  # draw each cut as a vertical line 67 | cv2.imwrite(sub_imgs_dir_pic, cv2_img_copy) 68 | return accumulate_proba, tags 69 | 70 | 71 | def do(self, segmentations): 72 | eval_segmentations = [] 73 | for segmentation in segmentations: 74 | accumulate_proba, tags = \ 75 | self.eval_segmentation(self.cv2_img, segmentation) 76 | # print("=" * 60) 77 | # print("accumulate_proba=", accumulate_proba) 78 | # print("tags=", tags) 79 | eval_segmentations.append((tags, accumulate_proba)) 80 | eval_segmentations = sorted(eval_segmentations, key=lambda x:x[1], reverse=True) 81 | return eval_segmentations 82 | -------------------------------------------------------------------------------- /python/deep_ocr/cv2_img_proc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import cv2 4 | import numpy as np 5 | from PIL import Image 6 | from PIL import ImageFont 7 | from PIL import ImageDraw 8 | import pickle 9 | import argparse 10 | from argparse import RawTextHelpFormatter 11 | import fnmatch 12 | import os 13 | import cv2 14 | import json 15 | import random 16 | import shutil 17 | import traceback 18 | import copy 19 | 20 | 21 | 22 | 23 | class PreprocessRemoveNonCharNoise(object): 24 | 25 | def __init__(self, char_width): 26 | self.min_w = char_width * 0.1 27 | self.min_h = char_width * 0.1 28 | 29 | self.min_area = char_width * char_width * 0.05 30 | 31 | self.max_area = char_width * char_width * 2.0 32 | 33 | def do(self, bin_img): 34 | 35 | tmp_bin_img = np.copy(bin_img) 36 | 37 | if cv2.__version__[0] == "2": 38 | contours, hierarchy = cv2.findContours( 39 | tmp_bin_img, 40 | cv2.RETR_TREE, 41 | cv2.CHAIN_APPROX_SIMPLE) 42 | else: 43 | _, contours, hierarchy = cv2.findContours( 44 | tmp_bin_img, 45 | cv2.RETR_CCOMP, 46 | cv2.CHAIN_APPROX_SIMPLE) 47 | 48 | filtered_contours = [] 49 | for cnt in contours: 50 | x, y, w, h = cv2.boundingRect(cnt) 51 | if w * h > self.max_area or w * h < self.min_area: 52 | bin_img[y:y+h, x:x+w] = 0 53 | contours = filtered_contours 54 | 55 | 56 | class PreprocessBackgroundMask(): 57 | 58 | def __init__(self, boundary): 59 | self.boundary = boundary 60 | 61 | def do(self, image): 62 | (lower, upper) = self.boundary 63 | lower = np.array(lower, dtype = "uint8") 64 | upper = np.array(upper, dtype = "uint8") 65 | mask = cv2.inRange(image, lower, upper) 66 | return mask 67 | 68 | 69 | class PreprocessCropZeros(object): 70 | 71 | def __init__(self): 72 | pass 73 | 74 | def do(self, cv2_gray_img): 75 | height = cv2_gray_img.shape[0] 76 | width = cv2_gray_img.shape[1] 77 | 78 | v_sum = np.sum(cv2_gray_img, axis=0) 79 | h_sum = np.sum(cv2_gray_img, axis=1) 80 | left = 0 81 | right = width - 1 82 | top = 0 83 | low = height - 1 84 | 85 | for i in range(width): 86 | if v_sum[i] > 0: 87 | left = i 88 | break 89 | 90 | for i in range(width - 1, -1, -1): 91 | if v_sum[i] > 0: 92 | right = i 93 | break 94 | 95 | for i in range(height): 96 | if h_sum[i] > 0: 97 | top = i 98 | break 99 | 100 | for i in range(height - 1, -1, -1): 101 | if h_sum[i] > 0: 102 | low = i 103 | break 104 | if not (top < low and right > 
-------------------------------------------------------------------------------- /python/deep_ocr/cv2_img_proc.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import cv2
4 | import numpy as np
5 | from PIL import Image
6 | from PIL import ImageFont
7 | from PIL import ImageDraw
8 | import pickle
9 | import argparse
10 | from argparse import RawTextHelpFormatter
11 | import fnmatch
12 | import os
13 | 
14 | import json
15 | import random
16 | import shutil
17 | import traceback
18 | import copy
19 | 
20 | 
21 | 
22 | 
23 | class PreprocessRemoveNonCharNoise(object):
24 | 
25 | def __init__(self, char_width):
26 | self.min_w = char_width * 0.1
27 | self.min_h = char_width * 0.1
28 | 
29 | self.min_area = char_width * char_width * 0.05
30 | 
31 | self.max_area = char_width * char_width * 2.0
32 | 
33 | def do(self, bin_img):
34 | 
35 | tmp_bin_img = np.copy(bin_img)
36 | 
37 | if cv2.__version__[0] == "3":
38 | _, contours, hierarchy = cv2.findContours(
39 | tmp_bin_img,
40 | cv2.RETR_CCOMP,
41 | cv2.CHAIN_APPROX_SIMPLE)
42 | else:
43 | contours, hierarchy = cv2.findContours(
44 | tmp_bin_img,
45 | cv2.RETR_CCOMP,
46 | cv2.CHAIN_APPROX_SIMPLE)
47 | 
48 | # blank out connected components whose area is implausible for a
49 | # character; bin_img is modified in place
50 | for cnt in contours:
51 | x, y, w, h = cv2.boundingRect(cnt)
52 | if w * h > self.max_area or w * h < self.min_area:
53 | bin_img[y:y+h, x:x+w] = 0
54 | 
55 | 
56 | class PreprocessBackgroundMask():
57 | 
58 | def __init__(self, boundary):
59 | self.boundary = boundary
60 | 
61 | def do(self, image):
62 | (lower, upper) = self.boundary
63 | lower = np.array(lower, dtype = "uint8")
64 | upper = np.array(upper, dtype = "uint8")
65 | mask = cv2.inRange(image, lower, upper)
66 | return mask
67 | 
68 | 
69 | class PreprocessCropZeros(object):
70 | 
71 | def __init__(self):
72 | pass
73 | 
74 | def do(self, cv2_gray_img):
75 | height = cv2_gray_img.shape[0]
76 | width = cv2_gray_img.shape[1]
77 | 
78 | v_sum = np.sum(cv2_gray_img, axis=0)
79 | h_sum = np.sum(cv2_gray_img, axis=1)
80 | left = 0
81 | right = width - 1
82 | top = 0
83 | low = height - 1
84 | 
85 | for i in range(width):
86 | if v_sum[i] > 0:
87 | left = i
88 | break
89 | 
90 | for i in range(width - 1, -1, -1):
91 | if v_sum[i] > 0:
92 | right = i
93 | break
94 | 
95 | for i in range(height):
96 | if h_sum[i] > 0:
97 | top = i
98 | break
99 | 
100 | for i in range(height - 1, -1, -1):
101 | if h_sum[i] > 0:
102 | low = i
103 | break
104 | if not (top < low and right > left):
105 | return cv2_gray_img
106 | 
107 | return cv2_gray_img[top: low+1, left: right+1]
108 | 
109 | 
110 | 
111 | class FindImageBBox(object):
112 | def __init__(self, ):
113 | pass
114 | 
115 | def do(self, img):
116 | height = img.shape[0]
117 | width = img.shape[1]
118 | v_sum = np.sum(img, axis=0)
119 | h_sum = np.sum(img, axis=1)
120 | left = 0
121 | right = width - 1
122 | top = 0
123 | low = height - 1
124 | for i in range(width):
125 | if v_sum[i] > 0:
126 | left = i
127 | break
128 | for i in range(width - 1, -1, -1):
129 | if v_sum[i] > 0:
130 | right = i
131 | break
132 | for i in range(height):
133 | if h_sum[i] > 0:
134 | top = i
135 | break
136 | for i in range(height - 1, -1, -1):
137 | if h_sum[i] > 0:
138 | low = i
139 | break
140 | return (left, top, right, low)
141 | 
142 | 
143 | class PreprocessResizeKeepRatio(object):
144 | 
145 | def __init__(self, width, height):
146 | self.width = width
147 | self.height = height
148 | 
149 | def do(self, cv2_img):
150 | max_width = self.width
151 | max_height = self.height
152 | 
153 | cur_height, cur_width = cv2_img.shape[:2]
154 | 
155 | ratio_w = float(max_width)/float(cur_width)
156 | ratio_h = float(max_height)/float(cur_height)
157 | ratio = min(ratio_w, ratio_h)
158 | 
159 | new_size = (min(int(cur_width*ratio), max_width),
160 | min(int(cur_height*ratio), max_height))
161 | 
162 | new_size = (max(new_size[0], 1),
163 | max(new_size[1], 1),)
164 | 
165 | resized_img = cv2.resize(cv2_img, new_size)
166 | return resized_img
167 | 
168 | 
169 | class PreprocessResizeKeepRatioFillBG(object):
170 | 
171 | def __init__(self, width, height,
172 | fill_bg=False,
173 | auto_avoid_fill_bg=True,
174 | margin=None):
175 | self.width = width
176 | self.height = height
177 | self.fill_bg = fill_bg
178 | self.auto_avoid_fill_bg = auto_avoid_fill_bg
179 | self.margin = margin
180 | 
181 | @classmethod
182 | def is_need_fill_bg(cls, cv2_img, th=0.5, max_val=255):
183 | image_shape = cv2_img.shape
184 | height, width = image_shape[:2]
185 | if height * 3 < width:
186 | return True
187 | if width * 3 < height:
188 | return True
189 | return False
190 | 
191 | @classmethod
192 | def put_img_into_center(cls, img_large, img_small, ):
193 | width_large = img_large.shape[1]
194 | height_large = img_large.shape[0]
195 | 
196 | width_small = img_small.shape[1]
197 | height_small = img_small.shape[0]
198 | 
199 | if width_large < width_small:
200 | raise ValueError("width_large < width_small")
201 | if height_large < height_small:
202 | raise ValueError("height_large < height_small")
203 | 
204 | start_width = (width_large - width_small) // 2
205 | start_height = (height_large - height_small) // 2
206 | 
207 | img_large[int(start_height):int(start_height + height_small), int(start_width):int(start_width + width_small)] = img_small
208 | return img_large
209 | 
210 | def do(self, cv2_img):
211 | 
212 | if self.margin is not None:
213 | width_minus_margin = max(2, self.width - self.margin)
214 | height_minus_margin = max(2, self.height - self.margin)
215 | else:
216 | width_minus_margin = self.width
217 | height_minus_margin = self.height
218 | 
219 | cur_height, cur_width = cv2_img.shape[:2]
220 | if len(cv2_img.shape) > 2:
221 | pix_dim = cv2_img.shape[2]
222 | else:
223 | pix_dim = None
224 | 
225 | preprocess_resize_keep_ratio = PreprocessResizeKeepRatio(
226 | width_minus_margin,
227 | height_minus_margin)
228 | resized_cv2_img = preprocess_resize_keep_ratio.do(cv2_img)
229 | 
230 | if self.auto_avoid_fill_bg:
231 | need_fill_bg = self.is_need_fill_bg(cv2_img)
232 | if not 
need_fill_bg: 233 | self.fill_bg = False 234 | else: 235 | self.fill_bg = True 236 | 237 | ## should skip horizontal stroke 238 | if not self.fill_bg: 239 | ret_img = cv2.resize(resized_cv2_img, (width_minus_margin, 240 | height_minus_margin)) 241 | else: 242 | if pix_dim is not None: 243 | norm_img = np.zeros((height_minus_margin, 244 | width_minus_margin, 245 | pix_dim), 246 | np.uint8) 247 | else: 248 | norm_img = np.zeros((height_minus_margin, 249 | width_minus_margin), 250 | np.uint8) 251 | ret_img = self.put_img_into_center(norm_img, resized_cv2_img) 252 | 253 | if self.margin is not None: 254 | if pix_dim is not None: 255 | norm_img = np.zeros((self.height, 256 | self.width, 257 | pix_dim), 258 | np.uint8) 259 | else: 260 | norm_img = np.zeros((self.height, 261 | self.width), 262 | np.uint8) 263 | ret_img = self.put_img_into_center(norm_img, ret_img) 264 | return ret_img -------------------------------------------------------------------------------- /python/deep_ocr/id_cards/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | -------------------------------------------------------------------------------- /python/deep_ocr/id_cards/char_set.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.langs.digits import data as digit_data 4 | from deep_ocr.langs.chi_sim import data as sim_data 5 | 6 | 7 | class CharSet(object): 8 | def __init__(self): 9 | self.data = {} 10 | self.data["name"] = { 11 | "set": set(sim_data) - set(digit_data), 12 | "width": 0.030, 13 | "caffe_cls": "sim", 14 | } 15 | self.data["sex"] = { 16 | "set": set(u"男女"), 17 | "width": 0.030, 18 | "caffe_cls": "sim", 19 | } 20 | self.data["minzu"] = { 21 | "set": set( 22 | u"汉蒙古回藏维吾尔苗彝壮布依朝鲜满侗瑶白土家哈尼哈萨克"\ 23 | u"傣黎傈僳佤畲拉祜水东乡纳西景颇柯尔克孜"\ 24 | u"土达斡尔仫佬羌布朗撒拉毛南仡佬锡伯阿昌"\ 25 | u"普米塔吉克怒俄罗斯鄂温克德昂保安裕固京"\ 26 | u"塔塔尔独龙鄂伦春赫哲乌孜别克门巴珞巴"\ 27 | u"基诺高山穿青人"), 28 | "width": 0.030, 29 | "caffe_cls": "sim", 30 | } 31 | self.data["year"] = { 32 | "set": set("0123456789"), 33 | "width": 0.015, 34 | "caffe_cls": "ua", 35 | } 36 | self.data["month"] = { 37 | "set": set("0123456789"), 38 | "width": 0.015, 39 | "caffe_cls": "ua", 40 | } 41 | self.data["day"] = { 42 | "set": set("0123456789"), 43 | "width": 0.015, 44 | "caffe_cls": "ua", 45 | } 46 | self.data["address"] = { 47 | "set": set(sim_data).union(digit_data), 48 | "width": 0.030, 49 | "caffe_cls": "sim", 50 | } 51 | self.data["id"] = { 52 | "set": set(u"0123456789X"), 53 | "width": 0.02, 54 | "caffe_cls": "ua", 55 | } 56 | 57 | def get(self): 58 | return self.data -------------------------------------------------------------------------------- /python/deep_ocr/id_cards/segmentation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import cv2 4 | from deep_ocr.cv2_img_proc import PreprocessBackgroundMask 5 | from deep_ocr.cv2_img_proc import PreprocessRemoveNonCharNoise 6 | 7 | import numpy as np 8 | from deep_ocr.utils import extract_peek_ranges_from_array 9 | from deep_ocr.utils import merge_chars_into_line_segments 10 | import os 11 | import shutil 12 | import sys, traceback 13 | 14 | class Segmentation(object): 15 | 16 | def __init__(self, debug_path=None): 17 | self.debug_path = debug_path 18 | self.boundaries = [ 19 | ([0, 0, 0], [100, 100, 100]), 20 | ([0, 0, 0], [150, 150, 150]), 21 | ([0, 0, 0], [200, 200, 200]), 22 | ] 23 | 24 | def check_if_good_boundary(self, 
boundary, norm_height, norm_width, color_img): 25 | preprocess_bg_mask = PreprocessBackgroundMask(boundary) 26 | char_w = norm_width / 20 27 | remove_noise = PreprocessRemoveNonCharNoise(char_w) 28 | 29 | id_card_img_mask = preprocess_bg_mask.do(color_img) 30 | id_card_img_mask[0:int(norm_height*0.05), :] = 0 31 | id_card_img_mask[int(norm_height*0.95):, :] = 0 32 | id_card_img_mask[:, 0:int(norm_width*0.05)] = 0 33 | id_card_img_mask[:, int(norm_width*0.95):] = 0 34 | 35 | remove_noise.do(id_card_img_mask) 36 | 37 | # se1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5)) 38 | # se2 = cv2.getStructuringElement(cv2.MORPH_RECT, (2,2)) 39 | # mask = cv2.morphologyEx(id_card_img_mask, cv2.MORPH_CLOSE, se1) 40 | # id_card_img_mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, se2) 41 | 42 | ## remove right head profile 43 | left_half_id_card_img_mask = np.copy(id_card_img_mask) 44 | left_half_id_card_img_mask[:, int(norm_width/2):] = 0 45 | 46 | ## Try to find text lines and chars 47 | horizontal_sum = np.sum(left_half_id_card_img_mask, axis=1) 48 | line_ranges = extract_peek_ranges_from_array(horizontal_sum) 49 | 50 | return len(line_ranges) >= 5 and len(line_ranges) <= 7 51 | 52 | 53 | def do(self, color_img): 54 | 55 | shape = color_img.shape 56 | 57 | norm_height = shape[0] 58 | norm_width = shape[1] 59 | 60 | gray_id_card_img = cv2.cvtColor(color_img, cv2.COLOR_BGR2GRAY) 61 | # 62 | clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) 63 | gray_id_card_img = clahe.apply(gray_id_card_img) 64 | 65 | gray_id_card_img = 255 - gray_id_card_img 66 | 67 | best_boundary = None 68 | for boundary in self.boundaries: 69 | if self.check_if_good_boundary( 70 | boundary, 71 | norm_height, norm_width, 72 | color_img): 73 | best_boundary = boundary 74 | break 75 | if best_boundary is None: 76 | return {} 77 | 78 | boundary = best_boundary 79 | ## boundary = ([0, 0, 0], [100, 100, 100]) 80 | preprocess_bg_mask = PreprocessBackgroundMask(boundary) 81 | id_card_img_mask = preprocess_bg_mask.do(color_img) 82 | id_card_img_mask[0:int(norm_height*0.05), :] = 0 83 | id_card_img_mask[int(norm_height*0.95):, :] = 0 84 | id_card_img_mask[:, 0:int(norm_width*0.05)] = 0 85 | id_card_img_mask[:, int(norm_width*0.95):] = 0 86 | 87 | # se1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5)) 88 | # se2 = cv2.getStructuringElement(cv2.MORPH_RECT, (2,2)) 89 | # mask = cv2.morphologyEx(id_card_img_mask, cv2.MORPH_CLOSE, se1) 90 | # id_card_img_mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, se2) 91 | 92 | ## remove right head profile 93 | left_half_id_card_img_mask = np.copy(id_card_img_mask) 94 | left_half_id_card_img_mask[:, int(norm_width/2):] = 0 95 | 96 | ## Try to find text lines and chars 97 | vertical_peek_ranges2d = [] 98 | horizontal_sum = np.sum(left_half_id_card_img_mask, axis=1) 99 | line_ranges = extract_peek_ranges_from_array(horizontal_sum) 100 | 101 | ## char extraction 102 | for line_range in line_ranges: 103 | start_y, end_y = line_range 104 | end_y += 1 105 | line_img = id_card_img_mask[start_y: end_y] 106 | vertical_sum = np.sum(line_img, axis=0) 107 | vertical_peek_ranges = extract_peek_ranges_from_array(vertical_sum, minimun_val=40, minimun_range=1) 108 | vertical_peek_ranges2d.append(vertical_peek_ranges) 109 | 110 | vertical_peek_ranges2d = merge_chars_into_line_segments(vertical_peek_ranges2d) 111 | img_gray_texts = cv2.bitwise_and(gray_id_card_img, gray_id_card_img, mask=id_card_img_mask) 112 | 113 | key_to_segmentation = {} 114 | try: 115 | ## name extraction 116 | range_y = line_ranges[0] 117 | 
range_x = vertical_peek_ranges2d[0][0] 118 | start_x, end_x = range_x 119 | start_y, end_y = range_y 120 | w = end_x - start_x 121 | h = end_y - start_y 122 | key_to_segmentation["name"] = [(start_x, start_y, w, h), ] 123 | ## sex extraction 124 | range_y = line_ranges[1] 125 | range_x = vertical_peek_ranges2d[1][0] 126 | start_x, end_x = range_x 127 | start_y, end_y = range_y 128 | w = end_x - start_x 129 | h = end_y - start_y 130 | key_to_segmentation["sex"] = [(start_x, start_y, w, h), ] 131 | ## minzu extraction 132 | range_y = line_ranges[1] 133 | range_x = vertical_peek_ranges2d[1][1] 134 | start_x, end_x = range_x 135 | start_y, end_y = range_y 136 | w = end_x - start_x 137 | h = end_y - start_y 138 | key_to_segmentation["minzu"] = [(start_x, start_y, w, h), ] 139 | ## year extraction 140 | range_y = line_ranges[2] 141 | range_x = vertical_peek_ranges2d[2][0] 142 | start_x, end_x = range_x 143 | start_y, end_y = range_y 144 | w = end_x - start_x 145 | h = end_y - start_y 146 | key_to_segmentation["year"] = [(start_x, start_y, w, h), ] 147 | ## month extraction 148 | range_y = line_ranges[2] 149 | range_x = vertical_peek_ranges2d[2][1] 150 | start_x, end_x = range_x 151 | start_y, end_y = range_y 152 | w = end_x - start_x 153 | h = end_y - start_y 154 | key_to_segmentation["month"] = [(start_x, start_y, w, h), ] 155 | ## day extraction 156 | range_y = line_ranges[2] 157 | range_x = vertical_peek_ranges2d[2][2] 158 | start_x, end_x = range_x 159 | start_y, end_y = range_y 160 | w = end_x - start_x 161 | h = end_y - start_y 162 | key_to_segmentation["day"] = [(start_x, start_y, w, h), ] 163 | ## address extraction 164 | key_to_segmentation["address"] = [] 165 | first_line = line_ranges[3][0] 166 | first_line_range_x = vertical_peek_ranges2d[3][0] 167 | first_line_start_x = first_line_range_x[0] 168 | first_line_w = first_line_range_x[1] - first_line_start_x 169 | for i, line_range in enumerate(line_ranges): 170 | if i >= 3: 171 | range_y = line_range 172 | range_x = vertical_peek_ranges2d[i][0] 173 | start_x, end_x = range_x 174 | start_y, end_y = range_y 175 | if abs(first_line_start_x - start_x)> int(first_line_w * 0.05): 176 | break 177 | w = end_x - start_x 178 | h = end_y - start_y 179 | key_to_segmentation["address"].append((start_x, start_y, w, h)) 180 | 181 | ## id extraction 182 | range_y = line_ranges[-1] 183 | range_x = vertical_peek_ranges2d[-1][0] 184 | start_x, end_x = range_x 185 | start_y, end_y = range_y 186 | w = end_x - start_x 187 | h = end_y - start_y 188 | key_to_segmentation["id"] = [(start_x, start_y, w, h), ] 189 | except: 190 | print("Exception in user code:") 191 | print('-' * 60) 192 | traceback.print_exc(file=sys.stdout) 193 | print('-' * 60) 194 | key_to_segmentation = {} 195 | 196 | debug_path = self.debug_path 197 | if debug_path is not None: 198 | import random 199 | 200 | if os.path.isdir(debug_path): 201 | shutil.rmtree(debug_path) 202 | os.makedirs(debug_path) 203 | 204 | debug_image_path = os.path.join(debug_path, "01_origin_img.jpg") 205 | debug_gray_image_path = os.path.join(debug_path, "01_gray_img.jpg") 206 | debug_image_mask_path = os.path.join(debug_path, "02_mask.jpg") 207 | debug_image_mask_text_lines_path = os.path.join(debug_path, "03_mask_text_lines.jpg") 208 | debug_image_left_mask_path = os.path.join(debug_path, "04_left_mask.jpg") 209 | debug_image_gray_texts_path = os.path.join(debug_path, "05_gray_texts.jpg") 210 | debug_image_chars_path = os.path.join(debug_path, "06_origin_img_chars.jpg") 211 | debug_image_key_to_segments_path = 
os.path.join(debug_path, "07_origin_img_key_to_segments.jpg") 212 | 213 | cv2.imwrite(debug_image_path, color_img) 214 | cv2.imwrite(debug_gray_image_path, 255 - gray_id_card_img) 215 | id_card_img_chars = np.copy(color_img) 216 | cv2.imwrite(debug_image_mask_path, id_card_img_mask) 217 | id_card_img_mask_text_lines = np.copy(id_card_img_mask) 218 | 219 | for i, line_range in enumerate(line_ranges): 220 | start_y, end_y = line_range 221 | id_card_img_mask_text_lines[start_y, :] = 255 222 | id_card_img_mask_text_lines[end_y, :] = 255 223 | 224 | color = (255, 0, 0) 225 | for i, line_range in enumerate(line_ranges): 226 | start_y, end_y = line_range 227 | for vertical_peek_range in vertical_peek_ranges2d[i]: 228 | start_x, end_x = vertical_peek_range 229 | cv2.rectangle(id_card_img_chars, 230 | (start_x, start_y), 231 | (end_x+1, end_y+1), 232 | color) 233 | 234 | key_to_segments_img = np.copy(color_img) 235 | for key in key_to_segmentation: 236 | color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) 237 | rects = key_to_segmentation[key] 238 | for rect in rects: 239 | pt1 = (rect[0], rect[1]) 240 | pt2 = (rect[0] + rect[2], rect[1] + rect[3]) 241 | cv2.rectangle(key_to_segments_img, pt1, pt2, color) 242 | 243 | cv2.imwrite(debug_image_mask_text_lines_path, id_card_img_mask_text_lines) 244 | 245 | cv2.imwrite(debug_image_left_mask_path, left_half_id_card_img_mask) 246 | cv2.imwrite(debug_image_gray_texts_path, img_gray_texts) 247 | cv2.imwrite(debug_image_chars_path, id_card_img_chars) 248 | cv2.imwrite(debug_image_key_to_segments_path, key_to_segments_img) 249 | return key_to_segmentation 250 | 251 | -------------------------------------------------------------------------------- /python/deep_ocr/lang_aux.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | from PIL import Image 5 | from PIL import ImageFont 6 | from PIL import ImageDraw 7 | import traceback 8 | import sys 9 | import os 10 | import numpy as np 11 | import cv2 12 | import copy 13 | import random 14 | 15 | from deep_ocr.utils import trim_string 16 | from deep_ocr.cv2_img_proc import FindImageBBox 17 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatioFillBG 18 | 19 | 20 | class DataAugmentation(object): 21 | def __init__(self, noise=True, dilate=True, erode=True): 22 | self.noise = noise 23 | self.dilate = dilate 24 | self.erode = erode 25 | 26 | @classmethod 27 | def add_noise(cls, img): 28 | # add some noise 29 | for i in range(20): 30 | temp_x = np.random.randint(0, img.shape[0]) 31 | temp_y = np.random.randint(0, img.shape[1]) 32 | img[temp_x][temp_y] = 255 33 | return img 34 | 35 | @classmethod 36 | def add_erode(cls, img): 37 | kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)) 38 | img = cv2.erode(img,kernel) 39 | return img 40 | 41 | @classmethod 42 | def add_dilate(cls, img): 43 | kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)) 44 | img = cv2.dilate(img, kernel) 45 | return img 46 | 47 | def do(self, img): 48 | if self.noise and random.random()<0.5: 49 | img = self.add_noise(img) 50 | if self.dilate and random.random()<0.5: 51 | img = self.add_dilate(img) 52 | elif self.erode: 53 | img = self.add_erode(img) 54 | return img 55 | 56 | 57 | class LangCharsGenerate(object): 58 | def __init__(self, langs): 59 | self.langs = langs 60 | 61 | def do(self, ): 62 | lang_list = self.langs.split("+") 63 | lang_chars = "" 64 | for lang in lang_list: 65 | lang_module = "deep_ocr.langs.%s" % lang 66 | 
lang_module_data = __import__(lang_module, fromlist=['']) 67 | lang_chars += lang_module_data.data 68 | trim_string(lang_chars) 69 | return lang_chars 70 | 71 | 72 | class FontCheck(object): 73 | 74 | def __init__(self, lang_chars, width=32, height=32): 75 | self.lang_chars = lang_chars 76 | self.width = width 77 | self.height = height 78 | 79 | def do(self, font_path): 80 | width = self.width 81 | height = self.height 82 | try: 83 | for i, char in enumerate(self.lang_chars): 84 | img = Image.new("RGB", (width, height), "black") 85 | draw = ImageDraw.Draw(img) 86 | font = ImageFont.truetype(font_path, int(width * 0.9),) 87 | draw.text((0, 0), char, (255, 255, 255), font=font) 88 | data = list(img.getdata()) 89 | sum_val = 0 90 | for i_data in data: 91 | sum_val += sum(i_data) 92 | if sum_val < 2: 93 | return False 94 | except: 95 | print("fail to load:%s" % font_path) 96 | traceback.print_exc(file=sys.stdout) 97 | return False 98 | return True 99 | 100 | 101 | class Font2Image(object): 102 | 103 | def __init__(self, 104 | width, height, 105 | need_crop, margin): 106 | self.width = width 107 | self.height = height 108 | self.need_crop = need_crop 109 | self.margin = margin 110 | 111 | def do(self, font_path, char, path_img="", rotate=0, need_aug=True): 112 | find_image_bbox = FindImageBBox() 113 | img = Image.new("RGB", (self.width, self.height), "black") 114 | draw = ImageDraw.Draw(img) 115 | font = ImageFont.truetype(font_path, int(self.width * 0.7),) 116 | draw.text((0, 0), char, (255, 255, 255), font=font) 117 | 118 | ## rotate 119 | if rotate != 0: 120 | img = img.rotate(rotate) 121 | 122 | data = list(img.getdata()) 123 | sum_val = 0 124 | for i_data in data: 125 | sum_val += sum(i_data) 126 | if sum_val > 2: 127 | np_img = np.asarray(data, dtype='uint8') 128 | np_img = np_img[:, 0] 129 | np_img = np_img.reshape((self.height, self.width)) 130 | cropped_box = find_image_bbox.do(np_img) 131 | left, upper, right, lower = cropped_box 132 | np_img = np_img[upper: lower + 1, left: right + 1] 133 | if not self.need_crop: 134 | preprocess_resize_keep_ratio_fill_bg = \ 135 | PreprocessResizeKeepRatioFillBG(self.width, self.height, 136 | fill_bg=False, 137 | margin=self.margin) 138 | np_img = preprocess_resize_keep_ratio_fill_bg.do(np_img) 139 | 140 | ## noise 141 | if need_aug: 142 | data_aug = DataAugmentation() 143 | np_img = data_aug.do(np_img) 144 | 145 | cv2.imwrite(path_img, np_img) 146 | 147 | else: 148 | print("%s doesn't exist." 
% path_img) 149 | 150 | 151 | if __name__ == "__main__": 152 | lang_chars_gen = LangCharsGenerate("digits+eng") 153 | lang_chars = lang_chars_gen.do() 154 | font_check = FontCheck(lang_chars) 155 | 156 | font_dir = "/root/workspace/deep_ocr_fonts/chinese_fonts/" 157 | for font_name in os.listdir(font_dir): 158 | font_path = os.path.join(font_dir, font_name) 159 | print("font_path:", font_path) 160 | lang_chars_gen = LangCharsGenerate("chi_sim") 161 | lang_chars = lang_chars_gen.do() 162 | print("char len=", len(lang_chars)) 163 | #print(lang_chars.encode("utf-8")) 164 | font_check = FontCheck(lang_chars) 165 | print("can cover all the chars?:", font_check.do(font_path)) 166 | 167 | -------------------------------------------------------------------------------- /python/deep_ocr/langs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yghstill/deepLearning_OCR/8467aea227244fa0721fc755d5c42cf4c3b462a3/python/deep_ocr/langs/__init__.py -------------------------------------------------------------------------------- /python/deep_ocr/langs/chi_sim.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | from deep_ocr.utils import trim_string 6 | 7 | # I take characters from http://hanyu.iciba.com/zt/3500.html 8 | data = u''' 9 | 一乙二十丁厂七卜八人入儿匕几九刁了刀力乃又三干于亏工土士才下寸大丈与万上小口山巾千乞川亿个夕久么勺 10 | 凡丸及广亡门丫义之尸己已巳弓子卫也女刃飞习叉马乡丰王开井天夫元无云专丐扎艺木五支厅不犬太区历歹友尤 11 | 匹车巨牙屯戈比互切瓦止少曰日中贝冈内水见午牛手气毛壬升夭长仁什片仆化仇币仍仅斤爪反介父从仑今凶分乏 12 | 公仓月氏勿欠风丹匀乌勾凤六文亢方火为斗忆计订户认冗讥心尺引丑巴孔队办以允予邓劝双书幻玉刊未末示击打 13 | 巧正扑卉扒功扔去甘世艾古节本术可丙左厉石右布夯戊龙平灭轧东卡北占凸卢业旧帅归旦目且叶甲申叮电号田由 14 | 只叭史央兄叽叼叫叩叨另叹冉皿凹囚四生矢失乍禾丘付仗代仙们仪白仔他斥瓜乎丛令用甩印尔乐句匆册卯犯外处 15 | 冬鸟务包饥主市立冯玄闪兰半汁汇头汉宁穴它讨写让礼训议必讯记永司尼民弗弘出辽奶奴召加皮边孕发圣对台矛 16 | 纠母幼丝邦式迂刑戎动扛寺吉扣考托老巩圾执扩扫地场扬耳芋共芒亚芝朽朴机权过臣吏再协西压厌戌在百有存而 17 | 页匠夸夺灰达列死成夹夷轨邪尧划迈毕至此贞师尘尖劣光当早吁吐吓虫曲团吕同吊吃因吸吗吆屿屹岁帆回岂则刚 18 | 网肉年朱先丢廷舌竹迁乔迄伟传乒乓休伍伏优臼伐延仲件任伤价伦份华仰仿伙伪自伊血向似后行舟全会杀合兆企 19 | 众爷伞创肌肋朵杂危旬旨旭负匈名各多争色壮冲妆冰庄庆亦刘齐交衣次产决亥充妄闭问闯羊并关米灯州汗污江汛 20 | 池汝汤忙兴宇守宅字安讲讳军讶许讹论讼农讽设访诀寻那迅尽导异弛孙阵阳收阶阴防奸如妇妃好她妈戏羽观欢买 21 | 红驮纤驯约级纪驰纫巡寿弄麦玖玛形进戒吞远违韧运扶抚坛技坏抠扰扼拒找批址扯走抄贡汞坝攻赤折抓扳抡扮抢 22 | 孝坎均抑抛投坟坑抗坊抖护壳志块扭声把报拟却抒劫芙芜苇芽花芹芥芬苍芳严芦芯劳克芭苏杆杠杜材村杖杏杉巫 23 | 极李杨求甫匣更束吾豆两酉丽医辰励否还尬歼来连轩步卤坚肖旱盯呈时吴助县里呆吱吠呕园旷围呀吨足邮男困吵 24 | 串员呐听吟吩呛吻吹呜吭吧邑吼囤别吮岖岗帐财针钉牡告我乱利秃秀私每兵估体何佐佑但伸佃作伯伶佣低你住位 25 | 伴身皂伺佛囱近彻役返余希坐谷妥含邻岔肝肛肚肘肠龟甸免狂犹狈角删条彤卵灸岛刨迎饭饮系言冻状亩况床库庇 26 | 疗吝应这冷庐序辛弃冶忘闰闲间闷判兑灶灿灼弟汪沐沛汰沥沙汽沃沦汹泛沧没沟沪沈沉沁怀忧忱快完宋宏牢究穷 27 | 灾良证启评补初社祀识诈诉罕诊词译君灵即层屁尿尾迟局改张忌际陆阿陈阻附坠妓妙妖姊妨妒努忍劲矣鸡纬驱纯 28 | 纱纲纳驳纵纷纸纹纺驴纽奉玩环武青责现玫表规抹卦坷坯拓拢拔坪拣坦担坤押抽拐拖者拍顶拆拎拥抵拘势抱拄垃 29 | 拉拦幸拌拧拂拙招坡披拨择抬拇拗其取茉苦昔苛若茂苹苗英苟苑苞范直茁茄茎苔茅枉林枝杯枢柜枚析板松枪枫构 30 | 杭杰述枕丧或画卧事刺枣雨卖郁矾矿码厕奈奔奇奋态欧殴垄妻轰顷转斩轮软到非叔歧肯齿些卓虎虏肾贤尚旺具味 31 | 果昆国哎咕昌呵畅明易咙昂迪典固忠呻咒咋咐呼鸣咏呢咄咖岸岩帖罗帜帕岭凯败账贩贬购贮图钓制知迭氛垂牧物 32 | 乖刮秆和季委秉佳侍岳供使例侠侥版侄侦侣侧凭侨佩货侈依卑的迫质欣征往爬彼径所舍金刹命肴斧爸采觅受乳贪 33 | 念贫忿肤肺肢肿胀朋股肮肪肥服胁周昏鱼兔狐忽狗狞备饰饱饲变京享庞店夜庙府底疟疙疚剂卒郊庚废净盲放刻育 34 | 氓闸闹郑券卷单炬炒炊炕炎炉沫浅法泄沽河沾泪沮油泊沿泡注泣泞泻泌泳泥沸沼波泼泽治怔怯怖性怕怜怪怡学宝 35 | 宗定宠宜审宙官空帘宛实试郎诗肩房诚衬衫视祈话诞诡询该详建肃录隶帚屉居届刷屈弧弥弦承孟陋陌孤陕降函限 36 | 妹姑姐姓妮始姆迢驾叁参艰线练组绅细驶织驹终驻绊驼绍绎经贯契贰奏春帮玷珍玲珊玻毒型拭挂封持拷拱项垮挎 37 | 城挟挠政赴赵挡拽哉挺括垢拴拾挑垛指垫挣挤拼挖按挥挪拯某甚荆茸革茬荐巷带草茧茵茶荒茫荡荣荤荧故胡荫荔 38 | 南药标栈柑枯柄栋相查柏栅柳柱柿栏柠树勃要柬咸威歪研砖厘厚砌砂泵砚砍面耐耍牵鸥残殃轴轻鸦皆韭背战点虐 39 | 临览竖省削尝昧盹是盼眨哇哄哑显冒映星昨咧昭畏趴胃贵界虹虾蚁思蚂虽品咽骂勋哗咱响哈哆咬咳咪哪哟炭峡罚 40 | 贱贴贻骨幽钙钝钞钟钢钠钥钦钧钩钮卸缸拜看矩毡氢怎牲选适秒香种秋科重复竿段便俩贷顺修俏保促俄俐侮俭俗 41 | 俘信皇泉鬼侵禹侯追俊盾待徊衍律很须叙剑逃食盆胚胧胆胜胞胖脉胎勉狭狮独狰狡狱狠贸怨急饵饶蚀饺饼峦弯将 42 | 奖哀亭亮度迹庭疮疯疫疤咨姿亲音帝施闺闻闽阀阁差养美姜叛送类迷籽娄前首逆兹总炼炸烁炮炫烂剃洼洁洪洒柒 43 | 浇浊洞测洗活派洽染洛浏济洋洲浑浓津恃恒恢恍恬恤恰恼恨举觉宣宦室宫宪突穿窃客诫冠诬语扁袄祖神祝祠误诱 44 | 诲说诵垦退既屋昼屏屎费陡逊眉孩陨除险院娃姥姨姻娇姚娜怒架贺盈勇怠癸蚤柔垒绑绒结绕骄绘给绚骆络绝绞骇 45 | 统耕耘耗耙艳泰秦珠班素匿蚕顽盏匪捞栽捕埂捂振载赶起盐捎捍捏埋捉捆捐损袁捌都哲逝捡挫换挽挚热恐捣壶捅 46 | 埃挨耻耿耽聂恭莽莱莲莫莉荷获晋恶莹莺真框梆桂桔栖档桐株桥桦栓桃格桩校核样根索哥速逗栗贾酌配翅辱唇夏 47 | 砸砰砾础破原套逐烈殊殉顾轿较顿毙致柴桌虑监紧党逞晒眠晓哮唠鸭晃哺晌剔晕蚌畔蚣蚊蚪蚓哨哩圃哭哦恩鸯唤 
48 | 唁哼唧啊唉唆罢峭峨峰圆峻贼贿赂赃钱钳钻钾铁铃铅缺氧氨特牺造乘敌秤租积秧秩称秘透笔笑笋债借值倚俺倾倒 49 | 倘俱倡候赁俯倍倦健臭射躬息倔徒徐殷舰舱般航途拿耸爹舀爱豺豹颁颂翁胰脆脂胸胳脏脐胶脑脓逛狸狼卿逢鸵留 50 | 鸳皱饿馁凌凄恋桨浆衰衷高郭席准座症病疾斋疹疼疲脊效离紊唐瓷资凉站剖竞部旁旅畜阅羞羔瓶拳粉料益兼烤烘 51 | 烦烧烛烟烙递涛浙涝浦酒涉消涡浩海涂浴浮涣涤流润涧涕浪浸涨烫涩涌悖悟悄悍悔悯悦害宽家宵宴宾窍窄容宰案 52 | 请朗诸诺读扇诽袜袖袍被祥课冥谁调冤谅谆谈谊剥恳展剧屑弱陵祟陶陷陪娱娟恕娥娘通能难预桑绢绣验继骏球琐 53 | 理琉琅捧堵措描域捺掩捷排焉掉捶赦堆推埠掀授捻教掏掐掠掂培接掷控探据掘掺职基聆勘聊娶著菱勒黄菲萌萝菌 54 | 萎菜萄菊菩萍菠萤营乾萧萨菇械彬梦婪梗梧梢梅检梳梯桶梭救曹副票酝酗厢戚硅硕奢盔爽聋袭盛匾雪辅辆颅虚彪 55 | 雀堂常眶匙晨睁眯眼悬野啪啦曼晦晚啄啡距趾啃跃略蚯蛀蛇唬累鄂唱患啰唾唯啤啥啸崖崎崭逻崔帷崩崇崛婴圈铐 56 | 铛铝铜铭铲银矫甜秸梨犁秽移笨笼笛笙符第敏做袋悠偿偶偎偷您售停偏躯兜假衅徘徙得衔盘舶船舵斜盒鸽敛悉欲 57 | 彩领脚脖脯豚脸脱象够逸猜猪猎猫凰猖猛祭馅馆凑减毫烹庶麻庵痊痒痕廊康庸鹿盗章竟商族旋望率阎阐着羚盖眷 58 | 粘粗粒断剪兽焊焕清添鸿淋涯淹渠渐淑淌混淮淆渊淫渔淘淳液淤淡淀深涮涵婆梁渗情惜惭悼惧惕惟惊惦悴惋惨惯 59 | 寇寅寄寂宿窒窑密谋谍谎谐袱祷祸谓谚谜逮敢尉屠弹隋堕随蛋隅隆隐婚婶婉颇颈绩绪续骑绰绳维绵绷绸综绽绿缀 60 | 巢琴琳琢琼斑替揍款堪塔搭堰揩越趁趋超揽堤提博揭喜彭揣插揪搜煮援搀裁搁搓搂搅壹握搔揉斯期欺联葫散惹葬 61 | 募葛董葡敬葱蒋蒂落韩朝辜葵棒棱棋椰植森焚椅椒棵棍椎棉棚棕棺榔椭惠惑逼粟棘酣酥厨厦硬硝确硫雁殖裂雄颊 62 | 雳暂雅翘辈悲紫凿辉敞棠赏掌晴睐暑最晰量鼎喷喳晶喇遇喊遏晾景畴践跋跌跑跛遗蛙蛛蜓蜒蛤喝鹃喂喘喉喻啼喧 63 | 嵌幅帽赋赌赎赐赔黑铸铺链销锁锄锅锈锋锌锐甥掰短智氮毯氯鹅剩稍程稀税筐等筑策筛筒筏答筋筝傲傅牌堡集焦 64 | 傍储皓皖粤奥街惩御循艇舒逾番释禽腊脾腋腔腕鲁猩猬猾猴惫然馈馋装蛮就敦斌痘痢痪痛童竣阔善翔羡普粪尊奠 65 | 道遂曾焰港滞湖湘渣渤渺湿温渴溃溅滑湃渝湾渡游滋渲溉愤慌惰愕愣惶愧愉慨割寒富寓窜窝窖窗窘遍雇裕裤裙禅 66 | 禄谢谣谤谦犀属屡强粥疏隔隙隘媒絮嫂媚婿登缅缆缉缎缓缔缕骗编骚缘瑟鹉瑞瑰瑙魂肆摄摸填搏塌鼓摆携搬摇搞 67 | 塘摊聘斟蒜勤靴靶鹊蓝墓幕蓬蓄蒲蓉蒙蒸献椿禁楚楷榄想槐榆楼概赖酪酬感碍碘碑碎碰碗碌尴雷零雾雹辐辑输督 68 | 频龄鉴睛睹睦瞄睫睡睬嗜鄙嗦愚暖盟歇暗暇照畸跨跷跳跺跪路跤跟遣蜈蜗蛾蜂蜕嗅嗡嗓署置罪罩蜀幌错锚锡锣锤 69 | 锥锦键锯锰矮辞稚稠颓愁筹签简筷毁舅鼠催傻像躲魁衙微愈遥腻腰腥腮腹腺鹏腾腿鲍猿颖触解煞雏馍馏酱禀痹廓 70 | 痴痰廉靖新韵意誊粮数煎塑慈煤煌满漠滇源滤滥滔溪溜漓滚溢溯滨溶溺粱滩慎誉塞寞窥窟寝谨褂裸福谬群殿辟障 71 | 媳嫉嫌嫁叠缚缝缠缤剿静碧璃赘熬墙墟嘉摧赫截誓境摘摔撇聚慕暮摹蔓蔑蔡蔗蔽蔼熙蔚兢模槛榴榜榨榕歌遭酵酷 72 | 酿酸碟碱碳磁愿需辖辗雌裳颗瞅墅嗽踊蜻蜡蝇蜘蝉嘛嘀赚锹锻镀舞舔稳熏箕算箩管箫舆僚僧鼻魄魅貌膜膊膀鲜疑 73 | 孵馒裹敲豪膏遮腐瘩瘟瘦辣彰竭端旗精粹歉弊熄熔煽潇漆漱漂漫滴漾演漏慢慷寨赛寡察蜜寥谭肇褐褪谱隧嫩翠熊 74 | 凳骡缩慧撵撕撒撩趣趟撑撮撬播擒墩撞撤增撰聪鞋鞍蕉蕊蔬蕴横槽樱橡樟橄敷豌飘醋醇醉磕磊磅碾震霄霉瞒题暴 75 | 瞎嘻嘶嘲嘹影踢踏踩踪蝶蝴蝠蝎蝌蝗蝙嘿嘱幢墨镇镐镑靠稽稻黎稿稼箱篓箭篇僵躺僻德艘膝膛鲤鲫熟摩褒瘪瘤瘫 76 | 凛颜毅糊遵憋潜澎潮潭鲨澳潘澈澜澄懂憔懊憎额翩褥谴鹤憨慰劈履豫缭撼擂操擅燕蕾薯薛薇擎薪薄颠翰噩橱橙橘 77 | 整融瓢醒霍霎辙冀餐嘴踱蹄蹂蟆螃器噪鹦赠默黔镜赞穆篮篡篷篱儒邀衡膨雕鲸磨瘾瘸凝辨辩糙糖糕燃濒澡激懒憾 78 | 懈窿壁避缰缴戴擦藉鞠藏藐檬檐檀礁磷霜霞瞭瞧瞬瞳瞩瞪曙蹋蹈螺蟋蟀嚎赡穗魏簧簇繁徽爵朦臊鳄癌辫赢糟糠燥 79 | 懦豁臀臂翼骤藕鞭藤覆瞻蹦嚣镰翻鳍鹰瀑襟璧戳孽警蘑藻攀曝蹲蹭蹬巅簸簿蟹颤靡癣瓣羹鳖爆疆鬓壤馨耀躁蠕嚼 80 | 嚷巍籍鳞魔糯灌譬蠢霸露霹躏黯髓赣囊镶瓤罐矗 81 | 82 | 83 | 84 | 乂乜兀弋孑孓幺亓韦廿卅仄厄仃仉仂兮刈爻卞闩讣尹夬爿毋邗邛艽艿札叵匝丕匜劢卟叱叻仨仕仟仡仫仞卮氐犰 85 | 刍邝邙汀讦讧讪讫尻阡尕弁驭匡耒玎玑邢圩圬圭扦圪圳圹扪圮圯芊芍芄芨芑芎芗亘厍夼戍尥乩旯曳岌屺凼囡钇缶 86 | 氘氖牝伎伛伢佤仵伥伧伉伫囟汆刖夙旮刎犷犸舛凫邬饧汕汔汐汲汜汊忖忏讴讵祁讷聿艮阱阮阪丞妁牟纡纣纥纨 87 | 玕玙抟抔圻坂坍坞抃抉芫邯芸芾苈苣芷芮苋芼苌苁芩芪芡芟苄苎苡杌杓杞杈忑孛邴邳矶奁豕忒欤轫迓邶忐卣邺 88 | 旰呋呒呓呔呖呃吡町虬呗吽吣吲帏岐岈岘岑岚兕囵囫钊钋钌迕氙氚牤佞邱攸佚佝佟佗伽彷佘佥孚豸坌肟邸奂劬 89 | 狄狁鸠邹饨饩饪饫饬亨庑庋疔疖肓闱闳闵羌炀沣沅沔沤沌沏沚汩汨沂汾沨汴汶沆沩泐怃怄忡忤忾怅忻忪怆忭忸诂 90 | 诃诅诋诌诏诒孜陇陀陂陉妍妩妪妣妊妗妫妞姒妤邵劭刭甬邰纭纰纴纶纾玮玡玭玠玢玥玦盂忝匦坩抨拤坫拈垆抻劼 91 | 拃拊坼坻㧟坨坭抿坳耶苷苯苤茏苫苜苴苒苘茌苻苓茚茆茑茓茔茕茀苕枥枇杪杳枧杵枨枞枋杻杷杼矸砀刳奄瓯殁郏 92 | 轭郅鸢盱昊昙杲昃咂呸昕昀旻昉炅咔畀虮咀呷黾呱呤咚咆咛呶呣呦咝岢岿岬岫帙岣峁刿迥岷剀帔峄沓囹罔钍钎钏 93 | 钒钕钗邾迮牦竺迤佶佬佰侑侉臾岱侗侃侏侩佻佾侪佼佯侬帛阜侔徂刽郄怂籴瓮戗肼䏝肽肱肫剁迩郇狙狎狍狒咎炙 94 | 枭饯饴冽冼庖疠疝疡兖妾劾炜炖炘炝炔泔沭泷泸泱泅泗泠泺泖泫泮沱泯泓泾怙怵怦怛怏怍㤘怩怫怿宕穹宓诓诔诖 95 | 诘戾诙戽郓衩祆祎祉祇诛诜诟诠诣诤诧诨诩戕孢亟陔妲妯姗帑弩孥驽虱迦迨绀绁绂驷驸绉绌驿骀甾珏珐珂珑玳珀 96 | 顸珉珈拮垭挝垣挞垤赳贲垱垌郝垧垓挦垠茜荚荑贳荜莒茼茴茱莛荞茯荏荇荃荟荀茗荠茭茨垩荥荦荨荩剋荪茹荬荮 97 | 柰栉柯柘栊柩枰栌柙枵柚枳柞柝栀柢栎枸柈柁枷柽剌酊郦甭砗砘砒斫砭砜奎耷虺殂殇殄殆轱轲轳轶轸虿毖觇尜哐 98 | 眄眍郢眇眊眈禺哂咴曷昴昱昵咦哓哔畎毗呲胄畋畈虼虻盅咣哕剐郧咻囿咿哌哙哚咯咩咤哝哏哞峙峣罘帧峒峤峋峥 99 | 贶钚钛钡钣钤钨钫钯氡氟牯郜秕秭竽笈笃俦俨俅俪叟垡牮俣俚皈俑俟逅徇徉舢俞郗俎郤爰郛瓴胨胪胛胂胙胍胗胝 100 | 朐胫鸨匍狨狯飑狩狲訇逄昝饷饸饹胤孪娈弈奕庥疬疣疥疭庠竑彦飒闼闾闿阂羑迸籼酋炳炻炽炯烀炷烃洱洹洧洌浃 101 | 洇洄洙涎洎洫浍洮洵浒浔浕洳恸恓恹恫恺恻恂恪恽宥扃衲衽衿袂祛祜祓祚诮祗祢诰诳鸩昶郡咫弭牁胥陛陟娅姮娆 102 | 姝姣姘姹怼羿炱矜绔骁骅绗绛骈耖挈珥珙顼珰珩珧珣珞琤珲敖恚埔埕埘埙埚挹耆耄埒捋贽垸捃盍荸莆莳莴莪莠莓 103 | 莜莅荼莩荽莸荻莘莎莞莨鸪莼栲栳郴桓桡桎桢桤梃栝桕桁桧桅栟桉栩逑逋彧鬲豇酐逦厝孬砝砹砺砧砷砟砼砥砣剞 104 | 砻轼轾辂鸫趸龀鸬虔逍眬唛晟眩眙哧哽唔晁晏鸮趵趿畛蚨蚜蚍蚋蚬蚝蚧唢圄唣唏盎唑崂崃罡罟峪觊赅钰钲钴钵钹 105 | 钺钽钼钿铀铂铄铆铈铉铊铋铌铍䥽铎氩氤氦毪舐秣秫盉笄笕笊笏笆俸倩俵偌俳俶倬倏恁倭倪俾倜隼隽倌倥臬皋郫 106 | 倨衄颀徕舫釜奚衾胯胱胴胭脍胼朕脒胺鸱玺鸲狷猁狳猃狺逖桀袅饽凇栾挛亳疳疴疸疽痈疱痂痉衮凋颃恣旆旄旃阃 107 | 阄訚阆恙粑朔郸烜烨烩烊剡郯烬涑浯涞涟娑涅涠浞涓浥涔浜浠浣浚悚悭悝悒悌悛宸窈剜诹冢诼袒袢祯诿谀谂谄谇 108 | 屐屙陬勐奘牂蚩陲姬娠娌娉娲娩娴娣娓婀畚逡绠骊绡骋绥绦绨骎邕鸶彗耜焘舂琏琇麸揶埴埯捯掳掴埸埵赧埤捭逵 109 | 埝堋堍掬鸷掖捽掊堉掸捩掮悫埭埽掇掼聃菁萁菘堇萘萋菽菖萜萸萑棻菔菟萏萃菏菹菪菅菀萦菰菡梵梿梏觋桴桷梓 110 | 棁桫棂啬郾匮敕豉鄄酞酚戛硎硭硒硖硗硐硇硌鸸瓠匏厩龚殒殓殍赉雩辄堑眭眦啧晡晤眺眵眸圊喏喵啉勖晞唵晗冕 111 | 啭畦趺啮跄蚶蛄蛎蛆蚰蛊圉蚱蛉蛏蚴啁啕唿啐唼唷啖啵啶啷唳唰啜帻崚崦帼崮崤崆赇赈赊铑铒铗铙铟铠铡铢铣铤 112 | 铧铨铩铪铫铬铮铯铰铱铳铵铷氪牾鸹秾逶笺筇笸笪笮笠笥笤笳笾笞偾偃偕偈傀偬偻皑皎鸻徜舸舻舴舷龛翎脬脘脲 113 | 匐猗猡猞猝斛猕馗馃馄鸾孰庹庾痔痍疵翊旌旎袤阇阈阉阊阋阍阏羟粝粕敝焐烯焓烽焖烷焗渍渚淇淅淞渎涿淖挲淠 114 | 涸渑淦淝淬涪淙涫渌淄惬悻悱惝惘悸惆惚惇惮窕谌谏扈皲谑裆袷裉谒谔谕谖谗谙谛谝逯郿隈粜隍隗婧婊婕娼婢婵 115 | 
胬袈翌恿欸绫骐绮绯绱骒绲骓绶绺绻绾骖缁耠琫琵琶琪瑛琦琥琨靓琰琮琯琬琛琚辇鼋揳堞搽揸揠堙趄揖颉塄揿耋 116 | 揄蛩蛰塆摒揆掾聒葑葚靰靸葳葺葸萼葆葩葶蒌萱戟葭楮棼椟棹椤棰赍椋椁椪棣椐鹁覃酤酢酡鹂厥殚殛雯雱辊辋椠 117 | 辍辎斐睄睑睇睃戢喋嗒喃喱喹晷喈跖跗跞跚跎跏跆蛱蛲蛭蛳蛐蛔蛞蛴蛟蛘喁喟啾嗖喑嗟喽嗞喀喔喙嵘嵖崴遄詈嵎 118 | 崽嵬嵛嵯嵝嵫幄嵋赕铻铼铿锃锂锆锇锉锏锑锒锔锕掣矬氰毳毽犊犄犋鹄犍嵇黍稃稂筚筵筌傣傈舄牍傥傧遑傩遁徨 119 | 媭畲弑颌翕釉鹆舜貂腈腌腓腆腴腑腚腱鱿鲀鲂颍猢猹猥飓觞觚猱颎飧馇馊亵脔裒痣痨痦痞痤痫痧赓竦瓿啻颏鹇阑 120 | 阒阕粞遒孳焯焜焙焱鹈湛渫湮湎湜渭湍湫溲湟溆湲湔湉渥湄滁愠惺愦惴愀愎愔喾寐谟扉裢裎裥祾祺谠幂谡谥谧遐 121 | 孱弼巽骘媪媛婷巯翚皴婺骛缂缃缄彘缇缈缌缑缒缗飨耢瑚瑁瑜瑗瑄瑕遨骜韫髡塬鄢趔趑摅摁蜇搋搪搐搛搠摈彀毂 122 | 搦搡蓁戡蓍鄞靳蓐蓦鹋蒽蓓蓖蓊蒯蓟蓑蒿蒺蓠蒟蒡蒹蒴蒗蓥颐楔楠楂楝楫楸椴槌楯皙榈槎榉楦楣楹椽裘剽甄酮酰 123 | 酯酩蜃碛碓硼碉碚碇碜鹌辏龃龅訾粲虞睚嗪韪嗷嗉睨睢雎睥嘟嗑嗫嗬嗔嗝戥嗄煦暄遢暌跬跶跸跐跣跹跻蛸蜊蜍蜉 124 | 蜣畹蛹嗣嗯嗥嗲嗳嗌嗍嗨嗐嗤嗵罨嵊嵩嵴骰锗锛锜锝锞锟锢锨锩锭锱雉氲犏歃稞稗稔筠筢筮筲筱牒煲敫徭愆艄觎 125 | 毹貊貅貉颔腠腩腼腭腧塍媵詹鲅鲆鲇鲈稣鲋鲐肄鹐飕觥遛馐鹑亶瘃痱痼痿瘐瘁瘆麂裔歆旒雍阖阗阙羧豢粳猷煳煜 126 | 煨煅煊煸煺滟溱溘漭滢溥溧溽裟溻溷滗滫溴滏滃滦溏滂滓溟滪愫慑慊鲎骞窦窠窣裱褚裨裾裰禊谩谪媾嫫媲嫒嫔媸 127 | 缙缜缛辔骝缟缡缢缣骟耥璈瑶瑭獒觏慝嫠韬叆髦摽墁撂摞撄翥踅摭墉墒榖綦蔫蔷靺靼鞅靿甍蔸蔟蔺戬蕖蔻蓿斡鹕 128 | 蓼榛榧榻榫榭槔榱槁槟槠榷僰酽酶酹厮碡碴碣碲磋臧豨殡霆霁辕蜚裴翡龇龈睿䁖睽嘞嘈嘌嘁嘎暧暝踌踉蜞蜥蜮蝈 129 | 蜴蜱蜩蜷蜿螂蜢嘘嘡鹗嘣嘤嘚嗾嘧罴罱幔嶂幛赙罂骷骶鹘锲锴锶锷锸锵镁镂犒箐箦箧箍箸箬箅箪箔箜箢箓毓僖儆 130 | 僳僭劁僮魃魆睾艋鄱膈膑鲑鲔鲚鲛鲟獐觫雒夤馑銮塾麽瘌瘊瘘瘙廖韶旖膂阚鄯鲞粿粼粽糁槊鹚熘熥潢漕滹漯漶潋 131 | 潴漪漉漳漩澉潍慵搴窨寤綮谮褡褙褓褛褊谯谰谲暨屣鹛嫣嫱嫖嫦嫚嫘嫡鼐翟瞀鹜骠缥缦缧缨骢缪缫耦耧瑾璜璀璎 132 | 璁璋璇奭髯髫撷撅赭撸鋆撙撺墀聩觐鞑蕙鞒蕈蕨蕤蕞蕺瞢蕃蕲赜槿樯槭樗樘樊槲醌醅靥魇餍磔磙霈辘龉龊觑瞌瞋 133 | 瞑嘭噎噶颙暹噘踔踝踟踒踬踮踯踺踞蝽蝾蝻蝰蝮螋蝓蝣蝼噗嘬颚噍噢噙噜噌噔颛幞幡嶙嶝骺骼骸镊镉镌镍镏镒镓 134 | 镔稷箴篑篁篌篆牖儋徵磐虢鹞膘滕鲠鲡鲢鲣鲥鲧鲩獗獠觯馓馔麾廛瘛瘼瘢瘠齑羯羰遴糌糍糅熜熵熠澍澌潸潦潲鋈 135 | 潟潼潺憬憧寮窳谳褴褟褫谵熨屦嬉勰戮蝥缬缮缯骣畿耩耨耪璞璟靛璠璘聱螯髻髭髹擀熹甏擞縠磬颞蕻鞘颟薤薨檠 136 | 薏薮薜薅樾橛橇樵檎橹樽樨橼墼橐翮醛醐醍醚磲赝飙殪霖霏霓錾辚臻遽氅瞟瞠瞰嚄嚆噤暾蹀踹踵踽蹉蹁螨蟒螈螅 137 | 螭螠螟噱噬噫噻噼罹圜䦃镖镗镘镚镛镝镞镠氇氆憩穑篝篥篦篪篙盥劓翱魉魈徼歙膳膦膙鲮鲱鲲鲳鲴鲵鲷鲻獴獭獬 138 | 邂鹧廨赟瘰廪瘿瘵瘴癃瘳斓麇麈嬴壅羲糗瞥甑燎燠燔燧濑濉潞澧澹澥澶濂褰寰窸褶禧嬖犟隰嬗颡缱缲缳璨璩璐璪 139 | 螫擤壕觳罄擢薹鞡鞬薷薰藓藁檄檩懋醢翳礅磴鹩龋龌豳壑黻嚏嚅蹑蹒蹊蟥螬螵疃螳蟑嚓羁罽罾嶷黜黝髁髀镡镢镣 140 | 镦镧镩镪镫罅黏簌篾篼簖簋鼢黛儡鹪鼾皤魍龠繇貘邈貔臌膻臆臃鲼鲽鳀鳃鳅鳇鳊螽燮鹫襄糜縻膺癍麋懑濡濮濞濠 141 | 濯蹇謇邃襁檗擘孺隳嬷蟊鹬鍪鏊鳌鬈鬃瞽鞯鞨鞫鞧鞣藜藠藩醪蹙礓燹餮瞿曛颢曜躇蹚鹭蟛蟪蟠蟮鹮黠黟髅髂镬镭 142 | 镯馥簟簪鼬雠艟鳎鳏鳐癞癔癜癖糨蹩鎏懵彝邋鬏攉攒鞲鞴藿蘧蘅麓醮醯酃霪霭霨黼嚯蹰蹶蹽蹼蹴蹾蹿蠖蠓蟾蠊黢 143 | 髋髌镲籀籁齁魑艨鳓鳔鳕鳗鳙麒鏖羸㸆瀚瀣瀛襦谶襞骥缵瓒攘蘩蘖醴霰酆矍曦躅鼍巉黩黥黪镳镴黧纂璺鼯臜鳜鳝 144 | 鳟獾孀骧瓘鼙醺礴颦曩鳢癫麝夔爝灏禳鐾羼蠡耱懿蘸鹳霾氍饕躐髑镵穰饔鬻鬟趱攫攥颧躜鼹癯麟蠲蠹躞衢鑫灞襻 145 | 纛鬣攮囔馕戆爨齉 146 | 147 | 壹 贰 叁 肆 伍 陆 柒 捌 玖 零 拾 佰 仟 万 亿 圆 148 | 149 | ''' 150 | 151 | data = trim_string(data) 152 | 153 | -------------------------------------------------------------------------------- /python/deep_ocr/langs/chi_tra.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.utils import trim_string 4 | 5 | # I take characters from http://hanyu.iciba.com/zt/3500.html 6 | data = u''' 7 | 一 乙 8 | 9 | 二 十 丁 廠 七 蔔 人 入 八 九 幾 兒 了 力 乃 刀 又 10 | 11 | 三 於 幹 虧 士 工 土 才 寸 下 大 丈 與 萬 上 小 口 巾 山 千 乞 川 億 個 勺 久 凡 及 夕 丸 麽 廣 12 | 亡 門 義 之 屍 弓 己 已 子 衛 也 女 飛 刃 習 叉 馬 鄉 13 | 14 | 豐 王 井 開 夫 天 無 元 專 雲 紮 藝 木 五 支 廳 不 太 犬 區 歷 尤 友 匹 車 巨 牙 屯 比 互 切 瓦 15 | 止 少 日 中 岡 貝 內 水 見 午 牛 手 毛 氣 升 長 仁 什 片 仆 化 仇 幣 仍 僅 斤 爪 反 介 父 從 今 16 | 兇 分 乏 公 倉 月 氏 勿 欠 風 丹 勻 烏 鳳 勾 文 六 方 火 為 鬥 憶 訂 計 戶 認 心 尺 引 醜 巴 孔 17 | 隊 辦 以 允 予 勸 雙 書 幻 18 | 19 | 玉 刊 示 末 未 擊 打 巧 正 撲 扒 功 扔 去 甘 世 古 節 本 術 可 丙 左 厲 右 石 布 龍 平 滅 軋 東 20 | 卡 北 占 業 舊 帥 歸 且 旦 目 葉 甲 申 叮 電 號 田 由 史 只 央 兄 叼 叫 另 叨 嘆 四 生 失 禾 丘 21 | 付 仗 代 仙 們 儀 白 仔 他 斥 瓜 乎 叢 令 用 甩 印 樂 句 匆 冊 犯 外 處 冬 鳥 務 包 饑 主 市 立 22 | 閃 蘭 半 汁 匯 頭 漢 寧 穴 它 討 寫 讓 禮 訓 必 議 訊 記 永 司 尼 民 出 遼 奶 奴 加 召 皮 邊 發 23 | 孕 聖 對 臺 矛 糾 母 幼 絲 24 | 25 | 式 刑 動 扛 寺 吉 扣 考 托 老 執 鞏 圾 擴 掃 地 揚 場 耳 共 芒 亞 芝 朽 樸 機 權 過 臣 再 協 西 26 | 壓 厭 在 有 百 存 而 頁 匠 誇 奪 灰 達 列 死 成 夾 軌 邪 劃 邁 畢 至 此 貞 師 塵 尖 劣 光 當 早 27 | 吐 嚇 蟲 曲 團 同 吊 吃 因 吸 嗎 嶼 帆 歲 回 豈 剛 則 肉 網 年 朱 先 丟 舌 竹 遷 喬 偉 傳 乒 乓 28 | 休 伍 伏 優 伐 延 件 任 傷 價 份 華 仰 仿 夥 偽 自 血 向 似 後 行 舟 全 會 殺 合 兆 企 眾 爺 傘 29 | 創 肌 朵 雜 危 旬 旨 負 各 名 多 爭 色 壯 沖 冰 莊 慶 亦 劉 齊 交 次 衣 產 決 充 妄 閉 問 闖 羊 30 | 並 關 米 燈 州 汗 汙 江 池 湯 忙 興 宇 守 宅 字 安 講 軍 許 論 農 諷 設 訪 尋 那 迅 盡 導 異 孫 31 | 陣 陽 收 階 陰 防 奸 如 婦 好 她 媽 戲 羽 觀 歡 買 紅 纖 級 約 紀 馳 巡 32 | 33 | 壽 弄 麥 形 進 戒 吞 遠 違 運 扶 撫 壇 技 壞 擾 拒 找 批 扯 址 走 抄 壩 貢 攻 赤 折 抓 扮 搶 孝 34 | 均 拋 投 墳 抗 坑 坊 抖 護 殼 誌 扭 塊 聲 把 報 卻 劫 芽 花 芹 芬 蒼 芳 嚴 蘆 勞 克 蘇 桿 杠 杜 35 | 材 村 杏 極 李 楊 求 更 束 豆 兩 麗 醫 辰 勵 否 還 殲 來 連 步 堅 旱 盯 呈 時 吳 助 縣 裏 呆 園 36 | 曠 圍 呀 
噸 足 郵 男 困 吵 串 員 聽 吩 吹 嗚 吧 吼 別 崗 帳 財 針 釘 告 我 亂 利 禿 秀 私 每 兵 37 | 估 體 何 但 伸 作 伯 伶 傭 低 你 住 位 伴 身 皂 佛 近 徹 役 返 余 希 坐 谷 妥 含 鄰 岔 肝 肚 腸 38 | 龜 免 狂 猶 角 刪 條 卵 島 迎 飯 飲 系 言 凍 狀 畝 況 床 庫 療 應 冷 這 序 辛 棄 冶 忘 閑 間 悶 39 | 判 竈 燦 弟 汪 沙 汽 沃 泛 溝 沒 沈 沈 懷 憂 快 完 宋 宏 牢 究 窮 災 良 證 啟 評 補 初 社 識 訴 40 | 診 詞 譯 君 靈 即 層 尿 尾 遲 局 改 張 忌 際 陸 阿 陳 阻 附 妙 妖 妨 努 忍 勁 雞 驅 純 紗 納 綱 41 | 駁 縱 紛 紙 紋 紡 驢 紐 42 | 43 | 奉 玩 環 武 青 責 現 表 規 抹 攏 拔 揀 擔 坦 押 抽 拐 拖 拍 者 頂 拆 擁 抵 拘 勢 抱 垃 拉 攔 拌 44 | 幸 招 坡 披 撥 擇 擡 其 取 苦 若 茂 蘋 苗 英 範 直 茄 莖 茅 林 枝 杯 櫃 析 板 松 槍 構 傑 述 枕 45 | 喪 或 畫 臥 事 刺 棗 雨 賣 礦 碼 廁 奔 奇 奮 態 歐 壟 妻 轟 頃 轉 斬 輪 軟 到 非 叔 肯 齒 些 虎 46 | 虜 腎 賢 尚 旺 具 果 味 昆 國 昌 暢 明 易 昂 典 固 忠 咐 呼 鳴 詠 呢 岸 巖 帖 羅 幟 嶺 凱 敗 販 47 | 購 圖 釣 制 知 垂 牧 物 乖 刮 稈 和 季 委 佳 侍 供 使 例 版 侄 偵 側 憑 僑 佩 貨 依 的 迫 質 欣 48 | 征 往 爬 彼 徑 所 舍 金 命 斧 爸 采 受 乳 貪 念 貧 膚 肺 肢 腫 脹 朋 股 肥 服 脅 周 昏 魚 兔 狐 49 | 忽 狗 備 飾 飽 飼 變 京 享 店 夜 廟 府 底 劑 郊 廢 凈 盲 放 刻 育 閘 鬧 鄭 券 卷 單 炒 炊 炕 炎 50 | 爐 沫 淺 法 泄 河 沾 淚 油 泊 沿 泡 註 瀉 泳 泥 沸 波 潑 澤 治 怖 性 怕 憐 怪 學 寶 宗 定 宜 審 51 | 宙 官 空 簾 實 試 郎 詩 肩 房 誠 襯 衫 視 話 誕 詢 該 詳 建 肅 錄 隸 居 屆 刷 屈 弦 承 孟 孤 陜 52 | 降 限 妹 姑 姐 姓 始 駕 參 艱 線 練 組 細 駛 織 終 駐 駝 紹 經 貫 53 | 54 | 奏 春 幫 珍 玻 毒 型 掛 封 持 項 垮 挎 城 撓 政 赴 趙 擋 挺 括 拴 拾 挑 指 墊 掙 擠 拼 挖 按 揮 55 | 挪 某 甚 革 薦 巷 帶 草 繭 茶 荒 茫 蕩 榮 故 胡 南 藥 標 枯 柄 棟 相 查 柏 柳 柱 柿 欄 樹 要 鹹 56 | 威 歪 研 磚 厘 厚 砌 砍 面 耐 耍 牽 殘 殃 輕 鴉 皆 背 戰 點 臨 覽 豎 省 削 嘗 是 盼 眨 哄 顯 啞 57 | 冒 映 星 昨 畏 趴 胃 貴 界 虹 蝦 蟻 思 螞 雖 品 咽 罵 嘩 咱 響 哈 咬 咳 哪 炭 峽 罰 賤 貼 骨 鈔 58 | 鐘 鋼 鑰 鉤 卸 缸 拜 看 矩 怎 牲 選 適 秒 香 種 秋 科 重 復 竿 段 便 倆 貸 順 修 保 促 侮 儉 俗 59 | 俘 信 皇 泉 鬼 侵 追 俊 盾 待 律 很 須 敘 劍 逃 食 盆 膽 勝 胞 胖 脈 勉 狹 獅 獨 狡 獄 狠 貿 怨 60 | 急 饒 蝕 餃 餅 彎 將 獎 哀 亭 亮 度 跡 庭 瘡 瘋 疫 疤 姿 親 音 帝 施 聞 閥 閣 差 養 美 姜 叛 送 61 | 類 迷 前 首 逆 總 煉 炸 炮 爛 剃 潔 洪 灑 澆 濁 洞 測 洗 活 派 洽 染 濟 洋 洲 渾 濃 津 恒 恢 恰 62 | 惱 恨 舉 覺 宣 室 宮 憲 突 穿 竊 客 冠 語 扁 襖 祖 神 祝 誤 誘 說 誦 墾 退 既 屋 晝 費 陡 眉 孩 63 | 除 險 院 娃 姥 姨 姻 嬌 怒 架 賀 盈 勇 怠 柔 壘 綁 絨 結 繞 驕 繪 給 絡 駱 絕 絞 統 64 | 65 | 耕 耗 艷 泰 珠 班 素 蠶 頑 盞 匪 撈 栽 捕 振 載 趕 起 鹽 捎 捏 埋 捉 捆 捐 損 都 哲 逝 撿 換 挽 66 | 熱 恐 壺 挨 恥 耽 恭 蓮 莫 荷 獲 晉 惡 真 框 桂 檔 桐 株 橋 桃 格 校 核 樣 根 索 哥 速 逗 栗 配 67 | 翅 辱 唇 夏 礎 破 原 套 逐 烈 殊 顧 轎 較 頓 斃 致 柴 桌 慮 監 緊 黨 曬 眠 曉 鴨 晃 晌 暈 蚊 哨 68 | 哭 恩 喚 啊 唉 罷 峰 圓 賊 賄 錢 鉗 鉆 鐵 鈴 鉛 缺 氧 特 犧 造 乘 敵 秤 租 積 秧 秩 稱 秘 透 筆 69 | 笑 筍 債 借 值 倚 傾 倒 倘 俱 倡 候 俯 倍 倦 健 臭 射 躬 息 徒 徐 艦 艙 般 航 途 拿 爹 愛 頌 翁 70 | 脆 脂 胸 胳 臟 膠 腦 貍 狼 逢 留 皺 餓 戀 槳 漿 衰 高 席 準 座 脊 癥 病 疾 疼 疲 效 離 唐 資 涼 71 | 站 剖 競 部 旁 旅 畜 閱 羞 瓶 拳 粉 料 益 兼 烤 烘 煩 燒 燭 煙 遞 濤 浙 澇 酒 涉 消 浩 海 塗 浴 72 | 浮 流 潤 浪 浸 漲 燙 湧 悟 悄 悔 悅 害 寬 家 宵 宴 賓 窄 容 宰 案 請 朗 諸 讀 扇 襪 袖 袍 被 祥 73 | 課 誰 調 冤 諒 談 誼 剝 懇 展 劇 屑 弱 陵 陶 陷 陪 娛 娘 通 能 難 預 桑 絹 繡 驗 繼 74 | 75 | 球 理 捧 堵 描 域 掩 捷 排 掉 堆 推 掀 授 教 掏 掠 培 接 控 探 據 掘 職 基 著 勒 黃 萌 蘿 菌 菜 76 | 萄 菊 萍 菠 營 械 夢 梢 梅 檢 梳 梯 桶 救 副 票 戚 爽 聾 襲 盛 雪 輔 輛 虛 雀 堂 常 匙 晨 睜 瞇 77 | 眼 懸 野 啦 晚 啄 距 躍 略 蛇 累 唱 患 唯 崖 嶄 崇 圈 銅 鏟 銀 甜 梨 犁 移 笨 籠 笛 符 第 敏 做 78 | 袋 悠 償 偶 偷 您 售 停 偏 假 得 銜 盤 船 斜 盒 鴿 悉 欲 彩 領 腳 脖 臉 脫 象 夠 猜 豬 獵 貓 猛 79 | 餡 館 湊 減 毫 麻 癢 痕 廊 康 庸 鹿 盜 章 竟 商 族 旋 望 率 著 蓋 粘 粗 粒 斷 剪 獸 清 添 淋 淹 80 | 渠 漸 混 漁 淘 液 淡 深 婆 梁 滲 情 惜 慚 悼 懼 惕 驚 慘 慣 寇 寄 宿 窯 密 謀 謊 禍 謎 逮 敢 屠 81 | 彈 隨 蛋 隆 隱 婚 嬸 頸 績 緒 續 騎 繩 維 綿 綢 綠 82 | 83 | 琴 斑 替 款 堪 搭 塔 越 趁 趨 超 提 堤 博 揭 喜 插 揪 搜 煮 援 裁 擱 摟 攪 握 揉 斯 期 欺 聯 散 84 | 惹 葬 葛 董 葡 敬 蔥 落 朝 辜 葵 棒 棋 植 森 椅 椒 棵 棍 棉 棚 棕 惠 惑 逼 廚 廈 硬 確 雁 殖 裂 85 | 雄 暫 雅 輩 悲 紫 輝 敞 賞 掌 晴 暑 最 量 噴 晶 喇 遇 喊 景 踐 跌 跑 遺 蛙 蛛 蜓 喝 餵 喘 喉 幅 86 | 帽 賭 賠 黑 鑄 鋪 鏈 銷 鎖 鋤 鍋 銹 鋒 銳 短 智 毯 鵝 剩 稍 程 稀 稅 筐 等 築 策 篩 筒 答 筋 箏 87 | 傲 傅 牌 堡 集 焦 傍 儲 奧 街 懲 禦 循 艇 舒 番 釋 禽 臘 脾 腔 魯 猾 猴 然 饞 裝 蠻 就 痛 童 闊 88 | 善 羨 普 糞 尊 道 曾 焰 港 湖 渣 濕 溫 渴 滑 灣 渡 遊 滋 溉 憤 慌 惰 愧 愉 慨 割 寒 富 竄 窩 窗 89 | 遍 裕 褲 裙 謝 謠 謙 屬 屢 強 粥 疏 隔 隙 絮 嫂 登 緞 緩 編 騙 緣 90 | 91 | 瑞 魂 肆 攝 摸 填 搏 塌 鼓 擺 攜 搬 搖 搞 塘 攤 蒜 勤 鵲 藍 墓 幕 蓬 蓄 蒙 蒸 獻 禁 楚 想 槐 榆 92 | 樓 概 賴 酬 感 礙 碑 碎 碰 碗 碌 雷 零 霧 雹 輸 督 齡 鑒 睛 睡 睬 鄙 愚 暖 盟 歇 暗 照 跨 跳 跪 93 | 路 跟 遣 蛾 蜂 嗓 置 罪 罩 錯 錫 鑼 錘 錦 鍵 鋸 矮 辭 稠 愁 籌 簽 簡 毀 舅 鼠 催 傻 像 躲 微 愈 94 | 遙 腰 腥 腹 騰 腿 觸 解 醬 痰 廉 新 韻 意 糧 數 煎 塑 慈 煤 煌 滿 漠 源 濾 濫 滔 溪 溜 滾 濱 粱 95 | 灘 慎 譽 塞 
謹 福 群 殿 辟 障 嫌 嫁 疊 縫 纏 96 | 97 | 靜 碧 璃 墻 撇 嘉 摧 截 誓 境 摘 摔 聚 蔽 慕 暮 蔑 模 榴 榜 榨 歌 遭 酷 釀 酸 磁 願 需 弊 裳 顆 98 | 嗽 蜻 蠟 蠅 蜘 賺 鍬 鍛 舞 穩 算 籮 管 僚 鼻 魄 貌 膜 膊 膀 鮮 疑 饅 裹 敲 豪 膏 遮 腐 瘦 辣 竭 99 | 端 旗 精 歉 熄 熔 漆 漂 漫 滴 演 漏 慢 寨 賽 察 蜜 譜 嫩 翠 熊 凳 騾 縮 100 | 101 | 慧 撕 撒 趣 趟 撐 播 撞 撤 增 聰 鞋 蕉 蔬 橫 槽 櫻 橡 飄 醋 醉 震 黴 瞞 題 暴 瞎 影 踢 踏 踩 蹤 102 | 蝶 蝴 囑 墨 鎮 靠 稻 黎 稿 稼 箱 箭 篇 僵 躺 僻 德 艘 膝 膛 熟 摩 顏 毅 糊 遵 潛 潮 懂 額 慰 劈 103 | 104 | 操 燕 薯 薪 薄 顛 橘 整 融 醒 餐 嘴 蹄 器 贈 默 鏡 贊 籃 邀 衡 膨 雕 磨 凝 辨 辯 糖 糕 燃 澡 激 105 | 懶 壁 避 繳 106 | 107 | 戴 擦 鞠 藏 霜 霞 瞧 蹈 螺 穗 繁 辮 贏 糟 糠 燥 臂 翼 驟 108 | 109 | 鞭 覆 蹦 鐮 翻 鷹 110 | 111 | 警 攀 蹲 顫 瓣 爆 疆 112 | 113 | 壤 耀 躁 嚼 嚷 籍 魔 灌 114 | 115 | 蠢 霸 露 116 | 117 | 囊 118 | 119 | 罐 120 | ''' 121 | 122 | data = trim_string(data) 123 | 124 | -------------------------------------------------------------------------------- /python/deep_ocr/langs/digits.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.utils import trim_string 4 | 5 | data = u''' 6 | 0123456789 7 | ''' 8 | 9 | data = trim_string(data) 10 | -------------------------------------------------------------------------------- /python/deep_ocr/langs/eng.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.utils import trim_string 4 | 5 | data = u''' 6 | abcdefghijklmnopqrstuvwxyz 7 | ABCDEFGHIJKLMNOPQRSTUVWXYZ 8 | ''' 9 | 10 | data = trim_string(data) 11 | -------------------------------------------------------------------------------- /python/deep_ocr/langs/id_num.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.utils import trim_string 4 | 5 | data = u''' 6 | 0123456789X 7 | ''' 8 | 9 | data = trim_string(data) -------------------------------------------------------------------------------- /python/deep_ocr/langs/lower_eng.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.utils import trim_string 4 | 5 | data = u''' 6 | abcdefghijklmnopqrstuvwxyz 7 | ''' 8 | 9 | data = trim_string(data) 10 | -------------------------------------------------------------------------------- /python/deep_ocr/langs/test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.utils import trim_string 4 | 5 | data = u''' 6 | 一二三四 7 | ''' 8 | 9 | data = trim_string(data) 10 | -------------------------------------------------------------------------------- /python/deep_ocr/langs/upper_eng.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from deep_ocr.utils import trim_string 4 | 5 | data = u''' 6 | abcdefghijklmnopqrstuvwxyz 7 | ABCDEFGHIJKLMNOPQRSTUVWXYZ 8 | ''' 9 | 10 | data = trim_string(data) 11 | -------------------------------------------------------------------------------- /python/deep_ocr/reco_text_line.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import cv2 5 | from deep_ocr.cv2_img_proc import PreprocessCropZeros 6 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatioFillBG 7 | from deep_ocr.utils import extract_peek_ranges_from_array 8 | from deep_ocr.utils import merge_peek_ranges_mini_non_digits 9 | 10 | 11 | class RectImageClassifier(object): 12 | def __init__(self, caffe_cls, bin_image, char_set, 13 | caffe_cls_width=64, 
caffe_cls_height=64):
14 | self.caffe_cls = caffe_cls
15 | self.bin_image = bin_image
16 | self.cache_res = {}
17 | self.char_set = char_set
18 | self.caffe_cls_width = caffe_cls_width
19 | self.caffe_cls_height = caffe_cls_height
20 | 
21 | def _do(self, rects, boundary):
22 | rects_to_reco = []
23 | for rect in rects:
24 | key = (rect, boundary)
25 | if key not in self.cache_res:
26 | rects_to_reco.append(rect)
27 | image = self.bin_image
28 | char_imgs = []
29 | crop_zeros = PreprocessCropZeros()
30 | resize_keep_ratio = PreprocessResizeKeepRatioFillBG(
31 | self.caffe_cls_width, self.caffe_cls_height,
32 | fill_bg=False, margin=4)
33 | for i, rect in enumerate(rects_to_reco):
34 | x, y, w, h = rect
35 | char_img = image[y:y+h+1, x:x+w+1]
36 | char_img = crop_zeros.do(char_img)
37 | char_img = resize_keep_ratio.do(char_img)
38 | char_imgs.append(char_img)
39 | np_char_imgs = np.asarray(char_imgs)
40 | output_tag_to_max_proba = self.caffe_cls.predict_cv2_imgs(np_char_imgs)
41 | for i, item in enumerate(output_tag_to_max_proba):
42 | cur_rect = rects_to_reco[i]
43 | key = (cur_rect, boundary)
44 | if len(self.char_set["set"]) > 0:
45 | for char_p in item:
46 | if char_p[0] in self.char_set["set"]:
47 | self.cache_res[key] = char_p
48 | break
49 | else:  # for-else: no allowed char predicted, fall back to the raw best guess
50 | self.cache_res[key] = item[0]
51 | else:
52 | self.cache_res[key] = item[0]
53 | 
54 | def do(self, rects, boundary):
55 | self._do(rects, boundary)
56 | ocr_res = ""
57 | for rect in rects:
58 | key = (rect, boundary)
59 | ocr_res += self.cache_res[key][0]
60 | return ocr_res
61 | 
62 | def do_images_maxproba(self, rects, boundaries, bin_images):
63 | size = len(boundaries)
64 | ## generate cache
65 | for i in range(size):
66 | boundary = boundaries[i]
67 | bin_image = bin_images[i]
68 | self.bin_image = bin_image
69 | self._do(rects, boundary)
70 | 
71 | mat_proba = []
72 | for rect in rects:
73 | row_probabilities = []
74 | for i in range(size):
75 | boundary = boundaries[i]
76 | key = (rect, boundary)
77 | row_probabilities.append(self.cache_res[key])
78 | mat_proba.append(row_probabilities)
79 | 
80 | ocr_res = ""
81 | n = len(mat_proba)
82 | for i in range(n):
83 | mat_proba[i] = sorted(mat_proba[i], key=lambda x: -x[1])
84 | ocr_res += mat_proba[i][0][0]
85 | return ocr_res
86 | 
87 | 
88 | class RecoTextLine(object):
89 | def __init__(self, rect_img_clf,
90 | char_set=None,
91 | debug_path=None):
92 | self.char_set = char_set
93 | self.debug_path = debug_path
94 | self.rect_img_clf = rect_img_clf
95 | 
96 | def convert_peek_ranges_into_rects(self, peek_ranges, line_rect):
97 | base_x, base_y, base_w, base_h = line_rect
98 | rects = []
99 | for peek_range in peek_ranges:
100 | x = base_x + peek_range[0]
101 | y = base_y
102 | w = peek_range[1] - peek_range[0]
103 | h = base_h
104 | rect = (x, y, w, h)
105 | rects.append(rect)
106 | return rects
107 | 
108 | def do(self, boundary2binimgs, line_rect, caffe_cls):
109 | boundaries, bin_images = [], []
110 | for boundary, bin_image in boundary2binimgs:
111 | boundaries.append(boundary)
112 | bin_images.append(bin_image)
113 | 
114 | bin_image = bin_images[-1]
115 | self.rect_img_clf.caffe_cls = caffe_cls
116 | self.rect_img_clf.bin_image = None
117 | x, y, w, h = line_rect
118 | page_w = bin_image.shape[1]
119 | img_line = bin_image[y: y + h, x: x + w]
120 | char_w = page_w * self.char_set["width"]
121 | ocr_res = None
122 | ## first segmentation
123 | vertical_sum = np.sum(img_line, axis=0)
124 | peek_ranges = extract_peek_ranges_from_array(vertical_sum, minimun_val=10, minimun_range=2)
125 | 
126 | rects = self.convert_peek_ranges_into_rects(peek_ranges, line_rect)
127 | self.rect_img_clf.char_set = self.char_set
128 | ocr_res = self.rect_img_clf.do_images_maxproba(rects, boundaries, bin_images)
129 | if ocr_res is not None:
130 | print("before merge...")
131 | #print(ocr_res.encode("utf-8"))
132 | print(ocr_res)
133 | peek_ranges = merge_peek_ranges_mini_non_digits(peek_ranges, char_w, ocr_res)
134 | rects = self.convert_peek_ranges_into_rects(peek_ranges, line_rect)
135 | ocr_res = self.rect_img_clf.do_images_maxproba(rects, boundaries, bin_images)
136 | print("after merge...")
137 | #print(ocr_res.encode("utf-8"))
138 | print(ocr_res)
139 | 
140 | # ## end segmentation
141 | # if self.debug_path is not None:
142 | # path_debug_image_line = self.debug_path+"_line.jpg"
143 | # debug_img_line = np.copy(bin_image)
144 | # for rect in rects:
145 | # x = rect[0]
146 | # y = rect[1]
147 | # w = rect[2]
148 | # h = rect[3]
149 | # cv2.rectangle(debug_img_line,
150 | # (x, y),
151 | # (x + w, y + h),
152 | # (255,255,255))
153 | # cv2.imwrite(path_debug_image_line, debug_img_line)
154 | return ocr_res
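RecoTextLine cuts a line into characters with a vertical projection profile: the binary line image is summed down each column, and runs of columns whose sum clears a threshold become character ranges (extract_peek_ranges_from_array in utils.py below), which convert_peek_ranges_into_rects then turns into rectangles. A toy, self-contained illustration of that projection idea, with made-up values:

import numpy as np

# toy binary line: two "characters" occupy columns 0-2 and 5-6
line = np.array([[1, 1, 0, 0, 0, 1, 1, 0],
                 [1, 1, 1, 0, 0, 1, 1, 0]])
vertical_sum = np.sum(line, axis=0)  # -> [2 2 1 0 0 2 2 0]
# extract_peek_ranges_from_array(vertical_sum, minimun_val=0,
# minimun_range=2) yields [(0, 3), (5, 7)]: one range per character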
-------------------------------------------------------------------------------- /python/deep_ocr/utils.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import numpy as np
4 | 
5 | 
6 | def trim_string(string_data):
7 | string_data = string_data.replace(" ", "")
8 | string_data = string_data.replace(" ", "")
9 | string_data = string_data.replace("\n", "")
10 | ### dedupe characters (note: set() does not preserve their order)
11 | string_data = "".join(list(set(string_data)))
12 | return string_data
13 | 
14 | 
15 | def merge_peek_ranges(peek_ranges, char_w):
16 | new_peek_ranges = []
17 | l = len(peek_ranges)
18 | cur_range = peek_ranges[0]
19 | for i in range(1, l):
20 | if char_w > cur_range[1] - cur_range[0]:
21 | cur_range = (cur_range[0], peek_ranges[i][1])
22 | else:
23 | new_peek_ranges.append(cur_range)
24 | cur_range = peek_ranges[i]
25 | new_peek_ranges.append(cur_range)
26 | return new_peek_ranges
27 | 
28 | 
29 | def merge_peek_ranges_mini_non_digits(peek_ranges, char_w, ocr_res):
30 | digits = u"0123456789"
31 | i = 0
32 | n = len(peek_ranges)
33 | new_peek_ranges = []
34 | while i < n:
35 | peek_range = peek_ranges[i]
36 | x = peek_range[0]
37 | w = peek_range[1] - x
38 | j = 1
39 | while w < char_w and (i + j) < n and \
40 | (ocr_res[i+j-1] not in digits) and \
41 | (ocr_res[i+j] not in digits):
42 | w = peek_ranges[i+j][1] - x
43 | j += 1
44 | new_peek_ranges.append((x, x+w))
45 | i += j
46 | return new_peek_ranges
47 | 
48 | 
49 | def extract_peek_ranges_from_array(array_vals, minimun_val=10, minimun_range=2):
50 | start_i = None
51 | end_i = None
52 | peek_ranges = []
53 | for i, val in enumerate(array_vals):
54 | if val > minimun_val and start_i is None:
55 | start_i = i
56 | elif val > minimun_val and i == (len(array_vals) - 1) \
57 | and start_i is not None:
58 | end_i = i
59 | if end_i - start_i >= minimun_range:
60 | peek_ranges.append((start_i, end_i))
61 | start_i = None
62 | end_i = None
63 | elif val > minimun_val and start_i is not None:
64 | pass
65 | elif val <= minimun_val and start_i is not None:
66 | end_i = i
67 | if end_i - start_i >= minimun_range:
68 | peek_ranges.append((start_i, end_i))
69 | start_i = None
70 | end_i = None
71 | elif val <= minimun_val and start_i is None:
72 | pass
73 | else:
74 | raise ValueError("cannot parse this case...")
75 | return peek_ranges
76 | 
77 | def compute_median_w_from_ranges(peek_ranges):
78 | widthes = []
79 | 
for peek_range in peek_ranges: 80 | w = peek_range[1] - peek_range[0] + 1 81 | widthes.append(w) 82 | widthes = np.asarray(widthes) 83 | median_w = np.median(widthes) 84 | return median_w 85 | 86 | def median_split_ranges(peek_ranges): 87 | new_peek_ranges = [] 88 | widthes = [] 89 | for peek_range in peek_ranges: 90 | w = peek_range[1] - peek_range[0] + 1 91 | widthes.append(w) 92 | widthes = np.asarray(widthes) 93 | median_w = np.median(widthes) 94 | for i, peek_range in enumerate(peek_ranges): 95 | num_char = int(round(widthes[i]/median_w, 0)) 96 | if num_char > 1: 97 | char_w = float(widthes[i] / num_char) 98 | for i in range(num_char): 99 | start_point = peek_range[0] + int(i * char_w) 100 | end_point = peek_range[0] + int((i + 1) * char_w) 101 | new_peek_ranges.append((start_point, end_point)) 102 | else: 103 | new_peek_ranges.append(peek_range) 104 | return new_peek_ranges 105 | 106 | def merge_chars_into_line_segments(ranges2d): 107 | for i, ranges in enumerate(ranges2d): 108 | m_w = compute_median_w_from_ranges(ranges) 109 | new_ranges = [] 110 | for j, range_pair in enumerate(ranges): 111 | if len(new_ranges) == 0: 112 | new_ranges.append(range_pair) 113 | else: 114 | start_x, end_x = range_pair 115 | pre_start_x, pre_end_x = new_ranges[-1] 116 | if start_x > pre_start_x: 117 | if start_x - pre_end_x < m_w *2: 118 | new_ranges[-1] = (pre_start_x, end_x) 119 | else: 120 | new_ranges.append(range_pair) 121 | ranges2d[i] = new_ranges 122 | return ranges2d 123 | -------------------------------------------------------------------------------- /python/deep_ocr_id_card_reco: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | 6 | import argparse 7 | from argparse import RawTextHelpFormatter 8 | import os 9 | import shutil 10 | import cv2 11 | from deep_ocr.caffe_clf import CaffeClsBuilder 12 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatio 13 | from deep_ocr.cv2_img_proc import PreprocessBackgroundMask 14 | from deep_ocr.id_cards.segmentation import Segmentation 15 | from deep_ocr.id_cards.char_set import CharSet 16 | from deep_ocr.reco_text_line import RecoTextLine 17 | from deep_ocr.reco_text_line import RectImageClassifier 18 | 19 | 20 | 21 | if __name__ == "__main__": 22 | 23 | description = ''' 24 | # Docker config 25 | CAFFE_MODEL=/opt/deep_ocr/data/trained_models/mnist_model 26 | DEEP_OCR_ROOT=/opt/deep_ocr 27 | WORKSPACE=/workspace 28 | 29 | # PC 30 | CAFFE_MODEL=/root/data/deep_ocr_trained_models/mnist_model 31 | DEEP_OCR_ROOT=/root/workspace/deep_ocr 32 | WORKSPACE=/root/data/deep_ocr_workspace 33 | 34 | deep_ocr_id_card_reco --img $DEEP_OCR_ROOT/data/id_card_img.jpg \ 35 | --debug_path /tmp/debug \ 36 | --cls_sim ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 \ 37 | --cls_ua ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 38 | 39 | deep_ocr_id_card_reco --img ~/data/id_card_front \ 40 | --debug_path /tmp/debug \ 41 | --cls_sim ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 \ 42 | --cls_ua ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 43 | ''' 44 | 45 | parser = argparse.ArgumentParser( 46 | description=description, formatter_class=RawTextHelpFormatter) 47 | parser.add_argument('--img', dest='img', 48 | default=None, required=True, 49 | help='id card image to reco') 50 | parser.add_argument('--debug_path', dest='debug_path', 51 | default=None, required=False, 52 | help='debug path') 53 | 
parser.add_argument('--cls_sim', dest='cls_sim', 54 | default=None, required=False, 55 | help='cls sim') 56 | parser.add_argument('--cls_ua', dest='cls_ua', 57 | default=None, required=False, 58 | help='cls ua') 59 | 60 | options = parser.parse_args() 61 | path_img = os.path.expanduser(options.img) 62 | debug_path = os.path.expanduser(options.debug_path) 63 | if debug_path is not None: 64 | if os.path.isdir(debug_path): 65 | shutil.rmtree(debug_path) 66 | os.makedirs(debug_path) 67 | 68 | cls_dir_sim = os.path.expanduser(options.cls_sim) 69 | cls_dir_ua = os.path.expanduser(options.cls_ua) 70 | 71 | caffe_cls_builder = CaffeClsBuilder() 72 | cls_sim = caffe_cls_builder.build(cls_dir=cls_dir_sim,) 73 | cls_ua = caffe_cls_builder.build(cls_dir=cls_dir_ua,) 74 | caffe_classifiers = {"sim": cls_sim, "ua": cls_ua} 75 | 76 | seg_norm_width = 600 77 | seg_norm_height = 600 78 | preprocess_resize = PreprocessResizeKeepRatio( 79 | seg_norm_width, seg_norm_height) 80 | id_card_img = cv2.imread(path_img) 81 | id_card_img = preprocess_resize.do(id_card_img) 82 | segmentation = Segmentation(debug_path) 83 | key_to_segmentation = segmentation.do(id_card_img) 84 | 85 | boundaries = [ 86 | ((0, 0, 0), (100, 100, 100)), 87 | ] 88 | boundary2binimgs = [] 89 | for boundary in boundaries: 90 | preprocess_bg_mask = PreprocessBackgroundMask(boundary) 91 | id_card_img_mask = preprocess_bg_mask.do(id_card_img) 92 | boundary2binimgs.append((boundary, id_card_img_mask)) 93 | 94 | char_set = CharSet() 95 | char_set_data = char_set.get() 96 | 97 | rect_img_clf = RectImageClassifier( 98 | None, 99 | None, 100 | char_set, 101 | caffe_cls_width=64, 102 | caffe_cls_height=64) 103 | 104 | reco_text_line = RecoTextLine(rect_img_clf) 105 | 106 | key_ocr_res = {} 107 | for key in key_to_segmentation: 108 | key_ocr_res[key] = [] 109 | print("="*64) 110 | print(key) 111 | for i, segment in enumerate(key_to_segmentation[key]): 112 | if debug_path is not None: 113 | line_debug_path = "key_%s_%i" % (key, i) 114 | line_debug_path = os.path.join(debug_path, line_debug_path) 115 | reco_text_line.debug_path = line_debug_path 116 | reco_text_line.char_set = char_set_data[key] 117 | caffe_cls = caffe_classifiers[ 118 | char_set_data[key]["caffe_cls"]] 119 | ocr_res = reco_text_line.do(boundary2binimgs, segment, caffe_cls) 120 | key_ocr_res[key].append(ocr_res) 121 | print("ocr res:") 122 | for key in key_ocr_res: 123 | print("="*60) 124 | print(key) 125 | for res_i in key_ocr_res[key]: 126 | print(res_i.encode("utf-8")) 127 | 128 | if debug_path is not None: 129 | path_debug_image_mask = os.path.join( 130 | debug_path, "reco_debug_01_image_mask.jpg") 131 | cv2.imwrite(path_debug_image_mask, id_card_img_mask) 132 | -------------------------------------------------------------------------------- /python/deep_ocr_id_card_reco.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import print_function 4 | 5 | import argparse 6 | from argparse import RawTextHelpFormatter 7 | import os 8 | import shutil 9 | import cv2 10 | from deep_ocr.caffe_clf import CaffeClsBuilder 11 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatio 12 | from deep_ocr.cv2_img_proc import PreprocessBackgroundMask 13 | from deep_ocr.id_cards.segmentation import Segmentation 14 | from deep_ocr.id_cards.char_set import CharSet 15 | from deep_ocr.reco_text_line import RecoTextLine 16 | from deep_ocr.reco_text_line import RectImageClassifier 17 | 18 | if __name__ == "__main__": 19 | 
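# Same pipeline as bin/deep_ocr_id_card_reco, but with the CLI options
# replaced by hardcoded local paths (adjust them to your machine):
# build the two caffe classifiers, normalize the card image to 600x600,
# segment it into labeled fields, then recognize each field line by line.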
20 | path_img = os.path.expanduser("/home/user/Projects/data/test_id_card/hehe4.jpg") 21 | debug_path = os.path.expanduser("/home/user/Projects/data/debug") 22 | if debug_path is not None: 23 | if os.path.isdir(debug_path): 24 | shutil.rmtree(debug_path) 25 | os.makedirs(debug_path) 26 | 27 | cls_dir_sim = os.path.expanduser("/home/user/Projects/deep_ocr_workspace/data/chongdata_caffe_cn_sim_digits_64_64") 28 | #cls_dir_ua = os.path.expanduser("/home/user/Projects/deep_ocr_workspace/data/chongdata_caffe_cn_sim_digits_64_64") 29 | cls_dir_ua = os.path.expanduser("/home/user/Projects/data/caffe_dataset_id_num") 30 | 31 | caffe_cls_builder = CaffeClsBuilder() 32 | cls_sim = caffe_cls_builder.build(cls_dir=cls_dir_sim,) 33 | cls_ua = caffe_cls_builder.build(cls_dir=cls_dir_ua,) 34 | caffe_classifiers = {"sim": cls_sim, "ua": cls_ua} 35 | 36 | seg_norm_width = 600 37 | seg_norm_height = 600 38 | preprocess_resize = PreprocessResizeKeepRatio(seg_norm_width, seg_norm_height) 39 | id_card_img = cv2.imread(path_img) 40 | id_card_img = preprocess_resize.do(id_card_img) 41 | segmentation = Segmentation(debug_path) 42 | key_to_segmentation = segmentation.do(id_card_img) 43 | 44 | boundaries = [ 45 | ((0, 0, 0), (100, 100, 100)), 46 | ] 47 | boundary2binimgs = [] 48 | for boundary in boundaries: 49 | preprocess_bg_mask = PreprocessBackgroundMask(boundary) 50 | id_card_img_mask = preprocess_bg_mask.do(id_card_img) 51 | boundary2binimgs.append((boundary, id_card_img_mask)) 52 | 53 | char_set = CharSet() 54 | char_set_data = char_set.get() 55 | 56 | rect_img_clf = RectImageClassifier( 57 | None, 58 | None, 59 | char_set, 60 | caffe_cls_width=64, 61 | caffe_cls_height=64) 62 | 63 | reco_text_line = RecoTextLine(rect_img_clf) 64 | 65 | key_ocr_res = {} 66 | for key in key_to_segmentation: 67 | key_ocr_res[key] = [] 68 | # ============== divider 69 | print("="*64) 70 | print(key) 71 | for i, segment in enumerate(key_to_segmentation[key]): 72 | if debug_path is not None: 73 | line_debug_path = "key_%s_%i" % (key, i) 74 | line_debug_path = os.path.join(debug_path, line_debug_path) 75 | reco_text_line.debug_path = line_debug_path 76 | reco_text_line.char_set = char_set_data[key] 77 | caffe_cls = caffe_classifiers[char_set_data[key]["caffe_cls"]] 78 | ocr_res = reco_text_line.do(boundary2binimgs, segment, caffe_cls) 79 | key_ocr_res[key].append(ocr_res) 80 | print("ocr res:") 81 | for key in key_ocr_res: 82 | print("="*60) 83 | print(key) 84 | for res_i in key_ocr_res[key]: 85 | #print(type(res_i)) 86 | print(res_i) 87 | #print(res_i.encode("utf-8")) 88 | 89 | 90 | if debug_path is not None: 91 | path_debug_image_mask = os.path.join(debug_path, "reco_debug_01_image_mask.jpg") 92 | cv2.imwrite(path_debug_image_mask, id_card_img_mask) 93 | -------------------------------------------------------------------------------- /python/deep_ocr_make_caffe_dataset: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | from __future__ import print_function
5 | 
6 | 
7 | import argparse
8 | from argparse import RawTextHelpFormatter
9 | import fnmatch
10 | import os
11 | import cv2
12 | import json
13 | import random
14 | import numpy as np
15 | import shutil
16 | from deep_ocr.lang_aux import LangCharsGenerate
17 | from deep_ocr.lang_aux import FontCheck
18 | from deep_ocr.lang_aux import Font2Image
19 | 
20 | 
21 | 
22 | if __name__ == "__main__":
23 | 
24 | description = '''
25 | deep_ocr_make_caffe_dataset --out_caffe_dir /root/data/caffe_dataset \
26 | --font_dir /root/workspace/deep_ocr_fonts/chinese_fonts \
27 | --width 30 --height 30 --margin 4 --langs lower_eng
28 | '''
29 | 
30 | parser = argparse.ArgumentParser(
31 | description=description, formatter_class=RawTextHelpFormatter)
32 | parser.add_argument('--out_caffe_dir', dest='out_caffe_dir',
33 | default=None, required=True,
34 | help='write a caffe dir')
35 | parser.add_argument('--font_dir', dest='font_dir',
36 | default=None, required=True,
37 | help='font dir to produce images')
38 | parser.add_argument('--test_ratio', dest='test_ratio',
39 | default=0.3, required=False,
40 | help='fraction of fonts reserved for the test set')
41 | parser.add_argument('--width', dest='width',
42 | default=None, required=True,
43 | help='width')
44 | parser.add_argument('--height', dest='height',
45 | default=None, required=True,
46 | help='height')
47 | parser.add_argument('--no_crop', dest='no_crop',
48 | default=False, required=False,
49 | help='do not crop glyphs to their bounding box', action='store_true')
50 | parser.add_argument('--margin', dest='margin',
51 | default=0, required=False,
52 | help='margin (in pixels) around each glyph', )
53 | parser.add_argument('--langs', dest='langs',
54 | default="chi_sim", required=True,
55 | help='deep_ocr.langs.*, e.g. chi_sim, chi_tra, digits...')
56 | options = parser.parse_args()
57 | 
58 | out_caffe_dir = os.path.expanduser(options.out_caffe_dir)
59 | font_dir = os.path.expanduser(options.font_dir)
60 | test_ratio = float(options.test_ratio)
61 | width = int(options.width)
62 | height = int(options.height)
63 | need_crop = not options.no_crop
64 | margin = int(options.margin)
65 | langs = options.langs
66 | 
67 | image_dir_name = "images"
68 | 
69 | images_dir = os.path.join(out_caffe_dir, image_dir_name)
70 | if os.path.isdir(images_dir):
71 | shutil.rmtree(images_dir)
72 | os.makedirs(images_dir)
73 | 
74 | lang_chars_gen = LangCharsGenerate(langs)
75 | lang_chars = lang_chars_gen.do()
76 | font_check = FontCheck(lang_chars)
77 | 
78 | y_to_tag = {}
79 | y_tag_json_file = os.path.join(out_caffe_dir, "y_tag.json")
80 | y_tag_text_file = os.path.join(out_caffe_dir, "y_tag.txt")
81 | path_train = os.path.join(out_caffe_dir, "train.txt")
82 | path_test = os.path.join(out_caffe_dir, "test.txt")
83 | 
84 | 
85 | verified_font_paths = []
86 | ## search for file fonts
87 | for font_name in os.listdir(font_dir):
88 | path_font_file = os.path.join(font_dir, font_name)
89 | if font_check.do(path_font_file):
90 | verified_font_paths.append(path_font_file)
91 | 
92 | train_list = []
93 | test_list = []
94 | max_train_i = int(len(verified_font_paths) * (1.0 - test_ratio))
95 | 
96 | font2image = Font2Image(width, height, need_crop, margin)
97 | 
98 | for i, verified_font_path in enumerate(verified_font_paths):
99 | is_train = True
100 | if i >= max_train_i:
101 | is_train = False
102 | for j, char in enumerate(lang_chars):
103 | if j not in y_to_tag:
104 | y_to_tag[j] = char
105 | char_dir = os.path.join(images_dir, "%d" % j)
106 | if not os.path.isdir(char_dir):
107 | 
98 |     for i, verified_font_path in enumerate(verified_font_paths):
99 |         is_train = True
100 |         if i >= max_train_i:
101 |             is_train = False
102 |         for j, char in enumerate(lang_chars):
103 |             if j not in y_to_tag:
104 |                 y_to_tag[j] = char
105 |             char_dir = os.path.join(images_dir, "%d" % j)
106 |             if not os.path.isdir(char_dir):
107 |                 os.makedirs(char_dir)
108 |             path_image = os.path.join(
109 |                 char_dir,
110 |                 "%d_%s.jpg" % (i, os.path.basename(verified_font_path)))
111 |             relative_path_image = os.path.join(
112 |                 image_dir_name, "%d" % j,
113 |                 "%d_%s.jpg" % (i, os.path.basename(verified_font_path))
114 |             )
115 |             font2image.do(verified_font_path, char, path_image)
116 |             if is_train:
117 |                 train_list.append((relative_path_image, j))
118 |             else:
119 |                 test_list.append((relative_path_image, j))
120 | 
121 |     h_y_tag_json_file = open(y_tag_json_file, "w+")
122 |     json.dump(y_to_tag, h_y_tag_json_file)
123 |     h_y_tag_json_file.close()
124 | 
125 |     h_y_tag_text_file = open(y_tag_text_file, "w+")
126 |     for key in y_to_tag:
127 |         h_y_tag_text_file.write("%d %s\n" % (key, y_to_tag[key].encode("utf-8")))
128 |     h_y_tag_text_file.close()
129 | 
130 |     fout = open(path_train, "w+")
131 |     for item in train_list:
132 |         fout.write("%s %d\n" % (item[0], item[1]))
133 |     fout.close()
134 | 
135 |     fout = open(path_test, "w+")
136 |     for item in test_list:
137 |         fout.write("%s %d\n" % (item[0], item[1]))
138 |     fout.close()
139 | 
--------------------------------------------------------------------------------
/python/deep_ocr_reco_captcha:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | from __future__ import print_function
5 | 
6 | 
7 | import argparse
8 | from argparse import RawTextHelpFormatter
9 | import os
10 | import shutil
11 | import cv2
12 | 
13 | from deep_ocr.captcha.char_segmentation import CharSegmentation
14 | from deep_ocr.captcha.search_best_segmentation import SearchBestSegmentation
15 | from deep_ocr.caffe_clf import CaffeCls
16 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatio
17 | 
18 | 
19 | if __name__ == "__main__":
20 | 
21 |     description = '''
22 | # Docker config
23 | CAFFE_MODEL=/opt/deep_ocr/data/trained_models/mnist_model
24 | DEEP_OCR_ROOT=/opt/deep_ocr
25 | 
26 | # PC
27 | CAFFE_MODEL=/root/data/deep_ocr_trained_models/mnist_model
28 | DEEP_OCR_ROOT=/root/workspace/deep_ocr
29 | 
30 | deep_ocr_reco_captcha --captcha_img $DEEP_OCR_ROOT/data/captcha/captcha.png \
31 |     --num_char 5 \
32 |     --caffe_model $CAFFE_MODEL/lenet_iter_10000.caffemodel \
33 |     --caffe_network $CAFFE_MODEL/lenet.prototxt \
34 |     --y_tag $CAFFE_MODEL/deep_ocr_network.y_tag.json \
35 |     --caffe_img_w 28 --caffe_img_h 28 \
36 |     --debug_path /tmp/debug_captcha
37 | 
38 | deep_ocr_reco_captcha --captcha_img $DEEP_OCR_ROOT/data/captcha/simple.png \
39 |     --num_char 5 \
40 |     --caffe_model $CAFFE_MODEL/lenet_iter_10000.caffemodel \
41 |     --caffe_network $CAFFE_MODEL/lenet.prototxt \
42 |     --y_tag $CAFFE_MODEL/deep_ocr_network.y_tag.json \
43 |     --caffe_img_w 28 --caffe_img_h 28 \
44 |     --debug_path /tmp/debug_captcha
45 | 
46 | '''
47 | 
48 |     parser = argparse.ArgumentParser(
49 |         description=description, formatter_class=RawTextHelpFormatter)
50 |     parser.add_argument('--captcha_img', dest='captcha_img',
51 |                         default=None, required=True,
52 |                         help='captcha image to reco')
53 |     parser.add_argument('--num_char', dest='num_char',
54 |                         default=None, required=True,
55 |                         help='number of characters in the captcha')
56 |     parser.add_argument('--caffe_model', dest='caffe_model',
57 |                         default=None, required=True,
58 |                         help='trained caffe model')
59 |     parser.add_argument('--caffe_network', dest='caffe_network',
60 |                         default=None, required=True,
61 |                         help='caffe network')
62 |     parser.add_argument('--y_tag', dest='y_tag',
63 |                         default=None, required=True,
64 |                         help='path to the y_tag json produced by the dataset tool')
65 |     parser.add_argument('--caffe_img_w', dest='caffe_img_w',
66 |                         default=None, required=True,
67 |                         help='input image width expected by the caffe network')
68 |     parser.add_argument('--caffe_img_h', dest='caffe_img_h',
69 |                         default=None, required=True,
70 |                         help='input image height expected by the caffe network')
71 |     parser.add_argument('--debug_path', dest='debug_path',
72 |                         default=None, required=False,
73 |                         help='debug path')
74 |     options = parser.parse_args()
75 | 
76 |     captcha_img = os.path.expanduser(options.captcha_img)
77 |     num_char = int(options.num_char)
78 |     caffe_model = os.path.expanduser(options.caffe_model)
79 |     caffe_network = os.path.expanduser(options.caffe_network)
80 |     y_tag = os.path.expanduser(options.y_tag)
81 |     caffe_img_w = int(options.caffe_img_w)
82 |     caffe_img_h = int(options.caffe_img_h)
83 |     norm_width = 200
84 |     norm_height = 200
85 | 
86 |     debug_path = None
87 |     if options.debug_path is not None:
88 |         debug_path = os.path.expanduser(options.debug_path)
89 |         if os.path.isdir(debug_path):
90 |             shutil.rmtree(debug_path)
91 |         os.makedirs(debug_path)
92 | 
93 |     image = cv2.imread(captcha_img)
94 | 
95 |     proc_keep_ratio = PreprocessResizeKeepRatio(
96 |         width=norm_width, height=norm_height)
97 |     image = proc_keep_ratio.do(image)
98 | 
99 |     char_segmentation = CharSegmentation(
100 |         num_char=num_char,
101 |         debug_path=debug_path)
102 |     segmentations = char_segmentation.do(image)
103 | 
104 |     caffe_cls = CaffeCls(caffe_network, caffe_model, y_tag,
105 |                          width=caffe_img_w, height=caffe_img_h)
106 | 
107 |     search_best_segmentation = SearchBestSegmentation(
108 |         caffe_cls, char_segmentation.bin_img,
109 |         debug_path)
110 |     eval_segmentations = search_best_segmentation.do(segmentations)
111 | 
112 |     n_top = 100
113 |     for i, eval_segmentation in enumerate(eval_segmentations):
114 |         if i >= n_top:
115 |             break
116 |         print(eval_segmentation)
--------------------------------------------------------------------------------
/python/get_dataset.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | ./deep_ocr_make_caffe_dataset --out_caffe_dir ~/Projects/data/caffe_dataset_eng/ --font_dir ~/Projects/deepLearning_OCR/chinese_fonts/ --width 64 --height 64 --margin 4 --langs eng
4 | 
5 | 
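get_dataset.sh drives the deep_ocr_make_caffe_dataset tool above, which renders one image per (font, char) pair through Font2Image from deep_ocr/lang_aux.py; that module is not included in this listing. A minimal sketch of such a rendering step, assuming a PIL-based implementation (render_char is an illustrative name, not the project's API):

# Illustrative sketch only: the real Font2Image lives in deep_ocr/lang_aux.py.
import numpy as np
from PIL import Image, ImageDraw, ImageFont

def render_char(path_font, char, width=64, height=64, margin=4):
    font_size = min(width, height) - 2 * margin
    font = ImageFont.truetype(path_font, font_size)
    img = Image.new("L", (width, height), 0)   # black background, white glyph
    draw = ImageDraw.Draw(img)
    draw.text((margin, margin), char, fill=255, font=font)
    return np.asarray(img)                     # height x width uint8 array

cv2.imwrite(path_image, ...) on the returned array would then store the glyph the way the generated train.txt/test.txt lists expect.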
--------------------------------------------------------------------------------
/python/make_caffe_dataset.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | from __future__ import print_function
5 | 
6 | import argparse
7 | from argparse import RawTextHelpFormatter
8 | import fnmatch
9 | import os
10 | import cv2
11 | import json
12 | import random
13 | import numpy as np
14 | import shutil
15 | from deep_ocr.lang_aux import LangCharsGenerate
16 | from deep_ocr.lang_aux import FontCheck
17 | from deep_ocr.lang_aux import Font2Image
18 | from deep_ocr.lang_aux import DataAugmentation
19 | 
20 | if __name__ == "__main__":
21 | 
22 |     out_caffe_dir = os.path.expanduser("/home/user/Projects/data/caffe_dataset_id_num/")
23 |     #font_dir = os.path.expanduser("/home/user/Projects/deepLearning_OCR/chinese_fonts/")
24 |     font_dir = os.path.expanduser("/home/user/Projects/deepLearning_OCR/id_num_fonts/")
25 |     test_ratio = 0.3
26 |     width = 64
27 |     height = 64
28 |     need_crop = False
29 |     margin = 4
30 |     langs = "id_num"
31 |     rotate = 10
32 |     rotate_step = 1
33 | 
34 |     image_dir_name = "images"
35 | 
36 |     images_dir = os.path.join(out_caffe_dir, image_dir_name)
37 |     if os.path.isdir(images_dir):
38 |         shutil.rmtree(images_dir)
39 |     os.makedirs(images_dir)
40 | 
41 |     lang_chars_gen = LangCharsGenerate(langs)
42 |     lang_chars = lang_chars_gen.do()
43 |     font_check = FontCheck(lang_chars)
44 | 
45 |     y_to_tag = {}
46 |     y_tag_json_file = os.path.join(out_caffe_dir, "y_tag.json")
47 |     y_tag_text_file = os.path.join(out_caffe_dir, "y_tag.txt")
48 |     path_train = os.path.join(out_caffe_dir, "train.txt")
49 |     path_test = os.path.join(out_caffe_dir, "test.txt")
50 | 
51 |     ## rotate
52 |     if rotate < 0:
53 |         rotate = -rotate
54 | 
55 |     all_rotate_angles = []
56 |     if rotate > 0 and rotate <= 45:
57 |         for i in range(0, rotate + 1, rotate_step):
58 |             all_rotate_angles.append(i)
59 |         for i in range(-rotate, 0, rotate_step):
60 |             all_rotate_angles.append(i)
61 |         #print(all_rotate_angles)
62 | 
63 |     verified_font_paths = []
64 |     ## search for font files
65 |     for font_name in os.listdir(font_dir):
66 |         path_font_file = os.path.join(font_dir, font_name)
67 |         if font_check.do(path_font_file):
68 |             verified_font_paths.append(path_font_file)
69 | 
70 |     train_list = []
71 |     test_list = []
72 |     max_train_i = int(len(verified_font_paths) * (1.0 - test_ratio))
73 | 
74 |     font2image = Font2Image(width, height, need_crop, margin)
75 | 
76 |     # loop over fonts
77 |     for i, verified_font_path in enumerate(verified_font_paths):
78 |         is_train = True
79 |         if i >= max_train_i:
80 |             is_train = False
81 |         # loop over chars
82 |         for j, char in enumerate(lang_chars):
83 |             if j not in y_to_tag:
84 |                 y_to_tag[j] = char
85 |             char_dir = os.path.join(images_dir, "%d" % j)
86 |             if not os.path.isdir(char_dir):
87 |                 os.makedirs(char_dir)
88 |             if rotate == 0:
89 |                 relative_path_image = os.path.join(image_dir_name, "%d" % j, "%d_%s.jpg" % (i, os.path.basename(verified_font_path)))
90 |                 path_image = os.path.join(char_dir, "%d_%s.jpg" % (i, os.path.basename(verified_font_path)))
91 |                 font2image.do(verified_font_path, char, path_image)
92 |                 if is_train:
93 |                     train_list.append((relative_path_image, j))
94 |                 else:
95 |                     test_list.append((relative_path_image, j))
96 |             else:
97 |                 for k in all_rotate_angles:
98 |                     relative_path_image = os.path.join(image_dir_name, "%d" % j, "%d_%s_%d.jpg" % (i, os.path.basename(verified_font_path), k))
99 |                     path_image = os.path.join(char_dir, "%d_%s_%d.jpg" % (i, os.path.basename(verified_font_path), k))
100 |                     font2image.do(verified_font_path, char, path_image, rotate=k)
101 |                     #font2image.do(verified_font_path, char, path_image)
102 |                     if is_train:
103 |                         train_list.append((relative_path_image, j))
104 |                     else:
105 |                         test_list.append((relative_path_image, j))
106 | 
107 | 
108 | 
109 | 
110 |     h_y_tag_json_file = open(y_tag_json_file, "w+")
111 |     json.dump(y_to_tag, h_y_tag_json_file)
112 |     h_y_tag_json_file.close()
113 | 
114 |     h_y_tag_text_file = open(y_tag_text_file, "w+")
115 |     for key in y_to_tag:
116 |         h_y_tag_text_file.write("%d %s\n" % (key, y_to_tag[key].encode("utf-8")))
117 |     h_y_tag_text_file.close()
118 | 
119 |     fout = open(path_train, "w+")
120 |     for item in train_list:
121 |         fout.write("%s %d\n" % (item[0], item[1]))
122 |     fout.close()
123 | 
124 |     fout = open(path_test, "w+")
125 |     for item in test_list:
126 |         fout.write("%s %d\n" % (item[0], item[1]))
127 |     fout.close()
128 | 
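In the rotate branch above, every character is written once per angle in all_rotate_angles (with rotate = 10 and rotate_step = 1 that is 21 images per font and char, from -10 to +10 degrees). Font2Image's rotation handling is not shown in this listing; a plausible cv2-based sketch of the operation (rotate_glyph is an illustrative name):

# Illustrative sketch only: the real rotation lives in deep_ocr/lang_aux.py.
import cv2

def rotate_glyph(gray_img, angle_deg):
    h, w = gray_img.shape[:2]
    center = (w / 2.0, h / 2.0)
    mat = cv2.getRotationMatrix2D(center, angle_deg, 1.0)  # scale = 1.0
    # borderValue=0 keeps the padded corners black, matching the background
    return cv2.warpAffine(gray_img, mat, (w, h), borderValue=0)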
--------------------------------------------------------------------------------
/python/reco_chars.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | sys.path.append('/usr/local/lib/python2.7/site-packages')
4 | import caffe
5 | import json
6 | import numpy as np
7 | import os
8 | import cv2
9 | import shutil
10 | import copy
11 | 
12 | 
13 | class CaffeCls(object):
14 |     def __init__(self,
15 |                  model_def,
16 |                  model_weights,
17 |                  y_tag_json_path,
18 |                  is_mode_cpu=True,
19 |                  width=64,
20 |                  height=64):
21 |         self.net = caffe.Net(model_def, model_weights, caffe.TEST)
22 |         if is_mode_cpu:
23 |             caffe.set_mode_cpu()
24 |         self.y_tag_json = json.load(open(y_tag_json_path, "r"))
25 |         self.width = width
26 |         self.height = height
27 | 
28 |     def predict_cv2_img(self, cv2_img):
29 |         shape = cv2_img.shape
30 |         cv2_imgs = cv2_img.reshape((1, shape[0], shape[1]))
31 |         return self.predict_cv2_imgs(cv2_imgs)[0]
32 | 
33 |     def _predict_cv2_imgs_sub(self, cv2_imgs, pos_start, pos_end):
34 |         cv2_imgs_sub = cv2_imgs[pos_start: pos_end]
35 | 
36 |         #print(cv2_imgs_sub)
37 |         self.net.blobs['data'].reshape(cv2_imgs_sub.shape[0], 1, self.width, self.height)
38 | 
39 |         self.net.blobs['data'].data[...] 
= cv2_imgs_sub.reshape((cv2_imgs_sub.shape[0], 1, self.width, self.height)) 40 | output = self.net.forward() 41 | #print(self.net.blobs['data'].data) 42 | 43 | output_tag_to_max_proba = [] 44 | 45 | num_sample = cv2_imgs_sub.shape[0] 46 | for i in range(num_sample): 47 | output_prob = output['prob'][i] 48 | output_prob_index = sorted( 49 | range(len(output_prob)), 50 | key=lambda x:output_prob[x], 51 | reverse=True) 52 | output_tag_to_probas = [] 53 | for index in output_prob_index: 54 | item = (self.y_tag_json[str(index)], 55 | output_prob[index]) 56 | output_tag_to_probas.append(item) 57 | # output_tag_to_probas = output_tag_to_probas[:2] 58 | output_tag_to_max_proba.append(output_tag_to_probas) 59 | return output_tag_to_max_proba 60 | 61 | def predict_cv2_imgs(self, cv2_imgs, step=50): 62 | output_tag_to_max_proba = [] 63 | num_sample = cv2_imgs.shape[0] 64 | for i in range(0, num_sample, step): 65 | pos_end = min(num_sample, (i + step)) 66 | output_tag_to_max_proba += \ 67 | self._predict_cv2_imgs_sub(cv2_imgs, i, pos_end) 68 | return output_tag_to_max_proba 69 | 70 | 71 | class PreprocessCropZeros(object): 72 | 73 | def __init__(self): 74 | pass 75 | 76 | def do(self, cv2_gray_img): 77 | height = cv2_gray_img.shape[0] 78 | width = cv2_gray_img.shape[1] 79 | 80 | v_sum = np.sum(cv2_gray_img, axis=0) 81 | h_sum = np.sum(cv2_gray_img, axis=1) 82 | left = 0 83 | right = width - 1 84 | top = 0 85 | low = height - 1 86 | 87 | for i in range(width): 88 | if v_sum[i] > 0: 89 | left = i 90 | break 91 | 92 | for i in range(width - 1, -1, -1): 93 | if v_sum[i] > 0: 94 | right = i 95 | break 96 | 97 | for i in range(height): 98 | if h_sum[i] > 0: 99 | top = i 100 | break 101 | 102 | for i in range(height - 1, -1, -1): 103 | if h_sum[i] > 0: 104 | low = i 105 | break 106 | if not (top < low and right > left): 107 | return cv2_gray_img 108 | 109 | return cv2_gray_img[top: low+1, left: right+1] 110 | 111 | 112 | class PreprocessResizeKeepRatio(object): 113 | 114 | def __init__(self, width, height): 115 | self.width = width 116 | self.height = height 117 | 118 | def do(self, cv2_img): 119 | max_width = self.width 120 | max_height = self.height 121 | 122 | cur_height, cur_width = cv2_img.shape[:2] 123 | 124 | ratio_w = float(max_width)/float(cur_width) 125 | ratio_h = float(max_height)/float(cur_height) 126 | ratio = min(ratio_w, ratio_h) 127 | 128 | new_size = (min(int(cur_width*ratio), max_width), 129 | min(int(cur_height*ratio), max_height)) 130 | 131 | new_size = (max(new_size[0], 1), 132 | max(new_size[1], 1),) 133 | 134 | resized_img = cv2.resize(cv2_img, new_size) 135 | return resized_img 136 | 137 | 138 | class PreprocessResizeKeepRatioFillBG(object): 139 | 140 | def __init__(self, width, height, fill_bg=False, 141 | auto_avoid_fill_bg=True, margin=None): 142 | self.width = width 143 | self.height = height 144 | self.fill_bg = fill_bg 145 | self.auto_avoid_fill_bg = auto_avoid_fill_bg 146 | self.margin = margin 147 | 148 | @classmethod 149 | def is_need_fill_bg(cls, cv2_img, th=0.5, max_val=255): 150 | image_shape = cv2_img.shape 151 | height, width = image_shape 152 | if height * 3 < width: 153 | return True 154 | if width * 3 < height: 155 | return True 156 | return False 157 | 158 | @classmethod 159 | def put_img_into_center(cls, img_large, img_small, ): 160 | width_large = img_large.shape[1] 161 | height_large = img_large.shape[0] 162 | 163 | width_small = img_small.shape[1] 164 | height_small = img_small.shape[0] 165 | 166 | if width_large < width_small: 167 | raise 
ValueError("width_large <= width_small") 168 | if height_large < height_small: 169 | raise ValueError("height_large <= height_small") 170 | 171 | start_width = (width_large - width_small) / 2 172 | start_height = (height_large - height_small) / 2 173 | 174 | img_large[int(start_height):int(start_height) + int(height_small), 175 | int(start_width):int(start_width) + int(width_small)] = img_small 176 | return img_large 177 | 178 | def do(self, cv2_img): 179 | 180 | if self.margin is not None: 181 | width_minus_margin = max(2, self.width - self.margin) 182 | height_minus_margin = max(2, self.height - self.margin) 183 | else: 184 | width_minus_margin = self.width 185 | height_minus_margin = self.height 186 | 187 | cur_height, cur_width = cv2_img.shape[:2] 188 | if len(cv2_img.shape) > 2: 189 | pix_dim = cv2_img.shape[2] 190 | else: 191 | pix_dim = None 192 | 193 | preprocess_resize_keep_ratio = PreprocessResizeKeepRatio( 194 | width_minus_margin, 195 | height_minus_margin) 196 | resized_cv2_img = preprocess_resize_keep_ratio.do(cv2_img) 197 | 198 | if self.auto_avoid_fill_bg: 199 | need_fill_bg = self.is_need_fill_bg(cv2_img) 200 | if not need_fill_bg: 201 | self.fill_bg = False 202 | else: 203 | self.fill_bg = True 204 | 205 | ## should skip horizontal stroke 206 | if not self.fill_bg: 207 | ret_img = cv2.resize(resized_cv2_img, (width_minus_margin, 208 | height_minus_margin)) 209 | else: 210 | if pix_dim is not None: 211 | norm_img = np.zeros((height_minus_margin, 212 | width_minus_margin, 213 | pix_dim), 214 | np.uint8) 215 | else: 216 | norm_img = np.zeros((height_minus_margin, 217 | width_minus_margin), 218 | np.uint8) 219 | ret_img = self.put_img_into_center(norm_img, resized_cv2_img) 220 | 221 | if self.margin is not None: 222 | if pix_dim is not None: 223 | norm_img = np.zeros((self.height, 224 | self.width, 225 | pix_dim), 226 | np.uint8) 227 | else: 228 | norm_img = np.zeros((self.height, 229 | self.width), 230 | np.uint8) 231 | ret_img = self.put_img_into_center(norm_img, ret_img) 232 | return ret_img 233 | 234 | 235 | def extract_peek_ranges_from_array(array_vals, minimun_val=10, minimun_range=2): 236 | start_i = None 237 | end_i = None 238 | peek_ranges = [] 239 | for i, val in enumerate(array_vals): 240 | if val > minimun_val and start_i is None: 241 | start_i = i 242 | elif val > minimun_val and start_i is not None: 243 | pass 244 | elif val < minimun_val and start_i is not None: 245 | end_i = i 246 | if end_i - start_i >= minimun_range: 247 | peek_ranges.append((start_i, end_i)) 248 | start_i = None 249 | end_i = None 250 | elif val < minimun_val and start_i is None: 251 | pass 252 | else: 253 | raise ValueError("cannot parse this case...") 254 | return peek_ranges 255 | 256 | def compute_median_w_from_ranges(peek_ranges): 257 | widthes = [] 258 | for peek_range in peek_ranges: 259 | w = peek_range[1] - peek_range[0] + 1 260 | widthes.append(w) 261 | widthes = np.asarray(widthes) 262 | median_w = np.median(widthes) 263 | return median_w 264 | 265 | def median_split_ranges(peek_ranges): 266 | new_peek_ranges = [] 267 | widthes = [] 268 | for peek_range in peek_ranges: 269 | w = peek_range[1] - peek_range[0] + 1 270 | widthes.append(w) 271 | widthes = np.asarray(widthes) 272 | median_w = np.median(widthes) 273 | for i, peek_range in enumerate(peek_ranges): 274 | num_char = int(round(widthes[i]/median_w, 0)) 275 | if num_char > 1: 276 | char_w = float(widthes[i] / num_char) 277 | for i in range(num_char): 278 | start_point = peek_range[0] + int(i * char_w) 279 | end_point = 
peek_range[0] + int((i + 1) * char_w) 280 | new_peek_ranges.append((start_point, end_point)) 281 | else: 282 | new_peek_ranges.append(peek_range) 283 | return new_peek_ranges 284 | 285 | 286 | if __name__ == "__main__": 287 | 288 | norm_width = 64 289 | norm_height = 64 290 | #norm_width = 28 291 | #norm_height = 28 292 | 293 | #base_dir = "/workspace/data/chongdata_caffe_cn_sim_digits_64_64" 294 | #base_dir = "/home/user/Projects/data/caffe_dataset_cn_sim" 295 | base_dir = "/home/user/Projects/deep_ocr_workspace/data/chongdata_caffe_cn_sim_digits_64_64" 296 | model_def = os.path.join(base_dir, "deploy_lenet_train_test.prototxt") 297 | model_weights = os.path.join(base_dir, "lenet_iter_50000.caffemodel") 298 | y_tag_json_path = os.path.join(base_dir, "y_tag.json") 299 | caffe_cls = CaffeCls(model_def, model_weights, y_tag_json_path) 300 | 301 | test_image = "/home/user/Projects/data/test_data.png" 302 | 303 | debug_dir = "/home/user/Projects/data/caffe_dataset_cn_sim/debug_dir" 304 | if debug_dir is not None: 305 | if os.path.isdir(debug_dir): 306 | shutil.rmtree(debug_dir) 307 | os.makedirs(debug_dir) 308 | 309 | cv2_color_img = cv2.imread(test_image) 310 | 311 | resize_keep_ratio = PreprocessResizeKeepRatio(1024, 1024) 312 | cv2_color_img = resize_keep_ratio.do(cv2_color_img) 313 | 314 | cv2_img = cv2.cvtColor(cv2_color_img, cv2.COLOR_RGB2GRAY) 315 | height, width = cv2_img.shape 316 | 317 | adaptive_threshold = cv2.adaptiveThreshold( 318 | cv2_img, 319 | 255, 320 | cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\ 321 | cv2.THRESH_BINARY, 11, 2) 322 | adaptive_threshold = 255 - adaptive_threshold 323 | 324 | ## Try to find text lines and chars 325 | horizontal_sum = np.sum(adaptive_threshold, axis=1) 326 | peek_ranges = extract_peek_ranges_from_array(horizontal_sum) 327 | vertical_peek_ranges2d = [] 328 | for peek_range in peek_ranges: 329 | start_y = peek_range[0] 330 | end_y = peek_range[1] 331 | line_img = adaptive_threshold[start_y:end_y, :] 332 | vertical_sum = np.sum(line_img, axis=0) 333 | vertical_peek_ranges = extract_peek_ranges_from_array( 334 | vertical_sum, 335 | minimun_val=40, 336 | minimun_range=1) 337 | vertical_peek_ranges = median_split_ranges(vertical_peek_ranges) 338 | vertical_peek_ranges2d.append(vertical_peek_ranges) 339 | 340 | ## remove noise such as comma 341 | filtered_vertical_peek_ranges2d = [] 342 | for i, peek_range in enumerate(peek_ranges): 343 | new_peek_range = [] 344 | median_w = compute_median_w_from_ranges(vertical_peek_ranges2d[i]) 345 | for vertical_range in vertical_peek_ranges2d[i]: 346 | if vertical_range[1] - vertical_range[0] > median_w*0.7: 347 | new_peek_range.append(vertical_range) 348 | filtered_vertical_peek_ranges2d.append(new_peek_range) 349 | vertical_peek_ranges2d = filtered_vertical_peek_ranges2d 350 | 351 | 352 | char_imgs = [] 353 | crop_zeros = PreprocessCropZeros() 354 | resize_keep_ratio = PreprocessResizeKeepRatioFillBG( 355 | norm_width, norm_height, fill_bg=False, margin=4) 356 | for i, peek_range in enumerate(peek_ranges): 357 | for vertical_range in vertical_peek_ranges2d[i]: 358 | x = vertical_range[0] 359 | y = peek_range[0] 360 | w = vertical_range[1] - x 361 | h = peek_range[1] - y 362 | char_img = adaptive_threshold[y:y+h+1, x:x+w+1] 363 | char_img = crop_zeros.do(char_img) 364 | char_img = resize_keep_ratio.do(char_img) 365 | char_imgs.append(char_img) 366 | 367 | np_char_imgs = np.asarray(char_imgs) 368 | 369 | #print(np_char_imgs) 370 | 371 | output_tag_to_max_proba = caffe_cls.predict_cv2_imgs(np_char_imgs) 372 | 373 | ocr_res = 
"" 374 | for item in output_tag_to_max_proba: 375 | #print(item[0][0]) 376 | ocr_res += item[0][0] 377 | #print(ocr_res.encode("utf-8")) 378 | print(ocr_res) 379 | 380 | if debug_dir is not None: 381 | path_adaptive_threshold = os.path.join(debug_dir, 382 | "adaptive_threshold.jpg") 383 | cv2.imwrite(path_adaptive_threshold, adaptive_threshold) 384 | seg_adaptive_threshold = cv2_color_img 385 | 386 | # color = (255, 0, 0) 387 | # for rect in rects: 388 | # x, y, w, h = rect 389 | # pt1 = (x, y) 390 | # pt2 = (x + w, y + h) 391 | # cv2.rectangle(seg_adaptive_threshold, pt1, pt2, color) 392 | 393 | color = (0, 255, 0) 394 | for i, peek_range in enumerate(peek_ranges): 395 | for vertical_range in vertical_peek_ranges2d[i]: 396 | x = vertical_range[0] 397 | y = peek_range[0] 398 | w = vertical_range[1] - x 399 | h = peek_range[1] - y 400 | pt1 = (x, y) 401 | pt2 = (x + w, y + h) 402 | cv2.rectangle(seg_adaptive_threshold, pt1, pt2, color) 403 | 404 | path_seg_adaptive_threshold = os.path.join(debug_dir, 405 | "seg_adaptive_threshold.jpg") 406 | cv2.imwrite(path_seg_adaptive_threshold, seg_adaptive_threshold) 407 | 408 | debug_dir_chars = os.path.join(debug_dir, "chars") 409 | os.makedirs(debug_dir_chars) 410 | for i, char_img in enumerate(char_imgs): 411 | path_char = os.path.join(debug_dir_chars, "%d.jpg" % i) 412 | cv2.imwrite(path_char, char_img) 413 | 414 | 415 | -------------------------------------------------------------------------------- /python/test_id_card_reco.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import print_function 4 | 5 | import argparse 6 | from argparse import RawTextHelpFormatter 7 | import os 8 | import shutil 9 | import cv2 10 | from deep_ocr.caffe_clf import CaffeClsBuilder 11 | from deep_ocr.cv2_img_proc import PreprocessResizeKeepRatio 12 | from deep_ocr.cv2_img_proc import PreprocessBackgroundMask 13 | from deep_ocr.id_cards.segmentation import Segmentation 14 | from deep_ocr.id_cards.char_set import CharSet 15 | from deep_ocr.reco_text_line import RecoTextLine 16 | from deep_ocr.reco_text_line import RectImageClassifier 17 | 18 | 19 | if __name__ == "__main__": 20 | 21 | path_img = os.path.expanduser("/home/user/Projects/data/test_id_card/hehe4.jpg") 22 | debug_path = os.path.expanduser("/home/user/Projects/data/debug") 23 | if debug_path is not None: 24 | if os.path.isdir(debug_path): 25 | shutil.rmtree(debug_path) 26 | os.makedirs(debug_path) 27 | 28 | cls_dir_ua = os.path.expanduser("/home/user/Projects/data/caffe_dataset_id_num") 29 | caffe_cls_builder = CaffeClsBuilder() 30 | cls_ua = caffe_cls_builder.build(cls_dir=cls_dir_ua, ) 31 | 32 | seg_norm_width = 600 33 | seg_norm_height = 600 34 | preprocess_resize = PreprocessResizeKeepRatio(seg_norm_width, seg_norm_height) 35 | id_card_img = cv2.imread(path_img) 36 | id_card_img = preprocess_resize.do(id_card_img) 37 | segmentation = Segmentation(debug_path) 38 | key_to_segmentation = segmentation.do(id_card_img) 39 | 40 | boundaries = [ 41 | ((0, 0, 0), (100, 100, 100)), 42 | ] 43 | boundary2binimgs = [] 44 | for boundary in boundaries: 45 | preprocess_bg_mask = PreprocessBackgroundMask(boundary) 46 | id_card_img_mask = preprocess_bg_mask.do(id_card_img) 47 | boundary2binimgs.append((boundary, id_card_img_mask)) 48 | 49 | char_set = CharSet() 50 | char_set_data = char_set.get() 51 | 52 | rect_img_clf = RectImageClassifier(None, None, char_set, caffe_cls_width=64, caffe_cls_height=64) 53 | 54 | reco_text_line = 
RecoTextLine(rect_img_clf)
55 | 
56 | 
57 |     ## just test the id card number field
58 |     for i, segment in enumerate(key_to_segmentation["id"]):
59 |         if debug_path is not None:
60 |             line_debug_path = "key_%s_%i" % ("id", i)
61 |             line_debug_path = os.path.join(debug_path, line_debug_path)
62 |             reco_text_line.debug_path = line_debug_path
63 |         reco_text_line.char_set = char_set_data["id"]
64 |         caffe_cls = cls_ua
65 |         ocr_res = reco_text_line.do(boundary2binimgs, segment, caffe_cls)
66 |         print("=" * 64)
67 |         print(ocr_res)
68 | 
69 | 
70 |     if debug_path is not None:
71 |         path_debug_image_mask = os.path.join(debug_path, "reco_debug_01_image_mask.jpg")
72 |         cv2.imwrite(path_debug_image_mask, id_card_img_mask)
73 | 
74 | 
--------------------------------------------------------------------------------
/python/test_model.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | 
3 | if __name__ == "__main__":
4 |     # path where the trained model files are stored
5 |     root = '/home/user/Projects/data/caffe_dataset_cn_sim/'
6 |     caffe.set_mode_cpu()
7 |     net = caffe.Net('/home/user/Projects/deepLearning_OCR/lenet_train_test.prototxt', root + 'lenet_iter_50000.caffemodel', caffe.TEST)
8 |     conv1_w = net.params['conv11'][0].data
9 |     conv1_b = net.params['conv11'][1].data
10 |     print(conv1_w, conv1_b)
11 |     print(conv1_w.size, conv1_b.size)
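test_model.py reads the conv11 weights directly; the same pycaffe handles expose every blob and parameter, which is a quick way to confirm that a prototxt matches the trained weights before running recognition. A short check using only the standard pycaffe API (paths reused from test_model.py above):

import caffe

net = caffe.Net('/home/user/Projects/deepLearning_OCR/lenet_train_test.prototxt',
                '/home/user/Projects/data/caffe_dataset_cn_sim/lenet_iter_50000.caffemodel',
                caffe.TEST)
for name, blob in net.blobs.items():
    print(name, blob.data.shape)                   # activation shapes
for name, params in net.params.items():
    print(name, [p.data.shape for p in params])    # weight/bias shapes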
--------------------------------------------------------------------------------
/python/test_reco.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | sys.path.append('/usr/local/lib/python2.7/site-packages')
4 | import caffe
5 | import json
6 | import numpy as np
7 | import os
8 | import cv2
9 | import shutil
10 | import copy
11 | 
12 | 
13 | 
14 | 
15 | if __name__ == "__main__":
16 |     #base_dir = "/home/user/Projects/data/caffe_dataset_cn_sim"
17 |     base_dir = "/home/user/Projects/data/caffe_dataset_id_num"
18 |     #base_dir = "/home/user/Projects/deep_ocr_workspace/data/chongdata_train_ualpha_digits_64_64"
19 | 
20 |     model_def = os.path.join(base_dir, "deploy_lenet_train_test.prototxt")
21 |     model_weights = os.path.join(base_dir, "lenet_iter_50000.caffemodel")
22 |     y_tag_json_path = os.path.join(base_dir, "y_tag.json")
23 | 
24 |     net = caffe.Net(model_def, model_weights, caffe.TEST)
25 |     cv2_color_img = cv2.imread('/home/user/Projects/data/2.jpg')
26 |     cv2_img = cv2.cvtColor(cv2_color_img, cv2.COLOR_RGB2GRAY)
27 |     cv2_img = cv2_img.reshape((1, 1, 64, 64))
28 |     print(cv2_img.shape)
29 |     #np_img = np.asarray(cv2_img)
30 | 
31 |     #print(net.blobs['data'].data.shape)
32 | 
33 |     #transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
34 |     #transformer.set_transpose('data', (2, 0, 1))
35 |     #transformer.set_raw_scale('data', 255)  # scale to [0, 255]
36 |     #transformer.set_channel_swap('data', (2, 1, 0))
37 |     #net.blobs['data'].reshape(1, 1, 64, 64)
38 |     #im=caffe.io.load_image('/home/user/Projects/data/0.jpg')
39 | 
40 |     #net.blobs['data'].data[...] = transformer.preprocess('data', cv2_img)
41 |     net.blobs['data'].data[...] = cv2_img
42 |     out = net.forward()
43 |     #print(out)
44 | 
45 |     predicts = out['prob']
46 |     print(max(predicts))
47 | 
48 |     #print([(k, v.data.shape) for k, v in net.blobs.items()])
49 |     #print(net.params['conv11'][0].data)
50 |     test = net.params['conv11'][0].data
51 |     #print(net.blobs['data'].data)
52 | 
--------------------------------------------------------------------------------
/reco_chars.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | sys.path.append('/usr/local/lib/python2.7/site-packages')
4 | import caffe
5 | import json
6 | import numpy as np
7 | import os
8 | import cv2
9 | import shutil
10 | import copy
11 | 
12 | class CaffeCls(object):
13 |     def __init__(self,
14 |                  model_def,
15 |                  model_weights,
16 |                  y_tag_json_path,
17 |                  is_mode_cpu=True,
18 |                  width=64,
19 |                  height=64):
20 |         self.net = caffe.Net(model_def,
21 |                              model_weights,
22 |                              caffe.TEST)
23 |         if is_mode_cpu:
24 |             caffe.set_mode_cpu()
25 |         self.y_tag_json = json.load(open(y_tag_json_path, "r"))
26 |         self.width = width
27 |         self.height = height
28 | 
29 |     def predict_cv2_img(self, cv2_img):
30 |         shape = cv2_img.shape
31 |         cv2_imgs = cv2_img.reshape((1, shape[0], shape[1]))
32 |         return self.predict_cv2_imgs(cv2_imgs)[0]
33 | 
34 | 
35 |     def _predict_cv2_imgs_sub(self, cv2_imgs, pos_start, pos_end):
36 |         cv2_imgs_sub = cv2_imgs[pos_start: pos_end]
37 | 
38 |         self.net.blobs['data'].reshape(cv2_imgs_sub.shape[0], 1,
39 |                                        self.width, self.height)
40 |         self.net.blobs['data'].data[...] = cv2_imgs_sub.reshape(
41 |             (cv2_imgs_sub.shape[0], 1, self.width, self.height))
42 |         output = self.net.forward()
43 | 
44 |         output_tag_to_max_proba = []
45 | 
46 |         num_sample = cv2_imgs_sub.shape[0]
47 |         for i in range(num_sample):
48 |             output_prob = output['prob'][i]
49 |             output_prob_index = sorted(
50 |                 range(len(output_prob)),
51 |                 key=lambda x: output_prob[x],
52 |                 reverse=True)
53 |             output_tag_to_probas = []
54 |             for index in output_prob_index:
55 |                 item = (self.y_tag_json[str(index)],
56 |                         output_prob[index])
57 |                 output_tag_to_probas.append(item)
58 |             # output_tag_to_probas = output_tag_to_probas[:2]
59 |             output_tag_to_max_proba.append(output_tag_to_probas)
60 |         return output_tag_to_max_proba
61 | 
62 |     def predict_cv2_imgs(self, cv2_imgs, step=50):
63 |         output_tag_to_max_proba = []
64 |         num_sample = cv2_imgs.shape[0]
65 |         for i in range(0, num_sample, step):
66 |             pos_end = min(num_sample, (i + step))
67 |             output_tag_to_max_proba += \
68 |                 self._predict_cv2_imgs_sub(cv2_imgs, i, pos_end)
69 |         return output_tag_to_max_proba
70 | 
71 | class PreprocessCropZeros(object):
72 | 
73 |     def __init__(self):
74 |         pass
75 | 
76 |     def do(self, cv2_gray_img):
77 |         height = cv2_gray_img.shape[0]
78 |         width = cv2_gray_img.shape[1]
79 | 
80 |         v_sum = np.sum(cv2_gray_img, axis=0)
81 |         h_sum = np.sum(cv2_gray_img, axis=1)
82 |         left = 0
83 |         right = width - 1
84 |         top = 0
85 |         low = height - 1
86 | 
87 |         for i in range(width):
88 |             if v_sum[i] > 0:
89 |                 left = i
90 |                 break
91 | 
92 |         for i in range(width - 1, -1, -1):
93 |             if v_sum[i] > 0:
94 |                 right = i
95 |                 break
96 | 
97 |         for i in range(height):
98 |             if h_sum[i] > 0:
99 |                 top = i
100 |                 break
101 | 
102 |         for i in range(height - 1, -1, -1):
103 |             if h_sum[i] > 0:
104 |                 low = i
105 |                 break
106 |         if not (top < low and right > left):
107 |             return cv2_gray_img
108 | 
109 |         return cv2_gray_img[top: low+1, left: right+1]
110 | 
111 | 
112 | 
113 | 
114 | class PreprocessResizeKeepRatio(object):
115 | 
116 |     def __init__(self, width, height):
117 | 
self.width = width 118 | self.height = height 119 | 120 | def do(self, cv2_img): 121 | max_width = self.width 122 | max_height = self.height 123 | 124 | cur_height, cur_width = cv2_img.shape[:2] 125 | 126 | ratio_w = float(max_width)/float(cur_width) 127 | ratio_h = float(max_height)/float(cur_height) 128 | ratio = min(ratio_w, ratio_h) 129 | 130 | new_size = (min(int(cur_width*ratio), max_width), 131 | min(int(cur_height*ratio), max_height)) 132 | 133 | new_size = (max(new_size[0], 1), 134 | max(new_size[1], 1),) 135 | 136 | resized_img = cv2.resize(cv2_img, new_size) 137 | return resized_img 138 | 139 | 140 | class PreprocessResizeKeepRatioFillBG(object): 141 | 142 | def __init__(self, width, height, fill_bg=False, 143 | auto_avoid_fill_bg=True, margin=None): 144 | self.width = width 145 | self.height = height 146 | self.fill_bg = fill_bg 147 | self.auto_avoid_fill_bg = auto_avoid_fill_bg 148 | self.margin = margin 149 | 150 | @classmethod 151 | def is_need_fill_bg(cls, cv2_img, th=0.5, max_val=255): 152 | image_shape = cv2_img.shape 153 | height, width = image_shape 154 | if height * 3 < width: 155 | return True 156 | if width * 3 < height: 157 | return True 158 | return False 159 | 160 | @classmethod 161 | def put_img_into_center(cls, img_large, img_small, ): 162 | width_large = img_large.shape[1] 163 | height_large = img_large.shape[0] 164 | 165 | width_small = img_small.shape[1] 166 | height_small = img_small.shape[0] 167 | 168 | if width_large < width_small: 169 | raise ValueError("width_large <= width_small") 170 | if height_large < height_small: 171 | raise ValueError("height_large <= height_small") 172 | 173 | start_width = (width_large - width_small) / 2 174 | start_height = (height_large - height_small) / 2 175 | 176 | img_large[start_height:start_height + height_small, 177 | start_width:start_width + width_small] = img_small 178 | return img_large 179 | 180 | def do(self, cv2_img): 181 | 182 | if self.margin is not None: 183 | width_minus_margin = max(2, self.width - self.margin) 184 | height_minus_margin = max(2, self.height - self.margin) 185 | else: 186 | width_minus_margin = self.width 187 | height_minus_margin = self.height 188 | 189 | cur_height, cur_width = cv2_img.shape[:2] 190 | if len(cv2_img.shape) > 2: 191 | pix_dim = cv2_img.shape[2] 192 | else: 193 | pix_dim = None 194 | 195 | preprocess_resize_keep_ratio = PreprocessResizeKeepRatio( 196 | width_minus_margin, 197 | height_minus_margin) 198 | resized_cv2_img = preprocess_resize_keep_ratio.do(cv2_img) 199 | 200 | if self.auto_avoid_fill_bg: 201 | need_fill_bg = self.is_need_fill_bg(cv2_img) 202 | if not need_fill_bg: 203 | self.fill_bg = False 204 | else: 205 | self.fill_bg = True 206 | 207 | ## should skip horizontal stroke 208 | if not self.fill_bg: 209 | ret_img = cv2.resize(resized_cv2_img, (width_minus_margin, 210 | height_minus_margin)) 211 | else: 212 | if pix_dim is not None: 213 | norm_img = np.zeros((height_minus_margin, 214 | width_minus_margin, 215 | pix_dim), 216 | np.uint8) 217 | else: 218 | norm_img = np.zeros((height_minus_margin, 219 | width_minus_margin), 220 | np.uint8) 221 | ret_img = self.put_img_into_center(norm_img, resized_cv2_img) 222 | 223 | if self.margin is not None: 224 | if pix_dim is not None: 225 | norm_img = np.zeros((self.height, 226 | self.width, 227 | pix_dim), 228 | np.uint8) 229 | else: 230 | norm_img = np.zeros((self.height, 231 | self.width), 232 | np.uint8) 233 | ret_img = self.put_img_into_center(norm_img, ret_img) 234 | return ret_img 235 | 236 | def 
extract_peek_ranges_from_array(array_vals, minimun_val=10, minimun_range=2): 237 | start_i = None 238 | end_i = None 239 | peek_ranges = [] 240 | for i, val in enumerate(array_vals): 241 | if val > minimun_val and start_i is None: 242 | start_i = i 243 | elif val > minimun_val and start_i is not None: 244 | pass 245 | elif val < minimun_val and start_i is not None: 246 | end_i = i 247 | if end_i - start_i >= minimun_range: 248 | peek_ranges.append((start_i, end_i)) 249 | start_i = None 250 | end_i = None 251 | elif val < minimun_val and start_i is None: 252 | pass 253 | else: 254 | raise ValueError("cannot parse this case...") 255 | return peek_ranges 256 | 257 | def compute_median_w_from_ranges(peek_ranges): 258 | widthes = [] 259 | for peek_range in peek_ranges: 260 | w = peek_range[1] - peek_range[0] + 1 261 | widthes.append(w) 262 | widthes = np.asarray(widthes) 263 | median_w = np.median(widthes) 264 | return median_w 265 | 266 | def median_split_ranges(peek_ranges): 267 | new_peek_ranges = [] 268 | widthes = [] 269 | for peek_range in peek_ranges: 270 | w = peek_range[1] - peek_range[0] + 1 271 | widthes.append(w) 272 | widthes = np.asarray(widthes) 273 | median_w = np.median(widthes) 274 | for i, peek_range in enumerate(peek_ranges): 275 | num_char = int(round(widthes[i]/median_w, 0)) 276 | if num_char > 1: 277 | char_w = float(widthes[i] / num_char) 278 | for i in range(num_char): 279 | start_point = peek_range[0] + int(i * char_w) 280 | end_point = peek_range[0] + int((i + 1) * char_w) 281 | new_peek_ranges.append((start_point, end_point)) 282 | else: 283 | new_peek_ranges.append(peek_range) 284 | return new_peek_ranges 285 | 286 | 287 | if __name__ == "__main__": 288 | 289 | norm_width = 64 290 | norm_height = 64 291 | 292 | base_dir = "/workspace/data/chongdata_caffe_cn_sim_digits_64_64" 293 | model_def = os.path.join(base_dir, "deploy_lenet_train_test.prototxt") 294 | model_weights = os.path.join(base_dir, "lenet_iter_50000.caffemodel") 295 | y_tag_json_path = os.path.join(base_dir, "y_tag.json") 296 | caffe_cls = CaffeCls(model_def, model_weights, y_tag_json_path) 297 | 298 | test_image = "/opt/deep_ocr/test_data.png" 299 | 300 | debug_dir = "/tmp/debug_dir" 301 | if debug_dir is not None: 302 | if os.path.isdir(debug_dir): 303 | shutil.rmtree(debug_dir) 304 | os.makedirs(debug_dir) 305 | 306 | cv2_color_img = cv2.imread(test_image) 307 | 308 | resize_keep_ratio = PreprocessResizeKeepRatio(1024, 1024) 309 | cv2_color_img = resize_keep_ratio.do(cv2_color_img) 310 | 311 | cv2_img = cv2.cvtColor(cv2_color_img, cv2.COLOR_RGB2GRAY) 312 | height, width = cv2_img.shape 313 | 314 | adaptive_threshold = cv2.adaptiveThreshold( 315 | cv2_img, 316 | 255, 317 | cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\ 318 | cv2.THRESH_BINARY, 11, 2) 319 | adaptive_threshold = 255 - adaptive_threshold 320 | 321 | ## Try to find text lines and chars 322 | horizontal_sum = np.sum(adaptive_threshold, axis=1) 323 | peek_ranges = extract_peek_ranges_from_array(horizontal_sum) 324 | vertical_peek_ranges2d = [] 325 | for peek_range in peek_ranges: 326 | start_y = peek_range[0] 327 | end_y = peek_range[1] 328 | line_img = adaptive_threshold[start_y:end_y, :] 329 | vertical_sum = np.sum(line_img, axis=0) 330 | vertical_peek_ranges = extract_peek_ranges_from_array( 331 | vertical_sum, 332 | minimun_val=40, 333 | minimun_range=1) 334 | vertical_peek_ranges = median_split_ranges(vertical_peek_ranges) 335 | vertical_peek_ranges2d.append(vertical_peek_ranges) 336 | 337 | ## remove noise such as comma 338 | 
filtered_vertical_peek_ranges2d = [] 339 | for i, peek_range in enumerate(peek_ranges): 340 | new_peek_range = [] 341 | median_w = compute_median_w_from_ranges(vertical_peek_ranges2d[i]) 342 | for vertical_range in vertical_peek_ranges2d[i]: 343 | if vertical_range[1] - vertical_range[0] > median_w*0.7: 344 | new_peek_range.append(vertical_range) 345 | filtered_vertical_peek_ranges2d.append(new_peek_range) 346 | vertical_peek_ranges2d = filtered_vertical_peek_ranges2d 347 | 348 | 349 | char_imgs = [] 350 | crop_zeros = PreprocessCropZeros() 351 | resize_keep_ratio = PreprocessResizeKeepRatioFillBG( 352 | norm_width, norm_height, fill_bg=False, margin=4) 353 | for i, peek_range in enumerate(peek_ranges): 354 | for vertical_range in vertical_peek_ranges2d[i]: 355 | x = vertical_range[0] 356 | y = peek_range[0] 357 | w = vertical_range[1] - x 358 | h = peek_range[1] - y 359 | char_img = adaptive_threshold[y:y+h+1, x:x+w+1] 360 | char_img = crop_zeros.do(char_img) 361 | char_img = resize_keep_ratio.do(char_img) 362 | char_imgs.append(char_img) 363 | 364 | np_char_imgs = np.asarray(char_imgs) 365 | 366 | output_tag_to_max_proba = caffe_cls.predict_cv2_imgs(np_char_imgs) 367 | 368 | ocr_res = "" 369 | for item in output_tag_to_max_proba: 370 | ocr_res += item[0][0] 371 | print(ocr_res.encode("utf-8")) 372 | 373 | if debug_dir is not None: 374 | path_adaptive_threshold = os.path.join(debug_dir, 375 | "adaptive_threshold.jpg") 376 | cv2.imwrite(path_adaptive_threshold, adaptive_threshold) 377 | seg_adaptive_threshold = cv2_color_img 378 | 379 | # color = (255, 0, 0) 380 | # for rect in rects: 381 | # x, y, w, h = rect 382 | # pt1 = (x, y) 383 | # pt2 = (x + w, y + h) 384 | # cv2.rectangle(seg_adaptive_threshold, pt1, pt2, color) 385 | 386 | color = (0, 255, 0) 387 | for i, peek_range in enumerate(peek_ranges): 388 | for vertical_range in vertical_peek_ranges2d[i]: 389 | x = vertical_range[0] 390 | y = peek_range[0] 391 | w = vertical_range[1] - x 392 | h = peek_range[1] - y 393 | pt1 = (x, y) 394 | pt2 = (x + w, y + h) 395 | cv2.rectangle(seg_adaptive_threshold, pt1, pt2, color) 396 | 397 | path_seg_adaptive_threshold = os.path.join(debug_dir, 398 | "seg_adaptive_threshold.jpg") 399 | cv2.imwrite(path_seg_adaptive_threshold, seg_adaptive_threshold) 400 | 401 | debug_dir_chars = os.path.join(debug_dir, "chars") 402 | os.makedirs(debug_dir_chars) 403 | for i, char_img in enumerate(char_imgs): 404 | path_char = os.path.join(debug_dir_chars, "%d.jpg" % i) 405 | cv2.imwrite(path_char, char_img) 406 | 407 | 408 | -------------------------------------------------------------------------------- /solver.prototxt: -------------------------------------------------------------------------------- 1 | # The train/test net protocol buffer definition 2 | net: "./lenet_train_test.prototxt" 3 | # test_iter specifies how many forward passes the test should carry out. 4 | # In the case of MNIST, we have test batch size 100 and 100 test iterations, 5 | # covering the full 10,000 testing images. 6 | test_iter: 100 7 | # Carry out testing every 500 training iterations. 8 | test_interval: 500 9 | # The base learning rate, momentum and the weight decay of the network. 
10 | base_lr: 0.01 11 | momentum: 0.9 12 | weight_decay: 0.0005 13 | # The learning rate policy 14 | lr_policy: "inv" 15 | gamma: 0.0001 16 | power: 0.75 17 | # Display every 100 iterations 18 | display: 100 19 | # The maximum number of iterations 20 | max_iter: 50000 21 | # snapshot intermediate results 22 | snapshot: 5000 23 | snapshot_prefix: "./lenet" 24 | # solver mode: CPU or GPU 25 | solver_mode: GPU 26 | --------------------------------------------------------------------------------
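The solver above can be driven from Python as well as from the caffe command-line tool; a minimal sketch using the pycaffe wrapper, assuming the data sources referenced by lenet_train_test.prototxt already exist:

# Minimal training driver for the solver above (pycaffe API).
import caffe

caffe.set_mode_gpu()                      # matches solver_mode: GPU above
solver = caffe.SGDSolver('./solver.prototxt')
solver.solve()                            # runs max_iter iterations, writing
                                          # snapshots ./lenet_iter_*.caffemodel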