├── .gitignore ├── README.md ├── config.py ├── demo ├── 8801_win32_JAP_2020-10-30-20-04-47-887566.jpg ├── CH1.jpg ├── CH2.jpg ├── DangoOCR_ENG_2021-01-03_19-43-13.jpg ├── ENG_2.jpg └── KOR.jpg ├── demo_result ├── 8801_win32_JAP_2020-10-30-20-04-47-887566.jpg ├── CH1.jpg ├── CH2.jpg ├── DangoOCR_ENG_2021-01-03_19-43-13.jpg ├── ENG_2.jpg └── KOR.jpg ├── download_model.sh ├── font ├── CH_ENG.TTC ├── KOR.ttf └── japan.ttc ├── ocr_server.py ├── ppocr ├── __init__.py ├── data │ ├── __init__.py │ ├── cls │ │ ├── __init__.py │ │ ├── dataset_traversal.py │ │ └── randaugment.py │ ├── det │ │ ├── __init__.py │ │ ├── data_augment.py │ │ ├── dataset_traversal.py │ │ ├── db_process.py │ │ ├── east_process.py │ │ ├── make_border_map.py │ │ ├── make_shrink_map.py │ │ ├── random_crop_data.py │ │ └── sast_process.py │ ├── reader_main.py │ └── rec │ │ ├── __init__.py │ │ ├── dataset_traversal.py │ │ ├── img_tools.py │ │ └── text_image_aug │ │ ├── augment.py │ │ └── warp_mls.py ├── modeling │ ├── __init__.py │ ├── architectures │ │ ├── __init__.py │ │ ├── cls_model.py │ │ ├── det_model.py │ │ └── rec_model.py │ ├── backbones │ │ ├── __init__.py │ │ ├── det_mobilenet_v3.py │ │ ├── det_resnet_vd.py │ │ ├── det_resnet_vd_sast.py │ │ ├── rec_mobilenet_v3.py │ │ ├── rec_resnet_fpn.py │ │ └── rec_resnet_vd.py │ ├── common_functions.py │ ├── heads │ │ ├── __init__.py │ │ ├── cls_head.py │ │ ├── det_db_head.py │ │ ├── det_east_head.py │ │ ├── det_sast_head.py │ │ ├── rec_attention_head.py │ │ ├── rec_ctc_head.py │ │ ├── rec_seq_encoder.py │ │ ├── rec_srn_all_head.py │ │ └── self_attention │ │ │ ├── __init__.py │ │ │ └── model.py │ ├── losses │ │ ├── __init__.py │ │ ├── cls_loss.py │ │ ├── det_basic_loss.py │ │ ├── det_db_loss.py │ │ ├── det_east_loss.py │ │ ├── det_sast_loss.py │ │ ├── rec_attention_loss.py │ │ ├── rec_ctc_loss.py │ │ └── rec_srn_loss.py │ └── stns │ │ ├── __init__.py │ │ └── tps.py ├── optimizer.py ├── postprocess │ ├── __init__.py │ ├── db_postprocess.py │ ├── 
east_postprocess.py │ ├── lanms │ │ ├── .gitignore │ │ ├── .ycm_extra_conf.py │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── adaptor.cpp │ │ ├── include │ │ │ ├── clipper │ │ │ │ ├── clipper.cpp │ │ │ │ └── clipper.hpp │ │ │ └── pybind11 │ │ │ │ ├── attr.h │ │ │ │ ├── buffer_info.h │ │ │ │ ├── cast.h │ │ │ │ ├── chrono.h │ │ │ │ ├── class_support.h │ │ │ │ ├── common.h │ │ │ │ ├── complex.h │ │ │ │ ├── descr.h │ │ │ │ ├── eigen.h │ │ │ │ ├── embed.h │ │ │ │ ├── eval.h │ │ │ │ ├── functional.h │ │ │ │ ├── numpy.h │ │ │ │ ├── operators.h │ │ │ │ ├── options.h │ │ │ │ ├── pybind11.h │ │ │ │ ├── pytypes.h │ │ │ │ ├── stl.h │ │ │ │ ├── stl_bind.h │ │ │ │ └── typeid.h │ │ └── lanms.h │ ├── locality_aware_nms.py │ └── sast_postprocess.py └── utils │ ├── __init__.py │ ├── character.py │ ├── check.py │ ├── corpus │ ├── occitan_corpus.txt │ ├── readme.md │ └── readme_ch.md │ ├── dict │ ├── french_dict.txt │ ├── german_dict.txt │ ├── japan_dict.txt │ ├── korean_dict.txt │ └── occitan_dict.txt │ ├── ic15_dict.txt │ ├── ppocr_keys_v1.txt │ ├── save_load.py │ ├── stats.py │ └── utility.py ├── predict_system.py ├── prod_deploy.sh ├── requirements.txt ├── test.py ├── tools ├── infer │ ├── __init__.py │ ├── predict_cls.py │ ├── predict_det.py │ ├── predict_rec.py │ └── utility.py └── logger.py └── translate ├── API.py ├── Bing.py ├── Google.py ├── GoogleJS.js ├── Tencent.py ├── baidufanyi.py └── webtrans.js /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | .idea/ 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | 
# Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dango-OCR算法服务 2 | 3 | #### 说明: 4 | + [DangoOCR](https://github.com/zhangming8/Dango-ocr)是一个开源的文字识别工具,通过调用本算法服务实现文字识别。 5 | + 本服务基于百度开源的[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR),如果要部署自己模型请在该项目中的develop分支训练。 6 | + 相关blog [使用飞桨一步步实现多语言OCR文字识别软件](https://blog.csdn.net/u010397980/article/details/111940566) 7 | 8 | #### 环境: 9 | + python>=3.6, paddlepaddle-gpu >= 1.8.5 10 | 11 | #### 训练模型: 12 | + 模型包括检测模型、识别模型。训练过程可以[参考文档](https://github.com/PaddlePaddle/PaddleOCR/tree/develop/doc/doc_ch) 13 | + 检测模型用的是DBnet, 所有的识别模型都用的是CRNN 14 | 15 | #### 导出模型: 16 | + 训练模型导出为inference模型(导出后不必重新定义网络结构,便于部署),[参考](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/inference.md) 17 | 18 | #### 部署/启动服务 19 | + 修改config.py中的检测(det_model_dir)和识别模型(rec_model_dir)路径。其中不同语言的检测模型是共用的,识别模型需要单独训练。 20 | + 目前所有语言的识别都没有使用方向模型,所以所有语言的use_angle_cls=False 21 | + 执行./prod_deploy.sh启动服务。其中'reco_language'为检测模型后面并联的多个识别模型,'port'为算法服务的端口号 22 | + 启动成功会在当前目录创建'log'文件夹,里面会记录启动的日志便于调试代码 23 | 24 | #### 测试/调用服务 25 | + 执行python test.py会读取'demo'文件夹内的图片并调用本服务,同时把结果保存在"demo_result"文件夹,如下图为几个示例 26 | 27 | 28 | + 结果 1 29 |
30 | 31 |
32 | 33 | + 结果 2 34 |
35 | 36 |
37 | 38 | + 结果 3 39 |
40 | 41 |
42 | 43 | + 结果 4 44 |
45 | 46 |
47 | 48 | + 结果 5 49 |
50 | 51 |
52 | 53 | + 结果 6 54 |
55 | 56 |
57 | 58 | #### 参考: 59 | + OCR算法参考百度PaddleOCR: https://github.com/PaddlePaddle/PaddleOCR 60 | + 本OCR服务: https://github.com/zhangming8/ocr_algo_server 61 | + 最终OCR软件: https://github.com/zhangming8/Dango-ocr 62 | -------------------------------------------------------------------------------- /demo/8801_win32_JAP_2020-10-30-20-04-47-887566.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/8801_win32_JAP_2020-10-30-20-04-47-887566.jpg -------------------------------------------------------------------------------- /demo/CH1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/CH1.jpg -------------------------------------------------------------------------------- /demo/CH2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/CH2.jpg -------------------------------------------------------------------------------- /demo/DangoOCR_ENG_2021-01-03_19-43-13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/DangoOCR_ENG_2021-01-03_19-43-13.jpg -------------------------------------------------------------------------------- /demo/ENG_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/ENG_2.jpg -------------------------------------------------------------------------------- /demo/KOR.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/KOR.jpg -------------------------------------------------------------------------------- /demo_result/8801_win32_JAP_2020-10-30-20-04-47-887566.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/8801_win32_JAP_2020-10-30-20-04-47-887566.jpg -------------------------------------------------------------------------------- /demo_result/CH1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/CH1.jpg -------------------------------------------------------------------------------- /demo_result/CH2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/CH2.jpg -------------------------------------------------------------------------------- /demo_result/DangoOCR_ENG_2021-01-03_19-43-13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/DangoOCR_ENG_2021-01-03_19-43-13.jpg -------------------------------------------------------------------------------- /demo_result/ENG_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/ENG_2.jpg -------------------------------------------------------------------------------- /demo_result/KOR.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/KOR.jpg -------------------------------------------------------------------------------- /download_model.sh: -------------------------------------------------------------------------------- 1 | 2 | # download text detect model 3 | save_path="inference/det_db" 4 | mkdir -p $save_path 5 | cd $save_path 6 | wget https://paddleocr.bj.bcebos.com/20-09-22/server/det/ch_ppocr_server_v1.1_det_infer.tar 7 | tar xvf ch_ppocr_server_v1.1_det_infer.tar 8 | cd - 9 | 10 | # download text recognize model 11 | save_path="inference/rec_crnn" 12 | mkdir -p $save_path 13 | cd $save_path 14 | wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_infer.tar 15 | wget https://paddleocr.bj.bcebos.com/20-09-22/server/rec/ch_ppocr_server_v1.1_rec_infer.tar 16 | wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/en/en_ppocr_mobile_v1.1_rec_infer.tar 17 | wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/fr/french_ppocr_mobile_v1.1_rec_infer.tar 18 | wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/ge/german_ppocr_mobile_v1.1_rec_infer.tar 19 | wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/jp/japan_ppocr_mobile_v1.1_rec_infer.tar 20 | wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/kr/korean_ppocr_mobile_v1.1_rec_infer.tar 21 | for file in `ls *.tar` 22 | do 23 | echo "unzip ${file}" 24 | tar xvf ${file} 25 | done 26 | cd - 27 | echo "download done" 28 | -------------------------------------------------------------------------------- /font/CH_ENG.TTC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/font/CH_ENG.TTC -------------------------------------------------------------------------------- /font/KOR.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/font/KOR.ttf -------------------------------------------------------------------------------- /font/japan.ttc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/font/japan.ttc -------------------------------------------------------------------------------- /ocr_server.py: -------------------------------------------------------------------------------- 1 | # -*-coding:utf-8-*- 2 | import os 3 | import json 4 | import cv2 5 | import sys 6 | import traceback 7 | import argparse 8 | from flask import Flask, Response, request 9 | import datetime 10 | from queue import Queue 11 | import threading 12 | import multiprocessing 13 | import time 14 | import random 15 | from setproctitle import setproctitle 16 | 17 | from config import Config 18 | import tools.logger as logger_ 19 | from tools.infer.utility import base64_to_cv2, mkdir 20 | from predict_system import OCR 21 | from translate.API import translate 22 | 23 | app = Flask("server", static_url_path='') 24 | app.config['PROPAGATE_EXCEPTIONS'] = True 25 | _save_image_q = Queue(1000) 26 | 27 | config = Config() 28 | 29 | 30 | @app.route("/dango/algo/ocr/server", methods=['POST', 'GET']) 31 | def ocr_server(): 32 | try: 33 | logger.info("-" * 50) 34 | logger.info("端口 {} /dango/algo/ocr/server 收到请求".format(g_port)) 35 | 36 | now_time = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f') 37 | day = "-".join(now_time.split("-")[:3]) 38 | # params = request.get_json() 39 | content = request.form 40 | 41 | images = content['image'] 42 | language_type = content['language_type'] 43 | user_id = content["user_id"] 44 | platform = content.get('platform', None) 45 | need_translate = content.get("translate", 'no') 46 | 47 | s1 = time.time() 48 | images_decode = [base64_to_cv2(images)] 49 | 
logger.info("收到: {}, {}, {}".format(user_id, platform, language_type)) 50 | 51 | result = ocr.predict(language_type, images=images_decode) 52 | logger.info("识别结果为: {}, 是否需要翻译: {}".format(result, need_translate)) 53 | save_basename = "{}/{}/{}_{}_{}_{}_{}".format(config.save_dir + "/" + g_port, day, g_port, platform, user_id, 54 | language_type, now_time) 55 | _save_image_q.put([save_basename, images_decode, result]) 56 | 57 | translated = False 58 | response_data = {'result': result, 'translated': translated} 59 | if need_translate == 'yes': 60 | logger.info("开始进行翻译...") 61 | s3 = time.time() 62 | rand_idx = random.randint(0, len(config.baidu_translate_secret_key) - 1) 63 | fanyi_app_id = config.baidu_translate_app_id[rand_idx] 64 | fanyi_secret_key = config.baidu_translate_secret_key[rand_idx] 65 | translate_result, translated = translate(result[0], fanyi_app_id, fanyi_secret_key, logger) 66 | if translated: 67 | logger.info("翻译成功: {}, 结果为: {}".format(translated, translate_result)) 68 | response_data['translate_result'] = translate_result 69 | response_data['translated'] = translated 70 | else: 71 | logger.info("翻译失败: {}, 错误码: {}".format(translated, translate_result)) 72 | s4 = time.time() 73 | logger.info("翻译耗时: {}".format(s4 - s3)) 74 | 75 | s2 = time.time() 76 | logger.info("==>> 完成, 总耗时 {} , 开始回复: {}".format(s2 - s1, response_data)) 77 | return Response(json.dumps({'status': 0, 'data': response_data}), 78 | mimetype='application/json') 79 | 80 | except: 81 | e = traceback.format_exc() 82 | logger.info("错误") 83 | logger.error(e) 84 | return Response(json.dumps({'status': -1, 'data': 'None'}), mimetype='application/json') 85 | 86 | 87 | def save_img(): 88 | while True: 89 | try: 90 | save_basename, image_cv2, words_result = _save_image_q.get(block=True) 91 | assert len(image_cv2) == len(words_result) 92 | for idx, img in enumerate(image_cv2): 93 | save_name = save_basename + "_" + str(idx) + ".jpg" 94 | mkdir(os.path.dirname(save_name)) 95 | 
cv2.imwrite(save_name, img) 96 | with open(save_name.replace(".jpg", ".txt"), "w") as f: 97 | f.write(str(words_result[idx])) 98 | logger.info('保存图片 {} 及 txt'.format(save_name)) 99 | except: 100 | e = traceback.format_exc() 101 | logger.info(e) 102 | 103 | 104 | def do_work(gpu, port): 105 | global logger, g_port, ocr 106 | try: 107 | os.environ["CUDA_VISIBLE_DEVICES"] = "{}".format(gpu) 108 | logger = logger_.get_logger("./log/ocr_{}.log".format(port)) 109 | g_port = port 110 | logger.info("===>>> 初始化模型到gpu:{}, port: {}".format(gpu, port)) 111 | ocr = OCR(config, logger, language_list) 112 | logger.info("==>> 启动成功") 113 | app.run(host=config.host, port=port, threaded=True) 114 | 115 | except BaseException as e: 116 | logger.error('错误,启动flask异常{}'.format(e)) 117 | logger.info(traceback.format_exc()) 118 | 119 | 120 | if __name__ == '__main__': 121 | parser = argparse.ArgumentParser() 122 | parser.add_argument('--gpu', type=str, help='gpu index: 0_1_2_3', default="0") 123 | parser.add_argument('--port', type=str, help='server port: 8811_8812_8813', default="8811") 124 | parser.add_argument('--det', type=str, help='detection model', default="DB") 125 | parser.add_argument('--rec', type=str, help='recognize language model', default="ch,japan,en,korean") 126 | args = parser.parse_args() 127 | 128 | setproctitle('ocr_server_{}_{}'.format(args.port, args.rec)) 129 | 130 | ports = args.port.split("_") # [args.port] 131 | gpus = args.gpu.split("_") # [args.gpu] 132 | language_list = args.rec.replace(" ", "").split(",") 133 | if len(gpus) == 1: 134 | gpus = gpus * len(ports) 135 | 136 | gpu_num = len(gpus) 137 | port_num = len(ports) 138 | 139 | if gpu_num != port_num: 140 | print('启动失败:GPU数量 != 端口数量!') 141 | sys.exit(1) 142 | 143 | threading.Thread(target=save_img, name="save img").start() 144 | do_work(gpu=gpus[0], port=ports[0]) 145 | 146 | # pool = multiprocessing.Pool(processes=port_num) 147 | # for index in range(port_num): 148 | # pool.apply_async(do_work, 
(gpus[index], ports[index])) 149 | # pool.close() 150 | # pool.join() 151 | # save_img() 152 | -------------------------------------------------------------------------------- /ppocr/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /ppocr/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /ppocr/data/cls/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /ppocr/data/cls/dataset_traversal.py: -------------------------------------------------------------------------------- 1 | # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import os 16 | import sys 17 | import math 18 | import random 19 | import numpy as np 20 | import cv2 21 | 22 | from ppocr.utils.utility import initial_logger 23 | from ppocr.utils.utility import get_image_file_list 24 | 25 | logger = initial_logger() 26 | 27 | from ppocr.data.rec.img_tools import resize_norm_img, warp 28 | from ppocr.data.cls.randaugment import RandAugment 29 | 30 | 31 | def random_crop(img): 32 | img_h, img_w = img.shape[:2] 33 | if img_w > img_h * 4: 34 | w = random.randint(img_h * 2, img_w) 35 | i = random.randint(0, img_w - w) 36 | 37 | img = img[:, i:i + w, :] 38 | return img 39 | 40 | 41 | class SimpleReader(object): 42 | def __init__(self, params): 43 | if params['mode'] != 'train': 44 | self.num_workers = 1 45 | else: 46 | self.num_workers = params['num_workers'] 47 | if params['mode'] != 'test': 48 | self.img_set_dir = params['img_set_dir'] 49 | self.label_file_path = params['label_file_path'] 50 | self.use_gpu = params['use_gpu'] 51 | self.image_shape = params['image_shape'] 52 | self.mode = params['mode'] 53 | self.infer_img = params['infer_img'] 54 | self.use_distort = params['mode'] == 'train' and params['distort'] 55 | self.randaug = RandAugment() 56 | self.label_list = params['label_list'] 57 | if "distort" in params: 58 | self.use_distort = params['distort'] and params['use_gpu'] 59 | if not params['use_gpu']: 60 | logger.info( 61 | "Distort operation can only support in GPU.Distort will be set to False." 
62 | ) 63 | if params['mode'] == 'train': 64 | self.batch_size = params['train_batch_size_per_card'] 65 | self.drop_last = True 66 | else: 67 | self.batch_size = params['test_batch_size_per_card'] 68 | self.drop_last = False 69 | self.use_distort = False 70 | 71 | def __call__(self, process_id): 72 | if self.mode != 'train': 73 | process_id = 0 74 | 75 | def get_device_num(): 76 | if self.use_gpu: 77 | gpus = os.environ.get("CUDA_VISIBLE_DEVICES", "1") 78 | gpu_num = len(gpus.split(',')) 79 | return gpu_num 80 | else: 81 | cpu_num = os.environ.get("CPU_NUM", 1) 82 | return int(cpu_num) 83 | 84 | def sample_iter_reader(): 85 | if self.mode != 'train' and self.infer_img is not None: 86 | image_file_list = get_image_file_list(self.infer_img) 87 | for single_img in image_file_list: 88 | img = cv2.imread(single_img) 89 | if img.shape[-1] == 1 or len(list(img.shape)) == 2: 90 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) 91 | norm_img = resize_norm_img(img, self.image_shape) 92 | 93 | norm_img = norm_img[np.newaxis, :] 94 | yield norm_img 95 | else: 96 | with open(self.label_file_path, "rb") as fin: 97 | label_infor_list = fin.readlines() 98 | img_num = len(label_infor_list) 99 | img_id_list = list(range(img_num)) 100 | random.shuffle(img_id_list) 101 | if sys.platform == "win32" and self.num_workers != 1: 102 | print("multiprocess is not fully compatible with Windows." 103 | "num_workers will be 1.") 104 | self.num_workers = 1 105 | if self.batch_size * get_device_num( 106 | ) * self.num_workers > img_num: 107 | raise Exception( 108 | "The number of the whole data ({}) is smaller than the batch_size * devices_num * num_workers ({})". 
109 | format(img_num, self.batch_size * get_device_num() * 110 | self.num_workers)) 111 | for img_id in range(process_id, img_num, self.num_workers): 112 | label_infor = label_infor_list[img_id_list[img_id]] 113 | substr = label_infor.decode('utf-8').strip("\n").split("\t") 114 | label = self.label_list.index(substr[1]) 115 | 116 | img_path = self.img_set_dir + "/" + substr[0] 117 | img = cv2.imread(img_path) 118 | if img is None: 119 | logger.info("{} does not exist!".format(img_path)) 120 | continue 121 | if img.shape[-1] == 1 or len(list(img.shape)) == 2: 122 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) 123 | 124 | if self.use_distort: 125 | img = warp(img, 10) 126 | img = self.randaug(img) 127 | norm_img = resize_norm_img(img, self.image_shape) 128 | norm_img = norm_img[np.newaxis, :] 129 | yield (norm_img, label) 130 | 131 | def batch_iter_reader(): 132 | batch_outs = [] 133 | for outs in sample_iter_reader(): 134 | batch_outs.append(outs) 135 | if len(batch_outs) == self.batch_size: 136 | yield batch_outs 137 | batch_outs = [] 138 | if not self.drop_last: 139 | if len(batch_outs) != 0: 140 | yield batch_outs 141 | 142 | if self.infer_img is None: 143 | return batch_iter_reader 144 | return sample_iter_reader 145 | -------------------------------------------------------------------------------- /ppocr/data/cls/randaugment.py: -------------------------------------------------------------------------------- 1 | # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | from __future__ import unicode_literals 19 | 20 | from PIL import Image, ImageEnhance, ImageOps 21 | import numpy as np 22 | import random 23 | import six 24 | 25 | 26 | class RawRandAugment(object): 27 | def __init__(self, num_layers=2, magnitude=5, fillcolor=(128, 128, 128)): 28 | self.num_layers = num_layers 29 | self.magnitude = magnitude 30 | self.max_level = 10 31 | 32 | abso_level = self.magnitude / self.max_level 33 | self.level_map = { 34 | "shearX": 0.3 * abso_level, 35 | "shearY": 0.3 * abso_level, 36 | "translateX": 150.0 / 331 * abso_level, 37 | "translateY": 150.0 / 331 * abso_level, 38 | "rotate": 30 * abso_level, 39 | "color": 0.9 * abso_level, 40 | "posterize": int(4.0 * abso_level), 41 | "solarize": 256.0 * abso_level, 42 | "contrast": 0.9 * abso_level, 43 | "sharpness": 0.9 * abso_level, 44 | "brightness": 0.9 * abso_level, 45 | "autocontrast": 0, 46 | "equalize": 0, 47 | "invert": 0 48 | } 49 | 50 | # from https://stackoverflow.com/questions/5252170/ 51 | # specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand 52 | def rotate_with_fill(img, magnitude): 53 | rot = img.convert("RGBA").rotate(magnitude) 54 | return Image.composite(rot, 55 | Image.new("RGBA", rot.size, (128, ) * 4), 56 | rot).convert(img.mode) 57 | 58 | rnd_ch_op = random.choice 59 | 60 | self.func = { 61 | "shearX": lambda img, magnitude: img.transform( 62 | img.size, 63 | Image.AFFINE, 64 | (1, magnitude * rnd_ch_op([-1, 1]), 0, 0, 1, 0), 65 | Image.BICUBIC, 66 | fillcolor=fillcolor), 67 | "shearY": lambda img, magnitude: img.transform( 68 | img.size, 69 | Image.AFFINE, 70 | (1, 0, 0, magnitude * rnd_ch_op([-1, 1]), 1, 0), 71 | Image.BICUBIC, 72 | fillcolor=fillcolor), 73 | "translateX": lambda img, magnitude: 
img.transform( 74 | img.size, 75 | Image.AFFINE, 76 | (1, 0, magnitude * img.size[0] * rnd_ch_op([-1, 1]), 0, 1, 0), 77 | fillcolor=fillcolor), 78 | "translateY": lambda img, magnitude: img.transform( 79 | img.size, 80 | Image.AFFINE, 81 | (1, 0, 0, 0, 1, magnitude * img.size[1] * rnd_ch_op([-1, 1])), 82 | fillcolor=fillcolor), 83 | "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude), 84 | "color": lambda img, magnitude: ImageEnhance.Color(img).enhance( 85 | 1 + magnitude * rnd_ch_op([-1, 1])), 86 | "posterize": lambda img, magnitude: 87 | ImageOps.posterize(img, magnitude), 88 | "solarize": lambda img, magnitude: 89 | ImageOps.solarize(img, magnitude), 90 | "contrast": lambda img, magnitude: 91 | ImageEnhance.Contrast(img).enhance( 92 | 1 + magnitude * rnd_ch_op([-1, 1])), 93 | "sharpness": lambda img, magnitude: 94 | ImageEnhance.Sharpness(img).enhance( 95 | 1 + magnitude * rnd_ch_op([-1, 1])), 96 | "brightness": lambda img, magnitude: 97 | ImageEnhance.Brightness(img).enhance( 98 | 1 + magnitude * rnd_ch_op([-1, 1])), 99 | "autocontrast": lambda img, magnitude: 100 | ImageOps.autocontrast(img), 101 | "equalize": lambda img, magnitude: ImageOps.equalize(img), 102 | "invert": lambda img, magnitude: ImageOps.invert(img) 103 | } 104 | 105 | def __call__(self, img): 106 | avaiable_op_names = list(self.level_map.keys()) 107 | for layer_num in range(self.num_layers): 108 | op_name = np.random.choice(avaiable_op_names) 109 | img = self.func[op_name](img, self.level_map[op_name]) 110 | return img 111 | 112 | 113 | class RandAugment(RawRandAugment): 114 | """ RandAugment wrapper to auto fit different img types """ 115 | 116 | def __init__(self, *args, **kwargs): 117 | if six.PY2: 118 | super(RandAugment, self).__init__(*args, **kwargs) 119 | else: 120 | super().__init__(*args, **kwargs) 121 | 122 | def __call__(self, img): 123 | if not isinstance(img, Image.Image): 124 | img = np.ascontiguousarray(img) 125 | img = Image.fromarray(img) 126 | 127 | if six.PY2: 
128 | img = super(RandAugment, self).__call__(img) 129 | else: 130 | img = super().__call__(img) 131 | 132 | if isinstance(img, Image.Image): 133 | img = np.asarray(img) 134 | 135 | return img 136 | -------------------------------------------------------------------------------- /ppocr/data/det/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/ppocr/data/det/__init__.py -------------------------------------------------------------------------------- /ppocr/data/det/data_augment.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import numpy as np 9 | import random 10 | import cv2 11 | import math 12 | 13 | import imgaug 14 | import imgaug.augmenters as iaa 15 | 16 | 17 | def AugmentData(data): 18 | img = data['image'] 19 | shape = img.shape 20 | 21 | aug = iaa.Sequential( 22 | [iaa.Fliplr(0.5), iaa.Affine(rotate=(-10, 10)), iaa.Resize( 23 | (0.5, 3))]).to_deterministic() 24 | 25 | def may_augment_annotation(aug, data, shape): 26 | if aug is None: 27 | return data 28 | 29 | line_polys = [] 30 | for poly in data['polys']: 31 | new_poly = may_augment_poly(aug, shape, poly) 32 | line_polys.append(new_poly) 33 | data['polys'] = np.array(line_polys) 34 | return data 35 | 36 | def may_augment_poly(aug, img_shape, poly): 37 | keypoints = [imgaug.Keypoint(p[0], p[1]) for p in poly] 38 | keypoints = aug.augment_keypoints( 39 | [imgaug.KeypointsOnImage( 40 | keypoints, shape=img_shape)])[0].keypoints 41 | poly = [(p.x, p.y) for p in keypoints] 42 | return poly 43 | 44 | img_aug = aug.augment_image(img) 45 | data['image'] = img_aug 46 | data = may_augment_annotation(aug, data, shape) 47 | return data 
48 | -------------------------------------------------------------------------------- /ppocr/data/det/make_border_map.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import numpy as np 9 | import cv2 10 | np.seterr(divide='ignore', invalid='ignore') 11 | import pyclipper 12 | from shapely.geometry import Polygon 13 | import sys 14 | import warnings 15 | warnings.simplefilter("ignore") 16 | 17 | 18 | def draw_border_map(polygon, canvas, mask, shrink_ratio): 19 | polygon = np.array(polygon) 20 | assert polygon.ndim == 2 21 | assert polygon.shape[1] == 2 22 | 23 | polygon_shape = Polygon(polygon) 24 | if polygon_shape.area <= 0: 25 | return 26 | distance = polygon_shape.area * ( 27 | 1 - np.power(shrink_ratio, 2)) / polygon_shape.length 28 | subject = [tuple(l) for l in polygon] 29 | padding = pyclipper.PyclipperOffset() 30 | padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) 31 | 32 | padded_polygon = np.array(padding.Execute(distance)[0]) 33 | cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0) 34 | 35 | xmin = padded_polygon[:, 0].min() 36 | xmax = padded_polygon[:, 0].max() 37 | ymin = padded_polygon[:, 1].min() 38 | ymax = padded_polygon[:, 1].max() 39 | width = xmax - xmin + 1 40 | height = ymax - ymin + 1 41 | 42 | polygon[:, 0] = polygon[:, 0] - xmin 43 | polygon[:, 1] = polygon[:, 1] - ymin 44 | 45 | xs = np.broadcast_to( 46 | np.linspace( 47 | 0, width - 1, num=width).reshape(1, width), (height, width)) 48 | ys = np.broadcast_to( 49 | np.linspace( 50 | 0, height - 1, num=height).reshape(height, 1), (height, width)) 51 | 52 | distance_map = np.zeros((polygon.shape[0], height, width), dtype=np.float32) 53 | for i in range(polygon.shape[0]): 54 | j = (i + 1) % polygon.shape[0] 55 | 
absolute_distance = _distance(xs, ys, polygon[i], polygon[j]) 56 | distance_map[i] = np.clip(absolute_distance / distance, 0, 1) 57 | distance_map = distance_map.min(axis=0) 58 | 59 | xmin_valid = min(max(0, xmin), canvas.shape[1] - 1) 60 | xmax_valid = min(max(0, xmax), canvas.shape[1] - 1) 61 | ymin_valid = min(max(0, ymin), canvas.shape[0] - 1) 62 | ymax_valid = min(max(0, ymax), canvas.shape[0] - 1) 63 | canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax( 64 | 1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height, 65 | xmin_valid - xmin:xmax_valid - xmax + width], 66 | canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1]) 67 | 68 | 69 | def _distance(xs, ys, point_1, point_2): 70 | ''' 71 | compute the distance from point to a line 72 | ys: coordinates in the first axis 73 | xs: coordinates in the second axis 74 | point_1, point_2: (x, y), the end of the line 75 | ''' 76 | height, width = xs.shape[:2] 77 | square_distance_1 = np.square(xs - point_1[0]) + np.square(ys - point_1[1]) 78 | square_distance_2 = np.square(xs - point_2[0]) + np.square(ys - point_2[1]) 79 | square_distance = np.square(point_1[0] - point_2[0]) + np.square(point_1[ 80 | 1] - point_2[1]) 81 | 82 | cosin = (square_distance - square_distance_1 - square_distance_2) / ( 83 | 2 * np.sqrt(square_distance_1 * square_distance_2)) 84 | square_sin = 1 - np.square(cosin) 85 | square_sin = np.nan_to_num(square_sin) 86 | result = np.sqrt(square_distance_1 * square_distance_2 * square_sin / 87 | square_distance) 88 | 89 | result[cosin < 90 | 0] = np.sqrt(np.fmin(square_distance_1, square_distance_2))[cosin < 91 | 0] 92 | # self.extend_line(point_1, point_2, result) 93 | return result 94 | 95 | 96 | def extend_line(point_1, point_2, result, shrink_ratio): 97 | ex_point_1 = ( 98 | int( 99 | round(point_1[0] + (point_1[0] - point_2[0]) * (1 + shrink_ratio))), 100 | int( 101 | round(point_1[1] + (point_1[1] - point_2[1]) * (1 + shrink_ratio)))) 102 | cv2.line( 103 | 
result, 104 | tuple(ex_point_1), 105 | tuple(point_1), 106 | 4096.0, 107 | 1, 108 | lineType=cv2.LINE_AA, 109 | shift=0) 110 | ex_point_2 = ( 111 | int( 112 | round(point_2[0] + (point_2[0] - point_1[0]) * (1 + shrink_ratio))), 113 | int( 114 | round(point_2[1] + (point_2[1] - point_1[1]) * (1 + shrink_ratio)))) 115 | cv2.line( 116 | result, 117 | tuple(ex_point_2), 118 | tuple(point_2), 119 | 4096.0, 120 | 1, 121 | lineType=cv2.LINE_AA, 122 | shift=0) 123 | return ex_point_1, ex_point_2 124 | 125 | 126 | def MakeBorderMap(data): 127 | shrink_ratio = 0.4 128 | thresh_min = 0.3 129 | thresh_max = 0.7 130 | 131 | im = data['image'] 132 | text_polys = data['polys'] 133 | ignore_tags = data['ignore_tags'] 134 | 135 | canvas = np.zeros(im.shape[:2], dtype=np.float32) 136 | mask = np.zeros(im.shape[:2], dtype=np.float32) 137 | 138 | for i in range(len(text_polys)): 139 | if ignore_tags[i]: 140 | continue 141 | draw_border_map( 142 | text_polys[i], canvas, mask=mask, shrink_ratio=shrink_ratio) 143 | canvas = canvas * (thresh_max - thresh_min) + thresh_min 144 | 145 | data['threshold_map'] = canvas 146 | data['threshold_mask'] = mask 147 | return data 148 | -------------------------------------------------------------------------------- /ppocr/data/det/make_shrink_map.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import numpy as np 9 | import cv2 10 | from shapely.geometry import Polygon 11 | import pyclipper 12 | 13 | 14 | def validate_polygons(polygons, ignore_tags, h, w): 15 | ''' 16 | polygons (numpy.array, required): of shape (num_instances, num_points, 2) 17 | ''' 18 | if len(polygons) == 0: 19 | return polygons, ignore_tags 20 | assert len(polygons) == len(ignore_tags) 21 | for polygon in polygons: 22 | polygon[:, 0] = 
np.clip(polygon[:, 0], 0, w - 1) 23 | polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1) 24 | 25 | for i in range(len(polygons)): 26 | area = polygon_area(polygons[i]) 27 | if abs(area) < 1: 28 | ignore_tags[i] = True 29 | if area > 0: 30 | polygons[i] = polygons[i][::-1, :] 31 | return polygons, ignore_tags 32 | 33 | 34 | def polygon_area(polygon): 35 | edge = 0 36 | for i in range(polygon.shape[0]): 37 | next_index = (i + 1) % polygon.shape[0] 38 | edge += (polygon[next_index, 0] - polygon[i, 0]) * ( 39 | polygon[next_index, 1] - polygon[i, 1]) 40 | 41 | return edge / 2. 42 | 43 | 44 | def MakeShrinkMap(data): 45 | min_text_size = 8 46 | shrink_ratio = 0.4 47 | 48 | image = data['image'] 49 | text_polys = data['polys'] 50 | ignore_tags = data['ignore_tags'] 51 | 52 | h, w = image.shape[:2] 53 | text_polys, ignore_tags = validate_polygons(text_polys, ignore_tags, h, w) 54 | gt = np.zeros((h, w), dtype=np.float32) 55 | # gt = np.zeros((1, h, w), dtype=np.float32) 56 | mask = np.ones((h, w), dtype=np.float32) 57 | for i in range(len(text_polys)): 58 | polygon = text_polys[i] 59 | height = max(polygon[:, 1]) - min(polygon[:, 1]) 60 | width = max(polygon[:, 0]) - min(polygon[:, 0]) 61 | # height = min(np.linalg.norm(polygon[0] - polygon[3]), 62 | # np.linalg.norm(polygon[1] - polygon[2])) 63 | # width = min(np.linalg.norm(polygon[0] - polygon[1]), 64 | # np.linalg.norm(polygon[2] - polygon[3])) 65 | if ignore_tags[i] or min(height, width) < min_text_size: 66 | cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0) 67 | ignore_tags[i] = True 68 | else: 69 | polygon_shape = Polygon(polygon) 70 | distance = polygon_shape.area * ( 71 | 1 - np.power(shrink_ratio, 2)) / polygon_shape.length 72 | subject = [tuple(l) for l in text_polys[i]] 73 | padding = pyclipper.PyclipperOffset() 74 | padding.AddPath(subject, pyclipper.JT_ROUND, 75 | pyclipper.ET_CLOSEDPOLYGON) 76 | shrinked = padding.Execute(-distance) 77 | if shrinked == []: 78 | cv2.fillPoly(mask, 79 | 
polygon.astype(np.int32)[np.newaxis, :, :], 0) 80 | ignore_tags[i] = True 81 | continue 82 | shrinked = np.array(shrinked[0]).reshape(-1, 2) 83 | cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1) 84 | # cv2.fillPoly(gt[0], [shrinked.astype(np.int32)], 1) 85 | 86 | data['shrink_map'] = gt 87 | data['shrink_mask'] = mask 88 | return data 89 | -------------------------------------------------------------------------------- /ppocr/data/det/random_crop_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import numpy as np 9 | import cv2 10 | import random 11 | 12 | 13 | def is_poly_in_rect(poly, x, y, w, h): 14 | poly = np.array(poly) 15 | if poly[:, 0].min() < x or poly[:, 0].max() > x + w: 16 | return False 17 | if poly[:, 1].min() < y or poly[:, 1].max() > y + h: 18 | return False 19 | return True 20 | 21 | 22 | def is_poly_outside_rect(poly, x, y, w, h): 23 | poly = np.array(poly) 24 | if poly[:, 0].max() < x or poly[:, 0].min() > x + w: 25 | return True 26 | if poly[:, 1].max() < y or poly[:, 1].min() > y + h: 27 | return True 28 | return False 29 | 30 | 31 | def split_regions(axis): 32 | regions = [] 33 | min_axis = 0 34 | for i in range(1, axis.shape[0]): 35 | if axis[i] != axis[i - 1] + 1: 36 | region = axis[min_axis:i] 37 | min_axis = i 38 | regions.append(region) 39 | return regions 40 | 41 | 42 | def random_select(axis, max_size): 43 | xx = np.random.choice(axis, size=2) 44 | xmin = np.min(xx) 45 | xmax = np.max(xx) 46 | xmin = np.clip(xmin, 0, max_size - 1) 47 | xmax = np.clip(xmax, 0, max_size - 1) 48 | return xmin, xmax 49 | 50 | 51 | def region_wise_random_select(regions, max_size): 52 | selected_index = list(np.random.choice(len(regions), 2)) 53 | selected_values = [] 54 | for index in selected_index: 55 | axis = 
regions[index] 56 | xx = int(np.random.choice(axis, size=1)) 57 | selected_values.append(xx) 58 | xmin = min(selected_values) 59 | xmax = max(selected_values) 60 | return xmin, xmax 61 | 62 | 63 | def crop_area(im, text_polys, min_crop_side_ratio, max_tries): 64 | h, w, _ = im.shape 65 | h_array = np.zeros(h, dtype=np.int32) 66 | w_array = np.zeros(w, dtype=np.int32) 67 | for points in text_polys: 68 | points = np.round(points, decimals=0).astype(np.int32) 69 | minx = np.min(points[:, 0]) 70 | maxx = np.max(points[:, 0]) 71 | w_array[minx:maxx] = 1 72 | miny = np.min(points[:, 1]) 73 | maxy = np.max(points[:, 1]) 74 | h_array[miny:maxy] = 1 75 | # ensure the cropped area not across a text 76 | h_axis = np.where(h_array == 0)[0] 77 | w_axis = np.where(w_array == 0)[0] 78 | 79 | if len(h_axis) == 0 or len(w_axis) == 0: 80 | return 0, 0, w, h 81 | 82 | h_regions = split_regions(h_axis) 83 | w_regions = split_regions(w_axis) 84 | 85 | for i in range(max_tries): 86 | if len(w_regions) > 1: 87 | xmin, xmax = region_wise_random_select(w_regions, w) 88 | else: 89 | xmin, xmax = random_select(w_axis, w) 90 | if len(h_regions) > 1: 91 | ymin, ymax = region_wise_random_select(h_regions, h) 92 | else: 93 | ymin, ymax = random_select(h_axis, h) 94 | 95 | if xmax - xmin < min_crop_side_ratio * w or ymax - ymin < min_crop_side_ratio * h: 96 | # area too small 97 | continue 98 | num_poly_in_rect = 0 99 | for poly in text_polys: 100 | if not is_poly_outside_rect(poly, xmin, ymin, xmax - xmin, 101 | ymax - ymin): 102 | num_poly_in_rect += 1 103 | break 104 | 105 | if num_poly_in_rect > 0: 106 | return xmin, ymin, xmax - xmin, ymax - ymin 107 | 108 | return 0, 0, w, h 109 | 110 | 111 | def RandomCropData(data, size): 112 | max_tries = 10 113 | min_crop_side_ratio = 0.1 114 | require_original_image = False 115 | keep_ratio = True 116 | 117 | im = data['image'] 118 | text_polys = data['polys'] 119 | ignore_tags = data['ignore_tags'] 120 | texts = data['texts'] 121 | all_care_polys = [ 
122 | text_polys[i] for i, tag in enumerate(ignore_tags) if not tag 123 | ] 124 | crop_x, crop_y, crop_w, crop_h = crop_area(im, all_care_polys, 125 | min_crop_side_ratio, max_tries) 126 | dh, dw = size 127 | scale_w = dw / crop_w 128 | scale_h = dh / crop_h 129 | scale = min(scale_w, scale_h) 130 | h = int(crop_h * scale) 131 | w = int(crop_w * scale) 132 | if keep_ratio: 133 | padimg = np.zeros((dh, dw, im.shape[2]), im.dtype) 134 | padimg[:h, :w] = cv2.resize( 135 | im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h)) 136 | img = padimg 137 | else: 138 | img = cv2.resize(im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], 139 | (dw, dh)) 140 | text_polys_crop = [] 141 | ignore_tags_crop = [] 142 | texts_crop = [] 143 | for poly, text, tag in zip(text_polys, texts, ignore_tags): 144 | poly = ((poly - (crop_x, crop_y)) * scale).tolist() 145 | if not is_poly_outside_rect(poly, 0, 0, w, h): 146 | text_polys_crop.append(poly) 147 | ignore_tags_crop.append(tag) 148 | texts_crop.append(text) 149 | data['image'] = img 150 | data['polys'] = np.array(text_polys_crop) 151 | data['ignore_tags'] = ignore_tags_crop 152 | data['texts'] = texts_crop 153 | return data 154 | -------------------------------------------------------------------------------- /ppocr/data/reader_main.py: -------------------------------------------------------------------------------- 1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 2 | # 3 | #Licensed under the Apache License, Version 2.0 (the "License"); 4 | #you may not use this file except in compliance with the License. 5 | #You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | #Unless required by applicable law or agreed to in writing, software 10 | #distributed under the License is distributed on an "AS IS" BASIS, 11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | #See the License for the specific language governing permissions and 13 | #limitations under the License. 14 | 15 | import os 16 | import random 17 | import numpy as np 18 | 19 | import paddle 20 | from ppocr.utils.utility import create_module 21 | from copy import deepcopy 22 | 23 | from .rec.img_tools import process_image 24 | import cv2 25 | 26 | import sys 27 | import signal 28 | 29 | 30 | # handle terminate reader process, do not print stack frame 31 | def _reader_quit(signum, frame): 32 | print("Reader process exit.") 33 | sys.exit() 34 | 35 | 36 | def _term_group(sig_num, frame): 37 | print('pid {} terminated, terminate group ' 38 | '{}...'.format(os.getpid(), os.getpgrp())) 39 | os.killpg(os.getpgid(os.getpid()), signal.SIGKILL) 40 | 41 | 42 | signal.signal(signal.SIGTERM, _reader_quit) 43 | signal.signal(signal.SIGINT, _term_group) 44 | 45 | 46 | def reader_main(config=None, mode=None): 47 | """Create a reader for trainning 48 | 49 | Args: 50 | settings: arguments 51 | 52 | Returns: 53 | train reader 54 | """ 55 | assert mode in ["train", "eval", "test"],\ 56 | "Nonsupport mode:{}".format(mode) 57 | global_params = config['Global'] 58 | if mode == "train": 59 | params = deepcopy(config['TrainReader']) 60 | elif mode == "eval": 61 | params = deepcopy(config['EvalReader']) 62 | else: 63 | params = deepcopy(config['TestReader']) 64 | params['mode'] = mode 65 | params.update(global_params) 66 | reader_function = params['reader_function'] 67 | function = create_module(reader_function)(params) 68 | if mode == "train": 69 | if sys.platform == "win32": 70 | return function(0) 71 | readers = [] 72 | num_workers = params['num_workers'] 73 | for process_id in range(num_workers): 74 | readers.append(function(process_id)) 75 | return paddle.reader.multiprocess_reader(readers, False) 76 | else: 77 | return function(mode) 78 | -------------------------------------------------------------------------------- /ppocr/data/rec/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /ppocr/data/rec/text_image_aug/augment.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Author: RubanSeven 3 | # Reference: https://github.com/RubanSeven/Text-Image-Augmentation-python 4 | 5 | # import cv2 6 | import numpy as np 7 | from .warp_mls import WarpMLS 8 | 9 | 10 | def tia_distort(src, segment=4): 11 | img_h, img_w = src.shape[:2] 12 | 13 | cut = img_w // segment 14 | thresh = cut // 3 15 | 16 | src_pts = list() 17 | dst_pts = list() 18 | 19 | src_pts.append([0, 0]) 20 | src_pts.append([img_w, 0]) 21 | src_pts.append([img_w, img_h]) 22 | src_pts.append([0, img_h]) 23 | 24 | dst_pts.append([np.random.randint(thresh), np.random.randint(thresh)]) 25 | dst_pts.append( 26 | [img_w - np.random.randint(thresh), np.random.randint(thresh)]) 27 | dst_pts.append( 28 | [img_w - np.random.randint(thresh), img_h - np.random.randint(thresh)]) 29 | dst_pts.append( 30 | [np.random.randint(thresh), img_h - np.random.randint(thresh)]) 31 | 32 | half_thresh = thresh * 0.5 33 | 34 | for cut_idx in np.arange(1, segment, 1): 35 | src_pts.append([cut * cut_idx, 0]) 36 | src_pts.append([cut * 
cut_idx, img_h]) 37 | dst_pts.append([ 38 | cut * cut_idx + np.random.randint(thresh) - half_thresh, 39 | np.random.randint(thresh) - half_thresh 40 | ]) 41 | dst_pts.append([ 42 | cut * cut_idx + np.random.randint(thresh) - half_thresh, 43 | img_h + np.random.randint(thresh) - half_thresh 44 | ]) 45 | 46 | trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h) 47 | dst = trans.generate() 48 | 49 | return dst 50 | 51 | 52 | def tia_stretch(src, segment=4): 53 | img_h, img_w = src.shape[:2] 54 | 55 | cut = img_w // segment 56 | thresh = cut * 4 // 5 57 | 58 | src_pts = list() 59 | dst_pts = list() 60 | 61 | src_pts.append([0, 0]) 62 | src_pts.append([img_w, 0]) 63 | src_pts.append([img_w, img_h]) 64 | src_pts.append([0, img_h]) 65 | 66 | dst_pts.append([0, 0]) 67 | dst_pts.append([img_w, 0]) 68 | dst_pts.append([img_w, img_h]) 69 | dst_pts.append([0, img_h]) 70 | 71 | half_thresh = thresh * 0.5 72 | 73 | for cut_idx in np.arange(1, segment, 1): 74 | move = np.random.randint(thresh) - half_thresh 75 | src_pts.append([cut * cut_idx, 0]) 76 | src_pts.append([cut * cut_idx, img_h]) 77 | dst_pts.append([cut * cut_idx + move, 0]) 78 | dst_pts.append([cut * cut_idx + move, img_h]) 79 | 80 | trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h) 81 | dst = trans.generate() 82 | 83 | return dst 84 | 85 | 86 | def tia_perspective(src): 87 | img_h, img_w = src.shape[:2] 88 | 89 | thresh = img_h // 2 90 | 91 | src_pts = list() 92 | dst_pts = list() 93 | 94 | src_pts.append([0, 0]) 95 | src_pts.append([img_w, 0]) 96 | src_pts.append([img_w, img_h]) 97 | src_pts.append([0, img_h]) 98 | 99 | dst_pts.append([0, np.random.randint(thresh)]) 100 | dst_pts.append([img_w, np.random.randint(thresh)]) 101 | dst_pts.append([img_w, img_h - np.random.randint(thresh)]) 102 | dst_pts.append([0, img_h - np.random.randint(thresh)]) 103 | 104 | trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h) 105 | dst = trans.generate() 106 | 107 | return dst 108 | 
-------------------------------------------------------------------------------- /ppocr/data/rec/text_image_aug/warp_mls.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Author: RubanSeven 3 | # Reference: https://github.com/RubanSeven/Text-Image-Augmentation-python 4 | import math 5 | import numpy as np 6 | 7 | 8 | class WarpMLS: 9 | def __init__(self, src, src_pts, dst_pts, dst_w, dst_h, trans_ratio=1.): 10 | self.src = src 11 | self.src_pts = src_pts 12 | self.dst_pts = dst_pts 13 | self.pt_count = len(self.dst_pts) 14 | self.dst_w = dst_w 15 | self.dst_h = dst_h 16 | self.trans_ratio = trans_ratio 17 | self.grid_size = 100 18 | self.rdx = np.zeros((self.dst_h, self.dst_w)) 19 | self.rdy = np.zeros((self.dst_h, self.dst_w)) 20 | 21 | @staticmethod 22 | def __bilinear_interp(x, y, v11, v12, v21, v22): 23 | return (v11 * (1 - y) + v12 * y) * (1 - x) + (v21 * 24 | (1 - y) + v22 * y) * x 25 | 26 | def generate(self): 27 | self.calc_delta() 28 | return self.gen_img() 29 | 30 | def calc_delta(self): 31 | w = np.zeros(self.pt_count, dtype=np.float32) 32 | 33 | if self.pt_count < 2: 34 | return 35 | 36 | i = 0 37 | while 1: 38 | if self.dst_w <= i < self.dst_w + self.grid_size - 1: 39 | i = self.dst_w - 1 40 | elif i >= self.dst_w: 41 | break 42 | 43 | j = 0 44 | while 1: 45 | if self.dst_h <= j < self.dst_h + self.grid_size - 1: 46 | j = self.dst_h - 1 47 | elif j >= self.dst_h: 48 | break 49 | 50 | sw = 0 51 | swp = np.zeros(2, dtype=np.float32) 52 | swq = np.zeros(2, dtype=np.float32) 53 | new_pt = np.zeros(2, dtype=np.float32) 54 | cur_pt = np.array([i, j], dtype=np.float32) 55 | 56 | k = 0 57 | for k in range(self.pt_count): 58 | if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]: 59 | break 60 | 61 | w[k] = 1. 
/ ( 62 | (i - self.dst_pts[k][0]) * (i - self.dst_pts[k][0]) + 63 | (j - self.dst_pts[k][1]) * (j - self.dst_pts[k][1])) 64 | 65 | sw += w[k] 66 | swp = swp + w[k] * np.array(self.dst_pts[k]) 67 | swq = swq + w[k] * np.array(self.src_pts[k]) 68 | 69 | if k == self.pt_count - 1: 70 | pstar = 1 / sw * swp 71 | qstar = 1 / sw * swq 72 | 73 | miu_s = 0 74 | for k in range(self.pt_count): 75 | if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]: 76 | continue 77 | pt_i = self.dst_pts[k] - pstar 78 | miu_s += w[k] * np.sum(pt_i * pt_i) 79 | 80 | cur_pt -= pstar 81 | cur_pt_j = np.array([-cur_pt[1], cur_pt[0]]) 82 | 83 | for k in range(self.pt_count): 84 | if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]: 85 | continue 86 | 87 | pt_i = self.dst_pts[k] - pstar 88 | pt_j = np.array([-pt_i[1], pt_i[0]]) 89 | 90 | tmp_pt = np.zeros(2, dtype=np.float32) 91 | tmp_pt[0] = np.sum(pt_i * cur_pt) * self.src_pts[k][0] - \ 92 | np.sum(pt_j * cur_pt) * self.src_pts[k][1] 93 | tmp_pt[1] = -np.sum(pt_i * cur_pt_j) * self.src_pts[k][0] + \ 94 | np.sum(pt_j * cur_pt_j) * self.src_pts[k][1] 95 | tmp_pt *= (w[k] / miu_s) 96 | new_pt += tmp_pt 97 | 98 | new_pt += qstar 99 | else: 100 | new_pt = self.src_pts[k] 101 | 102 | self.rdx[j, i] = new_pt[0] - i 103 | self.rdy[j, i] = new_pt[1] - j 104 | 105 | j += self.grid_size 106 | i += self.grid_size 107 | 108 | def gen_img(self): 109 | src_h, src_w = self.src.shape[:2] 110 | dst = np.zeros_like(self.src, dtype=np.float32) 111 | 112 | for i in np.arange(0, self.dst_h, self.grid_size): 113 | for j in np.arange(0, self.dst_w, self.grid_size): 114 | ni = i + self.grid_size 115 | nj = j + self.grid_size 116 | w = h = self.grid_size 117 | if ni >= self.dst_h: 118 | ni = self.dst_h - 1 119 | h = ni - i + 1 120 | if nj >= self.dst_w: 121 | nj = self.dst_w - 1 122 | w = nj - j + 1 123 | 124 | di = np.reshape(np.arange(h), (-1, 1)) 125 | dj = np.reshape(np.arange(w), (1, -1)) 126 | delta_x = self.__bilinear_interp( 127 | di / h, dj / w, 
self.rdx[i, j], self.rdx[i, nj], 128 | self.rdx[ni, j], self.rdx[ni, nj]) 129 | delta_y = self.__bilinear_interp( 130 | di / h, dj / w, self.rdy[i, j], self.rdy[i, nj], 131 | self.rdy[ni, j], self.rdy[ni, nj]) 132 | nx = j + dj + delta_x * self.trans_ratio 133 | ny = i + di + delta_y * self.trans_ratio 134 | nx = np.clip(nx, 0, src_w - 1) 135 | ny = np.clip(ny, 0, src_h - 1) 136 | nxi = np.array(np.floor(nx), dtype=np.int32) 137 | nyi = np.array(np.floor(ny), dtype=np.int32) 138 | nxi1 = np.array(np.ceil(nx), dtype=np.int32) 139 | nyi1 = np.array(np.ceil(ny), dtype=np.int32) 140 | 141 | if len(self.src.shape) == 3: 142 | x = np.tile(np.expand_dims(ny - nyi, axis=-1), (1, 1, 3)) 143 | y = np.tile(np.expand_dims(nx - nxi, axis=-1), (1, 1, 3)) 144 | else: 145 | x = ny - nyi 146 | y = nx - nxi 147 | dst[i:i + h, j:j + w] = self.__bilinear_interp( 148 | x, y, self.src[nyi, nxi], self.src[nyi, nxi1], 149 | self.src[nyi1, nxi], self.src[nyi1, nxi1]) 150 | 151 | dst = np.clip(dst, 0, 255) 152 | dst = np.array(dst, dtype=np.uint8) 153 | 154 | return dst 155 | -------------------------------------------------------------------------------- /ppocr/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /ppocr/modeling/architectures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /ppocr/modeling/architectures/cls_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | from paddle import fluid 20 | 21 | from ppocr.utils.utility import create_module 22 | from ppocr.utils.utility import initial_logger 23 | 24 | logger = initial_logger() 25 | from copy import deepcopy 26 | 27 | 28 | class ClsModel(object): 29 | def __init__(self, params): 30 | super(ClsModel, self).__init__() 31 | global_params = params['Global'] 32 | self.infer_img = global_params['infer_img'] 33 | 34 | backbone_params = deepcopy(params["Backbone"]) 35 | backbone_params.update(global_params) 36 | self.backbone = create_module(backbone_params['function']) \ 37 | (params=backbone_params) 38 | 39 | head_params = deepcopy(params["Head"]) 40 | head_params.update(global_params) 41 | self.head = create_module(head_params['function']) \ 42 | (params=head_params) 43 | 44 | loss_params = deepcopy(params["Loss"]) 45 | loss_params.update(global_params) 46 | self.loss = create_module(loss_params['function']) \ 47 | (params=loss_params) 48 | 49 | self.image_shape = global_params['image_shape'] 50 | 51 | def create_feed(self, mode): 52 | image_shape = deepcopy(self.image_shape) 53 | image_shape.insert(0, -1) 54 | if mode == "train": 55 | image = fluid.data(name='image', shape=image_shape, dtype='float32') 56 | label = fluid.data(name='label', shape=[None, 1], dtype='int64') 57 | feed_list = [image, label] 58 | labels = {'label': label} 59 | loader = fluid.io.DataLoader.from_generator( 60 | feed_list=feed_list, 61 | capacity=64, 62 | use_double_buffer=True, 63 | iterable=False) 64 | else: 65 | labels = None 66 | loader = None 67 | image = fluid.data(name='image', shape=image_shape, dtype='float32') 68 | image.stop_gradient = False 69 | return image, labels, loader 70 | 71 | def __call__(self, mode): 72 | image, labels, loader = self.create_feed(mode) 73 | inputs = image 74 | conv_feas = self.backbone(inputs) 75 | predicts = self.head(conv_feas, 
labels, mode) 76 | if mode == "train": 77 | loss = self.loss(predicts, labels) 78 | label = labels['label'] 79 | acc = fluid.layers.accuracy(predicts['predict'], label, k=1) 80 | outputs = {'total_loss': loss, 'decoded_out': \ 81 | predicts['decoded_out'], 'label': label, 'acc': acc} 82 | return loader, outputs 83 | elif mode == "export": 84 | return [image, predicts] 85 | else: 86 | return loader, predicts 87 | -------------------------------------------------------------------------------- /ppocr/modeling/architectures/det_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | from paddle import fluid 20 | 21 | from ppocr.utils.utility import create_module 22 | from ppocr.utils.utility import initial_logger 23 | logger = initial_logger() 24 | from copy import deepcopy 25 | 26 | 27 | class DetModel(object): 28 | def __init__(self, params): 29 | """ 30 | Detection module for OCR text detection. 31 | args: 32 | params (dict): the super parameters for detection module. 
33 | """ 34 | global_params = params['Global'] 35 | self.algorithm = global_params['algorithm'] 36 | 37 | backbone_params = deepcopy(params["Backbone"]) 38 | backbone_params.update(global_params) 39 | self.backbone = create_module(backbone_params['function'])\ 40 | (params=backbone_params) 41 | 42 | head_params = deepcopy(params["Head"]) 43 | head_params.update(global_params) 44 | self.head = create_module(head_params['function'])\ 45 | (params=head_params) 46 | 47 | loss_params = deepcopy(params["Loss"]) 48 | loss_params.update(global_params) 49 | self.loss = create_module(loss_params['function'])\ 50 | (params=loss_params) 51 | 52 | self.image_shape = global_params['image_shape'] 53 | 54 | def create_feed(self, mode): 55 | """ 56 | create Dataloader feeds 57 | args: 58 | mode (str): 'train' for training or else for evaluation 59 | return: (image, corresponding label, dataloader) 60 | """ 61 | image_shape = deepcopy(self.image_shape) 62 | if image_shape[1] % 4 != 0 or image_shape[2] % 4 != 0: 63 | raise Exception("The size of the image must be divisible by 4, " 64 | "received image shape is {}, please reset the " 65 | "Global.image_shape in the yml file".format( 66 | image_shape)) 67 | 68 | image = fluid.layers.data( 69 | name='image', shape=image_shape, dtype='float32') 70 | image.stop_gradient = False 71 | if mode == "train": 72 | if self.algorithm == "EAST": 73 | h, w = int(image_shape[1] // 4), int(image_shape[2] // 4) 74 | score = fluid.layers.data( 75 | name='score', shape=[1, h, w], dtype='float32') 76 | geo = fluid.layers.data( 77 | name='geo', shape=[9, h, w], dtype='float32') 78 | mask = fluid.layers.data( 79 | name='mask', shape=[1, h, w], dtype='float32') 80 | feed_list = [image, score, geo, mask] 81 | labels = {'score': score, 'geo': geo, 'mask': mask} 82 | elif self.algorithm == "DB": 83 | shrink_map = fluid.layers.data( 84 | name='shrink_map', shape=image_shape[1:], dtype='float32') 85 | shrink_mask = fluid.layers.data( 86 | name='shrink_mask', 
shape=image_shape[1:], dtype='float32') 87 | threshold_map = fluid.layers.data( 88 | name='threshold_map', 89 | shape=image_shape[1:], 90 | dtype='float32') 91 | threshold_mask = fluid.layers.data( 92 | name='threshold_mask', 93 | shape=image_shape[1:], 94 | dtype='float32') 95 | feed_list=[image, shrink_map, shrink_mask,\ 96 | threshold_map, threshold_mask] 97 | labels = {'shrink_map':shrink_map,\ 98 | 'shrink_mask':shrink_mask,\ 99 | 'threshold_map':threshold_map,\ 100 | 'threshold_mask':threshold_mask} 101 | elif self.algorithm == "SAST": 102 | input_score = fluid.layers.data( 103 | name='score', shape=[1, 128, 128], dtype='float32') 104 | input_border = fluid.layers.data( 105 | name='border', shape=[5, 128, 128], dtype='float32') 106 | input_mask = fluid.layers.data( 107 | name='mask', shape=[1, 128, 128], dtype='float32') 108 | input_tvo = fluid.layers.data( 109 | name='tvo', shape=[9, 128, 128], dtype='float32') 110 | input_tco = fluid.layers.data( 111 | name='tco', shape=[3, 128, 128], dtype='float32') 112 | feed_list = [ 113 | image, input_score, input_border, input_mask, input_tvo, 114 | input_tco 115 | ] 116 | labels = {'input_score': input_score,\ 117 | 'input_border': input_border,\ 118 | 'input_mask': input_mask,\ 119 | 'input_tvo': input_tvo,\ 120 | 'input_tco': input_tco} 121 | loader = fluid.io.DataLoader.from_generator( 122 | feed_list=feed_list, 123 | capacity=64, 124 | use_double_buffer=True, 125 | iterable=False) 126 | else: 127 | labels = None 128 | loader = None 129 | return image, labels, loader 130 | 131 | def __call__(self, mode): 132 | """ 133 | run forward of defined module 134 | args: 135 | mode (str): 'train' for training; 'export' for inference, 136 | others for evaluation] 137 | """ 138 | image, labels, loader = self.create_feed(mode) 139 | conv_feas = self.backbone(image) 140 | if self.algorithm == "DB": 141 | predicts = self.head(conv_feas, mode) 142 | else: 143 | predicts = self.head(conv_feas) 144 | if mode == "train": 145 | 
losses = self.loss(predicts, labels) 146 | return loader, losses 147 | elif mode == "export": 148 | return [image, predicts] 149 | else: 150 | return loader, predicts 151 | -------------------------------------------------------------------------------- /ppocr/modeling/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /ppocr/modeling/common_functions.py: -------------------------------------------------------------------------------- 1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 2 | # 3 | #Licensed under the Apache License, Version 2.0 (the "License"); 4 | #you may not use this file except in compliance with the License. 5 | #You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | #Unless required by applicable law or agreed to in writing, software 10 | #distributed under the License is distributed on an "AS IS" BASIS, 11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | #See the License for the specific language governing permissions and 13 | #limitations under the License. 
14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import paddle 20 | import paddle.fluid as fluid 21 | from paddle.fluid.param_attr import ParamAttr 22 | import math 23 | 24 | 25 | def get_para_bias_attr(l2_decay, k, name): 26 | regularizer = fluid.regularizer.L2Decay(l2_decay) 27 | stdv = 1.0 / math.sqrt(k * 1.0) 28 | initializer = fluid.initializer.Uniform(-stdv, stdv) 29 | para_attr = fluid.ParamAttr( 30 | regularizer=regularizer, initializer=initializer, name=name + "_w_attr") 31 | bias_attr = fluid.ParamAttr( 32 | regularizer=regularizer, initializer=initializer, name=name + "_b_attr") 33 | return [para_attr, bias_attr] 34 | 35 | 36 | def conv_bn_layer(input, 37 | num_filters, 38 | filter_size, 39 | stride=1, 40 | groups=1, 41 | act=None, 42 | name=None): 43 | conv = fluid.layers.conv2d( 44 | input=input, 45 | num_filters=num_filters, 46 | filter_size=filter_size, 47 | stride=stride, 48 | padding=(filter_size - 1) // 2, 49 | groups=groups, 50 | act=None, 51 | param_attr=ParamAttr(name=name + "_weights"), 52 | bias_attr=False, 53 | name=name + '.conv2d') 54 | 55 | bn_name = "bn_" + name 56 | return fluid.layers.batch_norm( 57 | input=conv, 58 | act=act, 59 | name=bn_name + '.output', 60 | param_attr=ParamAttr(name=bn_name + '_scale'), 61 | bias_attr=ParamAttr(bn_name + '_offset'), 62 | moving_mean_name=bn_name + '_mean', 63 | moving_variance_name=bn_name + '_variance') 64 | 65 | 66 | def deconv_bn_layer(input, 67 | num_filters, 68 | filter_size=4, 69 | stride=2, 70 | act='relu', 71 | name=None): 72 | deconv = fluid.layers.conv2d_transpose( 73 | input=input, 74 | num_filters=num_filters, 75 | filter_size=filter_size, 76 | stride=stride, 77 | padding=1, 78 | act=None, 79 | param_attr=ParamAttr(name=name + "_weights"), 80 | bias_attr=False, 81 | name=name + '.deconv2d') 82 | bn_name = "bn_" + name 83 | return fluid.layers.batch_norm( 84 | input=deconv, 85 | act=act, 86 | 
name=bn_name + '.output', 87 | param_attr=ParamAttr(name=bn_name + '_scale'), 88 | bias_attr=ParamAttr(bn_name + '_offset'), 89 | moving_mean_name=bn_name + '_mean', 90 | moving_variance_name=bn_name + '_variance') 91 | 92 | 93 | def create_tmp_var(program, name, dtype, shape, lod_level=0): 94 | return program.current_block().create_var( 95 | name=name, dtype=dtype, shape=shape, lod_level=lod_level) 96 | -------------------------------------------------------------------------------- /ppocr/modeling/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /ppocr/modeling/heads/cls_head.py: -------------------------------------------------------------------------------- 1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 2 | # 3 | #Licensed under the Apache License, Version 2.0 (the "License"); 4 | #you may not use this file except in compliance with the License. 
5 | #You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | #Unless required by applicable law or agreed to in writing, software 10 | #distributed under the License is distributed on an "AS IS" BASIS, 11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | #See the License for the specific language governing permissions and 13 | #limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import math 20 | 21 | import paddle 22 | import paddle.fluid as fluid 23 | 24 | 25 | class ClsHead(object): 26 | """ 27 | Class orientation 28 | 29 | Args: 30 | 31 | params(dict): super parameters for build Class network 32 | """ 33 | 34 | def __init__(self, params): 35 | super(ClsHead, self).__init__() 36 | self.class_dim = params['class_dim'] 37 | 38 | def __call__(self, inputs, labels=None, mode=None): 39 | pool = fluid.layers.pool2d( 40 | input=inputs, pool_type='avg', global_pooling=True) 41 | stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) 42 | 43 | out = fluid.layers.fc( 44 | input=pool, 45 | size=self.class_dim, 46 | param_attr=fluid.param_attr.ParamAttr( 47 | name="fc_0.w_0", 48 | initializer=fluid.initializer.Uniform(-stdv, stdv)), 49 | bias_attr=fluid.param_attr.ParamAttr(name="fc_0.b_0")) 50 | 51 | softmax_out = fluid.layers.softmax(out, use_cudnn=False) 52 | out_label = fluid.layers.argmax(out, axis=1) 53 | predicts = {'predict': softmax_out, 'decoded_out': out_label} 54 | return predicts 55 | -------------------------------------------------------------------------------- /ppocr/modeling/heads/det_east_head.py: -------------------------------------------------------------------------------- 1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
2 | # 3 | #Licensed under the Apache License, Version 2.0 (the "License"); 4 | #you may not use this file except in compliance with the License. 5 | #You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | #Unless required by applicable law or agreed to in writing, software 10 | #distributed under the License is distributed on an "AS IS" BASIS, 11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | #See the License for the specific language governing permissions and 13 | #limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import paddle.fluid as fluid 20 | from ..common_functions import conv_bn_layer, deconv_bn_layer 21 | from collections import OrderedDict 22 | 23 | 24 | class EASTHead(object): 25 | """ 26 | EAST: An Efficient and Accurate Scene Text Detector 27 | see arxiv: https://arxiv.org/abs/1704.03155 28 | args: 29 | params(dict): the super parameters for network build 30 | """ 31 | 32 | def __init__(self, params): 33 | 34 | self.model_name = params['model_name'] 35 | 36 | def unet_fusion(self, inputs): 37 | f = inputs[::-1] 38 | if self.model_name == "large": 39 | num_outputs = [128, 128, 128, 128] 40 | else: 41 | num_outputs = [64, 64, 64, 64] 42 | g = [None, None, None, None] 43 | h = [None, None, None, None] 44 | for i in range(4): 45 | if i == 0: 46 | h[i] = f[i] 47 | else: 48 | h[i] = fluid.layers.concat([g[i - 1], f[i]], axis=1) 49 | h[i] = conv_bn_layer( 50 | input=h[i], 51 | num_filters=num_outputs[i], 52 | filter_size=3, 53 | stride=1, 54 | act='relu', 55 | name="unet_h_%d" % (i)) 56 | if i <= 2: 57 | #can be replaced with unpool 58 | g[i] = deconv_bn_layer( 59 | input=h[i], 60 | num_filters=num_outputs[i], 61 | name="unet_g_%d" % (i)) 62 | else: 63 | g[i] = conv_bn_layer( 64 | input=h[i], 65 | num_filters=num_outputs[i], 66 | filter_size=3, 67 | stride=1, 
68 | act='relu', 69 | name="unet_g_%d" % (i)) 70 | return g[3] 71 | 72 | def detector_header(self, f_common): 73 | if self.model_name == "large": 74 | num_outputs = [128, 64, 1, 8] 75 | else: 76 | num_outputs = [64, 32, 1, 8] 77 | f_det = conv_bn_layer( 78 | input=f_common, 79 | num_filters=num_outputs[0], 80 | filter_size=3, 81 | stride=1, 82 | act='relu', 83 | name="det_head1") 84 | f_det = conv_bn_layer( 85 | input=f_det, 86 | num_filters=num_outputs[1], 87 | filter_size=3, 88 | stride=1, 89 | act='relu', 90 | name="det_head2") 91 | #f_score 92 | f_score = conv_bn_layer( 93 | input=f_det, 94 | num_filters=num_outputs[2], 95 | filter_size=1, 96 | stride=1, 97 | act=None, 98 | name="f_score") 99 | f_score = fluid.layers.sigmoid(f_score) 100 | #f_geo 101 | f_geo = conv_bn_layer( 102 | input=f_det, 103 | num_filters=num_outputs[3], 104 | filter_size=1, 105 | stride=1, 106 | act=None, 107 | name="f_geo") 108 | f_geo = (fluid.layers.sigmoid(f_geo) - 0.5) * 2 * 800 109 | return f_score, f_geo 110 | 111 | def __call__(self, inputs): 112 | """ 113 | Fuse different levels of feature map from backbone and predict results 114 | Args: 115 | inputs(list): feature maps from backbone 116 | Return: predicts 117 | """ 118 | f_common = self.unet_fusion(inputs) 119 | f_score, f_geo = self.detector_header(f_common) 120 | predicts = OrderedDict() 121 | predicts['f_score'] = f_score 122 | predicts['f_geo'] = f_geo 123 | return predicts 124 | -------------------------------------------------------------------------------- /ppocr/modeling/heads/rec_ctc_head.py: -------------------------------------------------------------------------------- 1 | #copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. 2 | # 3 | #Licensed under the Apache License, Version 2.0 (the "License"); 4 | #you may not use this file except in compliance with the License. 
5 | #You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | #Unless required by applicable law or agreed to in writing, software 10 | #distributed under the License is distributed on an "AS IS" BASIS, 11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | #See the License for the specific language governing permissions and 13 | #limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import math 20 | 21 | import paddle 22 | import paddle.fluid as fluid 23 | from paddle.fluid.param_attr import ParamAttr 24 | from .rec_seq_encoder import SequenceEncoder 25 | from ..common_functions import get_para_bias_attr 26 | import numpy as np 27 | 28 | 29 | class CTCPredict(object): 30 | """ 31 | CTC predict 32 | Args: 33 | params(object): Params from yaml file and settings from command line 34 | """ 35 | 36 | def __init__(self, params): 37 | super(CTCPredict, self).__init__() 38 | self.char_num = params['char_num'] 39 | self.encoder = SequenceEncoder(params) 40 | self.encoder_type = params['encoder_type'] 41 | self.fc_decay = params.get("fc_decay", 0.0004) 42 | 43 | def __call__(self, inputs, labels=None, mode=None): 44 | with fluid.scope_guard("skip_quant"): 45 | encoder_features = self.encoder(inputs) 46 | if self.encoder_type != "reshape": 47 | encoder_features = fluid.layers.concat(encoder_features, axis=1) 48 | name = "ctc_fc" 49 | para_attr, bias_attr = get_para_bias_attr( 50 | l2_decay=self.fc_decay, k=encoder_features.shape[1], name=name) 51 | predict = fluid.layers.fc(input=encoder_features, 52 | size=self.char_num + 1, 53 | param_attr=para_attr, 54 | bias_attr=bias_attr, 55 | name=name) 56 | decoded_out = fluid.layers.ctc_greedy_decoder( 57 | input=predict, blank=self.char_num) 58 | predicts = {'predict': predict, 'decoded_out': decoded_out} 59 | return predicts 60 | 
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle.fluid as fluid
import paddle.fluid.layers as layers


class EncoderWithReshape(object):
    """Flatten conv feature maps into a sequence via im2sequence
    (one time-step per feature-map column)."""

    def __init__(self, params):
        super(EncoderWithReshape, self).__init__()

    def __call__(self, inputs):
        # the filter covers the full feature-map height, so each column
        # of the map becomes one sequence step
        sliced_feature = layers.im2sequence(
            input=inputs,
            stride=[1, 1],
            filter_size=[inputs.shape[2], 1],
            name="sliced_feature")
        return sliced_feature


class EncoderWithRNN(object):
    """Two stacked LSTM pipelines, one forward and one reversed, each
    built as fc -> dynamic_lstm -> fc -> dynamic_lstm.

    Parameter names follow the fixed "lstm_stN_{fc,out}M" scheme; these
    must not change or pretrained weights will fail to load.
    """

    def __init__(self, params):
        super(EncoderWithRNN, self).__init__()
        self.rnn_hidden_size = params['SeqRNN']['hidden_size']

    def __call__(self, inputs):
        # returns [forward_lstm_output, backward_lstm_output]
        lstm_list = []
        name_prefix = "lstm"
        rnn_hidden_size = self.rnn_hidden_size
        # no == 1: forward direction; no == 2: reversed direction
        for no in range(1, 3):
            if no == 1:
                is_reverse = False
            else:
                is_reverse = True
            # stage 1: fc projection (4x hidden, for the LSTM gates)
            name = "%s_st1_fc%d" % (name_prefix, no)
            fc = layers.fc(input=inputs,
                           size=rnn_hidden_size * 4,
                           param_attr=fluid.ParamAttr(name=name + "_w"),
                           bias_attr=fluid.ParamAttr(name=name + "_b"),
                           name=name)
            name = "%s_st1_out%d" % (name_prefix, no)
            lstm, _ = layers.dynamic_lstm(
                input=fc,
                size=rnn_hidden_size * 4,
                is_reverse=is_reverse,
                param_attr=fluid.ParamAttr(name=name + "_w"),
                bias_attr=fluid.ParamAttr(name=name + "_b"),
                use_peepholes=False)
            # stage 2: a second fc + LSTM stacked on the stage-1 output
            name = "%s_st2_fc%d" % (name_prefix, no)
            fc = layers.fc(input=lstm,
                           size=rnn_hidden_size * 4,
                           param_attr=fluid.ParamAttr(name=name + "_w"),
                           bias_attr=fluid.ParamAttr(name=name + "_b"),
                           name=name)
            name = "%s_st2_out%d" % (name_prefix, no)
            lstm, _ = layers.dynamic_lstm(
                input=fc,
                size=rnn_hidden_size * 4,
                is_reverse=is_reverse,
                param_attr=fluid.ParamAttr(name=name + "_w"),
                bias_attr=fluid.ParamAttr(name=name + "_b"),
                use_peepholes=False)
            lstm_list.append(lstm)
        return lstm_list


class SequenceEncoder(object):
    """Dispatch to reshape-only or reshape+RNN encoding based on
    params['encoder_type'] ('reshape' or 'rnn')."""

    def __init__(self, params):
        super(SequenceEncoder, self).__init__()
        self.encoder_type = params['encoder_type']
        self.encoder_reshape = EncoderWithReshape(params)
        if self.encoder_type == "rnn":
            self.encoder_rnn = EncoderWithRNN(params)

    def __call__(self, inputs):
        # 'reshape' returns a single sequence variable; 'rnn' returns a
        # list of two LSTM outputs (see EncoderWithRNN)
        if self.encoder_type == "reshape":
            encoder_features = self.encoder_reshape(inputs)
        elif self.encoder_type == "rnn":
            inputs = self.encoder_reshape(inputs)
            encoder_features = self.encoder_rnn(inputs)
        else:
            assert False, "Unsupport encoder_type:%s"\
                % self.encoder_type
        return encoder_features
-------------------------------------------------------------------------------- /ppocr/modeling/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /ppocr/modeling/losses/cls_loss.py: -------------------------------------------------------------------------------- 1 | # copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid


class ClsLoss(object):
    """Mean cross-entropy loss for the text-direction classifier."""

    def __init__(self, params):
        super(ClsLoss, self).__init__()
        # kept as an attribute so the loss op can be swapped in one place
        self.loss_func = fluid.layers.cross_entropy

    def __call__(self, predicts, labels):
        """Return the mean cross-entropy between the head's softmax
        output (predicts['predict']) and integer labels
        (labels['label'])."""
        predict = predicts['predict']
        label = labels['label']
        # fix: self.loss_func was assigned in __init__ but never used —
        # the call here duplicated fluid.layers.cross_entropy directly
        cost = self.loss_func(input=predict, label=label)
        # renamed from sum_cost: this is a mean, not a sum
        avg_cost = fluid.layers.mean(cost)
        return avg_cost
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

import paddle.fluid as fluid


def BalanceLoss(pred,
                gt,
                mask,
                balance_loss=True,
                main_loss_type="DiceLoss",
                negative_ratio=3,
                return_origin=False,
                eps=1e-6):
    """
    The BalanceLoss for Differentiable Binarization text detection
    args:
        pred (variable): predicted feature maps.
        gt (variable): ground truth feature maps.
        mask (variable): masked maps.
        balance_loss (bool): whether balance loss or not, default is True
        main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss',
            'Euclidean','BCELoss', 'MaskL1Loss'], default is 'DiceLoss'.
        negative_ratio (int|float): float, default is 3.
        return_origin (bool): whether return unbalanced loss or not, default is False.
        eps (float): default is 1e-6.
    return: (variable) balanced loss
    """
    positive = gt * mask
    negative = (1 - gt) * mask

    positive_count = fluid.layers.reduce_sum(positive)
    # NOTE(review): positive_count_int is computed but never used below
    positive_count_int = fluid.layers.cast(positive_count, dtype=np.int32)
    # cap the number of mined negatives at negative_ratio x positives (OHEM)
    negative_count = min(
        fluid.layers.reduce_sum(negative), positive_count * negative_ratio)
    negative_count_int = fluid.layers.cast(negative_count, dtype=np.int32)

    if main_loss_type == "CrossEntropy":
        loss = fluid.layers.cross_entropy(input=pred, label=gt, soft_label=True)
        loss = fluid.layers.reduce_mean(loss)
    elif main_loss_type == "Euclidean":
        loss = fluid.layers.square(pred - gt)
        loss = fluid.layers.reduce_mean(loss)
    elif main_loss_type == "DiceLoss":
        loss = DiceLoss(pred, gt, mask)
    elif main_loss_type == "BCELoss":
        loss = fluid.layers.sigmoid_cross_entropy_with_logits(pred, label=gt)
    elif main_loss_type == "MaskL1Loss":
        loss = MaskL1Loss(pred, gt, mask)
    else:
        loss_type = [
            'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss', 'MaskL1Loss'
        ]
        raise Exception("main_loss_type in BalanceLoss() can only be one of {}".
                        format(loss_type))

    if not balance_loss:
        return loss

    # hard-negative mining: keep only the top-k negative losses so
    # negatives do not dominate the gradient
    positive_loss = positive * loss
    negative_loss = negative * loss
    negative_loss = fluid.layers.reshape(negative_loss, shape=[-1])
    negative_loss, _ = fluid.layers.topk(negative_loss, k=negative_count_int)
    balance_loss = (fluid.layers.reduce_sum(positive_loss) +
                    fluid.layers.reduce_sum(negative_loss)) / (
                        positive_count + negative_count + eps)

    if return_origin:
        return balance_loss, loss
    return balance_loss


def DiceLoss(pred, gt, mask, weights=None, eps=1e-6):
    """
    DiceLoss function: 1 - 2*|pred∩gt| / (|pred| + |gt|), computed over
    the masked (optionally weighted) region.
    """

    assert pred.shape == gt.shape
    assert pred.shape == mask.shape
    if weights is not None:
        assert weights.shape == mask.shape
        mask = weights * mask
    intersection = fluid.layers.reduce_sum(pred * gt * mask)

    union = fluid.layers.reduce_sum(pred * mask) + fluid.layers.reduce_sum(
        gt * mask) + eps
    loss = 1 - 2.0 * intersection / union
    # NOTE(review): `loss` is a fluid Variable here, not a Python float —
    # this assert compares a graph variable to 1; confirm it behaves as
    # intended under this paddle version
    assert loss <= 1
    return loss


def MaskL1Loss(pred, gt, mask, eps=1e-6):
    """
    Mask L1 Loss: mean absolute error restricted to the masked region,
    normalized by the mask area.
    """
    loss = fluid.layers.reduce_sum((fluid.layers.abs(pred - gt) * mask)) / (
        fluid.layers.reduce_sum(mask) + eps)
    loss = fluid.layers.reduce_mean(loss)
    return loss
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss


class DBLoss(object):
    """Differentiable Binarization (DB) loss: balanced loss on the
    shrink map + masked-L1 loss on the threshold map + dice loss on the
    binary map.

    Args:
        params (dict): needs 'balance_loss', 'main_loss_type', 'alpha',
            'beta' and 'ohem_ratio'.
    """

    def __init__(self, params):
        super(DBLoss, self).__init__()
        self.balance_loss = params['balance_loss']
        self.main_loss_type = params['main_loss_type']
        self.alpha = params['alpha']
        self.beta = params['beta']
        self.ohem_ratio = params['ohem_ratio']

    def __call__(self, predicts, labels):
        """Return a dict with 'total_loss' plus the three weighted
        component losses."""
        pred = predicts['maps']
        # channel layout of the head output: shrink / threshold / binary
        shrink_pred = pred[:, 0, :, :]
        threshold_pred = pred[:, 1, :, :]
        binary_pred = pred[:, 2, :, :]

        shrink_loss = BalanceLoss(
            shrink_pred,
            labels['shrink_map'],
            labels['shrink_mask'],
            balance_loss=self.balance_loss,
            main_loss_type=self.main_loss_type,
            negative_ratio=self.ohem_ratio)
        threshold_loss = MaskL1Loss(threshold_pred, labels['threshold_map'],
                                    labels['threshold_mask'])
        binary_loss = DiceLoss(binary_pred, labels['shrink_map'],
                               labels['shrink_mask'])

        shrink_loss = self.alpha * shrink_loss
        threshold_loss = self.beta * threshold_loss
        total = shrink_loss + threshold_loss + binary_loss
        return {'total_loss': total,
                "loss_shrink_maps": shrink_loss,
                "loss_threshold_maps": threshold_loss,
                "loss_binary_maps": binary_loss}
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid


class EASTLoss(object):
    """EAST detection loss: scaled dice loss on the score map plus a
    weighted smooth-L1 loss over the 8 geometry channels."""

    def __init__(self, params=None):
        super(EASTLoss, self).__init__()

    def __call__(self, predicts, labels):
        """Return {'total_loss', 'dice_loss', 'smooth_l1_loss'}."""
        f_score = predicts['f_score']
        f_geo = predicts['f_geo']
        l_score = labels['score']
        l_geo = labels['geo']
        l_mask = labels['mask']

        # dice loss on the text/non-text score map
        intersection = fluid.layers.reduce_sum(f_score * l_score * l_mask)
        union = fluid.layers.reduce_sum(f_score * l_mask)\
            + fluid.layers.reduce_sum(l_score * l_mask)
        dice_loss = 1 - 2 * intersection / (union + 1e-5)

        # smooth-L1 on geometry; the label has one extra (9th) channel
        # that carries a per-pixel normalization weight
        channels = 8
        l_geo_split = fluid.layers.split(
            l_geo, num_or_sections=channels + 1, dim=1)
        f_geo_split = fluid.layers.split(f_geo, num_or_sections=channels,
                                         dim=1)
        smooth_l1 = 0
        # zip stops at the 8 predicted channels; the 9th label channel
        # is used only as the weight l_geo_split[-1] below
        for l_chan, f_chan in zip(l_geo_split, f_geo_split):
            geo_diff = l_chan - f_chan
            abs_geo_diff = fluid.layers.abs(geo_diff)
            sign = fluid.layers.cast(
                fluid.layers.less_than(abs_geo_diff, l_score),
                dtype='float32')
            in_loss = abs_geo_diff * abs_geo_diff * sign + \
                (abs_geo_diff - 0.5) * (1.0 - sign)
            smooth_l1 += l_geo_split[-1] / channels * in_loss * l_score
        smooth_l1_loss = fluid.layers.reduce_mean(smooth_l1 * l_score)

        dice_loss = dice_loss * 0.01
        total_loss = dice_loss + smooth_l1_loss
        return {'total_loss': total_loss,
                "dice_loss": dice_loss,
                "smooth_l1_loss": smooth_l1_loss}
class SASTLoss(object):
    """SAST detection loss.

    Combines a dice loss on the TCL score map with three normalized
    smooth-L1 regression losses (border offsets, text vertex offsets,
    text center offsets). The original implementation repeated the same
    masked smooth-L1 computation inline three times; it is factored into
    ``_masked_smooth_l1`` with the channel count as the only variable.
    """

    def __init__(self, params=None):
        # No hyper-parameters are consumed; kept for interface uniformity.
        super(SASTLoss, self).__init__()

    def _masked_smooth_l1(self, l_map, f_map, l_score, l_mask, channels):
        """Normalized smooth-L1 loss for one regression target.

        Args:
            l_map: ground truth with ``channels`` offset channels plus one
                trailing per-pixel normalization channel.
            f_map: prediction with ``channels`` channels.
            l_score: single-channel score map, broadcast over channels.
            l_mask: single-channel valid mask, broadcast over channels.
            channels: regression channels (4 border / 8 tvo / 2 tco).

        Returns:
            Scalar loss tensor.
        """
        l_split, l_norm = fluid.layers.split(
            l_map, num_or_sections=[channels, 1], dim=1)
        norm = fluid.layers.expand(x=l_norm, expand_times=[1, channels, 1, 1])
        score = fluid.layers.expand(x=l_score, expand_times=[1, channels, 1, 1])
        mask = fluid.layers.expand(x=l_mask, expand_times=[1, channels, 1, 1])

        abs_diff = fluid.layers.abs(l_split - f_map)
        # Smooth-L1 switch: quadratic below 1.0, linear above. The switch
        # itself must not receive gradients.
        sign = fluid.layers.cast(abs_diff < 1.0, dtype='float32')
        sign.stop_gradient = True
        in_loss = 0.5 * abs_diff * abs_diff * sign + \
            (abs_diff - 0.5) * (1.0 - sign)
        out_loss = norm * in_loss
        return fluid.layers.reduce_sum(out_loss * score * mask) / \
            (fluid.layers.reduce_sum(score * mask) + 1e-5)

    def __call__(self, predicts, labels):
        """
        tcl_pos: N x 128 x 3
        tcl_mask: N x 128 x 1
        tcl_label: N x X list or LoDTensor

        Returns a dict with 'total_loss', 'score_loss', 'border_loss',
        'tvo_loss' and 'tco_loss'.
        """
        f_score = predicts['f_score']
        l_score = labels['input_score']
        l_mask = labels['input_mask']

        # score_loss: dice loss on the TCL score map over valid pixels.
        intersection = fluid.layers.reduce_sum(f_score * l_score * l_mask)
        union = fluid.layers.reduce_sum(f_score * l_mask) + \
            fluid.layers.reduce_sum(l_score * l_mask)
        score_loss = 1.0 - 2 * intersection / (union + 1e-5)

        # The three regression targets share one implementation; only the
        # channel count differs.
        border_loss = self._masked_smooth_l1(
            labels['input_border'], predicts['f_border'], l_score, l_mask, 4)
        tvo_loss = self._masked_smooth_l1(
            labels['input_tvo'], predicts['f_tvo'], l_score, l_mask, 8)
        tco_loss = self._masked_smooth_l1(
            labels['input_tco'], predicts['f_tco'], l_score, l_mask, 2)

        # Fixed loss weights from the original implementation.
        tvo_lw, tco_lw = 1.5, 1.5
        score_lw, border_lw = 1.0, 1.0
        total_loss = score_loss * score_lw + border_loss * border_lw + \
            tvo_loss * tvo_lw + tco_loss * tco_lw

        return {'total_loss': total_loss, "score_loss": score_loss,
                "border_loss": border_loss, 'tvo_loss': tvo_loss,
                'tco_loss': tco_loss}
class AttentionLoss(object):
    """Summed cross-entropy loss for the attention recognition head."""

    def __init__(self, params):
        super(AttentionLoss, self).__init__()
        # Character-set size; stored for parity with the other recognition
        # losses (not used in the forward computation below).
        self.char_num = params['char_num']

    def __call__(self, predicts, labels):
        """Return the summed cross-entropy between predictions and labels.

        Args:
            predicts: dict with 'predict' (per-step class probabilities).
            labels: dict with 'label_out' (target character indices).
        """
        probs = predicts['predict']
        targets = fluid.layers.cast(x=labels['label_out'], dtype='int64')
        # Attention loss: per-position cross-entropy, summed over the batch.
        ce = fluid.layers.cross_entropy(input=probs, label=targets)
        return fluid.layers.reduce_sum(ce)
class CTCLoss(object):
    """Summed warp-CTC loss for the CTC recognition head."""

    def __init__(self, params):
        super(CTCLoss, self).__init__()
        # Passed to warpctc as the blank-label index.
        self.char_num = params['char_num']

    def __call__(self, predicts, labels):
        """Return the summed warp-CTC cost over the batch.

        Args:
            predicts: dict with 'predict' (sequence logits).
            labels: dict with 'label' (target sequences).
        """
        ctc_cost = fluid.layers.warpctc(
            input=predicts['predict'],
            label=labels['label'],
            blank=self.char_num,
            norm_by_times=True)
        return fluid.layers.reduce_sum(ctc_cost)
class SRNLoss(object):
    """Weighted cross-entropy loss over the three SRN output branches
    (word branch, GSRM branch and the fused VSFD prediction)."""

    def __init__(self, params):
        super(SRNLoss, self).__init__()
        # Character-set size; kept for interface parity with other losses.
        self.char_num = params['char_num']

    def __call__(self, predicts, others):
        """Return [weighted total cost, VSFD cost, word-branch cost].

        Args:
            predicts: dict with 'predict', 'word_out' and 'gsrm_out'.
            others: dict with 'label' and 'lbl_weight'.
        """
        targets = fluid.layers.cast(x=others['label'], dtype='int64')
        # NOTE(review): 'lbl_weight' is fetched but never used by the
        # original implementation; read is preserved for compatibility.
        lbl_weight = others['lbl_weight']

        def _branch_cost(logits):
            # Summed cross-entropy for one branch, reshaped to shape [1]
            # so the costs can be combined with fluid.layers.sum.
            ce = fluid.layers.cross_entropy(input=logits, label=targets)
            return fluid.layers.reshape(
                x=fluid.layers.reduce_sum(ce), shape=[1])

        cost_word = _branch_cost(predicts['word_out'])
        cost_gsrm = _branch_cost(predicts['gsrm_out'])
        cost_vsfd = _branch_cost(predicts['predict'])

        # Branch weights: word x1.0, fused VSFD x2.0, GSRM x0.15.
        sum_cost = fluid.layers.sum(
            [cost_word, cost_vsfd * 2.0, cost_gsrm * 0.15])

        return [sum_cost, cost_vsfd, cost_word]
def cosine_decay_with_warmup(learning_rate,
                             step_each_epoch,
                             epochs=500,
                             warmup_minibatch=1000):
    """
    Applies cosine decay to the learning rate.
    lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
    decrease lr for every mini-batch and start with warmup.
    args:
        learning_rate(float): initial learning rate
        step_each_epoch (int): number of step for each epoch in training process
        epochs(int): number of training epochs
        warmup_minibatch(int): number of minibatch for warmup
    return:
        lr(tensor): learning rate tensor
    """
    # Program-wide minibatch counter; increments once per optimizer step.
    global_step = _decay_step_counter()
    # Persistent scalar variable that holds the current learning rate; it is
    # overwritten by one of the two Switch branches below on every step.
    lr = fluid.layers.tensor.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")

    # Rebind the Python int to a constant tensor so it can be compared
    # against global_step inside the graph.
    warmup_minibatch = fluid.layers.fill_constant(
        shape=[1],
        dtype='float32',
        value=float(warmup_minibatch),
        force_cpu=True)

    with fluid.layers.control_flow.Switch() as switch:
        with switch.case(global_step < warmup_minibatch):
            # Linear warmup: ramp from 0 up to ``learning_rate``.
            decayed_lr = learning_rate * (1.0 * global_step / warmup_minibatch)
            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
        with switch.default():
            # Cosine decay over the remaining (post-warmup) steps.
            decayed_lr = learning_rate * \
                (ops.cos((global_step - warmup_minibatch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
    return lr


def AdamDecay(params, parameter_list=None):
    """
    define optimizer function
    args:
        params(dict): the super parameters; requires 'base_lr', 'beta1',
            'beta2'; optional 'l2_decay' and a nested 'decay' config with
            'function' naming the schedule.
    parameter_list (list): list of Variable names to update to minimize loss
    return:
        optimizer: a Adam optimizer instance
    """
    base_lr = params['base_lr']
    beta1 = params['beta1']
    beta2 = params['beta2']
    # L2 weight decay coefficient; defaults to no regularization.
    l2_decay = params.get("l2_decay", 0.0)

    if 'decay' in params:
        supported_decay_mode = [
            "cosine_decay", "cosine_decay_warmup", "piecewise_decay"
        ]
        # NOTE: ``params`` is rebound to the nested decay config from here on.
        params = params['decay']
        decay_mode = params['function']
        assert decay_mode in supported_decay_mode, "Supported decay mode is {}, but got {}".format(
            supported_decay_mode, decay_mode)

        if decay_mode == "cosine_decay":
            step_each_epoch = params['step_each_epoch']
            total_epoch = params['total_epoch']
            base_lr = fluid.layers.cosine_decay(
                learning_rate=base_lr,
                step_each_epoch=step_each_epoch,
                epochs=total_epoch)
        elif decay_mode == "cosine_decay_warmup":
            step_each_epoch = params['step_each_epoch']
            total_epoch = params['total_epoch']
            warmup_minibatch = params.get("warmup_minibatch", 1000)
            base_lr = cosine_decay_with_warmup(
                learning_rate=base_lr,
                step_each_epoch=step_each_epoch,
                epochs=total_epoch,
                warmup_minibatch=warmup_minibatch)
        elif decay_mode == "piecewise_decay":
            boundaries = params["boundaries"]
            decay_rate = params["decay_rate"]
            # One value per region: base_lr * decay_rate ** region_index.
            values = [
                base_lr * decay_rate**idx
                for idx in range(len(boundaries) + 1)
            ]
            base_lr = fluid.layers.piecewise_decay(boundaries, values)

    optimizer = fluid.optimizer.Adam(
        learning_rate=base_lr,
        beta1=beta1,
        beta2=beta2,
        regularization=L2Decay(regularization_coeff=l2_decay),
        parameter_list=parameter_list)
    return optimizer


def RMSProp(params, parameter_list=None):
    """
    define optimizer function
    args:
        params(dict): the super parameters; optional 'base_lr', 'l2_decay'
            and a nested 'decay' config with 'function' naming the schedule.
    parameter_list (list): list of Variable names to update to minimize loss
    return:
        optimizer: a RMSProp optimizer instance
    """
    base_lr = params.get("base_lr", 0.001)
    l2_decay = params.get("l2_decay", 0.00005)

    if 'decay' in params:
        # Unlike AdamDecay, the warmup schedule is not supported here.
        supported_decay_mode = ["cosine_decay", "piecewise_decay"]
        # NOTE: ``params`` is rebound to the nested decay config from here on.
        params = params['decay']
        decay_mode = params['function']
        assert decay_mode in supported_decay_mode, "Supported decay mode is {}, but got {}".format(
            supported_decay_mode, decay_mode)

        if decay_mode == "cosine_decay":
            step_each_epoch = params['step_each_epoch']
            total_epoch = params['total_epoch']
            base_lr = fluid.layers.cosine_decay(
                learning_rate=base_lr,
                step_each_epoch=step_each_epoch,
                epochs=total_epoch)
        elif decay_mode == "piecewise_decay":
            boundaries = params["boundaries"]
            decay_rate = params["decay_rate"]
            # One value per region: base_lr * decay_rate ** region_index.
            values = [
                base_lr * decay_rate**idx
                for idx in range(len(boundaries) + 1)
            ]
            base_lr = fluid.layers.piecewise_decay(boundaries, values)

    optimizer = fluid.optimizer.RMSProp(
        learning_rate=base_lr,
        regularization=fluid.regularizer.L2Decay(regularization_coeff=l2_decay))

    return optimizer
class DBPostProcess(object):
    """
    The post process for Differentiable Binarization (DB).

    Converts the probability map produced by a DB detector into a batch of
    quadrilateral text boxes with per-box confidence scores.
    """

    def __init__(self, params):
        """
        Args:
            params (dict): must contain 'thresh', 'box_thresh',
                'max_candidates' and 'unclip_ratio'.
        """
        self.thresh = params['thresh']
        self.box_thresh = params['box_thresh']
        self.max_candidates = params['max_candidates']
        self.unclip_ratio = params['unclip_ratio']
        # Boxes whose short side (in bitmap pixels) is below this are dropped.
        self.min_size = 3
        # 2x2 kernel used to slightly dilate the binarized map before
        # contour extraction.
        self.dilation_kernel = np.array([[1, 1], [1, 1]])

    def boxes_from_bitmap(self, pred, mask):
        """
        Get boxes from the binarized image predicted by DB.
        :param pred: the binarized image predicted by DB.
        :param mask: new 'pred' after threshold filtering.
        :return: (boxes, the score of each boxes)
        """
        dest_height, dest_width = pred.shape[-2:]
        bitmap = deepcopy(mask)
        height, width = bitmap.shape

        outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)
        # OpenCV 3.x returns (image, contours, hierarchy); OpenCV 2.x/4.x
        # return (contours, hierarchy). Fail loudly on anything else instead
        # of leaving ``contours`` unbound (the original would NameError).
        if len(outs) == 3:
            img, contours, _ = outs[0], outs[1], outs[2]
        elif len(outs) == 2:
            contours, _ = outs[0], outs[1]
        else:
            raise ValueError(
                "unexpected cv2.findContours return of length {}".format(
                    len(outs)))

        num_contours = min(len(contours), self.max_candidates)
        boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)
        scores = np.zeros((num_contours, ), dtype=np.float32)

        for index in range(num_contours):
            contour = contours[index]
            points, sside = self.get_mini_boxes(contour)
            if sside < self.min_size:
                continue
            points = np.array(points)
            score = self.box_score_fast(pred, points.reshape(-1, 2))
            if self.box_thresh > score:
                continue

            # Expand the shrunk box back toward full text size, then re-fit
            # a minimum-area rectangle to the expanded polygon.
            box = self.unclip(points).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue
            box = np.array(box)
            # dest_* may be 0-d tensors/arrays; convert to Python ints.
            if not isinstance(dest_width, int):
                dest_width = dest_width.item()
                dest_height = dest_height.item()

            # Rescale from bitmap resolution to the destination resolution.
            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes[index, :, :] = box.astype(np.int16)
            scores[index] = score
        return boxes, scores

    def unclip(self, box):
        """
        Shrink or expand the box according to 'unclip_ratio'
        :param box: The predicted box.
        :return: uncliped box
        """
        unclip_ratio = self.unclip_ratio
        poly = Polygon(box)
        # Offset distance proportional to area/perimeter (Vatti clipping).
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        expanded = np.array(offset.Execute(distance))
        return expanded

    def get_mini_boxes(self, contour):
        """
        Get boxes from the contour or box.
        :param contour: The predicted contour.
        :return: (4 corner points ordered top-left, top-right, bottom-right,
            bottom-left; length of the rectangle's shorter side)
        """
        bounding_box = cv2.minAreaRect(contour)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        # After the x-sort, decide top/bottom within the left pair and the
        # right pair by comparing y-coordinates.
        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
        if points[1][1] > points[0][1]:
            index_1 = 0
            index_4 = 1
        else:
            index_1 = 1
            index_4 = 0
        if points[3][1] > points[2][1]:
            index_2 = 2
            index_3 = 3
        else:
            index_2 = 3
            index_3 = 2

        box = [
            points[index_1], points[index_2], points[index_3], points[index_4]
        ]
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
        """
        Calculate the score of box.
        :param bitmap: The binarized image predicted by DB.
        :param _box: The predicted box
        :return: score (mean of ``bitmap`` inside the box polygon)
        """
        h, w = bitmap.shape[:2]
        box = _box.copy()
        # Use builtin ``int``: the ``np.int`` alias was deprecated in
        # NumPy 1.20 and removed in 1.24, so the original crashed there.
        xmin = np.clip(np.floor(box[:, 0].min()).astype(int), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype(int), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype(int), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype(int), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        # Shift the polygon into the cropped window's coordinates.
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def __call__(self, outs_dict, ratio_list):
        """Run post-processing on a batch of DB probability maps.

        :param outs_dict: dict with 'maps' of shape (N, 1, H, W).
        :param ratio_list: per-image (ratio_h, ratio_w) resize factors.
        :return: list (length N) of arrays of boxes in original coordinates.
        """
        pred = outs_dict['maps']

        pred = pred[:, 0, :, :]
        segmentation = pred > self.thresh
        boxes_batch = []
        for batch_index in range(pred.shape[0]):
            # Dilate the binary map so nearby components merge before
            # contour extraction.
            mask = cv2.dilate(
                np.array(segmentation[batch_index]).astype(np.uint8),
                self.dilation_kernel)
            tmp_boxes, tmp_scores = self.boxes_from_bitmap(pred[batch_index],
                                                           mask)

            # Keep only boxes above the confidence threshold (this also
            # discards the zero rows left for skipped candidates).
            boxes = []
            for k in range(len(tmp_boxes)):
                if tmp_scores[k] > self.box_thresh:
                    boxes.append(tmp_boxes[k])
            if len(boxes) > 0:
                boxes = np.array(boxes)

                # Map coordinates back to the original image scale.
                ratio_h, ratio_w = ratio_list[batch_index]
                boxes[:, :, 0] = boxes[:, :, 0] / ratio_w
                boxes[:, :, 1] = boxes[:, :, 1] / ratio_h

            boxes_batch.append(boxes)
        return boxes_batch
class EASTPostPocess(object):
    """
    The post process for EAST.

    Turns the detector's score map and 8-channel quad-offset geometry map
    into sorted quadrilateral text boxes in original-image coordinates.
    """

    def __init__(self, params):
        # params must provide 'score_thresh', 'cover_thresh', 'nms_thresh'.
        self.score_thresh = params['score_thresh']
        self.cover_thresh = params['cover_thresh']
        self.nms_thresh = params['nms_thresh']

        # c++ la-nms is faster, but only support python 3.5
        self.is_python35 = False
        if sys.version_info.major == 3 and sys.version_info.minor == 5:
            self.is_python35 = True

    def restore_rectangle_quad(self, origin, geometry):
        """
        Restore rectangle from quadrangle.

        Each pixel position is repeated 4 times and the 8 predicted offsets
        are subtracted to recover the quad's four corners.
        """
        # quad
        origin_concat = np.concatenate(
            (origin, origin, origin, origin), axis=1)  # (n, 8)
        pred_quads = origin_concat - geometry
        pred_quads = pred_quads.reshape((-1, 4, 2))  # (n, 4, 2)
        return pred_quads

    def detect(self,
               score_map,
               geo_map,
               score_thresh=0.8,
               cover_thresh=0.1,
               nms_thresh=0.2):
        """
        restore text boxes from score map and geo map

        Returns an (m, 9) array of boxes (8 corner coords + score), or an
        empty list when nothing survives thresholding/NMS.
        """
        score_map = score_map[0]
        # Move channels last: presumably geo_map arrives channel-first from
        # the network — confirm against the detector's output layout.
        geo_map = np.swapaxes(geo_map, 1, 0)
        geo_map = np.swapaxes(geo_map, 1, 2)
        # filter the score map
        xy_text = np.argwhere(score_map > score_thresh)
        if len(xy_text) == 0:
            return []
        # sort the text boxes via the y axis
        xy_text = xy_text[np.argsort(xy_text[:, 0])]
        # restore quad proposals; the *4 maps feature-map coords back to
        # input-image coords (the score map is downsampled by 4).
        text_box_restored = self.restore_rectangle_quad(
            xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = text_box_restored.reshape((-1, 8))
        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
        if self.is_python35:
            # Compiled C++ locality-aware NMS (see ppocr/postprocess/lanms).
            import lanms
            boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
        else:
            # Pure-python fallback.
            boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
        if boxes.shape[0] == 0:
            return []
        # Here we filter some low score boxes by the average score map, 
        # this is different from the orginal paper.
        for i, box in enumerate(boxes):
            mask = np.zeros_like(score_map, dtype=np.uint8)
            # // 4 converts image coords back to score-map resolution.
            cv2.fillPoly(mask, box[:8].reshape(
                (-1, 4, 2)).astype(np.int32) // 4, 1)
            boxes[i, 8] = cv2.mean(score_map, mask)[0]
        boxes = boxes[boxes[:, 8] > cover_thresh]
        return boxes

    def sort_poly(self, p):
        """
        Sort polygons.

        Rotates the 4 points so the corner with the smallest x+y comes
        first, then reverses orientation if the first edge is more vertical
        than horizontal.
        """
        min_axis = np.argmin(np.sum(p, axis=1))
        p = p[[min_axis, (min_axis + 1) % 4,\
            (min_axis + 2) % 4, (min_axis + 3) % 4]]
        if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
            return p
        else:
            return p[[0, 3, 2, 1]]

    def __call__(self, outs_dict, ratio_list):
        """Post-process a batch: returns one array of quads per image.

        :param outs_dict: dict with 'f_score' and 'f_geo' batches.
        :param ratio_list: per-image (ratio_h, ratio_w) resize factors.
        """
        score_list = outs_dict['f_score']
        geo_list = outs_dict['f_geo']
        img_num = len(ratio_list)
        dt_boxes_list = []
        for ino in range(img_num):
            score = score_list[ino]
            geo = geo_list[ino]
            boxes = self.detect(
                score_map=score,
                geo_map=geo,
                score_thresh=self.score_thresh,
                cover_thresh=self.cover_thresh,
                nms_thresh=self.nms_thresh)
            boxes_norm = []
            if len(boxes) > 0:
                # Scale back to the original image size.
                ratio_h, ratio_w = ratio_list[ino]
                boxes = boxes[:, :8].reshape((-1, 4, 2))
                boxes[:, :, 0] /= ratio_w
                boxes[:, :, 1] /= ratio_h
                for i_box, box in enumerate(boxes):
                    box = self.sort_poly(box.astype(np.int32))
                    # Drop degenerate boxes with an edge shorter than 5 px.
                    if np.linalg.norm(box[0] - box[1]) < 5 \
                        or np.linalg.norm(box[3] - box[0]) < 5:
                        continue
                    boxes_norm.append(box)
            dt_boxes_list.append(np.array(boxes_norm))
        return dt_boxes_list
6 | # 7 | # YouCompleteMe is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | # 12 | # YouCompleteMe is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU General Public License 18 | # along with YouCompleteMe. If not, see . 19 | 20 | import os 21 | import sys 22 | import glob 23 | import ycm_core 24 | 25 | # These are the compilation flags that will be used in case there's no 26 | # compilation database set (by default, one is not set). 27 | # CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR. 28 | sys.path.append(os.path.dirname(os.path.abspath(__file__))) 29 | 30 | 31 | BASE_DIR = os.path.dirname(os.path.realpath(__file__)) 32 | 33 | from plumbum.cmd import python_config 34 | 35 | 36 | flags = [ 37 | '-Wall', 38 | '-Wextra', 39 | '-Wnon-virtual-dtor', 40 | '-Winvalid-pch', 41 | '-Wno-unused-local-typedefs', 42 | '-std=c++11', 43 | '-x', 'c++', 44 | '-Iinclude', 45 | ] + python_config('--cflags').split() 46 | 47 | 48 | # Set this to the absolute path to the folder (NOT the file!) containing the 49 | # compile_commands.json file to use that instead of 'flags'. See here for 50 | # more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html 51 | # 52 | # Most projects will NOT need to set this to anything; you can just change the 53 | # 'flags' list of compilation flags. 
# Absolute path of the folder (NOT the file) holding compile_commands.json.
# Leave empty to fall back to the static 'flags' list defined above.
compilation_database_folder = ''

# Load the compilation database once at import time, if one is configured.
if os.path.exists(compilation_database_folder):
    database = ycm_core.CompilationDatabase(compilation_database_folder)
else:
    database = None

SOURCE_EXTENSIONS = ['.cpp', '.cxx', '.cc', '.c', '.m', '.mm']


def DirectoryOfThisScript():
    """Absolute directory containing this configuration script."""
    return os.path.dirname(os.path.abspath(__file__))


def MakeRelativePathsInFlagsAbsolute(flags, working_directory):
    """Return a copy of *flags* with relative include paths resolved
    against *working_directory*; without a working directory the flags
    are returned unchanged."""
    if not working_directory:
        return list(flags)

    path_flags = ['-isystem', '-I', '-iquote', '--sysroot=']
    new_flags = []
    expect_path = False
    for flag in flags:
        new_flag = flag

        # The previous token was a bare path option (e.g. '-I'), so this
        # token is the path itself.
        if expect_path:
            expect_path = False
            if not flag.startswith('/'):
                new_flag = os.path.join(working_directory, flag)

        for path_flag in path_flags:
            if flag == path_flag:
                # Separated form: the path arrives as the next token.
                expect_path = True
                break
            if flag.startswith(path_flag):
                # Attached form, e.g. '-Iinclude'.
                new_flag = path_flag + os.path.join(
                    working_directory, flag[len(path_flag):])
                break

        if new_flag:
            new_flags.append(new_flag)
    return new_flags


def IsHeaderFile(filename):
    """True when *filename* carries a C/C++/ObjC header extension."""
    return os.path.splitext(filename)[1] in ['.h', '.hxx', '.hpp', '.hh']


def GetCompilationInfoForFile(filename):
    """Look up compilation info for *filename* in the database.

    The compile_commands.json generated by CMake has no entries for header
    files, so for a header we try sibling source files with the same
    basename and reuse their flags. Returns None when nothing matches.
    """
    if not IsHeaderFile(filename):
        return database.GetCompilationInfoForFile(filename)

    basename = os.path.splitext(filename)[0]
    for extension in SOURCE_EXTENSIONS:
        candidate = basename + extension
        if os.path.exists(candidate):
            info = database.GetCompilationInfoForFile(candidate)
            if info.compiler_flags_:
                return info
    return None


def FlagsForFile(filename, **kwargs):
    """Entry point called by ycmd to produce flags for *filename*."""
    if database:
        # compilation_info.compiler_flags_ is a "list-like" StringVec
        # object, not an actual Python list.
        compilation_info = GetCompilationInfoForFile(filename)
        if not compilation_info:
            return None

        final_flags = MakeRelativePathsInFlagsAbsolute(
            compilation_info.compiler_flags_,
            compilation_info.compiler_working_dir_)
    else:
        final_flags = MakeRelativePathsInFlagsAbsolute(
            flags, DirectoryOfThisScript())

    return {
        'flags': final_flags,
        'do_cache': True
    }
def merge_quadrangle_n9(polys, thres=0.3, precision=10000):
    """Merge overlapping quadrangles with locality-aware NMS.

    Args:
        polys: N x 9 float array — 8 corner coordinates followed by a score.
        thres: IoU threshold above which two quadrangles are merged.
        precision: scale factor applied to coordinates before handing them
            to the integer-based (clipper) C++ implementation.

    Returns:
        An M x 9 float32 array of merged quadrangles (empty for empty input).
    """
    # Handle empty input before importing the compiled extension, so the
    # trivial case works even when the adaptor module failed to build.
    if len(polys) == 0:
        return np.array([], dtype='float32')
    from .adaptor import merge_quadrangle_n9 as nms_impl
    # Work on a copy: the caller's array must not be scaled in place.
    p = polys.copy()
    p[:, :8] *= precision
    ret = np.array(nms_impl(p, thres), dtype='float32')
    ret[:, :8] /= precision
    return ret
| float(p.score), 24 | }); 25 | } 26 | 27 | return ret; 28 | } 29 | 30 | 31 | /** 32 | * 33 | * \param quad_n9 an n-by-9 numpy array, where first 8 numbers denote the 34 | * quadrangle, and the last one is the score 35 | * \param iou_threshold two quadrangles with iou score above this threshold 36 | * will be merged 37 | * 38 | * \return an n-by-9 numpy array, the merged quadrangles 39 | */ 40 | std::vector> merge_quadrangle_n9( 41 | py::array_t quad_n9, 42 | float iou_threshold) { 43 | auto pbuf = quad_n9.request(); 44 | if (pbuf.ndim != 2 || pbuf.shape[1] != 9) 45 | throw std::runtime_error("quadrangles must have a shape of (n, 9)"); 46 | auto n = pbuf.shape[0]; 47 | auto ptr = static_cast(pbuf.ptr); 48 | return polys2floats(lanms::merge_quadrangle_n9(ptr, n, iou_threshold)); 49 | } 50 | 51 | } 52 | 53 | PYBIND11_PLUGIN(adaptor) { 54 | py::module m("adaptor", "NMS"); 55 | 56 | m.def("merge_quadrangle_n9", &lanms_adaptor::merge_quadrangle_n9, 57 | "merge quadrangels"); 58 | 59 | return m.ptr(); 60 | } 61 | 62 | -------------------------------------------------------------------------------- /ppocr/postprocess/lanms/include/clipper/clipper.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/ppocr/postprocess/lanms/include/clipper/clipper.cpp -------------------------------------------------------------------------------- /ppocr/postprocess/lanms/include/pybind11/buffer_info.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/buffer_info.h: Python buffer object interface 3 | 4 | Copyright (c) 2016 Wenzel Jakob 5 | 6 | All rights reserved. Use of this source code is governed by a 7 | BSD-style license that can be found in the LICENSE file. 
8 | */ 9 | 10 | #pragma once 11 | 12 | #include "common.h" 13 | 14 | NAMESPACE_BEGIN(pybind11) 15 | 16 | /// Information record describing a Python buffer object 17 | struct buffer_info { 18 | void *ptr = nullptr; // Pointer to the underlying storage 19 | ssize_t itemsize = 0; // Size of individual items in bytes 20 | ssize_t size = 0; // Total number of entries 21 | std::string format; // For homogeneous buffers, this should be set to format_descriptor::format() 22 | ssize_t ndim = 0; // Number of dimensions 23 | std::vector shape; // Shape of the tensor (1 entry per dimension) 24 | std::vector strides; // Number of entries between adjacent entries (for each per dimension) 25 | 26 | buffer_info() { } 27 | 28 | buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim, 29 | detail::any_container shape_in, detail::any_container strides_in) 30 | : ptr(ptr), itemsize(itemsize), size(1), format(format), ndim(ndim), 31 | shape(std::move(shape_in)), strides(std::move(strides_in)) { 32 | if (ndim != (ssize_t) shape.size() || ndim != (ssize_t) strides.size()) 33 | pybind11_fail("buffer_info: ndim doesn't match shape and/or strides length"); 34 | for (size_t i = 0; i < (size_t) ndim; ++i) 35 | size *= shape[i]; 36 | } 37 | 38 | template 39 | buffer_info(T *ptr, detail::any_container shape_in, detail::any_container strides_in) 40 | : buffer_info(private_ctr_tag(), ptr, sizeof(T), format_descriptor::format(), static_cast(shape_in->size()), std::move(shape_in), std::move(strides_in)) { } 41 | 42 | buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t size) 43 | : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize}) { } 44 | 45 | template 46 | buffer_info(T *ptr, ssize_t size) 47 | : buffer_info(ptr, sizeof(T), format_descriptor::format(), size) { } 48 | 49 | explicit buffer_info(Py_buffer *view, bool ownview = true) 50 | : buffer_info(view->buf, view->itemsize, view->format, view->ndim, 51 | {view->shape, view->shape + 
view->ndim}, {view->strides, view->strides + view->ndim}) { 52 | this->view = view; 53 | this->ownview = ownview; 54 | } 55 | 56 | buffer_info(const buffer_info &) = delete; 57 | buffer_info& operator=(const buffer_info &) = delete; 58 | 59 | buffer_info(buffer_info &&other) { 60 | (*this) = std::move(other); 61 | } 62 | 63 | buffer_info& operator=(buffer_info &&rhs) { 64 | ptr = rhs.ptr; 65 | itemsize = rhs.itemsize; 66 | size = rhs.size; 67 | format = std::move(rhs.format); 68 | ndim = rhs.ndim; 69 | shape = std::move(rhs.shape); 70 | strides = std::move(rhs.strides); 71 | std::swap(view, rhs.view); 72 | std::swap(ownview, rhs.ownview); 73 | return *this; 74 | } 75 | 76 | ~buffer_info() { 77 | if (view && ownview) { PyBuffer_Release(view); delete view; } 78 | } 79 | 80 | private: 81 | struct private_ctr_tag { }; 82 | 83 | buffer_info(private_ctr_tag, void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim, 84 | detail::any_container &&shape_in, detail::any_container &&strides_in) 85 | : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in)) { } 86 | 87 | Py_buffer *view = nullptr; 88 | bool ownview = false; 89 | }; 90 | 91 | NAMESPACE_BEGIN(detail) 92 | 93 | template struct compare_buffer_info { 94 | static bool compare(const buffer_info& b) { 95 | return b.format == format_descriptor::format() && b.itemsize == (ssize_t) sizeof(T); 96 | } 97 | }; 98 | 99 | template struct compare_buffer_info::value>> { 100 | static bool compare(const buffer_info& b) { 101 | return (size_t) b.itemsize == sizeof(T) && (b.format == format_descriptor::value || 102 | ((sizeof(T) == sizeof(long)) && b.format == (std::is_unsigned::value ? "L" : "l")) || 103 | ((sizeof(T) == sizeof(size_t)) && b.format == (std::is_unsigned::value ? 
"N" : "n"))); 104 | } 105 | }; 106 | 107 | NAMESPACE_END(detail) 108 | NAMESPACE_END(pybind11) 109 | -------------------------------------------------------------------------------- /ppocr/postprocess/lanms/include/pybind11/complex.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/complex.h: Complex number support 3 | 4 | Copyright (c) 2016 Wenzel Jakob 5 | 6 | All rights reserved. Use of this source code is governed by a 7 | BSD-style license that can be found in the LICENSE file. 8 | */ 9 | 10 | #pragma once 11 | 12 | #include "pybind11.h" 13 | #include 14 | 15 | /// glibc defines I as a macro which breaks things, e.g., boost template names 16 | #ifdef I 17 | # undef I 18 | #endif 19 | 20 | NAMESPACE_BEGIN(pybind11) 21 | 22 | template struct format_descriptor, detail::enable_if_t::value>> { 23 | static constexpr const char c = format_descriptor::c; 24 | static constexpr const char value[3] = { 'Z', c, '\0' }; 25 | static std::string format() { return std::string(value); } 26 | }; 27 | 28 | template constexpr const char format_descriptor< 29 | std::complex, detail::enable_if_t::value>>::value[3]; 30 | 31 | NAMESPACE_BEGIN(detail) 32 | 33 | template struct is_fmt_numeric, detail::enable_if_t::value>> { 34 | static constexpr bool value = true; 35 | static constexpr int index = is_fmt_numeric::index + 3; 36 | }; 37 | 38 | template class type_caster> { 39 | public: 40 | bool load(handle src, bool convert) { 41 | if (!src) 42 | return false; 43 | if (!convert && !PyComplex_Check(src.ptr())) 44 | return false; 45 | Py_complex result = PyComplex_AsCComplex(src.ptr()); 46 | if (result.real == -1.0 && PyErr_Occurred()) { 47 | PyErr_Clear(); 48 | return false; 49 | } 50 | value = std::complex((T) result.real, (T) result.imag); 51 | return true; 52 | } 53 | 54 | static handle cast(const std::complex &src, return_value_policy /* policy */, handle /* parent */) { 55 | return PyComplex_FromDoubles((double) src.real(), 
(double) src.imag()); 56 | } 57 | 58 | PYBIND11_TYPE_CASTER(std::complex, _("complex")); 59 | }; 60 | NAMESPACE_END(detail) 61 | NAMESPACE_END(pybind11) 62 | -------------------------------------------------------------------------------- /ppocr/postprocess/lanms/include/pybind11/eval.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/exec.h: Support for evaluating Python expressions and statements 3 | from strings and files 4 | 5 | Copyright (c) 2016 Klemens Morgenstern and 6 | Wenzel Jakob 7 | 8 | All rights reserved. Use of this source code is governed by a 9 | BSD-style license that can be found in the LICENSE file. 10 | */ 11 | 12 | #pragma once 13 | 14 | #include "pybind11.h" 15 | 16 | NAMESPACE_BEGIN(pybind11) 17 | 18 | enum eval_mode { 19 | /// Evaluate a string containing an isolated expression 20 | eval_expr, 21 | 22 | /// Evaluate a string containing a single statement. Returns \c none 23 | eval_single_statement, 24 | 25 | /// Evaluate a string containing a sequence of statement. 
Returns \c none 26 | eval_statements 27 | }; 28 | 29 | template 30 | object eval(str expr, object global = globals(), object local = object()) { 31 | if (!local) 32 | local = global; 33 | 34 | /* PyRun_String does not accept a PyObject / encoding specifier, 35 | this seems to be the only alternative */ 36 | std::string buffer = "# -*- coding: utf-8 -*-\n" + (std::string) expr; 37 | 38 | int start; 39 | switch (mode) { 40 | case eval_expr: start = Py_eval_input; break; 41 | case eval_single_statement: start = Py_single_input; break; 42 | case eval_statements: start = Py_file_input; break; 43 | default: pybind11_fail("invalid evaluation mode"); 44 | } 45 | 46 | PyObject *result = PyRun_String(buffer.c_str(), start, global.ptr(), local.ptr()); 47 | if (!result) 48 | throw error_already_set(); 49 | return reinterpret_steal(result); 50 | } 51 | 52 | template 53 | object eval(const char (&s)[N], object global = globals(), object local = object()) { 54 | /* Support raw string literals by removing common leading whitespace */ 55 | auto expr = (s[0] == '\n') ? 
str(module::import("textwrap").attr("dedent")(s)) 56 | : str(s); 57 | return eval(expr, global, local); 58 | } 59 | 60 | inline void exec(str expr, object global = globals(), object local = object()) { 61 | eval(expr, global, local); 62 | } 63 | 64 | template 65 | void exec(const char (&s)[N], object global = globals(), object local = object()) { 66 | eval(s, global, local); 67 | } 68 | 69 | template 70 | object eval_file(str fname, object global = globals(), object local = object()) { 71 | if (!local) 72 | local = global; 73 | 74 | int start; 75 | switch (mode) { 76 | case eval_expr: start = Py_eval_input; break; 77 | case eval_single_statement: start = Py_single_input; break; 78 | case eval_statements: start = Py_file_input; break; 79 | default: pybind11_fail("invalid evaluation mode"); 80 | } 81 | 82 | int closeFile = 1; 83 | std::string fname_str = (std::string) fname; 84 | #if PY_VERSION_HEX >= 0x03040000 85 | FILE *f = _Py_fopen_obj(fname.ptr(), "r"); 86 | #elif PY_VERSION_HEX >= 0x03000000 87 | FILE *f = _Py_fopen(fname.ptr(), "r"); 88 | #else 89 | /* No unicode support in open() :( */ 90 | auto fobj = reinterpret_steal(PyFile_FromString( 91 | const_cast(fname_str.c_str()), 92 | const_cast("r"))); 93 | FILE *f = nullptr; 94 | if (fobj) 95 | f = PyFile_AsFile(fobj.ptr()); 96 | closeFile = 0; 97 | #endif 98 | if (!f) { 99 | PyErr_Clear(); 100 | pybind11_fail("File \"" + fname_str + "\" could not be opened!"); 101 | } 102 | 103 | #if PY_VERSION_HEX < 0x03000000 && defined(PYPY_VERSION) 104 | PyObject *result = PyRun_File(f, fname_str.c_str(), start, global.ptr(), 105 | local.ptr()); 106 | (void) closeFile; 107 | #else 108 | PyObject *result = PyRun_FileEx(f, fname_str.c_str(), start, global.ptr(), 109 | local.ptr(), closeFile); 110 | #endif 111 | 112 | if (!result) 113 | throw error_already_set(); 114 | return reinterpret_steal(result); 115 | } 116 | 117 | NAMESPACE_END(pybind11) 118 | 
-------------------------------------------------------------------------------- /ppocr/postprocess/lanms/include/pybind11/functional.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/functional.h: std::function<> support 3 | 4 | Copyright (c) 2016 Wenzel Jakob 5 | 6 | All rights reserved. Use of this source code is governed by a 7 | BSD-style license that can be found in the LICENSE file. 8 | */ 9 | 10 | #pragma once 11 | 12 | #include "pybind11.h" 13 | #include 14 | 15 | NAMESPACE_BEGIN(pybind11) 16 | NAMESPACE_BEGIN(detail) 17 | 18 | template 19 | struct type_caster> { 20 | using type = std::function; 21 | using retval_type = conditional_t::value, void_type, Return>; 22 | using function_type = Return (*) (Args...); 23 | 24 | public: 25 | bool load(handle src, bool convert) { 26 | if (src.is_none()) { 27 | // Defer accepting None to other overloads (if we aren't in convert mode): 28 | if (!convert) return false; 29 | return true; 30 | } 31 | 32 | if (!isinstance(src)) 33 | return false; 34 | 35 | auto func = reinterpret_borrow(src); 36 | 37 | /* 38 | When passing a C++ function as an argument to another C++ 39 | function via Python, every function call would normally involve 40 | a full C++ -> Python -> C++ roundtrip, which can be prohibitive. 41 | Here, we try to at least detect the case where the function is 42 | stateless (i.e. function pointer or lambda function without 43 | captured variables), in which case the roundtrip can be avoided. 44 | */ 45 | if (auto cfunc = func.cpp_function()) { 46 | auto c = reinterpret_borrow(PyCFunction_GET_SELF(cfunc.ptr())); 47 | auto rec = (function_record *) c; 48 | 49 | if (rec && rec->is_stateless && 50 | same_type(typeid(function_type), *reinterpret_cast(rec->data[1]))) { 51 | struct capture { function_type f; }; 52 | value = ((capture *) &rec->data)->f; 53 | return true; 54 | } 55 | } 56 | 57 | value = [func](Args... 
args) -> Return { 58 | gil_scoped_acquire acq; 59 | object retval(func(std::forward(args)...)); 60 | /* Visual studio 2015 parser issue: need parentheses around this expression */ 61 | return (retval.template cast()); 62 | }; 63 | return true; 64 | } 65 | 66 | template 67 | static handle cast(Func &&f_, return_value_policy policy, handle /* parent */) { 68 | if (!f_) 69 | return none().inc_ref(); 70 | 71 | auto result = f_.template target(); 72 | if (result) 73 | return cpp_function(*result, policy).release(); 74 | else 75 | return cpp_function(std::forward(f_), policy).release(); 76 | } 77 | 78 | PYBIND11_TYPE_CASTER(type, _("Callable[[") + 79 | argument_loader::arg_names() + _("], ") + 80 | make_caster::name() + 81 | _("]")); 82 | }; 83 | 84 | NAMESPACE_END(detail) 85 | NAMESPACE_END(pybind11) 86 | -------------------------------------------------------------------------------- /ppocr/postprocess/lanms/include/pybind11/options.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/options.h: global settings that are configurable at runtime. 3 | 4 | Copyright (c) 2016 Wenzel Jakob 5 | 6 | All rights reserved. Use of this source code is governed by a 7 | BSD-style license that can be found in the LICENSE file. 8 | */ 9 | 10 | #pragma once 11 | 12 | #include "common.h" 13 | 14 | NAMESPACE_BEGIN(pybind11) 15 | 16 | class options { 17 | public: 18 | 19 | // Default RAII constructor, which leaves settings as they currently are. 20 | options() : previous_state(global_state()) {} 21 | 22 | // Class is non-copyable. 23 | options(const options&) = delete; 24 | options& operator=(const options&) = delete; 25 | 26 | // Destructor, which restores settings that were in effect before. 
27 | ~options() { 28 | global_state() = previous_state; 29 | } 30 | 31 | // Setter methods (affect the global state): 32 | 33 | options& disable_user_defined_docstrings() & { global_state().show_user_defined_docstrings = false; return *this; } 34 | 35 | options& enable_user_defined_docstrings() & { global_state().show_user_defined_docstrings = true; return *this; } 36 | 37 | options& disable_function_signatures() & { global_state().show_function_signatures = false; return *this; } 38 | 39 | options& enable_function_signatures() & { global_state().show_function_signatures = true; return *this; } 40 | 41 | // Getter methods (return the global state): 42 | 43 | static bool show_user_defined_docstrings() { return global_state().show_user_defined_docstrings; } 44 | 45 | static bool show_function_signatures() { return global_state().show_function_signatures; } 46 | 47 | // This type is not meant to be allocated on the heap. 48 | void* operator new(size_t) = delete; 49 | 50 | private: 51 | 52 | struct state { 53 | bool show_user_defined_docstrings = true; //< Include user-supplied texts in docstrings. 54 | bool show_function_signatures = true; //< Include auto-generated function signatures in docstrings. 55 | }; 56 | 57 | static state &global_state() { 58 | static state instance; 59 | return instance; 60 | } 61 | 62 | state previous_state; 63 | }; 64 | 65 | NAMESPACE_END(pybind11) 66 | -------------------------------------------------------------------------------- /ppocr/postprocess/lanms/include/pybind11/typeid.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/typeid.h: Compiler-independent access to type identifiers 3 | 4 | Copyright (c) 2016 Wenzel Jakob 5 | 6 | All rights reserved. Use of this source code is governed by a 7 | BSD-style license that can be found in the LICENSE file. 
8 | */ 9 | 10 | #pragma once 11 | 12 | #include 13 | #include 14 | 15 | #if defined(__GNUG__) 16 | #include 17 | #endif 18 | 19 | NAMESPACE_BEGIN(pybind11) 20 | NAMESPACE_BEGIN(detail) 21 | /// Erase all occurrences of a substring 22 | inline void erase_all(std::string &string, const std::string &search) { 23 | for (size_t pos = 0;;) { 24 | pos = string.find(search, pos); 25 | if (pos == std::string::npos) break; 26 | string.erase(pos, search.length()); 27 | } 28 | } 29 | 30 | PYBIND11_NOINLINE inline void clean_type_id(std::string &name) { 31 | #if defined(__GNUG__) 32 | int status = 0; 33 | std::unique_ptr res { 34 | abi::__cxa_demangle(name.c_str(), nullptr, nullptr, &status), std::free }; 35 | if (status == 0) 36 | name = res.get(); 37 | #else 38 | detail::erase_all(name, "class "); 39 | detail::erase_all(name, "struct "); 40 | detail::erase_all(name, "enum "); 41 | #endif 42 | detail::erase_all(name, "pybind11::"); 43 | } 44 | NAMESPACE_END(detail) 45 | 46 | /// Return a string representation of a C++ type 47 | template static std::string type_id() { 48 | std::string name(typeid(T).name()); 49 | detail::clean_type_id(name); 50 | return name; 51 | } 52 | 53 | NAMESPACE_END(pybind11) 54 | -------------------------------------------------------------------------------- /ppocr/postprocess/lanms/lanms.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "clipper/clipper.hpp" 4 | 5 | // locality-aware NMS 6 | namespace lanms { 7 | 8 | namespace cl = ClipperLib; 9 | 10 | struct Polygon { 11 | cl::Path poly; 12 | float score; 13 | }; 14 | 15 | float paths_area(const ClipperLib::Paths &ps) { 16 | float area = 0; 17 | for (auto &&p: ps) 18 | area += cl::Area(p); 19 | return area; 20 | } 21 | 22 | float poly_iou(const Polygon &a, const Polygon &b) { 23 | cl::Clipper clpr; 24 | clpr.AddPath(a.poly, cl::ptSubject, true); 25 | clpr.AddPath(b.poly, cl::ptClip, true); 26 | 27 | cl::Paths inter, uni; 28 | 
clpr.Execute(cl::ctIntersection, inter, cl::pftEvenOdd); 29 | clpr.Execute(cl::ctUnion, uni, cl::pftEvenOdd); 30 | 31 | auto inter_area = paths_area(inter), 32 | uni_area = paths_area(uni); 33 | return std::abs(inter_area) / std::max(std::abs(uni_area), 1.0f); 34 | } 35 | 36 | bool should_merge(const Polygon &a, const Polygon &b, float iou_threshold) { 37 | return poly_iou(a, b) > iou_threshold; 38 | } 39 | 40 | /** 41 | * Incrementally merge polygons 42 | */ 43 | class PolyMerger { 44 | public: 45 | PolyMerger(): score(0), nr_polys(0) { 46 | memset(data, 0, sizeof(data)); 47 | } 48 | 49 | /** 50 | * Add a new polygon to be merged. 51 | */ 52 | void add(const Polygon &p_given) { 53 | Polygon p; 54 | if (nr_polys > 0) { 55 | // vertices of two polygons to merge may not in the same order; 56 | // we match their vertices by choosing the ordering that 57 | // minimizes the total squared distance. 58 | // see function normalize_poly for details. 59 | p = normalize_poly(get(), p_given); 60 | } else { 61 | p = p_given; 62 | } 63 | assert(p.poly.size() == 4); 64 | auto &poly = p.poly; 65 | auto s = p.score; 66 | data[0] += poly[0].X * s; 67 | data[1] += poly[0].Y * s; 68 | 69 | data[2] += poly[1].X * s; 70 | data[3] += poly[1].Y * s; 71 | 72 | data[4] += poly[2].X * s; 73 | data[5] += poly[2].Y * s; 74 | 75 | data[6] += poly[3].X * s; 76 | data[7] += poly[3].Y * s; 77 | 78 | score += p.score; 79 | 80 | nr_polys += 1; 81 | } 82 | 83 | inline std::int64_t sqr(std::int64_t x) { return x * x; } 84 | 85 | Polygon normalize_poly( 86 | const Polygon &ref, 87 | const Polygon &p) { 88 | 89 | std::int64_t min_d = std::numeric_limits::max(); 90 | size_t best_start = 0, best_order = 0; 91 | 92 | for (size_t start = 0; start < 4; start ++) { 93 | size_t j = start; 94 | std::int64_t d = ( 95 | sqr(ref.poly[(j + 0) % 4].X - p.poly[(j + 0) % 4].X) 96 | + sqr(ref.poly[(j + 0) % 4].Y - p.poly[(j + 0) % 4].Y) 97 | + sqr(ref.poly[(j + 1) % 4].X - p.poly[(j + 1) % 4].X) 98 | + sqr(ref.poly[(j 
+ 1) % 4].Y - p.poly[(j + 1) % 4].Y) 99 | + sqr(ref.poly[(j + 2) % 4].X - p.poly[(j + 2) % 4].X) 100 | + sqr(ref.poly[(j + 2) % 4].Y - p.poly[(j + 2) % 4].Y) 101 | + sqr(ref.poly[(j + 3) % 4].X - p.poly[(j + 3) % 4].X) 102 | + sqr(ref.poly[(j + 3) % 4].Y - p.poly[(j + 3) % 4].Y) 103 | ); 104 | if (d < min_d) { 105 | min_d = d; 106 | best_start = start; 107 | best_order = 0; 108 | } 109 | 110 | d = ( 111 | sqr(ref.poly[(j + 0) % 4].X - p.poly[(j + 3) % 4].X) 112 | + sqr(ref.poly[(j + 0) % 4].Y - p.poly[(j + 3) % 4].Y) 113 | + sqr(ref.poly[(j + 1) % 4].X - p.poly[(j + 2) % 4].X) 114 | + sqr(ref.poly[(j + 1) % 4].Y - p.poly[(j + 2) % 4].Y) 115 | + sqr(ref.poly[(j + 2) % 4].X - p.poly[(j + 1) % 4].X) 116 | + sqr(ref.poly[(j + 2) % 4].Y - p.poly[(j + 1) % 4].Y) 117 | + sqr(ref.poly[(j + 3) % 4].X - p.poly[(j + 0) % 4].X) 118 | + sqr(ref.poly[(j + 3) % 4].Y - p.poly[(j + 0) % 4].Y) 119 | ); 120 | if (d < min_d) { 121 | min_d = d; 122 | best_start = start; 123 | best_order = 1; 124 | } 125 | } 126 | 127 | Polygon r; 128 | r.poly.resize(4); 129 | auto j = best_start; 130 | if (best_order == 0) { 131 | for (size_t i = 0; i < 4; i ++) 132 | r.poly[i] = p.poly[(j + i) % 4]; 133 | } else { 134 | for (size_t i = 0; i < 4; i ++) 135 | r.poly[i] = p.poly[(j + 4 - i - 1) % 4]; 136 | } 137 | r.score = p.score; 138 | return r; 139 | } 140 | 141 | Polygon get() const { 142 | Polygon p; 143 | 144 | auto &poly = p.poly; 145 | poly.resize(4); 146 | auto score_inv = 1.0f / std::max(1e-8f, score); 147 | poly[0].X = data[0] * score_inv; 148 | poly[0].Y = data[1] * score_inv; 149 | poly[1].X = data[2] * score_inv; 150 | poly[1].Y = data[3] * score_inv; 151 | poly[2].X = data[4] * score_inv; 152 | poly[2].Y = data[5] * score_inv; 153 | poly[3].X = data[6] * score_inv; 154 | poly[3].Y = data[7] * score_inv; 155 | 156 | assert(score > 0); 157 | p.score = score; 158 | 159 | return p; 160 | } 161 | 162 | private: 163 | std::int64_t data[8]; 164 | float score; 165 | std::int32_t nr_polys; 166 | 
}; 167 | 168 | 169 | /** 170 | * The standard NMS algorithm. 171 | */ 172 | std::vector standard_nms(std::vector &polys, float iou_threshold) { 173 | size_t n = polys.size(); 174 | if (n == 0) 175 | return {}; 176 | std::vector indices(n); 177 | std::iota(std::begin(indices), std::end(indices), 0); 178 | std::sort(std::begin(indices), std::end(indices), [&](size_t i, size_t j) { return polys[i].score > polys[j].score; }); 179 | 180 | std::vector keep; 181 | while (indices.size()) { 182 | size_t p = 0, cur = indices[0]; 183 | keep.emplace_back(cur); 184 | for (size_t i = 1; i < indices.size(); i ++) { 185 | if (!should_merge(polys[cur], polys[indices[i]], iou_threshold)) { 186 | indices[p ++] = indices[i]; 187 | } 188 | } 189 | indices.resize(p); 190 | } 191 | 192 | std::vector ret; 193 | for (auto &&i: keep) { 194 | ret.emplace_back(polys[i]); 195 | } 196 | return ret; 197 | } 198 | 199 | std::vector 200 | merge_quadrangle_n9(const float *data, size_t n, float iou_threshold) { 201 | using cInt = cl::cInt; 202 | 203 | // first pass 204 | std::vector polys; 205 | for (size_t i = 0; i < n; i ++) { 206 | auto p = data + i * 9; 207 | Polygon poly{ 208 | { 209 | {cInt(p[0]), cInt(p[1])}, 210 | {cInt(p[2]), cInt(p[3])}, 211 | {cInt(p[4]), cInt(p[5])}, 212 | {cInt(p[6]), cInt(p[7])}, 213 | }, 214 | p[8], 215 | }; 216 | 217 | if (polys.size()) { 218 | // merge with the last one 219 | auto &bpoly = polys.back(); 220 | if (should_merge(poly, bpoly, iou_threshold)) { 221 | PolyMerger merger; 222 | merger.add(bpoly); 223 | merger.add(poly); 224 | bpoly = merger.get(); 225 | } else { 226 | polys.emplace_back(poly); 227 | } 228 | } else { 229 | polys.emplace_back(poly); 230 | } 231 | } 232 | return standard_nms(polys, iou_threshold); 233 | } 234 | } 235 | -------------------------------------------------------------------------------- /ppocr/postprocess/locality_aware_nms.py: -------------------------------------------------------------------------------- 1 | """ 2 | Locality 
"""
Locality aware nms.

Utilities for merging detected text quadrangles (EAST-style output).
Boxes are N x 9 numpy arrays: 8 corner coordinates followed by a score.
"""

import numpy as np
from shapely.geometry import Polygon


def intersection(g, p):
    """IoU of two quadrangles given as 9-vectors (8 coords + score)."""
    # First 8 entries are the 4 corner points; reshape to (4, 2).
    g = Polygon(g[:8].reshape((4, 2)))
    p = Polygon(p[:8].reshape((4, 2)))
    # buffer(0) repairs mildly self-intersecting polygons.
    g = g.buffer(0)
    p = p.buffer(0)
    if not g.is_valid or not p.is_valid:
        return 0
    inter = Polygon(g).intersection(Polygon(p)).area
    union = g.area + p.area - inter
    if union == 0:
        return 0
    else:
        return inter / union


def intersection_iog(g, p):
    """Intersection over the area of p (IoG variant of the overlap score)."""
    g = Polygon(g[:8].reshape((4, 2)))
    p = Polygon(p[:8].reshape((4, 2)))
    if not g.is_valid or not p.is_valid:
        return 0
    inter = Polygon(g).intersection(Polygon(p)).area
    #union = g.area + p.area - inter
    # Denominator is p's own area, not the union.
    union = p.area
    if union == 0:
        print("p_area is very small")
        return 0
    else:
        return inter / union


def weighted_merge(g, p):
    """Merge p into g, weighting coordinates by the two scores.

    NOTE: mutates and returns g; the merged score is the sum of both scores.
    """
    g[:8] = (g[8] * g[:8] + p[8] * p[:8]) / (g[8] + p[8])
    g[8] = (g[8] + p[8])
    return g


def standard_nms(S, thres):
    """Standard NMS; returns the kept rows of S (score in column 8)."""
    # Indices sorted by descending score.
    order = np.argsort(S[:, 8])[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])

        # Keep only candidates whose overlap with the winner is <= thres.
        inds = np.where(ovr <= thres)[0]
        order = order[inds + 1]

    return S[keep]


def standard_nms_inds(S, thres):
    """Standard NMS, but returns the kept indices instead of the rows."""
    order = np.argsort(S[:, 8])[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])

        inds = np.where(ovr <= thres)[0]
        order = order[inds + 1]

    return keep


def nms(S, thres):
    """Alias of standard_nms_inds: standard NMS returning kept indices."""
    order = np.argsort(S[:, 8])[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])

        inds = np.where(ovr <= thres)[0]
        order = order[inds + 1]

    return keep


def soft_nms(boxes_in, Nt_thres=0.3, threshold=0.8, sigma=0.5, method=2):
    """Soft-NMS over N x 9 boxes (8 coords + score).

    Instead of discarding overlapping boxes outright, their scores are
    decayed; boxes falling below *threshold* are dropped.

    :para boxes_in, N x 9 (coords + score)
    :para threshold, minimum surviving score; lower-scored boxes are removed
    :para Nt_thres, IoU threshold used by the linear / hard variants
    :para sigma, gaussian decay width (method == 2)
    :para method, 1 = linear decay, 2 = gaussian decay, other = hard NMS
    """
    boxes = boxes_in.copy()
    N = boxes.shape[0]
    if N is None or N < 1:
        return np.array([])
    pos, maxpos = 0, 0
    weight = 0.0
    # inds tracks original row indices through the in-place swaps below.
    inds = np.arange(N)
    tbox, sbox = boxes[0].copy(), boxes[0].copy()
    for i in range(N):
        maxscore = boxes[i, 8]
        maxpos = i
        tbox = boxes[i].copy()
        ti = inds[i]
        pos = i + 1
        #get max box
        # Selection-sort step: find the highest-scoring remaining box.
        while pos < N:
            if maxscore < boxes[pos, 8]:
                maxscore = boxes[pos, 8]
                maxpos = pos
            pos = pos + 1
        #add max box as a detection
        boxes[i, :] = boxes[maxpos, :]
        inds[i] = inds[maxpos]
        #swap
        boxes[maxpos, :] = tbox
        inds[maxpos] = ti
        tbox = boxes[i].copy()
        pos = i + 1
        #NMS iteration
        # Decay the scores of every remaining box that overlaps tbox.
        while pos < N:
            sbox = boxes[pos].copy()
            ts_iou_val = intersection(tbox, sbox)
            if ts_iou_val > 0:
                if method == 1:
                    # Linear decay above the IoU threshold.
                    if ts_iou_val > Nt_thres:
                        weight = 1 - ts_iou_val
                    else:
                        weight = 1
                elif method == 2:
                    # Gaussian decay.
                    weight = np.exp(-1.0 * ts_iou_val**2 / sigma)
                else:
                    # Hard NMS: zero out heavy overlaps.
                    if ts_iou_val > Nt_thres:
                        weight = 0
                    else:
                        weight = 1
                boxes[pos, 8] = weight * boxes[pos, 8]
                #if box score falls below thresold, discard the box by
                #swaping last box update N
                if boxes[pos, 8] < threshold:
                    boxes[pos, :] = boxes[N - 1, :]
                    inds[pos] = inds[N - 1]
                    N = N - 1
                    # Re-examine the row we just moved into position pos.
                    pos = pos - 1
            pos = pos + 1

    # Only the first N rows survive after the in-place shrinking above.
    return boxes[:N]


def nms_locality(polys, thres=0.3):
    """
    locality aware nms of EAST
    :param polys: a N*9 numpy array. first 8 coordinates, then prob
    :return: boxes after nms
    """
    S = []
    p = None
    # First pass: merge each poly with its immediate predecessor when they
    # overlap enough (polys arrive in row order, so neighbors are adjacent).
    for g in polys:
        if p is not None and intersection(g, p) > thres:
            p = weighted_merge(g, p)
        else:
            if p is not None:
                S.append(p)
            p = g
    if p is not None:
        S.append(p)

    if len(S) == 0:
        return np.array([])
    # Second pass: standard NMS over the locally merged candidates.
    return standard_nms(np.array(S), thres)


if __name__ == '__main__':
    # 343,350,448,135,474,143,369,359
    print(
        Polygon(np.array([[343, 350], [448, 135], [474, 143], [369, 359]]))
        .area)
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | from __future__ import unicode_literals 19 | 20 | import sys 21 | 22 | import paddle.fluid as fluid 23 | 24 | import logging 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | def check_config_params(config, config_name, params): 29 | for param in params: 30 | if param not in config: 31 | err = "param %s didn't find in %s!" % (param, config_name) 32 | assert False, err 33 | return 34 | -------------------------------------------------------------------------------- /ppocr/utils/corpus/readme.md: -------------------------------------------------------------------------------- 1 | # Waiting for your contribution 2 | 3 | PaddleOCR welcomes you to provide multilingual corpus for us to synthesize more data to optimize the model. 4 | 5 | If you are interested, you can submit the corpus text to this directory and name it with {language}_corpus.txt. 6 | PaddleOCR thanks for your contribution. 
-------------------------------------------------------------------------------- /ppocr/utils/corpus/readme_ch.md: -------------------------------------------------------------------------------- 1 | # 欢迎贡献语料 2 | 3 | PaddleOCR非常欢迎你提供多语言的语料,以供我们合成更多数据来优化模型。 4 | 5 | 如你感兴趣,可将语料文本提交到此目录,并以 {语言}_corpus.txt 命名,PaddleOCR团队感谢你的贡献。 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /ppocr/utils/dict/french_dict.txt: -------------------------------------------------------------------------------- 1 | ! 2 | " 3 | % 4 | & 5 | ' 6 | ( 7 | ) 8 | + 9 | , 10 | - 11 | . 12 | / 13 | 0 14 | 1 15 | 2 16 | 3 17 | 4 18 | 5 19 | 6 20 | 7 21 | 8 22 | 9 23 | : 24 | ; 25 | ? 26 | A 27 | B 28 | C 29 | D 30 | E 31 | F 32 | G 33 | H 34 | I 35 | J 36 | K 37 | L 38 | M 39 | N 40 | O 41 | P 42 | Q 43 | R 44 | S 45 | T 46 | U 47 | V 48 | W 49 | X 50 | Y 51 | Z 52 | [ 53 | ] 54 | a 55 | b 56 | c 57 | d 58 | e 59 | f 60 | g 61 | h 62 | i 63 | j 64 | k 65 | l 66 | m 67 | n 68 | o 69 | p 70 | q 71 | r 72 | s 73 | t 74 | u 75 | v 76 | w 77 | x 78 | y 79 | z 80 | « 81 | ³ 82 | µ 83 | º 84 | » 85 | À 86 | Á 87 |  88 | Å 89 | É 90 | Ê 91 | Î 92 | Ö 93 | ß 94 | à 95 | á 96 | â 97 | ä 98 | å 99 | æ 100 | ç 101 | è 102 | é 103 | ê 104 | ë 105 | í 106 | î 107 | ï 108 | ñ 109 | ò 110 | ó 111 | ô 112 | ö 113 | ø 114 | ù 115 | ú 116 | û 117 | ü 118 | 119 | -------------------------------------------------------------------------------- /ppocr/utils/dict/german_dict.txt: -------------------------------------------------------------------------------- 1 | ! 2 | " 3 | $ 4 | % 5 | & 6 | ' 7 | ( 8 | ) 9 | + 10 | , 11 | - 12 | . 13 | / 14 | 0 15 | 1 16 | 2 17 | 3 18 | 4 19 | 5 20 | 6 21 | 7 22 | 8 23 | 9 24 | : 25 | ; 26 | > 27 | ? 
28 | A 29 | B 30 | C 31 | D 32 | E 33 | F 34 | G 35 | H 36 | I 37 | J 38 | K 39 | L 40 | M 41 | N 42 | O 43 | P 44 | Q 45 | R 46 | S 47 | T 48 | U 49 | V 50 | W 51 | X 52 | Y 53 | Z 54 | [ 55 | ] 56 | a 57 | b 58 | c 59 | d 60 | e 61 | f 62 | g 63 | h 64 | i 65 | j 66 | k 67 | l 68 | m 69 | n 70 | o 71 | p 72 | q 73 | r 74 | s 75 | t 76 | u 77 | v 78 | w 79 | x 80 | y 81 | z 82 | £ 83 | § 84 | ­ 85 | ² 86 | ´ 87 | µ 88 | · 89 | º 90 | ¼ 91 | ½ 92 | ¿ 93 | À 94 | Á 95 | Ä 96 | Å 97 | Ç 98 | É 99 | Í 100 | Ï 101 | Ô 102 | Ö 103 | Ø 104 | Ù 105 | Ü 106 | ß 107 | à 108 | á 109 | â 110 | ã 111 | ä 112 | å 113 | æ 114 | ç 115 | è 116 | é 117 | ê 118 | ë 119 | í 120 | ï 121 | ñ 122 | ò 123 | ó 124 | ô 125 | ö 126 | ø 127 | ù 128 | ú 129 | û 130 | ü 131 | 132 | -------------------------------------------------------------------------------- /ppocr/utils/dict/occitan_dict.txt: -------------------------------------------------------------------------------- 1 | ! 2 | " 3 | % 4 | & 5 | ' 6 | ( 7 | ) 8 | + 9 | , 10 | - 11 | . 12 | / 13 | 0 14 | 1 15 | 2 16 | 3 17 | 4 18 | 5 19 | 6 20 | 7 21 | 8 22 | 9 23 | : 24 | ; 25 | ? 
26 | [ 27 | ] 28 | « 29 | ³ 30 | µ 31 | º 32 | » 33 | A 34 | Á 35 | À 36 | B 37 | C 38 | Ç 39 | D 40 | E 41 | É 42 | È 43 | F 44 | G 45 | H 46 | I 47 | Í 48 | Ï 49 | J 50 | K 51 | L 52 | M 53 | N 54 | O 55 | Ó 56 | Ò 57 | P 58 | Q 59 | R 60 | S 61 | T 62 | U 63 | V 64 | W 65 | X 66 | Y 67 | Z 68 | a 69 | á 70 | à 71 | b 72 | c 73 | d 74 | e 75 | é 76 | è 77 | f 78 | g 79 | h 80 | i 81 | í 82 | ï 83 | j 84 | k 85 | l 86 | m 87 | n 88 | o 89 | ó 90 | ò 91 | p 92 | q 93 | r 94 | s 95 | t 96 | u 97 | ú 98 | ü 99 | v 100 | w 101 | x 102 | y 103 | z 104 | ç 105 | æ 106 | Æ 107 | ê 108 | Ê 109 | ë 110 | Ë 111 | ñ 112 | Ñ 113 | ô 114 | Ô 115 | œ 116 | Œ 117 | ù 118 | Ù 119 | -------------------------------------------------------------------------------- /ppocr/utils/ic15_dict.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | a 12 | b 13 | c 14 | d 15 | e 16 | f 17 | g 18 | h 19 | i 20 | j 21 | k 22 | l 23 | m 24 | n 25 | o 26 | p 27 | q 28 | r 29 | s 30 | t 31 | u 32 | v 33 | w 34 | x 35 | y 36 | z 37 | A 38 | B 39 | C 40 | D 41 | E 42 | F 43 | G 44 | H 45 | I 46 | J 47 | K 48 | L 49 | M 50 | N 51 | O 52 | P 53 | Q 54 | R 55 | S 56 | T 57 | U 58 | V 59 | W 60 | X 61 | Y 62 | Z 63 | 64 | -------------------------------------------------------------------------------- /ppocr/utils/save_load.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 2 | # 3 | #Licensed under the Apache License, Version 2.0 (the "License"); 4 | #you may not use this file except in compliance with the License. 
5 | #You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | #Unless required by applicable law or agreed to in writing, software 10 | #distributed under the License is distributed on an "AS IS" BASIS, 11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | #See the License for the specific language governing permissions and 13 | #limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import errno 20 | import os 21 | import shutil 22 | import tempfile 23 | 24 | import paddle.fluid as fluid 25 | 26 | from .utility import initial_logger 27 | import re 28 | logger = initial_logger() 29 | 30 | 31 | def _mkdir_if_not_exist(path): 32 | """ 33 | mkdir if not exists, ignore the exception when multiprocess mkdir together 34 | """ 35 | if not os.path.exists(path): 36 | try: 37 | os.makedirs(path) 38 | except OSError as e: 39 | if e.errno == errno.EEXIST and os.path.isdir(path): 40 | logger.warning( 41 | 'be happy if some process has already created {}'.format( 42 | path)) 43 | else: 44 | raise OSError('Failed to mkdir {}'.format(path)) 45 | 46 | 47 | def _load_state(path): 48 | if os.path.exists(path + '.pdopt'): 49 | # XXX another hack to ignore the optimizer state 50 | tmp = tempfile.mkdtemp() 51 | dst = os.path.join(tmp, os.path.basename(os.path.normpath(path))) 52 | shutil.copy(path + '.pdparams', dst + '.pdparams') 53 | state = fluid.io.load_program_state(dst) 54 | shutil.rmtree(tmp) 55 | else: 56 | state = fluid.io.load_program_state(path) 57 | return state 58 | 59 | 60 | def load_params(exe, prog, path, ignore_params=[]): 61 | """ 62 | Load model from the given path. 63 | Args: 64 | exe (fluid.Executor): The fluid.Executor object. 65 | prog (fluid.Program): load weight to which Program object. 66 | path (string): URL string or loca model path. 
67 | ignore_params (list): ignore variable to load when finetuning. 68 | It can be specified by finetune_exclude_pretrained_params 69 | and the usage can refer to docs/advanced_tutorials/TRANSFER_LEARNING.md 70 | """ 71 | if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')): 72 | raise ValueError("Model pretrain path {} does not " 73 | "exists.".format(path)) 74 | 75 | logger.info('Loading parameters from {}...'.format(path)) 76 | 77 | ignore_set = set() 78 | state = _load_state(path) 79 | 80 | # ignore the parameter which mismatch the shape 81 | # between the model and pretrain weight. 82 | all_var_shape = {} 83 | for block in prog.blocks: 84 | for param in block.all_parameters(): 85 | all_var_shape[param.name] = param.shape 86 | ignore_set.update([ 87 | name for name, shape in all_var_shape.items() 88 | if name in state and shape != state[name].shape 89 | ]) 90 | 91 | if ignore_params: 92 | all_var_names = [var.name for var in prog.list_vars()] 93 | ignore_list = filter( 94 | lambda var: any([re.match(name, var) for name in ignore_params]), 95 | all_var_names) 96 | ignore_set.update(list(ignore_list)) 97 | 98 | if len(ignore_set) > 0: 99 | for k in ignore_set: 100 | if k in state: 101 | logger.warning('variable {} not used'.format(k)) 102 | del state[k] 103 | fluid.io.set_program_state(prog, state) 104 | 105 | 106 | def init_model(config, program, exe): 107 | """ 108 | load model from checkpoint or pretrained_model 109 | """ 110 | checkpoints = config['Global'].get('checkpoints') 111 | if checkpoints: 112 | if os.path.exists(checkpoints + '.pdparams'): 113 | path = checkpoints 114 | fluid.load(program, path, exe) 115 | logger.info("Finish initing model from {}".format(path)) 116 | else: 117 | raise ValueError("Model checkpoints {} does not exists," 118 | "check if you lost the file prefix.".format( 119 | checkpoints + '.pdparams')) 120 | else: 121 | pretrain_weights = config['Global'].get('pretrain_weights') 122 | if pretrain_weights: 123 | path = 
pretrain_weights 124 | load_params(exe, program, path) 125 | logger.info("Finish initing model from {}".format(path)) 126 | 127 | 128 | def save_model(program, model_path): 129 | """ 130 | save model to the target path 131 | """ 132 | fluid.save(program, model_path) 133 | logger.info("Already save model in {}".format(model_path)) 134 | -------------------------------------------------------------------------------- /ppocr/utils/stats.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import collections 16 | import numpy as np 17 | import datetime 18 | 19 | __all__ = ['TrainingStats', 'Time'] 20 | 21 | 22 | class SmoothedValue(object): 23 | """Track a series of values and provide access to smoothed values over a 24 | window or the global series average. 
25 | """ 26 | 27 | def __init__(self, window_size): 28 | self.deque = collections.deque(maxlen=window_size) 29 | 30 | def add_value(self, value): 31 | self.deque.append(value) 32 | 33 | def get_median_value(self): 34 | return np.median(self.deque) 35 | 36 | 37 | def Time(): 38 | return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f') 39 | 40 | 41 | class TrainingStats(object): 42 | def __init__(self, window_size, stats_keys): 43 | self.smoothed_losses_and_metrics = { 44 | key: SmoothedValue(window_size) 45 | for key in stats_keys 46 | } 47 | 48 | def update(self, stats): 49 | for k, v in self.smoothed_losses_and_metrics.items(): 50 | v.add_value(stats[k]) 51 | 52 | def get(self, extras=None): 53 | stats = collections.OrderedDict() 54 | if extras: 55 | for k, v in extras.items(): 56 | stats[k] = v 57 | for k, v in self.smoothed_losses_and_metrics.items(): 58 | stats[k] = round(v.get_median_value(), 6) 59 | 60 | return stats 61 | 62 | def log(self, extras=None): 63 | d = self.get(extras) 64 | strs = ', '.join(str(dict({x: y})).strip('{}') for x, y in d.items()) 65 | return strs 66 | -------------------------------------------------------------------------------- /ppocr/utils/utility.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import imghdr 4 | import cv2 5 | import paddle 6 | from paddle import fluid 7 | import importlib 8 | 9 | 10 | def initial_logger(): 11 | FORMAT = '%(asctime)s-%(levelname)s: %(message)s' 12 | logging.basicConfig(level=logging.INFO, format=FORMAT) 13 | logger = logging.getLogger(__name__) 14 | return logger 15 | 16 | 17 | def create_module(module_str): 18 | tmpss = module_str.split(",") 19 | assert len(tmpss) == 2, "Error formate\ 20 | of the module path: {}".format(module_str) 21 | module_name, function_name = tmpss[0], tmpss[1] 22 | somemodule = importlib.import_module(module_name, __package__) 23 | function = getattr(somemodule, function_name) 24 | return 
function 25 | 26 | 27 | def get_check_global_params(mode): 28 | check_params = ['use_gpu', 'max_text_length', 'image_shape', \ 29 | 'image_shape', 'character_type', 'loss_type'] 30 | if mode == "train_eval": 31 | check_params = check_params + [ \ 32 | 'train_batch_size_per_card', 'test_batch_size_per_card'] 33 | elif mode == "test": 34 | check_params = check_params + ['test_batch_size_per_card'] 35 | return check_params 36 | 37 | 38 | def get_check_reader_params(mode): 39 | check_params = [] 40 | if mode == "train_eval": 41 | check_params = ['TrainReader', 'EvalReader'] 42 | elif mode == "test": 43 | check_params = ['TestReader'] 44 | return check_params 45 | 46 | 47 | def get_image_file_list(img_file): 48 | imgs_lists = [] 49 | if img_file is None or not os.path.exists(img_file): 50 | raise Exception("not found any img file in {}".format(img_file)) 51 | 52 | img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif', 'GIF'} 53 | if os.path.isfile(img_file) and imghdr.what(img_file) in img_end: 54 | imgs_lists.append(img_file) 55 | elif os.path.isdir(img_file): 56 | for single_file in os.listdir(img_file): 57 | file_path = os.path.join(img_file, single_file) 58 | if imghdr.what(file_path) in img_end: 59 | imgs_lists.append(file_path) 60 | if len(imgs_lists) == 0: 61 | raise Exception("not found any img file in {}".format(img_file)) 62 | return imgs_lists 63 | 64 | 65 | def check_and_read_gif(img_path): 66 | if os.path.basename(img_path)[-3:] in ['gif', 'GIF']: 67 | gif = cv2.VideoCapture(img_path) 68 | ret, frame = gif.read() 69 | if not ret: 70 | logging.info("Cannot read {}. 
This gif image maybe corrupted.") 71 | return None, False 72 | if len(frame.shape) == 2 or frame.shape[-1] == 1: 73 | frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB) 74 | imgvalue = frame[:, :, ::-1] 75 | return imgvalue, True 76 | return None, False 77 | 78 | 79 | def create_multi_devices_program(program, loss_var_name): 80 | build_strategy = fluid.BuildStrategy() 81 | build_strategy.memory_optimize = False 82 | build_strategy.enable_inplace = True 83 | exec_strategy = fluid.ExecutionStrategy() 84 | exec_strategy.num_iteration_per_drop_scope = 1 85 | compile_program = fluid.CompiledProgram(program).with_data_parallel( 86 | loss_name=loss_var_name, 87 | build_strategy=build_strategy, 88 | exec_strategy=exec_strategy) 89 | return compile_program 90 | 91 | 92 | def enable_static_mode(): 93 | try: 94 | paddle.enable_static() 95 | except: 96 | pass 97 | -------------------------------------------------------------------------------- /prod_deploy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #wget https://pkg-config.freedesktop.org/releases/pkg-config-0.29.2.tar.gz 4 | #pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple 5 | 6 | 7 | # 一个检测模型, 后面并联多个不同语言识别模型 8 | reco_language="ch,japan,en,korean,ch_h,french,german" 9 | port="8811" 10 | gpu=0 11 | 12 | ps aux |grep "ocr_server_${port}_${reco_language}" |awk -F ' ' '{print $2}' |xargs -i kill -9 {} 13 | nohup python ocr_server.py --gpu ${gpu} --port ${port} --rec ${reco_language} >/dev/null 2>&1 & 14 | echo "查看是否启动成功: tail -f log/ocr_${port}.log" 15 | 16 | 17 | # 一个检测模型, 后面接一个语言识别模型 18 | reco_language="ch,ch_h" 19 | port="8812" 20 | ps aux |grep "ocr_server_${port}_${reco_language}" |awk -F ' ' '{print $2}' |xargs -i kill -9 {} 21 | nohup python ocr_server.py --gpu ${gpu} --port ${port} --rec ${reco_language} >/dev/null 2>&1 & 22 | echo "查看是否启动成功: tail -f log/ocr_${port}.log" 23 | 
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | shapely 2 | imgaug 3 | pyclipper 4 | lmdb 5 | tqdm 6 | numpy 7 | opencv-python 8 | setproctitle 9 | paddlepaddle-gpu==2.0.0b0 10 | paddlehub 11 | tencentcloud-sdk-python 12 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | from base64 import b64encode 2 | import requests 3 | import glob 4 | import time 5 | import cv2 6 | import numpy as np 7 | import shutil 8 | import os 9 | import json 10 | from PIL import Image, ImageDraw, ImageFont 11 | 12 | label_color = [[31, 0, 255], [0, 159, 255], [255, 0, 0], [0, 255, 25], [255, 0, 133], 13 | [255, 172, 0], [108, 0, 255], [0, 82, 255], [255, 0, 152], [223, 0, 255], [12, 0, 255], [0, 255, 178], 14 | [108, 255, 0], [184, 0, 255], [255, 0, 76], [146, 255, 0], [51, 0, 255], [0, 197, 255], [255, 248, 0], 15 | [255, 0, 19], [255, 0, 38], [89, 255, 0], [127, 255, 0], [255, 153, 0], [0, 255, 255]] 16 | 17 | 18 | def mkdir(path, rm=False): 19 | if os.path.exists(path): 20 | if rm: 21 | shutil.rmtree(path) 22 | os.makedirs(path) 23 | else: 24 | os.makedirs(path) 25 | 26 | 27 | def add_chinese_text(img, text, left, top, color=(0, 255, 0)): 28 | if isinstance(img, np.ndarray): 29 | img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) 30 | draw = ImageDraw.Draw(img) 31 | draw.text((left, top), text, color, font=font_text) 32 | return cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) 33 | 34 | 35 | def draw_txt(img, ann, show=False): 36 | num = 0 37 | for one_ann in ann: 38 | text = one_ann["text"] 39 | conf = one_ann["confidence"] 40 | points = one_ann["text_region"] 41 | 42 | text = "{:.2f} {}".format(conf, text) 43 | # print("points num:", len(points)) 44 | 45 | color = tuple(label_color[num % len(label_color)]) 46 | 
points = (np.reshape(points, [-1, 2])).astype(np.int32) 47 | img = cv2.polylines(img, [points], True, color, 1) 48 | for idx, pt in enumerate(points): 49 | cv2.circle(img, (pt[0], pt[1]), 5, color, thickness=2) 50 | cv2.putText(img, str(idx), (pt[0], pt[1]), cv2.FONT_HERSHEY_SIMPLEX, 1, color, thickness=1) 51 | img = add_chinese_text(img, text, points[0][0], points[0][1] - 20, color=color[::-1]) 52 | num += 1 53 | 54 | if show: 55 | cv2.namedWindow("result", 0) 56 | cv2.imshow("result", img) 57 | key = cv2.waitKey(0) 58 | if key == 27: 59 | exit() 60 | 61 | return img 62 | 63 | 64 | if __name__ == "__main__": 65 | port = '8811' # '8812' 66 | language = ['ENG', 'JAP', 'KOR', 'CH'] 67 | # language = ['JAP'] 68 | request_url = "http://0.0.0.0:{}/dango/algo/ocr/server".format(port) 69 | img_path = "./demo" 70 | test_num = 10 71 | 72 | img_list = glob.glob(img_path + "/*.jpg") 73 | for lang in language: 74 | font_path = "./font/{}.ttc".format(lang) 75 | if lang in ["ENG", "CH"]: 76 | font_path = "./font/CH_ENG.TTC" 77 | elif lang in ["JAP"]: 78 | font_path = "./font/japan.ttc" 79 | else: 80 | font_path = "./font/KOR.ttf" 81 | font_text = ImageFont.truetype(font_path, 20, encoding="utf-8") 82 | num = 0 83 | for i, img_p in enumerate(img_list): 84 | if lang not in img_p: 85 | continue 86 | 87 | num += 1 88 | print("{}/{} {} {}".format(i, len(img_list), lang, img_p)) 89 | f = open(img_p, 'rb') 90 | img = b64encode(f.read()) # .decode() 91 | # print(img) 92 | s1 = time.time() 93 | data = {"image": img, "language_type": lang, "user_id": "234232", "platform": "win32"} 94 | # print(img) 95 | response = requests.post(request_url, data=data).json() 96 | print(response) 97 | s2 = time.time() 98 | print("time cost:", s2 - s1) 99 | 100 | result = response['data']['result'][0] # batch result, now we only use first one 101 | img_cv = cv2.imread(img_p) 102 | img = draw_txt(img_cv, result, True) 103 | cv2.imwrite("./demo_result/"+os.path.basename(img_p), img) 104 | if num > test_num: 
class TextClassifier(object):
    """Text-direction classifier: predicts whether a cropped text line is
    rotated 180 degrees and, when confident, rotates it back in place."""

    def __init__(self, args):
        # when serving through paddle-serving the predictor is built elsewhere
        if args.use_pdserving is False:
            self.predictor, self.input_tensor, self.output_tensors = \
                utility.create_predictor(args, mode="cls")
            self.use_zero_copy_run = args.use_zero_copy_run
        self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")]
        self.cls_batch_num = args.rec_batch_num
        self.label_list = args.label_list
        self.cls_thresh = args.cls_thresh

    def resize_norm_img(self, img):
        """Resize ``img`` to the classifier input (C, H, W) keeping aspect
        ratio, normalise to [-1, 1], and zero-pad on the right to full width.
        Assumes ``img`` is an HWC uint8 array — TODO confirm against callers."""
        imgC, imgH, imgW = self.cls_image_shape
        h = img.shape[0]
        w = img.shape[1]
        ratio = w / float(h)
        # cap the resized width at the model's input width
        if math.ceil(imgH * ratio) > imgW:
            resized_w = imgW
        else:
            resized_w = int(math.ceil(imgH * ratio))
        resized_image = cv2.resize(img, (resized_w, imgH))
        resized_image = resized_image.astype('float32')
        if self.cls_image_shape[0] == 1:
            # single-channel model: add the channel axis explicitly
            resized_image = resized_image / 255
            resized_image = resized_image[np.newaxis, :]
        else:
            # HWC -> CHW
            resized_image = resized_image.transpose((2, 0, 1)) / 255
        # shift [0, 1] to [-1, 1]
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im

    def __call__(self, img_list):
        """Classify the orientation of each crop in ``img_list``.

        Returns:
            tuple: (possibly-rotated img_list copy,
                    per-image [label, score] results,
                    total predictor time in seconds).
        """
        img_list = copy.deepcopy(img_list)
        img_num = len(img_list)
        # Calculate the aspect ratio of all text bars
        width_list = []
        for img in img_list:
            width_list.append(img.shape[1] / float(img.shape[0]))
        # Sorting can speed up the cls process
        indices = np.argsort(np.array(width_list))

        # placeholder results; each slot is reassigned wholesale below, so
        # the shared inner list from `*` replication is harmless
        cls_res = [['', 0.0]] * img_num
        batch_num = self.cls_batch_num
        predict_time = 0
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)
            norm_img_batch = []
            # max_wh_ratio is computed but unused here — kept from the
            # recognition pipeline this loop was adapted from
            max_wh_ratio = 0
            for ino in range(beg_img_no, end_img_no):
                h, w = img_list[indices[ino]].shape[0:2]
                wh_ratio = w * 1.0 / h
                max_wh_ratio = max(max_wh_ratio, wh_ratio)
            for ino in range(beg_img_no, end_img_no):
                norm_img = self.resize_norm_img(img_list[indices[ino]])
                norm_img = norm_img[np.newaxis, :]
                norm_img_batch.append(norm_img)
            norm_img_batch = np.concatenate(norm_img_batch)
            norm_img_batch = norm_img_batch.copy()
            starttime = time.time()

            if self.use_zero_copy_run:
                self.input_tensor.copy_from_cpu(norm_img_batch)
                self.predictor.zero_copy_run()
            else:
                norm_img_batch = fluid.core.PaddleTensor(norm_img_batch)
                self.predictor.run([norm_img_batch])

            prob_out = self.output_tensors[0].copy_to_cpu()
            label_out = self.output_tensors[1].copy_to_cpu()
            # some exported models emit (label, prob) in the opposite order;
            # the 1-D tensor is the label vector
            if len(label_out.shape) != 1:
                prob_out, label_out = label_out, prob_out
            elapse = time.time() - starttime
            predict_time += elapse
            for rno in range(len(label_out)):
                label_idx = label_out[rno]
                score = prob_out[rno][label_idx]
                label = self.label_list[label_idx]
                # map the batch-local, sorted index back to the caller's order
                cls_res[indices[beg_img_no + rno]] = [label, score]
                if '180' in label and score > self.cls_thresh:
                    # confidently upside-down: rotate the crop back in place
                    img_list[indices[beg_img_no + rno]] = cv2.rotate(
                        img_list[indices[beg_img_no + rno]], 1)
        return img_list, cls_res, predict_time
changed by cc 18 | """ 19 | 20 | def doRollover(self): 21 | """ 22 | do a rollover; in this case, a date/time stamp is appended to the filename 23 | when the rollover happens. However, you want the file to be named for the 24 | start of the interval, not the current time. If there is a backup count, 25 | then we have to get a list of matching filenames, sort them and remove 26 | the one with the oldest suffix. 27 | Override, 1. if dfn not exist then do rename 28 | 2. _open with "a" models 29 | """ 30 | if self.stream: 31 | self.stream.close() 32 | self.stream = None 33 | # get the time that this sequence started at and make it a TimeTuple 34 | currentTime = int(time.time()) 35 | dstNow = time.localtime(currentTime)[-1] 36 | t = self.rolloverAt - self.interval 37 | if self.utc: 38 | timeTuple = time.gmtime(t) 39 | else: 40 | timeTuple = time.localtime(t) 41 | dstThen = timeTuple[-1] 42 | if dstNow != dstThen: 43 | if dstNow: 44 | addend = 3600 45 | else: 46 | addend = -3600 47 | timeTuple = time.localtime(t + addend) 48 | dfn = self.baseFilename + "." + time.strftime(self.suffix, timeTuple) 49 | with lock: 50 | if not os.path.exists(dfn) and os.path.exists(self.baseFilename): 51 | os.rename(self.baseFilename, dfn) 52 | if self.backupCount > 0: 53 | for s in self.getFilesToDelete(): 54 | os.remove(s) 55 | if not self.delay: 56 | self.mode = "a" 57 | self.stream = self._open() 58 | newRolloverAt = self.computeRollover(currentTime) 59 | while newRolloverAt <= currentTime: 60 | newRolloverAt = newRolloverAt + self.interval 61 | # If DST changes and midnight or weekly rollover, adjust for this. 
62 | if (self.when == 'MIDNIGHT' or self.when.startswith('W')) and not self.utc: 63 | dstAtRollover = time.localtime(newRolloverAt)[-1] 64 | if dstNow != dstAtRollover: 65 | if not dstNow: # DST kicks in before next rollover, so we need to deduct an hour 66 | addend = -3600 67 | else: # DST bows out before next rollover, so we need to add an hour 68 | addend = 3600 69 | newRolloverAt += addend 70 | self.rolloverAt = newRolloverAt 71 | 72 | 73 | def get_logger(log_file): 74 | if log_file in logger_dict.keys(): 75 | return logger_dict[log_file] 76 | # create log file 77 | if not os.path.exists(os.path.dirname(log_file)): 78 | os.mkdir(os.path.dirname(log_file)) 79 | if not os.path.exists(log_file): 80 | open(log_file, "a+").close() 81 | # logger 82 | logger = logging.getLogger(log_file) 83 | logger.setLevel(logging.INFO) 84 | # fhandler 85 | handler = logging.handlers.TimedRotatingFileHandler(filename=log_file, when='D', interval=1, backupCount=7) 86 | 87 | # handler = SafeRotatingFileHandler(log_file, when='midnight', interval=1, backupCount=30, encoding='utf-8') 88 | strfmt = "[%(asctime)s] %(filename)s[line:%(lineno)d] %(levelname)s %(message)s" 89 | # format 90 | formatter = logging.Formatter(strfmt) 91 | handler.setFormatter(formatter) 92 | logger.addHandler(handler) 93 | logger_dict[log_file] = logger 94 | return logger 95 | -------------------------------------------------------------------------------- /translate/Bing.py: -------------------------------------------------------------------------------- 1 | import urllib.request 2 | import urllib.parse 3 | from traceback import print_exc 4 | 5 | 6 | class BingTranslate(object): 7 | 8 | def __init__(self): 9 | 10 | self.url = "http://api.microsofttranslator.com/v2/ajax.svc/TranslateArray2?" 
11 | 12 | def translate(self, BingLanguage, content): 13 | 14 | data = {} 15 | data['from'] = '"' + BingLanguage + '"' 16 | data['to'] = '"' + 'zh' + '"' 17 | data['texts'] = '["' 18 | data['texts'] += content 19 | data['texts'] += '"]' 20 | data['options'] = "{}" 21 | data['oncomplete'] = 'onComplete_3' 22 | data['onerror'] = 'onError_3' 23 | data['_'] = '1430745999189' 24 | 25 | try: 26 | data = urllib.parse.urlencode(data).encode('utf-8') 27 | strUrl = self.url + data.decode() + "&appId=%223DAEE5B978BA031557E739EE1E2A68CB1FAD5909%22" 28 | response = urllib.request.urlopen(strUrl) 29 | str_data = response.read().decode('utf-8') 30 | tmp, str_data = str_data.split('"TranslatedText":') 31 | translate_data = str_data[1:str_data.find('",', 1)].replace('\\"', '') 32 | 33 | except Exception: 34 | print_exc() 35 | translate_data = "Bing:我抽风啦!" 36 | 37 | return translate_data 38 | 39 | 40 | if __name__ == '__main__': 41 | content = 'そうすると、可笑しいことや変なこと、滑稽なことや正しくないこと、反対にやるべきことが见えてくるから。とにかく、何かにどっぷりはまっていると、周りのことが见えなくなってしまう。だから、时々一歩引くと物事が见えてくる。' 42 | # content = "Hooray! It's snowing! It's time to make a snowman.James runs out. He makes a big pile of snow. He puts a big snowball on top. He adds a scarf and a hat. He adds an orange for the nose. He adds coal for the eyes and buttons.In the evening, James opens the door. What does he see? The snowman is moving! James invites him in. The snowman has never been inside a house. He says hello to the cat. He plays with paper towels.A moment later, the snowman takes James's hand and goes out.They go up, up, up into the air! They are flying! What a wonderful night!The next morning, James jumps out of bed. He runs to the door.He wants to thank the snowman. But he's gone." 43 | # content = "낙성대는 ‘별이 떨어진 곳’ 이라는 뜻이다.고려시대 때 어는 날 하늘에서 가장 크고 빛나는 별 하나가 땅에 떨어졌는데 그 곳에서 명장 강감찬 장군이 태어났다.그 후부터 그 곳을 낙성대라고 불렀다." 
44 | # ja en ko 45 | bing = BingTranslate() 46 | print(bing.translate('ja', content)) 47 | -------------------------------------------------------------------------------- /translate/Google.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import urllib.request 4 | import urllib.parse 5 | from requests import Session 6 | from traceback import print_exc 7 | from js2py import EvalJs 8 | import os 9 | 10 | this_file_dir = os.path.dirname(os.path.realpath(__file__)) 11 | 12 | 13 | class GoogleTranslate(): 14 | 15 | def __init__(self): 16 | 17 | self.headers = { 18 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'} 19 | self.session = Session() 20 | self.session.keep_alive = False 21 | 22 | def getTk(self, text): 23 | 24 | with open(this_file_dir + '/GoogleJS.js', encoding='utf8') as f: 25 | js_data = f.read() 26 | 27 | context = EvalJs() 28 | context.execute(js_data) 29 | tk = context.TL(text) 30 | 31 | return tk 32 | 33 | def buildUrl(self, text, tk): 34 | 35 | baseUrl = 'http://translate.google.cn/translate_a/single' 36 | baseUrl += '?client=webapp&' 37 | baseUrl += 'sl=auto&' 38 | baseUrl += 'tl=' + 'zh-CN' + '&' 39 | baseUrl += 'hl=zh-CN&' 40 | baseUrl += 'dt=at&' 41 | baseUrl += 'dt=bd&' 42 | baseUrl += 'dt=ex&' 43 | baseUrl += 'dt=ld&' 44 | baseUrl += 'dt=md&' 45 | baseUrl += 'dt=qca&' 46 | baseUrl += 'dt=rw&' 47 | baseUrl += 'dt=rm&' 48 | baseUrl += 'dt=ss&' 49 | baseUrl += 'dt=t&' 50 | baseUrl += 'ie=UTF-8&' 51 | baseUrl += 'oe=UTF-8&' 52 | baseUrl += 'clearbtn=1&' 53 | baseUrl += 'otf=1&' 54 | baseUrl += 'pc=1&' 55 | baseUrl += 'srcrom=0&' 56 | baseUrl += 'ssel=0&' 57 | baseUrl += 'tsel=0&' 58 | baseUrl += 'kc=2&' 59 | baseUrl += 'tk=' + str(tk) + '&' 60 | content = urllib.parse.quote(text) 61 | baseUrl += 'q=' + content 62 | 63 | return baseUrl 64 | 65 | def getHtml(self, session, url, headers): 66 | 67 | 
        # --- body of GoogleTranslate.getHtml (signature in previous chunk) ---
        # GET the prepared URL and decode the JSON body; any network/JSON
        # error is printed and signalled to the caller with None.
        try:
            html = session.get(url, headers=headers)
            return html.json()
        except Exception:
            print_exc()
            return None

    def translate(self, text):
        """Translate *text* to zh-CN; returns a fixed Chinese error string on failure."""
        tk = self.getTk(text)
        url = self.buildUrl(text, tk)

        try:
            result = self.getHtml(self.session, url, self.headers)

            if result != None:
                # result[0] holds sentence fragments; concatenate the first
                # element of each entry, skipping None placeholders.
                sentence = ''
                for i in result[0]:
                    if i[0] != None:
                        sentence += i[0]
            else:
                sentence = "谷歌:我抽风啦!"

        except Exception:
            print_exc()
            sentence = "谷歌:我抽风啦!"

        return sentence


if __name__ == '__main__':
    text = "そうすると、可笑しいことや変なこと、滑稽なことや正しくないこと、反対にやるべきことが见えてくるから。とにかく、何かにどっぷりはまっていると、周りのことが见えなくなってしまう。だから、时々一歩引くと物事が见えてくる。"
    google = GoogleTranslate()
    print(google.translate(text))
// ---------------- translate/GoogleJS.js ----------------
// Google's obfuscated "tk" token hash. TL() UTF-8-encodes the input string
// and folds each byte through RL() with two fixed mixing keys.
function TL(a) {
    var k = "";
    var b = 406644;
    var b1 = 3293161072;
    var jd = ".";
    var $b = "+-a^+6";
    var Zb = "+-3^+b+-f";
    // UTF-8 encode `a` into byte array e (handles surrogate pairs).
    for (var e = [], f = 0, g = 0; g < a.length; g++) {
        var m = a.charCodeAt(g);
        128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023),
        e[f++] = m >> 18 | 240,
        e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224,
        e[f++] = m >> 6 & 63 | 128),
        e[f++] = m & 63 | 128)
    }
    // Fold every byte through the first key, then finalize with the second.
    a = b;
    for (f = 0; f < e.length; f++) a += e[f],
    a = RL(a, $b);
    a = RL(a, Zb);
    a ^= b1 || 0;
    0 > a && (a = (a & 2147483647) + 2147483648);
    a %= 1E6;
    return a.toString() + jd + (a ^ b)
};
// RL: shift/xor/add mixer driven 3 chars at a time by the key string b.
function RL(a, b) {
    var t = "a";
    var Yb = "+";
    for (var c = 0; c < b.length - 2; c += 3) {
        var d = b.charAt(c + 2),
            d = d >= t ?
d.charCodeAt(0) - 87 : Number(d), 31 | d = b.charAt(c + 1) == Yb ? a >>> d: a << d; 32 | a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d 33 | } 34 | return a 35 | } -------------------------------------------------------------------------------- /translate/Tencent.py: -------------------------------------------------------------------------------- 1 | import re 2 | import time 3 | import requests 4 | from traceback import print_exc 5 | 6 | 7 | def get_filter(text): 8 | if isinstance(text, list): 9 | text = ''.join(text) 10 | text = str(text) 11 | text = text.strip() 12 | filter_list = [ 13 | '\r', '\n', '\t', '\u3000', '\xa0', '\u2002', 14 | '
', '
', ' ', ' ', ' ', '>>', '"', 15 | '展开全部', ' ' 16 | ] 17 | for fl in filter_list: 18 | text = text.replace(fl, '') 19 | return text 20 | 21 | 22 | def get_qtv_qtk(): 23 | api_url = 'https://fanyi.qq.com/' 24 | 25 | headers = { 26 | 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, ' 27 | 'like Gecko) Chrome/73.0.3683.86 Safari/537.36', } 28 | 29 | res = requests.get(api_url, headers=headers) 30 | data = res.text 31 | fy_guid = res.cookies.get('fy_guid') 32 | reg = re.compile(r'var qtv = "(.*?)"') 33 | qtv = reg.search(data).group(1) 34 | reg = re.compile(r'var qtk = "(.*?)"') 35 | qtk = reg.search(data).group(1) 36 | 37 | return fy_guid, qtv, qtk 38 | 39 | 40 | def getHtml(url, headers, data): 41 | try: 42 | html = requests.post(url=url, data=data, headers=headers) 43 | datas = html.json()['translate']['records'] 44 | 45 | if html != None and datas != None: 46 | trans_result = ''.join([data['targetText'] for data in datas]) 47 | 48 | except Exception: 49 | print_exc() 50 | trans_result = '网页腾讯:我抽风啦!' 
        # (tail of getHtml — the try/except above assigns trans_result)
        return trans_result


class TencentTrans(object):
    # Client for fanyi.qq.com's /api/translate endpoint. The cookie below is
    # a template: its placeholder guid/qtv/qtk values are replaced in
    # __init__ with live values scraped by get_qtv_qtk().

    def __init__(self):
        self.api_url = 'https://fanyi.qq.com/api/translate'
        self.headers = {
            'Cookie': 'fy_guid=605ead81-f210-47eb-bd80-ac6ae5e7a2d8; '
                      'qtv=ed286a053ae88763; '
                      'qtk=wfMmjh3k/7Sr2xVNg/LtITgPRlnvGWBzP9a4FN0dn9PE7L5jDYiYJnW03MJLRUGHEFNCRhTfrp/V+wUj0dun1KkKNUUmS86A/wGVf6ydzhwboelTOs0hfHuF0ndtSoX+N3486tUMlm62VU4i856mqw==; ',
            'Host': 'fanyi.qq.com',
            'Origin': 'https://fanyi.qq.com',
            'Referer': 'https://fanyi.qq.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, '
                          'like Gecko) Chrome/73.0.3683.86 Safari/537.36', }

        self.fromlang = 'auto'
        self.tolang = 'zh'
        # Millisecond timestamp used as a per-session identifier.
        self.sessionUuid = str(int(time.time() * 1000))

        # Scrape live guid/qtv/qtk (network call at construction time) and
        # splice each one into the cookie template in place of its
        # placeholder value.
        self.fy_guid, self.qtv, self.qtk = get_qtv_qtk()

        self.headers['Cookie'] = self.headers['Cookie'].replace(
            '605ead81-f210-47eb-bd80-ac6ae5e7a2d8', self.fy_guid)

        self.headers['Cookie'] = self.headers['Cookie'].replace(
            'ed286a053ae88763', self.qtv)
        self.headers['Cookie'] = self.headers['Cookie'].replace(
            'wfMmjh3k/7Sr2xVNg/LtITgPRlnvGWBzP9a4FN0dn9PE7L5jDYiYJnW03MJLRUGHEFNCRhTfrp/V+wUj0dun1KkKNUUmS86A/wGVf6ydzhwboelTOs0hfHuF0ndtSoX+N3486tUMlm62VU4i856mqw==',
            self.qtk)

    def get_trans_result(self, text):
        """Build the POST payload for *text* and delegate to getHtml()."""
        data = {
            'source': self.fromlang,
            'target': self.tolang,
            'sourceText': text,
            'qtv': self.qtv,
            'qtk': self.qtk,
            'sessionUuid': self.sessionUuid
        }

        trans_result = getHtml(self.api_url, self.headers, data)

        return trans_result


if __name__ == '__main__':
    Tencent = TencentTrans()
    text = 'そうすると、可笑しいことや変なこと、滑稽なことや正しくないこと、反対にやるべきことが见えてくるから。とにかく、何かにどっぷりはまっていると、周りのことが见えなくなってしまう。だから、时々一歩引くと物事が见えてくる。'
    print(Tencent.get_trans_result(text))
# ---------------- translate/baidufanyi.py ----------------
# Object-oriented scraper for the Baidu Translate web endpoint
# (fetches token and sign automatically).
import requests
import js2py
import json
import re
import os
from traceback import print_exc

this_file_dir = os.path.dirname(os.path.realpath(__file__))


class BaiduWeb():
    """Scraper for the fanyi.baidu.com web translation API."""

    def __init__(self, query_str):
        # One session so Baidu's cookies persist across the whole flow.
        self.session = requests.session()
        self.session.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
        }
        self.baidu_url = "https://www.baidu.com/"
        self.root_url = "https://fanyi.baidu.com/"
        self.lang_url = "https://fanyi.baidu.com/langdetect"
        self.trans_url = "https://fanyi.baidu.com/v2transapi"
        self.query_str = query_str

    def get_token_gtk(self):
        """Scrape `token` and `gtk` (the sign inputs) from the translate page."""
        # The first GET warms up cookies; the second response is parsed.
        self.session.get(self.root_url)
        page = self.session.get(self.root_url).content.decode()
        token = re.findall(r"token: '(.*?)'", page)[0]
        gtk = re.findall(r"window.gtk = '(.*?)'", page)[0]
        return token, gtk

    def generate_sign(self, gtk):
        """Compute the request `sign` by running Baidu's webtrans.js via js2py."""
        # Step 1: compile webtrans.js with gtk injected (body continues below).
        # (cont.) prepare the js2py evaluation environment
        context = js2py.EvalJs()
        with open(this_file_dir + '/webtrans.js', encoding='utf8') as f:
            js_data = f.read()
        # Inject the scraped gtk literal in place of `window[l]` so the
        # browser-only JS runs headless under js2py.
        js_data = re.sub("window\[l\]", '"' + gtk + '"', js_data)
        # js_data = re.sub("window\[l\]", "\"{}\"".format(gtk), js_data)
        # print(js_data)
        context.execute(js_data)
        sign = context.e(self.query_str)

        return sign

    def lang_detect(self):

        '''Detect the language conversion pair, e.g. zh --> en'''

        lang_resp = self.session.post(self.lang_url, data={"query": self.query_str})
        lang_json_str = lang_resp.content.decode()  # {"error":0,"msg":"success","lan":"zh"}
        lan = json.loads(lang_json_str)['lan']
        # Translate into Chinese unless the source already is Chinese.
        to = "en" if lan == "zh" else "zh"

        return lan, to

    def parse_url(self, post_data):
        # POST the signed request and stash the first result string on self.
        trans_resp = self.session.post(self.trans_url, data=post_data)
        trans_json_str = trans_resp.content.decode()
        trans_json = json.loads(trans_json_str)
        self.result = trans_json["trans_result"]["data"][0]["dst"]

    def run(self):
        # Orchestrates the whole scrape; on any failure self.result becomes a
        # fixed Chinese error string instead of propagating the exception.
        try:
            """实现逻辑"""
            # 1. Get Baidu's cookies (without the Baidu homepage cookie the
            #    API keeps returning error 998)
            self.session.get(self.baidu_url)
            # 2. Scrape Baidu Translate's token and gtk (used to build sign)
            token, gtk = self.get_token_gtk()
            # 3. Generate sign
            sign = self.generate_sign(gtk)
            # 4. Detect the language pair, e.g. zh --> en
            lan, to = self.lang_detect()
            # 5. Send the request, read the response, record the result
            post_data = {
                # "from": lan,
                "from": lan,
                "to": to,
                "query": self.query_str,
                "transtype": "realtime",
                "simple_means_flag": 3,
                "sign": sign,
                "token": token
            }
            self.parse_url(post_data)

        except Exception:
            print_exc()
            self.result = '网页百度:我抽风啦!'
        # (tail of BaiduWeb.run — result was set either by parse_url or by
        # the except fallback above)
        return self.result


if __name__ == '__main__':
    webfanyi = BaiduWeb('一歩ひくと见えてくる 何かの中にどっぷり浸かっていると何がなんだか分からなくなってしまうことがある。')
    a = webfanyi.run()
    print(a)
// ---------------- translate/webtrans.js ----------------
// webtrans.js

// n: shift/xor/add mixer driven 3 chars at a time by key string o
// (same scheme as Google's RL mixer).
function n(r, o) {
    for (var t = 0; t < o.length - 2; t += 3) {
        var a = o.charAt(t + 2);
        a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a),
        a = "+" === o.charAt(t + 1) ? r >>> a : r << a,
        r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a
    }
    return r
}
// e: Baidu's sign generator for input string r.
function e(r) {
    // Detect surrogate pairs (astral-plane chars need special truncation).
    var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
    if (null === o) {
        // Long inputs are reduced to head + middle + tail (10 chars each).
        var t = r.length;
        t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))
    } else {
        for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++)
            "" !== e[C] && f.push.apply(f, a(e[C].split(""))),
            C !== h - 1 && f.push(o[C]);
        var g = f.length;
        g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""))
    }
    var u = void 0,
        // l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
        i = null;
    // NOTE: `window[l]` (i.e. window.gtk) is textually replaced with the
    // scraped gtk literal by baidufanyi.py before this runs under js2py.
    u = null !== i ? i : (i = window[l] || "") || "";
    // gtk has the form "<m>.<s>"; the two halves seed and finalize the hash.
    for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
        // UTF-8 encode r into byte array S (surrogate pairs handled below).
        var A = r.charCodeAt(v);
        128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ?
        // (cont.) combine the surrogate pair and emit its 4 UTF-8 bytes;
        // otherwise 3 bytes (or the 2-byte/1-byte cases handled above).
        (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)),
        S[c++] = A >> 18 | 240,
        S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224,
        S[c++] = A >> 6 & 63 | 128),
        S[c++] = 63 & A | 128)
    }
    // F decodes to "+-a^+6" and D to "+-3^+b+-f" — the same two mixing keys
    // as Google's tk algorithm. Fold every byte through n() with F, then
    // finalize with D and the second gtk half (s).
    for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++)
        p += S[b],
        p = n(p, F);
    // Clamp to unsigned 31-bit range, reduce mod 1e6, and append the
    // checksum half: "<p>.<p ^ m>".
    return p = n(p, D),
        p ^= s,
        0 > p && (p = (2147483647 & p) + 2147483648),
        p %= 1e6,
        p.toString() + "." + (p ^ m)
}