├── .gitignore
├── README.md
├── config.py
├── demo
├── 8801_win32_JAP_2020-10-30-20-04-47-887566.jpg
├── CH1.jpg
├── CH2.jpg
├── DangoOCR_ENG_2021-01-03_19-43-13.jpg
├── ENG_2.jpg
└── KOR.jpg
├── demo_result
├── 8801_win32_JAP_2020-10-30-20-04-47-887566.jpg
├── CH1.jpg
├── CH2.jpg
├── DangoOCR_ENG_2021-01-03_19-43-13.jpg
├── ENG_2.jpg
└── KOR.jpg
├── download_model.sh
├── font
├── CH_ENG.TTC
├── KOR.ttf
└── japan.ttc
├── ocr_server.py
├── ppocr
├── __init__.py
├── data
│ ├── __init__.py
│ ├── cls
│ │ ├── __init__.py
│ │ ├── dataset_traversal.py
│ │ └── randaugment.py
│ ├── det
│ │ ├── __init__.py
│ │ ├── data_augment.py
│ │ ├── dataset_traversal.py
│ │ ├── db_process.py
│ │ ├── east_process.py
│ │ ├── make_border_map.py
│ │ ├── make_shrink_map.py
│ │ ├── random_crop_data.py
│ │ └── sast_process.py
│ ├── reader_main.py
│ └── rec
│ │ ├── __init__.py
│ │ ├── dataset_traversal.py
│ │ ├── img_tools.py
│ │ └── text_image_aug
│ │ ├── augment.py
│ │ └── warp_mls.py
├── modeling
│ ├── __init__.py
│ ├── architectures
│ │ ├── __init__.py
│ │ ├── cls_model.py
│ │ ├── det_model.py
│ │ └── rec_model.py
│ ├── backbones
│ │ ├── __init__.py
│ │ ├── det_mobilenet_v3.py
│ │ ├── det_resnet_vd.py
│ │ ├── det_resnet_vd_sast.py
│ │ ├── rec_mobilenet_v3.py
│ │ ├── rec_resnet_fpn.py
│ │ └── rec_resnet_vd.py
│ ├── common_functions.py
│ ├── heads
│ │ ├── __init__.py
│ │ ├── cls_head.py
│ │ ├── det_db_head.py
│ │ ├── det_east_head.py
│ │ ├── det_sast_head.py
│ │ ├── rec_attention_head.py
│ │ ├── rec_ctc_head.py
│ │ ├── rec_seq_encoder.py
│ │ ├── rec_srn_all_head.py
│ │ └── self_attention
│ │ │ ├── __init__.py
│ │ │ └── model.py
│ ├── losses
│ │ ├── __init__.py
│ │ ├── cls_loss.py
│ │ ├── det_basic_loss.py
│ │ ├── det_db_loss.py
│ │ ├── det_east_loss.py
│ │ ├── det_sast_loss.py
│ │ ├── rec_attention_loss.py
│ │ ├── rec_ctc_loss.py
│ │ └── rec_srn_loss.py
│ └── stns
│ │ ├── __init__.py
│ │ └── tps.py
├── optimizer.py
├── postprocess
│ ├── __init__.py
│ ├── db_postprocess.py
│ ├── east_postprocess.py
│ ├── lanms
│ │ ├── .gitignore
│ │ ├── .ycm_extra_conf.py
│ │ ├── Makefile
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── adaptor.cpp
│ │ ├── include
│ │ │ ├── clipper
│ │ │ │ ├── clipper.cpp
│ │ │ │ └── clipper.hpp
│ │ │ └── pybind11
│ │ │ │ ├── attr.h
│ │ │ │ ├── buffer_info.h
│ │ │ │ ├── cast.h
│ │ │ │ ├── chrono.h
│ │ │ │ ├── class_support.h
│ │ │ │ ├── common.h
│ │ │ │ ├── complex.h
│ │ │ │ ├── descr.h
│ │ │ │ ├── eigen.h
│ │ │ │ ├── embed.h
│ │ │ │ ├── eval.h
│ │ │ │ ├── functional.h
│ │ │ │ ├── numpy.h
│ │ │ │ ├── operators.h
│ │ │ │ ├── options.h
│ │ │ │ ├── pybind11.h
│ │ │ │ ├── pytypes.h
│ │ │ │ ├── stl.h
│ │ │ │ ├── stl_bind.h
│ │ │ │ └── typeid.h
│ │ └── lanms.h
│ ├── locality_aware_nms.py
│ └── sast_postprocess.py
└── utils
│ ├── __init__.py
│ ├── character.py
│ ├── check.py
│ ├── corpus
│ ├── occitan_corpus.txt
│ ├── readme.md
│ └── readme_ch.md
│ ├── dict
│ ├── french_dict.txt
│ ├── german_dict.txt
│ ├── japan_dict.txt
│ ├── korean_dict.txt
│ └── occitan_dict.txt
│ ├── ic15_dict.txt
│ ├── ppocr_keys_v1.txt
│ ├── save_load.py
│ ├── stats.py
│ └── utility.py
├── predict_system.py
├── prod_deploy.sh
├── requirements.txt
├── test.py
├── tools
├── infer
│ ├── __init__.py
│ ├── predict_cls.py
│ ├── predict_det.py
│ ├── predict_rec.py
│ └── utility.py
└── logger.py
└── translate
├── API.py
├── Bing.py
├── Google.py
├── GoogleJS.js
├── Tencent.py
├── baidufanyi.py
└── webtrans.js
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 | .idea/
9 |
10 | # Distribution / packaging
11 | .Python
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | pip-wheel-metadata/
25 | share/python-wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .nox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *.cover
51 | *.py,cover
52 | .hypothesis/
53 | .pytest_cache/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | .python-version
87 |
88 | # pipenv
89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
92 | # install all needed dependencies.
93 | #Pipfile.lock
94 |
95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
96 | __pypackages__/
97 |
98 | # Celery stuff
99 | celerybeat-schedule
100 | celerybeat.pid
101 |
102 | # SageMath parsed files
103 | *.sage.py
104 |
105 | # Environments
106 | .env
107 | .venv
108 | env/
109 | venv/
110 | ENV/
111 | env.bak/
112 | venv.bak/
113 |
114 | # Spyder project settings
115 | .spyderproject
116 | .spyproject
117 |
118 | # Rope project settings
119 | .ropeproject
120 |
121 | # mkdocs documentation
122 | /site
123 |
124 | # mypy
125 | .mypy_cache/
126 | .dmypy.json
127 | dmypy.json
128 |
129 | # Pyre type checker
130 | .pyre/
131 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Dango-OCR算法服务
2 |
3 | #### 说明:
4 | + [DangoOCR](https://github.com/zhangming8/Dango-ocr)是一个开源的文字识别工具,通过调用的本算法服务实现文字识别。
5 | + 本服务基于百度开源的[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR),如果要部署自己模型请在该项目中的develop分支训练。
6 | + 相关blog [使用飞桨一步步实现多语言OCR文字识别软件](https://blog.csdn.net/u010397980/article/details/111940566)
7 |
8 | #### 环境:
9 | + python>=3.6, paddlepaddle-gpu >= 1.8.5
10 |
11 | #### 训练模型:
12 | + 模型包括检测模型、识别模型。训练过程可以[参考文档](https://github.com/PaddlePaddle/PaddleOCR/tree/develop/doc/doc_ch)
13 | + 检测模型用的是DBnet, 所有的识别模型都用的是CRNN
14 |
15 | #### 导出模型:
16 | + 训练模型导出为inference模型(导出后不必重新定义网络结构,便于部署),[参考](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/inference.md)
17 |
18 | #### 部署/启动服务
19 | + 修改config.py中的检测(det_model_dir)和识别模型(rec_model_dir)路径。其中不同语言的检测模型是共用的,识别模型需要单独训练。
20 | + 目前所有语言的识别都没有使用方向模型,所以所有语言的use_angle_cls=False
21 | + 执行./prod_deploy.sh启动服务。其中'reco_language'为检测模型后面并联的多个识别模型,'port'为算法服务的端口号
22 | + 启动成功会在当前目录创建'log'文件夹,里面会记录启动的日志便于调试代码
23 |
24 | #### 测试/调用服务
25 | + 执行python test.py会读取'demo'文件夹内的图片并调用本服务,同时把结果保存在"demo_result"文件夹,如下图为几个示例
26 |
27 |
28 | + 结果 1
29 |
30 |

31 |
32 |
33 | + 结果 2
34 |
35 |

36 |
37 |
38 | + 结果 3
39 |
40 |

41 |
42 |
43 | + 结果 4
44 |
45 |

46 |
47 |
48 | + 结果 5
49 |
50 |

51 |
52 |
53 | + 结果 6
54 |
55 |

56 |
57 |
58 | #### 参考:
59 | + OCR算法参考百度PaddleOCR: https://github.com/PaddlePaddle/PaddleOCR
60 | + 本OCR服务: https://github.com/zhangming8/ocr_algo_server
61 | + 最终OCR软件: https://github.com/zhangming8/Dango-ocr
62 |
--------------------------------------------------------------------------------
/demo/8801_win32_JAP_2020-10-30-20-04-47-887566.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/8801_win32_JAP_2020-10-30-20-04-47-887566.jpg
--------------------------------------------------------------------------------
/demo/CH1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/CH1.jpg
--------------------------------------------------------------------------------
/demo/CH2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/CH2.jpg
--------------------------------------------------------------------------------
/demo/DangoOCR_ENG_2021-01-03_19-43-13.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/DangoOCR_ENG_2021-01-03_19-43-13.jpg
--------------------------------------------------------------------------------
/demo/ENG_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/ENG_2.jpg
--------------------------------------------------------------------------------
/demo/KOR.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/KOR.jpg
--------------------------------------------------------------------------------
/demo_result/8801_win32_JAP_2020-10-30-20-04-47-887566.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/8801_win32_JAP_2020-10-30-20-04-47-887566.jpg
--------------------------------------------------------------------------------
/demo_result/CH1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/CH1.jpg
--------------------------------------------------------------------------------
/demo_result/CH2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/CH2.jpg
--------------------------------------------------------------------------------
/demo_result/DangoOCR_ENG_2021-01-03_19-43-13.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/DangoOCR_ENG_2021-01-03_19-43-13.jpg
--------------------------------------------------------------------------------
/demo_result/ENG_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/ENG_2.jpg
--------------------------------------------------------------------------------
/demo_result/KOR.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/KOR.jpg
--------------------------------------------------------------------------------
/download_model.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Download and unpack the PaddleOCR inference models (one detection model
# plus one recognition model per supported language).

# download text detect model
save_path="inference/det_db"
mkdir -p "$save_path"
cd "$save_path" || exit 1
wget https://paddleocr.bj.bcebos.com/20-09-22/server/det/ch_ppocr_server_v1.1_det_infer.tar
tar xvf ch_ppocr_server_v1.1_det_infer.tar
cd - || exit 1

# download text recognize models
save_path="inference/rec_crnn"
mkdir -p "$save_path"
cd "$save_path" || exit 1
wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_infer.tar
wget https://paddleocr.bj.bcebos.com/20-09-22/server/rec/ch_ppocr_server_v1.1_rec_infer.tar
wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/en/en_ppocr_mobile_v1.1_rec_infer.tar
wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/fr/french_ppocr_mobile_v1.1_rec_infer.tar
wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/ge/german_ppocr_mobile_v1.1_rec_infer.tar
wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/jp/japan_ppocr_mobile_v1.1_rec_infer.tar
wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/kr/korean_ppocr_mobile_v1.1_rec_infer.tar
# Use a glob instead of parsing `ls` output (robust to unusual filenames,
# see ShellCheck SC2045); quote expansions throughout.
for file in *.tar
do
    echo "unzip ${file}"
    tar xvf "${file}"
done
cd - || exit 1
echo "download done"
--------------------------------------------------------------------------------
/font/CH_ENG.TTC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/font/CH_ENG.TTC
--------------------------------------------------------------------------------
/font/KOR.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/font/KOR.ttf
--------------------------------------------------------------------------------
/font/japan.ttc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/font/japan.ttc
--------------------------------------------------------------------------------
/ocr_server.py:
--------------------------------------------------------------------------------
1 | # -*-coding:utf-8-*-
2 | import os
3 | import json
4 | import cv2
5 | import sys
6 | import traceback
7 | import argparse
8 | from flask import Flask, Response, request
9 | import datetime
10 | from queue import Queue
11 | import threading
12 | import multiprocessing
13 | import time
14 | import random
15 | from setproctitle import setproctitle
16 |
17 | from config import Config
18 | import tools.logger as logger_
19 | from tools.infer.utility import base64_to_cv2, mkdir
20 | from predict_system import OCR
21 | from translate.API import translate
22 |
# Flask application serving the OCR endpoint; static files served from root.
app = Flask("server", static_url_path='')
app.config['PROPAGATE_EXCEPTIONS'] = True
# Bounded hand-off queue feeding the background save_img() thread with
# (save_basename, decoded_images, ocr_result) triples.
_save_image_q = Queue(1000)

config = Config()
28 |
29 |
@app.route("/dango/algo/ocr/server", methods=['POST', 'GET'])
def ocr_server():
    """Handle one OCR request.

    Reads form fields: 'image' (base64 image), 'language_type', 'user_id',
    optional 'platform', and optional 'translate' ('yes'/'no').  Runs
    recognition, enqueues the image + result for asynchronous saving, and —
    when requested — translates the recognized text via the Baidu API.

    Returns a JSON Response: {'status': 0, 'data': {...}} on success,
    {'status': -1, 'data': 'None'} on any internal error.
    """
    try:
        logger.info("-" * 50)
        logger.info("端口 {} /dango/algo/ocr/server 收到请求".format(g_port))

        now_time = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        # Day prefix (YYYY-MM-DD) groups saved images into daily folders.
        day = "-".join(now_time.split("-")[:3])
        content = request.form

        images = content['image']
        language_type = content['language_type']
        user_id = content["user_id"]
        platform = content.get('platform', None)
        need_translate = content.get("translate", 'no')

        s1 = time.time()
        images_decode = [base64_to_cv2(images)]
        logger.info("收到: {}, {}, {}".format(user_id, platform, language_type))

        result = ocr.predict(language_type, images=images_decode)
        logger.info("识别结果为: {}, 是否需要翻译: {}".format(result, need_translate))
        save_basename = "{}/{}/{}_{}_{}_{}_{}".format(config.save_dir + "/" + g_port, day, g_port, platform, user_id,
                                                      language_type, now_time)
        # Persisting is done off-thread so the response is not blocked by disk I/O.
        _save_image_q.put([save_basename, images_decode, result])

        translated = False
        response_data = {'result': result, 'translated': translated}
        if need_translate == 'yes':
            logger.info("开始进行翻译...")
            s3 = time.time()
            # Pick one of the configured Baidu credentials at random to spread quota.
            rand_idx = random.randint(0, len(config.baidu_translate_secret_key) - 1)
            fanyi_app_id = config.baidu_translate_app_id[rand_idx]
            fanyi_secret_key = config.baidu_translate_secret_key[rand_idx]
            translate_result, translated = translate(result[0], fanyi_app_id, fanyi_secret_key, logger)
            if translated:
                logger.info("翻译成功: {}, 结果为: {}".format(translated, translate_result))
                response_data['translate_result'] = translate_result
                response_data['translated'] = translated
            else:
                # On failure translate_result carries the error code.
                logger.info("翻译失败: {}, 错误码: {}".format(translated, translate_result))
            s4 = time.time()
            logger.info("翻译耗时: {}".format(s4 - s3))

        s2 = time.time()
        logger.info("==>> 完成, 总耗时 {} , 开始回复: {}".format(s2 - s1, response_data))
        return Response(json.dumps({'status': 0, 'data': response_data}),
                        mimetype='application/json')

    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate instead of being turned into a 200 error payload.
        e = traceback.format_exc()
        logger.info("错误")
        logger.error(e)
        return Response(json.dumps({'status': -1, 'data': 'None'}), mimetype='application/json')
85 |
86 |
def save_img():
    """Daemon loop: persist queued request images and their OCR results.

    Consumes (save_basename, images, words_result) triples from the module
    queue and writes each image as <basename>_<idx>.jpg with a sibling .txt
    holding its recognition result.  Never returns; failures on one item are
    logged and the loop continues.
    """
    while True:
        try:
            save_basename, image_cv2, words_result = _save_image_q.get(block=True)
            assert len(image_cv2) == len(words_result)
            for idx, img in enumerate(image_cv2):
                save_name = save_basename + "_" + str(idx) + ".jpg"
                mkdir(os.path.dirname(save_name))
                cv2.imwrite(save_name, img)
                # Explicit utf-8 so non-ASCII OCR text is written reliably
                # regardless of the platform's default encoding.
                with open(save_name.replace(".jpg", ".txt"), "w", encoding="utf-8") as f:
                    f.write(str(words_result[idx]))
                logger.info('保存图片 {} 及 txt'.format(save_name))
        except Exception:
            # Narrowed from a bare `except:`: keep the loop alive on bad items
            # but allow KeyboardInterrupt/SystemExit to stop the thread.
            e = traceback.format_exc()
            logger.error(e)
102 |
103 |
def do_work(gpu, port):
    """Bind one OCR worker to a GPU and serve Flask on the given port.

    Sets module-level globals (logger, g_port, ocr) that the request handler
    reads; written so it could also be launched once per process (see the
    commented multiprocessing code in __main__).  Blocks in app.run().
    """
    global logger, g_port, ocr
    try:
        # Restrict this process to the selected GPU before model init.
        os.environ["CUDA_VISIBLE_DEVICES"] = "{}".format(gpu)
        logger = logger_.get_logger("./log/ocr_{}.log".format(port))
        g_port = port
        logger.info("===>>> 初始化模型到gpu:{}, port: {}".format(gpu, port))
        ocr = OCR(config, logger, language_list)
        logger.info("==>> 启动成功")
        # threaded=True lets Flask handle concurrent requests in this process.
        app.run(host=config.host, port=port, threaded=True)

    except BaseException as e:
        # NOTE(review): if get_logger() itself raised, `logger` is unbound
        # here and this handler would NameError — confirm/guard.
        logger.error('错误,启动flask异常{}'.format(e))
        logger.info(traceback.format_exc())
119 |
if __name__ == '__main__':
    # CLI: gpu/port lists are "_"-separated; recognition languages are ","-separated.
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=str, help='gpu index: 0_1_2_3', default="0")
    parser.add_argument('--port', type=str, help='server port: 8811_8812_8813', default="8811")
    parser.add_argument('--det', type=str, help='detection model', default="DB")
    parser.add_argument('--rec', type=str, help='recognize language model', default="ch,japan,en,korean")
    args = parser.parse_args()

    # Make the process identifiable in ps/top output.
    setproctitle('ocr_server_{}_{}'.format(args.port, args.rec))

    ports = args.port.split("_")  # [args.port]
    gpus = args.gpu.split("_")  # [args.gpu]
    language_list = args.rec.replace(" ", "").split(",")
    # A single gpu index is reused for every requested port.
    if len(gpus) == 1:
        gpus = gpus * len(ports)

    gpu_num = len(gpus)
    port_num = len(ports)

    if gpu_num != port_num:
        print('启动失败:GPU数量 != 端口数量!')
        sys.exit(1)

    # Background thread persisting request images/results to disk.
    threading.Thread(target=save_img, name="save img").start()
    # Single-process mode: only the first gpu/port pair is served; the
    # multi-process variant is kept below for reference.
    do_work(gpu=gpus[0], port=ports[0])

    # pool = multiprocessing.Pool(processes=port_num)
    # for index in range(port_num):
    #     pool.apply_async(do_work, (gpus[index], ports[index]))
    # pool.close()
    # pool.join()
    # save_img()
152 |
--------------------------------------------------------------------------------
/ppocr/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/data/cls/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/data/cls/dataset_traversal.py:
--------------------------------------------------------------------------------
1 | # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import os
16 | import sys
17 | import math
18 | import random
19 | import numpy as np
20 | import cv2
21 |
22 | from ppocr.utils.utility import initial_logger
23 | from ppocr.utils.utility import get_image_file_list
24 |
25 | logger = initial_logger()
26 |
27 | from ppocr.data.rec.img_tools import resize_norm_img, warp
28 | from ppocr.data.cls.randaugment import RandAugment
29 |
30 |
def random_crop(img):
    """Randomly crop a horizontal window from very wide images.

    Images wider than 4x their height are cropped to a random width in
    [2*height, width]; anything else is returned unchanged.
    """
    height, width = img.shape[:2]
    if width <= height * 4:
        return img
    crop_w = random.randint(height * 2, width)
    start = random.randint(0, width - crop_w)
    return img[:, start:start + crop_w, :]
39 |
40 |
class SimpleReader(object):
    """Text-direction-classifier data reader.

    Train/eval mode reads "image_path\\tlabel" lines from a label file;
    inference mode reads raw images from ``infer_img``.  Calling an instance
    returns a reader function yielding normalized image tensors (optionally
    batched).
    """

    def __init__(self, params):
        # params: configuration dict; only the keys accessed below are used.
        if params['mode'] != 'train':
            self.num_workers = 1
        else:
            self.num_workers = params['num_workers']
        if params['mode'] != 'test':
            self.img_set_dir = params['img_set_dir']
            self.label_file_path = params['label_file_path']
        self.use_gpu = params['use_gpu']
        self.image_shape = params['image_shape']
        self.mode = params['mode']
        self.infer_img = params['infer_img']
        self.use_distort = params['mode'] == 'train' and params['distort']
        self.randaug = RandAugment()
        self.label_list = params['label_list']
        # NOTE(review): this overrides the use_distort computed above whenever
        # 'distort' is present (regardless of mode) — confirm intended.
        if "distort" in params:
            self.use_distort = params['distort'] and params['use_gpu']
            if not params['use_gpu']:
                logger.info(
                    "Distort operation can only support in GPU.Distort will be set to False."
                )
        if params['mode'] == 'train':
            self.batch_size = params['train_batch_size_per_card']
            self.drop_last = True
        else:
            self.batch_size = params['test_batch_size_per_card']
            self.drop_last = False
            # Distortion is a train-only augmentation.
            self.use_distort = False

    def __call__(self, process_id):
        """Return a reader callable for worker ``process_id``.

        Returns sample_iter_reader when inferring on raw images, otherwise
        batch_iter_reader.
        """
        # Outside training every worker reads the full (unsharded) data.
        if self.mode != 'train':
            process_id = 0

        def get_device_num():
            # Number of devices sharing each batch, derived from env vars.
            if self.use_gpu:
                gpus = os.environ.get("CUDA_VISIBLE_DEVICES", "1")
                gpu_num = len(gpus.split(','))
                return gpu_num
            else:
                cpu_num = os.environ.get("CPU_NUM", 1)
                return int(cpu_num)

        def sample_iter_reader():
            # Yields norm_img (inference) or (norm_img, label) (train/eval).
            if self.mode != 'train' and self.infer_img is not None:
                image_file_list = get_image_file_list(self.infer_img)
                for single_img in image_file_list:
                    img = cv2.imread(single_img)
                    # Promote grayscale input to 3-channel BGR.
                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
                    norm_img = resize_norm_img(img, self.image_shape)

                    # Add a leading batch axis.
                    norm_img = norm_img[np.newaxis, :]
                    yield norm_img
            else:
                with open(self.label_file_path, "rb") as fin:
                    label_infor_list = fin.readlines()
                img_num = len(label_infor_list)
                # Shuffle sample order once per epoch via an index list.
                img_id_list = list(range(img_num))
                random.shuffle(img_id_list)
                if sys.platform == "win32" and self.num_workers != 1:
                    print("multiprocess is not fully compatible with Windows."
                          "num_workers will be 1.")
                    self.num_workers = 1
                # Every device/worker must get at least one sample per batch.
                if self.batch_size * get_device_num(
                ) * self.num_workers > img_num:
                    raise Exception(
                        "The number of the whole data ({}) is smaller than the batch_size * devices_num * num_workers ({})".
                        format(img_num, self.batch_size * get_device_num() *
                               self.num_workers))
                # Shard samples across workers by striding with num_workers.
                for img_id in range(process_id, img_num, self.num_workers):
                    label_infor = label_infor_list[img_id_list[img_id]]
                    substr = label_infor.decode('utf-8').strip("\n").split("\t")
                    label = self.label_list.index(substr[1])

                    img_path = self.img_set_dir + "/" + substr[0]
                    img = cv2.imread(img_path)
                    if img is None:
                        logger.info("{} does not exist!".format(img_path))
                        continue
                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

                    if self.use_distort:
                        # Geometric warp followed by RandAugment color/shape ops.
                        img = warp(img, 10)
                        img = self.randaug(img)
                    norm_img = resize_norm_img(img, self.image_shape)
                    norm_img = norm_img[np.newaxis, :]
                    yield (norm_img, label)

        def batch_iter_reader():
            # Group samples into lists of batch_size; the trailing partial
            # batch is yielded only when drop_last is False.
            batch_outs = []
            for outs in sample_iter_reader():
                batch_outs.append(outs)
                if len(batch_outs) == self.batch_size:
                    yield batch_outs
                    batch_outs = []
            if not self.drop_last:
                if len(batch_outs) != 0:
                    yield batch_outs

        if self.infer_img is None:
            return batch_iter_reader
        return sample_iter_reader
--------------------------------------------------------------------------------
/ppocr/data/cls/randaugment.py:
--------------------------------------------------------------------------------
1 | # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 | from __future__ import unicode_literals
19 |
20 | from PIL import Image, ImageEnhance, ImageOps
21 | import numpy as np
22 | import random
23 | import six
24 |
25 |
class RawRandAugment(object):
    """RandAugment over PIL images.

    Applies ``num_layers`` randomly chosen ops per call, each with a fixed
    strength derived from ``magnitude`` (scaled against max_level=10).
    ``fillcolor`` fills areas exposed by affine ops.
    """

    def __init__(self, num_layers=2, magnitude=5, fillcolor=(128, 128, 128)):
        self.num_layers = num_layers
        self.magnitude = magnitude
        self.max_level = 10

        # Map each op name to its concrete strength at this magnitude.
        abso_level = self.magnitude / self.max_level
        self.level_map = {
            "shearX": 0.3 * abso_level,
            "shearY": 0.3 * abso_level,
            "translateX": 150.0 / 331 * abso_level,
            "translateY": 150.0 / 331 * abso_level,
            "rotate": 30 * abso_level,
            "color": 0.9 * abso_level,
            "posterize": int(4.0 * abso_level),
            "solarize": 256.0 * abso_level,
            "contrast": 0.9 * abso_level,
            "sharpness": 0.9 * abso_level,
            "brightness": 0.9 * abso_level,
            # These three take no strength parameter.
            "autocontrast": 0,
            "equalize": 0,
            "invert": 0
        }

        # from https://stackoverflow.com/questions/5252170/
        # specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand
        def rotate_with_fill(img, magnitude):
            # Rotate and composite over a gray canvas so corners are filled.
            rot = img.convert("RGBA").rotate(magnitude)
            return Image.composite(rot,
                                   Image.new("RGBA", rot.size, (128, ) * 4),
                                   rot).convert(img.mode)

        rnd_ch_op = random.choice

        # Dispatch table: op name -> callable(img, strength).  Sign of the
        # geometric/color ops is randomized per application via rnd_ch_op.
        self.func = {
            "shearX": lambda img, magnitude: img.transform(
                img.size,
                Image.AFFINE,
                (1, magnitude * rnd_ch_op([-1, 1]), 0, 0, 1, 0),
                Image.BICUBIC,
                fillcolor=fillcolor),
            "shearY": lambda img, magnitude: img.transform(
                img.size,
                Image.AFFINE,
                (1, 0, 0, magnitude * rnd_ch_op([-1, 1]), 1, 0),
                Image.BICUBIC,
                fillcolor=fillcolor),
            "translateX": lambda img, magnitude: img.transform(
                img.size,
                Image.AFFINE,
                (1, 0, magnitude * img.size[0] * rnd_ch_op([-1, 1]), 0, 1, 0),
                fillcolor=fillcolor),
            "translateY": lambda img, magnitude: img.transform(
                img.size,
                Image.AFFINE,
                (1, 0, 0, 0, 1, magnitude * img.size[1] * rnd_ch_op([-1, 1])),
                fillcolor=fillcolor),
            "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude),
            "color": lambda img, magnitude: ImageEnhance.Color(img).enhance(
                1 + magnitude * rnd_ch_op([-1, 1])),
            "posterize": lambda img, magnitude:
            ImageOps.posterize(img, magnitude),
            "solarize": lambda img, magnitude:
            ImageOps.solarize(img, magnitude),
            "contrast": lambda img, magnitude:
            ImageEnhance.Contrast(img).enhance(
                1 + magnitude * rnd_ch_op([-1, 1])),
            "sharpness": lambda img, magnitude:
            ImageEnhance.Sharpness(img).enhance(
                1 + magnitude * rnd_ch_op([-1, 1])),
            "brightness": lambda img, magnitude:
            ImageEnhance.Brightness(img).enhance(
                1 + magnitude * rnd_ch_op([-1, 1])),
            "autocontrast": lambda img, magnitude:
            ImageOps.autocontrast(img),
            "equalize": lambda img, magnitude: ImageOps.equalize(img),
            "invert": lambda img, magnitude: ImageOps.invert(img)
        }

    def __call__(self, img):
        """Apply num_layers randomly chosen ops to *img* and return it."""
        avaiable_op_names = list(self.level_map.keys())
        for layer_num in range(self.num_layers):
            op_name = np.random.choice(avaiable_op_names)
            img = self.func[op_name](img, self.level_map[op_name])
        return img
111 |
112 |
class RandAugment(RawRandAugment):
    """RandAugment wrapper accepting both PIL images and numpy arrays.

    Ndarray input is converted to a PIL image, augmented by the parent
    class, and converted back to an ndarray on the way out.
    """

    def __init__(self, *args, **kwargs):
        # six.PY2 guard kept for legacy-interpreter compatibility.
        if six.PY2:
            super(RandAugment, self).__init__(*args, **kwargs)
            return
        super().__init__(*args, **kwargs)

    def __call__(self, img):
        pil_img = img
        if not isinstance(pil_img, Image.Image):
            pil_img = Image.fromarray(np.ascontiguousarray(pil_img))

        if six.PY2:
            augmented = super(RandAugment, self).__call__(pil_img)
        else:
            augmented = super().__call__(pil_img)

        if isinstance(augmented, Image.Image):
            augmented = np.asarray(augmented)

        return augmented
136 |
--------------------------------------------------------------------------------
/ppocr/data/det/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/ppocr/data/det/__init__.py
--------------------------------------------------------------------------------
/ppocr/data/det/data_augment.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 | from __future__ import unicode_literals
7 |
8 | import numpy as np
9 | import random
10 | import cv2
11 | import math
12 |
13 | import imgaug
14 | import imgaug.augmenters as iaa
15 |
16 |
def AugmentData(data):
    """Randomly flip/rotate/resize ``data['image']`` and remap its polygons.

    A deterministic snapshot of the augmenter is taken so the image and the
    keypoints receive exactly the same random transform.  ``data['image']``
    and ``data['polys']`` are replaced in place; ``data`` is returned.
    """
    img = data['image']
    shape = img.shape

    aug = iaa.Sequential([
        iaa.Fliplr(0.5),
        iaa.Affine(rotate=(-10, 10)),
        iaa.Resize((0.5, 3)),
    ]).to_deterministic()

    def transform_poly(poly):
        # Run each polygon's vertices through the same deterministic aug.
        kps = [imgaug.Keypoint(p[0], p[1]) for p in poly]
        moved = aug.augment_keypoints(
            [imgaug.KeypointsOnImage(kps, shape=shape)])[0].keypoints
        return [(p.x, p.y) for p in moved]

    data['image'] = aug.augment_image(img)
    data['polys'] = np.array([transform_poly(poly) for poly in data['polys']])
    return data
48 |
--------------------------------------------------------------------------------
/ppocr/data/det/make_border_map.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 | from __future__ import unicode_literals
7 |
8 | import numpy as np
9 | import cv2
10 | np.seterr(divide='ignore', invalid='ignore')
11 | import pyclipper
12 | from shapely.geometry import Polygon
13 | import sys
14 | import warnings
15 | warnings.simplefilter("ignore")
16 |
17 |
def draw_border_map(polygon, canvas, mask, shrink_ratio):
    """Render the DB border (threshold) map for one text polygon.

    The polygon is dilated outward by the "unclip" distance
    D = area * (1 - shrink_ratio^2) / perimeter; inside the dilated region
    every pixel receives ``1 - clip(dist_to_nearest_edge / D, 0, 1)``, so
    values peak at the original contour and decay toward the dilated border.
    ``canvas`` (threshold map) and ``mask`` (coverage mask) are HxW float32
    arrays modified in place.
    """
    polygon = np.array(polygon)
    assert polygon.ndim == 2
    assert polygon.shape[1] == 2

    polygon_shape = Polygon(polygon)
    # Degenerate (zero-area) polygons contribute nothing.
    if polygon_shape.area <= 0:
        return
    # Offset distance from the DB formulation: D = A * (1 - r^2) / L.
    distance = polygon_shape.area * (
        1 - np.power(shrink_ratio, 2)) / polygon_shape.length
    subject = [tuple(l) for l in polygon]
    padding = pyclipper.PyclipperOffset()
    padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)

    # Outward-dilated polygon; Execute returns a list of paths, take the first.
    padded_polygon = np.array(padding.Execute(distance)[0])
    cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)

    # Work only inside the bounding box of the dilated polygon.
    xmin = padded_polygon[:, 0].min()
    xmax = padded_polygon[:, 0].max()
    ymin = padded_polygon[:, 1].min()
    ymax = padded_polygon[:, 1].max()
    width = xmax - xmin + 1
    height = ymax - ymin + 1

    # Shift polygon coordinates into the local bounding-box frame.
    polygon[:, 0] = polygon[:, 0] - xmin
    polygon[:, 1] = polygon[:, 1] - ymin

    # Per-pixel coordinate grids of the local frame.
    xs = np.broadcast_to(
        np.linspace(
            0, width - 1, num=width).reshape(1, width), (height, width))
    ys = np.broadcast_to(
        np.linspace(
            0, height - 1, num=height).reshape(height, 1), (height, width))

    # Distance of each pixel to every polygon edge, normalized by D and
    # reduced with min so the nearest edge wins.
    distance_map = np.zeros((polygon.shape[0], height, width), dtype=np.float32)
    for i in range(polygon.shape[0]):
        j = (i + 1) % polygon.shape[0]
        absolute_distance = _distance(xs, ys, polygon[i], polygon[j])
        distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
    distance_map = distance_map.min(axis=0)

    # Clamp the box to the canvas, then paste with fmax so overlapping
    # polygons keep the largest border value per pixel.
    xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
    xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
    ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
    ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
    canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
        1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height,
                         xmin_valid - xmin:xmax_valid - xmax + width],
        canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])
67 |
68 |
def _distance(xs, ys, point_1, point_2):
    """Per-pixel distance from grid coordinates (xs, ys) to a line segment.

    xs / ys are broadcastable coordinate grids; point_1 and point_2 are the
    (x, y) endpoints of the segment.  Pixels for which the angle subtended
    by the two endpoints is acute fall back to the nearer endpoint distance
    (this matches the reference DB implementation).
    """
    d1_sq = np.square(xs - point_1[0]) + np.square(ys - point_1[1])
    d2_sq = np.square(xs - point_2[0]) + np.square(ys - point_2[1])
    seg_sq = np.square(point_1[0] - point_2[0]) + np.square(
        point_1[1] - point_2[1])

    # Law of cosines; this is the *negated* cosine of the angle at the pixel.
    cosin = (seg_sq - d1_sq - d2_sq) / (2 * np.sqrt(d1_sq * d2_sq))
    square_sin = np.nan_to_num(1 - np.square(cosin))
    result = np.sqrt(d1_sq * d2_sq * square_sin / seg_sq)

    use_endpoint = cosin < 0
    result[use_endpoint] = np.sqrt(np.fmin(d1_sq, d2_sq))[use_endpoint]
    # self.extend_line(point_1, point_2, result)
    return result
94 |
95 |
def extend_line(point_1, point_2, result, shrink_ratio):
    """Draw the outward extensions of segment point_1-point_2 onto ``result``.

    Each endpoint is pushed away from the other along the segment direction
    by a factor of (1 + shrink_ratio), and an anti-aliased 1-px line of value
    4096.0 is drawn from the extension back to its endpoint.

    Returns:
        (ex_point_1, ex_point_2): the two integer extension points.
    """
    scale = 1 + shrink_ratio

    def _extended(src, other):
        # Integer point reached by moving ``src`` away from ``other``.
        return (int(round(src[0] + (src[0] - other[0]) * scale)),
                int(round(src[1] + (src[1] - other[1]) * scale)))

    ex_point_1 = _extended(point_1, point_2)
    cv2.line(
        result,
        tuple(ex_point_1),
        tuple(point_1),
        4096.0,
        1,
        lineType=cv2.LINE_AA,
        shift=0)

    ex_point_2 = _extended(point_2, point_1)
    cv2.line(
        result,
        tuple(ex_point_2),
        tuple(point_2),
        4096.0,
        1,
        lineType=cv2.LINE_AA,
        shift=0)
    return ex_point_1, ex_point_2
124 |
125 |
def MakeBorderMap(data):
    """Build the DB threshold map and mask for all non-ignored polygons.

    Adds 'threshold_map' (values in [thresh_min, thresh_max]) and
    'threshold_mask' to ``data`` and returns it.
    """
    shrink_ratio = 0.4
    thresh_min = 0.3
    thresh_max = 0.7

    im = data['image']
    polys = data['polys']
    ignore_tags = data['ignore_tags']

    canvas = np.zeros(im.shape[:2], dtype=np.float32)
    mask = np.zeros(im.shape[:2], dtype=np.float32)

    for poly, ignored in zip(polys, ignore_tags):
        if not ignored:
            draw_border_map(poly, canvas, mask=mask, shrink_ratio=shrink_ratio)

    # Rescale the [0, 1] border values into [thresh_min, thresh_max].
    data['threshold_map'] = canvas * (thresh_max - thresh_min) + thresh_min
    data['threshold_mask'] = mask
    return data
148 |
--------------------------------------------------------------------------------
/ppocr/data/det/make_shrink_map.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 | from __future__ import unicode_literals
7 |
8 | import numpy as np
9 | import cv2
10 | from shapely.geometry import Polygon
11 | import pyclipper
12 |
13 |
def validate_polygons(polygons, ignore_tags, h, w):
    """Clip polygons into the image and flag degenerate ones as ignored.

    polygons: array-like of shape (num_instances, num_points, 2), modified
    in place.  Polygons with |area| < 1 get ignore_tags[i] = True; polygons
    with positive signed area have their vertex order reversed so all
    retained polygons share one orientation.
    """
    if len(polygons) == 0:
        return polygons, ignore_tags
    assert len(polygons) == len(ignore_tags)

    for polygon in polygons:
        polygon[:, 0] = polygon[:, 0].clip(0, w - 1)
        polygon[:, 1] = polygon[:, 1].clip(0, h - 1)

    for i in range(len(polygons)):
        signed_area = polygon_area(polygons[i])
        if abs(signed_area) < 1:
            ignore_tags[i] = True
        if signed_area > 0:
            polygons[i] = polygons[i][::-1, :]
    return polygons, ignore_tags
32 |
33 |
def polygon_area(polygon):
    """Return the signed area of ``polygon`` via the shoelace formula.

    Args:
        polygon: (N, 2) array of x, y vertices.

    Returns:
        Signed area; the sign encodes vertex orientation, which
        validate_polygons uses (``area > 0``) to normalize winding order.

    Bug fix: the previous code summed (x_next - x) * (y_next - y), i.e.
    delta-x times delta-y, which evaluates to 0 for any axis-aligned
    rectangle and made validate_polygons ignore such boxes entirely.  The
    correct shoelace term is delta-x times the *sum* of the y coordinates.
    """
    edge = 0
    for i in range(polygon.shape[0]):
        next_index = (i + 1) % polygon.shape[0]
        edge += (polygon[next_index, 0] - polygon[i, 0]) * (
            polygon[next_index, 1] + polygon[i, 1])

    return edge / 2.
42 |
43 |
def MakeShrinkMap(data):
    """Build the DB shrink (probability) map ground truth for one sample.

    Every valid polygon is shrunk inward by D = area * (1 - r^2) / perimeter
    (negative pyclipper offset) and rasterized into ``gt``.  Polygons that
    are already ignored, smaller than ``min_text_size``, or that vanish when
    shrunk are instead zeroed out of ``mask`` and flagged in ignore_tags.

    Adds 'shrink_map' (gt) and 'shrink_mask' (mask) to ``data`` and
    returns it.
    """
    min_text_size = 8
    shrink_ratio = 0.4

    image = data['image']
    text_polys = data['polys']
    ignore_tags = data['ignore_tags']

    h, w = image.shape[:2]
    # Clip polygons into the image and drop degenerate ones first.
    text_polys, ignore_tags = validate_polygons(text_polys, ignore_tags, h, w)
    gt = np.zeros((h, w), dtype=np.float32)
    # gt = np.zeros((1, h, w), dtype=np.float32)
    mask = np.ones((h, w), dtype=np.float32)
    for i in range(len(text_polys)):
        polygon = text_polys[i]
        # Bounding-box extent of the polygon.
        height = max(polygon[:, 1]) - min(polygon[:, 1])
        width = max(polygon[:, 0]) - min(polygon[:, 0])
        # height = min(np.linalg.norm(polygon[0] - polygon[3]),
        #              np.linalg.norm(polygon[1] - polygon[2]))
        # width = min(np.linalg.norm(polygon[0] - polygon[1]),
        #             np.linalg.norm(polygon[2] - polygon[3]))
        if ignore_tags[i] or min(height, width) < min_text_size:
            # Too small or ignored: exclude the region from the loss mask.
            cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0)
            ignore_tags[i] = True
        else:
            polygon_shape = Polygon(polygon)
            # DB shrink distance: D = A * (1 - r^2) / L.
            distance = polygon_shape.area * (
                1 - np.power(shrink_ratio, 2)) / polygon_shape.length
            subject = [tuple(l) for l in text_polys[i]]
            padding = pyclipper.PyclipperOffset()
            padding.AddPath(subject, pyclipper.JT_ROUND,
                            pyclipper.ET_CLOSEDPOLYGON)
            # Negative offset shrinks the polygon inward.
            shrinked = padding.Execute(-distance)
            if shrinked == []:
                # Polygon collapsed entirely when shrunk: treat as ignored.
                cv2.fillPoly(mask,
                             polygon.astype(np.int32)[np.newaxis, :, :], 0)
                ignore_tags[i] = True
                continue
            shrinked = np.array(shrinked[0]).reshape(-1, 2)
            cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)
            # cv2.fillPoly(gt[0], [shrinked.astype(np.int32)], 1)

    data['shrink_map'] = gt
    data['shrink_mask'] = mask
    return data
89 |
--------------------------------------------------------------------------------
/ppocr/data/det/random_crop_data.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 | from __future__ import unicode_literals
7 |
8 | import numpy as np
9 | import cv2
10 | import random
11 |
12 |
def is_poly_in_rect(poly, x, y, w, h):
    """Return True iff every vertex of ``poly`` lies inside rect (x, y, w, h),
    boundaries included."""
    poly = np.array(poly)
    xs, ys = poly[:, 0], poly[:, 1]
    return bool(xs.min() >= x and xs.max() <= x + w and
                ys.min() >= y and ys.max() <= y + h)
20 |
21 |
def is_poly_outside_rect(poly, x, y, w, h):
    """Return True iff ``poly``'s bounding box lies strictly to one side of
    rect (x, y, w, h), i.e. the poly cannot touch the rect."""
    poly = np.array(poly)
    xs, ys = poly[:, 0], poly[:, 1]
    return bool(xs.max() < x or xs.min() > x + w or
                ys.max() < y or ys.min() > y + h)
29 |
30 |
def split_regions(axis):
    """Split a sorted index array into runs of consecutive indices.

    Args:
        axis: 1-D sorted np.ndarray of indices (e.g. rows or columns that
            contain no text).

    Returns:
        List of np.ndarray slices of ``axis``, each a maximal run of
        consecutive values.

    Bug fix: the original only appended a region when the *next* gap was
    found, so the trailing run was always dropped and the last text-free
    band of the image could never be selected by the caller.
    """
    regions = []
    min_axis = 0
    for i in range(1, axis.shape[0]):
        if axis[i] != axis[i - 1] + 1:
            regions.append(axis[min_axis:i])
            min_axis = i
    # Emit the final run, which the loop above never reaches.
    if axis.shape[0] > 0:
        regions.append(axis[min_axis:])
    return regions
40 |
41 |
def random_select(axis, max_size):
    """Pick two random values from ``axis`` and return them as a sorted pair
    clipped to [0, max_size - 1]."""
    pair = np.random.choice(axis, size=2)
    lo, hi = np.min(pair), np.max(pair)
    lo = np.clip(lo, 0, max_size - 1)
    hi = np.clip(hi, 0, max_size - 1)
    return lo, hi
49 |
50 |
def region_wise_random_select(regions, max_size):
    """Pick one value from each of two randomly chosen regions.

    ``max_size`` is accepted for interface symmetry with random_select but
    is not used.  Returns the two picks as (min, max).
    """
    chosen = list(np.random.choice(len(regions), 2))
    picks = [int(np.random.choice(regions[idx], size=1)) for idx in chosen]
    return min(picks), max(picks)
61 |
62 |
def crop_area(im, text_polys, min_crop_side_ratio, max_tries):
    """Pick a random crop rectangle whose edges avoid cutting through text.

    Rows/columns covered by any polygon's bounding box are masked out, and
    crop boundaries are sampled from the remaining text-free bands.  Falls
    back to the full image if no acceptable crop is found in ``max_tries``.

    Returns:
        (x, y, w, h) of the chosen crop.
    """
    h, w, _ = im.shape
    h_mask = np.zeros(h, dtype=np.int32)
    w_mask = np.zeros(w, dtype=np.int32)
    for points in text_polys:
        pts = np.round(points, decimals=0).astype(np.int32)
        w_mask[np.min(pts[:, 0]):np.max(pts[:, 0])] = 1
        h_mask[np.min(pts[:, 1]):np.max(pts[:, 1])] = 1

    # Indices of rows/columns that contain no text at all.
    h_axis = np.where(h_mask == 0)[0]
    w_axis = np.where(w_mask == 0)[0]
    if len(h_axis) == 0 or len(w_axis) == 0:
        return 0, 0, w, h

    h_regions = split_regions(h_axis)
    w_regions = split_regions(w_axis)

    for _ in range(max_tries):
        if len(w_regions) > 1:
            xmin, xmax = region_wise_random_select(w_regions, w)
        else:
            xmin, xmax = random_select(w_axis, w)
        if len(h_regions) > 1:
            ymin, ymax = region_wise_random_select(h_regions, h)
        else:
            ymin, ymax = random_select(h_axis, h)

        # Reject candidates that are too small along either side.
        if (xmax - xmin < min_crop_side_ratio * w or
                ymax - ymin < min_crop_side_ratio * h):
            continue

        # Accept as soon as at least one polygon intersects the candidate.
        for poly in text_polys:
            if not is_poly_outside_rect(poly, xmin, ymin, xmax - xmin,
                                        ymax - ymin):
                return xmin, ymin, xmax - xmin, ymax - ymin

    return 0, 0, w, h
109 |
110 |
def RandomCropData(data, size):
    """Randomly crop ``data['image']`` without cutting text, then resize.

    The crop is scaled (aspect ratio preserved) to fit ``size`` and padded
    into the top-left corner of a zero canvas.  Polygons are remapped into
    crop coordinates; those pushed fully outside are dropped together with
    their texts and ignore flags.  ``data`` is updated in place and returned.
    """
    max_tries = 10
    min_crop_side_ratio = 0.1
    keep_ratio = True

    im = data['image']
    text_polys = data['polys']
    ignore_tags = data['ignore_tags']
    texts = data['texts']

    care_polys = [
        poly for poly, tag in zip(text_polys, ignore_tags) if not tag
    ]
    crop_x, crop_y, crop_w, crop_h = crop_area(im, care_polys,
                                               min_crop_side_ratio, max_tries)
    dh, dw = size
    scale = min(dw / crop_w, dh / crop_h)
    h = int(crop_h * scale)
    w = int(crop_w * scale)
    cropped = im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w]
    if keep_ratio:
        # Resized crop goes into the top-left of a dh x dw zero canvas.
        img = np.zeros((dh, dw, im.shape[2]), im.dtype)
        img[:h, :w] = cv2.resize(cropped, (w, h))
    else:
        img = cv2.resize(cropped, (dw, dh))

    kept_polys, kept_tags, kept_texts = [], [], []
    for poly, text, tag in zip(text_polys, texts, ignore_tags):
        poly = ((poly - (crop_x, crop_y)) * scale).tolist()
        if not is_poly_outside_rect(poly, 0, 0, w, h):
            kept_polys.append(poly)
            kept_tags.append(tag)
            kept_texts.append(text)

    data['image'] = img
    data['polys'] = np.array(kept_polys)
    data['ignore_tags'] = kept_tags
    data['texts'] = kept_texts
    return data
154 |
--------------------------------------------------------------------------------
/ppocr/data/reader_main.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | import os
16 | import random
17 | import numpy as np
18 |
19 | import paddle
20 | from ppocr.utils.utility import create_module
21 | from copy import deepcopy
22 |
23 | from .rec.img_tools import process_image
24 | import cv2
25 |
26 | import sys
27 | import signal
28 |
29 |
30 | # handle terminate reader process, do not print stack frame
def _reader_quit(signum, frame):
    """SIGTERM handler: exit the reader process without a stack trace.

    Args:
        signum: received signal number (unused).
        frame: current stack frame (unused).
    """
    print("Reader process exit.")
    raise SystemExit
34 |
35 |
def _term_group(sig_num, frame):
    """SIGINT handler: report, then SIGKILL the entire process group so all
    reader workers die together.

    Args:
        sig_num: received signal number (unused).
        frame: current stack frame (unused).
    """
    print('pid {} terminated, terminate group '
          '{}...'.format(os.getpid(), os.getpgrp()))
    os.killpg(os.getpgid(os.getpid()), signal.SIGKILL)
40 |
41 |
42 | signal.signal(signal.SIGTERM, _reader_quit)
43 | signal.signal(signal.SIGINT, _term_group)
44 |
45 |
def reader_main(config=None, mode=None):
    """Create a data reader for training, evaluation, or testing.

    Args:
        config: full configuration dict; must contain 'Global' plus the
            reader section matching ``mode``.
        mode: one of "train", "eval", "test".

    Returns:
        A reader callable.  For training this is a multiprocess reader
        built from ``num_workers`` sub-readers (single in-process reader
        on Windows, which lacks fork()).
    """
    assert mode in ["train", "eval", "test"],\
        "Nonsupport mode:{}".format(mode)
    section = {
        'train': 'TrainReader',
        'eval': 'EvalReader',
        'test': 'TestReader',
    }[mode]
    params = deepcopy(config[section])
    params['mode'] = mode
    # Global settings override the section-specific ones.
    params.update(config['Global'])
    function = create_module(params['reader_function'])(params)

    if mode != "train":
        return function(mode)
    if sys.platform == "win32":
        # No fork() on Windows: run a single in-process reader.
        return function(0)
    readers = [function(process_id)
               for process_id in range(params['num_workers'])]
    return paddle.reader.multiprocess_reader(readers, False)
78 |
--------------------------------------------------------------------------------
/ppocr/data/rec/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/data/rec/text_image_aug/augment.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # Author: RubanSeven
3 | # Reference: https://github.com/RubanSeven/Text-Image-Augmentation-python
4 |
5 | # import cv2
6 | import numpy as np
7 | from .warp_mls import WarpMLS
8 |
9 |
def tia_distort(src, segment=4):
    """Randomly distort ``src`` by jittering control points on the top and
    bottom edges, then warping with moving least squares.

    Args:
        src: HxW or HxWxC uint8 image.
        segment: number of horizontal segments; more segments = finer warp.

    Returns:
        The warped image with the same size as ``src``.
    """
    img_h, img_w = src.shape[:2]
    cut = img_w // segment
    thresh = cut // 3
    half_thresh = thresh * 0.5

    src_pts = [[0, 0], [img_w, 0], [img_w, img_h], [0, img_h]]
    # Jitter each corner inward by up to ``thresh`` pixels.
    dst_pts = [
        [np.random.randint(thresh), np.random.randint(thresh)],
        [img_w - np.random.randint(thresh), np.random.randint(thresh)],
        [img_w - np.random.randint(thresh),
         img_h - np.random.randint(thresh)],
        [np.random.randint(thresh), img_h - np.random.randint(thresh)],
    ]

    # Interior control columns, jittered in both x and y.
    for cut_idx in np.arange(1, segment, 1):
        src_pts.append([cut * cut_idx, 0])
        src_pts.append([cut * cut_idx, img_h])
        dst_pts.append([
            cut * cut_idx + np.random.randint(thresh) - half_thresh,
            np.random.randint(thresh) - half_thresh,
        ])
        dst_pts.append([
            cut * cut_idx + np.random.randint(thresh) - half_thresh,
            img_h + np.random.randint(thresh) - half_thresh,
        ])

    return WarpMLS(src, src_pts, dst_pts, img_w, img_h).generate()
50 |
51 |
def tia_stretch(src, segment=4):
    """Randomly stretch ``src`` horizontally by shifting interior control
    columns while keeping the four corners fixed.

    Args:
        src: HxW or HxWxC uint8 image.
        segment: number of horizontal segments.

    Returns:
        The warped image with the same size as ``src``.
    """
    img_h, img_w = src.shape[:2]
    cut = img_w // segment
    thresh = cut * 4 // 5
    half_thresh = thresh * 0.5

    corners = [[0, 0], [img_w, 0], [img_w, img_h], [0, img_h]]
    src_pts = list(corners)
    dst_pts = [list(p) for p in corners]

    for cut_idx in np.arange(1, segment, 1):
        move = np.random.randint(thresh) - half_thresh
        col = cut * cut_idx
        src_pts.append([col, 0])
        src_pts.append([col, img_h])
        # Shift the whole column horizontally by the same random amount.
        dst_pts.append([col + move, 0])
        dst_pts.append([col + move, img_h])

    return WarpMLS(src, src_pts, dst_pts, img_w, img_h).generate()
84 |
85 |
def tia_perspective(src):
    """Apply a random pseudo-perspective warp: the left and right edges are
    squeezed vertically by independent random amounts.

    Args:
        src: HxW or HxWxC uint8 image.

    Returns:
        The warped image with the same size as ``src``.
    """
    img_h, img_w = src.shape[:2]
    thresh = img_h // 2

    src_pts = [[0, 0], [img_w, 0], [img_w, img_h], [0, img_h]]
    dst_pts = [
        [0, np.random.randint(thresh)],
        [img_w, np.random.randint(thresh)],
        [img_w, img_h - np.random.randint(thresh)],
        [0, img_h - np.random.randint(thresh)],
    ]

    return WarpMLS(src, src_pts, dst_pts, img_w, img_h).generate()
108 |
--------------------------------------------------------------------------------
/ppocr/data/rec/text_image_aug/warp_mls.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # Author: RubanSeven
3 | # Reference: https://github.com/RubanSeven/Text-Image-Augmentation-python
4 | import math
5 | import numpy as np
6 |
7 |
class WarpMLS:
    """Moving-least-squares style image warp evaluated on a coarse grid.

    Warps ``src`` so that control points ``src_pts`` map to ``dst_pts``
    (lists of [x, y]).  Displacements are computed only at grid nodes
    spaced ``grid_size`` pixels apart (calc_delta) and bilinearly
    interpolated inside each cell (gen_img).

    Reference: https://github.com/RubanSeven/Text-Image-Augmentation-python
    """

    def __init__(self, src, src_pts, dst_pts, dst_w, dst_h, trans_ratio=1.):
        self.src = src
        self.src_pts = src_pts
        self.dst_pts = dst_pts
        self.pt_count = len(self.dst_pts)
        self.dst_w = dst_w
        self.dst_h = dst_h
        # Fraction of the computed displacement that is actually applied.
        self.trans_ratio = trans_ratio
        # Spacing (pixels) between grid nodes where deltas are evaluated.
        self.grid_size = 100
        # Per-node x / y displacement fields, filled by calc_delta().
        self.rdx = np.zeros((self.dst_h, self.dst_w))
        self.rdy = np.zeros((self.dst_h, self.dst_w))

    @staticmethod
    def __bilinear_interp(x, y, v11, v12, v21, v22):
        # Standard bilinear blend of four corner values at fractions x, y.
        return (v11 * (1 - y) + v12 * y) * (1 - x) + (v21 *
                                                      (1 - y) + v22 * y) * x

    def generate(self):
        """Compute the grid deltas, then return the warped image."""
        self.calc_delta()
        return self.gen_img()

    def calc_delta(self):
        """Fill self.rdx / self.rdy with displacements at grid nodes.

        For each grid node (i, j), inverse-square-distance weights of the
        destination control points are accumulated and used to map the node
        toward the source control points (an MLS-style solve).
        """
        w = np.zeros(self.pt_count, dtype=np.float32)

        # Need at least two control points for a meaningful solve.
        if self.pt_count < 2:
            return

        i = 0
        while 1:
            # Snap the last grid column onto the image border.
            if self.dst_w <= i < self.dst_w + self.grid_size - 1:
                i = self.dst_w - 1
            elif i >= self.dst_w:
                break

            j = 0
            while 1:
                # Snap the last grid row onto the image border.
                if self.dst_h <= j < self.dst_h + self.grid_size - 1:
                    j = self.dst_h - 1
                elif j >= self.dst_h:
                    break

                sw = 0
                swp = np.zeros(2, dtype=np.float32)
                swq = np.zeros(2, dtype=np.float32)
                new_pt = np.zeros(2, dtype=np.float32)
                cur_pt = np.array([i, j], dtype=np.float32)

                k = 0
                for k in range(self.pt_count):
                    # Node coincides with a control point: stop accumulating.
                    if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
                        break

                    # Inverse-square-distance weight of control point k.
                    w[k] = 1. / (
                        (i - self.dst_pts[k][0]) * (i - self.dst_pts[k][0]) +
                        (j - self.dst_pts[k][1]) * (j - self.dst_pts[k][1]))

                    sw += w[k]
                    swp = swp + w[k] * np.array(self.dst_pts[k])
                    swq = swq + w[k] * np.array(self.src_pts[k])

                    # On the final control point, all sums are complete:
                    # finish the MLS solve for this node.
                    if k == self.pt_count - 1:
                        # Weighted centroids of dst / src control points.
                        pstar = 1 / sw * swp
                        qstar = 1 / sw * swq

                        miu_s = 0
                        for k in range(self.pt_count):
                            if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
                                continue
                            pt_i = self.dst_pts[k] - pstar
                            miu_s += w[k] * np.sum(pt_i * pt_i)

                        cur_pt -= pstar
                        cur_pt_j = np.array([-cur_pt[1], cur_pt[0]])

                        for k in range(self.pt_count):
                            if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
                                continue

                            pt_i = self.dst_pts[k] - pstar
                            pt_j = np.array([-pt_i[1], pt_i[0]])

                            tmp_pt = np.zeros(2, dtype=np.float32)
                            tmp_pt[0] = np.sum(pt_i * cur_pt) * self.src_pts[k][0] - \
                                        np.sum(pt_j * cur_pt) * self.src_pts[k][1]
                            tmp_pt[1] = -np.sum(pt_i * cur_pt_j) * self.src_pts[k][0] + \
                                        np.sum(pt_j * cur_pt_j) * self.src_pts[k][1]
                            tmp_pt *= (w[k] / miu_s)
                            new_pt += tmp_pt

                        new_pt += qstar
                # NOTE(review): this `else` binds to the *for* loop, i.e. it
                # runs exactly when the loop was NOT broken -- which is also
                # when the k == pt_count - 1 branch above just computed
                # new_pt, and this then overwrites it with src_pts[k]; on a
                # break, new_pt keeps its zeros instead.  This matches the
                # upstream copy, but looks inverted -- verify against the
                # reference implementation before changing anything here.
                else:
                    new_pt = self.src_pts[k]

                # Displacement of this grid node.
                self.rdx[j, i] = new_pt[0] - i
                self.rdy[j, i] = new_pt[1] - j

                j += self.grid_size
            i += self.grid_size

    def gen_img(self):
        """Apply the per-node displacement fields and return the warped image."""
        src_h, src_w = self.src.shape[:2]
        dst = np.zeros_like(self.src, dtype=np.float32)

        for i in np.arange(0, self.dst_h, self.grid_size):
            for j in np.arange(0, self.dst_w, self.grid_size):
                ni = i + self.grid_size
                nj = j + self.grid_size
                w = h = self.grid_size
                # Clamp the last cell to the image border.
                if ni >= self.dst_h:
                    ni = self.dst_h - 1
                    h = ni - i + 1
                if nj >= self.dst_w:
                    nj = self.dst_w - 1
                    w = nj - j + 1

                # Fractional positions of every pixel inside this cell.
                di = np.reshape(np.arange(h), (-1, 1))
                dj = np.reshape(np.arange(w), (1, -1))
                # Interpolate the four corner deltas across the cell.
                delta_x = self.__bilinear_interp(
                    di / h, dj / w, self.rdx[i, j], self.rdx[i, nj],
                    self.rdx[ni, j], self.rdx[ni, nj])
                delta_y = self.__bilinear_interp(
                    di / h, dj / w, self.rdy[i, j], self.rdy[i, nj],
                    self.rdy[ni, j], self.rdy[ni, nj])
                # Source sampling coordinates, clamped to the source image.
                nx = j + dj + delta_x * self.trans_ratio
                ny = i + di + delta_y * self.trans_ratio
                nx = np.clip(nx, 0, src_w - 1)
                ny = np.clip(ny, 0, src_h - 1)
                nxi = np.array(np.floor(nx), dtype=np.int32)
                nyi = np.array(np.floor(ny), dtype=np.int32)
                nxi1 = np.array(np.ceil(nx), dtype=np.int32)
                nyi1 = np.array(np.ceil(ny), dtype=np.int32)

                # Bilinearly resample the source at the displaced coords
                # (3-channel images need the fractions tiled per channel).
                if len(self.src.shape) == 3:
                    x = np.tile(np.expand_dims(ny - nyi, axis=-1), (1, 1, 3))
                    y = np.tile(np.expand_dims(nx - nxi, axis=-1), (1, 1, 3))
                else:
                    x = ny - nyi
                    y = nx - nxi
                dst[i:i + h, j:j + w] = self.__bilinear_interp(
                    x, y, self.src[nyi, nxi], self.src[nyi, nxi1],
                    self.src[nyi1, nxi], self.src[nyi1, nxi1])

        dst = np.clip(dst, 0, 255)
        dst = np.array(dst, dtype=np.uint8)

        return dst
155 |
--------------------------------------------------------------------------------
/ppocr/modeling/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/modeling/architectures/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/modeling/architectures/cls_model.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | from paddle import fluid
20 |
21 | from ppocr.utils.utility import create_module
22 | from ppocr.utils.utility import initial_logger
23 |
24 | logger = initial_logger()
25 | from copy import deepcopy
26 |
27 |
class ClsModel(object):
    """Text-direction classifier graph builder (PaddlePaddle static graph).

    Wires backbone -> head -> loss modules from the config dict and exposes
    ``__call__(mode)`` to build the program for train / eval / test / export.
    """

    def __init__(self, params):
        super(ClsModel, self).__init__()
        global_params = params['Global']
        self.infer_img = global_params['infer_img']

        # Each sub-module is instantiated from its dotted-path 'function'
        # entry, with Global settings merged over the section's own params.
        backbone_params = deepcopy(params["Backbone"])
        backbone_params.update(global_params)
        self.backbone = create_module(backbone_params['function']) \
            (params=backbone_params)

        head_params = deepcopy(params["Head"])
        head_params.update(global_params)
        self.head = create_module(head_params['function']) \
            (params=head_params)

        loss_params = deepcopy(params["Loss"])
        loss_params.update(global_params)
        self.loss = create_module(loss_params['function']) \
            (params=loss_params)

        self.image_shape = global_params['image_shape']

    def create_feed(self, mode):
        """Create the input variables (plus a DataLoader when training).

        Returns:
            (image, labels, loader); labels and loader are None for any
            mode other than "train".
        """
        image_shape = deepcopy(self.image_shape)
        # Leading -1: variable batch dimension.
        image_shape.insert(0, -1)
        if mode == "train":
            image = fluid.data(name='image', shape=image_shape, dtype='float32')
            label = fluid.data(name='label', shape=[None, 1], dtype='int64')
            feed_list = [image, label]
            labels = {'label': label}
            loader = fluid.io.DataLoader.from_generator(
                feed_list=feed_list,
                capacity=64,
                use_double_buffer=True,
                iterable=False)
        else:
            labels = None
            loader = None
            image = fluid.data(name='image', shape=image_shape, dtype='float32')
        # Keeps gradients w.r.t. the input available; presumably needed by a
        # downstream consumer (e.g. visualization) -- confirm with callers.
        image.stop_gradient = False
        return image, labels, loader

    def __call__(self, mode):
        """Build the graph for ``mode``.

        Returns:
            "train": (loader, outputs dict with loss/decoded_out/label/acc);
            "export": [image, predicts];
            otherwise: (loader, predicts).
        """
        image, labels, loader = self.create_feed(mode)
        inputs = image
        conv_feas = self.backbone(inputs)
        predicts = self.head(conv_feas, labels, mode)
        if mode == "train":
            loss = self.loss(predicts, labels)
            label = labels['label']
            # Top-1 accuracy over the batch.
            acc = fluid.layers.accuracy(predicts['predict'], label, k=1)
            outputs = {'total_loss': loss, 'decoded_out': \
                predicts['decoded_out'], 'label': label, 'acc': acc}
            return loader, outputs
        elif mode == "export":
            return [image, predicts]
        else:
            return loader, predicts
87 |
--------------------------------------------------------------------------------
/ppocr/modeling/architectures/det_model.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | from paddle import fluid
20 |
21 | from ppocr.utils.utility import create_module
22 | from ppocr.utils.utility import initial_logger
23 | logger = initial_logger()
24 | from copy import deepcopy
25 |
26 |
class DetModel(object):
    def __init__(self, params):
        """
        Detection module for OCR text detection.
        args:
            params (dict): the super parameters for detection module.
        """
        global_params = params['Global']
        # Algorithm name ('EAST', 'DB' or 'SAST'); selects both the head call
        # signature and the label layout built in create_feed.
        self.algorithm = global_params['algorithm']

        # deepcopy each sub-config so the Global-level update() below does
        # not mutate the caller's params dict.
        backbone_params = deepcopy(params["Backbone"])
        backbone_params.update(global_params)
        self.backbone = create_module(backbone_params['function'])\
            (params=backbone_params)

        head_params = deepcopy(params["Head"])
        head_params.update(global_params)
        self.head = create_module(head_params['function'])\
            (params=head_params)

        loss_params = deepcopy(params["Loss"])
        loss_params.update(global_params)
        self.loss = create_module(loss_params['function'])\
            (params=loss_params)

        # [C, H, W] (no batch dim); consumed by fluid.layers.data below.
        self.image_shape = global_params['image_shape']

    def create_feed(self, mode):
        """
        create Dataloader feeds
        args:
            mode (str): 'train' for training or else for evaluation
        return: (image, corresponding label, dataloader)
        """
        image_shape = deepcopy(self.image_shape)
        # The label maps below are laid out on grids derived from the input
        # size, so H and W must both be multiples of 4.
        if image_shape[1] % 4 != 0 or image_shape[2] % 4 != 0:
            raise Exception("The size of the image must be divisible by 4, "
                            "received image shape is {}, please reset the "
                            "Global.image_shape in the yml file".format(
                                image_shape))

        image = fluid.layers.data(
            name='image', shape=image_shape, dtype='float32')
        # NOTE(review): gradients are kept flowing to the input image in all
        # modes; presumably needed by a downstream consumer — confirm.
        image.stop_gradient = False
        if mode == "train":
            if self.algorithm == "EAST":
                # EAST ground truth lives on a 4x-downsampled grid:
                # 1-channel score, 9-channel geometry, 1-channel mask.
                h, w = int(image_shape[1] // 4), int(image_shape[2] // 4)
                score = fluid.layers.data(
                    name='score', shape=[1, h, w], dtype='float32')
                geo = fluid.layers.data(
                    name='geo', shape=[9, h, w], dtype='float32')
                mask = fluid.layers.data(
                    name='mask', shape=[1, h, w], dtype='float32')
                feed_list = [image, score, geo, mask]
                labels = {'score': score, 'geo': geo, 'mask': mask}
            elif self.algorithm == "DB":
                # DB ground truth is full-resolution: shrink/threshold maps
                # plus their valid-region masks, all shaped like the image
                # without the channel-0 entry (image_shape[1:]).
                shrink_map = fluid.layers.data(
                    name='shrink_map', shape=image_shape[1:], dtype='float32')
                shrink_mask = fluid.layers.data(
                    name='shrink_mask', shape=image_shape[1:], dtype='float32')
                threshold_map = fluid.layers.data(
                    name='threshold_map',
                    shape=image_shape[1:],
                    dtype='float32')
                threshold_mask = fluid.layers.data(
                    name='threshold_mask',
                    shape=image_shape[1:],
                    dtype='float32')
                feed_list=[image, shrink_map, shrink_mask,\
                           threshold_map, threshold_mask]
                labels = {'shrink_map':shrink_map,\
                          'shrink_mask':shrink_mask,\
                          'threshold_map':threshold_map,\
                          'threshold_mask':threshold_mask}
            elif self.algorithm == "SAST":
                # SAST ground truth is on a fixed 128x128 grid: score,
                # border, mask, and the tvo/tco offset maps.
                input_score = fluid.layers.data(
                    name='score', shape=[1, 128, 128], dtype='float32')
                input_border = fluid.layers.data(
                    name='border', shape=[5, 128, 128], dtype='float32')
                input_mask = fluid.layers.data(
                    name='mask', shape=[1, 128, 128], dtype='float32')
                input_tvo = fluid.layers.data(
                    name='tvo', shape=[9, 128, 128], dtype='float32')
                input_tco = fluid.layers.data(
                    name='tco', shape=[3, 128, 128], dtype='float32')
                feed_list = [
                    image, input_score, input_border, input_mask, input_tvo,
                    input_tco
                ]
                labels = {'input_score': input_score,\
                          'input_border': input_border,\
                          'input_mask': input_mask,\
                          'input_tvo': input_tvo,\
                          'input_tco': input_tco}
            loader = fluid.io.DataLoader.from_generator(
                feed_list=feed_list,
                capacity=64,
                use_double_buffer=True,
                iterable=False)
        else:
            labels = None
            loader = None
        return image, labels, loader

    def __call__(self, mode):
        """
        run forward of defined module
        args:
            mode (str): 'train' for training; 'export' for inference,
                others for evaluation]
        """
        image, labels, loader = self.create_feed(mode)
        conv_feas = self.backbone(image)
        # The DB head needs the mode to toggle its train/infer branches;
        # the other heads take only the feature maps.
        if self.algorithm == "DB":
            predicts = self.head(conv_feas, mode)
        else:
            predicts = self.head(conv_feas)
        if mode == "train":
            losses = self.loss(predicts, labels)
            return loader, losses
        elif mode == "export":
            return [image, predicts]
        else:
            return loader, predicts
151 |
--------------------------------------------------------------------------------
/ppocr/modeling/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/modeling/common_functions.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle
20 | import paddle.fluid as fluid
21 | from paddle.fluid.param_attr import ParamAttr
22 | import math
23 |
24 |
def get_para_bias_attr(l2_decay, k, name):
    """Build the weight/bias ParamAttr pair for an fc layer.

    Both attrs share one L2 regularizer and a uniform initializer whose
    bound is 1/sqrt(k) (fan-in scaled).

    Args:
        l2_decay (float): L2 regularization coefficient.
        k (int): fan-in used to scale the uniform init range.
        name (str): prefix for the parameter names.
    Returns:
        list: [weight ParamAttr, bias ParamAttr]
    """
    bound = 1.0 / math.sqrt(float(k))
    shared_reg = fluid.regularizer.L2Decay(l2_decay)
    shared_init = fluid.initializer.Uniform(-bound, bound)

    def _make_attr(suffix):
        # Weight and bias attrs differ only in their name suffix.
        return fluid.ParamAttr(
            regularizer=shared_reg,
            initializer=shared_init,
            name=name + suffix)

    return [_make_attr("_w_attr"), _make_attr("_b_attr")]
34 |
35 |
def conv_bn_layer(input,
                  num_filters,
                  filter_size,
                  stride=1,
                  groups=1,
                  act=None,
                  name=None):
    """Conv2d (bias-free) followed by batch norm.

    The convolution is kept linear; the activation `act` is applied on the
    batch-norm output. Padding preserves spatial size for odd filter sizes.
    """
    same_pad = (filter_size - 1) // 2
    conv_out = fluid.layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=same_pad,
        groups=groups,
        act=None,
        param_attr=ParamAttr(name=name + "_weights"),
        bias_attr=False,
        name=name + '.conv2d')

    # BN parameter names carry a "bn_" prefix so checkpoints line up.
    bn_prefix = "bn_" + name
    return fluid.layers.batch_norm(
        input=conv_out,
        act=act,
        name=bn_prefix + '.output',
        param_attr=ParamAttr(name=bn_prefix + '_scale'),
        bias_attr=ParamAttr(bn_prefix + '_offset'),
        moving_mean_name=bn_prefix + '_mean',
        moving_variance_name=bn_prefix + '_variance')
64 |
65 |
def deconv_bn_layer(input,
                    num_filters,
                    filter_size=4,
                    stride=2,
                    act='relu',
                    name=None):
    """Transposed conv (bias-free) followed by batch norm.

    Defaults (filter 4, stride 2, padding 1) give a 2x spatial upsample;
    the activation `act` is applied on the batch-norm output.
    """
    upsampled = fluid.layers.conv2d_transpose(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=1,
        act=None,
        param_attr=ParamAttr(name=name + "_weights"),
        bias_attr=False,
        name=name + '.deconv2d')

    # BN parameter names carry a "bn_" prefix so checkpoints line up.
    bn_prefix = "bn_" + name
    return fluid.layers.batch_norm(
        input=upsampled,
        act=act,
        name=bn_prefix + '.output',
        param_attr=ParamAttr(name=bn_prefix + '_scale'),
        bias_attr=ParamAttr(bn_prefix + '_offset'),
        moving_mean_name=bn_prefix + '_mean',
        moving_variance_name=bn_prefix + '_variance')
91 |
92 |
def create_tmp_var(program, name, dtype, shape, lod_level=0):
    """Create an intermediate variable inside *program*'s current block."""
    current_block = program.current_block()
    return current_block.create_var(
        name=name, dtype=dtype, shape=shape, lod_level=lod_level)
96 |
--------------------------------------------------------------------------------
/ppocr/modeling/heads/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/modeling/heads/cls_head.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import math
20 |
21 | import paddle
22 | import paddle.fluid as fluid
23 |
24 |
class ClsHead(object):
    """
    Text-direction classification head.

    Global average pooling followed by a single fc layer; returns both the
    softmax distribution and the argmax class id.

    Args:

        params(dict): super parameters for build Class network
    """

    def __init__(self, params):
        super(ClsHead, self).__init__()
        self.class_dim = params['class_dim']

    def __call__(self, inputs, labels=None, mode=None):
        pooled = fluid.layers.pool2d(
            input=inputs, pool_type='avg', global_pooling=True)
        # Uniform init bound scaled by fan-in of the pooled feature.
        bound = 1.0 / math.sqrt(pooled.shape[1] * 1.0)

        logits = fluid.layers.fc(
            input=pooled,
            size=self.class_dim,
            param_attr=fluid.param_attr.ParamAttr(
                name="fc_0.w_0",
                initializer=fluid.initializer.Uniform(-bound, bound)),
            bias_attr=fluid.param_attr.ParamAttr(name="fc_0.b_0"))

        probs = fluid.layers.softmax(logits, use_cudnn=False)
        class_ids = fluid.layers.argmax(logits, axis=1)
        return {'predict': probs, 'decoded_out': class_ids}
55 |
--------------------------------------------------------------------------------
/ppocr/modeling/heads/det_east_head.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle.fluid as fluid
20 | from ..common_functions import conv_bn_layer, deconv_bn_layer
21 | from collections import OrderedDict
22 |
23 |
class EASTHead(object):
    """
    EAST: An Efficient and Accurate Scene Text Detector
    see arxiv: https://arxiv.org/abs/1704.03155
    args:
        params(dict): the super parameters for network build
    """

    def __init__(self, params):

        # 'large' doubles the channel widths used in fusion and the header.
        self.model_name = params['model_name']

    def unet_fusion(self, inputs):
        """U-Net style top-down fusion of the backbone feature pyramid.

        args:
            inputs (list): backbone feature maps, shallow-to-deep.
        return: the fused feature map (g[3]).
        """
        # Reverse so iteration runs deepest (lowest resolution) first.
        f = inputs[::-1]
        if self.model_name == "large":
            num_outputs = [128, 128, 128, 128]
        else:
            num_outputs = [64, 64, 64, 64]
        g = [None, None, None, None]
        h = [None, None, None, None]
        for i in range(4):
            # h[i]: current level merged with the upsampled previous output.
            if i == 0:
                h[i] = f[i]
            else:
                h[i] = fluid.layers.concat([g[i - 1], f[i]], axis=1)
            h[i] = conv_bn_layer(
                input=h[i],
                num_filters=num_outputs[i],
                filter_size=3,
                stride=1,
                act='relu',
                name="unet_h_%d" % (i))
            if i <= 2:
                #can be replaced with unpool
                # Upsample 2x so the next (shallower) level can be concat'd.
                g[i] = deconv_bn_layer(
                    input=h[i],
                    num_filters=num_outputs[i],
                    name="unet_g_%d" % (i))
            else:
                # Last level: no further upsampling, just refine.
                g[i] = conv_bn_layer(
                    input=h[i],
                    num_filters=num_outputs[i],
                    filter_size=3,
                    stride=1,
                    act='relu',
                    name="unet_g_%d" % (i))
        return g[3]

    def detector_header(self, f_common):
        """Predict the score map and geometry map from the fused features.

        args:
            f_common: fused feature map from unet_fusion.
        return: (f_score, f_geo)
        """
        if self.model_name == "large":
            num_outputs = [128, 64, 1, 8]
        else:
            num_outputs = [64, 32, 1, 8]
        f_det = conv_bn_layer(
            input=f_common,
            num_filters=num_outputs[0],
            filter_size=3,
            stride=1,
            act='relu',
            name="det_head1")
        f_det = conv_bn_layer(
            input=f_det,
            num_filters=num_outputs[1],
            filter_size=3,
            stride=1,
            act='relu',
            name="det_head2")
        #f_score
        # 1-channel text/non-text confidence, squashed to (0, 1).
        f_score = conv_bn_layer(
            input=f_det,
            num_filters=num_outputs[2],
            filter_size=1,
            stride=1,
            act=None,
            name="f_score")
        f_score = fluid.layers.sigmoid(f_score)
        #f_geo
        # 8-channel geometry, mapped from sigmoid's (0, 1) to (-800, 800).
        # NOTE(review): 800 is a hard-coded coordinate bound — confirm it
        # matches the training input size.
        f_geo = conv_bn_layer(
            input=f_det,
            num_filters=num_outputs[3],
            filter_size=1,
            stride=1,
            act=None,
            name="f_geo")
        f_geo = (fluid.layers.sigmoid(f_geo) - 0.5) * 2 * 800
        return f_score, f_geo

    def __call__(self, inputs):
        """
        Fuse different levels of feature map from backbone and predict results
        Args:
            inputs(list): feature maps from backbone
        Return: predicts
        """
        f_common = self.unet_fusion(inputs)
        f_score, f_geo = self.detector_header(f_common)
        predicts = OrderedDict()
        predicts['f_score'] = f_score
        predicts['f_geo'] = f_geo
        return predicts
124 |
--------------------------------------------------------------------------------
/ppocr/modeling/heads/rec_ctc_head.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import math
20 |
21 | import paddle
22 | import paddle.fluid as fluid
23 | from paddle.fluid.param_attr import ParamAttr
24 | from .rec_seq_encoder import SequenceEncoder
25 | from ..common_functions import get_para_bias_attr
26 | import numpy as np
27 |
28 |
class CTCPredict(object):
    """
    CTC predict
    Args:
        params(object): Params from yaml file and settings from command line
    """

    def __init__(self, params):
        super(CTCPredict, self).__init__()
        # Size of the character set; CTC blank is appended as index char_num
        # (hence size=char_num + 1 on the fc and blank=char_num below).
        self.char_num = params['char_num']
        self.encoder = SequenceEncoder(params)
        self.encoder_type = params['encoder_type']
        # L2 decay for the final fc; defaults to 4e-4 when absent from config.
        self.fc_decay = params.get("fc_decay", 0.0004)

    def __call__(self, inputs, labels=None, mode=None):
        """Run the sequence encoder, project to char logits, and greedy-decode.

        return: dict with 'predict' (per-step logits) and 'decoded_out'
            (greedy CTC decoding result).
        """
        # NOTE(review): the scope name suggests these ops are meant to be
        # skipped by quantization passes; scope_guard normally takes a Scope
        # object, so confirm the string form is intentional.
        with fluid.scope_guard("skip_quant"):
            encoder_features = self.encoder(inputs)
        # The RNN encoder returns a list of sequences (forward/backward)
        # that must be concatenated; the reshape encoder yields one tensor.
        if self.encoder_type != "reshape":
            encoder_features = fluid.layers.concat(encoder_features, axis=1)
        name = "ctc_fc"
        para_attr, bias_attr = get_para_bias_attr(
            l2_decay=self.fc_decay, k=encoder_features.shape[1], name=name)
        predict = fluid.layers.fc(input=encoder_features,
                                  size=self.char_num + 1,
                                  param_attr=para_attr,
                                  bias_attr=bias_attr,
                                  name=name)
        decoded_out = fluid.layers.ctc_greedy_decoder(
            input=predict, blank=self.char_num)
        predicts = {'predict': predict, 'decoded_out': decoded_out}
        return predicts
60 |
--------------------------------------------------------------------------------
/ppocr/modeling/heads/rec_seq_encoder.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import math
20 | import paddle.fluid as fluid
21 | import paddle.fluid.layers as layers
22 |
23 |
class EncoderWithReshape(object):
    """Collapse a conv feature map into a sequence via im2sequence.

    A sliding window of height equal to the feature-map height and width 1
    turns the (N, C, H, W) tensor into a width-ordered sequence.
    """

    def __init__(self, params):
        super(EncoderWithReshape, self).__init__()

    def __call__(self, inputs):
        return layers.im2sequence(
            input=inputs,
            stride=[1, 1],
            filter_size=[inputs.shape[2], 1],
            name="sliced_feature")
35 |
36 |
class EncoderWithRNN(object):
    """Two-stack bidirectional LSTM sequence encoder.

    Builds an independent 2-layer LSTM stack for each direction (no == 1
    forward, no == 2 reversed) and returns both outputs in a list.
    """

    def __init__(self, params):
        super(EncoderWithRNN, self).__init__()
        self.rnn_hidden_size = params['SeqRNN']['hidden_size']

    def __call__(self, inputs):
        # Collects [forward_out, backward_out]; the caller concatenates them.
        lstm_list = []
        name_prefix = "lstm"
        rnn_hidden_size = self.rnn_hidden_size
        for no in range(1, 3):
            if no == 1:
                is_reverse = False
            else:
                is_reverse = True
            # dynamic_lstm expects its input pre-projected to 4*hidden_size
            # (the i/f/c/o gate blocks), hence the fc in front of each lstm.
            # Parameter name strings are checkpoint-visible; do not change.
            name = "%s_st1_fc%d" % (name_prefix, no)
            fc = layers.fc(input=inputs,
                           size=rnn_hidden_size * 4,
                           param_attr=fluid.ParamAttr(name=name + "_w"),
                           bias_attr=fluid.ParamAttr(name=name + "_b"),
                           name=name)
            name = "%s_st1_out%d" % (name_prefix, no)
            lstm, _ = layers.dynamic_lstm(
                input=fc,
                size=rnn_hidden_size * 4,
                is_reverse=is_reverse,
                param_attr=fluid.ParamAttr(name=name + "_w"),
                bias_attr=fluid.ParamAttr(name=name + "_b"),
                use_peepholes=False)
            # Second layer of the stack, same direction as the first.
            name = "%s_st2_fc%d" % (name_prefix, no)
            fc = layers.fc(input=lstm,
                           size=rnn_hidden_size * 4,
                           param_attr=fluid.ParamAttr(name=name + "_w"),
                           bias_attr=fluid.ParamAttr(name=name + "_b"),
                           name=name)
            name = "%s_st2_out%d" % (name_prefix, no)
            lstm, _ = layers.dynamic_lstm(
                input=fc,
                size=rnn_hidden_size * 4,
                is_reverse=is_reverse,
                param_attr=fluid.ParamAttr(name=name + "_w"),
                bias_attr=fluid.ParamAttr(name=name + "_b"),
                use_peepholes=False)
            lstm_list.append(lstm)
        return lstm_list
81 |
82 |
class SequenceEncoder(object):
    """Dispatch between the 'reshape' and 'rnn' sequence encoders.

    'reshape' only slices the feature map into a sequence; 'rnn' then runs
    the stacked bidirectional LSTM on that sliced sequence.
    """

    def __init__(self, params):
        super(SequenceEncoder, self).__init__()
        self.encoder_type = params['encoder_type']
        self.encoder_reshape = EncoderWithReshape(params)
        if self.encoder_type == "rnn":
            self.encoder_rnn = EncoderWithRNN(params)

    def __call__(self, inputs):
        kind = self.encoder_type
        if kind == "reshape":
            return self.encoder_reshape(inputs)
        if kind == "rnn":
            sliced = self.encoder_reshape(inputs)
            return self.encoder_rnn(sliced)
        assert False, "Unsupport encoder_type:%s" % kind
101 |
--------------------------------------------------------------------------------
/ppocr/modeling/heads/self_attention/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/ppocr/modeling/heads/self_attention/__init__.py
--------------------------------------------------------------------------------
/ppocr/modeling/losses/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/modeling/losses/cls_loss.py:
--------------------------------------------------------------------------------
1 | # copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle.fluid as fluid
20 |
21 |
class ClsLoss(object):
    """Cross-entropy loss for the direction classification head.

    `predicts['predict']` appears to already hold softmax probabilities
    (the classification head applies softmax before returning), matching
    fluid.layers.cross_entropy's default probability input.

    Args:
        params (dict): unused; kept for the create_module interface.
    """

    def __init__(self, params):
        super(ClsLoss, self).__init__()
        # Stored so the loss op can be swapped without touching __call__.
        self.loss_func = fluid.layers.cross_entropy

    def __call__(self, predicts, labels):
        """Return the mean cross-entropy between predictions and labels.

        args:
            predicts (dict): must contain 'predict' (class probabilities).
            labels (dict): must contain 'label' (int64 class ids).
        """
        predict = predicts['predict']
        label = labels['label']
        # Fix: route through self.loss_func — it was assigned in __init__
        # but ignored here; also drop the dead commented-out softmax line.
        cost = self.loss_func(input=predict, label=label)
        # Mean over the batch (the old name 'sum_cost' was misleading).
        avg_cost = fluid.layers.mean(cost)
        return avg_cost
34 |
--------------------------------------------------------------------------------
/ppocr/modeling/losses/det_basic_loss.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import numpy as np
20 |
21 | import paddle.fluid as fluid
22 |
23 |
def BalanceLoss(pred,
                gt,
                mask,
                balance_loss=True,
                main_loss_type="DiceLoss",
                negative_ratio=3,
                return_origin=False,
                eps=1e-6):
    """
    The BalanceLoss for Differentiable Binarization text detection
    args:
        pred (variable): predicted feature maps.
        gt (variable): ground truth feature maps.
        mask (variable): masked maps.
        balance_loss (bool): whether balance loss or not, default is True
        main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss',
            'Euclidean','BCELoss', 'MaskL1Loss'], default is 'DiceLoss'.
        negative_ratio (int|float): float, default is 3.
        return_origin (bool): whether return unbalanced loss or not, default is False.
        eps (float): default is 1e-6.
    return: (variable) balanced loss
    """
    # Split the valid region into positive (text) and negative (background).
    positive = gt * mask
    negative = (1 - gt) * mask

    # OHEM bookkeeping: cap negatives at negative_ratio x positive count.
    positive_count = fluid.layers.reduce_sum(positive)
    positive_count_int = fluid.layers.cast(positive_count, dtype=np.int32)
    # NOTE(review): Python's min() over two fluid Variables relies on the
    # overloaded comparison operators; confirm this selects the intended
    # value in static-graph mode (elementwise_min would be explicit).
    negative_count = min(
        fluid.layers.reduce_sum(negative), positive_count * negative_ratio)
    negative_count_int = fluid.layers.cast(negative_count, dtype=np.int32)

    if main_loss_type == "CrossEntropy":
        loss = fluid.layers.cross_entropy(input=pred, label=gt, soft_label=True)
        loss = fluid.layers.reduce_mean(loss)
    elif main_loss_type == "Euclidean":
        loss = fluid.layers.square(pred - gt)
        loss = fluid.layers.reduce_mean(loss)
    elif main_loss_type == "DiceLoss":
        loss = DiceLoss(pred, gt, mask)
    elif main_loss_type == "BCELoss":
        loss = fluid.layers.sigmoid_cross_entropy_with_logits(pred, label=gt)
    elif main_loss_type == "MaskL1Loss":
        loss = MaskL1Loss(pred, gt, mask)
    else:
        loss_type = [
            'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss', 'MaskL1Loss'
        ]
        raise Exception("main_loss_type in BalanceLoss() can only be one of {}".
                        format(loss_type))

    if not balance_loss:
        return loss

    # Hard-negative mining: keep only the top-k highest-loss negatives
    # (k = negative_count_int), then average over the kept pixels.
    positive_loss = positive * loss
    negative_loss = negative * loss
    negative_loss = fluid.layers.reshape(negative_loss, shape=[-1])
    negative_loss, _ = fluid.layers.topk(negative_loss, k=negative_count_int)
    balance_loss = (fluid.layers.reduce_sum(positive_loss) +
                    fluid.layers.reduce_sum(negative_loss)) / (
                        positive_count + negative_count + eps)

    if return_origin:
        return balance_loss, loss
    return balance_loss
88 |
89 |
def DiceLoss(pred, gt, mask, weights=None, eps=1e-6):
    """
    DiceLoss function: 1 - 2*|pred∩gt| / (|pred| + |gt|), restricted to the
    masked region; optional per-pixel weights rescale the mask.
    """

    assert pred.shape == gt.shape
    assert pred.shape == mask.shape
    if weights is not None:
        assert weights.shape == mask.shape
        mask = weights * mask
    intersection = fluid.layers.reduce_sum(pred * gt * mask)

    union = fluid.layers.reduce_sum(pred * mask) + fluid.layers.reduce_sum(
        gt * mask) + eps
    loss = 1 - 2.0 * intersection / union
    # NOTE(review): this truth-tests a fluid Variable (graph tensor), not a
    # Python number — confirm the assert behaves as intended in static graph.
    assert loss <= 1
    return loss
107 |
108 |
def MaskL1Loss(pred, gt, mask, eps=1e-6):
    """
    Mask L1 Loss: mean absolute error restricted to the masked region,
    normalized by the mask area (eps avoids division by zero).
    """
    abs_diff = fluid.layers.abs(pred - gt)
    masked_sum = fluid.layers.reduce_sum(abs_diff * mask)
    norm = fluid.layers.reduce_sum(mask) + eps
    loss = masked_sum / norm
    # Kept from the original: reduce_mean over the (already scalar) ratio.
    return fluid.layers.reduce_mean(loss)
117 |
--------------------------------------------------------------------------------
/ppocr/modeling/losses/det_db_loss.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss
20 |
21 |
class DBLoss(object):
    """
    Differentiable Binarization (DB) Loss Function
    args:
        param (dict): the super paramter for DB Loss
    """

    def __init__(self, params):
        super(DBLoss, self).__init__()
        self.balance_loss = params['balance_loss']
        self.main_loss_type = params['main_loss_type']
        self.alpha = params['alpha']
        self.beta = params['beta']
        self.ohem_ratio = params['ohem_ratio']

    def __call__(self, predicts, labels):
        """Combine shrink, threshold and binary map losses into one total."""
        pred_maps = predicts['maps']
        # Channel layout of the DB head output:
        # 0 = shrink map, 1 = threshold map, 2 = binary map.
        shrink_pred = pred_maps[:, 0, :, :]
        threshold_pred = pred_maps[:, 1, :, :]
        binary_pred = pred_maps[:, 2, :, :]

        # Shrink-map loss is balanced (OHEM) and scaled by alpha.
        shrink_loss = self.alpha * BalanceLoss(
            shrink_pred,
            labels['shrink_map'],
            labels['shrink_mask'],
            balance_loss=self.balance_loss,
            main_loss_type=self.main_loss_type,
            negative_ratio=self.ohem_ratio)
        # Threshold-map regression, scaled by beta.
        threshold_loss = self.beta * MaskL1Loss(
            threshold_pred, labels['threshold_map'], labels['threshold_mask'])
        # Binary map shares the shrink ground truth, unscaled.
        binary_loss = DiceLoss(binary_pred, labels['shrink_map'],
                               labels['shrink_mask'])

        total = shrink_loss + threshold_loss + binary_loss
        return {'total_loss': total,
                "loss_shrink_maps": shrink_loss,
                "loss_threshold_maps": threshold_loss,
                "loss_binary_maps": binary_loss}
69 |
--------------------------------------------------------------------------------
/ppocr/modeling/losses/det_east_loss.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle.fluid as fluid
20 |
21 |
class EASTLoss(object):
    """
    EAST Loss function: a (down-weighted) dice loss on the score map plus
    a normalized smooth-L1 loss over the 8-channel quad geometry map.
    """

    def __init__(self, params=None):
        super(EASTLoss, self).__init__()

    def __call__(self, predicts, labels):
        """Return a dict with the total loss and its two components."""
        f_score = predicts['f_score']
        f_geo = predicts['f_geo']
        l_score = labels['score']
        l_geo = labels['geo']
        l_mask = labels['mask']

        # Dice loss on the score map, restricted to valid (masked) pixels.
        overlap = fluid.layers.reduce_sum(f_score * l_score * l_mask)
        area_sum = fluid.layers.reduce_sum(f_score * l_mask) \
            + fluid.layers.reduce_sum(l_score * l_mask)
        dice_loss = 1 - 2 * overlap / (area_sum + 1e-5)

        # Smooth-L1 loss over the 8 geometry channels.  The label tensor
        # carries one extra trailing channel used as a per-pixel
        # normalization term.
        channels = 8
        l_geo_split = fluid.layers.split(
            l_geo, num_or_sections=channels + 1, dim=1)
        f_geo_split = fluid.layers.split(f_geo, num_or_sections=channels, dim=1)
        smooth_l1 = 0
        for l_chan, f_chan in zip(l_geo_split[:channels], f_geo_split):
            abs_diff = fluid.layers.abs(l_chan - f_chan)
            inside = fluid.layers.cast(
                fluid.layers.less_than(abs_diff, l_score), dtype='float32')
            # quadratic inside the threshold, linear outside
            per_pixel = abs_diff * abs_diff * inside + \
                (abs_diff - 0.5) * (1.0 - inside)
            smooth_l1 += l_geo_split[-1] / channels * per_pixel * l_score
        smooth_l1_loss = fluid.layers.reduce_mean(smooth_l1 * l_score)

        # The dice term is scaled down so geometry regression dominates.
        dice_loss = dice_loss * 0.01
        total_loss = dice_loss + smooth_l1_loss
        return {'total_loss': total_loss, "dice_loss": dice_loss,
                "smooth_l1_loss": smooth_l1_loss}
62 |
--------------------------------------------------------------------------------
/ppocr/modeling/losses/det_sast_loss.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle.fluid as fluid
20 |
21 |
class SASTLoss(object):
    """
    SAST Loss function.

    Total loss = dice loss on the text-center score map plus three
    normalized smooth-L1 regression losses (border offsets, text-vertex
    offsets, text-center offsets) combined with fixed weights.
    """

    def __init__(self, params=None):
        super(SASTLoss, self).__init__()

    def _masked_smooth_l1_loss(self, f_pred, l_label, l_score, l_mask,
                               channels):
        """Shared smooth-L1 regression loss for the border/tvo/tco heads.

        The original code repeated this computation three times verbatim
        (only the channel count differed); it is factored out here.

        args:
            f_pred: predicted map with `channels` channels.
            l_label: ground truth with `channels` value channels plus one
                trailing per-pixel normalization channel.
            l_score: 1-channel text score map (positive-region indicator).
            l_mask: 1-channel valid-pixel mask.
            channels (int): number of regression channels (4, 8 or 2).
        return:
            scalar loss tensor.
        """
        l_split, l_norm = fluid.layers.split(
            l_label, num_or_sections=[channels, 1], dim=1)
        # Broadcast the 1-channel norm / score / mask maps to all channels.
        l_norm_split = fluid.layers.expand(
            x=l_norm, expand_times=[1, channels, 1, 1])
        l_score_split = fluid.layers.expand(
            x=l_score, expand_times=[1, channels, 1, 1])
        l_mask_split = fluid.layers.expand(
            x=l_mask, expand_times=[1, channels, 1, 1])

        diff = l_split - f_pred
        abs_diff = fluid.layers.abs(diff)
        sign = fluid.layers.cast(abs_diff < 1.0, dtype='float32')
        # the quadratic/linear branch selector must not receive gradients
        sign.stop_gradient = True
        in_loss = 0.5 * abs_diff * abs_diff * sign + \
                  (abs_diff - 0.5) * (1.0 - sign)
        out_loss = l_norm_split * in_loss
        loss = fluid.layers.reduce_sum(out_loss * l_score_split * l_mask_split) / \
               (fluid.layers.reduce_sum(l_score_split * l_mask_split) + 1e-5)
        return loss

    def __call__(self, predicts, labels):
        """
        tcl_pos: N x 128 x 3
        tcl_mask: N x 128 x 1
        tcl_label: N x X list or LoDTensor
        """
        f_score = predicts['f_score']
        f_border = predicts['f_border']
        f_tvo = predicts['f_tvo']
        f_tco = predicts['f_tco']

        l_score = labels['input_score']
        l_border = labels['input_border']
        l_mask = labels['input_mask']
        l_tvo = labels['input_tvo']
        l_tco = labels['input_tco']

        #score_loss: dice loss over valid pixels
        intersection = fluid.layers.reduce_sum(f_score * l_score * l_mask)
        union = fluid.layers.reduce_sum(f_score * l_mask) + \
                fluid.layers.reduce_sum(l_score * l_mask)
        score_loss = 1.0 - 2 * intersection / (union + 1e-5)

        #border / tvo / tco losses share one smooth-L1 implementation
        border_loss = self._masked_smooth_l1_loss(
            f_border, l_border, l_score, l_mask, channels=4)
        tvo_loss = self._masked_smooth_l1_loss(
            f_tvo, l_tvo, l_score, l_mask, channels=8)
        tco_loss = self._masked_smooth_l1_loss(
            f_tco, l_tco, l_score, l_mask, channels=2)

        # total loss: fixed per-component weights
        tvo_lw, tco_lw = 1.5, 1.5
        score_lw, border_lw = 1.0, 1.0
        total_loss = score_loss * score_lw + border_loss * border_lw + \
                     tvo_loss * tvo_lw + tco_loss * tco_lw

        losses = {'total_loss':total_loss, "score_loss":score_loss,\
            "border_loss":border_loss, 'tvo_loss':tvo_loss, 'tco_loss':tco_loss}
        return losses
--------------------------------------------------------------------------------
/ppocr/modeling/losses/rec_attention_loss.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import math
20 |
21 | import paddle
22 | import paddle.fluid as fluid
23 | from paddle.fluid.param_attr import ParamAttr
24 | import numpy as np
25 |
26 |
class AttentionLoss(object):
    """Summed cross-entropy loss for the attention recognition head."""

    def __init__(self, params):
        super(AttentionLoss, self).__init__()
        # size of the character dictionary (kept for config parity)
        self.char_num = params['char_num']

    def __call__(self, predicts, labels):
        """Return the summed cross entropy between the predicted per-step
        distributions and the int64-cast target sequence."""
        target = fluid.layers.cast(x=labels['label_out'], dtype='int64')
        per_step_cost = fluid.layers.cross_entropy(
            input=predicts['predict'], label=target)
        return fluid.layers.reduce_sum(per_step_cost)
40 |
--------------------------------------------------------------------------------
/ppocr/modeling/losses/rec_ctc_loss.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import math
20 |
21 | import paddle
22 | import paddle.fluid as fluid
23 |
24 |
class CTCLoss(object):
    """Summed warp-CTC loss for the CTC recognition head."""

    def __init__(self, params):
        super(CTCLoss, self).__init__()
        # dictionary size; the blank label index equals char_num
        self.char_num = params['char_num']

    def __call__(self, predicts, labels):
        """Return the summed per-sample CTC cost (normalized by timesteps)."""
        per_sample_cost = fluid.layers.warpctc(
            input=predicts['predict'],
            label=labels['label'],
            blank=self.char_num,
            norm_by_times=True)
        return fluid.layers.reduce_sum(per_sample_cost)
38 |
--------------------------------------------------------------------------------
/ppocr/modeling/losses/rec_srn_loss.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import math
20 |
21 | import paddle
22 | import paddle.fluid as fluid
23 |
24 |
class SRNLoss(object):
    """Weighted sum of the three SRN output-branch cross-entropy losses."""

    def __init__(self, params):
        super(SRNLoss, self).__init__()
        # dictionary size (kept for config parity)
        self.char_num = params['char_num']

    def __call__(self, predicts, others):
        """Return [total cost, vsfd cost, word cost] for the three branches."""
        label = others['label']
        lbl_weight = others['lbl_weight']  # read for interface parity; unused
        casted_label = fluid.layers.cast(x=label, dtype='int64')

        def _branch_cost(branch_predict):
            # summed cross entropy of one branch, reshaped to a 1-D tensor
            cost = fluid.layers.cross_entropy(
                input=branch_predict, label=casted_label)
            return fluid.layers.reshape(
                x=fluid.layers.reduce_sum(cost), shape=[1])

        cost_word = _branch_cost(predicts['word_out'])
        cost_gsrm = _branch_cost(predicts['gsrm_out'])
        cost_vsfd = _branch_cost(predicts['predict'])

        # fixed branch weights: word 1.0, vsfd 2.0, gsrm 0.15
        sum_cost = fluid.layers.sum(
            [cost_word, cost_vsfd * 2.0, cost_gsrm * 0.15])
        return [sum_cost, cost_vsfd, cost_word]
56 |
--------------------------------------------------------------------------------
/ppocr/modeling/stns/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/optimizer.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 | from __future__ import absolute_import
15 | from __future__ import division
16 | from __future__ import print_function
17 | import math
18 | import paddle.fluid as fluid
19 | from paddle.fluid.regularizer import L2Decay
20 | from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
21 | import paddle.fluid.layers.ops as ops
22 |
23 | from ppocr.utils.utility import initial_logger
24 |
25 | logger = initial_logger()
26 |
27 |
def cosine_decay_with_warmup(learning_rate,
                             step_each_epoch,
                             epochs=500,
                             warmup_minibatch=1000):
    """
    Cosine learning-rate decay with a linear warmup phase.

    During the first `warmup_minibatch` steps the rate grows linearly from
    0 to `learning_rate`; afterwards it follows a half-cosine decay toward
    0 over the remaining `epochs * step_each_epoch` steps.

    args:
        learning_rate(float): initial learning rate
        step_each_epoch (int): number of steps per epoch in training
        epochs(int): number of training epochs
        warmup_minibatch(int): number of minibatches used for warmup
    return:
        lr(tensor): learning rate tensor
    """
    global_step = _decay_step_counter()
    # Persistable variable that the Switch branches below write into.
    lr = fluid.layers.tensor.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")

    warmup_steps = fluid.layers.fill_constant(
        shape=[1],
        dtype='float32',
        value=float(warmup_minibatch),
        force_cpu=True)

    with fluid.layers.control_flow.Switch() as switch:
        with switch.case(global_step < warmup_steps):
            # Linear ramp: completed warmup fraction times the base rate.
            warm_lr = learning_rate * (1.0 * global_step / warmup_steps)
            fluid.layers.tensor.assign(input=warm_lr, output=lr)
        with switch.default():
            # Half-cosine decay over the post-warmup schedule.
            cos_arg = (global_step - warmup_steps) * \
                (math.pi / (epochs * step_each_epoch))
            decayed_lr = learning_rate * (ops.cos(cos_arg) + 1) / 2
            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
    return lr
67 |
68 |
def AdamDecay(params, parameter_list=None):
    """
    Build an Adam optimizer from the config dict.

    args:
        params(dict): super parameters; reads 'base_lr', 'beta1', 'beta2',
            optional 'l2_decay', and an optional 'decay' sub-dict whose
            'function' key selects the learning-rate schedule.
        parameter_list (list): list of Variable names to update to minimize loss
    return:
        optimizer: a Adam optimizer instance
    """
    base_lr = params['base_lr']
    beta1 = params['beta1']
    beta2 = params['beta2']
    l2_decay = params.get("l2_decay", 0.0)

    if 'decay' in params:
        supported_decay_mode = [
            "cosine_decay", "cosine_decay_warmup", "piecewise_decay"
        ]
        decay_params = params['decay']
        decay_mode = decay_params['function']
        assert decay_mode in supported_decay_mode, \
            "Supported decay mode is {}, but got {}".format(
                supported_decay_mode, decay_mode)

        if decay_mode == "cosine_decay":
            base_lr = fluid.layers.cosine_decay(
                learning_rate=base_lr,
                step_each_epoch=decay_params['step_each_epoch'],
                epochs=decay_params['total_epoch'])
        elif decay_mode == "cosine_decay_warmup":
            base_lr = cosine_decay_with_warmup(
                learning_rate=base_lr,
                step_each_epoch=decay_params['step_each_epoch'],
                epochs=decay_params['total_epoch'],
                warmup_minibatch=decay_params.get("warmup_minibatch", 1000))
        elif decay_mode == "piecewise_decay":
            boundaries = decay_params["boundaries"]
            decay_rate = decay_params["decay_rate"]
            # one value per interval: base_lr, base_lr*rate, base_lr*rate^2, ...
            values = [
                base_lr * decay_rate**idx
                for idx in range(len(boundaries) + 1)
            ]
            base_lr = fluid.layers.piecewise_decay(boundaries, values)

    return fluid.optimizer.Adam(
        learning_rate=base_lr,
        beta1=beta1,
        beta2=beta2,
        regularization=L2Decay(regularization_coeff=l2_decay),
        parameter_list=parameter_list)
124 |
125 |
def RMSProp(params, parameter_list=None):
    """
    Build a RMSProp optimizer from the config dict.

    args:
        params(dict): the super parameters; reads 'base_lr', 'l2_decay'
            and an optional 'decay' sub-dict selecting the LR schedule
            ('cosine_decay' or 'piecewise_decay').
        parameter_list (list): list of Variable names to update to minimize loss
    return:
        optimizer: a RMSProp optimizer instance
    """
    base_lr = params.get("base_lr", 0.001)
    l2_decay = params.get("l2_decay", 0.00005)

    if 'decay' in params:
        supported_decay_mode = ["cosine_decay", "piecewise_decay"]
        params = params['decay']
        decay_mode = params['function']
        assert decay_mode in supported_decay_mode, "Supported decay mode is {}, but got {}".format(
            supported_decay_mode, decay_mode)

        if decay_mode == "cosine_decay":
            step_each_epoch = params['step_each_epoch']
            total_epoch = params['total_epoch']
            base_lr = fluid.layers.cosine_decay(
                learning_rate=base_lr,
                step_each_epoch=step_each_epoch,
                epochs=total_epoch)
        elif decay_mode == "piecewise_decay":
            boundaries = params["boundaries"]
            decay_rate = params["decay_rate"]
            # one value per interval: base_lr, base_lr*rate, base_lr*rate^2, ...
            values = [
                base_lr * decay_rate**idx
                for idx in range(len(boundaries) + 1)
            ]
            base_lr = fluid.layers.piecewise_decay(boundaries, values)

    # BUG FIX: `parameter_list` was accepted but never forwarded, so in
    # dygraph mode the optimizer ignored the variables it was asked to
    # update.  Forward it, consistent with AdamDecay above.
    optimizer = fluid.optimizer.RMSProp(
        learning_rate=base_lr,
        regularization=fluid.regularizer.L2Decay(regularization_coeff=l2_decay),
        parameter_list=parameter_list)

    return optimizer
166 |
--------------------------------------------------------------------------------
/ppocr/postprocess/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/ppocr/postprocess/__init__.py
--------------------------------------------------------------------------------
/ppocr/postprocess/db_postprocess.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle
20 | import paddle.fluid as fluid
21 |
22 | import numpy as np
23 | import string
24 | import cv2
25 | from shapely.geometry import Polygon
26 | import pyclipper
27 | from copy import deepcopy
28 |
29 |
class DBPostProcess(object):
    """
    The post process for Differentiable Binarization (DB): converts the
    predicted probability maps into quadrilateral text boxes.
    """

    def __init__(self, params):
        """
        args:
            params (dict): expects 'thresh', 'box_thresh',
                'max_candidates' and 'unclip_ratio'.
        """
        self.thresh = params['thresh']
        self.box_thresh = params['box_thresh']
        self.max_candidates = params['max_candidates']
        self.unclip_ratio = params['unclip_ratio']
        # boxes whose shorter side is below this many pixels are discarded
        self.min_size = 3
        # 2x2 kernel used to dilate the binarized map before contour search
        self.dilation_kernel = np.array([[1, 1], [1, 1]])

    def boxes_from_bitmap(self, pred, mask):
        """
        Get boxes from the binarized image predicted by DB.
        :param pred: the binarized image predicted by DB.
        :param mask: new 'pred' after threshold filtering.
        :return: (boxes, the score of each boxes)
        """
        dest_height, dest_width = pred.shape[-2:]
        bitmap = deepcopy(mask)
        height, width = bitmap.shape

        outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)
        # OpenCV 3.x returns (img, contours, hierarchy); 4.x returns
        # (contours, hierarchy) -- support both.
        if len(outs) == 3:
            img, contours, _ = outs[0], outs[1], outs[2]
        elif len(outs) == 2:
            contours, _ = outs[0], outs[1]

        num_contours = min(len(contours), self.max_candidates)
        # NOTE: contours skipped below leave all-zero rows / zero scores;
        # the caller's `score > box_thresh` check filters those out.
        boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)
        scores = np.zeros((num_contours, ), dtype=np.float32)

        for index in range(num_contours):
            contour = contours[index]
            points, sside = self.get_mini_boxes(contour)
            if sside < self.min_size:
                continue
            points = np.array(points)
            score = self.box_score_fast(pred, points.reshape(-1, 2))
            if self.box_thresh > score:
                continue

            box = self.unclip(points).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue
            box = np.array(box)
            # pred may come from a framework tensor whose shape entries are
            # 0-d arrays; unwrap them to plain ints
            if not isinstance(dest_width, int):
                dest_width = dest_width.item()
                dest_height = dest_height.item()

            # map box coordinates from bitmap scale back to the map size
            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes[index, :, :] = box.astype(np.int16)
            scores[index] = score
        return boxes, scores

    def unclip(self, box):
        """
        Shrink or expand the box according to 'unclip_ratio'
        :param box: The predicted box.
        :return: uncliped box
        """
        unclip_ratio = self.unclip_ratio
        poly = Polygon(box)
        # offset distance proportional to area/perimeter (per the DB paper)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        expanded = np.array(offset.Execute(distance))
        return expanded

    def get_mini_boxes(self, contour):
        """
        Get boxes from the contour or box.
        :param contour: The predicted contour.
        :return: (4 corner points ordered top-left, top-right, bottom-right,
            bottom-left; length of the rectangle's shorter side)
        """
        bounding_box = cv2.minAreaRect(contour)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        # points are sorted by x; decide top/bottom within each x pair by y
        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
        if points[1][1] > points[0][1]:
            index_1 = 0
            index_4 = 1
        else:
            index_1 = 1
            index_4 = 0
        if points[3][1] > points[2][1]:
            index_2 = 2
            index_3 = 3
        else:
            index_2 = 3
            index_3 = 2

        box = [
            points[index_1], points[index_2], points[index_3], points[index_4]
        ]
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
        """
        Calculate the score of box: mean of the prediction map inside it.
        :param bitmap: The binarized image predicted by DB.
        :param _box: The predicted box
        :return: score
        """
        h, w = bitmap.shape[:2]
        box = _box.copy()
        # BUG FIX: `np.int` was deprecated in NumPy 1.20 and removed in
        # 1.24, raising AttributeError here; use np.int32 instead.
        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)

        # rasterize the box into a mask local to its bounding rectangle
        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def __call__(self, outs_dict, ratio_list):
        """Decode a batch of probability maps into per-image box arrays.

        :param outs_dict: dict with 'maps' (N x C x H x W prediction).
        :param ratio_list: per-image (ratio_h, ratio_w) resize factors.
        :return: list (one entry per image) of box arrays.
        """
        pred = outs_dict['maps']

        pred = pred[:, 0, :, :]
        segmentation = pred > self.thresh
        boxes_batch = []
        for batch_index in range(pred.shape[0]):
            # dilate the binary map slightly to merge close components
            mask = cv2.dilate(
                np.array(segmentation[batch_index]).astype(np.uint8),
                self.dilation_kernel)
            tmp_boxes, tmp_scores = self.boxes_from_bitmap(pred[batch_index],
                                                           mask)

            boxes = []
            for k in range(len(tmp_boxes)):
                if tmp_scores[k] > self.box_thresh:
                    boxes.append(tmp_boxes[k])
            if len(boxes) > 0:
                boxes = np.array(boxes)

                # rescale from network-input size back to the original image
                ratio_h, ratio_w = ratio_list[batch_index]
                boxes[:, :, 0] = boxes[:, :, 0] / ratio_w
                boxes[:, :, 1] = boxes[:, :, 1] / ratio_h

            boxes_batch.append(boxes)
        return boxes_batch
181 |
--------------------------------------------------------------------------------
/ppocr/postprocess/east_postprocess.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import numpy as np
20 | from .locality_aware_nms import nms_locality
21 | import cv2
22 |
23 | import os
24 | import sys
25 | __dir__ = os.path.dirname(os.path.abspath(__file__))
26 | sys.path.append(__dir__)
27 | sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
28 |
29 |
class EASTPostPocess(object):
    """
    The post process for EAST: decodes score and geometry maps into
    quadrilateral text boxes.
    """

    def __init__(self, params):
        self.score_thresh = params['score_thresh']
        self.cover_thresh = params['cover_thresh']
        self.nms_thresh = params['nms_thresh']

        # c++ la-nms is faster, but only support python 3.5
        self.is_python35 = (sys.version_info.major == 3 and
                            sys.version_info.minor == 5)

    def restore_rectangle_quad(self, origin, geometry):
        """
        Restore rectangle from quadrangle.
        """
        # each pixel predicts offsets to the 4 corners: corner = pixel - offset
        tiled_origin = np.concatenate(
            (origin, origin, origin, origin), axis=1)  # (n, 8)
        return (tiled_origin - geometry).reshape((-1, 4, 2))  # (n, 4, 2)

    def detect(self,
               score_map,
               geo_map,
               score_thresh=0.8,
               cover_thresh=0.1,
               nms_thresh=0.2):
        """
        restore text boxes from score map and geo map
        """
        score_map = score_map[0]
        # (C, H, W) -> (H, W, C)
        geo_map = np.swapaxes(geo_map, 1, 0)
        geo_map = np.swapaxes(geo_map, 1, 2)
        # keep only confidently-scored pixels
        xy_text = np.argwhere(score_map > score_thresh)
        if len(xy_text) == 0:
            return []
        # process text pixels top to bottom
        xy_text = xy_text[np.argsort(xy_text[:, 0])]
        # restore quad proposals; x4 because maps are at 1/4 input resolution
        text_box_restored = self.restore_rectangle_quad(
            xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = text_box_restored.reshape((-1, 8))
        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
        if self.is_python35:
            import lanms
            boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
        else:
            boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
        if boxes.shape[0] == 0:
            return []
        # Here we filter some low score boxes by the average score map,
        # this is different from the orginal paper.
        for i, box in enumerate(boxes):
            mask = np.zeros_like(score_map, dtype=np.uint8)
            cv2.fillPoly(mask, box[:8].reshape(
                (-1, 4, 2)).astype(np.int32) // 4, 1)
            boxes[i, 8] = cv2.mean(score_map, mask)[0]
        return boxes[boxes[:, 8] > cover_thresh]

    def sort_poly(self, p):
        """
        Sort polygons: rotate so the point with the smallest x+y comes
        first, then make the first edge the more horizontal one.
        """
        start = np.argmin(np.sum(p, axis=1))
        p = p[[start, (start + 1) % 4,
               (start + 2) % 4, (start + 3) % 4]]
        if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
            return p
        return p[[0, 3, 2, 1]]

    def __call__(self, outs_dict, ratio_list):
        """Decode a batch of score/geo maps into per-image box arrays."""
        score_list = outs_dict['f_score']
        geo_list = outs_dict['f_geo']
        dt_boxes_list = []
        for ino in range(len(ratio_list)):
            boxes = self.detect(
                score_map=score_list[ino],
                geo_map=geo_list[ino],
                score_thresh=self.score_thresh,
                cover_thresh=self.cover_thresh,
                nms_thresh=self.nms_thresh)
            boxes_norm = []
            if len(boxes) > 0:
                # undo the resize applied to the network input
                ratio_h, ratio_w = ratio_list[ino]
                boxes = boxes[:, :8].reshape((-1, 4, 2))
                boxes[:, :, 0] /= ratio_w
                boxes[:, :, 1] /= ratio_h
                for box in boxes:
                    box = self.sort_poly(box.astype(np.int32))
                    # drop degenerate boxes (either side shorter than 5 px)
                    if np.linalg.norm(box[0] - box[1]) < 5 \
                            or np.linalg.norm(box[3] - box[0]) < 5:
                        continue
                    boxes_norm.append(box)
            dt_boxes_list.append(np.array(boxes_norm))
        return dt_boxes_list
137 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/.gitignore:
--------------------------------------------------------------------------------
1 | adaptor.so
2 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/.ycm_extra_conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Copyright (C) 2014 Google Inc.
4 | #
5 | # This file is part of YouCompleteMe.
6 | #
7 | # YouCompleteMe is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU General Public License as published by
9 | # the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # YouCompleteMe is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU General Public License
18 | # along with YouCompleteMe.  If not, see <http://www.gnu.org/licenses/>.
19 |
20 | import os
21 | import sys
22 | import glob
23 | import ycm_core
24 |
# These are the compilation flags that will be used in case there's no
# compilation database set (by default, one is not set).
# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR.
# Make this script's own directory importable when ycmd loads the file.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))


BASE_DIR = os.path.dirname(os.path.realpath(__file__))

# `python_config` shells out to python-config so the flags below pick up the
# interpreter's C API include paths (needed to compile the pybind11 adaptor).
from plumbum.cmd import python_config


# Fallback compile flags used when no compilation database is available.
flags = [
    '-Wall',
    '-Wextra',
    '-Wnon-virtual-dtor',
    '-Winvalid-pch',
    '-Wno-unused-local-typedefs',
    '-std=c++11',
    '-x', 'c++',
    '-Iinclude',
] + python_config('--cflags').split()


# Set this to the absolute path to the folder (NOT the file!) containing the
# compile_commands.json file to use that instead of 'flags'. See here for
# more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html
#
# Most projects will NOT need to set this to anything; you can just change the
# 'flags' list of compilation flags.
compilation_database_folder = ''

# The empty path never exists, so by default no database is loaded.
if os.path.exists( compilation_database_folder ):
    database = ycm_core.CompilationDatabase( compilation_database_folder )
else:
    database = None

# Source-file extensions probed when resolving flags for a header file.
SOURCE_EXTENSIONS = [ '.cpp', '.cxx', '.cc', '.c', '.m', '.mm' ]
62 |
def DirectoryOfThisScript():
    """Return the absolute directory that contains this configuration file."""
    return os.path.dirname(os.path.abspath(__file__))
65 |
66 |
def MakeRelativePathsInFlagsAbsolute(flags, working_directory):
    """Rewrite relative path arguments in compiler *flags* as absolute paths.

    Handles both the split form ('-I', 'include') and the fused form
    ('-Iinclude') of the path flags. Flags that are already absolute, or
    unrelated to paths, pass through unchanged. With no working directory
    the flags are returned as a shallow copy.
    """
    if not working_directory:
        return list(flags)

    path_prefixes = ('-isystem', '-I', '-iquote', '--sysroot=')
    rewritten_flags = []
    pending_path = False  # previous flag was a bare path option, e.g. '-I'

    for flag in flags:
        rewritten = flag

        if pending_path:
            pending_path = False
            if not flag.startswith('/'):
                rewritten = os.path.join(working_directory, flag)

        for prefix in path_prefixes:
            if flag == prefix:
                # Path comes as the *next* flag.
                pending_path = True
                break
            if flag.startswith(prefix):
                # Fused flag: keep the prefix, absolutize the remainder.
                rewritten = prefix + os.path.join(working_directory,
                                                 flag[len(prefix):])
                break

        if rewritten:
            rewritten_flags.append(rewritten)
    return rewritten_flags
94 |
95 |
def IsHeaderFile(filename):
    """Return True when *filename* has a C/C++ header extension."""
    return os.path.splitext(filename)[1] in ('.h', '.hxx', '.hpp', '.hh')
99 |
100 |
def GetCompilationInfoForFile(filename):
    """Look up compilation info for *filename* in the compilation database.

    The compile_commands.json generated by CMake has no entries for header
    files, so for a header we probe sibling source files (same basename,
    each known source extension) and reuse the first match that has flags.
    Returns None when a header has no usable sibling.
    """
    if not IsHeaderFile(filename):
        return database.GetCompilationInfoForFile(filename)

    basename = os.path.splitext(filename)[0]
    for extension in SOURCE_EXTENSIONS:
        candidate = basename + extension
        if not os.path.exists(candidate):
            continue
        compilation_info = database.GetCompilationInfoForFile(candidate)
        if compilation_info.compiler_flags_:
            return compilation_info
    return None
117 |
118 |
# This is the entry point; this function is called by ycmd to produce flags for
# a file.
def FlagsForFile(filename, **kwargs):
    """ycmd entry point: return the compile-flag dict for *filename*."""
    if database:
        # NOTE: compilation_info.compiler_flags_ is a "list-like" StringVec
        # object, not a real python list.
        info = GetCompilationInfoForFile(filename)
        if not info:
            return None
        final_flags = MakeRelativePathsInFlagsAbsolute(
            info.compiler_flags_, info.compiler_working_dir_)
    else:
        final_flags = MakeRelativePathsInFlagsAbsolute(
            flags, DirectoryOfThisScript())

    return {
        'flags': final_flags,
        'do_cache': True
    }
140 |
141 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/Makefile:
--------------------------------------------------------------------------------
# Build flags: C++11, optimized, plus the interpreter's compile/link flags
# from python3-config (Python C API include dirs and libraries).
CXXFLAGS = -I include -std=c++11 -O3 $(shell python3-config --cflags)
LDFLAGS = $(shell python3-config --ldflags)

# Rebuild whenever the main header or anything under include/ changes.
DEPS = lanms.h $(shell find include -xtype f)
CXX_SOURCES = adaptor.cpp include/clipper/clipper.cpp

LIB_SO = adaptor.so

# Compile the pybind11 adaptor into a shared library importable from Python.
$(LIB_SO): $(CXX_SOURCES) $(DEPS)
	$(CXX) -o $@ $(CXXFLAGS) $(LDFLAGS) $(CXX_SOURCES) --shared -fPIC

clean:
	rm -rf $(LIB_SO)
14 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/__init__.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import os
3 | import numpy as np
4 |
BASE_DIR = os.path.dirname(os.path.realpath(__file__))

# Build the C++ `adaptor` extension in-place at import time; `make` is a
# no-op when adaptor.so is already up to date. A non-zero exit code means
# the build failed, so fail loudly here rather than at first call.
if subprocess.call(['make', '-C', BASE_DIR]) != 0:  # non-zero == build failure
    raise RuntimeError('Cannot compile lanms: {}'.format(BASE_DIR))
9 |
10 |
def merge_quadrangle_n9(polys, thres=0.3, precision=10000):
    """Merge overlapping quadrangles via locality-aware NMS.

    Args:
        polys: (n, 9) array — 8 corner coordinates followed by a score.
        thres: IoU threshold above which two quadrangles are merged.
        precision: coordinates are scaled up by this factor before the
            integer-based C++ NMS runs, then scaled back down.

    Returns:
        float32 array of merged quadrangles; empty when *polys* is empty
        or when NMS keeps nothing.
    """
    from .adaptor import merge_quadrangle_n9 as nms_impl
    if len(polys) == 0:
        return np.array([], dtype='float32')
    p = polys.copy()
    p[:, :8] *= precision
    ret = np.array(nms_impl(p, thres), dtype='float32')
    # Bug fix: an empty NMS result is a 1-D (0,) array, so the 2-D slice
    # ret[:, :8] would raise IndexError. Return it untouched instead.
    if ret.size == 0:
        return ret
    ret[:, :8] /= precision
    return ret
20 |
21 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/__main__.py:
--------------------------------------------------------------------------------
import numpy as np


from . import merge_quadrangle_n9

if __name__ == '__main__':
    # A unit square with confidence 1: (x1, y1, ..., x4, y4, score).
    base = np.array([0, 0, 0, 1, 1, 1, 1, 0, 1], dtype='float32')

    # Two heavily-overlapping squares should be merged; the shifted one
    # (offset by 2) stays separate.
    print(merge_quadrangle_n9(np.array([base, base + 0.1, base + 2])))
11 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/adaptor.cpp:
--------------------------------------------------------------------------------
1 | #include "pybind11/pybind11.h"
2 | #include "pybind11/numpy.h"
3 | #include "pybind11/stl.h"
4 | #include "pybind11/stl_bind.h"
5 |
6 | #include "lanms.h"
7 |
8 | namespace py = pybind11;
9 |
10 |
11 | namespace lanms_adaptor {
12 |
13 | std::vector> polys2floats(const std::vector &polys) {
14 | std::vector> ret;
15 | for (size_t i = 0; i < polys.size(); i ++) {
16 | auto &p = polys[i];
17 | auto &poly = p.poly;
18 | ret.emplace_back(std::vector{
19 | float(poly[0].X), float(poly[0].Y),
20 | float(poly[1].X), float(poly[1].Y),
21 | float(poly[2].X), float(poly[2].Y),
22 | float(poly[3].X), float(poly[3].Y),
23 | float(p.score),
24 | });
25 | }
26 |
27 | return ret;
28 | }
29 |
30 |
31 | /**
32 | *
33 | * \param quad_n9 an n-by-9 numpy array, where first 8 numbers denote the
34 | * quadrangle, and the last one is the score
35 | * \param iou_threshold two quadrangles with iou score above this threshold
36 | * will be merged
37 | *
38 | * \return an n-by-9 numpy array, the merged quadrangles
39 | */
40 | std::vector> merge_quadrangle_n9(
41 | py::array_t quad_n9,
42 | float iou_threshold) {
43 | auto pbuf = quad_n9.request();
44 | if (pbuf.ndim != 2 || pbuf.shape[1] != 9)
45 | throw std::runtime_error("quadrangles must have a shape of (n, 9)");
46 | auto n = pbuf.shape[0];
47 | auto ptr = static_cast(pbuf.ptr);
48 | return polys2floats(lanms::merge_quadrangle_n9(ptr, n, iou_threshold));
49 | }
50 |
51 | }
52 |
53 | PYBIND11_PLUGIN(adaptor) {
54 | py::module m("adaptor", "NMS");
55 |
56 | m.def("merge_quadrangle_n9", &lanms_adaptor::merge_quadrangle_n9,
57 | "merge quadrangels");
58 |
59 | return m.ptr();
60 | }
61 |
62 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/include/clipper/clipper.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/ppocr/postprocess/lanms/include/clipper/clipper.cpp
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/include/pybind11/buffer_info.h:
--------------------------------------------------------------------------------
1 | /*
2 | pybind11/buffer_info.h: Python buffer object interface
3 |
4 | Copyright (c) 2016 Wenzel Jakob
5 |
6 | All rights reserved. Use of this source code is governed by a
7 | BSD-style license that can be found in the LICENSE file.
8 | */
9 |
10 | #pragma once
11 |
12 | #include "common.h"
13 |
14 | NAMESPACE_BEGIN(pybind11)
15 |
16 | /// Information record describing a Python buffer object
17 | struct buffer_info {
18 | void *ptr = nullptr; // Pointer to the underlying storage
19 | ssize_t itemsize = 0; // Size of individual items in bytes
20 | ssize_t size = 0; // Total number of entries
21 | std::string format; // For homogeneous buffers, this should be set to format_descriptor::format()
22 | ssize_t ndim = 0; // Number of dimensions
23 | std::vector shape; // Shape of the tensor (1 entry per dimension)
24 | std::vector strides; // Number of entries between adjacent entries (for each per dimension)
25 |
26 | buffer_info() { }
27 |
28 | buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim,
29 | detail::any_container shape_in, detail::any_container strides_in)
30 | : ptr(ptr), itemsize(itemsize), size(1), format(format), ndim(ndim),
31 | shape(std::move(shape_in)), strides(std::move(strides_in)) {
32 | if (ndim != (ssize_t) shape.size() || ndim != (ssize_t) strides.size())
33 | pybind11_fail("buffer_info: ndim doesn't match shape and/or strides length");
34 | for (size_t i = 0; i < (size_t) ndim; ++i)
35 | size *= shape[i];
36 | }
37 |
38 | template
39 | buffer_info(T *ptr, detail::any_container shape_in, detail::any_container strides_in)
40 | : buffer_info(private_ctr_tag(), ptr, sizeof(T), format_descriptor::format(), static_cast(shape_in->size()), std::move(shape_in), std::move(strides_in)) { }
41 |
42 | buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t size)
43 | : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize}) { }
44 |
45 | template
46 | buffer_info(T *ptr, ssize_t size)
47 | : buffer_info(ptr, sizeof(T), format_descriptor::format(), size) { }
48 |
49 | explicit buffer_info(Py_buffer *view, bool ownview = true)
50 | : buffer_info(view->buf, view->itemsize, view->format, view->ndim,
51 | {view->shape, view->shape + view->ndim}, {view->strides, view->strides + view->ndim}) {
52 | this->view = view;
53 | this->ownview = ownview;
54 | }
55 |
56 | buffer_info(const buffer_info &) = delete;
57 | buffer_info& operator=(const buffer_info &) = delete;
58 |
59 | buffer_info(buffer_info &&other) {
60 | (*this) = std::move(other);
61 | }
62 |
63 | buffer_info& operator=(buffer_info &&rhs) {
64 | ptr = rhs.ptr;
65 | itemsize = rhs.itemsize;
66 | size = rhs.size;
67 | format = std::move(rhs.format);
68 | ndim = rhs.ndim;
69 | shape = std::move(rhs.shape);
70 | strides = std::move(rhs.strides);
71 | std::swap(view, rhs.view);
72 | std::swap(ownview, rhs.ownview);
73 | return *this;
74 | }
75 |
76 | ~buffer_info() {
77 | if (view && ownview) { PyBuffer_Release(view); delete view; }
78 | }
79 |
80 | private:
81 | struct private_ctr_tag { };
82 |
83 | buffer_info(private_ctr_tag, void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim,
84 | detail::any_container &&shape_in, detail::any_container &&strides_in)
85 | : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in)) { }
86 |
87 | Py_buffer *view = nullptr;
88 | bool ownview = false;
89 | };
90 |
91 | NAMESPACE_BEGIN(detail)
92 |
93 | template struct compare_buffer_info {
94 | static bool compare(const buffer_info& b) {
95 | return b.format == format_descriptor::format() && b.itemsize == (ssize_t) sizeof(T);
96 | }
97 | };
98 |
99 | template struct compare_buffer_info::value>> {
100 | static bool compare(const buffer_info& b) {
101 | return (size_t) b.itemsize == sizeof(T) && (b.format == format_descriptor::value ||
102 | ((sizeof(T) == sizeof(long)) && b.format == (std::is_unsigned::value ? "L" : "l")) ||
103 | ((sizeof(T) == sizeof(size_t)) && b.format == (std::is_unsigned::value ? "N" : "n")));
104 | }
105 | };
106 |
107 | NAMESPACE_END(detail)
108 | NAMESPACE_END(pybind11)
109 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/include/pybind11/complex.h:
--------------------------------------------------------------------------------
1 | /*
2 | pybind11/complex.h: Complex number support
3 |
4 | Copyright (c) 2016 Wenzel Jakob
5 |
6 | All rights reserved. Use of this source code is governed by a
7 | BSD-style license that can be found in the LICENSE file.
8 | */
9 |
10 | #pragma once
11 |
12 | #include "pybind11.h"
13 | #include
14 |
15 | /// glibc defines I as a macro which breaks things, e.g., boost template names
16 | #ifdef I
17 | # undef I
18 | #endif
19 |
20 | NAMESPACE_BEGIN(pybind11)
21 |
22 | template struct format_descriptor, detail::enable_if_t::value>> {
23 | static constexpr const char c = format_descriptor::c;
24 | static constexpr const char value[3] = { 'Z', c, '\0' };
25 | static std::string format() { return std::string(value); }
26 | };
27 |
28 | template constexpr const char format_descriptor<
29 | std::complex, detail::enable_if_t::value>>::value[3];
30 |
31 | NAMESPACE_BEGIN(detail)
32 |
33 | template struct is_fmt_numeric, detail::enable_if_t::value>> {
34 | static constexpr bool value = true;
35 | static constexpr int index = is_fmt_numeric::index + 3;
36 | };
37 |
38 | template class type_caster> {
39 | public:
40 | bool load(handle src, bool convert) {
41 | if (!src)
42 | return false;
43 | if (!convert && !PyComplex_Check(src.ptr()))
44 | return false;
45 | Py_complex result = PyComplex_AsCComplex(src.ptr());
46 | if (result.real == -1.0 && PyErr_Occurred()) {
47 | PyErr_Clear();
48 | return false;
49 | }
50 | value = std::complex((T) result.real, (T) result.imag);
51 | return true;
52 | }
53 |
54 | static handle cast(const std::complex &src, return_value_policy /* policy */, handle /* parent */) {
55 | return PyComplex_FromDoubles((double) src.real(), (double) src.imag());
56 | }
57 |
58 | PYBIND11_TYPE_CASTER(std::complex, _("complex"));
59 | };
60 | NAMESPACE_END(detail)
61 | NAMESPACE_END(pybind11)
62 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/include/pybind11/eval.h:
--------------------------------------------------------------------------------
1 | /*
2 | pybind11/exec.h: Support for evaluating Python expressions and statements
3 | from strings and files
4 |
5 | Copyright (c) 2016 Klemens Morgenstern and
6 | Wenzel Jakob
7 |
8 | All rights reserved. Use of this source code is governed by a
9 | BSD-style license that can be found in the LICENSE file.
10 | */
11 |
12 | #pragma once
13 |
14 | #include "pybind11.h"
15 |
16 | NAMESPACE_BEGIN(pybind11)
17 |
/// How a string handed to eval() should be interpreted by the interpreter.
enum eval_mode {
    /// Evaluate a string containing an isolated expression
    eval_expr,

    /// Evaluate a string containing a single statement. Returns \c none
    eval_single_statement,

    /// Evaluate a string containing a sequence of statements. Returns \c none
    eval_statements
};
28 |
29 | template
30 | object eval(str expr, object global = globals(), object local = object()) {
31 | if (!local)
32 | local = global;
33 |
34 | /* PyRun_String does not accept a PyObject / encoding specifier,
35 | this seems to be the only alternative */
36 | std::string buffer = "# -*- coding: utf-8 -*-\n" + (std::string) expr;
37 |
38 | int start;
39 | switch (mode) {
40 | case eval_expr: start = Py_eval_input; break;
41 | case eval_single_statement: start = Py_single_input; break;
42 | case eval_statements: start = Py_file_input; break;
43 | default: pybind11_fail("invalid evaluation mode");
44 | }
45 |
46 | PyObject *result = PyRun_String(buffer.c_str(), start, global.ptr(), local.ptr());
47 | if (!result)
48 | throw error_already_set();
49 | return reinterpret_steal