├── .gitignore
├── README.md
├── config.py
├── demo
├── 8801_win32_JAP_2020-10-30-20-04-47-887566.jpg
├── CH1.jpg
├── CH2.jpg
├── DangoOCR_ENG_2021-01-03_19-43-13.jpg
├── ENG_2.jpg
└── KOR.jpg
├── demo_result
├── 8801_win32_JAP_2020-10-30-20-04-47-887566.jpg
├── CH1.jpg
├── CH2.jpg
├── DangoOCR_ENG_2021-01-03_19-43-13.jpg
├── ENG_2.jpg
└── KOR.jpg
├── download_model.sh
├── font
├── CH_ENG.TTC
├── KOR.ttf
└── japan.ttc
├── ocr_server.py
├── ppocr
├── __init__.py
├── data
│ ├── __init__.py
│ ├── cls
│ │ ├── __init__.py
│ │ ├── dataset_traversal.py
│ │ └── randaugment.py
│ ├── det
│ │ ├── __init__.py
│ │ ├── data_augment.py
│ │ ├── dataset_traversal.py
│ │ ├── db_process.py
│ │ ├── east_process.py
│ │ ├── make_border_map.py
│ │ ├── make_shrink_map.py
│ │ ├── random_crop_data.py
│ │ └── sast_process.py
│ ├── reader_main.py
│ └── rec
│ │ ├── __init__.py
│ │ ├── dataset_traversal.py
│ │ ├── img_tools.py
│ │ └── text_image_aug
│ │ ├── augment.py
│ │ └── warp_mls.py
├── modeling
│ ├── __init__.py
│ ├── architectures
│ │ ├── __init__.py
│ │ ├── cls_model.py
│ │ ├── det_model.py
│ │ └── rec_model.py
│ ├── backbones
│ │ ├── __init__.py
│ │ ├── det_mobilenet_v3.py
│ │ ├── det_resnet_vd.py
│ │ ├── det_resnet_vd_sast.py
│ │ ├── rec_mobilenet_v3.py
│ │ ├── rec_resnet_fpn.py
│ │ └── rec_resnet_vd.py
│ ├── common_functions.py
│ ├── heads
│ │ ├── __init__.py
│ │ ├── cls_head.py
│ │ ├── det_db_head.py
│ │ ├── det_east_head.py
│ │ ├── det_sast_head.py
│ │ ├── rec_attention_head.py
│ │ ├── rec_ctc_head.py
│ │ ├── rec_seq_encoder.py
│ │ ├── rec_srn_all_head.py
│ │ └── self_attention
│ │ │ ├── __init__.py
│ │ │ └── model.py
│ ├── losses
│ │ ├── __init__.py
│ │ ├── cls_loss.py
│ │ ├── det_basic_loss.py
│ │ ├── det_db_loss.py
│ │ ├── det_east_loss.py
│ │ ├── det_sast_loss.py
│ │ ├── rec_attention_loss.py
│ │ ├── rec_ctc_loss.py
│ │ └── rec_srn_loss.py
│ └── stns
│ │ ├── __init__.py
│ │ └── tps.py
├── optimizer.py
├── postprocess
│ ├── __init__.py
│ ├── db_postprocess.py
│ ├── east_postprocess.py
│ ├── lanms
│ │ ├── .gitignore
│ │ ├── .ycm_extra_conf.py
│ │ ├── Makefile
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── adaptor.cpp
│ │ ├── include
│ │ │ ├── clipper
│ │ │ │ ├── clipper.cpp
│ │ │ │ └── clipper.hpp
│ │ │ └── pybind11
│ │ │ │ ├── attr.h
│ │ │ │ ├── buffer_info.h
│ │ │ │ ├── cast.h
│ │ │ │ ├── chrono.h
│ │ │ │ ├── class_support.h
│ │ │ │ ├── common.h
│ │ │ │ ├── complex.h
│ │ │ │ ├── descr.h
│ │ │ │ ├── eigen.h
│ │ │ │ ├── embed.h
│ │ │ │ ├── eval.h
│ │ │ │ ├── functional.h
│ │ │ │ ├── numpy.h
│ │ │ │ ├── operators.h
│ │ │ │ ├── options.h
│ │ │ │ ├── pybind11.h
│ │ │ │ ├── pytypes.h
│ │ │ │ ├── stl.h
│ │ │ │ ├── stl_bind.h
│ │ │ │ └── typeid.h
│ │ └── lanms.h
│ ├── locality_aware_nms.py
│ └── sast_postprocess.py
└── utils
│ ├── __init__.py
│ ├── character.py
│ ├── check.py
│ ├── corpus
│ ├── occitan_corpus.txt
│ ├── readme.md
│ └── readme_ch.md
│ ├── dict
│ ├── french_dict.txt
│ ├── german_dict.txt
│ ├── japan_dict.txt
│ ├── korean_dict.txt
│ └── occitan_dict.txt
│ ├── ic15_dict.txt
│ ├── ppocr_keys_v1.txt
│ ├── save_load.py
│ ├── stats.py
│ └── utility.py
├── predict_system.py
├── prod_deploy.sh
├── requirements.txt
├── test.py
├── tools
├── infer
│ ├── __init__.py
│ ├── predict_cls.py
│ ├── predict_det.py
│ ├── predict_rec.py
│ └── utility.py
└── logger.py
└── translate
├── API.py
├── Bing.py
├── Google.py
├── GoogleJS.js
├── Tencent.py
├── baidufanyi.py
└── webtrans.js
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 | .idea/
9 |
10 | # Distribution / packaging
11 | .Python
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | pip-wheel-metadata/
25 | share/python-wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .nox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *.cover
51 | *.py,cover
52 | .hypothesis/
53 | .pytest_cache/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | .python-version
87 |
88 | # pipenv
89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
92 | # install all needed dependencies.
93 | #Pipfile.lock
94 |
95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
96 | __pypackages__/
97 |
98 | # Celery stuff
99 | celerybeat-schedule
100 | celerybeat.pid
101 |
102 | # SageMath parsed files
103 | *.sage.py
104 |
105 | # Environments
106 | .env
107 | .venv
108 | env/
109 | venv/
110 | ENV/
111 | env.bak/
112 | venv.bak/
113 |
114 | # Spyder project settings
115 | .spyderproject
116 | .spyproject
117 |
118 | # Rope project settings
119 | .ropeproject
120 |
121 | # mkdocs documentation
122 | /site
123 |
124 | # mypy
125 | .mypy_cache/
126 | .dmypy.json
127 | dmypy.json
128 |
129 | # Pyre type checker
130 | .pyre/
131 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Dango-OCR算法服务
2 |
3 | #### 说明:
4 | + [DangoOCR](https://github.com/zhangming8/Dango-ocr)是一个开源的文字识别工具,通过调用的本算法服务实现文字识别。
5 | + 本服务基于百度开源的[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR),如果要部署自己模型请在该项目中的develop分支训练。
6 | + 相关blog [使用飞桨一步步实现多语言OCR文字识别软件](https://blog.csdn.net/u010397980/article/details/111940566)
7 |
8 | #### 环境:
9 | + python>=3.6, paddlepaddle-gpu >= 1.8.5
10 |
11 | #### 训练模型:
12 | + 模型包括检测模型、识别模型。训练过程可以[参考文档](https://github.com/PaddlePaddle/PaddleOCR/tree/develop/doc/doc_ch)
13 | + 检测模型用的是DBnet, 所有的识别模型都用的是CRNN
14 |
15 | #### 导出模型:
16 | + 训练模型导出为inference模型(导出后不必重新定义网络结构,便于部署),[参考](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/inference.md)
17 |
18 | #### 部署/启动服务
19 | + 修改config.py中的检测(det_model_dir)和识别模型(rec_model_dir)路径。其中不同语言的检测模型是共用的,识别模型需要单独训练。
20 | + 目前所有语言的识别都没有使用方向模型,所以所有语言的use_angle_cls=False
21 | + 执行./prod_deploy.sh启动服务。其中'reco_language'为检测模型后面并联的多个识别模型,'port'为算法服务的端口号
22 | + 启动成功会在当前目录创建'log'文件夹,里面会记录启动的日志便于调试代码
23 |
24 | #### 测试/调用服务
25 | + 执行python test.py会读取'demo'文件夹内的图片并调用本服务,同时把结果保存在"demo_result"文件夹,如下图为几个示例
26 |
27 |
28 | + 结果 1
29 |
30 |

31 |
32 |
33 | + 结果 2
34 |
35 |

36 |
37 |
38 | + 结果 3
39 |
40 |

41 |
42 |
43 | + 结果 4
44 |
45 |

46 |
47 |
48 | + 结果 5
49 |
50 |

51 |
52 |
53 | + 结果 6
54 |
55 |

56 |
57 |
58 | #### 参考:
59 | + OCR算法参考百度PaddleOCR: https://github.com/PaddlePaddle/PaddleOCR
60 | + 本OCR服务: https://github.com/zhangming8/ocr_algo_server
61 | + 最终OCR软件: https://github.com/zhangming8/Dango-ocr
62 |
--------------------------------------------------------------------------------
/demo/8801_win32_JAP_2020-10-30-20-04-47-887566.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/8801_win32_JAP_2020-10-30-20-04-47-887566.jpg
--------------------------------------------------------------------------------
/demo/CH1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/CH1.jpg
--------------------------------------------------------------------------------
/demo/CH2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/CH2.jpg
--------------------------------------------------------------------------------
/demo/DangoOCR_ENG_2021-01-03_19-43-13.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/DangoOCR_ENG_2021-01-03_19-43-13.jpg
--------------------------------------------------------------------------------
/demo/ENG_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/ENG_2.jpg
--------------------------------------------------------------------------------
/demo/KOR.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo/KOR.jpg
--------------------------------------------------------------------------------
/demo_result/8801_win32_JAP_2020-10-30-20-04-47-887566.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/8801_win32_JAP_2020-10-30-20-04-47-887566.jpg
--------------------------------------------------------------------------------
/demo_result/CH1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/CH1.jpg
--------------------------------------------------------------------------------
/demo_result/CH2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/CH2.jpg
--------------------------------------------------------------------------------
/demo_result/DangoOCR_ENG_2021-01-03_19-43-13.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/DangoOCR_ENG_2021-01-03_19-43-13.jpg
--------------------------------------------------------------------------------
/demo_result/ENG_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/ENG_2.jpg
--------------------------------------------------------------------------------
/demo_result/KOR.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/demo_result/KOR.jpg
--------------------------------------------------------------------------------
/download_model.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Download and unpack the PaddleOCR inference models (one detection model
# plus one recognition model per supported language).

# download text detect model
save_path="inference/det_db"
mkdir -p "$save_path"
cd "$save_path" || exit 1
wget https://paddleocr.bj.bcebos.com/20-09-22/server/det/ch_ppocr_server_v1.1_det_infer.tar
tar xvf ch_ppocr_server_v1.1_det_infer.tar
cd - || exit 1

# download text recognize models
save_path="inference/rec_crnn"
mkdir -p "$save_path"
cd "$save_path" || exit 1
wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_infer.tar
wget https://paddleocr.bj.bcebos.com/20-09-22/server/rec/ch_ppocr_server_v1.1_rec_infer.tar
wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/en/en_ppocr_mobile_v1.1_rec_infer.tar
wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/fr/french_ppocr_mobile_v1.1_rec_infer.tar
wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/ge/german_ppocr_mobile_v1.1_rec_infer.tar
wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/jp/japan_ppocr_mobile_v1.1_rec_infer.tar
wget https://paddleocr.bj.bcebos.com/20-09-22/mobile/kr/korean_ppocr_mobile_v1.1_rec_infer.tar
# Use a glob instead of parsing `ls` output (robust to unusual filenames,
# see ShellCheck SC2045); quote expansions throughout.
for file in *.tar
do
    echo "unzip ${file}"
    tar xvf "${file}"
done
cd - || exit 1
echo "download done"
--------------------------------------------------------------------------------
/font/CH_ENG.TTC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/font/CH_ENG.TTC
--------------------------------------------------------------------------------
/font/KOR.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/font/KOR.ttf
--------------------------------------------------------------------------------
/font/japan.ttc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/font/japan.ttc
--------------------------------------------------------------------------------
/ocr_server.py:
--------------------------------------------------------------------------------
1 | # -*-coding:utf-8-*-
2 | import os
3 | import json
4 | import cv2
5 | import sys
6 | import traceback
7 | import argparse
8 | from flask import Flask, Response, request
9 | import datetime
10 | from queue import Queue
11 | import threading
12 | import multiprocessing
13 | import time
14 | import random
15 | from setproctitle import setproctitle
16 |
17 | from config import Config
18 | import tools.logger as logger_
19 | from tools.infer.utility import base64_to_cv2, mkdir
20 | from predict_system import OCR
21 | from translate.API import translate
22 |
# Flask application serving the OCR endpoint; static files served from root.
app = Flask("server", static_url_path='')
app.config['PROPAGATE_EXCEPTIONS'] = True
# Bounded hand-off queue feeding the background save_img() thread with
# (save_basename, decoded_images, ocr_result) triples.
_save_image_q = Queue(1000)

config = Config()
28 |
29 |
@app.route("/dango/algo/ocr/server", methods=['POST', 'GET'])
def ocr_server():
    """Handle one OCR request.

    Reads form fields: 'image' (base64 image), 'language_type', 'user_id',
    optional 'platform', and optional 'translate' ('yes'/'no').  Runs
    recognition, enqueues the image + result for asynchronous saving, and —
    when requested — translates the recognized text via the Baidu API.

    Returns a JSON Response: {'status': 0, 'data': {...}} on success,
    {'status': -1, 'data': 'None'} on any internal error.
    """
    try:
        logger.info("-" * 50)
        logger.info("端口 {} /dango/algo/ocr/server 收到请求".format(g_port))

        now_time = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        # Day prefix (YYYY-MM-DD) groups saved images into daily folders.
        day = "-".join(now_time.split("-")[:3])
        content = request.form

        images = content['image']
        language_type = content['language_type']
        user_id = content["user_id"]
        platform = content.get('platform', None)
        need_translate = content.get("translate", 'no')

        s1 = time.time()
        images_decode = [base64_to_cv2(images)]
        logger.info("收到: {}, {}, {}".format(user_id, platform, language_type))

        result = ocr.predict(language_type, images=images_decode)
        logger.info("识别结果为: {}, 是否需要翻译: {}".format(result, need_translate))
        save_basename = "{}/{}/{}_{}_{}_{}_{}".format(config.save_dir + "/" + g_port, day, g_port, platform, user_id,
                                                      language_type, now_time)
        # Persisting is done off-thread so the response is not blocked by disk I/O.
        _save_image_q.put([save_basename, images_decode, result])

        translated = False
        response_data = {'result': result, 'translated': translated}
        if need_translate == 'yes':
            logger.info("开始进行翻译...")
            s3 = time.time()
            # Pick one of the configured Baidu credentials at random to spread quota.
            rand_idx = random.randint(0, len(config.baidu_translate_secret_key) - 1)
            fanyi_app_id = config.baidu_translate_app_id[rand_idx]
            fanyi_secret_key = config.baidu_translate_secret_key[rand_idx]
            translate_result, translated = translate(result[0], fanyi_app_id, fanyi_secret_key, logger)
            if translated:
                logger.info("翻译成功: {}, 结果为: {}".format(translated, translate_result))
                response_data['translate_result'] = translate_result
                response_data['translated'] = translated
            else:
                # On failure translate_result carries the error code.
                logger.info("翻译失败: {}, 错误码: {}".format(translated, translate_result))
            s4 = time.time()
            logger.info("翻译耗时: {}".format(s4 - s3))

        s2 = time.time()
        logger.info("==>> 完成, 总耗时 {} , 开始回复: {}".format(s2 - s1, response_data))
        return Response(json.dumps({'status': 0, 'data': response_data}),
                        mimetype='application/json')

    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate instead of being turned into a 200 error payload.
        e = traceback.format_exc()
        logger.info("错误")
        logger.error(e)
        return Response(json.dumps({'status': -1, 'data': 'None'}), mimetype='application/json')
85 |
86 |
def save_img():
    """Daemon loop: persist queued request images and their OCR results.

    Consumes (save_basename, images, words_result) triples from the module
    queue and writes each image as <basename>_<idx>.jpg with a sibling .txt
    holding its recognition result.  Never returns; failures on one item are
    logged and the loop continues.
    """
    while True:
        try:
            save_basename, image_cv2, words_result = _save_image_q.get(block=True)
            assert len(image_cv2) == len(words_result)
            for idx, img in enumerate(image_cv2):
                save_name = save_basename + "_" + str(idx) + ".jpg"
                mkdir(os.path.dirname(save_name))
                cv2.imwrite(save_name, img)
                # Explicit utf-8 so non-ASCII OCR text is written reliably
                # regardless of the platform's default encoding.
                with open(save_name.replace(".jpg", ".txt"), "w", encoding="utf-8") as f:
                    f.write(str(words_result[idx]))
                logger.info('保存图片 {} 及 txt'.format(save_name))
        except Exception:
            # Narrowed from a bare `except:`: keep the loop alive on bad items
            # but allow KeyboardInterrupt/SystemExit to stop the thread.
            e = traceback.format_exc()
            logger.error(e)
102 |
103 |
def do_work(gpu, port):
    """Bind one OCR worker to a GPU and serve Flask on the given port.

    Sets module-level globals (logger, g_port, ocr) that the request handler
    reads; written so it could also be launched once per process (see the
    commented multiprocessing code in __main__).  Blocks in app.run().
    """
    global logger, g_port, ocr
    try:
        # Restrict this process to the selected GPU before model init.
        os.environ["CUDA_VISIBLE_DEVICES"] = "{}".format(gpu)
        logger = logger_.get_logger("./log/ocr_{}.log".format(port))
        g_port = port
        logger.info("===>>> 初始化模型到gpu:{}, port: {}".format(gpu, port))
        ocr = OCR(config, logger, language_list)
        logger.info("==>> 启动成功")
        # threaded=True lets Flask handle concurrent requests in this process.
        app.run(host=config.host, port=port, threaded=True)

    except BaseException as e:
        # NOTE(review): if get_logger() itself raised, `logger` is unbound
        # here and this handler would NameError — confirm/guard.
        logger.error('错误,启动flask异常{}'.format(e))
        logger.info(traceback.format_exc())
119 |
if __name__ == '__main__':
    # CLI: gpu/port lists are "_"-separated; recognition languages are ","-separated.
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=str, help='gpu index: 0_1_2_3', default="0")
    parser.add_argument('--port', type=str, help='server port: 8811_8812_8813', default="8811")
    parser.add_argument('--det', type=str, help='detection model', default="DB")
    parser.add_argument('--rec', type=str, help='recognize language model', default="ch,japan,en,korean")
    args = parser.parse_args()

    # Make the process identifiable in ps/top output.
    setproctitle('ocr_server_{}_{}'.format(args.port, args.rec))

    ports = args.port.split("_")  # [args.port]
    gpus = args.gpu.split("_")  # [args.gpu]
    language_list = args.rec.replace(" ", "").split(",")
    # A single gpu index is reused for every requested port.
    if len(gpus) == 1:
        gpus = gpus * len(ports)

    gpu_num = len(gpus)
    port_num = len(ports)

    if gpu_num != port_num:
        print('启动失败:GPU数量 != 端口数量!')
        sys.exit(1)

    # Background thread persisting request images/results to disk.
    threading.Thread(target=save_img, name="save img").start()
    # Single-process mode: only the first gpu/port pair is served; the
    # multi-process variant is kept below for reference.
    do_work(gpu=gpus[0], port=ports[0])

    # pool = multiprocessing.Pool(processes=port_num)
    # for index in range(port_num):
    #     pool.apply_async(do_work, (gpus[index], ports[index]))
    # pool.close()
    # pool.join()
    # save_img()
152 |
--------------------------------------------------------------------------------
/ppocr/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/data/cls/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/data/cls/dataset_traversal.py:
--------------------------------------------------------------------------------
1 | # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import os
16 | import sys
17 | import math
18 | import random
19 | import numpy as np
20 | import cv2
21 |
22 | from ppocr.utils.utility import initial_logger
23 | from ppocr.utils.utility import get_image_file_list
24 |
25 | logger = initial_logger()
26 |
27 | from ppocr.data.rec.img_tools import resize_norm_img, warp
28 | from ppocr.data.cls.randaugment import RandAugment
29 |
30 |
def random_crop(img):
    """Randomly crop a horizontal window from very wide images.

    Images wider than 4x their height are cropped to a random width in
    [2*height, width]; anything else is returned unchanged.
    """
    height, width = img.shape[:2]
    if width <= height * 4:
        return img
    crop_w = random.randint(height * 2, width)
    start = random.randint(0, width - crop_w)
    return img[:, start:start + crop_w, :]
39 |
40 |
class SimpleReader(object):
    """Text-direction-classifier data reader.

    Train/eval mode reads "image_path\\tlabel" lines from a label file;
    inference mode reads raw images from ``infer_img``.  Calling an instance
    returns a reader function yielding normalized image tensors (optionally
    batched).
    """

    def __init__(self, params):
        # params: configuration dict; only the keys accessed below are used.
        if params['mode'] != 'train':
            self.num_workers = 1
        else:
            self.num_workers = params['num_workers']
        if params['mode'] != 'test':
            self.img_set_dir = params['img_set_dir']
            self.label_file_path = params['label_file_path']
        self.use_gpu = params['use_gpu']
        self.image_shape = params['image_shape']
        self.mode = params['mode']
        self.infer_img = params['infer_img']
        self.use_distort = params['mode'] == 'train' and params['distort']
        self.randaug = RandAugment()
        self.label_list = params['label_list']
        # NOTE(review): this overrides the use_distort computed above whenever
        # 'distort' is present (regardless of mode) — confirm intended.
        if "distort" in params:
            self.use_distort = params['distort'] and params['use_gpu']
            if not params['use_gpu']:
                logger.info(
                    "Distort operation can only support in GPU.Distort will be set to False."
                )
        if params['mode'] == 'train':
            self.batch_size = params['train_batch_size_per_card']
            self.drop_last = True
        else:
            self.batch_size = params['test_batch_size_per_card']
            self.drop_last = False
            # Distortion is a train-only augmentation.
            self.use_distort = False

    def __call__(self, process_id):
        """Return a reader callable for worker ``process_id``.

        Returns sample_iter_reader when inferring on raw images, otherwise
        batch_iter_reader.
        """
        # Outside training every worker reads the full (unsharded) data.
        if self.mode != 'train':
            process_id = 0

        def get_device_num():
            # Number of devices sharing each batch, derived from env vars.
            if self.use_gpu:
                gpus = os.environ.get("CUDA_VISIBLE_DEVICES", "1")
                gpu_num = len(gpus.split(','))
                return gpu_num
            else:
                cpu_num = os.environ.get("CPU_NUM", 1)
                return int(cpu_num)

        def sample_iter_reader():
            # Yields norm_img (inference) or (norm_img, label) (train/eval).
            if self.mode != 'train' and self.infer_img is not None:
                image_file_list = get_image_file_list(self.infer_img)
                for single_img in image_file_list:
                    img = cv2.imread(single_img)
                    # Promote grayscale input to 3-channel BGR.
                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
                    norm_img = resize_norm_img(img, self.image_shape)

                    # Add a leading batch axis.
                    norm_img = norm_img[np.newaxis, :]
                    yield norm_img
            else:
                with open(self.label_file_path, "rb") as fin:
                    label_infor_list = fin.readlines()
                img_num = len(label_infor_list)
                # Shuffle sample order once per epoch via an index list.
                img_id_list = list(range(img_num))
                random.shuffle(img_id_list)
                if sys.platform == "win32" and self.num_workers != 1:
                    print("multiprocess is not fully compatible with Windows."
                          "num_workers will be 1.")
                    self.num_workers = 1
                # Every device/worker must get at least one sample per batch.
                if self.batch_size * get_device_num(
                ) * self.num_workers > img_num:
                    raise Exception(
                        "The number of the whole data ({}) is smaller than the batch_size * devices_num * num_workers ({})".
                        format(img_num, self.batch_size * get_device_num() *
                               self.num_workers))
                # Shard samples across workers by striding with num_workers.
                for img_id in range(process_id, img_num, self.num_workers):
                    label_infor = label_infor_list[img_id_list[img_id]]
                    substr = label_infor.decode('utf-8').strip("\n").split("\t")
                    label = self.label_list.index(substr[1])

                    img_path = self.img_set_dir + "/" + substr[0]
                    img = cv2.imread(img_path)
                    if img is None:
                        logger.info("{} does not exist!".format(img_path))
                        continue
                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

                    if self.use_distort:
                        # Geometric warp followed by RandAugment color/shape ops.
                        img = warp(img, 10)
                        img = self.randaug(img)
                    norm_img = resize_norm_img(img, self.image_shape)
                    norm_img = norm_img[np.newaxis, :]
                    yield (norm_img, label)

        def batch_iter_reader():
            # Group samples into lists of batch_size; the trailing partial
            # batch is yielded only when drop_last is False.
            batch_outs = []
            for outs in sample_iter_reader():
                batch_outs.append(outs)
                if len(batch_outs) == self.batch_size:
                    yield batch_outs
                    batch_outs = []
            if not self.drop_last:
                if len(batch_outs) != 0:
                    yield batch_outs

        if self.infer_img is None:
            return batch_iter_reader
        return sample_iter_reader
--------------------------------------------------------------------------------
/ppocr/data/cls/randaugment.py:
--------------------------------------------------------------------------------
1 | # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 | from __future__ import unicode_literals
19 |
20 | from PIL import Image, ImageEnhance, ImageOps
21 | import numpy as np
22 | import random
23 | import six
24 |
25 |
class RawRandAugment(object):
    """RandAugment over PIL images.

    Applies ``num_layers`` randomly chosen ops per call, each with a fixed
    strength derived from ``magnitude`` (scaled against max_level=10).
    ``fillcolor`` fills areas exposed by affine ops.
    """

    def __init__(self, num_layers=2, magnitude=5, fillcolor=(128, 128, 128)):
        self.num_layers = num_layers
        self.magnitude = magnitude
        self.max_level = 10

        # Map each op name to its concrete strength at this magnitude.
        abso_level = self.magnitude / self.max_level
        self.level_map = {
            "shearX": 0.3 * abso_level,
            "shearY": 0.3 * abso_level,
            "translateX": 150.0 / 331 * abso_level,
            "translateY": 150.0 / 331 * abso_level,
            "rotate": 30 * abso_level,
            "color": 0.9 * abso_level,
            "posterize": int(4.0 * abso_level),
            "solarize": 256.0 * abso_level,
            "contrast": 0.9 * abso_level,
            "sharpness": 0.9 * abso_level,
            "brightness": 0.9 * abso_level,
            # These three take no strength parameter.
            "autocontrast": 0,
            "equalize": 0,
            "invert": 0
        }

        # from https://stackoverflow.com/questions/5252170/
        # specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand
        def rotate_with_fill(img, magnitude):
            # Rotate and composite over a gray canvas so corners are filled.
            rot = img.convert("RGBA").rotate(magnitude)
            return Image.composite(rot,
                                   Image.new("RGBA", rot.size, (128, ) * 4),
                                   rot).convert(img.mode)

        rnd_ch_op = random.choice

        # Dispatch table: op name -> callable(img, strength).  Sign of the
        # geometric/color ops is randomized per application via rnd_ch_op.
        self.func = {
            "shearX": lambda img, magnitude: img.transform(
                img.size,
                Image.AFFINE,
                (1, magnitude * rnd_ch_op([-1, 1]), 0, 0, 1, 0),
                Image.BICUBIC,
                fillcolor=fillcolor),
            "shearY": lambda img, magnitude: img.transform(
                img.size,
                Image.AFFINE,
                (1, 0, 0, magnitude * rnd_ch_op([-1, 1]), 1, 0),
                Image.BICUBIC,
                fillcolor=fillcolor),
            "translateX": lambda img, magnitude: img.transform(
                img.size,
                Image.AFFINE,
                (1, 0, magnitude * img.size[0] * rnd_ch_op([-1, 1]), 0, 1, 0),
                fillcolor=fillcolor),
            "translateY": lambda img, magnitude: img.transform(
                img.size,
                Image.AFFINE,
                (1, 0, 0, 0, 1, magnitude * img.size[1] * rnd_ch_op([-1, 1])),
                fillcolor=fillcolor),
            "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude),
            "color": lambda img, magnitude: ImageEnhance.Color(img).enhance(
                1 + magnitude * rnd_ch_op([-1, 1])),
            "posterize": lambda img, magnitude:
            ImageOps.posterize(img, magnitude),
            "solarize": lambda img, magnitude:
            ImageOps.solarize(img, magnitude),
            "contrast": lambda img, magnitude:
            ImageEnhance.Contrast(img).enhance(
                1 + magnitude * rnd_ch_op([-1, 1])),
            "sharpness": lambda img, magnitude:
            ImageEnhance.Sharpness(img).enhance(
                1 + magnitude * rnd_ch_op([-1, 1])),
            "brightness": lambda img, magnitude:
            ImageEnhance.Brightness(img).enhance(
                1 + magnitude * rnd_ch_op([-1, 1])),
            "autocontrast": lambda img, magnitude:
            ImageOps.autocontrast(img),
            "equalize": lambda img, magnitude: ImageOps.equalize(img),
            "invert": lambda img, magnitude: ImageOps.invert(img)
        }

    def __call__(self, img):
        """Apply num_layers randomly chosen ops to *img* and return it."""
        avaiable_op_names = list(self.level_map.keys())
        for layer_num in range(self.num_layers):
            op_name = np.random.choice(avaiable_op_names)
            img = self.func[op_name](img, self.level_map[op_name])
        return img
111 |
112 |
class RandAugment(RawRandAugment):
    """RandAugment wrapper accepting both PIL images and numpy arrays.

    Ndarray input is converted to a PIL image, augmented by the parent
    class, and converted back to an ndarray on the way out.
    """

    def __init__(self, *args, **kwargs):
        # six.PY2 guard kept for legacy-interpreter compatibility.
        if six.PY2:
            super(RandAugment, self).__init__(*args, **kwargs)
            return
        super().__init__(*args, **kwargs)

    def __call__(self, img):
        pil_img = img
        if not isinstance(pil_img, Image.Image):
            pil_img = Image.fromarray(np.ascontiguousarray(pil_img))

        if six.PY2:
            augmented = super(RandAugment, self).__call__(pil_img)
        else:
            augmented = super().__call__(pil_img)

        if isinstance(augmented, Image.Image):
            augmented = np.asarray(augmented)

        return augmented
136 |
--------------------------------------------------------------------------------
/ppocr/data/det/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/ppocr/data/det/__init__.py
--------------------------------------------------------------------------------
/ppocr/data/det/data_augment.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 | from __future__ import unicode_literals
7 |
8 | import numpy as np
9 | import random
10 | import cv2
11 | import math
12 |
13 | import imgaug
14 | import imgaug.augmenters as iaa
15 |
16 |
def AugmentData(data):
    """Randomly flip/rotate/resize ``data['image']`` and remap its polygons.

    A deterministic snapshot of the augmenter is taken so the image and the
    keypoints receive exactly the same random transform.  ``data['image']``
    and ``data['polys']`` are replaced in place; ``data`` is returned.
    """
    img = data['image']
    shape = img.shape

    aug = iaa.Sequential([
        iaa.Fliplr(0.5),
        iaa.Affine(rotate=(-10, 10)),
        iaa.Resize((0.5, 3)),
    ]).to_deterministic()

    def transform_poly(poly):
        # Run each polygon's vertices through the same deterministic aug.
        kps = [imgaug.Keypoint(p[0], p[1]) for p in poly]
        moved = aug.augment_keypoints(
            [imgaug.KeypointsOnImage(kps, shape=shape)])[0].keypoints
        return [(p.x, p.y) for p in moved]

    data['image'] = aug.augment_image(img)
    data['polys'] = np.array([transform_poly(poly) for poly in data['polys']])
    return data
48 |
--------------------------------------------------------------------------------
/ppocr/data/det/make_border_map.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 | from __future__ import unicode_literals
7 |
8 | import numpy as np
9 | import cv2
10 | np.seterr(divide='ignore', invalid='ignore')
11 | import pyclipper
12 | from shapely.geometry import Polygon
13 | import sys
14 | import warnings
15 | warnings.simplefilter("ignore")
16 |
17 |
def draw_border_map(polygon, canvas, mask, shrink_ratio):
    """Render the DB border (threshold) map for one text polygon.

    The polygon is dilated outward by the "unclip" distance
    D = area * (1 - shrink_ratio^2) / perimeter; inside the dilated region
    every pixel receives ``1 - clip(dist_to_nearest_edge / D, 0, 1)``, so
    values peak at the original contour and decay toward the dilated border.
    ``canvas`` (threshold map) and ``mask`` (coverage mask) are HxW float32
    arrays modified in place.
    """
    polygon = np.array(polygon)
    assert polygon.ndim == 2
    assert polygon.shape[1] == 2

    polygon_shape = Polygon(polygon)
    # Degenerate (zero-area) polygons contribute nothing.
    if polygon_shape.area <= 0:
        return
    # Offset distance from the DB formulation: D = A * (1 - r^2) / L.
    distance = polygon_shape.area * (
        1 - np.power(shrink_ratio, 2)) / polygon_shape.length
    subject = [tuple(l) for l in polygon]
    padding = pyclipper.PyclipperOffset()
    padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)

    # Outward-dilated polygon; Execute returns a list of paths, take the first.
    padded_polygon = np.array(padding.Execute(distance)[0])
    cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)

    # Work only inside the bounding box of the dilated polygon.
    xmin = padded_polygon[:, 0].min()
    xmax = padded_polygon[:, 0].max()
    ymin = padded_polygon[:, 1].min()
    ymax = padded_polygon[:, 1].max()
    width = xmax - xmin + 1
    height = ymax - ymin + 1

    # Shift polygon coordinates into the local bounding-box frame.
    polygon[:, 0] = polygon[:, 0] - xmin
    polygon[:, 1] = polygon[:, 1] - ymin

    # Per-pixel coordinate grids of the local frame.
    xs = np.broadcast_to(
        np.linspace(
            0, width - 1, num=width).reshape(1, width), (height, width))
    ys = np.broadcast_to(
        np.linspace(
            0, height - 1, num=height).reshape(height, 1), (height, width))

    # Distance of each pixel to every polygon edge, normalized by D and
    # reduced with min so the nearest edge wins.
    distance_map = np.zeros((polygon.shape[0], height, width), dtype=np.float32)
    for i in range(polygon.shape[0]):
        j = (i + 1) % polygon.shape[0]
        absolute_distance = _distance(xs, ys, polygon[i], polygon[j])
        distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
    distance_map = distance_map.min(axis=0)

    # Clamp the box to the canvas, then paste with fmax so overlapping
    # polygons keep the largest border value per pixel.
    xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
    xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
    ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
    ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
    canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
        1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height,
                         xmin_valid - xmin:xmax_valid - xmax + width],
        canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])
67 |
68 |
def _distance(xs, ys, point_1, point_2):
    """Per-pixel distance from grid coordinates (xs, ys) to a line segment.

    xs / ys are broadcastable coordinate grids; point_1 and point_2 are the
    (x, y) endpoints of the segment.  Pixels for which the angle subtended
    by the two endpoints is acute fall back to the nearer endpoint distance
    (this matches the reference DB implementation).
    """
    d1_sq = np.square(xs - point_1[0]) + np.square(ys - point_1[1])
    d2_sq = np.square(xs - point_2[0]) + np.square(ys - point_2[1])
    seg_sq = np.square(point_1[0] - point_2[0]) + np.square(
        point_1[1] - point_2[1])

    # Law of cosines; this is the *negated* cosine of the angle at the pixel.
    cosin = (seg_sq - d1_sq - d2_sq) / (2 * np.sqrt(d1_sq * d2_sq))
    square_sin = np.nan_to_num(1 - np.square(cosin))
    result = np.sqrt(d1_sq * d2_sq * square_sin / seg_sq)

    use_endpoint = cosin < 0
    result[use_endpoint] = np.sqrt(np.fmin(d1_sq, d2_sq))[use_endpoint]
    # self.extend_line(point_1, point_2, result)
    return result
94 |
95 |
def extend_line(point_1, point_2, result, shrink_ratio):
    """Draw the outward extensions of segment point_1-point_2 onto ``result``.

    Each endpoint is pushed away from the other along the segment direction
    by a factor of (1 + shrink_ratio), and an anti-aliased 1-px line of value
    4096.0 is drawn from the extension back to its endpoint.

    Returns:
        (ex_point_1, ex_point_2): the two integer extension points.
    """
    scale = 1 + shrink_ratio

    def _extended(src, other):
        # Integer point reached by moving ``src`` away from ``other``.
        return (int(round(src[0] + (src[0] - other[0]) * scale)),
                int(round(src[1] + (src[1] - other[1]) * scale)))

    ex_point_1 = _extended(point_1, point_2)
    cv2.line(
        result,
        tuple(ex_point_1),
        tuple(point_1),
        4096.0,
        1,
        lineType=cv2.LINE_AA,
        shift=0)

    ex_point_2 = _extended(point_2, point_1)
    cv2.line(
        result,
        tuple(ex_point_2),
        tuple(point_2),
        4096.0,
        1,
        lineType=cv2.LINE_AA,
        shift=0)
    return ex_point_1, ex_point_2
124 |
125 |
def MakeBorderMap(data):
    """Build the DB threshold map and mask for all non-ignored polygons.

    Adds 'threshold_map' (values in [thresh_min, thresh_max]) and
    'threshold_mask' to ``data`` and returns it.
    """
    shrink_ratio = 0.4
    thresh_min = 0.3
    thresh_max = 0.7

    im = data['image']
    polys = data['polys']
    ignore_tags = data['ignore_tags']

    canvas = np.zeros(im.shape[:2], dtype=np.float32)
    mask = np.zeros(im.shape[:2], dtype=np.float32)

    for poly, ignored in zip(polys, ignore_tags):
        if not ignored:
            draw_border_map(poly, canvas, mask=mask, shrink_ratio=shrink_ratio)

    # Rescale the [0, 1] border values into [thresh_min, thresh_max].
    data['threshold_map'] = canvas * (thresh_max - thresh_min) + thresh_min
    data['threshold_mask'] = mask
    return data
148 |
--------------------------------------------------------------------------------
/ppocr/data/det/make_shrink_map.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 | from __future__ import unicode_literals
7 |
8 | import numpy as np
9 | import cv2
10 | from shapely.geometry import Polygon
11 | import pyclipper
12 |
13 |
def validate_polygons(polygons, ignore_tags, h, w):
    """Clip polygons into the image and flag degenerate ones as ignored.

    polygons: array-like of shape (num_instances, num_points, 2), modified
    in place.  Polygons with |area| < 1 get ignore_tags[i] = True; polygons
    with positive signed area have their vertex order reversed so all
    retained polygons share one orientation.
    """
    if len(polygons) == 0:
        return polygons, ignore_tags
    assert len(polygons) == len(ignore_tags)

    for polygon in polygons:
        polygon[:, 0] = polygon[:, 0].clip(0, w - 1)
        polygon[:, 1] = polygon[:, 1].clip(0, h - 1)

    for i in range(len(polygons)):
        signed_area = polygon_area(polygons[i])
        if abs(signed_area) < 1:
            ignore_tags[i] = True
        if signed_area > 0:
            polygons[i] = polygons[i][::-1, :]
    return polygons, ignore_tags
32 |
33 |
def polygon_area(polygon):
    """Return the signed area of ``polygon`` via the shoelace formula.

    Args:
        polygon: (N, 2) array of x, y vertices.

    Returns:
        Signed area; the sign encodes vertex orientation, which
        validate_polygons uses (``area > 0``) to normalize winding order.

    Bug fix: the previous code summed (x_next - x) * (y_next - y), i.e.
    delta-x times delta-y, which evaluates to 0 for any axis-aligned
    rectangle and made validate_polygons ignore such boxes entirely.  The
    correct shoelace term is delta-x times the *sum* of the y coordinates.
    """
    edge = 0
    for i in range(polygon.shape[0]):
        next_index = (i + 1) % polygon.shape[0]
        edge += (polygon[next_index, 0] - polygon[i, 0]) * (
            polygon[next_index, 1] + polygon[i, 1])

    return edge / 2.
42 |
43 |
def MakeShrinkMap(data):
    """Build the DB shrink (probability) map ground truth for one sample.

    Every valid polygon is shrunk inward by D = area * (1 - r^2) / perimeter
    (negative pyclipper offset) and rasterized into ``gt``.  Polygons that
    are already ignored, smaller than ``min_text_size``, or that vanish when
    shrunk are instead zeroed out of ``mask`` and flagged in ignore_tags.

    Adds 'shrink_map' (gt) and 'shrink_mask' (mask) to ``data`` and
    returns it.
    """
    min_text_size = 8
    shrink_ratio = 0.4

    image = data['image']
    text_polys = data['polys']
    ignore_tags = data['ignore_tags']

    h, w = image.shape[:2]
    # Clip polygons into the image and drop degenerate ones first.
    text_polys, ignore_tags = validate_polygons(text_polys, ignore_tags, h, w)
    gt = np.zeros((h, w), dtype=np.float32)
    # gt = np.zeros((1, h, w), dtype=np.float32)
    mask = np.ones((h, w), dtype=np.float32)
    for i in range(len(text_polys)):
        polygon = text_polys[i]
        # Bounding-box extent of the polygon.
        height = max(polygon[:, 1]) - min(polygon[:, 1])
        width = max(polygon[:, 0]) - min(polygon[:, 0])
        # height = min(np.linalg.norm(polygon[0] - polygon[3]),
        #              np.linalg.norm(polygon[1] - polygon[2]))
        # width = min(np.linalg.norm(polygon[0] - polygon[1]),
        #             np.linalg.norm(polygon[2] - polygon[3]))
        if ignore_tags[i] or min(height, width) < min_text_size:
            # Too small or ignored: exclude the region from the loss mask.
            cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0)
            ignore_tags[i] = True
        else:
            polygon_shape = Polygon(polygon)
            # DB shrink distance: D = A * (1 - r^2) / L.
            distance = polygon_shape.area * (
                1 - np.power(shrink_ratio, 2)) / polygon_shape.length
            subject = [tuple(l) for l in text_polys[i]]
            padding = pyclipper.PyclipperOffset()
            padding.AddPath(subject, pyclipper.JT_ROUND,
                            pyclipper.ET_CLOSEDPOLYGON)
            # Negative offset shrinks the polygon inward.
            shrinked = padding.Execute(-distance)
            if shrinked == []:
                # Polygon collapsed entirely when shrunk: treat as ignored.
                cv2.fillPoly(mask,
                             polygon.astype(np.int32)[np.newaxis, :, :], 0)
                ignore_tags[i] = True
                continue
            shrinked = np.array(shrinked[0]).reshape(-1, 2)
            cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)
            # cv2.fillPoly(gt[0], [shrinked.astype(np.int32)], 1)

    data['shrink_map'] = gt
    data['shrink_mask'] = mask
    return data
89 |
--------------------------------------------------------------------------------
/ppocr/data/det/random_crop_data.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 | from __future__ import unicode_literals
7 |
8 | import numpy as np
9 | import cv2
10 | import random
11 |
12 |
def is_poly_in_rect(poly, x, y, w, h):
    """Return True iff every vertex of ``poly`` lies inside rect (x, y, w, h),
    boundaries included."""
    poly = np.array(poly)
    xs, ys = poly[:, 0], poly[:, 1]
    return bool(xs.min() >= x and xs.max() <= x + w and
                ys.min() >= y and ys.max() <= y + h)
20 |
21 |
def is_poly_outside_rect(poly, x, y, w, h):
    """Return True iff ``poly``'s bounding box lies strictly to one side of
    rect (x, y, w, h), i.e. the poly cannot touch the rect."""
    poly = np.array(poly)
    xs, ys = poly[:, 0], poly[:, 1]
    return bool(xs.max() < x or xs.min() > x + w or
                ys.max() < y or ys.min() > y + h)
29 |
30 |
def split_regions(axis):
    """Split a sorted index array into runs of consecutive indices.

    Args:
        axis: 1-D sorted np.ndarray of indices (e.g. rows or columns that
            contain no text).

    Returns:
        List of np.ndarray slices of ``axis``, each a maximal run of
        consecutive values.

    Bug fix: the original only appended a region when the *next* gap was
    found, so the trailing run was always dropped and the last text-free
    band of the image could never be selected by the caller.
    """
    regions = []
    min_axis = 0
    for i in range(1, axis.shape[0]):
        if axis[i] != axis[i - 1] + 1:
            regions.append(axis[min_axis:i])
            min_axis = i
    # Emit the final run, which the loop above never reaches.
    if axis.shape[0] > 0:
        regions.append(axis[min_axis:])
    return regions
40 |
41 |
def random_select(axis, max_size):
    """Pick two random values from ``axis`` and return them as a sorted pair
    clipped to [0, max_size - 1]."""
    pair = np.random.choice(axis, size=2)
    lo, hi = np.min(pair), np.max(pair)
    lo = np.clip(lo, 0, max_size - 1)
    hi = np.clip(hi, 0, max_size - 1)
    return lo, hi
49 |
50 |
def region_wise_random_select(regions, max_size):
    """Pick one value from each of two randomly chosen regions.

    ``max_size`` is accepted for interface symmetry with random_select but
    is not used.  Returns the two picks as (min, max).
    """
    chosen = list(np.random.choice(len(regions), 2))
    picks = [int(np.random.choice(regions[idx], size=1)) for idx in chosen]
    return min(picks), max(picks)
61 |
62 |
def crop_area(im, text_polys, min_crop_side_ratio, max_tries):
    """Pick a random crop rectangle whose edges avoid cutting through text.

    Rows/columns covered by any polygon's bounding box are masked out, and
    crop boundaries are sampled from the remaining text-free bands.  Falls
    back to the full image if no acceptable crop is found in ``max_tries``.

    Returns:
        (x, y, w, h) of the chosen crop.
    """
    h, w, _ = im.shape
    h_mask = np.zeros(h, dtype=np.int32)
    w_mask = np.zeros(w, dtype=np.int32)
    for points in text_polys:
        pts = np.round(points, decimals=0).astype(np.int32)
        w_mask[np.min(pts[:, 0]):np.max(pts[:, 0])] = 1
        h_mask[np.min(pts[:, 1]):np.max(pts[:, 1])] = 1

    # Indices of rows/columns that contain no text at all.
    h_axis = np.where(h_mask == 0)[0]
    w_axis = np.where(w_mask == 0)[0]
    if len(h_axis) == 0 or len(w_axis) == 0:
        return 0, 0, w, h

    h_regions = split_regions(h_axis)
    w_regions = split_regions(w_axis)

    for _ in range(max_tries):
        if len(w_regions) > 1:
            xmin, xmax = region_wise_random_select(w_regions, w)
        else:
            xmin, xmax = random_select(w_axis, w)
        if len(h_regions) > 1:
            ymin, ymax = region_wise_random_select(h_regions, h)
        else:
            ymin, ymax = random_select(h_axis, h)

        # Reject candidates that are too small along either side.
        if (xmax - xmin < min_crop_side_ratio * w or
                ymax - ymin < min_crop_side_ratio * h):
            continue

        # Accept as soon as at least one polygon intersects the candidate.
        for poly in text_polys:
            if not is_poly_outside_rect(poly, xmin, ymin, xmax - xmin,
                                        ymax - ymin):
                return xmin, ymin, xmax - xmin, ymax - ymin

    return 0, 0, w, h
109 |
110 |
def RandomCropData(data, size):
    """Randomly crop ``data['image']`` without cutting text, then resize.

    The crop is scaled (aspect ratio preserved) to fit ``size`` and padded
    into the top-left corner of a zero canvas.  Polygons are remapped into
    crop coordinates; those pushed fully outside are dropped together with
    their texts and ignore flags.  ``data`` is updated in place and returned.
    """
    max_tries = 10
    min_crop_side_ratio = 0.1
    keep_ratio = True

    im = data['image']
    text_polys = data['polys']
    ignore_tags = data['ignore_tags']
    texts = data['texts']

    care_polys = [
        poly for poly, tag in zip(text_polys, ignore_tags) if not tag
    ]
    crop_x, crop_y, crop_w, crop_h = crop_area(im, care_polys,
                                               min_crop_side_ratio, max_tries)
    dh, dw = size
    scale = min(dw / crop_w, dh / crop_h)
    h = int(crop_h * scale)
    w = int(crop_w * scale)
    cropped = im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w]
    if keep_ratio:
        # Resized crop goes into the top-left of a dh x dw zero canvas.
        img = np.zeros((dh, dw, im.shape[2]), im.dtype)
        img[:h, :w] = cv2.resize(cropped, (w, h))
    else:
        img = cv2.resize(cropped, (dw, dh))

    kept_polys, kept_tags, kept_texts = [], [], []
    for poly, text, tag in zip(text_polys, texts, ignore_tags):
        poly = ((poly - (crop_x, crop_y)) * scale).tolist()
        if not is_poly_outside_rect(poly, 0, 0, w, h):
            kept_polys.append(poly)
            kept_tags.append(tag)
            kept_texts.append(text)

    data['image'] = img
    data['polys'] = np.array(kept_polys)
    data['ignore_tags'] = kept_tags
    data['texts'] = kept_texts
    return data
154 |
--------------------------------------------------------------------------------
/ppocr/data/reader_main.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | import os
16 | import random
17 | import numpy as np
18 |
19 | import paddle
20 | from ppocr.utils.utility import create_module
21 | from copy import deepcopy
22 |
23 | from .rec.img_tools import process_image
24 | import cv2
25 |
26 | import sys
27 | import signal
28 |
29 |
30 | # handle terminate reader process, do not print stack frame
def _reader_quit(signum, frame):
    """SIGTERM handler: exit the reader process without a stack trace.

    Args:
        signum: received signal number (unused).
        frame: current stack frame (unused).
    """
    print("Reader process exit.")
    raise SystemExit
34 |
35 |
def _term_group(sig_num, frame):
    """SIGINT handler: report, then SIGKILL the entire process group so all
    reader workers die together.

    Args:
        sig_num: received signal number (unused).
        frame: current stack frame (unused).
    """
    print('pid {} terminated, terminate group '
          '{}...'.format(os.getpid(), os.getpgrp()))
    os.killpg(os.getpgid(os.getpid()), signal.SIGKILL)
40 |
41 |
42 | signal.signal(signal.SIGTERM, _reader_quit)
43 | signal.signal(signal.SIGINT, _term_group)
44 |
45 |
def reader_main(config=None, mode=None):
    """Create a data reader for training, evaluation, or testing.

    Args:
        config: full configuration dict; must contain 'Global' plus the
            reader section matching ``mode``.
        mode: one of "train", "eval", "test".

    Returns:
        A reader callable.  For training this is a multiprocess reader
        built from ``num_workers`` sub-readers (single in-process reader
        on Windows, which lacks fork()).
    """
    assert mode in ["train", "eval", "test"],\
        "Nonsupport mode:{}".format(mode)
    section = {
        'train': 'TrainReader',
        'eval': 'EvalReader',
        'test': 'TestReader',
    }[mode]
    params = deepcopy(config[section])
    params['mode'] = mode
    # Global settings override the section-specific ones.
    params.update(config['Global'])
    function = create_module(params['reader_function'])(params)

    if mode != "train":
        return function(mode)
    if sys.platform == "win32":
        # No fork() on Windows: run a single in-process reader.
        return function(0)
    readers = [function(process_id)
               for process_id in range(params['num_workers'])]
    return paddle.reader.multiprocess_reader(readers, False)
78 |
--------------------------------------------------------------------------------
/ppocr/data/rec/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/data/rec/text_image_aug/augment.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # Author: RubanSeven
3 | # Reference: https://github.com/RubanSeven/Text-Image-Augmentation-python
4 |
5 | # import cv2
6 | import numpy as np
7 | from .warp_mls import WarpMLS
8 |
9 |
def tia_distort(src, segment=4):
    """Randomly distort ``src`` by jittering control points on the top and
    bottom edges, then warping with moving least squares.

    Args:
        src: HxW or HxWxC uint8 image.
        segment: number of horizontal segments; more segments = finer warp.

    Returns:
        The warped image with the same size as ``src``.
    """
    img_h, img_w = src.shape[:2]
    cut = img_w // segment
    thresh = cut // 3
    half_thresh = thresh * 0.5

    src_pts = [[0, 0], [img_w, 0], [img_w, img_h], [0, img_h]]
    # Jitter each corner inward by up to ``thresh`` pixels.
    dst_pts = [
        [np.random.randint(thresh), np.random.randint(thresh)],
        [img_w - np.random.randint(thresh), np.random.randint(thresh)],
        [img_w - np.random.randint(thresh),
         img_h - np.random.randint(thresh)],
        [np.random.randint(thresh), img_h - np.random.randint(thresh)],
    ]

    # Interior control columns, jittered in both x and y.
    for cut_idx in np.arange(1, segment, 1):
        src_pts.append([cut * cut_idx, 0])
        src_pts.append([cut * cut_idx, img_h])
        dst_pts.append([
            cut * cut_idx + np.random.randint(thresh) - half_thresh,
            np.random.randint(thresh) - half_thresh,
        ])
        dst_pts.append([
            cut * cut_idx + np.random.randint(thresh) - half_thresh,
            img_h + np.random.randint(thresh) - half_thresh,
        ])

    return WarpMLS(src, src_pts, dst_pts, img_w, img_h).generate()
50 |
51 |
def tia_stretch(src, segment=4):
    """Randomly stretch ``src`` horizontally by shifting interior control
    columns while keeping the four corners fixed.

    Args:
        src: HxW or HxWxC uint8 image.
        segment: number of horizontal segments.

    Returns:
        The warped image with the same size as ``src``.
    """
    img_h, img_w = src.shape[:2]
    cut = img_w // segment
    thresh = cut * 4 // 5
    half_thresh = thresh * 0.5

    corners = [[0, 0], [img_w, 0], [img_w, img_h], [0, img_h]]
    src_pts = list(corners)
    dst_pts = [list(p) for p in corners]

    for cut_idx in np.arange(1, segment, 1):
        move = np.random.randint(thresh) - half_thresh
        col = cut * cut_idx
        src_pts.append([col, 0])
        src_pts.append([col, img_h])
        # Shift the whole column horizontally by the same random amount.
        dst_pts.append([col + move, 0])
        dst_pts.append([col + move, img_h])

    return WarpMLS(src, src_pts, dst_pts, img_w, img_h).generate()
84 |
85 |
def tia_perspective(src):
    """Apply a random pseudo-perspective warp: the left and right edges are
    squeezed vertically by independent random amounts.

    Args:
        src: HxW or HxWxC uint8 image.

    Returns:
        The warped image with the same size as ``src``.
    """
    img_h, img_w = src.shape[:2]
    thresh = img_h // 2

    src_pts = [[0, 0], [img_w, 0], [img_w, img_h], [0, img_h]]
    dst_pts = [
        [0, np.random.randint(thresh)],
        [img_w, np.random.randint(thresh)],
        [img_w, img_h - np.random.randint(thresh)],
        [0, img_h - np.random.randint(thresh)],
    ]

    return WarpMLS(src, src_pts, dst_pts, img_w, img_h).generate()
108 |
--------------------------------------------------------------------------------
/ppocr/data/rec/text_image_aug/warp_mls.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # Author: RubanSeven
3 | # Reference: https://github.com/RubanSeven/Text-Image-Augmentation-python
4 | import math
5 | import numpy as np
6 |
7 |
class WarpMLS:
    """Moving-least-squares style image warp evaluated on a coarse grid.

    Warps ``src`` so that control points ``src_pts`` map to ``dst_pts``
    (lists of [x, y]).  Displacements are computed only at grid nodes
    spaced ``grid_size`` pixels apart (calc_delta) and bilinearly
    interpolated inside each cell (gen_img).

    Reference: https://github.com/RubanSeven/Text-Image-Augmentation-python
    """

    def __init__(self, src, src_pts, dst_pts, dst_w, dst_h, trans_ratio=1.):
        self.src = src
        self.src_pts = src_pts
        self.dst_pts = dst_pts
        self.pt_count = len(self.dst_pts)
        self.dst_w = dst_w
        self.dst_h = dst_h
        # Fraction of the computed displacement that is actually applied.
        self.trans_ratio = trans_ratio
        # Spacing (pixels) between grid nodes where deltas are evaluated.
        self.grid_size = 100
        # Per-node x / y displacement fields, filled by calc_delta().
        self.rdx = np.zeros((self.dst_h, self.dst_w))
        self.rdy = np.zeros((self.dst_h, self.dst_w))

    @staticmethod
    def __bilinear_interp(x, y, v11, v12, v21, v22):
        # Standard bilinear blend of four corner values at fractions x, y.
        return (v11 * (1 - y) + v12 * y) * (1 - x) + (v21 *
                                                      (1 - y) + v22 * y) * x

    def generate(self):
        """Compute the grid deltas, then return the warped image."""
        self.calc_delta()
        return self.gen_img()

    def calc_delta(self):
        """Fill self.rdx / self.rdy with displacements at grid nodes.

        For each grid node (i, j), inverse-square-distance weights of the
        destination control points are accumulated and used to map the node
        toward the source control points (an MLS-style solve).
        """
        w = np.zeros(self.pt_count, dtype=np.float32)

        # Need at least two control points for a meaningful solve.
        if self.pt_count < 2:
            return

        i = 0
        while 1:
            # Snap the last grid column onto the image border.
            if self.dst_w <= i < self.dst_w + self.grid_size - 1:
                i = self.dst_w - 1
            elif i >= self.dst_w:
                break

            j = 0
            while 1:
                # Snap the last grid row onto the image border.
                if self.dst_h <= j < self.dst_h + self.grid_size - 1:
                    j = self.dst_h - 1
                elif j >= self.dst_h:
                    break

                sw = 0
                swp = np.zeros(2, dtype=np.float32)
                swq = np.zeros(2, dtype=np.float32)
                new_pt = np.zeros(2, dtype=np.float32)
                cur_pt = np.array([i, j], dtype=np.float32)

                k = 0
                for k in range(self.pt_count):
                    # Node coincides with a control point: stop accumulating.
                    if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
                        break

                    # Inverse-square-distance weight of control point k.
                    w[k] = 1. / (
                        (i - self.dst_pts[k][0]) * (i - self.dst_pts[k][0]) +
                        (j - self.dst_pts[k][1]) * (j - self.dst_pts[k][1]))

                    sw += w[k]
                    swp = swp + w[k] * np.array(self.dst_pts[k])
                    swq = swq + w[k] * np.array(self.src_pts[k])

                    # On the final control point, all sums are complete:
                    # finish the MLS solve for this node.
                    if k == self.pt_count - 1:
                        # Weighted centroids of dst / src control points.
                        pstar = 1 / sw * swp
                        qstar = 1 / sw * swq

                        miu_s = 0
                        for k in range(self.pt_count):
                            if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
                                continue
                            pt_i = self.dst_pts[k] - pstar
                            miu_s += w[k] * np.sum(pt_i * pt_i)

                        cur_pt -= pstar
                        cur_pt_j = np.array([-cur_pt[1], cur_pt[0]])

                        for k in range(self.pt_count):
                            if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
                                continue

                            pt_i = self.dst_pts[k] - pstar
                            pt_j = np.array([-pt_i[1], pt_i[0]])

                            tmp_pt = np.zeros(2, dtype=np.float32)
                            tmp_pt[0] = np.sum(pt_i * cur_pt) * self.src_pts[k][0] - \
                                        np.sum(pt_j * cur_pt) * self.src_pts[k][1]
                            tmp_pt[1] = -np.sum(pt_i * cur_pt_j) * self.src_pts[k][0] + \
                                        np.sum(pt_j * cur_pt_j) * self.src_pts[k][1]
                            tmp_pt *= (w[k] / miu_s)
                            new_pt += tmp_pt

                        new_pt += qstar
                # NOTE(review): this `else` binds to the *for* loop, i.e. it
                # runs exactly when the loop was NOT broken -- which is also
                # when the k == pt_count - 1 branch above just computed
                # new_pt, and this then overwrites it with src_pts[k]; on a
                # break, new_pt keeps its zeros instead.  This matches the
                # upstream copy, but looks inverted -- verify against the
                # reference implementation before changing anything here.
                else:
                    new_pt = self.src_pts[k]

                # Displacement of this grid node.
                self.rdx[j, i] = new_pt[0] - i
                self.rdy[j, i] = new_pt[1] - j

                j += self.grid_size
            i += self.grid_size

    def gen_img(self):
        """Apply the per-node displacement fields and return the warped image."""
        src_h, src_w = self.src.shape[:2]
        dst = np.zeros_like(self.src, dtype=np.float32)

        for i in np.arange(0, self.dst_h, self.grid_size):
            for j in np.arange(0, self.dst_w, self.grid_size):
                ni = i + self.grid_size
                nj = j + self.grid_size
                w = h = self.grid_size
                # Clamp the last cell to the image border.
                if ni >= self.dst_h:
                    ni = self.dst_h - 1
                    h = ni - i + 1
                if nj >= self.dst_w:
                    nj = self.dst_w - 1
                    w = nj - j + 1

                # Fractional positions of every pixel inside this cell.
                di = np.reshape(np.arange(h), (-1, 1))
                dj = np.reshape(np.arange(w), (1, -1))
                # Interpolate the four corner deltas across the cell.
                delta_x = self.__bilinear_interp(
                    di / h, dj / w, self.rdx[i, j], self.rdx[i, nj],
                    self.rdx[ni, j], self.rdx[ni, nj])
                delta_y = self.__bilinear_interp(
                    di / h, dj / w, self.rdy[i, j], self.rdy[i, nj],
                    self.rdy[ni, j], self.rdy[ni, nj])
                # Source sampling coordinates, clamped to the source image.
                nx = j + dj + delta_x * self.trans_ratio
                ny = i + di + delta_y * self.trans_ratio
                nx = np.clip(nx, 0, src_w - 1)
                ny = np.clip(ny, 0, src_h - 1)
                nxi = np.array(np.floor(nx), dtype=np.int32)
                nyi = np.array(np.floor(ny), dtype=np.int32)
                nxi1 = np.array(np.ceil(nx), dtype=np.int32)
                nyi1 = np.array(np.ceil(ny), dtype=np.int32)

                # Bilinearly resample the source at the displaced coords
                # (3-channel images need the fractions tiled per channel).
                if len(self.src.shape) == 3:
                    x = np.tile(np.expand_dims(ny - nyi, axis=-1), (1, 1, 3))
                    y = np.tile(np.expand_dims(nx - nxi, axis=-1), (1, 1, 3))
                else:
                    x = ny - nyi
                    y = nx - nxi
                dst[i:i + h, j:j + w] = self.__bilinear_interp(
                    x, y, self.src[nyi, nxi], self.src[nyi, nxi1],
                    self.src[nyi1, nxi], self.src[nyi1, nxi1])

        dst = np.clip(dst, 0, 255)
        dst = np.array(dst, dtype=np.uint8)

        return dst
155 |
--------------------------------------------------------------------------------
/ppocr/modeling/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/modeling/architectures/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/modeling/architectures/cls_model.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | from paddle import fluid
20 |
21 | from ppocr.utils.utility import create_module
22 | from ppocr.utils.utility import initial_logger
23 |
24 | logger = initial_logger()
25 | from copy import deepcopy
26 |
27 |
class ClsModel(object):
    """Text-direction classifier graph builder (PaddlePaddle static graph).

    Wires backbone -> head -> loss modules from the config dict and exposes
    ``__call__(mode)`` to build the program for train / eval / test / export.
    """

    def __init__(self, params):
        super(ClsModel, self).__init__()
        global_params = params['Global']
        self.infer_img = global_params['infer_img']

        # Each sub-module is instantiated from its dotted-path 'function'
        # entry, with Global settings merged over the section's own params.
        backbone_params = deepcopy(params["Backbone"])
        backbone_params.update(global_params)
        self.backbone = create_module(backbone_params['function']) \
            (params=backbone_params)

        head_params = deepcopy(params["Head"])
        head_params.update(global_params)
        self.head = create_module(head_params['function']) \
            (params=head_params)

        loss_params = deepcopy(params["Loss"])
        loss_params.update(global_params)
        self.loss = create_module(loss_params['function']) \
            (params=loss_params)

        self.image_shape = global_params['image_shape']

    def create_feed(self, mode):
        """Create the input variables (plus a DataLoader when training).

        Returns:
            (image, labels, loader); labels and loader are None for any
            mode other than "train".
        """
        image_shape = deepcopy(self.image_shape)
        # Leading -1: variable batch dimension.
        image_shape.insert(0, -1)
        if mode == "train":
            image = fluid.data(name='image', shape=image_shape, dtype='float32')
            label = fluid.data(name='label', shape=[None, 1], dtype='int64')
            feed_list = [image, label]
            labels = {'label': label}
            loader = fluid.io.DataLoader.from_generator(
                feed_list=feed_list,
                capacity=64,
                use_double_buffer=True,
                iterable=False)
        else:
            labels = None
            loader = None
            image = fluid.data(name='image', shape=image_shape, dtype='float32')
        # Keeps gradients w.r.t. the input available; presumably needed by a
        # downstream consumer (e.g. visualization) -- confirm with callers.
        image.stop_gradient = False
        return image, labels, loader

    def __call__(self, mode):
        """Build the graph for ``mode``.

        Returns:
            "train": (loader, outputs dict with loss/decoded_out/label/acc);
            "export": [image, predicts];
            otherwise: (loader, predicts).
        """
        image, labels, loader = self.create_feed(mode)
        inputs = image
        conv_feas = self.backbone(inputs)
        predicts = self.head(conv_feas, labels, mode)
        if mode == "train":
            loss = self.loss(predicts, labels)
            label = labels['label']
            # Top-1 accuracy over the batch.
            acc = fluid.layers.accuracy(predicts['predict'], label, k=1)
            outputs = {'total_loss': loss, 'decoded_out': \
                predicts['decoded_out'], 'label': label, 'acc': acc}
            return loader, outputs
        elif mode == "export":
            return [image, predicts]
        else:
            return loader, predicts
87 |
--------------------------------------------------------------------------------
/ppocr/modeling/architectures/det_model.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | from paddle import fluid
20 |
21 | from ppocr.utils.utility import create_module
22 | from ppocr.utils.utility import initial_logger
23 | logger = initial_logger()
24 | from copy import deepcopy
25 |
26 |
class DetModel(object):
    def __init__(self, params):
        """
        Detection module for OCR text detection.
        args:
            params (dict): the super parameters for detection module.
        """
        global_params = params['Global']
        # Algorithm name ('EAST', 'DB' or 'SAST'); selects both the head call
        # signature and the label layout built in create_feed.
        self.algorithm = global_params['algorithm']

        # deepcopy each sub-config so the Global-level update() below does
        # not mutate the caller's params dict.
        backbone_params = deepcopy(params["Backbone"])
        backbone_params.update(global_params)
        self.backbone = create_module(backbone_params['function'])\
            (params=backbone_params)

        head_params = deepcopy(params["Head"])
        head_params.update(global_params)
        self.head = create_module(head_params['function'])\
            (params=head_params)

        loss_params = deepcopy(params["Loss"])
        loss_params.update(global_params)
        self.loss = create_module(loss_params['function'])\
            (params=loss_params)

        # [C, H, W] (no batch dim); consumed by fluid.layers.data below.
        self.image_shape = global_params['image_shape']

    def create_feed(self, mode):
        """
        create Dataloader feeds
        args:
            mode (str): 'train' for training or else for evaluation
        return: (image, corresponding label, dataloader)
        """
        image_shape = deepcopy(self.image_shape)
        # The label maps below are laid out on grids derived from the input
        # size, so H and W must both be multiples of 4.
        if image_shape[1] % 4 != 0 or image_shape[2] % 4 != 0:
            raise Exception("The size of the image must be divisible by 4, "
                            "received image shape is {}, please reset the "
                            "Global.image_shape in the yml file".format(
                                image_shape))

        image = fluid.layers.data(
            name='image', shape=image_shape, dtype='float32')
        # NOTE(review): gradients are kept flowing to the input image in all
        # modes; presumably needed by a downstream consumer — confirm.
        image.stop_gradient = False
        if mode == "train":
            if self.algorithm == "EAST":
                # EAST ground truth lives on a 4x-downsampled grid:
                # 1-channel score, 9-channel geometry, 1-channel mask.
                h, w = int(image_shape[1] // 4), int(image_shape[2] // 4)
                score = fluid.layers.data(
                    name='score', shape=[1, h, w], dtype='float32')
                geo = fluid.layers.data(
                    name='geo', shape=[9, h, w], dtype='float32')
                mask = fluid.layers.data(
                    name='mask', shape=[1, h, w], dtype='float32')
                feed_list = [image, score, geo, mask]
                labels = {'score': score, 'geo': geo, 'mask': mask}
            elif self.algorithm == "DB":
                # DB ground truth is full-resolution: shrink/threshold maps
                # plus their valid-region masks, all shaped like the image
                # without the channel-0 entry (image_shape[1:]).
                shrink_map = fluid.layers.data(
                    name='shrink_map', shape=image_shape[1:], dtype='float32')
                shrink_mask = fluid.layers.data(
                    name='shrink_mask', shape=image_shape[1:], dtype='float32')
                threshold_map = fluid.layers.data(
                    name='threshold_map',
                    shape=image_shape[1:],
                    dtype='float32')
                threshold_mask = fluid.layers.data(
                    name='threshold_mask',
                    shape=image_shape[1:],
                    dtype='float32')
                feed_list=[image, shrink_map, shrink_mask,\
                           threshold_map, threshold_mask]
                labels = {'shrink_map':shrink_map,\
                          'shrink_mask':shrink_mask,\
                          'threshold_map':threshold_map,\
                          'threshold_mask':threshold_mask}
            elif self.algorithm == "SAST":
                # SAST ground truth is on a fixed 128x128 grid: score,
                # border, mask, and the tvo/tco offset maps.
                input_score = fluid.layers.data(
                    name='score', shape=[1, 128, 128], dtype='float32')
                input_border = fluid.layers.data(
                    name='border', shape=[5, 128, 128], dtype='float32')
                input_mask = fluid.layers.data(
                    name='mask', shape=[1, 128, 128], dtype='float32')
                input_tvo = fluid.layers.data(
                    name='tvo', shape=[9, 128, 128], dtype='float32')
                input_tco = fluid.layers.data(
                    name='tco', shape=[3, 128, 128], dtype='float32')
                feed_list = [
                    image, input_score, input_border, input_mask, input_tvo,
                    input_tco
                ]
                labels = {'input_score': input_score,\
                          'input_border': input_border,\
                          'input_mask': input_mask,\
                          'input_tvo': input_tvo,\
                          'input_tco': input_tco}
            loader = fluid.io.DataLoader.from_generator(
                feed_list=feed_list,
                capacity=64,
                use_double_buffer=True,
                iterable=False)
        else:
            labels = None
            loader = None
        return image, labels, loader

    def __call__(self, mode):
        """
        run forward of defined module
        args:
            mode (str): 'train' for training; 'export' for inference,
                others for evaluation]
        """
        image, labels, loader = self.create_feed(mode)
        conv_feas = self.backbone(image)
        # The DB head needs the mode to toggle its train/infer branches;
        # the other heads take only the feature maps.
        if self.algorithm == "DB":
            predicts = self.head(conv_feas, mode)
        else:
            predicts = self.head(conv_feas)
        if mode == "train":
            losses = self.loss(predicts, labels)
            return loader, losses
        elif mode == "export":
            return [image, predicts]
        else:
            return loader, predicts
151 |
--------------------------------------------------------------------------------
/ppocr/modeling/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/modeling/common_functions.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle
20 | import paddle.fluid as fluid
21 | from paddle.fluid.param_attr import ParamAttr
22 | import math
23 |
24 |
def get_para_bias_attr(l2_decay, k, name):
    """Build the weight/bias ParamAttr pair for an fc layer.

    Both attrs share one L2 regularizer and a uniform initializer whose
    bound is 1/sqrt(k) (fan-in scaled).

    Args:
        l2_decay (float): L2 regularization coefficient.
        k (int): fan-in used to scale the uniform init range.
        name (str): prefix for the parameter names.
    Returns:
        list: [weight ParamAttr, bias ParamAttr]
    """
    bound = 1.0 / math.sqrt(float(k))
    shared_reg = fluid.regularizer.L2Decay(l2_decay)
    shared_init = fluid.initializer.Uniform(-bound, bound)

    def _make_attr(suffix):
        # Weight and bias attrs differ only in their name suffix.
        return fluid.ParamAttr(
            regularizer=shared_reg,
            initializer=shared_init,
            name=name + suffix)

    return [_make_attr("_w_attr"), _make_attr("_b_attr")]
34 |
35 |
def conv_bn_layer(input,
                  num_filters,
                  filter_size,
                  stride=1,
                  groups=1,
                  act=None,
                  name=None):
    """Conv2d (bias-free) followed by batch norm.

    The convolution is kept linear; the activation `act` is applied on the
    batch-norm output. Padding preserves spatial size for odd filter sizes.
    """
    same_pad = (filter_size - 1) // 2
    conv_out = fluid.layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=same_pad,
        groups=groups,
        act=None,
        param_attr=ParamAttr(name=name + "_weights"),
        bias_attr=False,
        name=name + '.conv2d')

    # BN parameter names carry a "bn_" prefix so checkpoints line up.
    bn_prefix = "bn_" + name
    return fluid.layers.batch_norm(
        input=conv_out,
        act=act,
        name=bn_prefix + '.output',
        param_attr=ParamAttr(name=bn_prefix + '_scale'),
        bias_attr=ParamAttr(bn_prefix + '_offset'),
        moving_mean_name=bn_prefix + '_mean',
        moving_variance_name=bn_prefix + '_variance')
64 |
65 |
def deconv_bn_layer(input,
                    num_filters,
                    filter_size=4,
                    stride=2,
                    act='relu',
                    name=None):
    """Transposed conv (bias-free) followed by batch norm.

    Defaults (filter 4, stride 2, padding 1) give a 2x spatial upsample;
    the activation `act` is applied on the batch-norm output.
    """
    upsampled = fluid.layers.conv2d_transpose(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=1,
        act=None,
        param_attr=ParamAttr(name=name + "_weights"),
        bias_attr=False,
        name=name + '.deconv2d')

    # BN parameter names carry a "bn_" prefix so checkpoints line up.
    bn_prefix = "bn_" + name
    return fluid.layers.batch_norm(
        input=upsampled,
        act=act,
        name=bn_prefix + '.output',
        param_attr=ParamAttr(name=bn_prefix + '_scale'),
        bias_attr=ParamAttr(bn_prefix + '_offset'),
        moving_mean_name=bn_prefix + '_mean',
        moving_variance_name=bn_prefix + '_variance')
91 |
92 |
def create_tmp_var(program, name, dtype, shape, lod_level=0):
    """Create an intermediate variable inside *program*'s current block."""
    current_block = program.current_block()
    return current_block.create_var(
        name=name, dtype=dtype, shape=shape, lod_level=lod_level)
96 |
--------------------------------------------------------------------------------
/ppocr/modeling/heads/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/modeling/heads/cls_head.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import math
20 |
21 | import paddle
22 | import paddle.fluid as fluid
23 |
24 |
class ClsHead(object):
    """
    Text-direction classification head.

    Global average pooling followed by a single fc layer; returns both the
    softmax distribution and the argmax class id.

    Args:

        params(dict): super parameters for build Class network
    """

    def __init__(self, params):
        super(ClsHead, self).__init__()
        self.class_dim = params['class_dim']

    def __call__(self, inputs, labels=None, mode=None):
        pooled = fluid.layers.pool2d(
            input=inputs, pool_type='avg', global_pooling=True)
        # Uniform init bound scaled by fan-in of the pooled feature.
        bound = 1.0 / math.sqrt(pooled.shape[1] * 1.0)

        logits = fluid.layers.fc(
            input=pooled,
            size=self.class_dim,
            param_attr=fluid.param_attr.ParamAttr(
                name="fc_0.w_0",
                initializer=fluid.initializer.Uniform(-bound, bound)),
            bias_attr=fluid.param_attr.ParamAttr(name="fc_0.b_0"))

        probs = fluid.layers.softmax(logits, use_cudnn=False)
        class_ids = fluid.layers.argmax(logits, axis=1)
        return {'predict': probs, 'decoded_out': class_ids}
55 |
--------------------------------------------------------------------------------
/ppocr/modeling/heads/det_east_head.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle.fluid as fluid
20 | from ..common_functions import conv_bn_layer, deconv_bn_layer
21 | from collections import OrderedDict
22 |
23 |
class EASTHead(object):
    """
    EAST: An Efficient and Accurate Scene Text Detector
    see arxiv: https://arxiv.org/abs/1704.03155
    args:
        params(dict): the super parameters for network build
    """

    def __init__(self, params):

        # 'large' doubles the channel widths used in fusion and the header.
        self.model_name = params['model_name']

    def unet_fusion(self, inputs):
        """U-Net style top-down fusion of the backbone feature pyramid.

        args:
            inputs (list): backbone feature maps, shallow-to-deep.
        return: the fused feature map (g[3]).
        """
        # Reverse so iteration runs deepest (lowest resolution) first.
        f = inputs[::-1]
        if self.model_name == "large":
            num_outputs = [128, 128, 128, 128]
        else:
            num_outputs = [64, 64, 64, 64]
        g = [None, None, None, None]
        h = [None, None, None, None]
        for i in range(4):
            # h[i]: current level merged with the upsampled previous output.
            if i == 0:
                h[i] = f[i]
            else:
                h[i] = fluid.layers.concat([g[i - 1], f[i]], axis=1)
            h[i] = conv_bn_layer(
                input=h[i],
                num_filters=num_outputs[i],
                filter_size=3,
                stride=1,
                act='relu',
                name="unet_h_%d" % (i))
            if i <= 2:
                #can be replaced with unpool
                # Upsample 2x so the next (shallower) level can be concat'd.
                g[i] = deconv_bn_layer(
                    input=h[i],
                    num_filters=num_outputs[i],
                    name="unet_g_%d" % (i))
            else:
                # Last level: no further upsampling, just refine.
                g[i] = conv_bn_layer(
                    input=h[i],
                    num_filters=num_outputs[i],
                    filter_size=3,
                    stride=1,
                    act='relu',
                    name="unet_g_%d" % (i))
        return g[3]

    def detector_header(self, f_common):
        """Predict the score map and geometry map from the fused features.

        args:
            f_common: fused feature map from unet_fusion.
        return: (f_score, f_geo)
        """
        if self.model_name == "large":
            num_outputs = [128, 64, 1, 8]
        else:
            num_outputs = [64, 32, 1, 8]
        f_det = conv_bn_layer(
            input=f_common,
            num_filters=num_outputs[0],
            filter_size=3,
            stride=1,
            act='relu',
            name="det_head1")
        f_det = conv_bn_layer(
            input=f_det,
            num_filters=num_outputs[1],
            filter_size=3,
            stride=1,
            act='relu',
            name="det_head2")
        #f_score
        # 1-channel text/non-text confidence, squashed to (0, 1).
        f_score = conv_bn_layer(
            input=f_det,
            num_filters=num_outputs[2],
            filter_size=1,
            stride=1,
            act=None,
            name="f_score")
        f_score = fluid.layers.sigmoid(f_score)
        #f_geo
        # 8-channel geometry, mapped from sigmoid's (0, 1) to (-800, 800).
        # NOTE(review): 800 is a hard-coded coordinate bound — confirm it
        # matches the training input size.
        f_geo = conv_bn_layer(
            input=f_det,
            num_filters=num_outputs[3],
            filter_size=1,
            stride=1,
            act=None,
            name="f_geo")
        f_geo = (fluid.layers.sigmoid(f_geo) - 0.5) * 2 * 800
        return f_score, f_geo

    def __call__(self, inputs):
        """
        Fuse different levels of feature map from backbone and predict results
        Args:
            inputs(list): feature maps from backbone
        Return: predicts
        """
        f_common = self.unet_fusion(inputs)
        f_score, f_geo = self.detector_header(f_common)
        predicts = OrderedDict()
        predicts['f_score'] = f_score
        predicts['f_geo'] = f_geo
        return predicts
124 |
--------------------------------------------------------------------------------
/ppocr/modeling/heads/rec_ctc_head.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import math
20 |
21 | import paddle
22 | import paddle.fluid as fluid
23 | from paddle.fluid.param_attr import ParamAttr
24 | from .rec_seq_encoder import SequenceEncoder
25 | from ..common_functions import get_para_bias_attr
26 | import numpy as np
27 |
28 |
class CTCPredict(object):
    """
    CTC predict
    Args:
        params(object): Params from yaml file and settings from command line
    """

    def __init__(self, params):
        super(CTCPredict, self).__init__()
        # Size of the character set; CTC blank is appended as index char_num
        # (hence size=char_num + 1 on the fc and blank=char_num below).
        self.char_num = params['char_num']
        self.encoder = SequenceEncoder(params)
        self.encoder_type = params['encoder_type']
        # L2 decay for the final fc; defaults to 4e-4 when absent from config.
        self.fc_decay = params.get("fc_decay", 0.0004)

    def __call__(self, inputs, labels=None, mode=None):
        """Run the sequence encoder, project to char logits, and greedy-decode.

        return: dict with 'predict' (per-step logits) and 'decoded_out'
            (greedy CTC decoding result).
        """
        # NOTE(review): the scope name suggests these ops are meant to be
        # skipped by quantization passes; scope_guard normally takes a Scope
        # object, so confirm the string form is intentional.
        with fluid.scope_guard("skip_quant"):
            encoder_features = self.encoder(inputs)
        # The RNN encoder returns a list of sequences (forward/backward)
        # that must be concatenated; the reshape encoder yields one tensor.
        if self.encoder_type != "reshape":
            encoder_features = fluid.layers.concat(encoder_features, axis=1)
        name = "ctc_fc"
        para_attr, bias_attr = get_para_bias_attr(
            l2_decay=self.fc_decay, k=encoder_features.shape[1], name=name)
        predict = fluid.layers.fc(input=encoder_features,
                                  size=self.char_num + 1,
                                  param_attr=para_attr,
                                  bias_attr=bias_attr,
                                  name=name)
        decoded_out = fluid.layers.ctc_greedy_decoder(
            input=predict, blank=self.char_num)
        predicts = {'predict': predict, 'decoded_out': decoded_out}
        return predicts
60 |
--------------------------------------------------------------------------------
/ppocr/modeling/heads/rec_seq_encoder.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import math
20 | import paddle.fluid as fluid
21 | import paddle.fluid.layers as layers
22 |
23 |
class EncoderWithReshape(object):
    """Collapse a conv feature map into a sequence via im2sequence.

    A sliding window of height equal to the feature-map height and width 1
    turns the (N, C, H, W) tensor into a width-ordered sequence.
    """

    def __init__(self, params):
        super(EncoderWithReshape, self).__init__()

    def __call__(self, inputs):
        return layers.im2sequence(
            input=inputs,
            stride=[1, 1],
            filter_size=[inputs.shape[2], 1],
            name="sliced_feature")
35 |
36 |
class EncoderWithRNN(object):
    """Two-stack bidirectional LSTM sequence encoder.

    Builds an independent 2-layer LSTM stack for each direction (no == 1
    forward, no == 2 reversed) and returns both outputs in a list.
    """

    def __init__(self, params):
        super(EncoderWithRNN, self).__init__()
        self.rnn_hidden_size = params['SeqRNN']['hidden_size']

    def __call__(self, inputs):
        # Collects [forward_out, backward_out]; the caller concatenates them.
        lstm_list = []
        name_prefix = "lstm"
        rnn_hidden_size = self.rnn_hidden_size
        for no in range(1, 3):
            if no == 1:
                is_reverse = False
            else:
                is_reverse = True
            # dynamic_lstm expects its input pre-projected to 4*hidden_size
            # (the i/f/c/o gate blocks), hence the fc in front of each lstm.
            # Parameter name strings are checkpoint-visible; do not change.
            name = "%s_st1_fc%d" % (name_prefix, no)
            fc = layers.fc(input=inputs,
                           size=rnn_hidden_size * 4,
                           param_attr=fluid.ParamAttr(name=name + "_w"),
                           bias_attr=fluid.ParamAttr(name=name + "_b"),
                           name=name)
            name = "%s_st1_out%d" % (name_prefix, no)
            lstm, _ = layers.dynamic_lstm(
                input=fc,
                size=rnn_hidden_size * 4,
                is_reverse=is_reverse,
                param_attr=fluid.ParamAttr(name=name + "_w"),
                bias_attr=fluid.ParamAttr(name=name + "_b"),
                use_peepholes=False)
            # Second layer of the stack, same direction as the first.
            name = "%s_st2_fc%d" % (name_prefix, no)
            fc = layers.fc(input=lstm,
                           size=rnn_hidden_size * 4,
                           param_attr=fluid.ParamAttr(name=name + "_w"),
                           bias_attr=fluid.ParamAttr(name=name + "_b"),
                           name=name)
            name = "%s_st2_out%d" % (name_prefix, no)
            lstm, _ = layers.dynamic_lstm(
                input=fc,
                size=rnn_hidden_size * 4,
                is_reverse=is_reverse,
                param_attr=fluid.ParamAttr(name=name + "_w"),
                bias_attr=fluid.ParamAttr(name=name + "_b"),
                use_peepholes=False)
            lstm_list.append(lstm)
        return lstm_list
81 |
82 |
class SequenceEncoder(object):
    """Dispatch between the 'reshape' and 'rnn' sequence encoders.

    'reshape' only slices the feature map into a sequence; 'rnn' then runs
    the stacked bidirectional LSTM on that sliced sequence.
    """

    def __init__(self, params):
        super(SequenceEncoder, self).__init__()
        self.encoder_type = params['encoder_type']
        self.encoder_reshape = EncoderWithReshape(params)
        if self.encoder_type == "rnn":
            self.encoder_rnn = EncoderWithRNN(params)

    def __call__(self, inputs):
        kind = self.encoder_type
        if kind == "reshape":
            return self.encoder_reshape(inputs)
        if kind == "rnn":
            sliced = self.encoder_reshape(inputs)
            return self.encoder_rnn(sliced)
        assert False, "Unsupport encoder_type:%s" % kind
101 |
--------------------------------------------------------------------------------
/ppocr/modeling/heads/self_attention/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/ppocr/modeling/heads/self_attention/__init__.py
--------------------------------------------------------------------------------
/ppocr/modeling/losses/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/modeling/losses/cls_loss.py:
--------------------------------------------------------------------------------
1 | # copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle.fluid as fluid
20 |
21 |
class ClsLoss(object):
    """Cross-entropy loss for the direction classification head.

    `predicts['predict']` appears to already hold softmax probabilities
    (the classification head applies softmax before returning), matching
    fluid.layers.cross_entropy's default probability input.

    Args:
        params (dict): unused; kept for the create_module interface.
    """

    def __init__(self, params):
        super(ClsLoss, self).__init__()
        # Stored so the loss op can be swapped without touching __call__.
        self.loss_func = fluid.layers.cross_entropy

    def __call__(self, predicts, labels):
        """Return the mean cross-entropy between predictions and labels.

        args:
            predicts (dict): must contain 'predict' (class probabilities).
            labels (dict): must contain 'label' (int64 class ids).
        """
        predict = predicts['predict']
        label = labels['label']
        # Fix: route through self.loss_func — it was assigned in __init__
        # but ignored here; also drop the dead commented-out softmax line.
        cost = self.loss_func(input=predict, label=label)
        # Mean over the batch (the old name 'sum_cost' was misleading).
        avg_cost = fluid.layers.mean(cost)
        return avg_cost
34 |
--------------------------------------------------------------------------------
/ppocr/modeling/losses/det_basic_loss.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import numpy as np
20 |
21 | import paddle.fluid as fluid
22 |
23 |
def BalanceLoss(pred,
                gt,
                mask,
                balance_loss=True,
                main_loss_type="DiceLoss",
                negative_ratio=3,
                return_origin=False,
                eps=1e-6):
    """
    The BalanceLoss for Differentiable Binarization text detection
    args:
        pred (variable): predicted feature maps.
        gt (variable): ground truth feature maps.
        mask (variable): masked maps.
        balance_loss (bool): whether balance loss or not, default is True
        main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss',
            'Euclidean','BCELoss', 'MaskL1Loss'], default is 'DiceLoss'.
        negative_ratio (int|float): float, default is 3.
        return_origin (bool): whether return unbalanced loss or not, default is False.
        eps (float): default is 1e-6.
    return: (variable) balanced loss
    """
    # Split the valid region into positive (text) and negative (background).
    positive = gt * mask
    negative = (1 - gt) * mask

    # OHEM bookkeeping: cap negatives at negative_ratio x positive count.
    positive_count = fluid.layers.reduce_sum(positive)
    positive_count_int = fluid.layers.cast(positive_count, dtype=np.int32)
    # NOTE(review): Python's min() over two fluid Variables relies on the
    # overloaded comparison operators; confirm this selects the intended
    # value in static-graph mode (elementwise_min would be explicit).
    negative_count = min(
        fluid.layers.reduce_sum(negative), positive_count * negative_ratio)
    negative_count_int = fluid.layers.cast(negative_count, dtype=np.int32)

    if main_loss_type == "CrossEntropy":
        loss = fluid.layers.cross_entropy(input=pred, label=gt, soft_label=True)
        loss = fluid.layers.reduce_mean(loss)
    elif main_loss_type == "Euclidean":
        loss = fluid.layers.square(pred - gt)
        loss = fluid.layers.reduce_mean(loss)
    elif main_loss_type == "DiceLoss":
        loss = DiceLoss(pred, gt, mask)
    elif main_loss_type == "BCELoss":
        loss = fluid.layers.sigmoid_cross_entropy_with_logits(pred, label=gt)
    elif main_loss_type == "MaskL1Loss":
        loss = MaskL1Loss(pred, gt, mask)
    else:
        loss_type = [
            'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss', 'MaskL1Loss'
        ]
        raise Exception("main_loss_type in BalanceLoss() can only be one of {}".
                        format(loss_type))

    if not balance_loss:
        return loss

    # Hard-negative mining: keep only the top-k highest-loss negatives
    # (k = negative_count_int), then average over the kept pixels.
    positive_loss = positive * loss
    negative_loss = negative * loss
    negative_loss = fluid.layers.reshape(negative_loss, shape=[-1])
    negative_loss, _ = fluid.layers.topk(negative_loss, k=negative_count_int)
    balance_loss = (fluid.layers.reduce_sum(positive_loss) +
                    fluid.layers.reduce_sum(negative_loss)) / (
                        positive_count + negative_count + eps)

    if return_origin:
        return balance_loss, loss
    return balance_loss
88 |
89 |
def DiceLoss(pred, gt, mask, weights=None, eps=1e-6):
    """
    DiceLoss function: 1 - 2*|pred∩gt| / (|pred| + |gt|), restricted to the
    masked region; optional per-pixel weights rescale the mask.
    """

    assert pred.shape == gt.shape
    assert pred.shape == mask.shape
    if weights is not None:
        assert weights.shape == mask.shape
        mask = weights * mask
    intersection = fluid.layers.reduce_sum(pred * gt * mask)

    union = fluid.layers.reduce_sum(pred * mask) + fluid.layers.reduce_sum(
        gt * mask) + eps
    loss = 1 - 2.0 * intersection / union
    # NOTE(review): this truth-tests a fluid Variable (graph tensor), not a
    # Python number — confirm the assert behaves as intended in static graph.
    assert loss <= 1
    return loss
107 |
108 |
def MaskL1Loss(pred, gt, mask, eps=1e-6):
    """
    Mask L1 Loss: mean absolute error restricted to the masked region,
    normalized by the mask area (eps avoids division by zero).
    """
    abs_diff = fluid.layers.abs(pred - gt)
    masked_sum = fluid.layers.reduce_sum(abs_diff * mask)
    norm = fluid.layers.reduce_sum(mask) + eps
    loss = masked_sum / norm
    # Kept from the original: reduce_mean over the (already scalar) ratio.
    return fluid.layers.reduce_mean(loss)
117 |
--------------------------------------------------------------------------------
/ppocr/modeling/losses/det_db_loss.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss
20 |
21 |
class DBLoss(object):
    """
    Differentiable Binarization (DB) Loss Function
    args:
        param (dict): the super paramter for DB Loss
    """

    def __init__(self, params):
        super(DBLoss, self).__init__()
        self.balance_loss = params['balance_loss']
        self.main_loss_type = params['main_loss_type']
        self.alpha = params['alpha']
        self.beta = params['beta']
        self.ohem_ratio = params['ohem_ratio']

    def __call__(self, predicts, labels):
        """Combine shrink, threshold and binary map losses into one total."""
        pred_maps = predicts['maps']
        # Channel layout of the DB head output:
        # 0 = shrink map, 1 = threshold map, 2 = binary map.
        shrink_pred = pred_maps[:, 0, :, :]
        threshold_pred = pred_maps[:, 1, :, :]
        binary_pred = pred_maps[:, 2, :, :]

        # Shrink-map loss is balanced (OHEM) and scaled by alpha.
        shrink_loss = self.alpha * BalanceLoss(
            shrink_pred,
            labels['shrink_map'],
            labels['shrink_mask'],
            balance_loss=self.balance_loss,
            main_loss_type=self.main_loss_type,
            negative_ratio=self.ohem_ratio)
        # Threshold-map regression, scaled by beta.
        threshold_loss = self.beta * MaskL1Loss(
            threshold_pred, labels['threshold_map'], labels['threshold_mask'])
        # Binary map shares the shrink ground truth, unscaled.
        binary_loss = DiceLoss(binary_pred, labels['shrink_map'],
                               labels['shrink_mask'])

        total = shrink_loss + threshold_loss + binary_loss
        return {'total_loss': total,
                "loss_shrink_maps": shrink_loss,
                "loss_threshold_maps": threshold_loss,
                "loss_binary_maps": binary_loss}
69 |
--------------------------------------------------------------------------------
/ppocr/modeling/losses/det_east_loss.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle.fluid as fluid
20 |
21 |
class EASTLoss(object):
    """
    EAST Loss function: a (down-weighted) dice loss on the score map plus
    a normalized smooth-L1 loss over the 8-channel quad geometry map.
    """

    def __init__(self, params=None):
        super(EASTLoss, self).__init__()

    def __call__(self, predicts, labels):
        """Return a dict with the total loss and its two components."""
        f_score = predicts['f_score']
        f_geo = predicts['f_geo']
        l_score = labels['score']
        l_geo = labels['geo']
        l_mask = labels['mask']

        # Dice loss on the score map, restricted to valid (masked) pixels.
        overlap = fluid.layers.reduce_sum(f_score * l_score * l_mask)
        area_sum = fluid.layers.reduce_sum(f_score * l_mask) \
            + fluid.layers.reduce_sum(l_score * l_mask)
        dice_loss = 1 - 2 * overlap / (area_sum + 1e-5)

        # Smooth-L1 loss over the 8 geometry channels.  The label tensor
        # carries one extra trailing channel used as a per-pixel
        # normalization term.
        channels = 8
        l_geo_split = fluid.layers.split(
            l_geo, num_or_sections=channels + 1, dim=1)
        f_geo_split = fluid.layers.split(f_geo, num_or_sections=channels, dim=1)
        smooth_l1 = 0
        for l_chan, f_chan in zip(l_geo_split[:channels], f_geo_split):
            abs_diff = fluid.layers.abs(l_chan - f_chan)
            inside = fluid.layers.cast(
                fluid.layers.less_than(abs_diff, l_score), dtype='float32')
            # quadratic inside the threshold, linear outside
            per_pixel = abs_diff * abs_diff * inside + \
                (abs_diff - 0.5) * (1.0 - inside)
            smooth_l1 += l_geo_split[-1] / channels * per_pixel * l_score
        smooth_l1_loss = fluid.layers.reduce_mean(smooth_l1 * l_score)

        # The dice term is scaled down so geometry regression dominates.
        dice_loss = dice_loss * 0.01
        total_loss = dice_loss + smooth_l1_loss
        return {'total_loss': total_loss, "dice_loss": dice_loss,
                "smooth_l1_loss": smooth_l1_loss}
62 |
--------------------------------------------------------------------------------
/ppocr/modeling/losses/det_sast_loss.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle.fluid as fluid
20 |
21 |
class SASTLoss(object):
    """
    SAST Loss function.

    Total loss = dice loss on the text-center score map plus three
    normalized smooth-L1 regression losses (border offsets, text-vertex
    offsets, text-center offsets) combined with fixed weights.
    """

    def __init__(self, params=None):
        super(SASTLoss, self).__init__()

    def _masked_smooth_l1_loss(self, f_pred, l_label, l_score, l_mask,
                               channels):
        """Shared smooth-L1 regression loss for the border/tvo/tco heads.

        The original code repeated this computation three times verbatim
        (only the channel count differed); it is factored out here.

        args:
            f_pred: predicted map with `channels` channels.
            l_label: ground truth with `channels` value channels plus one
                trailing per-pixel normalization channel.
            l_score: 1-channel text score map (positive-region indicator).
            l_mask: 1-channel valid-pixel mask.
            channels (int): number of regression channels (4, 8 or 2).
        return:
            scalar loss tensor.
        """
        l_split, l_norm = fluid.layers.split(
            l_label, num_or_sections=[channels, 1], dim=1)
        # Broadcast the 1-channel norm / score / mask maps to all channels.
        l_norm_split = fluid.layers.expand(
            x=l_norm, expand_times=[1, channels, 1, 1])
        l_score_split = fluid.layers.expand(
            x=l_score, expand_times=[1, channels, 1, 1])
        l_mask_split = fluid.layers.expand(
            x=l_mask, expand_times=[1, channels, 1, 1])

        diff = l_split - f_pred
        abs_diff = fluid.layers.abs(diff)
        sign = fluid.layers.cast(abs_diff < 1.0, dtype='float32')
        # the quadratic/linear branch selector must not receive gradients
        sign.stop_gradient = True
        in_loss = 0.5 * abs_diff * abs_diff * sign + \
                  (abs_diff - 0.5) * (1.0 - sign)
        out_loss = l_norm_split * in_loss
        loss = fluid.layers.reduce_sum(out_loss * l_score_split * l_mask_split) / \
               (fluid.layers.reduce_sum(l_score_split * l_mask_split) + 1e-5)
        return loss

    def __call__(self, predicts, labels):
        """
        tcl_pos: N x 128 x 3
        tcl_mask: N x 128 x 1
        tcl_label: N x X list or LoDTensor
        """
        f_score = predicts['f_score']
        f_border = predicts['f_border']
        f_tvo = predicts['f_tvo']
        f_tco = predicts['f_tco']

        l_score = labels['input_score']
        l_border = labels['input_border']
        l_mask = labels['input_mask']
        l_tvo = labels['input_tvo']
        l_tco = labels['input_tco']

        #score_loss: dice loss over valid pixels
        intersection = fluid.layers.reduce_sum(f_score * l_score * l_mask)
        union = fluid.layers.reduce_sum(f_score * l_mask) + \
                fluid.layers.reduce_sum(l_score * l_mask)
        score_loss = 1.0 - 2 * intersection / (union + 1e-5)

        #border / tvo / tco losses share one smooth-L1 implementation
        border_loss = self._masked_smooth_l1_loss(
            f_border, l_border, l_score, l_mask, channels=4)
        tvo_loss = self._masked_smooth_l1_loss(
            f_tvo, l_tvo, l_score, l_mask, channels=8)
        tco_loss = self._masked_smooth_l1_loss(
            f_tco, l_tco, l_score, l_mask, channels=2)

        # total loss: fixed per-component weights
        tvo_lw, tco_lw = 1.5, 1.5
        score_lw, border_lw = 1.0, 1.0
        total_loss = score_loss * score_lw + border_loss * border_lw + \
                     tvo_loss * tvo_lw + tco_loss * tco_lw

        losses = {'total_loss':total_loss, "score_loss":score_loss,\
            "border_loss":border_loss, 'tvo_loss':tvo_loss, 'tco_loss':tco_loss}
        return losses
--------------------------------------------------------------------------------
/ppocr/modeling/losses/rec_attention_loss.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import math
20 |
21 | import paddle
22 | import paddle.fluid as fluid
23 | from paddle.fluid.param_attr import ParamAttr
24 | import numpy as np
25 |
26 |
class AttentionLoss(object):
    """Summed cross-entropy loss for the attention recognition head."""

    def __init__(self, params):
        super(AttentionLoss, self).__init__()
        # size of the character dictionary (kept for config parity)
        self.char_num = params['char_num']

    def __call__(self, predicts, labels):
        """Return the summed cross entropy between the predicted per-step
        distributions and the int64-cast target sequence."""
        target = fluid.layers.cast(x=labels['label_out'], dtype='int64')
        per_step_cost = fluid.layers.cross_entropy(
            input=predicts['predict'], label=target)
        return fluid.layers.reduce_sum(per_step_cost)
40 |
--------------------------------------------------------------------------------
/ppocr/modeling/losses/rec_ctc_loss.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import math
20 |
21 | import paddle
22 | import paddle.fluid as fluid
23 |
24 |
class CTCLoss(object):
    """Summed warp-CTC loss for the CTC recognition head."""

    def __init__(self, params):
        super(CTCLoss, self).__init__()
        # dictionary size; the blank label index equals char_num
        self.char_num = params['char_num']

    def __call__(self, predicts, labels):
        """Return the summed per-sample CTC cost (normalized by timesteps)."""
        per_sample_cost = fluid.layers.warpctc(
            input=predicts['predict'],
            label=labels['label'],
            blank=self.char_num,
            norm_by_times=True)
        return fluid.layers.reduce_sum(per_sample_cost)
38 |
--------------------------------------------------------------------------------
/ppocr/modeling/losses/rec_srn_loss.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import math
20 |
21 | import paddle
22 | import paddle.fluid as fluid
23 |
24 |
class SRNLoss(object):
    """Weighted sum of the three SRN output-branch cross-entropy losses."""

    def __init__(self, params):
        super(SRNLoss, self).__init__()
        # dictionary size (kept for config parity)
        self.char_num = params['char_num']

    def __call__(self, predicts, others):
        """Return [total cost, vsfd cost, word cost] for the three branches."""
        label = others['label']
        lbl_weight = others['lbl_weight']  # read for interface parity; unused
        casted_label = fluid.layers.cast(x=label, dtype='int64')

        def _branch_cost(branch_predict):
            # summed cross entropy of one branch, reshaped to a 1-D tensor
            cost = fluid.layers.cross_entropy(
                input=branch_predict, label=casted_label)
            return fluid.layers.reshape(
                x=fluid.layers.reduce_sum(cost), shape=[1])

        cost_word = _branch_cost(predicts['word_out'])
        cost_gsrm = _branch_cost(predicts['gsrm_out'])
        cost_vsfd = _branch_cost(predicts['predict'])

        # fixed branch weights: word 1.0, vsfd 2.0, gsrm 0.15
        sum_cost = fluid.layers.sum(
            [cost_word, cost_vsfd * 2.0, cost_gsrm * 0.15])
        return [sum_cost, cost_vsfd, cost_word]
56 |
--------------------------------------------------------------------------------
/ppocr/modeling/stns/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppocr/optimizer.py:
--------------------------------------------------------------------------------
1 | #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | #Licensed under the Apache License, Version 2.0 (the "License");
4 | #you may not use this file except in compliance with the License.
5 | #You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 | from __future__ import absolute_import
15 | from __future__ import division
16 | from __future__ import print_function
17 | import math
18 | import paddle.fluid as fluid
19 | from paddle.fluid.regularizer import L2Decay
20 | from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
21 | import paddle.fluid.layers.ops as ops
22 |
23 | from ppocr.utils.utility import initial_logger
24 |
25 | logger = initial_logger()
26 |
27 |
def cosine_decay_with_warmup(learning_rate,
                             step_each_epoch,
                             epochs=500,
                             warmup_minibatch=1000):
    """
    Cosine learning-rate decay with a linear warmup phase.

    During the first `warmup_minibatch` steps the rate grows linearly from
    0 to `learning_rate`; afterwards it follows a half-cosine decay toward
    0 over the remaining `epochs * step_each_epoch` steps.

    args:
        learning_rate(float): initial learning rate
        step_each_epoch (int): number of steps per epoch in training
        epochs(int): number of training epochs
        warmup_minibatch(int): number of minibatches used for warmup
    return:
        lr(tensor): learning rate tensor
    """
    global_step = _decay_step_counter()
    # Persistable variable that the Switch branches below write into.
    lr = fluid.layers.tensor.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")

    warmup_steps = fluid.layers.fill_constant(
        shape=[1],
        dtype='float32',
        value=float(warmup_minibatch),
        force_cpu=True)

    with fluid.layers.control_flow.Switch() as switch:
        with switch.case(global_step < warmup_steps):
            # Linear ramp: completed warmup fraction times the base rate.
            warm_lr = learning_rate * (1.0 * global_step / warmup_steps)
            fluid.layers.tensor.assign(input=warm_lr, output=lr)
        with switch.default():
            # Half-cosine decay over the post-warmup schedule.
            cos_arg = (global_step - warmup_steps) * \
                (math.pi / (epochs * step_each_epoch))
            decayed_lr = learning_rate * (ops.cos(cos_arg) + 1) / 2
            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
    return lr
67 |
68 |
def AdamDecay(params, parameter_list=None):
    """
    Build an Adam optimizer from the config dict.

    args:
        params(dict): super parameters; reads 'base_lr', 'beta1', 'beta2',
            optional 'l2_decay', and an optional 'decay' sub-dict whose
            'function' key selects the learning-rate schedule.
        parameter_list (list): list of Variable names to update to minimize loss
    return:
        optimizer: a Adam optimizer instance
    """
    base_lr = params['base_lr']
    beta1 = params['beta1']
    beta2 = params['beta2']
    l2_decay = params.get("l2_decay", 0.0)

    if 'decay' in params:
        supported_decay_mode = [
            "cosine_decay", "cosine_decay_warmup", "piecewise_decay"
        ]
        decay_params = params['decay']
        decay_mode = decay_params['function']
        assert decay_mode in supported_decay_mode, \
            "Supported decay mode is {}, but got {}".format(
                supported_decay_mode, decay_mode)

        if decay_mode == "cosine_decay":
            base_lr = fluid.layers.cosine_decay(
                learning_rate=base_lr,
                step_each_epoch=decay_params['step_each_epoch'],
                epochs=decay_params['total_epoch'])
        elif decay_mode == "cosine_decay_warmup":
            base_lr = cosine_decay_with_warmup(
                learning_rate=base_lr,
                step_each_epoch=decay_params['step_each_epoch'],
                epochs=decay_params['total_epoch'],
                warmup_minibatch=decay_params.get("warmup_minibatch", 1000))
        elif decay_mode == "piecewise_decay":
            boundaries = decay_params["boundaries"]
            decay_rate = decay_params["decay_rate"]
            # one value per interval: base_lr, base_lr*rate, base_lr*rate^2, ...
            values = [
                base_lr * decay_rate**idx
                for idx in range(len(boundaries) + 1)
            ]
            base_lr = fluid.layers.piecewise_decay(boundaries, values)

    return fluid.optimizer.Adam(
        learning_rate=base_lr,
        beta1=beta1,
        beta2=beta2,
        regularization=L2Decay(regularization_coeff=l2_decay),
        parameter_list=parameter_list)
124 |
125 |
def RMSProp(params, parameter_list=None):
    """
    Build a RMSProp optimizer from the config dict.

    args:
        params(dict): the super parameters; reads 'base_lr', 'l2_decay'
            and an optional 'decay' sub-dict selecting the LR schedule
            ('cosine_decay' or 'piecewise_decay').
        parameter_list (list): list of Variable names to update to minimize loss
    return:
        optimizer: a RMSProp optimizer instance
    """
    base_lr = params.get("base_lr", 0.001)
    l2_decay = params.get("l2_decay", 0.00005)

    if 'decay' in params:
        supported_decay_mode = ["cosine_decay", "piecewise_decay"]
        params = params['decay']
        decay_mode = params['function']
        assert decay_mode in supported_decay_mode, "Supported decay mode is {}, but got {}".format(
            supported_decay_mode, decay_mode)

        if decay_mode == "cosine_decay":
            step_each_epoch = params['step_each_epoch']
            total_epoch = params['total_epoch']
            base_lr = fluid.layers.cosine_decay(
                learning_rate=base_lr,
                step_each_epoch=step_each_epoch,
                epochs=total_epoch)
        elif decay_mode == "piecewise_decay":
            boundaries = params["boundaries"]
            decay_rate = params["decay_rate"]
            # one value per interval: base_lr, base_lr*rate, base_lr*rate^2, ...
            values = [
                base_lr * decay_rate**idx
                for idx in range(len(boundaries) + 1)
            ]
            base_lr = fluid.layers.piecewise_decay(boundaries, values)

    # BUG FIX: `parameter_list` was accepted but never forwarded, so in
    # dygraph mode the optimizer ignored the variables it was asked to
    # update.  Forward it, consistent with AdamDecay above.
    optimizer = fluid.optimizer.RMSProp(
        learning_rate=base_lr,
        regularization=fluid.regularizer.L2Decay(regularization_coeff=l2_decay),
        parameter_list=parameter_list)

    return optimizer
166 |
--------------------------------------------------------------------------------
/ppocr/postprocess/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/ppocr/postprocess/__init__.py
--------------------------------------------------------------------------------
/ppocr/postprocess/db_postprocess.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle
20 | import paddle.fluid as fluid
21 |
22 | import numpy as np
23 | import string
24 | import cv2
25 | from shapely.geometry import Polygon
26 | import pyclipper
27 | from copy import deepcopy
28 |
29 |
class DBPostProcess(object):
    """
    The post process for Differentiable Binarization (DB): converts the
    predicted probability maps into quadrilateral text boxes.
    """

    def __init__(self, params):
        """
        args:
            params (dict): expects 'thresh', 'box_thresh',
                'max_candidates' and 'unclip_ratio'.
        """
        self.thresh = params['thresh']
        self.box_thresh = params['box_thresh']
        self.max_candidates = params['max_candidates']
        self.unclip_ratio = params['unclip_ratio']
        # boxes whose shorter side is below this many pixels are discarded
        self.min_size = 3
        # 2x2 kernel used to dilate the binarized map before contour search
        self.dilation_kernel = np.array([[1, 1], [1, 1]])

    def boxes_from_bitmap(self, pred, mask):
        """
        Get boxes from the binarized image predicted by DB.
        :param pred: the binarized image predicted by DB.
        :param mask: new 'pred' after threshold filtering.
        :return: (boxes, the score of each boxes)
        """
        dest_height, dest_width = pred.shape[-2:]
        bitmap = deepcopy(mask)
        height, width = bitmap.shape

        outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)
        # OpenCV 3.x returns (img, contours, hierarchy); 4.x returns
        # (contours, hierarchy) -- support both.
        if len(outs) == 3:
            img, contours, _ = outs[0], outs[1], outs[2]
        elif len(outs) == 2:
            contours, _ = outs[0], outs[1]

        num_contours = min(len(contours), self.max_candidates)
        # NOTE: contours skipped below leave all-zero rows / zero scores;
        # the caller's `score > box_thresh` check filters those out.
        boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)
        scores = np.zeros((num_contours, ), dtype=np.float32)

        for index in range(num_contours):
            contour = contours[index]
            points, sside = self.get_mini_boxes(contour)
            if sside < self.min_size:
                continue
            points = np.array(points)
            score = self.box_score_fast(pred, points.reshape(-1, 2))
            if self.box_thresh > score:
                continue

            box = self.unclip(points).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue
            box = np.array(box)
            # pred may come from a framework tensor whose shape entries are
            # 0-d arrays; unwrap them to plain ints
            if not isinstance(dest_width, int):
                dest_width = dest_width.item()
                dest_height = dest_height.item()

            # map box coordinates from bitmap scale back to the map size
            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes[index, :, :] = box.astype(np.int16)
            scores[index] = score
        return boxes, scores

    def unclip(self, box):
        """
        Shrink or expand the box according to 'unclip_ratio'
        :param box: The predicted box.
        :return: uncliped box
        """
        unclip_ratio = self.unclip_ratio
        poly = Polygon(box)
        # offset distance proportional to area/perimeter (per the DB paper)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        expanded = np.array(offset.Execute(distance))
        return expanded

    def get_mini_boxes(self, contour):
        """
        Get boxes from the contour or box.
        :param contour: The predicted contour.
        :return: (4 corner points ordered top-left, top-right, bottom-right,
            bottom-left; length of the rectangle's shorter side)
        """
        bounding_box = cv2.minAreaRect(contour)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        # points are sorted by x; decide top/bottom within each x pair by y
        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
        if points[1][1] > points[0][1]:
            index_1 = 0
            index_4 = 1
        else:
            index_1 = 1
            index_4 = 0
        if points[3][1] > points[2][1]:
            index_2 = 2
            index_3 = 3
        else:
            index_2 = 3
            index_3 = 2

        box = [
            points[index_1], points[index_2], points[index_3], points[index_4]
        ]
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
        """
        Calculate the score of box: mean of the prediction map inside it.
        :param bitmap: The binarized image predicted by DB.
        :param _box: The predicted box
        :return: score
        """
        h, w = bitmap.shape[:2]
        box = _box.copy()
        # BUG FIX: `np.int` was deprecated in NumPy 1.20 and removed in
        # 1.24, raising AttributeError here; use np.int32 instead.
        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)

        # rasterize the box into a mask local to its bounding rectangle
        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def __call__(self, outs_dict, ratio_list):
        """Decode a batch of probability maps into per-image box arrays.

        :param outs_dict: dict with 'maps' (N x C x H x W prediction).
        :param ratio_list: per-image (ratio_h, ratio_w) resize factors.
        :return: list (one entry per image) of box arrays.
        """
        pred = outs_dict['maps']

        pred = pred[:, 0, :, :]
        segmentation = pred > self.thresh
        boxes_batch = []
        for batch_index in range(pred.shape[0]):
            # dilate the binary map slightly to merge close components
            mask = cv2.dilate(
                np.array(segmentation[batch_index]).astype(np.uint8),
                self.dilation_kernel)
            tmp_boxes, tmp_scores = self.boxes_from_bitmap(pred[batch_index],
                                                           mask)

            boxes = []
            for k in range(len(tmp_boxes)):
                if tmp_scores[k] > self.box_thresh:
                    boxes.append(tmp_boxes[k])
            if len(boxes) > 0:
                boxes = np.array(boxes)

                # rescale from network-input size back to the original image
                ratio_h, ratio_w = ratio_list[batch_index]
                boxes[:, :, 0] = boxes[:, :, 0] / ratio_w
                boxes[:, :, 1] = boxes[:, :, 1] / ratio_h

            boxes_batch.append(boxes)
        return boxes_batch
181 |
--------------------------------------------------------------------------------
/ppocr/postprocess/east_postprocess.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import numpy as np
20 | from .locality_aware_nms import nms_locality
21 | import cv2
22 |
23 | import os
24 | import sys
25 | __dir__ = os.path.dirname(os.path.abspath(__file__))
26 | sys.path.append(__dir__)
27 | sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
28 |
29 |
class EASTPostPocess(object):
    """
    The post process for EAST: decodes score and geometry maps into
    quadrilateral text boxes.
    """

    def __init__(self, params):
        self.score_thresh = params['score_thresh']
        self.cover_thresh = params['cover_thresh']
        self.nms_thresh = params['nms_thresh']

        # c++ la-nms is faster, but only support python 3.5
        self.is_python35 = (sys.version_info.major == 3 and
                            sys.version_info.minor == 5)

    def restore_rectangle_quad(self, origin, geometry):
        """
        Restore rectangle from quadrangle.
        """
        # each pixel predicts offsets to the 4 corners: corner = pixel - offset
        tiled_origin = np.concatenate(
            (origin, origin, origin, origin), axis=1)  # (n, 8)
        return (tiled_origin - geometry).reshape((-1, 4, 2))  # (n, 4, 2)

    def detect(self,
               score_map,
               geo_map,
               score_thresh=0.8,
               cover_thresh=0.1,
               nms_thresh=0.2):
        """
        restore text boxes from score map and geo map
        """
        score_map = score_map[0]
        # (C, H, W) -> (H, W, C)
        geo_map = np.swapaxes(geo_map, 1, 0)
        geo_map = np.swapaxes(geo_map, 1, 2)
        # keep only confidently-scored pixels
        xy_text = np.argwhere(score_map > score_thresh)
        if len(xy_text) == 0:
            return []
        # process text pixels top to bottom
        xy_text = xy_text[np.argsort(xy_text[:, 0])]
        # restore quad proposals; x4 because maps are at 1/4 input resolution
        text_box_restored = self.restore_rectangle_quad(
            xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = text_box_restored.reshape((-1, 8))
        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
        if self.is_python35:
            import lanms
            boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
        else:
            boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
        if boxes.shape[0] == 0:
            return []
        # Here we filter some low score boxes by the average score map,
        # this is different from the orginal paper.
        for i, box in enumerate(boxes):
            mask = np.zeros_like(score_map, dtype=np.uint8)
            cv2.fillPoly(mask, box[:8].reshape(
                (-1, 4, 2)).astype(np.int32) // 4, 1)
            boxes[i, 8] = cv2.mean(score_map, mask)[0]
        return boxes[boxes[:, 8] > cover_thresh]

    def sort_poly(self, p):
        """
        Sort polygons: rotate so the point with the smallest x+y comes
        first, then make the first edge the more horizontal one.
        """
        start = np.argmin(np.sum(p, axis=1))
        p = p[[start, (start + 1) % 4,
               (start + 2) % 4, (start + 3) % 4]]
        if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
            return p
        return p[[0, 3, 2, 1]]

    def __call__(self, outs_dict, ratio_list):
        """Decode a batch of score/geo maps into per-image box arrays."""
        score_list = outs_dict['f_score']
        geo_list = outs_dict['f_geo']
        dt_boxes_list = []
        for ino in range(len(ratio_list)):
            boxes = self.detect(
                score_map=score_list[ino],
                geo_map=geo_list[ino],
                score_thresh=self.score_thresh,
                cover_thresh=self.cover_thresh,
                nms_thresh=self.nms_thresh)
            boxes_norm = []
            if len(boxes) > 0:
                # undo the resize applied to the network input
                ratio_h, ratio_w = ratio_list[ino]
                boxes = boxes[:, :8].reshape((-1, 4, 2))
                boxes[:, :, 0] /= ratio_w
                boxes[:, :, 1] /= ratio_h
                for box in boxes:
                    box = self.sort_poly(box.astype(np.int32))
                    # drop degenerate boxes (either side shorter than 5 px)
                    if np.linalg.norm(box[0] - box[1]) < 5 \
                            or np.linalg.norm(box[3] - box[0]) < 5:
                        continue
                    boxes_norm.append(box)
            dt_boxes_list.append(np.array(boxes_norm))
        return dt_boxes_list
137 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/.gitignore:
--------------------------------------------------------------------------------
1 | adaptor.so
2 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/.ycm_extra_conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Copyright (C) 2014 Google Inc.
4 | #
5 | # This file is part of YouCompleteMe.
6 | #
7 | # YouCompleteMe is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU General Public License as published by
9 | # the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # YouCompleteMe is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU General Public License
18 | # along with YouCompleteMe.  If not, see <http://www.gnu.org/licenses/>.
19 |
20 | import os
21 | import sys
22 | import glob
23 | import ycm_core
24 |
# These are the compilation flags that will be used in case there's no
# compilation database set (by default, one is not set).
# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR.
# Make this script's own directory importable when ycmd loads the file.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))


BASE_DIR = os.path.dirname(os.path.realpath(__file__))

# `python_config` shells out to python-config so the flags below pick up the
# interpreter's C API include paths (needed to compile the pybind11 adaptor).
from plumbum.cmd import python_config


# Fallback compile flags used when no compilation database is available.
flags = [
    '-Wall',
    '-Wextra',
    '-Wnon-virtual-dtor',
    '-Winvalid-pch',
    '-Wno-unused-local-typedefs',
    '-std=c++11',
    '-x', 'c++',
    '-Iinclude',
] + python_config('--cflags').split()


# Set this to the absolute path to the folder (NOT the file!) containing the
# compile_commands.json file to use that instead of 'flags'. See here for
# more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html
#
# Most projects will NOT need to set this to anything; you can just change the
# 'flags' list of compilation flags.
compilation_database_folder = ''

# The empty path never exists, so by default no database is loaded.
if os.path.exists( compilation_database_folder ):
    database = ycm_core.CompilationDatabase( compilation_database_folder )
else:
    database = None

# Source-file extensions probed when resolving flags for a header file.
SOURCE_EXTENSIONS = [ '.cpp', '.cxx', '.cc', '.c', '.m', '.mm' ]
62 |
def DirectoryOfThisScript():
    """Return the absolute directory that contains this configuration file."""
    return os.path.dirname(os.path.abspath(__file__))
65 |
66 |
def MakeRelativePathsInFlagsAbsolute(flags, working_directory):
    """Rewrite relative path arguments in compiler *flags* as absolute paths.

    Handles both the split form ('-I', 'include') and the fused form
    ('-Iinclude') of the path flags. Flags that are already absolute, or
    unrelated to paths, pass through unchanged. With no working directory
    the flags are returned as a shallow copy.
    """
    if not working_directory:
        return list(flags)

    path_prefixes = ('-isystem', '-I', '-iquote', '--sysroot=')
    rewritten_flags = []
    pending_path = False  # previous flag was a bare path option, e.g. '-I'

    for flag in flags:
        rewritten = flag

        if pending_path:
            pending_path = False
            if not flag.startswith('/'):
                rewritten = os.path.join(working_directory, flag)

        for prefix in path_prefixes:
            if flag == prefix:
                # Path comes as the *next* flag.
                pending_path = True
                break
            if flag.startswith(prefix):
                # Fused flag: keep the prefix, absolutize the remainder.
                rewritten = prefix + os.path.join(working_directory,
                                                 flag[len(prefix):])
                break

        if rewritten:
            rewritten_flags.append(rewritten)
    return rewritten_flags
94 |
95 |
def IsHeaderFile(filename):
    """Return True when *filename* has a C/C++ header extension."""
    return os.path.splitext(filename)[1] in ('.h', '.hxx', '.hpp', '.hh')
99 |
100 |
def GetCompilationInfoForFile(filename):
    """Look up compilation info for *filename* in the compilation database.

    The compile_commands.json generated by CMake has no entries for header
    files, so for a header we probe sibling source files (same basename,
    each known source extension) and reuse the first match that has flags.
    Returns None when a header has no usable sibling.
    """
    if not IsHeaderFile(filename):
        return database.GetCompilationInfoForFile(filename)

    basename = os.path.splitext(filename)[0]
    for extension in SOURCE_EXTENSIONS:
        candidate = basename + extension
        if not os.path.exists(candidate):
            continue
        compilation_info = database.GetCompilationInfoForFile(candidate)
        if compilation_info.compiler_flags_:
            return compilation_info
    return None
117 |
118 |
# This is the entry point; this function is called by ycmd to produce flags for
# a file.
def FlagsForFile(filename, **kwargs):
    """ycmd entry point: return the compile-flag dict for *filename*."""
    if database:
        # NOTE: compilation_info.compiler_flags_ is a "list-like" StringVec
        # object, not a real python list.
        info = GetCompilationInfoForFile(filename)
        if not info:
            return None
        final_flags = MakeRelativePathsInFlagsAbsolute(
            info.compiler_flags_, info.compiler_working_dir_)
    else:
        final_flags = MakeRelativePathsInFlagsAbsolute(
            flags, DirectoryOfThisScript())

    return {
        'flags': final_flags,
        'do_cache': True
    }
140 |
141 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/Makefile:
--------------------------------------------------------------------------------
# Build flags: C++11, optimized, plus the interpreter's compile/link flags
# from python3-config (Python C API include dirs and libraries).
CXXFLAGS = -I include -std=c++11 -O3 $(shell python3-config --cflags)
LDFLAGS = $(shell python3-config --ldflags)

# Rebuild whenever the main header or anything under include/ changes.
DEPS = lanms.h $(shell find include -xtype f)
CXX_SOURCES = adaptor.cpp include/clipper/clipper.cpp

LIB_SO = adaptor.so

# Compile the pybind11 adaptor into a shared library importable from Python.
$(LIB_SO): $(CXX_SOURCES) $(DEPS)
	$(CXX) -o $@ $(CXXFLAGS) $(LDFLAGS) $(CXX_SOURCES) --shared -fPIC

clean:
	rm -rf $(LIB_SO)
14 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/__init__.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import os
3 | import numpy as np
4 |
BASE_DIR = os.path.dirname(os.path.realpath(__file__))

# Build the C++ `adaptor` extension in-place at import time; `make` is a
# no-op when adaptor.so is already up to date. A non-zero exit code means
# the build failed, so fail loudly here rather than at first call.
if subprocess.call(['make', '-C', BASE_DIR]) != 0:  # non-zero == build failure
    raise RuntimeError('Cannot compile lanms: {}'.format(BASE_DIR))
9 |
10 |
def merge_quadrangle_n9(polys, thres=0.3, precision=10000):
    """Merge overlapping quadrangles via locality-aware NMS.

    Args:
        polys: (n, 9) array — 8 corner coordinates followed by a score.
        thres: IoU threshold above which two quadrangles are merged.
        precision: coordinates are scaled up by this factor before the
            integer-based C++ NMS runs, then scaled back down.

    Returns:
        float32 array of merged quadrangles; empty when *polys* is empty
        or when NMS keeps nothing.
    """
    from .adaptor import merge_quadrangle_n9 as nms_impl
    if len(polys) == 0:
        return np.array([], dtype='float32')
    p = polys.copy()
    p[:, :8] *= precision
    ret = np.array(nms_impl(p, thres), dtype='float32')
    # Bug fix: an empty NMS result is a 1-D (0,) array, so the 2-D slice
    # ret[:, :8] would raise IndexError. Return it untouched instead.
    if ret.size == 0:
        return ret
    ret[:, :8] /= precision
    return ret
20 |
21 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/__main__.py:
--------------------------------------------------------------------------------
import numpy as np


from . import merge_quadrangle_n9

if __name__ == '__main__':
    # A unit square with confidence 1: (x1, y1, ..., x4, y4, score).
    base = np.array([0, 0, 0, 1, 1, 1, 1, 0, 1], dtype='float32')

    # Two heavily-overlapping squares should be merged; the shifted one
    # (offset by 2) stays separate.
    print(merge_quadrangle_n9(np.array([base, base + 0.1, base + 2])))
11 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/adaptor.cpp:
--------------------------------------------------------------------------------
1 | #include "pybind11/pybind11.h"
2 | #include "pybind11/numpy.h"
3 | #include "pybind11/stl.h"
4 | #include "pybind11/stl_bind.h"
5 |
6 | #include "lanms.h"
7 |
8 | namespace py = pybind11;
9 |
10 |
11 | namespace lanms_adaptor {
12 |
13 | std::vector> polys2floats(const std::vector &polys) {
14 | std::vector> ret;
15 | for (size_t i = 0; i < polys.size(); i ++) {
16 | auto &p = polys[i];
17 | auto &poly = p.poly;
18 | ret.emplace_back(std::vector{
19 | float(poly[0].X), float(poly[0].Y),
20 | float(poly[1].X), float(poly[1].Y),
21 | float(poly[2].X), float(poly[2].Y),
22 | float(poly[3].X), float(poly[3].Y),
23 | float(p.score),
24 | });
25 | }
26 |
27 | return ret;
28 | }
29 |
30 |
31 | /**
32 | *
33 | * \param quad_n9 an n-by-9 numpy array, where first 8 numbers denote the
34 | * quadrangle, and the last one is the score
35 | * \param iou_threshold two quadrangles with iou score above this threshold
36 | * will be merged
37 | *
38 | * \return an n-by-9 numpy array, the merged quadrangles
39 | */
40 | std::vector> merge_quadrangle_n9(
41 | py::array_t quad_n9,
42 | float iou_threshold) {
43 | auto pbuf = quad_n9.request();
44 | if (pbuf.ndim != 2 || pbuf.shape[1] != 9)
45 | throw std::runtime_error("quadrangles must have a shape of (n, 9)");
46 | auto n = pbuf.shape[0];
47 | auto ptr = static_cast(pbuf.ptr);
48 | return polys2floats(lanms::merge_quadrangle_n9(ptr, n, iou_threshold));
49 | }
50 |
51 | }
52 |
53 | PYBIND11_PLUGIN(adaptor) {
54 | py::module m("adaptor", "NMS");
55 |
56 | m.def("merge_quadrangle_n9", &lanms_adaptor::merge_quadrangle_n9,
57 | "merge quadrangels");
58 |
59 | return m.ptr();
60 | }
61 |
62 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/include/clipper/clipper.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangming8/ocr_algo_server/9d4a859dd037719573e505e2696dcad73662bc14/ppocr/postprocess/lanms/include/clipper/clipper.cpp
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/include/pybind11/buffer_info.h:
--------------------------------------------------------------------------------
1 | /*
2 | pybind11/buffer_info.h: Python buffer object interface
3 |
4 | Copyright (c) 2016 Wenzel Jakob
5 |
6 | All rights reserved. Use of this source code is governed by a
7 | BSD-style license that can be found in the LICENSE file.
8 | */
9 |
10 | #pragma once
11 |
12 | #include "common.h"
13 |
14 | NAMESPACE_BEGIN(pybind11)
15 |
16 | /// Information record describing a Python buffer object
17 | struct buffer_info {
18 | void *ptr = nullptr; // Pointer to the underlying storage
19 | ssize_t itemsize = 0; // Size of individual items in bytes
20 | ssize_t size = 0; // Total number of entries
21 | std::string format; // For homogeneous buffers, this should be set to format_descriptor::format()
22 | ssize_t ndim = 0; // Number of dimensions
23 | std::vector shape; // Shape of the tensor (1 entry per dimension)
24 | std::vector strides; // Number of entries between adjacent entries (for each per dimension)
25 |
26 | buffer_info() { }
27 |
28 | buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim,
29 | detail::any_container shape_in, detail::any_container strides_in)
30 | : ptr(ptr), itemsize(itemsize), size(1), format(format), ndim(ndim),
31 | shape(std::move(shape_in)), strides(std::move(strides_in)) {
32 | if (ndim != (ssize_t) shape.size() || ndim != (ssize_t) strides.size())
33 | pybind11_fail("buffer_info: ndim doesn't match shape and/or strides length");
34 | for (size_t i = 0; i < (size_t) ndim; ++i)
35 | size *= shape[i];
36 | }
37 |
38 | template
39 | buffer_info(T *ptr, detail::any_container shape_in, detail::any_container strides_in)
40 | : buffer_info(private_ctr_tag(), ptr, sizeof(T), format_descriptor::format(), static_cast(shape_in->size()), std::move(shape_in), std::move(strides_in)) { }
41 |
42 | buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t size)
43 | : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize}) { }
44 |
45 | template
46 | buffer_info(T *ptr, ssize_t size)
47 | : buffer_info(ptr, sizeof(T), format_descriptor::format(), size) { }
48 |
49 | explicit buffer_info(Py_buffer *view, bool ownview = true)
50 | : buffer_info(view->buf, view->itemsize, view->format, view->ndim,
51 | {view->shape, view->shape + view->ndim}, {view->strides, view->strides + view->ndim}) {
52 | this->view = view;
53 | this->ownview = ownview;
54 | }
55 |
56 | buffer_info(const buffer_info &) = delete;
57 | buffer_info& operator=(const buffer_info &) = delete;
58 |
59 | buffer_info(buffer_info &&other) {
60 | (*this) = std::move(other);
61 | }
62 |
63 | buffer_info& operator=(buffer_info &&rhs) {
64 | ptr = rhs.ptr;
65 | itemsize = rhs.itemsize;
66 | size = rhs.size;
67 | format = std::move(rhs.format);
68 | ndim = rhs.ndim;
69 | shape = std::move(rhs.shape);
70 | strides = std::move(rhs.strides);
71 | std::swap(view, rhs.view);
72 | std::swap(ownview, rhs.ownview);
73 | return *this;
74 | }
75 |
76 | ~buffer_info() {
77 | if (view && ownview) { PyBuffer_Release(view); delete view; }
78 | }
79 |
80 | private:
81 | struct private_ctr_tag { };
82 |
83 | buffer_info(private_ctr_tag, void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim,
84 | detail::any_container &&shape_in, detail::any_container &&strides_in)
85 | : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in)) { }
86 |
87 | Py_buffer *view = nullptr;
88 | bool ownview = false;
89 | };
90 |
91 | NAMESPACE_BEGIN(detail)
92 |
93 | template struct compare_buffer_info {
94 | static bool compare(const buffer_info& b) {
95 | return b.format == format_descriptor::format() && b.itemsize == (ssize_t) sizeof(T);
96 | }
97 | };
98 |
99 | template struct compare_buffer_info::value>> {
100 | static bool compare(const buffer_info& b) {
101 | return (size_t) b.itemsize == sizeof(T) && (b.format == format_descriptor::value ||
102 | ((sizeof(T) == sizeof(long)) && b.format == (std::is_unsigned::value ? "L" : "l")) ||
103 | ((sizeof(T) == sizeof(size_t)) && b.format == (std::is_unsigned::value ? "N" : "n")));
104 | }
105 | };
106 |
107 | NAMESPACE_END(detail)
108 | NAMESPACE_END(pybind11)
109 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/include/pybind11/complex.h:
--------------------------------------------------------------------------------
1 | /*
2 | pybind11/complex.h: Complex number support
3 |
4 | Copyright (c) 2016 Wenzel Jakob
5 |
6 | All rights reserved. Use of this source code is governed by a
7 | BSD-style license that can be found in the LICENSE file.
8 | */
9 |
10 | #pragma once
11 |
12 | #include "pybind11.h"
13 | #include
14 |
15 | /// glibc defines I as a macro which breaks things, e.g., boost template names
16 | #ifdef I
17 | # undef I
18 | #endif
19 |
20 | NAMESPACE_BEGIN(pybind11)
21 |
22 | template struct format_descriptor, detail::enable_if_t::value>> {
23 | static constexpr const char c = format_descriptor::c;
24 | static constexpr const char value[3] = { 'Z', c, '\0' };
25 | static std::string format() { return std::string(value); }
26 | };
27 |
28 | template constexpr const char format_descriptor<
29 | std::complex, detail::enable_if_t::value>>::value[3];
30 |
31 | NAMESPACE_BEGIN(detail)
32 |
33 | template struct is_fmt_numeric, detail::enable_if_t::value>> {
34 | static constexpr bool value = true;
35 | static constexpr int index = is_fmt_numeric::index + 3;
36 | };
37 |
38 | template class type_caster> {
39 | public:
40 | bool load(handle src, bool convert) {
41 | if (!src)
42 | return false;
43 | if (!convert && !PyComplex_Check(src.ptr()))
44 | return false;
45 | Py_complex result = PyComplex_AsCComplex(src.ptr());
46 | if (result.real == -1.0 && PyErr_Occurred()) {
47 | PyErr_Clear();
48 | return false;
49 | }
50 | value = std::complex((T) result.real, (T) result.imag);
51 | return true;
52 | }
53 |
54 | static handle cast(const std::complex &src, return_value_policy /* policy */, handle /* parent */) {
55 | return PyComplex_FromDoubles((double) src.real(), (double) src.imag());
56 | }
57 |
58 | PYBIND11_TYPE_CASTER(std::complex, _("complex"));
59 | };
60 | NAMESPACE_END(detail)
61 | NAMESPACE_END(pybind11)
62 |
--------------------------------------------------------------------------------
/ppocr/postprocess/lanms/include/pybind11/eval.h:
--------------------------------------------------------------------------------
1 | /*
2 | pybind11/exec.h: Support for evaluating Python expressions and statements
3 | from strings and files
4 |
5 | Copyright (c) 2016 Klemens Morgenstern and
6 | Wenzel Jakob
7 |
8 | All rights reserved. Use of this source code is governed by a
9 | BSD-style license that can be found in the LICENSE file.
10 | */
11 |
12 | #pragma once
13 |
14 | #include "pybind11.h"
15 |
16 | NAMESPACE_BEGIN(pybind11)
17 |
/// How a string handed to eval() should be interpreted by the interpreter.
enum eval_mode {
    /// Evaluate a string containing an isolated expression
    eval_expr,

    /// Evaluate a string containing a single statement. Returns \c none
    eval_single_statement,

    /// Evaluate a string containing a sequence of statements. Returns \c none
    eval_statements
};
28 |
29 | template
30 | object eval(str expr, object global = globals(), object local = object()) {
31 | if (!local)
32 | local = global;
33 |
34 | /* PyRun_String does not accept a PyObject / encoding specifier,
35 | this seems to be the only alternative */
36 | std::string buffer = "# -*- coding: utf-8 -*-\n" + (std::string) expr;
37 |
38 | int start;
39 | switch (mode) {
40 | case eval_expr: start = Py_eval_input; break;
41 | case eval_single_statement: start = Py_single_input; break;
42 | case eval_statements: start = Py_file_input; break;
43 | default: pybind11_fail("invalid evaluation mode");
44 | }
45 |
46 | PyObject *result = PyRun_String(buffer.c_str(), start, global.ptr(), local.ptr());
47 | if (!result)
48 | throw error_already_set();
49 | return reinterpret_steal