├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .pre_commit_tools └── codestyle │ ├── copyright.hook │ └── pylint_pre_commit.hook ├── LICENSE ├── MANIFEST.in ├── README.md ├── demo ├── clarity_assessment │ ├── blured_demo.jpg │ └── clarity_demo.jpg ├── clas_data │ ├── train │ │ ├── n01440764_15008.JPEG │ │ ├── n01530575_10039.JPEG │ │ ├── n01601694_4224.JPEG │ │ ├── n01641577_14447.JPEG │ │ ├── n01682714_8438.JPEG │ │ └── n01698640_9242.JPEG │ └── train_list.txt ├── code_exists │ ├── contains_code_demo.jpg │ └── no_code_demo.jpg ├── image_orientation │ ├── 0.jpg │ ├── 1.jpg │ ├── 2.jpg │ └── 3.jpg ├── ocr_data │ ├── images │ │ ├── lsvt_train_images_0.jpg │ │ ├── lsvt_train_images_1325.jpg │ │ ├── lsvt_train_images_1339.jpg │ │ ├── lsvt_train_images_1410.jpg │ │ └── lsvt_train_images_275.jpg │ └── train_list.txt ├── ocr_rec │ ├── bg │ │ ├── n01530575_10039.JPEG │ │ └── n01601694_4224.JPEG │ └── corpus.txt └── shitu_data │ ├── train │ ├── 0000001.jpg │ ├── 00005129.jpg │ ├── 111687027218_10.JPG │ ├── 2250416.jpg │ └── 2289835.jpg │ └── train_list.txt ├── deploy ├── configs │ ├── dataaug │ │ └── ocr_rec_gen.yaml │ ├── dataaug_clas.yaml │ ├── dataaug_ocr_img2img.yaml │ ├── dataaug_ocr_text2img.yaml │ ├── dataaug_shitu.yaml │ ├── dataclean.yaml │ └── ppcv │ │ ├── big_model_classification.yaml │ │ ├── clarity_assessment.yaml │ │ ├── code_exists.yaml │ │ ├── feature_extract.yaml │ │ ├── image_orientation.yaml │ │ └── text_recognition.yaml ├── ppcv │ ├── __init__.py │ ├── core │ │ ├── __init__.py │ │ ├── config.py │ │ ├── framework.py │ │ └── workspace.py │ ├── engine │ │ ├── __init__.py │ │ └── pipeline.py │ ├── model_zoo │ │ ├── MODEL_ZOO │ │ ├── __init__.py │ │ └── model_zoo.py │ ├── ops │ │ ├── __init__.py │ │ ├── base.py │ │ ├── connector │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── keyframes_extract_helper.py │ │ │ ├── op_connector.py │ │ │ └── table_matcher.py │ │ ├── general_data_obj.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ 
│ ├── classification │ │ │ │ ├── __init__.py │ │ │ │ ├── inference.py │ │ │ │ ├── postprocess.py │ │ │ │ └── preprocess.py │ │ │ ├── detection │ │ │ │ ├── __init__.py │ │ │ │ ├── inference.py │ │ │ │ ├── postprocess.py │ │ │ │ └── preprocess.py │ │ │ ├── feature_extraction │ │ │ │ ├── __init__.py │ │ │ │ ├── inference.py │ │ │ │ ├── postprocess.py │ │ │ │ └── preprocess.py │ │ │ ├── keypoint │ │ │ │ ├── __init__.py │ │ │ │ ├── inference.py │ │ │ │ ├── postprocess.py │ │ │ │ └── preprocess.py │ │ │ ├── ocr_crnn_recognition │ │ │ │ ├── __init__.py │ │ │ │ ├── inference.py │ │ │ │ ├── postprocess.py │ │ │ │ └── preprocess.py │ │ │ └── ocr_db_detection │ │ │ │ ├── __init__.py │ │ │ │ ├── inference.py │ │ │ │ ├── postprocess.py │ │ │ │ └── preprocess.py │ │ ├── output │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── classification.py │ │ │ ├── detection.py │ │ │ ├── feature_extraction.py │ │ │ ├── keypoint.py │ │ │ └── ocr_rec.py │ │ └── predictor.py │ └── utils │ │ ├── __init__.py │ │ ├── download.py │ │ ├── helper.py │ │ ├── logger.py │ │ └── timer.py ├── python │ ├── dataaug │ │ ├── __init__.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── imaug │ │ │ │ ├── random_crop_data.py │ │ │ │ └── text_image_aug │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── augment.py │ │ │ │ │ └── warp_mls.py │ │ │ ├── postprocess │ │ │ │ ├── __init__.py │ │ │ │ ├── attr_rec.py │ │ │ │ ├── threshoutput.py │ │ │ │ └── topk.py │ │ │ ├── preprocess │ │ │ │ ├── __init__.py │ │ │ │ ├── batch_ops │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── batch_operators.py │ │ │ │ └── ops │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── autoaugment.py │ │ │ │ │ ├── cutout.py │ │ │ │ │ ├── fmix.py │ │ │ │ │ ├── functional.py │ │ │ │ │ ├── grid.py │ │ │ │ │ ├── hide_and_seek.py │ │ │ │ │ ├── operators.py │ │ │ │ │ ├── randaugment.py │ │ │ │ │ ├── random_erasing.py │ │ │ │ │ └── timm_autoaugment.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ └── get_image_list.py │ │ ├── gen_img.py │ │ ├── gen_ocr_rec.py │ │ ├── predict │ │ │ ├── 
__init__.py │ │ │ ├── build_gallery.py │ │ │ ├── postprocess.py │ │ │ ├── predict_cls.py │ │ │ ├── predict_rec.py │ │ │ └── preprocess.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── data_utils.py │ │ │ ├── get_image_list.py │ │ │ ├── imagenet1k_label_list.txt │ │ │ ├── liner.py │ │ │ ├── logger.py │ │ │ ├── math_utils.py │ │ │ ├── noiser.py │ │ │ ├── predictor.py │ │ │ ├── remaper.py │ │ │ ├── renderer.py │ │ │ └── utility.py │ └── dataclean │ │ ├── __init__.py │ │ └── demos │ │ ├── paddleclas_demo.py │ │ └── paddleocr_demo.py └── utils │ ├── __init__.py │ ├── label_map │ ├── clarity_assessment_label_list.txt │ ├── code_exists_label_list.txt │ └── image_orientation_label_list.txt │ └── utils.py ├── docs ├── en │ └── .gitkeep ├── images │ └── PP-DataAug │ │ └── .gitkeep └── zh_CN │ ├── DataAug │ ├── DataAug.md │ └── quick_start.md │ ├── DataClean │ ├── DataClean.md │ ├── blured_image_filtering.md │ ├── code_image_filtering.md │ ├── image_orientation_correction.md │ └── quick_start.md │ └── datasets │ ├── .gitkeep │ ├── Annotation_tool │ ├── EISeg.md │ ├── EIVideo.md │ ├── PPOCRLabelv2.md │ ├── PaddleLabel.md │ ├── Speech.md │ ├── Style_Text.md │ └── doccano.md │ ├── dataset.md │ └── datasets │ ├── 3D.md │ ├── Clas.md │ ├── Detection.md │ ├── Image_Denoising.md │ ├── Keypoints.md │ ├── NLP.md │ ├── OCR.md │ ├── Segmentation.md │ ├── Speech.md │ └── Video.md ├── python_whl ├── __init__.py └── easydata.py ├── requirements.txt ├── setup.py ├── test_tipc └── .gitkeep └── tools ├── predict.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | venv 4 | .DS_Store 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/.gitmodules -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Following config of Paddle. 2 | # https://github.com/PaddlePaddle/Paddle/blob/4bbb0b3878970d2c8ed4dd9183af9a293900909a/.pre-commit-config.yaml 3 | # Exclude all third-party libraries and auto-generated files globally 4 | repos: 5 | - repo: https://github.com/Lucas-C/pre-commit-hooks.git 6 | rev: v1.1.14 7 | hooks: 8 | - id: remove-crlf 9 | - id: remove-tabs 10 | files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|xpu|kps)$ 11 | args: [--whitespaces-count, '2'] 12 | - id: remove-tabs 13 | files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ 14 | args: [--whitespaces-count, '4'] 15 | - repo: https://github.com/google/yapf 16 | rev: v0.32.0 17 | hooks: 18 | - id: yapf 19 | files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ 20 | - repo: https://github.com/PyCQA/flake8 21 | rev: 4.0.1 22 | hooks: 23 | - id: flake8 24 | - repo: https://github.com/pre-commit/pre-commit-hooks 25 | rev: v4.1.0 26 | hooks: 27 | - id: check-added-large-files 28 | - id: check-merge-conflict 29 | - id: check-symlinks 30 | - id: detect-private-key 31 | - id: end-of-file-fixer 32 | - id: sort-simple-yaml 33 | files: (op|backward|op_[a-z_]+)\.yaml$ 34 | - id: trailing-whitespace 35 | files: (.*\.(py|bzl|md|rst|c|cc|cxx|cpp|cu|h|hpp|hxx|xpu|kps|cmake|yaml|yml|hook)|BUILD|.*\.BUILD|WORKSPACE|CMakeLists\.txt)$ 36 | - repo: local 37 | hooks: 38 | - id: pylint-doc-string 39 | name: pylint 40 | description: Check python docstring style using docstring_checker. 
41 | entry: bash ./.pre_commit_tools/codestyle/pylint_pre_commit.hook 42 | language: system 43 | files: \.(py)$ 44 | - repo: local 45 | hooks: 46 | - id: copyright_checker 47 | name: copyright_checker 48 | entry: python ./.pre_commit_tools/codestyle/copyright.hook 49 | language: system 50 | files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|xpu|kps|py|sh)$ 51 | -------------------------------------------------------------------------------- /.pre_commit_tools/codestyle/copyright.hook: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import argparse 16 | import re 17 | import sys 18 | import os 19 | import datetime 20 | 21 | COPYRIGHT = '''Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 22 | 23 | Licensed under the Apache License, Version 2.0 (the "License"); 24 | you may not use this file except in compliance with the License. 25 | You may obtain a copy of the License at 26 | 27 | http://www.apache.org/licenses/LICENSE-2.0 28 | 29 | Unless required by applicable law or agreed to in writing, software 30 | distributed under the License is distributed on an "AS IS" BASIS, 31 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
32 | See the License for the specific language governing permissions and 33 | limitations under the License.''' 34 | 35 | def _generate_copyright(comment_mark): 36 | copyright=COPYRIGHT.split(os.linesep) 37 | header = copyright[0].rstrip() 38 | 39 | p = re.search('(\d{4})', header).group(0) 40 | now = datetime.datetime.now() 41 | 42 | header = header.replace(p,str(now.year)) 43 | 44 | ans=[comment_mark + " " + header + os.linesep] 45 | for idx, line in enumerate(copyright[1:]): 46 | ans.append(comment_mark + " " + line.rstrip() + os.linesep) 47 | 48 | return ans 49 | 50 | def _get_comment_mark(path): 51 | lang_type=re.compile(r"\.(py|sh)$") 52 | if lang_type.search(path) is not None: 53 | return "#" 54 | 55 | lang_type=re.compile(r"\.(h|c|hpp|cc|cpp|cu|go|cuh|proto)$") 56 | if lang_type.search(path) is not None: 57 | return "//" 58 | 59 | return None 60 | 61 | 62 | RE_ENCODE = re.compile(r"^[ \t\v]*#.*?coding[:=]", re.IGNORECASE) 63 | RE_COPYRIGHT = re.compile(r".*Copyright \(c\) \d{4}", re.IGNORECASE) 64 | RE_SHEBANG = re.compile(r"^[ \t\v]*#[ \t]?\!") 65 | 66 | def _check_copyright(path): 67 | head=[] 68 | try: 69 | with open(path, 'r', encoding='utf-8') as f: 70 | head = [next(f) for x in range(4)] 71 | except StopIteration: 72 | pass 73 | 74 | for idx, line in enumerate(head): 75 | if RE_COPYRIGHT.search(line) is not None: 76 | return True 77 | 78 | return False 79 | 80 | def generate_copyright(path, comment_mark): 81 | original_contents = open(path, 'r', encoding="utf-8").readlines() 82 | head = original_contents[0:4] 83 | 84 | insert_line_no=0 85 | for i, line in enumerate(head): 86 | if RE_ENCODE.search(line) or RE_SHEBANG.search(line): 87 | insert_line_no=i+1 88 | 89 | copyright = _generate_copyright(comment_mark) 90 | if insert_line_no == 0: 91 | new_contents = copyright 92 | if len(original_contents) > 0 and len(original_contents[0].strip()) != 0: 93 | new_contents.append(os.linesep) 94 | new_contents.extend(original_contents) 95 | else: 96 | 
new_contents=original_contents[0:insert_line_no] 97 | new_contents.append(os.linesep) 98 | new_contents.extend(copyright) 99 | if len(original_contents) > insert_line_no and len(original_contents[insert_line_no].strip()) != 0: 100 | new_contents.append(os.linesep) 101 | new_contents.extend(original_contents[insert_line_no:]) 102 | new_contents="".join(new_contents) 103 | 104 | with open(path, 'w', encoding='utf-8') as output_file: 105 | output_file.write(new_contents) 106 | 107 | 108 | 109 | def main(argv=None): 110 | parser = argparse.ArgumentParser( 111 | description='Checker for copyright declaration.') 112 | parser.add_argument('filenames', nargs='*', help='Filenames to check') 113 | args = parser.parse_args(argv) 114 | 115 | retv = 0 116 | for path in args.filenames: 117 | comment_mark = _get_comment_mark(path) 118 | if comment_mark is None: 119 | print("warning:Unsupported file", path, file=sys.stderr) 120 | continue 121 | 122 | if _check_copyright(path): 123 | continue 124 | 125 | generate_copyright(path, comment_mark) 126 | 127 | 128 | if __name__ == '__main__': 129 | exit(main()) 130 | -------------------------------------------------------------------------------- /.pre_commit_tools/codestyle/pylint_pre_commit.hook: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TOTAL_ERRORS=0 4 | 5 | 6 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 7 | export PYTHONPATH=$DIR:$PYTHONPATH 8 | 9 | readonly VERSION="2.12.0" 10 | version=$(pylint --version | grep 'pylint') 11 | 12 | if ! 
[[ $version == *"$VERSION"* ]]; then 13 | pip install pylint==2.12.0 14 | fi 15 | 16 | # The trick to remove deleted files: https://stackoverflow.com/a/2413151 17 | for file in $(git diff --name-status | awk '$1 != "D" {print $2}'); do 18 | pylint --disable=all --load-plugins=docstring_checker \ 19 | --enable=doc-string-one-line,doc-string-end-with,doc-string-with-all-args,doc-string-triple-quotes,doc-string-missing,doc-string-indent-error,doc-string-with-returns,doc-string-with-raises $file; 20 | TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?); 21 | done 22 | 23 | exit $TOTAL_ERRORS 24 | #For now, just warning: 25 | #exit 0 26 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | 4 | recursive-include deploy *.* 5 | -------------------------------------------------------------------------------- /demo/clarity_assessment/blured_demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/clarity_assessment/blured_demo.jpg -------------------------------------------------------------------------------- /demo/clarity_assessment/clarity_demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/clarity_assessment/clarity_demo.jpg -------------------------------------------------------------------------------- /demo/clas_data/train/n01440764_15008.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/clas_data/train/n01440764_15008.JPEG -------------------------------------------------------------------------------- 
/demo/clas_data/train/n01530575_10039.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/clas_data/train/n01530575_10039.JPEG -------------------------------------------------------------------------------- /demo/clas_data/train/n01601694_4224.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/clas_data/train/n01601694_4224.JPEG -------------------------------------------------------------------------------- /demo/clas_data/train/n01641577_14447.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/clas_data/train/n01641577_14447.JPEG -------------------------------------------------------------------------------- /demo/clas_data/train/n01682714_8438.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/clas_data/train/n01682714_8438.JPEG -------------------------------------------------------------------------------- /demo/clas_data/train/n01698640_9242.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/clas_data/train/n01698640_9242.JPEG -------------------------------------------------------------------------------- /demo/clas_data/train_list.txt: -------------------------------------------------------------------------------- 1 | train/n01440764_15008.JPEG 0 2 | train/n01530575_10039.JPEG 1 3 | train/n01601694_4224.JPEG 2 4 | train/n01641577_14447.JPEG 3 5 | train/n01682714_8438.JPEG 4 6 | 
train/n01698640_9242.JPEG 5 7 | -------------------------------------------------------------------------------- /demo/code_exists/contains_code_demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/code_exists/contains_code_demo.jpg -------------------------------------------------------------------------------- /demo/code_exists/no_code_demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/code_exists/no_code_demo.jpg -------------------------------------------------------------------------------- /demo/image_orientation/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/image_orientation/0.jpg -------------------------------------------------------------------------------- /demo/image_orientation/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/image_orientation/1.jpg -------------------------------------------------------------------------------- /demo/image_orientation/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/image_orientation/2.jpg -------------------------------------------------------------------------------- /demo/image_orientation/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/image_orientation/3.jpg 
-------------------------------------------------------------------------------- /demo/ocr_data/images/lsvt_train_images_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/ocr_data/images/lsvt_train_images_0.jpg -------------------------------------------------------------------------------- /demo/ocr_data/images/lsvt_train_images_1325.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/ocr_data/images/lsvt_train_images_1325.jpg -------------------------------------------------------------------------------- /demo/ocr_data/images/lsvt_train_images_1339.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/ocr_data/images/lsvt_train_images_1339.jpg -------------------------------------------------------------------------------- /demo/ocr_data/images/lsvt_train_images_1410.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/ocr_data/images/lsvt_train_images_1410.jpg -------------------------------------------------------------------------------- /demo/ocr_data/images/lsvt_train_images_275.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/ocr_data/images/lsvt_train_images_275.jpg -------------------------------------------------------------------------------- /demo/ocr_data/train_list.txt: -------------------------------------------------------------------------------- 1 | 
images/lsvt_train_images_0.jpg 母婴百汇 2 | images/lsvt_train_images_275.jpg 停车场 3 | images/lsvt_train_images_1325.jpg 品质沙龙 4 | images/lsvt_train_images_1339.jpg 散作乾坤万里春 5 | images/lsvt_train_images_1410.jpg 24小时营业 6 | -------------------------------------------------------------------------------- /demo/ocr_rec/bg/n01530575_10039.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/ocr_rec/bg/n01530575_10039.JPEG -------------------------------------------------------------------------------- /demo/ocr_rec/bg/n01601694_4224.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/ocr_rec/bg/n01601694_4224.JPEG -------------------------------------------------------------------------------- /demo/ocr_rec/corpus.txt: -------------------------------------------------------------------------------- 1 | 母婴百汇 2 | 停车场 3 | 品质沙龙 4 | 散作乾坤万里春 5 | 24小时营业 6 | -------------------------------------------------------------------------------- /demo/shitu_data/train/0000001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/shitu_data/train/0000001.jpg -------------------------------------------------------------------------------- /demo/shitu_data/train/00005129.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/shitu_data/train/00005129.jpg -------------------------------------------------------------------------------- /demo/shitu_data/train/111687027218_10.JPG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/shitu_data/train/111687027218_10.JPG -------------------------------------------------------------------------------- /demo/shitu_data/train/2250416.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/shitu_data/train/2250416.jpg -------------------------------------------------------------------------------- /demo/shitu_data/train/2289835.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/shitu_data/train/2289835.jpg -------------------------------------------------------------------------------- /demo/shitu_data/train_list.txt: -------------------------------------------------------------------------------- 1 | train/2289835.jpg 0 2 | train/2250416.jpg 168 3 | train/111687027218_10.JPG 80724 4 | train/0000001.jpg 180409 5 | train/00005129.jpg 185409 6 | -------------------------------------------------------------------------------- /deploy/configs/dataaug/ocr_rec_gen.yaml: -------------------------------------------------------------------------------- 1 | img_size: 2 | h: 48 3 | # Small font_size will make text looks like blured/prydown 4 | font_size: 5 | min: 42 6 | max: 44 7 | # choose Text color range 8 | # color boundary is in R,G,B format 9 | font_color: 10 | enable: true 11 | blue: 12 | fraction: 0.2 13 | l_boundary: [0,0,150] 14 | h_boundary: [60,60,255] 15 | brown: 16 | fraction: 0.15 17 | l_boundary: [139,70,19] 18 | h_boundary: [160,82,43] 19 | black: 20 | fraction: 0.25 21 | l_boundary: [0,0,0] 22 | h_boundary: [10,10,10] 23 | red: 24 | fraction: 0.2 25 | l_boundary: [200,0,0] 26 | h_boundary: [250,10,10] 27 | yellow: 28 | fraction: 0.2 29 | l_boundary: [200, 200, 0] 30 | 
h_boundary: [225, 225, 50] 31 | 32 | # By default, text is drawed by Pillow with (https://stackoverflow.com/questions/43828955/measuring-width-of-text-python-pil) 33 | # If `random_space` is enabled, some text will be drawed char by char with a random space 34 | random_space: 35 | enable: false 36 | fraction: 0.5 37 | min: 0 # -0.1 will make chars very close or even overlapped 38 | max: 0.4 39 | 40 | random_direction: 41 | enable: true 42 | fraction: 0.5 43 | 44 | 45 | # Do remap with sin() 46 | # Currently this process is very slow! 47 | curve: 48 | enable: true 49 | fraction: 0.3 50 | period: 360 # degree, sin 函数的周期 51 | min: 1 # sin 函数的幅值范围 52 | max: 5 53 | 54 | # random crop text height 55 | crop: 56 | enable: False 57 | fraction: 0.3 58 | 59 | # top and bottom will applied equally 60 | top: 61 | min: 5 62 | max: 10 # in pixel, this value should small than img_height 63 | bottom: 64 | min: 5 65 | max: 10 # in pixel, this value should small than img_height 66 | 67 | # Use image in bg_dir as background for text 68 | img_bg: 69 | enable: true 70 | fraction: 0.7 71 | 72 | # Not work when random_space applied 73 | text_border: 74 | enable: true 75 | fraction: 0.5 76 | 77 | # lighter than word color 78 | light: 79 | enable: true 80 | fraction: 0.5 81 | 82 | # darker than word color 83 | dark: 84 | enable: true 85 | fraction: 0.5 86 | 87 | # https://docs.opencv.org/3.4/df/da0/group__photo__clone.html#ga2bf426e4c93a6b1f21705513dfeca49d 88 | # https://www.cs.virginia.edu/~connelly/class/2014/comp_photo/proj2/poisson.pdf 89 | # Use opencv seamlessClone() to draw text on background 90 | # For some background image, this will make text image looks more real 91 | seamless_clone: 92 | enable: true 93 | fraction: 0.5 94 | 95 | perspective_transform: 96 | max_x: 30 97 | max_y: 30 98 | max_z: 4 99 | 100 | blur: 101 | enable: true 102 | fraction: 0.05 103 | 104 | # If an image is applied blur, it will not be applied prydown 105 | prydown: 106 | enable: false 107 | fraction: 0.05 
108 | max_scale: 1.5 # Image will first resize to 1.5x, and than resize to 1x 109 | 110 | noise: 111 | enable: true 112 | fraction: 0.3 113 | 114 | gauss: 115 | enable: true 116 | fraction: 0.35 117 | 118 | uniform: 119 | enable: true 120 | fraction: 0.35 121 | 122 | salt_pepper: 123 | enable: false 124 | fraction: 0.25 125 | 126 | poisson: 127 | enable: true 128 | fraction: 0.30 129 | 130 | line: 131 | enable: true 132 | fraction: 0.1 133 | 134 | under_line: 135 | enable: true 136 | fraction: 0.3 137 | 138 | table_line: 139 | enable: true 140 | fraction: 0.3 141 | 142 | middle_line: 143 | enable: true 144 | fraction: 0.4 145 | 146 | line_color: 147 | enable: true 148 | black: 149 | fraction: 0.4 150 | l_boundary: [0,0,0] 151 | h_boundary: [64,64,64] 152 | blue: 153 | fraction: 0.2 154 | l_boundary: [0,0,150] 155 | h_boundary: [60,60,255] 156 | white: 157 | fraction: 0.4 158 | l_boundary: [254,254,254] 159 | h_boundary: [255,255,255] 160 | 161 | # These operates are applied on the final output image, 162 | # so actually it can also be applied in training process as an data augmentation method. 163 | 164 | # By default, text is darker than background. 
165 | # If `reverse_color` is enabled, some images will have dark background and light text 166 | reverse_color: 167 | enable: true 168 | fraction: 0.1 169 | 170 | emboss: 171 | enable: false 172 | fraction: 0.05 173 | 174 | sharp: 175 | enable: false 176 | fraction: 0.1 177 | -------------------------------------------------------------------------------- /deploy/configs/dataaug_clas.yaml: -------------------------------------------------------------------------------- 1 | DataGen: 2 | ops: 3 | - randaugment 4 | - random_erasing 5 | - gridmask 6 | - tia_distort 7 | - tia_stretch 8 | - tia_perspective 9 | ori_data_dir: "demo/clas_data" 10 | label_file: "demo/clas_data/train_list.txt" 11 | gen_label: &gen_label "labels/test.txt" 12 | img_save_folder: "test" 13 | gen_ratio: 0 14 | gen_num: 5 15 | size: 224 16 | 17 | FeatureExtract: 18 | config: "deploy/configs/ppcv/feature_extract.yaml" 19 | thresh: 0.9 20 | file_out: "tmp/rm_repeat.txt" 21 | 22 | # indexing engine config 23 | IndexProcess: 24 | index_method: "HNSW32" # supported: HNSW32, IVF, Flat 25 | image_root: "./test" 26 | index_dir: "./augdata/all_aug" 27 | all_label_file: *gen_label 28 | index_operation: "new" # suported: "append", "remove", "new" 29 | delimiter: " " 30 | dist_type: "IP" 31 | embedding_size: 512 32 | batch_size: 32 33 | return_k: 5 34 | score_thres: 0.5 35 | 36 | BigModel: 37 | model_type: cls # support(cls / ocr_rec) 38 | config: "deploy/configs/ppcv/big_model_classification.yaml" 39 | batch_size: 8 40 | thresh: 0.1 41 | final_label: "high_socre_label.txt" 42 | -------------------------------------------------------------------------------- /deploy/configs/dataaug_ocr_img2img.yaml: -------------------------------------------------------------------------------- 1 | DataGen: 2 | ops: 3 | - randaugment 4 | - random_erasing 5 | - gridmask 6 | - tia_distort 7 | - tia_stretch 8 | - tia_perspective 9 | ori_data_dir: "demo/ocr_data" 10 | label_file: "demo/ocr_data/train_list.txt" 11 | gen_label: 
&gen_label labels/all_label.txt 12 | model_type: &model_type ocr_rec 13 | delimiter: &delimiter "\t" 14 | img_save_folder: &img_save_folder "test" 15 | gen_ratio: 0 16 | gen_num: 5 17 | size: 224 18 | 19 | FeatureExtract: 20 | config: "deploy/configs/ppcv/feature_extract.yaml" 21 | thresh: 0.9 22 | file_out: "tmp/rm_repeat.txt" 23 | 24 | # indexing engine config 25 | IndexProcess: 26 | index_method: "HNSW32" # supported: HNSW32, IVF, Flat 27 | image_root: *img_save_folder 28 | index_dir: "./augdata/all_aug" 29 | all_label_file: *gen_label 30 | index_operation: "new" # suported: "append", "remove", "new" 31 | delimiter: *delimiter 32 | dist_type: "IP" 33 | embedding_size: 512 34 | batch_size: 32 35 | return_k: 5 36 | score_thres: 0.5 37 | 38 | BigModel: 39 | model_type: *model_type # support(cls / ocr_rec) 40 | config: "deploy/configs/ppcv/text_recognition.yaml" 41 | batch_size: 8 42 | thresh: 0.1 43 | final_label: "high_socre_label.txt" 44 | -------------------------------------------------------------------------------- /deploy/configs/dataaug_ocr_text2img.yaml: -------------------------------------------------------------------------------- 1 | DataGen: 2 | mode: text2img 3 | config: "deploy/configs/dataaug/ocr_rec_gen.yaml" 4 | gen_num: 5 5 | out_dir: "test" 6 | bg_num_per_word: 5 7 | threads: 1 8 | bg_img_dir: "demo/ocr_rec/bg/" 9 | font_dir: "demo/ocr_rec/font" 10 | corpus_file: "demo/ocr_rec/corpus.txt" 11 | img_save_folder: &img_save_folder output_img 12 | gen_label: &gen_label output_img/all_label.txt 13 | delimiter: &delimiter "\t" 14 | 15 | FeatureExtract: 16 | config: "deploy/configs/ppcv/feature_extract.yaml" 17 | thresh: 0.9 18 | file_out: "tmp/rm_repeat.txt" 19 | 20 | # indexing engine config 21 | IndexProcess: 22 | index_method: "HNSW32" # supported: HNSW32, IVF, Flat 23 | image_root: *img_save_folder 24 | index_dir: "./augdata/all_aug" 25 | all_label_file: *gen_label 26 | index_operation: "new" # suported: "append", "remove", "new" 27 | delimiter: 
*delimiter 28 | dist_type: "IP" 29 | embedding_size: 512 30 | batch_size: 32 31 | return_k: 5 32 | score_thres: 0.5 33 | 34 | BigModel: 35 | model_type: ocr_rec # support(cls / ocr_rec) 36 | config: "deploy/configs/ppcv/text_recognition.yaml" 37 | batch_size: 8 38 | thresh: 0.1 39 | final_label: "high_socre_label.txt" 40 | -------------------------------------------------------------------------------- /deploy/configs/dataaug_shitu.yaml: -------------------------------------------------------------------------------- 1 | DataGen: 2 | ops: 3 | - randaugment 4 | - random_erasing 5 | - gridmask 6 | - tia_distort 7 | - tia_stretch 8 | - tia_perspective 9 | ori_data_dir: "demo/shitu_data" 10 | label_file: "demo/shitu_data/train_list.txt" 11 | gen_label: &gen_label "labels/test.txt" 12 | img_save_folder: &img_save_folder "test" 13 | gen_ratio: 0 14 | gen_num: 5 15 | size: 224 16 | 17 | FeatureExtract: 18 | config: "deploy/configs/ppcv/feature_extract.yaml" 19 | thresh: 0.9 20 | file_out: "tmp/rm_repeat.txt" 21 | 22 | # indexing engine config 23 | IndexProcess: 24 | index_method: "HNSW32" # supported: HNSW32, IVF, Flat 25 | image_root: *img_save_folder 26 | index_dir: "./augdata/all_aug" 27 | all_label_file: *gen_label 28 | index_operation: "new" # suported: "append", "remove", "new" 29 | delimiter: " " 30 | dist_type: "IP" 31 | embedding_size: 512 32 | batch_size: 32 33 | return_k: 5 34 | score_thres: 0.5 35 | -------------------------------------------------------------------------------- /deploy/configs/dataclean.yaml: -------------------------------------------------------------------------------- 1 | DataClean: 2 | ImageOrientation: "deploy/configs/ppcv/image_orientation.yaml" 3 | ClarityAssessment: "deploy/configs/ppcv/clarity_assessment.yaml" 4 | CodeExists: "deploy/configs/ppcv/code_exists.yaml" 5 | -------------------------------------------------------------------------------- /deploy/configs/ppcv/big_model_classification.yaml: 
-------------------------------------------------------------------------------- 1 | ENV: 2 | run_mode: paddle 3 | device: GPU 4 | min_subgraph_size: 3 5 | shape_info_filename: ./ 6 | trt_calib_mode: False 7 | cpu_threads: 1 8 | trt_use_static: False 9 | return_res: True 10 | print_res: False 11 | 12 | MODEL: 13 | - ClassificationOp: 14 | name: cls 15 | param_path: paddlecv://models/PPLCNet_x1_0_infer/inference.pdiparams 16 | model_path: paddlecv://models/PPLCNet_x1_0_infer/inference.pdmodel 17 | batch_size: 8 18 | last_ops: 19 | - input 20 | PreProcess: 21 | - ResizeImage: 22 | resize_short: 256 23 | - CropImage: 24 | size: 224 25 | - NormalizeImage: 26 | scale: 0.00392157 27 | mean: [0.485, 0.456, 0.406] 28 | std: [0.229, 0.224, 0.225] 29 | order: '' 30 | channel_num: 3 31 | - ToCHWImage: 32 | - ExpandDim: 33 | axis: 0 34 | PostProcess: 35 | - Topk: 36 | topk: 1 37 | class_id_map_file: "paddlecv://dict/classification/imagenet1k_label_list.txt" 38 | Inputs: 39 | - image 40 | 41 | - ClasOutput: 42 | name: vis 43 | last_ops: 44 | - input 45 | - cls 46 | Inputs: 47 | - fn 48 | - image 49 | - class_ids 50 | - scores 51 | - label_names 52 | -------------------------------------------------------------------------------- /deploy/configs/ppcv/clarity_assessment.yaml: -------------------------------------------------------------------------------- 1 | ENV: 2 | output_dir: output 3 | run_mode: paddle 4 | device: GPU 5 | min_subgraph_size: 3 6 | shape_info_filename: ./ 7 | trt_calib_mode: False 8 | cpu_threads: 1 9 | trt_use_static: False 10 | save_output: True 11 | return_res: True 12 | print_res: True 13 | 14 | MODEL: 15 | - ClassificationOp: 16 | name: cls 17 | param_path: https://paddleclas.bj.bcebos.com/models/PULC/inference/clarity_assessment_infer/inference.pdiparams 18 | model_path: https://paddleclas.bj.bcebos.com/models/PULC/inference/clarity_assessment_infer/inference.pdmodel 19 | batch_size: 8 20 | last_ops: 21 | - input 22 | PreProcess: 23 | - ResizeImage: 24 | 
resize_short: 256 25 | - CropImage: 26 | size: 224 27 | - NormalizeImage: 28 | scale: 0.00392157 29 | mean: [0.485, 0.456, 0.406] 30 | std: [0.229, 0.224, 0.225] 31 | order: '' 32 | channel_num: 3 33 | - ToCHWImage: 34 | - ExpandDim: 35 | axis: 0 36 | PostProcess: 37 | - Topk: 38 | topk: 1 39 | class_id_map_file: deploy/utils/label_map/clarity_assessment_label_list.txt 40 | Inputs: 41 | - image 42 | 43 | - ClasOutput: 44 | name: vis 45 | last_ops: 46 | - input 47 | - cls 48 | Inputs: 49 | - fn 50 | - image 51 | - class_ids 52 | - scores 53 | - label_names 54 | -------------------------------------------------------------------------------- /deploy/configs/ppcv/code_exists.yaml: -------------------------------------------------------------------------------- 1 | ENV: 2 | output_dir: output 3 | run_mode: paddle 4 | device: GPU 5 | min_subgraph_size: 3 6 | shape_info_filename: ./ 7 | trt_calib_mode: False 8 | cpu_threads: 1 9 | trt_use_static: False 10 | save_output: True 11 | return_res: True 12 | print_res: True 13 | 14 | MODEL: 15 | - ClassificationOp: 16 | name: cls 17 | param_path: https://paddleclas.bj.bcebos.com/models/PULC/inference/code_exists_infer/inference.pdiparams 18 | model_path: https://paddleclas.bj.bcebos.com/models/PULC/inference/code_exists_infer/inference.pdmodel 19 | batch_size: 8 20 | last_ops: 21 | - input 22 | PreProcess: 23 | - ResizeImage: 24 | resize_short: 256 25 | - CropImage: 26 | size: 224 27 | - NormalizeImage: 28 | scale: 0.00392157 29 | mean: [0.485, 0.456, 0.406] 30 | std: [0.229, 0.224, 0.225] 31 | order: '' 32 | channel_num: 3 33 | - ToCHWImage: 34 | - ExpandDim: 35 | axis: 0 36 | PostProcess: 37 | - Topk: 38 | topk: 1 39 | class_id_map_file: deploy/utils/label_map/code_exists_label_list.txt 40 | Inputs: 41 | - image 42 | 43 | - ClasOutput: 44 | name: vis 45 | last_ops: 46 | - input 47 | - cls 48 | Inputs: 49 | - fn 50 | - image 51 | - class_ids 52 | - scores 53 | - label_names 54 | 
-------------------------------------------------------------------------------- /deploy/configs/ppcv/feature_extract.yaml: -------------------------------------------------------------------------------- 1 | ENV: 2 | run_mode: paddle 3 | device: GPU 4 | min_subgraph_size: 3 5 | shape_info_filename: ./ 6 | trt_calib_mode: False 7 | cpu_threads: 1 8 | trt_use_static: False 9 | return_res: True 10 | print_res: False 11 | 12 | MODEL: 13 | - FeatureExtractionOp: 14 | name: feature 15 | param_path: paddlecv://models/general_PPLCNet_x2_5_lite_v1.0_infer/inference.pdiparams 16 | model_path: paddlecv://models/general_PPLCNet_x2_5_lite_v1.0_infer/inference.pdmodel 17 | batch_size: 1 18 | last_ops: 19 | - input 20 | PreProcess: 21 | - ResizeImage: 22 | size: [224, 224] 23 | return_numpy: False 24 | interpolation: bilinear 25 | backend: cv2 26 | - NormalizeImage: 27 | scale: 1.0/255.0 28 | mean: [0.485, 0.456, 0.406] 29 | std: [0.229, 0.224, 0.225] 30 | order: hwc 31 | - ToCHWImage: 32 | - ExpandDim: 33 | axis: 0 34 | PostProcess: 35 | - NormalizeFeature: 36 | normalize: True 37 | Inputs: 38 | - image 39 | 40 | - FeatureOutput: 41 | name: save 42 | last_ops: 43 | - input 44 | - feature 45 | Inputs: 46 | - fn 47 | - feature 48 | -------------------------------------------------------------------------------- /deploy/configs/ppcv/image_orientation.yaml: -------------------------------------------------------------------------------- 1 | ENV: 2 | output_dir: output 3 | run_mode: paddle 4 | device: GPU 5 | min_subgraph_size: 3 6 | shape_info_filename: ./ 7 | trt_calib_mode: False 8 | cpu_threads: 1 9 | trt_use_static: False 10 | save_output: True 11 | return_res: True 12 | print_res: True 13 | 14 | MODEL: 15 | - ClassificationOp: 16 | name: cls 17 | param_path: https://paddleclas.bj.bcebos.com/models/PULC/inference/image_orientation_infer/inference.pdiparams 18 | model_path: https://paddleclas.bj.bcebos.com/models/PULC/inference/image_orientation_infer/inference.pdmodel 19 | 
batch_size: 8 20 | last_ops: 21 | - input 22 | PreProcess: 23 | - ResizeImage: 24 | resize_short: 256 25 | - CropImage: 26 | size: 224 27 | - NormalizeImage: 28 | scale: 0.00392157 29 | mean: [0.485, 0.456, 0.406] 30 | std: [0.229, 0.224, 0.225] 31 | order: '' 32 | channel_num: 3 33 | - ToCHWImage: 34 | - ExpandDim: 35 | axis: 0 36 | PostProcess: 37 | - ThreshOutput: 38 | threshold: 0 39 | class_id_map_file: deploy/utils/label_map/image_orientation_label_list.txt 40 | Inputs: 41 | - image 42 | 43 | - ClasOutput: 44 | name: vis 45 | last_ops: 46 | - input 47 | - cls 48 | Inputs: 49 | - fn 50 | - image 51 | - class_ids 52 | - scores 53 | - label_names 54 | -------------------------------------------------------------------------------- /deploy/configs/ppcv/text_recognition.yaml: -------------------------------------------------------------------------------- 1 | ENV: 2 | run_mode: paddle 3 | device: GPU 4 | min_subgraph_size: 3 5 | shape_info_filename: ./ 6 | trt_calib_mode: False 7 | cpu_threads: 1 8 | trt_use_static: False 9 | save_img: False 10 | save_res: False 11 | return_res: true 12 | 13 | 14 | MODEL: 15 | - OcrCrnnRecOp: 16 | name: rec 17 | param_path: paddlecv://models/ch_PP-OCRv3_rec_infer/inference.pdiparams 18 | model_path: paddlecv://models/ch_PP-OCRv3_rec_infer/inference.pdmodel 19 | batch_size: 6 20 | last_ops: 21 | - input 22 | PreProcess: 23 | - ReisizeNormImg: 24 | rec_image_shape: [3, 48, 320] 25 | PostProcess: 26 | - CTCLabelDecode: 27 | character_dict_path: "paddlecv://dict/ocr/ch_dict.txt" 28 | use_space_char: true 29 | Inputs: 30 | - image 31 | 32 | - OCRRecOutput: 33 | name: vis 34 | last_ops: 35 | - input 36 | - rec 37 | Inputs: 38 | - fn 39 | - image 40 | - rec_text 41 | - rec_score 42 | -------------------------------------------------------------------------------- /deploy/ppcv/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import (core, engine, ops, utils, model_zoo) 16 | -------------------------------------------------------------------------------- /deploy/ppcv/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import workspace 16 | from .workspace import * 17 | 18 | __all__ = workspace.__all__ 19 | -------------------------------------------------------------------------------- /deploy/ppcv/core/workspace.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | __all__ = ['register', 'create'] 18 | 19 | global_config = dict() 20 | 21 | 22 | def register(cls): 23 | """ 24 | Register a given module class. 25 | Args: 26 | cls (type): Module class to be registered. 27 | Returns: cls 28 | """ 29 | if cls.__name__ in global_config: 30 | raise ValueError("Module class already registered: {}".format( 31 | cls.__name__)) 32 | global_config[cls.__name__] = cls 33 | return cls 34 | 35 | 36 | def create(cls_name, op_cfg, env_cfg): 37 | """ 38 | Create an instance of given module class. 39 | 40 | Args: 41 | cls_name(str): Class of which to create instnce. 42 | 43 | Return: instance of type `cls_or_name` 44 | """ 45 | assert type(cls_name) == str, "should be a name of class" 46 | if cls_name not in global_config: 47 | raise ValueError("The module {} is not registered".format(cls_name)) 48 | 49 | cls = global_config[cls_name] 50 | return cls(op_cfg, env_cfg) 51 | 52 | 53 | def get_global_op(): 54 | return global_config 55 | -------------------------------------------------------------------------------- /deploy/ppcv/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import pipeline 16 | from .pipeline import * 17 | 18 | __all__ = pipeline.__all__ 19 | -------------------------------------------------------------------------------- /deploy/ppcv/model_zoo/MODEL_ZOO: -------------------------------------------------------------------------------- 1 | classification/PPLCNet_x1_0_infer 2 | -------------------------------------------------------------------------------- /deploy/ppcv/model_zoo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . 
import model_zoo 16 | from .model_zoo import * 17 | 18 | __all__ = model_zoo.__all__ 19 | -------------------------------------------------------------------------------- /deploy/ppcv/model_zoo/model_zoo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os.path as osp 16 | import pkg_resources 17 | 18 | try: 19 | from collections.abc import Sequence 20 | except: 21 | from collections import Sequence 22 | 23 | from ppcv.utils.download import get_config_path, get_model_path 24 | from ppcv.utils.logger import setup_logger 25 | logger = setup_logger(__name__) 26 | 27 | __all__ = [ 28 | 'list_model', 'get_config_file', 'get_model_file', 'MODEL_ZOO_FILENAME' 29 | ] 30 | 31 | MODEL_ZOO_FILENAME = 'MODEL_ZOO' 32 | TASK_DICT = { 33 | # general config 34 | 'classification': 'paddlecv://configs/test_classification.yml', 35 | 'detection': 'paddlecv://configs/test_detection.yml', 36 | 'keypoint': 'paddlecv://configs/test_keypoint.yml', 37 | # single model 38 | # TODO 39 | # system 40 | 'PP-OCRv3': 'paddlecv://configs/system/PP-OCRv3.yml', 41 | } 42 | 43 | 44 | def list_model(filters=[]): 45 | model_zoo_file = pkg_resources.resource_filename('ppcv.model_zoo', 46 | MODEL_ZOO_FILENAME) 47 | with open(model_zoo_file) as f: 48 | model_names = f.read().splitlines() 49 | 50 | # filter 
model_name 51 | def filt(name): 52 | for f in filters: 53 | if name.find(f) < 0: 54 | return False 55 | return True 56 | 57 | if isinstance(filters, str) or not isinstance(filters, Sequence): 58 | filters = [filters] 59 | model_names = [name for name in model_names if filt(name)] 60 | if len(model_names) == 0 and len(filters) > 0: 61 | raise ValueError("no model found, please check filters setting, " 62 | "filters can be set as following kinds:\n" 63 | "\tTask: classification, detection ...\n" 64 | "\tArchitecture: PPLCNet, PPYOLOE ...\n") 65 | 66 | model_str = "Available Models:\n" 67 | for model_name in model_names: 68 | model_str += "\t{}\n".format(model_name) 69 | logger.info(model_str) 70 | 71 | 72 | # models and configs save on bcebos under dygraph directory 73 | def get_config_file(task): 74 | """Get config path from task. 75 | """ 76 | if task not in TASK_DICT: 77 | tasks = TASK_DICT.keys() 78 | logger.error("Illegal task: {}, please use one of {}".format(task, 79 | tasks)) 80 | path = TASK_DICT[task] 81 | return get_config_path(path) 82 | 83 | 84 | def get_model_file(path): 85 | return get_model_path(path) 86 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from . import models 16 | from . import output 17 | from . import connector 18 | 19 | from .models import * 20 | from .output import * 21 | from .connector import * 22 | 23 | __all__ = models.__all__ + output.__all__ + connector.__all__ 24 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import os 16 | import importlib 17 | import math 18 | import numpy as np 19 | try: 20 | from collections.abc import Sequence 21 | except Exception: 22 | from collections import Sequence 23 | 24 | import paddle 25 | from paddle.inference import Config 26 | from paddle.inference import create_predictor 27 | 28 | from ppcv.ops.predictor import PaddlePredictor 29 | from ppcv.utils.download import get_model_path 30 | 31 | __all__ = ["BaseOp", ] 32 | 33 | 34 | def create_operators(params, mod): 35 | """ 36 | create operators based on the config 37 | 38 | Args: 39 | params(list): a dict list, used to create some operators 40 | mod(module) : a module that can import single ops 41 | """ 42 | assert isinstance(params, list), ('operator config should be a list') 43 | if mod is None: 44 | mod = importlib.import_module(__name__) 45 | ops = [] 46 | for operator in params: 47 | assert isinstance(operator, 48 | dict) and len(operator) == 1, "yaml format error" 49 | op_name = list(operator)[0] 50 | param = {} if operator[op_name] is None else operator[op_name] 51 | op = getattr(mod, op_name)(**param) 52 | ops.append(op) 53 | 54 | return ops 55 | 56 | 57 | class BaseOp(object): 58 | """ 59 | Base Operator, implement of prediction process 60 | Args 61 | """ 62 | 63 | def __init__(self, model_cfg, env_cfg): 64 | self.model_cfg = model_cfg 65 | self.env_cfg = env_cfg 66 | self.input_keys = model_cfg["Inputs"] 67 | 68 | @classmethod 69 | def type(self): 70 | raise NotImplementedError 71 | 72 | @classmethod 73 | def get_output_keys(cls): 74 | raise NotImplementedError 75 | 76 | def get_input_keys(self): 77 | return self.input_keys 78 | 79 | def filter_input(self, last_outputs, input_name): 80 | f_inputs = [{k.split(".")[-1]: last[k] 81 | for k in input_name} for last in last_outputs] 82 | return f_inputs 83 | 84 | def check_output(self, output, name): 85 | if not isinstance(output, Sequence): 86 | raise ValueError('The output of op: {} must be Sequence').format( 87 | name) 88 | 
output = output[0] 89 | if not isinstance(output, dict): 90 | raise ValueError( 91 | 'The element of output in op: {} must be dict').format(name) 92 | out_keys = list(output.keys()) 93 | for out, define in zip(out_keys, self.output_keys): 94 | if out != define: 95 | raise ValueError( 96 | 'The output key in op: {} is inconsistent, expect {}, but received {}'. 97 | format(name, define, out)) 98 | 99 | def set_frame(self, frame_id): 100 | self.frame_id = frame_id 101 | 102 | def __call__(self, image_list): 103 | raise NotImplementedError 104 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/connector/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .op_connector import * 16 | 17 | __all__ = op_connector.__all__ 18 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/connector/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import cv2 16 | import numpy as np 17 | 18 | from ppcv.core.workspace import register 19 | from ppcv.ops.base import BaseOp 20 | 21 | 22 | @register 23 | class ConnectorBaseOp(BaseOp): 24 | def __init__(self, model_cfg, env_cfg=None): 25 | super(ConnectorBaseOp, self).__init__(model_cfg, env_cfg) 26 | self.name = model_cfg["name"] 27 | keys = self.get_output_keys() 28 | self.output_keys = [self.name + '.' + key for key in keys] 29 | 30 | @classmethod 31 | def type(self): 32 | return 'CONNECTOR' 33 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/general_data_obj.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import numpy as np 16 | 17 | 18 | class GeneralDataObj(object): 19 | def __init__(self, data): 20 | assert isinstance(data, (dict, )) 21 | self.data_dict = data 22 | pass 23 | 24 | def get(self, key): 25 | """ 26 | key can be one of [list, tuple, str] 27 | """ 28 | if isinstance(key, (list, tuple)): 29 | return [self.data_dict[k] for k in key] 30 | elif isinstance(key, (str)): 31 | return self.data_dict[key] 32 | else: 33 | assert False, f"key({key}) type must be one of [list, tuple, str] but got {type(key)}" 34 | 35 | def set(self, key, value): 36 | """ 37 | key: str 38 | value: an object 39 | """ 40 | self.data_dict[key] = value 41 | 42 | def keys(self, ): 43 | """ 44 | get all keys of the data 45 | """ 46 | return list(self.data_dict.keys()) 47 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import classification 16 | from . import detection 17 | from . import ocr_db_detection 18 | from . 
# Explicitly import every model sub-package.  BUGFIX: keypoint and
# feature_extraction previously relied on the star-imports below binding
# the submodule name as a side effect; make them explicit like the rest.
from . import classification
from . import feature_extraction
from . import detection
from . import keypoint
from . import ocr_db_detection
from . import ocr_crnn_recognition

from .classification import *
from .feature_extraction import *
from .detection import *
from .keypoint import *
from .ocr_db_detection import *
from .ocr_crnn_recognition import *

# Aggregate the public API of every sub-package.  BUGFIX: include
# feature_extraction.__all__, which was star-imported but missing here.
__all__ = classification.__all__ + feature_extraction.__all__
__all__ += detection.__all__ + keypoint.__all__
__all__ += ocr_db_detection.__all__
__all__ += ocr_crnn_recognition.__all__
class ModelBaseOp(BaseOp):
    """
    Base class for model ops: downloads the model files, creates the Paddle
    predictor and namespaces this op's output keys.

    Args:
        model_cfg (dict): per-op config; must contain "name", "param_path"
            and "model_path"; may contain "batch_size" (default 1).
        env_cfg (dict): shared environment config; its "batch_size" entry is
            overwritten here before the predictor is built.
    """

    def __init__(self, model_cfg, env_cfg):
        super(ModelBaseOp, self).__init__(model_cfg, env_cfg)
        # Paths may be remote URLs; get_model_path downloads/caches them.
        param_path = get_model_path(model_cfg['param_path'])
        model_path = get_model_path(model_cfg['model_path'])
        # NOTE: env_cfg is mutated on purpose so PaddlePredictor below sees
        # this model's batch size (default 1 when not configured).
        env_cfg["batch_size"] = model_cfg.get("batch_size", 1)
        self.batch_size = env_cfg["batch_size"]
        self.name = model_cfg["name"]
        self.frame = -1
        self.predictor = PaddlePredictor(param_path, model_path, env_cfg)

        # Output keys are prefixed with the op name to avoid collisions
        # between ops in one pipeline (e.g. "det.dt_bboxes").
        keys = self.get_output_keys()
        self.output_keys = [self.name + '.' + key for key in keys]

    @classmethod
    def type(self):
        return 'MODEL'

    def preprocess(self, inputs):
        # To be implemented by each concrete model op.
        raise NotImplementedError

    def postprocess(self, inputs):
        # To be implemented by each concrete model op.
        raise NotImplementedError
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from functools import reduce 16 | import os 17 | import numpy as np 18 | import math 19 | import paddle 20 | 21 | import importlib 22 | 23 | from .inference import ClassificationOp 24 | 25 | __all__ = ['ClassificationOp'] 26 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/models/classification/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
@register
class ClassificationOp(ModelBaseOp):
    """Image-classification op: preprocess -> predictor -> postprocess,
    producing class ids / scores / label names per input image."""

    def __init__(self, model_cfg, env_cfg):
        super(ClassificationOp, self).__init__(model_cfg, env_cfg)
        # Resolve the PreProcess/PostProcess operator class names against
        # this module (they are re-exported via the star-imports above).
        mod = importlib.import_module(__name__)
        self.preprocessor = create_operators(model_cfg["PreProcess"], mod)
        self.postprocessor = create_operators(model_cfg["PostProcess"], mod)

    @classmethod
    def get_output_keys(cls):
        # Keys are namespaced by ModelBaseOp ("<name>.class_ids", ...).
        return ["class_ids", "scores", "label_names"]

    def preprocess(self, inputs):
        # Chain every preprocess operator in configured order.
        outputs = inputs
        for ops in self.preprocessor:
            outputs = ops(outputs)
        return outputs

    def postprocess(self, inputs, result):
        outputs = result
        for idx, ops in enumerate(self.postprocessor):
            # Only the final operator receives the output keys: it is the
            # one that builds the result dicts.
            if idx == len(self.postprocessor) - 1:
                outputs = ops(outputs, self.output_keys)
            else:
                outputs = ops(outputs)
        return outputs

    def infer(self, image_list):
        """Run batched inference over a flat list of images."""
        inputs = []
        batch_loop_cnt = math.ceil(float(len(image_list)) / self.batch_size)
        results = []
        for i in range(batch_loop_cnt):
            start_index = i * self.batch_size
            end_index = min((i + 1) * self.batch_size, len(image_list))
            batch_image_list = image_list[start_index:end_index]
            # preprocess each image, then stack into one batch tensor
            inputs = [self.preprocess(img) for img in batch_image_list]
            inputs = np.concatenate(inputs, axis=0)
            # model inference
            result = self.predictor.run(inputs)[0]
            # postprocess
            result = self.postprocess(inputs, result)
            results.extend(result)
        # results = self.merge_batch_result(results)
        return results

    def __call__(self, inputs):
        """
        step1: parser inputs
        step2: run
        step3: merge results
        input: a list of dict
        """
        # step1: flatten.  Each pipeline input may hold a single image or a
        # list of images (e.g. detector crops) under the first input key.
        key = self.input_keys[0]
        is_list = False
        if isinstance(inputs[0][key], (list, tuple)):
            inputs = [input[key] for input in inputs]
            is_list = True
        else:
            inputs = [[input[key]] for input in inputs]
        # Remember each group's size so results can be regrouped below.
        sub_index_list = [len(input) for input in inputs]
        inputs = reduce(lambda x, y: x.extend(y) or x, inputs)

        # step2: run
        outputs = self.infer(inputs)

        # step3: merge flat results back into one dict-of-lists per input;
        # scalar inputs are unwrapped back out of their 1-element lists.
        curr_offsef_id = 0
        pipe_outputs = []
        for idx in range(len(sub_index_list)):
            sub_start_idx = curr_offsef_id
            sub_end_idx = curr_offsef_id + sub_index_list[idx]
            output = outputs[sub_start_idx:sub_end_idx]
            output = {k: [o[k] for o in output] for k in output[0]}
            if is_list is not True:
                output = {k: output[k][0] for k in output}
            pipe_outputs.append(output)

            curr_offsef_id = sub_end_idx
        return pipe_outputs
import os
import numpy as np


def parse_class_id_map(class_id_map_file):
    """Load an "id name" mapping file into ``{int id: str name}``.

    Returns None (after a warning) when no file is given or the path does
    not exist, so label names are simply left empty downstream.
    """
    if class_id_map_file is None:
        return None

    if not os.path.exists(class_id_map_file):
        print(
            "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!"
        )
        return None

    try:
        class_id_map = {}
        with open(class_id_map_file, "r", encoding='utf-8') as fin:
            lines = fin.readlines()
            for line in lines:
                # Each line is "<id> <name...>"; partition keeps any spaces
                # inside the name intact.
                partition = line.split("\n")[0].partition(" ")
                class_id_map[int(partition[0])] = str(partition[-1])
    except Exception as ex:
        print(ex)
        class_id_map = None
    return class_id_map


class Topk(object):
    """Keep the top-k classes per sample, with scores and label names."""

    def __init__(self, topk=1, class_id_map_file=None):
        assert isinstance(topk, (int, ))
        self.class_id_map = parse_class_id_map(class_id_map_file)
        self.topk = topk

    def __call__(self, x, output_keys):
        y = []
        for idx, probs in enumerate(x):
            # Indices of the k largest probabilities, descending.
            index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32")
            clas_id_list = []
            score_list = []
            label_name_list = []
            for i in index:
                # .item() converts NumPy scalars to native Python types.
                clas_id_list.append(i.item())
                score_list.append(probs[i].item())
                if self.class_id_map is not None:
                    label_name_list.append(self.class_id_map[i.item()])

            result = {
                output_keys[0]: clas_id_list,
                output_keys[1]: np.around(score_list, decimals=5).tolist(),
                output_keys[2]: label_name_list
            }
            y.append(result)
        return y


class ThreshOutput(object):
    """Thresholded top-1: the best class if its score exceeds `threshold`,
    otherwise fall back to `default_label_index`."""

    def __init__(self,
                 threshold,
                 default_label_index=0,
                 class_id_map_file=None):
        self.threshold = threshold
        self.default_label_index = default_label_index
        self.class_id_map = parse_class_id_map(class_id_map_file)

    def __call__(self, x, output_keys):
        y = []
        for idx, probs in enumerate(x):
            index = probs.argsort(axis=0)[::-1].astype("int32")
            # BUGFIX: convert to native Python types here; previously
            # NumPy scalars (np.int32/np.float32) leaked into the result,
            # inconsistent with Topk and not JSON-serializable.
            top1_id = index[0].item()
            top1_score = probs[top1_id].item()

            if top1_score > self.threshold:
                rtn_id = top1_id
            else:
                rtn_id = self.default_label_index

            label_name = self.class_id_map[
                rtn_id] if self.class_id_map is not None else ""

            result = {
                output_keys[0]: int(rtn_id),
                # Round like Topk does (5 decimals) for consistent output.
                output_keys[1]: round(float(probs[rtn_id]), 5),
                output_keys[2]: label_name
            }
            y.append(result)
        return y
class ParserDetResults(object):
    """Filter raw detector output by score/validity and map class ids to
    category names.

    Args:
        label_list (str): path to a COCO .json annotation file or a .txt
            file with one category name per line.
        threshold (float): minimum score for a detection to be kept.
        max_det_results (int): cap on detections considered per image.
    """

    def __init__(self, label_list, threshold=0.5, max_det_results=100):
        self.threshold = threshold
        self.max_det_results = max_det_results
        self.clsid2catid, self.catid2name = self.get_categories(label_list)

    def get_categories(self, label_list):
        """Build (clsid2catid, catid2name) mappings from the label file.

        Raises:
            ValueError: if label_list is neither .json nor .txt.
        """
        if label_list.endswith('json'):
            # lazy import pycocotools here
            from pycocotools.coco import COCO
            coco = COCO(label_list)
            cats = coco.loadCats(coco.getCatIds())
            clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
            catid2name = {cat['id']: cat['name'] for cat in cats}
        # BUGFIX: was `anno_file.endswith(...)` — `anno_file` is undefined
        # in this scope (NameError); the parameter is `label_list`.
        elif label_list.endswith('txt'):
            cats = []
            with open(label_list) as f:
                for line in f.readlines():
                    cats.append(line.strip())
            # A leading "background" entry is a placeholder, not a class.
            if cats[0] == 'background':
                cats = cats[1:]

            clsid2catid = {i: i for i in range(len(cats))}
            catid2name = {i: name for i, name in enumerate(cats)}

        else:
            raise ValueError("label_list {} should be json or txt.".format(
                label_list))
        return clsid2catid, catid2name

    def __call__(self, preds, bbox_num, output_keys):
        """Filter flat predictions (rows: [class_id, score, x1, y1, x2, y2])
        grouped per image by bbox_num; returns (result dict, new bbox_num).
        """
        start_id = 0
        dt_bboxes = []
        scores = []
        class_ids = []
        cls_names = []
        new_bbox_num = []

        for num in bbox_num:
            end_id = start_id + num
            pred = preds[start_id:end_id]
            start_id = end_id
            # BUGFIX: cap by the number of boxes (rows), not the number of
            # per-box fields — pred.shape[-1] is 6 (class, score, x1y1x2y2),
            # which wrongly limited every image to 6 detections.
            max_det_results = min(self.max_det_results, pred.shape[0])
            # Highest-scoring boxes first.
            keep_indexes = pred[:, 1].argsort()[::-1][:max_det_results]

            select_num = 0
            for idx in keep_indexes:
                single_res = pred[idx].tolist()
                class_id = int(single_res[0])
                score = single_res[1]
                bbox = single_res[2:]
                if score < self.threshold:
                    continue
                # class_id == -1 marks padded / invalid detections.
                if class_id == -1:
                    continue
                select_num += 1
                dt_bboxes.append(bbox)
                scores.append(score)
                class_ids.append(class_id)
                cls_names.append(self.catid2name[self.clsid2catid[class_id]])
            new_bbox_num.append(select_num)
        result = {
            output_keys[0]: dt_bboxes,
            output_keys[1]: scores,
            output_keys[2]: class_ids,
            output_keys[3]: cls_names,
        }
        new_bbox_num = np.array(new_bbox_num).astype('int32')
        return result, new_bbox_num
14 | 15 | from functools import reduce 16 | import os 17 | import numpy as np 18 | import math 19 | import paddle 20 | 21 | import importlib 22 | 23 | from .inference import FeatureExtractionOp 24 | 25 | __all__ = ['FeatureExtractionOp'] 26 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/models/feature_extraction/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
@register
class FeatureExtractionOp(ModelBaseOp):
    """Model op that turns input images into feature vectors."""

    def __init__(self, model_cfg, env_cfg):
        super().__init__(model_cfg, env_cfg)
        # PreProcess/PostProcess class names are looked up in this module.
        mod = importlib.import_module(__name__)
        self.preprocessor = create_operators(model_cfg["PreProcess"], mod)
        self.postprocessor = create_operators(model_cfg["PostProcess"], mod)

    @classmethod
    def get_output_keys(cls):
        return ["feature"]

    def preprocess(self, inputs):
        data = inputs
        for op in self.preprocessor:
            data = op(data)
        return data

    def postprocess(self, inputs, result):
        # The final operator builds the result dicts, so only it receives
        # the namespaced output keys.
        data = result
        last = len(self.postprocessor) - 1
        for pos, op in enumerate(self.postprocessor):
            data = op(data, self.output_keys) if pos == last else op(data)
        return data

    def infer(self, image_list):
        """Run batched inference over a flat list of images."""
        results = []
        for start in range(0, len(image_list), self.batch_size):
            batch = image_list[start:start + self.batch_size]
            # preprocess each image, then stack into one batch tensor
            net_in = np.concatenate(
                [self.preprocess(img) for img in batch], axis=0)
            # model inference
            net_out = self.predictor.run(net_in)[0]
            # postprocess
            results.extend(self.postprocess(net_in, net_out))
        return results

    def __call__(self, inputs):
        """
        step1: parser inputs
        step2: run
        step3: merge results
        input: a list of dict
        """
        # step1: flatten.  Each pipeline input may hold one image or a
        # list of images under the first input key.
        key = self.input_keys[0]
        expand = isinstance(inputs[0][key], (list, tuple))
        if expand:
            grouped = [item[key] for item in inputs]
        else:
            grouped = [[item[key]] for item in inputs]
        group_sizes = [len(g) for g in grouped]
        flat = reduce(lambda x, y: x.extend(y) or x, grouped)

        # step2: run
        flat_outputs = self.infer(flat)

        # step3: regroup flat results per original pipeline input; scalar
        # inputs are unwrapped back out of their 1-element lists.
        pipe_outputs = []
        offset = 0
        for size in group_sizes:
            chunk = flat_outputs[offset:offset + size]
            merged = {k: [o[k] for o in chunk] for k in chunk[0]}
            if not expand:
                merged = {k: merged[k][0] for k in merged}
            pipe_outputs.append(merged)
            offset += size
        return pipe_outputs
class NormalizeFeature(object):
    """L2-normalize each feature row, then wrap every row in a result dict
    keyed by the first output key."""

    def __init__(self, normalize=True):
        super().__init__()
        self.normalize = normalize

    def __call__(self, x, output_keys):
        feats = x
        if self.normalize:
            # Row-wise L2 norms; keepdims so the division broadcasts.
            norms = np.sqrt(np.sum(np.square(feats), axis=1, keepdims=True))
            feats = np.divide(feats, norms)

        return [{output_keys[0]: row} for row in feats]
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from functools import reduce 16 | import os 17 | import numpy as np 18 | import math 19 | import paddle 20 | 21 | import importlib 22 | 23 | from .inference import KeypointOp 24 | 25 | __all__ = ['KeypointOp'] 26 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/models/ocr_crnn_recognition/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from functools import reduce 16 | import os 17 | import numpy as np 18 | import math 19 | import paddle 20 | 21 | import importlib 22 | 23 | from .inference import OcrCrnnRecOp 24 | 25 | __all__ = ['OcrCrnnRecOp'] 26 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/models/ocr_crnn_recognition/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
@register
class OcrCrnnRecOp(ModelBaseOp):
    """CRNN-based OCR text-recognition op.

    Consumes text-line crops and produces recognized text plus a
    confidence score for each crop.
    """

    def __init__(self, model_cfg, env_cfg):
        super(OcrCrnnRecOp, self).__init__(model_cfg, env_cfg)
        mod = importlib.import_module(__name__)
        self.preprocessor = create_operators(model_cfg["PreProcess"], mod)
        self.postprocessor = create_operators(model_cfg["PostProcess"], mod)
        self.batch_size = model_cfg["batch_size"]
        # (C, H, W) the recognizer expects, read from the first PreProcess
        # operator's config entry.
        self.rec_image_shape = list(model_cfg["PreProcess"][0].values())[0][
            "rec_image_shape"]

    @classmethod
    def get_output_keys(cls):
        return ["rec_text", "rec_score"]

    def preprocess(self, inputs, max_wh_ratio):
        # max_wh_ratio is the widest W/H in the batch; every crop is padded
        # to that width so the batch can be stacked into one tensor.
        outputs = inputs
        for ops in self.preprocessor:
            outputs = ops(outputs, max_wh_ratio)
        return outputs

    def postprocess(self, result):
        outputs = result
        for idx, ops in enumerate(self.postprocessor):
            # The last operator builds the result dicts, so it also
            # receives the namespaced output keys.
            if idx == len(self.postprocessor) - 1:
                outputs = ops(outputs, self.output_keys)
            else:
                outputs = ops(outputs)
        return outputs

    def infer(self, image_list):
        # Sort crops by aspect ratio so images batched together need a
        # similar padded width (less compute wasted on padding).
        width_list = [float(img.shape[1]) / img.shape[0] for img in image_list]
        indices = np.argsort(np.array(width_list))

        inputs = []
        results = [None] * len(image_list)
        for beg_img_no in range(0, len(image_list), self.batch_size):
            end_img_no = min(len(image_list), beg_img_no + self.batch_size)
            imgC, imgH, imgW = self.rec_image_shape
            max_wh_ratio = imgW / imgH

            norm_img_batch = []
            # First pass: find the widest aspect ratio in this batch.
            for ino in range(beg_img_no, end_img_no):
                h, w = image_list[indices[ino]].shape[0:2]
                wh_ratio = w * 1.0 / h
                max_wh_ratio = max(max_wh_ratio, wh_ratio)

            # Second pass: resize/pad every crop to that shared width.
            for ino in range(beg_img_no, end_img_no):
                norm_img = self.preprocess(image_list[indices[ino]],
                                           max_wh_ratio)
                norm_img = norm_img[np.newaxis, :]
                norm_img_batch.append(norm_img)

            norm_img_batch = np.concatenate(norm_img_batch, axis=0)

            # model inference
            result = self.predictor.run(norm_img_batch)
            # postprocess
            result = self.postprocess(result)

            # Scatter batch results back to each crop's original position
            # (undoing the aspect-ratio sort).
            for rno in range(len(result)):
                results[indices[beg_img_no + rno]] = result[rno]
        return results

    def __call__(self, inputs):
        """
        step1: parser inputs
        step2: run
        step3: merge results
        input: a list of dict
        """
        # step1: flatten.  Each pipeline input may hold a single crop or a
        # list of crops (from a detector) under the first input key.
        key = self.input_keys[0]
        is_list = False
        if isinstance(inputs[0][key], (list, tuple)):
            inputs = [input[key] for input in inputs]
            is_list = True
        else:
            inputs = [[input[key]] for input in inputs]
        # Remember each group's size so results can be regrouped below.
        sub_index_list = [len(input) for input in inputs]
        inputs = reduce(lambda x, y: x.extend(y) or x, inputs)

        # step2: run
        outputs = self.infer(inputs)
        # step3: merge flat results back into one dict-of-lists per input;
        # scalar inputs are unwrapped back out of their 1-element lists.
        curr_offsef_id = 0
        pipe_outputs = []
        for idx in range(len(sub_index_list)):
            sub_start_idx = curr_offsef_id
            sub_end_idx = curr_offsef_id + sub_index_list[idx]
            output = outputs[sub_start_idx:sub_end_idx]
            output = {k: [o[k] for o in output] for k in output[0]}
            if is_list is not True:
                output = {k: output[k][0] for k in output}
            pipe_outputs.append(output)

            curr_offsef_id = sub_end_idx
        return pipe_outputs
class ReisizeNormImg(object):
    """Resize an OCR crop to the recognizer input height, normalize pixels
    to [-1, 1] and right-pad the width up to imgH * max_wh_ratio.

    NOTE(review): the class name typo ("Reisize") is kept because YAML
    configs reference operators by class name.

    Args:
        rec_image_shape (sequence): target (C, H, W) of the recognizer.
    """

    def __init__(self, rec_image_shape=(3, 48, 320)):
        super().__init__()
        # BUGFIX: copy to a fresh list so a caller-supplied sequence is
        # never aliased and the default is never a shared mutable object.
        self.rec_image_shape = list(rec_image_shape)

    def resize_norm_img(self, img, max_wh_ratio):
        """Resize img (HWC uint8/float) keeping aspect ratio, scale to
        [-1, 1], transpose to CHW and zero-pad the width."""
        imgC, imgH, imgW = self.rec_image_shape
        assert imgC == img.shape[2]
        # The padded canvas width grows with the batch's widest crop.
        imgW = int((imgH * max_wh_ratio))

        h, w = img.shape[:2]
        ratio = w / float(h)
        # Never resize wider than the padded canvas.
        if math.ceil(imgH * ratio) > imgW:
            resized_w = imgW
        else:
            resized_w = int(math.ceil(imgH * ratio))
        resized_image = cv2.resize(img, (resized_w, imgH))
        resized_image = resized_image.astype('float32')
        # HWC -> CHW and map [0, 255] -> [-1, 1].
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im

    def __call__(self, img, max_wh_ratio):
        """Apply resize_norm_img; max_wh_ratio is the batch-wide max W/H."""
        img = self.resize_norm_img(img, max_wh_ratio)
        return img
/deploy/ppcv/ops/models/ocr_db_detection/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from functools import reduce 16 | import os 17 | import numpy as np 18 | import math 19 | import paddle 20 | 21 | import importlib 22 | 23 | from .inference import OcrDbDetOp 24 | 25 | __all__ = ['OcrDbDetOp'] 26 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/models/ocr_db_detection/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
from functools import reduce
import importlib
import os
import numpy as np
import math
import paddle
from ..base import ModelBaseOp

from ppcv.ops.base import create_operators
from ppcv.core.workspace import register

from .preprocess import *
from .postprocess import *


@register
class OcrDbDetOp(ModelBaseOp):
    """DB text-detection operator.

    Wraps preprocess -> predictor.run -> postprocess per input image and
    contributes "dt_polys" / "dt_scores" to the pipeline.
    """

    def __init__(self, model_cfg, env_cfg):
        super(OcrDbDetOp, self).__init__(model_cfg, env_cfg)
        mod = importlib.import_module(__name__)
        # Operators are resolved against this module's namespace, hence the
        # wildcard imports from .preprocess / .postprocess above.
        self.preprocessor = create_operators(model_cfg["PreProcess"], mod)
        self.postprocessor = create_operators(model_cfg["PostProcess"], mod)
        # Detection inputs have variable sizes, so run one image per batch.
        self.batch_size = 1

    @classmethod
    def get_output_keys(cls):
        """Names of the keys this op contributes to the pipeline."""
        return ["dt_polys", "dt_scores"]

    def preprocess(self, inputs):
        """Run the configured preprocess chain on one input dict."""
        outputs = inputs
        for ops in self.preprocessor:
            outputs = ops(outputs)
        return outputs

    def postprocess(self, result, shape_list):
        """Run the postprocess chain; only the final op also receives the
        output key names so it can build the result dict."""
        outputs = result
        for idx, ops in enumerate(self.postprocessor):
            if idx == len(self.postprocessor) - 1:
                outputs = ops(outputs, shape_list, self.output_keys)
            else:
                outputs = ops(outputs, shape_list)
        return outputs

    def infer(self, image_list):
        """Run detection over image_list one batch at a time.

        Returns a list with one postprocessed result per batch (one per
        image, since batch_size == 1).
        """
        # Fixed: removed an unused `inputs = []` local that was immediately
        # shadowed inside the loop.
        batch_loop_cnt = math.ceil(float(len(image_list)) / self.batch_size)
        results = []
        for i in range(batch_loop_cnt):
            start_index = i * self.batch_size
            end_index = min((i + 1) * self.batch_size, len(image_list))
            batch_image_list = image_list[start_index:end_index]
            # preprocess (batch_size is fixed to 1, so only element 0 is used)
            inputs, shape_list = self.preprocess({
                "image": batch_image_list[0]
            })
            shape_list = np.expand_dims(shape_list, axis=0)
            # model inference
            result = self.predictor.run(inputs)[0]
            # postprocess
            result = self.postprocess(result, shape_list)
            results.append(result)
        return results

    def __call__(self, inputs):
        """
        step1: parse inputs
        step2: run
        step3: merge results
        input: a list of dict
        """
        key = self.input_keys[0]
        is_list = False
        if isinstance(inputs[0][key], (list, tuple)):
            inputs = [input[key] for input in inputs]
            is_list = True
        else:
            inputs = [[input[key]] for input in inputs]
        # Remember how many images each original input contributed so the
        # flat result list can be regrouped afterwards.
        sub_index_list = [len(input) for input in inputs]
        inputs = reduce(lambda x, y: x.extend(y) or x, inputs)

        # step2: run
        outputs = self.infer(inputs)

        # step3: merge
        curr_offset_id = 0
        pipe_outputs = []
        for idx in range(len(sub_index_list)):
            sub_start_idx = curr_offset_id
            sub_end_idx = curr_offset_id + sub_index_list[idx]
            output = outputs[sub_start_idx:sub_end_idx]
            # Turn the list of per-image dicts into a dict of lists.
            # NOTE(review): assumes every input contributed at least one
            # image; an empty sub-list would raise IndexError here.
            output = {k: [o[k] for o in output] for k in output[0]}
            if is_list is not True:
                output = {k: output[k][0] for k in output}

            pipe_outputs.append(output)

            curr_offset_id = sub_end_idx
        return pipe_outputs
14 | 15 | from .base import OutputBaseOp 16 | from .classification import ClasOutput 17 | from .feature_extraction import FeatureOutput 18 | from .detection import DetOutput 19 | from .keypoint import KptOutput 20 | from .ocr_rec import OCRRecOutput 21 | 22 | __all__ = ['OutputBaseOp', 'ClasOutput', 'FeatureOutput', 'DetOutput', 'KptOutput', 'OCRRecOutput'] 23 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/output/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class OutputBaseOp(BaseOp):
    """Base class for pipeline OUTPUT ops.

    Reads the common output switches from the environment config; concrete
    subclasses implement __call__ to print/save/return their results.
    """

    def __init__(self, model_cfg, env_cfg):
        super(OutputBaseOp, self).__init__(model_cfg, env_cfg)
        # Where images / result files are written when saving is enabled.
        self.output_dir = self.env_cfg.get('output_dir', 'output')
        self.save_img = self.env_cfg.get('save_img', False)
        self.save_res = self.env_cfg.get('save_res', False)
        self.return_res = self.env_cfg.get('return_res', False)
        self.print_res = self.env_cfg.get('print_res', False)
        # Fixed: subclasses read self.frame_id but nothing here set it,
        # raising AttributeError for image pipelines. Default to -1, the
        # "not a video frame" sentinel the subclasses test against.
        # NOTE(review): confirm the framework does not set frame_id later.
        self.frame_id = self.env_cfg.get('frame_id', -1)

    @classmethod
    def type(cls):
        # Fixed: a @classmethod's first parameter is conventionally `cls`,
        # not `self`.
        return 'OUTPUT'

    def __call__(self, inputs):
        # Subclasses override; the base implementation is a no-op.
        return
@register
class ClasOutput(OutputBaseOp):
    """Output op for classification results: optionally prints them, saves
    the input image, dumps results to JSON and/or returns them."""

    def __init__(self, model_cfg, env_cfg):
        super(ClasOutput, self).__init__(model_cfg, env_cfg)

    def __call__(self, inputs):
        total_res = []
        for input in inputs:
            fn, image, class_ids, scores, label_names = input["fn"], input[
                "image"], input["class_ids"], input["scores"], input[
                    "label_names"]
            res = dict(filename=fn,
                       class_ids=class_ids,
                       scores=scores,
                       label_names=label_names)
            if self.frame_id != -1:
                # Fixed: `frame_id` was an undefined bare name (NameError
                # whenever this branch ran); the value lives on self.
                res.update({'frame_id': self.frame_id})
            if self.print_res:
                logger.info(res)
            if self.save_img:
                # Flip channel order before cv2.imwrite (assumes the
                # pipeline image is RGB — TODO confirm).
                image = image[:, :, ::-1]
                file_name = os.path.split(fn)[-1]
                out_path = os.path.join(self.output_dir, file_name)
                logger.info('Save output image to {}'.format(out_path))
                cv2.imwrite(out_path, image)
            if self.save_res or self.return_res:
                total_res.append(res)
        if self.save_res:
            res_file_name = 'clas_output.json'
            out_path = os.path.join(self.output_dir, res_file_name)
            logger.info('Save output result to {}'.format(out_path))
            with open(out_path, 'w') as f:
                json.dump(total_res, f)
        if self.return_res:
            return total_res
        return
def get_color_map_list(num_classes):
    """
    Args:
        num_classes (int): number of class
    Returns:
        color_map (list): RGB color list
    """
    flat = num_classes * [0, 0, 0]
    for cls_id in range(num_classes):
        value = cls_id
        shift = 0
        # Spread the bits of the class id across the three channels,
        # starting from the most significant color bit.
        while value:
            flat[cls_id * 3 + 0] |= ((value >> 0) & 1) << (7 - shift)
            flat[cls_id * 3 + 1] |= ((value >> 1) & 1) << (7 - shift)
            flat[cls_id * 3 + 2] |= ((value >> 2) & 1) << (7 - shift)
            shift += 1
            value >>= 3
    return [flat[i:i + 3] for i in range(0, len(flat), 3)]


def draw_det(image, dt_bboxes, dt_scores, dt_cls_names):
    """Draw detection boxes and "<name> <score>" labels on the image and
    return the annotated image as a numpy array (channel order flipped
    relative to the input)."""
    canvas = Image.fromarray(image[:, :, ::-1])
    line_width = min(canvas.size) // 320
    painter = ImageDraw.Draw(canvas)
    unique_names = set(dt_cls_names)
    name_to_idx = {cls_name: i for i, cls_name in enumerate(unique_names)}
    clsid2color = {}
    palette = get_color_map_list(len(unique_names))

    for bbox, score, cls_name in zip(dt_bboxes, dt_scores, dt_cls_names):
        color = tuple(palette[name_to_idx[cls_name]])

        xmin, ymin, xmax, ymax = bbox
        # bbox outline
        painter.line(
            [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
             (xmin, ymin)],
            width=line_width,
            fill=color)

        # label: filled background rectangle with white text on top
        text = "{} {:.4f}".format(cls_name, score)
        text_box = painter.textbbox((xmin, ymin), text, anchor='lt')
        painter.rectangle(text_box, fill=color)
        painter.text((text_box[0], text_box[1]), text, fill=(255, 255, 255))
    return np.array(canvas)
@register
class DetOutput(OutputBaseOp):
    """Output op for detection results: optionally prints them, saves a
    visualization of the boxes, dumps results to JSON and/or returns them."""

    def __init__(self, model_cfg, env_cfg):
        super(DetOutput, self).__init__(model_cfg, env_cfg)

    def __call__(self, inputs):
        total_res = []
        for input in inputs:
            # Fixed: `input` is a dict, so tuple-unpacking it only yielded
            # the five keys, not the values. Read fields explicitly like
            # the other output ops do.
            fn = input["fn"]
            image = input["image"]
            dt_bboxes = input["dt_bboxes"]
            dt_scores = input["dt_scores"]
            dt_cls_names = input["dt_cls_names"]
            res = dict(
                filename=fn,
                dt_bboxes=dt_bboxes,
                dt_scores=dt_scores,
                dt_cls_names=dt_cls_names)
            if self.frame_id != -1:
                # Fixed: `frame_id` was an undefined bare name; use self.
                res.update({'frame_id': self.frame_id})
            if self.print_res:
                # Fixed: gate on print_res for consistency with the other
                # output ops instead of logging unconditionally.
                logger.info(res)
            if self.save_img:
                # Fixed: draw_det was defined in this module but never
                # called, so saved images had no boxes. Draw first, then
                # flip channel order for cv2.imwrite (assumes the pipeline
                # image channel order matches draw_det — TODO confirm).
                image = draw_det(image, dt_bboxes, dt_scores, dt_cls_names)
                image = image[:, :, ::-1]
                file_name = os.path.split(fn)[-1]
                out_path = os.path.join(self.output_dir, file_name)
                logger.info('Save output image to {}'.format(out_path))
                cv2.imwrite(out_path, image)
            if self.save_res or self.return_res:
                total_res.append(res)
        if self.save_res:
            res_file_name = 'det_output.json'
            out_path = os.path.join(self.output_dir, res_file_name)
            logger.info('Save output result to {}'.format(out_path))
            with open(out_path, 'w') as f:
                json.dump(total_res, f)
        if self.return_res:
            return total_res
        return
@register
class FeatureOutput(OutputBaseOp):
    """Output op for feature-extraction results: optionally prints and/or
    returns {filename, feature} records."""

    def __init__(self, model_cfg, env_cfg):
        super().__init__(model_cfg, env_cfg)

    def __call__(self, inputs):
        total_res = []
        for input in inputs:
            fn, feature = input["fn"], input["feature"]
            res = dict(filename=fn, feature=feature)
            # TODO(gaotingquan)
            if self.frame_id != -1:
                # Fixed: `frame_id` was an undefined bare name; use self.
                res.update({'frame_id': self.frame_id})
            if self.print_res:
                logger.info(res)
            if self.return_res:
                total_res.append(res)
        if self.return_res:
            return total_res
        return
@register
class OCRRecOutput(OutputBaseOp):
    """Output op for OCR recognition results: optionally prints them, saves
    the input image, dumps results to JSON and/or returns them."""

    def __init__(self, model_cfg, env_cfg):
        super(OCRRecOutput, self).__init__(model_cfg, env_cfg)

    def __call__(self, inputs):
        total_res = []
        for input in inputs:
            fn, image, rec_text, rec_score = input["fn"], input[
                "image"], input["rec_text"], input["rec_score"]
            res = dict(filename=fn,
                       rec_text=rec_text,
                       rec_score=rec_score)
            if self.frame_id != -1:
                # Fixed: `frame_id` was an undefined bare name (NameError
                # whenever this branch ran); the value lives on self.
                res.update({'frame_id': self.frame_id})
            if self.print_res:
                logger.info(res)
            if self.save_img:
                # Flip channel order before cv2.imwrite (assumes the
                # pipeline image is RGB — TODO confirm).
                image = image[:, :, ::-1]
                file_name = os.path.split(fn)[-1]
                out_path = os.path.join(self.output_dir, file_name)
                logger.info('Save output image to {}'.format(out_path))
                cv2.imwrite(out_path, image)
            if self.save_res or self.return_res:
                total_res.append(res)
        if self.save_res:
            res_file_name = 'ocr_rec_output.json'
            out_path = os.path.join(self.output_dir, res_file_name)
            logger.info('Save output result to {}'.format(out_path))
            with open(out_path, 'w') as f:
                json.dump(total_res, f)
        if self.return_res:
            return total_res
        return
-------------------------------------------------------------------------------- /deploy/ppcv/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /deploy/ppcv/utils/helper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def get_output_keys(cfg=None):
    """Collect the output key names of the registered (non-OUTPUT) ops.

    With cfg=None, returns {op_class_name: [keys]} for every registered op.
    With a pipeline cfg, returns a set of qualified "op_name.key" names,
    plus the builtin "input.image" / "input.video" entries.
    """
    registry = get_global_op()
    if cfg is None:
        output = dict()
        for op_cls_name, op_cls in registry.items():
            if op_cls.type() != 'OUTPUT':
                output.update({op_cls_name: op_cls.get_output_keys()})
        return output

    output = {'input.image', 'input.video'}
    for op in cfg:
        # Each cfg entry is a single-item dict {OpClassName: op_cfg}.
        op_arch = registry[list(op.keys())[0]]
        op_cfg = list(op.values())[0]
        if op_arch.type() == 'OUTPUT':
            continue
        for out_name in op_arch.get_output_keys():
            output.add(op_cfg['name'] + '.' + out_name)
    return output


def gen_input_name(input_keys, last_ops, output_keys):
    """Resolve each input key to a qualified "{last_op}.{key}" name.

    Raises ValueError when a name would be resolved twice, or when no
    last op provides the key among output_keys.
    """
    input_name = list()
    for key in input_keys:
        # for/else: the else branch runs only when no last op matched.
        for op in last_ops:
            name = op + '.' + key
            if name in input_name:
                raise ValueError("Repeat input: {}".format(name))
            if name in output_keys:
                input_name.append(name)
                break
        else:
            raise ValueError(
                "Input: {} could not be found from the last ops: {}. The outputs of these last ops are {}".
                format(key, last_ops, output_keys))
    return input_name
def setup_logger(name="ppcv", output=None):
    """
    Initialize logger and set its verbosity level to INFO.
    Args:
        name (str): the root module name of this logger
        output (str): a file name or a directory to save log. If None, will not save log file.
            If ends with ".txt" or ".log", assumed to be a file name.
            Otherwise, logs will be saved to `output/log.txt`.

    Returns:
        logging.Logger: a logger
    """
    logger = logging.getLogger(name)
    # Configure each named logger only once; later calls return it as-is.
    if name in logger_initialized:
        return logger

    logger.setLevel(logging.INFO)
    logger.propagate = False

    formatter = logging.Formatter(
        "[%(asctime)s] %(name)s %(levelname)s: %(message)s",
        datefmt="%m/%d %H:%M:%S")
    # stdout logging: master only
    local_rank = dist.get_rank()
    if local_rank == 0:
        ch = logging.StreamHandler(stream=sys.stdout)
        ch.setLevel(logging.DEBUG)
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    # file logging: all workers
    if output is not None:
        if output.endswith(".txt") or output.endswith(".log"):
            filename = output
        else:
            filename = os.path.join(output, "log.txt")
        if local_rank > 0:
            filename = filename + ".rank{}".format(local_rank)
        # Fixed: os.makedirs raised FileExistsError when the log directory
        # already existed, and failed on a bare file name with no directory
        # component.
        dirname = os.path.dirname(filename)
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        fh = logging.FileHandler(filename, mode='a')
        fh.setLevel(logging.DEBUG)
        # NOTE(review): the file handler uses a default Formatter (message
        # only), not `formatter` — confirm this is intentional.
        fh.setFormatter(logging.Formatter())
        logger.addHandler(fh)
    logger_initialized.append(name)
    return logger
class Times(object):
    """Simple accumulating stopwatch (values in seconds)."""

    def __init__(self):
        # accumulated elapsed time
        self.time = 0.
        # start time
        self.st = 0.
        # end time
        self.et = 0.

    def start(self):
        self.st = time.time()

    def end(self, repeats=1, accumulative=True):
        """Stop timing; divide the elapsed span by `repeats` and either
        accumulate it into, or overwrite, the stored value."""
        self.et = time.time()
        if accumulative:
            self.time += (self.et - self.st) / repeats
        else:
            self.time = (self.et - self.st) / repeats

    def reset(self):
        self.time = 0.
        self.st = 0.
        self.et = 0.

    def value(self):
        return round(self.time, 4)


class PipeTimer(Times):
    """Collects per-op and total inference timings for a pipeline.

    cfg is a list of single-entry dicts: [{OpClass: {'name': ..., ...}}].
    """

    def __init__(self, cfg):
        super(PipeTimer, self).__init__()
        self.total_time = Times()
        self.module_time = dict()
        for op in cfg:
            # Fixed: dict.values() is a view and is not subscriptable;
            # `op.values()['name']` raised TypeError. Take the single
            # op-config dict and read its 'name' field.
            op_cfg = list(op.values())[0]
            self.module_time.update({op_cfg['name']: Times()})

        self.img_num = 0

    def get_total_time(self):
        """Return (total_time, average_latency_per_image, qps)."""
        total_time = self.total_time.value()
        average_latency = total_time / max(1, self.img_num)
        qps = 0
        if total_time > 0:
            qps = 1 / average_latency
        return total_time, average_latency, qps

    def info(self):
        """Print a human-readable timing summary; returns qps."""
        total_time, average_latency, qps = self.get_total_time()
        print("------------------ Inference Time Info ----------------------")
        print("total_time(ms): {}, img_num: {}".format(total_time * 1000,
                                                       self.img_num))

        for k, v in self.module_time.items():
            v_time = round(v.value(), 4)
            if v_time > 0:
                # Fixed: guard against ZeroDivisionError when no image was
                # counted yet.
                print("{} time(ms): {}; per frame average time(ms): {}".format(
                    k, v_time * 1000, v_time * 1000 / max(1, self.img_num)))
        print("average latency time(ms): {:.2f}, QPS: {:2f}".format(
            average_latency * 1000, qps))
        return qps

    def report(self, average=False):
        """Return {op_name: seconds, 'total': ..., 'img_num': ...};
        per-image averages when average=True."""
        dic = {}
        # Fixed: iterating a dict yields only keys, so the original
        # two-target unpacking raised ValueError (and shadowed the `time`
        # module). Iterate .items() instead.
        for module_name, module_timer in self.module_time.items():
            dic[module_name] = round(
                module_timer.value() / max(1, self.img_num),
                4) if average else module_timer.value()
        dic['total'] = round(self.total_time.value() / max(1, self.img_num),
                             4) if average else self.total_time.value()
        dic['img_num'] = self.img_num
        return dic
def create_operators(params, class_num=None):
    """
    create operators based on the config

    Args:
        params(list): a dict list, used to create some operators
    """
    assert isinstance(params, list), ('operator config should be a list')
    ops = []
    for entry in params:
        # Each entry is a single-item dict: {OperatorName: kwargs-or-None}.
        assert isinstance(entry,
                          dict) and len(entry) == 1, "yaml format error"
        name = list(entry)[0]
        kwargs = {} if entry[name] is None else entry[name]
        factory = getattr(preprocess, name)
        # Operators whose constructor accepts class_num get it injected.
        if "class_num" in inspect.getfullargspec(factory).args:
            kwargs.update({"class_num": class_num})
        ops.append(factory(**kwargs))

    return ops
14 | 15 | from .augment import tia_perspective, tia_distort, tia_stretch 16 | 17 | __all__ = ['tia_distort', 'tia_stretch', 'tia_perspective'] 18 | -------------------------------------------------------------------------------- /deploy/python/dataaug/data/imaug/text_image_aug/augment.py: -------------------------------------------------------------------------------- 1 | # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """ 15 | This code is refer from: 16 | https://github.com/RubanSeven/Text-Image-Augmentation-python/blob/master/augment.py 17 | """ 18 | 19 | import numpy as np 20 | from .warp_mls import WarpMLS 21 | 22 | 23 | def tia_distort(src, segment=4): 24 | img_h, img_w = src.shape[:2] 25 | 26 | cut = img_w // segment 27 | thresh = cut // 3 28 | 29 | src_pts = list() 30 | dst_pts = list() 31 | 32 | src_pts.append([0, 0]) 33 | src_pts.append([img_w, 0]) 34 | src_pts.append([img_w, img_h]) 35 | src_pts.append([0, img_h]) 36 | 37 | dst_pts.append([np.random.randint(thresh), np.random.randint(thresh)]) 38 | dst_pts.append( 39 | [img_w - np.random.randint(thresh), 40 | np.random.randint(thresh)]) 41 | dst_pts.append( 42 | [img_w - np.random.randint(thresh), img_h - np.random.randint(thresh)]) 43 | dst_pts.append( 44 | [np.random.randint(thresh), img_h - np.random.randint(thresh)]) 45 | 46 | half_thresh = thresh * 0.5 47 | 48 | for cut_idx in np.arange(1, segment, 1): 49 | src_pts.append([cut * cut_idx, 0]) 50 | src_pts.append([cut * cut_idx, img_h]) 51 | dst_pts.append([ 52 | cut * cut_idx + np.random.randint(thresh) - half_thresh, 53 | np.random.randint(thresh) - half_thresh 54 | ]) 55 | dst_pts.append([ 56 | cut * cut_idx + np.random.randint(thresh) - half_thresh, 57 | img_h + np.random.randint(thresh) - half_thresh 58 | ]) 59 | 60 | trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h) 61 | dst = trans.generate() 62 | 63 | return dst 64 | 65 | 66 | def tia_stretch(src, segment=4): 67 | img_h, img_w = src.shape[:2] 68 | 69 | cut = img_w // segment 70 | thresh = cut * 4 // 5 71 | 72 | src_pts = list() 73 | dst_pts = list() 74 | 75 | src_pts.append([0, 0]) 76 | src_pts.append([img_w, 0]) 77 | src_pts.append([img_w, img_h]) 78 | src_pts.append([0, img_h]) 79 | 80 | dst_pts.append([0, 0]) 81 | dst_pts.append([img_w, 0]) 82 | dst_pts.append([img_w, img_h]) 83 | dst_pts.append([0, img_h]) 84 | 85 | half_thresh = thresh * 0.5 86 | 87 | for cut_idx in np.arange(1, segment, 1): 88 
| move = np.random.randint(thresh) - half_thresh 89 | src_pts.append([cut * cut_idx, 0]) 90 | src_pts.append([cut * cut_idx, img_h]) 91 | dst_pts.append([cut * cut_idx + move, 0]) 92 | dst_pts.append([cut * cut_idx + move, img_h]) 93 | 94 | trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h) 95 | dst = trans.generate() 96 | 97 | return dst 98 | 99 | 100 | def tia_perspective(src): 101 | img_h, img_w = src.shape[:2] 102 | 103 | thresh = img_h // 2 104 | 105 | src_pts = list() 106 | dst_pts = list() 107 | 108 | src_pts.append([0, 0]) 109 | src_pts.append([img_w, 0]) 110 | src_pts.append([img_w, img_h]) 111 | src_pts.append([0, img_h]) 112 | 113 | dst_pts.append([0, np.random.randint(thresh)]) 114 | dst_pts.append([img_w, np.random.randint(thresh)]) 115 | dst_pts.append([img_w, img_h - np.random.randint(thresh)]) 116 | dst_pts.append([0, img_h - np.random.randint(thresh)]) 117 | 118 | trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h) 119 | dst = trans.generate() 120 | 121 | return dst 122 | -------------------------------------------------------------------------------- /deploy/python/dataaug/data/postprocess/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import copy 15 | import importlib 16 | 17 | from . 
def build_postprocess(config):
    """Instantiate a postprocess object from a config dict.

    ``config["name"]`` names a class defined in this module; the remaining
    keys are forwarded to that class's constructor.
    """
    # deepcopy so the pop() below does not mutate the caller's config.
    config = copy.deepcopy(config)
    model_name = config.pop("name")
    mod = importlib.import_module(__name__)
    postprocess_func = getattr(mod, model_name)(**config)
    return postprocess_func


class DistillationPostProcess(object):
    """Postprocess for distillation outputs.

    Selects one sub-model's output from a dict of outputs (optionally a
    sub-key within it) and delegates to a regular postprocess class such as
    ``Topk``.
    """

    def __init__(self, model_name="Student", key=None, func="Topk", **kargs):
        super().__init__()
        # Resolve the delegate class by name from this module — the same
        # lookup build_postprocess uses — instead of eval(), which would
        # execute an arbitrary config-supplied string.
        mod = importlib.import_module(__name__)
        self.func = getattr(mod, func)(**kargs)
        self.model_name = model_name
        self.key = key

    def __call__(self, x, file_names=None):
        x = x[self.model_name]
        if self.key is not None:
            x = x[self.key]
        return self.func(x, file_names=file_names)
class ThreshOutput(object):
    """Binary-classification postprocess that thresholds P(class 1).

    A sample is reported as class 1 when its softmax probability for
    class 1 reaches ``threshold``; otherwise it is reported as class 0
    with score 1 - P(class 1).
    """

    def __init__(self, threshold, label_0="0", label_1="1"):
        self.threshold = threshold
        self.label_0 = label_0
        self.label_1 = label_1

    def __call__(self, x, file_names=None):
        probs = F.softmax(x, axis=-1).numpy()
        results = []
        for idx, row in enumerate(probs):
            score = row[1]
            positive = score >= self.threshold
            result = {
                "class_ids": [1] if positive else [0],
                "scores": [score] if positive else [1 - score],
                "label_names": [self.label_1 if positive else self.label_0],
            }
            if file_names is not None:
                result["file_name"] = file_names[idx]
            results.append(result)
        return results
class Topk(object):
    """Top-k classification postprocess.

    Args:
        topk: number of highest-probability classes to report per sample.
        class_id_map_file: optional text file of "<id><delimiter><name>"
            lines mapping class ids to label names.
        delimiter: separator used in the mapping file (default: space).
    """

    def __init__(self, topk=1, class_id_map_file=None, delimiter=None):
        assert isinstance(topk, (int, ))
        self.topk = topk
        self.delimiter = delimiter if delimiter is not None else " "
        self.class_id_map = self.parse_class_id_map(class_id_map_file)

    def parse_class_id_map(self, class_id_map_file):
        """Load {class_id: label_name} from file; return None when missing
        or unparsable (label_names will then be empty)."""
        if class_id_map_file is None:
            return None
        if not os.path.exists(class_id_map_file):
            print(
                "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!"
            )
            return None

        try:
            class_id_map = {}
            with open(class_id_map_file, "r") as fin:
                for line in fin:
                    # partition keeps everything after the first delimiter,
                    # so label names may themselves contain the delimiter.
                    partition = line.split("\n")[0].partition(self.delimiter)
                    class_id_map[int(partition[0])] = str(partition[-1])
        except Exception as ex:
            print(ex)
            class_id_map = None
        return class_id_map

    def __call__(self, x, file_names=None, multilabel=False):
        """Convert logits to per-sample result dicts.

        Args:
            x: paddle.Tensor of logits (or a dict holding it under 'logits').
            file_names: optional list parallel to the batch dimension.
            multilabel: when True, report every class with sigmoid prob >= 0.5
                instead of the top-k softmax classes.
        Returns:
            List of dicts with class_ids / scores / label_names (+ file_name).
        """
        if isinstance(x, dict):
            x = x['logits']
        assert isinstance(x, paddle.Tensor)
        if file_names is not None:
            assert x.shape[0] == len(file_names)
        x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x)
        x = x.numpy()
        y = []
        for idx, probs in enumerate(x):
            index = probs.argsort(axis=0)[-self.topk:][::-1].astype(
                "int32") if not multilabel else np.where(
                    probs >= 0.5)[0].astype("int32")
            clas_id_list = []
            score_list = []
            label_name_list = []
            for i in index:
                clas_id_list.append(i.item())
                score_list.append(probs[i].item())
                if self.class_id_map is not None:
                    label_name_list.append(self.class_id_map[i.item()])
            result = {
                "class_ids": clas_id_list,
                "scores": np.around(score_list, decimals=5).tolist(),
                # label_name_list is always a list (possibly empty), so the
                # key is always emitted — the old `is not None` check was dead.
                "label_names": label_name_list,
            }
            if file_names is not None:
                result["file_name"] = file_names[idx]
            y.append(result)
        return y


class MultiLabelTopk(Topk):
    """Multi-label variant of Topk: reports all classes with prob >= 0.5."""

    def __init__(self, topk=1, class_id_map_file=None):
        # Bug fix: forward the constructor arguments. Previously
        # super().__init__() was called with no args, silently discarding
        # both `topk` and `class_id_map_file`.
        super().__init__(topk=topk, class_id_map_file=class_id_map_file)

    def __call__(self, x, file_names=None):
        return super().__call__(x, file_names, multilabel=True)
14 | 15 | from python.dataaug.data.preprocess.ops.autoaugment import ImageNetPolicy as RawImageNetPolicy 16 | from python.dataaug.data.preprocess.ops.randaugment import RandAugment as RawRandAugment 17 | from python.dataaug.data.preprocess.ops.timm_autoaugment import RawTimmAutoAugment 18 | from python.dataaug.data.preprocess.ops.cutout import Cutout 19 | 20 | from python.dataaug.data.preprocess.ops.hide_and_seek import HideAndSeek 21 | from python.dataaug.data.preprocess.ops.random_erasing import RandomErasing 22 | from python.dataaug.data.preprocess.ops.grid import GridMask 23 | 24 | from python.dataaug.data.preprocess.ops.operators import DecodeImage 25 | from python.dataaug.data.preprocess.ops.operators import ResizeImage 26 | from python.dataaug.data.preprocess.ops.operators import CropImage 27 | from python.dataaug.data.preprocess.ops.operators import RandCropImage 28 | from python.dataaug.data.preprocess.ops.operators import RandCropImageV2 29 | from python.dataaug.data.preprocess.ops.operators import RandFlipImage 30 | from python.dataaug.data.preprocess.ops.operators import NormalizeImage 31 | from python.dataaug.data.preprocess.ops.operators import ToCHWImage 32 | from python.dataaug.data.preprocess.ops.operators import AugMix 33 | from python.dataaug.data.preprocess.ops.operators import Pad 34 | from python.dataaug.data.preprocess.ops.operators import ToTensor 35 | from python.dataaug.data.preprocess.ops.operators import Normalize 36 | from python.dataaug.data.preprocess.ops.operators import RandomHorizontalFlip 37 | from python.dataaug.data.preprocess.ops.operators import CropWithPadding 38 | from python.dataaug.data.preprocess.ops.operators import RandomInterpolationAugment 39 | from python.dataaug.data.preprocess.ops.operators import ColorJitter 40 | from python.dataaug.data.preprocess.ops.operators import RandomCropImage 41 | from python.dataaug.data.preprocess.ops.operators import Padv2 42 | 43 | import numpy as np 44 | from PIL import Image 45 | 
def transform(data, ops=None):
    """Apply a pipeline of preprocess operators to ``data`` in order."""
    # `ops` previously defaulted to a shared mutable list ([]); None is the
    # safe idiom and behaves identically for every existing caller.
    for op in (ops or []):
        data = op(data)
    return data


class AutoAugment(RawImageNetPolicy):
    """ImageNetPolicy wrapper that accepts both PIL images and ndarrays."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def __call__(self, img):
        # The underlying policy expects a PIL image; convert ndarray inputs.
        if not isinstance(img, Image.Image):
            img = np.ascontiguousarray(img)
            img = Image.fromarray(img)

        img = super().__call__(img)

        # Hand back an ndarray so downstream numpy-based ops keep working.
        if isinstance(img, Image.Image):
            img = np.asarray(img)

        return img


class RandAugment(RawRandAugment):
    """RandAugment wrapper that accepts both PIL images and ndarrays."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def __call__(self, img):
        # Same PIL round-trip as AutoAugment above.
        if not isinstance(img, Image.Image):
            img = np.ascontiguousarray(img)
            img = Image.fromarray(img)

        img = super().__call__(img)

        if isinstance(img, Image.Image):
            img = np.asarray(img)

        return img


class TimmAutoAugment(RawTimmAutoAugment):
    """TimmAutoAugment wrapper that accepts both PIL images and ndarrays
    and applies the augmentation with probability ``prob``.
    """

    def __init__(self, prob=1.0, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.prob = prob

    def __call__(self, img):
        if not isinstance(img, Image.Image):
            img = np.ascontiguousarray(img)
            img = Image.fromarray(img)
        # Skip the augmentation (but still normalize the return type)
        # with probability 1 - prob.
        if random.random() < self.prob:
            img = super().__call__(img)
        if isinstance(img, Image.Image):
            img = np.asarray(img)

        return img
class Cutout(object):
    """Cutout augmentation: zero out ``n_holes`` square patches of side
    ``length`` at uniformly random centers.

    Based on https://github.com/uoguelph-mlrg/Cutout
    (https://arxiv.org/abs/1708.04552). The image is modified in place.
    """

    def __init__(self, n_holes=1, length=112):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        """Cut out random patches of an HxW(xC) numpy image in place."""
        h, w = img.shape[:2]
        # (removed an unused h*w float32 mask that was allocated every call)
        for n in range(self.n_holes):
            y = np.random.randint(h)
            x = np.random.randint(w)

            # Clip the patch to the image borders.
            y1 = np.clip(y - self.length // 2, 0, h)
            y2 = np.clip(y + self.length // 2, 0, h)
            x1 = np.clip(x - self.length // 2, 0, w)
            x2 = np.clip(x + self.length // 2, 0, w)

            img[y1:y2, x1:x2] = 0
        return img
def int_parameter(level, maxval):
    """Helper function to scale `val` between 0 and maxval .
    Args:
        level: Level of the operation that will be between [0, `PARAMETER_MAX`].
        maxval: Maximum value that the operation can have. This will be scaled to
            level/PARAMETER_MAX.
    Returns:
        An int that results from scaling `maxval` according to `level`.
    """
    return int(level * maxval / 10)


def float_parameter(level, maxval):
    """Helper function to scale `val` between 0 and maxval.
    Args:
        level: Level of the operation that will be between [0, `PARAMETER_MAX`].
        maxval: Maximum value that the operation can have. This will be scaled to
            level/PARAMETER_MAX.
    Returns:
        A float that results from scaling `maxval` according to `level`.
    """
    return float(level) * maxval / 10.


def sample_level(n):
    # Draw a random operation level in [0.1, n).
    return np.random.uniform(low=0.1, high=n)


def autocontrast(pil_img, *args):
    # Maximize image contrast; `level` is ignored (hence *args).
    return ImageOps.autocontrast(pil_img)


def equalize(pil_img, *args):
    # Histogram-equalize the image; `level` is ignored.
    return ImageOps.equalize(pil_img)


def posterize(pil_img, level, *args):
    # Reduce bits per channel; higher level => fewer bits (4 down to 0).
    level = int_parameter(sample_level(level), 4)
    return ImageOps.posterize(pil_img, 4 - level)


def rotate(pil_img, level, *args):
    # Rotate up to +/-30 degrees; sign chosen at random.
    degrees = int_parameter(sample_level(level), 30)
    if np.random.uniform() > 0.5:
        degrees = -degrees
    return pil_img.rotate(degrees, resample=Image.BILINEAR)


def solarize(pil_img, level, *args):
    # Invert all pixels above the (256 - level) threshold.
    level = int_parameter(sample_level(level), 256)
    return ImageOps.solarize(pil_img, 256 - level)


def shear_x(pil_img, level):
    # Horizontal shear by up to +/-0.3 via an affine transform.
    level = float_parameter(sample_level(level), 0.3)
    if np.random.uniform() > 0.5:
        level = -level
    return pil_img.transform(pil_img.size,
                             Image.AFFINE, (1, level, 0, 0, 1, 0),
                             resample=Image.BILINEAR)


def shear_y(pil_img, level):
    # Vertical shear by up to +/-0.3 via an affine transform.
    level = float_parameter(sample_level(level), 0.3)
    if np.random.uniform() > 0.5:
        level = -level
    return pil_img.transform(pil_img.size,
                             Image.AFFINE, (1, 0, 0, level, 1, 0),
                             resample=Image.BILINEAR)


def translate_x(pil_img, level):
    # Horizontal translation by up to +/- one third of the image width.
    level = int_parameter(sample_level(level), pil_img.size[0] / 3)
    if np.random.random() > 0.5:
        level = -level
    return pil_img.transform(pil_img.size,
                             Image.AFFINE, (1, 0, level, 0, 1, 0),
                             resample=Image.BILINEAR)


def translate_y(pil_img, level):
    # Vertical translation by up to +/- one third of the image height.
    level = int_parameter(sample_level(level), pil_img.size[1] / 3)
    if np.random.random() > 0.5:
        level = -level
    return pil_img.transform(pil_img.size,
                             Image.AFFINE, (1, 0, 0, 0, 1, level),
                             resample=Image.BILINEAR)


# operation that overlaps with ImageNet-C's test set
def color(pil_img, level, *args):
    # Enhancement factor in [0.1, 1.9]: <1 desaturates, >1 saturates.
    level = float_parameter(sample_level(level), 1.8) + 0.1
    return ImageEnhance.Color(pil_img).enhance(level)


# operation that overlaps with ImageNet-C's test set
def contrast(pil_img, level, *args):
    # Enhancement factor in [0.1, 1.9].
    level = float_parameter(sample_level(level), 1.8) + 0.1
    return ImageEnhance.Contrast(pil_img).enhance(level)


# operation that overlaps with ImageNet-C's test set
def brightness(pil_img, level, *args):
    # Enhancement factor in [0.1, 1.9].
    level = float_parameter(sample_level(level), 1.8) + 0.1
    return ImageEnhance.Brightness(pil_img).enhance(level)


# operation that overlaps with ImageNet-C's test set
def sharpness(pil_img, level, *args):
    # Enhancement factor in [0.1, 1.9].
    level = float_parameter(sample_level(level), 1.8) + 0.1
    return ImageEnhance.Sharpness(pil_img).enhance(level)


# AugMix operation pool: only ops that do NOT overlap with ImageNet-C's
# corruptions (color/contrast/brightness/sharpness are defined above but
# deliberately excluded here).
augmentations = [
    autocontrast, equalize, posterize, rotate, solarize, shear_x, shear_y,
    translate_x, translate_y
]
class HideAndSeek(object):
    """Hide-and-Seek augmentation for CHW images.

    Picks one grid size at random and hides (zeros) each grid patch
    independently with probability ``hide_prob``. A grid size of 0 means
    no hiding for this call.

    Based on https://github.com/kkanshul/Hide-and-Seek (ICCV 2017).
    """

    def __init__(self):
        # Candidate patch sizes; 0 disables hiding for this call.
        self.grid_sizes = [0, 16, 32, 44, 56]
        # Probability that any single patch gets hidden.
        self.hide_prob = 0.5

    def __call__(self, img):
        grid_size = np.random.choice(self.grid_sizes)
        _, h, w = img.shape

        if grid_size == 0:
            return img

        # NOTE(review): the grid walks (w, h) but the slices index axes
        # (h, w) of the CHW tensor — the two spatial axes look swapped.
        # Harmless for square inputs; preserved as-is, confirm against
        # upstream before changing.
        for col in range(0, w, grid_size):
            col_end = min(w, col + grid_size)
            for row in range(0, h, grid_size):
                row_end = min(h, row + grid_size)
                if random.random() <= self.hide_prob:
                    img[:, col:col_end, row:row_end] = 0

        return img
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /deploy/python/dataaug/data/utils/get_image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def get_image_list(img_file):
    """Collect image file paths from a single file or a directory.

    Args:
        img_file: path to an image file, or a directory scanned (non-
            recursively) for files with a known image extension.
    Returns:
        Sorted list of image file paths.
    Raises:
        Exception: when the path does not exist or holds no image file.
    """
    imgs_lists = []
    if img_file is None or not os.path.exists(img_file):
        raise Exception("not found any img file in {}".format(img_file))

    img_end = ['jpg', 'png', 'jpeg', 'JPEG', 'JPG', 'bmp']
    if os.path.isfile(img_file) and img_file.split('.')[-1] in img_end:
        imgs_lists.append(img_file)
    elif os.path.isdir(img_file):
        for single_file in os.listdir(img_file):
            if single_file.split('.')[-1] in img_end:
                imgs_lists.append(os.path.join(img_file, single_file))
    if len(imgs_lists) == 0:
        raise Exception("not found any img file in {}".format(img_file))
    return sorted(imgs_lists)


def get_image_list_from_label_file(image_path, label_file_path):
    """Parse "<image_name> <int_label>" lines into parallel lists.

    Args:
        image_path: directory prepended to each image name.
        label_file_path: whitespace-separated label file.
    Returns:
        (image_paths, int_labels) parallel lists.
    """
    imgs_lists = []
    gt_labels = []
    # Iterate lazily instead of readlines(); also drop the redundant double
    # int() conversion the original performed per line.
    with open(label_file_path, "r") as fin:
        for line in fin:
            image_name, label = line.strip("\n").split()
            imgs_lists.append(os.path.join(image_path, image_name))
            gt_labels.append(int(label))
    return imgs_lists, gt_labels
class RecPredictor(Predictor):
    """Feature-extraction predictor built on the shared ``Predictor`` runtime.

    Preprocess operators and the (optional) postprocess are built from the
    ``FeatureExtract`` section of the config.
    """

    def __init__(self, config):
        super().__init__(config["Global"],
                         config["FeatureExtract"]["rec_inference_model_dir"])
        self.preprocess_ops = create_operators(
            config["FeatureExtract"]["RecPreProcess"]["transform_ops"])
        self.postprocess = build_postprocess(
            config["FeatureExtract"]["RecPostProcess"])

    def predict(self, images, feature_normalize=True):
        """Run inference on one image or a batch of images.

        Args:
            images: a single image (HWC ndarray) or a list of them; all
                images must preprocess to the same shape so they can be
                stacked into one batch.
            feature_normalize: L2-normalize each output row when True.
        Returns:
            The raw (or postprocessed, when a postprocess is configured)
            batch output.
        """
        input_names = self.predictor.get_input_names()
        input_tensor = self.predictor.get_input_handle(input_names[0])

        output_names = self.predictor.get_output_names()
        output_tensor = self.predictor.get_output_handle(output_names[0])

        if not isinstance(images, (list, )):
            images = [images]
        # Preprocess into a fresh list: the original overwrote images[idx]
        # in place, silently mutating the caller's list.
        batch = []
        for img in images:
            for ops in self.preprocess_ops:
                img = ops(img)
            batch.append(img)
        image = np.array(batch)

        input_tensor.copy_from_cpu(image)
        self.predictor.run()
        batch_output = output_tensor.copy_to_cpu()

        if feature_normalize:
            # Row-wise L2 normalization of the feature vectors.
            feas_norm = np.sqrt(
                np.sum(np.square(batch_output), axis=1, keepdims=True))
            batch_output = np.divide(batch_output, feas_norm)

        if self.postprocess is not None:
            batch_output = self.postprocess(batch_output)

        return batch_output


def main(config):
    """Batch-predict features for every image under Global.infer_imgs and
    print one "<file>:\t<result>" line per image."""
    rec_predictor = RecPredictor(config)
    image_list = get_image_list(config["Global"]["infer_imgs"])

    batch_imgs = []
    batch_names = []
    cnt = 0
    for idx, img_path in enumerate(image_list):
        img = cv2.imread(img_path)
        if img is None:
            logger.warning(
                "Image file failed to read and has been skipped. The path: {}".
                format(img_path))
        else:
            # BGR (OpenCV) -> RGB, which the preprocess pipeline expects.
            img = img[:, :, ::-1]
            batch_imgs.append(img)
            img_name = os.path.basename(img_path)
            batch_names.append(img_name)
            cnt += 1

        # Flush a full batch, and also the remainder on the last image.
        if cnt % config["Global"]["batch_size"] == 0 or (idx +
                                                         1) == len(image_list):
            if len(batch_imgs) == 0:
                continue

            batch_results = rec_predictor.predict(batch_imgs)
            for number, result_dict in enumerate(batch_results):
                filename = batch_names[number]
                print("{}:\t{}".format(filename, result_dict))
            batch_imgs = []
            batch_names = []

    return
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .math_utils import * 16 | from .renderer import * 17 | from .remaper import * 18 | from .liner import * 19 | from .data_utils import * 20 | -------------------------------------------------------------------------------- /deploy/python/dataaug/utils/get_image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def get_image_list(img_file):
    """Collect image file paths from a single file or a directory.

    Args:
        img_file: path to an image file, or a directory scanned (non-
            recursively) for files with a known image extension.
    Returns:
        Sorted list of image file paths.
    Raises:
        Exception: when the path does not exist or holds no image file.
    """
    imgs_lists = []
    if img_file is None or not os.path.exists(img_file):
        raise Exception("not found any img file in {}".format(img_file))

    img_end = ['jpg', 'png', 'jpeg', 'JPEG', 'JPG', 'bmp']
    if os.path.isfile(img_file) and img_file.split('.')[-1] in img_end:
        imgs_lists.append(img_file)
    elif os.path.isdir(img_file):
        for single_file in os.listdir(img_file):
            if single_file.split('.')[-1] in img_end:
                imgs_lists.append(os.path.join(img_file, single_file))
    if len(imgs_lists) == 0:
        raise Exception("not found any img file in {}".format(img_file))
    return sorted(imgs_lists)


def get_image_list_from_label_file(label_file_path, delimiter=' '):
    """Parse "<image><delimiter><label>" lines into parallel lists.

    Labels are kept as raw strings. Each line is split on the FIRST
    delimiter only, so labels that themselves contain the delimiter
    (e.g. OCR transcripts with spaces) no longer raise
    "too many values to unpack" as the unbounded split() did.
    """
    imgs_lists = []
    gt_labels = []
    with open(label_file_path, "r", encoding="utf-8") as fin:
        for line in fin:
            image_name, label = line.strip("\n").split(delimiter, 1)
            imgs_lists.append(image_name)
            gt_labels.append(label)
    return imgs_lists, gt_labels
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
import datetime

# Configure the root logger once at import time; every helper below routes
# through this handler/format.
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s %(levelname)s: %(message)s",
                    datefmt="%Y-%m-%d %H:%M:%S")


def time_zone(sec, fmt):
    # Converter hook for logging.Formatter: its (sec, fmt) arguments are
    # ignored and the current local time's struct_time is returned, so every
    # record is stamped with local "now".
    real_time = datetime.datetime.now()
    return real_time.timetuple()


# Install the converter globally for all Formatter instances.
logging.Formatter.converter = time_zone
_logger = logging.getLogger(__name__)

# ANSI escape sequences used by coloring().
Color = {
    'RED': '\033[31m',
    'HEADER': '\033[35m',  # deep purple
    'PURPLE': '\033[95m',  # purple
    'OKBLUE': '\033[94m',
    'OKGREEN': '\033[92m',
    'WARNING': '\033[93m',
    'FAIL': '\033[91m',
    'ENDC': '\033[0m'
}


def coloring(message, color="OKGREEN"):
    # Wrap `message` in an ANSI color code, but only when the
    # PADDLECLAS_COLORING env var is set; otherwise return it unchanged.
    assert color in Color.keys()
    if os.environ.get('PADDLECLAS_COLORING', False):
        return Color[color] + str(message) + Color["ENDC"]
    else:
        return message


def anti_fleet(log):
    """
    logs will print multi-times when calling Fleet API.
    Only display single log and ignore the others.
    """

    def wrapper(fmt, *args):
        # Only trainer 0 emits the log under distributed (Fleet) training;
        # single-process runs default PADDLE_TRAINER_ID to 0 and always log.
        if int(os.getenv("PADDLE_TRAINER_ID", 0)) == 0:
            log(fmt, *args)

    return wrapper


@anti_fleet
def info(fmt, *args):
    _logger.info(fmt, *args)


@anti_fleet
def warning(fmt, *args):
    # Warnings are rendered in red (when coloring is enabled).
    _logger.warning(coloring(fmt, "RED"), *args)


@anti_fleet
def error(fmt, *args):
    _logger.error(coloring(fmt, "FAIL"), *args)


def scaler(name, value, step, writer):
    """
    This function will draw a scalar curve generated by the visualdl.
    Usage: Install visualdl: pip3 install visualdl==2.0.0b4
    and then:
    visualdl --logdir ./scalar --host 0.0.0.0 --port 8830
    to preview loss corve in real time.
    """
    writer.add_scalar(tag=name, step=step, value=value)


def advertise():
    """
    Show the advertising message like the following:

    ===========================================================
    ==        EasyData is powered by PaddlePaddle !        ==
    ===========================================================
    ==                                                     ==
    ==   For more info please go to the following website. ==
    ==                                                     ==
    ==       https://github.com/PaddlePaddle/EasyData      ==
    ===========================================================

    """
    copyright = "EasyData is powered by PaddlePaddle !"
    ad = "For more info please go to the following website."
    website = "https://github.com/PaddlePaddle/EasyData"
    # Banner width follows the longest of the three lines plus padding.
    AD_LEN = 6 + len(max([copyright, ad, website], key=len))

    info(
        coloring(
            "\n{0}\n{1}\n{2}\n{3}\n{4}\n{5}\n{6}\n{7}\n".format(
                "=" * (AD_LEN + 4),
                "=={}==".format(copyright.center(AD_LEN)),
                "=" * (AD_LEN + 4),
                "=={}==".format(' ' * AD_LEN),
                "=={}==".format(ad.center(AD_LEN)),
                "=={}==".format(' ' * AD_LEN),
                "=={}==".format(website.center(AD_LEN)),
                "=" * (AD_LEN + 4),
            ), "RED"))
p.append(self.cfg['noise']['gauss']['fraction']) 24 | funcs.append(self.apply_gauss_noise) 25 | 26 | if self.cfg['noise']['uniform']['enable']: 27 | p.append(self.cfg['noise']['uniform']['fraction']) 28 | funcs.append(self.apply_uniform_noise) 29 | 30 | if self.cfg['noise']['salt_pepper']['enable']: 31 | p.append(self.cfg['noise']['salt_pepper']['fraction']) 32 | funcs.append(self.apply_sp_noise) 33 | 34 | if self.cfg['noise']['poisson']['enable']: 35 | p.append(self.cfg['noise']['poisson']['fraction']) 36 | funcs.append(self.apply_poisson_noise) 37 | 38 | if len(p) == 0: 39 | return img 40 | 41 | noise_func = np.random.choice(funcs, p=p) 42 | 43 | return noise_func(img) 44 | 45 | def apply_gauss_noise(self, img): 46 | """ 47 | Gaussian-distributed additive noise. 48 | """ 49 | mean = 0 50 | stddev = np.sqrt(15) 51 | gauss_noise = np.zeros(img.shape) 52 | cv2.randn(gauss_noise, mean, stddev) 53 | out = img + gauss_noise 54 | 55 | return out 56 | 57 | def apply_uniform_noise(self, img): 58 | """ 59 | Apply zero-mean uniform noise 60 | """ 61 | imshape = img.shape 62 | alpha = 0.05 63 | gauss = np.random.uniform(0 - alpha, alpha, imshape) 64 | gauss = gauss.reshape(*imshape) 65 | out = img + img * gauss 66 | return out 67 | 68 | def apply_sp_noise(self, img): 69 | """ 70 | Salt and pepper noise. Replaces random pixels with 0 or 255. 71 | """ 72 | s_vs_p = 0.5 73 | amount = np.random.uniform(0.004, 0.01) 74 | out = np.copy(img) 75 | # Salt mode 76 | num_salt = np.ceil(amount * img.size * s_vs_p) 77 | coords = [ 78 | np.random.randint(0, i - 1, int(num_salt)) for i in img.shape 79 | ] 80 | out[coords] = 255. 81 | # Pepper mode 82 | num_pepper = np.ceil(amount * img.size * (1. - s_vs_p)) 83 | coords = [ 84 | np.random.randint(0, i - 1, int(num_pepper)) for i in img.shape 85 | ] 86 | out[coords] = 0 87 | return out 88 | 89 | def apply_poisson_noise(self, img): 90 | """ 91 | Poisson-distributed noise generated from the data. 
92 | """ 93 | vals = len(np.unique(img)) 94 | vals = 2**np.ceil(np.log2(vals)) 95 | 96 | if vals < 0: 97 | return img 98 | 99 | noisy = np.random.poisson(img * vals) / float(vals) 100 | return noisy 101 | -------------------------------------------------------------------------------- /deploy/python/dataaug/utils/remaper.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is refer from: 3 | https://github.com/Sanster/text_renderer/blob/master/textrenderer/remaper.py 4 | """ 5 | 6 | # coding=utf-8 7 | import random 8 | import cv2 9 | import numpy as np 10 | 11 | 12 | class Remaper(object): 13 | 14 | def __init__(self, cfg): 15 | self.cfg = cfg 16 | 17 | def apply(self, word_img, text_box_pnts, word_color): 18 | """ 19 | :param word_img: word image with big background 20 | :param text_box_pnts: left-top, right-top, right-bottom, left-bottom of text word 21 | :return: 22 | """ 23 | max_val = np.random.uniform(self.cfg['curve']['min'], 24 | self.cfg['curve']['max']) 25 | 26 | h = word_img.shape[0] 27 | w = word_img.shape[1] 28 | 29 | img_x = np.zeros((h, w), np.float32) 30 | img_y = np.zeros((h, w), np.float32) 31 | 32 | xmin = text_box_pnts[0][0] 33 | xmax = text_box_pnts[1][0] 34 | ymin = text_box_pnts[0][1] 35 | ymax = text_box_pnts[2][1] 36 | 37 | remap_y_min = ymin 38 | remap_y_max = ymax 39 | 40 | for y in range(h): 41 | for x in range(w): 42 | remaped_y = y + self._remap_y(x, max_val) 43 | 44 | if y == ymin: 45 | if remaped_y < remap_y_min: 46 | remap_y_min = remaped_y 47 | 48 | if y == ymax: 49 | if remaped_y > remap_y_max: 50 | remap_y_max = remaped_y 51 | 52 | # 某一个位置的 y 值应该为哪个位置的 y 值 53 | img_y[y, x] = remaped_y 54 | # 某一个位置的 x 值应该为哪个位置的 x 值 55 | img_x[y, x] = x 56 | 57 | remaped_text_box_pnts = [[xmin, remap_y_min], [xmax, remap_y_min], 58 | [xmax, remap_y_max], [xmin, remap_y_max]] 59 | 60 | # TODO: use cuda::remap 61 | dst = cv2.remap(word_img, img_x, img_y, cv2.INTER_CUBIC) 62 | return dst, 
remaped_text_box_pnts 63 | 64 | def _remap_y(self, x, max_val): 65 | return int(max_val * 66 | np.math.sin(2 * 3.14 * x / self.cfg['curve']['period'])) 67 | -------------------------------------------------------------------------------- /deploy/python/dataaug/utils/utility.py: -------------------------------------------------------------------------------- 1 | # copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
import os
import sys

# Make the deploy/ package importable when this module is used directly.
parent = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.abspath(os.path.join(parent, '../deploy/')))

try:
    from python.dataaug.utils import logger
except ImportError:
    # Fallback: stdlib logging keeps this module usable outside deploy/.
    import logging as logger


def get_label(data_file, delimiter=" "):
    """Read ``<path><delimiter><label>`` lines and map basename -> label.

    Args:
        data_file (str): Path of the label list file.
        delimiter (str): Separator between image path and label.

    Returns:
        dict[str, str]: Mapping from image file name (basename) to label.
    """
    all_label = {}
    with open(data_file, "r", encoding="utf-8") as f:
        for line in f.readlines():
            # maxsplit=1 tolerates labels that contain the delimiter.
            path, label = line.strip().split(delimiter, 1)
            path = path.split("/")[-1]
            all_label[path] = label
    return all_label


def rm_repeat(all_label, save_list, compare_file, out_file, thresh, delimiter):
    """Filter near-duplicate samples using a tab-separated similarity file.

    Each line of ``compare_file`` is ``query\\t<gallery...>\\tscore``. Kept
    queries are appended (with their label from ``all_label``) to
    ``out_file``; ``save_list`` is mutated to record what was already kept.

    Returns:
        int: Number of lines written to ``out_file``.
    """
    count = 0
    with open(out_file, "w", encoding="utf-8") as new_aug_file:
        with open(compare_file, "r", encoding="utf-8") as f:
            for line in f.readlines():
                fields = line.strip().split("\t")
                query = fields[0]
                gallery = fields[1:-1]
                score = fields[-1]
                path = query.split("/")[-1]
                # NOTE(review): ``(gallery or query) not in save_list`` only
                # tests ONE of the two (gallery if non-empty, else query);
                # it likely intended "neither in save_list". Also, lines
                # with score exactly equal to ``thresh`` are dropped.
                # Behavior preserved as-is -- confirm before changing.
                if float(score) > thresh and (gallery
                                              or query) not in save_list:
                    count += 1
                    save_list.append(gallery)
                    save_list.append(query)
                    new_aug_file.write(query + delimiter +
                                       str(all_label[path]) + "\n")
                elif float(score) < thresh:
                    count += 1
                    save_list.append(query)
                    new_aug_file.write(query + delimiter +
                                       str(all_label[path]) + "\n")
    return count


def check_dir(path):
    """Create the parent directory of ``path`` if it does not exist."""
    parent_dir = os.path.dirname(path)
    if len(parent_dir) < 1:
        return
    # exist_ok avoids the exists()/makedirs() TOCTOU race of the original.
    os.makedirs(parent_dir, exist_ok=True)
    return


def concat_file(label_dir, all_file):
    """Merge every regular file under ``label_dir`` into ``all_file``.

    Args:
        label_dir (str): Directory whose files are concatenated.
        all_file (str): Output path; its parent directory is created.

    Returns:
        str: ``all_file``, for convenience.
    """
    filenames = os.listdir(label_dir)
    assert len(filenames) > 0, "Can not find any file in {}".format(label_dir)
    check_dir(all_file)
    # ``with`` guarantees both handles are closed even on error; the
    # original leaked the output handle on exceptions and never closed
    # the per-file input handles.
    with open(all_file, 'w', encoding="utf-8") as f:
        for filename in filenames:
            filepath = os.path.join(label_dir, filename)
            if not os.path.isfile(filepath):
                continue
            logger.info("{} will be merged to {}".format(filename, all_file))
            with open(filepath, encoding="utf-8") as src:
                for line in src:
                    if len(line) != 0:
                        f.writelines(line)
    return all_file


# ----------------------------------------------------------------------
# /deploy/python/dataclean/__init__.py
# ----------------------------------------------------------------------
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class DataClean(object):
    """Run every configured cleaning pipeline over one input path."""

    def __init__(self, args):
        # args: argparse.Namespace carrying at least ``input`` and
        # ``config`` -- TODO confirm against the CLI entry point.
        self.input = os.path.abspath(args.input)
        self.model_list = self.build_pipeline(args)

    def build_pipeline(self, args):
        """Instantiate one ppcv Pipeline per entry of the DataClean config."""
        # Deferred imports: ppcv and the deploy-local utils package only
        # exist inside the packaged deploy/ environment.
        from ppcv.engine.pipeline import Pipeline
        from utils.utils import load_yaml

        config = load_yaml(args.config)
        # Drop the meta key; every remaining key maps to a pipeline yaml.
        config.pop("DataClean")
        model_list = []
        for model in config.keys():
            args.config = config[model]  # each Pipeline reads its own yaml
            model_list.append(Pipeline(args))
        return model_list

    def run(self):
        for model in self.model_list:
            model.run(self.input)


# ----------------------------------------------------------------------
# /deploy/python/dataclean/demos/paddleclas_demo.py
# ----------------------------------------------------------------------
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import cv2
from easydata import EasyData
from paddleclas import PaddleClas


def main(img_path):
    """Classify an image after auto-correcting its orientation."""
    # Orientation model predicts how many 90-degree turns the image needs.
    orientation = EasyData(model="image_orientation",
                           device="cpu",
                           return_res=True,
                           print_res=False)
    classifier = PaddleClas(model_name="PPLCNet_x0_25")

    rotation_id = orientation.predict(img_path)[0]["class_ids"]

    # BGR -> RGB, then undo the detected rotation before classifying.
    image = cv2.imread(img_path)[:, :, ::-1]
    image = np.rot90(image, -1 * rotation_id)

    print(next(classifier.predict(image)))


if __name__ == "__main__":
    img_path = "./easydata_demo_imgs/image_orientation/1.jpg"
    main(img_path)


# ----------------------------------------------------------------------
# /deploy/python/dataclean/demos/paddleocr_demo.py
# ----------------------------------------------------------------------
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import cv2
from PIL import Image

from easydata import EasyData
from paddleocr import PaddleOCR, draw_ocr


def main(img_path):
    """Run PP-OCR on an orientation-corrected image and save a visualization."""
    # Orientation model predicts how many 90-degree turns the image needs.
    orientation = EasyData(model="image_orientation",
                           device="cpu",
                           return_res=True,
                           print_res=False)
    ocr = PaddleOCR(use_angle_cls=True, lang="ch")

    rotation_id = orientation.predict(img_path)[0]["class_ids"]

    # BGR -> RGB, then undo the detected rotation before recognition.
    corrected = cv2.imread(img_path)[:, :, ::-1]
    corrected = np.rot90(corrected, -1 * rotation_id)

    detections = ocr.ocr(corrected, cls=True)[0]
    for item in detections:
        print(item)

    # Collect boxes / texts / confidences for the overlay rendering.
    boxes, txts, scores = [], [], []
    for item in detections:
        boxes.append(item[0])
        txts.append(item[1][0])
        scores.append(item[1][1])

    canvas = Image.open(img_path).convert('RGB')
    overlay = draw_ocr(canvas, boxes, txts, scores, font_path='./font.ttf')
    Image.fromarray(overlay).save('result.jpg')


if __name__ == "__main__":
    img_path = "./easydata_demo_imgs/image_orientation/3.jpg"
    main(img_path)


# ----------------------------------------------------------------------
# /deploy/utils/__init__.py
# ----------------------------------------------------------------------
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /deploy/utils/label_map/clarity_assessment_label_list.txt: -------------------------------------------------------------------------------- 1 | 0 clarity 2 | 1 blured 3 | -------------------------------------------------------------------------------- /deploy/utils/label_map/code_exists_label_list.txt: -------------------------------------------------------------------------------- 1 | 0 no code 2 | 1 contains code 3 | -------------------------------------------------------------------------------- /deploy/utils/label_map/image_orientation_label_list.txt: -------------------------------------------------------------------------------- 1 | 0 0° 2 | 1 90° 3 | 2 180° 4 | 3 270° -------------------------------------------------------------------------------- /deploy/utils/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

import yaml


def load_yaml(yaml_file):
    """Load a YAML file into a Python object.

    Args:
        yaml_file (str): Path to the YAML configuration file.

    Returns:
        Parsed YAML content (typically a dict).
    """
    # utf-8 is forced so configs containing non-ASCII text (the project
    # docs and configs use Chinese) load on every platform; the default
    # locale codec breaks on Windows. SafeLoader never executes tags.
    with open(yaml_file, 'r', encoding='utf-8') as f:
        yaml_data = yaml.load(f, Loader=yaml.SafeLoader)
    return yaml_data

# -------------------------------------------------------------------------
# /docs/en/.gitkeep:
# -------------------------------------------------------------------------
# https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/docs/en/.gitkeep
# -------------------------------------------------------------------------
# /docs/images/PP-DataAug/.gitkeep:
# -------------------------------------------------------------------------
# https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/docs/images/PP-DataAug/.gitkeep
# -------------------------------------------------------------------------
# /docs/zh_CN/DataClean/DataClean.md:
# -------------------------------------------------------------------------
# # DataClean
#
# ------
#
# EasyData 是基于飞桨开发的数据处理工具,旨在帮助视觉开发者在模型开发的过程中更好的处理数据,数据清洗工具(DataClean)是 EasyData 的子模块,其主要帮助开发者可以更好的提升数据质量或者筛选和过滤低质数据。数据清洗工具可以应用到部署时数据的预处理中,可以在增加很少推理时间的情况下大幅增加精度。也可以应用到训练数据、测试数据的筛选过滤中,结合相关的后处理,不仅可以进一步增加模型的精度,也可以增加相关产品的满意度。
#
# 作为可插拔的模块,DataClean 可以嵌到任何视觉任务中,其功能可视化如下:
#
# <div align="center">
10 | 11 |
12 | 13 | 目前,数据清洗模块包含图像方向矫正、模糊图像过滤、二维码图像过滤,相关的模型介绍及模型下载链接如下: 14 | 15 | | 类别 | 亮点 | 文档说明 | 模型下载 | 16 | | :--: | :--: | :------: | :------: | 17 | |图像方向矫正|自动矫正图像,大大提升多项视觉任务在旋转图像上精度|[文档](image_orientation_correction.md)|[下载链接](https://paddleclas.bj.bcebos.com/models/PULC/inference/image_orientation_infer.tar)| 18 | |模糊图像过滤|判断图像是否模糊,可以广泛应用于模糊图像过滤、视觉相关业务的前处理等|[文档](blured_image_filtering.md)|[下载链接](https://paddleclas.bj.bcebos.com/models/PULC/inference/clarity_assessment_infer.tar)| 19 | |二维码图像过滤|判断图像是否含有二维码、条形码、小程序码,可以广泛应用于二维码、条形码、小程序码过滤、审核等业务|[文档](code_image_filtering.md)|[下载链接](https://paddleclas.bj.bcebos.com/models/PULC/inference/code_exists_infer.tar)| 20 | -------------------------------------------------------------------------------- /docs/zh_CN/DataClean/blured_image_filtering.md: -------------------------------------------------------------------------------- 1 | # 模糊图像过滤 2 | 3 | ------ 4 | 5 | 6 | ## 目录 7 | 8 | - [1. 背景](#1) 9 | - [2. 模型](#2) 10 | - [2.1 训练](#2.1.1) 11 | - [2.2 部署](#2.1.3) 12 | - [3. 视觉任务表现](#3) 13 | - [3.1 验证集指标](#3.1) 14 | - [3.2 ImageNet 数据指标](#3.2) 15 | - [3.3 COCO 数据指标](#3.3) 16 | - [4. 总结](#4) 17 | 18 | 19 | 20 | ## 1. 背景 21 | 22 | 近些年计算机视觉领域迎来了蓬勃的发展,相关的算法也在各种业务上落地,产生了巨大的价值。在落地过程中,数据整理、模型训练、部署后处理等开发环节都至关重要,而在这些重要的环节中,数据的质量可谓是重中之重,其好坏直接决定了最终落地业务的性能。基于此,我们开源了数据清洗的工具,其可以在模型开发的任何阶段使用。在数据整理阶段,其可以通过这个工具筛选掉潜在的低质数据,在模型训练阶段,该工具可以成为数据预处理的一部分,在模型部署及后处理过程中,其可以通过后处理逻辑将低质图片过滤,间接提升模型精度,从而提升用户整体体验。本教程主要介绍数据清洗工具中模糊图像过滤的相关内容。该工具在视觉任务中的使用方式如下图所示: 23 | 24 |
25 | 26 |
27 | 28 | 为了使用户方便地使用该工具,我们尽可能提升了该工具的易用性,模型方面,我们使用了非常轻量的模型,在CPU上也可以很快的运行。在使用方面,我们提供了Whl包的使用方式,用户可以方便地集成到Python代码或者shell中。 29 | 30 | 31 | 32 | ## 2. 模型 33 | 34 | 在方案选型上,我们选择了图像分类的方案,其容易训练、部署,模型大小可以做到非常小,且效果也更容易保证。 35 | 36 | 37 | #### 2.1 训练 38 | 39 | 此处我们希望得到一个高精度且轻量级的二分类模型,所以我们采用了PaddleClas的PULC方案,该任务的训练详情可以参考[PULC模糊清晰图像分类](@clas_pulc)。 40 | 41 | 42 | #### 2.2 部署 43 | 44 | 我们可以很方便地使用EasyData的whl包进行部署,关于其部署方法,可以参考[模糊图像过滤部署](quick_start.md#212)。 45 | 46 | 47 | 48 | ## 3 视觉任务表现 49 | 50 | 51 | #### 3.1 验证集指标 52 | 53 | 在图像清晰模糊二分类训练中,我们的评估数据是来自[blur dataset](https://github.com/Kwentar/blur_dataset),其含有700张模糊图像,350张非模糊图像。该数据均来自真实场景拍摄,其中模糊图像有运动模糊和对焦模糊两种。我们对训练好的模型的评估,指标如下: 54 | 55 | | Accuracy | 模型存储 | 推理时间 | 56 | | :--: | :--: | :--: | 57 | | 95.3% | 7M | 2.1ms | 58 | 59 | 60 | #### 3.2 ImageNet 数据指标 61 | 62 | 我们将模糊过滤模型同样在ImageNet数据上进行了评测,我们将ImageNet val数据(50000条)和[3.1.1](#3.1.1)中的模糊数据(700条)混合起来一起评测。在真实使用场景中,往往在卡特定误报率(FPR)的情况下希望召回率(TPR)越高越好。在该例子中,我们希望ImageNet val的数据尽量被判定为清晰图片,在此基础上,尽可能将模糊图片召回。我们评测了不同阈值下的FPR和TPR,相关指标如下: 63 | 64 | | 阈值 | FPR| TPR | 65 | | :--: | :--: | :--: | 66 | | 0.5 | 29.2% | 96.7% | 67 | | 0.7 | 15.8% | 92.4% | 68 | | 0.8 | 10.1% | 85.4%| 69 | | 0.9 | 4.5% | 71.0% | 70 | | 0.95 | 1.8% | 51.5% | 71 | 72 | **备注:** 73 | 74 | - 该表格中的每一行的含义为在卡阈值的基础上,FPR和TPR的情况。如最后一行表示:将输出的score大于0.95判定为模糊图像,小于0.95判定为清晰图像,在此基础上,FPR为1.8%,即清晰的图像(ImageNet val)有1.8%被判定为了模糊的图像;TPR为51.5%,即模糊的图像(blur dataset中模糊图像)有51.5%被成功识别出。在具体的场景中,阈值需要根据实际情况调整。 75 | 76 | - 我们对ImageNet val的图片做了假设,默认其均为清晰图片,事实上会有部分模糊图片,所以在真实场景中,该TPR指标会更高。 77 | 78 | 79 | 80 | #### 3.3 COCO数据指标 81 | 82 | 我们将模糊过滤模型同样在COCO数据上进行了评测,我们将COCO val数据(5000条)和[3.1](#3.1)中的模糊数据(700条)混合起来一起评测。在真实使用场景中,往往在卡特定误报率(FPR)的情况下希望召回率(TPR)越高越好。在该例子中,我们希望COCO val的数据尽量被判定为清晰图片,在此基础上,尽可能将模糊图片召回。我们评测了不同阈值下的FPR和TPR,相关指标如下: 83 | 84 | | 阈值 | FPR| TPR | 85 | | :--: | :--: | :--: | 86 | | 0.5 | 20.1% | 96.7% | 87 | | 0.7 | 9.8% | 92.4% | 88 | | 0.8 | 5.6% | 85.4%| 89 | | 0.9 | 2.2% | 71.0% | 90 | | 0.95 | 0.80% | 51.5% | 91 | 92 | 93 | 94 | ## 4 总结 
95 | 96 | 此文档介绍了EasyData数据清洗模块中模糊图像过滤相关的模型介绍、模型训练、模型使用方法以及模型的指标。该能力将会不断建设,敬请期待。 97 | -------------------------------------------------------------------------------- /docs/zh_CN/DataClean/code_image_filtering.md: -------------------------------------------------------------------------------- 1 | # 二维码图像过滤 2 | 3 | ------ 4 | 5 | 6 | ## 目录 7 | 8 | - [1. 背景](#1) 9 | - [2. 模型](#2) 10 | - [2.1 训练](#2.1) 11 | - [2.2 部署](#2.2) 12 | - [3. 视觉任务表现](#3) 13 | - [3.1 验证集指标](#3.1) 14 | - [3.2 ImageNet 数据指标](#3.2) 15 | - [3.3 COCO 数据指标](#3.3) 16 | - [4. 总结](#4) 17 | 18 | 19 | 20 | ## 1. 背景 21 | 22 | 近些年计算机视觉领域迎来了蓬勃的发展,相关的算法也在各种业务上落地,产生了巨大的价值。在落地过程中,数据整理、模型训练、部署后处理等开发环节都至关重要,而在这些重要的环节中,数据的质量可谓是重中之重,其好坏直接决定了最终落地业务的性能。基于此,我们开源了数据清洗的工具,其可以在模型开发的任何阶段使用。在数据整理阶段,其可以通过这个工具筛选掉潜在的低质数据,在模型训练阶段,该工具可以成为数据预处理的一部分,在模型部署及后处理过程中,其可以通过后处理逻辑将低质图片过滤,间接提升模型精度,从而提升用户整体体验。本教程主要介绍数据清洗工具中二维码图像过滤的相关内容。该工具在视觉任务中的使用方式如下图所示: 23 | 24 |
25 | 26 |
27 | 28 | 为了使用户方便地使用该工具,我们尽可能提升了该工具的易用性,模型方面,我们使用了非常轻量的模型,在CPU上也可以很快的运行。在使用方面,我们提供了Whl包的使用方式,用户可以方便地集成到Python代码或者shell中。 29 | 30 | 31 | 32 | ## 2. 模型 33 | 34 | 在方案选型上,我们选择了图像分类的方案,其容易训练、部署,模型大小可以做到非常小,且效果也更容易保证。 35 | 36 | 37 | #### 2.1 训练 38 | 39 | 此处我们希望得到一个高精度且轻量级的二分类模型,所以我们采用了PaddleClas的PULC方案,该任务的训练详情可以参考[PULC有无二维码图像分类](@clas_pulc)。 40 | 41 | 42 | #### 2.2 部署 43 | 44 | 我们可以很方便地使用 EasyData 的 whl 包进行部署,关于其部署方法,可以参考[二维码图像过滤部署](quick_start.md#232)。 45 | 46 | 47 | 48 | ## 3 视觉任务表现 49 | 50 | 51 | #### 3.1 验证集指标 52 | 53 | 在图像有无二维码训练中,我们的评估数据是来自真实采集的数据,其含有2249条含二维码的图像数据,2554条不含二维码的图像数据。其中,二维码包含二维码、条形码、小程序码。我们对训练好的模型的评估,指标如下: 54 | 55 | | Accuracy | 模型存储 | 推理时间 | 56 | | :--: | :--: | :--: | 57 | | 94.9% | 7M | 2.1ms | 58 | 59 | 60 | #### 3.2 ImageNet 数据指标 61 | 62 | 我们将二维码过滤模型同样在ImageNet数据上进行了评测,我们将ImageNet val数据(50000条)和[3.1](#3.1)中的二维码数据(2249条)混合起来一起评测。在真实使用场景中,往往在卡特定误报率(FPR)的情况下希望召回率(TPR)越高越好。在该例子中,我们希望ImageNet val的数据尽量被判定为非二维码图片,在此基础上,尽可能将二维码的图像召回。我们评测了不同阈值下的FPR和TPR,相关指标如下: 63 | 64 | 65 | | 阈值 | FPR| TPR | 66 | | :--: | :--: | :--: | 67 | | 0.5 | 2.3% | 92.7% | 68 | | 0.7 | 1.2% | 90.9% | 69 | | 0.8 | 0.84% | 90.0%| 70 | | 0.9 | 0.48% | 88.0% | 71 | | 0.95 | 0.31% | 86.1% | 72 | 73 | 74 | **备注:** 75 | 76 | - 该表格中的每一行的含义为在卡阈值的基础上,FPR和TPR的情况。如最后一行表示:将输出的score大于0.95判定为二维码图像,小于0.95判定为非二维码图像,在此基础上,FPR为0.31%,即非二维码的图像(ImageNet val)有0.31%被判定为了二维码的图像;TPR为86.1%,即二维码的图像有86.1%被成功识别出。在具体的场景中,阈值需要根据实际情况调整。 77 | 78 | - 我们对ImageNet val的图片做了假设,默认其均为非二维码图像,事实上会有部分二维码,所以在真实场景中,该TPR指标会更高。 79 | 80 | 81 | #### 3.3 COCO数据指标 82 | 83 | 我们将二维码过滤模型同样在COCO数据上进行了评测,我们将COCO val数据(5000条)和[3.1](#3.1)中的二维码数据(2249条)混合起来一起评测。在真实使用场景中,往往在卡特定误报率(FPR)的情况下希望召回率(TPR)越高越好。在该例子中,我们希望COCO val的数据尽量被判定为非二维码图片,在此基础上,尽可能将二维码图像召回。我们评测了不同阈值下的FPR和TPR,相关指标如下: 84 | 85 | 86 | | 阈值 | FPR| TPR | 87 | | :--: | :--: | :--: | 88 | | 0.5 | 2.7% | 92.7% | 89 | | 0.7 | 1.4% | 90.9% | 90 | | 0.8 | 1.0% | 90.0%| 91 | | 0.9 | 0.42% | 88.0% | 92 | | 0.95 | 0.2% | 86.1% | 93 | 94 | 95 | 96 | ## 4 总结 97 | 98 | 此文档介绍了 EasyData 
数据质量提升模块中二维码图像过滤相关的模型介绍、模型训练、模型使用方法以及模型的指标。该能力将会不断建设,敬请期待。 99 | -------------------------------------------------------------------------------- /docs/zh_CN/DataClean/image_orientation_correction.md: -------------------------------------------------------------------------------- 1 | # 图像方向矫正 2 | 3 | ------ 4 | 5 | 6 | ## 目录 7 | 8 | 9 | - [1. 背景](#1) 10 | - [2. 模型](#2) 11 | - [2.1 训练](#2.1) 12 | - [2.2 部署](#2.2) 13 | - [3. 视觉任务表现](#3) 14 | - [3.1 图像分类](#3.1) 15 | - [3.2 目标检测](#3.2) 16 | - [3.3 OCR端到端识别](#3.3) 17 | 18 | 19 | ## 1. 背景 20 | 21 | 深度学习在计算机视觉领域已经取得了较大的发展,在视觉场景的诸多任务上已有广泛应用,我们针对模型部署中面临的数据质量问题,提出了 DataClean 解决方案。 22 | 23 | 深度学习为数据驱动,模型效果极大地依赖于训练数据,而通常情况下训练数据集是针对特定问题进行设计的,这样训练得到的模型通常针对预设问题具有较好效果,但是在真实的部署场景中往往存在大量的低质量数据,这些数据极大地影响了模型的预测效果,比如图像方向问题。目前大部分视觉任务数据集均预设图像方向为正,这一点在大多真实场景中无法确保,因此其他方向的数据在预测时往往得不到预期效果,如果强行将图像方向考虑到训练数据集设计中,则又会对模型预测效果带来负面影响,或是需要更大体量的模型影响推理速度,针对上述问题,DataClean 提供了图像方向矫正模型,该模型能够对图像方向进行分类预测,同时该模型极为轻量,对系统推理速度影响较小。图像方向矫正模型的使用示例如下图所示: 24 | 25 |
26 | 27 |
28 | 29 | 30 | ## 2. 模型介绍 31 | 32 | 在本小节我们会介绍图像方向矫正模型从训练到部署的全部流程。 33 | 34 | 35 | #### 2.1 训练 36 | 37 | 图像方向矫正模型本质为图像分类任务,同时考虑到模型需要足够轻量,因此我们采用了 PaddleClas 的 PULC 方案,训练详情可以参考[PULC图像方向分类](https://github.com/PaddlePaddle/PaddleClas/blob/develop/docs/zh_CN/models/PULC/PULC_image_orientation.md)。 38 | 39 | 40 | #### 2.2 部署 41 | 42 | 对于图像方向矫正模型的使用方法,我们提供了 EasyData whl 的方式,支持命令行直接预测以及在 Python 脚本中调用的方式,具体可以参考[图像方向矫正模型部署](quick_start.md#211)。 43 | 44 | 45 | ## 3. 视觉任务表现 46 | 47 | 图像方向矫正模型对下游任务改善显著,我们在图像分类、目标检测、OCR等任务上进行了评估,具体指标如下: 48 | 49 | 50 | ### 3.1 图像分类 51 | 52 | 对于图像分类任务,我们基于 ImageNet1k 数据集进行了评测,模型使用 [PP-LCNet_x1_0](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.5/docs/zh_CN/models/ImageNet1k/PP-LCNet.md),具体效果如下表所示: 53 | 54 | | 是否进行方向矫正 | 原始数据评测指标 | 多方向数据评测指标 | 55 | | :-- | :--: | :--: | 56 | | ✘ | 71.31% | 53.91% | 57 | | ✔(阈值0) | 50.89% | 70.22% | 58 | | ✔(阈值0.80) | 71.26% | 68.39% | 59 | | ✔(阈值0.90) | 71.31% | 66.16% | 60 | | ✔(阈值0.95) | 71.31% | 53.96% | 61 | 62 | 在上表中: 63 | * 其中指标为 Top-1 Acc; 64 | * 原始数据为 ImageNet1k 数据集,多方向数据为基于 ImageNet1k 数据进行方向扩充后的数据集,扩充方法为:对原始图像数据分别逆时针旋转0°、90°、180°、270°得到; 65 | * ✘ 表示不使用方向矫正模型,✔ 表示使用方向矫正模型并取不同的阈值; 66 | 67 | 68 | ### 3.2 目标检测 69 | 70 | 对于目标检测任务,我们基于 COCO 数据集进行了评测,模型使用 PPYOLOE+L,具体效果如下表所示: 71 | 72 | | 是否进行方向矫正 | 原始数据评测指标 | 多方向数据评测指标 | 73 | | :-- | :--: | :--: | 74 | | ✘ | 52.9% | 31.0% | 75 | | ✔(阈值0) | 52.4% | 52.1% | 76 | | ✔(阈值0.80) | 52.8% | 51.2% | 77 | | ✔(阈值0.90) | 52.9% | 49.6% | 78 | | ✔(阈值0.95) | 52.9% | 31.1% | 79 | 80 | 在上表中: 81 | * 其中指标为 AP0.5:0.95; 82 | * 原始数据为 COCO 数据集,多方向数据为基于 COCO 数据进行方向扩充后的数据集,扩充方法为:对原始图像数据分别逆时针旋转 0°、90°、180°、270°,同时修改对应的标签值; 83 | * ✘ 表示不使用方向矫正模型,✔ 表示使用方向矫正模型并取不同的阈值; 84 | 85 | 86 | ### 3.3 OCR 端到端识别 87 | 88 | 对于 OCR 任务,我们基于 PP-OCRv3 端到端文字检测识别系统进行了实验,具体效果如下表所示: 89 | 90 | | 是否进行方向矫正 | 原始数据评测指标 | 多方向数据评测指标 | 91 | | :-- | :--: | :--: | 92 | | ✘ | 61.89% | 35.89% | 93 | | ✔(阈值0) | 57.92% | 57.98% | 94 | | ✔(阈值0.80) | 61.41% | 56.49% | 95 | | ✔(阈值0.90) | 61.85% | 51.17% | 96 | | ✔(阈值0.95) | 
61.89% | 42.19% | 97 | 98 | 在上表中: 99 | * 其中指标为 H-mean; 100 | * 原始数据为 PP-OCRv3 端到端评测数据集,多方向数据为基于原始数据集进行方向扩充后的数据集,扩充方法为:对原始图像数据分别逆时针旋转0°、90°、180°、270°,同时修改对应的标签值; 101 | * ✘ 表示不使用方向矫正模型,✔ 表示使用方向矫正模型并取不同的阈值; 102 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/docs/zh_CN/datasets/.gitkeep -------------------------------------------------------------------------------- /docs/zh_CN/datasets/Annotation_tool/EISeg.md: -------------------------------------------------------------------------------- 1 | 简体中文 | [English](README_EN.md) 2 |
3 | 4 |

5 | LOGO 6 |

7 | 8 | **An Efficient Interactive Segmentation Tool based on [PaddlePaddle](https://github.com/paddlepaddle/paddle).** 9 | 10 | [![Python 3.6](https://img.shields.io/badge/python-3.6+-blue.svg)](https://www.python.org/downloads/release/python-360/) [![PaddlePaddle 2.2](https://img.shields.io/badge/paddlepaddle-2.2-blue.svg)](https://www.python.org/downloads/release/python-360/) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) [![Downloads](https://pepy.tech/badge/eiseg)](https://pepy.tech/project/eiseg) 11 | 12 |
13 | 14 |
15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 |
Generic segmentationHuman segmentationRS building segmentationMedical segmentation
Industrial quality inspectionGeneric video segmentation 3D medical segmentation
39 |
40 | 41 | ## 最新动态 42 | * [2022-07-20] :fire: EISeg 1.0版本发布! 43 | - 新增用于通用场景视频交互式分割能力,以EISeg交互式分割模型及[MiVOS](https://github.com/hkchengrex/MiVOS)算法为基础,全面提升视频标注体验。详情使用请参考[视频标注](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/EISeg/docs/video.md)。 44 | - 新增用于腹腔多器官及CT椎骨数据3D分割能力,并提供3D可视化工具,给予医疗领域3D标注新的思路。详情使用请参考[3D标注](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/EISeg/docs/video.md)。 45 | 46 | ## 简介 47 | 48 | EISeg(Efficient Interactive Segmentation)基于飞桨开发的一个高效智能的交互式分割标注软件。它涵盖了通用、人像、遥感、医疗、视频等不同方向的高质量交互式分割模型。 另外,将EISeg获取到的标注应用到PaddleSeg提供的其他分割模型进行训练,便可得到定制化场景的高精度模型,打通分割任务从数据标注到模型训练及预测的全流程。 49 | 50 | ![4a9ed-a91y1](https://user-images.githubusercontent.com/71769312/141130688-e1529c27-aba8-4bf7-aad8-dda49808c5c7.gif) 51 | 52 | ## 特性 53 | * 高效的半自动标注工具,已上线多个Top标注平台 54 | * 覆盖遥感、医疗、视频、3D医疗等众多垂类场景 55 | * 多平台兼容,简单易用,支持多类别标签管理 56 | 57 | 58 | ## 使用教程 59 | * [安装说明](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/EISeg/docs/install.md) 60 | * [图像标注](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/EISeg/docs/image.md) 61 | * [视频及3D医疗标注](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/EISeg/docs/video.md) 62 | * [遥感特色功能](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/EISeg/docs/remote_sensing.md) 63 | * [医疗特色功能](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/EISeg/docs/medical.md) 64 | * [数据处理脚本文档](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/EISeg/docs/tools.md) 65 | 66 | 67 | ## 更新历史 68 | - 2022.07.20 **1.0.0**:【1】新增交互式视频分割功能【2】新增腹腔多器官3D标注模型【3】新增CT椎骨3D标注模型。 69 | - 2022.04.10 **0.5.0**:【1】新增chest_xray模型;【2】新增MRSpineSeg模型;【3】新增铝板质检标注模型;【4】修复保存shp时可能坐标出错。 70 | - 2021.11.16 **0.4.0**:【1】将动态图预测转换成静态图预测,单次点击速度提升十倍;【2】新增遥感图像标注功能,支持多光谱数据通道的选择;【3】支持大尺幅数据的切片(多宫格)处理;【4】新增医疗图像标注功能,支持读取dicom的数据格式,支持选择窗宽和窗位。 71 | - 2021.09.16 **0.3.0**:【1】初步完成多边形编辑功能,支持对交互标注的结果进行编辑;【2】支持中/英界面;【3】支持保存为灰度/伪彩色标签和COCO格式;【4】界面拖动更加灵活;【5】标签栏可拖动,生成mask的覆盖顺序由上往下覆盖。 72 | - 2021.07.07 **0.2.0**:新增contrib:EISeg,可实现人像和通用图像的快速交互式标注。 
73 | 74 | 75 | 76 | 77 | ## 贡献者 78 | 79 | - 感谢[Zhiliang Yu](https://github.com/yzl19940819), [Yizhou Chen](https://github.com/geoyee), [Lin Han](https://github.com/linhandev), [Jinrui Ding](https://github.com/Thudjr), [Yiakwy](https://github.com/yiakwy), [GT](https://github.com/GT-ZhangAcer), [Youssef Harby](https://github.com/Youssef-Harby), [Nick Nie](https://github.com/niecongchong) 等开发者及[RITM](https://github.com/saic-vul/ritm_interactive_segmentation)、[MiVOS](https://github.com/hkchengrex/MiVOS) 等算法支持。 80 | - 感谢[LabelMe](https://github.com/wkentaro/labelme)和[LabelImg](https://github.com/tzutalin/labelImg)的标签设计。 81 | - 感谢[Weibin Liao](https://github.com/MrBlankness)提供的ResNet50_DeeplabV3+预训练模型。 82 | - 感谢[Junjie Guo](https://github.com/Guojunjie08)及[Jiajun Feng](https://github.com/richarddddd198)在椎骨模型上提供的技术支持。 83 | 84 | 85 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/Annotation_tool/EIVideo.md: -------------------------------------------------------------------------------- 1 | # EIVideo - 交互式智能视频标注工具 2 | 3 | [![Downloads](https://static.pepy.tech/personalized-badge/eivideo?period=total&units=international_system&left_color=grey&right_color=orange&left_text=EIVideo%20User)](https://pepy.tech/project/eivideo) 4 | [![Downloads](https://static.pepy.tech/personalized-badge/qeivideo?period=total&units=international_system&left_color=grey&right_color=orange&left_text=QEIVideo%20User)](https://pepy.tech/project/qeivideo) 5 | ![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/QPT-Family/EIVideo?include_prereleases) 6 | ![GitHub forks](https://img.shields.io/github/forks/QPT-Family/EIVideo) 7 | ![GitHub Repo stars](https://img.shields.io/github/stars/QPT-Family/EIVideo) 8 | ![GitHub](https://img.shields.io/github/license/QPT-Family/EIVideo) 9 | ![](https://img.shields.io/badge/%E6%B7%B1%E5%BA%A6%E9%80%82%E9%85%8D->Win7-9cf) 10 | 11 | --- 12 | 13 | 14 |
15 | 图片 16 |
17 | 18 | EIVideo,基于百度飞桨MA-Net交互式视频分割模型打造的交互式**智能视频**标注工具箱,只需简单标注几帧,即可完成全视频标注,若自动标注结果未达要求还可通过多次和视频交互而不断提升视频分割质量,直至对分割质量满意。 19 | 20 | 戳 -> 了解相关[技术文章&模型原理](等待微信公众号) 21 | 22 |
23 | 图片 24 |
25 | 26 | > 为了更好的解放双手,我们还提供了图形化界面工具QEIVideo,通过它我们可以不使用繁杂的命令方式来完成视频的智能标注工作。 27 | 28 | --- 29 | 30 | ### README目录 31 | 32 | - [EAP - The Early Access Program 早期访问计划](#eap---the-early-access-program-早期访问计划) 33 | - [使用方式](#使用方式) 34 | - [安装&运行](#安装运行) 35 | - [QPT包 - 适合无Python基础用户](#qpt包---适合无python基础用户) 36 | - [标准Python包 - 适合普通Python开发者](#标准python包---适合普通python开发者) 37 | - [开发版本 - 适合高阶开发者进行开发/社区贡献](#开发版本---适合高阶开发者进行开发社区贡献) 38 | - [(Q)EIVideo产品规划安排](#qeivideo产品规划安排) 39 | - [开源协议](#开源协议) 40 | 41 | --- 42 | 43 | ### EAP - The Early Access Program 早期访问计划 44 | 45 | > Warning 当前图形化界面QEIVideo处于**极其初阶**的...建设阶段,并不能保证程序稳定性。 46 | 47 |
图片
48 | 49 | 当您选择使用QEIVideo作为图形化界面时,即可视为同意使用“可能会存在大量体验不佳”的EAP产品。 50 | 51 | 同样,您可选择借助基于[PaddleVideo](https://github.com/PaddlePaddle/PaddleVideo) 实现的 52 | 交互式视频标注模型[EIVideo](https://github.com/QPT-Family/EIVideo/EIVideo) 进行二次开发,在此之上也可完成您需要的自定义图形化界面,后续也将提供二次开发指南。 53 | 54 |
图片
55 | 56 | 57 | > 如果您愿意参与到EIVideo或QEIVideo的建设中来,欢迎您与PMC取得联系 -> WX:GT_ZhangAcer 58 | 59 | ## 使用方式 60 | ### 安装&运行 61 | #### QPT包 - 适合无Python基础用户 62 | 自动化配置相关Python环境,但仅支持Windows7/10/11操作系统,且不对盗版Windows7做任何适配。 63 | 下载地址:暂未上传 64 | > 自动化部署工具由[QPT - 自动封装工具](https://github.com/QPT-Family/QPT) 支持 65 | 66 | #### 标准Python包 - 适合普通Python开发者 67 | * 国际方式: 68 | ```shell 69 | python -m pip install eivideo 70 | python qeivideo 71 | ``` 72 | * 国内推荐: 73 | ```shell 74 | python -m pip install eivideo -i https://mirrors.bfsu.edu.cn/pypi/web/simple 75 | python qeivideo 76 | ``` 77 | > 上述命令仅适用于常规情况,若您安装了多个Python或修改了相关开发工具与配置,请自行修改相关命令使其符合您的开发环境。 78 | 79 | #### 开发版本 - 适合高阶开发者进行开发/社区贡献 80 | 81 | * 国际方式: 82 | ```shell 83 | git clone https://github.com/QPT-Family/EIVideo.git 84 | python -m pip install -r requirements.txt 85 | ``` 86 | * 国内推荐: 87 | ```shell 88 | # 请勿用于Push!!! 89 | git clone https://hub.fastgit.org/QPT-Family/EIVideo.git 90 | python -m pip install -r requirements.txt -i https://mirrors.bfsu.edu.cn/pypi/web/simple 91 | ``` 92 | * 运行程序 93 | ```shell 94 | # 进入工作目录 95 | cd 此处填写EIVideo所在的目录的绝对路径,且该目录下拥有EIVideo与QEIVideo两文件夹。 96 | # 运行 97 | python QEIVideo/start.py 98 | 99 | # 如运行时无法找到对应包,可选择下述方式添加环境变量来调整索引次序后执行python 100 | # Windows 101 | set PYTHONPATH=$pwd:$PYTHONPATH 102 | # Linux 103 | export PYTHONPATH=$pwd:$PYTHONPATH 104 | ``` 105 | 106 | > 上述命令仅适用于常规情况,若您安装了多个Python或修改了相关开发工具与配置,请自行修改相关命令使其符合您的开发环境。 107 | 108 | ## (Q)EIVideo产品规划安排 109 | > 由于QEIVideo由飞桨开源社区学生爱好者构成,所以在项目的产出过程中将会以学习为主进行开源贡献,如您原因与我们一同建设,我们也将非常欢迎~ 110 |
图片
111 | 112 | - [x] EIVideo与Demo版QEIVideo发布0.1.0Alpha版本 113 | - [ ] 完善QEIVideo,丰富基础标注功能,于Q1升级至1.0Alpha版本 114 | - [ ] 回归QEIVideo稳定性,于Q2完成1.0正式版本发版 115 | - [ ] 增加视频目标检测、分类任务的交互式标注功能。 116 | 117 | ### 开源协议 118 | 本项目使用GNU LESSER GENERAL PUBLIC LICENSE(LGPL)开源协议。 119 | > 因所使用的模型与数据集等原因,本项目中任一代码、参数均不可直接进行商用,如需商用请与我们取得联系。 120 | 121 | ### 引用来源 122 | 1. EIVideo模型以及相关源码、论文与项目 - [PaddleVideo](https://github.com/PaddlePaddle/PaddleVideo) 123 | 2. 部分表情包来源 - [甘城なつき](https://www.pixiv.net/users/3036679) 124 | 125 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/Annotation_tool/PaddleLabel.md: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 |

6 | LOGO 7 |

8 | 9 | **飞桨智能标注,让标注快人一步** 10 | 11 | [![Python](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/release/python-390/) ![PyPI](https://img.shields.io/pypi/v/paddlelabel?color=blue) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](LICENSE) [![Start](https://img.shields.io/github/stars/PaddleCV-SIG/PaddleLabel?color=orange)]() [![Fork](https://img.shields.io/github/forks/PaddleCV-SIG/PaddleLabel?color=orange)]() ![PyPI - Downloads](https://img.shields.io/pypi/dm/paddlelabel?color=orange) [![OS](https://img.shields.io/badge/os-linux%2C%20windows%2C%20macos-green.svg)]() 12 | 13 |
14 | 15 | ## 最新动态 16 | 17 | - 【2022-08-18】 :fire: PaddleLabel 0.1 版本发布! 18 | - 【分类】支持单分类与多分类标注及标签的导入导出。简单灵活实现自定义数据集分类标注任务并导出供[PaddleClas](https://github.com/PaddlePaddle/PaddleClas)进行训练。 19 | - 【检测】支持检测框标注及标签的导入导出。快速上手生成自己的检测数据集并应用到[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)。 20 | - 【分割】支持多边形、笔刷及交互式等多种标注方式,支持标注语义分割与实例分割两种场景。多种分割标注方式可灵活选择,方便将导出数据应用在[PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg)获取个性化定制模型。 21 | 22 | ## 简介 23 | 24 | PaddleLabel 是基于飞桨 PaddlePaddle 各个套件的特性提供的配套标注工具。它涵盖分类、检测、分割三种常见的计算机视觉任务的标注能力,具有手动标注和交互式标注相结合的能力。用户可以使用 PaddleLabel 方便快捷的标注自定义数据集并将导出数据用于飞桨提供的其他套件的训练预测流程中。 25 | 整个 PaddleLabel 包括三部分,本项目包含 PaddleLabel 的后端实现。 [PaddleLabel-Frontend](https://github.com/PaddleCV-SIG/PaddleLabel-Frontend)是基于 React 和 Ant Design 构建的 PaddleLabel 前端,[PaddleLabel-ML](https://github.com/PaddleCV-SIG/PaddleLabel-ML)是基于 PaddlePaddle 的自动和交互式标注的机器学习后端。 26 | 27 | ![demo720](https://user-images.githubusercontent.com/71769312/185099439-3230cf80-798d-4a81-bcae-b88bcb714daa.gif) 28 | 29 | ## 特性 30 | 31 | - **简单** 手动标注能力直观易操作,方便用户快速上手。 32 | - **高效** 支持交互式分割功能,分割精度及效率提升显著。 33 | - **灵活** 分类支持单分类和多分类的标注,分割支持多边形、笔刷及交互式分割等多种功能,方便用户根据场景需求切换标注方式。 34 | - **全流程** 与其他飞桨套件密切配合,方便用户完成数据标注、模型训练、模型导出等全流程操作。 35 | 36 | 37 | 38 | ## 使用教程 39 | 40 | **文档** 41 | 42 | - [安装指南](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/install.md) 43 | - [快速开始](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/quick_start.md) 44 | 45 | **进行标注** 46 | 47 | - [图像分类](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/project/classification.md) 48 | - [目标检测](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/project/detection.md) 49 | - [语义分割](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/project/semantic_segmentation.md) 50 | - [实例分割](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/project/instance_segmentation.md) 51 | 52 | **训练教程** 53 | 54 | - [如何用 PaddleClas 
进行训练](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/training/PdLabel_PdClas.md) 55 | - [如何用 PaddleDet 进行训练](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/training/PdLabel_PdDet.md) 56 | - [如何使用 PaddleSeg 进行训练](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/training/PdLabel_PdSeg.md) 57 | - [如何使用 PaddleX 进行训练](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/training/PdLabel_PdX.md) 58 | 59 | **AI Studio 项目** 60 | 61 | - [花朵分类](https://aistudio.baidu.com/aistudio/projectdetail/4337003) 62 | - [道路标志检测](https://aistudio.baidu.com/aistudio/projectdetail/4349280) 63 | - [图像分割](https://aistudio.baidu.com/aistudio/projectdetail/4353528) 64 | - [如何使用 PaddleX 进行训练](https://aistudio.baidu.com/aistudio/projectdetail/4383953) 65 | 66 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/Annotation_tool/Speech.md: -------------------------------------------------------------------------------- 1 | 这里整理了常用的语音标注工具,欢迎各位小伙伴贡献工具! 2 | 3 | 4 | ## 1. Praat 5 | 6 | + 工具描述: 语音合成标注工具 7 | + 工具地址: https://www.fon.hum.uva.nl/praat/ 8 | + 示意图: 9 | 10 | ![](https://user-images.githubusercontent.com/30135920/197728536-14cc083b-6f7a-40dd-b66a-a8a9fe56924f.png) 11 | 12 | ## 2. label-studio 13 | 14 | + 工具描述:多功能标注工具,可以用于语音识别,说话人识别等多种语音标注任务 15 | + 工具地址:https://labelstud.io/guide/index.html 16 | + 示意图 17 | 18 | ![image](https://user-images.githubusercontent.com/30135920/198205186-f99026f9-32a9-4b17-8e9b-9af18c119f41.png) 19 | 20 | 21 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/Annotation_tool/doccano.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ## Doccano 5 | - **下载地址**:https://github.com/doccano/doccano 6 | - **工具简介**:doccano是documment anotation的缩写,是一个开源的文本标注工具,我们可以用它为NLP任务的语料库进行打标。它支持情感分析,命名实体识别,文本摘要等任务: 7 |
8 | 9 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/datasets/3D.md: -------------------------------------------------------------------------------- 1 | ## 通用3D数据集 2 | 这里整理了常用3D方向数据集,持续更新中,欢迎各位小伙伴贡献数据集~ 3 | - [KITTI](#KITTI) 4 | - [nuScenes](#nuScenes) 5 | - [Waymo](#Waymo) 6 | 7 | 8 | 9 | ## 1、KITTI 10 | - **数据来源**:https://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d 11 | - **数据简介**: KITTI数据集由卡尔斯鲁厄理工学院发布,用于评估自动驾驶场景3D目标检测等感知任务。数据采集真实道路场景,共包含3712个训练样本,3769个验证样本以及7518个测试样本。每个样本都包含lidar和camera两种模态的数据,train和validation都有标注数据,test没有标注数据。需要注意其标注的3d bbox坐标是在camera坐标系下进行的,且只标注了camera FOV内的目标,如下图: 12 |
13 | 14 | - **下载地址**:https://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d 15 | 16 | 17 | 18 | ## 2、nuScenes 19 | - **数据来源**:https://www.nuscenes.org/nuscenes 20 | - **数据简介**: nuScenes数据集用于评估自动驾驶3D感知和规划任务,数据采集来自不同城市的1000个场景中,采集车上配备6个相机(CAM)、1个激光雷达(LIDAR)、5个毫米波雷达(RADAR)。采集的数据包括lidar和camera两种模态数据,共包含28k个训练样本,6k个验证样本,以及6k个测试样本,标注数据提供了物体的3d bbox坐标和类别信息: 21 |
22 | 23 | - **下载地址**:https://www.nuscenes.org/nuscenes 24 | - **其他说明**:nuScenes的camera数据是360度环视相机拍摄,部分相机的FOV具有重叠,环视数据可用于BEV任务。 25 | 26 | 27 | ## 3、Waymo 28 | - **数据来源**:https://waymo.com/open/data/perception/ 29 | - **数据简介**: Waymo是谷歌Waymo无人驾驶公司在2020年发布的数据集,包含Mothion和Perception两大类,用于自动驾驶3D感知和预测任务。采集设备包括5个Lidar,5个Camera。Perception数据中训练集包含798个segment,每个segment包含约200个frame,总共约158361个样本,验证集包含202个segment,总共约40077个样本,每个样本都包含了lidar和camera模态数据。标注数据提供了物体的3d bbox坐标和物体类别信息,标注的坐标均为右手坐标系: 30 | 31 | - **下载地址**:https://waymo.com/open/data/perception/ 32 | - **其他说明**:waymo数据更新多次版本,为了获取更准确的标注信息和应用其它任务,请下载v1.3.2及其之后的版本。 33 | 34 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/datasets/Clas.md: -------------------------------------------------------------------------------- 1 | # 图像分类任务数据集说明 2 | 3 | 本文档将介绍常用的图像分类任务数据集格式,以及图像分类领域的常见数据集介绍。 4 | 5 | --- 6 | 7 | ## 目录 8 | 9 | 10 | - 图像分类任务常见数据集介绍 11 | - [ImageNet1k](#1) 12 | - [Flowers102](#2) 13 | - [CIFAR10 / CIFAR100](#3) 14 | - [MNIST](#4) 15 | - [NUS-WIDE](#5) 16 | 17 | 18 | 19 | ## 图像分类任务常见数据集介绍 20 | 21 | 这里整理了常用的图像分类任务数据集,持续更新中,欢迎各位小伙伴补充完善~ 22 | 23 | 24 | ### 1、ImageNet1k 25 | 26 | [ImageNet](https://image-net.org/)项目是一个大型视觉数据库,用于视觉目标识别研究任务,该项目已手动标注了 1400 多万张图像。ImageNet-1k 是 ImageNet 数据集的子集,其包含 1000 个类别。训练集包含 1281167 个图像数据,验证集包含 50000 个图像数据。2010 年以来,ImageNet 项目每年举办一次图像分类竞赛,即 ImageNet 大规模视觉识别挑战赛(ILSVRC)。挑战赛使用的数据集即为 ImageNet-1k。到目前为止,ImageNet-1k 已经成为计算机视觉领域发展的最重要的数据集之一,其促进了整个计算机视觉的发展,很多计算机视觉下游任务的初始化模型都是基于该数据集训练得到的。 27 | 28 | 29 | 30 | 数据集 | 训练集大小 | 测试集大小 | 类别数 | 备注| 31 | :------:|:---------------:|:---------------------:|:-----------:|:-----------: 32 | [ImageNet1k](http://www.image-net.org/challenges/LSVRC/2012/)|1.2M| 50k | 1000 | 33 | 34 | 35 | ### 2 Flowers102 36 | 数据简介:一个 102 个类别的数据集,由 102 个花卉类别组成。被选为英国常见的花。每个类包含 40 到 258 张图像。可以在此类别统计页面上找到类别的详细信息和每个类别的图像数量。 37 | 38 | 39 | 40 | 数据集 | 训练集大小 | 测试集大小 | 类别数 | 备注| 41 | 
:------:|:---------------:|:---------------------:|:-----------:|:-----------: 42 | [flowers102](https://www.robots.ox.ac.uk/~vgg/data/flowers/102/)|1k | 6k | 102 | 43 | 44 | 将下载的数据解压后,可以看到以下目录 45 | 46 | ```shell 47 | jpg/ 48 | setid.mat 49 | imagelabels.mat 50 | ``` 51 | 52 | 53 | 54 | ### 3 CIFAR10 / CIFAR100 55 | 56 | CIFAR-10 数据集由 10 个类的 60000 个彩色图像组成,图像分辨率为 32x32,每个类有 6000 个图像,其中训练集 5000 张,验证集 1000 张,10 个不同的类代表飞机、汽车、鸟类、猫、鹿、狗、青蛙、马、轮船和卡车。CIFAR-100 数据集是 CIFAR-10 的扩展,由 100 个类的 60000 个彩色图像组成,图像分辨率为 32x32,每个类有 600 个图像,其中训练集 500 张,验证集 100 张。 57 | 58 | 59 | 数据集地址:http://www.cs.toronto.edu/~kriz/cifar.html 60 | 61 | 62 | ### 4 MNIST 63 | 64 | 数据简介:MMNIST 是一个非常有名的手写体数字识别数据集,在很多资料中,这个数据集都会被用作深度学习的入门样例。其包含 60000 张图片数据,50000 张作为训练集,10000 张作为验证集,每张图片的大小为 28 * 28。 65 | 66 | 数据集地址:http://yann.lecun.com/exdb/mnist/ 67 | 68 | 69 | ### 5 NUS-WIDE 70 | 71 | NUS-WIDE 是一个多分类数据集。该数据集包含 269648 张图片, 81 个类别,每张图片被标记为该 81 个类别中的某一类或某几类。 72 | 73 | 数据集地址:https://lms.comp.nus.edu.sg/wp-content/uploads/2019/research/nuswide/NUS-WIDE.html 74 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/datasets/Detection.md: -------------------------------------------------------------------------------- 1 | ## 通用检测数据集 2 | 这里整理了常用检测数据集,持续更新中,欢迎各位小伙伴贡献数据集~ 3 | - [COCO](#COCO) 4 | - [VOC](#VOC) 5 | - [SCUT_FIR行人检测数据集](#SCUT_FIR行人检测数据集) 6 | - [JHU-CROWD++](#JHU-CROWD++) 7 | - [CIHP人体解析数据集](#CIHP人体解析数据集) 8 | - [AHU-Crowd人群数据集](#AHU-Crowd人群数据集) 9 | - [AudioVisual人群计数](#AudioVisual人群计数) 10 | - [UCF-CC-50](#UCF-CC-50) 11 | - [北京BRT数据集](#北京BRT数据集) 12 | 13 | 14 | ## 1、COCO 15 | - **数据来源**:https://cocodataset.org/#home 16 | - **数据简介**:COCO数据是COCO 比赛使用的数据。同样的,COCO比赛数也包含多个比赛任务,其标注文件中包含多个任务的标注内容。 COCO数据集指的是COCO比赛使用的数据。用户自定义的COCO数据,json文件中的一些字段,请根据实际情况选择是否标注或是否使用默认值。 17 |
18 |
19 |
20 | 21 | - **下载地址**:https://cocodataset.org/ 22 | 23 | 24 | 25 | ## 2、VOC 26 | - **数据来源**:http://host.robots.ox.ac.uk/pascal/VOC/ 27 | - **数据简介**:VOC数据是Pascal VOC 比赛使用的数据。Pascal VOC比赛不仅包含图像分类分类任务,还包含图像目标检测、图像分割等任务,其标注文件中包含多个任务的标注内容。 VOC数据集指的是Pascal VOC比赛使用的数据。用户自定义的VOC数据,xml文件中的非必须字段,请根据实际情况选择是否标注或是否使用默认值。),如下图所示: 28 |
29 |
30 |
31 | 32 | - **下载地址**:http://host.robots.ox.ac.uk/pascal/VOC/ 33 | 34 | 35 | ## 3、SCUT_FIR行人检测数据集 36 | - **数据来源**:https://sourl.cn/4VK3Bn 37 | - **数据简介**:SCUT FIR Pedestrian Datasets 是一个大型远红外行人检测数据集。它由大约 11 小时长的图像序列( 帧)组成,速度为 25 Hz,以低于 80 km/h 的速度在不同的交通场景中行驶。图像序列来自中国广州市中心、郊区、高速公路和校园 4 种场景下的 11 个路段。该数据集注释了 211,011 帧,总共 477,907 个边界框,围绕 7,659 个独特的行人,如下图所示: 38 |
39 |
40 |
41 | 42 | - **下载地址**:https://sourl.cn/4VK3Bn 43 | 44 | 45 | ## 4、JHU-CROWD++ 46 | - **数据来源**:https://sourl.cn/mgxHEY 47 | - **数据简介**:包含 4,372 张图像和 151 万条注释的综合数据集。与现有数据集相比,所提出的数据集是在各种不同的场景和环境条件下收集的。此外,该数据集提供了相对丰富的注释集,如点、近似边界框、模糊级别等。如下图所示: 48 |
49 |
50 |
51 | 52 | - **下载地址**:https://sourl.cn/mgxHEY 53 | 54 | 55 | ## 5、CIHP人体解析数据集 56 | - **数据来源**:https://sourl.cn/W3Tm2J 57 | - **数据简介**:Crowd Instance-level Human Parsing (CIHP) 数据集包含 38,280 张多人图像,这些图像具有精细的注释、高外观可变性和复杂性。该数据集可用于人体部分分割任务。如下图所示: 58 |
59 |
60 |
61 | 62 | - **下载地址**:https://sourl.cn/mgxHEY 63 | 64 | 65 | ## 6、AHU-Crowd人群数据集 66 | - **数据来源**:https://sourl.cn/XFJDCh 67 | - **数据简介**:人群数据集是从各种来源获得的,例如 UCF 和数据驱动的人群数据集,以评估所提出的框架。序列多样,代表了朝圣、车站、马拉松、集会和体育场等各种场景中公共空间的密集人群。此外,这些序列具有不同的视野、分辨率,并表现出多种运动行为,涵盖了明显和微妙的不稳定性。如下图所示: 68 |
69 |
70 |
71 | 72 | - **下载地址**:https://sourl.cn/XFJDCh 73 | 74 | 75 | ## 7、AudioVisual人群计数 76 | - **数据来源**:https://sourl.cn/wfd7wD 77 | - **数据简介**:一个用于人群计数的新数据集,该数据集由中国不同位置的大约 2000 个带注释的图像令牌组成,每个图像对应一个 1 秒的音频剪辑和一个密度图。图像处于不同的照明条件下。如下图所示: 78 |
79 |
80 |
81 | 82 | - **下载地址**:https://sourl.cn/wfd7wD 83 | 84 | 85 | ## 8、UCF-CC-50 86 | - **数据来源**:http://c.nxw.so/9LYoK 87 | - **数据简介**:该数据集包含极其密集人群的图像。图像主要是从 FLICKR 收集的。如下图所示: 88 |
89 |
90 |
91 | 92 | - **下载地址**:http://c.nxw.so/9LYoK 93 | 94 | 95 | ## 9、北京BRT数据集 96 | - **数据来源**:http://c.nxw.so/c1PV9 97 | - **数据简介**:该数据集包含 1,280 张图像和 16,795 个标记的行人,用于人群分析。该数据集使用 720 张图像进行训练,使用 560 张图像进行测试。名为 frame 的文件夹包含人群图像。名为 ground_truth 的文件夹包含ground_truth。例如,'1-20170325134657.jpg'对应于'1-20170325134657.mat',以及这张图片中第i个人的真实位置,其中每一行是位置[x,y]。如下图所示: 98 |
99 |
100 |
101 | 102 | - **下载地址**:http://c.nxw.so/c1PV9 -------------------------------------------------------------------------------- /docs/zh_CN/datasets/datasets/Image_Denoising.md: -------------------------------------------------------------------------------- 1 | ## 图像去噪 2 | 这里整理了常用图像去噪数据集,持续更新中,欢迎各位小伙伴贡献数据集~ 3 | - [PolyU数据集](#PolyU数据集) 4 | - [FMD(荧光显微镜去噪)数据集](#FMD(荧光显微镜去噪)数据集) 5 | - [SIDD智能手机图像去噪数据集](#SIDD智能手机图像去噪数据集) 6 | - [SIDD-small数据集](#SIDD-small数据集) 7 | - [Super_Resolution_Benchmarks](#Super_Resolution_Benchmarks) 8 | 9 | 具体如下: 10 | 11 | 12 | #### 1、PolyU数据集 13 | - **数据来源**:https://sourl.cn/rMsdE8 14 | - **数据简介**: 大多数以前的图像去噪方法都集中在加性高斯白噪声(AWGN)上。然而,随着计算机视觉技术的进步,现实世界中的噪声图像去噪问题也随之而来。为了在实现并发真实世界图像去噪数据集的同时促进对该问题的研究,作者们构建了一个新的基准数据集,其中包含不同自然场景的综合真实世界噪声图像。这些图像是由不同的相机在不同的相机设置下拍摄的,如下图所示: 15 |
16 |
17 |
18 | 19 | - **下载地址**:https://sourl.cn/rMsdE8 20 | 21 | 22 | #### 2、FMD(荧光显微镜去噪)数据集 23 | - **数据来源**:https://sourl.cn/Wyqrui 24 | - **数据简介**: 荧光显微镜使现代生物学取得了巨大的发展。由于其固有的微弱信号,荧光显微镜不仅比摄影噪声大得多,而且还呈现出泊松-高斯噪声,其中泊松噪声或散粒噪声是主要的噪声源。为了获得干净的荧光显微镜图像,非常需要有专门设计用于对荧光显微镜图像进行降噪的有效降噪算法和数据集。虽然存在这样的算法,但没有这样的数据集可用。在本文中,我们通过构建专用于泊松-高斯去噪的数据集 - 荧光显微镜去噪 (FMD) 数据集来填补这一空白。该数据集由 12,000 个真实荧光显微镜图像组成,这些图像使用商业共焦、双光子、宽视野显微镜和代表性生物样本,如细胞、斑马鱼和小鼠脑组织, 如下图所示: 25 |
26 |
27 |
28 | 29 | - **下载地址**:https://sourl.cn/Wyqrui 30 | 31 | 32 | #### 3、SIDD智能手机图像去噪数据集 33 | - **数据来源**:https://sourl.cn/jdpJZ6 34 | - **数据简介**: 该数据集包含以下智能手机在不同光照条件下拍摄的 160 对噪声/真实图像: 35 | GP: Google Pixel 36 | IP: iPhone 7 37 | S6: Samsung Galaxy S6 38 | Edge N6: Motorola Nexus 6 39 | G4: LG G4, 如下图所示: 40 |
41 |
42 |
43 | 44 | - **下载地址**:https://sourl.cn/jdpJZ6 45 | 46 | 47 | #### 4、SIDD-small数据集 48 | - **数据来源**:https://sourl.cn/kaYGxd 49 | - **数据简介**: 一个小型版本的数据集,它由代表 160 个场景实例的160 个图像对(噪声和ground-truth)组成, 如下图所示: 50 |
51 |
52 |
53 | 54 | - **下载地址**:https://sourl.cn/kaYGxd 55 | 56 | 57 | 58 | #### 5、Super_Resolution_Benchmarks 59 | - **数据来源**:https://sourl.cn/Bp6QZs 60 | - **数据简介**: 来自于AIM 2022 压缩图像和视频超分辨率挑战赛”中的前 5 名解决方案工作:Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration, 如下图所示: 61 |
62 |
63 |
64 | 65 | - **下载地址**:https://sourl.cn/Bp6QZs 66 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/datasets/Keypoints.md: -------------------------------------------------------------------------------- 1 | ## 关键点检测数据集 2 | 这里整理了常用关键点检测数据集,持续更新中,欢迎各位小伙伴贡献数据集~ 3 | - [手部姿势关键点检测数据集](#手部姿势关键点检测数据集) 4 | - [动物姿势数据集](#动物姿势数据集) 5 | - [电影人物关节关键点数据集](#电影人物关节关键点数据集) 6 | - [MPIIGaze_Dataset](#MPIIGaze_Dataset) 7 | - [人体足部关键点数据集](#人体足部关键点数据集) 8 | - [人群姿态数据集](#人群姿态数据集) 9 | 10 | 具体如下: 11 | 12 | 13 | #### 1、手部姿势关键点检测数据集 14 | - **数据来源**:http://u3v.cn/6d3lZV 15 | - **数据简介**: 数据集由序列构成。在每个序列中,您都可以找到组成它的帧。一个帧由4个彩色图像、4组投影在每个图像平面中的2D关节、4个边界框、1组Leap Motion Controller提供的3D点和4组重新投影到每个相机坐标帧的3D点组成。,如下图所示: 16 |
17 |
18 |
19 | 20 | - **下载地址**:http://u3v.cn/6d3lZV 21 | 22 | 23 | 24 | #### 2、动物姿势数据集 25 | - **数据来源**:http://u3v.cn/6kDLfr 26 | - **数据简介**:该数据集提供了五个类别的动物姿势注释:狗、猫、牛、马、羊,在4,000 多张图像中总共有6,000多个实例。此外,该数据集还包含其他7 个动物类别的边界框注释。在论文中查找详细信息。一共标注了 20 个关键点:两只眼睛、喉咙、鼻子、马肩隆、两个耳根、尾根、四个肘部、四个膝盖、四个爪子。 27 |
28 |
29 |
30 | 31 | - **下载地址**:http://u3v.cn/6kDLfr 32 | 33 | 34 | #### 3、电影人物关节关键点数据集 35 | - **数据来源**:http://u3v.cn/5tW5zx 36 | - **数据简介**:该数据集从流行的好莱坞电影中自动收集了5003个图像数据。这些图像是通过在30部电影的每10帧上运行一个最先进的人检测器获得的。然后,被高度自信地检测到的人(大约2万名候选人)被送往众包市场亚马逊机械土耳其公司(Amazon Mechanical Turk),以获得地面真实标签。每幅图片都由五名特克斯人以0.01美元的价格标注,以标注10个上身关节。在每个图像中取五个标记的中位数,以对离群值注释保持稳健。 37 | 38 | 39 | #### 4、MPIIGaze_Dataset 40 | - **数据来源**:http://u3v.cn/5BsiEe 41 | - **数据简介**:MPIIGaze数据集包含在三个多月的日常笔记本电脑使用过程中从15名参与者收集的213659张图像。在外观和照明方面,数据集比现有的数据集变化更大。 42 |
43 |
44 |
45 | 46 | - **下载地址**:http://u3v.cn/5BsiEe 47 | 48 | 49 | #### 5、人体足部关键点数据集 50 | - **数据来源**:http://u3v.cn/5IYvIV 51 | - **数据简介**:现有的人体姿势数据集包含有限的身体部位类型。MPII 数据集标注了脚踝、膝盖、臀部、肩膀、肘部、手腕、颈部、躯干和头顶,而 COCO 还包括一些面部关键点。对于这两个数据集,足部注释仅限于脚踝位置。然而,图形应用程序(例如头像重定向或 3D 人体形状重建)需要足部关键点,例如大脚趾和脚跟。在没有足部信息的情况下,这些方法会遇到诸如糖果包装效果、地板穿透和足部滑冰等问题。为了解决这些问题,COCO 数据集中的一小部分脚实例使用 Clickworker 平台进行标记。它分为来自 COCO 训练集的 14K 注释和来自验证集的 545 个注释。总共标记了 6 个英尺关键点。考虑足部关键点的 3D 坐标而不是表面位置。例如,对于确切的脚趾位置,数据集标记了指甲和皮肤连接之间的区域,并且还通过标记脚趾的中心而不是表面来考虑深度。 52 | 53 | 54 | #### 6、人群姿态数据集 55 | - **数据来源**:http://u3v.cn/65x8MQ 56 | - **数据简介**:多人姿态估计是许多计算机视觉任务的基础,近年来取得了重大进展。然而,以前很少有方法研究拥挤场景中的姿态估计问题,而在许多场景中,这仍然是一个具有挑战性和不可避免的问题。此外,目前的基准无法对此类案件进行适当评估。在本文中,我们提出了一种新的有效方法来解决人群中的姿势估计问题,并提出了一个新的数据集来更好地评估算法。 57 |
58 |
59 |
60 | 61 | - **下载地址**:http://u3v.cn/65x8MQ -------------------------------------------------------------------------------- /docs/zh_CN/datasets/datasets/Speech.md: -------------------------------------------------------------------------------- 1 | # 通用语音数据集 2 | 3 | 这里整理了常用语音数据集,持续更新中,欢迎各位小伙伴贡献数据集~ 4 | - [语音识别](#语音识别) 5 | - [语音合成](#语音合成) 6 | - [声音分类](#声音分类) 7 | - [声纹识别](#声纹识别) 8 | - [语音唤醒](#语音唤醒) 9 | 10 | ## 语音识别 11 | 12 | + WenetSpeech 13 | 14 | 数据来源:https://wenet.org.cn/WenetSpeech/ 15 | 16 | 数据简介: 17 | > 从 YouTube 和 Podcast 收集的 10000 多个小时的多域转录普通话语料库。采用光学字符识别 (OCR) 和自动语音识别 (ASR) 技术分别标记每个 YouTube 和 Podcast 录音。为了提高语料库的质量,我们使用一种新颖的端到端标签错误检测方法来进一步验证和过滤数据。10,000 +小时高标签数据,置信度 >= 95%,用于监督训练;2400 +小时弱标签数据0.6 < 置信度 < 0.95,用于半监督或噪声训练等;22400 +总共小时音频,由标记和未标记的数据组成,用于无监督训练或预训练等。 18 | 下载地址:https://wenet.org.cn/WenetSpeech/#download 19 | 20 | 21 | ## 语音合成 22 | 23 | + CSMSC 24 | 数据来源:https://www.data-baker.com/open_source.html 25 | 数据简介: 26 | > 中文标准女声音库】采集对象的音色风格知性阳光、亲切自然,专业标准普通话女声,听感乐观积极。录制环境为专业录音室和录音软件,录音环境和设备自始至终保持不变,录音环境的信噪比不低于35dB;单声道录音,用48KHz 16比特采样频率、PCM WAV格式。录音语料涵盖各类新闻、小说、科技、娱乐、对话等领域,语料设计综合语料样本量,力求在有限的语料数据量内,对音节音子、类型、音调、音连以及韵律等尽可能全面的覆盖。根据合成语音标注标准对音库进行文本音字校对、韵律层级标注、语音文件边界切分标注。 27 | 28 | 下载地址:https://www.data-baker.com/open_source.html 29 | 30 | ## 声音分类 31 | 32 | + esc50 33 | 34 | 数据来源:https://github.com/karolpiczak/ESC-50 35 | 36 | 数据简介: 37 | > ESC-50: Dataset for Environmental Sound Classification 是一个包含有 2000 个带标签的时长为 5 秒的环境声音样本,音频样本采样率为 44,100Hz 的单通道音频文件,所有样本根据标签被划分为 50 个类别,每个类别有 40 个样本。 38 | 39 | 40 | 41 | ## 声纹识别 42 | 43 | + voxceleb 44 | 45 | 数据来源:https://www.robots.ox.ac.uk/~vgg/data/voxceleb/index.html#about 46 | 47 | 数据简介: 48 | > VoxCeleb 是一个视听数据集,由从上传到 YouTube 的采访视频中提取的人类语音短片组成,7,000 + 49 | 50 | 扬声器VoxCeleb 包含来自不同种族、口音、职业和年龄的演讲者的演讲;话语长度100万+话语所有说话的面部轨迹都是“在野外”捕获的,包括背景聊天、笑声、重叠的语音、姿势变化和不同的照明条件;2,000 +小时 51 | VoxCeleb 由音频和视频组成。每个片段至少 3 秒长。 52 | 53 | 54 | ## 语音唤醒 55 | 56 | 57 | + hey_snips 58 | 59 | 数据来源:https://github.com/sonos/keyword-spotting-research-datasets 60 | 
61 | 数据简介: 62 | > 唤醒词是“Hey Snips”,两个词之间没有停顿。两个数据集都包含大量的英语口音和录音环境。请注意,负样本是在与唤醒词话语相同的条件下记录的,因此来自相同的域(说话者、硬件、环境等)。 -------------------------------------------------------------------------------- /python_whl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | __all__ = ["EasyData"] 16 | from .easydata import EasyData 17 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | Pillow 3 | scipy>=1.0.0 4 | faiss_cpu==1.7.1.post2 5 | PyYAML>=5.1 6 | tqdm 7 | Shapely 8 | pyclipper 9 | matplotlib 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from io import open 16 | from setuptools import setup 17 | 18 | 19 | def get_requirements(): 20 | with open('requirements.txt', encoding="utf-8-sig") as f: 21 | requirements = f.readlines() 22 | return requirements 23 | 24 | 25 | setup( 26 | name='easydata-python', 27 | packages=['easydata', 'easydata.deploy'], 28 | package_dir={ 29 | 'easydata': 'python_whl', 30 | 'easydata.deploy': 'deploy' 31 | }, 32 | include_package_data=True, 33 | entry_points={"console_scripts": ["easydata=easydata.easydata:main"]}, 34 | version='0.0.0', 35 | install_requires=get_requirements(), 36 | license='Apache License 2.0', 37 | description= 38 | 'A toolkit for processing data powered by PaddlePaddle, which include data augmentation, data cleaning and data annotation.', 39 | long_description= 40 | 'EasyData aims to create a universal, leading and practical data processing toolkit, that supports automatic data augmentation and cleaning, and provides data annotation tools and a collection of open source datasets to help developers obtain high-quality training and inference data more easily, thereby promoting the practical effect of AI algorithms.', 41 | long_description_content_type='text/markdown', 42 | url='https://github.com/PaddlePaddle/EasyData', 43 | download_url='https://github.com/PaddlePaddle/EasyData.git', 44 | keywords=[ 45 | 'PaddlePaddle', 46 | 'DataAug', 47 | 'DataClean', 48 | ], 49 | classifiers=[ 50 | 'Intended Audience :: Developers', 51 | 'Operating System :: OS Independent', 52 | 'Natural Language :: Chinese 
(Simplified)', 53 | 'Programming Language :: Python :: 3.7', 54 | 'Programming Language :: Python :: 3.8', 55 | 'Programming Language :: Python :: 3.9', 'Topic :: Utilities' 56 | ], 57 | ) 58 | -------------------------------------------------------------------------------- /test_tipc/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/test_tipc/.gitkeep -------------------------------------------------------------------------------- /tools/predict.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import os 16 | import sys 17 | 18 | __dir__ = os.path.dirname(os.path.abspath(__file__)) 19 | sys.path.append(os.path.abspath(os.path.join(__dir__, '../deploy/'))) 20 | 21 | from ppcv.core.config import ArgsParser 22 | 23 | from python.dataclean import DataClean 24 | from python.dataaug import DataAug 25 | from utils.utils import load_yaml 26 | 27 | 28 | def argsparser(): 29 | parser = ArgsParser() 30 | 31 | parser.add_argument("-c", 32 | "--config", 33 | type=str, 34 | default=None, 35 | help=("Path of configure"), 36 | required=True) 37 | parser.add_argument( 38 | "--input", 39 | type=str, 40 | default=None, 41 | help= 42 | "Path of input, suport image file, image directory and video file.", 43 | required=False) 44 | parser.add_argument("--output_dir", 45 | type=str, 46 | default="output", 47 | help="Directory of output visualization files.") 48 | parser.add_argument( 49 | "--run_mode", 50 | type=str, 51 | default='paddle', 52 | help="mode of running(paddle/trt_fp32/trt_fp16/trt_int8)") 53 | parser.add_argument( 54 | "--device", 55 | type=str, 56 | default='cpu', 57 | help= 58 | "Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU." 59 | ) 60 | return parser.parse_args() 61 | 62 | 63 | if __name__ == '__main__': 64 | args = argsparser() 65 | config_path = args.config 66 | yaml_data = load_yaml(config_path) 67 | if "DataClean" in yaml_data: 68 | dataclean = DataClean(args) 69 | dataclean.run() 70 | elif "DataGen" in yaml_data: 71 | dataaug = DataAug(args) 72 | dataaug.run() 73 | else: 74 | raise Exception("Error config") 75 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | 2 | --------------------------------------------------------------------------------