├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .pre_commit_tools └── codestyle │ ├── copyright.hook │ └── pylint_pre_commit.hook ├── LICENSE ├── MANIFEST.in ├── README.md ├── demo ├── clarity_assessment │ ├── blured_demo.jpg │ └── clarity_demo.jpg ├── clas_data │ ├── train │ │ ├── n01440764_15008.JPEG │ │ ├── n01530575_10039.JPEG │ │ ├── n01601694_4224.JPEG │ │ ├── n01641577_14447.JPEG │ │ ├── n01682714_8438.JPEG │ │ └── n01698640_9242.JPEG │ └── train_list.txt ├── code_exists │ ├── contains_code_demo.jpg │ └── no_code_demo.jpg ├── image_orientation │ ├── 0.jpg │ ├── 1.jpg │ ├── 2.jpg │ └── 3.jpg ├── ocr_data │ ├── images │ │ ├── lsvt_train_images_0.jpg │ │ ├── lsvt_train_images_1325.jpg │ │ ├── lsvt_train_images_1339.jpg │ │ ├── lsvt_train_images_1410.jpg │ │ └── lsvt_train_images_275.jpg │ └── train_list.txt ├── ocr_rec │ ├── bg │ │ ├── n01530575_10039.JPEG │ │ └── n01601694_4224.JPEG │ └── corpus.txt └── shitu_data │ ├── train │ ├── 0000001.jpg │ ├── 00005129.jpg │ ├── 111687027218_10.JPG │ ├── 2250416.jpg │ └── 2289835.jpg │ └── train_list.txt ├── deploy ├── configs │ ├── dataaug │ │ └── ocr_rec_gen.yaml │ ├── dataaug_clas.yaml │ ├── dataaug_ocr_img2img.yaml │ ├── dataaug_ocr_text2img.yaml │ ├── dataaug_shitu.yaml │ ├── dataclean.yaml │ └── ppcv │ │ ├── big_model_classification.yaml │ │ ├── clarity_assessment.yaml │ │ ├── code_exists.yaml │ │ ├── feature_extract.yaml │ │ ├── image_orientation.yaml │ │ └── text_recognition.yaml ├── ppcv │ ├── __init__.py │ ├── core │ │ ├── __init__.py │ │ ├── config.py │ │ ├── framework.py │ │ └── workspace.py │ ├── engine │ │ ├── __init__.py │ │ └── pipeline.py │ ├── model_zoo │ │ ├── MODEL_ZOO │ │ ├── __init__.py │ │ └── model_zoo.py │ ├── ops │ │ ├── __init__.py │ │ ├── base.py │ │ ├── connector │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── keyframes_extract_helper.py │ │ │ ├── op_connector.py │ │ │ └── table_matcher.py │ │ ├── general_data_obj.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ 
│ ├── classification │ │ │ │ ├── __init__.py │ │ │ │ ├── inference.py │ │ │ │ ├── postprocess.py │ │ │ │ └── preprocess.py │ │ │ ├── detection │ │ │ │ ├── __init__.py │ │ │ │ ├── inference.py │ │ │ │ ├── postprocess.py │ │ │ │ └── preprocess.py │ │ │ ├── feature_extraction │ │ │ │ ├── __init__.py │ │ │ │ ├── inference.py │ │ │ │ ├── postprocess.py │ │ │ │ └── preprocess.py │ │ │ ├── keypoint │ │ │ │ ├── __init__.py │ │ │ │ ├── inference.py │ │ │ │ ├── postprocess.py │ │ │ │ └── preprocess.py │ │ │ ├── ocr_crnn_recognition │ │ │ │ ├── __init__.py │ │ │ │ ├── inference.py │ │ │ │ ├── postprocess.py │ │ │ │ └── preprocess.py │ │ │ └── ocr_db_detection │ │ │ │ ├── __init__.py │ │ │ │ ├── inference.py │ │ │ │ ├── postprocess.py │ │ │ │ └── preprocess.py │ │ ├── output │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── classification.py │ │ │ ├── detection.py │ │ │ ├── feature_extraction.py │ │ │ ├── keypoint.py │ │ │ └── ocr_rec.py │ │ └── predictor.py │ └── utils │ │ ├── __init__.py │ │ ├── download.py │ │ ├── helper.py │ │ ├── logger.py │ │ └── timer.py ├── python │ ├── dataaug │ │ ├── __init__.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── imaug │ │ │ │ ├── random_crop_data.py │ │ │ │ └── text_image_aug │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── augment.py │ │ │ │ │ └── warp_mls.py │ │ │ ├── postprocess │ │ │ │ ├── __init__.py │ │ │ │ ├── attr_rec.py │ │ │ │ ├── threshoutput.py │ │ │ │ └── topk.py │ │ │ ├── preprocess │ │ │ │ ├── __init__.py │ │ │ │ ├── batch_ops │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── batch_operators.py │ │ │ │ └── ops │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── autoaugment.py │ │ │ │ │ ├── cutout.py │ │ │ │ │ ├── fmix.py │ │ │ │ │ ├── functional.py │ │ │ │ │ ├── grid.py │ │ │ │ │ ├── hide_and_seek.py │ │ │ │ │ ├── operators.py │ │ │ │ │ ├── randaugment.py │ │ │ │ │ ├── random_erasing.py │ │ │ │ │ └── timm_autoaugment.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ └── get_image_list.py │ │ ├── gen_img.py │ │ ├── gen_ocr_rec.py │ │ ├── predict │ │ │ ├── 
__init__.py │ │ │ ├── build_gallery.py │ │ │ ├── postprocess.py │ │ │ ├── predict_cls.py │ │ │ ├── predict_rec.py │ │ │ └── preprocess.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── data_utils.py │ │ │ ├── get_image_list.py │ │ │ ├── imagenet1k_label_list.txt │ │ │ ├── liner.py │ │ │ ├── logger.py │ │ │ ├── math_utils.py │ │ │ ├── noiser.py │ │ │ ├── predictor.py │ │ │ ├── remaper.py │ │ │ ├── renderer.py │ │ │ └── utility.py │ └── dataclean │ │ ├── __init__.py │ │ └── demos │ │ ├── paddleclas_demo.py │ │ └── paddleocr_demo.py └── utils │ ├── __init__.py │ ├── label_map │ ├── clarity_assessment_label_list.txt │ ├── code_exists_label_list.txt │ └── image_orientation_label_list.txt │ └── utils.py ├── docs ├── en │ └── .gitkeep ├── images │ └── PP-DataAug │ │ └── .gitkeep └── zh_CN │ ├── DataAug │ ├── DataAug.md │ └── quick_start.md │ ├── DataClean │ ├── DataClean.md │ ├── blured_image_filtering.md │ ├── code_image_filtering.md │ ├── image_orientation_correction.md │ └── quick_start.md │ └── datasets │ ├── .gitkeep │ ├── Annotation_tool │ ├── EISeg.md │ ├── EIVideo.md │ ├── PPOCRLabelv2.md │ ├── PaddleLabel.md │ ├── Speech.md │ ├── Style_Text.md │ └── doccano.md │ ├── dataset.md │ └── datasets │ ├── 3D.md │ ├── Clas.md │ ├── Detection.md │ ├── Image_Denoising.md │ ├── Keypoints.md │ ├── NLP.md │ ├── OCR.md │ ├── Segmentation.md │ ├── Speech.md │ └── Video.md ├── python_whl ├── __init__.py └── easydata.py ├── requirements.txt ├── setup.py ├── test_tipc └── .gitkeep └── tools ├── predict.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | venv 4 | .DS_Store 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/.gitmodules -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Following config of Paddle. 2 | # https://github.com/PaddlePaddle/Paddle/blob/4bbb0b3878970d2c8ed4dd9183af9a293900909a/.pre-commit-config.yaml 3 | # Exclude all third-party libraries and auto-generated files globally 4 | repos: 5 | - repo: https://github.com/Lucas-C/pre-commit-hooks.git 6 | rev: v1.1.14 7 | hooks: 8 | - id: remove-crlf 9 | - id: remove-tabs 10 | files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|xpu|kps)$ 11 | args: [--whitespaces-count, '2'] 12 | - id: remove-tabs 13 | files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ 14 | args: [--whitespaces-count, '4'] 15 | - repo: https://github.com/google/yapf 16 | rev: v0.32.0 17 | hooks: 18 | - id: yapf 19 | files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ 20 | - repo: https://github.com/PyCQA/flake8 21 | rev: 4.0.1 22 | hooks: 23 | - id: flake8 24 | - repo: https://github.com/pre-commit/pre-commit-hooks 25 | rev: v4.1.0 26 | hooks: 27 | - id: check-added-large-files 28 | - id: check-merge-conflict 29 | - id: check-symlinks 30 | - id: detect-private-key 31 | - id: end-of-file-fixer 32 | - id: sort-simple-yaml 33 | files: (op|backward|op_[a-z_]+)\.yaml$ 34 | - id: trailing-whitespace 35 | files: (.*\.(py|bzl|md|rst|c|cc|cxx|cpp|cu|h|hpp|hxx|xpu|kps|cmake|yaml|yml|hook)|BUILD|.*\.BUILD|WORKSPACE|CMakeLists\.txt)$ 36 | - repo: local 37 | hooks: 38 | - id: pylint-doc-string 39 | name: pylint 40 | description: Check python docstring style using docstring_checker. 
41 | entry: bash ./.pre_commit_tools/codestyle/pylint_pre_commit.hook 42 | language: system 43 | files: \.(py)$ 44 | - repo: local 45 | hooks: 46 | - id: copyright_checker 47 | name: copyright_checker 48 | entry: python ./.pre_commit_tools/codestyle/copyright.hook 49 | language: system 50 | files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|xpu|kps|py|sh)$ 51 | -------------------------------------------------------------------------------- /.pre_commit_tools/codestyle/copyright.hook: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import argparse 16 | import re 17 | import sys 18 | import os 19 | import datetime 20 | 21 | COPYRIGHT = '''Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 22 | 23 | Licensed under the Apache License, Version 2.0 (the "License"); 24 | you may not use this file except in compliance with the License. 25 | You may obtain a copy of the License at 26 | 27 | http://www.apache.org/licenses/LICENSE-2.0 28 | 29 | Unless required by applicable law or agreed to in writing, software 30 | distributed under the License is distributed on an "AS IS" BASIS, 31 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
32 | See the License for the specific language governing permissions and 33 | limitations under the License.''' 34 | 35 | def _generate_copyright(comment_mark): 36 | copyright=COPYRIGHT.split(os.linesep) 37 | header = copyright[0].rstrip() 38 | 39 | p = re.search('(\d{4})', header).group(0) 40 | now = datetime.datetime.now() 41 | 42 | header = header.replace(p,str(now.year)) 43 | 44 | ans=[comment_mark + " " + header + os.linesep] 45 | for idx, line in enumerate(copyright[1:]): 46 | ans.append(comment_mark + " " + line.rstrip() + os.linesep) 47 | 48 | return ans 49 | 50 | def _get_comment_mark(path): 51 | lang_type=re.compile(r"\.(py|sh)$") 52 | if lang_type.search(path) is not None: 53 | return "#" 54 | 55 | lang_type=re.compile(r"\.(h|c|hpp|cc|cpp|cu|go|cuh|proto)$") 56 | if lang_type.search(path) is not None: 57 | return "//" 58 | 59 | return None 60 | 61 | 62 | RE_ENCODE = re.compile(r"^[ \t\v]*#.*?coding[:=]", re.IGNORECASE) 63 | RE_COPYRIGHT = re.compile(r".*Copyright \(c\) \d{4}", re.IGNORECASE) 64 | RE_SHEBANG = re.compile(r"^[ \t\v]*#[ \t]?\!") 65 | 66 | def _check_copyright(path): 67 | head=[] 68 | try: 69 | with open(path, 'r', encoding='utf-8') as f: 70 | head = [next(f) for x in range(4)] 71 | except StopIteration: 72 | pass 73 | 74 | for idx, line in enumerate(head): 75 | if RE_COPYRIGHT.search(line) is not None: 76 | return True 77 | 78 | return False 79 | 80 | def generate_copyright(path, comment_mark): 81 | original_contents = open(path, 'r', encoding="utf-8").readlines() 82 | head = original_contents[0:4] 83 | 84 | insert_line_no=0 85 | for i, line in enumerate(head): 86 | if RE_ENCODE.search(line) or RE_SHEBANG.search(line): 87 | insert_line_no=i+1 88 | 89 | copyright = _generate_copyright(comment_mark) 90 | if insert_line_no == 0: 91 | new_contents = copyright 92 | if len(original_contents) > 0 and len(original_contents[0].strip()) != 0: 93 | new_contents.append(os.linesep) 94 | new_contents.extend(original_contents) 95 | else: 96 | 
new_contents=original_contents[0:insert_line_no] 97 | new_contents.append(os.linesep) 98 | new_contents.extend(copyright) 99 | if len(original_contents) > insert_line_no and len(original_contents[insert_line_no].strip()) != 0: 100 | new_contents.append(os.linesep) 101 | new_contents.extend(original_contents[insert_line_no:]) 102 | new_contents="".join(new_contents) 103 | 104 | with open(path, 'w', encoding='utf-8') as output_file: 105 | output_file.write(new_contents) 106 | 107 | 108 | 109 | def main(argv=None): 110 | parser = argparse.ArgumentParser( 111 | description='Checker for copyright declaration.') 112 | parser.add_argument('filenames', nargs='*', help='Filenames to check') 113 | args = parser.parse_args(argv) 114 | 115 | retv = 0 116 | for path in args.filenames: 117 | comment_mark = _get_comment_mark(path) 118 | if comment_mark is None: 119 | print("warning:Unsupported file", path, file=sys.stderr) 120 | continue 121 | 122 | if _check_copyright(path): 123 | continue 124 | 125 | generate_copyright(path, comment_mark) 126 | 127 | 128 | if __name__ == '__main__': 129 | exit(main()) 130 | -------------------------------------------------------------------------------- /.pre_commit_tools/codestyle/pylint_pre_commit.hook: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TOTAL_ERRORS=0 4 | 5 | 6 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 7 | export PYTHONPATH=$DIR:$PYTHONPATH 8 | 9 | readonly VERSION="2.12.0" 10 | version=$(pylint --version | grep 'pylint') 11 | 12 | if ! 
[[ $version == *"$VERSION"* ]]; then 13 | pip install pylint==2.12.0 14 | fi 15 | 16 | # The trick to remove deleted files: https://stackoverflow.com/a/2413151 17 | for file in $(git diff --name-status | awk '$1 != "D" {print $2}'); do 18 | pylint --disable=all --load-plugins=docstring_checker \ 19 | --enable=doc-string-one-line,doc-string-end-with,doc-string-with-all-args,doc-string-triple-quotes,doc-string-missing,doc-string-indent-error,doc-string-with-returns,doc-string-with-raises $file; 20 | TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?); 21 | done 22 | 23 | exit $TOTAL_ERRORS 24 | #For now, just warning: 25 | #exit 0 26 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | 4 | recursive-include deploy *.* 5 | -------------------------------------------------------------------------------- /demo/clarity_assessment/blured_demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/clarity_assessment/blured_demo.jpg -------------------------------------------------------------------------------- /demo/clarity_assessment/clarity_demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/clarity_assessment/clarity_demo.jpg -------------------------------------------------------------------------------- /demo/clas_data/train/n01440764_15008.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/clas_data/train/n01440764_15008.JPEG -------------------------------------------------------------------------------- 
/demo/clas_data/train/n01530575_10039.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/clas_data/train/n01530575_10039.JPEG -------------------------------------------------------------------------------- /demo/clas_data/train/n01601694_4224.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/clas_data/train/n01601694_4224.JPEG -------------------------------------------------------------------------------- /demo/clas_data/train/n01641577_14447.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/clas_data/train/n01641577_14447.JPEG -------------------------------------------------------------------------------- /demo/clas_data/train/n01682714_8438.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/clas_data/train/n01682714_8438.JPEG -------------------------------------------------------------------------------- /demo/clas_data/train/n01698640_9242.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/clas_data/train/n01698640_9242.JPEG -------------------------------------------------------------------------------- /demo/clas_data/train_list.txt: -------------------------------------------------------------------------------- 1 | train/n01440764_15008.JPEG 0 2 | train/n01530575_10039.JPEG 1 3 | train/n01601694_4224.JPEG 2 4 | train/n01641577_14447.JPEG 3 5 | train/n01682714_8438.JPEG 4 6 | 
train/n01698640_9242.JPEG 5 7 | -------------------------------------------------------------------------------- /demo/code_exists/contains_code_demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/code_exists/contains_code_demo.jpg -------------------------------------------------------------------------------- /demo/code_exists/no_code_demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/code_exists/no_code_demo.jpg -------------------------------------------------------------------------------- /demo/image_orientation/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/image_orientation/0.jpg -------------------------------------------------------------------------------- /demo/image_orientation/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/image_orientation/1.jpg -------------------------------------------------------------------------------- /demo/image_orientation/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/image_orientation/2.jpg -------------------------------------------------------------------------------- /demo/image_orientation/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/image_orientation/3.jpg 
-------------------------------------------------------------------------------- /demo/ocr_data/images/lsvt_train_images_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/ocr_data/images/lsvt_train_images_0.jpg -------------------------------------------------------------------------------- /demo/ocr_data/images/lsvt_train_images_1325.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/ocr_data/images/lsvt_train_images_1325.jpg -------------------------------------------------------------------------------- /demo/ocr_data/images/lsvt_train_images_1339.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/ocr_data/images/lsvt_train_images_1339.jpg -------------------------------------------------------------------------------- /demo/ocr_data/images/lsvt_train_images_1410.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/ocr_data/images/lsvt_train_images_1410.jpg -------------------------------------------------------------------------------- /demo/ocr_data/images/lsvt_train_images_275.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/ocr_data/images/lsvt_train_images_275.jpg -------------------------------------------------------------------------------- /demo/ocr_data/train_list.txt: -------------------------------------------------------------------------------- 1 | 
images/lsvt_train_images_0.jpg 母婴百汇 2 | images/lsvt_train_images_275.jpg 停车场 3 | images/lsvt_train_images_1325.jpg 品质沙龙 4 | images/lsvt_train_images_1339.jpg 散作乾坤万里春 5 | images/lsvt_train_images_1410.jpg 24小时营业 6 | -------------------------------------------------------------------------------- /demo/ocr_rec/bg/n01530575_10039.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/ocr_rec/bg/n01530575_10039.JPEG -------------------------------------------------------------------------------- /demo/ocr_rec/bg/n01601694_4224.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/ocr_rec/bg/n01601694_4224.JPEG -------------------------------------------------------------------------------- /demo/ocr_rec/corpus.txt: -------------------------------------------------------------------------------- 1 | 母婴百汇 2 | 停车场 3 | 品质沙龙 4 | 散作乾坤万里春 5 | 24小时营业 6 | -------------------------------------------------------------------------------- /demo/shitu_data/train/0000001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/shitu_data/train/0000001.jpg -------------------------------------------------------------------------------- /demo/shitu_data/train/00005129.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/shitu_data/train/00005129.jpg -------------------------------------------------------------------------------- /demo/shitu_data/train/111687027218_10.JPG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/shitu_data/train/111687027218_10.JPG -------------------------------------------------------------------------------- /demo/shitu_data/train/2250416.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/shitu_data/train/2250416.jpg -------------------------------------------------------------------------------- /demo/shitu_data/train/2289835.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/demo/shitu_data/train/2289835.jpg -------------------------------------------------------------------------------- /demo/shitu_data/train_list.txt: -------------------------------------------------------------------------------- 1 | train/2289835.jpg 0 2 | train/2250416.jpg 168 3 | train/111687027218_10.JPG 80724 4 | train/0000001.jpg 180409 5 | train/00005129.jpg 185409 6 | -------------------------------------------------------------------------------- /deploy/configs/dataaug/ocr_rec_gen.yaml: -------------------------------------------------------------------------------- 1 | img_size: 2 | h: 48 3 | # Small font_size will make text looks like blured/prydown 4 | font_size: 5 | min: 42 6 | max: 44 7 | # choose Text color range 8 | # color boundary is in R,G,B format 9 | font_color: 10 | enable: true 11 | blue: 12 | fraction: 0.2 13 | l_boundary: [0,0,150] 14 | h_boundary: [60,60,255] 15 | brown: 16 | fraction: 0.15 17 | l_boundary: [139,70,19] 18 | h_boundary: [160,82,43] 19 | black: 20 | fraction: 0.25 21 | l_boundary: [0,0,0] 22 | h_boundary: [10,10,10] 23 | red: 24 | fraction: 0.2 25 | l_boundary: [200,0,0] 26 | h_boundary: [250,10,10] 27 | yellow: 28 | fraction: 0.2 29 | l_boundary: [200, 200, 0] 30 | 
h_boundary: [225, 225, 50] 31 | 32 | # By default, text is drawed by Pillow with (https://stackoverflow.com/questions/43828955/measuring-width-of-text-python-pil) 33 | # If `random_space` is enabled, some text will be drawed char by char with a random space 34 | random_space: 35 | enable: false 36 | fraction: 0.5 37 | min: 0 # -0.1 will make chars very close or even overlapped 38 | max: 0.4 39 | 40 | random_direction: 41 | enable: true 42 | fraction: 0.5 43 | 44 | 45 | # Do remap with sin() 46 | # Currently this process is very slow! 47 | curve: 48 | enable: true 49 | fraction: 0.3 50 | period: 360 # degree, sin 函数的周期 51 | min: 1 # sin 函数的幅值范围 52 | max: 5 53 | 54 | # random crop text height 55 | crop: 56 | enable: False 57 | fraction: 0.3 58 | 59 | # top and bottom will applied equally 60 | top: 61 | min: 5 62 | max: 10 # in pixel, this value should small than img_height 63 | bottom: 64 | min: 5 65 | max: 10 # in pixel, this value should small than img_height 66 | 67 | # Use image in bg_dir as background for text 68 | img_bg: 69 | enable: true 70 | fraction: 0.7 71 | 72 | # Not work when random_space applied 73 | text_border: 74 | enable: true 75 | fraction: 0.5 76 | 77 | # lighter than word color 78 | light: 79 | enable: true 80 | fraction: 0.5 81 | 82 | # darker than word color 83 | dark: 84 | enable: true 85 | fraction: 0.5 86 | 87 | # https://docs.opencv.org/3.4/df/da0/group__photo__clone.html#ga2bf426e4c93a6b1f21705513dfeca49d 88 | # https://www.cs.virginia.edu/~connelly/class/2014/comp_photo/proj2/poisson.pdf 89 | # Use opencv seamlessClone() to draw text on background 90 | # For some background image, this will make text image looks more real 91 | seamless_clone: 92 | enable: true 93 | fraction: 0.5 94 | 95 | perspective_transform: 96 | max_x: 30 97 | max_y: 30 98 | max_z: 4 99 | 100 | blur: 101 | enable: true 102 | fraction: 0.05 103 | 104 | # If an image is applied blur, it will not be applied prydown 105 | prydown: 106 | enable: false 107 | fraction: 0.05 
108 | max_scale: 1.5 # Image will first resize to 1.5x, and than resize to 1x 109 | 110 | noise: 111 | enable: true 112 | fraction: 0.3 113 | 114 | gauss: 115 | enable: true 116 | fraction: 0.35 117 | 118 | uniform: 119 | enable: true 120 | fraction: 0.35 121 | 122 | salt_pepper: 123 | enable: false 124 | fraction: 0.25 125 | 126 | poisson: 127 | enable: true 128 | fraction: 0.30 129 | 130 | line: 131 | enable: true 132 | fraction: 0.1 133 | 134 | under_line: 135 | enable: true 136 | fraction: 0.3 137 | 138 | table_line: 139 | enable: true 140 | fraction: 0.3 141 | 142 | middle_line: 143 | enable: true 144 | fraction: 0.4 145 | 146 | line_color: 147 | enable: true 148 | black: 149 | fraction: 0.4 150 | l_boundary: [0,0,0] 151 | h_boundary: [64,64,64] 152 | blue: 153 | fraction: 0.2 154 | l_boundary: [0,0,150] 155 | h_boundary: [60,60,255] 156 | white: 157 | fraction: 0.4 158 | l_boundary: [254,254,254] 159 | h_boundary: [255,255,255] 160 | 161 | # These operates are applied on the final output image, 162 | # so actually it can also be applied in training process as an data augmentation method. 163 | 164 | # By default, text is darker than background. 
165 | # If `reverse_color` is enabled, some images will have dark background and light text 166 | reverse_color: 167 | enable: true 168 | fraction: 0.1 169 | 170 | emboss: 171 | enable: false 172 | fraction: 0.05 173 | 174 | sharp: 175 | enable: false 176 | fraction: 0.1 177 | -------------------------------------------------------------------------------- /deploy/configs/dataaug_clas.yaml: -------------------------------------------------------------------------------- 1 | DataGen: 2 | ops: 3 | - randaugment 4 | - random_erasing 5 | - gridmask 6 | - tia_distort 7 | - tia_stretch 8 | - tia_perspective 9 | ori_data_dir: "demo/clas_data" 10 | label_file: "demo/clas_data/train_list.txt" 11 | gen_label: &gen_label "labels/test.txt" 12 | img_save_folder: "test" 13 | gen_ratio: 0 14 | gen_num: 5 15 | size: 224 16 | 17 | FeatureExtract: 18 | config: "deploy/configs/ppcv/feature_extract.yaml" 19 | thresh: 0.9 20 | file_out: "tmp/rm_repeat.txt" 21 | 22 | # indexing engine config 23 | IndexProcess: 24 | index_method: "HNSW32" # supported: HNSW32, IVF, Flat 25 | image_root: "./test" 26 | index_dir: "./augdata/all_aug" 27 | all_label_file: *gen_label 28 | index_operation: "new" # suported: "append", "remove", "new" 29 | delimiter: " " 30 | dist_type: "IP" 31 | embedding_size: 512 32 | batch_size: 32 33 | return_k: 5 34 | score_thres: 0.5 35 | 36 | BigModel: 37 | model_type: cls # support(cls / ocr_rec) 38 | config: "deploy/configs/ppcv/big_model_classification.yaml" 39 | batch_size: 8 40 | thresh: 0.1 41 | final_label: "high_socre_label.txt" 42 | -------------------------------------------------------------------------------- /deploy/configs/dataaug_ocr_img2img.yaml: -------------------------------------------------------------------------------- 1 | DataGen: 2 | ops: 3 | - randaugment 4 | - random_erasing 5 | - gridmask 6 | - tia_distort 7 | - tia_stretch 8 | - tia_perspective 9 | ori_data_dir: "demo/ocr_data" 10 | label_file: "demo/ocr_data/train_list.txt" 11 | gen_label: 
&gen_label labels/all_label.txt 12 | model_type: &model_type ocr_rec 13 | delimiter: &delimiter "\t" 14 | img_save_folder: &img_save_folder "test" 15 | gen_ratio: 0 16 | gen_num: 5 17 | size: 224 18 | 19 | FeatureExtract: 20 | config: "deploy/configs/ppcv/feature_extract.yaml" 21 | thresh: 0.9 22 | file_out: "tmp/rm_repeat.txt" 23 | 24 | # indexing engine config 25 | IndexProcess: 26 | index_method: "HNSW32" # supported: HNSW32, IVF, Flat 27 | image_root: *img_save_folder 28 | index_dir: "./augdata/all_aug" 29 | all_label_file: *gen_label 30 | index_operation: "new" # suported: "append", "remove", "new" 31 | delimiter: *delimiter 32 | dist_type: "IP" 33 | embedding_size: 512 34 | batch_size: 32 35 | return_k: 5 36 | score_thres: 0.5 37 | 38 | BigModel: 39 | model_type: *model_type # support(cls / ocr_rec) 40 | config: "deploy/configs/ppcv/text_recognition.yaml" 41 | batch_size: 8 42 | thresh: 0.1 43 | final_label: "high_socre_label.txt" 44 | -------------------------------------------------------------------------------- /deploy/configs/dataaug_ocr_text2img.yaml: -------------------------------------------------------------------------------- 1 | DataGen: 2 | mode: text2img 3 | config: "deploy/configs/dataaug/ocr_rec_gen.yaml" 4 | gen_num: 5 5 | out_dir: "test" 6 | bg_num_per_word: 5 7 | threads: 1 8 | bg_img_dir: "demo/ocr_rec/bg/" 9 | font_dir: "demo/ocr_rec/font" 10 | corpus_file: "demo/ocr_rec/corpus.txt" 11 | img_save_folder: &img_save_folder output_img 12 | gen_label: &gen_label output_img/all_label.txt 13 | delimiter: &delimiter "\t" 14 | 15 | FeatureExtract: 16 | config: "deploy/configs/ppcv/feature_extract.yaml" 17 | thresh: 0.9 18 | file_out: "tmp/rm_repeat.txt" 19 | 20 | # indexing engine config 21 | IndexProcess: 22 | index_method: "HNSW32" # supported: HNSW32, IVF, Flat 23 | image_root: *img_save_folder 24 | index_dir: "./augdata/all_aug" 25 | all_label_file: *gen_label 26 | index_operation: "new" # suported: "append", "remove", "new" 27 | delimiter: 
*delimiter 28 | dist_type: "IP" 29 | embedding_size: 512 30 | batch_size: 32 31 | return_k: 5 32 | score_thres: 0.5 33 | 34 | BigModel: 35 | model_type: ocr_rec # support(cls / ocr_rec) 36 | config: "deploy/configs/ppcv/text_recognition.yaml" 37 | batch_size: 8 38 | thresh: 0.1 39 | final_label: "high_socre_label.txt" 40 | -------------------------------------------------------------------------------- /deploy/configs/dataaug_shitu.yaml: -------------------------------------------------------------------------------- 1 | DataGen: 2 | ops: 3 | - randaugment 4 | - random_erasing 5 | - gridmask 6 | - tia_distort 7 | - tia_stretch 8 | - tia_perspective 9 | ori_data_dir: "demo/shitu_data" 10 | label_file: "demo/shitu_data/train_list.txt" 11 | gen_label: &gen_label "labels/test.txt" 12 | img_save_folder: &img_save_folder "test" 13 | gen_ratio: 0 14 | gen_num: 5 15 | size: 224 16 | 17 | FeatureExtract: 18 | config: "deploy/configs/ppcv/feature_extract.yaml" 19 | thresh: 0.9 20 | file_out: "tmp/rm_repeat.txt" 21 | 22 | # indexing engine config 23 | IndexProcess: 24 | index_method: "HNSW32" # supported: HNSW32, IVF, Flat 25 | image_root: *img_save_folder 26 | index_dir: "./augdata/all_aug" 27 | all_label_file: *gen_label 28 | index_operation: "new" # suported: "append", "remove", "new" 29 | delimiter: " " 30 | dist_type: "IP" 31 | embedding_size: 512 32 | batch_size: 32 33 | return_k: 5 34 | score_thres: 0.5 35 | -------------------------------------------------------------------------------- /deploy/configs/dataclean.yaml: -------------------------------------------------------------------------------- 1 | DataClean: 2 | ImageOrientation: "deploy/configs/ppcv/image_orientation.yaml" 3 | ClarityAssessment: "deploy/configs/ppcv/clarity_assessment.yaml" 4 | CodeExists: "deploy/configs/ppcv/code_exists.yaml" 5 | -------------------------------------------------------------------------------- /deploy/configs/ppcv/big_model_classification.yaml: 
-------------------------------------------------------------------------------- 1 | ENV: 2 | run_mode: paddle 3 | device: GPU 4 | min_subgraph_size: 3 5 | shape_info_filename: ./ 6 | trt_calib_mode: False 7 | cpu_threads: 1 8 | trt_use_static: False 9 | return_res: True 10 | print_res: False 11 | 12 | MODEL: 13 | - ClassificationOp: 14 | name: cls 15 | param_path: paddlecv://models/PPLCNet_x1_0_infer/inference.pdiparams 16 | model_path: paddlecv://models/PPLCNet_x1_0_infer/inference.pdmodel 17 | batch_size: 8 18 | last_ops: 19 | - input 20 | PreProcess: 21 | - ResizeImage: 22 | resize_short: 256 23 | - CropImage: 24 | size: 224 25 | - NormalizeImage: 26 | scale: 0.00392157 27 | mean: [0.485, 0.456, 0.406] 28 | std: [0.229, 0.224, 0.225] 29 | order: '' 30 | channel_num: 3 31 | - ToCHWImage: 32 | - ExpandDim: 33 | axis: 0 34 | PostProcess: 35 | - Topk: 36 | topk: 1 37 | class_id_map_file: "paddlecv://dict/classification/imagenet1k_label_list.txt" 38 | Inputs: 39 | - image 40 | 41 | - ClasOutput: 42 | name: vis 43 | last_ops: 44 | - input 45 | - cls 46 | Inputs: 47 | - fn 48 | - image 49 | - class_ids 50 | - scores 51 | - label_names 52 | -------------------------------------------------------------------------------- /deploy/configs/ppcv/clarity_assessment.yaml: -------------------------------------------------------------------------------- 1 | ENV: 2 | output_dir: output 3 | run_mode: paddle 4 | device: GPU 5 | min_subgraph_size: 3 6 | shape_info_filename: ./ 7 | trt_calib_mode: False 8 | cpu_threads: 1 9 | trt_use_static: False 10 | save_output: True 11 | return_res: True 12 | print_res: True 13 | 14 | MODEL: 15 | - ClassificationOp: 16 | name: cls 17 | param_path: https://paddleclas.bj.bcebos.com/models/PULC/inference/clarity_assessment_infer/inference.pdiparams 18 | model_path: https://paddleclas.bj.bcebos.com/models/PULC/inference/clarity_assessment_infer/inference.pdmodel 19 | batch_size: 8 20 | last_ops: 21 | - input 22 | PreProcess: 23 | - ResizeImage: 24 | 
resize_short: 256 25 | - CropImage: 26 | size: 224 27 | - NormalizeImage: 28 | scale: 0.00392157 29 | mean: [0.485, 0.456, 0.406] 30 | std: [0.229, 0.224, 0.225] 31 | order: '' 32 | channel_num: 3 33 | - ToCHWImage: 34 | - ExpandDim: 35 | axis: 0 36 | PostProcess: 37 | - Topk: 38 | topk: 1 39 | class_id_map_file: deploy/utils/label_map/clarity_assessment_label_list.txt 40 | Inputs: 41 | - image 42 | 43 | - ClasOutput: 44 | name: vis 45 | last_ops: 46 | - input 47 | - cls 48 | Inputs: 49 | - fn 50 | - image 51 | - class_ids 52 | - scores 53 | - label_names 54 | -------------------------------------------------------------------------------- /deploy/configs/ppcv/code_exists.yaml: -------------------------------------------------------------------------------- 1 | ENV: 2 | output_dir: output 3 | run_mode: paddle 4 | device: GPU 5 | min_subgraph_size: 3 6 | shape_info_filename: ./ 7 | trt_calib_mode: False 8 | cpu_threads: 1 9 | trt_use_static: False 10 | save_output: True 11 | return_res: True 12 | print_res: True 13 | 14 | MODEL: 15 | - ClassificationOp: 16 | name: cls 17 | param_path: https://paddleclas.bj.bcebos.com/models/PULC/inference/code_exists_infer/inference.pdiparams 18 | model_path: https://paddleclas.bj.bcebos.com/models/PULC/inference/code_exists_infer/inference.pdmodel 19 | batch_size: 8 20 | last_ops: 21 | - input 22 | PreProcess: 23 | - ResizeImage: 24 | resize_short: 256 25 | - CropImage: 26 | size: 224 27 | - NormalizeImage: 28 | scale: 0.00392157 29 | mean: [0.485, 0.456, 0.406] 30 | std: [0.229, 0.224, 0.225] 31 | order: '' 32 | channel_num: 3 33 | - ToCHWImage: 34 | - ExpandDim: 35 | axis: 0 36 | PostProcess: 37 | - Topk: 38 | topk: 1 39 | class_id_map_file: deploy/utils/label_map/code_exists_label_list.txt 40 | Inputs: 41 | - image 42 | 43 | - ClasOutput: 44 | name: vis 45 | last_ops: 46 | - input 47 | - cls 48 | Inputs: 49 | - fn 50 | - image 51 | - class_ids 52 | - scores 53 | - label_names 54 | 
-------------------------------------------------------------------------------- /deploy/configs/ppcv/feature_extract.yaml: -------------------------------------------------------------------------------- 1 | ENV: 2 | run_mode: paddle 3 | device: GPU 4 | min_subgraph_size: 3 5 | shape_info_filename: ./ 6 | trt_calib_mode: False 7 | cpu_threads: 1 8 | trt_use_static: False 9 | return_res: True 10 | print_res: False 11 | 12 | MODEL: 13 | - FeatureExtractionOp: 14 | name: feature 15 | param_path: paddlecv://models/general_PPLCNet_x2_5_lite_v1.0_infer/inference.pdiparams 16 | model_path: paddlecv://models/general_PPLCNet_x2_5_lite_v1.0_infer/inference.pdmodel 17 | batch_size: 1 18 | last_ops: 19 | - input 20 | PreProcess: 21 | - ResizeImage: 22 | size: [224, 224] 23 | return_numpy: False 24 | interpolation: bilinear 25 | backend: cv2 26 | - NormalizeImage: 27 | scale: 1.0/255.0 28 | mean: [0.485, 0.456, 0.406] 29 | std: [0.229, 0.224, 0.225] 30 | order: hwc 31 | - ToCHWImage: 32 | - ExpandDim: 33 | axis: 0 34 | PostProcess: 35 | - NormalizeFeature: 36 | normalize: True 37 | Inputs: 38 | - image 39 | 40 | - FeatureOutput: 41 | name: save 42 | last_ops: 43 | - input 44 | - feature 45 | Inputs: 46 | - fn 47 | - feature 48 | -------------------------------------------------------------------------------- /deploy/configs/ppcv/image_orientation.yaml: -------------------------------------------------------------------------------- 1 | ENV: 2 | output_dir: output 3 | run_mode: paddle 4 | device: GPU 5 | min_subgraph_size: 3 6 | shape_info_filename: ./ 7 | trt_calib_mode: False 8 | cpu_threads: 1 9 | trt_use_static: False 10 | save_output: True 11 | return_res: True 12 | print_res: True 13 | 14 | MODEL: 15 | - ClassificationOp: 16 | name: cls 17 | param_path: https://paddleclas.bj.bcebos.com/models/PULC/inference/image_orientation_infer/inference.pdiparams 18 | model_path: https://paddleclas.bj.bcebos.com/models/PULC/inference/image_orientation_infer/inference.pdmodel 19 | 
batch_size: 8 20 | last_ops: 21 | - input 22 | PreProcess: 23 | - ResizeImage: 24 | resize_short: 256 25 | - CropImage: 26 | size: 224 27 | - NormalizeImage: 28 | scale: 0.00392157 29 | mean: [0.485, 0.456, 0.406] 30 | std: [0.229, 0.224, 0.225] 31 | order: '' 32 | channel_num: 3 33 | - ToCHWImage: 34 | - ExpandDim: 35 | axis: 0 36 | PostProcess: 37 | - ThreshOutput: 38 | threshold: 0 39 | class_id_map_file: deploy/utils/label_map/image_orientation_label_list.txt 40 | Inputs: 41 | - image 42 | 43 | - ClasOutput: 44 | name: vis 45 | last_ops: 46 | - input 47 | - cls 48 | Inputs: 49 | - fn 50 | - image 51 | - class_ids 52 | - scores 53 | - label_names 54 | -------------------------------------------------------------------------------- /deploy/configs/ppcv/text_recognition.yaml: -------------------------------------------------------------------------------- 1 | ENV: 2 | run_mode: paddle 3 | device: GPU 4 | min_subgraph_size: 3 5 | shape_info_filename: ./ 6 | trt_calib_mode: False 7 | cpu_threads: 1 8 | trt_use_static: False 9 | save_img: False 10 | save_res: False 11 | return_res: true 12 | 13 | 14 | MODEL: 15 | - OcrCrnnRecOp: 16 | name: rec 17 | param_path: paddlecv://models/ch_PP-OCRv3_rec_infer/inference.pdiparams 18 | model_path: paddlecv://models/ch_PP-OCRv3_rec_infer/inference.pdmodel 19 | batch_size: 6 20 | last_ops: 21 | - input 22 | PreProcess: 23 | - ReisizeNormImg: 24 | rec_image_shape: [3, 48, 320] 25 | PostProcess: 26 | - CTCLabelDecode: 27 | character_dict_path: "paddlecv://dict/ocr/ch_dict.txt" 28 | use_space_char: true 29 | Inputs: 30 | - image 31 | 32 | - OCRRecOutput: 33 | name: vis 34 | last_ops: 35 | - input 36 | - rec 37 | Inputs: 38 | - fn 39 | - image 40 | - rec_text 41 | - rec_score 42 | -------------------------------------------------------------------------------- /deploy/ppcv/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import (core, engine, ops, utils, model_zoo) 16 | -------------------------------------------------------------------------------- /deploy/ppcv/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import workspace 16 | from .workspace import * 17 | 18 | __all__ = workspace.__all__ 19 | -------------------------------------------------------------------------------- /deploy/ppcv/core/workspace.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | __all__ = ['register', 'create'] 18 | 19 | global_config = dict() 20 | 21 | 22 | def register(cls): 23 | """ 24 | Register a given module class. 25 | Args: 26 | cls (type): Module class to be registered. 27 | Returns: cls 28 | """ 29 | if cls.__name__ in global_config: 30 | raise ValueError("Module class already registered: {}".format( 31 | cls.__name__)) 32 | global_config[cls.__name__] = cls 33 | return cls 34 | 35 | 36 | def create(cls_name, op_cfg, env_cfg): 37 | """ 38 | Create an instance of given module class. 39 | 40 | Args: 41 | cls_name(str): Class of which to create instnce. 42 | 43 | Return: instance of type `cls_or_name` 44 | """ 45 | assert type(cls_name) == str, "should be a name of class" 46 | if cls_name not in global_config: 47 | raise ValueError("The module {} is not registered".format(cls_name)) 48 | 49 | cls = global_config[cls_name] 50 | return cls(op_cfg, env_cfg) 51 | 52 | 53 | def get_global_op(): 54 | return global_config 55 | -------------------------------------------------------------------------------- /deploy/ppcv/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import pipeline 16 | from .pipeline import * 17 | 18 | __all__ = pipeline.__all__ 19 | -------------------------------------------------------------------------------- /deploy/ppcv/model_zoo/MODEL_ZOO: -------------------------------------------------------------------------------- 1 | classification/PPLCNet_x1_0_infer 2 | -------------------------------------------------------------------------------- /deploy/ppcv/model_zoo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . 
import model_zoo 16 | from .model_zoo import * 17 | 18 | __all__ = model_zoo.__all__ 19 | -------------------------------------------------------------------------------- /deploy/ppcv/model_zoo/model_zoo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os.path as osp 16 | import pkg_resources 17 | 18 | try: 19 | from collections.abc import Sequence 20 | except: 21 | from collections import Sequence 22 | 23 | from ppcv.utils.download import get_config_path, get_model_path 24 | from ppcv.utils.logger import setup_logger 25 | logger = setup_logger(__name__) 26 | 27 | __all__ = [ 28 | 'list_model', 'get_config_file', 'get_model_file', 'MODEL_ZOO_FILENAME' 29 | ] 30 | 31 | MODEL_ZOO_FILENAME = 'MODEL_ZOO' 32 | TASK_DICT = { 33 | # general config 34 | 'classification': 'paddlecv://configs/test_classification.yml', 35 | 'detection': 'paddlecv://configs/test_detection.yml', 36 | 'keypoint': 'paddlecv://configs/test_keypoint.yml', 37 | # single model 38 | # TODO 39 | # system 40 | 'PP-OCRv3': 'paddlecv://configs/system/PP-OCRv3.yml', 41 | } 42 | 43 | 44 | def list_model(filters=[]): 45 | model_zoo_file = pkg_resources.resource_filename('ppcv.model_zoo', 46 | MODEL_ZOO_FILENAME) 47 | with open(model_zoo_file) as f: 48 | model_names = f.read().splitlines() 49 | 50 | # filter 
model_name 51 | def filt(name): 52 | for f in filters: 53 | if name.find(f) < 0: 54 | return False 55 | return True 56 | 57 | if isinstance(filters, str) or not isinstance(filters, Sequence): 58 | filters = [filters] 59 | model_names = [name for name in model_names if filt(name)] 60 | if len(model_names) == 0 and len(filters) > 0: 61 | raise ValueError("no model found, please check filters setting, " 62 | "filters can be set as following kinds:\n" 63 | "\tTask: classification, detection ...\n" 64 | "\tArchitecture: PPLCNet, PPYOLOE ...\n") 65 | 66 | model_str = "Available Models:\n" 67 | for model_name in model_names: 68 | model_str += "\t{}\n".format(model_name) 69 | logger.info(model_str) 70 | 71 | 72 | # models and configs save on bcebos under dygraph directory 73 | def get_config_file(task): 74 | """Get config path from task. 75 | """ 76 | if task not in TASK_DICT: 77 | tasks = TASK_DICT.keys() 78 | logger.error("Illegal task: {}, please use one of {}".format(task, 79 | tasks)) 80 | path = TASK_DICT[task] 81 | return get_config_path(path) 82 | 83 | 84 | def get_model_file(path): 85 | return get_model_path(path) 86 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from . import models 16 | from . import output 17 | from . import connector 18 | 19 | from .models import * 20 | from .output import * 21 | from .connector import * 22 | 23 | __all__ = models.__all__ + output.__all__ + connector.__all__ 24 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import os 16 | import importlib 17 | import math 18 | import numpy as np 19 | try: 20 | from collections.abc import Sequence 21 | except Exception: 22 | from collections import Sequence 23 | 24 | import paddle 25 | from paddle.inference import Config 26 | from paddle.inference import create_predictor 27 | 28 | from ppcv.ops.predictor import PaddlePredictor 29 | from ppcv.utils.download import get_model_path 30 | 31 | __all__ = ["BaseOp", ] 32 | 33 | 34 | def create_operators(params, mod): 35 | """ 36 | create operators based on the config 37 | 38 | Args: 39 | params(list): a dict list, used to create some operators 40 | mod(module) : a module that can import single ops 41 | """ 42 | assert isinstance(params, list), ('operator config should be a list') 43 | if mod is None: 44 | mod = importlib.import_module(__name__) 45 | ops = [] 46 | for operator in params: 47 | assert isinstance(operator, 48 | dict) and len(operator) == 1, "yaml format error" 49 | op_name = list(operator)[0] 50 | param = {} if operator[op_name] is None else operator[op_name] 51 | op = getattr(mod, op_name)(**param) 52 | ops.append(op) 53 | 54 | return ops 55 | 56 | 57 | class BaseOp(object): 58 | """ 59 | Base Operator, implement of prediction process 60 | Args 61 | """ 62 | 63 | def __init__(self, model_cfg, env_cfg): 64 | self.model_cfg = model_cfg 65 | self.env_cfg = env_cfg 66 | self.input_keys = model_cfg["Inputs"] 67 | 68 | @classmethod 69 | def type(self): 70 | raise NotImplementedError 71 | 72 | @classmethod 73 | def get_output_keys(cls): 74 | raise NotImplementedError 75 | 76 | def get_input_keys(self): 77 | return self.input_keys 78 | 79 | def filter_input(self, last_outputs, input_name): 80 | f_inputs = [{k.split(".")[-1]: last[k] 81 | for k in input_name} for last in last_outputs] 82 | return f_inputs 83 | 84 | def check_output(self, output, name): 85 | if not isinstance(output, Sequence): 86 | raise ValueError('The output of op: {} must be Sequence').format( 87 | name) 88 | 
output = output[0] 89 | if not isinstance(output, dict): 90 | raise ValueError( 91 | 'The element of output in op: {} must be dict').format(name) 92 | out_keys = list(output.keys()) 93 | for out, define in zip(out_keys, self.output_keys): 94 | if out != define: 95 | raise ValueError( 96 | 'The output key in op: {} is inconsistent, expect {}, but received {}'. 97 | format(name, define, out)) 98 | 99 | def set_frame(self, frame_id): 100 | self.frame_id = frame_id 101 | 102 | def __call__(self, image_list): 103 | raise NotImplementedError 104 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/connector/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .op_connector import * 16 | 17 | __all__ = op_connector.__all__ 18 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/connector/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import cv2 16 | import numpy as np 17 | 18 | from ppcv.core.workspace import register 19 | from ppcv.ops.base import BaseOp 20 | 21 | 22 | @register 23 | class ConnectorBaseOp(BaseOp): 24 | def __init__(self, model_cfg, env_cfg=None): 25 | super(ConnectorBaseOp, self).__init__(model_cfg, env_cfg) 26 | self.name = model_cfg["name"] 27 | keys = self.get_output_keys() 28 | self.output_keys = [self.name + '.' + key for key in keys] 29 | 30 | @classmethod 31 | def type(self): 32 | return 'CONNECTOR' 33 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/general_data_obj.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import numpy as np 16 | 17 | 18 | class GeneralDataObj(object): 19 | def __init__(self, data): 20 | assert isinstance(data, (dict, )) 21 | self.data_dict = data 22 | pass 23 | 24 | def get(self, key): 25 | """ 26 | key can be one of [list, tuple, str] 27 | """ 28 | if isinstance(key, (list, tuple)): 29 | return [self.data_dict[k] for k in key] 30 | elif isinstance(key, (str)): 31 | return self.data_dict[key] 32 | else: 33 | assert False, f"key({key}) type must be one of [list, tuple, str] but got {type(key)}" 34 | 35 | def set(self, key, value): 36 | """ 37 | key: str 38 | value: an object 39 | """ 40 | self.data_dict[key] = value 41 | 42 | def keys(self, ): 43 | """ 44 | get all keys of the data 45 | """ 46 | return list(self.data_dict.keys()) 47 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import classification 16 | from . import detection 17 | from . import ocr_db_detection 18 | from . 
# Explicitly import every model sub-package.  BUGFIX: keypoint and
# feature_extraction previously relied on the star-imports below binding
# the submodule name as a side effect; make them explicit like the rest.
from . import classification
from . import feature_extraction
from . import detection
from . import keypoint
from . import ocr_db_detection
from . import ocr_crnn_recognition

from .classification import *
from .feature_extraction import *
from .detection import *
from .keypoint import *
from .ocr_db_detection import *
from .ocr_crnn_recognition import *

# Aggregate the public API of every sub-package.  BUGFIX: include
# feature_extraction.__all__, which was star-imported but missing here.
__all__ = classification.__all__ + feature_extraction.__all__
__all__ += detection.__all__ + keypoint.__all__
__all__ += ocr_db_detection.__all__
__all__ += ocr_crnn_recognition.__all__
class ModelBaseOp(BaseOp):
    """
    Base class for model ops: downloads the model files, creates the Paddle
    predictor and namespaces this op's output keys.

    Args:
        model_cfg (dict): per-op config; must contain "name", "param_path"
            and "model_path"; may contain "batch_size" (default 1).
        env_cfg (dict): shared environment config; its "batch_size" entry is
            overwritten here before the predictor is built.
    """

    def __init__(self, model_cfg, env_cfg):
        super(ModelBaseOp, self).__init__(model_cfg, env_cfg)
        # Paths may be remote URLs; get_model_path downloads/caches them.
        param_path = get_model_path(model_cfg['param_path'])
        model_path = get_model_path(model_cfg['model_path'])
        # NOTE: env_cfg is mutated on purpose so PaddlePredictor below sees
        # this model's batch size (default 1 when not configured).
        env_cfg["batch_size"] = model_cfg.get("batch_size", 1)
        self.batch_size = env_cfg["batch_size"]
        self.name = model_cfg["name"]
        self.frame = -1
        self.predictor = PaddlePredictor(param_path, model_path, env_cfg)

        # Output keys are prefixed with the op name to avoid collisions
        # between ops in one pipeline (e.g. "det.dt_bboxes").
        keys = self.get_output_keys()
        self.output_keys = [self.name + '.' + key for key in keys]

    @classmethod
    def type(self):
        return 'MODEL'

    def preprocess(self, inputs):
        # To be implemented by each concrete model op.
        raise NotImplementedError

    def postprocess(self, inputs):
        # To be implemented by each concrete model op.
        raise NotImplementedError
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from functools import reduce 16 | import os 17 | import numpy as np 18 | import math 19 | import paddle 20 | 21 | import importlib 22 | 23 | from .inference import ClassificationOp 24 | 25 | __all__ = ['ClassificationOp'] 26 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/models/classification/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
@register
class ClassificationOp(ModelBaseOp):
    """Image-classification op: preprocess -> predictor -> postprocess,
    producing class ids / scores / label names per input image."""

    def __init__(self, model_cfg, env_cfg):
        super(ClassificationOp, self).__init__(model_cfg, env_cfg)
        # Resolve the PreProcess/PostProcess operator class names against
        # this module (they are re-exported via the star-imports above).
        mod = importlib.import_module(__name__)
        self.preprocessor = create_operators(model_cfg["PreProcess"], mod)
        self.postprocessor = create_operators(model_cfg["PostProcess"], mod)

    @classmethod
    def get_output_keys(cls):
        # Keys are namespaced by ModelBaseOp ("<name>.class_ids", ...).
        return ["class_ids", "scores", "label_names"]

    def preprocess(self, inputs):
        # Chain every preprocess operator in configured order.
        outputs = inputs
        for ops in self.preprocessor:
            outputs = ops(outputs)
        return outputs

    def postprocess(self, inputs, result):
        outputs = result
        for idx, ops in enumerate(self.postprocessor):
            # Only the final operator receives the output keys: it is the
            # one that builds the result dicts.
            if idx == len(self.postprocessor) - 1:
                outputs = ops(outputs, self.output_keys)
            else:
                outputs = ops(outputs)
        return outputs

    def infer(self, image_list):
        """Run batched inference over a flat list of images."""
        inputs = []
        batch_loop_cnt = math.ceil(float(len(image_list)) / self.batch_size)
        results = []
        for i in range(batch_loop_cnt):
            start_index = i * self.batch_size
            end_index = min((i + 1) * self.batch_size, len(image_list))
            batch_image_list = image_list[start_index:end_index]
            # preprocess each image, then stack into one batch tensor
            inputs = [self.preprocess(img) for img in batch_image_list]
            inputs = np.concatenate(inputs, axis=0)
            # model inference
            result = self.predictor.run(inputs)[0]
            # postprocess
            result = self.postprocess(inputs, result)
            results.extend(result)
        # results = self.merge_batch_result(results)
        return results

    def __call__(self, inputs):
        """
        step1: parser inputs
        step2: run
        step3: merge results
        input: a list of dict
        """
        # step1: flatten.  Each pipeline input may hold a single image or a
        # list of images (e.g. detector crops) under the first input key.
        key = self.input_keys[0]
        is_list = False
        if isinstance(inputs[0][key], (list, tuple)):
            inputs = [input[key] for input in inputs]
            is_list = True
        else:
            inputs = [[input[key]] for input in inputs]
        # Remember each group's size so results can be regrouped below.
        sub_index_list = [len(input) for input in inputs]
        inputs = reduce(lambda x, y: x.extend(y) or x, inputs)

        # step2: run
        outputs = self.infer(inputs)

        # step3: merge flat results back into one dict-of-lists per input;
        # scalar inputs are unwrapped back out of their 1-element lists.
        curr_offsef_id = 0
        pipe_outputs = []
        for idx in range(len(sub_index_list)):
            sub_start_idx = curr_offsef_id
            sub_end_idx = curr_offsef_id + sub_index_list[idx]
            output = outputs[sub_start_idx:sub_end_idx]
            output = {k: [o[k] for o in output] for k in output[0]}
            if is_list is not True:
                output = {k: output[k][0] for k in output}
            pipe_outputs.append(output)

            curr_offsef_id = sub_end_idx
        return pipe_outputs
import os
import numpy as np


def parse_class_id_map(class_id_map_file):
    """Load an "id name" mapping file into ``{int id: str name}``.

    Returns None (after a warning) when no file is given or the path does
    not exist, so label names are simply left empty downstream.
    """
    if class_id_map_file is None:
        return None

    if not os.path.exists(class_id_map_file):
        print(
            "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!"
        )
        return None

    try:
        class_id_map = {}
        with open(class_id_map_file, "r", encoding='utf-8') as fin:
            lines = fin.readlines()
            for line in lines:
                # Each line is "<id> <name...>"; partition keeps any spaces
                # inside the name intact.
                partition = line.split("\n")[0].partition(" ")
                class_id_map[int(partition[0])] = str(partition[-1])
    except Exception as ex:
        print(ex)
        class_id_map = None
    return class_id_map


class Topk(object):
    """Keep the top-k classes per sample, with scores and label names."""

    def __init__(self, topk=1, class_id_map_file=None):
        assert isinstance(topk, (int, ))
        self.class_id_map = parse_class_id_map(class_id_map_file)
        self.topk = topk

    def __call__(self, x, output_keys):
        y = []
        for idx, probs in enumerate(x):
            # Indices of the k largest probabilities, descending.
            index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32")
            clas_id_list = []
            score_list = []
            label_name_list = []
            for i in index:
                # .item() converts NumPy scalars to native Python types.
                clas_id_list.append(i.item())
                score_list.append(probs[i].item())
                if self.class_id_map is not None:
                    label_name_list.append(self.class_id_map[i.item()])

            result = {
                output_keys[0]: clas_id_list,
                output_keys[1]: np.around(score_list, decimals=5).tolist(),
                output_keys[2]: label_name_list
            }
            y.append(result)
        return y


class ThreshOutput(object):
    """Thresholded top-1: the best class if its score exceeds `threshold`,
    otherwise fall back to `default_label_index`."""

    def __init__(self,
                 threshold,
                 default_label_index=0,
                 class_id_map_file=None):
        self.threshold = threshold
        self.default_label_index = default_label_index
        self.class_id_map = parse_class_id_map(class_id_map_file)

    def __call__(self, x, output_keys):
        y = []
        for idx, probs in enumerate(x):
            index = probs.argsort(axis=0)[::-1].astype("int32")
            # BUGFIX: convert to native Python types here; previously
            # NumPy scalars (np.int32/np.float32) leaked into the result,
            # inconsistent with Topk and not JSON-serializable.
            top1_id = index[0].item()
            top1_score = probs[top1_id].item()

            if top1_score > self.threshold:
                rtn_id = top1_id
            else:
                rtn_id = self.default_label_index

            label_name = self.class_id_map[
                rtn_id] if self.class_id_map is not None else ""

            result = {
                output_keys[0]: int(rtn_id),
                # Round like Topk does (5 decimals) for consistent output.
                output_keys[1]: round(float(probs[rtn_id]), 5),
                output_keys[2]: label_name
            }
            y.append(result)
        return y
class ParserDetResults(object):
    """Filter raw detector output by score/validity and map class ids to
    category names.

    Args:
        label_list (str): path to a COCO .json annotation file or a .txt
            file with one category name per line.
        threshold (float): minimum score for a detection to be kept.
        max_det_results (int): cap on detections considered per image.
    """

    def __init__(self, label_list, threshold=0.5, max_det_results=100):
        self.threshold = threshold
        self.max_det_results = max_det_results
        self.clsid2catid, self.catid2name = self.get_categories(label_list)

    def get_categories(self, label_list):
        """Build (clsid2catid, catid2name) mappings from the label file.

        Raises:
            ValueError: if label_list is neither .json nor .txt.
        """
        if label_list.endswith('json'):
            # lazy import pycocotools here
            from pycocotools.coco import COCO
            coco = COCO(label_list)
            cats = coco.loadCats(coco.getCatIds())
            clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
            catid2name = {cat['id']: cat['name'] for cat in cats}
        # BUGFIX: was `anno_file.endswith(...)` — `anno_file` is undefined
        # in this scope (NameError); the parameter is `label_list`.
        elif label_list.endswith('txt'):
            cats = []
            with open(label_list) as f:
                for line in f.readlines():
                    cats.append(line.strip())
            # A leading "background" entry is a placeholder, not a class.
            if cats[0] == 'background':
                cats = cats[1:]

            clsid2catid = {i: i for i in range(len(cats))}
            catid2name = {i: name for i, name in enumerate(cats)}

        else:
            raise ValueError("label_list {} should be json or txt.".format(
                label_list))
        return clsid2catid, catid2name

    def __call__(self, preds, bbox_num, output_keys):
        """Filter flat predictions (rows: [class_id, score, x1, y1, x2, y2])
        grouped per image by bbox_num; returns (result dict, new bbox_num).
        """
        start_id = 0
        dt_bboxes = []
        scores = []
        class_ids = []
        cls_names = []
        new_bbox_num = []

        for num in bbox_num:
            end_id = start_id + num
            pred = preds[start_id:end_id]
            start_id = end_id
            # BUGFIX: cap by the number of boxes (rows), not the number of
            # per-box fields — pred.shape[-1] is 6 (class, score, x1y1x2y2),
            # which wrongly limited every image to 6 detections.
            max_det_results = min(self.max_det_results, pred.shape[0])
            # Highest-scoring boxes first.
            keep_indexes = pred[:, 1].argsort()[::-1][:max_det_results]

            select_num = 0
            for idx in keep_indexes:
                single_res = pred[idx].tolist()
                class_id = int(single_res[0])
                score = single_res[1]
                bbox = single_res[2:]
                if score < self.threshold:
                    continue
                # class_id == -1 marks padded / invalid detections.
                if class_id == -1:
                    continue
                select_num += 1
                dt_bboxes.append(bbox)
                scores.append(score)
                class_ids.append(class_id)
                cls_names.append(self.catid2name[self.clsid2catid[class_id]])
            new_bbox_num.append(select_num)
        result = {
            output_keys[0]: dt_bboxes,
            output_keys[1]: scores,
            output_keys[2]: class_ids,
            output_keys[3]: cls_names,
        }
        new_bbox_num = np.array(new_bbox_num).astype('int32')
        return result, new_bbox_num
14 | 15 | from functools import reduce 16 | import os 17 | import numpy as np 18 | import math 19 | import paddle 20 | 21 | import importlib 22 | 23 | from .inference import FeatureExtractionOp 24 | 25 | __all__ = ['FeatureExtractionOp'] 26 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/models/feature_extraction/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
@register
class FeatureExtractionOp(ModelBaseOp):
    """Model op that turns input images into feature vectors."""

    def __init__(self, model_cfg, env_cfg):
        super().__init__(model_cfg, env_cfg)
        # PreProcess/PostProcess class names are looked up in this module.
        mod = importlib.import_module(__name__)
        self.preprocessor = create_operators(model_cfg["PreProcess"], mod)
        self.postprocessor = create_operators(model_cfg["PostProcess"], mod)

    @classmethod
    def get_output_keys(cls):
        return ["feature"]

    def preprocess(self, inputs):
        data = inputs
        for op in self.preprocessor:
            data = op(data)
        return data

    def postprocess(self, inputs, result):
        # The final operator builds the result dicts, so only it receives
        # the namespaced output keys.
        data = result
        last = len(self.postprocessor) - 1
        for pos, op in enumerate(self.postprocessor):
            data = op(data, self.output_keys) if pos == last else op(data)
        return data

    def infer(self, image_list):
        """Run batched inference over a flat list of images."""
        results = []
        for start in range(0, len(image_list), self.batch_size):
            batch = image_list[start:start + self.batch_size]
            # preprocess each image, then stack into one batch tensor
            net_in = np.concatenate(
                [self.preprocess(img) for img in batch], axis=0)
            # model inference
            net_out = self.predictor.run(net_in)[0]
            # postprocess
            results.extend(self.postprocess(net_in, net_out))
        return results

    def __call__(self, inputs):
        """
        step1: parser inputs
        step2: run
        step3: merge results
        input: a list of dict
        """
        # step1: flatten.  Each pipeline input may hold one image or a
        # list of images under the first input key.
        key = self.input_keys[0]
        expand = isinstance(inputs[0][key], (list, tuple))
        if expand:
            grouped = [item[key] for item in inputs]
        else:
            grouped = [[item[key]] for item in inputs]
        group_sizes = [len(g) for g in grouped]
        flat = reduce(lambda x, y: x.extend(y) or x, grouped)

        # step2: run
        flat_outputs = self.infer(flat)

        # step3: regroup flat results per original pipeline input; scalar
        # inputs are unwrapped back out of their 1-element lists.
        pipe_outputs = []
        offset = 0
        for size in group_sizes:
            chunk = flat_outputs[offset:offset + size]
            merged = {k: [o[k] for o in chunk] for k in chunk[0]}
            if not expand:
                merged = {k: merged[k][0] for k in merged}
            pipe_outputs.append(merged)
            offset += size
        return pipe_outputs
class NormalizeFeature(object):
    """L2-normalize each feature row, then wrap every row in a result dict
    keyed by the first output key."""

    def __init__(self, normalize=True):
        super().__init__()
        self.normalize = normalize

    def __call__(self, x, output_keys):
        feats = x
        if self.normalize:
            # Row-wise L2 norms; keepdims so the division broadcasts.
            norms = np.sqrt(np.sum(np.square(feats), axis=1, keepdims=True))
            feats = np.divide(feats, norms)

        return [{output_keys[0]: row} for row in feats]
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from functools import reduce 16 | import os 17 | import numpy as np 18 | import math 19 | import paddle 20 | 21 | import importlib 22 | 23 | from .inference import KeypointOp 24 | 25 | __all__ = ['KeypointOp'] 26 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/models/ocr_crnn_recognition/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from functools import reduce 16 | import os 17 | import numpy as np 18 | import math 19 | import paddle 20 | 21 | import importlib 22 | 23 | from .inference import OcrCrnnRecOp 24 | 25 | __all__ = ['OcrCrnnRecOp'] 26 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/models/ocr_crnn_recognition/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
@register
class OcrCrnnRecOp(ModelBaseOp):
    """CRNN-based OCR text-recognition op.

    Consumes text-line crops and produces recognized text plus a
    confidence score for each crop.
    """

    def __init__(self, model_cfg, env_cfg):
        super(OcrCrnnRecOp, self).__init__(model_cfg, env_cfg)
        mod = importlib.import_module(__name__)
        self.preprocessor = create_operators(model_cfg["PreProcess"], mod)
        self.postprocessor = create_operators(model_cfg["PostProcess"], mod)
        self.batch_size = model_cfg["batch_size"]
        # (C, H, W) the recognizer expects, read from the first PreProcess
        # operator's config entry.
        self.rec_image_shape = list(model_cfg["PreProcess"][0].values())[0][
            "rec_image_shape"]

    @classmethod
    def get_output_keys(cls):
        return ["rec_text", "rec_score"]

    def preprocess(self, inputs, max_wh_ratio):
        # max_wh_ratio is the widest W/H in the batch; every crop is padded
        # to that width so the batch can be stacked into one tensor.
        outputs = inputs
        for ops in self.preprocessor:
            outputs = ops(outputs, max_wh_ratio)
        return outputs

    def postprocess(self, result):
        outputs = result
        for idx, ops in enumerate(self.postprocessor):
            # The last operator builds the result dicts, so it also
            # receives the namespaced output keys.
            if idx == len(self.postprocessor) - 1:
                outputs = ops(outputs, self.output_keys)
            else:
                outputs = ops(outputs)
        return outputs

    def infer(self, image_list):
        # Sort crops by aspect ratio so images batched together need a
        # similar padded width (less compute wasted on padding).
        width_list = [float(img.shape[1]) / img.shape[0] for img in image_list]
        indices = np.argsort(np.array(width_list))

        inputs = []
        results = [None] * len(image_list)
        for beg_img_no in range(0, len(image_list), self.batch_size):
            end_img_no = min(len(image_list), beg_img_no + self.batch_size)
            imgC, imgH, imgW = self.rec_image_shape
            max_wh_ratio = imgW / imgH

            norm_img_batch = []
            # First pass: find the widest aspect ratio in this batch.
            for ino in range(beg_img_no, end_img_no):
                h, w = image_list[indices[ino]].shape[0:2]
                wh_ratio = w * 1.0 / h
                max_wh_ratio = max(max_wh_ratio, wh_ratio)

            # Second pass: resize/pad every crop to that shared width.
            for ino in range(beg_img_no, end_img_no):
                norm_img = self.preprocess(image_list[indices[ino]],
                                           max_wh_ratio)
                norm_img = norm_img[np.newaxis, :]
                norm_img_batch.append(norm_img)

            norm_img_batch = np.concatenate(norm_img_batch, axis=0)

            # model inference
            result = self.predictor.run(norm_img_batch)
            # postprocess
            result = self.postprocess(result)

            # Scatter batch results back to each crop's original position
            # (undoing the aspect-ratio sort).
            for rno in range(len(result)):
                results[indices[beg_img_no + rno]] = result[rno]
        return results

    def __call__(self, inputs):
        """
        step1: parser inputs
        step2: run
        step3: merge results
        input: a list of dict
        """
        # step1: flatten.  Each pipeline input may hold a single crop or a
        # list of crops (from a detector) under the first input key.
        key = self.input_keys[0]
        is_list = False
        if isinstance(inputs[0][key], (list, tuple)):
            inputs = [input[key] for input in inputs]
            is_list = True
        else:
            inputs = [[input[key]] for input in inputs]
        # Remember each group's size so results can be regrouped below.
        sub_index_list = [len(input) for input in inputs]
        inputs = reduce(lambda x, y: x.extend(y) or x, inputs)

        # step2: run
        outputs = self.infer(inputs)
        # step3: merge flat results back into one dict-of-lists per input;
        # scalar inputs are unwrapped back out of their 1-element lists.
        curr_offsef_id = 0
        pipe_outputs = []
        for idx in range(len(sub_index_list)):
            sub_start_idx = curr_offsef_id
            sub_end_idx = curr_offsef_id + sub_index_list[idx]
            output = outputs[sub_start_idx:sub_end_idx]
            output = {k: [o[k] for o in output] for k in output[0]}
            if is_list is not True:
                output = {k: output[k][0] for k in output}
            pipe_outputs.append(output)

            curr_offsef_id = sub_end_idx
        return pipe_outputs
class ReisizeNormImg(object):
    """Resize an OCR crop to the recognizer input height, normalize pixels
    to [-1, 1] and right-pad the width up to imgH * max_wh_ratio.

    NOTE(review): the class name typo ("Reisize") is kept because YAML
    configs reference operators by class name.

    Args:
        rec_image_shape (sequence): target (C, H, W) of the recognizer.
    """

    def __init__(self, rec_image_shape=(3, 48, 320)):
        super().__init__()
        # BUGFIX: copy to a fresh list so a caller-supplied sequence is
        # never aliased and the default is never a shared mutable object.
        self.rec_image_shape = list(rec_image_shape)

    def resize_norm_img(self, img, max_wh_ratio):
        """Resize img (HWC uint8/float) keeping aspect ratio, scale to
        [-1, 1], transpose to CHW and zero-pad the width."""
        imgC, imgH, imgW = self.rec_image_shape
        assert imgC == img.shape[2]
        # The padded canvas width grows with the batch's widest crop.
        imgW = int((imgH * max_wh_ratio))

        h, w = img.shape[:2]
        ratio = w / float(h)
        # Never resize wider than the padded canvas.
        if math.ceil(imgH * ratio) > imgW:
            resized_w = imgW
        else:
            resized_w = int(math.ceil(imgH * ratio))
        resized_image = cv2.resize(img, (resized_w, imgH))
        resized_image = resized_image.astype('float32')
        # HWC -> CHW and map [0, 255] -> [-1, 1].
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im

    def __call__(self, img, max_wh_ratio):
        """Apply resize_norm_img; max_wh_ratio is the batch-wide max W/H."""
        img = self.resize_norm_img(img, max_wh_ratio)
        return img
/deploy/ppcv/ops/models/ocr_db_detection/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from functools import reduce 16 | import os 17 | import numpy as np 18 | import math 19 | import paddle 20 | 21 | import importlib 22 | 23 | from .inference import OcrDbDetOp 24 | 25 | __all__ = ['OcrDbDetOp'] 26 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/models/ocr_db_detection/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
from functools import reduce
import importlib
import os
import numpy as np
import math
import paddle
from ..base import ModelBaseOp

from ppcv.ops.base import create_operators
from ppcv.core.workspace import register

from .preprocess import *
from .postprocess import *


@register
class OcrDbDetOp(ModelBaseOp):
    """DB text-detection operator.

    Wraps preprocess -> predictor.run -> postprocess per input image and
    contributes "dt_polys" / "dt_scores" to the pipeline.
    """

    def __init__(self, model_cfg, env_cfg):
        super(OcrDbDetOp, self).__init__(model_cfg, env_cfg)
        mod = importlib.import_module(__name__)
        # Operators are resolved against this module's namespace, hence the
        # wildcard imports from .preprocess / .postprocess above.
        self.preprocessor = create_operators(model_cfg["PreProcess"], mod)
        self.postprocessor = create_operators(model_cfg["PostProcess"], mod)
        # Detection inputs have variable sizes, so run one image per batch.
        self.batch_size = 1

    @classmethod
    def get_output_keys(cls):
        """Names of the keys this op contributes to the pipeline."""
        return ["dt_polys", "dt_scores"]

    def preprocess(self, inputs):
        """Run the configured preprocess chain on one input dict."""
        outputs = inputs
        for ops in self.preprocessor:
            outputs = ops(outputs)
        return outputs

    def postprocess(self, result, shape_list):
        """Run the postprocess chain; only the final op also receives the
        output key names so it can build the result dict."""
        outputs = result
        for idx, ops in enumerate(self.postprocessor):
            if idx == len(self.postprocessor) - 1:
                outputs = ops(outputs, shape_list, self.output_keys)
            else:
                outputs = ops(outputs, shape_list)
        return outputs

    def infer(self, image_list):
        """Run detection over image_list one batch at a time.

        Returns a list with one postprocessed result per batch (one per
        image, since batch_size == 1).
        """
        # Fixed: removed an unused `inputs = []` local that was immediately
        # shadowed inside the loop.
        batch_loop_cnt = math.ceil(float(len(image_list)) / self.batch_size)
        results = []
        for i in range(batch_loop_cnt):
            start_index = i * self.batch_size
            end_index = min((i + 1) * self.batch_size, len(image_list))
            batch_image_list = image_list[start_index:end_index]
            # preprocess (batch_size is fixed to 1, so only element 0 is used)
            inputs, shape_list = self.preprocess({
                "image": batch_image_list[0]
            })
            shape_list = np.expand_dims(shape_list, axis=0)
            # model inference
            result = self.predictor.run(inputs)[0]
            # postprocess
            result = self.postprocess(result, shape_list)
            results.append(result)
        return results

    def __call__(self, inputs):
        """
        step1: parse inputs
        step2: run
        step3: merge results
        input: a list of dict
        """
        key = self.input_keys[0]
        is_list = False
        if isinstance(inputs[0][key], (list, tuple)):
            inputs = [input[key] for input in inputs]
            is_list = True
        else:
            inputs = [[input[key]] for input in inputs]
        # Remember how many images each original input contributed so the
        # flat result list can be regrouped afterwards.
        sub_index_list = [len(input) for input in inputs]
        inputs = reduce(lambda x, y: x.extend(y) or x, inputs)

        # step2: run
        outputs = self.infer(inputs)

        # step3: merge
        curr_offset_id = 0
        pipe_outputs = []
        for idx in range(len(sub_index_list)):
            sub_start_idx = curr_offset_id
            sub_end_idx = curr_offset_id + sub_index_list[idx]
            output = outputs[sub_start_idx:sub_end_idx]
            # Turn the list of per-image dicts into a dict of lists.
            # NOTE(review): assumes every input contributed at least one
            # image; an empty sub-list would raise IndexError here.
            output = {k: [o[k] for o in output] for k in output[0]}
            if is_list is not True:
                output = {k: output[k][0] for k in output}

            pipe_outputs.append(output)

            curr_offset_id = sub_end_idx
        return pipe_outputs
14 | 15 | from .base import OutputBaseOp 16 | from .classification import ClasOutput 17 | from .feature_extraction import FeatureOutput 18 | from .detection import DetOutput 19 | from .keypoint import KptOutput 20 | from .ocr_rec import OCRRecOutput 21 | 22 | __all__ = ['OutputBaseOp', 'ClasOutput', 'FeatureOutput', 'DetOutput', 'KptOutput', 'OCRRecOutput'] 23 | -------------------------------------------------------------------------------- /deploy/ppcv/ops/output/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class OutputBaseOp(BaseOp):
    """Base class for pipeline OUTPUT ops.

    Reads the common output switches from the environment config; concrete
    subclasses implement __call__ to print/save/return their results.
    """

    def __init__(self, model_cfg, env_cfg):
        super(OutputBaseOp, self).__init__(model_cfg, env_cfg)
        # Where images / result files are written when saving is enabled.
        self.output_dir = self.env_cfg.get('output_dir', 'output')
        self.save_img = self.env_cfg.get('save_img', False)
        self.save_res = self.env_cfg.get('save_res', False)
        self.return_res = self.env_cfg.get('return_res', False)
        self.print_res = self.env_cfg.get('print_res', False)
        # Fixed: subclasses read self.frame_id but nothing here set it,
        # raising AttributeError for image pipelines. Default to -1, the
        # "not a video frame" sentinel the subclasses test against.
        # NOTE(review): confirm the framework does not set frame_id later.
        self.frame_id = self.env_cfg.get('frame_id', -1)

    @classmethod
    def type(cls):
        # Fixed: a @classmethod's first parameter is conventionally `cls`,
        # not `self`.
        return 'OUTPUT'

    def __call__(self, inputs):
        # Subclasses override; the base implementation is a no-op.
        return
@register
class ClasOutput(OutputBaseOp):
    """Output op for classification results: optionally prints them, saves
    the input image, dumps results to JSON and/or returns them."""

    def __init__(self, model_cfg, env_cfg):
        super(ClasOutput, self).__init__(model_cfg, env_cfg)

    def __call__(self, inputs):
        total_res = []
        for input in inputs:
            fn, image, class_ids, scores, label_names = input["fn"], input[
                "image"], input["class_ids"], input["scores"], input[
                    "label_names"]
            res = dict(filename=fn,
                       class_ids=class_ids,
                       scores=scores,
                       label_names=label_names)
            if self.frame_id != -1:
                # Fixed: `frame_id` was an undefined bare name (NameError
                # whenever this branch ran); the value lives on self.
                res.update({'frame_id': self.frame_id})
            if self.print_res:
                logger.info(res)
            if self.save_img:
                # Flip channel order before cv2.imwrite (assumes the
                # pipeline image is RGB — TODO confirm).
                image = image[:, :, ::-1]
                file_name = os.path.split(fn)[-1]
                out_path = os.path.join(self.output_dir, file_name)
                logger.info('Save output image to {}'.format(out_path))
                cv2.imwrite(out_path, image)
            if self.save_res or self.return_res:
                total_res.append(res)
        if self.save_res:
            res_file_name = 'clas_output.json'
            out_path = os.path.join(self.output_dir, res_file_name)
            logger.info('Save output result to {}'.format(out_path))
            with open(out_path, 'w') as f:
                json.dump(total_res, f)
        if self.return_res:
            return total_res
        return
def get_color_map_list(num_classes):
    """
    Args:
        num_classes (int): number of class
    Returns:
        color_map (list): RGB color list
    """
    flat = num_classes * [0, 0, 0]
    for cls_id in range(num_classes):
        value = cls_id
        shift = 0
        # Spread the bits of the class id across the three channels,
        # starting from the most significant color bit.
        while value:
            flat[cls_id * 3 + 0] |= ((value >> 0) & 1) << (7 - shift)
            flat[cls_id * 3 + 1] |= ((value >> 1) & 1) << (7 - shift)
            flat[cls_id * 3 + 2] |= ((value >> 2) & 1) << (7 - shift)
            shift += 1
            value >>= 3
    return [flat[i:i + 3] for i in range(0, len(flat), 3)]


def draw_det(image, dt_bboxes, dt_scores, dt_cls_names):
    """Draw detection boxes and "<name> <score>" labels on the image and
    return the annotated image as a numpy array (channel order flipped
    relative to the input)."""
    canvas = Image.fromarray(image[:, :, ::-1])
    line_width = min(canvas.size) // 320
    painter = ImageDraw.Draw(canvas)
    unique_names = set(dt_cls_names)
    name_to_idx = {cls_name: i for i, cls_name in enumerate(unique_names)}
    clsid2color = {}
    palette = get_color_map_list(len(unique_names))

    for bbox, score, cls_name in zip(dt_bboxes, dt_scores, dt_cls_names):
        color = tuple(palette[name_to_idx[cls_name]])

        xmin, ymin, xmax, ymax = bbox
        # bbox outline
        painter.line(
            [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
             (xmin, ymin)],
            width=line_width,
            fill=color)

        # label: filled background rectangle with white text on top
        text = "{} {:.4f}".format(cls_name, score)
        text_box = painter.textbbox((xmin, ymin), text, anchor='lt')
        painter.rectangle(text_box, fill=color)
        painter.text((text_box[0], text_box[1]), text, fill=(255, 255, 255))
    return np.array(canvas)
@register
class DetOutput(OutputBaseOp):
    """Output op for detection results: optionally prints them, saves a
    visualization of the boxes, dumps results to JSON and/or returns them."""

    def __init__(self, model_cfg, env_cfg):
        super(DetOutput, self).__init__(model_cfg, env_cfg)

    def __call__(self, inputs):
        total_res = []
        for input in inputs:
            # Fixed: `input` is a dict, so tuple-unpacking it only yielded
            # the five keys, not the values. Read fields explicitly like
            # the other output ops do.
            fn = input["fn"]
            image = input["image"]
            dt_bboxes = input["dt_bboxes"]
            dt_scores = input["dt_scores"]
            dt_cls_names = input["dt_cls_names"]
            res = dict(
                filename=fn,
                dt_bboxes=dt_bboxes,
                dt_scores=dt_scores,
                dt_cls_names=dt_cls_names)
            if self.frame_id != -1:
                # Fixed: `frame_id` was an undefined bare name; use self.
                res.update({'frame_id': self.frame_id})
            if self.print_res:
                # Fixed: gate on print_res for consistency with the other
                # output ops instead of logging unconditionally.
                logger.info(res)
            if self.save_img:
                # Fixed: draw_det was defined in this module but never
                # called, so saved images had no boxes. Draw first, then
                # flip channel order for cv2.imwrite (assumes the pipeline
                # image channel order matches draw_det — TODO confirm).
                image = draw_det(image, dt_bboxes, dt_scores, dt_cls_names)
                image = image[:, :, ::-1]
                file_name = os.path.split(fn)[-1]
                out_path = os.path.join(self.output_dir, file_name)
                logger.info('Save output image to {}'.format(out_path))
                cv2.imwrite(out_path, image)
            if self.save_res or self.return_res:
                total_res.append(res)
        if self.save_res:
            res_file_name = 'det_output.json'
            out_path = os.path.join(self.output_dir, res_file_name)
            logger.info('Save output result to {}'.format(out_path))
            with open(out_path, 'w') as f:
                json.dump(total_res, f)
        if self.return_res:
            return total_res
        return
@register
class FeatureOutput(OutputBaseOp):
    """Output op for feature-extraction results: optionally prints and/or
    returns {filename, feature} records."""

    def __init__(self, model_cfg, env_cfg):
        super().__init__(model_cfg, env_cfg)

    def __call__(self, inputs):
        total_res = []
        for input in inputs:
            fn, feature = input["fn"], input["feature"]
            res = dict(filename=fn, feature=feature)
            # TODO(gaotingquan)
            if self.frame_id != -1:
                # Fixed: `frame_id` was an undefined bare name; use self.
                res.update({'frame_id': self.frame_id})
            if self.print_res:
                logger.info(res)
            if self.return_res:
                total_res.append(res)
        if self.return_res:
            return total_res
        return
@register
class OCRRecOutput(OutputBaseOp):
    """Output op for OCR recognition results: optionally prints them, saves
    the input image, dumps results to JSON and/or returns them."""

    def __init__(self, model_cfg, env_cfg):
        super(OCRRecOutput, self).__init__(model_cfg, env_cfg)

    def __call__(self, inputs):
        total_res = []
        for input in inputs:
            fn, image, rec_text, rec_score = input["fn"], input[
                "image"], input["rec_text"], input["rec_score"]
            res = dict(filename=fn,
                       rec_text=rec_text,
                       rec_score=rec_score)
            if self.frame_id != -1:
                # Fixed: `frame_id` was an undefined bare name (NameError
                # whenever this branch ran); the value lives on self.
                res.update({'frame_id': self.frame_id})
            if self.print_res:
                logger.info(res)
            if self.save_img:
                # Flip channel order before cv2.imwrite (assumes the
                # pipeline image is RGB — TODO confirm).
                image = image[:, :, ::-1]
                file_name = os.path.split(fn)[-1]
                out_path = os.path.join(self.output_dir, file_name)
                logger.info('Save output image to {}'.format(out_path))
                cv2.imwrite(out_path, image)
            if self.save_res or self.return_res:
                total_res.append(res)
        if self.save_res:
            res_file_name = 'ocr_rec_output.json'
            out_path = os.path.join(self.output_dir, res_file_name)
            logger.info('Save output result to {}'.format(out_path))
            with open(out_path, 'w') as f:
                json.dump(total_res, f)
        if self.return_res:
            return total_res
        return
-------------------------------------------------------------------------------- /deploy/ppcv/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /deploy/ppcv/utils/helper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def get_output_keys(cfg=None):
    """Collect the output key names of the registered (non-OUTPUT) ops.

    With cfg=None, returns {op_class_name: [keys]} for every registered op.
    With a pipeline cfg, returns a set of qualified "op_name.key" names,
    plus the builtin "input.image" / "input.video" entries.
    """
    registry = get_global_op()
    if cfg is None:
        output = dict()
        for op_cls_name, op_cls in registry.items():
            if op_cls.type() != 'OUTPUT':
                output.update({op_cls_name: op_cls.get_output_keys()})
        return output

    output = {'input.image', 'input.video'}
    for op in cfg:
        # Each cfg entry is a single-item dict {OpClassName: op_cfg}.
        op_arch = registry[list(op.keys())[0]]
        op_cfg = list(op.values())[0]
        if op_arch.type() == 'OUTPUT':
            continue
        for out_name in op_arch.get_output_keys():
            output.add(op_cfg['name'] + '.' + out_name)
    return output


def gen_input_name(input_keys, last_ops, output_keys):
    """Resolve each input key to a qualified "{last_op}.{key}" name.

    Raises ValueError when a name would be resolved twice, or when no
    last op provides the key among output_keys.
    """
    input_name = list()
    for key in input_keys:
        # for/else: the else branch runs only when no last op matched.
        for op in last_ops:
            name = op + '.' + key
            if name in input_name:
                raise ValueError("Repeat input: {}".format(name))
            if name in output_keys:
                input_name.append(name)
                break
        else:
            raise ValueError(
                "Input: {} could not be found from the last ops: {}. The outputs of these last ops are {}".
                format(key, last_ops, output_keys))
    return input_name
def setup_logger(name="ppcv", output=None):
    """
    Initialize logger and set its verbosity level to INFO.
    Args:
        name (str): the root module name of this logger
        output (str): a file name or a directory to save log. If None, will not save log file.
            If ends with ".txt" or ".log", assumed to be a file name.
            Otherwise, logs will be saved to `output/log.txt`.

    Returns:
        logging.Logger: a logger
    """
    logger = logging.getLogger(name)
    # Configure each named logger only once; later calls return it as-is.
    if name in logger_initialized:
        return logger

    logger.setLevel(logging.INFO)
    logger.propagate = False

    formatter = logging.Formatter(
        "[%(asctime)s] %(name)s %(levelname)s: %(message)s",
        datefmt="%m/%d %H:%M:%S")
    # stdout logging: master only
    local_rank = dist.get_rank()
    if local_rank == 0:
        ch = logging.StreamHandler(stream=sys.stdout)
        ch.setLevel(logging.DEBUG)
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    # file logging: all workers
    if output is not None:
        if output.endswith(".txt") or output.endswith(".log"):
            filename = output
        else:
            filename = os.path.join(output, "log.txt")
        if local_rank > 0:
            filename = filename + ".rank{}".format(local_rank)
        # Fixed: os.makedirs raised FileExistsError when the log directory
        # already existed, and failed on a bare file name with no directory
        # component.
        dirname = os.path.dirname(filename)
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        fh = logging.FileHandler(filename, mode='a')
        fh.setLevel(logging.DEBUG)
        # NOTE(review): the file handler uses a default Formatter (message
        # only), not `formatter` — confirm this is intentional.
        fh.setFormatter(logging.Formatter())
        logger.addHandler(fh)
    logger_initialized.append(name)
    return logger
class Times(object):
    """Simple accumulating stopwatch (values in seconds)."""

    def __init__(self):
        # accumulated elapsed time
        self.time = 0.
        # start time
        self.st = 0.
        # end time
        self.et = 0.

    def start(self):
        self.st = time.time()

    def end(self, repeats=1, accumulative=True):
        """Stop timing; divide the elapsed span by `repeats` and either
        accumulate it into, or overwrite, the stored value."""
        self.et = time.time()
        if accumulative:
            self.time += (self.et - self.st) / repeats
        else:
            self.time = (self.et - self.st) / repeats

    def reset(self):
        self.time = 0.
        self.st = 0.
        self.et = 0.

    def value(self):
        return round(self.time, 4)


class PipeTimer(Times):
    """Collects per-op and total inference timings for a pipeline.

    cfg is a list of single-entry dicts: [{OpClass: {'name': ..., ...}}].
    """

    def __init__(self, cfg):
        super(PipeTimer, self).__init__()
        self.total_time = Times()
        self.module_time = dict()
        for op in cfg:
            # Fixed: dict.values() is a view and is not subscriptable;
            # `op.values()['name']` raised TypeError. Take the single
            # op-config dict and read its 'name' field.
            op_cfg = list(op.values())[0]
            self.module_time.update({op_cfg['name']: Times()})

        self.img_num = 0

    def get_total_time(self):
        """Return (total_time, average_latency_per_image, qps)."""
        total_time = self.total_time.value()
        average_latency = total_time / max(1, self.img_num)
        qps = 0
        if total_time > 0:
            qps = 1 / average_latency
        return total_time, average_latency, qps

    def info(self):
        """Print a human-readable timing summary; returns qps."""
        total_time, average_latency, qps = self.get_total_time()
        print("------------------ Inference Time Info ----------------------")
        print("total_time(ms): {}, img_num: {}".format(total_time * 1000,
                                                       self.img_num))

        for k, v in self.module_time.items():
            v_time = round(v.value(), 4)
            if v_time > 0:
                # Fixed: guard against ZeroDivisionError when no image was
                # counted yet.
                print("{} time(ms): {}; per frame average time(ms): {}".format(
                    k, v_time * 1000, v_time * 1000 / max(1, self.img_num)))
        print("average latency time(ms): {:.2f}, QPS: {:2f}".format(
            average_latency * 1000, qps))
        return qps

    def report(self, average=False):
        """Return {op_name: seconds, 'total': ..., 'img_num': ...};
        per-image averages when average=True."""
        dic = {}
        # Fixed: iterating a dict yields only keys, so the original
        # two-target unpacking raised ValueError (and shadowed the `time`
        # module). Iterate .items() instead.
        for module_name, module_timer in self.module_time.items():
            dic[module_name] = round(
                module_timer.value() / max(1, self.img_num),
                4) if average else module_timer.value()
        dic['total'] = round(self.total_time.value() / max(1, self.img_num),
                             4) if average else self.total_time.value()
        dic['img_num'] = self.img_num
        return dic
def create_operators(params, class_num=None):
    """
    create operators based on the config

    Args:
        params(list): a dict list, used to create some operators
    """
    assert isinstance(params, list), ('operator config should be a list')
    ops = []
    for entry in params:
        # Each entry is a single-item dict: {OperatorName: kwargs-or-None}.
        assert isinstance(entry,
                          dict) and len(entry) == 1, "yaml format error"
        name = list(entry)[0]
        kwargs = {} if entry[name] is None else entry[name]
        factory = getattr(preprocess, name)
        # Operators whose constructor accepts class_num get it injected.
        if "class_num" in inspect.getfullargspec(factory).args:
            kwargs.update({"class_num": class_num})
        ops.append(factory(**kwargs))

    return ops
14 | 15 | from .augment import tia_perspective, tia_distort, tia_stretch 16 | 17 | __all__ = ['tia_distort', 'tia_stretch', 'tia_perspective'] 18 | -------------------------------------------------------------------------------- /deploy/python/dataaug/data/imaug/text_image_aug/augment.py: -------------------------------------------------------------------------------- 1 | # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """ 15 | This code is refer from: 16 | https://github.com/RubanSeven/Text-Image-Augmentation-python/blob/master/augment.py 17 | """ 18 | 19 | import numpy as np 20 | from .warp_mls import WarpMLS 21 | 22 | 23 | def tia_distort(src, segment=4): 24 | img_h, img_w = src.shape[:2] 25 | 26 | cut = img_w // segment 27 | thresh = cut // 3 28 | 29 | src_pts = list() 30 | dst_pts = list() 31 | 32 | src_pts.append([0, 0]) 33 | src_pts.append([img_w, 0]) 34 | src_pts.append([img_w, img_h]) 35 | src_pts.append([0, img_h]) 36 | 37 | dst_pts.append([np.random.randint(thresh), np.random.randint(thresh)]) 38 | dst_pts.append( 39 | [img_w - np.random.randint(thresh), 40 | np.random.randint(thresh)]) 41 | dst_pts.append( 42 | [img_w - np.random.randint(thresh), img_h - np.random.randint(thresh)]) 43 | dst_pts.append( 44 | [np.random.randint(thresh), img_h - np.random.randint(thresh)]) 45 | 46 | half_thresh = thresh * 0.5 47 | 48 | for cut_idx in np.arange(1, segment, 1): 49 | src_pts.append([cut * cut_idx, 0]) 50 | src_pts.append([cut * cut_idx, img_h]) 51 | dst_pts.append([ 52 | cut * cut_idx + np.random.randint(thresh) - half_thresh, 53 | np.random.randint(thresh) - half_thresh 54 | ]) 55 | dst_pts.append([ 56 | cut * cut_idx + np.random.randint(thresh) - half_thresh, 57 | img_h + np.random.randint(thresh) - half_thresh 58 | ]) 59 | 60 | trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h) 61 | dst = trans.generate() 62 | 63 | return dst 64 | 65 | 66 | def tia_stretch(src, segment=4): 67 | img_h, img_w = src.shape[:2] 68 | 69 | cut = img_w // segment 70 | thresh = cut * 4 // 5 71 | 72 | src_pts = list() 73 | dst_pts = list() 74 | 75 | src_pts.append([0, 0]) 76 | src_pts.append([img_w, 0]) 77 | src_pts.append([img_w, img_h]) 78 | src_pts.append([0, img_h]) 79 | 80 | dst_pts.append([0, 0]) 81 | dst_pts.append([img_w, 0]) 82 | dst_pts.append([img_w, img_h]) 83 | dst_pts.append([0, img_h]) 84 | 85 | half_thresh = thresh * 0.5 86 | 87 | for cut_idx in np.arange(1, segment, 1): 88 
| move = np.random.randint(thresh) - half_thresh 89 | src_pts.append([cut * cut_idx, 0]) 90 | src_pts.append([cut * cut_idx, img_h]) 91 | dst_pts.append([cut * cut_idx + move, 0]) 92 | dst_pts.append([cut * cut_idx + move, img_h]) 93 | 94 | trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h) 95 | dst = trans.generate() 96 | 97 | return dst 98 | 99 | 100 | def tia_perspective(src): 101 | img_h, img_w = src.shape[:2] 102 | 103 | thresh = img_h // 2 104 | 105 | src_pts = list() 106 | dst_pts = list() 107 | 108 | src_pts.append([0, 0]) 109 | src_pts.append([img_w, 0]) 110 | src_pts.append([img_w, img_h]) 111 | src_pts.append([0, img_h]) 112 | 113 | dst_pts.append([0, np.random.randint(thresh)]) 114 | dst_pts.append([img_w, np.random.randint(thresh)]) 115 | dst_pts.append([img_w, img_h - np.random.randint(thresh)]) 116 | dst_pts.append([0, img_h - np.random.randint(thresh)]) 117 | 118 | trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h) 119 | dst = trans.generate() 120 | 121 | return dst 122 | -------------------------------------------------------------------------------- /deploy/python/dataaug/data/postprocess/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import copy 15 | import importlib 16 | 17 | from . 
def build_postprocess(config):
    """Instantiate a postprocess object from a config dict.

    ``config["name"]`` names a class defined in this module; the remaining
    keys are forwarded to that class's constructor.
    """
    # deepcopy so the pop() below does not mutate the caller's config.
    config = copy.deepcopy(config)
    model_name = config.pop("name")
    mod = importlib.import_module(__name__)
    postprocess_func = getattr(mod, model_name)(**config)
    return postprocess_func


class DistillationPostProcess(object):
    """Postprocess for distillation outputs.

    Selects one sub-model's output from a dict of outputs (optionally a
    sub-key within it) and delegates to a regular postprocess class such as
    ``Topk``.
    """

    def __init__(self, model_name="Student", key=None, func="Topk", **kargs):
        super().__init__()
        # Resolve the delegate class by name from this module — the same
        # lookup build_postprocess uses — instead of eval(), which would
        # execute an arbitrary config-supplied string.
        mod = importlib.import_module(__name__)
        self.func = getattr(mod, func)(**kargs)
        self.model_name = model_name
        self.key = key

    def __call__(self, x, file_names=None):
        x = x[self.model_name]
        if self.key is not None:
            x = x[self.key]
        return self.func(x, file_names=file_names)
class ThreshOutput(object):
    """Binary-classification postprocess that thresholds P(class 1).

    A sample is reported as class 1 when its softmax probability for
    class 1 reaches ``threshold``; otherwise it is reported as class 0
    with score 1 - P(class 1).
    """

    def __init__(self, threshold, label_0="0", label_1="1"):
        self.threshold = threshold
        self.label_0 = label_0
        self.label_1 = label_1

    def __call__(self, x, file_names=None):
        probs = F.softmax(x, axis=-1).numpy()
        results = []
        for idx, row in enumerate(probs):
            score = row[1]
            positive = score >= self.threshold
            result = {
                "class_ids": [1] if positive else [0],
                "scores": [score] if positive else [1 - score],
                "label_names": [self.label_1 if positive else self.label_0],
            }
            if file_names is not None:
                result["file_name"] = file_names[idx]
            results.append(result)
        return results
class Topk(object):
    """Top-k classification postprocess.

    Args:
        topk: number of highest-probability classes to report per sample.
        class_id_map_file: optional text file of "<id><delimiter><name>"
            lines mapping class ids to label names.
        delimiter: separator used in the mapping file (default: space).
    """

    def __init__(self, topk=1, class_id_map_file=None, delimiter=None):
        assert isinstance(topk, (int, ))
        self.topk = topk
        self.delimiter = delimiter if delimiter is not None else " "
        self.class_id_map = self.parse_class_id_map(class_id_map_file)

    def parse_class_id_map(self, class_id_map_file):
        """Load {class_id: label_name} from file; return None when missing
        or unparsable (label_names will then be empty)."""
        if class_id_map_file is None:
            return None
        if not os.path.exists(class_id_map_file):
            print(
                "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!"
            )
            return None

        try:
            class_id_map = {}
            with open(class_id_map_file, "r") as fin:
                for line in fin:
                    # partition keeps everything after the first delimiter,
                    # so label names may themselves contain the delimiter.
                    partition = line.split("\n")[0].partition(self.delimiter)
                    class_id_map[int(partition[0])] = str(partition[-1])
        except Exception as ex:
            print(ex)
            class_id_map = None
        return class_id_map

    def __call__(self, x, file_names=None, multilabel=False):
        """Convert logits to per-sample result dicts.

        Args:
            x: paddle.Tensor of logits (or a dict holding it under 'logits').
            file_names: optional list parallel to the batch dimension.
            multilabel: when True, report every class with sigmoid prob >= 0.5
                instead of the top-k softmax classes.
        Returns:
            List of dicts with class_ids / scores / label_names (+ file_name).
        """
        if isinstance(x, dict):
            x = x['logits']
        assert isinstance(x, paddle.Tensor)
        if file_names is not None:
            assert x.shape[0] == len(file_names)
        x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x)
        x = x.numpy()
        y = []
        for idx, probs in enumerate(x):
            index = probs.argsort(axis=0)[-self.topk:][::-1].astype(
                "int32") if not multilabel else np.where(
                    probs >= 0.5)[0].astype("int32")
            clas_id_list = []
            score_list = []
            label_name_list = []
            for i in index:
                clas_id_list.append(i.item())
                score_list.append(probs[i].item())
                if self.class_id_map is not None:
                    label_name_list.append(self.class_id_map[i.item()])
            result = {
                "class_ids": clas_id_list,
                "scores": np.around(score_list, decimals=5).tolist(),
                # label_name_list is always a list (possibly empty), so the
                # key is always emitted — the old `is not None` check was dead.
                "label_names": label_name_list,
            }
            if file_names is not None:
                result["file_name"] = file_names[idx]
            y.append(result)
        return y


class MultiLabelTopk(Topk):
    """Multi-label variant of Topk: reports all classes with prob >= 0.5."""

    def __init__(self, topk=1, class_id_map_file=None):
        # Bug fix: forward the constructor arguments. Previously
        # super().__init__() was called with no args, silently discarding
        # both `topk` and `class_id_map_file`.
        super().__init__(topk=topk, class_id_map_file=class_id_map_file)

    def __call__(self, x, file_names=None):
        return super().__call__(x, file_names, multilabel=True)
14 | 15 | from python.dataaug.data.preprocess.ops.autoaugment import ImageNetPolicy as RawImageNetPolicy 16 | from python.dataaug.data.preprocess.ops.randaugment import RandAugment as RawRandAugment 17 | from python.dataaug.data.preprocess.ops.timm_autoaugment import RawTimmAutoAugment 18 | from python.dataaug.data.preprocess.ops.cutout import Cutout 19 | 20 | from python.dataaug.data.preprocess.ops.hide_and_seek import HideAndSeek 21 | from python.dataaug.data.preprocess.ops.random_erasing import RandomErasing 22 | from python.dataaug.data.preprocess.ops.grid import GridMask 23 | 24 | from python.dataaug.data.preprocess.ops.operators import DecodeImage 25 | from python.dataaug.data.preprocess.ops.operators import ResizeImage 26 | from python.dataaug.data.preprocess.ops.operators import CropImage 27 | from python.dataaug.data.preprocess.ops.operators import RandCropImage 28 | from python.dataaug.data.preprocess.ops.operators import RandCropImageV2 29 | from python.dataaug.data.preprocess.ops.operators import RandFlipImage 30 | from python.dataaug.data.preprocess.ops.operators import NormalizeImage 31 | from python.dataaug.data.preprocess.ops.operators import ToCHWImage 32 | from python.dataaug.data.preprocess.ops.operators import AugMix 33 | from python.dataaug.data.preprocess.ops.operators import Pad 34 | from python.dataaug.data.preprocess.ops.operators import ToTensor 35 | from python.dataaug.data.preprocess.ops.operators import Normalize 36 | from python.dataaug.data.preprocess.ops.operators import RandomHorizontalFlip 37 | from python.dataaug.data.preprocess.ops.operators import CropWithPadding 38 | from python.dataaug.data.preprocess.ops.operators import RandomInterpolationAugment 39 | from python.dataaug.data.preprocess.ops.operators import ColorJitter 40 | from python.dataaug.data.preprocess.ops.operators import RandomCropImage 41 | from python.dataaug.data.preprocess.ops.operators import Padv2 42 | 43 | import numpy as np 44 | from PIL import Image 45 | 
def transform(data, ops=None):
    """Apply a pipeline of preprocess operators to ``data`` in order."""
    # `ops` previously defaulted to a shared mutable list ([]); None is the
    # safe idiom and behaves identically for every existing caller.
    for op in (ops or []):
        data = op(data)
    return data


class AutoAugment(RawImageNetPolicy):
    """ImageNetPolicy wrapper that accepts both PIL images and ndarrays."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def __call__(self, img):
        # The underlying policy expects a PIL image; convert ndarray inputs.
        if not isinstance(img, Image.Image):
            img = np.ascontiguousarray(img)
            img = Image.fromarray(img)

        img = super().__call__(img)

        # Hand back an ndarray so downstream numpy-based ops keep working.
        if isinstance(img, Image.Image):
            img = np.asarray(img)

        return img


class RandAugment(RawRandAugment):
    """RandAugment wrapper that accepts both PIL images and ndarrays."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def __call__(self, img):
        # Same PIL round-trip as AutoAugment above.
        if not isinstance(img, Image.Image):
            img = np.ascontiguousarray(img)
            img = Image.fromarray(img)

        img = super().__call__(img)

        if isinstance(img, Image.Image):
            img = np.asarray(img)

        return img


class TimmAutoAugment(RawTimmAutoAugment):
    """TimmAutoAugment wrapper that accepts both PIL images and ndarrays
    and applies the augmentation with probability ``prob``.
    """

    def __init__(self, prob=1.0, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.prob = prob

    def __call__(self, img):
        if not isinstance(img, Image.Image):
            img = np.ascontiguousarray(img)
            img = Image.fromarray(img)
        # Skip the augmentation (but still normalize the return type)
        # with probability 1 - prob.
        if random.random() < self.prob:
            img = super().__call__(img)
        if isinstance(img, Image.Image):
            img = np.asarray(img)

        return img
class Cutout(object):
    """Cutout augmentation: zero out ``n_holes`` square patches of side
    ``length`` at uniformly random centers.

    Based on https://github.com/uoguelph-mlrg/Cutout
    (https://arxiv.org/abs/1708.04552). The image is modified in place.
    """

    def __init__(self, n_holes=1, length=112):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        """Cut out random patches of an HxW(xC) numpy image in place."""
        h, w = img.shape[:2]
        # (removed an unused h*w float32 mask that was allocated every call)
        for n in range(self.n_holes):
            y = np.random.randint(h)
            x = np.random.randint(w)

            # Clip the patch to the image borders.
            y1 = np.clip(y - self.length // 2, 0, h)
            y2 = np.clip(y + self.length // 2, 0, h)
            x1 = np.clip(x - self.length // 2, 0, w)
            x2 = np.clip(x + self.length // 2, 0, w)

            img[y1:y2, x1:x2] = 0
        return img
def int_parameter(level, maxval):
    """Helper function to scale `val` between 0 and maxval .
    Args:
        level: Level of the operation that will be between [0, `PARAMETER_MAX`].
        maxval: Maximum value that the operation can have. This will be scaled to
            level/PARAMETER_MAX.
    Returns:
        An int that results from scaling `maxval` according to `level`.
    """
    return int(level * maxval / 10)


def float_parameter(level, maxval):
    """Helper function to scale `val` between 0 and maxval.
    Args:
        level: Level of the operation that will be between [0, `PARAMETER_MAX`].
        maxval: Maximum value that the operation can have. This will be scaled to
            level/PARAMETER_MAX.
    Returns:
        A float that results from scaling `maxval` according to `level`.
    """
    return float(level) * maxval / 10.


def sample_level(n):
    # Draw a random operation level in [0.1, n).
    return np.random.uniform(low=0.1, high=n)


def autocontrast(pil_img, *args):
    # Maximize image contrast; `level` is ignored (hence *args).
    return ImageOps.autocontrast(pil_img)


def equalize(pil_img, *args):
    # Histogram-equalize the image; `level` is ignored.
    return ImageOps.equalize(pil_img)


def posterize(pil_img, level, *args):
    # Reduce bits per channel; higher level => fewer bits (4 down to 0).
    level = int_parameter(sample_level(level), 4)
    return ImageOps.posterize(pil_img, 4 - level)


def rotate(pil_img, level, *args):
    # Rotate up to +/-30 degrees; sign chosen at random.
    degrees = int_parameter(sample_level(level), 30)
    if np.random.uniform() > 0.5:
        degrees = -degrees
    return pil_img.rotate(degrees, resample=Image.BILINEAR)


def solarize(pil_img, level, *args):
    # Invert all pixels above the (256 - level) threshold.
    level = int_parameter(sample_level(level), 256)
    return ImageOps.solarize(pil_img, 256 - level)


def shear_x(pil_img, level):
    # Horizontal shear by up to +/-0.3 via an affine transform.
    level = float_parameter(sample_level(level), 0.3)
    if np.random.uniform() > 0.5:
        level = -level
    return pil_img.transform(pil_img.size,
                             Image.AFFINE, (1, level, 0, 0, 1, 0),
                             resample=Image.BILINEAR)


def shear_y(pil_img, level):
    # Vertical shear by up to +/-0.3 via an affine transform.
    level = float_parameter(sample_level(level), 0.3)
    if np.random.uniform() > 0.5:
        level = -level
    return pil_img.transform(pil_img.size,
                             Image.AFFINE, (1, 0, 0, level, 1, 0),
                             resample=Image.BILINEAR)


def translate_x(pil_img, level):
    # Horizontal translation by up to +/- one third of the image width.
    level = int_parameter(sample_level(level), pil_img.size[0] / 3)
    if np.random.random() > 0.5:
        level = -level
    return pil_img.transform(pil_img.size,
                             Image.AFFINE, (1, 0, level, 0, 1, 0),
                             resample=Image.BILINEAR)


def translate_y(pil_img, level):
    # Vertical translation by up to +/- one third of the image height.
    level = int_parameter(sample_level(level), pil_img.size[1] / 3)
    if np.random.random() > 0.5:
        level = -level
    return pil_img.transform(pil_img.size,
                             Image.AFFINE, (1, 0, 0, 0, 1, level),
                             resample=Image.BILINEAR)


# operation that overlaps with ImageNet-C's test set
def color(pil_img, level, *args):
    # Enhancement factor in [0.1, 1.9]: <1 desaturates, >1 saturates.
    level = float_parameter(sample_level(level), 1.8) + 0.1
    return ImageEnhance.Color(pil_img).enhance(level)


# operation that overlaps with ImageNet-C's test set
def contrast(pil_img, level, *args):
    # Enhancement factor in [0.1, 1.9].
    level = float_parameter(sample_level(level), 1.8) + 0.1
    return ImageEnhance.Contrast(pil_img).enhance(level)


# operation that overlaps with ImageNet-C's test set
def brightness(pil_img, level, *args):
    # Enhancement factor in [0.1, 1.9].
    level = float_parameter(sample_level(level), 1.8) + 0.1
    return ImageEnhance.Brightness(pil_img).enhance(level)


# operation that overlaps with ImageNet-C's test set
def sharpness(pil_img, level, *args):
    # Enhancement factor in [0.1, 1.9].
    level = float_parameter(sample_level(level), 1.8) + 0.1
    return ImageEnhance.Sharpness(pil_img).enhance(level)


# AugMix operation pool: only ops that do NOT overlap with ImageNet-C's
# corruptions (color/contrast/brightness/sharpness are defined above but
# deliberately excluded here).
augmentations = [
    autocontrast, equalize, posterize, rotate, solarize, shear_x, shear_y,
    translate_x, translate_y
]
class HideAndSeek(object):
    """Hide-and-Seek augmentation for CHW images.

    Picks one grid size at random and hides (zeros) each grid patch
    independently with probability ``hide_prob``. A grid size of 0 means
    no hiding for this call.

    Based on https://github.com/kkanshul/Hide-and-Seek (ICCV 2017).
    """

    def __init__(self):
        # Candidate patch sizes; 0 disables hiding for this call.
        self.grid_sizes = [0, 16, 32, 44, 56]
        # Probability that any single patch gets hidden.
        self.hide_prob = 0.5

    def __call__(self, img):
        grid_size = np.random.choice(self.grid_sizes)
        _, h, w = img.shape

        if grid_size == 0:
            return img

        # NOTE(review): the grid walks (w, h) but the slices index axes
        # (h, w) of the CHW tensor — the two spatial axes look swapped.
        # Harmless for square inputs; preserved as-is, confirm against
        # upstream before changing.
        for col in range(0, w, grid_size):
            col_end = min(w, col + grid_size)
            for row in range(0, h, grid_size):
                row_end = min(h, row + grid_size)
                if random.random() <= self.hide_prob:
                    img[:, col:col_end, row:row_end] = 0

        return img
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /deploy/python/dataaug/data/utils/get_image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def get_image_list(img_file):
    """Collect image file paths from a single file or a directory.

    Args:
        img_file: path to an image file, or a directory scanned (non-
            recursively) for files with a known image extension.
    Returns:
        Sorted list of image file paths.
    Raises:
        Exception: when the path does not exist or holds no image file.
    """
    imgs_lists = []
    if img_file is None or not os.path.exists(img_file):
        raise Exception("not found any img file in {}".format(img_file))

    img_end = ['jpg', 'png', 'jpeg', 'JPEG', 'JPG', 'bmp']
    if os.path.isfile(img_file) and img_file.split('.')[-1] in img_end:
        imgs_lists.append(img_file)
    elif os.path.isdir(img_file):
        for single_file in os.listdir(img_file):
            if single_file.split('.')[-1] in img_end:
                imgs_lists.append(os.path.join(img_file, single_file))
    if len(imgs_lists) == 0:
        raise Exception("not found any img file in {}".format(img_file))
    return sorted(imgs_lists)


def get_image_list_from_label_file(image_path, label_file_path):
    """Parse "<image_name> <int_label>" lines into parallel lists.

    Args:
        image_path: directory prepended to each image name.
        label_file_path: whitespace-separated label file.
    Returns:
        (image_paths, int_labels) parallel lists.
    """
    imgs_lists = []
    gt_labels = []
    # Iterate lazily instead of readlines(); also drop the redundant double
    # int() conversion the original performed per line.
    with open(label_file_path, "r") as fin:
        for line in fin:
            image_name, label = line.strip("\n").split()
            imgs_lists.append(os.path.join(image_path, image_name))
            gt_labels.append(int(label))
    return imgs_lists, gt_labels
class RecPredictor(Predictor):
    """Feature-extraction predictor built on the shared ``Predictor`` runtime.

    Preprocess operators and the (optional) postprocess are built from the
    ``FeatureExtract`` section of the config.
    """

    def __init__(self, config):
        super().__init__(config["Global"],
                         config["FeatureExtract"]["rec_inference_model_dir"])
        self.preprocess_ops = create_operators(
            config["FeatureExtract"]["RecPreProcess"]["transform_ops"])
        self.postprocess = build_postprocess(
            config["FeatureExtract"]["RecPostProcess"])

    def predict(self, images, feature_normalize=True):
        """Run inference on one image or a batch of images.

        Args:
            images: a single image (HWC ndarray) or a list of them; all
                images must preprocess to the same shape so they can be
                stacked into one batch.
            feature_normalize: L2-normalize each output row when True.
        Returns:
            The raw (or postprocessed, when a postprocess is configured)
            batch output.
        """
        input_names = self.predictor.get_input_names()
        input_tensor = self.predictor.get_input_handle(input_names[0])

        output_names = self.predictor.get_output_names()
        output_tensor = self.predictor.get_output_handle(output_names[0])

        if not isinstance(images, (list, )):
            images = [images]
        # Preprocess into a fresh list: the original overwrote images[idx]
        # in place, silently mutating the caller's list.
        batch = []
        for img in images:
            for ops in self.preprocess_ops:
                img = ops(img)
            batch.append(img)
        image = np.array(batch)

        input_tensor.copy_from_cpu(image)
        self.predictor.run()
        batch_output = output_tensor.copy_to_cpu()

        if feature_normalize:
            # Row-wise L2 normalization of the feature vectors.
            feas_norm = np.sqrt(
                np.sum(np.square(batch_output), axis=1, keepdims=True))
            batch_output = np.divide(batch_output, feas_norm)

        if self.postprocess is not None:
            batch_output = self.postprocess(batch_output)

        return batch_output


def main(config):
    """Batch-predict features for every image under Global.infer_imgs and
    print one "<file>:\t<result>" line per image."""
    rec_predictor = RecPredictor(config)
    image_list = get_image_list(config["Global"]["infer_imgs"])

    batch_imgs = []
    batch_names = []
    cnt = 0
    for idx, img_path in enumerate(image_list):
        img = cv2.imread(img_path)
        if img is None:
            logger.warning(
                "Image file failed to read and has been skipped. The path: {}".
                format(img_path))
        else:
            # BGR (OpenCV) -> RGB, which the preprocess pipeline expects.
            img = img[:, :, ::-1]
            batch_imgs.append(img)
            img_name = os.path.basename(img_path)
            batch_names.append(img_name)
            cnt += 1

        # Flush a full batch, and also the remainder on the last image.
        if cnt % config["Global"]["batch_size"] == 0 or (idx +
                                                         1) == len(image_list):
            if len(batch_imgs) == 0:
                continue

            batch_results = rec_predictor.predict(batch_imgs)
            for number, result_dict in enumerate(batch_results):
                filename = batch_names[number]
                print("{}:\t{}".format(filename, result_dict))
            batch_imgs = []
            batch_names = []

    return
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .math_utils import * 16 | from .renderer import * 17 | from .remaper import * 18 | from .liner import * 19 | from .data_utils import * 20 | -------------------------------------------------------------------------------- /deploy/python/dataaug/utils/get_image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def get_image_list(img_file):
    """Collect image file paths from a single file or a directory.

    Args:
        img_file: path to an image file, or a directory scanned (non-
            recursively) for files with a known image extension.
    Returns:
        Sorted list of image file paths.
    Raises:
        Exception: when the path does not exist or holds no image file.
    """
    imgs_lists = []
    if img_file is None or not os.path.exists(img_file):
        raise Exception("not found any img file in {}".format(img_file))

    img_end = ['jpg', 'png', 'jpeg', 'JPEG', 'JPG', 'bmp']
    if os.path.isfile(img_file) and img_file.split('.')[-1] in img_end:
        imgs_lists.append(img_file)
    elif os.path.isdir(img_file):
        for single_file in os.listdir(img_file):
            if single_file.split('.')[-1] in img_end:
                imgs_lists.append(os.path.join(img_file, single_file))
    if len(imgs_lists) == 0:
        raise Exception("not found any img file in {}".format(img_file))
    return sorted(imgs_lists)


def get_image_list_from_label_file(label_file_path, delimiter=' '):
    """Parse "<image><delimiter><label>" lines into parallel lists.

    Labels are kept as raw strings. Each line is split on the FIRST
    delimiter only, so labels that themselves contain the delimiter
    (e.g. OCR transcripts with spaces) no longer raise
    "too many values to unpack" as the unbounded split() did.
    """
    imgs_lists = []
    gt_labels = []
    with open(label_file_path, "r", encoding="utf-8") as fin:
        for line in fin:
            image_name, label = line.strip("\n").split(delimiter, 1)
            imgs_lists.append(image_name)
            gt_labels.append(label)
    return imgs_lists, gt_labels
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
import datetime

# Configure the root logger once at import time; every helper below routes
# through this handler/format.
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s %(levelname)s: %(message)s",
                    datefmt="%Y-%m-%d %H:%M:%S")


def time_zone(sec, fmt):
    # Converter hook for logging.Formatter: its (sec, fmt) arguments are
    # ignored and the current local time's struct_time is returned, so every
    # record is stamped with local "now".
    real_time = datetime.datetime.now()
    return real_time.timetuple()


# Install the converter globally for all Formatter instances.
logging.Formatter.converter = time_zone
_logger = logging.getLogger(__name__)

# ANSI escape sequences used by coloring().
Color = {
    'RED': '\033[31m',
    'HEADER': '\033[35m',  # deep purple
    'PURPLE': '\033[95m',  # purple
    'OKBLUE': '\033[94m',
    'OKGREEN': '\033[92m',
    'WARNING': '\033[93m',
    'FAIL': '\033[91m',
    'ENDC': '\033[0m'
}


def coloring(message, color="OKGREEN"):
    # Wrap `message` in an ANSI color code, but only when the
    # PADDLECLAS_COLORING env var is set; otherwise return it unchanged.
    assert color in Color.keys()
    if os.environ.get('PADDLECLAS_COLORING', False):
        return Color[color] + str(message) + Color["ENDC"]
    else:
        return message


def anti_fleet(log):
    """
    logs will print multi-times when calling Fleet API.
    Only display single log and ignore the others.
    """

    def wrapper(fmt, *args):
        # Only trainer 0 emits the log under distributed (Fleet) training;
        # single-process runs default PADDLE_TRAINER_ID to 0 and always log.
        if int(os.getenv("PADDLE_TRAINER_ID", 0)) == 0:
            log(fmt, *args)

    return wrapper


@anti_fleet
def info(fmt, *args):
    _logger.info(fmt, *args)


@anti_fleet
def warning(fmt, *args):
    # Warnings are rendered in red (when coloring is enabled).
    _logger.warning(coloring(fmt, "RED"), *args)


@anti_fleet
def error(fmt, *args):
    _logger.error(coloring(fmt, "FAIL"), *args)


def scaler(name, value, step, writer):
    """
    This function will draw a scalar curve generated by the visualdl.
    Usage: Install visualdl: pip3 install visualdl==2.0.0b4
    and then:
    visualdl --logdir ./scalar --host 0.0.0.0 --port 8830
    to preview loss corve in real time.
    """
    writer.add_scalar(tag=name, step=step, value=value)


def advertise():
    """
    Show the advertising message like the following:

    ===========================================================
    ==        EasyData is powered by PaddlePaddle !        ==
    ===========================================================
    ==                                                     ==
    ==   For more info please go to the following website. ==
    ==                                                     ==
    ==       https://github.com/PaddlePaddle/EasyData      ==
    ===========================================================

    """
    copyright = "EasyData is powered by PaddlePaddle !"
    ad = "For more info please go to the following website."
    website = "https://github.com/PaddlePaddle/EasyData"
    # Banner width follows the longest of the three lines plus padding.
    AD_LEN = 6 + len(max([copyright, ad, website], key=len))

    info(
        coloring(
            "\n{0}\n{1}\n{2}\n{3}\n{4}\n{5}\n{6}\n{7}\n".format(
                "=" * (AD_LEN + 4),
                "=={}==".format(copyright.center(AD_LEN)),
                "=" * (AD_LEN + 4),
                "=={}==".format(' ' * AD_LEN),
                "=={}==".format(ad.center(AD_LEN)),
                "=={}==".format(' ' * AD_LEN),
                "=={}==".format(website.center(AD_LEN)),
                "=" * (AD_LEN + 4),
            ), "RED"))
p.append(self.cfg['noise']['gauss']['fraction']) 24 | funcs.append(self.apply_gauss_noise) 25 | 26 | if self.cfg['noise']['uniform']['enable']: 27 | p.append(self.cfg['noise']['uniform']['fraction']) 28 | funcs.append(self.apply_uniform_noise) 29 | 30 | if self.cfg['noise']['salt_pepper']['enable']: 31 | p.append(self.cfg['noise']['salt_pepper']['fraction']) 32 | funcs.append(self.apply_sp_noise) 33 | 34 | if self.cfg['noise']['poisson']['enable']: 35 | p.append(self.cfg['noise']['poisson']['fraction']) 36 | funcs.append(self.apply_poisson_noise) 37 | 38 | if len(p) == 0: 39 | return img 40 | 41 | noise_func = np.random.choice(funcs, p=p) 42 | 43 | return noise_func(img) 44 | 45 | def apply_gauss_noise(self, img): 46 | """ 47 | Gaussian-distributed additive noise. 48 | """ 49 | mean = 0 50 | stddev = np.sqrt(15) 51 | gauss_noise = np.zeros(img.shape) 52 | cv2.randn(gauss_noise, mean, stddev) 53 | out = img + gauss_noise 54 | 55 | return out 56 | 57 | def apply_uniform_noise(self, img): 58 | """ 59 | Apply zero-mean uniform noise 60 | """ 61 | imshape = img.shape 62 | alpha = 0.05 63 | gauss = np.random.uniform(0 - alpha, alpha, imshape) 64 | gauss = gauss.reshape(*imshape) 65 | out = img + img * gauss 66 | return out 67 | 68 | def apply_sp_noise(self, img): 69 | """ 70 | Salt and pepper noise. Replaces random pixels with 0 or 255. 71 | """ 72 | s_vs_p = 0.5 73 | amount = np.random.uniform(0.004, 0.01) 74 | out = np.copy(img) 75 | # Salt mode 76 | num_salt = np.ceil(amount * img.size * s_vs_p) 77 | coords = [ 78 | np.random.randint(0, i - 1, int(num_salt)) for i in img.shape 79 | ] 80 | out[coords] = 255. 81 | # Pepper mode 82 | num_pepper = np.ceil(amount * img.size * (1. - s_vs_p)) 83 | coords = [ 84 | np.random.randint(0, i - 1, int(num_pepper)) for i in img.shape 85 | ] 86 | out[coords] = 0 87 | return out 88 | 89 | def apply_poisson_noise(self, img): 90 | """ 91 | Poisson-distributed noise generated from the data. 
92 | """ 93 | vals = len(np.unique(img)) 94 | vals = 2**np.ceil(np.log2(vals)) 95 | 96 | if vals < 0: 97 | return img 98 | 99 | noisy = np.random.poisson(img * vals) / float(vals) 100 | return noisy 101 | -------------------------------------------------------------------------------- /deploy/python/dataaug/utils/remaper.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is refer from: 3 | https://github.com/Sanster/text_renderer/blob/master/textrenderer/remaper.py 4 | """ 5 | 6 | # coding=utf-8 7 | import random 8 | import cv2 9 | import numpy as np 10 | 11 | 12 | class Remaper(object): 13 | 14 | def __init__(self, cfg): 15 | self.cfg = cfg 16 | 17 | def apply(self, word_img, text_box_pnts, word_color): 18 | """ 19 | :param word_img: word image with big background 20 | :param text_box_pnts: left-top, right-top, right-bottom, left-bottom of text word 21 | :return: 22 | """ 23 | max_val = np.random.uniform(self.cfg['curve']['min'], 24 | self.cfg['curve']['max']) 25 | 26 | h = word_img.shape[0] 27 | w = word_img.shape[1] 28 | 29 | img_x = np.zeros((h, w), np.float32) 30 | img_y = np.zeros((h, w), np.float32) 31 | 32 | xmin = text_box_pnts[0][0] 33 | xmax = text_box_pnts[1][0] 34 | ymin = text_box_pnts[0][1] 35 | ymax = text_box_pnts[2][1] 36 | 37 | remap_y_min = ymin 38 | remap_y_max = ymax 39 | 40 | for y in range(h): 41 | for x in range(w): 42 | remaped_y = y + self._remap_y(x, max_val) 43 | 44 | if y == ymin: 45 | if remaped_y < remap_y_min: 46 | remap_y_min = remaped_y 47 | 48 | if y == ymax: 49 | if remaped_y > remap_y_max: 50 | remap_y_max = remaped_y 51 | 52 | # 某一个位置的 y 值应该为哪个位置的 y 值 53 | img_y[y, x] = remaped_y 54 | # 某一个位置的 x 值应该为哪个位置的 x 值 55 | img_x[y, x] = x 56 | 57 | remaped_text_box_pnts = [[xmin, remap_y_min], [xmax, remap_y_min], 58 | [xmax, remap_y_max], [xmin, remap_y_max]] 59 | 60 | # TODO: use cuda::remap 61 | dst = cv2.remap(word_img, img_x, img_y, cv2.INTER_CUBIC) 62 | return dst, 
remaped_text_box_pnts 63 | 64 | def _remap_y(self, x, max_val): 65 | return int(max_val * 66 | np.math.sin(2 * 3.14 * x / self.cfg['curve']['period'])) 67 | -------------------------------------------------------------------------------- /deploy/python/dataaug/utils/utility.py: -------------------------------------------------------------------------------- 1 | # copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
import os
import sys

# Make the deploy/ package importable when this module is used directly.
parent = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.abspath(os.path.join(parent, '../deploy/')))

try:
    from python.dataaug.utils import logger
except ImportError:
    # Fallback: stdlib logging keeps this module usable outside deploy/.
    import logging as logger


def get_label(data_file, delimiter=" "):
    """Read ``<path><delimiter><label>`` lines and map basename -> label.

    Args:
        data_file (str): Path of the label list file.
        delimiter (str): Separator between image path and label.

    Returns:
        dict[str, str]: Mapping from image file name (basename) to label.
    """
    all_label = {}
    with open(data_file, "r", encoding="utf-8") as f:
        for line in f.readlines():
            # maxsplit=1 tolerates labels that contain the delimiter.
            path, label = line.strip().split(delimiter, 1)
            path = path.split("/")[-1]
            all_label[path] = label
    return all_label


def rm_repeat(all_label, save_list, compare_file, out_file, thresh, delimiter):
    """Filter near-duplicate samples using a tab-separated similarity file.

    Each line of ``compare_file`` is ``query\\t<gallery...>\\tscore``. Kept
    queries are appended (with their label from ``all_label``) to
    ``out_file``; ``save_list`` is mutated to record what was already kept.

    Returns:
        int: Number of lines written to ``out_file``.
    """
    count = 0
    with open(out_file, "w", encoding="utf-8") as new_aug_file:
        with open(compare_file, "r", encoding="utf-8") as f:
            for line in f.readlines():
                fields = line.strip().split("\t")
                query = fields[0]
                gallery = fields[1:-1]
                score = fields[-1]
                path = query.split("/")[-1]
                # NOTE(review): ``(gallery or query) not in save_list`` only
                # tests ONE of the two (gallery if non-empty, else query);
                # it likely intended "neither in save_list". Also, lines
                # with score exactly equal to ``thresh`` are dropped.
                # Behavior preserved as-is -- confirm before changing.
                if float(score) > thresh and (gallery
                                              or query) not in save_list:
                    count += 1
                    save_list.append(gallery)
                    save_list.append(query)
                    new_aug_file.write(query + delimiter +
                                       str(all_label[path]) + "\n")
                elif float(score) < thresh:
                    count += 1
                    save_list.append(query)
                    new_aug_file.write(query + delimiter +
                                       str(all_label[path]) + "\n")
    return count


def check_dir(path):
    """Create the parent directory of ``path`` if it does not exist."""
    parent_dir = os.path.dirname(path)
    if len(parent_dir) < 1:
        return
    # exist_ok avoids the exists()/makedirs() TOCTOU race of the original.
    os.makedirs(parent_dir, exist_ok=True)
    return


def concat_file(label_dir, all_file):
    """Merge every regular file under ``label_dir`` into ``all_file``.

    Args:
        label_dir (str): Directory whose files are concatenated.
        all_file (str): Output path; its parent directory is created.

    Returns:
        str: ``all_file``, for convenience.
    """
    filenames = os.listdir(label_dir)
    assert len(filenames) > 0, "Can not find any file in {}".format(label_dir)
    check_dir(all_file)
    # ``with`` guarantees both handles are closed even on error; the
    # original leaked the output handle on exceptions and never closed
    # the per-file input handles.
    with open(all_file, 'w', encoding="utf-8") as f:
        for filename in filenames:
            filepath = os.path.join(label_dir, filename)
            if not os.path.isfile(filepath):
                continue
            logger.info("{} will be merged to {}".format(filename, all_file))
            with open(filepath, encoding="utf-8") as src:
                for line in src:
                    if len(line) != 0:
                        f.writelines(line)
    return all_file


# ----------------------------------------------------------------------
# /deploy/python/dataclean/__init__.py
# ----------------------------------------------------------------------
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class DataClean(object):
    """Run every configured cleaning pipeline over one input path."""

    def __init__(self, args):
        # args: argparse.Namespace carrying at least ``input`` and
        # ``config`` -- TODO confirm against the CLI entry point.
        self.input = os.path.abspath(args.input)
        self.model_list = self.build_pipeline(args)

    def build_pipeline(self, args):
        """Instantiate one ppcv Pipeline per entry of the DataClean config."""
        # Deferred imports: ppcv and the deploy-local utils package only
        # exist inside the packaged deploy/ environment.
        from ppcv.engine.pipeline import Pipeline
        from utils.utils import load_yaml

        config = load_yaml(args.config)
        # Drop the meta key; every remaining key maps to a pipeline yaml.
        config.pop("DataClean")
        model_list = []
        for model in config.keys():
            args.config = config[model]  # each Pipeline reads its own yaml
            model_list.append(Pipeline(args))
        return model_list

    def run(self):
        for model in self.model_list:
            model.run(self.input)


# ----------------------------------------------------------------------
# /deploy/python/dataclean/demos/paddleclas_demo.py
# ----------------------------------------------------------------------
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import cv2
from easydata import EasyData
from paddleclas import PaddleClas


def main(img_path):
    """Classify an image after auto-correcting its orientation."""
    # Orientation model predicts how many 90-degree turns the image needs.
    orientation = EasyData(model="image_orientation",
                           device="cpu",
                           return_res=True,
                           print_res=False)
    classifier = PaddleClas(model_name="PPLCNet_x0_25")

    rotation_id = orientation.predict(img_path)[0]["class_ids"]

    # BGR -> RGB, then undo the detected rotation before classifying.
    image = cv2.imread(img_path)[:, :, ::-1]
    image = np.rot90(image, -1 * rotation_id)

    print(next(classifier.predict(image)))


if __name__ == "__main__":
    img_path = "./easydata_demo_imgs/image_orientation/1.jpg"
    main(img_path)


# ----------------------------------------------------------------------
# /deploy/python/dataclean/demos/paddleocr_demo.py
# ----------------------------------------------------------------------
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import cv2
from PIL import Image

from easydata import EasyData
from paddleocr import PaddleOCR, draw_ocr


def main(img_path):
    """Run PP-OCR on an orientation-corrected image and save a visualization."""
    # Orientation model predicts how many 90-degree turns the image needs.
    orientation = EasyData(model="image_orientation",
                           device="cpu",
                           return_res=True,
                           print_res=False)
    ocr = PaddleOCR(use_angle_cls=True, lang="ch")

    rotation_id = orientation.predict(img_path)[0]["class_ids"]

    # BGR -> RGB, then undo the detected rotation before recognition.
    corrected = cv2.imread(img_path)[:, :, ::-1]
    corrected = np.rot90(corrected, -1 * rotation_id)

    detections = ocr.ocr(corrected, cls=True)[0]
    for item in detections:
        print(item)

    # Collect boxes / texts / confidences for the overlay rendering.
    boxes, txts, scores = [], [], []
    for item in detections:
        boxes.append(item[0])
        txts.append(item[1][0])
        scores.append(item[1][1])

    canvas = Image.open(img_path).convert('RGB')
    overlay = draw_ocr(canvas, boxes, txts, scores, font_path='./font.ttf')
    Image.fromarray(overlay).save('result.jpg')


if __name__ == "__main__":
    img_path = "./easydata_demo_imgs/image_orientation/3.jpg"
    main(img_path)


# ----------------------------------------------------------------------
# /deploy/utils/__init__.py
# ----------------------------------------------------------------------
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /deploy/utils/label_map/clarity_assessment_label_list.txt: -------------------------------------------------------------------------------- 1 | 0 clarity 2 | 1 blured 3 | -------------------------------------------------------------------------------- /deploy/utils/label_map/code_exists_label_list.txt: -------------------------------------------------------------------------------- 1 | 0 no code 2 | 1 contains code 3 | -------------------------------------------------------------------------------- /deploy/utils/label_map/image_orientation_label_list.txt: -------------------------------------------------------------------------------- 1 | 0 0° 2 | 1 90° 3 | 2 180° 4 | 3 270° -------------------------------------------------------------------------------- /deploy/utils/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

import yaml


def load_yaml(yaml_file):
    """Load a YAML file into a Python object.

    Args:
        yaml_file (str): Path to the YAML configuration file.

    Returns:
        Parsed YAML content (typically a dict).
    """
    # utf-8 is forced so configs containing non-ASCII text (the project
    # docs and configs use Chinese) load on every platform; the default
    # locale codec breaks on Windows. SafeLoader never executes tags.
    with open(yaml_file, 'r', encoding='utf-8') as f:
        yaml_data = yaml.load(f, Loader=yaml.SafeLoader)
    return yaml_data

# -------------------------------------------------------------------------
# /docs/en/.gitkeep:
# -------------------------------------------------------------------------
# https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/docs/en/.gitkeep
# -------------------------------------------------------------------------
# /docs/images/PP-DataAug/.gitkeep:
# -------------------------------------------------------------------------
# https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/docs/images/PP-DataAug/.gitkeep
# -------------------------------------------------------------------------
# /docs/zh_CN/DataClean/DataClean.md:
# -------------------------------------------------------------------------
# # DataClean
#
# ------
#
# EasyData 是基于飞桨开发的数据处理工具,旨在帮助视觉开发者在模型开发的过程中更好的处理数据,数据清洗工具(DataClean)是 EasyData 的子模块,其主要帮助开发者可以更好的提升数据质量或者筛选和过滤低质数据。数据清洗工具可以应用到部署时数据的预处理中,可以在增加很少推理时间的情况下大幅增加精度。也可以应用到训练数据、测试数据的筛选过滤中,结合相关的后处理,不仅可以进一步增加模型的精度,也可以增加相关产品的满意度。
#
# 作为可插拔的模块,DataClean 可以嵌到任何视觉任务中,其功能可视化如下:
#
# <div align="center">
10 | 11 |
12 | 13 | 目前,数据清洗模块包含图像方向矫正、模糊图像过滤、二维码图像过滤,相关的模型介绍及模型下载链接如下: 14 | 15 | | 类别 | 亮点 | 文档说明 | 模型下载 | 16 | | :--: | :--: | :------: | :------: | 17 | |图像方向矫正|自动矫正图像,大大提升多项视觉任务在旋转图像上精度|[文档](image_orientation_correction.md)|[下载链接](https://paddleclas.bj.bcebos.com/models/PULC/inference/image_orientation_infer.tar)| 18 | |模糊图像过滤|判断图像是否模糊,可以广泛应用于模糊图像过滤、视觉相关业务的前处理等|[文档](blured_image_filtering.md)|[下载链接](https://paddleclas.bj.bcebos.com/models/PULC/inference/clarity_assessment_infer.tar)| 19 | |二维码图像过滤|判断图像是否含有二维码、条形码、小程序码,可以广泛应用于二维码、条形码、小程序码过滤、审核等业务|[文档](code_image_filtering.md)|[下载链接](https://paddleclas.bj.bcebos.com/models/PULC/inference/code_exists_infer.tar)| 20 | -------------------------------------------------------------------------------- /docs/zh_CN/DataClean/blured_image_filtering.md: -------------------------------------------------------------------------------- 1 | # 模糊图像过滤 2 | 3 | ------ 4 | 5 | 6 | ## 目录 7 | 8 | - [1. 背景](#1) 9 | - [2. 模型](#2) 10 | - [2.1 训练](#2.1.1) 11 | - [2.2 部署](#2.1.3) 12 | - [3. 视觉任务表现](#3) 13 | - [3.1 验证集指标](#3.1) 14 | - [3.2 ImageNet 数据指标](#3.2) 15 | - [3.3 COCO 数据指标](#3.3) 16 | - [4. 总结](#4) 17 | 18 | 19 | 20 | ## 1. 背景 21 | 22 | 近些年计算机视觉领域迎来了蓬勃的发展,相关的算法也在各种业务上落地,产生了巨大的价值。在落地过程中,数据整理、模型训练、部署后处理等开发环节都至关重要,而在这些重要的环节中,数据的质量可谓是重中之重,其好坏直接决定了最终落地业务的性能。基于此,我们开源了数据清洗的工具,其可以在模型开发的任何阶段使用。在数据整理阶段,其可以通过这个工具筛选掉潜在的低质数据,在模型训练阶段,该工具可以成为数据预处理的一部分,在模型部署及后处理过程中,其可以通过后处理逻辑将低质图片过滤,间接提升模型精度,从而提升用户整体体验。本教程主要介绍数据清洗工具中模糊图像过滤的相关内容。该工具在视觉任务中的使用方式如下图所示: 23 | 24 |
25 | 26 |
27 | 28 | 为了使用户方便地使用该工具,我们尽可能提升了该工具的易用性,模型方面,我们使用了非常轻量的模型,在CPU上也可以很快的运行。在使用方面,我们提供了Whl包的使用方式,用户可以方便地集成到Python代码或者shell中。 29 | 30 | 31 | 32 | ## 2. 模型 33 | 34 | 在方案选型上,我们选择了图像分类的方案,其容易训练、部署,模型大小可以做到非常小,且效果也更容易保证。 35 | 36 | 37 | #### 2.1 训练 38 | 39 | 此处我们希望得到一个高精度且轻量级的二分类模型,所以我们采用了PaddleClas的PULC方案,该任务的训练详情可以参考[PULC模糊清晰图像分类](@clas_pulc)。 40 | 41 | 42 | #### 2.2 部署 43 | 44 | 我们可以很方便地使用EasyData的whl包进行部署,关于其部署方法,可以参考[模糊图像过滤部署](quick_start.md#212)。 45 | 46 | 47 | 48 | ## 3 视觉任务表现 49 | 50 | 51 | #### 3.1 验证集指标 52 | 53 | 在图像清晰模糊二分类训练中,我们的评估数据是来自[blur dataset](https://github.com/Kwentar/blur_dataset),其含有700张模糊图像,350张非模糊图像。该数据均来自真实场景拍摄,其中模糊图像有运动模糊和对焦模糊两种。我们对训练好的模型的评估,指标如下: 54 | 55 | | Accuracy | 模型存储 | 推理时间 | 56 | | :--: | :--: | :--: | 57 | | 95.3% | 7M | 2.1ms | 58 | 59 | 60 | #### 3.2 ImageNet 数据指标 61 | 62 | 我们将模糊过滤模型同样在ImageNet数据上进行了评测,我们将ImageNet val数据(50000条)和[3.1.1](#3.1.1)中的模糊数据(700条)混合起来一起评测。在真实使用场景中,往往在卡特定误报率(FPR)的情况下希望召回率(TPR)越高越好。在该例子中,我们希望ImageNet val的数据尽量被判定为清晰图片,在此基础上,尽可能将模糊图片召回。我们评测了不同阈值下的FPR和TPR,相关指标如下: 63 | 64 | | 阈值 | FPR| TPR | 65 | | :--: | :--: | :--: | 66 | | 0.5 | 29.2% | 96.7% | 67 | | 0.7 | 15.8% | 92.4% | 68 | | 0.8 | 10.1% | 85.4%| 69 | | 0.9 | 4.5% | 71.0% | 70 | | 0.95 | 1.8% | 51.5% | 71 | 72 | **备注:** 73 | 74 | - 该表格中的每一行的含义为在卡阈值的基础上,FPR和TPR的情况。如最后一行表示:将输出的score大于0.95判定为模糊图像,小于0.95判定为清晰图像,在此基础上,FPR为1.8%,即清晰的图像(ImageNet val)有1.8%被判定为了模糊的图像;TPR为51.5%,即模糊的图像(blur dataset中模糊图像)有51.5%被成功识别出。在具体的场景中,阈值需要根据实际情况调整。 75 | 76 | - 我们对ImageNet val的图片做了假设,默认其均为清晰图片,事实上会有部分模糊图片,所以在真实场景中,该TPR指标会更高。 77 | 78 | 79 | 80 | #### 3.3 COCO数据指标 81 | 82 | 我们将模糊过滤模型同样在COCO数据上进行了评测,我们将COCO val数据(5000条)和[3.1](#3.1)中的模糊数据(700条)混合起来一起评测。在真实使用场景中,往往在卡特定误报率(FPR)的情况下希望召回率(TPR)越高越好。在该例子中,我们希望COCO val的数据尽量被判定为清晰图片,在此基础上,尽可能将模糊图片召回。我们评测了不同阈值下的FPR和TPR,相关指标如下: 83 | 84 | | 阈值 | FPR| TPR | 85 | | :--: | :--: | :--: | 86 | | 0.5 | 20.1% | 96.7% | 87 | | 0.7 | 9.8% | 92.4% | 88 | | 0.8 | 5.6% | 85.4%| 89 | | 0.9 | 2.2% | 71.0% | 90 | | 0.95 | 0.80% | 51.5% | 91 | 92 | 93 | 94 | ## 4 总结 
95 | 96 | 此文档介绍了EasyData数据清洗模块中模糊图像过滤相关的模型介绍、模型训练、模型使用方法以及模型的指标。该能力将会不断建设,敬请期待。 97 | -------------------------------------------------------------------------------- /docs/zh_CN/DataClean/code_image_filtering.md: -------------------------------------------------------------------------------- 1 | # 二维码图像过滤 2 | 3 | ------ 4 | 5 | 6 | ## 目录 7 | 8 | - [1. 背景](#1) 9 | - [2. 模型](#2) 10 | - [2.1 训练](#2.1) 11 | - [2.2 部署](#2.2) 12 | - [3. 视觉任务表现](#3) 13 | - [3.1 验证集指标](#3.1) 14 | - [3.2 ImageNet 数据指标](#3.2) 15 | - [3.3 COCO 数据指标](#3.3) 16 | - [4. 总结](#4) 17 | 18 | 19 | 20 | ## 1. 背景 21 | 22 | 近些年计算机视觉领域迎来了蓬勃的发展,相关的算法也在各种业务上落地,产生了巨大的价值。在落地过程中,数据整理、模型训练、部署后处理等开发环节都至关重要,而在这些重要的环节中,数据的质量可谓是重中之重,其好坏直接决定了最终落地业务的性能。基于此,我们开源了数据清洗的工具,其可以在模型开发的任何阶段使用。在数据整理阶段,其可以通过这个工具筛选掉潜在的低质数据,在模型训练阶段,该工具可以成为数据预处理的一部分,在模型部署及后处理过程中,其可以通过后处理逻辑将低质图片过滤,间接提升模型精度,从而提升用户整体体验。本教程主要介绍数据清洗工具中二维码图像过滤的相关内容。该工具在视觉任务中的使用方式如下图所示: 23 | 24 |
25 | 26 |
27 | 28 | 为了使用户方便地使用该工具,我们尽可能提升了该工具的易用性,模型方面,我们使用了非常轻量的模型,在CPU上也可以很快的运行。在使用方面,我们提供了Whl包的使用方式,用户可以方便地集成到Python代码或者shell中。 29 | 30 | 31 | 32 | ## 2. 模型 33 | 34 | 在方案选型上,我们选择了图像分类的方案,其容易训练、部署,模型大小可以做到非常小,且效果也更容易保证。 35 | 36 | 37 | #### 2.1 训练 38 | 39 | 此处我们希望得到一个高精度且轻量级的二分类模型,所以我们采用了PaddleClas的PULC方案,该任务的训练详情可以参考[PULC有无二维码图像分类](@clas_pulc)。 40 | 41 | 42 | #### 2.2 部署 43 | 44 | 我们可以很方便地使用 EasyData 的 whl 包进行部署,关于其部署方法,可以参考[二维码图像过滤部署](quick_start.md#232)。 45 | 46 | 47 | 48 | ## 3 视觉任务表现 49 | 50 | 51 | #### 3.1 验证集指标 52 | 53 | 在图像有无二维码训练中,我们的评估数据是来自真实采集的数据,其含有2249条含二维码的图像数据,2554条不含二维码的图像数据。其中,二维码包含二维码、条形码、小程序码。我们对训练好的模型的评估,指标如下: 54 | 55 | | Accuracy | 模型存储 | 推理时间 | 56 | | :--: | :--: | :--: | 57 | | 94.9% | 7M | 2.1ms | 58 | 59 | 60 | #### 3.2 ImageNet 数据指标 61 | 62 | 我们将二维码过滤模型同样在ImageNet数据上进行了评测,我们将ImageNet val数据(50000条)和[3.1](#3.1)中的二维码数据(2249条)混合起来一起评测。在真实使用场景中,往往在卡特定误报率(FPR)的情况下希望召回率(TPR)越高越好。在该例子中,我们希望ImageNet val的数据尽量被判定为非二维码图片,在此基础上,尽可能将二维码的图像召回。我们评测了不同阈值下的FPR和TPR,相关指标如下: 63 | 64 | 65 | | 阈值 | FPR| TPR | 66 | | :--: | :--: | :--: | 67 | | 0.5 | 2.3% | 92.7% | 68 | | 0.7 | 1.2% | 90.9% | 69 | | 0.8 | 0.84% | 90.0%| 70 | | 0.9 | 0.48% | 88.0% | 71 | | 0.95 | 0.31% | 86.1% | 72 | 73 | 74 | **备注:** 75 | 76 | - 该表格中的每一行的含义为在卡阈值的基础上,FPR和TPR的情况。如最后一行表示:将输出的score大于0.95判定为二维码图像,小于0.95判定为非二维码图像,在此基础上,FPR为0.31%,即非二维码的图像(ImageNet val)有0.31%被判定为了二维码的图像;TPR为86.1%,即二维码的图像有86.1%被成功识别出。在具体的场景中,阈值需要根据实际情况调整。 77 | 78 | - 我们对ImageNet val的图片做了假设,默认其均为非二维码图像,事实上会有部分二维码,所以在真实场景中,该TPR指标会更高。 79 | 80 | 81 | #### 3.3 COCO数据指标 82 | 83 | 我们将二维码过滤模型同样在COCO数据上进行了评测,我们将COCO val数据(5000条)和[3.1](#3.1)中的二维码数据(2249条)混合起来一起评测。在真实使用场景中,往往在卡特定误报率(FPR)的情况下希望召回率(TPR)越高越好。在该例子中,我们希望COCO val的数据尽量被判定为非二维码图片,在此基础上,尽可能将二维码图像召回。我们评测了不同阈值下的FPR和TPR,相关指标如下: 84 | 85 | 86 | | 阈值 | FPR| TPR | 87 | | :--: | :--: | :--: | 88 | | 0.5 | 2.7% | 92.7% | 89 | | 0.7 | 1.4% | 90.9% | 90 | | 0.8 | 1.0% | 90.0%| 91 | | 0.9 | 0.42% | 88.0% | 92 | | 0.95 | 0.2% | 86.1% | 93 | 94 | 95 | 96 | ## 4 总结 97 | 98 | 此文档介绍了 EasyData 
数据质量提升模块中二维码图像过滤相关的模型介绍、模型训练、模型使用方法以及模型的指标。该能力将会不断建设,敬请期待。 99 | -------------------------------------------------------------------------------- /docs/zh_CN/DataClean/image_orientation_correction.md: -------------------------------------------------------------------------------- 1 | # 图像方向矫正 2 | 3 | ------ 4 | 5 | 6 | ## 目录 7 | 8 | 9 | - [1. 背景](#1) 10 | - [2. 模型](#2) 11 | - [2.1 训练](#2.1) 12 | - [2.2 部署](#2.2) 13 | - [3. 视觉任务表现](#3) 14 | - [3.1 图像分类](#3.1) 15 | - [3.2 目标检测](#3.2) 16 | - [3.3 OCR端到端识别](#3.3) 17 | 18 | 19 | ## 1. 背景 20 | 21 | 深度学习在计算机视觉领域已经取得了较大的发展,在视觉场景的诸多任务上已有广泛应用,我们针对模型部署中面临的数据质量问题,提出了 DataClean 解决方案。 22 | 23 | 深度学习为数据驱动,模型效果极大地依赖于训练数据,而通常情况下训练数据集是针对特定问题进行设计的,这样训练得到的模型通常针对预设问题具有较好效果,但是在真实的部署场景中往往存在大量的低质量数据,这些数据极大地影响了模型的预测效果,比如图像方向问题。目前大部分视觉任务数据集均预设图像方向为正,这一点在大多真实场景中无法确保,因此其他方向的数据在预测时往往得不到预期效果,如果强行将图像方向考虑到训练数据集设计中,则又会对模型预测效果带来负面影响,或是需要更大体量的模型影响推理速度,针对上述问题,DataClean 提供了图像方向矫正模型,该模型能够对图像方向进行分类预测,同时该模型极为轻量,对系统推理速度影响较小。图像方向矫正模型的使用示例如下图所示: 24 | 25 |
26 | 27 |
28 | 29 | 30 | ## 2. 模型介绍 31 | 32 | 在本小节我们会介绍图像方向矫正模型从训练到部署的全部流程。 33 | 34 | 35 | #### 2.1 训练 36 | 37 | 图像方向矫正模型本质为图像分类任务,同时考虑到模型需要足够轻量,因此我们采用了 PaddleClas 的 PULC 方案,训练详情可以参考[PULC图像方向分类](https://github.com/PaddlePaddle/PaddleClas/blob/develop/docs/zh_CN/models/PULC/PULC_image_orientation.md)。 38 | 39 | 40 | #### 2.2 部署 41 | 42 | 对于图像方向矫正模型的使用方法,我们提供了 EasyData whl 的方式,支持命令行直接预测以及在 Python 脚本中调用的方式,具体可以参考[图像方向矫正模型部署](quick_start.md#211)。 43 | 44 | 45 | ## 3. 视觉任务表现 46 | 47 | 图像方向矫正模型对下游任务改善显著,我们在图像分类、目标检测、OCR等任务上进行了评估,具体指标如下: 48 | 49 | 50 | ### 3.1 图像分类 51 | 52 | 对于图像分类任务,我们基于 ImageNet1k 数据集进行了评测,模型使用 [PP-LCNet_x1_0](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.5/docs/zh_CN/models/ImageNet1k/PP-LCNet.md),具体效果如下表所示: 53 | 54 | | 是否进行方向矫正 | 原始数据评测指标 | 多方向数据评测指标 | 55 | | :-- | :--: | :--: | 56 | | ✘ | 71.31% | 53.91% | 57 | | ✔(阈值0) | 50.89% | 70.22% | 58 | | ✔(阈值0.80) | 71.26% | 68.39% | 59 | | ✔(阈值0.90) | 71.31% | 66.16% | 60 | | ✔(阈值0.95) | 71.31% | 53.96% | 61 | 62 | 在上表中: 63 | * 其中指标为 Top-1 Acc; 64 | * 原始数据为 ImageNet1k 数据集,多方向数据为基于 ImageNet1k 数据进行方向扩充后的数据集,扩充方法为:对原始图像数据分别逆时针旋转0°、90°、180°、270°得到; 65 | * ✘ 表示不使用方向矫正模型,✔ 表示使用方向矫正模型并取不同的阈值; 66 | 67 | 68 | ### 3.2 目标检测 69 | 70 | 对于目标检测任务,我们基于 COCO 数据集进行了评测,模型使用 PPYOLOE+L,具体效果如下表所示: 71 | 72 | | 是否进行方向矫正 | 原始数据评测指标 | 多方向数据评测指标 | 73 | | :-- | :--: | :--: | 74 | | ✘ | 52.9% | 31.0% | 75 | | ✔(阈值0) | 52.4% | 52.1% | 76 | | ✔(阈值0.80) | 52.8% | 51.2% | 77 | | ✔(阈值0.90) | 52.9% | 49.6% | 78 | | ✔(阈值0.95) | 52.9% | 31.1% | 79 | 80 | 在上表中: 81 | * 其中指标为 AP0.5:0.95; 82 | * 原始数据为 COCO 数据集,多方向数据为基于 COCO 数据进行方向扩充后的数据集,扩充方法为:对原始图像数据分别逆时针旋转 0°、90°、180°、270°,同时修改对应的标签值; 83 | * ✘ 表示不使用方向矫正模型,✔ 表示使用方向矫正模型并取不同的阈值; 84 | 85 | 86 | ### 3.3 OCR 端到端识别 87 | 88 | 对于 OCR 任务,我们基于 PP-OCRv3 端到端文字检测识别系统进行了实验,具体效果如下表所示: 89 | 90 | | 是否进行方向矫正 | 原始数据评测指标 | 多方向数据评测指标 | 91 | | :-- | :--: | :--: | 92 | | ✘ | 61.89% | 35.89% | 93 | | ✔(阈值0) | 57.92% | 57.98% | 94 | | ✔(阈值0.80) | 61.41% | 56.49% | 95 | | ✔(阈值0.90) | 61.85% | 51.17% | 96 | | ✔(阈值0.95) | 
61.89% | 42.19% | 97 | 98 | 在上表中: 99 | * 其中指标为 H-mean; 100 | * 原始数据为 PP-OCRv3 端到端评测数据集,多方向数据为基于原始数据集进行方向扩充后的数据集,扩充方法为:对原始图像数据分别逆时针旋转0°、90°、180°、270°,同时修改对应的标签值; 101 | * ✘ 表示不使用方向矫正模型,✔ 表示使用方向矫正模型并取不同的阈值; 102 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/docs/zh_CN/datasets/.gitkeep -------------------------------------------------------------------------------- /docs/zh_CN/datasets/Annotation_tool/EISeg.md: -------------------------------------------------------------------------------- 1 | 简体中文 | [English](README_EN.md) 2 |
3 | 4 |

5 | LOGO 6 |

7 | 8 | **An Efficient Interactive Segmentation Tool based on [PaddlePaddle](https://github.com/paddlepaddle/paddle).** 9 | 10 | [![Python 3.6](https://img.shields.io/badge/python-3.6+-blue.svg)](https://www.python.org/downloads/release/python-360/) [![PaddlePaddle 2.2](https://img.shields.io/badge/paddlepaddle-2.2-blue.svg)](https://www.python.org/downloads/release/python-360/) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) [![Downloads](https://pepy.tech/badge/eiseg)](https://pepy.tech/project/eiseg) 11 | 12 |
13 | 14 |
15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 |
Generic segmentationHuman segmentationRS building segmentationMedical segmentation
Industrial quality inspectionGeneric video segmentation 3D medical segmentation
39 |
40 | 41 | ## 最新动态 42 | * [2022-07-20] :fire: EISeg 1.0版本发布! 43 | - 新增用于通用场景视频交互式分割能力,以EISeg交互式分割模型及[MiVOS](https://github.com/hkchengrex/MiVOS)算法为基础,全面提升视频标注体验。详情使用请参考[视频标注](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/EISeg/docs/video.md)。 44 | - 新增用于腹腔多器官及CT椎骨数据3D分割能力,并提供3D可视化工具,给予医疗领域3D标注新的思路。详情使用请参考[3D标注](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/EISeg/docs/video.md)。 45 | 46 | ## 简介 47 | 48 | EISeg(Efficient Interactive Segmentation)基于飞桨开发的一个高效智能的交互式分割标注软件。它涵盖了通用、人像、遥感、医疗、视频等不同方向的高质量交互式分割模型。 另外,将EISeg获取到的标注应用到PaddleSeg提供的其他分割模型进行训练,便可得到定制化场景的高精度模型,打通分割任务从数据标注到模型训练及预测的全流程。 49 | 50 | ![4a9ed-a91y1](https://user-images.githubusercontent.com/71769312/141130688-e1529c27-aba8-4bf7-aad8-dda49808c5c7.gif) 51 | 52 | ## 特性 53 | * 高效的半自动标注工具,已上线多个Top标注平台 54 | * 覆盖遥感、医疗、视频、3D医疗等众多垂类场景 55 | * 多平台兼容,简单易用,支持多类别标签管理 56 | 57 | 58 | ## 使用教程 59 | * [安装说明](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/EISeg/docs/install.md) 60 | * [图像标注](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/EISeg/docs/image.md) 61 | * [视频及3D医疗标注](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/EISeg/docs/video.md) 62 | * [遥感特色功能](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/EISeg/docs/remote_sensing.md) 63 | * [医疗特色功能](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/EISeg/docs/medical.md) 64 | * [数据处理脚本文档](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/EISeg/docs/tools.md) 65 | 66 | 67 | ## 更新历史 68 | - 2022.07.20 **1.0.0**:【1】新增交互式视频分割功能【2】新增腹腔多器官3D标注模型【3】新增CT椎骨3D标注模型。 69 | - 2022.04.10 **0.5.0**:【1】新增chest_xray模型;【2】新增MRSpineSeg模型;【3】新增铝板质检标注模型;【4】修复保存shp时可能坐标出错。 70 | - 2021.11.16 **0.4.0**:【1】将动态图预测转换成静态图预测,单次点击速度提升十倍;【2】新增遥感图像标注功能,支持多光谱数据通道的选择;【3】支持大尺幅数据的切片(多宫格)处理;【4】新增医疗图像标注功能,支持读取dicom的数据格式,支持选择窗宽和窗位。 71 | - 2021.09.16 **0.3.0**:【1】初步完成多边形编辑功能,支持对交互标注的结果进行编辑;【2】支持中/英界面;【3】支持保存为灰度/伪彩色标签和COCO格式;【4】界面拖动更加灵活;【5】标签栏可拖动,生成mask的覆盖顺序由上往下覆盖。 72 | - 2021.07.07 **0.2.0**:新增contrib:EISeg,可实现人像和通用图像的快速交互式标注。 
73 | 74 | 75 | 76 | 77 | ## 贡献者 78 | 79 | - 感谢[Zhiliang Yu](https://github.com/yzl19940819), [Yizhou Chen](https://github.com/geoyee), [Lin Han](https://github.com/linhandev), [Jinrui Ding](https://github.com/Thudjr), [Yiakwy](https://github.com/yiakwy), [GT](https://github.com/GT-ZhangAcer), [Youssef Harby](https://github.com/Youssef-Harby), [Nick Nie](https://github.com/niecongchong) 等开发者及[RITM](https://github.com/saic-vul/ritm_interactive_segmentation)、[MiVOS](https://github.com/hkchengrex/MiVOS) 等算法支持。 80 | - 感谢[LabelMe](https://github.com/wkentaro/labelme)和[LabelImg](https://github.com/tzutalin/labelImg)的标签设计。 81 | - 感谢[Weibin Liao](https://github.com/MrBlankness)提供的ResNet50_DeeplabV3+预训练模型。 82 | - 感谢[Junjie Guo](https://github.com/Guojunjie08)及[Jiajun Feng](https://github.com/richarddddd198)在椎骨模型上提供的技术支持。 83 | 84 | 85 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/Annotation_tool/EIVideo.md: -------------------------------------------------------------------------------- 1 | # EIVideo - 交互式智能视频标注工具 2 | 3 | [![Downloads](https://static.pepy.tech/personalized-badge/eivideo?period=total&units=international_system&left_color=grey&right_color=orange&left_text=EIVideo%20User)](https://pepy.tech/project/eivideo) 4 | [![Downloads](https://static.pepy.tech/personalized-badge/qeivideo?period=total&units=international_system&left_color=grey&right_color=orange&left_text=QEIVideo%20User)](https://pepy.tech/project/qeivideo) 5 | ![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/QPT-Family/EIVideo?include_prereleases) 6 | ![GitHub forks](https://img.shields.io/github/forks/QPT-Family/EIVideo) 7 | ![GitHub Repo stars](https://img.shields.io/github/stars/QPT-Family/EIVideo) 8 | ![GitHub](https://img.shields.io/github/license/QPT-Family/EIVideo) 9 | ![](https://img.shields.io/badge/%E6%B7%B1%E5%BA%A6%E9%80%82%E9%85%8D->Win7-9cf) 10 | 11 | --- 12 | 13 | 14 |
15 | 图片 16 |
17 | 18 | EIVideo,基于百度飞桨MA-Net交互式视频分割模型打造的交互式**智能视频**标注工具箱,只需简单标注几帧,即可完成全视频标注,若自动标注结果未达要求还可通过多次和视频交互而不断提升视频分割质量,直至对分割质量满意。 19 | 20 | 戳 -> 了解相关[技术文章&模型原理](等待微信公众号) 21 | 22 |
23 | 图片 24 |
25 | 26 | > 为了更好的解放双手,我们还提供了图形化界面工具QEIVideo,通过它我们可以不使用繁杂的命令方式来完成视频的智能标注工作。 27 | 28 | --- 29 | 30 | ### README目录 31 | 32 | - [EAP - The Early Access Program 早期访问计划](#eap---the-early-access-program-早期访问计划) 33 | - [使用方式](#使用方式) 34 | - [安装&运行](#安装运行) 35 | - [QPT包 - 适合无Python基础用户](#qpt包---适合无python基础用户) 36 | - [标准Python包 - 适合普通Python开发者](#标准python包---适合普通python开发者) 37 | - [开发版本 - 适合高阶开发者进行开发/社区贡献](#开发版本---适合高阶开发者进行开发社区贡献) 38 | - [(Q)EIVideo产品规划安排](#qeivideo产品规划安排) 39 | - [开源协议](#开源协议) 40 | 41 | --- 42 | 43 | ### EAP - The Early Access Program 早期访问计划 44 | 45 | > Warning 当前图形化界面QEIVideo处于**极其初阶**的...建设阶段,并不能保证程序稳定性。 46 | 47 |
图片
48 | 49 | 当您选择使用QEIVideo作为图形化界面时,即可视为同意使用“可能会存在大量体验不佳”的EAP产品。 50 | 51 | 同样,您可选择借助基于[PaddleVideo](https://github.com/PaddlePaddle/PaddleVideo) 实现的 52 | 交互式视频标注模型[EIVideo](https://github.com/QPT-Family/EIVideo/EIVideo) 进行二次开发,在此之上也可完成您需要的自定义图形化界面,后续也将提供二次开发指南。 53 | 54 |
图片
55 | 56 | 57 | > 如果您愿意参与到EIVideo或QEIVideo的建设中来,欢迎您与PMC取得联系 -> WX:GT_ZhangAcer 58 | 59 | ## 使用方式 60 | ### 安装&运行 61 | #### QPT包 - 适合无Python基础用户 62 | 自动化配置相关Python环境,但仅支持Windows7/10/11操作系统,且不对盗版Windows7做任何适配。 63 | 下载地址:暂未上传 64 | > 自动化部署工具由[QPT - 自动封装工具](https://github.com/QPT-Family/QPT) 支持 65 | 66 | #### 标准Python包 - 适合普通Python开发者 67 | * 国际方式: 68 | ```shell 69 | python -m pip install eivideo 70 | python qeivideo 71 | ``` 72 | * 国内推荐: 73 | ```shell 74 | python -m pip install eivideo -i https://mirrors.bfsu.edu.cn/pypi/web/simple 75 | python qeivideo 76 | ``` 77 | > 上述命令仅适用于常规情况,若您安装了多个Python或修改了相关开发工具与配置,请自行修改相关命令使其符合您的开发环境。 78 | 79 | #### 开发版本 - 适合高阶开发者进行开发/社区贡献 80 | 81 | * 国际方式: 82 | ```shell 83 | git clone https://github.com/QPT-Family/EIVideo.git 84 | python -m pip install -r requirements.txt 85 | ``` 86 | * 国内推荐: 87 | ```shell 88 | # 请勿用于Push!!! 89 | git clone https://hub.fastgit.org/QPT-Family/EIVideo.git 90 | python -m pip install -r requirements.txt -i https://mirrors.bfsu.edu.cn/pypi/web/simple 91 | ``` 92 | * 运行程序 93 | ```shell 94 | # 进入工作目录 95 | cd 此处填写EIVideo所在的目录的绝对路径,且该目录下拥有EIVideo与QEIVideo两文件夹。 96 | # 运行 97 | python QEIVideo/start.py 98 | 99 | # 如运行时无法找到对应包,可选择下述方式添加环境变量来调整索引次序后执行python 100 | # Windows 101 | set PYTHONPATH=$pwd:$PYTHONPATH 102 | # Linux 103 | export PYTHONPATH=$pwd:$PYTHONPATH 104 | ``` 105 | 106 | > 上述命令仅适用于常规情况,若您安装了多个Python或修改了相关开发工具与配置,请自行修改相关命令使其符合您的开发环境。 107 | 108 | ## (Q)EIVideo产品规划安排 109 | > 由于QEIVideo由飞桨开源社区学生爱好者构成,所以在项目的产出过程中将会以学习为主进行开源贡献,如您原因与我们一同建设,我们也将非常欢迎~ 110 |
图片
111 | 112 | - [x] EIVideo与Demo版QEIVideo发布0.1.0Alpha版本 113 | - [ ] 完善QEIVideo,丰富基础标注功能,于Q1升级至1.0Alpha版本 114 | - [ ] 回归QEIVideo稳定性,于Q2完成1.0正式版本发版 115 | - [ ] 增加视频目标检测、分类任务的交互式标注功能。 116 | 117 | ### 开源协议 118 | 本项目使用GNU LESSER GENERAL PUBLIC LICENSE(LGPL)开源协议。 119 | > 因所使用的模型与数据集等原因,本项目中任一代码、参数均不可直接进行商用,如需商用请与我们取得联系。 120 | 121 | ### 引用来源 122 | 1. EIVideo模型以及相关源码、论文与项目 - [PaddleVideo](https://github.com/PaddlePaddle/PaddleVideo) 123 | 2. 部分表情包来源 - [甘城なつき](https://www.pixiv.net/users/3036679) 124 | 125 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/Annotation_tool/PaddleLabel.md: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 |

6 | LOGO 7 |

8 | 9 | **飞桨智能标注,让标注快人一步** 10 | 11 | [![Python](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/release/python-390/) ![PyPI](https://img.shields.io/pypi/v/paddlelabel?color=blue) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](LICENSE) [![Start](https://img.shields.io/github/stars/PaddleCV-SIG/PaddleLabel?color=orange)]() [![Fork](https://img.shields.io/github/forks/PaddleCV-SIG/PaddleLabel?color=orange)]() ![PyPI - Downloads](https://img.shields.io/pypi/dm/paddlelabel?color=orange) [![OS](https://img.shields.io/badge/os-linux%2C%20windows%2C%20macos-green.svg)]() 12 | 13 |
14 | 15 | ## 最新动态 16 | 17 | - 【2022-08-18】 :fire: PaddleLabel 0.1 版本发布! 18 | - 【分类】支持单分类与多分类标注及标签的导入导出。简单灵活实现自定义数据集分类标注任务并导出供[PaddleClas](https://github.com/PaddlePaddle/PaddleClas)进行训练。 19 | - 【检测】支持检测框标注及标签的导入导出。快速上手生成自己的检测数据集并应用到[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)。 20 | - 【分割】支持多边形、笔刷及交互式等多种标注方式,支持标注语义分割与实例分割两种场景。多种分割标注方式可灵活选择,方便将导出数据应用在[PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg)获取个性化定制模型。 21 | 22 | ## 简介 23 | 24 | PaddleLabel 是基于飞桨 PaddlePaddle 各个套件的特性提供的配套标注工具。它涵盖分类、检测、分割三种常见的计算机视觉任务的标注能力,具有手动标注和交互式标注相结合的能力。用户可以使用 PaddleLabel 方便快捷的标注自定义数据集并将导出数据用于飞桨提供的其他套件的训练预测流程中。 25 | 整个 PaddleLabel 包括三部分,本项目包含 PaddleLabel 的后端实现。 [PaddleLabel-Frontend](https://github.com/PaddleCV-SIG/PaddleLabel-Frontend)是基于 React 和 Ant Design 构建的 PaddleLabel 前端,[PaddleLabel-ML](https://github.com/PaddleCV-SIG/PaddleLabel-ML)是基于 PaddlePaddle 的自动和交互式标注的机器学习后端。 26 | 27 | ![demo720](https://user-images.githubusercontent.com/71769312/185099439-3230cf80-798d-4a81-bcae-b88bcb714daa.gif) 28 | 29 | ## 特性 30 | 31 | - **简单** 手动标注能力直观易操作,方便用户快速上手。 32 | - **高效** 支持交互式分割功能,分割精度及效率提升显著。 33 | - **灵活** 分类支持单分类和多分类的标注,分割支持多边形、笔刷及交互式分割等多种功能,方便用户根据场景需求切换标注方式。 34 | - **全流程** 与其他飞桨套件密切配合,方便用户完成数据标注、模型训练、模型导出等全流程操作。 35 | 36 | 37 | 38 | ## 使用教程 39 | 40 | **文档** 41 | 42 | - [安装指南](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/install.md) 43 | - [快速开始](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/quick_start.md) 44 | 45 | **进行标注** 46 | 47 | - [图像分类](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/project/classification.md) 48 | - [目标检测](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/project/detection.md) 49 | - [语义分割](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/project/semantic_segmentation.md) 50 | - [实例分割](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/project/instance_segmentation.md) 51 | 52 | **训练教程** 53 | 54 | - [如何用 PaddleClas 
进行训练](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/training/PdLabel_PdClas.md) 55 | - [如何用 PaddleDet 进行训练](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/training/PdLabel_PdDet.md) 56 | - [如何使用 PaddleSeg 进行训练](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/training/PdLabel_PdSeg.md) 57 | - [如何使用 PaddleX 进行训练](https://github.com/PaddleCV-SIG/PaddleLabel/blob/v0.1.0/doc/CN/training/PdLabel_PdX.md) 58 | 59 | **AI Studio 项目** 60 | 61 | - [花朵分类](https://aistudio.baidu.com/aistudio/projectdetail/4337003) 62 | - [道路标志检测](https://aistudio.baidu.com/aistudio/projectdetail/4349280) 63 | - [图像分割](https://aistudio.baidu.com/aistudio/projectdetail/4353528) 64 | - [如何使用 PaddleX 进行训练](https://aistudio.baidu.com/aistudio/projectdetail/4383953) 65 | 66 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/Annotation_tool/Speech.md: -------------------------------------------------------------------------------- 1 | 这里整理了常用的语音标注工具,欢迎各位小伙伴贡献工具! 2 | 3 | 4 | ## 1. Praat 5 | 6 | + 工具描述: 语音合成标注工具 7 | + 工具地址: https://www.fon.hum.uva.nl/praat/ 8 | + 示意图: 9 | 10 | ![](https://user-images.githubusercontent.com/30135920/197728536-14cc083b-6f7a-40dd-b66a-a8a9fe56924f.png) 11 | 12 | ## 2. label-studio 13 | 14 | + 工具描述:多功能标注工具,可以用于语音识别,说话人识别等多种语音标注任务 15 | + 工具地址:https://labelstud.io/guide/index.html 16 | + 示意图 17 | 18 | ![image](https://user-images.githubusercontent.com/30135920/198205186-f99026f9-32a9-4b17-8e9b-9af18c119f41.png) 19 | 20 | 21 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/Annotation_tool/doccano.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ## Doccano 5 | - **下载地址**:https://github.com/doccano/doccano 6 | - **工具简介**:doccano是documment anotation的缩写,是一个开源的文本标注工具,我们可以用它为NLP任务的语料库进行打标。它支持情感分析,命名实体识别,文本摘要等任务: 7 |
8 | 9 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/datasets/3D.md: -------------------------------------------------------------------------------- 1 | ## 通用3D数据集 2 | 这里整理了常用3D方向数据集,持续更新中,欢迎各位小伙伴贡献数据集~ 3 | - [KITTI](#KITTI) 4 | - [nuScenes](#nuScenes) 5 | - [Waymo](#Waymo) 6 | 7 | 8 | 9 | ## 1、KITTI 10 | - **数据来源**:https://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d 11 | - **数据简介**: KITTI数据集由卡尔斯鲁厄理工学院发布,用于评估自动驾驶场景3D目标检测等感知任务。数据采集真实道路场景,共包含3712个训练样本,3769个验证样本以及7518个测试样本。每个样本都包含lidar和camera两种模态的数据,train和validation都有标注数据,test没有标注数据。需要注意其标注的3d bbox坐标是在camera坐标系下进行的,且只标注了camera FOV内的目标,如下图: 12 |
13 | 14 | - **下载地址**:https://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d 15 | 16 | 17 | 18 | ## 2、nuScenes 19 | - **数据来源**:https://www.nuscenes.org/nuscenes 20 | - **数据简介**: nuScenes数据集用于评估自动驾驶3D感知和规划任务,数据采集来自不同城市的1000个场景中,采集车上配备6个相机(CAM)、1个激光雷达(LIDAR)、5个毫米波雷达(RADAR)。采集的数据包括lidar和camera两种模态数据,共包含28k个训练样本,6k个验证样本,以及6k个测试样本,标注数据提供了物体的3d bbox坐标和类别信息: 21 |
22 | 23 | - **下载地址**:https://www.nuscenes.org/nuscenes 24 | - **其他说明**:nuScenes的camera数据是360度环视相机拍摄,部分相机的FOV具有重叠,环视数据可用于BEV任务。 25 | 26 | 27 | ## 3、Waymo 28 | - **数据来源**:https://waymo.com/open/data/perception/ 29 | - **数据简介**: Waymo是谷歌Waymo无人驾驶公司在2020年发布的数据集,包含Mothion和Perception两大类,用于自动驾驶3D感知和预测任务。采集设备包括5个Lidar,5个Camera。Perception数据中训练集包含798个segment,每个segment包含约200个frame,总共约158361个样本,验证集包含202个segment,总共约40077个样本,每个样本都包含了lidar和camera模态数据。标注数据提供了物体的3d bbox坐标和物体类别信息,标注的坐标均为右手坐标系: 30 | 31 | - **下载地址**:https://waymo.com/open/data/perception/ 32 | - **其他说明**:waymo数据更新多次版本,为了获取更准确的标注信息和应用其它任务,请下载v1.3.2及其之后的版本。 33 | 34 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/datasets/Clas.md: -------------------------------------------------------------------------------- 1 | # 图像分类任务数据集说明 2 | 3 | 本文档将介绍常用的图像分类任务数据集格式,以及图像分类领域的常见数据集介绍。 4 | 5 | --- 6 | 7 | ## 目录 8 | 9 | 10 | - 图像分类任务常见数据集介绍 11 | - [ImageNet1k](#1) 12 | - [Flowers102](#2) 13 | - [CIFAR10 / CIFAR100](#3) 14 | - [MNIST](#4) 15 | - [NUS-WIDE](#5) 16 | 17 | 18 | 19 | ## 图像分类任务常见数据集介绍 20 | 21 | 这里整理了常用的图像分类任务数据集,持续更新中,欢迎各位小伙伴补充完善~ 22 | 23 | 24 | ### 1、ImageNet1k 25 | 26 | [ImageNet](https://image-net.org/)项目是一个大型视觉数据库,用于视觉目标识别研究任务,该项目已手动标注了 1400 多万张图像。ImageNet-1k 是 ImageNet 数据集的子集,其包含 1000 个类别。训练集包含 1281167 个图像数据,验证集包含 50000 个图像数据。2010 年以来,ImageNet 项目每年举办一次图像分类竞赛,即 ImageNet 大规模视觉识别挑战赛(ILSVRC)。挑战赛使用的数据集即为 ImageNet-1k。到目前为止,ImageNet-1k 已经成为计算机视觉领域发展的最重要的数据集之一,其促进了整个计算机视觉的发展,很多计算机视觉下游任务的初始化模型都是基于该数据集训练得到的。 27 | 28 | 29 | 30 | 数据集 | 训练集大小 | 测试集大小 | 类别数 | 备注| 31 | :------:|:---------------:|:---------------------:|:-----------:|:-----------: 32 | [ImageNet1k](http://www.image-net.org/challenges/LSVRC/2012/)|1.2M| 50k | 1000 | 33 | 34 | 35 | ### 2 Flowers102 36 | 数据简介:一个 102 个类别的数据集,由 102 个花卉类别组成。被选为英国常见的花。每个类包含 40 到 258 张图像。可以在此类别统计页面上找到类别的详细信息和每个类别的图像数量。 37 | 38 | 39 | 40 | 数据集 | 训练集大小 | 测试集大小 | 类别数 | 备注| 41 | 
:------:|:---------------:|:---------------------:|:-----------:|:-----------: 42 | [flowers102](https://www.robots.ox.ac.uk/~vgg/data/flowers/102/)|1k | 6k | 102 | 43 | 44 | 将下载的数据解压后,可以看到以下目录 45 | 46 | ```shell 47 | jpg/ 48 | setid.mat 49 | imagelabels.mat 50 | ``` 51 | 52 | 53 | 54 | ### 3 CIFAR10 / CIFAR100 55 | 56 | CIFAR-10 数据集由 10 个类的 60000 个彩色图像组成,图像分辨率为 32x32,每个类有 6000 个图像,其中训练集 5000 张,验证集 1000 张,10 个不同的类代表飞机、汽车、鸟类、猫、鹿、狗、青蛙、马、轮船和卡车。CIFAR-100 数据集是 CIFAR-10 的扩展,由 100 个类的 60000 个彩色图像组成,图像分辨率为 32x32,每个类有 600 个图像,其中训练集 500 张,验证集 100 张。 57 | 58 | 59 | 数据集地址:http://www.cs.toronto.edu/~kriz/cifar.html 60 | 61 | 62 | ### 4 MNIST 63 | 64 | 数据简介:MMNIST 是一个非常有名的手写体数字识别数据集,在很多资料中,这个数据集都会被用作深度学习的入门样例。其包含 60000 张图片数据,50000 张作为训练集,10000 张作为验证集,每张图片的大小为 28 * 28。 65 | 66 | 数据集地址:http://yann.lecun.com/exdb/mnist/ 67 | 68 | 69 | ### 5 NUS-WIDE 70 | 71 | NUS-WIDE 是一个多分类数据集。该数据集包含 269648 张图片, 81 个类别,每张图片被标记为该 81 个类别中的某一类或某几类。 72 | 73 | 数据集地址:https://lms.comp.nus.edu.sg/wp-content/uploads/2019/research/nuswide/NUS-WIDE.html 74 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/datasets/Detection.md: -------------------------------------------------------------------------------- 1 | ## 通用检测数据集 2 | 这里整理了常用检测数据集,持续更新中,欢迎各位小伙伴贡献数据集~ 3 | - [COCO](#COCO) 4 | - [VOC](#VOC) 5 | - [SCUT_FIR行人检测数据集](#SCUT_FIR行人检测数据集) 6 | - [JHU-CROWD++](#JHU-CROWD++) 7 | - [CIHP人体解析数据集](#CIHP人体解析数据集) 8 | - [AHU-Crowd人群数据集](#AHU-Crowd人群数据集) 9 | - [AudioVisual人群计数](#AudioVisual人群计数) 10 | - [UCF-CC-50](#UCF-CC-50) 11 | - [北京BRT数据集](#北京BRT数据集) 12 | 13 | 14 | ## 1、COCO 15 | - **数据来源**:https://cocodataset.org/#home 16 | - **数据简介**:COCO数据是COCO 比赛使用的数据。同样的,COCO比赛数也包含多个比赛任务,其标注文件中包含多个任务的标注内容。 COCO数据集指的是COCO比赛使用的数据。用户自定义的COCO数据,json文件中的一些字段,请根据实际情况选择是否标注或是否使用默认值。 17 |
18 |
19 |
20 | 21 | - **下载地址**:https://cocodataset.org/ 22 | 23 | 24 | 25 | ## 2、VOC 26 | - **数据来源**:http://host.robots.ox.ac.uk/pascal/VOC/ 27 | - **数据简介**:VOC数据是Pascal VOC 比赛使用的数据。Pascal VOC比赛不仅包含图像分类分类任务,还包含图像目标检测、图像分割等任务,其标注文件中包含多个任务的标注内容。 VOC数据集指的是Pascal VOC比赛使用的数据。用户自定义的VOC数据,xml文件中的非必须字段,请根据实际情况选择是否标注或是否使用默认值。),如下图所示: 28 |
29 |
30 |
31 | 32 | - **下载地址**:http://host.robots.ox.ac.uk/pascal/VOC/ 33 | 34 | 35 | ## 3、SCUT_FIR行人检测数据集 36 | - **数据来源**:https://sourl.cn/4VK3Bn 37 | - **数据简介**:SCUT FIR Pedestrian Datasets 是一个大型远红外行人检测数据集。它由大约 11 小时长的图像序列( 帧)组成,速度为 25 Hz,以低于 80 km/h 的速度在不同的交通场景中行驶。图像序列来自中国广州市中心、郊区、高速公路和校园 4 种场景下的 11 个路段。该数据集注释了 211,011 帧,总共 477,907 个边界框,围绕 7,659 个独特的行人,如下图所示: 38 |
39 |
40 |
41 | 42 | - **下载地址**:https://sourl.cn/4VK3Bn 43 | 44 | 45 | ## 4、JHU-CROWD++ 46 | - **数据来源**:https://sourl.cn/mgxHEY 47 | - **数据简介**:包含 4,372 张图像和 151 万条注释的综合数据集。与现有数据集相比,所提出的数据集是在各种不同的场景和环境条件下收集的。此外,该数据集提供了相对丰富的注释集,如点、近似边界框、模糊级别等。如下图所示: 48 |
49 |
50 |
51 | 52 | - **下载地址**:https://sourl.cn/mgxHEY 53 | 54 | 55 | ## 5、CIHP人体解析数据集 56 | - **数据来源**:https://sourl.cn/W3Tm2J 57 | - **数据简介**:Crowd Instance-level Human Parsing (CIHP) 数据集包含 38,280 张多人图像,这些图像具有精细的注释、高外观可变性和复杂性。该数据集可用于人体部分分割任务。如下图所示: 58 |
59 |
60 |
61 | 62 | - **下载地址**:https://sourl.cn/mgxHEY 63 | 64 | 65 | ## 6、AHU-Crowd人群数据集 66 | - **数据来源**:https://sourl.cn/XFJDCh 67 | - **数据简介**:人群数据集是从各种来源获得的,例如 UCF 和数据驱动的人群数据集,以评估所提出的框架。序列多样,代表了朝圣、车站、马拉松、集会和体育场等各种场景中公共空间的密集人群。此外,这些序列具有不同的视野、分辨率,并表现出多种运动行为,涵盖了明显和微妙的不稳定性。如下图所示: 68 |
69 |
70 |
71 | 72 | - **下载地址**:https://sourl.cn/XFJDCh 73 | 74 | 75 | ## 7、AudioVisual人群计数 76 | - **数据来源**:https://sourl.cn/wfd7wD 77 | - **数据简介**:一个用于人群计数的新数据集,该数据集由中国不同位置的大约 2000 个带注释的图像令牌组成,每个图像对应一个 1 秒的音频剪辑和一个密度图。图像处于不同的照明条件下。如下图所示: 78 |
79 |
80 |
81 | 82 | - **下载地址**:https://sourl.cn/wfd7wD 83 | 84 | 85 | ## 8、UCF-CC-50 86 | - **数据来源**:http://c.nxw.so/9LYoK 87 | - **数据简介**:该数据集包含极其密集人群的图像。图像主要是从 FLICKR 收集的。如下图所示: 88 |
89 |
90 |
91 | 92 | - **下载地址**:http://c.nxw.so/9LYoK 93 | 94 | 95 | ## 9、北京BRT数据集 96 | - **数据来源**:http://c.nxw.so/c1PV9 97 | - **数据简介**:该数据集包含 1,280 张图像和 16,795 个标记的行人,用于人群分析。该数据集使用 720 张图像进行训练,使用 560 张图像进行测试。名为 frame 的文件夹包含人群图像。名为 ground_truth 的文件夹包含ground_truth。例如,'1-20170325134657.jpg'对应于'1-20170325134657.mat',以及这张图片中第i个人的真实位置,其中每一行是位置[x,y]。如下图所示: 98 |
99 |
100 |
101 | 102 | - **下载地址**:http://c.nxw.so/c1PV9 -------------------------------------------------------------------------------- /docs/zh_CN/datasets/datasets/Image_Denoising.md: -------------------------------------------------------------------------------- 1 | ## 图像去噪 2 | 这里整理了常用图像去噪数据集,持续更新中,欢迎各位小伙伴贡献数据集~ 3 | - [PolyU数据集](#PolyU数据集) 4 | - [FMD(荧光显微镜去噪)数据集](#FMD(荧光显微镜去噪)数据集) 5 | - [SIDD智能手机图像去噪数据集](#SIDD智能手机图像去噪数据集) 6 | - [SIDD-small数据集](#SIDD-small数据集) 7 | - [Super_Resolution_Benchmarks](#Super_Resolution_Benchmarks) 8 | 9 | 具体如下: 10 | 11 | 12 | #### 1、PolyU数据集 13 | - **数据来源**:https://sourl.cn/rMsdE8 14 | - **数据简介**: 大多数以前的图像去噪方法都集中在加性高斯白噪声(AWGN)上。然而,随着计算机视觉技术的进步,现实世界中的噪声图像去噪问题也随之而来。为了在实现并发真实世界图像去噪数据集的同时促进对该问题的研究,作者们构建了一个新的基准数据集,其中包含不同自然场景的综合真实世界噪声图像。这些图像是由不同的相机在不同的相机设置下拍摄的,如下图所示: 15 |
16 |
17 |
18 | 19 | - **下载地址**:https://sourl.cn/rMsdE8 20 | 21 | 22 | #### 2、FMD(荧光显微镜去噪)数据集 23 | - **数据来源**:https://sourl.cn/Wyqrui 24 | - **数据简介**: 荧光显微镜使现代生物学取得了巨大的发展。由于其固有的微弱信号,荧光显微镜不仅比摄影噪声大得多,而且还呈现出泊松-高斯噪声,其中泊松噪声或散粒噪声是主要的噪声源。为了获得干净的荧光显微镜图像,非常需要有专门设计用于对荧光显微镜图像进行降噪的有效降噪算法和数据集。虽然存在这样的算法,但没有这样的数据集可用。在本文中,我们通过构建专用于泊松-高斯去噪的数据集 - 荧光显微镜去噪 (FMD) 数据集来填补这一空白。该数据集由 12,000 个真实荧光显微镜图像组成,这些图像使用商业共焦、双光子、宽视野显微镜和代表性生物样本,如细胞、斑马鱼和小鼠脑组织, 如下图所示: 25 |
26 |
27 |
28 | 29 | - **下载地址**:https://sourl.cn/Wyqrui 30 | 31 | 32 | #### 3、SIDD智能手机图像去噪数据集 33 | - **数据来源**:https://sourl.cn/jdpJZ6 34 | - **数据简介**: 该数据集包含以下智能手机在不同光照条件下拍摄的 160 对噪声/真实图像: 35 | GP: Google Pixel 36 | IP: iPhone 7 37 | S6: Samsung Galaxy S6 38 | Edge N6: Motorola Nexus 6 39 | G4: LG G4, 如下图所示: 40 |
41 |
42 |
43 | 44 | - **下载地址**:https://sourl.cn/jdpJZ6 45 | 46 | 47 | #### 4、SIDD-small数据集 48 | - **数据来源**:https://sourl.cn/kaYGxd 49 | - **数据简介**: 一个小型版本的数据集,它由代表 160 个场景实例的160 个图像对(噪声和ground-truth)组成, 如下图所示: 50 |
51 |
52 |
53 | 54 | - **下载地址**:https://sourl.cn/kaYGxd 55 | 56 | 57 | 58 | #### 5、Super_Resolution_Benchmarks 59 | - **数据来源**:https://sourl.cn/Bp6QZs 60 | - **数据简介**: 来自于AIM 2022 压缩图像和视频超分辨率挑战赛”中的前 5 名解决方案工作:Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration, 如下图所示: 61 |
62 |
63 |
64 | 65 | - **下载地址**:https://sourl.cn/Bp6QZs 66 | -------------------------------------------------------------------------------- /docs/zh_CN/datasets/datasets/Keypoints.md: -------------------------------------------------------------------------------- 1 | ## 关键点检测数据集 2 | 这里整理了常用关键点检测数据集,持续更新中,欢迎各位小伙伴贡献数据集~ 3 | - [手部姿势关键点检测数据集](#手部姿势关键点检测数据集) 4 | - [动物姿势数据集](#动物姿势数据集) 5 | - [电影人物关节关键点数据集](#电影人物关节关键点数据集) 6 | - [MPIIGaze_Dataset](#MPIIGaze_Dataset) 7 | - [人体足部关键点数据集](#人体足部关键点数据集) 8 | - [人群姿态数据集](#人群姿态数据集) 9 | 10 | 具体如下: 11 | 12 | 13 | #### 1、手部姿势关键点检测数据集 14 | - **数据来源**:http://u3v.cn/6d3lZV 15 | - **数据简介**: 数据集由序列构成。在每个序列中,您都可以找到组成它的帧。一个帧由4个彩色图像、4组投影在每个图像平面中的2D关节、4个边界框、1组Leap Motion Controller提供的3D点和4组重新投影到每个相机坐标帧的3D点组成。,如下图所示: 16 |
17 |
18 |
19 | 20 | - **下载地址**:http://u3v.cn/6d3lZV 21 | 22 | 23 | 24 | #### 2、动物姿势数据集 25 | - **数据来源**:http://u3v.cn/6kDLfr 26 | - **数据简介**:该数据集提供了五个类别的动物姿势注释:狗、猫、牛、马、羊,在4,000 多张图像中总共有6,000多个实例。此外,该数据集还包含其他7 个动物类别的边界框注释。在论文中查找详细信息。一共标注了 20 个关键点:两只眼睛、喉咙、鼻子、马肩隆、两个耳根、尾根、四个肘部、四个膝盖、四个爪子。 27 |
28 |
29 |
30 | 31 | - **下载地址**:http://u3v.cn/6kDLfr 32 | 33 | 34 | #### 3、电影人物关节关键点数据集 35 | - **数据来源**:http://u3v.cn/5tW5zx 36 | - **数据简介**:该数据集从流行的好莱坞电影中自动收集了5003个图像数据。这些图像是通过在30部电影的每10帧上运行一个最先进的人检测器获得的。然后,被高度自信地检测到的人(大约2万名候选人)被送往众包市场亚马逊机械土耳其公司(Amazon Mechanical Turk),以获得地面真实标签。每幅图片都由五名特克斯人以0.01美元的价格标注,以标注10个上身关节。在每个图像中取五个标记的中位数,以对离群值注释保持稳健。 37 | 38 | 39 | #### 4、MPIIGaze_Dataset 40 | - **数据来源**:http://u3v.cn/5BsiEe 41 | - **数据简介**:MPIIGaze数据集包含在三个多月的日常笔记本电脑使用过程中从15名参与者收集的213659张图像。在外观和照明方面,数据集比现有的数据集变化更大。 42 |
43 |
44 |
45 | 46 | - **下载地址**:http://u3v.cn/5BsiEe 47 | 48 | 49 | #### 5、人体足部关键点数据集 50 | - **数据来源**:http://u3v.cn/5IYvIV 51 | - **数据简介**:现有的人体姿势数据集包含有限的身体部位类型。MPII 数据集标注了脚踝、膝盖、臀部、肩膀、肘部、手腕、颈部、躯干和头顶,而 COCO 还包括一些面部关键点。对于这两个数据集,足部注释仅限于脚踝位置。然而,图形应用程序(例如头像重定向或 3D 人体形状重建)需要足部关键点,例如大脚趾和脚跟。在没有足部信息的情况下,这些方法会遇到诸如糖果包装效果、地板穿透和足部滑冰等问题。为了解决这些问题,COCO 数据集中的一小部分脚实例使用 Clickworker 平台进行标记。它分为来自 COCO 训练集的 14K 注释和来自验证集的 545 个注释。总共标记了 6 个英尺关键点。考虑足部关键点的 3D 坐标而不是表面位置。例如,对于确切的脚趾位置,数据集标记了指甲和皮肤连接之间的区域,并且还通过标记脚趾的中心而不是表面来考虑深度。 52 | 53 | 54 | #### 6、人群姿态数据集 55 | - **数据来源**:http://u3v.cn/65x8MQ 56 | - **数据简介**:多人姿态估计是许多计算机视觉任务的基础,近年来取得了重大进展。然而,以前很少有方法研究拥挤场景中的姿态估计问题,而在许多场景中,这仍然是一个具有挑战性和不可避免的问题。此外,目前的基准无法对此类案件进行适当评估。在本文中,我们提出了一种新的有效方法来解决人群中的姿势估计问题,并提出了一个新的数据集来更好地评估算法。 57 |
58 |
59 |
60 | 61 | - **下载地址**:http://u3v.cn/65x8MQ -------------------------------------------------------------------------------- /docs/zh_CN/datasets/datasets/Speech.md: -------------------------------------------------------------------------------- 1 | # 通用语音数据集 2 | 3 | 这里整理了常用语音数据集,持续更新中,欢迎各位小伙伴贡献数据集~ 4 | - [语音识别](#语音识别) 5 | - [语音合成](#语音合成) 6 | - [声音分类](#声音分类) 7 | - [声纹识别](#声纹识别) 8 | - [语音唤醒](#语音唤醒) 9 | 10 | ## 语音识别 11 | 12 | + WenetSpeech 13 | 14 | 数据来源:https://wenet.org.cn/WenetSpeech/ 15 | 16 | 数据简介: 17 | > 从 YouTube 和 Podcast 收集的 10000 多个小时的多域转录普通话语料库。采用光学字符识别 (OCR) 和自动语音识别 (ASR) 技术分别标记每个 YouTube 和 Podcast 录音。为了提高语料库的质量,我们使用一种新颖的端到端标签错误检测方法来进一步验证和过滤数据。10,000 +小时高标签数据,置信度 >= 95%,用于监督训练;2400 +小时弱标签数据0.6 < 置信度 < 0.95,用于半监督或噪声训练等;22400 +总共小时音频,由标记和未标记的数据组成,用于无监督训练或预训练等。 18 | 下载地址:https://wenet.org.cn/WenetSpeech/#download 19 | 20 | 21 | ## 语音合成 22 | 23 | + CSMSC 24 | 数据来源:https://www.data-baker.com/open_source.html 25 | 数据简介: 26 | > 中文标准女声音库】采集对象的音色风格知性阳光、亲切自然,专业标准普通话女声,听感乐观积极。录制环境为专业录音室和录音软件,录音环境和设备自始至终保持不变,录音环境的信噪比不低于35dB;单声道录音,用48KHz 16比特采样频率、PCM WAV格式。录音语料涵盖各类新闻、小说、科技、娱乐、对话等领域,语料设计综合语料样本量,力求在有限的语料数据量内,对音节音子、类型、音调、音连以及韵律等尽可能全面的覆盖。根据合成语音标注标准对音库进行文本音字校对、韵律层级标注、语音文件边界切分标注。 27 | 28 | 下载地址:https://www.data-baker.com/open_source.html 29 | 30 | ## 声音分类 31 | 32 | + esc50 33 | 34 | 数据来源:https://github.com/karolpiczak/ESC-50 35 | 36 | 数据简介: 37 | > ESC-50: Dataset for Environmental Sound Classification 是一个包含有 2000 个带标签的时长为 5 秒的环境声音样本,音频样本采样率为 44,100Hz 的单通道音频文件,所有样本根据标签被划分为 50 个类别,每个类别有 40 个样本。 38 | 39 | 40 | 41 | ## 声纹识别 42 | 43 | + voxceleb 44 | 45 | 数据来源:https://www.robots.ox.ac.uk/~vgg/data/voxceleb/index.html#about 46 | 47 | 数据简介: 48 | > VoxCeleb 是一个视听数据集,由从上传到 YouTube 的采访视频中提取的人类语音短片组成,7,000 + 49 | 50 | 扬声器VoxCeleb 包含来自不同种族、口音、职业和年龄的演讲者的演讲;话语长度100万+话语所有说话的面部轨迹都是“在野外”捕获的,包括背景聊天、笑声、重叠的语音、姿势变化和不同的照明条件;2,000 +小时 51 | VoxCeleb 由音频和视频组成。每个片段至少 3 秒长。 52 | 53 | 54 | ## 语音唤醒 55 | 56 | 57 | + hey_snips 58 | 59 | 数据来源:https://github.com/sonos/keyword-spotting-research-datasets 60 | 
61 | 数据简介: 62 | > 唤醒词是“Hey Snips”,两个词之间没有停顿。两个数据集都包含大量的英语口音和录音环境。请注意,负样本是在与唤醒词话语相同的条件下记录的,因此来自相同的域(说话者、硬件、环境等)。 -------------------------------------------------------------------------------- /python_whl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | __all__ = ["EasyData"] 16 | from .easydata import EasyData 17 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | Pillow 3 | scipy>=1.0.0 4 | faiss_cpu==1.7.1.post2 5 | PyYAML>=5.1 6 | tqdm 7 | Shapely 8 | pyclipper 9 | matplotlib 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from io import open 16 | from setuptools import setup 17 | 18 | 19 | def get_requirements(): 20 | with open('requirements.txt', encoding="utf-8-sig") as f: 21 | requirements = f.readlines() 22 | return requirements 23 | 24 | 25 | setup( 26 | name='easydata-python', 27 | packages=['easydata', 'easydata.deploy'], 28 | package_dir={ 29 | 'easydata': 'python_whl', 30 | 'easydata.deploy': 'deploy' 31 | }, 32 | include_package_data=True, 33 | entry_points={"console_scripts": ["easydata=easydata.easydata:main"]}, 34 | version='0.0.0', 35 | install_requires=get_requirements(), 36 | license='Apache License 2.0', 37 | description= 38 | 'A toolkit for processing data powered by PaddlePaddle, which include data augmentation, data cleaning and data annotation.', 39 | long_description= 40 | 'EasyData aims to create a universal, leading and practical data processing toolkit, that supports automatic data augmentation and cleaning, and provides data annotation tools and a collection of open source datasets to help developers obtain high-quality training and inference data more easily, thereby promoting the practical effect of AI algorithms.', 41 | long_description_content_type='text/markdown', 42 | url='https://github.com/PaddlePaddle/EasyData', 43 | download_url='https://github.com/PaddlePaddle/EasyData.git', 44 | keywords=[ 45 | 'PaddlePaddle', 46 | 'DataAug', 47 | 'DataClean', 48 | ], 49 | classifiers=[ 50 | 'Intended Audience :: Developers', 51 | 'Operating System :: OS Independent', 52 | 'Natural Language :: Chinese 
(Simplified)', 53 | 'Programming Language :: Python :: 3.7', 54 | 'Programming Language :: Python :: 3.8', 55 | 'Programming Language :: Python :: 3.9', 'Topic :: Utilities' 56 | ], 57 | ) 58 | -------------------------------------------------------------------------------- /test_tipc/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/EasyData/7beeb5fdc4bcec2706b1845120bbb1811f3d04f2/test_tipc/.gitkeep -------------------------------------------------------------------------------- /tools/predict.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import os 16 | import sys 17 | 18 | __dir__ = os.path.dirname(os.path.abspath(__file__)) 19 | sys.path.append(os.path.abspath(os.path.join(__dir__, '../deploy/'))) 20 | 21 | from ppcv.core.config import ArgsParser 22 | 23 | from python.dataclean import DataClean 24 | from python.dataaug import DataAug 25 | from utils.utils import load_yaml 26 | 27 | 28 | def argsparser(): 29 | parser = ArgsParser() 30 | 31 | parser.add_argument("-c", 32 | "--config", 33 | type=str, 34 | default=None, 35 | help=("Path of configure"), 36 | required=True) 37 | parser.add_argument( 38 | "--input", 39 | type=str, 40 | default=None, 41 | help= 42 | "Path of input, suport image file, image directory and video file.", 43 | required=False) 44 | parser.add_argument("--output_dir", 45 | type=str, 46 | default="output", 47 | help="Directory of output visualization files.") 48 | parser.add_argument( 49 | "--run_mode", 50 | type=str, 51 | default='paddle', 52 | help="mode of running(paddle/trt_fp32/trt_fp16/trt_int8)") 53 | parser.add_argument( 54 | "--device", 55 | type=str, 56 | default='cpu', 57 | help= 58 | "Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU." 59 | ) 60 | return parser.parse_args() 61 | 62 | 63 | if __name__ == '__main__': 64 | args = argsparser() 65 | config_path = args.config 66 | yaml_data = load_yaml(config_path) 67 | if "DataClean" in yaml_data: 68 | dataclean = DataClean(args) 69 | dataclean.run() 70 | elif "DataGen" in yaml_data: 71 | dataaug = DataAug(args) 72 | dataaug.run() 73 | else: 74 | raise Exception("Error config") 75 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | 2 | --------------------------------------------------------------------------------