├── .devcontainer
    ├── devcontainer.json
    └── start.sh
├── .github
    └── workflows
    │   └── build-image.yml
├── .gitignore
├── .vscode
    ├── extensions.json
    └── settings.json
├── Dockerfile
├── LICENSE
├── README.md
├── README_EN.md
├── README_JP.md
├── README_KO.md
├── app.py
├── app.spec
├── assets
    ├── comfyui.png
    ├── community-uniapp-wechat-miniprogram.png
    ├── community-web.png
    ├── community-wechat-miniprogram.png
    ├── demo.png
    ├── demoImage.jpg
    ├── face++.png
    ├── gradio-image.jpeg
    ├── harry.png
    ├── hivision_logo.png
    └── social_template.png
├── demo
    ├── assets
    │   ├── american-style.png
    │   ├── color_list_CN.csv
    │   ├── color_list_EN.csv
    │   ├── size_list_CN.csv
    │   ├── size_list_EN.csv
    │   └── title.md
    ├── config.py
    ├── images
    │   ├── test0.jpg
    │   ├── test1.jpg
    │   ├── test2.jpg
    │   ├── test3.jpg
    │   └── test4.jpg
    ├── locales.py
    ├── processor.py
    ├── ui.py
    └── utils.py
├── deploy_api.py
├── docker-compose.yml
├── docs
    ├── api_CN.md
    ├── api_EN.md
    ├── face++_CN.md
    └── face++_EN.md
├── hivision
    ├── __init__.py
    ├── creator
    │   ├── __init__.py
    │   ├── choose_handler.py
    │   ├── context.py
    │   ├── face_detector.py
    │   ├── human_matting.py
    │   ├── layout_calculator.py
    │   ├── move_image.py
    │   ├── photo_adjuster.py
    │   ├── retinaface
    │   │   ├── __init__.py
    │   │   ├── box_utils.py
    │   │   ├── inference.py
    │   │   ├── prior_box.py
    │   │   └── weights
    │   │   │   └── .gitkeep
    │   ├── rotation_adjust.py
    │   ├── tensor2numpy.py
    │   ├── utils.py
    │   └── weights
    │   │   └── .gitkeep
    ├── error.py
    ├── plugin
    │   ├── beauty
    │   │   ├── __init__.py
    │   │   ├── base_adjust.py
    │   │   ├── beauty_tools.py
    │   │   ├── grind_skin.py
    │   │   ├── handler.py
    │   │   ├── lut
    │   │   │   └── lut_origin.png
    │   │   ├── thin_face.py
    │   │   └── whitening.py
    │   ├── font
    │   │   ├── .gitkeep
    │   │   └── 青鸟华光简琥珀.ttf
    │   ├── template
    │   │   ├── assets
    │   │   │   ├── template_1.png
    │   │   │   ├── template_2.png
    │   │   │   └── template_config.json
    │   │   └── template_calculator.py
    │   └── watermark.py
    └── utils.py
├── inference.py
├── requirements-app.txt
├── requirements-dev.txt
├── requirements.txt
├── scripts
    ├── build_pypi.py
    └── download_model.py
└── test
    ├── create_id_photo.py
    └── temp
        └── .gitkeep


/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the
 2 | // README at: https://github.com/devcontainers/templates/tree/main/src/universal
 3 | {
 4 | 	"name": "Default Linux Universal",
 5 | 	// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
 6 | 	"image": "mcr.microsoft.com/devcontainers/universal:2-linux",
 7 | 
 8 | 	// Features to add to the dev container. More info: https://containers.dev/features.
 9 | 	// "features": {},
10 | 
11 | 	// Use 'forwardPorts' to make a list of ports inside the container available locally.
12 | 	// "forwardPorts": [],
13 | 
14 | 	// Use 'postCreateCommand' to run commands after the container is created.
15 | 	"onCreateCommand": "sh .devcontainer/start.sh",
16 | 
17 | 	// Configure tool-specific properties.
18 | 	"customizations": {
19 | 		"vscode": {
20 | 			"extensions": [
21 | 				"ms-python.python",
22 | 				"eamodio.gitlens",
23 | 				"mhutchie.git-graph"
24 | 			]
25 | 		}
26 | 	}
27 | 
28 | 	// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
29 | 	// "remoteUser": "root"
30 | }
31 | 


--------------------------------------------------------------------------------
/.devcontainer/start.sh:
--------------------------------------------------------------------------------
 1 | sudo apt-get update && sudo apt-get install ffmpeg libsm6 libxext6 -y
 2 | 
 3 | conda create -n HivisionIDPhotos python=3.10 -y
 4 | conda init
 5 | echo 'conda activate HivisionIDPhotos' >> ~/.bashrc
 6 | 
 7 | ENV_PATH="/opt/conda/envs/HivisionIDPhotos/bin"
 8 | $ENV_PATH/pip install -r requirements.txt -r requirements-app.txt -r requirements-dev.txt
 9 | 
10 | $ENV_PATH/python scripts/download_model.py --models all
11 | 


--------------------------------------------------------------------------------
/.github/workflows/build-image.yml:
--------------------------------------------------------------------------------
 1 | name: build image and push
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - '*'
 7 | 
 8 | jobs:
 9 |   docker:
10 |     runs-on: ubuntu-latest
11 |     environment: release
12 |     steps:
13 |       - name: Checkout
14 |         uses: actions/checkout@v4
15 | 
16 |       - name: Set up Python
17 |         uses: actions/setup-python@v4
18 |         with:
19 |           python-version: '3.10'
20 | 
21 |       - name: Install dependencies
22 |         run: pip install requests tqdm
23 | 
24 |       - name: Download models
25 |         run: python scripts/download_model.py --models all
26 | 
27 |       - name: Set up QEMU
28 |         uses: docker/setup-qemu-action@v3
29 | 
30 |       - name: Set up Docker Buildx
31 |         uses: docker/setup-buildx-action@v3
32 | 
33 |       - name: Login to Docker Hub
34 |         uses: docker/login-action@v3
35 |         with:
36 |           username: ${{ vars.DOCKERHUB_USERNAME }}
37 |           password: ${{ secrets.DOCKERHUB_TOKEN }}
38 | 
39 |       - name: Build and push
40 |         uses: docker/build-push-action@v6
41 |         with:
42 |           context: .
43 |           platforms: linux/amd64,linux/arm64
44 |           push: true
45 |           tags: |
46 |             ${{ vars.IMAGE_NAME }}:latest
47 |             ${{ vars.IMAGE_NAME }}:${{ github.ref_name }}


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | **/__pycache__/
 3 | .idea
 4 | .vscode/*
 5 | .history
 6 | .DS_Store
 7 | .env
 8 | demo/kb_output/*.jpg
 9 | demo/kb_output/*.png
10 | scripts/sync_swanhub.py
11 | scripts/sync_huggingface.py
12 | scripts/sync_modelscope.py
13 | scripts/sync_all.py
14 | **/flagged/
15 | # build outputs
16 | dist
17 | build
18 | # checkpoint
19 | *.pth
20 | *.pt
21 | *.onnx
22 | *.mnn
23 | test/temp/*
24 | !test/temp/.gitkeep
25 | 
26 | .python-version
27 | 
28 | # Ignore .png and .jpg files in the root directory
29 | /*.png
30 | /*.jpg
31 | 


--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "recommendations": [
 3 |     "ms-python.black-formatter",
 4 |     "donjayamanne.python-extension-pack",
 5 |     "njpwerner.autodocstring",
 6 | 
 7 |     "editorconfig.editorconfig",
 8 | 
 9 |     "gruntfuggly.todo-tree",
10 | 
11 |     "eamodio.gitlens",
12 | 
13 |     "PKief.material-icon-theme",
14 |     "davidanson.vscode-markdownlint",
15 |     "usernamehw.errorlens",
16 |     "tamasfe.even-better-toml",
17 | 
18 |     "littlefoxteam.vscode-python-test-adapter"
19 |   ]
20 | }
21 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "workbench.iconTheme": "material-icon-theme",
 3 |   "material-icon-theme.files.associations": {
 4 |     ".env.mock": "Tune",
 5 |     "requirements-dev.txt": "python-misc",
 6 |     "requirements-media.txt": "python-misc"
 7 |   },
 8 |   /** 后端代码格式化部分，python格式化 */
 9 |   "[python]": {
10 |     "editor.defaultFormatter": "ms-python.black-formatter",
11 |     "editor.formatOnSave": true
12 |   },
13 |   /** TODO tree 配置 */
14 |   "todo-tree.general.tags": [
15 |     "TODO", // 待办
16 |     "FIXME", // 待修复
17 |     "COMPAT", // 兼容性问题
18 |     "WARNING" // 警告
19 |   ],
20 |   "todo-tree.highlights.customHighlight": {
21 |     "TODO": {
22 |       "icon": "check",
23 |       "type": "tag",
24 |       "foreground": "#ffff00",
25 |       "iconColour": "#ffff"
26 |     },
27 |     "WARNING": {
28 |       "icon": "alert",
29 |       "type": "tag",
30 |       "foreground": "#ff0000",
31 |       "iconColour": "#ff0000"
32 |     },
33 |     "FIXME": {
34 |       "icon": "flame",
35 |       "type": "tag",
36 |       "foreground": "#ff0000",
37 |       "iconColour": "#ff0000"
38 |     },
39 |     "COMPAT": {
40 |       "icon": "flame",
41 |       "type": "tag",
42 |       "foreground": "#00ff00",
43 |       "iconColour": "#ffff"
44 |     }
45 |   },
46 | 
47 |   /** python代码注释 */
48 |   "autoDocstring.docstringFormat": "numpy",
49 | 
50 |   /** markdown格式检查 */
51 |   "markdownlint.config": {
52 |     // 允许使用html标签
53 |     "MD033": false,
54 |     // 允许首行不是level1标题
55 |     "MD041": false
56 |   },
57 | 
58 |   /** 不显示文件夹 */
59 |   "files.exclude": {
60 |     "**/.git": true,
61 |     "**/.svn": true,
62 |     "**/.hg": true,
63 |     "**/CVS": true,
64 |     "**/.DS_Store": true,
65 |     "**/Thumbs.db": true,
66 |     "**/__pycache__": true,
67 |     ".idea": true
68 |   },
69 |   "python.testing.pytestEnabled": true,
70 |   "ros.distro": "humble"
71 | }
72 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.10-slim
 2 | 
 3 | # Install system dependencies
 4 | RUN apt-get update && apt-get install -y --no-install-recommends \
 5 |     ffmpeg \
 6 |     libgl1-mesa-glx \
 7 |     libglib2.0-0 \
 8 |     && rm -rf /var/lib/apt/lists/*
 9 | 
10 | WORKDIR /app
11 | 
12 | COPY requirements.txt requirements-app.txt ./
13 | 
14 | RUN pip install --no-cache-dir -r requirements.txt -r requirements-app.txt
15 | 
16 | COPY . .
17 | 
18 | EXPOSE 7860
19 | EXPOSE 8080
20 | 
21 | CMD ["python3", "-u", "app.py", "--host", "0.0.0.0", "--port", "7860"]


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | from demo.processor import IDPhotoProcessor
 4 | from demo.ui import create_ui
 5 | from hivision.creator.choose_handler import HUMAN_MATTING_MODELS
 6 | 
 7 | root_dir = os.path.dirname(os.path.abspath(__file__))
 8 | 
 9 | # 获取存在的人像分割模型列表
10 | # 通过检查 hivision/creator/weights 目录下的 .onnx 和 .mnn 文件
11 | # 只保留文件名（不包括扩展名）
12 | HUMAN_MATTING_MODELS_EXIST = [
13 |     os.path.splitext(file)[0]
14 |     for file in os.listdir(os.path.join(root_dir, "hivision/creator/weights"))
15 |     if file.endswith(".onnx") or file.endswith(".mnn")
16 | ]
17 | # 在HUMAN_MATTING_MODELS中的模型才会被加载到Gradio中显示
18 | HUMAN_MATTING_MODELS_CHOICE = [
19 |     model for model in HUMAN_MATTING_MODELS if model in HUMAN_MATTING_MODELS_EXIST
20 | ]
21 | 
22 | if len(HUMAN_MATTING_MODELS_CHOICE) == 0:
23 |     raise ValueError(
24 |         "未找到任何存在的人像分割模型，请检查 hivision/creator/weights 目录下的文件"
25 |         + "\n"
26 |         + "No existing portrait segmentation model was found, please check the files in the hivision/creator/weights directory."
27 |     )
28 | 
29 | FACE_DETECT_MODELS = ["face++ (联网Online API)", "mtcnn"]
30 | FACE_DETECT_MODELS_EXPAND = (
31 |     ["retinaface-resnet50"]
32 |     if os.path.exists(
33 |         os.path.join(
34 |             root_dir, "hivision/creator/retinaface/weights/retinaface-resnet50.onnx"
35 |         )
36 |     )
37 |     else []
38 | )
39 | FACE_DETECT_MODELS_CHOICE = FACE_DETECT_MODELS + FACE_DETECT_MODELS_EXPAND
40 | 
41 | LANGUAGE = ["zh", "en", "ko", "ja"]
42 | 
43 | if __name__ == "__main__":
44 |     argparser = argparse.ArgumentParser()
45 |     argparser.add_argument(
46 |         "--port", type=int, default=7860, help="The port number of the server"
47 |     )
48 |     argparser.add_argument(
49 |         "--host", type=str, default="127.0.0.1", help="The host of the server"
50 |     )
51 |     argparser.add_argument(
52 |         "--root_path",
53 |         type=str,
54 |         default=None,
55 |         help="The root path of the server, default is None (='/'), e.g. '/myapp'",
56 |     )
57 |     args = argparser.parse_args()
58 | 
59 |     processor = IDPhotoProcessor()
60 | 
61 |     demo = create_ui(
62 |         processor,
63 |         root_dir,
64 |         HUMAN_MATTING_MODELS_CHOICE,
65 |         FACE_DETECT_MODELS_CHOICE,
66 |         LANGUAGE,
67 |     )
68 |     
69 |     # 如果RUN_MODE是Beast，打印已开启野兽模式
70 |     if os.getenv("RUN_MODE") == "beast":
71 |         print("[Beast mode activated.] 已开启野兽模式。")
72 | 
73 |     demo.launch(
74 |         server_name=args.host,
75 |         server_port=args.port,
76 |         favicon_path=os.path.join(root_dir, "assets/hivision_logo.png"),
77 |         root_path=args.root_path,
78 |         show_api=False,
79 |     )
80 | 


--------------------------------------------------------------------------------
/app.spec:
--------------------------------------------------------------------------------
 1 | # -*- mode: python ; coding: utf-8 -*-
 2 | from PyInstaller.utils.hooks import collect_data_files
 3 | 
 4 | datas = [('hivisionai', 'hivisionai'), ('hivision_modnet.onnx', '.'), ('size_list_CN.csv', '.')]
 5 | datas += collect_data_files('gradio_client')
 6 | datas += collect_data_files('gradio')
 7 | 
 8 | 
 9 | a = Analysis(
10 |     ['app/web.py'],
11 |     pathex=[],
12 |     binaries=[],
13 |     datas=datas,
14 |     hiddenimports=[],
15 |     hookspath=[],
16 |     hooksconfig={},
17 |     runtime_hooks=[],
18 |     excludes=[],
19 |     noarchive=False,
20 |     optimize=0,
21 | )
22 | pyz = PYZ(a.pure)
23 | 
24 | exe = EXE(
25 |     pyz,
26 |     a.scripts,
27 |     a.binaries,
28 |     a.datas,
29 |     [],
30 |     name='HivisionIDPhotos',
31 |     debug=False,
32 |     bootloader_ignore_signals=False,
33 |     strip=False,
34 |     upx=True,
35 |     upx_exclude=[],
36 |     runtime_tmpdir=None,
37 |     console=True,
38 |     disable_windowed_traceback=False,
39 |     argv_emulation=False,
40 |     target_arch=None,
41 |     codesign_identity=None,
42 |     entitlements_file=None,
43 |     icon=['assets\hivisionai.ico'],
44 | )
45 | 


--------------------------------------------------------------------------------
/assets/comfyui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/assets/comfyui.png


--------------------------------------------------------------------------------
/assets/community-uniapp-wechat-miniprogram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/assets/community-uniapp-wechat-miniprogram.png


--------------------------------------------------------------------------------
/assets/community-web.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/assets/community-web.png


--------------------------------------------------------------------------------
/assets/community-wechat-miniprogram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/assets/community-wechat-miniprogram.png


--------------------------------------------------------------------------------
/assets/demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/assets/demo.png


--------------------------------------------------------------------------------
/assets/demoImage.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/assets/demoImage.jpg


--------------------------------------------------------------------------------
/assets/face++.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/assets/face++.png


--------------------------------------------------------------------------------
/assets/gradio-image.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/assets/gradio-image.jpeg


--------------------------------------------------------------------------------
/assets/harry.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/assets/harry.png


--------------------------------------------------------------------------------
/assets/hivision_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/assets/hivision_logo.png


--------------------------------------------------------------------------------
/assets/social_template.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/assets/social_template.png


--------------------------------------------------------------------------------
/demo/assets/american-style.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/demo/assets/american-style.png


--------------------------------------------------------------------------------
/demo/assets/color_list_CN.csv:
--------------------------------------------------------------------------------
1 | Name,Hex
2 | 蓝色,628bce
3 | 白色,ffffff
4 | 红色,d74532
5 | 黑色,000000
6 | 深蓝色,4b6190
7 | 浅灰色,f2f0f0


--------------------------------------------------------------------------------
/demo/assets/color_list_EN.csv:
--------------------------------------------------------------------------------
1 | Name,Hex  
2 | Blue,628bce  
3 | White,ffffff  
4 | Red,d74532  
5 | Black,000000  
6 | Dark Blue,4b6190
7 | Light Gray,f2f0f0


--------------------------------------------------------------------------------
/demo/assets/size_list_CN.csv:
--------------------------------------------------------------------------------
 1 | Name,Height,Width
 2 | 一寸,413,295
 3 | 二寸,626,413
 4 | 小一寸,378,260
 5 | 小二寸,531,413
 6 | 大一寸,567,390
 7 | 大二寸,626,413
 8 | 五寸,1499,1050
 9 | 教师资格证,413,295
10 | 国家公务员考试,413,295
11 | 初级会计考试,413,295
12 | 英语四六级考试,192,144
13 | 计算机等级考试,567,390
14 | 研究生考试,709,531
15 | 社保卡,441,358
16 | 电子驾驶证,378,260
17 | 美国签证,600,600
18 | 日本签证,413,295
19 | 韩国签证,531,413


--------------------------------------------------------------------------------
/demo/assets/size_list_EN.csv:
--------------------------------------------------------------------------------
 1 | Name,Height,Width
 2 | One inch,413,295
 3 | Two inches,626,413
 4 | Small one inch,378,260
 5 | Small two inches,531,413
 6 | Large one inch,567,390
 7 | Large two inches,626,413 
 8 | Five inches,1499,1050
 9 | Teacher qualification certificate,413,295
10 | National civil service exa,413,295
11 | Primary accounting exam,413,295
12 | English CET-4 and CET-6 exams,192,144
13 | Computer level exam,567,390
14 | Graduate entrance exam,709,531
15 | Social security card,441,358
16 | Electronic driver's license,378,260
17 | American visa,600,600
18 | Japanese visa,413,295
19 | Korean visa,531,413


--------------------------------------------------------------------------------
/demo/assets/title.md:
--------------------------------------------------------------------------------
 1 | <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; text-align: center; font-size: 40px;">
 2 |   <div style="display: flex; align-items: center;">
 3 |     <img src="https://swanhub.co/git/repo/ZeYiLin%2FHivisionIDPhotos/file/preview?ref=master&path=assets/hivision_logo.png" alt="HivisionIDPhotos" style="width: 65px; height: 65px; margin-right: 10px;" onerror="this.style.display='none';" loading="lazy">
 4 |     <b style="color: #6e9abb;">HivisionIDPhotos</b><span style="font-size: 18px; color: #638fb3; margin-left: 10px;"> v1.3.1</span>
 5 |   </div>
 6 |   <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
 7 |       <a href="https://github.com/xiaolin199912/HivisionIDPhotos"><img alt="Github" src="https://img.shields.io/static/v1?label=GitHub&message=GitHub&color=black" onerror="this.style.display='none';"></a> &ensp;
 8 |       <a href="https://github.com/xiaolin199912/HivisionIDPhotos/stargazers"><img alt="GitHub stars" src="https://img.shields.io/github/stars/zeyi-lin/hivisionidphotos?color=ffcb47&labelColor=black&style=flat-square" onerror="this.style.display='none';"></a> &ensp;
 9 |       <a href="https://swanlab.cn?utm_source=hivision_demo"><img alt="SwanLab" src="https://img.shields.io/badge/Training%20by-SwanLab-4cb55e" onerror="this.style.display='none';"></a> &ensp;
10 |       <a href="https://github.com/Zeyi-Lin/HivisionIDPhotos/blob/master/docs/api_CN.md" target="_blank"><img alt="Static Badge" src="https://img.shields.io/badge/API_Docs-API文档-315bce" onerror="this.style.display='none';"></a>
11 |   </div>
12 | </div>
13 | 


--------------------------------------------------------------------------------
/demo/config.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from demo.utils import csv_to_size_list, csv_to_color_list
 3 | 
 4 | 
 5 | def load_configuration(root_dir):
 6 |     size_list_dict_CN = csv_to_size_list(
 7 |         os.path.join(root_dir, "assets/size_list_CN.csv")
 8 |     )
 9 |     size_list_dict_EN = csv_to_size_list(
10 |         os.path.join(root_dir, "assets/size_list_EN.csv")
11 |     )
12 |     color_list_dict_CN = csv_to_color_list(
13 |         os.path.join(root_dir, "assets/color_list_CN.csv")
14 |     )
15 |     color_list_dict_EN = csv_to_color_list(
16 |         os.path.join(root_dir, "assets/color_list_EN.csv")
17 |     )
18 | 
19 |     return size_list_dict_CN, size_list_dict_EN, color_list_dict_CN, color_list_dict_EN
20 | 


--------------------------------------------------------------------------------
/demo/images/test0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/demo/images/test0.jpg


--------------------------------------------------------------------------------
/demo/images/test1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/demo/images/test1.jpg


--------------------------------------------------------------------------------
/demo/images/test2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/demo/images/test2.jpg


--------------------------------------------------------------------------------
/demo/images/test3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/demo/images/test3.jpg


--------------------------------------------------------------------------------
/demo/images/test4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/demo/images/test4.jpg


--------------------------------------------------------------------------------
/demo/utils.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | 
 3 | 
 4 | def csv_to_size_list(csv_file: str) -> dict:
 5 |     # 初始化一个空字典
 6 |     size_list_dict = {}
 7 | 
 8 |     # 打开 CSV 文件并读取数据
 9 |     with open(csv_file, mode="r", encoding="utf-8") as file:
10 |         reader = csv.reader(file)
11 |         # 跳过表头
12 |         next(reader)
13 |         # 读取数据并填充字典
14 |         for row in reader:
15 |             size_name, h, w = row
16 |             size_name_add_size = "{}\t\t({}, {})".format(size_name, h, w)
17 |             size_list_dict[size_name_add_size] = (int(h), int(w))
18 | 
19 |     return size_list_dict
20 | 
21 | 
22 | def csv_to_color_list(csv_file: str) -> dict:
23 |     # 初始化一个空字典
24 |     color_list_dict = {}
25 | 
26 |     # 打开 CSV 文件并读取数据
27 |     with open(csv_file, mode="r", encoding="utf-8") as file:
28 |         reader = csv.reader(file)
29 |         # 跳过表头
30 |         next(reader)
31 |         # 读取数据并填充字典
32 |         for row in reader:
33 |             color_name, hex_code = row
34 |             color_list_dict[color_name] = hex_code
35 | 
36 |     return color_list_dict
37 | 
38 | 
39 | def range_check(value, min_value=0, max_value=255):
40 |     value = int(value)
41 |     return max(min_value, min(value, max_value))
42 | 


--------------------------------------------------------------------------------
/deploy_api.py:
--------------------------------------------------------------------------------
  1 | from fastapi import FastAPI, UploadFile, Form, File
  2 | from hivision import IDCreator
  3 | from hivision.error import FaceError
  4 | from hivision.creator.layout_calculator import (
  5 |     generate_layout_array,
  6 |     generate_layout_image,
  7 | )
  8 | from hivision.creator.choose_handler import choose_handler
  9 | from hivision.utils import (
 10 |     add_background,
 11 |     resize_image_to_kb,
 12 |     bytes_2_base64,
 13 |     base64_2_numpy,
 14 |     hex_to_rgb,
 15 |     add_watermark,
 16 |     save_image_dpi_to_bytes,
 17 | )
 18 | import numpy as np
 19 | import cv2
 20 | from starlette.middleware.cors import CORSMiddleware
 21 | from starlette.formparsers import MultiPartParser
 22 | 
 23 | # 设置Starlette表单字段大小限制
 24 | MultiPartParser.max_part_size = 10 * 1024 * 1024  # 10MB
 25 | # 设置Starlette文件上传大小限制
 26 | MultiPartParser.max_file_size = 20 * 1024 * 1024   # 20MB
 27 | 
 28 | app = FastAPI()
 29 | creator = IDCreator()
 30 | 
 31 | # 添加 CORS 中间件 解决跨域问题
 32 | app.add_middleware(
 33 |     CORSMiddleware,
 34 |     allow_origins=["*"],  # 允许的请求来源
 35 |     allow_credentials=True,  # 允许携带 Cookie
 36 |     allow_methods=[
 37 |         "*"
 38 |     ],  # 允许的请求方法，例如：GET, POST 等，也可以指定 ["GET", "POST"]
 39 |     allow_headers=["*"],  # 允许的请求头，也可以指定具体的头部
 40 | )
 41 | 
 42 | 
 43 | # 证件照智能制作接口
 44 | @app.post("/idphoto")
 45 | async def idphoto_inference(
 46 |     input_image: UploadFile = File(None),
 47 |     input_image_base64: str = Form(None),
 48 |     height: int = Form(413),
 49 |     width: int = Form(295),
 50 |     human_matting_model: str = Form("modnet_photographic_portrait_matting"),
 51 |     face_detect_model: str = Form("mtcnn"),
 52 |     hd: bool = Form(True),
 53 |     dpi: int = Form(300),
 54 |     face_align: bool = Form(False),
 55 |     head_measure_ratio: float = Form(0.2),
 56 |     head_height_ratio: float = Form(0.45),
 57 |     top_distance_max: float = Form(0.12),
 58 |     top_distance_min: float = Form(0.10),
 59 |     brightness_strength: float = Form(0),
 60 |     contrast_strength: float = Form(0),
 61 |     sharpen_strength: float = Form(0),
 62 |     saturation_strength: float = Form(0),
 63 | ):  
 64 |     # 如果传入了base64，则直接使用base64解码
 65 |     if input_image_base64:
 66 |         img = base64_2_numpy(input_image_base64)
 67 |     # 否则使用上传的图片
 68 |     else:
 69 |         image_bytes = await input_image.read()
 70 |         nparr = np.frombuffer(image_bytes, np.uint8)
 71 |         img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
 72 | 
 73 |     # ------------------- 选择抠图与人脸检测模型 -------------------
 74 |     choose_handler(creator, human_matting_model, face_detect_model)
 75 | 
 76 |     # 将字符串转为元组
 77 |     size = (int(height), int(width))
 78 |     try:
 79 |         result = creator(
 80 |             img,
 81 |             size=size,
 82 |             head_measure_ratio=head_measure_ratio,
 83 |             head_height_ratio=head_height_ratio,
 84 |             head_top_range=(top_distance_max, top_distance_min),
 85 |             face_alignment=face_align,
 86 |             brightness_strength=brightness_strength,
 87 |             contrast_strength=contrast_strength,
 88 |             sharpen_strength=sharpen_strength,
 89 |             saturation_strength=saturation_strength,
 90 |         )
 91 |     except FaceError:
 92 |         result_message = {"status": False}
 93 |     # 如果检测到人脸数量等于1, 则返回标准证和高清照结果（png 4通道图像）
 94 |     else:
 95 |         result_image_standard_bytes = save_image_dpi_to_bytes(cv2.cvtColor(result.standard, cv2.COLOR_RGBA2BGRA), None, dpi)
 96 |         
 97 |         result_message = {
 98 |             "status": True,
 99 |             "image_base64_standard": bytes_2_base64(result_image_standard_bytes),
100 |         }
101 | 
102 |         # 如果hd为True, 则增加高清照结果（png 4通道图像）
103 |         if hd:
104 |             result_image_hd_bytes = save_image_dpi_to_bytes(cv2.cvtColor(result.hd, cv2.COLOR_RGBA2BGRA), None, dpi)
105 |             result_message["image_base64_hd"] = bytes_2_base64(result_image_hd_bytes)
106 | 
107 |     return result_message
108 | 
109 | 
110 | # 人像抠图接口
111 | @app.post("/human_matting")
112 | async def human_matting_inference(
113 |     input_image: UploadFile = File(None),
114 |     input_image_base64: str = Form(None),
115 |     human_matting_model: str = Form("hivision_modnet"),
116 |     dpi: int = Form(300),
117 | ):
118 |     if input_image_base64:
119 |         img = base64_2_numpy(input_image_base64)
120 |     else:
121 |         image_bytes = await input_image.read()
122 |         nparr = np.frombuffer(image_bytes, np.uint8)
123 |         img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
124 | 
125 |     # ------------------- 选择抠图与人脸检测模型 -------------------
126 |     choose_handler(creator, human_matting_model, None)
127 | 
128 |     try:
129 |         result = creator(
130 |             img,
131 |             change_bg_only=True,
132 |         )
133 |     except FaceError:
134 |         result_message = {"status": False}
135 | 
136 |     else:
137 |         result_image_standard_bytes = save_image_dpi_to_bytes(cv2.cvtColor(result.standard, cv2.COLOR_RGBA2BGRA), None, dpi)
138 |         result_message = {
139 |             "status": True,
140 |             "image_base64": bytes_2_base64(result_image_standard_bytes),
141 |         }
142 |     return result_message
143 | 
144 | 
145 | # 透明图像添加纯色背景接口
146 | @app.post("/add_background")
147 | async def photo_add_background(
148 |     input_image: UploadFile = File(None),
149 |     input_image_base64: str = Form(None),
150 |     color: str = Form("000000"),
151 |     kb: int = Form(None),
152 |     dpi: int = Form(300),
153 |     render: int = Form(0),
154 | ):
155 |     render_choice = ["pure_color", "updown_gradient", "center_gradient"]
156 | 
157 |     if input_image_base64:
158 |         img = base64_2_numpy(input_image_base64)
159 |     else:
160 |         image_bytes = await input_image.read()
161 |         nparr = np.frombuffer(image_bytes, np.uint8)
162 |         img = cv2.imdecode(nparr, cv2.IMREAD_UNCHANGED)
163 | 
164 |     color = hex_to_rgb(color)
165 |     color = (color[2], color[1], color[0])
166 | 
167 |     result_image = add_background(
168 |         img,
169 |         bgr=color,
170 |         mode=render_choice[render],
171 |     ).astype(np.uint8)
172 | 
173 |     result_image = cv2.cvtColor(result_image, cv2.COLOR_RGB2BGR)
174 |     if kb:
175 |         result_image_bytes = resize_image_to_kb(result_image, None, int(kb), dpi=dpi)
176 |     else:
177 |         result_image_bytes = save_image_dpi_to_bytes(result_image, None, dpi=dpi)
178 | 
179 |     result_messgae = {
180 |         "status": True,
181 |         "image_base64": bytes_2_base64(result_image_bytes),
182 |     }
183 | 
184 |     return result_messgae
185 | 
186 | 
187 | # 六寸排版照生成接口
188 | @app.post("/generate_layout_photos")
189 | async def generate_layout_photos(
190 |     input_image: UploadFile = File(None),
191 |     input_image_base64: str = Form(None),
192 |     height: int = Form(413),
193 |     width: int = Form(295),
194 |     kb: int = Form(None),
195 |     dpi: int = Form(300),
196 | ):
197 |     # try:
198 |     if input_image_base64:
199 |         img = base64_2_numpy(input_image_base64)
200 |     else:
201 |         image_bytes = await input_image.read()
202 |         nparr = np.frombuffer(image_bytes, np.uint8)
203 |         img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
204 | 
205 |     size = (int(height), int(width))
206 | 
207 |     typography_arr, typography_rotate = generate_layout_array(
208 |         input_height=size[0], input_width=size[1]
209 |     )
210 | 
211 |     result_layout_image = generate_layout_image(
212 |         img, typography_arr, typography_rotate, height=size[0], width=size[1]
213 |     ).astype(np.uint8)
214 | 
215 |     result_layout_image = cv2.cvtColor(result_layout_image, cv2.COLOR_RGB2BGR)
216 |     if kb:
217 |         result_layout_image_bytes = resize_image_to_kb(
218 |             result_layout_image, None, int(kb), dpi=dpi
219 |         )
220 |     else:
221 |         result_layout_image_bytes = save_image_dpi_to_bytes(result_layout_image, None, dpi=dpi)
222 |         
223 |     result_layout_image_base64 = bytes_2_base64(result_layout_image_bytes)
224 | 
225 |     result_messgae = {
226 |         "status": True,
227 |         "image_base64": result_layout_image_base64,
228 |     }
229 | 
230 |     return result_messgae
231 | 
232 | 
233 | # 透明图像添加水印接口
234 | @app.post("/watermark")
235 | async def watermark(
236 |     input_image: UploadFile = File(None),
237 |     input_image_base64: str = Form(None),
238 |     text: str = Form("Hello"),
239 |     size: int = 20,
240 |     opacity: float = 0.5,
241 |     angle: int = 30,
242 |     color: str = "#000000",
243 |     space: int = 25,
244 |     kb: int = Form(None),
245 |     dpi: int = Form(300),
246 | ):
247 |     if input_image_base64:
248 |         img = base64_2_numpy(input_image_base64)
249 |     else:
250 |         image_bytes = await input_image.read()
251 |         nparr = np.frombuffer(image_bytes, np.uint8)
252 |         img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
253 | 
254 |     try:
255 |         result_image = add_watermark(img, text, size, opacity, angle, color, space)
256 | 
257 |         result_image = cv2.cvtColor(result_image, cv2.COLOR_RGB2BGR)
258 |         if kb:
259 |             result_image_bytes = resize_image_to_kb(result_image, None, int(kb), dpi=dpi)
260 |         else:
261 |             result_image_bytes = save_image_dpi_to_bytes(result_image, None, dpi=dpi)
262 |         result_image_base64 = bytes_2_base64(result_image_bytes)
263 | 
264 |         result_messgae = {
265 |             "status": True,
266 |             "image_base64": result_image_base64,
267 |         }
268 |     except Exception as e:
269 |         result_messgae = {
270 |             "status": False,
271 |             "error": str(e),
272 |         }
273 | 
274 |     return result_messgae
275 | 
276 | 
277 | # 设置照片KB值接口(RGB图)
278 | @app.post("/set_kb")
279 | async def set_kb(
280 |     input_image: UploadFile = File(None),
281 |     input_image_base64: str = Form(None),
282 |     dpi: int = Form(300),
283 |     kb: int = Form(50),
284 | ):
285 |     if input_image_base64:
286 |         img = base64_2_numpy(input_image_base64)
287 |     else:
288 |         image_bytes = await input_image.read()
289 |         nparr = np.frombuffer(image_bytes, np.uint8)
290 |         img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
291 | 
292 |     try:
293 |         result_image = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
294 |         result_image_bytes = resize_image_to_kb(result_image, None, int(kb), dpi=dpi)
295 |         result_image_base64 = bytes_2_base64(result_image_bytes)
296 | 
297 |         result_messgae = {
298 |             "status": True,
299 |             "image_base64": result_image_base64,
300 |         }
301 |     except Exception as e:
302 |         result_messgae = {
303 |             "status": False,
304 |             "error": e,
305 |         }
306 | 
307 |     return result_messgae
308 | 
309 | 
310 | # 证件照智能裁剪接口
311 | @app.post("/idphoto_crop")
312 | async def idphoto_crop_inference(
313 |     input_image: UploadFile = File(None),
314 |     input_image_base64: str = Form(None),
315 |     height: int = Form(413),
316 |     width: int = Form(295),
317 |     face_detect_model: str = Form("mtcnn"),
318 |     hd: bool = Form(True),
319 |     dpi: int = Form(300),
320 |     head_measure_ratio: float = Form(0.2),
321 |     head_height_ratio: float = Form(0.45),
322 |     top_distance_max: float = Form(0.12),
323 |     top_distance_min: float = Form(0.10),
324 | ):
325 |     if input_image_base64:
326 |         img = base64_2_numpy(input_image_base64)
327 |     else:
328 |         image_bytes = await input_image.read()
329 |         nparr = np.frombuffer(image_bytes, np.uint8)
330 |         img = cv2.imdecode(nparr, cv2.IMREAD_UNCHANGED)  # 读取图像(4通道)
331 | 
332 |     # ------------------- 选择抠图与人脸检测模型 -------------------
333 |     choose_handler(creator, face_detect_option=face_detect_model)
334 | 
335 |     # 将字符串转为元组
336 |     size = (int(height), int(width))
337 |     try:
338 |         result = creator(
339 |             img,
340 |             size=size,
341 |             head_measure_ratio=head_measure_ratio,
342 |             head_height_ratio=head_height_ratio,
343 |             head_top_range=(top_distance_max, top_distance_min),
344 |             crop_only=True,
345 |         )
346 |     except FaceError:
347 |         result_message = {"status": False}
348 |     # 如果检测到人脸数量等于1, 则返回标准证和高清照结果（png 4通道图像）
349 |     else:
350 |         result_image_standard_bytes = save_image_dpi_to_bytes(cv2.cvtColor(result.standard, cv2.COLOR_RGBA2BGRA), None, dpi)
351 |         
352 |         result_message = {
353 |             "status": True,
354 |             "image_base64_standard": bytes_2_base64(result_image_standard_bytes),
355 |         }
356 | 
357 |         # 如果hd为True, 则增加高清照结果（png 4通道图像）
358 |         if hd:
359 |             result_image_hd_bytes = save_image_dpi_to_bytes(cv2.cvtColor(result.hd, cv2.COLOR_RGBA2BGRA), None, dpi)
360 |             result_message["image_base64_hd"] = bytes_2_base64(result_image_hd_bytes)
361 | 
362 |     return result_message
363 | 
364 | 
365 | if __name__ == "__main__":
366 |     import uvicorn
367 | 
368 |     # 在8080端口运行推理服务
369 |     uvicorn.run(app, host="0.0.0.0", port=8080)
370 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3.8'
 2 | 
 3 | services:
 4 |   hivision_idphotos:
 5 |     build:
 6 |       context: .
 7 |       dockerfile: Dockerfile
 8 |     image: linzeyi/hivision_idphotos
 9 |     command: python3 -u app.py --host 0.0.0.0 --port 7860
10 |     ports:
11 |       - '7860:7860'
12 | 
13 |   hivision_idphotos_api:
14 |     build:
15 |       context: .
16 |       dockerfile: Dockerfile
17 |     image: linzeyi/hivision_idphotos
18 |     command: python3 deploy_api.py
19 |     ports:
20 |       - '8080:8080'
21 | 


--------------------------------------------------------------------------------
/docs/api_CN.md:
--------------------------------------------------------------------------------
  1 | # API Docs
  2 | 
  3 | [English](api_EN.md) / 中文
  4 | 
  5 | 
  6 | ## 目录
  7 | 
  8 | - [开始之前：开启后端服务](#开始之前开启后端服务)
  9 | - [接口功能说明](#接口功能说明)
 10 |   - [1.生成证件照(底透明)](#1生成证件照底透明)
 11 |   - [2.添加背景色](#2添加背景色)
 12 |   - [3.生成六寸排版照](#3生成六寸排版照)
 13 |   - [4.人像抠图](#4人像抠图)
 14 |   - [5.图像加水印](#5图像加水印)
 15 |   - [6.设置图像KB大小](#6设置图像KB大小)
 16 |   - [7.证件照裁切](#7证件照裁切)
 17 | - [cURL 请求示例](#curl-请求示例)
 18 | - [Python 请求示例](#python-请求示例)
 19 | 
 20 | ## 开始之前：开启后端服务
 21 | 
 22 | 在请求 API 之前，请先运行后端服务
 23 | 
 24 | ```bash
 25 | python deploy_api.py
 26 | ```
 27 | 
 28 | <br>
 29 | 
 30 | ## 接口功能说明
 31 | 
 32 | ### 1.生成证件照(底透明)
 33 | 
 34 | 接口名：`idphoto`
 35 | 
 36 | `生成证件照`接口的逻辑是发送一张 RGB 图像，输出一张标准证件照和一张高清证件照：
 37 | 
 38 | - **高清证件照**：根据`size`的宽高比例制作的证件照，文件名为`output_image_dir`增加`_hd`后缀
 39 | - **标准证件照**：尺寸等于`size`，由高清证件照缩放而来，文件名为`output_image_dir`
 40 | 
 41 | 需要注意的是，生成的两张照片都是透明的（RGBA 四通道图像），要生成完整的证件照，还需要下面的`添加背景色`接口。
 42 | 
 43 | > 问：为什么这么设计？  
 44 | > 答：因为在实际产品中，经常用户会频繁切换底色预览效果，直接给透明底图像，由前端 js 代码合成颜色是更好体验的做法。
 45 | 
 46 | **请求参数：**
 47 | 
 48 | | 参数名 | 类型 | 必填 | 说明 |
 49 | | :--- | :--- | :--- | :--- |
 50 | | input_image | file | 和`input_image_base64`二选一 | 传入的图像文件，图像文件为需为RGB三通道图像。 |
 51 | | input_image_base64 | str | 和`input_image`二选一 | 传入的图像文件的base64编码，图像文件为需为RGB三通道图像。 |
 52 | | height | int | 否 | 标准证件照高度，默认为`413` |
 53 | | width | int | 否 | 标准证件照宽度，默认为`295` |
 54 | | human_matting_model | str | 否 | 人像分割模型，默认为`modnet_photographic_portrait_matting`。可选值为`modnet_photographic_portrait_matting`、`hivision_modnet`、`rmbg-1.4`、`birefnet-v1-lite` |
 55 | | face_detect_model | str | 否 | 人脸检测模型，默认为`mtcnn`。可选值为`mtcnn`、`face_plusplus`、`retinaface-resnet50` |
 56 | | hd | bool | 否 | 是否生成高清证件照，默认为`true` |
 57 | | dpi | int | 否 | 图像分辨率，默认为`300` |
 58 | | face_alignment | bool | 否 | 是否进行人脸对齐，默认为`true` |
 59 | | head_measure_ratio | float | 否 | 面部面积与照片面积的比例，默认为`0.2` |
 60 | | head_height_ratio | float | 否 | 面部中心与照片顶部的高度比例，默认为`0.45` |
 61 | | top_distance_max | float | 否 | 头部与照片顶部距离的比例最大值，默认为`0.12` |
 62 | | top_distance_min | float | 否 | 头部与照片顶部距离的比例最小值，默认为`0.1` |
 63 | | brightness_strength | float | 否 | 亮度调整强度，默认为`0` |
 64 | | contrast_strength | float | 否 | 对比度调整强度，默认为`0` |
 65 | | sharpen_strength | float | 否 | 锐化调整强度，默认为`0` |
 66 | | saturation_strength | float | 否 | 饱和度调整强度，默认为`0` |
 67 | 
 68 | **返回参数：**
 69 | 
 70 | | 参数名 | 类型 | 说明 |
 71 | | :--- | :--- | :--- |
 72 | | status | int | 状态码，`true`表示成功 |
 73 | | image_base64_standard | str | 标准证件照的base64编码 |
 74 | | image_base64_hd | str | 高清证件照的base64编码。如`hd`参数为`false`，则不返回该参数 |
 75 | 
 76 | <br>
 77 | 
 78 | ### 2.添加背景色
 79 | 
 80 | 接口名：`add_background`
 81 | 
 82 | `添加背景色`接口的逻辑是接收一张 RGBA 图像（透明图），根据`color`添加背景色，合成一张 JPG 图像。
 83 | 
 84 | **请求参数：**
 85 | 
 86 | | 参数名 | 类型 | 必填 | 说明 |
 87 | | :--- | :--- | :--- | :--- |
 88 | | input_image | file | 和`input_image_base64`二选一 | 传入的图像文件，图像文件为需为RGBA四通道图像。 |
 89 | | input_image_base64 | str | 和`input_image`二选一 | 传入的图像文件的base64编码，图像文件为需为RGBA四通道图像。 |
 90 | | color | str | 否 | 背景色HEX值，默认为`000000` |
 91 | | kb | int | 否 | 输出照片的 KB 值，默认为`None`，即不对图像进行KB调整。|
 92 | | render | int | 否 | 渲染模式，默认为`0`。可选值为`0`、`1`、`2`，分别对应`纯色`、`上下渐变`、`中心渐变`。 |
 93 | | dpi | int | 否 | 图像分辨率，默认为`300` |
 94 | 
 95 | **返回参数：**
 96 | 
 97 | | 参数名 | 类型 | 说明 |
 98 | | :--- | :--- | :--- |
 99 | | status | int | 状态码，`true`表示成功 |
100 | | image_base64 | str | 添加背景色之后的图像的base64编码 |
101 | 
102 | <br>
103 | 
104 | ### 3.生成六寸排版照
105 | 
106 | 接口名：`generate_layout_photos`
107 | 
108 | `生成六寸排版照`接口的逻辑是接收一张 RGB 图像（一般为添加背景色之后的证件照），根据`size`进行照片排布，然后生成一张六寸排版照。
109 | 
110 | **请求参数：**
111 | 
112 | | 参数名 | 类型 | 必填 | 说明 |
113 | | :--- | :--- | :--- | :--- |
114 | | input_image | file | 和`input_image_base64`二选一 | 传入的图像文件，图像文件为需为RGB三通道图像。 |
115 | | input_image_base64 | str | 和`input_image`二选一 | 传入的图像文件的base64编码，图像文件为需为RGB三通道图像。 |
116 | | height | int | 否 | 输入图像的高度，默认为`413` |
117 | | width | int | 否 | 输入图像的宽度，默认为`295` |
118 | | kb | int | 否 | 输出照片的 KB 值，默认为`None`，即不对图像进行KB调整。|
119 | | dpi | int | 否 | 图像分辨率，默认为`300` |
120 | 
121 | **返回参数：**
122 | 
123 | | 参数名 | 类型 | 说明 |
124 | | :--- | :--- | :--- |
125 | | status | int | 状态码，`true`表示成功 |
126 | | image_base64 | str | 六寸排版照的base64编码 |
127 | 
128 | <br>
129 | 
130 | ### 4.人像抠图
131 | 
132 | 接口名：`human_matting`
133 | 
134 | `人像抠图`接口的逻辑是接收一张 RGB 图像，输出一张标准抠图人像照和高清抠图人像照（无任何背景填充）。
135 | 
136 | **请求参数：**
137 | 
138 | | 参数名 | 类型 | 必填 | 说明 |
139 | | :--- | :--- | :--- | :--- |
140 | | input_image | file | 是 | 传入的图像文件，图像文件为需为RGB三通道图像。 |
141 | | human_matting_model | str | 否 | 人像分割模型，默认为`modnet_photographic_portrait_matting`。可选值为`modnet_photographic_portrait_matting`、`hivision_modnet`、`rmbg-1.4`、`birefnet-v1-lite` |
142 | | dpi | int | 否 | 图像分辨率，默认为`300` |
143 | 
144 | **返回参数：**
145 | 
146 | | 参数名 | 类型 | 说明 |
147 | | :--- | :--- | :--- |
148 | | status | int | 状态码，`true`表示成功 |
149 | | image_base64 | str | 抠图人像照的base64编码 |
150 | 
151 | <br>
152 | 
153 | ### 5.图像加水印
154 | 
155 | 接口名：`watermark`
156 | 
157 | `图像加水印`接口的功能是接收一个水印文本，然后在原图上添加指定的水印。用户可以指定水印的位置、透明度和大小等属性，以便将水印无缝地融合到原图中。
158 | 
159 | **请求参数：**
160 | 
161 | | 参数名 | 类型 | 必填 | 说明 |
162 | | :--- | :--- | :--- | :--- |
163 | | input_image | file | 和`input_image_base64`二选一 | 传入的图像文件，图像文件为需为RGB三通道图像。 |
164 | | input_image_base64 | str | 和`input_image`二选一 | 传入的图像文件的base64编码，图像文件为需为RGB三通道图像。 |
165 | | text | str | 否 | 水印文本，默认为`Hello` |
166 | | size | int | 否 | 水印字体大小，默认为`20` |
167 | | opacity | float | 否 | 水印透明度，默认为`0.5` |
168 | | angle | int | 否 | 水印旋转角度，默认为`30` |
169 | | color | str | 否 | 水印颜色，默认为`#000000` |
170 | | space | int | 否 | 水印间距，默认为`25` |
171 | | dpi | int | 否 | 图像分辨率，默认为`300` |
172 | 
173 | **返回参数：**
174 | 
175 | | 参数名 | 类型 | 说明 |
176 | | :--- | :--- | :--- |
177 | | status | int | 状态码，`true`表示成功 |
178 | | image_base64 | str | 添加水印之后的图像的base64编码 |
179 | 
180 | <br>
181 | 
182 | ### 6.设置图像KB大小
183 | 
184 | 接口名：`set_kb`
185 | 
186 | `设置图像KB大小`接口的功能是接收一张图像和目标文件大小（以KB为单位），如果设置的KB值小于原文件，则调整压缩率；如果设置的KB值大于源文件，则通过给文件头添加信息的方式调大KB值，目标是让图像的最终大小与设置的KB值一致。
187 | 
188 | **请求参数：**
189 | 
190 | | 参数名 | 类型 | 必填 | 说明 |
191 | | :--- | :--- | :--- | :--- |
192 | | input_image | file | 和`input_image_base64`二选一 | 传入的图像文件，图像文件为需为RGB三通道图像。 |
193 | | input_image_base64 | str | 和`input_image`二选一 | 传入的图像文件的base64编码，图像文件为需为RGB三通道图像。 |
194 | | kb | int | 否 | 输出照片的 KB 值，默认为`None`，即不对图像进行KB调整。|
195 | | dpi | int | 否 | 图像分辨率，默认为`300` |
196 | 
197 | **返回参数：**
198 | 
199 | | 参数名 | 类型 | 说明 |
200 | | :--- | :--- | :--- |
201 | | status | int | 状态码，`true`表示成功 |
202 | | image_base64 | str | 设置KB大小之后的图像的base64编码 |
203 | 
204 | <br>
205 | 
206 | ### 7.证件照裁切
207 | 
208 | 接口名：`idphoto_crop`
209 | 
210 | `证件照裁切`接口的功能是接收一张 RBGA 图像（透明图），输出一张标准证件照和一张高清证件照。
211 | 
212 | **请求参数：**
213 | 
214 | | 参数名 | 类型 | 必填 | 说明 |
215 | | :--- | :--- | :--- | :--- |
216 | | input_image | file | 和`input_image_base64`二选一 | 传入的图像文件，图像文件为需为RGBA四通道图像。 |
217 | | input_image_base64 | str | 和`input_image`二选一 | 传入的图像文件的base64编码，图像文件为需为RGBA四通道图像。 |
218 | | height | int | 否 | 标准证件照高度，默认为`413` |
219 | | width | int | 否 | 标准证件照宽度，默认为`295` |
220 | | face_detect_model | str | 否 | 人脸检测模型，默认为`mtcnn`。可选值为`mtcnn`、`face_plusplus`、`retinaface-resnet50` |
221 | | hd | bool | 否 | 是否生成高清证件照，默认为`true` |
222 | | dpi | int | 否 | 图像分辨率，默认为`300` |
223 | | head_measure_ratio | float | 否 | 面部面积与照片面积的比例，默认为`0.2` |
224 | | head_height_ratio | float | 否 | 面部中心与照片顶部的高度比例，默认为`0.45` |
225 | | top_distance_max | float | 否 | 头部与照片顶部距离的比例最大值，默认为`0.12` |
226 | | top_distance_min | float | 否 | 头部与照片顶部距离的比例最小值，默认为`0.1` |
227 | 
228 | **返回参数：**
229 | 
230 | | 参数名 | 类型 | 说明 |
231 | | :--- | :--- | :--- |
232 | | status | int | 状态码，`true`表示成功 |
233 | | image_base64 | str | 证件照裁切之后的图像的base64编码 |
234 | | image_base64_hd | str | 高清证件照裁切之后的图像的base64编码，如`hd`参数为`false`，则不返回该参数 |
235 | 
236 | <br>
237 | 
238 | ## cURL 请求示例
239 | 
240 | cURL 是一个命令行工具，用于使用各种网络协议传输数据。以下是使用 cURL 调用这些 API 的示例。
241 | 
242 | ### 1. 生成证件照(底透明)
243 | 
244 | ```bash
245 | curl -X POST "http://127.0.0.1:8080/idphoto" \
246 | -F "input_image=@demo/images/test0.jpg" \
247 | -F "height=413" \
248 | -F "width=295" \
249 | -F "human_matting_model=modnet_photographic_portrait_matting" \
250 | -F "face_detect_model=mtcnn" \
251 | -F "hd=true" \
252 | -F "dpi=300" \
253 | -F "face_alignment=true" \
254 | -F 'head_height_ratio=0.45' \
255 | -F 'head_measure_ratio=0.2' \
256 | -F 'top_distance_min=0.1' \
257 | -F 'top_distance_max=0.12' \
258 | -F 'sharpen_strength=0' \
259 | -F 'saturation_strength=0' \
260 | -F 'brightness_strength=10' \
261 | -F 'contrast_strength=0'
262 | ```
263 | 
264 | ### 2. 添加背景色
265 | 
266 | ```bash
267 | curl -X POST "http://127.0.0.1:8080/add_background" \
268 | -F "input_image=@test.png" \
269 | -F "color=638cce" \
270 | -F "kb=200" \
271 | -F "render=0" \
272 | -F "dpi=300"
273 | ```
274 | 
275 | ### 3. 生成六寸排版照
276 | 
277 | ```bash
278 | curl -X POST "http://127.0.0.1:8080/generate_layout_photos" \
279 | -F "input_image=@test.jpg" \
280 | -F "height=413" \
281 | -F "width=295" \
282 | -F "kb=200" \
283 | -F "dpi=300"
284 | ```
285 | 
286 | ### 4. 人像抠图
287 | 
288 | ```bash
289 | curl -X POST "http://127.0.0.1:8080/human_matting" \
290 | -F "input_image=@demo/images/test0.jpg" \
291 | -F "human_matting_model=modnet_photographic_portrait_matting" \
292 | -F "dpi=300"
293 | ```
294 | 
295 | ### 5. 图片加水印
296 | ```bash
297 | curl -X 'POST' \
298 |   'http://127.0.0.1:8080/watermark?size=20&opacity=0.5&angle=30&color=%23000000&space=25' \
299 |   -H 'accept: application/json' \
300 |   -H 'Content-Type: multipart/form-data' \
301 |   -F 'input_image=@demo/images/test0.jpg;type=image/jpeg' \
302 |   -F 'text=Hello' \
303 |   -F 'dpi=300'
304 | ```
305 | 
306 | ### 6. 设置图像KB大小
307 | ```bash
308 | curl -X 'POST' \
309 |   'http://127.0.0.1:8080/set_kb' \
310 |   -H 'accept: application/json' \
311 |   -H 'Content-Type: multipart/form-data' \
312 |   -F 'input_image=@demo/images/test0.jpg;type=image/jpeg' \
313 |   -F 'kb=50' \
314 |   -F 'dpi=300'
315 | ```
316 | 
317 | ### 7. 证件照裁切
318 | ```bash
319 | curl -X 'POST' 'http://127.0.0.1:8080/idphoto_crop' \
320 |   -H 'accept: application/json' \
321 |   -H 'Content-Type: multipart/form-data' \
322 |   -F 'input_image=@idphoto_matting.png;type=image/png' \
323 |   -F 'height=413' \
324 |   -F 'width=295' \
325 |   -F 'face_detect_model=mtcnn' \
326 |   -F 'hd=true' \
327 |   -F 'dpi=300' \
328 |   -F 'head_height_ratio=0.45' \
329 |   -F 'head_measure_ratio=0.2' \
330 |   -F 'top_distance_min=0.1' \
331 |   -F 'top_distance_max=0.12'
332 | ```
333 | 
334 | <br>
335 | 
336 | ## Python 请求示例
337 | 
338 | #### 1.生成证件照(底透明)
339 | ```python
340 | import requests
341 | 
342 | url = "http://127.0.0.1:8080/idphoto"
343 | input_image_path = "demo/images/test0.jpg"
344 | 
345 | files = {"input_image": open(input_image_path, "rb")}
346 | data = {
347 |     "height": 413,
348 |     "width": 295,
349 |     "human_matting_model": "modnet_photographic_portrait_matting",
350 |     "face_detect_model": "mtcnn",
351 |     "hd": True,
352 |     "dpi": 300,
353 |     "face_alignment": True,
354 |     "head_measure_ratio": 0.2,
355 |     "head_height_ratio": 0.45,
356 |     "top_distance_max": 0.12,
357 |     "top_distance_min": 0.1,
358 |     "brightness_strength": 0,
359 |     "contrast_strength": 0,
360 |     "sharpen_strength": 0,
361 |     "saturation_strength": 0,
362 | }
363 | 
364 | response = requests.post(url, params=params, files=files, data=data).json()
365 | 
366 | # response为一个json格式字典，包含status、image_base64_standard和image_base64_hd三项
367 | print(response)
368 | ```
369 | 
370 | #### 2.添加背景色
371 | 
372 | ```python
373 | import requests
374 | 
375 | url = "http://127.0.0.1:8080/add_background"
376 | input_image_path = "test.png"
377 | 
378 | files = {"input_image": open(input_image_path, "rb")}
379 | data = {
380 |     "color": '638cce',
381 |     "kb": None,
382 |     "render": 0,
383 |     "dpi": 300,
384 | }
385 | 
386 | response = requests.post(url, files=files, data=data).json()
387 | 
388 | # response为一个json格式字典，包含status和image_base64
389 | print(response)
390 | ```
391 | 
392 | #### 3.生成六寸排版照
393 | 
394 | ```python
395 | import requests
396 | 
397 | url = "http://127.0.0.1:8080/generate_layout_photos"
398 | input_image_path = "test.jpg"
399 | 
400 | files = {"input_image": open(input_image_path, "rb")}
401 | data = {
402 |     "height": 413,
403 |     "width": 295,
404 |     "kb": 200,
405 |     "dpi": 300,
406 | }
407 | 
408 | response = requests.post(url, files=files, data=data).json()
409 | 
410 | # response为一个json格式字典，包含status和image_base64
411 | print(response)
412 | ```
413 | 
414 | #### 4.人像抠图
415 | 
416 | ```python
417 | import requests
418 | 
419 | url = "http://127.0.0.1:8080/human_matting"
420 | input_image_path = "test.jpg"
421 | 
422 | files = {"input_image": open(input_image_path, "rb")}
423 | data = {
424 |     "human_matting_model": "modnet_photographic_portrait_matting",
425 |     "dpi": 300,
426 | }
427 | 
428 | response = requests.post(url, files=files, data=data).json()
429 | 
430 | # response为一个json格式字典，包含status和image_base64
431 | print(response)
432 | ```
433 | 
434 | #### 5.图片加水印
435 | 
436 | ```python
437 | import requests
438 | 
439 | # 设置请求的 URL 和参数
440 | url = "http://127.0.0.1:8080/watermark"
441 | params = {
442 |     "size": 20,
443 |     "opacity": 0.5,
444 |     "angle": 30,
445 |     "color": "#000000",
446 |     "space": 25,
447 | }
448 | 
449 | # 设置文件和其他表单数据
450 | input_image_path = "demo/images/test0.jpg"
451 | files = {"input_image": open(input_image_path, "rb")}
452 | data = {"text": "Hello", "dpi": 300}
453 | 
454 | # 发送 POST 请求
455 | response = requests.post(url, params=params, files=files, data=data)
456 | 
457 | # 检查响应
458 | if response.ok:
459 |     # 输出响应内容
460 |     print(response.json())
461 | else:
462 |     # 输出错误信息
463 |     print(f"Request failed with status code {response.status_code}: {response.text}")
464 | ```
465 | 
466 | ### 6. 设置图像KB大小
467 | 
468 | ```python
469 | import requests
470 | 
471 | # 设置请求的 URL
472 | url = "http://127.0.0.1:8080/set_kb"
473 | 
474 | # 设置文件和其他表单数据
475 | input_image_path = "demo/images/test0.jpg"
476 | files = {"input_image": open(input_image_path, "rb")}
477 | data = {"kb": 50, "dpi": 300}
478 | 
479 | # 发送 POST 请求
480 | response = requests.post(url, files=files, data=data)
481 | 
482 | # 检查响应
483 | if response.ok:
484 |     # 输出响应内容
485 |     print(response.json())
486 | else:
487 |     # 输出错误信息
488 |     print(f"Request failed with status code {response.status_code}: {response.text}")
489 | ```
490 | 
491 | ### 7. 证件照裁切
492 | 
493 | ```python
494 | import requests
495 | 
496 | # 设置请求的 URL
497 | url = "http://127.0.0.1:8080/idphoto_crop"
498 | 
499 | # 设置文件和其他表单数据
500 | input_image_path = "idphoto_matting.png"
501 | files = {"input_image": ("idphoto_matting.png", open(input_image_path, "rb"), "image/png")}
502 | data = {
503 |     "height": 413,
504 |     "width": 295,
505 |     "face_detect_model": "mtcnn",
506 |     "hd": "true",
507 |     "dpi": 300,
508 |     "head_measure_ratio": 0.2,
509 |     "head_height_ratio": 0.45,
510 |     "top_distance_max": 0.12,
511 |     "top_distance_min": 0.1,
512 | }
513 | 
514 | # 发送 POST 请求
515 | response = requests.post(url, files=files, data=data)
516 | 
517 | # 检查响应
518 | if response.ok:
519 |     # 输出响应内容
520 |     print(response.json())
521 | else:
522 |     # 输出错误信息
523 |     print(f"Request failed with status code {response.status_code}: {response.text}")
524 | ```


--------------------------------------------------------------------------------
/docs/face++_CN.md:
--------------------------------------------------------------------------------
 1 | # Face++ 人脸检测配置文档
 2 | 
 3 | [Face++官方文档](https://console.faceplusplus.com.cn/documents/4888373)
 4 | 
 5 | ## 1. 注册Face++账号
 6 | 要使用 Face++ 的人脸检测 API，您首先需要在 Face++ 的官方网站上注册一个账号。注册后，您将能够访问 API 控制台和相关服务。
 7 | 
 8 | ### 步骤：
 9 | 1. 访问 [Face++ 官网](https://www.faceplusplus.com.cn/)。
10 | 2. 点击“注册”按钮，填写相关信息以创建您的账号。
11 | 
12 | ## 2. 获取API KEY和API SECRET
13 | 注册并登录后，您需要获取用于身份验证的 API Key 和 API Secret。这些信息是调用 API 时必需的。
14 | 
15 | ### 步骤：
16 | 1. 登录到您的 Face++ 账号。
17 | 2. 进入 控制台 -> 应用管理 -> API Key。
18 | 3. 在控制台中，您将看到您的 API Key 和 API Secret。
19 | 
20 | ## 3. 设置环境变量
21 | 为了在代码中安全地使用 API Key 和 API Secret，建议将它们设置为环境变量。这样可以避免在代码中硬编码敏感信息。
22 | 
23 | ### 在不同操作系统中设置环境变量的步骤：
24 | - **Windows**:
25 |     1. 打开命令提示符。
26 |     2. 输入以下命令并按回车：
27 |        ```cmd
28 |        set FACE_PLUS_API_KEY="您的API_KEY"
29 |        set FACE_PLUS_API_SECRET="您的API_SECRET"
30 |        ```
31 | 
32 | - **Linux / macOS**:
33 |     1. 打开终端。
34 |     2. 输入以下命令并按回车：
35 |        ```bash
36 |        export FACE_PLUS_API_KEY="你的API_KEY"
37 |        export FACE_PLUS_API_SECRET="你的API_SECRET"
38 |        ```
39 | 
40 | > **注意**: 您可能需要在启动应用程序之前运行上述命令，或者将这些命令添加到您的 shell 配置文件（例如 `.bashrc` 或 `.bash_profile`）中，以便每次启动终端时自动加载。
41 | 
42 | ## 4. 启动Gradio服务
43 | 
44 | 运行gradio服务，在「人脸检测模型」中选择「face++」即可。
45 | 
46 | ```bash
47 | python app.py
48 | ```
49 | 
50 | ![alt text](../assets/face++.png)
51 | 
52 | ## 错误码的解释
53 | 
54 | https://console.faceplusplus.com.cn/documents/4888373


--------------------------------------------------------------------------------
/docs/face++_EN.md:
--------------------------------------------------------------------------------
 1 | Here's the translated document in English:
 2 | 
 3 | # Face++ Face Detection Configuration Document
 4 | 
 5 | [Face++ Official Documentation](https://console.faceplusplus.com.cn/documents/4888373)
 6 | 
 7 | ## 1. Register a Face++ Account
 8 | To use the Face++ Face Detection API, you first need to register an account on the Face++ official website. After registration, you will be able to access the API console and related services.
 9 | 
10 | ### Steps:
11 | 1. Visit the [Face++ Official Website](https://www.faceplusplus.com.cn/).
12 | 2. Click the "Register" button and fill in the relevant information to create your account.
13 | 
14 | ## 2. Obtain API KEY and API SECRET
15 | After registering and logging in, you need to obtain the API Key and API Secret for authentication. This information is necessary for calling the API.
16 | 
17 | ### Steps:
18 | 1. Log in to your Face++ account.
19 | 2. Go to Console -> Application Management -> API Key.
20 | 3. In the console, you will see your API Key and API Secret.
21 | 
22 | ## 3. Set Environment Variables
23 | To securely use the API Key and API Secret in your code, it is recommended to set them as environment variables. This avoids hardcoding sensitive information in your code.
24 | 
25 | ### Steps to Set Environment Variables on Different Operating Systems:
26 | - **Windows**:
27 |     1. Open the Command Prompt.
28 |     2. Enter the following commands and press Enter:
29 |        ```cmd
30 |        set FACE_PLUS_API_KEY="Your_API_KEY"
31 |        set FACE_PLUS_API_SECRET="Your_API_SECRET"
32 |        ```
33 | 
34 | - **Linux / macOS**:
35 |     1. Open the terminal.
36 |     2. Enter the following commands and press Enter:
37 |        ```bash
38 |        export FACE_PLUS_API_KEY="Your_API_KEY"
39 |        export FACE_PLUS_API_SECRET="Your_API_SECRET"
40 |        ```
41 | 
42 | > **Note**: You may need to run the above commands before starting your application, or add these commands to your shell configuration file (e.g., `.bashrc` or `.bash_profile`) so that they are automatically loaded each time you start the terminal.
43 | 
44 | ## 4. Start Gradio Service
45 | Run the Gradio service, and select "face++" in the "Face Detection Model".
46 | 
47 | ```bash
48 | python app.py
49 | ```
50 | 
51 | ![alt text](../assets/face++.png)
52 | 
53 | ## Explanation of error codes
54 | 
55 | https://console.faceplusplus.com.cn/documents/4888373


--------------------------------------------------------------------------------
/hivision/__init__.py:
--------------------------------------------------------------------------------
1 | from .creator import IDCreator, Params as IDParams, Result as IDResult
2 | 
3 | 
4 | __all__ = ["IDCreator", "IDParams", "IDResult", "utils", "error"]
5 | 


--------------------------------------------------------------------------------
/hivision/creator/__init__.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | r"""
  4 | @DATE: 2024/9/5 16:45
  5 | @File: __init__.py
  6 | @IDE: pycharm
  7 | @Description:
  8 |     创建证件照
  9 | """
 10 | import numpy as np
 11 | from typing import Tuple
 12 | import hivision.creator.utils as U
 13 | from .context import Context, ContextHandler, Params, Result
 14 | from .human_matting import extract_human
 15 | from .face_detector import detect_face_mtcnn
 16 | from hivision.plugin.beauty.handler import beauty_face
 17 | from .photo_adjuster import adjust_photo
 18 | import cv2
 19 | import time
 20 | 
 21 | 
 22 | class IDCreator:
 23 |     """
 24 |     证件照创建类，包含完整的证件照流程
 25 |     """
 26 | 
 27 |     def __init__(self):
 28 |         # 回调时机
 29 |         self.before_all: ContextHandler = None
 30 |         """
 31 |         在所有处理之前，此时图像已经被 resize 到最大边长为 2000
 32 |         """
 33 |         self.after_matting: ContextHandler = None
 34 |         """
 35 |         在抠图之后，ctx.matting_image 被赋值
 36 |         """
 37 |         self.after_detect: ContextHandler = None
 38 |         """
 39 |         在人脸检测之后，ctx.face 被赋值，如果为仅换底，则不会执行此回调
 40 |         """
 41 |         self.after_all: ContextHandler = None
 42 |         """
 43 |         在所有处理之后，此时 ctx.result 被赋值
 44 |         """
 45 |         # 处理者
 46 |         self.matting_handler: ContextHandler = extract_human
 47 |         self.detection_handler: ContextHandler = detect_face_mtcnn
 48 |         self.beauty_handler: ContextHandler = beauty_face
 49 |         # 上下文
 50 |         self.ctx = None
 51 | 
 52 |     def __call__(
 53 |         self,
 54 |         image: np.ndarray,
 55 |         size: Tuple[int, int] = (413, 295),
 56 |         change_bg_only: bool = False,
 57 |         crop_only: bool = False,
 58 |         head_measure_ratio: float = 0.2,
 59 |         head_height_ratio: float = 0.45,
 60 |         head_top_range: float = (0.12, 0.1),
 61 |         face: Tuple[int, int, int, int] = None,
 62 |         whitening_strength: int = 0,
 63 |         brightness_strength: int = 0,
 64 |         contrast_strength: int = 0,
 65 |         sharpen_strength: int = 0,
 66 |         saturation_strength: int = 0,
 67 |         face_alignment: bool = False,
 68 |     ) -> Result:
 69 |         """
 70 |         证件照处理函数
 71 |         :param image: 输入图像
 72 |         :param change_bg_only: 是否只需要抠图
 73 |         :param crop_only: 是否只需要裁剪
 74 |         :param size: 输出的图像大小（h,w)
 75 |         :param head_measure_ratio: 人脸面积与全图面积的期望比值
 76 |         :param head_height_ratio: 人脸中心处在全图高度的比例期望值
 77 |         :param head_top_range: 头距离顶部的比例（max,min)
 78 |         :param face: 人脸坐标
 79 |         :param whitening_strength: 美白强度
 80 |         :param brightness_strength: 亮度强度
 81 |         :param contrast_strength: 对比度强度
 82 |         :param sharpen_strength: 锐化强度
 83 |         :param align_face: 是否需要人脸矫正
 84 | 
 85 |         :return: 返回处理后的证件照和一系列参数
 86 |         """
 87 |         # 0.初始化上下文
 88 |         params = Params(
 89 |             size=size,
 90 |             change_bg_only=change_bg_only,
 91 |             head_measure_ratio=head_measure_ratio,
 92 |             head_height_ratio=head_height_ratio,
 93 |             head_top_range=head_top_range,
 94 |             crop_only=crop_only,
 95 |             face=face,
 96 |             whitening_strength=whitening_strength,
 97 |             brightness_strength=brightness_strength,
 98 |             contrast_strength=contrast_strength,
 99 |             sharpen_strength=sharpen_strength,
100 |             saturation_strength=saturation_strength,
101 |             face_alignment=face_alignment,
102 |         )
103 | 
104 | 
105 |         # 总的开始时间
106 |         total_start_time = time.time()
107 |         
108 |         self.ctx = Context(params)
109 |         ctx = self.ctx
110 |         ctx.processing_image = image
111 |         ctx.processing_image = U.resize_image_esp(
112 |             ctx.processing_image, 2000
113 |         )  # 将输入图片 resize 到最大边长为 2000
114 |         ctx.origin_image = ctx.processing_image.copy()
115 |         self.before_all and self.before_all(ctx)
116 | 
117 |         # 1. ------------------人像抠图------------------
118 |         # 如果仅裁剪，则不进行抠图
119 |         if not ctx.params.crop_only:
120 |             # 调用抠图工作流
121 |             print("[1]  Start Human Matting...")
122 |             start_matting_time = time.time()
123 |             self.matting_handler(ctx)
124 |             end_matting_time = time.time()
125 |             print(f"[1]  Human Matting Time: {end_matting_time - start_matting_time:.3f}s")
126 |             self.after_matting and self.after_matting(ctx)
127 |         # 如果进行抠图
128 |         else:
129 |             ctx.matting_image = ctx.processing_image
130 | 
131 | 
132 |         # 2. ------------------美颜------------------
133 |         print("[2]  Start Beauty...")
134 |         start_beauty_time = time.time()
135 |         self.beauty_handler(ctx)
136 |         end_beauty_time = time.time()
137 |         print(f"[2]  Beauty Time: {end_beauty_time - start_beauty_time:.3f}s")
138 | 
139 |         # 如果仅换底，则直接返回抠图结果
140 |         if ctx.params.change_bg_only:
141 |             ctx.result = Result(
142 |                 standard=ctx.matting_image,
143 |                 hd=ctx.matting_image,
144 |                 matting=ctx.matting_image,
145 |                 clothing_params=None,
146 |                 typography_params=None,
147 |                 face=None,
148 |             )
149 |             self.after_all and self.after_all(ctx)
150 |             return ctx.result
151 | 
152 |         # 3. ------------------人脸检测------------------
153 |         print("[3]  Start Face Detection...")
154 |         start_detection_time = time.time()
155 |         self.detection_handler(ctx)
156 |         end_detection_time = time.time()
157 |         print(f"[3]  Face Detection Time: {end_detection_time - start_detection_time:.3f}s")
158 |         self.after_detect and self.after_detect(ctx)
159 | 
160 |         # 3.1 ------------------人脸对齐------------------
161 |         if ctx.params.face_alignment and abs(ctx.face["roll_angle"]) > 2:
162 |             print("[3.1]  Start Face Alignment...")
163 |             start_alignment_time = time.time()
164 |             from hivision.creator.rotation_adjust import rotate_bound_4channels
165 | 
166 |             # 根据角度旋转原图和抠图
167 |             b, g, r, a = cv2.split(ctx.matting_image)
168 |             ctx.origin_image, ctx.matting_image, _, _, _, _ = rotate_bound_4channels(
169 |                 cv2.merge((b, g, r)),
170 |                 a,
171 |                 -1 * ctx.face["roll_angle"],
172 |             )
173 | 
174 |             # 旋转后再执行一遍人脸检测
175 |             self.detection_handler(ctx)
176 |             self.after_detect and self.after_detect(ctx)
177 |             end_alignment_time = time.time()
178 |             print(f"[3.1]  Face Alignment Time: {end_alignment_time - start_alignment_time:.3f}s")
179 | 
180 |         # 4. ------------------图像调整------------------
181 |         print("[4]  Start Image Post-Adjustment...")
182 |         start_adjust_time = time.time()
183 |         result_image_hd, result_image_standard, clothing_params, typography_params = (
184 |             adjust_photo(ctx)
185 |         )
186 |         end_adjust_time = time.time()
187 |         print(f"[4]  Image Post-Adjustment Time: {end_adjust_time - start_adjust_time:.3f}s")
188 | 
189 |         # 5. ------------------返回结果------------------
190 |         ctx.result = Result(
191 |             standard=result_image_standard,
192 |             hd=result_image_hd,
193 |             matting=ctx.matting_image,
194 |             clothing_params=clothing_params,
195 |             typography_params=typography_params,
196 |             face=ctx.face,
197 |         )
198 |         self.after_all and self.after_all(ctx)
199 | 
200 |         # 总的结束时间
201 |         total_end_time = time.time()
202 |         print(f"[Total]  Total Time: {total_end_time - total_start_time:.3f}s")
203 | 
204 |         return ctx.result
205 | 


--------------------------------------------------------------------------------
/hivision/creator/choose_handler.py:
--------------------------------------------------------------------------------
 1 | from hivision.creator.human_matting import *
 2 | from hivision.creator.face_detector import *
 3 | 
 4 | 
 5 | HUMAN_MATTING_MODELS = [
 6 |     "modnet_photographic_portrait_matting",
 7 |     "birefnet-v1-lite",
 8 |     "hivision_modnet",
 9 |     "rmbg-1.4",
10 | ]
11 | 
12 | FACE_DETECT_MODELS = ["face++ (联网Online API)", "mtcnn", "retinaface-resnet50"]
13 | 
14 | 
15 | def choose_handler(creator, matting_model_option=None, face_detect_option=None):
16 |     if matting_model_option == "modnet_photographic_portrait_matting":
17 |         creator.matting_handler = extract_human_modnet_photographic_portrait_matting
18 |     elif matting_model_option == "mnn_hivision_modnet":
19 |         creator.matting_handler = extract_human_mnn_modnet
20 |     elif matting_model_option == "rmbg-1.4":
21 |         creator.matting_handler = extract_human_rmbg
22 |     elif matting_model_option == "birefnet-v1-lite":
23 |         creator.matting_handler = extract_human_birefnet_lite
24 |     else:
25 |         creator.matting_handler = extract_human
26 | 
27 |     if (
28 |         face_detect_option == "face_plusplus"
29 |         or face_detect_option == "face++ (联网Online API)"
30 |     ):
31 |         creator.detection_handler = detect_face_face_plusplus
32 |     elif face_detect_option == "retinaface-resnet50":
33 |         creator.detection_handler = detect_face_retinaface
34 |     else:
35 |         creator.detection_handler = detect_face_mtcnn
36 | 


--------------------------------------------------------------------------------
/hivision/creator/context.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | r"""
  4 | @DATE: 2024/9/5 19:20
  5 | @File: context.py
  6 | @IDE: pycharm
  7 | @Description:
  8 |     证件照创建上下文类，用于同步信息
  9 | """
 10 | from typing import Optional, Callable, Tuple
 11 | import numpy as np
 12 | 
 13 | 
 14 | class Params:
 15 |     def __init__(
 16 |         self,
 17 |         size: Tuple[int, int] = (413, 295),
 18 |         change_bg_only: bool = False,
 19 |         crop_only: bool = False,
 20 |         head_measure_ratio: float = 0.2,
 21 |         head_height_ratio: float = 0.45,
 22 |         head_top_range: float = (0.12, 0.1),
 23 |         face: Tuple[int, int, int, int] = None,
 24 |         whitening_strength: int = 0,
 25 |         brightness_strength: int = 0,
 26 |         contrast_strength: int = 0,
 27 |         sharpen_strength: int = 0,
 28 |         saturation_strength: int = 0,
 29 |         face_alignment: bool = False,
 30 |     ):
 31 |         self.__size = size
 32 |         self.__change_bg_only = change_bg_only
 33 |         self.__crop_only = crop_only
 34 |         self.__head_measure_ratio = head_measure_ratio
 35 |         self.__head_height_ratio = head_height_ratio
 36 |         self.__head_top_range = head_top_range
 37 |         self.__face = face
 38 |         self.__whitening_strength = whitening_strength
 39 |         self.__brightness_strength = brightness_strength
 40 |         self.__contrast_strength = contrast_strength
 41 |         self.__sharpen_strength = sharpen_strength
 42 |         self.__saturation_strength = saturation_strength
 43 |         self.__face_alignment = face_alignment
 44 | 
 45 |     @property
 46 |     def size(self):
 47 |         return self.__size
 48 | 
 49 |     @property
 50 |     def change_bg_only(self):
 51 |         return self.__change_bg_only
 52 | 
 53 |     @property
 54 |     def head_measure_ratio(self):
 55 |         return self.__head_measure_ratio
 56 | 
 57 |     @property
 58 |     def head_height_ratio(self):
 59 |         return self.__head_height_ratio
 60 | 
 61 |     @property
 62 |     def head_top_range(self):
 63 |         return self.__head_top_range
 64 | 
 65 |     @property
 66 |     def crop_only(self):
 67 |         return self.__crop_only
 68 | 
 69 |     @property
 70 |     def face(self):
 71 |         return self.__face
 72 | 
 73 |     @property
 74 |     def whitening_strength(self):
 75 |         return self.__whitening_strength
 76 | 
 77 |     @property
 78 |     def brightness_strength(self):
 79 |         return self.__brightness_strength
 80 | 
 81 |     @property
 82 |     def contrast_strength(self):
 83 |         return self.__contrast_strength
 84 | 
 85 |     @property
 86 |     def sharpen_strength(self):
 87 |         return self.__sharpen_strength
 88 | 
 89 |     @property
 90 |     def saturation_strength(self):
 91 |         return self.__saturation_strength
 92 | 
 93 |     @property
 94 |     def face_alignment(self):
 95 |         return self.__face_alignment
 96 | 
 97 | 
 98 | class Result:
 99 |     def __init__(
100 |         self,
101 |         standard: np.ndarray,
102 |         hd: np.ndarray,
103 |         matting: np.ndarray,
104 |         clothing_params: Optional[dict],
105 |         typography_params: Optional[dict],
106 |         face: Optional[Tuple[int, int, int, int, float]],
107 |     ):
108 |         self.standard = standard
109 |         self.hd = hd
110 |         self.matting = matting
111 |         self.clothing_params = clothing_params
112 |         """
113 |         服装参数，仅换底时为 None
114 |         """
115 |         self.typography_params = typography_params
116 |         """
117 |         排版参数，仅换底时为 None
118 |         """
119 |         self.face = face
120 | 
121 |     def __iter__(self):
122 |         return iter(
123 |             [
124 |                 self.standard,
125 |                 self.hd,
126 |                 self.matting,
127 |                 self.clothing_params,
128 |                 self.typography_params,
129 |                 self.face,
130 |             ]
131 |         )
132 | 
133 | 
134 | class Context:
135 |     def __init__(self, params: Params):
136 |         self.params: Params = params
137 |         """
138 |         证件照处理参数
139 |         """
140 |         self.origin_image: Optional[np.ndarray] = None
141 |         """
142 |         输入的原始图像，处理时会进行resize，长宽不一定等于输入图像
143 |         """
144 |         self.processing_image: Optional[np.ndarray] = None
145 |         """
146 |         当前正在处理的图像
147 |         """
148 |         self.matting_image: Optional[np.ndarray] = None
149 |         """
150 |         人像抠图结果
151 |         """
152 |         self.face: dict = dict(rectangle=None, roll_angle=None)
153 |         """
154 |         人脸检测结果，大于一个人脸时已在上层抛出异常
155 |         rectangle: 人脸矩形框，包含 x1, y1, width, height 的坐标, x1, y1 为左上角坐标, width, height 为矩形框的宽度和高度
156 |         roll_angle: 人脸偏转角度，以眼睛为标准，计算的人脸偏转角度，用于人脸矫正
157 |         """
158 |         self.result: Optional[Result] = None
159 |         """
160 |         证件照处理结果
161 |         """
162 |         self.align_info: Optional[dict] = None
163 |         """
164 |         人脸矫正信息，仅当 align_face 为 True 时存在
165 |         """
166 | 
167 | 
168 | ContextHandler = Optional[Callable[[Context], None]]
169 | 


--------------------------------------------------------------------------------
/hivision/creator/face_detector.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | r"""
  4 | @DATE: 2024/9/5 19:32
  5 | @File: face_detector.py
  6 | @IDE: pycharm
  7 | @Description:
  8 |     人脸检测器
  9 | """
 10 | try:
 11 |     from mtcnnruntime import MTCNN
 12 | except ImportError:
 13 |     raise ImportError(
 14 |         "Please install mtcnn-runtime by running `pip install mtcnn-runtime`"
 15 |     )
 16 | from .context import Context
 17 | from hivision.error import FaceError, APIError
 18 | from hivision.utils import resize_image_to_kb_base64
 19 | from hivision.creator.retinaface import retinaface_detect_faces
 20 | import requests
 21 | import cv2
 22 | import os
 23 | import numpy as np
 24 | 
 25 | 
 26 | mtcnn = None
 27 | base_dir = os.path.dirname(os.path.abspath(__file__))
 28 | RETINAFCE_SESS = None
 29 | 
 30 | 
 31 | def detect_face_mtcnn(ctx: Context, scale: int = 2):
 32 |     """
 33 |     基于MTCNN模型的人脸检测处理器，只进行人脸数量的检测
 34 |     :param ctx: 上下文，此时已获取到原始图和抠图结果，但是我们只需要原始图
 35 |     :param scale: 最大边长缩放比例，原图:缩放图 = 1:scale
 36 |     :raise FaceError: 人脸检测错误，多个人脸或者没有人脸
 37 |     """
 38 |     global mtcnn
 39 |     if mtcnn is None:
 40 |         mtcnn = MTCNN()
 41 |     image = cv2.resize(
 42 |         ctx.origin_image,
 43 |         (ctx.origin_image.shape[1] // scale, ctx.origin_image.shape[0] // scale),
 44 |         interpolation=cv2.INTER_AREA,
 45 |     )
 46 |     # landmarks 是 5 个关键点，分别是左眼、右眼、鼻子、左嘴角、右嘴角，
 47 |     faces, landmarks = mtcnn.detect(image, thresholds=[0.8, 0.8, 0.8])
 48 | 
 49 |     # print(len(faces))
 50 |     if len(faces) != 1:
 51 |         # 保险措施，如果检测到多个人脸或者没有人脸，用原图再检测一次
 52 |         faces, landmarks = mtcnn.detect(ctx.origin_image)
 53 |     else:
 54 |         # 如果只有一个人脸，将人脸坐标放大
 55 |         for item, param in enumerate(faces[0]):
 56 |             faces[0][item] = param * 2
 57 |     if len(faces) != 1:
 58 |         raise FaceError("Expected 1 face, but got {}".format(len(faces)), len(faces))
 59 | 
 60 |     # 计算人脸坐标
 61 |     left = faces[0][0]
 62 |     top = faces[0][1]
 63 |     width = faces[0][2] - left + 1
 64 |     height = faces[0][3] - top + 1
 65 |     ctx.face["rectangle"] = (left, top, width, height)
 66 | 
 67 |     # 根据landmarks计算人脸偏转角度，以眼睛为标准，计算的人脸偏转角度，用于人脸矫正
 68 |     # 示例landmarks [106.37181  150.77415  127.21012  108.369156 144.61522  105.24723 107.45625  133.62355  151.24269  153.34407 ]
 69 |     landmarks = landmarks[0]
 70 |     left_eye = np.array([landmarks[0], landmarks[5]])
 71 |     right_eye = np.array([landmarks[1], landmarks[6]])
 72 |     dy = right_eye[1] - left_eye[1]
 73 |     dx = right_eye[0] - left_eye[0]
 74 |     roll_angle = np.degrees(np.arctan2(dy, dx))
 75 | 
 76 |     ctx.face["roll_angle"] = roll_angle
 77 | 
 78 | 
 79 | def detect_face_face_plusplus(ctx: Context):
 80 |     """
 81 |     基于Face++ API接口的人脸检测处理器，只进行人脸数量的检测
 82 |     :param ctx: 上下文，此时已获取到原始图和抠图结果，但是我们只需要原始图
 83 |     :param scale: 最大边长缩放比例，原图:缩放图 = 1:scale
 84 |     :raise FaceError: 人脸检测错误，多个人脸或者没有人脸
 85 |     """
 86 |     url = "https://api-cn.faceplusplus.com/facepp/v3/detect"
 87 |     api_key = os.getenv("FACE_PLUS_API_KEY")
 88 |     api_secret = os.getenv("FACE_PLUS_API_SECRET")
 89 | 
 90 |     print("调用了face++")
 91 | 
 92 |     image = ctx.origin_image
 93 |     # 将图片转为 base64, 且不大于2MB（Face++ API接口限制）
 94 |     image_base64 = resize_image_to_kb_base64(image, 2000, mode="max")
 95 | 
 96 |     files = {
 97 |         "api_key": (None, api_key),
 98 |         "api_secret": (None, api_secret),
 99 |         "image_base64": (None, image_base64),
100 |         "return_landmark": (None, "1"),
101 |         "return_attributes": (None, "headpose"),
102 |     }
103 | 
104 |     # 发送 POST 请求
105 |     response = requests.post(url, files=files)
106 | 
107 |     # 获取响应状态码
108 |     status_code = response.status_code
109 |     response_json = response.json()
110 | 
111 |     if status_code == 200:
112 |         face_num = response_json["face_num"]
113 |         if face_num == 1:
114 |             face_rectangle = response_json["faces"][0]["face_rectangle"]
115 | 
116 |             # 获取人脸关键点
117 |             # landmarks = response_json["faces"][0]["landmark"]
118 |             # print("face++ landmarks", landmarks)
119 | 
120 |             # headpose 是一个字典，包含俯仰角（pitch）、偏航角（yaw）和滚转角（roll）
121 |             # headpose示例 {'pitch_angle': 6.997899, 'roll_angle': 1.8011835, 'yaw_angle': 5.043002}
122 |             headpose = response_json["faces"][0]["attributes"]["headpose"]
123 |             # 以眼睛为标准，计算的人脸偏转角度，用于人脸矫正
124 |             roll_angle = headpose["roll_angle"] / 2
125 | 
126 |             ctx.face["rectangle"] = (
127 |                 face_rectangle["left"],
128 |                 face_rectangle["top"],
129 |                 face_rectangle["width"],
130 |                 face_rectangle["height"],
131 |             )
132 |             ctx.face["roll_angle"] = roll_angle
133 |         else:
134 |             raise FaceError(
135 |                 "Expected 1 face, but got {}".format(face_num), len(face_num)
136 |             )
137 | 
138 |     elif status_code == 401:
139 |         raise APIError(
140 |             f"Face++ Status code {status_code} Authentication error: API key and secret do not match.",
141 |             status_code,
142 |         )
143 | 
144 |     elif status_code == 403:
145 |         reason = response_json.get("error_message", "Unknown authorization error.")
146 |         raise APIError(
147 |             f"Authorization error: {reason}",
148 |             status_code,
149 |         )
150 | 
151 |     elif status_code == 400:
152 |         error_message = response_json.get("error_message", "Bad request.")
153 |         raise APIError(
154 |             f"Bad request error: {error_message}",
155 |             status_code,
156 |         )
157 | 
158 |     elif status_code == 413:
159 |         raise APIError(
160 |             f"Face++ Status code {status_code} Request entity too large: The image exceeds the 2MB limit.",
161 |             status_code,
162 |         )
163 | 
164 | 
165 | def detect_face_retinaface(ctx: Context):
166 |     """
167 |     基于RetinaFace模型的人脸检测处理器，只进行人脸数量的检测
168 |     :param ctx: 上下文，此时已获取到原始图和抠图结果，但是我们只需要原始图
169 |     :raise FaceError: 人脸检测错误，多个人脸或者没有人脸
170 |     """
171 |     from time import time
172 | 
173 |     global RETINAFCE_SESS
174 | 
175 |     if RETINAFCE_SESS is None:
176 |         # 计算用时
177 |         tic = time()
178 |         faces_dets, sess = retinaface_detect_faces(
179 |             ctx.origin_image,
180 |             os.path.join(base_dir, "retinaface/weights/retinaface-resnet50.onnx"),
181 |             sess=None,
182 |         )
183 |         RETINAFCE_SESS = sess
184 |     else:
185 |         tic = time()
186 |         faces_dets, _ = retinaface_detect_faces(
187 |             ctx.origin_image,
188 |             os.path.join(base_dir, "retinaface/weights/retinaface-resnet50.onnx"),
189 |             sess=RETINAFCE_SESS,
190 |         )
191 | 
192 |     faces_num = len(faces_dets)
193 |     faces_landmarks = []
194 |     for face_det in faces_dets:
195 |         faces_landmarks.append(face_det[5:])
196 | 
197 |     if faces_num != 1:
198 |         raise FaceError("Expected 1 face, but got {}".format(faces_num), faces_num)
199 |     face_det = faces_dets[0]
200 |     ctx.face["rectangle"] = (
201 |         face_det[0],
202 |         face_det[1],
203 |         face_det[2] - face_det[0] + 1,
204 |         face_det[3] - face_det[1] + 1,
205 |     )
206 | 
207 |     # 计算roll_angle
208 |     face_landmarks = faces_landmarks[0]
209 |     # print("face_landmarks", face_landmarks)
210 |     left_eye = np.array([face_landmarks[0], face_landmarks[1]])
211 |     right_eye = np.array([face_landmarks[2], face_landmarks[3]])
212 |     dy = right_eye[1] - left_eye[1]
213 |     dx = right_eye[0] - left_eye[0]
214 |     roll_angle = np.degrees(np.arctan2(dy, dx))
215 |     ctx.face["roll_angle"] = roll_angle
216 | 
217 |     # 如果RUN_MODE不是野兽模式，则释放模型
218 |     if os.getenv("RUN_MODE") == "beast":
219 |         RETINAFCE_SESS = None


--------------------------------------------------------------------------------
/hivision/creator/human_matting.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | r"""
  4 | @DATE: 2024/9/5 21:21
  5 | @File: human_matting.py
  6 | @IDE: pycharm
  7 | @Description:
  8 |     人像抠图
  9 | """
 10 | import numpy as np
 11 | from PIL import Image
 12 | import onnxruntime
 13 | from .tensor2numpy import NNormalize, NTo_Tensor, NUnsqueeze
 14 | from .context import Context
 15 | import cv2
 16 | import os
 17 | from time import time
 18 | 
 19 | 
 20 | WEIGHTS = {
 21 |     "hivision_modnet": os.path.join(
 22 |         os.path.dirname(__file__), "weights", "hivision_modnet.onnx"
 23 |     ),
 24 |     "modnet_photographic_portrait_matting": os.path.join(
 25 |         os.path.dirname(__file__),
 26 |         "weights",
 27 |         "modnet_photographic_portrait_matting.onnx",
 28 |     ),
 29 |     "mnn_hivision_modnet": os.path.join(
 30 |         os.path.dirname(__file__),
 31 |         "weights",
 32 |         "mnn_hivision_modnet.mnn",
 33 |     ),
 34 |     "rmbg-1.4": os.path.join(os.path.dirname(__file__), "weights", "rmbg-1.4.onnx"),
 35 |     "birefnet-v1-lite": os.path.join(
 36 |         os.path.dirname(__file__), "weights", "birefnet-v1-lite.onnx"
 37 |     ),
 38 | }
 39 | 
 40 | ONNX_DEVICE = onnxruntime.get_device()
 41 | ONNX_PROVIDER = (
 42 |     "CUDAExecutionProvider" if ONNX_DEVICE == "GPU" else "CPUExecutionProvider"
 43 | )
 44 | 
 45 | HIVISION_MODNET_SESS = None
 46 | MODNET_PHOTOGRAPHIC_PORTRAIT_MATTING_SESS = None
 47 | RMBG_SESS = None
 48 | BIREFNET_V1_LITE_SESS = None
 49 | 
 50 | 
 51 | def load_onnx_model(checkpoint_path, set_cpu=False):
 52 |     providers = (
 53 |         ["CUDAExecutionProvider", "CPUExecutionProvider"]
 54 |         if ONNX_PROVIDER == "CUDAExecutionProvider"
 55 |         else ["CPUExecutionProvider"]
 56 |     )
 57 | 
 58 |     if set_cpu:
 59 |         sess = onnxruntime.InferenceSession(
 60 |             checkpoint_path, providers=["CPUExecutionProvider"]
 61 |         )
 62 |     else:
 63 |         try:
 64 |             sess = onnxruntime.InferenceSession(checkpoint_path, providers=providers)
 65 |         except Exception as e:
 66 |             if ONNX_DEVICE == "CUDAExecutionProvider":
 67 |                 print(f"Failed to load model with CUDAExecutionProvider: {e}")
 68 |                 print("Falling back to CPUExecutionProvider")
 69 |                 # 尝试使用CPU加载模型
 70 |                 sess = onnxruntime.InferenceSession(
 71 |                     checkpoint_path, providers=["CPUExecutionProvider"]
 72 |                 )
 73 |             else:
 74 |                 raise e  # 如果是CPU执行失败，重新抛出异常
 75 | 
 76 |     return sess
 77 | 
 78 | 
 79 | def extract_human(ctx: Context):
 80 |     """
 81 |     人像抠图
 82 |     :param ctx: 上下文
 83 |     """
 84 |     # 抠图
 85 |     matting_image = get_modnet_matting(ctx.processing_image, WEIGHTS["hivision_modnet"])
 86 |     # 修复抠图
 87 |     ctx.processing_image = hollow_out_fix(matting_image)
 88 |     ctx.matting_image = ctx.processing_image.copy()
 89 | 
 90 | 
 91 | def extract_human_modnet_photographic_portrait_matting(ctx: Context):
 92 |     """
 93 |     人像抠图
 94 |     :param ctx: 上下文
 95 |     """
 96 |     # 抠图
 97 |     matting_image = get_modnet_matting_photographic_portrait_matting(
 98 |         ctx.processing_image, WEIGHTS["modnet_photographic_portrait_matting"]
 99 |     )
100 |     # 修复抠图
101 |     ctx.processing_image = matting_image
102 |     ctx.matting_image = ctx.processing_image.copy()
103 | 
104 | 
105 | def extract_human_mnn_modnet(ctx: Context):
106 |     matting_image = get_mnn_modnet_matting(
107 |         ctx.processing_image, WEIGHTS["mnn_hivision_modnet"]
108 |     )
109 |     ctx.processing_image = hollow_out_fix(matting_image)
110 |     ctx.matting_image = ctx.processing_image.copy()
111 | 
112 | 
113 | def extract_human_rmbg(ctx: Context):
114 |     matting_image = get_rmbg_matting(ctx.processing_image, WEIGHTS["rmbg-1.4"])
115 |     ctx.processing_image = matting_image
116 |     ctx.matting_image = ctx.processing_image.copy()
117 | 
118 | 
119 | # def extract_human_birefnet_portrait(ctx: Context):
120 | #     matting_image = get_birefnet_portrait_matting(
121 | #         ctx.processing_image, WEIGHTS["birefnet-portrait"]
122 | #     )
123 | #     ctx.processing_image = matting_image
124 | #     ctx.matting_image = ctx.processing_image.copy()
125 | 
126 | 
127 | def extract_human_birefnet_lite(ctx: Context):
128 |     matting_image = get_birefnet_portrait_matting(
129 |         ctx.processing_image, WEIGHTS["birefnet-v1-lite"]
130 |     )
131 |     ctx.processing_image = matting_image
132 |     ctx.matting_image = ctx.processing_image.copy()
133 | 
134 | 
135 | def hollow_out_fix(src: np.ndarray) -> np.ndarray:
136 |     """
137 |     修补抠图区域，作为抠图模型精度不够的补充
138 |     :param src:
139 |     :return:
140 |     """
141 |     b, g, r, a = cv2.split(src)
142 |     src_bgr = cv2.merge((b, g, r))
143 |     # -----------padding---------- #
144 |     add_area = np.zeros((10, a.shape[1]), np.uint8)
145 |     a = np.vstack((add_area, a, add_area))
146 |     add_area = np.zeros((a.shape[0], 10), np.uint8)
147 |     a = np.hstack((add_area, a, add_area))
148 |     # -------------end------------ #
149 |     _, a_threshold = cv2.threshold(a, 127, 255, 0)
150 |     a_erode = cv2.erode(
151 |         a_threshold,
152 |         kernel=cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5)),
153 |         iterations=3,
154 |     )
155 |     contours, hierarchy = cv2.findContours(
156 |         a_erode, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
157 |     )
158 |     contours = [x for x in contours]
159 |     # contours = np.squeeze(contours)
160 |     contours.sort(key=lambda c: cv2.contourArea(c), reverse=True)
161 |     a_contour = cv2.drawContours(np.zeros(a.shape, np.uint8), contours[0], -1, 255, 2)
162 |     # a_base = a_contour[1:-1, 1:-1]
163 |     h, w = a.shape[:2]
164 |     mask = np.zeros(
165 |         [h + 2, w + 2], np.uint8
166 |     )  # mask 必须行和列都加 2，且必须为 uint8 单通道阵列
167 |     cv2.floodFill(a_contour, mask=mask, seedPoint=(0, 0), newVal=255)
168 |     a = cv2.add(a, 255 - a_contour)
169 |     return cv2.merge((src_bgr, a[10:-10, 10:-10]))
170 | 
171 | 
172 | def image2bgr(input_image):
173 |     if len(input_image.shape) == 2:
174 |         input_image = input_image[:, :, None]
175 |     if input_image.shape[2] == 1:
176 |         result_image = np.repeat(input_image, 3, axis=2)
177 |     elif input_image.shape[2] == 4:
178 |         result_image = input_image[:, :, 0:3]
179 |     else:
180 |         result_image = input_image
181 | 
182 |     return result_image
183 | 
184 | 
185 | def read_modnet_image(input_image, ref_size=512):
186 |     im = Image.fromarray(np.uint8(input_image))
187 |     width, length = im.size[0], im.size[1]
188 |     im = np.asarray(im)
189 |     im = image2bgr(im)
190 |     im = cv2.resize(im, (ref_size, ref_size), interpolation=cv2.INTER_AREA)
191 |     im = NNormalize(im, mean=np.array([0.5, 0.5, 0.5]), std=np.array([0.5, 0.5, 0.5]))
192 |     im = NUnsqueeze(NTo_Tensor(im))
193 | 
194 |     return im, width, length
195 | 
196 | 
197 | def get_modnet_matting(input_image, checkpoint_path, ref_size=512):
198 |     global HIVISION_MODNET_SESS
199 | 
200 |     if not os.path.exists(checkpoint_path):
201 |         print(f"Checkpoint file not found: {checkpoint_path}")
202 |         return None
203 | 
204 |     # 如果RUN_MODE不是野兽模式，则不加载模型
205 |     if HIVISION_MODNET_SESS is None:
206 |         HIVISION_MODNET_SESS = load_onnx_model(checkpoint_path, set_cpu=True)
207 | 
208 |     input_name = HIVISION_MODNET_SESS.get_inputs()[0].name
209 |     output_name = HIVISION_MODNET_SESS.get_outputs()[0].name
210 | 
211 |     im, width, length = read_modnet_image(input_image=input_image, ref_size=ref_size)
212 | 
213 |     matte = HIVISION_MODNET_SESS.run([output_name], {input_name: im})
214 |     matte = (matte[0] * 255).astype("uint8")
215 |     matte = np.squeeze(matte)
216 |     mask = cv2.resize(matte, (width, length), interpolation=cv2.INTER_AREA)
217 |     b, g, r = cv2.split(np.uint8(input_image))
218 | 
219 |     output_image = cv2.merge((b, g, r, mask))
220 |     
221 |     # 如果RUN_MODE不是野兽模式，则释放模型
222 |     if os.getenv("RUN_MODE") != "beast":
223 |         HIVISION_MODNET_SESS = None
224 | 
225 |     return output_image
226 | 
227 | 
228 | def get_modnet_matting_photographic_portrait_matting(
229 |     input_image, checkpoint_path, ref_size=512
230 | ):
231 |     global MODNET_PHOTOGRAPHIC_PORTRAIT_MATTING_SESS
232 | 
233 |     if not os.path.exists(checkpoint_path):
234 |         print(f"Checkpoint file not found: {checkpoint_path}")
235 |         return None
236 | 
237 |     # 如果RUN_MODE不是野兽模式，则不加载模型
238 |     if MODNET_PHOTOGRAPHIC_PORTRAIT_MATTING_SESS is None:
239 |         MODNET_PHOTOGRAPHIC_PORTRAIT_MATTING_SESS = load_onnx_model(
240 |             checkpoint_path, set_cpu=True
241 |         )
242 | 
243 |     input_name = MODNET_PHOTOGRAPHIC_PORTRAIT_MATTING_SESS.get_inputs()[0].name
244 |     output_name = MODNET_PHOTOGRAPHIC_PORTRAIT_MATTING_SESS.get_outputs()[0].name
245 | 
246 |     im, width, length = read_modnet_image(input_image=input_image, ref_size=ref_size)
247 | 
248 |     matte = MODNET_PHOTOGRAPHIC_PORTRAIT_MATTING_SESS.run(
249 |         [output_name], {input_name: im}
250 |     )
251 |     matte = (matte[0] * 255).astype("uint8")
252 |     matte = np.squeeze(matte)
253 |     mask = cv2.resize(matte, (width, length), interpolation=cv2.INTER_AREA)
254 |     b, g, r = cv2.split(np.uint8(input_image))
255 | 
256 |     output_image = cv2.merge((b, g, r, mask))
257 |     
258 |     # 如果RUN_MODE不是野兽模式，则释放模型
259 |     if os.getenv("RUN_MODE") != "beast":
260 |         MODNET_PHOTOGRAPHIC_PORTRAIT_MATTING_SESS = None
261 | 
262 |     return output_image
263 | 
264 | 
265 | def get_rmbg_matting(input_image: np.ndarray, checkpoint_path, ref_size=1024):
266 |     global RMBG_SESS
267 | 
268 |     if not os.path.exists(checkpoint_path):
269 |         print(f"Checkpoint file not found: {checkpoint_path}")
270 |         return None
271 | 
272 |     def resize_rmbg_image(image):
273 |         image = image.convert("RGB")
274 |         model_input_size = (ref_size, ref_size)
275 |         image = image.resize(model_input_size, Image.BILINEAR)
276 |         return image
277 | 
278 |     if RMBG_SESS is None:
279 |         RMBG_SESS = load_onnx_model(checkpoint_path, set_cpu=True)
280 | 
281 |     orig_image = Image.fromarray(input_image)
282 |     image = resize_rmbg_image(orig_image)
283 |     im_np = np.array(image).astype(np.float32)
284 |     im_np = im_np.transpose(2, 0, 1)  # Change to CxHxW format
285 |     im_np = np.expand_dims(im_np, axis=0)  # Add batch dimension
286 |     im_np = im_np / 255.0  # Normalize to [0, 1]
287 |     im_np = (im_np - 0.5) / 0.5  # Normalize to [-1, 1]
288 | 
289 |     # Inference
290 |     result = RMBG_SESS.run(None, {RMBG_SESS.get_inputs()[0].name: im_np})[0]
291 | 
292 |     # Post process
293 |     result = np.squeeze(result)
294 |     ma = np.max(result)
295 |     mi = np.min(result)
296 |     result = (result - mi) / (ma - mi)  # Normalize to [0, 1]
297 | 
298 |     # Convert to PIL image
299 |     im_array = (result * 255).astype(np.uint8)
300 |     pil_im = Image.fromarray(
301 |         im_array, mode="L"
302 |     )  # Ensure mask is single channel (L mode)
303 | 
304 |     # Resize the mask to match the original image size
305 |     pil_im = pil_im.resize(orig_image.size, Image.BILINEAR)
306 | 
307 |     # Paste the mask on the original image
308 |     new_im = Image.new("RGBA", orig_image.size, (0, 0, 0, 0))
309 |     new_im.paste(orig_image, mask=pil_im)
310 |     
311 |     # 如果RUN_MODE不是野兽模式，则释放模型
312 |     if os.getenv("RUN_MODE") != "beast":
313 |         RMBG_SESS = None
314 | 
315 |     return np.array(new_im)
316 | 
317 | 
318 | def get_mnn_modnet_matting(input_image, checkpoint_path, ref_size=512):
319 |     if not os.path.exists(checkpoint_path):
320 |         print(f"Checkpoint file not found: {checkpoint_path}")
321 |         return None
322 | 
323 |     try:
324 |         import MNN.expr as expr
325 |         import MNN.nn as nn
326 |     except ImportError as e:
327 |         raise ImportError(
328 |             "The MNN module is not installed or there was an import error. Please ensure that the MNN library is installed by using the command 'pip install mnn'."
329 |         ) from e
330 | 
331 |     config = {}
332 |     config["precision"] = "low"  # 当硬件支持（armv8.2）时使用fp16推理
333 |     config["backend"] = 0  # CPU
334 |     config["numThread"] = 4  # 线程数
335 |     im, width, length = read_modnet_image(input_image, ref_size=512)
336 |     rt = nn.create_runtime_manager((config,))
337 |     net = nn.load_module_from_file(
338 |         checkpoint_path, ["input1"], ["output1"], runtime_manager=rt
339 |     )
340 |     input_var = expr.convert(im, expr.NCHW)
341 |     output_var = net.forward(input_var)
342 |     matte = expr.convert(output_var, expr.NCHW)
343 |     matte = matte.read()  # var转换为np
344 |     matte = (matte * 255).astype("uint8")
345 |     matte = np.squeeze(matte)
346 |     mask = cv2.resize(matte, (width, length), interpolation=cv2.INTER_AREA)
347 |     b, g, r = cv2.split(np.uint8(input_image))
348 | 
349 |     output_image = cv2.merge((b, g, r, mask))
350 | 
351 |     return output_image
352 | 
353 | 
354 | def get_birefnet_portrait_matting(input_image, checkpoint_path, ref_size=512):
355 |     global BIREFNET_V1_LITE_SESS
356 | 
357 |     if not os.path.exists(checkpoint_path):
358 |         print(f"Checkpoint file not found: {checkpoint_path}")
359 |         return None
360 | 
361 |     def transform_image(image):
362 |         image = image.resize((1024, 1024))  # Resize to 1024x1024
363 |         image = (
364 |             np.array(image, dtype=np.float32) / 255.0
365 |         )  # Convert to numpy array and normalize to [0, 1]
366 |         image = (image - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]  # Normalize
367 |         image = np.transpose(image, (2, 0, 1))  # Change from (H, W, C) to (C, H, W)
368 |         image = np.expand_dims(image, axis=0)  # Add batch dimension
369 |         return image.astype(np.float32)  # Ensure the output is float32
370 | 
371 |     orig_image = Image.fromarray(input_image)
372 |     input_images = transform_image(
373 |         orig_image
374 |     )  # This will already have the correct shape
375 | 
376 |     # 记录加载onnx模型的开始时间
377 |     load_start_time = time()
378 | 
379 |     # 如果RUN_MODE不是野兽模式，则不加载模型
380 |     if BIREFNET_V1_LITE_SESS is None:
381 |         # print("首次加载birefnet-v1-lite模型...")
382 |         if ONNX_DEVICE == "GPU":
383 |             print("onnxruntime-gpu已安装，尝试使用CUDA加载模型")
384 |             try:
385 |                 import torch
386 |             except ImportError:
387 |                 print(
388 |                     "torch未安装，尝试直接使用onnxruntime-gpu加载模型，这需要配置好CUDA和cuDNN"
389 |                 )
390 |             BIREFNET_V1_LITE_SESS = load_onnx_model(checkpoint_path)
391 |         else:
392 |             BIREFNET_V1_LITE_SESS = load_onnx_model(checkpoint_path, set_cpu=True)
393 | 
394 |     # 记录加载onnx模型的结束时间
395 |     load_end_time = time()
396 | 
397 |     # 打印加载onnx模型所花的时间
398 |     print(f"Loading ONNX model took {load_end_time - load_start_time:.4f} seconds")
399 | 
400 |     input_name = BIREFNET_V1_LITE_SESS.get_inputs()[0].name
401 |     print(onnxruntime.get_device(), BIREFNET_V1_LITE_SESS.get_providers())
402 | 
403 |     time_st = time()
404 |     pred_onnx = BIREFNET_V1_LITE_SESS.run(None, {input_name: input_images})[
405 |         -1
406 |     ]  # Use float32 input
407 |     pred_onnx = np.squeeze(pred_onnx)  # Use numpy to squeeze
408 |     result = 1 / (1 + np.exp(-pred_onnx))  # Sigmoid function using numpy
409 |     print(f"Inference time: {time() - time_st:.4f} seconds")
410 | 
411 |     # Convert to PIL image
412 |     im_array = (result * 255).astype(np.uint8)
413 |     pil_im = Image.fromarray(
414 |         im_array, mode="L"
415 |     )  # Ensure mask is single channel (L mode)
416 | 
417 |     # Resize the mask to match the original image size
418 |     pil_im = pil_im.resize(orig_image.size, Image.BILINEAR)
419 | 
420 |     # Paste the mask on the original image
421 |     new_im = Image.new("RGBA", orig_image.size, (0, 0, 0, 0))
422 |     new_im.paste(orig_image, mask=pil_im)
423 |     
424 |     # 如果RUN_MODE不是野兽模式，则释放模型
425 |     if os.getenv("RUN_MODE") != "beast":
426 |         BIREFNET_V1_LITE_SESS = None
427 | 
428 |     return np.array(new_im)
429 | 


--------------------------------------------------------------------------------
/hivision/creator/layout_calculator.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | r"""
  4 | @DATE: 2024/9/5 21:35
  5 | @File: layout_calculator.py
  6 | @IDE: pycharm
  7 | @Description:
  8 |     布局计算器
  9 | """
 10 | 
 11 | import cv2.detail
 12 | import numpy as np
 13 | 
 14 | 
 15 | def judge_layout(
 16 |     input_width,
 17 |     input_height,
 18 |     PHOTO_INTERVAL_W,
 19 |     PHOTO_INTERVAL_H,
 20 |     LIMIT_BLOCK_W,
 21 |     LIMIT_BLOCK_H,
 22 | ):
 23 |     centerBlockHeight_1, centerBlockWidth_1 = (
 24 |         input_height,
 25 |         input_width,
 26 |     )  # 由证件照们组成的一个中心区块（1 代表不转置排列）
 27 |     centerBlockHeight_2, centerBlockWidth_2 = (
 28 |         input_width,
 29 |         input_height,
 30 |     )  # 由证件照们组成的一个中心区块（2 代表转置排列）
 31 | 
 32 |     # 1.不转置排列的情况下：
 33 |     layout_col_no_transpose = 0  # 行
 34 |     layout_row_no_transpose = 0  # 列
 35 |     for i in range(1, 4):
 36 |         centerBlockHeight_temp = input_height * i + PHOTO_INTERVAL_H * (i - 1)
 37 |         if centerBlockHeight_temp < LIMIT_BLOCK_H:
 38 |             centerBlockHeight_1 = centerBlockHeight_temp
 39 |             layout_row_no_transpose = i
 40 |         else:
 41 |             break
 42 |     for j in range(1, 9):
 43 |         centerBlockWidth_temp = input_width * j + PHOTO_INTERVAL_W * (j - 1)
 44 |         if centerBlockWidth_temp < LIMIT_BLOCK_W:
 45 |             centerBlockWidth_1 = centerBlockWidth_temp
 46 |             layout_col_no_transpose = j
 47 |         else:
 48 |             break
 49 |     layout_number_no_transpose = layout_row_no_transpose * layout_col_no_transpose
 50 | 
 51 |     # 2.转置排列的情况下：
 52 |     layout_col_transpose = 0  # 行
 53 |     layout_row_transpose = 0  # 列
 54 |     for i in range(1, 4):
 55 |         centerBlockHeight_temp = input_width * i + PHOTO_INTERVAL_H * (i - 1)
 56 |         if centerBlockHeight_temp < LIMIT_BLOCK_H:
 57 |             centerBlockHeight_2 = centerBlockHeight_temp
 58 |             layout_row_transpose = i
 59 |         else:
 60 |             break
 61 |     for j in range(1, 9):
 62 |         centerBlockWidth_temp = input_height * j + PHOTO_INTERVAL_W * (j - 1)
 63 |         if centerBlockWidth_temp < LIMIT_BLOCK_W:
 64 |             centerBlockWidth_2 = centerBlockWidth_temp
 65 |             layout_col_transpose = j
 66 |         else:
 67 |             break
 68 |     layout_number_transpose = layout_row_transpose * layout_col_transpose
 69 | 
 70 |     if layout_number_transpose > layout_number_no_transpose:
 71 |         layout_mode = (layout_col_transpose, layout_row_transpose, 2)
 72 |         return layout_mode, centerBlockWidth_2, centerBlockHeight_2
 73 |     else:
 74 |         layout_mode = (layout_col_no_transpose, layout_row_no_transpose, 1)
 75 |         return layout_mode, centerBlockWidth_1, centerBlockHeight_1
 76 | 
 77 | 
 78 | def generate_layout_array(input_height, input_width, LAYOUT_WIDTH=1795, LAYOUT_HEIGHT=1205):
 79 |     # 1.基础参数表
 80 |     PHOTO_INTERVAL_H = 30  # 证件照与证件照之间的垂直距离
 81 |     PHOTO_INTERVAL_W = 30  # 证件照与证件照之间的水平距离
 82 |     SIDES_INTERVAL_H = 50  # 证件照与画布边缘的垂直距离
 83 |     SIDES_INTERVAL_W = 70  # 证件照与画布边缘的水平距离
 84 |     LIMIT_BLOCK_W = LAYOUT_WIDTH - 2 * SIDES_INTERVAL_W
 85 |     LIMIT_BLOCK_H = LAYOUT_HEIGHT - 2 * SIDES_INTERVAL_H
 86 | 
 87 |     # 2.创建一个 1180x1746 的空白画布
 88 |     white_background = np.zeros([LAYOUT_HEIGHT, LAYOUT_WIDTH, 3], np.uint8)
 89 |     white_background.fill(255)
 90 | 
 91 |     # 3.计算照片的 layout（列、行、横竖朝向）,证件照组成的中心区块的分辨率
 92 |     layout_mode, centerBlockWidth, centerBlockHeight = judge_layout(
 93 |         input_width,
 94 |         input_height,
 95 |         PHOTO_INTERVAL_W,
 96 |         PHOTO_INTERVAL_H,
 97 |         LIMIT_BLOCK_W,
 98 |         LIMIT_BLOCK_H,
 99 |     )
100 |     # 4.开始排列组合
101 |     x11 = (LAYOUT_WIDTH - centerBlockWidth) // 2
102 |     y11 = (LAYOUT_HEIGHT - centerBlockHeight) // 2
103 |     typography_arr = []
104 |     typography_rotate = False
105 |     if layout_mode[2] == 2:
106 |         input_height, input_width = input_width, input_height
107 |         typography_rotate = True
108 | 
109 |     for j in range(layout_mode[1]):
110 |         for i in range(layout_mode[0]):
111 |             xi = x11 + i * input_width + i * PHOTO_INTERVAL_W
112 |             yi = y11 + j * input_height + j * PHOTO_INTERVAL_H
113 |             typography_arr.append([xi, yi])
114 | 
115 |     return typography_arr, typography_rotate
116 | 
117 | 
118 | def generate_layout_image(
119 |     input_image, typography_arr, typography_rotate, width=295, height=413, 
120 |     crop_line:bool=False,
121 |     LAYOUT_WIDTH=1795, 
122 |     LAYOUT_HEIGHT=1205,
123 | ):
124 |   
125 |     # 创建一个白色背景的空白画布
126 |     white_background = np.zeros([LAYOUT_HEIGHT, LAYOUT_WIDTH, 3], np.uint8)
127 |     white_background.fill(255)
128 |     
129 |     # 如果输入图像的高度不等于指定高度，则调整图像大小
130 |     if input_image.shape[0] != height:
131 |         input_image = cv2.resize(input_image, (width, height))
132 |     
133 |     # 如果需要旋转排版，则对图像进行转置和垂直镜像
134 |     if typography_rotate:
135 |         input_image = cv2.transpose(input_image)
136 |         input_image = cv2.flip(input_image, 0)  # 0 表示垂直镜像
137 | 
138 |         # 交换高度和宽度
139 |         height, width = width, height
140 |     
141 |     # 将图像按照排版数组中的位置放置到白色背景上
142 |     for arr in typography_arr:
143 |         locate_x, locate_y = arr[0], arr[1]
144 |         white_background[locate_y : locate_y + height, locate_x : locate_x + width] = (
145 |             input_image
146 |         )
147 | 
148 |     if crop_line:
149 |         # 添加裁剪线
150 |         line_color = (200, 200, 200)  # 浅灰色
151 |         line_thickness = 1
152 | 
153 |         # 初始化裁剪线位置列表
154 |         vertical_lines = []
155 |         horizontal_lines = []
156 | 
157 |         # 根据排版数组添加裁剪线
158 |         for arr in typography_arr:
159 |             x, y = arr[0], arr[1]
160 |             if x not in vertical_lines:
161 |                 vertical_lines.append(x)
162 |             if x + width not in vertical_lines:
163 |                 vertical_lines.append(x + width)
164 |             if y not in horizontal_lines:
165 |                 horizontal_lines.append(y)
166 |             if y + height not in horizontal_lines:
167 |                 horizontal_lines.append(y + height)
168 | 
169 |         # 绘制垂直裁剪线
170 |         for x in vertical_lines:
171 |             cv2.line(white_background, (x, 0), (x, LAYOUT_HEIGHT), line_color, line_thickness)
172 | 
173 |         # 绘制水平裁剪线
174 |         for y in horizontal_lines:
175 |             cv2.line(white_background, (0, y), (LAYOUT_WIDTH, y), line_color, line_thickness)
176 | 
177 |     # 返回排版后的图像
178 |     return white_background
179 | 


--------------------------------------------------------------------------------
/hivision/creator/move_image.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 有一些 png 图像下部也会有一些透明的区域，使得图像无法对其底部边框
  3 | 本程序实现移动图像，使其下部与 png 图像实际大小相对齐
  4 | """
  5 | import os
  6 | import cv2
  7 | import numpy as np
  8 | from hivisionai.hycv.utils import get_box_pro
  9 | 
 10 | path_pre = os.path.join(os.getcwd(), 'pre')
 11 | path_final = os.path.join(os.getcwd(), 'final')
 12 | 
 13 | 
 14 | def merge(boxes):
 15 |     """
 16 |     生成的边框可能不止只有一个，需要将边框合并
 17 |     """
 18 |     x, y, h, w = boxes[0]
 19 |     # x 和 y 应该是整个 boxes 里面最小的值
 20 |     if len(boxes) > 1:
 21 |         for tmp in boxes:
 22 |             x_tmp, y_tmp, h_tmp, w_tmp = tmp
 23 |             if x > x_tmp:
 24 |                 x_max = x_tmp + w_tmp if x_tmp + w_tmp > x + w else x + w
 25 |                 x = x_tmp
 26 |                 w = x_max - x
 27 |             if y > y_tmp:
 28 |                 y_max = y_tmp + h_tmp if y_tmp + h_tmp > y + h else y + h
 29 |                 y = y_tmp
 30 |                 h = y_max - y
 31 |     return tuple((x, y, h, w))
 32 | 
 33 | 
 34 | def get_box(png_img):
 35 |     """
 36 |     获取矩形边框最终返回一个元组 (x,y,h,w)，分别对应矩形左上角的坐标和矩形的高和宽
 37 |     """
 38 |     r,  g,  b , a = cv2.split(png_img)
 39 |     gray_img = a
 40 |     th, binary = cv2.threshold(gray_img, 127 , 255, cv2.THRESH_BINARY)  # 二值化
 41 |     # cv2.imshow("name", binary)
 42 |     # cv2.waitKey(0)
 43 |     contours, hierarchy = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)  # 得到轮廓列表 contours
 44 |     bounding_boxes = merge([cv2.boundingRect(cnt) for cnt in contours])  # 轮廓合并
 45 |     # print(bounding_boxes)
 46 |     return bounding_boxes
 47 | 
 48 | 
 49 | def get_box_2(png_img):
 50 |     """
 51 |     不用 opencv 内置算法生成矩形了，改用自己的算法（for 循环）
 52 |     """
 53 |     _, _, _, a = cv2.split(png_img)
 54 |     _, a = cv2.threshold(a, 127, 255, cv2.THRESH_BINARY)
 55 |     # 将 r，g，b 通道丢弃，只留下透明度通道
 56 |     # cv2.imshow("name", a)
 57 |     # cv2.waitKey(0)
 58 |     # 在透明度矩阵中，0 代表完全透明
 59 |     height,width=a.shape  # 高和宽
 60 |     f=0
 61 |     tmp1 = 0
 62 | 
 63 |     """
 64 |     获取上下
 65 |     """
 66 |     for tmp1 in range(0,height):
 67 |         tmp_a_high= a[tmp1:tmp1+1,:][0]
 68 |         for tmp2 in range(width):
 69 |             # a = tmp_a_low[tmp2]
 70 |             if tmp_a_high[tmp2]!=0:
 71 |                 f=1
 72 |         if f == 1:
 73 |             break
 74 |     delta_y_high = tmp1 + 1
 75 |     f = 0
 76 |     for tmp1 in range(height,-1, -1):
 77 |         tmp_a_low= a[tmp1-1:tmp1+1,:][0]
 78 |         for tmp2 in range(width):
 79 |             # a = tmp_a_low[tmp2]
 80 |             if tmp_a_low[tmp2]!=0:
 81 |                 f=1
 82 |         if f == 1:
 83 |             break
 84 |     delta_y_bottom = height - tmp1 + 3
 85 |     """
 86 |     获取左右
 87 |     """
 88 |     f = 0
 89 |     for tmp1 in range(width):
 90 |         tmp_a_left = a[:, tmp1:tmp1+1]
 91 |         for tmp2 in range(height):
 92 |             if tmp_a_left[tmp2] != 0:
 93 |                 f = 1
 94 |         if f==1:
 95 |             break
 96 |     delta_x_left = tmp1 + 1
 97 |     f = 0
 98 |     for tmp1 in range(width, -1, -1):
 99 |         tmp_a_left = a[:, tmp1-1:tmp1]
100 |         for tmp2 in range(height):
101 |             if tmp_a_left[tmp2] != 0:
102 |                 f = 1
103 |         if f==1:
104 |             break
105 |     delta_x_right = width - tmp1 + 1
106 |     return  delta_y_high, delta_y_bottom, delta_x_left, delta_x_right
107 | 
108 | 
109 | def move(input_image):
110 |     """
111 |     裁剪主函数，输入一张 png 图像，该图像周围是透明的
112 |     """
113 |     png_img = input_image  # 获取图像
114 | 
115 |     height, width, channels = png_img.shape  # 高 y、宽 x
116 |     y_low,y_high, _, _ = get_box_pro(png_img, model=2)  # for 循环
117 |     base = np.zeros((y_high, width, channels),dtype=np.uint8)  # for 循环
118 |     png_img = png_img[0:height - y_high, :, :]  # for 循环
119 |     png_img = np.concatenate((base, png_img), axis=0)
120 |     return png_img, y_high
121 | 
122 | 
123 | def main():
124 |     if not os.path.exists(path_pre):
125 |         os.makedirs(path_pre)
126 |     if not os.path.exists(path_final):
127 |         os.makedirs(path_final)
128 |     for name in os.listdir(path_pre):
129 |         pass
130 |         # move(name)
131 | 
132 | 
133 | if __name__ == "__main__":
134 |     main()
135 | 


--------------------------------------------------------------------------------
/hivision/creator/photo_adjuster.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | r"""
  4 | @DATE: 2024/9/5 20:02
  5 | @File: photo_adjuster.py
  6 | @IDE: pycharm
  7 | @Description:
  8 |     证件照调整
  9 | """
 10 | from .context import Context
 11 | from .layout_calculator import generate_layout_array
 12 | import hivision.creator.utils as U
 13 | import numpy as np
 14 | import math
 15 | import cv2
 16 | 
 17 | 
 18 | def adjust_photo(ctx: Context):
 19 |     # Step1. 准备人脸参数
 20 |     face_rect = ctx.face["rectangle"]
 21 |     standard_size = ctx.params.size
 22 |     params = ctx.params
 23 |     x, y = face_rect[0], face_rect[1]
 24 |     w, h = face_rect[2], face_rect[3]
 25 |     height, width = ctx.matting_image.shape[:2]
 26 |     width_height_ratio = standard_size[0] / standard_size[1]
 27 |     # Step2. 计算高级参数
 28 |     face_center = (x + w / 2, y + h / 2)  # 面部中心坐标
 29 |     face_measure = w * h  # 面部面积
 30 |     crop_measure = (
 31 |         face_measure / params.head_measure_ratio
 32 |     )  # 裁剪框面积：为面部面积的 5 倍
 33 |     resize_ratio = crop_measure / (standard_size[0] * standard_size[1])  # 裁剪框缩放率
 34 |     resize_ratio_single = math.sqrt(
 35 |         resize_ratio
 36 |     )  # 长和宽的缩放率（resize_ratio 的开方）
 37 |     crop_size = (
 38 |         int(standard_size[0] * resize_ratio_single),
 39 |         int(standard_size[1] * resize_ratio_single),
 40 |     )  # 裁剪框大小
 41 | 
 42 |     # 裁剪框的定位信息
 43 |     x1 = int(face_center[0] - crop_size[1] / 2)
 44 |     y1 = int(face_center[1] - crop_size[0] * params.head_height_ratio)
 45 |     y2 = y1 + crop_size[0]
 46 |     x2 = x1 + crop_size[1]
 47 | 
 48 |     # Step3, 裁剪框的调整
 49 |     cut_image = IDphotos_cut(x1, y1, x2, y2, ctx.matting_image)
 50 |     cut_image = cv2.resize(cut_image, (crop_size[1], crop_size[0]))
 51 |     y_top, y_bottom, x_left, x_right = U.get_box(
 52 |         cut_image.astype(np.uint8), model=2, correction_factor=0
 53 |     )  # 得到 cut_image 中人像的上下左右距离信息
 54 | 
 55 |     # Step5. 判定 cut_image 中的人像是否处于合理的位置，若不合理，则处理数据以便之后调整位置
 56 |     # 检测人像与裁剪框左边或右边是否存在空隙
 57 |     if x_left > 0 or x_right > 0:
 58 |         status_left_right = 1
 59 |         cut_value_top = int(
 60 |             ((x_left + x_right) * width_height_ratio) / 2
 61 |         )  # 减去左右，为了保持比例，上下也要相应减少 cut_value_top
 62 |     else:
 63 |         status_left_right = 0
 64 |         cut_value_top = 0
 65 | 
 66 |     """
 67 |         检测人头顶与照片的顶部是否在合适的距离内：
 68 |         - status==0: 距离合适，无需移动
 69 |         - status=1: 距离过大，人像应向上移动
 70 |         - status=2: 距离过小，人像应向下移动
 71 |     """
 72 |     status_top, move_value = U.detect_distance(
 73 |         y_top - cut_value_top,
 74 |         crop_size[0],
 75 |         max=params.head_top_range[0],
 76 |         min=params.head_top_range[1],
 77 |     )
 78 | 
 79 |     # Step6. 对照片的第二轮裁剪
 80 |     if status_left_right == 0 and status_top == 0:
 81 |         result_image = cut_image
 82 |     else:
 83 |         result_image = IDphotos_cut(
 84 |             x1 + x_left,
 85 |             y1 + cut_value_top + status_top * move_value,
 86 |             x2 - x_right,
 87 |             y2 - cut_value_top + status_top * move_value,
 88 |             ctx.matting_image,
 89 |         )
 90 | 
 91 |     # 换装参数准备
 92 |     relative_x = x - (x1 + x_left)
 93 |     relative_y = y - (y1 + cut_value_top + status_top * move_value)
 94 | 
 95 |     # Step7. 当照片底部存在空隙时，下拉至底部
 96 |     result_image, y_high = move(result_image.astype(np.uint8))
 97 |     relative_y = relative_y + y_high  # 更新换装参数
 98 | 
 99 |     # Step8. 标准照与高清照转换
100 |     result_image_standard = standard_photo_resize(result_image, standard_size)
101 |     result_image_hd, resize_ratio_max = resize_image_by_min(
102 |         result_image, esp=max(600, standard_size[1])
103 |     )
104 | 
105 |     # Step9. 参数准备 - 为换装服务
106 |     clothing_params = {
107 |         "relative_x": relative_x * resize_ratio_max,
108 |         "relative_y": relative_y * resize_ratio_max,
109 |         "w": w * resize_ratio_max,
110 |         "h": h * resize_ratio_max,
111 |     }
112 | 
113 |     # Step7. 排版照参数获取
114 |     typography_arr, typography_rotate = generate_layout_array(
115 |         input_height=standard_size[0], input_width=standard_size[1]
116 |     )
117 | 
118 |     return (
119 |         result_image_hd,
120 |         result_image_standard,
121 |         clothing_params,
122 |         {
123 |             "arr": typography_arr,
124 |             "rotate": typography_rotate,
125 |         },
126 |     )
127 | 
128 | 
129 | def IDphotos_cut(x1, y1, x2, y2, img):
130 |     """
131 |     在图片上进行滑动裁剪，输入输出为
132 |     输入：一张图片 img，和裁剪框信息 (x1,x2,y1,y2)
133 |     输出：裁剪好的图片，然后裁剪框超出了图像范围，那么将用 0 矩阵补位
134 |     ------------------------------------
135 |     x:裁剪框左上的横坐标
136 |     y:裁剪框左上的纵坐标
137 |     x2:裁剪框右下的横坐标
138 |     y2:裁剪框右下的纵坐标
139 |     crop_size:裁剪框大小
140 |     img:裁剪图像（numpy.array）
141 |     output_path:裁剪图片的输出路径
142 |     ------------------------------------
143 |     """
144 | 
145 |     crop_size = (y2 - y1, x2 - x1)
146 |     """
147 |     ------------------------------------
148 |     temp_x_1:裁剪框左边超出图像部分
149 |     temp_y_1:裁剪框上边超出图像部分
150 |     temp_x_2:裁剪框右边超出图像部分
151 |     temp_y_2:裁剪框下边超出图像部分
152 |     ------------------------------------
153 |     """
154 |     temp_x_1 = 0
155 |     temp_y_1 = 0
156 |     temp_x_2 = 0
157 |     temp_y_2 = 0
158 | 
159 |     if y1 < 0:
160 |         temp_y_1 = abs(y1)
161 |         y1 = 0
162 |     if y2 > img.shape[0]:
163 |         temp_y_2 = y2
164 |         y2 = img.shape[0]
165 |         temp_y_2 = temp_y_2 - y2
166 | 
167 |     if x1 < 0:
168 |         temp_x_1 = abs(x1)
169 |         x1 = 0
170 |     if x2 > img.shape[1]:
171 |         temp_x_2 = x2
172 |         x2 = img.shape[1]
173 |         temp_x_2 = temp_x_2 - x2
174 | 
175 |     # 生成一张全透明背景
176 |     background_bgr = np.full((crop_size[0], crop_size[1]), 255, dtype=np.uint8)
177 |     background_a = np.full((crop_size[0], crop_size[1]), 0, dtype=np.uint8)
178 |     background = cv2.merge(
179 |         (background_bgr, background_bgr, background_bgr, background_a)
180 |     )
181 | 
182 |     background[
183 |         temp_y_1 : crop_size[0] - temp_y_2, temp_x_1 : crop_size[1] - temp_x_2
184 |     ] = img[y1:y2, x1:x2]
185 | 
186 |     return background
187 | 
188 | 
189 | def move(input_image):
190 |     """
191 |     裁剪主函数，输入一张 png 图像，该图像周围是透明的
192 |     """
193 |     png_img = input_image  # 获取图像
194 | 
195 |     height, width, channels = png_img.shape  # 高 y、宽 x
196 |     y_low, y_high, _, _ = U.get_box(png_img, model=2)  # for 循环
197 |     base = np.zeros((y_high, width, channels), dtype=np.uint8)  # for 循环
198 |     png_img = png_img[0 : height - y_high, :, :]  # for 循环
199 |     png_img = np.concatenate((base, png_img), axis=0)
200 |     return png_img, y_high
201 | 
202 | 
203 | def standard_photo_resize(input_image: np.array, size):
204 |     """
205 |     input_image: 输入图像，即高清照
206 |     size: 标准照的尺寸
207 |     """
208 |     resize_ratio = input_image.shape[0] / size[0]
209 |     resize_item = int(round(input_image.shape[0] / size[0]))
210 |     if resize_ratio >= 2:
211 |         for i in range(resize_item - 1):
212 |             if i == 0:
213 |                 result_image = cv2.resize(
214 |                     input_image,
215 |                     (size[1] * (resize_item - i - 1), size[0] * (resize_item - i - 1)),
216 |                     interpolation=cv2.INTER_AREA,
217 |                 )
218 |             else:
219 |                 result_image = cv2.resize(
220 |                     result_image,
221 |                     (size[1] * (resize_item - i - 1), size[0] * (resize_item - i - 1)),
222 |                     interpolation=cv2.INTER_AREA,
223 |                 )
224 |     else:
225 |         result_image = cv2.resize(
226 |             input_image, (size[1], size[0]), interpolation=cv2.INTER_AREA
227 |         )
228 | 
229 |     return result_image
230 | 
231 | 
232 | def resize_image_by_min(input_image, esp=600):
233 |     """
234 |     将图像缩放为最短边至少为 esp 的图像。
235 |     :param input_image: 输入图像（OpenCV 矩阵）
236 |     :param esp: 缩放后的最短边长
237 |     :return: 缩放后的图像，缩放倍率
238 |     """
239 |     height, width = input_image.shape[0], input_image.shape[1]
240 |     min_border = min(height, width)
241 |     if min_border < esp:
242 |         if height >= width:
243 |             new_width = esp
244 |             new_height = height * esp // width
245 |         else:
246 |             new_height = esp
247 |             new_width = width * esp // height
248 | 
249 |         return (
250 |             cv2.resize(
251 |                 input_image, (new_width, new_height), interpolation=cv2.INTER_AREA
252 |             ),
253 |             new_height / height,
254 |         )
255 | 
256 |     else:
257 |         return input_image, 1
258 | 


--------------------------------------------------------------------------------
/hivision/creator/retinaface/__init__.py:
--------------------------------------------------------------------------------
1 | from .inference import retinaface_detect_faces
2 | 


--------------------------------------------------------------------------------
/hivision/creator/retinaface/box_utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def decode(loc, priors, variances):
 5 |     """Decode locations from predictions using priors to undo
 6 |     the encoding we did for offset regression at train time.
 7 |     Args:
 8 |         loc (tensor): location predictions for loc layers,
 9 |             Shape: [num_priors,4]
10 |         priors (tensor): Prior boxes in center-offset form.
11 |             Shape: [num_priors,4].
12 |         variances: (list[float]) Variances of priorboxes
13 |     Return:
14 |         decoded bounding box predictions
15 |     """
16 | 
17 |     boxes = None
18 | 
19 |     boxes = np.concatenate(
20 |         (
21 |             priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
22 |             priors[:, 2:] * np.exp(loc[:, 2:] * variances[1]),
23 |         ),
24 |         axis=1,
25 |     )
26 | 
27 |     boxes[:, :2] -= boxes[:, 2:] / 2
28 |     boxes[:, 2:] += boxes[:, :2]
29 |     return boxes
30 | 
31 | 
32 | def decode_landm(pre, priors, variances):
33 |     """Decode landm from predictions using priors to undo
34 |     the encoding we did for offset regression at train time.
35 |     Args:
36 |         pre (tensor): landm predictions for loc layers,
37 |             Shape: [num_priors,10]
38 |         priors (tensor): Prior boxes in center-offset form.
39 |             Shape: [num_priors,4].
40 |         variances: (list[float]) Variances of priorboxes
41 |     Return:
42 |         decoded landm predictions
43 |     """
44 |     landms = None
45 | 
46 |     landms = np.concatenate(
47 |         (
48 |             priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
49 |             priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
50 |             priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
51 |             priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
52 |             priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:],
53 |         ),
54 |         axis=1,
55 |     )
56 | 
57 |     return landms
58 | 


--------------------------------------------------------------------------------
/hivision/creator/retinaface/inference.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import cv2
  3 | import onnxruntime
  4 | from hivision.creator.retinaface.box_utils import decode, decode_landm
  5 | from hivision.creator.retinaface.prior_box import PriorBox
  6 | 
  7 | 
  8 | def py_cpu_nms(dets, thresh):
  9 |     """Pure Python NMS baseline."""
 10 |     x1 = dets[:, 0]
 11 |     y1 = dets[:, 1]
 12 |     x2 = dets[:, 2]
 13 |     y2 = dets[:, 3]
 14 |     scores = dets[:, 4]
 15 | 
 16 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
 17 |     order = scores.argsort()[::-1]
 18 | 
 19 |     keep = []
 20 |     while order.size > 0:
 21 |         i = order[0]
 22 |         keep.append(i)
 23 |         xx1 = np.maximum(x1[i], x1[order[1:]])
 24 |         yy1 = np.maximum(y1[i], y1[order[1:]])
 25 |         xx2 = np.minimum(x2[i], x2[order[1:]])
 26 |         yy2 = np.minimum(y2[i], y2[order[1:]])
 27 | 
 28 |         w = np.maximum(0.0, xx2 - xx1 + 1)
 29 |         h = np.maximum(0.0, yy2 - yy1 + 1)
 30 |         inter = w * h
 31 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
 32 | 
 33 |         inds = np.where(ovr <= thresh)[0]
 34 |         order = order[inds + 1]
 35 | 
 36 |     return keep
 37 | 
 38 | 
 39 | # 替换掉 argparse 的部分，直接使用普通变量
 40 | network = "resnet50"
 41 | use_cpu = False
 42 | confidence_threshold = 0.8
 43 | top_k = 5000
 44 | nms_threshold = 0.2
 45 | keep_top_k = 750
 46 | save_image = True
 47 | vis_thres = 0.6
 48 | 
 49 | ONNX_DEVICE = (
 50 |     "CUDAExecutionProvider"
 51 |     if onnxruntime.get_device() == "GPU"
 52 |     else "CPUExecutionProvider"
 53 | )
 54 | 
 55 | 
 56 | def load_onnx_model(checkpoint_path, set_cpu=False):
 57 |     providers = (
 58 |         ["CUDAExecutionProvider", "CPUExecutionProvider"]
 59 |         if ONNX_DEVICE == "CUDAExecutionProvider"
 60 |         else ["CPUExecutionProvider"]
 61 |     )
 62 | 
 63 |     if set_cpu:
 64 |         sess = onnxruntime.InferenceSession(
 65 |             checkpoint_path, providers=["CPUExecutionProvider"]
 66 |         )
 67 |     else:
 68 |         try:
 69 |             sess = onnxruntime.InferenceSession(checkpoint_path, providers=providers)
 70 |         except Exception as e:
 71 |             if ONNX_DEVICE == "CUDAExecutionProvider":
 72 |                 print(f"Failed to load model with CUDAExecutionProvider: {e}")
 73 |                 print("Falling back to CPUExecutionProvider")
 74 |                 # 尝试使用CPU加载模型
 75 |                 sess = onnxruntime.InferenceSession(
 76 |                     checkpoint_path, providers=["CPUExecutionProvider"]
 77 |                 )
 78 |             else:
 79 |                 raise e  # 如果是CPU执行失败，重新抛出异常
 80 | 
 81 |     return sess
 82 | 
 83 | 
 84 | def retinaface_detect_faces(image, model_path: str, sess=None):
 85 |     cfg = {
 86 |         "name": "Resnet50",
 87 |         "min_sizes": [[16, 32], [64, 128], [256, 512]],
 88 |         "steps": [8, 16, 32],
 89 |         "variance": [0.1, 0.2],
 90 |         "clip": False,
 91 |         "loc_weight": 2.0,
 92 |         "gpu_train": True,
 93 |         "batch_size": 24,
 94 |         "ngpu": 4,
 95 |         "epoch": 100,
 96 |         "decay1": 70,
 97 |         "decay2": 90,
 98 |         "image_size": 840,
 99 |         "pretrain": True,
100 |         "return_layers": {"layer2": 1, "layer3": 2, "layer4": 3},
101 |         "in_channel": 256,
102 |         "out_channel": 256,
103 |     }
104 | 
105 |     # Load ONNX model
106 |     if sess is None:
107 |         retinaface = load_onnx_model(model_path, set_cpu=False)
108 |     else:
109 |         retinaface = sess
110 | 
111 |     resize = 1
112 | 
113 |     # Read and preprocess the image
114 |     img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
115 |     img = np.float32(img_rgb)
116 | 
117 |     im_height, im_width, _ = img.shape
118 |     scale = np.array([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
119 |     img -= (104, 117, 123)
120 |     img = img.transpose(2, 0, 1)
121 |     img = np.expand_dims(img, axis=0)
122 | 
123 |     # Run the model
124 |     inputs = {"input": img}
125 |     loc, conf, landms = retinaface.run(None, inputs)
126 | 
127 |     priorbox = PriorBox(cfg, image_size=(im_height, im_width))
128 |     priors = priorbox.forward()
129 | 
130 |     prior_data = priors
131 | 
132 |     boxes = decode(np.squeeze(loc, axis=0), prior_data, cfg["variance"])
133 |     boxes = boxes * scale / resize
134 |     scores = np.squeeze(conf, axis=0)[:, 1]
135 | 
136 |     landms = decode_landm(np.squeeze(landms.data, axis=0), prior_data, cfg["variance"])
137 | 
138 |     scale1 = np.array(
139 |         [
140 |             img.shape[3],
141 |             img.shape[2],
142 |             img.shape[3],
143 |             img.shape[2],
144 |             img.shape[3],
145 |             img.shape[2],
146 |             img.shape[3],
147 |             img.shape[2],
148 |             img.shape[3],
149 |             img.shape[2],
150 |         ]
151 |     )
152 |     landms = landms * scale1 / resize
153 | 
154 |     # ignore low scores
155 |     inds = np.where(scores > confidence_threshold)[0]
156 |     boxes = boxes[inds]
157 |     landms = landms[inds]
158 |     scores = scores[inds]
159 | 
160 |     # keep top-K before NMS
161 |     order = scores.argsort()[::-1][:top_k]
162 |     boxes = boxes[order]
163 |     landms = landms[order]
164 |     scores = scores[order]
165 | 
166 |     # do NMS
167 |     dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
168 |     keep = py_cpu_nms(dets, nms_threshold)
169 |     dets = dets[keep, :]
170 |     landms = landms[keep]
171 | 
172 |     # keep top-K faster NMS
173 |     dets = dets[:keep_top_k, :]
174 |     landms = landms[:keep_top_k, :]
175 | 
176 |     dets = np.concatenate((dets, landms), axis=1)
177 | 
178 |     return dets, retinaface
179 | 
180 | 
181 | if __name__ == "__main__":
182 |     import gradio as gr
183 | 
184 |     # Create Gradio interface
185 |     iface = gr.Interface(
186 |         fn=retinaface_detect_faces,
187 |         inputs=[
188 |             gr.Image(
189 |                 type="numpy", label="上传图片", height=400
190 |             ),  # Set the height to 400
191 |             gr.Textbox(value="./FaceDetector.onnx", label="ONNX模型路径"),
192 |         ],
193 |         outputs=gr.Number(label="检测到的人脸数量"),
194 |         title="人脸检测",
195 |         description="上传图片并提供ONNX模型路径以检测人脸数量。",
196 |     )
197 | 
198 |     # Launch the Gradio app
199 |     iface.launch()
200 | 


--------------------------------------------------------------------------------
/hivision/creator/retinaface/prior_box.py:
--------------------------------------------------------------------------------
 1 | from itertools import product as product
 2 | import numpy as np
 3 | from math import ceil
 4 | 
 5 | 
 6 | class PriorBox(object):
 7 |     def __init__(self, cfg, image_size=None):
 8 |         super(PriorBox, self).__init__()
 9 |         self.min_sizes = cfg["min_sizes"]
10 |         self.steps = cfg["steps"]
11 |         self.clip = cfg["clip"]
12 |         self.image_size = image_size
13 |         self.feature_maps = [
14 |             [ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)]
15 |             for step in self.steps
16 |         ]
17 |         self.name = "s"
18 | 
19 |     def forward(self):
20 |         anchors = []
21 |         for k, f in enumerate(self.feature_maps):
22 |             min_sizes = self.min_sizes[k]
23 |             for i, j in product(range(f[0]), range(f[1])):
24 |                 for min_size in min_sizes:
25 |                     s_kx = min_size / self.image_size[1]
26 |                     s_ky = min_size / self.image_size[0]
27 |                     dense_cx = [
28 |                         x * self.steps[k] / self.image_size[1] for x in [j + 0.5]
29 |                     ]
30 |                     dense_cy = [
31 |                         y * self.steps[k] / self.image_size[0] for y in [i + 0.5]
32 |                     ]
33 |                     for cy, cx in product(dense_cy, dense_cx):
34 |                         anchors += [cx, cy, s_kx, s_ky]
35 | 
36 |         output = np.array(anchors).reshape(-1, 4)
37 | 
38 |         if self.clip:
39 |             output = np.clip(output, 0, 1)
40 | 
41 |         return output
42 | 


--------------------------------------------------------------------------------
/hivision/creator/retinaface/weights/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/hivision/creator/retinaface/weights/.gitkeep


--------------------------------------------------------------------------------
/hivision/creator/rotation_adjust.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 人脸旋转矫正模块
 3 | 
 4 | 本模块提供了用于旋转图像的函数，主要用于人脸旋转矫正。
 5 | 包含了处理3通道和4通道图像的旋转函数。
 6 | """
 7 | 
 8 | import cv2
 9 | import numpy as np
10 | 
11 | 
12 | def rotate_bound(image: np.ndarray, angle: float, center=None):
13 |     """
14 |     旋转图像而不损失信息的函数
15 | 
16 |     Args:
17 |         image (np.ndarray): 输入图像，3通道numpy数组
18 |         angle (float): 旋转角度（度）
19 |         center (tuple, optional): 旋转中心坐标，默认为图像中心
20 | 
21 |     Returns:
22 |         tuple: 包含以下元素的元组：
23 |             - rotated (np.ndarray): 旋转后的图像
24 |             - cos (float): 旋转角度的余弦值
25 |             - sin (float): 旋转角度的正弦值
26 |             - dW (int): 宽度变化量
27 |             - dH (int): 高度变化量
28 |     """
29 |     (h, w) = image.shape[:2]
30 |     if center is None:
31 |         (cX, cY) = (w / 2, h / 2)
32 |     else:
33 |         (cX, cY) = center
34 | 
35 |     M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
36 |     cos = np.abs(M[0, 0])
37 |     sin = np.abs(M[0, 1])
38 | 
39 |     nW = int((h * sin) + (w * cos))
40 |     nH = int((h * cos) + (w * sin))
41 | 
42 |     M[0, 2] += (nW / 2) - cX
43 |     M[1, 2] += (nH / 2) - cY
44 | 
45 |     rotated = cv2.warpAffine(image, M, (nW, nH))
46 | 
47 |     # 计算偏移量
48 |     dW = nW - w
49 |     dH = nH - h
50 | 
51 |     return rotated, cos, sin, dW, dH
52 | 
53 | 
54 | def rotate_bound_4channels(image: np.ndarray, a: np.ndarray, angle: float, center=None):
55 |     """
56 |     旋转4通道图像的函数
57 | 
58 |     这是rotate_bound函数的4通道版本，可以同时处理RGB图像和其对应的alpha通道。
59 | 
60 |     Args:
61 |         image (np.ndarray): 输入的3通道RGB图像
62 |         a (np.ndarray): 输入图像的alpha通道
63 |         angle (float): 旋转角度（度）
64 |         center (tuple, optional): 旋转中心坐标，默认为图像中心
65 | 
66 |     Returns:
67 |         tuple: 包含以下元素的元组：
68 |             - input_image (np.ndarray): 旋转后的3通道RGB图像
69 |             - result_image (np.ndarray): 旋转后的4通道RGBA图像
70 |             - cos (float): 旋转角度的余弦值
71 |             - sin (float): 旋转角度的正弦值
72 |             - dW (int): 宽度变化量
73 |             - dH (int): 高度变化量
74 |     """
75 |     input_image, cos, sin, dW, dH = rotate_bound(image, angle, center)
76 |     new_a, _, _, _, _ = rotate_bound(a, angle, center)  # 对alpha通道进行旋转
77 |     b, g, r = cv2.split(input_image)
78 |     result_image = cv2.merge((b, g, r, new_a))  # 合并旋转后的RGB通道和alpha通道
79 | 
80 |     return input_image, result_image, cos, sin, dW, dH
81 | 


--------------------------------------------------------------------------------
/hivision/creator/tensor2numpy.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 作者：林泽毅
 3 | 建这个开源库的起源呢，是因为在做 onnx 推理的时候，需要将原来的 tensor 转换成 numpy.array
 4 | 问题是 Tensor 和 Numpy 的矩阵排布逻辑不同
 5 | 包括 Tensor 推理经常会进行 Transform，比如 ToTensor,Normalize 等
 6 | 就想做一些等价转换的函数。
 7 | """
 8 | import numpy as np
 9 | 
10 | 
11 | def NTo_Tensor(array):
12 |     """
13 |     :param array: opencv/PIL读取的numpy矩阵
14 |     :return:返回一个形如 Tensor 的 numpy 矩阵
15 |     Example:
16 |     Inputs:array.shape = (512,512,3)
17 |     Outputs:output.shape = (3,512,512)
18 |     """
19 |     output = array.transpose((2, 0, 1))
20 |     return output
21 | 
22 | 
23 | def NNormalize(array, mean=np.array([0.5, 0.5, 0.5]), std=np.array([0.5, 0.5, 0.5]), dtype=np.float32):
24 |     """
25 |     :param array: opencv/PIL读取的numpy矩阵
26 |            mean: 归一化均值，np.array 格式
27 |            std:  归一化标准差，np.array 格式
28 |            dtype：输出的 numpy 数据格式，一般 onnx 需要 float32
29 |     :return:numpy 矩阵
30 |     Example:
31 |     Inputs:array 为 opencv/PIL 读取的一张图片
32 |            mean=np.array([0.5,0.5,0.5])
33 |            std=np.array([0.5,0.5,0.5])
34 |            dtype=np.float32
35 |     Outputs:output 为归一化后的 numpy 矩阵
36 |     """
37 |     im = array / 255.0
38 |     im = np.divide(np.subtract(im, mean), std)
39 |     output = np.asarray(im, dtype=dtype)
40 | 
41 |     return output
42 | 
43 | 
44 | def NUnsqueeze(array, axis=0):
45 |     """
46 |     :param array: opencv/PIL读取的numpy矩阵
47 |            axis：要增加的维度
48 |     :return:numpy 矩阵
49 |     Example:
50 |     Inputs:array 为 opencv/PIL 读取的一张图片，array.shape 为 [512,512,3]
51 |            axis=0
52 |     Outputs:output 为 array 在第 0 维增加一个维度，shape 转为 [1,512,512,3]
53 |     """
54 |     if axis == 0:
55 |         output = array[None, :, :, :]
56 |     elif axis == 1:
57 |         output = array[:, None, :, :]
58 |     elif axis == 2:
59 |         output = array[:, :, None, :]
60 |     else:
61 |         output = array[:, :, :, None]
62 | 
63 |     return output
64 | 


--------------------------------------------------------------------------------
/hivision/creator/utils.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | r"""
  4 | @DATE: 2024/9/5 19:25
  5 | @File: utils.py
  6 | @IDE: pycharm
  7 | @Description:
  8 |     通用图像处理工具
  9 | """
 10 | import cv2
 11 | import numpy as np
 12 | 
 13 | 
 14 | def resize_image_esp(input_image, esp=2000):
 15 |     """
 16 |     输入：
 17 |     input_path：numpy 图片
 18 |     esp：限制的最大边长
 19 |     """
 20 |     # resize 函数=>可以让原图压缩到最大边为 esp 的尺寸 (不改变比例)
 21 |     width = input_image.shape[0]
 22 | 
 23 |     length = input_image.shape[1]
 24 |     max_num = max(width, length)
 25 | 
 26 |     if max_num > esp:
 27 |         print("Image resizing...")
 28 |         if width == max_num:
 29 |             length = int((esp / width) * length)
 30 |             width = esp
 31 | 
 32 |         else:
 33 |             width = int((esp / length) * width)
 34 |             length = esp
 35 |         print(length, width)
 36 |         im_resize = cv2.resize(
 37 |             input_image, (length, width), interpolation=cv2.INTER_AREA
 38 |         )
 39 |         return im_resize
 40 |     else:
 41 |         return input_image
 42 | 
 43 | 
 44 | def get_box(
 45 |     image: np.ndarray,
 46 |     model: int = 1,
 47 |     correction_factor=None,
 48 |     thresh: int = 127,
 49 | ):
 50 |     """
 51 |     本函数能够实现输入一张四通道图像，返回图像中最大连续非透明面积的区域的矩形坐标
 52 |     本函数将采用 opencv 内置函数来解析整个图像的 mask，并提供一些参数，用于读取图像的位置信息
 53 |     Args:
 54 |         image: 四通道矩阵图像
 55 |         model: 返回值模式
 56 |         correction_factor: 提供一些边缘扩张接口，输入格式为 list 或者 int:[up, down, left, right]。
 57 |                     举个例子，假设我们希望剪切出的矩形框左边能够偏左 1 个像素，则输入 [0, 0, 1, 0]；
 58 |                         如果希望右边偏右 1 个像素，则输入 [0, 0, 0, 1]
 59 |                     如果输入为 int，则默认只会对左右两边做拓展，比如输入 2，则和 [0, 0, 2, 2] 是等效的
 60 |         thresh: 二值化阈值，为了保持一些羽化效果，thresh 必须要小
 61 |     Returns:
 62 |         model 为 1 时，将会返回切割出的矩形框的四个坐标点信息
 63 |         model 为 2 时，将会返回矩形框四边相距于原图四边的距离
 64 |     """
 65 |     # ------------ 数据格式规范部分 -------------- #
 66 |     # 输入必须为四通道
 67 |     if correction_factor is None:
 68 |         correction_factor = [0, 0, 0, 0]
 69 |     if not isinstance(image, np.ndarray) or len(cv2.split(image)) != 4:
 70 |         raise TypeError("输入的图像必须为四通道 np.ndarray 类型矩阵！")
 71 |     # correction_factor 规范化
 72 |     if isinstance(correction_factor, int):
 73 |         correction_factor = [0, 0, correction_factor, correction_factor]
 74 |     elif not isinstance(correction_factor, list):
 75 |         raise TypeError("correction_factor 必须为 int 或者 list 类型！")
 76 |     # ------------ 数据格式规范完毕 -------------- #
 77 |     # 分离 mask
 78 |     _, _, _, mask = cv2.split(image)
 79 |     # mask 二值化处理
 80 |     _, mask = cv2.threshold(mask, thresh=thresh, maxval=255, type=0)
 81 |     contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
 82 |     temp = np.ones(image.shape, np.uint8) * 255
 83 |     cv2.drawContours(temp, contours, -1, (0, 0, 255), -1)
 84 |     contours_area = []
 85 |     for cnt in contours:
 86 |         contours_area.append(cv2.contourArea(cnt))
 87 |     idx = contours_area.index(max(contours_area))
 88 |     x, y, w, h = cv2.boundingRect(contours[idx])  # 框出图像
 89 |     # ------------ 开始输出数据 -------------- #
 90 |     height, width, _ = image.shape
 91 |     y_up = y - correction_factor[0] if y - correction_factor[0] >= 0 else 0
 92 |     y_down = (
 93 |         y + h + correction_factor[1]
 94 |         if y + h + correction_factor[1] < height
 95 |         else height - 1
 96 |     )
 97 |     x_left = x - correction_factor[2] if x - correction_factor[2] >= 0 else 0
 98 |     x_right = (
 99 |         x + w + correction_factor[3]
100 |         if x + w + correction_factor[3] < width
101 |         else width - 1
102 |     )
103 |     if model == 1:
104 |         # model=1，将会返回切割出的矩形框的四个坐标点信息
105 |         return [y_up, y_down, x_left, x_right]
106 |     elif model == 2:
107 |         # model=2, 将会返回矩形框四边相距于原图四边的距离
108 |         return [y_up, height - y_down, x_left, width - x_right]
109 |     else:
110 |         raise EOFError("请选择正确的模式！")
111 | 
112 | 
113 | def detect_distance(value, crop_height, max=0.06, min=0.04):
114 |     """
115 |     检测人头顶与照片顶部的距离是否在适当范围内。
116 |     输入：与顶部的差值
117 |     输出：(status, move_value)
118 |     status=0 不动
119 |     status=1 人脸应向上移动（裁剪框向下移动）
120 |     status-2 人脸应向下移动（裁剪框向上移动）
121 |     ---------------------------------------
122 |     value：头顶与照片顶部的距离
123 |     crop_height: 裁剪框的高度
124 |     max: 距离的最大值
125 |     min: 距离的最小值
126 |     ---------------------------------------
127 |     """
128 |     value = value / crop_height  # 头顶往上的像素占图像的比例
129 |     if min <= value <= max:
130 |         return 0, 0
131 |     elif value > max:
132 |         # 头顶往上的像素比例高于 max
133 |         move_value = value - max
134 |         move_value = int(move_value * crop_height)
135 |         # print("上移{}".format(move_value))
136 |         return 1, move_value
137 |     else:
138 |         # 头顶往上的像素比例低于 min
139 |         move_value = min - value
140 |         move_value = int(move_value * crop_height)
141 |         # print("下移{}".format(move_value))
142 |         return -1, move_value
143 | 
144 | 
145 | def cutting_rect_pan(
146 |     x1, y1, x2, y2, width, height, L1, L2, L3, clockwise, standard_size
147 | ):
148 |     """
149 |     本函数的功能是对旋转矫正结果图的裁剪框进行修正 ———— 解决"旋转三角形"现象。
150 |     Args:
151 |         - x1: int, 裁剪框左上角的横坐标
152 |         - y1: int, 裁剪框左上角的纵坐标
153 |         - x2: int, 裁剪框右下角的横坐标
154 |         - y2: int, 裁剪框右下角的纵坐标
155 |         - width: int, 待裁剪图的宽度
156 |         - height:int, 待裁剪图的高度
157 |         - L1: CLassObject, 根据旋转点连线所构造函数
158 |         - L2: CLassObject, 根据旋转点连线所构造函数
159 |         - L3: ClassObject, 一个特殊裁切点的坐标
160 |         - clockwise: int, 旋转时针状态
161 |         - standard_size: tuple, 标准照的尺寸
162 | 
163 |     Returns:
164 |         - x1: int, 新的裁剪框左上角的横坐标
165 |         - y1: int, 新的裁剪框左上角的纵坐标
166 |         - x2: int, 新的裁剪框右下角的横坐标
167 |         - y2: int, 新的裁剪框右下角的纵坐标
168 |         - x_bias: int, 裁剪框横坐标方向上的计算偏置量
169 |         - y_bias: int, 裁剪框纵坐标方向上的计算偏置量
170 |     """
171 |     # 用于计算的裁剪框坐标x1_cal,x2_cal,y1_cal,y2_cal(如果裁剪框超出了图像范围，则缩小直至在范围内)
172 |     x1_std = x1 if x1 > 0 else 0
173 |     x2_std = x2 if x2 < width else width
174 |     # y1_std = y1 if y1 > 0 else 0
175 |     y2_std = y2 if y2 < height else height
176 | 
177 |     # 初始化x和y的计算偏置项x_bias和y_bias
178 |     x_bias = 0
179 |     y_bias = 0
180 | 
181 |     # 如果顺时针偏转
182 |     if clockwise == 1:
183 |         if y2 > L1.forward_x(x1_std):
184 |             y_bias = int(-(y2_std - L1.forward_x(x1_std)))
185 |         if y2 > L2.forward_x(x2_std):
186 |             x_bias = int(-(x2_std - L2.forward_y(y2_std)))
187 |         x2 = x2_std + x_bias
188 |         if x1 < L3.x:
189 |             x1 = L3.x
190 |     # 如果逆时针偏转
191 |     else:
192 |         if y2 > L1.forward_x(x1_std):
193 |             x_bias = int(L1.forward_y(y2_std) - x1_std)
194 |         if y2 > L2.forward_x(x2_std):
195 |             y_bias = int(-(y2_std - L2.forward_x(x2_std)))
196 |         x1 = x1_std + x_bias
197 |         if x2 > L3.x:
198 |             x2 = L3.x
199 | 
200 |     # 计算裁剪框的y的变化
201 |     y2 = int(y2_std + y_bias)
202 |     new_cut_width = x2 - x1
203 |     new_cut_height = int(new_cut_width / standard_size[1] * standard_size[0])
204 |     y1 = y2 - new_cut_height
205 | 
206 |     return x1, y1, x2, y2, x_bias, y_bias
207 | 


--------------------------------------------------------------------------------
/hivision/creator/weights/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/hivision/creator/weights/.gitkeep


--------------------------------------------------------------------------------
/hivision/error.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | r"""
 4 | @DATE: 2024/9/5 18:32
 5 | @File: error.py
 6 | @IDE: pycharm
 7 | @Description:
 8 |     错误处理
 9 | """
10 | 
11 | 
12 | class FaceError(Exception):
13 |     def __init__(self, err, face_num):
14 |         """
15 |         证件照人脸错误，此时人脸检测失败，可能是没有检测到人脸或者检测到多个人脸
16 |         Args:
17 |             err: 错误描述
18 |             face_num: 告诉此时识别到的人像个数
19 |         """
20 |         super().__init__(err)
21 |         self.face_num = face_num
22 | 
23 | 
24 | class APIError(Exception):
25 |     def __init__(self, err, status_code):
26 |         """
27 |         API错误
28 |         Args:
29 |             err: 错误描述
30 |             status_code: 告诉此时的错误状态码
31 |         """
32 |         super().__init__(err)
33 |         self.status_code = status_code
34 | 


--------------------------------------------------------------------------------
/hivision/plugin/beauty/__init__.py:
--------------------------------------------------------------------------------
1 | from .beauty_tools import BeautyTools
2 | 


--------------------------------------------------------------------------------
/hivision/plugin/beauty/base_adjust.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 亮度、对比度、锐化、饱和度调整模块
  3 | """
  4 | 
  5 | import cv2
  6 | import numpy as np
  7 | 
  8 | 
  9 | def adjust_brightness_contrast_sharpen_saturation(
 10 |     image,
 11 |     brightness_factor=0,
 12 |     contrast_factor=0,
 13 |     sharpen_strength=0,
 14 |     saturation_factor=0,
 15 | ):
 16 |     """
 17 |     调整图像的亮度、对比度、锐度和饱和度。
 18 | 
 19 |     参数:
 20 |     image (numpy.ndarray): 输入的图像数组。
 21 |     brightness_factor (float): 亮度调整因子。大于0增加亮度，小于0降低亮度。
 22 |     contrast_factor (float): 对比度调整因子。大于0增加对比度，小于0降低对比度。
 23 |     sharpen_strength (float): 锐化强度。
 24 |     saturation_factor (float): 饱和度调整因子。大于0增加饱和度，小于0降低饱和度。
 25 | 
 26 |     返回:
 27 |     numpy.ndarray: 调整后的图像。
 28 |     """
 29 |     if (
 30 |         brightness_factor == 0
 31 |         and contrast_factor == 0
 32 |         and sharpen_strength == 0
 33 |         and saturation_factor == 0
 34 |     ):
 35 |         return image.copy()
 36 | 
 37 |     adjusted_image = image.copy()
 38 | 
 39 |     # 调整饱和度
 40 |     if saturation_factor != 0:
 41 |         adjusted_image = adjust_saturation(adjusted_image, saturation_factor)
 42 | 
 43 |     # 调整亮度和对比度
 44 |     alpha = 1.0 + (contrast_factor / 100.0)
 45 |     beta = brightness_factor
 46 |     adjusted_image = cv2.convertScaleAbs(adjusted_image, alpha=alpha, beta=beta)
 47 | 
 48 |     # 增强锐化
 49 |     adjusted_image = sharpen_image(adjusted_image, sharpen_strength)
 50 | 
 51 |     return adjusted_image
 52 | 
 53 | 
 54 | def adjust_saturation(image, saturation_factor):
 55 |     """
 56 |     调整图像的饱和度。
 57 | 
 58 |     参数:
 59 |     image (numpy.ndarray): 输入的图像数组。
 60 |     saturation_factor (float): 饱和度调整因子。大于0增加饱和度，小于0降低饱和度。
 61 | 
 62 |     返回:
 63 |     numpy.ndarray: 调整后的图像。
 64 |     """
 65 |     hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
 66 |     h, s, v = cv2.split(hsv)
 67 |     s = s.astype(np.float32)
 68 |     s = s + s * (saturation_factor / 100.0)
 69 |     s = np.clip(s, 0, 255).astype(np.uint8)
 70 |     hsv = cv2.merge([h, s, v])
 71 |     return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
 72 | 
 73 | 
 74 | def sharpen_image(image, strength=0):
 75 |     """
 76 |     对图像进行锐化处理。
 77 | 
 78 |     参数:
 79 |     image (numpy.ndarray): 输入的图像数组。
 80 |     strength (float): 锐化强度，范围建议为0-5。0表示不进行锐化。
 81 | 
 82 |     返回:
 83 |     numpy.ndarray: 锐化后的图像。
 84 |     """
 85 |     print(f"Sharpen strength: {strength}")
 86 |     if strength == 0:
 87 |         return image.copy()
 88 | 
 89 |     strength = strength * 20
 90 |     kernel_strength = 1 + (strength / 500)
 91 | 
 92 |     kernel = (
 93 |         np.array([[-0.5, -0.5, -0.5], [-0.5, 5, -0.5], [-0.5, -0.5, -0.5]])
 94 |         * kernel_strength
 95 |     )
 96 | 
 97 |     sharpened = cv2.filter2D(image, -1, kernel)
 98 |     sharpened = np.clip(sharpened, 0, 255).astype(np.uint8)
 99 | 
100 |     alpha = strength / 200
101 |     blended = cv2.addWeighted(image, 1 - alpha, sharpened, alpha, 0)
102 | 
103 |     return blended
104 | 
105 | 
106 | # Gradio接口
107 | def base_adjustment(image, brightness, contrast, sharpen, saturation):
108 |     adjusted = adjust_brightness_contrast_sharpen_saturation(
109 |         image, brightness, contrast, sharpen, saturation
110 |     )
111 |     return adjusted
112 | 
113 | 
114 | if __name__ == "__main__":
115 |     import gradio as gr
116 | 
117 |     iface = gr.Interface(
118 |         fn=base_adjustment,
119 |         inputs=[
120 |             gr.Image(label="Input Image", height=400),
121 |             gr.Slider(
122 |                 minimum=-20,
123 |                 maximum=20,
124 |                 value=0,
125 |                 step=1,
126 |                 label="Brightness",
127 |             ),
128 |             gr.Slider(
129 |                 minimum=-100,
130 |                 maximum=100,
131 |                 value=0,
132 |                 step=1,
133 |                 label="Contrast",
134 |             ),
135 |             gr.Slider(
136 |                 minimum=0,
137 |                 maximum=5,
138 |                 value=0,
139 |                 step=1,
140 |                 label="Sharpen",
141 |             ),
142 |             gr.Slider(
143 |                 minimum=-100,
144 |                 maximum=100,
145 |                 value=0,
146 |                 step=1,
147 |                 label="Saturation",
148 |             ),
149 |         ],
150 |         outputs=gr.Image(label="Adjusted Image"),
151 |         title="Image Adjustment",
152 |         description="Adjust the brightness, contrast, sharpness, and saturation of an image using sliders.",
153 |     )
154 |     iface.launch()
155 | 


--------------------------------------------------------------------------------
/hivision/plugin/beauty/beauty_tools.py:
--------------------------------------------------------------------------------
 1 | """
 2 | @author: cuny
 3 | @file: MakeBeautiful.py
 4 | @time: 2022/7/7 20:23
 5 | @description: 
 6 | 美颜工具集合文件，作为暴露在外的插件接口
 7 | """
 8 | 
 9 | from .grind_skin import grindSkin
10 | from .whitening import MakeWhiter
11 | from .thin_face import thinFace
12 | import numpy as np
13 | 
14 | 
15 | def BeautyTools(
16 |     input_image: np.ndarray,
17 |     landmark,
18 |     thinStrength: int,
19 |     thinPlace: int,
20 |     grindStrength: int,
21 |     whiterStrength: int,
22 | ) -> np.ndarray:
23 |     """
24 |     美颜工具的接口函数，用于实现美颜效果
25 |     Args:
26 |         input_image: 输入的图像
27 |         landmark: 瘦脸需要的人脸关键点信息，为fd68返回的第二个参数
28 |         thinStrength: 瘦脸强度，为0-10（如果更高其实也没什么问题），当强度为0或者更低时，则不瘦脸
29 |         thinPlace: 选择瘦脸区域，为0-2之间的值，越大瘦脸的点越靠下
30 |         grindStrength: 磨皮强度，为0-10（如果更高其实也没什么问题），当强度为0或者更低时，则不磨皮
31 |         whiterStrength: 美白强度，为0-10（如果更高其实也没什么问题），当强度为0或者更低时，则不美白
32 |     Returns:
33 |         output_image 输出图像
34 |     """
35 |     try:
36 |         _, _, _ = input_image.shape
37 |     except ValueError:
38 |         raise TypeError("输入图像必须为3通道或者4通道!")
39 |     # 三通道或者四通道图像
40 |     # 首先进行瘦脸
41 |     input_image = thinFace(
42 |         input_image, landmark, place=thinPlace, strength=thinStrength
43 |     )
44 |     # 其次进行磨皮
45 |     input_image = grindSkin(src=input_image, strength=grindStrength)
46 |     # 最后进行美白
47 |     makeWhiter = MakeWhiter()
48 |     input_image = makeWhiter.run(input_image, strength=whiterStrength)
49 |     return input_image
50 | 


--------------------------------------------------------------------------------
/hivision/plugin/beauty/grind_skin.py:
--------------------------------------------------------------------------------
  1 | # Required Libraries
  2 | import cv2
  3 | import numpy as np
  4 | import gradio as gr
  5 | 
  6 | 
  7 | def annotate_image(image, grind_degree, detail_degree, strength):
  8 |     """Annotates the image with parameters in the lower-left corner."""
  9 |     font = cv2.FONT_HERSHEY_SIMPLEX
 10 |     font_scale = 0.5
 11 |     color = (0, 0, 255)
 12 |     thickness = 1
 13 |     line_type = cv2.LINE_AA
 14 | 
 15 |     # Text positions
 16 |     y_offset = 20
 17 |     x_offset = 10
 18 |     y_base = image.shape[0] - 10
 19 | 
 20 |     # Define each line of the annotation
 21 |     lines = [
 22 |         f"Grind Degree: {grind_degree}",
 23 |         f"Detail Degree: {detail_degree}",
 24 |         f"Strength: {strength}",
 25 |     ]
 26 | 
 27 |     # Draw the text lines on the image
 28 |     for i, line in enumerate(lines):
 29 |         y_position = y_base - (i * y_offset)
 30 |         cv2.putText(
 31 |             image,
 32 |             line,
 33 |             (x_offset, y_position),
 34 |             font,
 35 |             font_scale,
 36 |             color,
 37 |             thickness,
 38 |             line_type,
 39 |         )
 40 | 
 41 |     return image
 42 | 
 43 | 
 44 | def grindSkin(src, grindDegree: int = 3, detailDegree: int = 1, strength: int = 9):
 45 |     """
 46 |     Dest =(Src * (100 - Opacity) + (Src + 2 * GaussBlur(EPFFilter(Src) - Src)) * Opacity) / 100
 47 |     人像磨皮方案
 48 |     Args:
 49 |         src: 原图
 50 |         grindDegree: 磨皮程度调节参数
 51 |         detailDegree: 细节程度调节参数
 52 |         strength: 融合程度，作为磨皮强度（0 - 10）
 53 | 
 54 |     Returns:
 55 |         磨皮后的图像
 56 |     """
 57 |     if strength <= 0:
 58 |         return src
 59 |     dst = src.copy()
 60 |     opacity = min(10.0, strength) / 10.0
 61 |     dx = grindDegree * 5
 62 |     fc = grindDegree * 12.5
 63 |     temp1 = cv2.bilateralFilter(src[:, :, :3], dx, fc, fc)
 64 |     temp2 = cv2.subtract(temp1, src[:, :, :3])
 65 |     temp3 = cv2.GaussianBlur(temp2, (2 * detailDegree - 1, 2 * detailDegree - 1), 0)
 66 |     temp4 = cv2.add(cv2.add(temp3, temp3), src[:, :, :3])
 67 |     dst[:, :, :3] = cv2.addWeighted(temp4, opacity, src[:, :, :3], 1 - opacity, 0.0)
 68 |     return dst
 69 | 
 70 | 
 71 | def process_image(input_img, grind_degree, detail_degree, strength):
 72 |     # Reading the image using OpenCV
 73 |     img = cv2.cvtColor(input_img, cv2.COLOR_RGB2BGR)
 74 |     # Processing the image
 75 |     output_img = grindSkin(img, grind_degree, detail_degree, strength)
 76 |     # Annotating the processed image with parameters
 77 |     output_img_annotated = annotate_image(
 78 |         output_img.copy(), grind_degree, detail_degree, strength
 79 |     )
 80 |     # Horizontal stacking of input and processed images
 81 |     combined_img = cv2.hconcat([img, output_img_annotated])
 82 |     # Convert the combined image back to RGB for display
 83 |     combined_img_rgb = cv2.cvtColor(combined_img, cv2.COLOR_BGR2RGB)
 84 |     return combined_img_rgb
 85 | 
 86 | 
 87 | with gr.Blocks(title="Skin Grinding") as iface:
 88 |     gr.Markdown("## Skin Grinding Application")
 89 | 
 90 |     with gr.Row():
 91 |         image_input = gr.Image(type="numpy", label="Input Image")
 92 |         image_output = gr.Image(label="Output Image")
 93 | 
 94 |     grind_degree_slider = gr.Slider(
 95 |         minimum=1, maximum=10, value=3, step=1, label="Grind Degree"
 96 |     )
 97 |     detail_degree_slider = gr.Slider(
 98 |         minimum=1, maximum=10, value=1, step=1, label="Detail Degree"
 99 |     )
100 |     strength_slider = gr.Slider(
101 |         minimum=0, maximum=10, value=9, step=1, label="Strength"
102 |     )
103 | 
104 |     gr.Button("Process Image").click(
105 |         fn=process_image,
106 |         inputs=[
107 |             image_input,
108 |             grind_degree_slider,
109 |             detail_degree_slider,
110 |             strength_slider,
111 |         ],
112 |         outputs=image_output,
113 |     )
114 | 
115 | if __name__ == "__main__":
116 |     iface.launch()
117 | 


--------------------------------------------------------------------------------
/hivision/plugin/beauty/handler.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | from hivision.creator.context import Context
 3 | from hivision.plugin.beauty.whitening import make_whitening
 4 | from hivision.plugin.beauty.base_adjust import (
 5 |     adjust_brightness_contrast_sharpen_saturation,
 6 | )
 7 | 
 8 | 
 9 | def beauty_face(ctx: Context):
10 |     """
11 |     对人脸进行美颜处理
12 |     1. 美白
13 |     2. 亮度
14 | 
15 |     :param ctx: Context对象，包含处理参数和图像
16 |     """
17 |     middle_image = ctx.origin_image.copy()
18 |     processed = False
19 | 
20 |     # 如果美白强度大于0，进行美白处理
21 |     if ctx.params.whitening_strength > 0:
22 |         middle_image = make_whitening(middle_image, ctx.params.whitening_strength)
23 |         processed = True
24 | 
25 |     # 如果亮度、对比度、锐化强度不为0，进行亮度、对比度、锐化处理
26 |     if (
27 |         ctx.params.brightness_strength != 0
28 |         or ctx.params.contrast_strength != 0
29 |         or ctx.params.sharpen_strength != 0
30 |         or ctx.params.saturation_strength != 0
31 |     ):
32 |         middle_image = adjust_brightness_contrast_sharpen_saturation(
33 |             middle_image,
34 |             ctx.params.brightness_strength,
35 |             ctx.params.contrast_strength,
36 |             ctx.params.sharpen_strength,
37 |             ctx.params.saturation_strength,
38 |         )
39 |         processed = True
40 | 
41 |     # 如果进行了美颜处理，更新matting_image
42 |     if processed:
43 |         # 分离中间图像的BGR通道
44 |         b, g, r = cv2.split(middle_image)
45 |         # 从原始matting_image中获取alpha通道
46 |         _, _, _, alpha = cv2.split(ctx.matting_image)
47 |         # 合并处理后的BGR通道和原始alpha通道
48 |         ctx.matting_image = cv2.merge((b, g, r, alpha))
49 | 


--------------------------------------------------------------------------------
/hivision/plugin/beauty/lut/lut_origin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/hivision/plugin/beauty/lut/lut_origin.png


--------------------------------------------------------------------------------
/hivision/plugin/beauty/thin_face.py:
--------------------------------------------------------------------------------
  1 | """
  2 | @author: cuny
  3 | @file: ThinFace.py
  4 | @time: 2022/7/2 15:50
  5 | @description:
  6 | 瘦脸算法，用到了图像局部平移法
  7 | 先使用人脸关键点检测，然后再使用图像局部平移法
  8 | 需要注意的是，这部分不会包含dlib人脸关键点检测，因为考虑到模型载入的问题
  9 | """
 10 | 
 11 | import cv2
 12 | import math
 13 | import numpy as np
 14 | 
 15 | 
 16 | class TranslationWarp(object):
 17 |     """
 18 |     本类包含瘦脸算法，由于瘦脸算法包含了很多个版本，所以以类的方式呈现
 19 |     前两个算法没什么好讲的，网上资料很多
 20 |     第三个采用numpy内部的自定义函数处理，在处理速度上有一些提升
 21 |     最后采用cv2.map算法，处理速度大幅度提升
 22 |     """
 23 | 
 24 |     # 瘦脸
 25 |     @staticmethod
 26 |     def localTranslationWarp(srcImg, startX, startY, endX, endY, radius):
 27 |         # 双线性插值法
 28 |         def BilinearInsert(src, ux, uy):
 29 |             w, h, c = src.shape
 30 |             if c == 3:
 31 |                 x1 = int(ux)
 32 |                 x2 = x1 + 1
 33 |                 y1 = int(uy)
 34 |                 y2 = y1 + 1
 35 |                 part1 = (
 36 |                     src[y1, x1].astype(np.float64) * (float(x2) - ux) * (float(y2) - uy)
 37 |                 )
 38 |                 part2 = (
 39 |                     src[y1, x2].astype(np.float64) * (ux - float(x1)) * (float(y2) - uy)
 40 |                 )
 41 |                 part3 = (
 42 |                     src[y2, x1].astype(np.float64) * (float(x2) - ux) * (uy - float(y1))
 43 |                 )
 44 |                 part4 = (
 45 |                     src[y2, x2].astype(np.float64) * (ux - float(x1)) * (uy - float(y1))
 46 |                 )
 47 |                 insertValue = part1 + part2 + part3 + part4
 48 |                 return insertValue.astype(np.int8)
 49 | 
 50 |         ddradius = float(radius * radius)  # 圆的半径
 51 |         copyImg = srcImg.copy()  # copy后的图像矩阵
 52 |         # 计算公式中的|m-c|^2
 53 |         ddmc = (endX - startX) * (endX - startX) + (endY - startY) * (endY - startY)
 54 |         H, W, C = srcImg.shape  # 获取图像的形状
 55 |         for i in range(W):
 56 |             for j in range(H):
 57 |                 # # 计算该点是否在形变圆的范围之内
 58 |                 # # 优化，第一步，直接判断是会在（startX,startY)的矩阵框中
 59 |                 if math.fabs(i - startX) > radius and math.fabs(j - startY) > radius:
 60 |                     continue
 61 |                 distance = (i - startX) * (i - startX) + (j - startY) * (j - startY)
 62 |                 if distance < ddradius:
 63 |                     # 计算出（i,j）坐标的原坐标
 64 |                     # 计算公式中右边平方号里的部分
 65 |                     ratio = (ddradius - distance) / (ddradius - distance + ddmc)
 66 |                     ratio = ratio * ratio
 67 |                     # 映射原位置
 68 |                     UX = i - ratio * (endX - startX)
 69 |                     UY = j - ratio * (endY - startY)
 70 | 
 71 |                     # 根据双线性插值法得到UX，UY的值
 72 |                     # start_ = time.time()
 73 |                     value = BilinearInsert(srcImg, UX, UY)
 74 |                     # print(f"双线性插值耗时;{time.time() - start_}")
 75 |                     # 改变当前 i ，j的值
 76 |                     copyImg[j, i] = value
 77 |         return copyImg
 78 | 
 79 |     # 瘦脸pro1, 限制了for循环的遍历次数
 80 |     @staticmethod
 81 |     def localTranslationWarpLimitFor(
 82 |         srcImg, startP: np.matrix, endP: np.matrix, radius: float
 83 |     ):
 84 |         startX, startY = startP[0, 0], startP[0, 1]
 85 |         endX, endY = endP[0, 0], endP[0, 1]
 86 | 
 87 |         # 双线性插值法
 88 |         def BilinearInsert(src, ux, uy):
 89 |             w, h, c = src.shape
 90 |             if c == 3:
 91 |                 x1 = int(ux)
 92 |                 x2 = x1 + 1
 93 |                 y1 = int(uy)
 94 |                 y2 = y1 + 1
 95 |                 part1 = (
 96 |                     src[y1, x1].astype(np.float64) * (float(x2) - ux) * (float(y2) - uy)
 97 |                 )
 98 |                 part2 = (
 99 |                     src[y1, x2].astype(np.float64) * (ux - float(x1)) * (float(y2) - uy)
100 |                 )
101 |                 part3 = (
102 |                     src[y2, x1].astype(np.float64) * (float(x2) - ux) * (uy - float(y1))
103 |                 )
104 |                 part4 = (
105 |                     src[y2, x2].astype(np.float64) * (ux - float(x1)) * (uy - float(y1))
106 |                 )
107 |                 insertValue = part1 + part2 + part3 + part4
108 |                 return insertValue.astype(np.int8)
109 | 
110 |         ddradius = float(radius * radius)  # 圆的半径
111 |         copyImg = srcImg.copy()  # copy后的图像矩阵
112 |         # 计算公式中的|m-c|^2
113 |         ddmc = (endX - startX) ** 2 + (endY - startY) ** 2
114 |         # 计算正方形的左上角起始点
115 |         startTX, startTY = (
116 |             startX - math.floor(radius + 1),
117 |             startY - math.floor((radius + 1)),
118 |         )
119 |         # 计算正方形的右下角的结束点
120 |         endTX, endTY = (
121 |             startX + math.floor(radius + 1),
122 |             startY + math.floor((radius + 1)),
123 |         )
124 |         # 剪切srcImg
125 |         srcImg = srcImg[startTY : endTY + 1, startTX : endTX + 1, :]
126 |         # db.cv_show(srcImg)
127 |         # 裁剪后的图像相当于在x,y都减少了startX - math.floor(radius + 1)
128 |         # 原本的endX, endY在切后的坐标点
129 |         endX, endY = (
130 |             endX - startX + math.floor(radius + 1),
131 |             endY - startY + math.floor(radius + 1),
132 |         )
133 |         # 原本的startX, startY剪切后的坐标点
134 |         startX, startY = (math.floor(radius + 1), math.floor(radius + 1))
135 |         H, W, C = srcImg.shape  # 获取图像的形状
136 |         for i in range(W):
137 |             for j in range(H):
138 |                 # 计算该点是否在形变圆的范围之内
139 |                 # 优化，第一步，直接判断是会在（startX,startY)的矩阵框中
140 |                 # if math.fabs(i - startX) > radius and math.fabs(j - startY) > radius:
141 |                 #     continue
142 |                 distance = (i - startX) * (i - startX) + (j - startY) * (j - startY)
143 |                 if distance < ddradius:
144 |                     # 计算出（i,j）坐标的原坐标
145 |                     # 计算公式中右边平方号里的部分
146 |                     ratio = (ddradius - distance) / (ddradius - distance + ddmc)
147 |                     ratio = ratio * ratio
148 |                     # 映射原位置
149 |                     UX = i - ratio * (endX - startX)
150 |                     UY = j - ratio * (endY - startY)
151 | 
152 |                     # 根据双线性插值法得到UX，UY的值
153 |                     # start_ = time.time()
154 |                     value = BilinearInsert(srcImg, UX, UY)
155 |                     # print(f"双线性插值耗时;{time.time() - start_}")
156 |                     # 改变当前 i ，j的值
157 |                     copyImg[j + startTY, i + startTX] = value
158 |         return copyImg
159 | 
160 |     # # 瘦脸pro2,采用了numpy自定义函数做处理
161 |     # def localTranslationWarpNumpy(self, srcImg, startP: np.matrix, endP: np.matrix, radius: float):
162 |     #     startX , startY = startP[0, 0], startP[0, 1]
163 |     #     endX, endY = endP[0, 0], endP[0, 1]
164 |     #     ddradius = float(radius * radius)  # 圆的半径
165 |     #     copyImg = srcImg.copy()  # copy后的图像矩阵
166 |     #     # 计算公式中的|m-c|^2
167 |     #     ddmc = (endX - startX)**2 + (endY - startY)**2
168 |     #     # 计算正方形的左上角起始点
169 |     #     startTX, startTY = (startX - math.floor(radius + 1), startY - math.floor((radius + 1)))
170 |     #     # 计算正方形的右下角的结束点
171 |     #     endTX, endTY = (startX + math.floor(radius + 1), startY + math.floor((radius + 1)))
172 |     #     # 剪切srcImg
173 |     #     self.thinImage = srcImg[startTY : endTY + 1, startTX : endTX + 1, :]
174 |     #     # s = self.thinImage
175 |     #     # db.cv_show(srcImg)
176 |     #     # 裁剪后的图像相当于在x,y都减少了startX - math.floor(radius + 1)
177 |     #     # 原本的endX, endY在切后的坐标点
178 |     #     endX, endY = (endX - startX + math.floor(radius + 1), endY - startY + math.floor(radius + 1))
179 |     #     # 原本的startX, startY剪切后的坐标点
180 |     #     startX ,startY = (math.floor(radius + 1), math.floor(radius + 1))
181 |     #     H, W, C = self.thinImage.shape  # 获取图像的形状
182 |     #     index_m = np.arange(H * W).reshape((H, W))
183 |     #     triangle_ufunc = np.frompyfunc(self.process, 9, 3)
184 |     #     # start_ = time.time()
185 |     #     finalImgB, finalImgG, finalImgR = triangle_ufunc(index_m, self, W, ddradius, ddmc, startX, startY, endX, endY)
186 |     #     finaleImg = np.dstack((finalImgB, finalImgG, finalImgR)).astype(np.uint8)
187 |     #     finaleImg = np.fliplr(np.rot90(finaleImg, -1))
188 |     #     copyImg[startTY: endTY + 1, startTX: endTX + 1, :] = finaleImg
189 |     #     # print(f"图像处理耗时;{time.time() - start_}")
190 |     #     # db.cv_show(copyImg)
191 |     #     return copyImg
192 | 
193 |     # 瘦脸pro3,采用opencv内置函数
194 |     @staticmethod
195 |     def localTranslationWarpFastWithStrength(
196 |         srcImg, startP: np.matrix, endP: np.matrix, radius, strength: float = 100.0
197 |     ):
198 |         """
199 |         采用opencv内置函数
200 |         Args:
201 |             srcImg: 源图像
202 |             startP: 起点位置
203 |             endP: 终点位置
204 |             radius: 处理半径
205 |             strength: 瘦脸强度，一般取100以上
206 | 
207 |         Returns:
208 | 
209 |         """
210 |         startX, startY = startP[0, 0], startP[0, 1]
211 |         endX, endY = endP[0, 0], endP[0, 1]
212 |         ddradius = float(radius * radius)
213 |         # copyImg = np.zeros(srcImg.shape, np.uint8)
214 |         # copyImg = srcImg.copy()
215 | 
216 |         maskImg = np.zeros(srcImg.shape[:2], np.uint8)
217 |         cv2.circle(maskImg, (startX, startY), math.ceil(radius), (255, 255, 255), -1)
218 | 
219 |         K0 = 100 / strength
220 | 
221 |         # 计算公式中的|m-c|^2
222 |         ddmc_x = (endX - startX) * (endX - startX)
223 |         ddmc_y = (endY - startY) * (endY - startY)
224 |         H, W, C = srcImg.shape
225 | 
226 |         mapX = np.vstack([np.arange(W).astype(np.float32).reshape(1, -1)] * H)
227 |         mapY = np.hstack([np.arange(H).astype(np.float32).reshape(-1, 1)] * W)
228 | 
229 |         distance_x = (mapX - startX) * (mapX - startX)
230 |         distance_y = (mapY - startY) * (mapY - startY)
231 |         distance = distance_x + distance_y
232 |         K1 = np.sqrt(distance)
233 |         ratio_x = (ddradius - distance_x) / (ddradius - distance_x + K0 * ddmc_x)
234 |         ratio_y = (ddradius - distance_y) / (ddradius - distance_y + K0 * ddmc_y)
235 |         ratio_x = ratio_x * ratio_x
236 |         ratio_y = ratio_y * ratio_y
237 | 
238 |         UX = mapX - ratio_x * (endX - startX) * (1 - K1 / radius)
239 |         UY = mapY - ratio_y * (endY - startY) * (1 - K1 / radius)
240 | 
241 |         np.copyto(UX, mapX, where=maskImg == 0)
242 |         np.copyto(UY, mapY, where=maskImg == 0)
243 |         UX = UX.astype(np.float32)
244 |         UY = UY.astype(np.float32)
245 |         copyImg = cv2.remap(srcImg, UX, UY, interpolation=cv2.INTER_LINEAR)
246 |         return copyImg
247 | 
248 | 
249 | def thinFace(src, landmark, place: int = 0, strength=30.0):
250 |     """
251 |     瘦脸程序接口，输入人脸关键点信息和强度，即可实现瘦脸
252 |     注意处理四通道图像
253 |     Args:
254 |         src: 原图
255 |         landmark: 关键点信息
256 |         place: 选择瘦脸区域，为0-4之间的值
257 |         strength: 瘦脸强度，输入值在0-10之间，如果小于或者等于0，则不瘦脸
258 | 
259 |     Returns:
260 |         瘦脸后的图像
261 |     """
262 |     strength = min(100.0, strength * 10.0)
263 |     if strength <= 0.0:
264 |         return src
265 |     # 也可以设置瘦脸区域
266 |     place = max(0, min(4, int(place)))
267 |     left_landmark = landmark[4 + place]
268 |     left_landmark_down = landmark[6 + place]
269 |     right_landmark = landmark[13 + place]
270 |     right_landmark_down = landmark[15 + place]
271 |     endPt = landmark[58]
272 |     # 计算第4个点到第6个点的距离作为瘦脸距离
273 |     r_left = math.sqrt(
274 |         (left_landmark[0, 0] - left_landmark_down[0, 0]) ** 2
275 |         + (left_landmark[0, 1] - left_landmark_down[0, 1]) ** 2
276 |     )
277 | 
278 |     # 计算第14个点到第16个点的距离作为瘦脸距离
279 |     r_right = math.sqrt(
280 |         (right_landmark[0, 0] - right_landmark_down[0, 0]) ** 2
281 |         + (right_landmark[0, 1] - right_landmark_down[0, 1]) ** 2
282 |     )
283 |     # 瘦左边脸
284 |     thin_image = TranslationWarp.localTranslationWarpFastWithStrength(
285 |         src, left_landmark[0], endPt[0], r_left, strength
286 |     )
287 |     # 瘦右边脸
288 |     thin_image = TranslationWarp.localTranslationWarpFastWithStrength(
289 |         thin_image, right_landmark[0], endPt[0], r_right, strength
290 |     )
291 |     return thin_image
292 | 
293 | 
294 | # if __name__ == "__main__":
295 | #     import os
296 | #     from hycv.FaceDetection68.faceDetection68 import FaceDetection68
297 | 
298 | #     local_file = os.path.dirname(__file__)
299 | #     PREDICTOR_PATH = f"{local_file}/weights/shape_predictor_68_face_landmarks.dat"  # 关键点检测模型路径
300 | #     fd68 = FaceDetection68(model_path=PREDICTOR_PATH)
301 | #     input_image = cv2.imread("test_image/4.jpg", -1)
302 | #     _, landmark_, _ = fd68.facePoints(input_image)
303 | #     output_image = thinFace(input_image, landmark_, strength=30.2)
304 | #     cv2.imwrite("thinFaceCompare.png", np.hstack((input_image, output_image)))
305 | 


--------------------------------------------------------------------------------
/hivision/plugin/beauty/whitening.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | import os
 4 | import gradio as gr
 5 | 
 6 | 
 7 | class LutWhite:
 8 |     CUBE64_ROWS = 8
 9 |     CUBE64_SIZE = 64
10 |     CUBE256_SIZE = 256
11 |     CUBE_SCALE = CUBE256_SIZE // CUBE64_SIZE
12 | 
13 |     def __init__(self, lut_image):
14 |         self.lut = self._create_lut(lut_image)
15 | 
16 |     def _create_lut(self, lut_image):
17 |         reshape_lut = np.zeros(
18 |             (self.CUBE256_SIZE, self.CUBE256_SIZE, self.CUBE256_SIZE, 3), dtype=np.uint8
19 |         )
20 |         for i in range(self.CUBE64_SIZE):
21 |             tmp = i // self.CUBE64_ROWS
22 |             cx = (i % self.CUBE64_ROWS) * self.CUBE64_SIZE
23 |             cy = tmp * self.CUBE64_SIZE
24 |             cube64 = lut_image[cy : cy + self.CUBE64_SIZE, cx : cx + self.CUBE64_SIZE]
25 |             if cube64.size == 0:
26 |                 continue
27 |             cube256 = cv2.resize(cube64, (self.CUBE256_SIZE, self.CUBE256_SIZE))
28 |             reshape_lut[i * self.CUBE_SCALE : (i + 1) * self.CUBE_SCALE] = cube256
29 |         return reshape_lut
30 | 
31 |     def apply(self, src):
32 |         b, g, r = src[:, :, 0], src[:, :, 1], src[:, :, 2]
33 |         return self.lut[b, g, r]
34 | 
35 | 
36 | class MakeWhiter:
37 |     def __init__(self, lut_image):
38 |         self.lut_white = LutWhite(lut_image)
39 | 
40 |     def run(self, src: np.ndarray, strength: int) -> np.ndarray:
41 |         strength = np.clip(strength / 10.0, 0, 1)
42 |         if strength <= 0:
43 |             return src
44 |         img = self.lut_white.apply(src[:, :, :3])
45 |         return cv2.addWeighted(src[:, :, :3], 1 - strength, img, strength, 0)
46 | 
47 | 
48 | base_dir = os.path.dirname(os.path.abspath(__file__))
49 | default_lut = cv2.imread(os.path.join(base_dir, "lut/lut_origin.png"))
50 | make_whiter = MakeWhiter(default_lut)
51 | 
52 | 
53 | def make_whitening(image, strength):
54 |     image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
55 | 
56 |     iteration = strength // 10
57 |     bias = strength % 10
58 | 
59 |     for i in range(iteration):
60 |         image = make_whiter.run(image, 10)
61 | 
62 |     image = make_whiter.run(image, bias)
63 | 
64 |     return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
65 | 
66 | 
67 | def make_whitening_png(image, strength):
68 |     image = cv2.cvtColor(np.array(image), cv2.COLOR_RGBA2BGRA)
69 | 
70 |     b, g, r, a = cv2.split(image)
71 |     bgr_image = cv2.merge((b, g, r))
72 | 
73 |     b_w, g_w, r_w = cv2.split(make_whiter.run(bgr_image, strength))
74 |     output_image = cv2.merge((b_w, g_w, r_w, a))
75 | 
76 |     return cv2.cvtColor(output_image, cv2.COLOR_RGBA2BGRA)
77 | 
78 | 
79 | # 启动Gradio应用
80 | if __name__ == "__main__":
81 |     demo = gr.Interface(
82 |         fn=make_whitening,
83 |         inputs=[
84 |             gr.Image(type="pil", image_mode="RGBA", label="Input Image"),
85 |             gr.Slider(0, 30, step=1, label="Whitening Strength"),
86 |         ],
87 |         outputs=gr.Image(type="pil"),
88 |         title="Image Whitening Demo",
89 |         description="Upload an image and adjust the whitening strength to see the effect.",
90 |     )
91 |     demo.launch()
92 | 


--------------------------------------------------------------------------------
/hivision/plugin/font/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/hivision/plugin/font/.gitkeep


--------------------------------------------------------------------------------
/hivision/plugin/font/青鸟华光简琥珀.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/hivision/plugin/font/青鸟华光简琥珀.ttf


--------------------------------------------------------------------------------
/hivision/plugin/template/assets/template_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/hivision/plugin/template/assets/template_1.png


--------------------------------------------------------------------------------
/hivision/plugin/template/assets/template_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zeyi-Lin/HivisionIDPhotos/d993cfb1d8453383254db6cbce2bab8173ac3ae0/hivision/plugin/template/assets/template_2.png


--------------------------------------------------------------------------------
/hivision/plugin/template/assets/template_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "template_1": {
 3 |         "width": 1080,
 4 |         "height": 1400,
 5 |         "anchor_points": {
 6 |             "left_top": [358, 153],
 7 |             "right_top": [1017, 353],
 8 |             "left_bottom": [56, 1134],
 9 |             "right_bottom": [747, 1332],
10 |             "rotation": -16.42
11 |         }
12 |     },
13 |     "template_2": {
14 |         "width": 1080,
15 |         "height": 1440,
16 |         "anchor_points": {
17 |             "left_top": [199, 199],
18 |             "right_top": [921, 216],
19 |             "left_bottom": [163, 1129],
20 |             "right_bottom": [876, 1153],
21 |             "rotation": -2.2
22 |         }
23 |     }
24 | }


--------------------------------------------------------------------------------
/hivision/plugin/template/template_calculator.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | import json
 4 | from hivision.creator.rotation_adjust import rotate_bound
 5 | import os
 6 | 
 7 | base_path = os.path.dirname(os.path.abspath(__file__))
 8 | template_config_path = os.path.join(base_path, 'assets', 'template_config.json')
 9 | 
10 | def generte_template_photo(template_name: str, input_image: np.ndarray) -> np.ndarray:
11 |     """
12 |     生成模板照片
13 |     :param template_name: 模板名称
14 |     :param input_image: 输入图像
15 |     :return: 模板照片
16 |     """
17 |     # 读取模板配置json
18 |     with open(template_config_path, 'r') as f:
19 |         template_config_dict = json.load(f)
20 |     # 获取对应该模板的配置
21 |     template_config = template_config_dict[template_name]
22 |     
23 |     template_width = template_config['width']
24 |     template_height = template_config['height']
25 | 
26 |     anchor_points = template_config['anchor_points']
27 |     rotation = anchor_points['rotation']
28 |     left_top = anchor_points['left_top']
29 |     right_top = anchor_points['right_top']
30 |     left_bottom = anchor_points['left_bottom']
31 |     right_bottom = anchor_points['right_bottom']
32 | 
33 |     if rotation < 0:
34 |         height = right_bottom[1] - left_top[1]
35 |         width = right_top[0] - left_bottom[0]
36 |     else:
37 |         height = left_top[1] - right_bottom[1]
38 |         width = left_bottom[0] - right_top[0]
39 | 
40 |     # 读取模板图像
41 |     template_image_path = os.path.join(base_path, 'assets', f'{template_name}.png')
42 |     template_image = cv2.imread(template_image_path, cv2.IMREAD_UNCHANGED)
43 | 
44 |     # 无损旋转
45 |     rotated_image = rotate_bound(input_image, -1 * rotation)[0]
46 |     rotated_image_height, rotated_image_width, _ = rotated_image.shape
47 | 
48 |     # 计算缩放比例
49 |     scale_x = width / rotated_image_width
50 |     scale_y = height / rotated_image_height
51 |     scale = max(scale_x, scale_y)
52 | 
53 |     resized_image = cv2.resize(rotated_image, None, fx=scale, fy=scale)
54 |     resized_height, resized_width, _ = resized_image.shape
55 | 
56 |     # 创建一个与template_image大小相同的背景，使用白色填充
57 |     result = np.full((template_height, template_width, 3), 255, dtype=np.uint8)
58 | 
59 |     # 计算粘贴位置
60 |     paste_x = left_bottom[0]
61 |     paste_y = left_top[1]
62 | 
63 |     # 确保不会超出边界
64 |     paste_height = min(resized_height, template_height - paste_y)
65 |     paste_width = min(resized_width, template_width - paste_x)
66 | 
67 |     # 将旋转后的图像粘贴到结果图像上
68 |     result[paste_y:paste_y+paste_height, paste_x:paste_x+paste_width] = resized_image[:paste_height, :paste_width]
69 |     
70 |     template_image = cv2.cvtColor(template_image, cv2.COLOR_BGRA2RGBA)
71 | 
72 |     # 将template_image叠加到结果图像上
73 |     if template_image.shape[2] == 4:  # 确保template_image有alpha通道
74 |         alpha = template_image[:, :, 3] / 255.0
75 |         for c in range(0, 3):
76 |             result[:, :, c] = result[:, :, c] * (1 - alpha) + template_image[:, :, c] * alpha
77 | 
78 |     return result
79 | 


--------------------------------------------------------------------------------
/hivision/plugin/watermark.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Reference: https://gist.github.com/Deali-Axy/e22ea79bfbe785f9017b2e3cd7fdb3eb
  3 | """
  4 | 
  5 | import enum
  6 | import os
  7 | import math
  8 | import textwrap
  9 | from PIL import Image, ImageFont, ImageDraw, ImageEnhance, ImageChops
 10 | import os
 11 | 
 12 | base_path = os.path.abspath(os.path.dirname(__file__))
 13 | 
 14 | 
 15 | class WatermarkerStyles(enum.Enum):
 16 |     """水印样式"""
 17 | 
 18 |     STRIPED = 1  # 斜向重复
 19 |     CENTRAL = 2  # 居中
 20 | 
 21 | 
 22 | class Watermarker(object):
 23 |     """图片水印工具"""
 24 | 
 25 |     def __init__(
 26 |         self,
 27 |         input_image: Image.Image,
 28 |         text: str,
 29 |         style: WatermarkerStyles,
 30 |         angle=30,
 31 |         color="#8B8B1B",
 32 |         font_file="青鸟华光简琥珀.ttf",
 33 |         opacity=0.15,
 34 |         size=50,
 35 |         space=75,
 36 |         chars_per_line=8,
 37 |         font_height_crop=1.2,
 38 |     ):
 39 |         """_summary_
 40 | 
 41 |         Parameters
 42 |         ----------
 43 |         input_image : Image.Image
 44 |             PIL图片对象
 45 |         text : str
 46 |             水印文字
 47 |         style : WatermarkerStyles
 48 |             水印样式
 49 |         angle : int, optional
 50 |             水印角度, by default 30
 51 |         color : str, optional
 52 |             水印颜色, by default "#8B8B1B"
 53 |         font_file : str, optional
 54 |             字体文件, by default "青鸟华光简琥珀.ttf"
 55 |         font_height_crop : float, optional
 56 |             字体高度裁剪比例, by default 1.2
 57 |         opacity : float, optional
 58 |             水印透明度, by default 0.15
 59 |         size : int, optional
 60 |             字体大小, by default 50
 61 |         space : int, optional
 62 |             水印间距, by default 75
 63 |         chars_per_line : int, optional
 64 |             每行字符数, by default 8
 65 |         """
 66 |         self.input_image = input_image
 67 |         self.text = text
 68 |         self.style = style
 69 |         self.angle = angle
 70 |         self.color = color
 71 |         self.font_file = os.path.join(base_path, "font", font_file)
 72 |         self.font_height_crop = font_height_crop
 73 |         self.opacity = opacity
 74 |         self.size = size
 75 |         self.space = space
 76 |         self.chars_per_line = chars_per_line
 77 |         self._result_image = None
 78 | 
 79 |     @staticmethod
 80 |     def set_image_opacity(image: Image, opacity: float):
 81 |         alpha = image.split()[3]
 82 |         alpha = ImageEnhance.Brightness(alpha).enhance(opacity)
 83 |         image.putalpha(alpha)
 84 |         return image
 85 | 
 86 |     @staticmethod
 87 |     def crop_image_edge(image: Image):
 88 |         bg = Image.new(mode="RGBA", size=image.size)
 89 |         diff = ImageChops.difference(image, bg)
 90 |         bbox = diff.getbbox()
 91 |         if bbox:
 92 |             return image.crop(bbox)
 93 |         return image
 94 | 
 95 |     def _add_mark_striped(self):
 96 |         origin_image = self.input_image.convert("RGBA")
 97 |         width = len(self.text) * self.size
 98 |         height = round(self.size * self.font_height_crop)
 99 |         watermark_image = Image.new(mode="RGBA", size=(width, height))
100 |         draw_table = ImageDraw.Draw(watermark_image)
101 |         draw_table.text(
102 |             (0, 0),
103 |             self.text,
104 |             fill=self.color,
105 |             font=ImageFont.truetype(self.font_file, size=self.size),
106 |         )
107 |         watermark_image = Watermarker.crop_image_edge(watermark_image)
108 |         Watermarker.set_image_opacity(watermark_image, self.opacity)
109 | 
110 |         c = int(math.sqrt(origin_image.size[0] ** 2 + origin_image.size[1] ** 2))
111 |         watermark_mask = Image.new(mode="RGBA", size=(c, c))
112 |         y, idx = 0, 0
113 |         while y < c:
114 |             x = -int((watermark_image.size[0] + self.space) * 0.5 * idx)
115 |             idx = (idx + 1) % 2
116 |             while x < c:
117 |                 watermark_mask.paste(watermark_image, (x, y))
118 |                 x += watermark_image.size[0] + self.space
119 |             y += watermark_image.size[1] + self.space
120 | 
121 |         watermark_mask = watermark_mask.rotate(self.angle)
122 |         origin_image.paste(
123 |             watermark_mask,
124 |             (int((origin_image.size[0] - c) / 2), int((origin_image.size[1] - c) / 2)),
125 |             mask=watermark_mask.split()[3],
126 |         )
127 |         return origin_image
128 | 
129 |     def _add_mark_central(self):
130 |         origin_image = self.input_image.convert("RGBA")
131 |         text_lines = textwrap.wrap(self.text, width=self.chars_per_line)
132 |         text = "\n".join(text_lines)
133 |         width = len(text) * self.size
134 |         height = round(self.size * self.font_height_crop * len(text_lines))
135 |         watermark_image = Image.new(mode="RGBA", size=(width, height))
136 |         draw_table = ImageDraw.Draw(watermark_image)
137 |         draw_table.text(
138 |             (0, 0),
139 |             text,
140 |             fill=self.color,
141 |             font=ImageFont.truetype(self.font_file, size=self.size),
142 |         )
143 |         watermark_image = Watermarker.crop_image_edge(watermark_image)
144 |         Watermarker.set_image_opacity(watermark_image, self.opacity)
145 | 
146 |         c = int(math.sqrt(origin_image.size[0] ** 2 + origin_image.size[1] ** 2))
147 |         watermark_mask = Image.new(mode="RGBA", size=(c, c))
148 |         watermark_mask.paste(
149 |             watermark_image,
150 |             (
151 |                 int((watermark_mask.width - watermark_image.width) / 2),
152 |                 int((watermark_mask.height - watermark_image.height) / 2),
153 |             ),
154 |         )
155 |         watermark_mask = watermark_mask.rotate(self.angle)
156 | 
157 |         origin_image.paste(
158 |             watermark_mask,
159 |             (
160 |                 int((origin_image.width - watermark_mask.width) / 2),
161 |                 int((origin_image.height - watermark_mask.height) / 2),
162 |             ),
163 |             mask=watermark_mask.split()[3],
164 |         )
165 |         return origin_image
166 | 
167 |     @property
168 |     def image(self):
169 |         if not self._result_image:
170 |             if self.style == WatermarkerStyles.STRIPED:
171 |                 self._result_image = self._add_mark_striped()
172 |             elif self.style == WatermarkerStyles.CENTRAL:
173 |                 self._result_image = self._add_mark_central()
174 |         return self._result_image
175 | 
176 |     def save(self, file_path: str, image_format: str = "png"):
177 |         with open(file_path, "wb") as f:
178 |             self.image.save(f, image_format)
179 | 
180 | 
181 | # Gradio 接口
182 | def watermark_image(
183 |     image,
184 |     text,
185 |     style,
186 |     angle,
187 |     color,
188 |     opacity,
189 |     size,
190 |     space,
191 | ):
192 |     # 创建 Watermarker 实例
193 |     watermarker = Watermarker(
194 |         input_image=image,
195 |         text=text,
196 |         style=(
197 |             WatermarkerStyles.STRIPED
198 |             if style == "STRIPED"
199 |             else WatermarkerStyles.CENTRAL
200 |         ),
201 |         angle=angle,
202 |         color=color,
203 |         opacity=opacity,
204 |         size=size,
205 |         space=space,
206 |     )
207 | 
208 |     # 返回带水印的图片
209 |     return watermarker.image
210 | 
211 | 
212 | if __name__ == "__main__":
213 |     import gradio as gr
214 | 
215 |     iface = gr.Interface(
216 |         fn=watermark_image,
217 |         inputs=[
218 |             gr.Image(type="pil", label="上传图片", height=400),
219 |             gr.Textbox(label="水印文字"),
220 |             gr.Radio(choices=["STRIPED", "CENTRAL"], label="水印样式"),
221 |             gr.Slider(minimum=0, maximum=360, value=30, label="水印角度"),
222 |             gr.ColorPicker(label="水印颜色"),
223 |             gr.Slider(minimum=0, maximum=1, value=0.15, label="水印透明度"),
224 |             gr.Slider(minimum=10, maximum=100, value=50, label="字体大小"),
225 |             gr.Slider(minimum=10, maximum=200, value=75, label="水印间距"),
226 |         ],
227 |         outputs=gr.Image(type="pil", label="带水印的图片", height=400),
228 |         title="图片水印工具",
229 |         description="上传一张图片，添加水印并下载。",
230 |     )
231 | 
232 |     iface.launch()
233 | 


--------------------------------------------------------------------------------
/hivision/utils.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | from PIL import Image
  4 | import io
  5 | import numpy as np
  6 | import cv2
  7 | import base64
  8 | from hivision.plugin.watermark import Watermarker, WatermarkerStyles
  9 | 
 10 | 
 11 | def save_image_dpi_to_bytes(image: np.ndarray, output_image_path: str = None, dpi: int = 300):
 12 |     """
 13 |     设置图像的DPI（每英寸点数）并返回字节流
 14 | 
 15 |     :param image: numpy.ndarray, 输入的图像数组
 16 |     :param output_image_path: Path to save the resized image. 保存调整大小后的图像的路径。
 17 |     :param dpi: int, 要设置的DPI值，默认为300
 18 |     """
 19 |     image = Image.fromarray(image)
 20 |     # 创建一个字节流对象
 21 |     byte_stream = io.BytesIO()
 22 |     # 将图像保存到字节流
 23 |     image.save(byte_stream, format="PNG", dpi=(dpi, dpi))
 24 |     # 获取字节流的内容
 25 |     image_bytes = byte_stream.getvalue()
 26 | 
 27 |     # Save the image to the output path
 28 |     if output_image_path:
 29 |         with open(output_image_path, "wb") as f:
 30 |             f.write(image_bytes)
 31 | 
 32 |     return image_bytes
 33 | 
 34 | 
 35 | def resize_image_to_kb(input_image: np.ndarray, output_image_path: str = None, target_size_kb: int = 100, dpi: int = 300):
 36 |     """
 37 |     Resize an image to a target size in KB.
 38 |     将图像调整大小至目标文件大小（KB）。
 39 | 
 40 |     :param input_image_path: Path to the input image. 输入图像的路径。
 41 |     :param output_image_path: Path to save the resized image. 保存调整大小后的图像的路径。
 42 |     :param target_size_kb: Target size in KB. 目标文件大小（KB）。
 43 | 
 44 |     Example:
 45 |     resize_image_to_kb('input_image.jpg', 'output_image.jpg', 50)
 46 |     """
 47 | 
 48 |     if isinstance(input_image, np.ndarray):
 49 |         img = Image.fromarray(input_image)
 50 |     elif isinstance(input_image, Image.Image):
 51 |         img = input_image
 52 |     else:
 53 |         raise ValueError("input_image must be a NumPy array or PIL Image.")
 54 | 
 55 |     # Convert image to RGB mode if it's not
 56 |     if img.mode != "RGB":
 57 |         img = img.convert("RGB")
 58 | 
 59 |     # Initial quality value
 60 |     quality = 95
 61 | 
 62 |     while True:
 63 |         # Create a BytesIO object to hold the image data in memory
 64 |         img_byte_arr = io.BytesIO()
 65 | 
 66 |         # Save the image to the BytesIO object with the current quality
 67 |         img.save(img_byte_arr, format="JPEG", quality=quality, dpi=(dpi, dpi))
 68 | 
 69 |         # Get the size of the image in KB
 70 |         img_size_kb = len(img_byte_arr.getvalue()) / 1024
 71 | 
 72 |         # Check if the image size is within the target size
 73 |         if img_size_kb <= target_size_kb or quality == 1:
 74 |             # If the image is smaller than the target size, add padding
 75 |             if img_size_kb < target_size_kb:
 76 |                 padding_size = int(
 77 |                     (target_size_kb * 1024) - len(img_byte_arr.getvalue())
 78 |                 )
 79 |                 padding = b"\x00" * padding_size
 80 |                 img_byte_arr.write(padding)
 81 | 
 82 |             # Save the image to the output path
 83 |             if output_image_path:
 84 |                 with open(output_image_path, "wb") as f:
 85 |                     f.write(img_byte_arr.getvalue())
 86 |             
 87 |             return img_byte_arr.getvalue()
 88 | 
 89 |         # Reduce the quality if the image is still too large
 90 |         quality -= 5
 91 | 
 92 |         # Ensure quality does not go below 1
 93 |         if quality < 1:
 94 |             quality = 1
 95 | 
 96 | 
 97 | def resize_image_to_kb_base64(input_image, target_size_kb, mode="exact"):
 98 |     """
 99 |     Resize an image to a target size in KB and return it as a base64 encoded string.
100 |     将图像调整大小至目标文件大小（KB）并返回base64编码的字符串。
101 | 
102 |     :param input_image: Input image as a NumPy array or PIL Image. 输入图像，可以是NumPy数组或PIL图像。
103 |     :param target_size_kb: Target size in KB. 目标文件大小（KB）。
104 |     :param mode: Mode of resizing ('exact', 'max', 'min'). 模式：'exact'（精确大小）、'max'（不大于）、'min'（不小于）。
105 | 
106 |     :return: Base64 encoded string of the resized image. 调整大小后的图像的base64编码字符串。
107 |     """
108 | 
109 |     if isinstance(input_image, np.ndarray):
110 |         img = Image.fromarray(input_image)
111 |     elif isinstance(input_image, Image.Image):
112 |         img = input_image
113 |     else:
114 |         raise ValueError("input_image must be a NumPy array or PIL Image.")
115 | 
116 |     # Convert image to RGB mode if it's not
117 |     if img.mode != "RGB":
118 |         img = img.convert("RGB")
119 | 
120 |     # Initial quality value
121 |     quality = 95
122 | 
123 |     while True:
124 |         # Create a BytesIO object to hold the image data in memory
125 |         img_byte_arr = io.BytesIO()
126 | 
127 |         # Save the image to the BytesIO object with the current quality
128 |         img.save(img_byte_arr, format="JPEG", quality=quality)
129 | 
130 |         # Get the size of the image in KB
131 |         img_size_kb = len(img_byte_arr.getvalue()) / 1024
132 | 
133 |         # Check based on the mode
134 |         if mode == "exact":
135 |             # If the image size is equal to the target size, we can return it
136 |             if img_size_kb == target_size_kb:
137 |                 break
138 | 
139 |             # If the image is smaller than the target size, add padding
140 |             elif img_size_kb < target_size_kb:
141 |                 padding_size = int(
142 |                     (target_size_kb * 1024) - len(img_byte_arr.getvalue())
143 |                 )
144 |                 padding = b"\x00" * padding_size
145 |                 img_byte_arr.write(padding)
146 |                 break
147 | 
148 |         elif mode == "max":
149 |             # If the image size is within the target size, we can return it
150 |             if img_size_kb <= target_size_kb or quality == 1:
151 |                 break
152 | 
153 |         elif mode == "min":
154 |             # If the image size is greater than or equal to the target size, we can return it
155 |             if img_size_kb >= target_size_kb:
156 |                 break
157 | 
158 |         # Reduce the quality if the image is still too large
159 |         quality -= 5
160 | 
161 |         # Ensure quality does not go below 1
162 |         if quality < 1:
163 |             quality = 1
164 | 
165 |     # Encode the image data to base64
166 |     img_base64 = base64.b64encode(img_byte_arr.getvalue()).decode("utf-8")
167 |     return "data:image/png;base64," + img_base64
168 | 
169 | 
170 | def numpy_2_base64(img: np.ndarray) -> str:
171 |     _, buffer = cv2.imencode(".png", img)
172 |     base64_image = base64.b64encode(buffer).decode("utf-8")
173 | 
174 |     return "data:image/png;base64," + base64_image
175 | 
176 | 
177 | def base64_2_numpy(base64_image: str) -> np.ndarray:
178 |     # Remove the data URL prefix if present
179 |     if base64_image.startswith('data:image'):
180 |         base64_image = base64_image.split(',')[1]
181 |     
182 |     # Decode base64 string to bytes
183 |     img_bytes = base64.b64decode(base64_image)
184 |     
185 |     # Convert bytes to numpy array
186 |     img_array = np.frombuffer(img_bytes, dtype=np.uint8)
187 |     
188 |     # Decode the image array
189 |     img = cv2.imdecode(img_array, cv2.IMREAD_UNCHANGED)
190 |     
191 |     return img
192 | 
193 | # 字节流转base64
194 | def bytes_2_base64(img_byte_arr: bytes) -> str:
195 |     base64_image = base64.b64encode(img_byte_arr).decode("utf-8")
196 |     return "data:image/png;base64," + base64_image
197 | 
198 | 
199 | def save_numpy_image(numpy_img, file_path):
200 |     # 检查数组的形状
201 |     if numpy_img.shape[2] == 4:
202 |         # 将 BGR 转换为 RGB，并保留透明通道
203 |         rgb_img = np.concatenate(
204 |             (np.flip(numpy_img[:, :, :3], axis=-1), numpy_img[:, :, 3:]), axis=-1
205 |         ).astype(np.uint8)
206 |         img = Image.fromarray(rgb_img, mode="RGBA")
207 |     else:
208 |         # 将 BGR 转换为 RGB
209 |         rgb_img = np.flip(numpy_img, axis=-1).astype(np.uint8)
210 |         img = Image.fromarray(rgb_img, mode="RGB")
211 | 
212 |     img.save(file_path)
213 | 
214 | 
215 | def numpy_to_bytes(numpy_img):
216 |     img = Image.fromarray(numpy_img)
217 |     img_byte_arr = io.BytesIO()
218 |     img.save(img_byte_arr, format="PNG")
219 |     img_byte_arr.seek(0)
220 |     return img_byte_arr
221 | 
222 | 
223 | def hex_to_rgb(value):
224 |     value = value.lstrip("#")
225 |     length = len(value)
226 |     return tuple(
227 |         int(value[i : i + length // 3], 16) for i in range(0, length, length // 3)
228 |     )
229 | 
230 | 
231 | def generate_gradient(start_color, width, height, mode="updown"):
232 |     # 定义背景颜色
233 |     end_color = (255, 255, 255)  # 白色
234 | 
235 |     # 创建一个空白图像
236 |     r_out = np.zeros((height, width), dtype=int)
237 |     g_out = np.zeros((height, width), dtype=int)
238 |     b_out = np.zeros((height, width), dtype=int)
239 | 
240 |     if mode == "updown":
241 |         # 生成上下渐变色
242 |         for y in range(height):
243 |             r = int(
244 |                 (y / height) * end_color[0] + ((height - y) / height) * start_color[0]
245 |             )
246 |             g = int(
247 |                 (y / height) * end_color[1] + ((height - y) / height) * start_color[1]
248 |             )
249 |             b = int(
250 |                 (y / height) * end_color[2] + ((height - y) / height) * start_color[2]
251 |             )
252 |             r_out[y, :] = r
253 |             g_out[y, :] = g
254 |             b_out[y, :] = b
255 | 
256 |     else:
257 |         # 生成中心渐变色
258 |         img = np.zeros((height, width, 3))
259 |         # 定义椭圆中心和半径
260 |         center = (width // 2, height // 2)
261 |         end_axies = max(height, width)
262 |         # 定义渐变色
263 |         end_color = (255, 255, 255)
264 |         # 绘制椭圆
265 |         for y in range(end_axies):
266 |             axes = (end_axies - y, end_axies - y)
267 |             r = int(
268 |                 (y / end_axies) * end_color[0]
269 |                 + ((end_axies - y) / end_axies) * start_color[0]
270 |             )
271 |             g = int(
272 |                 (y / end_axies) * end_color[1]
273 |                 + ((end_axies - y) / end_axies) * start_color[1]
274 |             )
275 |             b = int(
276 |                 (y / end_axies) * end_color[2]
277 |                 + ((end_axies - y) / end_axies) * start_color[2]
278 |             )
279 | 
280 |             cv2.ellipse(img, center, axes, 0, 0, 360, (b, g, r), -1)
281 |         b_out, g_out, r_out = cv2.split(np.uint64(img))
282 | 
283 |     return r_out, g_out, b_out
284 | 
285 | 
286 | def add_background(input_image, bgr=(0, 0, 0), mode="pure_color"):
287 |     """
288 |     本函数的功能为为透明图像加上背景。
289 |     :param input_image: numpy.array(4 channels), 透明图像
290 |     :param bgr: tuple, 合成纯色底时的 BGR 值
291 |     :param new_background: numpy.array(3 channels)，合成自定义图像底时的背景图
292 |     :return: output: 合成好的输出图像
293 |     """
294 |     height, width = input_image.shape[0], input_image.shape[1]
295 |     try:
296 |         b, g, r, a = cv2.split(input_image)
297 |     except ValueError:
298 |         raise ValueError(
299 |             "The input image must have 4 channels. 输入图像必须有4个通道，即透明图像。"
300 |         )
301 | 
302 |     a_cal = a / 255
303 |     if mode == "pure_color":
304 |         # 纯色填充
305 |         b2 = np.full([height, width], bgr[0], dtype=int)
306 |         g2 = np.full([height, width], bgr[1], dtype=int)
307 |         r2 = np.full([height, width], bgr[2], dtype=int)
308 |     elif mode == "updown_gradient":
309 |         b2, g2, r2 = generate_gradient(bgr, width, height, mode="updown")
310 |     else:
311 |         b2, g2, r2 = generate_gradient(bgr, width, height, mode="center")
312 | 
313 |     output = cv2.merge(
314 |         ((b - b2) * a_cal + b2, (g - g2) * a_cal + g2, (r - r2) * a_cal + r2)
315 |     )
316 | 
317 |     return output
318 | 
319 | def add_background_with_image(input_image: np.ndarray, background_image: np.ndarray) -> np.ndarray:
320 |     """
321 |     本函数的功能为为透明图像加上背景。
322 |     :param input_image: numpy.array(4 channels), 透明图像
323 |     :param background_image: numpy.array(3 channels), 背景图像
324 |     :return: output: 合成好的输出图像
325 |     """
326 |     height, width = input_image.shape[:2]
327 |     try:
328 |         b, g, r, a = cv2.split(input_image)
329 |     except ValueError:
330 |         raise ValueError(
331 |             "The input image must have 4 channels. 输入图像必须有4个通道，即透明图像。"
332 |         )
333 | 
334 |     # 确保背景图像与输入图像大小一致
335 |     background_image = cv2.resize(background_image, (width, height), cv2.INTER_AREA)
336 |     background_image = cv2.cvtColor(background_image, cv2.COLOR_BGR2RGB)
337 |     b2, g2, r2 = cv2.split(background_image)
338 | 
339 |     a_cal = a / 255.0
340 | 
341 |     # 修正混合公式
342 |     output = cv2.merge(
343 |         (b * a_cal + b2 * (1 - a_cal),
344 |          g * a_cal + g2 * (1 - a_cal),
345 |          r * a_cal + r2 * (1 - a_cal))
346 |     )
347 | 
348 |     return output.astype(np.uint8)
349 | 
350 | def add_watermark(
351 |     image, text, size=50, opacity=0.5, angle=45, color="#8B8B1B", space=75
352 | ):
353 |     image = Image.fromarray(image)
354 |     watermarker = Watermarker(
355 |         input_image=image,
356 |         text=text,
357 |         style=WatermarkerStyles.STRIPED,
358 |         angle=angle,
359 |         color=color,
360 |         opacity=opacity,
361 |         size=size,
362 |         space=space,
363 |     )
364 |     return np.array(watermarker.image.convert("RGB"))
365 | 


--------------------------------------------------------------------------------
/inference.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import cv2
  3 | import argparse
  4 | import numpy as np
  5 | from hivision.error import FaceError
  6 | from hivision.utils import hex_to_rgb, resize_image_to_kb, add_background, save_image_dpi_to_bytes
  7 | from hivision import IDCreator
  8 | from hivision.creator.layout_calculator import (
  9 |     generate_layout_array,
 10 |     generate_layout_image,
 11 | )
 12 | from hivision.creator.choose_handler import choose_handler
 13 | from hivision.utils import hex_to_rgb, resize_image_to_kb
 14 | 
 15 | 
 16 | INFERENCE_TYPE = [
 17 |     "idphoto",
 18 |     "human_matting",
 19 |     "add_background",
 20 |     "generate_layout_photos",
 21 |     "idphoto_crop",
 22 | ]
 23 | MATTING_MODEL = [
 24 |     "hivision_modnet",
 25 |     "modnet_photographic_portrait_matting",
 26 |     "mnn_hivision_modnet",
 27 |     "rmbg-1.4",
 28 |     "birefnet-v1-lite",
 29 | ]
 30 | FACE_DETECT_MODEL = [
 31 |     "mtcnn",
 32 |     "face_plusplus",
 33 |     "retinaface-resnet50",
 34 | ]
 35 | RENDER = [0, 1, 2]
 36 | 
 37 | parser = argparse.ArgumentParser(description="HivisionIDPhotos 证件照制作推理程序。")
 38 | parser.add_argument(
 39 |     "-t",
 40 |     "--type",
 41 |     help="请求 API 的种类",
 42 |     choices=INFERENCE_TYPE,
 43 |     default="idphoto",
 44 | )
 45 | parser.add_argument("-i", "--input_image_dir", help="输入图像路径", required=True)
 46 | parser.add_argument("-o", "--output_image_dir", help="保存图像路径", required=True)
 47 | parser.add_argument("--height", help="证件照尺寸-高", default=413)
 48 | parser.add_argument("--width", help="证件照尺寸-宽", default=295)
 49 | parser.add_argument("-c", "--color", help="证件照背景色", default="638cce")
 50 | parser.add_argument("--hd", type=bool, help="是否输出高清照", default=True)
 51 | parser.add_argument(
 52 |     "-k", "--kb", help="输出照片的 KB 值，仅对换底和制作排版照生效", default=None
 53 | )
 54 | parser.add_argument(
 55 |     "-r",
 56 |     "--render",
 57 |     type=int,
 58 |     help="底色合成的模式，有 0:纯色、1:上下渐变、2:中心渐变 可选",
 59 |     choices=RENDER,
 60 |     default=0,
 61 | )
 62 | parser.add_argument(
 63 |     "--dpi",
 64 |     type=int,
 65 |     help="输出照片的 DPI 值",
 66 |     default=300,
 67 | )
 68 | parser.add_argument(
 69 |     "--face_align",
 70 |     type=bool,
 71 |     help="是否进行人脸旋转矫正",
 72 |     default=False,
 73 | )
 74 | parser.add_argument(
 75 |     "--matting_model",
 76 |     help="抠图模型权重",
 77 |     default="modnet_photographic_portrait_matting",
 78 |     choices=MATTING_MODEL,
 79 | )
 80 | parser.add_argument(
 81 |     "--face_detect_model",
 82 |     help="人脸检测模型",
 83 |     default="mtcnn",
 84 |     choices=FACE_DETECT_MODEL,
 85 | )
 86 | 
 87 | args = parser.parse_args()
 88 | 
 89 | # ------------------- 选择抠图与人脸检测模型 -------------------
 90 | creator = IDCreator()
 91 | choose_handler(creator, args.matting_model, args.face_detect_model)
 92 | 
 93 | root_dir = os.path.dirname(os.path.abspath(__file__))
 94 | input_image = cv2.imread(args.input_image_dir, cv2.IMREAD_UNCHANGED)
 95 | 
 96 | # 如果模式是生成证件照
 97 | if args.type == "idphoto":
 98 |     # 将字符串转为元组
 99 |     size = (int(args.height), int(args.width))
100 |     try:
101 |         result = creator(input_image, size=size, face_alignment=args.face_align)
102 |     except FaceError:
103 |         print("人脸数量不等于 1，请上传单张人脸的图像。")
104 |     else:
105 |         # 保存标准照
106 |         save_image_dpi_to_bytes(cv2.cvtColor(result.standard, cv2.COLOR_RGBA2BGRA), args.output_image_dir, dpi=args.dpi)
107 | 
108 |         # 保存高清照
109 |         file_name, file_extension = os.path.splitext(args.output_image_dir)
110 |         new_file_name = file_name + "_hd" + file_extension
111 |         save_image_dpi_to_bytes(cv2.cvtColor(result.hd, cv2.COLOR_RGBA2BGRA), new_file_name, dpi=args.dpi)
112 | 
113 | # 如果模式是人像抠图
114 | elif args.type == "human_matting":
115 |     result = creator(input_image, change_bg_only=True)
116 |     cv2.imwrite(args.output_image_dir, result.hd)
117 | 
118 | # 如果模式是添加背景
119 | elif args.type == "add_background":
120 | 
121 |     render_choice = ["pure_color", "updown_gradient", "center_gradient"]
122 | 
123 |     # 将字符串转为元组
124 |     color = hex_to_rgb(args.color)
125 |     # 将元祖的 0 和 2 号数字交换
126 |     color = (color[2], color[1], color[0])
127 | 
128 |     result_image = add_background(
129 |         input_image, bgr=color, mode=render_choice[args.render]
130 |     )
131 |     result_image = result_image.astype(np.uint8)
132 |     result_image = cv2.cvtColor(result_image, cv2.COLOR_RGBA2BGRA)
133 |     
134 |     if args.kb:
135 |         resize_image_to_kb(result_image, args.output_image_dir, int(args.kb), dpi=args.dpi)
136 |     else:
137 |         save_image_dpi_to_bytes(cv2.cvtColor(result_image, cv2.COLOR_RGBA2BGRA), args.output_image_dir, dpi=args.dpi)
138 | 
139 | # 如果模式是生成排版照
140 | elif args.type == "generate_layout_photos":
141 | 
142 |     size = (int(args.height), int(args.width))
143 | 
144 |     typography_arr, typography_rotate = generate_layout_array(
145 |         input_height=size[0], input_width=size[1]
146 |     )
147 | 
148 |     result_layout_image = generate_layout_image(
149 |         input_image,
150 |         typography_arr,
151 |         typography_rotate,
152 |         height=size[0],
153 |         width=size[1],
154 |     )
155 | 
156 |     if args.kb:
157 |         result_layout_image = cv2.cvtColor(result_layout_image, cv2.COLOR_RGB2BGR)
158 |         result_layout_image = resize_image_to_kb(
159 |             result_layout_image, args.output_image_dir, int(args.kb), dpi=args.dpi
160 |         )
161 |     else:
162 |         save_image_dpi_to_bytes(cv2.cvtColor(result_layout_image, cv2.COLOR_RGBA2BGRA), args.output_image_dir, dpi=args.dpi)
163 | 
164 | # 如果模式是证件照裁切
165 | elif args.type == "idphoto_crop":
166 |     # 将字符串转为元组
167 |     size = (int(args.height), int(args.width))
168 |     try:
169 |         result = creator(input_image, size=size, crop_only=True)
170 |     except FaceError:
171 |         print("人脸数量不等于 1，请上传单张人脸的图像。")
172 |     else:
173 |         # 保存标准照
174 |         save_image_dpi_to_bytes(cv2.cvtColor(result.standard, cv2.COLOR_RGBA2BGRA), args.output_image_dir, dpi=args.dpi)
175 | 
176 |         # 保存高清照
177 |         file_name, file_extension = os.path.splitext(args.output_image_dir)
178 |         new_file_name = file_name + "_hd" + file_extension
179 |         save_image_dpi_to_bytes(cv2.cvtColor(result.hd, cv2.COLOR_RGBA2BGRA), new_file_name, dpi=args.dpi)


--------------------------------------------------------------------------------
/requirements-app.txt:
--------------------------------------------------------------------------------
1 | gradio>=4.43.0
2 | fastapi


--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | black


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | opencv-python>=4.8.1.78
2 | onnxruntime>=1.15.0
3 | numpy<=1.26.4
4 | requests
5 | mtcnn-runtime
6 | tqdm
7 | starlette
8 | 


--------------------------------------------------------------------------------
/scripts/build_pypi.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | r"""
 4 | @DATE: 2024/9/5 16:56
 5 | @File: build_pypi.py
 6 | @IDE: pycharm
 7 | @Description:
 8 |     构建pypi包
 9 | """
10 | 


--------------------------------------------------------------------------------
/scripts/download_model.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import requests
  3 | import argparse
  4 | from tqdm import tqdm  # 导入 tqdm 库
  5 | 
  6 | # 获取当前脚本所在目录的上一级目录
  7 | base_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
  8 | 
  9 | 
 10 | def download_file(url, save_path):
 11 |     try:
 12 |         print(f"Begin downloading: {url}")
 13 |         response = requests.get(url, stream=True)
 14 |         response.raise_for_status()  # 检查请求是否成功
 15 | 
 16 |         # 获取文件总大小
 17 |         total_size = int(response.headers.get("content-length", 0))
 18 |         # 使用 tqdm 显示进度条
 19 |         with open(save_path, "wb") as file, tqdm(
 20 |             total=total_size,
 21 |             unit="B",
 22 |             unit_scale=True,
 23 |             unit_divisor=1024,
 24 |             desc=os.path.basename(save_path),
 25 |         ) as bar:
 26 |             for chunk in response.iter_content(chunk_size=8192):
 27 |                 file.write(chunk)
 28 |                 bar.update(len(chunk))  # 更新进度条
 29 |         print(f"Download completed. Save to: {save_path}")
 30 |     except requests.exceptions.RequestException as e:
 31 |         print(f"Download failed: {e}")
 32 | 
 33 | 
 34 | def download_models(model_urls):
 35 |     # 下载每个模型
 36 |     for model_name, model_info in model_urls.items():
 37 |         # 指定下载保存的目录
 38 |         save_dir = model_info["location"]
 39 | 
 40 |         # 创建目录（如果不存在的话）
 41 |         os.makedirs(os.path.join(base_path, save_dir), exist_ok=True)
 42 | 
 43 |         url = model_info["url"]
 44 |         file_format = model_info["format"]
 45 | 
 46 |         # 特殊处理 rmbg-1.4 模型的文件名
 47 |         file_name = f"{model_name}.{file_format}"
 48 | 
 49 |         save_path = os.path.join(base_path, save_dir, file_name)
 50 | 
 51 |         # 检查文件是否已经存在
 52 |         if os.path.exists(save_path):
 53 |             print(f"File already exists, skipping download: {save_path}")
 54 |             continue
 55 | 
 56 |         # 下载文件
 57 |         download_file(url, save_path)
 58 | 
 59 | 
 60 | def main(models_to_download):
 61 |     # 模型权重的下载链接
 62 |     model_urls = {
 63 |         "hivision_modnet": {
 64 |             "url": "https://github.com/Zeyi-Lin/HivisionIDPhotos/releases/download/pretrained-model/hivision_modnet.onnx",
 65 |             "format": "onnx",
 66 |             "location": "hivision/creator/weights",
 67 |         },
 68 |         "modnet_photographic_portrait_matting": {
 69 |             "url": "https://github.com/Zeyi-Lin/HivisionIDPhotos/releases/download/pretrained-model/modnet_photographic_portrait_matting.onnx",
 70 |             "format": "onnx",
 71 |             "location": "hivision/creator/weights",
 72 |         },
 73 |         # "mnn_hivision_modnet": {
 74 |         #     "url": "https://github.com/Zeyi-Lin/HivisionIDPhotos/releases/download/pretrained-model/mnn_hivision_modnet.mnn",
 75 |         #     "format": "mnn",
 76 |         # },
 77 |         "rmbg-1.4": {
 78 |             "url": "https://huggingface.co/briaai/RMBG-1.4/resolve/main/onnx/model.onnx?download=true",
 79 |             "format": "onnx",
 80 |             "location": "hivision/creator/weights",
 81 |         },
 82 |         "birefnet-v1-lite": {
 83 |             "url": "https://github.com/ZhengPeng7/BiRefNet/releases/download/v1/BiRefNet-general-bb_swin_v1_tiny-epoch_232.onnx",
 84 |             "format": "onnx",
 85 |             "location": "hivision/creator/weights",
 86 |         },
 87 |         "retinaface-resnet50": {
 88 |             "url": "https://github.com/Zeyi-Lin/HivisionIDPhotos/releases/download/pretrained-model/retinaface-resnet50.onnx",
 89 |             "format": "onnx",
 90 |             "location": "hivision/creator/retinaface/weights",
 91 |         },
 92 |     }
 93 | 
 94 |     # 如果选择下载所有模型
 95 |     if "all" in models_to_download:
 96 |         selected_urls = model_urls
 97 |     else:
 98 |         selected_urls = {model: model_urls[model] for model in models_to_download}
 99 | 
100 |     if not selected_urls:
101 |         print("No valid models selected for download.")
102 |         return
103 | 
104 |     download_models(selected_urls)
105 | 
106 | 
107 | if __name__ == "__main__":
108 |     MODEL_CHOICES = [
109 |         "hivision_modnet",
110 |         "modnet_photographic_portrait_matting",
111 |         # "mnn_hivision_modnet",
112 |         "rmbg-1.4",
113 |         "birefnet-lite",
114 |         "all",
115 |     ]
116 | 
117 |     parser = argparse.ArgumentParser(description="Download matting models.")
118 |     parser.add_argument(
119 |         "--models",
120 |         nargs="+",
121 |         required=True,
122 |         choices=MODEL_CHOICES,
123 |         help='Specify which models to download (options: hivision_modnet, modnet_photographic_portrait_matting, mnn_hivision_modnet, rmbg-1.4, all). Only "all" will download all models.',
124 |     )
125 |     args = parser.parse_args()
126 | 
127 |     models_to_download = args.models if args.models else ["all"]
128 |     main(models_to_download)
129 | 


--------------------------------------------------------------------------------
/test/create_id_photo.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | r"""
 4 | @DATE: 2024/9/5 21:39
 5 | @File: create_id_photo.py
 6 | @IDE: pycharm
 7 | @Description:
 8 |     用于测试创建证件照
 9 | """
10 | from hivision.creator import IDCreator
11 | import cv2
12 | import os
13 | 
14 | now_dir = os.path.dirname(__file__)
15 | image_path = os.path.join(os.path.dirname(now_dir), "app", "images", "test.jpg")
16 | output_dir = os.path.join(now_dir, "temp")
17 | 
18 | image = cv2.imread(image_path)
19 | creator = IDCreator()
20 | result = creator(image)
21 | cv2.imwrite(os.path.join(output_dir, "result.png"), result.standard)
22 | cv2.imwrite(os.path.join(output_dir, "result_hd.png"), result.hd)
23 | 


--------------------------------------------------------------------------------
/test/temp/.gitkeep:
--------------------------------------------------------------------------------
1 | 存放一些测试临时文件


--------------------------------------------------------------------------------