├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── build_image.sh ├── config.json ├── docs ├── ppt_input_format.md └── proxy.md ├── images ├── build_docker_image.png ├── chatppt_presentation_demo.jpg ├── chatppt_presentation_demo.png ├── forecast.png └── performance_chart.png ├── inputs ├── docx │ └── multimodal_llm_overview.docx └── markdown │ ├── GitHubSentinel_intro.md │ ├── openai_canvas_intro.md │ └── test_input.md ├── jupyter ├── image_advisor.ipynb └── pptx_quickstart.ipynb ├── outputs └── .gitkeep ├── prompts ├── chatbot.txt ├── content_assistant.txt ├── content_formatter.txt └── image_advisor.txt ├── requirements.txt ├── src ├── chat_history.py ├── chatbot.py ├── config.py ├── content_assistant.py ├── content_formatter.py ├── data_structures.py ├── docx_parser.py ├── gradio_server.py ├── image_advisor.py ├── input_parser.py ├── layout_manager.py ├── logger.py ├── main.py ├── merge_requirements.py ├── minicpm_v_model.py ├── openai_whisper.py ├── ppt_generator.py ├── slide_builder.py ├── template_manager.py └── utils.py ├── templates ├── MasterTemplate.pptx └── SimpleTemplate.pptx ├── tests ├── test_data_structures.py ├── test_doc_parser.py ├── test_input_parser.py ├── test_layout_manager.py ├── test_ppt_generator.py └── test_slide_builder.py └── validate_tests.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | 164 | 165 | # ChatPPT custom config 166 | outputs/*.pptx 167 | test/* 168 | .DS_STore 169 | jupyter/*.pptx 170 | .gradio/* 171 | nohup.out 172 | images/* 173 | test_results.txt -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # 使用 Python 3.10 slim 作为基础镜像 2 | FROM python:3.10-slim 3 | 4 | # 设置工作目录 5 | WORKDIR /app 6 | 7 | # 复制并安装项目依赖 8 | COPY requirements.txt . 9 | RUN pip install --no-cache-dir -r requirements.txt 10 | 11 | # 复制项目文件到容器 12 | COPY . . 13 | 14 | # 赋予验证脚本执行权限 15 | RUN chmod +x validate_tests.sh 16 | 17 | # 设置环境变量,以便在运行时可以传入实际的 API Key 18 | ENV LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY} 19 | ENV OPENAI_API_KEY=${OPENAI_API_KEY} 20 | 21 | # 在构建过程中运行单元测试 22 | RUN ./validate_tests.sh 23 | 24 | # 设置容器的入口点,默认运行 ChatPPT Gradio Server 25 | CMD ["python", "src/gradio_server.py"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ChatPPT 2 | 3 | ## 目录 4 | 5 | - [主要功能](#主要功能) 6 | - [产品演示](#产品演示) 7 | - [快速开始](#快速开始) 8 | - [1. 安装依赖](#1-安装依赖) 9 | - [2. 配置应用](#2-配置应用) 10 | - [3. 如何运行](#3-如何运行) 11 | - [A. 作为 Gradio 服务运行](#a-作为-gradio-服务运行) 12 | - [B. 命令行方式运行](#b-命令行方式运行) 13 | - [使用 Docker 部署服务](#使用-docker-部署服务) 14 | - [1. 运行 Docker 容器](#1-运行-docker-容器) 15 | - [2. 配置环境变量](#2-配置环境变量) 16 | - [PowerPoint 母版布局命名规范](#powerpoint-母版布局命名规范) 17 | - [单元测试](#单元测试) 18 | - [单元测试和验证脚本 `validate_tests.sh`](#单元测试和验证脚本-validate_testssh) 19 | - [用途](#用途) 20 | - [功能](#功能) 21 | - [使用 Docker 构建与验证](#使用-docker-构建与验证) 22 | - [1. `Dockerfile`](#1-dockerfile) 23 | - [用途](#用途) 24 | - [关键步骤](#关键步骤) 25 | - [2. `build_image.sh`](#2-build_imagesh) 26 | - [用途](#用途) 27 | - [功能](#功能) 28 | - [贡献](#贡献) 29 | - [许可证](#许可证) 30 | - [联系](#联系) 31 | 32 | ChatPPT 是一个基于多模态 AI 技术的智能助手,旨在提升企业办公自动化流程的效率。它能够处理语音、图像和文本等多种输入形式,通过精确的提示工程和强大的自然语言处理能力,为用户生成高质量的 PowerPoint 演示文稿。ChatPPT 不仅简化了信息收集和内容创作过程,还通过自动化的报告生成和分析功能,帮助企业快速、准确地完成各类汇报和展示任务,从而显著提升工作效率和业务价值。 33 | 34 | ### 主要功能 35 | 36 | - **多模态输入支持**:支持语音、图像、文本等多种输入形式,灵活适应用户的使用需求。 37 | - **自动生成演示文稿**:基于输入内容,自动生成结构化的 PowerPoint 演示文稿,支持多种布局和模板。 38 | - **语音识别和文本转换**:自动将语音输入转化为文本,进行内容处理和文稿生成,降低用户的操作成本。 39 | - **图像处理与嵌入**:支持将用户上传的图片自动嵌入演示文稿中,并根据内容智能选择合适的布局。 40 | - **多语言支持**:结合 OpenAI 模型和其他语言模型,支持中英文等多语言的演示文稿生成和报告输出。 41 | - **可视化界面**:通过 Gradio 实现简洁易用的图形化界面,让用户无需复杂配置即可快速生成演示文稿。 42 | 43 | ### 产品演示 44 | 45 | https://github.com/user-attachments/assets/37d32bec-928e-4961-98b3-189ce15ead2e 46 | 47 | 48 | **自动生成的演示文稿内容** 49 | 50 | ![chatppt_presentation_demo](images/chatppt_presentation_demo.png) 51 | 52 | ## 快速开始 53 | 54 | ### 1. 安装依赖 55 | 56 | 首先,安装所需的依赖项: 57 | 58 | ```sh 59 | pip install -r requirements.txt 60 | ``` 61 | 62 | ### 2. 配置应用 63 | 64 | 编辑 `config.json` 文件,以设置输入模式、默认模板(PPT 母版)以及 ChatBot Prompt,确保模板文件 `SimpleTemplate.pptx` 中的母版布局名称符合[PowerPoint 母版布局命名规范](#powerpoint-母版布局命名规范)。 65 | 66 | 67 | ```json 68 | { 69 | "input_mode": "text", 70 | "chatbot_prompt": "prompts/chatbot.txt", 71 | "content_formatter_prompt": "prompts/content_formatter.txt", 72 | "content_assistant_prompt": "prompts/content_assistant.txt", 73 | "image_advisor_prompt": "prompts/image_advisor.txt", 74 | "ppt_template": "templates/SimpleTemplate.pptx" 75 | } 76 | ``` 77 | 78 | ### 3. 如何运行 79 | 80 | 作为生产服务发布,ChatPPT 还需要配置域名,SSL 证书和反向代理,详见文档:**[域名和反向代理设置说明文档](docs/proxy.md)** 81 | 82 | #### A. 作为 Gradio 服务运行 83 | 84 | 要使用 Gradio 界面运行应用,允许用户通过 Web 界面与该工具交互: 85 | 86 | ```sh 87 | python src/gradio_server.py 88 | ``` 89 | 90 | #### B. 命令行方式运行 91 | 92 | 您可以通过命令行模式运行 ChatPPT: 93 | 94 | ```sh 95 | python src/main.py test_input.md 96 | ``` 97 | 98 | 通过此模式,您可以手动提供 PowerPoint 文件内容(格式请参考:[ChatPPT 输入文本格式说明](docs/ppt_input_format.md)),并按照配置的 [PowerPoint 模板](templates/MasterTemplate.pptx),生成演示文稿。 99 | 100 | ## 使用 Docker 部署服务 101 | 102 | ChatPPT 提供了 Docker 支持,以便在隔离环境中运行。以下是使用 Docker 运行的步骤。 103 | 104 | ### 1. 运行 Docker 容器 105 | 106 | 使用以下命令运行 ChatPPT 指定版本(如:v0.7)Docker 容器服务。关于如何 [使用 Docker 构建与验证](#使用-docker-构建与验证)。 107 | 108 | ```sh 109 | docker run -it -p 7860:7860 -e LANGCHAIN_API_KEY=$LANGCHAIN_API_KEY -e OPENAI_API_KEY=$OPENAI_API_KEY -v $(pwd)/outputs:/app/outputs chatppt:v0.7 110 | 111 | ``` 112 | 113 | ### 2. 参数说明 114 | 115 | 在运行容器时,可以通过环境变量传入`LANGCHAIN_API_KEY` 和 `OPENAI_API_KEY`,例如: 116 | 117 | ```sh 118 | -e LANGCHAIN_API_KEY=$LANGCHAIN_API_KEY -e OPENAI_API_KEY=$OPENAI_API_KEY 119 | ``` 120 | 121 | 将本地的 `outputs` 文件夹挂载到容器内的 `/app/outputs`,便于访问生成的文件。 122 | 123 | ```sh 124 | -v $(pwd)/outputs:/app/outputs` 125 | ``` 126 | 127 | 128 | ## PowerPoint 母版布局命名规范 129 | 130 | 为确保 ChatPPT 能正确匹配布局,PowerPoint 母版文件 ([PowerPoint 模板](templates/MasterTemplate.pptx)) 中的布局名称应遵循以下命名规范: 131 | 132 | - 布局名称应以 `{type}, {type}, {type} {No.}` 的形式命名,其中 `{type}` 是内容类型,如 `Title`, `Content`, `Picture`,例如: 133 | - `Title 0` 134 | - `Title, Content 1` 135 | - `Title, Picture 5` 136 | - `Title, Content, Picture 2` 137 | 138 | - 布局名称的顺序和数量必须与输入内容一致(例如,具有标题、要点和图片的幻灯片应映射到 `Title, Content, Picture` 布局)。 139 | - 布局后缀 `{No.}` 用于表示同类布局中的不同编号,以支持多种同类布局。 140 | 141 | 该规范确保布局匹配的灵活性,同时支持多种不同内容的组合和扩展。 142 | 143 | ## 单元测试 144 | 145 | ### 单元测试和验证脚本 `validate_tests.sh` 146 | 147 | #### 用途 148 | `validate_tests.sh` 是一个用于运行单元测试并验证结果的 Shell 脚本。它会在 Docker 镜像构建过程中执行,以确保代码的正确性和稳定性。 149 | 150 | #### 功能 151 | - 脚本运行所有单元测试,并将结果输出到 `test_results.txt` 文件中。 152 | - 如果测试失败,脚本会输出测试结果,并导致 Docker 构建失败,确保未通过测试的代码不会进入生产环境。 153 | - 如果所有测试通过,脚本会继续进行 Docker 镜像的构建。 154 | 155 | ## 使用 Docker 构建与验证 156 | 157 | 为了便于在各种环境中构建和部署 ChatPPT 项目,我们提供了 Docker 支持。该支持包括以下文件和功能: 158 | 159 | ### 1. `Dockerfile` 160 | 161 | #### 用途 162 | `Dockerfile` 是用于定义 ChatPPT 项目 Docker 镜像构建过程的配置文件。它描述了构建步骤,包括安装依赖、复制项目文件、运行单元测试等。 163 | 164 | #### 关键步骤 165 | - 使用 `python:3.10-slim` 作为基础镜像,并设置工作目录为 `/app`。 166 | - 复制项目的 `requirements.txt` 文件,并安装所有 Python 依赖。 167 | - 复制项目的所有文件到容器中,并赋予 `validate_tests.sh` 脚本执行权限。 168 | - 在构建过程中执行 `validate_tests.sh` 脚本,以确保所有单元测试通过。如果测试失败,构建过程将中止。 169 | - 构建成功后,将默认运行 `src/main.py` 作为容器的入口 170 | 171 | 点,以启动 ChatPPT 服务。 172 | 173 | ### 2. `build_image.sh` 174 | 175 | #### 用途 176 | `build_image.sh` 是一个自动构建 Docker 镜像的 Shell 脚本。它从当前的 Git 分支中获取分支名称,并将其用作 Docker 镜像的标签,便于在不同开发分支上生成不同的 Docker 镜像。 177 | 178 | #### 功能 179 | - 获取当前 Git 分支名称,并将其用作 Docker 镜像的标签,以便追踪不同开发分支的版本。 180 | - 使用 `docker build` 命令构建 Docker 镜像,并使用当前 Git 分支名称作为标签。 181 | 182 | #### 使用示例 183 | ```bash 184 | ./build_image.sh 185 | ``` 186 | 187 | ![build_docker_image](images/build_docker_image.png) 188 | 189 | 通过这些脚本和配置文件,ChatPPT 项目可以在不同的开发分支中确保构建的 Docker 镜像基于通过单元测试的代码,从而提高了代码质量和部署的可靠性。 190 | 191 | ### 贡献 192 | 193 | 我们欢迎所有的贡献!如果你有任何建议或功能请求,请先开启一个议题讨论。你的帮助将使 ChatPPT 变得更加完善。 194 | 195 | ### 许可证 196 | 197 | 该项目根据 **Apache 2.0** 许可证进行许可。详情请参见 [LICENSE](LICENSE) 文件。 198 | 199 | ### 联系 200 | 201 | 项目作者: Django Peng 202 | 203 | 项目链接: https://github.com/DjangoPeng/ChatPPT -------------------------------------------------------------------------------- /build_image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 获取当前的 Git 分支名称 4 | BRANCH_NAME=$(git rev-parse --abbrev-ref HEAD) 5 | 6 | # 如果需要,可以处理分支名称,例如替换无效字符 7 | BRANCH_NAME=${BRANCH_NAME//\//-} 8 | 9 | # 使用 Git 分支名称作为 Docker 镜像的标签 10 | IMAGE_TAG="chatppt:${BRANCH_NAME}" 11 | 12 | # 构建 Docker 镜像 13 | docker build -t $IMAGE_TAG . 14 | 15 | # 输出构建结果 16 | echo "Docker 镜像已构建并打上标签: $IMAGE_TAG" -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_mode": "text", 3 | "chatbot_prompt": "prompts/chatbot.txt", 4 | "content_formatter_prompt": "prompts/content_formatter.txt", 5 | "content_assistant_prompt": "prompts/content_assistant.txt", 6 | "image_advisor_prompt": "prompts/image_advisor.txt", 7 | "ppt_template": "templates/SimpleTemplate.pptx" 8 | } -------------------------------------------------------------------------------- /docs/ppt_input_format.md: -------------------------------------------------------------------------------- 1 | ### **ChatPPT 输入文本格式说明** 2 | 3 | 此文档介绍如何编写输入文本以生成 PowerPoint 幻灯片。每个幻灯片包含标题、要点、图片等内容。输入文本将被解析并自动生成对应的 PowerPoint 文件,并且布局将由 `ChatPPT` 的 `LayoutManager` 统一管理,不需要手动输入布局名称。 4 | 5 | `LayoutManager` 根据输入内容(如标题、要点、图片)自动分配合适的布局,具体布局设置通过 `config.json` 文件配置,并确保模板 (`template.pptx`) 中的母版名称与配置一致。 6 | 7 | ### **ChatPPT 输入文本格式** 8 | 9 | ```plaintext 10 | # [主标题] 11 | 12 | ## [幻灯片标题] 13 | - [要点内容1] 14 | - [要点内容2] 15 | 16 | ## [幻灯片标题] 17 | - [要点内容1] 18 | ![图片描述](图片路径) 19 | ``` 20 | 21 | #### **1. 主标题** 22 | - 格式:`# [主标题]` 23 | - 说明:主标题作为整个 PowerPoint 的标题,同时也将作为生成的 PowerPoint 文件名。 24 | - 示例: 25 | ``` 26 | # 企业年度报告 27 | ``` 28 | 29 | #### **2. 幻灯片标题** 30 | - 格式:`## [幻灯片标题]` 31 | - 说明:每张幻灯片以 `##` 开头,后面跟随标题。布局将根据 `LayoutManager` 自动分配,无需手动指定布局名称。 32 | - 示例: 33 | ``` 34 | ## 2024 业绩概述 35 | ``` 36 | 37 | #### **3. 幻灯片内容 - 要点列表** 38 | - 格式:`- [要点内容]` 39 | - 说明:每个要点以 `-` 开头,后跟要点的内容。该格式用于生成幻灯片中的项目符号列表。 40 | - 示例: 41 | ``` 42 | - 总收入增长15% 43 | - 市场份额扩大至30% 44 | ``` 45 | 46 | #### **4. 幻灯片内容 - 图片** 47 | - 格式:`![图片描述](图片路径)` 48 | - 说明:使用 `![图片描述](图片路径)` 的格式插入图片。图片路径应该为相对路径或绝对路径,确保文件存在于指定路径下。 49 | - 示例: 50 | ``` 51 | ![业绩图表](images/performance_chart.png) 52 | ``` 53 | 54 | ### **完整输入文本示例** 55 | 56 | 以下是一个完整的输入文本示例,包含主标题、多个幻灯片、要点列表以及图片插入: 57 | 58 | ```plaintext 59 | # 企业年度报告 60 | 61 | ## 2024 业绩概述 62 | - 总收入增长15% 63 | - 市场份额扩大至30% 64 | 65 | ## 新产品发布 66 | - 产品A: 特色功能介绍 67 | - 产品B: 市场定位 68 | 69 | ## 业绩图表 70 | ![业绩图表](images/performance_chart.png) 71 | ``` 72 | 73 | ### **各部分说明** 74 | 75 | 1. **主标题**: 76 | - `# 企业年度报告`:该文本将用作生成的 PowerPoint 文件名,即 "企业年度报告.pptx"。 77 | 78 | 2. **幻灯片 1:2024 业绩概述**: 79 | - 标题:`2024 业绩概述`,布局将由 `LayoutManager` 自动分配。 80 | - 内容:包括两条要点,分别是 "总收入增长15%" 和 "市场份额扩大至30%"。 81 | 82 | 3. **幻灯片 2:新产品发布**: 83 | - 标题:`新产品发布`,布局将由 `LayoutManager` 自动分配。 84 | - 内容:包括两条要点,分别是 "产品A: 特色功能介绍" 和 "产品B: 市场定位"。 85 | 86 | 4. **幻灯片 3:业绩图表**: 87 | - 标题:`业绩图表`,布局将由 `LayoutManager` 自动分配。 88 | - 图片:插入路径为 `images/performance_chart.png` 的图片。 89 | 90 | ### **自动布局说明** 91 | 92 | 布局的分配不再需要在输入文本中手动指定,`ChatPPT` 的 `LayoutManager` 根据幻灯片的内容(如标题、要点、图片等)自动选择最合适的布局。布局设置在 `config.json` 中管理,模板 (`template.pptx`) 的布局名称必须与 `config.json` 中的名称匹配。 93 | 94 | #### **布局规则示例**: 95 | 96 | - 当幻灯片包含标题和多个要点时,自动使用 "Title and Content" 布局。 97 | - 当幻灯片包含标题和图片时,自动使用 "Title and Picture" 布局。 98 | - 当幻灯片同时包含标题、要点和图片时,自动使用 "Title, Content, and Picture" 布局。 99 | 100 | ### **配置布局** 101 | 102 | 所有布局映射在 `config.json` 中定义,确保模板文件 `template.pptx` 中的母版布局名称与 `config.json` 中的名称一致。例如: 103 | 104 | ```json 105 | { 106 | "layout_mapping": { 107 | "Title Only": 1, 108 | "Title and Content": 2, 109 | "Title and Picture": 3, 110 | "Title, Content, and Picture": 4 111 | } 112 | } 113 | ``` 114 | 115 | ### **注意事项** 116 | 117 | 1. **布局映射**:`config.json` 中的 `layout_mapping` 字段定义了内容到布局的映射。布局名称必须与模板中的母版布局名称相对应。 118 | 119 | 2. **图片路径**:图片路径应为本地文件系统的相对路径或绝对路径。确保图片文件存在于指定位置。 120 | 121 | 3. **占位符**:确保模板中的布局包含文本和图片占位符,以便自动插入要点和图片。 122 | 123 | ### **总结** 124 | 125 | 本指南提供了 `ChatPPT` 输入文本的标准格式说明。您只需提供主标题、幻灯片标题、要点和图片路径,无需指定布局名称,`LayoutManager` 将根据内容自动为您选择最合适的布局。 -------------------------------------------------------------------------------- /docs/proxy.md: -------------------------------------------------------------------------------- 1 | # 域名和反向代理设置说明文档 2 | 3 | 本文档为域名配置、SSL 证书生成、安装及 Nginx 反向代理设置提供详细指导,以便为网站启用 HTTPS 安全访问。 4 | 5 | --- 6 | 7 | ## 1. 域名 A 记录配置 8 | 9 | ### 1.1 登录域名提供商 10 | 11 | 1. 使用您的域名服务商(如 Hexonet、阿里云、腾讯云等)提供的管理界面。 12 | 2. 找到域名管理区域,选择您要配置的域名。 13 | 14 | ### 1.2 添加 A 记录 15 | 16 | 1. 进入 DNS 管理页面。 17 | 2. 创建一条新的 **A 记录**,详细配置如下: 18 | - **主机记录**:`@`(表示主域名)或 `subdomain`(如果使用子域名)。 19 | - **记录类型**:A 记录 20 | - **记录值**:您的服务器公网 IP 地址(例如 `123.45.67.89`)。 21 | - **TTL**:选择默认值,通常为 `600` 秒。 22 | 23 | 3. **保存配置**。等待 DNS 记录的传播,通常需要几分钟,但可能最长达 24 小时。 24 | 25 | --- 26 | 27 | ## 2. 安装并配置 SSL 证书 28 | 29 | ### 2.1 安装 Certbot 30 | 31 | Certbot 是 Let’s Encrypt 提供的免费 SSL 证书工具。首先在您的服务器上安装 Certbot 和 Nginx 插件: 32 | 33 | ```bash 34 | sudo apt update 35 | sudo apt install certbot python3-certbot-nginx -y 36 | ``` 37 | 38 | ### 2.2 生成 SSL 证书 39 | 40 | 运行以下命令,为域名(或子域名)生成 SSL 证书。以 `example.com` 为例: 41 | 42 | ```bash 43 | sudo certbot --nginx -d example.com 44 | ``` 45 | 46 | ### 2.3 配置自动重定向到 HTTPS 47 | 48 | Certbot 会询问您是否需要将 HTTP 重定向到 HTTPS。选择 `2` 进行自动重定向配置: 49 | 50 | ```plaintext 51 | 1: No redirect 52 | 2: Redirect - Make all requests redirect to secure HTTPS access. 53 | ``` 54 | 55 | ### 2.4 验证证书安装 56 | 57 | 在浏览器中访问 `https://example.com`,查看是否显示安全锁标志,表示证书安装成功。 58 | 59 | --- 60 | 61 | ## 3. Nginx 反向代理设置 62 | 63 | 以下是使用 Nginx 反向代理的详细配置过程,确保所有 HTTP 请求自动重定向到 HTTPS,并通过 SSL 加密的 Nginx 代理将请求转发到后端服务。 64 | 65 | ### 3.1 配置 Nginx 66 | 67 | 1. 编辑或创建 Nginx 配置文件(例如 `/etc/nginx/sites-available/example.com`): 68 | 69 | ```bash 70 | sudo nano /etc/nginx/sites-available/example.com 71 | ``` 72 | 73 | 2. 将以下内容添加到配置文件中: 74 | 75 | ```nginx 76 | # HTTP 到 HTTPS 的重定向 77 | server { 78 | listen 80; 79 | server_name example.com; 80 | 81 | # 重定向所有 HTTP 请求到 HTTPS 82 | return 301 https://$host$request_uri; 83 | } 84 | 85 | # HTTPS 服务器配置 86 | server { 87 | listen 443 ssl; 88 | server_name example.com; 89 | 90 | # SSL 证书路径 91 | ssl_certificate /etc/letsencrypt/live/example.com/fullchain.pem; # Certbot 自动生成 92 | ssl_certificate_key /etc/letsencrypt/live/example.com/privkey.pem; # Certbot 自动生成 93 | 94 | # 启用 TLS 协议 95 | ssl_protocols TLSv1.2 TLSv1.3; 96 | 97 | # 配置加密套件 98 | ssl_ciphers "ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305"; 99 | ssl_prefer_server_ciphers off; 100 | 101 | # 其他 SSL 配置 102 | ssl_ecdh_curve X25519:P-256:P-384:P-521; 103 | ssl_session_cache shared:SSL:50m; 104 | ssl_session_timeout 10m; 105 | 106 | # 上传文件大小限制 107 | client_max_body_size 50M; 108 | 109 | location / { 110 | proxy_pass http://127.0.0.1:7860; # 后端服务地址,示例中使用本地 7860 端口 111 | proxy_http_version 1.1; 112 | 113 | # WebSocket 支持 114 | proxy_set_header Upgrade $http_upgrade; 115 | proxy_set_header Connection "upgrade"; 116 | 117 | # 转发客户端请求头 118 | proxy_set_header Host $host; 119 | proxy_set_header X-Real-IP $remote_addr; 120 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 121 | proxy_set_header X-Forwarded-Proto $scheme; 122 | } 123 | } 124 | ``` 125 | 126 | 3. **启用站点并重启 Nginx** 127 | 128 | 创建一个符号链接启用站点配置,并重启 Nginx: 129 | 130 | ```bash 131 | sudo ln -s /etc/nginx/sites-available/example.com /etc/nginx/sites-enabled/ 132 | sudo nginx -t # 测试配置是否正确 133 | sudo systemctl restart nginx # 重启 Nginx 134 | ``` 135 | 136 | ### 3.2 测试反向代理 137 | 138 | 1. 通过 `https://example.com` 访问网站。 139 | 2. 验证 HTTP 请求自动重定向到 HTTPS,页面显示安全锁标志,且内容正常加载。 140 | 141 | --- 142 | 143 | ## 4. 定期更新和维护 144 | 145 | Let’s Encrypt 证书有效期为 90 天,Certbot 会自动创建续订任务。您可以手动测试自动续订: 146 | 147 | ```bash 148 | sudo certbot renew --dry-run 149 | ``` 150 | 151 | --- 152 | 153 | ## 示例测试和故障排除 154 | 155 | - **SSL 测试**:访问 [SSL Labs](https://www.ssllabs.com/ssltest/),输入您的域名进行 SSL 配置检查。 156 | - **日志查看**:在 `/var/log/nginx/error.log` 中查找 Nginx 错误日志,帮助诊断 SSL、代理和连接问题。 157 | 158 | --- 159 | 160 | ### 注意事项 161 | 1. 确保您的域名 DNS 设置已正确生效,访问域名时指向服务器 IP。 162 | 2. 定期检查和维护 Nginx 与 Certbot 版本,以确保安全性和兼容性。 163 | 164 | --- 165 | 166 | 以上步骤完成后,您的域名将通过 Nginx 反向代理为后端服务提供 HTTPS 安全访问。 167 | 168 | 169 | ## 补充:Nginx 配置详细说明 170 | 171 | 以下是 Nginx 配置模板,将特定项替换为通用变量,并包含详细注释说明: 172 | 173 | ```nginx 174 | # 配置 HTTP 到 HTTPS 重定向 175 | server { 176 | listen 80; 177 | server_name example.com; # 将此替换为您的域名,例如 example.com 178 | 179 | # 将所有 HTTP 请求重定向到 HTTPS 180 | return 301 https://$host$request_uri; 181 | } 182 | 183 | server { 184 | listen 443 ssl; 185 | server_name example.com; # 将此替换为您的域名 186 | 187 | # 配置 SSL 证书路径(替换为您的证书路径) 188 | ssl_certificate /path/to/ssl/fullchain.pem; # 例如 /etc/letsencrypt/live/example.com/fullchain.pem 189 | ssl_certificate_key /path/to/ssl/privkey.pem; # 例如 /etc/letsencrypt/live/example.com/privkey.pem 190 | 191 | # 允许的 TLS 协议版本 192 | ssl_protocols TLSv1.2 TLSv1.3; 193 | 194 | # 兼容 TLS 1.3 的加密套件 195 | ssl_ciphers "ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305"; 196 | ssl_prefer_server_ciphers off; 197 | 198 | # 其他 SSL 配置 199 | ssl_ecdh_curve X25519:P-256:P-384:P-521; 200 | ssl_session_cache shared:SSL:50m; 201 | ssl_session_timeout 10m; 202 | 203 | # 设置最大上传文件大小限制 204 | client_max_body_size 50M; 205 | 206 | # 代理到后端服务(例如 Gradio 的 HTTP 服务) 207 | location / { 208 | proxy_pass http://127.0.0.1:7860; # 将流量转发到本地运行在端口 7860 的 HTTP 服务 209 | proxy_http_version 1.1; 210 | 211 | # WebSocket 支持 212 | proxy_set_header Upgrade $http_upgrade; 213 | proxy_set_header Connection "upgrade"; 214 | 215 | # 转发客户端请求头 216 | proxy_set_header Host $host; 217 | proxy_set_header X-Real-IP $remote_addr; 218 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 219 | proxy_set_header X-Forwarded-Proto $scheme; 220 | } 221 | } 222 | ``` 223 | 224 | ### 配置项注释说明 225 | 226 | - **server_name**:设置服务器名称,通常为域名。将 `example.com` 替换为您自己的域名。 227 | - **ssl_certificate** 和 **ssl_certificate_key**:SSL 证书文件路径,指向 HTTPS 所需的证书和私钥文件。替换为实际证书路径。 228 | - **ssl_protocols**:允许的 TLS 协议版本。这里设置为只允许较新的 TLS 1.2 和 TLS 1.3。 229 | - **ssl_ciphers**:定义了服务器支持的加密套件,确保符合 TLS 1.3 要求并与常用浏览器兼容。 230 | - **ssl_prefer_server_ciphers**:设置为 `off`,让客户端选择其首选的加密套件。 231 | - **ssl_ecdh_curve**:定义服务器支持的椭圆曲线,用于 ECDH 密钥交换。 232 | - **client_max_body_size**:设置上传文件的大小限制,确保上传较大文件时不被拒绝。 233 | - **proxy_pass**:指向后端服务的 URL,在本例中是本地运行在 `127.0.0.1:7860` 的服务(如 Gradio)。 234 | - **proxy_set_header**:设置必要的请求头以支持 WebSocket 和客户端 IP 转发。 235 | 236 | ### 示例 237 | 238 | 假设域名为 `myapp.example.com`,证书路径位于 `/etc/letsencrypt/live/myapp.example.com/`,则配置为: 239 | 240 | ```nginx 241 | server { 242 | listen 80; 243 | server_name myapp.example.com; 244 | 245 | return 301 https://$host$request_uri; 246 | } 247 | 248 | server { 249 | listen 443 ssl; 250 | server_name myapp.example.com; 251 | 252 | ssl_certificate /etc/letsencrypt/live/myapp.example.com/fullchain.pem; 253 | ssl_certificate_key /etc/letsencrypt/live/myapp.example.com/privkey.pem; 254 | 255 | ssl_protocols TLSv1.2 TLSv1.3; 256 | ssl_ciphers "ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305"; 257 | ssl_prefer_server_ciphers off; 258 | 259 | ssl_ecdh_curve X25519:P-256:P-384:P-521; 260 | ssl_session_cache shared:SSL:50m; 261 | ssl_session_timeout 10m; 262 | 263 | client_max_body_size 50M; 264 | 265 | location / { 266 | proxy_pass http://127.0.0.1:7860; 267 | proxy_http_version 1.1; 268 | proxy_set_header Upgrade $http_upgrade; 269 | proxy_set_header Connection "upgrade"; 270 | proxy_set_header Host $host; 271 | proxy_set_header X-Real-IP $remote_addr; 272 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 273 | proxy_set_header X-Forwarded-Proto $scheme; 274 | } 275 | } 276 | ``` -------------------------------------------------------------------------------- /images/build_docker_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DjangoPeng/ChatPPT/6e0b7ded163b17277705d0f79270d14e99ce9756/images/build_docker_image.png -------------------------------------------------------------------------------- /images/chatppt_presentation_demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DjangoPeng/ChatPPT/6e0b7ded163b17277705d0f79270d14e99ce9756/images/chatppt_presentation_demo.jpg -------------------------------------------------------------------------------- /images/chatppt_presentation_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DjangoPeng/ChatPPT/6e0b7ded163b17277705d0f79270d14e99ce9756/images/chatppt_presentation_demo.png -------------------------------------------------------------------------------- /images/forecast.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DjangoPeng/ChatPPT/6e0b7ded163b17277705d0f79270d14e99ce9756/images/forecast.png -------------------------------------------------------------------------------- /images/performance_chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DjangoPeng/ChatPPT/6e0b7ded163b17277705d0f79270d14e99ce9756/images/performance_chart.png -------------------------------------------------------------------------------- /inputs/docx/multimodal_llm_overview.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DjangoPeng/ChatPPT/6e0b7ded163b17277705d0f79270d14e99ce9756/inputs/docx/multimodal_llm_overview.docx -------------------------------------------------------------------------------- /inputs/markdown/GitHubSentinel_intro.md: -------------------------------------------------------------------------------- 1 | # GitHub Sentinel 概述 2 | 3 | ## 什么是 GitHub Sentinel? 4 | - 专为大模型(LLMs)时代打造的智能信息检索和高价值内容挖掘 AI Agent 5 | - 目标用户:开源爱好者、个人开发者和投资人等 6 | 7 | ## 主要功能 8 | - **订阅管理**: 轻松管理和跟踪关注的 GitHub 仓库 9 | - **更新检索**: 自动汇总订阅仓库的最新动态 10 | 11 | ## 通知系统与报告生成 12 | - **通知系统**: 通过电子邮件等方式,实时通知项目最新进展 13 | - **报告生成**: 生成详细的项目进展报告,支持多种格式和模板 14 | 15 | ## 多模型支持 16 | - 结合 OpenAI 和 Ollama 模型 17 | - 生成自然语言项目报告,提供智能、精准的信息服务 18 | 19 | ## 定时任务与图形化界面 20 | - **定时任务**: 支持守护进程方式执行定时任务,确保信息及时获取 21 | - **图形化界面**: 基于 Gradio 实现易用的 GUI,降低使用门槛 22 | 23 | ## 容器化与持续集成 24 | - **容器化**: 支持 Docker 构建和容器化部署,便于不同环境下快速部署 25 | - **持续集成**: 完备的单元测试,支持生产级 CI/CD 流程,确保项目稳定性 26 | 27 | ## 扩展能力 28 | - 自动跟踪和分析 GitHub 开源项目动态 29 | - 可扩展到其他信息渠道,如 Hacker News 的热门话题 -------------------------------------------------------------------------------- /inputs/markdown/openai_canvas_intro.md: -------------------------------------------------------------------------------- 1 | # Introducing Canvas 2 | 3 | ## What is Canvas? 4 | - A new interface for working with ChatGPT on writing and coding projects 5 | - Opens in a separate window, allowing collaboration on a project 6 | 7 | ## Better Collaboration with ChatGPT 8 | - Limited by chat interface when working on editing and revisions 9 | - Canvas offers a new interface for this kind of work 10 | 11 | ## How Does Canvas Work? 12 | - You control the project, directly editing text or code 13 | - Menu of shortcuts to ask ChatGPT to adjust writing length, debug code, etc. 14 | - Can restore previous versions of your work using the back button in canvas 15 | 16 | ## Writing Shortcuts in Canvas 17 | - Suggest edits: Inline suggestions and feedback 18 | - Adjust the length: Edits document length to be shorter or longer 19 | - Change reading level: Adjusts reading level from Kindergarten to Graduate School 20 | - Add final polish: Checks for grammar, clarity, and consistency 21 | - Add emojis: Adds relevant emojis for emphasis and color 22 | 23 | ## Coding in Canvas 24 | - Easier to track and understand ChatGPT's changes 25 | - Planned improvements to transparency into these kinds of edits 26 | - Coding shortcuts: 27 | - Review code: Inline suggestions to improve your code 28 | - Add logs: Inserts print statements to help you debug and understand your code 29 | - Add comments: Adds comments to the code to make it easier to understand 30 | - Fix bugs: Detects and rewrites problematic code to resolve errors 31 | - Port to a language: Translates your code into JavaScript, TypeScript, Python, Java, C++, or PHP 32 | 33 | ## Training the Model for Collaboration 34 | - Trained GPT-4o to collaborate as a creative partner 35 | - Understands broader context to provide precise feedback and suggestions 36 | 37 | ## Core Behaviors of the Model 38 | - Triggering the canvas for writing and coding 39 | - Generating diverse content types 40 | - Making targeted edits 41 | - Rewriting documents 42 | - Providing inline critique 43 | 44 | ## Improving Canvas Decision Boundary 45 | - Improved correctly triggering the canvas decision boundary to 83% and 94% respectively for writing and coding tasks 46 | 47 | ## Canvas Edits Boundary - Writing & Coding 48 | - GPT-4o with canvas performs better than a baseline prompted GPT-4o by 18% 49 | 50 | ## Training the Model to Generate High-Quality Comments 51 | - Used human evaluations to assess comment quality and accuracy 52 | - Outperforms zero-shot GPT-4o with prompted instructions by 30% in accuracy and 16% in quality 53 | 54 | ## What's Next? 55 | - Rethinking how we interact with AI requires updates like Canvas 56 | - Canvas is in early beta, and rapid improvements are planned -------------------------------------------------------------------------------- /inputs/markdown/test_input.md: -------------------------------------------------------------------------------- 1 | # ChatPPT Demo 2 | 3 | ## 2024 业绩概述 4 | - 总收入增长15% 5 | - 市场份额扩大至30% 6 | 7 | ## 业绩图表 8 | - OpenAI 利润不断增加 9 | ![业绩图表](images/performance_chart.png) 10 | 11 | ## 新产品发布 12 | - 产品A: **特色功能介绍** 13 | - 增长潜力巨大 14 | - 新兴市场 15 | - **非洲**市场 16 | - **东南亚**市场 17 | - 产品B: 市场定位 18 | ![未来增长](images/forecast.png) -------------------------------------------------------------------------------- /jupyter/pptx_quickstart.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "d808e8cb-c602-444c-a0a1-125306eb2746", 6 | "metadata": {}, 7 | "source": [ 8 | "# python-pptx 库快速入门\n", 9 | "\n", 10 | "`python-pptx` 是一个用于创建和修改 PowerPoint (.pptx) 文件的 Python 库。它允许用户通过代码动态生成演示文稿,适合自动化报告、演示和其他需要生成 PPT 的场景。\n", 11 | "\n", 12 | "以下是对 `python-pptx` 库的介绍以及如何将其抽象与 PowerPoint 母版中的内容、布局等概念对应起来的说明。\n", 13 | "\n", 14 | "### `python-pptx` 库简介\n", 15 | "\n", 16 | "- **安装**:\n", 17 | "\n", 18 | "```bash\n", 19 | " pip install python-pptx\n", 20 | "```\n", 21 | "\n", 22 | "- **功能**:\n", 23 | " - 创建新的 PowerPoint 文件。\n", 24 | " - 修改现有的 PPTX 文件。\n", 25 | " - 添加文本框、图片、图表、表格等各种元素。\n", 26 | " - 自定义幻灯片的布局和格式。\n", 27 | "\n", 28 | "### `python-pptx` 库抽象与母版中的概念对应\n", 29 | "\n", 30 | "在 `python-pptx` 中,可以将一些抽象概念与 PowerPoint 母版中的内容和布局对应起来:\n", 31 | "\n", 32 | "- **母版(Master Slide)**:\n", 33 | " - 在 `python-pptx` 中,可以通过 `presentation.slide_master` 来访问母版。母版包含了幻灯片的基本格式和样式,可以定义统一的外观。\n", 34 | "\n", 35 | "- **布局(Layouts)**:\n", 36 | " - 使用 `presentation.slide_layouts` 可以访问不同的幻灯片布局,例如标题幻灯片、内容幻灯片等。每种布局都有预定义的占位符,可以用于快速插入内容。\n", 37 | "\n", 38 | "- **内容类型(Content Types)**:\n", 39 | " - 对应于文本框、图片、图表等内容类型,可以使用 `add_textbox()`、`add_picture()`、`add_table()`、`add_chart()` 等方法来添加这些元素。\n", 40 | "\n", 41 | "---" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "id": "a5b8b4b6-8934-4603-88e0-7fe39211df3f", 47 | "metadata": {}, 48 | "source": [ 49 | "## `python-pptx` 库核心 数据结构与方法\n", 50 | "\n", 51 | "在 `python-pptx` 库中,主要的数据结构和方法帮助用户创建和修改 PowerPoint 演示文稿。以下是对上述代码中使用的数据结构和方法的详细介绍,包括 `Presentation`、`SlideLayout`、`Slide`、`Shape`、`Placeholder` 和 `TextFrame` 等。\n", 52 | "\n", 53 | "### 1. `Presentation` 类\n", 54 | "\n", 55 | "- **概述**:\n", 56 | "\n", 57 | "`Presentation` 是 `python-pptx` 中的核心类,用于表示一个 PowerPoint 演示文稿。\n", 58 | "\n", 59 | "- **构造方法**:\n", 60 | "\n", 61 | "创建一个新的空演示文稿。\n", 62 | "\n", 63 | "```python\n", 64 | "presentation = Presentation()\n", 65 | "```\n", 66 | "\n", 67 | "如果要打开现有的 PPTX 文件,可以传递文件路径:\n", 68 | "\n", 69 | "```python\n", 70 | "presentation = Presentation(\"existing_file.pptx\")\n", 71 | "```\n", 72 | "\n", 73 | "保存 PPTX 文件:\n", 74 | "\n", 75 | "```python\n", 76 | "presentation.save(\"example_presentation.pptx\")\n", 77 | "```" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 1, 83 | "id": "006df3d4-36e7-455d-aa16-da5ef8da00e6", 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "from pptx import Presentation\n", 88 | "from pptx.util import Inches" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 2, 94 | "id": "eb9e4ec4-f584-45b4-a60a-c366b78b3b67", 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "# 创建一个新的 PowerPoint 文件\n", 99 | "presentation = Presentation()\n", 100 | "\n", 101 | "# 保存 PPTX 文件\n", 102 | "presentation.save(\"empty_presentation.pptx\")" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 3, 108 | "id": "7b1e2e67-f6ae-4898-9d63-13386940b592", 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "# 打开现有的 PPTX 文件,可以传递文件路径\n", 113 | "presentation = Presentation(\"../outputs/ChatPPT_Demo.pptx\")\n", 114 | "\n", 115 | "# 保存刚打开的 PPTX 文件\n", 116 | "presentation.save(\"ChatPPT_Demo.pptx\")" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "id": "b4a352b5-6eb9-4477-8cc6-b627b51e70c0", 122 | "metadata": {}, 123 | "source": [ 124 | "### 2. `SlideLayout` 布局\n", 125 | "\n", 126 | "`SlideLayout` 表示一个幻灯片的布局,包含一组预定义的占位符和格式设置,定义了幻灯片的基本结构。\n", 127 | "\n", 128 | "**获取布局**:\n", 129 | "\n", 130 | "`presentation.slide_layouts`:这是一个布局列表,包含了所有可用的幻灯片布局。\n", 131 | "\n", 132 | "\n", 133 | "**单个布局**:\n", 134 | "\n", 135 | "```python\n", 136 | "slide_layout = presentation.slide_layouts[0] # 标题幻灯片布局\n", 137 | "slide_layout = presentation.slide_layouts[1] # 内容幻灯片\n", 138 | "```" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 4, 144 | "id": "3ff7f338-cf69-4e45-b276-60b9ad405960", 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "template = Presentation(\"../templates/MasterTemplate.pptx\")" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 5, 154 | "id": "7cc18e3e-5244-44a0-8fa7-53c0ed0d68f3", 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "text/plain": [ 160 | "" 161 | ] 162 | }, 163 | "execution_count": 5, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "template.slide_layouts" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 6, 175 | "id": "473ff232-77e4-49d0-940c-98a2d53e2ab0", 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/plain": [ 181 | "" 182 | ] 183 | }, 184 | "execution_count": 6, 185 | "metadata": {}, 186 | "output_type": "execute_result" 187 | } 188 | ], 189 | "source": [ 190 | "slide_layout = template.slide_layouts[0]\n", 191 | "slide_layout" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 7, 197 | "id": "7d15b566-1c2c-45d1-8c71-7e5e59b4c917", 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "'Title Only'" 204 | ] 205 | }, 206 | "execution_count": 7, 207 | "metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [ 212 | "slide_layout.name" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 8, 218 | "id": "adc17d40-3579-4bb2-836c-878e54bca2ce", 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "name": "stdout", 223 | "output_type": "stream", 224 | "text": [ 225 | "Title Only\n", 226 | "Title and Content\n", 227 | "Title and Picture 1\n", 228 | "Title, Content, and Picture\n", 229 | "Title and 2 Column 1\n", 230 | "Tilte and Content 1 \n" 231 | ] 232 | } 233 | ], 234 | "source": [ 235 | "for s in template.slide_layouts:\n", 236 | " print(s.name)" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 9, 242 | "id": "eadceced-5f37-4948-b143-afccb9171517", 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "slide_layout = template.slide_layouts[-2]" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 10, 252 | "id": "1b6060c2-c335-4958-95d0-2dcefcef0eb1", 253 | "metadata": {}, 254 | "outputs": [ 255 | { 256 | "data": { 257 | "text/plain": [ 258 | "'Title and 2 Column 1'" 259 | ] 260 | }, 261 | "execution_count": 10, 262 | "metadata": {}, 263 | "output_type": "execute_result" 264 | } 265 | ], 266 | "source": [ 267 | "slide_layout.name" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "id": "74987989-87a9-422b-95ab-c2266746fa87", 273 | "metadata": {}, 274 | "source": [ 275 | "### 3. `Placeholder` 占位符\n", 276 | "\n", 277 | "`Placeholder` 是一个重要的概念,用于指示幻灯片中应该放置的内容类型(如标题、内容、图片等)。\n", 278 | "\n", 279 | "每个布局和页面都有预定义的占位符列表(`placeholders`),可以用来快速添加内容。\n", 280 | "\n", 281 | "![placeholder](../images/placeholder.png)\n", 282 | "\n", 283 | "- **属性**:\n", 284 | " - `name`:占位符的名称(如标题、内容)。\n", 285 | "\n", 286 | "- **示例**:\n", 287 | "\n", 288 | "```python\n", 289 | "title = slide.placeholders[0] # 获取标题占位符\n", 290 | "content = slide.placeholders[1] # 获取内容占位符\n", 291 | "```" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 11, 297 | "id": "da682d56-b515-407e-aa1a-c05c60b11522", 298 | "metadata": {}, 299 | "outputs": [ 300 | { 301 | "name": "stdout", 302 | "output_type": "stream", 303 | "text": [ 304 | "Title 1\n", 305 | "Content Placeholder 4\n", 306 | "Content Placeholder 4\n", 307 | "Footer Placeholder 4\n", 308 | "Slide Number Placeholder 5\n" 309 | ] 310 | } 311 | ], 312 | "source": [ 313 | "for p in slide_layout.placeholders:\n", 314 | " print(p.name)" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 12, 320 | "id": "367d1e17-acdc-4864-a357-024a73c9b803", 321 | "metadata": {}, 322 | "outputs": [ 323 | { 324 | "data": { 325 | "text/plain": [ 326 | "pptx.shapes.placeholder.LayoutPlaceholder" 327 | ] 328 | }, 329 | "execution_count": 12, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "# LayoutPlaceholder 类型\n", 336 | "type(slide_layout.placeholders[0])" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "id": "f19fcc5d-b3ed-4d51-afe4-b335ac556127", 342 | "metadata": { 343 | "tags": [] 344 | }, 345 | "source": [ 346 | "\n", 347 | "### 【深入理解】Placeholder 类的继承关系(UML 类图)\n", 348 | "\n", 349 | "在 UML 类图中,我们通常使用以下符号来表示类之间的关系:\n", 350 | "\n", 351 | "- 正方形表示类名。\n", 352 | "- 三个部分的矩形表示类的属性和方法。\n", 353 | "- 箭头表示关系,如继承、关联、聚合和组合。\n", 354 | "\n", 355 | "在这个UML类图中:\n", 356 | "\n", 357 | "- **Placeholder** 是一个抽象的基类,包含了所有占位符共有的属性和方法。\n", 358 | "- **MasterPlaceholder** 继承自 **Placeholder**,添加了 `name` 属性,并提供了 `add_slide` 方法。\n", 359 | "- **LayoutPlaceholder** 继承自 **MasterPlaceholder**,添加了用于插入内容的方法,如 `insert_picture` 和 `insert_table`。\n", 360 | "- **PicturePlaceholder** 和 **TablePlaceholder** 都是 **LayoutPlaceholder** 的子类,它们分别添加了用于设置图片和表格的特定方法,如 `set_picture` 和 `set_table`。\n", 361 | "\n", 362 | "\n", 363 | "\n", 364 | "```\n", 365 | "+-------------------+\n", 366 | "| Placeholder |\n", 367 | "+-------------------+\n", 368 | "| - idx |\n", 369 | "| - shape_type |\n", 370 | "+-------------------+\n", 371 | "| + method1() |\n", 372 | "| + method2() |\n", 373 | "+-------------------+\n", 374 | " ^\n", 375 | " |\n", 376 | "+-------------------+\n", 377 | "| MasterPlaceholder |\n", 378 | "+-------------------+\n", 379 | "| - name |\n", 380 | "+-------------------+\n", 381 | "| + add_slide() |\n", 382 | "+-------------------+\n", 383 | " ^\n", 384 | " |\n", 385 | "+-------------------+\n", 386 | "| LayoutPlaceholder |\n", 387 | "+-------------------+\n", 388 | "| + insert_picture() |\n", 389 | "| + insert_table() |\n", 390 | "+-------------------+\n", 391 | " |\n", 392 | " +-------------------+\n", 393 | " | |\n", 394 | " | |\n", 395 | " v v\n", 396 | "+-------------------+ +-------------------+\n", 397 | "| PicturePlaceholder | | TablePlaceholder |\n", 398 | "+-------------------+ +-------------------+\n", 399 | "| + set_picture() | | + set_table() |\n", 400 | "+-------------------+ +-------------------+\n", 401 | "```\n", 402 | "\n", 403 | "\n", 404 | "请注意,这个类图是简化的,实际的Python-pptx库中的类可能包含更多的属性和方法。此外,UML类图通常包含更多的细节,如可见性(如 `+` 表示公共,`-` 表示私有)和关系类型(如泛化、实现、关联、依赖等)。在这个简化的示例中,只展示了泛化(继承)关系,并且所有的属性和方法都被假设为公共的。" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "id": "d184f05f-24c2-4216-92a8-a045d8357c25", 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "id": "047240b1-bd04-4b17-aa88-be81bae594e9", 418 | "metadata": {}, 419 | "source": [ 420 | "### 4. `Slide` 幻灯片\n", 421 | "\n", 422 | "`Slide` 表示 PowerPoint 演示文稿中的一张幻灯片。\n", 423 | "\n", 424 | "- **属性**:\n", 425 | " - `shapes`:返回该幻灯片中的所有形状,包括文本框、图像、图表等。\n", 426 | " - `slide_layout`:返回该幻灯片的布局对象,指示使用的布局类型。\n", 427 | " \n", 428 | "- **方法**:\n", 429 | " - `shapes.add_shape()`:在t 特定幻灯片上添加一个形状(例如,矩形或圆形)。\n", 430 | " \n", 431 | "- **示例**:\n", 432 | "\n", 433 | "```python\n", 434 | "slide = presentation.slides.add_slide(slide_layout) # 添加一张幻灯片\n", 435 | "```\n", 436 | "\n", 437 | "#### `Shape` 形状\n", 438 | "\n", 439 | "- **概述**:`Shape` 表示幻灯片中的一个形状,可以是文本框、图片、图表、SmartArt、表格等。每个 `Shape` 对象都具有位置、大小和样式属性。\n", 440 | "\n", 441 | "- **属性**:\n", 442 | " - `name`: 形状名称,对应 placeholder。\n", 443 | " - `left`、`top`、`width`、`height`:定义形状的位置和尺寸。\n", 444 | " - `text`:如果是文本框,可以访问或修改其内容。\n", 445 | " \n", 446 | "- **方法**:\n", 447 | " - `add_textbox(left, top, width, height)`:用于在幻灯片上添加一个文本框。\n", 448 | " - `add_picture(image_path, left, top, width=None, height=None)`:用于添加图片。\n", 449 | " \n", 450 | "- **示例**:\n", 451 | "\n", 452 | " ```python\n", 453 | " textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(5), Inches(2)) # 添加文本框\n", 454 | " ```" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": 13, 460 | "id": "e2c83967-7957-4997-980c-c21ee20b5539", 461 | "metadata": {}, 462 | "outputs": [], 463 | "source": [ 464 | "presentation = Presentation(\"../outputs/ChatPPT Demo.pptx\")" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": 14, 470 | "id": "d2ad396d-3e15-4ccf-88ce-632d2f129e2d", 471 | "metadata": {}, 472 | "outputs": [], 473 | "source": [ 474 | "slide = presentation.slides[0]" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": 15, 480 | "id": "4b188c7c-0a35-48cd-8446-14885a67013a", 481 | "metadata": {}, 482 | "outputs": [ 483 | { 484 | "data": { 485 | "text/plain": [ 486 | "'ChatPPT Demo'" 487 | ] 488 | }, 489 | "execution_count": 15, 490 | "metadata": {}, 491 | "output_type": "execute_result" 492 | } 493 | ], 494 | "source": [ 495 | "slide.shapes[0].text" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": 16, 501 | "id": "72b575da-dc37-4d02-86bb-b42483c049d3", 502 | "metadata": {}, 503 | "outputs": [ 504 | { 505 | "name": "stdout", 506 | "output_type": "stream", 507 | "text": [ 508 | "slide id:0\n", 509 | "shape name:Title 1\n", 510 | "slide id:1\n", 511 | "shape name:Title 1\n", 512 | "shape name:Content Placeholder 2\n", 513 | "slide id:2\n", 514 | "shape name:Title 1\n", 515 | "shape name:Content Placeholder 2\n", 516 | "shape name:Picture Placeholder 3\n", 517 | "slide id:3\n", 518 | "shape name:Title 1\n", 519 | "shape name:Content Placeholder 2\n", 520 | "shape name:Picture Placeholder 3\n" 521 | ] 522 | } 523 | ], 524 | "source": [ 525 | "# 打印每页形状名称\n", 526 | "for idx, slide in enumerate(presentation.slides):\n", 527 | " print(f\"slide id:{idx}\")\n", 528 | " for shape in slide.shapes:\n", 529 | " print(f\"shape name:{shape.name}\")" 530 | ] 531 | }, 532 | { 533 | "cell_type": "markdown", 534 | "id": "5e9e798d-466a-4714-9a28-7245b55ac6dd", 535 | "metadata": {}, 536 | "source": [ 537 | "### ChatPPT Demo.pptx 文件\n", 538 | "\n", 539 | "![slides](../images/slides.png)\n", 540 | " " 541 | ] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": 17, 546 | "id": "05ff59b8-9bdc-4a4f-bc9c-96b6ccae8514", 547 | "metadata": {}, 548 | "outputs": [ 549 | { 550 | "name": "stdout", 551 | "output_type": "stream", 552 | "text": [ 553 | "[slide id]:0\n", 554 | "shape name:Title 1\n", 555 | "shape text:ChatPPT Demo\n", 556 | "\n", 557 | "\n", 558 | "[slide id]:1\n", 559 | "shape name:Title 1\n", 560 | "shape text:2024 业绩概述\n", 561 | "\n", 562 | "\n", 563 | "shape name:Content Placeholder 2\n", 564 | "shape text:\n", 565 | "总收入增长15%\n", 566 | "市场份额扩大至30%\n", 567 | "\n", 568 | "\n", 569 | "[slide id]:2\n", 570 | "shape name:Title 1\n", 571 | "shape text:业绩图表\n", 572 | "\n", 573 | "\n", 574 | "shape name:Content Placeholder 2\n", 575 | "shape text:\n", 576 | "OpenAI 利润不断增加\n", 577 | "\n", 578 | "\n", 579 | "shape name:Picture Placeholder 3\n" 580 | ] 581 | }, 582 | { 583 | "ename": "AttributeError", 584 | "evalue": "'PlaceholderPicture' object has no attribute 'text'", 585 | "output_type": "error", 586 | "traceback": [ 587 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 588 | "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", 589 | "Input \u001b[0;32mIn [17]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m shape \u001b[38;5;129;01min\u001b[39;00m slide\u001b[38;5;241m.\u001b[39mshapes:\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mshape name:\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mshape\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mshape text:\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mshape\u001b[38;5;241m.\u001b[39mtext\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", 590 | "\u001b[0;31mAttributeError\u001b[0m: 'PlaceholderPicture' object has no attribute 'text'" 591 | ] 592 | } 593 | ], 594 | "source": [ 595 | "# 打印每页形状 名称和文本,如果是非文本(如 PlaceholderPicture)将会报错\n", 596 | "for idx, slide in enumerate(presentation.slides):\n", 597 | " print(f\"[slide id]:{idx}\")\n", 598 | " for shape in slide.shapes:\n", 599 | " print(f\"shape name:{shape.name}\")\n", 600 | " print(f\"shape text:{shape.text}\")\n", 601 | " print(\"\\n\")" 602 | ] 603 | }, 604 | { 605 | "cell_type": "markdown", 606 | "id": "4239cf1d-d7eb-4bef-92dd-c45d46362e09", 607 | "metadata": {}, 608 | "source": [ 609 | "### 【深入理解】Presentation 和 SlideMaster 类继承关系的 UML 类图\n", 610 | "\n", 611 | "在这个 UML 类图中:\n", 612 | "\n", 613 | "1. **`Presentation`** 类是顶层对象,它包含多个 `Slides` 对象。\n", 614 | "2. **`Slides`** 类是一个幻灯片集合,通过它可以添加或访问单独的 `Slide` 对象。\n", 615 | "3. **`Slide`** 类代表单个幻灯片,它包含形状(`Shapes`)和占位符(`SlidePlaceholders`),并且它通过布局(`SlideLayout`)来定义外观。\n", 616 | "4. **`SlideMaster`** 类包含多个 `SlideLayouts`,它定义了幻灯片的母版布局。\n", 617 | "5. **`SlideLayout`** 类定义了幻灯片的布局结构,其中有占位符和形状。\n", 618 | "6. **`Shape`** 类代表幻灯片中的形状或文本框等内容。\n", 619 | "\n", 620 | "\n", 621 | "```\n", 622 | "+------------------------+\n", 623 | "| Presentation |\n", 624 | "+------------------------+\n", 625 | "| - slides: Slides |\n", 626 | "| - slide_masters: SlideMasters |\n", 627 | "| - slide_layouts: SlideLayouts |\n", 628 | "| - core_properties: CoreProperties |\n", 629 | "+------------------------+\n", 630 | "| + save(file: str) |\n", 631 | "+------------------------+\n", 632 | " |\n", 633 | " v\n", 634 | "+--------------------+\n", 635 | "| Slides |\n", 636 | "+--------------------+\n", 637 | "| - slides: Slide[] |\n", 638 | "+--------------------+\n", 639 | "| + add_slide(layout: SlideLayout) -> Slide |\n", 640 | "| + get(slide_id: int) -> Slide | None |\n", 641 | "+--------------------+\n", 642 | " |\n", 643 | " v\n", 644 | "+--------------------+\n", 645 | "| Slide |\n", 646 | "+--------------------+\n", 647 | "| - slide_id: int |\n", 648 | "| - shapes: Shapes |\n", 649 | "| - placeholders: SlidePlaceholders |\n", 650 | "| - slide_layout: SlideLayout |\n", 651 | "+--------------------+\n", 652 | "| + add_shape(shape: Shape) |\n", 653 | "| + add_picture(image: Picture) |\n", 654 | "| + add_table(rows: int, cols: int) |\n", 655 | "+--------------------+\n", 656 | " |\n", 657 | " v\n", 658 | "+--------------------+\n", 659 | "| SlideMaster |\n", 660 | "+--------------------+\n", 661 | "| - slide_layouts: SlideLayouts[] |\n", 662 | "+--------------------+\n", 663 | "| + get_by_name(name: str) -> SlideLayout |\n", 664 | "| + index(slide_layout: SlideLayout) -> int |\n", 665 | "+--------------------+\n", 666 | " |\n", 667 | " v\n", 668 | "+--------------------+\n", 669 | "| SlideLayouts |\n", 670 | "+--------------------+\n", 671 | "| - layouts: SlideLayout[] |\n", 672 | "+--------------------+\n", 673 | "| + remove(slide_layout: SlideLayout) |\n", 674 | "+--------------------+\n", 675 | " |\n", 676 | " v\n", 677 | "+--------------------+\n", 678 | "| SlideLayout |\n", 679 | "+--------------------+\n", 680 | "| - placeholders: SlidePlaceholders[] |\n", 681 | "| - shapes: Shapes[] |\n", 682 | "| - slide_master: SlideMaster |\n", 683 | "+--------------------+\n", 684 | " |\n", 685 | " v\n", 686 | "+--------------------+\n", 687 | "| Shape |\n", 688 | "+--------------------+\n", 689 | "| - name: str |\n", 690 | "| - fill: FillFormat |\n", 691 | "| - line: LineFormat |\n", 692 | "+--------------------+\n", 693 | "| + add_textbox(left, top, width, height) |\n", 694 | "| + add_picture(image_file: str) |\n", 695 | "+--------------------+\n", 696 | "```\n" 697 | ] 698 | }, 699 | { 700 | "cell_type": "code", 701 | "execution_count": 18, 702 | "id": "81b5faff-3df3-45ae-b534-1d3f955943ad", 703 | "metadata": {}, 704 | "outputs": [ 705 | { 706 | "name": "stdout", 707 | "output_type": "stream", 708 | "text": [ 709 | "Slide ID: 256\n", 710 | " Layout: \n", 711 | " Shapes: 1 shapes\n", 712 | " Placeholders: 1 placeholders\n", 713 | " Shape Details:\n", 714 | " - Shape Name: Title 1, Type: PLACEHOLDER (14)\n", 715 | " Placeholder Details:\n", 716 | " - Placeholder ID: 0, Type: TITLE (1)\n", 717 | "\n", 718 | "\n", 719 | "Slide ID: 257\n", 720 | " Layout: \n", 721 | " Shapes: 2 shapes\n", 722 | " Placeholders: 2 placeholders\n", 723 | " Shape Details:\n", 724 | " - Shape Name: Title 1, Type: PLACEHOLDER (14)\n", 725 | " - Shape Name: Content Placeholder 2, Type: PLACEHOLDER (14)\n", 726 | " Placeholder Details:\n", 727 | " - Placeholder ID: 0, Type: TITLE (1)\n", 728 | " - Placeholder ID: 10, Type: OBJECT (7)\n", 729 | "\n", 730 | "\n", 731 | "Slide ID: 258\n", 732 | " Layout: \n", 733 | " Shapes: 3 shapes\n", 734 | " Placeholders: 3 placeholders\n", 735 | " Shape Details:\n", 736 | " - Shape Name: Title 1, Type: PLACEHOLDER (14)\n", 737 | " - Shape Name: Content Placeholder 2, Type: PLACEHOLDER (14)\n", 738 | " - Shape Name: Picture Placeholder 3, Type: PLACEHOLDER (14)\n", 739 | " Placeholder Details:\n", 740 | " - Placeholder ID: 0, Type: TITLE (1)\n", 741 | " - Placeholder ID: 10, Type: PICTURE (18)\n", 742 | " - Placeholder ID: 35, Type: OBJECT (7)\n", 743 | "\n", 744 | "\n", 745 | "Slide ID: 259\n", 746 | " Layout: \n", 747 | " Shapes: 3 shapes\n", 748 | " Placeholders: 3 placeholders\n", 749 | " Shape Details:\n", 750 | " - Shape Name: Title 1, Type: PLACEHOLDER (14)\n", 751 | " - Shape Name: Content Placeholder 2, Type: PLACEHOLDER (14)\n", 752 | " - Shape Name: Picture Placeholder 3, Type: PLACEHOLDER (14)\n", 753 | " Placeholder Details:\n", 754 | " - Placeholder ID: 0, Type: TITLE (1)\n", 755 | " - Placeholder ID: 10, Type: PICTURE (18)\n", 756 | " - Placeholder ID: 35, Type: OBJECT (7)\n", 757 | "\n", 758 | "\n" 759 | ] 760 | } 761 | ], 762 | "source": [ 763 | "# 完整打印 Slides 每一页的所有属性\n", 764 | "for s in presentation.slides:\n", 765 | " print(f\"Slide ID: {s.slide_id}\")\n", 766 | " print(f\" Layout: {s.slide_layout}\")\n", 767 | " print(f\" Shapes: {len(s.shapes)} shapes\")\n", 768 | " print(f\" Placeholders: {len(s.placeholders)} placeholders\")\n", 769 | "\n", 770 | " print(\" Shape Details:\")\n", 771 | " for shape in s.shapes:\n", 772 | " print(f\" - Shape Name: {shape.name}, Type: {shape.shape_type}\")\n", 773 | "\n", 774 | " print(\" Placeholder Details:\")\n", 775 | " for placeholder in s.placeholders:\n", 776 | " print(f\" - Placeholder ID: {placeholder.placeholder_format.idx}, Type: {placeholder.placeholder_format.type}\")\n", 777 | "\n", 778 | " print(\"\\n\") # Adding a new line between slides for better readability" 779 | ] 780 | }, 781 | { 782 | "cell_type": "markdown", 783 | "id": "4c94a232-8e63-40f5-9516-fcfb2a345491", 784 | "metadata": {}, 785 | "source": [ 786 | "#### 输出说明:\n", 787 | "1. **Slide ID**: 每张幻灯片的唯一标识符。\n", 788 | "2. **Layout**: 使用的幻灯片布局对象。\n", 789 | "3. **Shapes**: 输出该幻灯片中的形状数量,并列出每个形状的详细信息(名称和类型)。\n", 790 | "4. **Placeholders**: 输出该幻灯片中的占位符数量,并列出每个占位符的 `ID` 和类型。\n", 791 | "\n", 792 | "---\n", 793 | "\n", 794 | "### 新增一页内容" 795 | ] 796 | }, 797 | { 798 | "cell_type": "code", 799 | "execution_count": 19, 800 | "id": "0b79cfe5-1150-4b25-b6d2-3396cfa8ff90", 801 | "metadata": {}, 802 | "outputs": [], 803 | "source": [ 804 | "# 使用 Slide_ID 获取指定页面\n", 805 | "last_slide_layout = presentation.slides.get(slide_id=259).slide_layout\n", 806 | "\n", 807 | "# 新增一页幻灯片\n", 808 | "new_slide = presentation.slides.add_slide(last_slide_layout)" 809 | ] 810 | }, 811 | { 812 | "cell_type": "code", 813 | "execution_count": 20, 814 | "id": "37a278b2-401c-4df8-9f36-1ea63b765ec0", 815 | "metadata": {}, 816 | "outputs": [ 817 | { 818 | "data": { 819 | "text/plain": [ 820 | "5" 821 | ] 822 | }, 823 | "execution_count": 20, 824 | "metadata": {}, 825 | "output_type": "execute_result" 826 | } 827 | ], 828 | "source": [ 829 | "# 总页数变成了 5\n", 830 | "len(presentation.slides)" 831 | ] 832 | }, 833 | { 834 | "cell_type": "code", 835 | "execution_count": 21, 836 | "id": "e098963e-009c-42ae-872b-276d101ebbc4", 837 | "metadata": {}, 838 | "outputs": [], 839 | "source": [ 840 | "# 修改新增页标题" 841 | ] 842 | }, 843 | { 844 | "cell_type": "code", 845 | "execution_count": 22, 846 | "id": "99e7b2e2-a430-4b90-9f36-0af77226bf8e", 847 | "metadata": {}, 848 | "outputs": [ 849 | { 850 | "data": { 851 | "text/plain": [ 852 | "'Title 1'" 853 | ] 854 | }, 855 | "execution_count": 22, 856 | "metadata": {}, 857 | "output_type": "execute_result" 858 | } 859 | ], 860 | "source": [ 861 | "new_slide.shapes[0].name" 862 | ] 863 | }, 864 | { 865 | "cell_type": "code", 866 | "execution_count": 23, 867 | "id": "1e61a4cb-28d7-4815-bad9-2f551fdc22b0", 868 | "metadata": {}, 869 | "outputs": [], 870 | "source": [ 871 | "new_slide.shapes[0].text = \"测试新增页面标题\"" 872 | ] 873 | }, 874 | { 875 | "cell_type": "code", 876 | "execution_count": 24, 877 | "id": "d1087ec4-f0fa-4d45-89cd-8ce5fb0fa31b", 878 | "metadata": {}, 879 | "outputs": [], 880 | "source": [ 881 | "presentation.save(\"ChatPPT_update.pptx\")" 882 | ] 883 | }, 884 | { 885 | "cell_type": "code", 886 | "execution_count": null, 887 | "id": "c7ac64ac-ecdd-42dd-98b0-ac60f8d65d40", 888 | "metadata": {}, 889 | "outputs": [], 890 | "source": [] 891 | }, 892 | { 893 | "cell_type": "markdown", 894 | "id": "a808713e-c8eb-48c9-8b59-4fbfc6f94de3", 895 | "metadata": {}, 896 | "source": [ 897 | "## 添加页面内容的方法\n", 898 | "\n", 899 | "### 文本`TextFrame`\n", 900 | "\n", 901 | "- **概述**:`TextFrame` 表示一个文本框,包含文本和相关的格式设置。每个 `TextFrame` 可以包含多段文本。\n", 902 | "- **属性**:\n", 903 | " - `text`:获取或设置文本框的文本内容。\n", 904 | " - `paragraphs`:返回文本框中的所有段落,允许对每段进行单独格式化。\n", 905 | "- **示例**:\n", 906 | "\n", 907 | "```python\n", 908 | "text_frame = textbox.text_frame # 获取文本框的文本框架\n", 909 | "text_frame.text = \"这是一段文本\" # 设置文本内容\n", 910 | "```\n", 911 | "\n", 912 | "#### **文本框**\n", 913 | "\n", 914 | "在幻灯片上添加一个文本框:`add_textbox(left, top, width, height)`\n", 915 | "\n", 916 | "```python\n", 917 | "left = Inches(1) # 位置\n", 918 | "top = Inches(1)\n", 919 | "width = Inches(5)\n", 920 | "height = Inches(2)\n", 921 | "textbox = slide.shapes.add_textbox(left, top, width, height)\n", 922 | "text_frame = textbox.text_frame # 获取文本框内容\n", 923 | "text_frame.text = \"这是一段文本\"\n", 924 | "```\n", 925 | "\n", 926 | "#### **段落 `Paragraph`**\n", 927 | "\n", 928 | "- **概述**:`Paragraph` 表示 `TextFrame` 中的单个段落。\n", 929 | "- **属性**:\n", 930 | " - `text`:获取或设置段落的文本内容。\n", 931 | " - `font`:获取段落的字体设置,可以进行字体样式、大小和颜色的调整。\n", 932 | "- **示例**:\n", 933 | "\n", 934 | "```python\n", 935 | "paragraph = text_frame.add_paragraph() # 添加新段落\n", 936 | "paragraph.text = \"这是新的段落内容。\" # 设置段落文本\n", 937 | "```\n", 938 | "\n", 939 | "#### **字体 `Font`**\n", 940 | "\n", 941 | "- **概述**:`Font` 表示字体样式,允许用户设置文本的字体样式、大小、颜色等。\n", 942 | "- **属性**:\n", 943 | " - `name`:设置字体名称。\n", 944 | " - `size`:设置字体大小(使用 `Pt` 单位)。\n", 945 | " - `bold`、`italic`、`underline`:设置字体的粗体、斜体和下划线样式。\n", 946 | "- **示例**:\n", 947 | "\n", 948 | "```python\n", 949 | "from pptx.util import Pt, RGBColor\n", 950 | "\n", 951 | "run = paragraph.add_run() # 添加文本运行\n", 952 | "run.text = \"这是加粗的文本。\"\n", 953 | "run.font.bold = True # 设置为粗体\n", 954 | "run.font.size = Pt(16) # 设置字体大小\n", 955 | "run.font.color.rgb = RGBColor(255, 0, 0) # 设置字体颜色为红色\n", 956 | "```" 957 | ] 958 | }, 959 | { 960 | "cell_type": "markdown", 961 | "id": "b4826e67-65f3-485b-99cd-0a60c4efb246", 962 | "metadata": {}, 963 | "source": [ 964 | "### 新增文本(整合以上方法)" 965 | ] 966 | }, 967 | { 968 | "cell_type": "code", 969 | "execution_count": 25, 970 | "id": "8fad57e5-d2d4-4780-b925-9b49d57ebde6", 971 | "metadata": {}, 972 | "outputs": [ 973 | { 974 | "name": "stdout", 975 | "output_type": "stream", 976 | "text": [ 977 | "Slide ID: 261\n", 978 | " Layout: \n", 979 | " Shapes: 2 shapes\n", 980 | " Placeholders: 2 placeholders\n", 981 | " Shape Details:\n", 982 | " - Shape Name: Title 1, Type: PLACEHOLDER (14)\n", 983 | " - Shape Name: Content Placeholder 2, Type: PLACEHOLDER (14)\n", 984 | " Placeholder Details:\n", 985 | " - Placeholder ID: 0, Type: TITLE (1)\n", 986 | " - Placeholder ID: 14, Type: OBJECT (7)\n" 987 | ] 988 | } 989 | ], 990 | "source": [ 991 | "from pptx.util import Inches, Pt\n", 992 | "from pptx.dml.color import RGBColor\n", 993 | "\n", 994 | "\n", 995 | "# 添加文本内容幻灯片\n", 996 | "slide_layout = presentation.slide_layouts[-1]\n", 997 | "slide = presentation.slides.add_slide(slide_layout)\n", 998 | "\n", 999 | "\n", 1000 | "# 打印新增页属性\n", 1001 | "print(f\"Slide ID: {slide.slide_id}\")\n", 1002 | "print(f\" Layout: {slide.slide_layout}\")\n", 1003 | "print(f\" Shapes: {len(slide.shapes)} shapes\")\n", 1004 | "print(f\" Placeholders: {len(slide.placeholders)} placeholders\")\n", 1005 | "\n", 1006 | "print(\" Shape Details:\")\n", 1007 | "for shape in slide.shapes:\n", 1008 | " print(f\" - Shape Name: {shape.name}, Type: {shape.shape_type}\")\n", 1009 | "\n", 1010 | "print(\" Placeholder Details:\")\n", 1011 | "for placeholder in slide.placeholders:\n", 1012 | " print(f\" - Placeholder ID: {placeholder.placeholder_format.idx}, Type: {placeholder.placeholder_format.type}\")" 1013 | ] 1014 | }, 1015 | { 1016 | "cell_type": "code", 1017 | "execution_count": 26, 1018 | "id": "e2435a50-3ca9-445c-9949-80777b97a023", 1019 | "metadata": {}, 1020 | "outputs": [], 1021 | "source": [ 1022 | "# 填充原有布局中的占位符(标题和文本)\n", 1023 | "title = slide.shapes.title\n", 1024 | "title.text = \"python-pptx 新增文本内容示例\"\n", 1025 | "content = slide.placeholders[14]\n", 1026 | "content.text = \"填充原有的文本占位符\"" 1027 | ] 1028 | }, 1029 | { 1030 | "cell_type": "code", 1031 | "execution_count": 27, 1032 | "id": "1570ed11-df6d-4ac5-930b-a36c55c218aa", 1033 | "metadata": {}, 1034 | "outputs": [], 1035 | "source": [ 1036 | "# 新增文本框\n", 1037 | "left = Inches(6)\n", 1038 | "top = Inches(5)\n", 1039 | "width = Inches(5)\n", 1040 | "height = Inches(1)\n", 1041 | "textbox = slide.shapes.add_textbox(left, top, width, height)\n", 1042 | "text_frame = textbox.text_frame\n", 1043 | "text_frame.text = \"额外的文本框内容\"\n", 1044 | "\n", 1045 | "# 格式化文本\n", 1046 | "paragraph = text_frame.add_paragraph() # 添加新段落\n", 1047 | "paragraph.text = \"这是一个新的段落。\" # 设置段落文本\n", 1048 | "\n", 1049 | "# 设置字体\n", 1050 | "run = paragraph.add_run() # 添加文本运行\n", 1051 | "run.text = \" 这部分是加粗的文本。\" # 设置文本内容\n", 1052 | "run.font.bold = True # 设置为粗体\n", 1053 | "run.font.size = Pt(16) # 设置字体大小\n", 1054 | "run.font.color.rgb = RGBColor(255, 0, 0) # 设置字体颜色为红色\n", 1055 | "\n", 1056 | "# 保存 PPTX 文件\n", 1057 | "presentation.save(\"ChatPPT_append_text.pptx\")" 1058 | ] 1059 | }, 1060 | { 1061 | "cell_type": "code", 1062 | "execution_count": null, 1063 | "id": "0502d662-dcb4-40c1-b396-44ff5c283092", 1064 | "metadata": {}, 1065 | "outputs": [], 1066 | "source": [] 1067 | }, 1068 | { 1069 | "cell_type": "markdown", 1070 | "id": "833f7cbf-7608-45f2-9ff1-3d8b836abf95", 1071 | "metadata": {}, 1072 | "source": [ 1073 | "### 添加其他格式内容\n", 1074 | "\n", 1075 | "- **图片**:\n", 1076 | " - `add_picture(image_path, left, top, width=None, height=None)`:在幻灯片上添加图片。\n", 1077 | " ```python\n", 1078 | " slide.shapes.add_picture(\"image.png\", left, top, width, height)\n", 1079 | " ```\n", 1080 | "\n", 1081 | "- **表格**:\n", 1082 | " - `add_table(rows, cols, left, top, width, height)`:添加表格。\n", 1083 | " ```python\n", 1084 | " table = slide.shapes.add_table(rows, cols, left, top, width, height).table\n", 1085 | " ```\n", 1086 | "\n", 1087 | "- **图表**:\n", 1088 | " - `add_chart(chart_type, x, y, cx, cy, data)`:添加图表,`data` 是一个用于生成图表的数据源。\n", 1089 | "\n" 1090 | ] 1091 | }, 1092 | { 1093 | "cell_type": "code", 1094 | "execution_count": null, 1095 | "id": "5909d109-f5be-4360-85f6-bbf44b82c7f0", 1096 | "metadata": {}, 1097 | "outputs": [], 1098 | "source": [] 1099 | }, 1100 | { 1101 | "cell_type": "markdown", 1102 | "id": "b7f4600a-e95b-46db-a7b1-6bb610fa1212", 1103 | "metadata": {}, 1104 | "source": [ 1105 | "#### Homework: 使用 `python-pptx` 自动生成 PowerPoint 文件,内容包括文本、图片、表格和图表" 1106 | ] 1107 | }, 1108 | { 1109 | "cell_type": "code", 1110 | "execution_count": null, 1111 | "id": "eae0ea7f-9074-4110-984b-73ead7286003", 1112 | "metadata": {}, 1113 | "outputs": [], 1114 | "source": [] 1115 | }, 1116 | { 1117 | "cell_type": "code", 1118 | "execution_count": null, 1119 | "id": "34dd29c0-2813-405f-a206-22deb0b9d86e", 1120 | "metadata": {}, 1121 | "outputs": [], 1122 | "source": [] 1123 | } 1124 | ], 1125 | "metadata": { 1126 | "kernelspec": { 1127 | "display_name": "Python 3 (ipykernel)", 1128 | "language": "python", 1129 | "name": "python3" 1130 | }, 1131 | "language_info": { 1132 | "codemirror_mode": { 1133 | "name": "ipython", 1134 | "version": 3 1135 | }, 1136 | "file_extension": ".py", 1137 | "mimetype": "text/x-python", 1138 | "name": "python", 1139 | "nbconvert_exporter": "python", 1140 | "pygments_lexer": "ipython3", 1141 | "version": "3.10.4" 1142 | } 1143 | }, 1144 | "nbformat": 4, 1145 | "nbformat_minor": 5 1146 | } 1147 | -------------------------------------------------------------------------------- /outputs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DjangoPeng/ChatPPT/6e0b7ded163b17277705d0f79270d14e99ce9756/outputs/.gitkeep -------------------------------------------------------------------------------- /prompts/chatbot.txt: -------------------------------------------------------------------------------- 1 | **Role**: You are a knowledgeable Chatbot capable of answering a wide range of user questions. 2 | 3 | **Task**: When responding to user inquiries, format your answers in a presentation-friendly style suited for PowerPoint slides. Organize the content into a structured, slide-by-slide layout with **at least 10 slides**. Ensure each slide is rich in detail and elaboration. 4 | 5 | **Format**: Structure your responses as follows: 6 | 7 | ``` 8 | # [Presentation Theme] // Only once, for the first slide as the presentation's theme 9 | 10 | ## [Slide Title] 11 | - [Key point 1]: [Introduction or summary of the point] 12 | - [Detailed explanation covering multiple aspects or subpoints] 13 | - [Specific examples, case studies, or further insights] 14 | - [Additional detail or secondary aspect] 15 | - [Supporting data, quotes, or statistics] 16 | - [Key point 2]: [Brief introduction or summary] 17 | - [Expanded description with step-by-step breakdown] 18 | - [Practical application, scenarios, or research findings] 19 | 20 | ## [Slide Title] 21 | - [Key point 1]: [Comprehensive explanation] 22 | - [Second Level]: [Elaboration on critical points, providing context or rationale] 23 | - [Second Level]: [Additional insight or perspective] 24 | - [Key point 2]: [Clear overview with actionable insights] 25 | - [Second Level]: [Supporting data, strategies, or methods] 26 | - [Third Level]: [Examples or further clarification] 27 | ``` 28 | 29 | **Guidelines**: 30 | - Each response should include **a minimum of 10 slides**. 31 | - Ensure that each slide has **multiple detailed points**, with second-level explanations providing thorough descriptions and third-level points adding examples or additional insights. 32 | - The **Presentation Theme** should appear only on the first slide, and no images or image URLs are needed. 33 | - Response in Chinese. -------------------------------------------------------------------------------- /prompts/content_assistant.txt: -------------------------------------------------------------------------------- 1 | ### Prompt Design in Role-Task-Format 2 | 3 | **Role**: You are a content-enhancing PowerPoint assistant specialized in converting structured markdown to presentation slides. Your main objectives are to handle content breakdown, structure adjustments, and ensure smooth narrative flow. 4 | 5 | **Task**: 6 | 1. **Separate Images Across Slides**: For any slide containing multiple images, split these into separate slides, ensuring that each slide displays only one image. 7 | 2. **Add Supplementary Content**: Where splitting content leaves a slide too brief or lacking coherence, add relevant details to maintain a logical and informative progression. 8 | 3. **Structure for Cohesiveness**: Maintain a smooth narrative by filling gaps with logical transitions, brief summaries, or additional key points, helping each slide to contribute effectively to the overall presentation flow. 9 | 10 | **Format**: 11 | When processing the markdown content, follow this format strictly: 12 | - **# [Presentation Theme]**: Appears only on the first slide and is used as the presentation's overarching theme. 13 | - **## [Slide Title]**: Marks each new slide with the title, followed by relevant points. 14 | - **- [Key Points and Subpoints]**: Retain the bullet structure but ensure images are limited to one per slide. Add transitions or descriptions as needed. 15 | - **![Image Name](Image Path)**: For slides with images, include one image per slide, modifying content as necessary to make each slide’s information stand independently and flow smoothly. 16 | 17 | ### Example of Generated Content 18 | 19 | Given the markdown: 20 | 21 | ```markdown 22 | # 多模态大模型概述 23 | 24 | ## 多模态模型架构 25 | - 多模态模型的典型架构示意图 26 | ![图片1](images/multimodal_llm_overview/1.png) 27 | - TransFormer 架构图 28 | ![图片2](images/multimodal_llm_overview/2.png) 29 | 30 | ## 未来展望 31 | - 多模态大模型将在人工智能领域持续发挥重要作用,推动技术创新 32 | ``` 33 | 34 | Convert to: 35 | 36 | ```markdown 37 | # 多模态大模型概述 38 | 39 | ## 多模态模型架构 40 | - 多模态大模型融合文本、图像、音频等多种模态数据 41 | - 支持复杂任务的高效处理和全面理解 42 | - 数据整合解决了单一模态带来的信息孤岛问题 43 | - 模型在自然语言生成、情感分析、内容推荐等场景中的应用广泛 44 | 45 | ## 典型架构示意图 46 | - 特征提取模块:处理和提取每个模态的数据特征 47 | - 模态融合模块:合并多模态数据,创建共享表示空间 48 | - 输出生成模块:利用整合的信息生成最终输出 49 | - 多模态架构提供的系统化分析能力可以在多领域应用 50 | ![图片1](images/multimodal_llm_overview/1.png) 51 | 52 | ## TransFormer架构 53 | - TransFormer利用自注意力机制促进多模态信息交流 54 | - 多头注意力机制:提升模型捕捉语义关联的能力 55 | - 能够在输入数据中找到远程关联 56 | - 提供多维度的特征关注 57 | - 参数共享机制:提高训练效率和模型泛化能力 58 | - TransFormer架构 在图像识别、语言生成等领域同样表现出色 59 | - TransFormer架构对加速多模态模型的发展至关重要 60 | 61 | ## TransFormer架构示意图 62 | ![图片2](images/multimodal_llm_overview/2.png) 63 | 64 | ## 未来展望 65 | - 自动驾驶:通过融合激光雷达、摄像头等多模态数据提升感知和决策能力 66 | - 医疗诊断:结合影像、基因信息和电子健康记录支持个性化诊疗 67 | - 虚拟助手:分析语音、文本和图像,实现自然流畅的交互体验 68 | - 多模态大模型的发展将为实际应用场景带来深远影响 69 | ``` 70 | -------------------------------------------------------------------------------- /prompts/content_formatter.txt: -------------------------------------------------------------------------------- 1 | **Role**: You are an expert content formatter with proficiency in transforming raw markdown input into a polished, presentation-ready structure suitable for PowerPoint slides. 2 | 3 | **Task**: Convert the provided markdown content into a structured format for presentation use. Ensure clarity by following a slide-by-slide layout and applying multi-level bullet points only as needed. Place the presentation theme only on the first slide. 4 | 5 | **Format**: Structure the output as follows: 6 | 7 | ``` 8 | # [Presentation Theme] // Appears only on the first slide 9 | 10 | ## [Slide Title] 11 | - [Key point]: [Introduction or summary of the point] 12 | - [Detailed explanation covering multiple aspects or subpoints] 13 | - [Specific examples, case studies, or further insights] 14 | - [Additional detail or secondary aspect] 15 | - [Supporting data, quotes, or statistics] 16 | ![image_name](image_filepath) // If the original content includes images, insert them here 17 | 18 | ## [Slide Title] 19 | - [Key point]: [Brief introduction or summary] 20 | - [Expanded description with step-by-step breakdown] 21 | - [Practical application, scenarios, or research findings] 22 | ![image_name](image_filepath) // Images are optional based on the original content 23 | ``` 24 | 25 | Guidelines: 26 | - **First Slide**: Add the **Presentation Theme** title from the original input. 27 | - **Subsequent Slides**: Title each slide and organize points in concise bullets. Only include image placeholders if images are present in the original input. 28 | - **Multi-level Bullet Points**: Use secondary and tertiary levels only as needed to capture hierarchical information. 29 | 30 | ### Example Input and Output 31 | 32 | **Input:** 33 | 34 | ``` 35 | # 多模态大模型概述 36 | 37 | 多模态大模型是指能够处理多种数据模态(如文本、图像、音频等)的人工智能模型。它们在自然语言处理、计算机视觉等领域有广泛的应用。 38 | 39 | ## 1. 多模态大模型的特点 40 | 41 | - 支持多种数据类型: 42 | - 跨模态学习能力: 43 | - 广泛的应用场景: 44 | ### 1.1 支持多种数据类型 45 | 46 | 多模态大模型能够同时处理文本、图像、音频等多种类型的数据,实现数据的融合。 47 | 48 | ## 2. 多模态模型架构 49 | 50 | 以下是多模态模型的典型架构示意图: 51 | 52 | ![图片1](images/multimodal_llm_overview/1.png) 53 | 54 | TransFormer 架构图: 55 | 56 | ![图片2](images/multimodal_llm_overview/2.png) 57 | 58 | ### 2.1 模态融合技术 59 | 60 | 通过模态融合,可以提升模型对复杂数据的理解能力。 61 | 62 | 关键技术:注意力机制、Transformer架构等。 63 | 64 | - 应用领域: 65 | - 自然语言处理: 66 | - 机器翻译、文本生成等。 67 | - 计算机视觉: 68 | - 图像识别、目标检测等。 69 | ## 3. 未来展望 70 | 71 | 多模态大模型将在人工智能领域持续发挥重要作用,推动技术创新。 72 | ``` 73 | 74 | **Output:** 75 | 76 | ``` 77 | # 多模态大模型概述 78 | 79 | ## 多模态大模型的特点 80 | - 支持多种数据类型 81 | - 能够处理文本、图像、音频等多种类型的数据,实现数据的融合 82 | - 跨模态学习能力 83 | - 广泛的应用场景 84 | 85 | 86 | ## 多模态模型架构 87 | - 多模态模型的典型架构示意图 88 | ![图片1](images/multimodal_llm_overview/1.png) 89 | - TransFormer 架构图 90 | ![图片2](images/multimodal_llm_overview/2.png) 91 | 92 | ## 模态融合技术 93 | - 提升对复杂数据的理解能力 94 | - 关键技术:注意力机制、Transformer架构 95 | - 应用领域 96 | - 自然语言处理 97 | - 机器翻译、文本生成 98 | - 计算机视觉 99 | - 图像识别、目标检测 100 | 101 | ## 未来展望 102 | - 多模态大模型将在人工智能领域持续发挥重要作用,推动技术创新 103 | ``` -------------------------------------------------------------------------------- /prompts/image_advisor.txt: -------------------------------------------------------------------------------- 1 | **Role**: You are a helpful assistant specialized in enhancing presentations by suggesting relevant images to make them more engaging. 2 | 3 | **Task**: Given a PowerPoint text content where each slide is separated by "##", identify 3 slides where inserting images would make the presentation more vivid and specific. For each selected slide, provide a best keyword that can be used for Google search to find appropriate images. Specify the slide title. 4 | 5 | **Format**: 6 | [Slide Title 1]: [Best Keyword] 7 | [Slide Title 2]: [Best Keyword] 8 | [Slide Title 3]: [Best Keyword] -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | python-pptx==1.0.2 2 | loguru==0.7.2 3 | gradio==5.1.0 4 | langchain==0.2.16 5 | langchain_core==0.2.41 6 | langchain_community==0.2.17 7 | langchain_ollama==0.1.3 8 | langchain_openai==0.1.25 9 | python-docx==1.1.2 10 | Pillow==9.1.0 11 | torch==2.5.0 12 | transformers==4.46.0 13 | datasets==3.0.2 14 | accelerate==1.0.1 15 | librosa==0.10.2.post1 16 | soundfile==0.12.1 17 | ffmpeg==1.4 18 | torchvision==0.20.0 19 | sentencepiece==0.1.99 20 | bitsandbytes==0.44.1 21 | jupyterlab==4.3.0 -------------------------------------------------------------------------------- /src/chat_history.py: -------------------------------------------------------------------------------- 1 | from langchain_core.chat_history import ( 2 | BaseChatMessageHistory, # 基础聊天消息历史类 3 | InMemoryChatMessageHistory, # 内存中的聊天消息历史类 4 | ) 5 | 6 | # 用于存储会话历史的字典 7 | store = {} 8 | 9 | def get_session_history(session_id: str) -> BaseChatMessageHistory: 10 | """ 11 | 获取指定会话ID的聊天历史。如果该会话ID不存在,则创建一个新的聊天历史实例。 12 | 13 | 参数: 14 | session_id (str): 会话的唯一标识符 15 | 16 | 返回: 17 | BaseChatMessageHistory: 对应会话的聊天历史对象 18 | """ 19 | if session_id not in store: 20 | # 如果会话ID不存在于存储中,创建一个新的内存聊天历史实例 21 | store[session_id] = InMemoryChatMessageHistory() 22 | return store[session_id] -------------------------------------------------------------------------------- /src/chatbot.py: -------------------------------------------------------------------------------- 1 | # chatbot.py 2 | 3 | from abc import ABC, abstractmethod 4 | 5 | from langchain_openai import ChatOpenAI 6 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder # 导入提示模板相关类 7 | from langchain_core.messages import HumanMessage # 导入消息类 8 | from langchain_core.runnables.history import RunnableWithMessageHistory # 导入带有消息历史的可运行类 9 | 10 | from logger import LOG # 导入日志工具 11 | from chat_history import get_session_history 12 | 13 | 14 | class ChatBot(ABC): 15 | """ 16 | 聊天机器人基类,提供聊天功能。 17 | """ 18 | def __init__(self, prompt_file="./prompts/chatbot.txt", session_id=None): 19 | self.prompt_file = prompt_file 20 | self.session_id = session_id if session_id else "default_session_id" 21 | self.prompt = self.load_prompt() 22 | # LOG.debug(f"[ChatBot Prompt]{self.prompt}") 23 | self.create_chatbot() 24 | 25 | def load_prompt(self): 26 | """ 27 | 从文件加载系统提示语。 28 | """ 29 | try: 30 | with open(self.prompt_file, "r", encoding="utf-8") as file: 31 | return file.read().strip() 32 | except FileNotFoundError: 33 | raise FileNotFoundError(f"找不到提示文件 {self.prompt_file}!") 34 | 35 | 36 | def create_chatbot(self): 37 | """ 38 | 初始化聊天机器人,包括系统提示和消息历史记录。 39 | """ 40 | # 创建聊天提示模板,包括系统提示和消息占位符 41 | system_prompt = ChatPromptTemplate.from_messages([ 42 | ("system", self.prompt), # 系统提示部分 43 | MessagesPlaceholder(variable_name="messages"), # 消息占位符 44 | ]) 45 | 46 | # 初始化 ChatOllama 模型,配置参数 47 | self.chatbot = system_prompt | ChatOpenAI( 48 | model="gpt-4o-mini", 49 | temperature=0.5, 50 | max_tokens=4096 51 | ) 52 | 53 | # 将聊天机器人与消息历史记录关联 54 | self.chatbot_with_history = RunnableWithMessageHistory(self.chatbot, get_session_history) 55 | 56 | 57 | def chat_with_history(self, user_input, session_id=None): 58 | """ 59 | 处理用户输入,生成包含聊天历史的回复。 60 | 61 | 参数: 62 | user_input (str): 用户输入的消息 63 | session_id (str, optional): 会话的唯一标识符 64 | 65 | 返回: 66 | str: AI 生成的回复 67 | """ 68 | if session_id is None: 69 | session_id = self.session_id 70 | 71 | response = self.chatbot_with_history.invoke( 72 | [HumanMessage(content=user_input)], # 将用户输入封装为 HumanMessage 73 | {"configurable": {"session_id": session_id}}, # 传入配置,包括会话ID 74 | ) 75 | 76 | LOG.debug(f"[ChatBot] {response.content}") # 记录调试日志 77 | return response.content # 返回生成的回复内容 -------------------------------------------------------------------------------- /src/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | class Config: 5 | def __init__(self, config_file='config.json'): 6 | """ 7 | 初始化 Config 类,并从指定的 config 文件中加载配置。 8 | """ 9 | self.config_file = config_file 10 | self.load_config() # 加载配置文件 11 | 12 | def load_config(self): 13 | """ 14 | 从配置文件加载配置项,并设置默认值以防缺少某些键。 15 | """ 16 | # 检查 config 文件是否存在 17 | if not os.path.exists(self.config_file): 18 | raise FileNotFoundError(f"Config file '{self.config_file}' not found.") 19 | 20 | with open(self.config_file, 'r') as f: 21 | config = json.load(f) 22 | 23 | # 加载 ChatPPT 运行模式,默认为 "text" 模式 24 | self.input_mode = config.get('input_mode', "text") 25 | 26 | # 加载 PPT 默认模板路径,若未指定则使用默认模板 27 | self.ppt_template = config.get('ppt_template', "templates/MasterTemplate.pptx") 28 | 29 | # 加载 ChatBot 提示信息 30 | self.chatbot_prompt = config.get('chatbot_prompt', '') 31 | 32 | # 加载内容格式化提示和助手提示 33 | self.content_formatter_prompt = config.get('content_formatter_prompt', '') 34 | self.content_assistant_prompt = config.get('content_assistant_prompt', '') 35 | self.image_advisor_prompt = config.get('image_advisor_prompt', '') -------------------------------------------------------------------------------- /src/content_assistant.py: -------------------------------------------------------------------------------- 1 | # content_assistant.py 2 | from abc import ABC, abstractmethod 3 | 4 | from langchain_openai import ChatOpenAI 5 | from langchain_core.prompts import ChatPromptTemplate # 导入提示模板相关类 6 | from langchain_core.messages import HumanMessage # 导入消息类 7 | 8 | from logger import LOG # 导入日志工具 9 | 10 | class ContentAssistant(ABC): 11 | """ 12 | 聊天机器人基类,提供聊天功能。 13 | """ 14 | def __init__(self, prompt_file="./prompts/content_assistant.txt"): 15 | self.prompt_file = prompt_file 16 | self.prompt = self.load_prompt() 17 | # LOG.debug(f"[Formatter Prompt]{self.prompt}") 18 | self.create_assistant() 19 | 20 | def load_prompt(self): 21 | """ 22 | 从文件加载系统提示语。 23 | """ 24 | try: 25 | with open(self.prompt_file, "r", encoding="utf-8") as file: 26 | return file.read().strip() 27 | except FileNotFoundError: 28 | raise FileNotFoundError(f"找不到提示文件 {self.prompt_file}!") 29 | 30 | 31 | def create_assistant(self): 32 | """ 33 | 初始化聊天机器人,包括系统提示和消息历史记录。 34 | """ 35 | # 创建聊天提示模板,包括系统提示和消息占位符 36 | system_prompt = ChatPromptTemplate.from_messages([ 37 | ("system", self.prompt), # 系统提示部分 38 | ("human", "{input}"), # 消息占位符 39 | ]) 40 | 41 | self.model = ChatOpenAI( 42 | model="gpt-4o-mini", 43 | temperature=0.5, 44 | max_tokens=4096, 45 | ) 46 | 47 | self.assistant = system_prompt | self.model # 使用的模型名称) 48 | 49 | def adjust_single_picture(self, markdown_content): 50 | """ 51 | 52 | 53 | 参数: 54 | markdown_content (str): PowerPoint markdown 原始格式 55 | 56 | 返回: 57 | str: 格式化后的 markdown 内容 58 | """ 59 | response = self.assistant.invoke({ 60 | "input": markdown_content, 61 | }) 62 | 63 | LOG.debug(f"[Assistant 内容重构后]\n{response.content}") # 记录调试日志 64 | return response.content # 返回生成的回复内容 -------------------------------------------------------------------------------- /src/content_formatter.py: -------------------------------------------------------------------------------- 1 | # content_formatter.py 2 | from abc import ABC, abstractmethod 3 | 4 | from langchain_openai import ChatOpenAI 5 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder # 导入提示模板相关类 6 | from langchain_core.messages import HumanMessage # 导入消息类 7 | from langchain_core.runnables.history import RunnableWithMessageHistory # 导入带有消息历史的可运行类 8 | 9 | from logger import LOG # 导入日志工具 10 | 11 | class ContentFormatter(ABC): 12 | """ 13 | 聊天机器人基类,提供聊天功能。 14 | """ 15 | def __init__(self, prompt_file="./prompts/content_formatter.txt"): 16 | self.prompt_file = prompt_file 17 | self.prompt = self.load_prompt() 18 | # LOG.debug(f"[Formatter Prompt]{self.prompt}") 19 | self.create_formatter() 20 | 21 | def load_prompt(self): 22 | """ 23 | 从文件加载系统提示语。 24 | """ 25 | try: 26 | with open(self.prompt_file, "r", encoding="utf-8") as file: 27 | return file.read().strip() 28 | except FileNotFoundError: 29 | raise FileNotFoundError(f"找不到提示文件 {self.prompt_file}!") 30 | 31 | 32 | def create_formatter(self): 33 | """ 34 | 初始化聊天机器人,包括系统提示和消息历史记录。 35 | """ 36 | # 创建聊天提示模板,包括系统提示和消息占位符 37 | system_prompt = ChatPromptTemplate.from_messages([ 38 | ("system", self.prompt), # 系统提示部分 39 | ("human", "{input}"), # 消息占位符 40 | ]) 41 | 42 | self.model = ChatOpenAI( 43 | model="gpt-4o-mini", 44 | temperature=0.5, 45 | max_tokens=4096, 46 | ) 47 | 48 | self.formatter = system_prompt | self.model # 使用的模型名称) 49 | 50 | 51 | def format(self, raw_content): 52 | """ 53 | 54 | 55 | 参数: 56 | raw_content (str): 解析后的 markdown 原始格式 57 | 58 | 返回: 59 | str: 格式化后的 markdown 内容 60 | """ 61 | response = self.formatter.invoke({ 62 | "input": raw_content, 63 | }) 64 | 65 | LOG.debug(f"[Formmater 格式化后]\n{response.content}") # 记录调试日志 66 | return response.content # 返回生成的回复内容 -------------------------------------------------------------------------------- /src/data_structures.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | from dataclasses import dataclass, field 3 | 4 | # 定义 SlideContent 数据类,表示幻灯片的内容,包括标题、要点列表(支持多级),图片路径 5 | @dataclass 6 | class SlideContent: 7 | title: str # 幻灯片的标题 8 | bullet_points: List[dict] = field(default_factory=list) # 要点列表,包含每个要点的文本和层级 9 | image_path: Optional[str] = None # 图片路径,默认为 None 10 | # 定义 Slide 数据类,表示每张幻灯片,包括布局 ID、布局名称以及幻灯片内容。 11 | @dataclass 12 | class Slide: 13 | layout_id: int # 布局 ID,对应 PowerPoint 模板中的布局 14 | layout_name: str # 布局名称 15 | content: SlideContent # 幻灯片的内容,类型为 SlideContent 16 | 17 | # 定义 PowerPoint 数据类,表示整个 PowerPoint 演示文稿,包括标题和幻灯片列表。 18 | @dataclass 19 | class PowerPoint: 20 | title: str # PowerPoint 演示文稿的标题 21 | slides: List[Slide] = field(default_factory=list) # 幻灯片列表,默认为空列表 22 | 23 | # 定义 __str__ 方法,用于打印演示文稿的详细信息 24 | def __str__(self): 25 | result = [f"PowerPoint Presentation: {self.title}"] # 打印 PowerPoint 的标题 26 | for idx, slide in enumerate(self.slides, start=1): 27 | result.append(f"\nSlide {idx}:") 28 | result.append(f" Title: {slide.content.title}") # 打印每张幻灯片的标题 29 | result.append(f" Layout: {slide.layout_name} (ID: {slide.layout_id})") # 打印布局名称和 ID 30 | 31 | # 打印项目符号列表 32 | if slide.content.bullet_points: 33 | bullet_point_strs = [] 34 | for bullet_point in slide.content.bullet_points: 35 | text = bullet_point['text'] # 要点文本 36 | level = bullet_point['level'] # 要点层级 37 | indent = ' ' * level # 根据层级设置缩进 38 | bullet_point_strs.append(f"{indent}- {text}") 39 | result.append(" Bullet Points:\n" + "\n".join(bullet_point_strs)) # 打印格式化后的项目符号 40 | 41 | # 打印图片路径 42 | if slide.content.image_path: 43 | result.append(f" Image: {slide.content.image_path}") 44 | return "\n".join(result) 45 | -------------------------------------------------------------------------------- /src/docx_parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | from docx import Document 3 | from docx.oxml.ns import qn 4 | from PIL import Image 5 | from io import BytesIO 6 | 7 | from logger import LOG # 引入日志模块,用于记录调试信息 8 | 9 | def is_paragraph_list_item(paragraph): 10 | """ 11 | 检查段落是否为列表项。 12 | 判断依据是段落的样式名称是否包含 'list bullet' 或 'list number', 13 | 分别对应项目符号列表和编号列表。 14 | """ 15 | style_name = paragraph.style.name.lower() 16 | return 'list bullet' in style_name or 'list number' in style_name 17 | 18 | def get_paragraph_list_level(paragraph): 19 | """ 20 | 获取段落的列表级别(缩进层级)。 21 | 首先尝试通过 XML 结构判断,如果无法获取,则通过样式名称中的数字判断。 22 | """ 23 | p = paragraph._p 24 | numPr = p.find(qn('w:numPr')) 25 | if numPr is not None: 26 | ilvl = numPr.find(qn('w:ilvl')) 27 | if ilvl is not None: 28 | return int(ilvl.get(qn('w:val'))) 29 | 30 | style_name = paragraph.style.name.lower() 31 | if 'list bullet' in style_name or 'list number' in style_name: 32 | for word in style_name.split(): 33 | if word.isdigit(): 34 | return int(word) - 1 35 | return 0 36 | 37 | def generate_markdown_from_docx(docx_filename): 38 | """ 39 | 从指定的 docx 文件生成 Markdown 格式的内容,并将所有图像另存为文件并插入 Markdown 内容中。 40 | 支持标题、列表项、图像和普通段落的转换。 41 | """ 42 | # 获取 docx 文件的基本名称,用于创建图像文件夹 43 | docx_basename = os.path.splitext(os.path.basename(docx_filename))[0] 44 | images_dir = f'images/{docx_basename}/' 45 | if not os.path.exists(images_dir): 46 | os.makedirs(images_dir) # 如果目录不存在,则创建 47 | 48 | document = Document(docx_filename) # 打开 docx 文件 49 | markdown_content = '' 50 | image_counter = 1 # 图像编号计数器 51 | 52 | for para in document.paragraphs: 53 | style = para.style.name # 获取段落样式名称 54 | text = para.text.strip() # 获取段落文本并去除首尾空格 55 | 56 | # 如果段落为空且没有任何运行对象,则跳过 57 | if not text and not para.runs: 58 | continue 59 | 60 | # 检查段落类型:标题、列表项、普通段落 61 | is_heading = 'Heading' in style 62 | is_title = style == 'Title' 63 | is_list = is_paragraph_list_item(para) 64 | list_level = get_paragraph_list_level(para) if is_list else 0 65 | 66 | # 确定标题级别 67 | if is_title: 68 | heading_level = 1 69 | elif is_heading: 70 | heading_level = int(style.replace('Heading ', '')) + 1 71 | else: 72 | heading_level = None 73 | 74 | # 检查段落中的每个运行,寻找并保存图像 75 | for run in para.runs: 76 | # 查找 w:drawing 标签中的图像 77 | drawings = run.element.findall('.//w:drawing', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}) 78 | for drawing in drawings: 79 | # 查找图像的关系 ID 80 | blips = drawing.findall('.//a:blip', namespaces={'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'}) 81 | for blip in blips: 82 | rId = blip.get('{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed') 83 | image_part = document.part.related_parts[rId] 84 | image_bytes = image_part.blob # 获取图像数据 85 | image_filename = f'{image_counter}.png' 86 | image_path = os.path.join(images_dir, image_filename) 87 | 88 | # 使用 PIL 保存图像为 PNG 格式 89 | image = Image.open(BytesIO(image_bytes)) 90 | if image.mode in ('RGBA', 'P', 'LA'): 91 | image = image.convert('RGB') # 将图像转换为 RGB 模式,以兼容 PNG 格式 92 | image.save(image_path, 'PNG') 93 | 94 | # 在 Markdown 中添加图像链接 95 | markdown_content += f'![图片{image_counter}]({image_path})\n\n' 96 | image_counter += 1 97 | 98 | # 根据段落类型格式化文本内容 99 | if heading_level: 100 | markdown_content += f'{"#" * heading_level} {text}\n\n' # 使用 Markdown 语法表示标题 101 | elif is_list: 102 | markdown_content += f'{" " * list_level}- {text}\n' # 使用缩进和 “-” 表示列表项 103 | elif text: 104 | markdown_content += f'{text}\n\n' # 普通段落直接添加文本 105 | 106 | # 记录调试信息 107 | LOG.debug(f"从 docx 文件解析的 markdown 内容:\n{markdown_content}") 108 | 109 | return markdown_content 110 | 111 | if __name__ == "__main__": 112 | # 指定输入的 docx 文件路径 113 | docx_filename = 'inputs/docx/multimodal_llm_overview.docx' 114 | docx_basename = os.path.splitext(os.path.basename(docx_filename))[0] 115 | 116 | # 生成 Markdown 内容 117 | markdown_content = generate_markdown_from_docx(docx_filename) 118 | 119 | # 保存 Markdown 内容到文件 120 | with open(f'{docx_basename}.md', 'w', encoding='utf-8') as f: 121 | f.write(markdown_content) 122 | -------------------------------------------------------------------------------- /src/gradio_server.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | import os 3 | 4 | from gradio.data_classes import FileData 5 | 6 | from config import Config 7 | from chatbot import ChatBot 8 | from content_formatter import ContentFormatter 9 | from content_assistant import ContentAssistant 10 | from image_advisor import ImageAdvisor 11 | from input_parser import parse_input_text 12 | from ppt_generator import generate_presentation 13 | from template_manager import load_template, get_layout_mapping 14 | from layout_manager import LayoutManager 15 | from logger import LOG 16 | from openai_whisper import asr, transcribe 17 | # from minicpm_v_model import chat_with_image 18 | from docx_parser import generate_markdown_from_docx 19 | 20 | 21 | os.environ["LANGCHAIN_TRACING_V2"] = "true" 22 | os.environ["LANGCHAIN_PROJECT"] = "ChatPPT" 23 | 24 | # 实例化 Config,加载配置文件 25 | config = Config() 26 | chatbot = ChatBot(config.chatbot_prompt) 27 | content_formatter = ContentFormatter(config.content_formatter_prompt) 28 | content_assistant = ContentAssistant(config.content_assistant_prompt) 29 | image_advisor = ImageAdvisor(config.image_advisor_prompt) 30 | 31 | # 加载 PowerPoint 模板,并获取可用布局 32 | ppt_template = load_template(config.ppt_template) 33 | 34 | # 初始化 LayoutManager,管理幻灯片布局 35 | layout_manager = LayoutManager(get_layout_mapping(ppt_template)) 36 | 37 | 38 | # 定义生成幻灯片内容的函数 39 | def generate_contents(message, history): 40 | try: 41 | # 初始化一个列表,用于收集用户输入的文本和音频转录 42 | texts = [] 43 | 44 | # 获取文本输入,如果存在则添加到列表 45 | text_input = message.get("text") 46 | if text_input: 47 | texts.append(text_input) 48 | 49 | # 获取上传的文件列表,如果存在则处理每个文件 50 | for uploaded_file in message.get("files", []): 51 | LOG.debug(f"[上传文件]: {uploaded_file}") 52 | # 获取文件的扩展名,并转换为小写 53 | file_ext = os.path.splitext(uploaded_file)[1].lower() 54 | if file_ext in ('.wav', '.flac', '.mp3'): 55 | # 使用 OpenAI Whisper 模型进行语音识别 56 | audio_text = asr(uploaded_file) 57 | texts.append(audio_text) 58 | # 解释说明图像文件 59 | # elif file_ext in ('.jpg', '.png', '.jpeg'): 60 | # if text_input: 61 | # image_desc = chat_with_image(uploaded_file, text_input) 62 | # else: 63 | # image_desc = chat_with_image(uploaded_file) 64 | # return image_desc 65 | # 使用 Docx 文件作为素材创建 PowerPoint 66 | elif file_ext in ('.docx', '.doc'): 67 | # 调用 generate_markdown_from_docx 函数,获取 markdown 内容 68 | raw_content = generate_markdown_from_docx(uploaded_file) 69 | markdown_content = content_formatter.format(raw_content) 70 | return content_assistant.adjust_single_picture(markdown_content) 71 | else: 72 | LOG.debug(f"[格式不支持]: {uploaded_file}") 73 | 74 | # 将所有文本和转录结果合并为一个字符串,作为用户需求 75 | user_requirement = "需求如下:\n" + "\n".join(texts) 76 | LOG.info(user_requirement) 77 | 78 | # 与聊天机器人进行对话,生成幻灯片内容 79 | slides_content = chatbot.chat_with_history(user_requirement) 80 | 81 | return slides_content 82 | except Exception as e: 83 | LOG.error(f"[内容生成错误]: {e}") 84 | # 抛出 Gradio 错误,以便在界面上显示友好的错误信息 85 | raise gr.Error(f"网络问题,请重试:)") 86 | 87 | 88 | def handle_image_generate(history): 89 | try: 90 | # 获取聊天记录中的最新内容 91 | slides_content = history[-1]["content"] 92 | 93 | content_with_images, image_pair = image_advisor.generate_images(slides_content) 94 | 95 | # for k, v in image_pair.items(): 96 | # history.append( 97 | # # {"text": k, "files": FileData(path=v)} 98 | # {"role": "user", "files": FileData(path=v)} 99 | # ) 100 | 101 | new_message = {"role": "assistant", "content": content_with_images} 102 | 103 | history.append(new_message) 104 | 105 | return history 106 | except Exception as e: 107 | LOG.error(f"[配图生成错误]: {e}") 108 | # 提示用户先输入主题内容或上传文件 109 | raise gr.Error(f"【提示】未找到合适配图,请重试!") 110 | 111 | # 定义处理生成按钮点击事件的函数 112 | def handle_generate(history): 113 | try: 114 | # 获取聊天记录中的最新内容 115 | slides_content = history[-1]["content"] 116 | # 解析输入文本,生成幻灯片数据和演示文稿标题 117 | powerpoint_data, presentation_title = parse_input_text(slides_content, layout_manager) 118 | # 定义输出的 PowerPoint 文件路径 119 | output_pptx = f"outputs/{presentation_title}.pptx" 120 | 121 | # 生成 PowerPoint 演示文稿 122 | generate_presentation(powerpoint_data, config.ppt_template, output_pptx) 123 | return output_pptx 124 | except Exception as e: 125 | LOG.error(f"[PPT 生成错误]: {e}") 126 | # 提示用户先输入主题内容或上传文件 127 | raise gr.Error(f"【提示】请先输入你的主题内容或上传文件") 128 | 129 | # 创建 Gradio 界面 130 | with gr.Blocks( 131 | title="ChatPPT", 132 | css=""" 133 | body { animation: fadeIn 2s; } 134 | @keyframes fadeIn { from { opacity: 0; } to { opacity: 1; } } 135 | """ 136 | ) as demo: 137 | 138 | # 添加标题 139 | gr.Markdown("## ChatPPT") 140 | 141 | # 定义语音(mic)转文本的接口 142 | # gr.Interface( 143 | # fn=transcribe, # 执行转录的函数 144 | # inputs=[ 145 | # gr.Audio(sources="microphone", type="filepath"), # 使用麦克风录制的音频输入 146 | # ], 147 | # outputs="text", # 输出为文本 148 | # flagging_mode="never", # 禁用标记功能 149 | # ) 150 | 151 | # 创建聊天机器人界面,提示用户输入 152 | contents_chatbot = gr.Chatbot( 153 | placeholder="AI 一键生成 PPT

输入你的主题内容或上传音频文件", 154 | height=800, 155 | type="messages", 156 | ) 157 | 158 | # 定义 ChatBot 和生成内容的接口 159 | gr.ChatInterface( 160 | fn=generate_contents, # 处理用户输入的函数 161 | chatbot=contents_chatbot, # 绑定的聊天机器人 162 | type="messages", 163 | multimodal=True # 支持多模态输入(文本和文件) 164 | ) 165 | 166 | image_generate_btn = gr.Button("一键为 PowerPoint 配图") 167 | 168 | image_generate_btn.click( 169 | fn=handle_image_generate, 170 | inputs=contents_chatbot, 171 | outputs=contents_chatbot, 172 | ) 173 | 174 | # 创建生成 PowerPoint 的按钮 175 | generate_btn = gr.Button("一键生成 PowerPoint") 176 | 177 | # 监听生成按钮的点击事件 178 | generate_btn.click( 179 | fn=handle_generate, # 点击时执行的函数 180 | inputs=contents_chatbot, # 输入为聊天记录 181 | outputs=gr.File() # 输出为文件下载链接 182 | ) 183 | 184 | # 主程序入口 185 | if __name__ == "__main__": 186 | # 启动Gradio应用,允许队列功能,并通过 HTTPS 访问 187 | demo.queue().launch( 188 | share=False, 189 | server_name="0.0.0.0", 190 | # auth=("django", "qaz!@#$") # ⚠️注意:记住修改密码 191 | ) -------------------------------------------------------------------------------- /src/image_advisor.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | import os 4 | 5 | from abc import ABC 6 | from bs4 import BeautifulSoup 7 | from PIL import Image 8 | from io import BytesIO 9 | 10 | from langchain_openai import ChatOpenAI 11 | from langchain_core.prompts import ChatPromptTemplate 12 | 13 | from logger import LOG # 导入日志工具 14 | 15 | class ImageAdvisor(ABC): 16 | """ 17 | 聊天机器人基类,提供建议配图的功能。 18 | """ 19 | def __init__(self, prompt_file="./prompts/image_advisor.txt"): 20 | self.prompt_file = prompt_file 21 | self.prompt = self.load_prompt() 22 | self.create_advisor() 23 | 24 | def load_prompt(self): 25 | """ 26 | 从文件加载系统提示语。 27 | """ 28 | try: 29 | with open(self.prompt_file, "r", encoding="utf-8") as file: 30 | return file.read().strip() 31 | except FileNotFoundError: 32 | LOG.error(f"找不到提示文件 {self.prompt_file}!") 33 | raise 34 | 35 | def create_advisor(self): 36 | """ 37 | 初始化聊天机器人,包括系统提示和消息历史记录。 38 | """ 39 | chat_prompt = ChatPromptTemplate.from_messages([ 40 | ("system", self.prompt), # 系统提示部分 41 | ("human", "**Content**:\n\n{input}"), # 消息占位符 42 | ]) 43 | 44 | self.model = ChatOpenAI( 45 | model="gpt-4o-mini", 46 | temperature=0.7, 47 | max_tokens=4096, 48 | ) 49 | self.advisor = chat_prompt | self.model 50 | 51 | def generate_images(self, markdown_content, image_directory="tmps", num_images=3): 52 | """ 53 | 生成图片并嵌入到指定的 PowerPoint 内容中。 54 | 55 | 参数: 56 | markdown_content (str): PowerPoint markdown 原始格式 57 | image_directory (str): 本地保存图片的文件夹名称 58 | num_images (int): 每个幻灯片搜索的图像数量 59 | 60 | 返回: 61 | content_with_images (str): 嵌入图片后的内容 62 | image_pair (dict): 每个幻灯片标题对应的图像路径 63 | """ 64 | response = self.advisor.invoke({ 65 | "input": markdown_content, 66 | }) 67 | 68 | LOG.debug(f"[Advisor 建议配图]\n{response.content}") 69 | 70 | keywords = self.get_keywords(response.content) 71 | image_pair = {} 72 | 73 | for slide_title, query in keywords.items(): 74 | # 检索图像 75 | images = self.get_bing_images(slide_title, query, num_images, timeout=1, retries=3) 76 | if images: 77 | for image in images: 78 | LOG.debug(f"Name: {image['slide_title']}, Query: {image['query']} 分辨率:{image['width']}x{image['height']}") 79 | else: 80 | LOG.warning(f"No images found for {slide_title}.") 81 | continue 82 | 83 | # 仅处理分辨率最高的图像 84 | img = images[0] 85 | save_directory = f"images/{image_directory}" 86 | os.makedirs(save_directory, exist_ok=True) 87 | save_path = os.path.join(save_directory, f"{img['slide_title']}_1.jpeg") 88 | self.save_image(img["obj"], save_path) 89 | image_pair[img["slide_title"]] = save_path 90 | 91 | content_with_images = self.insert_images(markdown_content, image_pair) 92 | return content_with_images, image_pair 93 | 94 | def get_keywords(self, advice): 95 | """ 96 | 使用正则表达式提取关键词。 97 | 98 | 参数: 99 | advice (str): 提示文本 100 | 返回: 101 | keywords (dict): 提取的关键词字典 102 | """ 103 | pairs = re.findall(r'\[(.+?)\]:\s*(.+)', advice) 104 | keywords = {key.strip(): value.strip() for key, value in pairs} 105 | LOG.debug(f"[检索关键词 正则提取结果]{keywords}") 106 | return keywords 107 | 108 | def get_bing_images(self, slide_title, query, num_images=5, timeout=1, retries=3): 109 | """ 110 | 从 Bing 检索图像,最多重试3次。 111 | 112 | 参数: 113 | slide_title (str): 幻灯片标题 114 | query (str): 图像搜索关键词 115 | num_images (int): 搜索的图像数量 116 | timeout (int): 每次请求超时时间(秒),默认1秒 117 | retries (int): 最大重试次数,默认3次 118 | 119 | 返回: 120 | sorted_images (list): 符合条件的图像数据列表 121 | """ 122 | url = f"https://www.bing.com/images/search?q={query}" 123 | headers = { 124 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36" 125 | } 126 | 127 | # 尝试请求并设置重试逻辑 128 | for attempt in range(retries): 129 | try: 130 | response = requests.get(url, headers=headers, timeout=timeout) 131 | response.raise_for_status() 132 | break # 请求成功,跳出重试循环 133 | except requests.RequestException as e: 134 | LOG.warning(f"Attempt {attempt + 1}/{retries} failed for query '{query}': {e}") 135 | if attempt == retries - 1: 136 | LOG.error(f"Max retries reached for query '{query}'.") 137 | return [] 138 | 139 | soup = BeautifulSoup(response.text, "html.parser") 140 | image_elements = soup.select("a.iusc") 141 | 142 | image_links = [] 143 | for img in image_elements: 144 | m_data = img.get("m") 145 | if m_data: 146 | m_json = eval(m_data) 147 | if "murl" in m_json: 148 | image_links.append(m_json["murl"]) 149 | if len(image_links) >= num_images: 150 | break 151 | 152 | image_data = [] 153 | for link in image_links: 154 | for attempt in range(retries): 155 | try: 156 | img_data = requests.get(link, headers=headers, timeout=timeout) 157 | img = Image.open(BytesIO(img_data.content)) 158 | image_info = { 159 | "slide_title": slide_title, 160 | "query": query, 161 | "width": img.width, 162 | "height": img.height, 163 | "resolution": img.width * img.height, 164 | "obj": img, 165 | } 166 | image_data.append(image_info) 167 | break # 成功下载图像,跳出重试循环 168 | except Exception as e: 169 | LOG.warning(f"Attempt {attempt + 1}/{retries} failed for image '{link}': {e}") 170 | if attempt == retries - 1: 171 | LOG.error(f"Max retries reached for image '{link}'. Skipping.") 172 | 173 | sorted_images = sorted(image_data, key=lambda x: x["resolution"], reverse=True) 174 | return sorted_images 175 | 176 | def save_image(self, img, save_path, format="JPEG", quality=85, max_size=1080): 177 | """ 178 | 保存图像到本地并压缩。 179 | 180 | 参数: 181 | img (Image): 图像对象 182 | save_path (str): 保存路径 183 | format (str): 保存格式,默认 JPEG 184 | quality (int): 图像质量,默认 85 185 | max_size (int): 最大边长,默认 1080 186 | """ 187 | try: 188 | width, height = img.size 189 | if max(width, height) > max_size: 190 | scaling_factor = max_size / max(width, height) 191 | new_width = int(width * scaling_factor) 192 | new_height = int(height * scaling_factor) 193 | img = img.resize((new_width, new_height), Image.Resampling.LANCZOS) 194 | 195 | if img.mode == "RGBA": 196 | format = "PNG" 197 | save_options = {"optimize": True} 198 | else: 199 | save_options = { 200 | "quality": quality, 201 | "optimize": True, 202 | "progressive": True 203 | } 204 | 205 | img.save(save_path, format=format, **save_options) 206 | LOG.debug(f"Image saved as {save_path} in {format} format with quality {quality}.") 207 | except Exception as e: 208 | LOG.error(f"Failed to save image: {e}") 209 | 210 | def insert_images(self, markdown_content, image_pair): 211 | """ 212 | 将图像嵌入到 Markdown 内容中。 213 | 214 | 参数: 215 | markdown_content (str): Markdown 内容 216 | image_pair (dict): 幻灯片标题到图像路径的映射 217 | 218 | 返回: 219 | new_content (str): 嵌入图像后的内容 220 | """ 221 | lines = markdown_content.split('\n') 222 | new_lines = [] 223 | i = 0 224 | while i < len(lines): 225 | line = lines[i] 226 | new_lines.append(line) 227 | if line.startswith('## '): 228 | slide_title = line[3:].strip() 229 | if slide_title in image_pair: 230 | image_path = image_pair[slide_title] 231 | image_markdown = f'![{slide_title}]({image_path})' 232 | new_lines.append(image_markdown) 233 | i += 1 234 | new_content = '\n'.join(new_lines) 235 | return new_content 236 | -------------------------------------------------------------------------------- /src/input_parser.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Optional 3 | 4 | from data_structures import PowerPoint 5 | from slide_builder import SlideBuilder 6 | from layout_manager import LayoutManager 7 | from logger import LOG # 引入日志模块 8 | 9 | def parse_bullet_point_level(line: str) -> (int, str): 10 | """ 11 | 根据项目符号行解析其缩进层级,并返回项目符号的文本内容。 12 | """ 13 | # 计算前导空格或 Tab 的数量 14 | indent_length = len(line) - len(line.lstrip()) 15 | 16 | # 每 2 个空格算作一个缩进级别,或者根据实际的缩进规则 17 | indent_level = indent_length // 2 18 | 19 | LOG.debug(indent_level) 20 | LOG.debug(line) 21 | 22 | bullet_text = line.strip().lstrip('- ').strip() # 去除 '-' 并处理前后空格,得到项目符号内容 23 | return indent_level, bullet_text 24 | 25 | 26 | # 解析输入文本,生成 PowerPoint 数据结构 27 | def parse_input_text(input_text: str, layout_manager: LayoutManager) -> PowerPoint: 28 | """ 29 | 解析输入的文本并转换为 PowerPoint 数据结构。自动为每张幻灯片分配适当的布局。 30 | """ 31 | lines = input_text.split('\n') # 按行拆分文本 32 | presentation_title = "" # PowerPoint 的主标题 33 | slides = [] # 存储所有幻灯片 34 | slide_builder: Optional[SlideBuilder] = None # 当前幻灯片的构建器 35 | 36 | # 正则表达式,用于匹配幻灯片标题、要点和图片 37 | slide_title_pattern = re.compile(r'^##\s+(.*)') 38 | bullet_pattern = re.compile(r'^(\s*)-\s+(.*)') 39 | image_pattern = re.compile(r'!\[.*?\]\((.*?)\)') 40 | 41 | for line in lines: 42 | if line.strip() == "": 43 | continue # 跳过空行 44 | 45 | # 主标题 (用作 PowerPoint 的标题和文件名) 46 | if line.startswith('# ') and not line.startswith('##'): 47 | presentation_title = line[2:].strip() 48 | 49 | first_slide_builder = SlideBuilder(layout_manager) 50 | first_slide_builder.set_title(presentation_title) 51 | slides.append(first_slide_builder.finalize()) 52 | 53 | # 幻灯片标题 54 | elif line.startswith('## '): 55 | match = slide_title_pattern.match(line) 56 | if match: 57 | title = match.group(1).strip() 58 | 59 | # 如果有当前幻灯片,生成并添加到幻灯片列表中 60 | if slide_builder: 61 | slides.append(slide_builder.finalize()) 62 | 63 | # 创建新的 SlideBuilder 64 | slide_builder = SlideBuilder(layout_manager) 65 | slide_builder.set_title(title) 66 | 67 | # 项目符号(要点) 68 | elif bullet_pattern.match(line) and slide_builder: 69 | match = bullet_pattern.match(line) 70 | if match: 71 | indent_spaces, bullet = match.groups() # 获取缩进空格和项目符号内容 72 | indent_level = len(indent_spaces) // 2 # 计算缩进层级,每 2 个空格为一级 73 | bullet_text = bullet.strip() # 获取项目符号的文本内容 74 | 75 | # 根据层级添加要点 76 | slide_builder.add_bullet_point(bullet_text, level=indent_level) 77 | 78 | # 图片插入 79 | elif line.startswith('![') and slide_builder: 80 | match = image_pattern.match(line) 81 | if match: 82 | image_path = match.group(1).strip() 83 | slide_builder.set_image(image_path) 84 | 85 | # 为最后一张幻灯片分配布局并添加到列表中 86 | if slide_builder: 87 | slides.append(slide_builder.finalize()) 88 | 89 | # 返回 PowerPoint 数据结构以及演示文稿标题 90 | return PowerPoint(title=presentation_title, slides=slides), presentation_title 91 | -------------------------------------------------------------------------------- /src/layout_manager.py: -------------------------------------------------------------------------------- 1 | import random 2 | from typing import List, Tuple 3 | from data_structures import SlideContent 4 | from logger import LOG 5 | 6 | # 定义 content_type 对应的权重 7 | CONTENT_TYPE_WEIGHTS = { 8 | 'Title': 1, 9 | 'Content': 2, 10 | 'Picture': 4 11 | } 12 | 13 | def calculate_layout_encoding(layout_name: str) -> int: 14 | """ 15 | 根据 layout_name 计算其编码值。 16 | 移除编号部分,只对类型进行编码,顺序无关。 17 | """ 18 | # 移除 layout_name 中的编号部分,并按 ',' 分割 19 | parts = layout_name.split(', ') 20 | base_name = ' '.join(part.split()[0] for part in parts) # 只保留类型部分,移除编号 21 | 22 | # 计算权重和 23 | weight_sum = sum(CONTENT_TYPE_WEIGHTS.get(part, 0) for part in base_name.split()) 24 | 25 | return weight_sum 26 | 27 | 28 | def calculate_content_encoding(slide_content: SlideContent) -> int: 29 | """ 30 | 根据 SlideContent 的成员情况计算其编码值。 31 | 如果有 title、bullet_points 和 image_path,则根据这些成员生成编码。 32 | """ 33 | encoding = 0 34 | if slide_content.title: 35 | encoding += CONTENT_TYPE_WEIGHTS['Title'] 36 | if slide_content.bullet_points: 37 | encoding += CONTENT_TYPE_WEIGHTS['Content'] 38 | if slide_content.image_path: 39 | encoding += CONTENT_TYPE_WEIGHTS['Picture'] 40 | 41 | return encoding 42 | 43 | 44 | # 通用的布局策略类,使用参数化的方式实现不同布局策略的功能。 45 | class LayoutStrategy: 46 | """ 47 | 通用布局策略类,通过参数化方式来选择适合的布局组。 48 | `get_layout` 方法根据 SlideContent 内容和布局映射来返回合适的布局ID和名称。 49 | """ 50 | def __init__(self, layout_group: List[Tuple[int, str]]): 51 | self.layout_group = layout_group # 布局组成员,存储可选布局 52 | 53 | def get_layout(self, slide_content: SlideContent) -> Tuple[int, str]: 54 | """ 55 | 根据 SlideContent 内容随机选择一个合适的布局。 56 | """ 57 | return random.choice(self.layout_group) # 随机选择布局 58 | 59 | # 布局管理器类,负责根据 SlideContent 自动选择合适的布局策略。 60 | class LayoutManager: 61 | """ 62 | 布局管理器根据 SlideContent 的内容(如标题、要点和图片)自动选择合适的布局策略,并随机选择一个布局。 63 | """ 64 | def __init__(self, layout_mapping: dict): 65 | self.layout_mapping = layout_mapping # 布局映射配置 66 | 67 | # 初始化布局策略,提前为所有布局创建策略并存储在字典中 68 | self.strategies = { 69 | 1: self._create_strategy(1), # 仅 Title 70 | 3: self._create_strategy(3), # Title + Content 71 | 5: self._create_strategy(5), # Title + Picture 72 | 7: self._create_strategy(7) # Title + Content + Picture 73 | } 74 | 75 | # 打印调试信息 76 | LOG.debug(f"LayoutManager 初始化完成:\n {self}") 77 | 78 | def __str__(self): 79 | """ 80 | 打印 LayoutManager 的调试信息,包括所有布局策略及其对应的布局组。 81 | """ 82 | output = [] 83 | output.append("LayoutManager 状态:") 84 | for encoding, strategy in self.strategies.items(): 85 | layout_group = strategy.layout_group 86 | output.append(f" 编码 {encoding}: {len(layout_group)} 个布局") 87 | for layout_id, layout_name in layout_group: 88 | output.append(f" - Layout ID: {layout_id}, Layout Name: {layout_name}") 89 | return "\n".join(output) 90 | 91 | def assign_layout(self, slide_content: SlideContent) -> Tuple[int, str]: 92 | """ 93 | 根据 SlideContent 的成员情况计算编码,并选择对应的布局策略。 94 | """ 95 | # 计算 SlideContent 的编码 96 | encoding = calculate_content_encoding(slide_content) 97 | 98 | # 根据编码获取对应的布局策略 99 | strategy = self.strategies.get(encoding) 100 | if not strategy: 101 | raise ValueError(f"没有找到对应的布局策略,编码: {encoding}") 102 | 103 | # 使用对应的策略获取合适的布局 104 | return strategy.get_layout(slide_content) 105 | 106 | def _create_strategy(self, layout_type: int) -> LayoutStrategy: 107 | """ 108 | 根据布局类型创建通用的布局策略,并生成布局组,记录布局组的 debug 信息。 109 | """ 110 | layout_group = [ 111 | (layout_id, layout_name) for layout_name, layout_id in self.layout_mapping.items() 112 | if calculate_layout_encoding(layout_name) == layout_type 113 | ] 114 | 115 | # Debug 级别日志输出,查看各个布局组的详细情况 116 | # LOG.debug(f"创建 {layout_type} 编码对应的布局组,共 {len(layout_group)} 个布局: {layout_group}") 117 | 118 | return LayoutStrategy(layout_group) -------------------------------------------------------------------------------- /src/logger.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | import sys 3 | import logging 4 | 5 | # 定义统一的日志格式字符串 6 | log_format = "{time:YYYY-MM-DD HH:mm:ss} | {level} | {module}:{function}:{line} - {message}" 7 | 8 | # 配置 Loguru,移除默认的日志配置 9 | logger.remove() 10 | 11 | # 使用统一的日志格式配置标准输出和标准错误输出,支持彩色显示 12 | logger.add(sys.stdout, level="DEBUG", format=log_format, colorize=True) 13 | logger.add(sys.stderr, level="ERROR", format=log_format, colorize=True) 14 | 15 | # 同样使用统一的格式配置日志文件输出,设置文件大小为1MB自动轮换 16 | logger.add("logs/app.log", rotation="1 MB", level="DEBUG", format=log_format) 17 | 18 | # 为 logger 设置别名,方便在其他模块中导入和使用 19 | LOG = logger 20 | 21 | # 将 LOG 变量公开,允许其他模块通过 from logger import LOG 来使用它 22 | __all__ = ["LOG"] -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from input_parser import parse_input_text 4 | from ppt_generator import generate_presentation 5 | from template_manager import load_template, print_layouts, get_layout_mapping 6 | from layout_manager import LayoutManager 7 | from config import Config 8 | from logger import LOG # 引入 LOG 模块 9 | from content_formatter import ContentFormatter 10 | from content_assistant import ContentAssistant 11 | 12 | # 新增导入 docx_parser 模块中的函数 13 | from docx_parser import generate_markdown_from_docx 14 | 15 | # 定义主函数,处理输入并生成 PowerPoint 演示文稿 16 | def main(input_file): 17 | config = Config() # 加载配置文件 18 | content_formatter = ContentFormatter() 19 | content_assistant = ContentAssistant() 20 | 21 | # 检查输入文件是否存在 22 | if not os.path.exists(input_file): 23 | LOG.error(f"{input_file} 不存在。") # 如果文件不存在,记录错误日志 24 | return 25 | 26 | # 根据输入文件的扩展名判断文件类型 27 | file_extension = os.path.splitext(input_file)[1].lower() 28 | 29 | if file_extension in ['.md', '.markdown']: 30 | # 处理 markdown 文件 31 | with open(input_file, 'r', encoding='utf-8') as file: 32 | input_text = file.read() 33 | elif file_extension == '.docx': 34 | # 处理 docx 文件 35 | LOG.info(f"正在解析 docx 文件: {input_file}") 36 | # 调用 generate_markdown_from_docx 函数,获取 markdown 内容 37 | raw_content = generate_markdown_from_docx(input_file) 38 | markdown_content = content_formatter.format(raw_content) 39 | input_text = content_assistant.adjust_single_picture(markdown_content) 40 | else: 41 | # 不支持的文件类型 42 | LOG.error(f"暂不支持的文件格式: {file_extension}") 43 | return 44 | 45 | # 加载 PowerPoint 模板,并打印模板中的可用布局 46 | ppt_template = load_template(config.ppt_template) # 加载模板文件 47 | LOG.info("可用的幻灯片布局:") # 记录信息日志,打印可用布局 48 | print_layouts(ppt_template) # 打印模板中的布局 49 | 50 | # 初始化 LayoutManager,使用配置文件中的 layout_mapping 51 | layout_manager = LayoutManager(get_layout_mapping(ppt_template)) 52 | 53 | # 调用 parse_input_text 函数,解析输入文本,生成 PowerPoint 数据结构 54 | powerpoint_data, presentation_title = parse_input_text(input_text, layout_manager) 55 | 56 | LOG.info(f"解析转换后的 ChatPPT PowerPoint 数据结构:\n{powerpoint_data}") # 记录信息日志,打印解析后的 PowerPoint 数据 57 | 58 | # 定义输出 PowerPoint 文件的路径 59 | output_pptx = f"outputs/{presentation_title}.pptx" 60 | 61 | # 调用 generate_presentation 函数生成 PowerPoint 演示文稿 62 | generate_presentation(powerpoint_data, config.ppt_template, output_pptx) 63 | 64 | # 程序入口 65 | if __name__ == "__main__": 66 | # 设置命令行参数解析器 67 | parser = argparse.ArgumentParser(description='从 markdown 或 docx 文件生成 PowerPoint 演示文稿。') 68 | parser.add_argument( 69 | 'input_file', # 输入文件参数 70 | nargs='?', # 可选参数 71 | default='inputs/markdown/test_input.md', # 默认值 72 | help='输入 markdown 或 docx 文件的路径(默认: inputs/markdown/test_input.md)' 73 | ) 74 | 75 | # 解析命令行参数 76 | args = parser.parse_args() 77 | 78 | # 使用解析后的输入文件参数运行主函数 79 | main(args.input_file) 80 | -------------------------------------------------------------------------------- /src/merge_requirements.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import importlib.metadata 4 | import re 5 | import os 6 | 7 | # 解析包的版本规范 8 | def parse_package_spec(spec): 9 | """ 10 | 解析包名和版本规范。 11 | 返回 (name, operator, version)。 12 | """ 13 | match = re.match(r'^([^=<>!~]+)\s*([=<>!~]+)\s*(.+)$', spec) 14 | if match: 15 | name, op, version = match.groups() 16 | return name.strip(), op.strip(), version.strip() 17 | else: 18 | return spec.strip(), None, None 19 | 20 | # 获取已安装包的版本 21 | def get_installed_versions(packages): 22 | installed_versions = {} 23 | for pkg in packages: 24 | name, _, _ = parse_package_spec(pkg) 25 | try: 26 | installed_version = importlib.metadata.version(name) 27 | installed_versions[name] = installed_version 28 | except importlib.metadata.PackageNotFoundError: 29 | print(f"包 {name} 未安装。") 30 | continue 31 | return installed_versions 32 | 33 | # 读取现有的 requirements.txt 34 | def read_requirements(file_path='requirements.txt'): 35 | req_versions = {} 36 | if os.path.exists(file_path): 37 | with open(file_path, 'r') as f: 38 | for line in f: 39 | line = line.strip() 40 | if not line or line.startswith('#'): 41 | continue 42 | name, op, version_spec = parse_package_spec(line) 43 | if op and version_spec: 44 | req_versions[name] = f"{op}{version_spec}" 45 | else: 46 | req_versions[name] = None # 未指定版本 47 | return req_versions 48 | 49 | # 合并包信息 50 | def merge_requirements(installed_versions, req_versions): 51 | merged_requirements = [] 52 | conflict_detected = False 53 | processed_packages = set() 54 | 55 | for name, installed_version in installed_versions.items(): 56 | if name in req_versions: 57 | req_version_spec = req_versions[name] 58 | if req_version_spec: 59 | # 检查版本是否匹配 60 | version_match = False 61 | # 支持多种版本操作符 62 | ops = ['==', '>=', '<=', '>', '<', '!=', '~='] 63 | for op in ops: 64 | if req_version_spec.startswith(op): 65 | req_op = op 66 | req_ver = req_version_spec[len(op):] 67 | break 68 | else: 69 | req_op = None 70 | req_ver = None 71 | 72 | if req_op == '==': 73 | if req_ver == installed_version: 74 | merged_requirements.append(f"{name}=={installed_version}") 75 | else: 76 | # 版本冲突 77 | merged_requirements.append(f"<<<<<<< HEAD") 78 | merged_requirements.append(f"{name}{req_version_spec}") 79 | merged_requirements.append(f"=======") 80 | merged_requirements.append(f"{name}=={installed_version}") 81 | merged_requirements.append(f">>>>>>> Merged version") 82 | conflict_detected = True 83 | else: 84 | # 如果 requirements.txt 中有版本规范但不是 '==' 85 | # 可以根据需要调整此逻辑 86 | # 这里假设只在 '==' 时进行严格匹配 87 | # 其他情况下认为没有冲突 88 | merged_requirements.append(f"{name}{req_version_spec}") 89 | else: 90 | # requirements.txt 未指定版本,直接覆盖为已安装版本 91 | merged_requirements.append(f"{name}=={installed_version}") 92 | processed_packages.add(name) 93 | else: 94 | # 包不在 requirements.txt 中,添加已安装版本 95 | merged_requirements.append(f"{name}=={installed_version}") 96 | processed_packages.add(name) 97 | 98 | # 添加 requirements.txt 中未处理的包 99 | for name, version_spec in req_versions.items(): 100 | if name not in processed_packages: 101 | if version_spec: 102 | merged_requirements.append(f"{name}{version_spec}") 103 | else: 104 | merged_requirements.append(f"{name}") 105 | 106 | return merged_requirements, conflict_detected 107 | 108 | def main(): 109 | # 读取现有的 requirements.txt 获取包列表 110 | req_versions = read_requirements('requirements.txt') 111 | packages = list(req_versions.keys()) 112 | 113 | # 获取已安装包的版本 114 | installed_versions = get_installed_versions(packages) 115 | 116 | # 合并包信息 117 | merged_requirements, conflict_detected = merge_requirements(installed_versions, req_versions) 118 | 119 | # 将合并结果写回 requirements.txt 120 | with open('requirements.txt', 'w') as f: 121 | for line in merged_requirements: 122 | f.write(line + '\n') 123 | 124 | if conflict_detected: 125 | print("requirements.txt 已更新,存在版本冲突。请手动解决冲突标记。") 126 | else: 127 | print("requirements.txt 已更新,无版本冲突。") 128 | 129 | if __name__ == "__main__": 130 | main() -------------------------------------------------------------------------------- /src/minicpm_v_model.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | from transformers import AutoModel, AutoTokenizer 3 | from logger import LOG # 引入日志模块,用于记录日志 4 | 5 | # 加载模型和分词器 6 | # 这里我们使用 `AutoModel` 和 `AutoTokenizer` 加载模型 'openbmb/MiniCPM-V-2_6-int4' 7 | # 参数 `trust_remote_code=True` 表示信任远程代码(根据模型文档设置) 8 | model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2_6-int4', trust_remote_code=True) 9 | tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2_6-int4', trust_remote_code=True) 10 | model.eval() # 设置模型为评估模式,以确保不进行训练中的随机性操作 11 | 12 | def chat_with_image(image_file, question='描述下这幅图', sampling=False, temperature=0.7, stream=False): 13 | """ 14 | 使用模型的聊天功能生成对图像的回答。 15 | 16 | 参数: 17 | image_file: 图像文件,用于处理的图像。 18 | question: 提问的问题,默认为 '描述下这幅图'。 19 | sampling: 是否使用采样进行生成,默认为 False。 20 | temperature: 采样温度,用于控制生成文本的多样性,值越高生成越多样。 21 | stream: 是否流式返回响应,默认为 False。 22 | 23 | 返回: 24 | 生成的回答文本字符串。 25 | """ 26 | # 打开并转换图像为 RGB 模式 27 | image = Image.open(image_file).convert('RGB') 28 | 29 | # 创建消息列表,模拟用户和 AI 的对话 30 | msgs = [{'role': 'user', 'content': [image, question]}] 31 | 32 | # 如果不启用流式输出,直接返回生成的完整响应 33 | if not stream: 34 | return model.chat(image=None, msgs=msgs, tokenizer=tokenizer, temperature=temperature) 35 | else: 36 | # 启用流式输出,则逐字生成并打印响应 37 | generated_text = "" 38 | for new_text in model.chat(image=None, msgs=msgs, tokenizer=tokenizer, sampling=sampling, temperature=temperature, stream=True): 39 | generated_text += new_text 40 | print(new_text, flush=True, end='') # 实时输出每部分生成的文本 41 | return generated_text # 返回完整的生成文本 42 | 43 | # 主程序入口 44 | if __name__ == "__main__": 45 | import sys # 引入 sys 模块以获取命令行参数 46 | if len(sys.argv) != 2: 47 | print("Usage: python src/minicpm_v_model.py ") # 提示正确的用法 48 | sys.exit(1) # 退出并返回状态码 1,表示错误 49 | 50 | image_file = sys.argv[1] # 获取命令行传入的图像文件路径 51 | question = 'What is in the image?' # 定义默认问题 52 | response = chat_with_image(image_file, question, sampling=True, temperature=0.7, stream=True) # 调用生成响应函数 53 | print("\nFinal Response:", response) # 输出最终响应 54 | -------------------------------------------------------------------------------- /src/openai_whisper.py: -------------------------------------------------------------------------------- 1 | from transformers import pipeline 2 | import gradio as gr 3 | import torch 4 | import tempfile 5 | import os 6 | import subprocess 7 | 8 | from logger import LOG 9 | 10 | # 模型名称和参数配置 11 | MODEL_NAME = "openai/whisper-large-v3" # Whisper 模型名称 12 | BATCH_SIZE = 8 # 处理批次大小 13 | 14 | # 检查是否可以使用 GPU,否则使用 CPU 15 | device = "cuda:0" if torch.cuda.is_available() else "cpu" 16 | 17 | # 初始化语音识别管道 18 | pipe = pipeline( 19 | task="automatic-speech-recognition", # 自动语音识别任务 20 | model=MODEL_NAME, # 指定模型 21 | chunk_length_s=60, # 每个音频片段的长度(秒) 22 | device=device, # 指定设备 23 | ) 24 | 25 | def convert_to_wav(input_path): 26 | """ 27 | 将音频文件转换为 WAV 格式并返回新文件路径。 28 | 29 | 参数: 30 | - input_path: 输入的音频文件路径 31 | 32 | 返回: 33 | - output_path: 转换后的 WAV 文件路径 34 | """ 35 | # 创建临时 WAV 文件,用于存储转换结果 36 | with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file: 37 | output_path = temp_wav_file.name 38 | 39 | try: 40 | # 使用 ffmpeg 将音频文件转换为指定格式 41 | subprocess.run( 42 | ["ffmpeg", "-y", "-i", input_path, "-ar", "16000", "-ac", "1", output_path], 43 | check=True, 44 | stdout=subprocess.PIPE, 45 | stderr=subprocess.PIPE 46 | ) 47 | return output_path 48 | except subprocess.CalledProcessError as e: 49 | LOG.error(f"音频文件转换失败: {e}") 50 | # 如果转换失败,删除临时文件并抛出错误 51 | if os.path.exists(output_path): 52 | os.remove(output_path) 53 | raise gr.Error("音频文件转换失败。请上传有效的音频文件。") 54 | except FileNotFoundError: 55 | LOG.error("未找到 ffmpeg 可执行文件。请确保已安装 ffmpeg。") 56 | if os.path.exists(output_path): 57 | os.remove(output_path) 58 | raise gr.Error("服务器配置错误,缺少 ffmpeg。请联系管理员。") 59 | 60 | def asr(audio_file, task="transcribe"): 61 | """ 62 | 对音频文件进行语音识别或翻译。 63 | 64 | 参数: 65 | - audio_file: 输入的音频文件路径 66 | - task: 任务类型("transcribe" 表示转录,"translate" 表示翻译) 67 | 68 | 返回: 69 | - text: 识别或翻译后的文本内容 70 | """ 71 | # 转换音频文件为 WAV 格式 72 | wav_file = convert_to_wav(audio_file) 73 | 74 | try: 75 | # 使用管道进行转录或翻译 76 | result = pipe( 77 | wav_file, 78 | batch_size=BATCH_SIZE, 79 | generate_kwargs={"task": task}, 80 | return_timestamps=True 81 | ) 82 | text = result["text"] 83 | LOG.info(f"[识别结果]:{text}") 84 | 85 | return text 86 | except Exception as e: 87 | LOG.error(f"处理音频文件时出错: {e}") 88 | raise gr.Error(f"处理音频文件时出错:{str(e)}") 89 | finally: 90 | # 删除临时转换后的 WAV 文件 91 | if os.path.exists(wav_file): 92 | os.remove(wav_file) 93 | 94 | def transcribe(inputs, task): 95 | """ 96 | 将音频文件转录或翻译为文本。 97 | 98 | 参数: 99 | - inputs: 上传的音频文件路径 100 | - task: 任务类型("transcribe" 表示转录,"translate" 表示翻译) 101 | 102 | 返回: 103 | - 识别的文本内容 104 | """ 105 | LOG.info(f"[上传的音频文件]: {inputs}") 106 | 107 | # 检查是否提供了音频文件 108 | if not inputs or not os.path.exists(inputs): 109 | raise gr.Error("未提交音频文件!请在提交请求前上传或录制音频文件。") 110 | 111 | # 检查音频文件格式 112 | file_ext = os.path.splitext(inputs)[1].lower() 113 | if file_ext not in ['.wav', '.flac', '.mp3']: 114 | LOG.error(f"文件格式错误:{inputs}") 115 | raise gr.Error("不支持的文件格式!请上传 WAV、FLAC 或 MP3 文件。") 116 | 117 | # 调用语音识别或翻译函数 118 | return asr(inputs, task) 119 | 120 | # 定义麦克风输入的接口实例,可供外部模块调用 121 | mf_transcribe = gr.Interface( 122 | fn=transcribe, # 执行转录的函数 123 | inputs=[ 124 | gr.Audio(sources="microphone", type="filepath", label="麦克风输入"), # 使用麦克风录制的音频输入 125 | gr.Radio(["transcribe", "translate"], label="任务类型", value="transcribe"), # 任务选择(转录或翻译) 126 | ], 127 | outputs="text", # 输出为文本 128 | title="Whisper Large V3: 语音识别", # 接口标题 129 | description="使用麦克风录制音频并进行语音识别或翻译。", # 接口描述 130 | flagging_mode="never", # 禁用标记功能 131 | ) 132 | 133 | # 定义文件上传的接口实例,用于处理上传的音频文件 134 | file_transcribe = gr.Interface( 135 | fn=transcribe, # 执行转录的函数 136 | inputs=[ 137 | gr.Audio(sources="upload", type="filepath", label="上传音频文件"), # 上传的音频文件输入 138 | gr.Radio(["transcribe", "translate"], label="任务类型", value="transcribe"), # 任务选择(转录或翻译) 139 | ], 140 | outputs="text", # 输出为文本 141 | title="Whisper Large V3: 转录音频文件", # 接口标题 142 | description="上传音频文件(WAV、FLAC 或 MP3)并进行语音识别或翻译。", # 接口描述 143 | flagging_mode="never", # 禁用标记功能 144 | ) 145 | 146 | # 仅当此脚本作为主程序运行时,执行 Gradio 应用的启动代码 147 | if __name__ == "__main__": 148 | # 创建一个 Gradio Blocks 实例,用于包含多个接口 149 | with gr.Blocks() as demo: 150 | # 使用 TabbedInterface 将 mf_transcribe 和 file_transcribe 接口分别放置在 "麦克风" 和 "音频文件" 选项卡中 151 | gr.TabbedInterface( 152 | [mf_transcribe, file_transcribe], 153 | ["麦克风", "音频文件"] 154 | ) 155 | 156 | # 启动Gradio应用,允许队列功能,并通过 HTTPS 访问 157 | demo.queue().launch( 158 | share=False, 159 | server_name="0.0.0.0", 160 | # auth=("django", "1234") # ⚠️注意:记住修改密码 161 | ) 162 | -------------------------------------------------------------------------------- /src/ppt_generator.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pptx import Presentation 3 | from pptx.util import Inches 4 | from PIL import Image 5 | from utils import remove_all_slides 6 | from logger import LOG # 引入日志模块 7 | 8 | def format_text(paragraph, text): 9 | """ 10 | 格式化文本,处理加粗内容。假设 ** 包围的文本表示需要加粗。 11 | """ 12 | while '**' in text: 13 | start = text.find('**') 14 | end = text.find('**', start + 2) 15 | 16 | if start != -1 and end != -1: 17 | # 添加加粗之前的普通文本 18 | if start > 0: 19 | run = paragraph.add_run() 20 | run.text = text[:start] 21 | 22 | # 添加加粗文本 23 | bold_run = paragraph.add_run() 24 | bold_run.text = text[start + 2:end] 25 | bold_run.font.bold = True # 设置加粗 26 | 27 | # 处理剩余文本 28 | text = text[end + 2:] 29 | else: 30 | break 31 | 32 | # 添加剩余的普通文本 33 | if text: 34 | run = paragraph.add_run() 35 | run.text = text 36 | 37 | def insert_image_centered_in_placeholder(new_slide, image_path): 38 | """ 39 | 将图片插入到 Slide 中,使其中心与 placeholder 的中心对齐。 40 | 如果图片尺寸超过 placeholder,则进行缩小适配。 41 | 在插入成功后删除 placeholder。 42 | """ 43 | # 构建图片的绝对路径 44 | image_full_path = os.path.join(os.getcwd(), image_path) 45 | 46 | # 检查图片是否存在 47 | if not os.path.exists(image_full_path): 48 | LOG.warning(f"图片路径 '{image_full_path}' 不存在,跳过此图片。") 49 | return 50 | 51 | # 打开图片并获取其大小(以像素为单位) 52 | with Image.open(image_full_path) as img: 53 | img_width_px, img_height_px = img.size 54 | 55 | # 遍历找到图片的 placeholder(type 18 表示图片 placeholder) 56 | for shape in new_slide.placeholders: 57 | if shape.placeholder_format.type == 18: 58 | placeholder_width = shape.width 59 | placeholder_height = shape.height 60 | placeholder_left = shape.left 61 | placeholder_top = shape.top 62 | 63 | # 计算 placeholder 的中心点 64 | placeholder_center_x = placeholder_left + placeholder_width / 2 65 | placeholder_center_y = placeholder_top + placeholder_height / 2 66 | 67 | # 图片的宽度和高度转换为 PowerPoint 的单位 (Inches) 68 | img_width = Inches(img_width_px / 96) # 假设图片 DPI 为 96 69 | img_height = Inches(img_height_px / 96) 70 | 71 | # 如果图片的宽度或高度超过 placeholder,按比例缩放图片 72 | if img_width > placeholder_width or img_height > placeholder_height: 73 | scale = min(placeholder_width / img_width, placeholder_height / img_height) 74 | img_width *= scale 75 | img_height *= scale 76 | 77 | # 计算图片左上角位置,使其中心对准 placeholder 中心 78 | left = placeholder_center_x - img_width / 2 79 | top = placeholder_center_y - img_height / 2 80 | 81 | # 插入图片到指定位置并设定缩放后的大小 82 | new_slide.shapes.add_picture(image_full_path, left, top, width=img_width, height=img_height) 83 | LOG.debug(f"图片已插入,并以 placeholder 中心对齐,路径: {image_full_path}") 84 | 85 | # 移除占位符 86 | sp = shape._element # 获取占位符的 XML 元素 87 | sp.getparent().remove(sp) # 从父元素中删除 88 | LOG.debug("已删除图片的 placeholder") 89 | break 90 | 91 | # 生成 PowerPoint 演示文稿 92 | def generate_presentation(powerpoint_data, template_path: str, output_path: str): 93 | # 检查模板文件是否存在 94 | if not os.path.exists(template_path): 95 | LOG.error(f"模板文件 '{template_path}' 不存在。") # 记录错误日志 96 | raise FileNotFoundError(f"模板文件 '{template_path}' 不存在。") 97 | 98 | prs = Presentation(template_path) # 加载 PowerPoint 模板 99 | remove_all_slides(prs) # 清除模板中的所有幻灯片 100 | prs.core_properties.title = powerpoint_data.title # 设置 PowerPoint 的核心标题 101 | 102 | # 遍历所有幻灯片数据,生成对应的 PowerPoint 幻灯片 103 | for slide in powerpoint_data.slides: 104 | # 确保布局索引不超出范围,超出则使用默认布局 105 | if slide.layout_id >= len(prs.slide_layouts): 106 | slide_layout = prs.slide_layouts[0] 107 | else: 108 | slide_layout = prs.slide_layouts[slide.layout_id] 109 | 110 | new_slide = prs.slides.add_slide(slide_layout) # 添加新的幻灯片 111 | 112 | # 设置幻灯片标题 113 | if new_slide.shapes.title: 114 | new_slide.shapes.title.text = slide.content.title 115 | LOG.debug(f"设置幻灯片标题: {slide.content.title}") 116 | 117 | # 添加文本内容 118 | for shape in new_slide.shapes: 119 | # 只处理非标题的文本框 120 | if shape.has_text_frame and not shape == new_slide.shapes.title: 121 | text_frame = shape.text_frame 122 | text_frame.clear() # 清除原有内容 123 | 124 | # 直接使用第一个段落,不添加新的段落,避免额外空行 125 | first_paragraph = text_frame.paragraphs[0] 126 | 127 | # 将要点内容作为项目符号列表添加到文本框中 128 | for point in slide.content.bullet_points: 129 | # 第一个要点覆盖初始段落,其他要点添加新段落 130 | paragraph = first_paragraph if point == slide.content.bullet_points[0] else text_frame.add_paragraph() 131 | paragraph.level = point["level"] # 设置项目符号的级别 132 | format_text(paragraph, point["text"]) # 调用 format_text 方法来处理加粗文本 133 | LOG.debug(f"添加列表项: {paragraph.text},级别: {paragraph.level}") 134 | 135 | break 136 | 137 | # 插入图片 138 | if slide.content.image_path: 139 | insert_image_centered_in_placeholder(new_slide, slide.content.image_path) 140 | 141 | # 保存生成的 PowerPoint 文件 142 | prs.save(output_path) 143 | LOG.info(f"演示文稿已保存到 '{output_path}'") 144 | -------------------------------------------------------------------------------- /src/slide_builder.py: -------------------------------------------------------------------------------- 1 | from data_structures import SlideContent, Slide 2 | from layout_manager import LayoutManager 3 | 4 | # SlideBuilder 类用于构建单张幻灯片并通过 LayoutManager 自动分配布局 5 | class SlideBuilder: 6 | def __init__(self, layout_manager: LayoutManager): 7 | self.layout_manager = layout_manager # 布局管理器实例 8 | self.title = "" # 幻灯片标题 9 | self.bullet_points = [] # 幻灯片要点列表,支持多级结构 10 | self.image_path = None # 幻灯片图片路径 11 | self.layout_id = None # 布局ID 12 | self.layout_name = None # 布局名称 13 | 14 | def set_title(self, title: str): 15 | self.title = title # 设置幻灯片的标题 16 | 17 | def add_bullet_point(self, bullet: str, level: int = 0): 18 | """ 19 | 添加项目符号及其级别到 bullet_points 列表中。 20 | :param bullet: 要点文本 21 | :param level: 项目符号的层级,默认为 0(一级) 22 | """ 23 | self.bullet_points.append({'text': bullet, 'level': level}) # 添加要点和层级 24 | 25 | def set_image(self, image_path: str): 26 | self.image_path = image_path # 设置图片路径 27 | 28 | def finalize(self) -> Slide: 29 | """ 30 | 组装并返回最终的 Slide 对象,调用 LayoutManager 自动分配布局。 31 | """ 32 | # 创建 SlideContent 对象,注意 bullet_points 现在是字典列表,包含 text 和 level 信息 33 | content = SlideContent( 34 | title=self.title, 35 | bullet_points=self.bullet_points, 36 | image_path=self.image_path 37 | ) 38 | 39 | # 调用 LayoutManager 分配布局 40 | self.layout_id, self.layout_name = self.layout_manager.assign_layout(content) 41 | 42 | # 返回最终的 Slide 对象 43 | return Slide(layout_id=self.layout_id, layout_name=self.layout_name, content=content) 44 | -------------------------------------------------------------------------------- /src/template_manager.py: -------------------------------------------------------------------------------- 1 | from pptx import Presentation 2 | 3 | # 加载 PowerPoint 模板 4 | def load_template(template_path: str) -> Presentation: 5 | prs = Presentation(template_path) 6 | return prs 7 | 8 | # 获取布局映射,返回模板中的布局名称与其索引的字典 9 | def get_layout_mapping(prs: Presentation) -> dict: 10 | layout_mapping = {} 11 | for idx, layout in enumerate(prs.slide_layouts): 12 | layout_mapping[layout.name] = idx 13 | return layout_mapping 14 | 15 | # 打印模板中的所有布局名称及其索引 16 | def print_layouts(prs: Presentation): 17 | for idx, layout in enumerate(prs.slide_layouts): 18 | print(f"Layout {idx}: {layout.name}") 19 | -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | from pptx import Presentation 2 | from logger import LOG 3 | 4 | # 删除 PowerPoint 模板中的所有幻灯片 5 | def remove_all_slides(prs: Presentation): 6 | xml_slides = prs.slides._sldIdLst # 获取幻灯片列表 7 | slides = list(xml_slides) # 转换为列表 8 | for slide in slides: 9 | xml_slides.remove(slide) # 从幻灯片列表中移除每一张幻灯片 10 | LOG.debug("模板中的幻灯片已被移除。") 11 | -------------------------------------------------------------------------------- /templates/MasterTemplate.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DjangoPeng/ChatPPT/6e0b7ded163b17277705d0f79270d14e99ce9756/templates/MasterTemplate.pptx -------------------------------------------------------------------------------- /templates/SimpleTemplate.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DjangoPeng/ChatPPT/6e0b7ded163b17277705d0f79270d14e99ce9756/templates/SimpleTemplate.pptx -------------------------------------------------------------------------------- /tests/test_data_structures.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import sys 4 | 5 | # 添加 src 目录到模块搜索路径,以便可以导入 src 目录中的模块 6 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src'))) 7 | 8 | from data_structures import PowerPoint, Slide, SlideContent 9 | 10 | class TestDataStructures(unittest.TestCase): 11 | """ 12 | 测试 PowerPoint、Slide、SlideContent 数据类,验证数据结构的正确性。 13 | """ 14 | 15 | def test_slide_content(self): 16 | slide_content = SlideContent(title="Test Slide", bullet_points=[{'text': "Bullet 1", 'level': 0}], image_path="images/test.png") 17 | self.assertEqual(slide_content.title, "Test Slide") 18 | self.assertEqual(slide_content.bullet_points, [{'text': "Bullet 1", 'level': 0}]) 19 | self.assertEqual(slide_content.image_path, "images/test.png") 20 | 21 | def test_slide(self): 22 | slide_content = SlideContent(title="Slide with Layout") 23 | slide = Slide(layout_id=2, layout_name="Title, Content 0", content=slide_content) 24 | self.assertEqual(slide.layout_id, 2) 25 | self.assertEqual(slide.layout_name, "Title, Content 0") 26 | self.assertEqual(slide.content.title, "Slide with Layout") 27 | 28 | def test_powerpoint(self): 29 | slide_content1 = SlideContent(title="Slide 1") 30 | slide_content2 = SlideContent(title="Slide 2") 31 | slide1 = Slide(layout_id=1, layout_name="Title 1", content=slide_content1) 32 | slide2 = Slide(layout_id=2, layout_name="Title, Content 0", content=slide_content2) 33 | ppt = PowerPoint(title="Test Presentation", slides=[slide1, slide2]) 34 | 35 | self.assertEqual(ppt.title, "Test Presentation") 36 | self.assertEqual(len(ppt.slides), 2) 37 | self.assertEqual(ppt.slides[0].content.title, "Slide 1") 38 | self.assertEqual(ppt.slides[1].content.title, "Slide 2") 39 | 40 | if __name__ == "__main__": 41 | unittest.main() 42 | -------------------------------------------------------------------------------- /tests/test_doc_parser.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import sys 4 | 5 | # 添加 src 目录到模块搜索路径,以便可以导入 src 目录中的模块 6 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src'))) 7 | 8 | from docx_parser import generate_markdown_from_docx 9 | 10 | class TestGenerateMarkdownFromDocx(unittest.TestCase): 11 | """ 12 | 测试从 docx 文件生成 Markdown 格式内容的功能。 13 | """ 14 | 15 | def setUp(self): 16 | """ 17 | 在每个测试方法执行前运行。用于准备测试所需的文件和目录。 18 | """ 19 | # 定义测试 docx 文件的路径 20 | self.test_docx_filename = 'inputs/docx/multimodal_llm_overview.docx' 21 | 22 | # 生成 Markdown 内容 23 | self.generated_markdown = generate_markdown_from_docx(self.test_docx_filename) 24 | 25 | def test_generated_markdown_content(self): 26 | """ 27 | 测试生成的 Markdown 内容是否符合预期。 28 | """ 29 | # 期望的 Markdown 输出内容 30 | expected_markdown = """ 31 | # 多模态大模型概述 32 | 33 | 多模态大模型是指能够处理多种数据模态(如文本、图像、音频等)的人工智能模型。它们在自然语言处理、计算机视觉等领域有广泛的应用。 34 | 35 | ## 1. 多模态大模型的特点 36 | 37 | - 支持多种数据类型: 38 | - 跨模态学习能力: 39 | - 广泛的应用场景: 40 | ### 1.1 支持多种数据类型 41 | 42 | 多模态大模型能够同时处理文本、图像、音频等多种类型的数据,实现数据的融合。 43 | 44 | ## 2. 多模态模型架构 45 | 46 | 以下是多模态模型的典型架构示意图: 47 | 48 | ![图片1](images/multimodal_llm_overview/1.png) 49 | 50 | TransFormer 架构图: 51 | 52 | ![图片2](images/multimodal_llm_overview/2.png) 53 | 54 | ### 2.1 模态融合技术 55 | 56 | 通过模态融合,可以提升模型对复杂数据的理解能力。 57 | 58 | 关键技术:注意力机制、Transformer架构等。 59 | 60 | - 应用领域: 61 | - 自然语言处理: 62 | - 机器翻译、文本生成等。 63 | - 计算机视觉: 64 | - 图像识别、目标检测等。 65 | ## 3. 未来展望 66 | 67 | 多模态大模型将在人工智能领域持续发挥重要作用,推动技术创新。 68 | """ 69 | 70 | # 比较生成的 Markdown 内容与预期内容 71 | self.assertEqual(self.generated_markdown.strip(), expected_markdown.strip(), "生成的 Markdown 内容与预期不匹配") 72 | 73 | def tearDown(self): 74 | """ 75 | 在每个测试方法执行后运行。用于清理测试产生的文件和目录。 76 | """ 77 | # 获取图像目录路径 78 | images_dir = 'images/multimodal_llm_overview' 79 | # 删除生成的图像文件和目录 80 | if os.path.exists(images_dir): 81 | for filename in os.listdir(images_dir): 82 | file_path = os.path.join(images_dir, filename) 83 | if os.path.isfile(file_path): 84 | os.unlink(file_path) # 删除文件 85 | os.rmdir(images_dir) # 删除目录 86 | 87 | if __name__ == '__main__': 88 | unittest.main() 89 | -------------------------------------------------------------------------------- /tests/test_input_parser.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import sys 4 | 5 | # 添加 src 目录到模块搜索路径,以便可以导入 src 目录中的模块 6 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src'))) 7 | 8 | from layout_manager import LayoutManager 9 | from data_structures import PowerPoint 10 | from input_parser import parse_input_text 11 | 12 | class TestInputParser(unittest.TestCase): 13 | """ 14 | 测试 input_parser 模块,检查解析输入文本生成 PowerPoint 数据结构的功能。 15 | """ 16 | 17 | def setUp(self): 18 | """ 19 | 初始化测试设置,读取输入文件并创建 LayoutManager 实例。 20 | """ 21 | # 模拟布局映射字典 22 | self.layout_mapping = { 23 | "Title 1": 1, 24 | "Title, Content 0": 2, 25 | "Title, Content, Picture 2": 8, 26 | } 27 | self.layout_manager = LayoutManager(self.layout_mapping) 28 | 29 | # 读取测试输入文件 30 | input_file_path = 'inputs/markdown/test_input.md' 31 | with open(input_file_path, 'r', encoding='utf-8') as f: 32 | self.input_text = f.read() 33 | 34 | def test_parse_input_text(self): 35 | """ 36 | 测试 parse_input_text 函数生成的 PowerPoint 数据结构是否符合预期。 37 | """ 38 | # 解析输入文本 39 | presentation, presentation_title = parse_input_text(self.input_text, self.layout_manager) 40 | 41 | # 期望的 PowerPoint 数据结构 42 | expected_presentation_title = "ChatPPT Demo" 43 | expected_slides = [ 44 | { 45 | "title": "ChatPPT Demo", 46 | "layout_id": 1, 47 | "layout_name": "Title 1", 48 | "bullet_points": [], 49 | "image_path": None, 50 | }, 51 | { 52 | "title": "2024 业绩概述", 53 | "layout_id": 2, 54 | "layout_name": "Title, Content 0", 55 | "bullet_points": [ 56 | {"text": "总收入增长15%", "level": 0}, 57 | {"text": "市场份额扩大至30%", "level": 0}, 58 | ], 59 | "image_path": None, 60 | }, 61 | { 62 | "title": "业绩图表", 63 | "layout_id": 8, 64 | "layout_name": "Title, Content, Picture 2", 65 | "bullet_points": [ 66 | {"text": "OpenAI 利润不断增加", "level": 0}, 67 | ], 68 | "image_path": "images/performance_chart.png", 69 | }, 70 | { 71 | "title": "新产品发布", 72 | "layout_id": 8, 73 | "layout_name": "Title, Content, Picture 2", 74 | "bullet_points": [ 75 | {"text": "产品A: **特色功能介绍**", "level": 0}, 76 | {"text": "增长潜力巨大", "level": 1}, 77 | {"text": "新兴市场", "level": 1}, 78 | {"text": "**非洲**市场", "level": 2}, 79 | {"text": "**东南亚**市场", "level": 2}, 80 | {"text": "产品B: 市场定位", "level": 0}, 81 | ], 82 | "image_path": "images/forecast.png", 83 | }, 84 | ] 85 | 86 | # 检查演示文稿标题是否匹配 87 | self.assertEqual(presentation_title, expected_presentation_title) 88 | 89 | # 检查幻灯片数量是否匹配 90 | self.assertEqual(len(presentation.slides), len(expected_slides)) 91 | 92 | # 检查每张幻灯片的内容是否符合预期 93 | for slide, expected in zip(presentation.slides, expected_slides): 94 | self.assertEqual(slide.content.title, expected["title"]) 95 | self.assertEqual(slide.layout_id, expected["layout_id"]) 96 | self.assertEqual(slide.layout_name, expected["layout_name"]) 97 | 98 | # 检查每个要点是否符合预期 99 | bullet_points = slide.content.bullet_points 100 | expected_bullet_points = expected["bullet_points"] 101 | self.assertEqual(len(bullet_points), len(expected_bullet_points)) 102 | for bullet, expected_bullet in zip(bullet_points, expected_bullet_points): 103 | self.assertEqual(bullet["text"], expected_bullet["text"]) 104 | self.assertEqual(bullet["level"], expected_bullet["level"]) 105 | 106 | # 检查图片路径是否符合预期 107 | self.assertEqual(slide.content.image_path, expected["image_path"]) 108 | 109 | if __name__ == '__main__': 110 | unittest.main() -------------------------------------------------------------------------------- /tests/test_layout_manager.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import sys 4 | 5 | # 添加 src 目录到模块搜索路径,以便可以导入 src 目录中的模块 6 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src'))) 7 | 8 | from layout_manager import LayoutManager 9 | from data_structures import SlideContent 10 | 11 | class TestLayoutManager(unittest.TestCase): 12 | """ 13 | 测试 LayoutManager 类,验证布局分配逻辑是否正确。 14 | """ 15 | 16 | def setUp(self): 17 | # 模拟布局映射字典 18 | layout_mapping = { 19 | "Title 1": 1, 20 | "Title, Content 0": 2, 21 | "Title, Content, Picture 2": 8 22 | } 23 | self.layout_manager = LayoutManager(layout_mapping) 24 | 25 | def test_assign_layout_title_only(self): 26 | content = SlideContent(title="Only Title") 27 | layout_id, layout_name = self.layout_manager.assign_layout(content) 28 | self.assertEqual(layout_id, 1) 29 | self.assertEqual(layout_name, "Title 1") 30 | 31 | def test_assign_layout_title_and_content(self): 32 | content = SlideContent(title="Title with Content", bullet_points=[{'text': "Content Bullet", 'level': 0}]) 33 | layout_id, layout_name = self.layout_manager.assign_layout(content) 34 | self.assertEqual(layout_id, 2) 35 | self.assertEqual(layout_name, "Title, Content 0") 36 | 37 | def test_assign_layout_title_content_and_image(self): 38 | content = SlideContent(title="Full Slide", bullet_points=[{'text': "Full Content", 'level': 0}], image_path="images/test.png") 39 | layout_id, layout_name = self.layout_manager.assign_layout(content) 40 | self.assertEqual(layout_id, 8) 41 | self.assertEqual(layout_name, "Title, Content, Picture 2") 42 | 43 | if __name__ == "__main__": 44 | unittest.main() 45 | -------------------------------------------------------------------------------- /tests/test_ppt_generator.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import sys 4 | from pptx import Presentation 5 | 6 | # 添加 src 目录到模块搜索路径,以便可以导入 src 目录中的模块 7 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src'))) 8 | 9 | from data_structures import PowerPoint, Slide, SlideContent 10 | from ppt_generator import generate_presentation 11 | 12 | class TestPPTGenerator(unittest.TestCase): 13 | """ 14 | 测试 ppt_generator 模块的 generate_presentation 函数,验证生成的 PowerPoint 文件内容是否符合预期。 15 | """ 16 | 17 | def setUp(self): 18 | """ 19 | 设置测试数据和输出路径。 20 | """ 21 | # 定义输入 PowerPoint 数据结构 22 | self.powerpoint_data = PowerPoint( 23 | title="ChatPPT Demo", 24 | slides=[ 25 | Slide( 26 | layout_id=1, 27 | layout_name="Title 1", 28 | content=SlideContent(title="ChatPPT Demo") 29 | ), 30 | Slide( 31 | layout_id=2, 32 | layout_name="Title, Content 0", 33 | content=SlideContent( 34 | title="2024 业绩概述", 35 | bullet_points=[ 36 | {"text": "总收入增长15%", "level": 0}, 37 | {"text": "市场份额扩大至30%", "level": 0} 38 | ] 39 | ) 40 | ), 41 | Slide( 42 | layout_id=8, 43 | layout_name="Title, Content, Picture 2", 44 | content=SlideContent( 45 | title="业绩图表", 46 | bullet_points=[{"text": "OpenAI 利润不断增加", "level": 0}], 47 | image_path="images/performance_chart.png" 48 | ) 49 | ), 50 | Slide( 51 | layout_id=8, 52 | layout_name="Title, Content, Picture 2", 53 | content=SlideContent( 54 | title="新产品发布", 55 | bullet_points=[ 56 | {"text": "产品A: **特色功能介绍**", "level": 0}, 57 | {"text": "增长潜力巨大", "level": 1}, 58 | {"text": "新兴市场", "level": 1}, 59 | {"text": "**非洲**市场", "level": 2}, 60 | {"text": "**东南亚**市场", "level": 2}, 61 | {"text": "产品B: 市场定位", "level": 0} 62 | ], 63 | image_path="images/forecast.png" 64 | ) 65 | ) 66 | ] 67 | ) 68 | 69 | self.template_path = "templates/SimpleTemplate.pptx" # 假设存在模板文件 70 | self.output_path = "outputs/test_presentation.pptx" # 定义输出文件路径 71 | 72 | def test_generate_presentation(self): 73 | """ 74 | 测试 generate_presentation 函数生成的 PowerPoint 文件是否符合预期。 75 | """ 76 | # 调用函数生成 PowerPoint 演示文稿 77 | generate_presentation(self.powerpoint_data, self.template_path, self.output_path) 78 | 79 | # 检查输出文件是否存在 80 | self.assertTrue(os.path.exists(self.output_path), "输出 PowerPoint 文件未找到。") 81 | 82 | # 打开生成的 PowerPoint 文件并验证内容 83 | prs = Presentation(self.output_path) 84 | 85 | # 检查演示文稿标题 86 | self.assertEqual(prs.core_properties.title, self.powerpoint_data.title) 87 | 88 | # 检查幻灯片数量 89 | self.assertEqual(len(prs.slides), len(self.powerpoint_data.slides)) 90 | 91 | # 验证每张幻灯片的内容 92 | for idx, slide_data in enumerate(self.powerpoint_data.slides): 93 | slide = prs.slides[idx] 94 | 95 | # 验证幻灯片标题 96 | self.assertEqual(slide.shapes.title.text, slide_data.content.title) 97 | 98 | # 验证项目符号列表内容 99 | bullet_points = [shape.text_frame.text for shape in slide.shapes if shape.has_text_frame and shape != slide.shapes.title] 100 | expected_bullets = [point["text"].replace("**", "") for point in slide_data.content.bullet_points] 101 | for bullet, expected in zip(bullet_points, expected_bullets): 102 | self.assertIn(expected, bullet) 103 | 104 | # 验证图片路径(如果存在) 105 | if slide_data.content.image_path: 106 | images = [shape for shape in slide.shapes if shape.shape_type == 13] # 13 为图片形状类型 107 | self.assertGreater(len(images), 0, f"幻灯片 {idx + 1} 应该包含图片,但未找到。") 108 | 109 | def tearDown(self): 110 | """ 111 | 清理生成的文件。 112 | """ 113 | if os.path.exists(self.output_path): 114 | os.remove(self.output_path) 115 | 116 | if __name__ == "__main__": 117 | unittest.main() 118 | -------------------------------------------------------------------------------- /tests/test_slide_builder.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import sys 4 | 5 | # 添加 src 目录到模块搜索路径,以便可以导入 src 目录中的模块 6 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src'))) 7 | 8 | from layout_manager import LayoutManager 9 | from slide_builder import SlideBuilder 10 | from data_structures import SlideContent 11 | 12 | class TestSlideBuilder(unittest.TestCase): 13 | """ 14 | 测试 SlideBuilder 类,验证幻灯片生成过程是否符合预期。 15 | """ 16 | 17 | @classmethod 18 | def setUpClass(cls): 19 | # 模拟布局映射字典,只初始化一次 20 | layout_mapping = {"Title 1": 1, "Title, Content 0": 2, "Title, Content, Picture 2": 8} 21 | cls.layout_manager = LayoutManager(layout_mapping) 22 | 23 | def setUp(self): 24 | # 使用已创建的 layout_manager 实例 25 | self.builder = SlideBuilder(self.layout_manager) 26 | 27 | def test_set_title(self): 28 | self.builder.set_title("Test Title") 29 | self.assertEqual(self.builder.title, "Test Title") 30 | 31 | def test_add_bullet_point(self): 32 | self.builder.add_bullet_point("Test Bullet 1", level=0) 33 | self.builder.add_bullet_point("Test Bullet 2", level=1) 34 | self.assertEqual(self.builder.bullet_points, [{'text': "Test Bullet 1", 'level': 0}, {'text': "Test Bullet 2", 'level': 1}]) 35 | 36 | def test_set_image(self): 37 | self.builder.set_image("images/test.png") 38 | self.assertEqual(self.builder.image_path, "images/test.png") 39 | 40 | def test_finalize(self): 41 | self.builder.set_title("Final Slide") 42 | self.builder.add_bullet_point("Bullet 1", level=0) 43 | self.builder.set_image("images/final.png") 44 | slide = self.builder.finalize() 45 | 46 | self.assertEqual(slide.content.title, "Final Slide") 47 | self.assertEqual(slide.content.bullet_points, [{'text': "Bullet 1", 'level': 0}]) 48 | self.assertEqual(slide.content.image_path, "images/final.png") 49 | 50 | if __name__ == "__main__": 51 | unittest.main() -------------------------------------------------------------------------------- /validate_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 运行单元测试并将结果输出到 test_results.txt 4 | python -m unittest discover -s tests -p "test_*.py" > test_results.txt 5 | 6 | # 检查测试结果,如果有失败,输出失败信息并让脚本退出状态为 1 7 | if grep -q "FAILED" test_results.txt; then 8 | cat test_results.txt 9 | exit 1 10 | else 11 | echo "All tests passed!" 12 | exit 0 13 | fi --------------------------------------------------------------------------------