├── .github └── workflows │ └── release.yml ├── .gitignore ├── LICENSE ├── README.md ├── README_CN.md ├── assets └── images │ ├── 8_documents.png │ ├── Doc_example.png │ ├── ExecutionResult.png │ └── RepoAgent.png ├── display ├── Makefile ├── README_DISPLAY.md ├── book_template │ └── book.json ├── book_tools │ ├── generate_repoagent_books.py │ └── generate_summary_from_book.py ├── books │ └── BOOKS.md └── scripts │ └── install_nodejs.sh ├── markdown_docs ├── display │ └── book_tools │ │ ├── generate_repoagent_books.md │ │ └── generate_summary_from_book.md ├── repo_agent │ ├── change_detector.md │ ├── chat_engine.md │ ├── doc_meta_info.md │ ├── file_handler.md │ ├── log.md │ ├── main.md │ ├── multi_task_dispatch.md │ ├── project_manager.md │ ├── runner.md │ ├── settings.md │ └── utils │ │ ├── gitignore_checker.md │ │ └── meta_info_utils.md └── tests │ ├── test_change_detector.md │ ├── test_json_handler.md │ └── test_structure_tree.md ├── pdm.lock ├── pyproject.toml ├── repo_agent ├── __init__.py ├── __main__.py ├── change_detector.py ├── chat_engine.py ├── chat_with_repo │ ├── __init__.py │ ├── __main__.py │ ├── gradio_interface.py │ ├── json_handler.py │ ├── main.py │ ├── prompt.py │ ├── rag.py │ ├── text_analysis_tool.py │ └── vector_store_manager.py ├── doc_meta_info.py ├── file_handler.py ├── log.py ├── main.py ├── multi_task_dispatch.py ├── project_manager.py ├── prompt.py ├── runner.py ├── settings.py └── utils │ ├── gitignore_checker.py │ └── meta_info_utils.py ├── requirements.txt └── tests ├── __init__.py ├── test_change_detector.py ├── test_json_handler.py └── test_structure_tree.py /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | pypi-publish: 9 | name: upload release to PyPI 10 | runs-on: ubuntu-latest 11 | permissions: 12 | # This permission is needed for private repositories. 13 | contents: read 14 | # IMPORTANT: this permission is mandatory for trusted publishing 15 | id-token: write 16 | steps: 17 | - uses: actions/checkout@v3 18 | 19 | - uses: pdm-project/setup-pdm@v3 20 | 21 | - name: Publish package distributions to PyPI 22 | run: pdm publish 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # 忽略私人配置文件,包含有api_key等信息 2 | config private.toml 3 | private.toml 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | .DS_Store 8 | */.DS_Store 9 | *.py[cod] 10 | *$py.class 11 | chroma_db/ 12 | .vscode 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | .DS_Store 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | cover/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | db.sqlite3-journal 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | .pybuilder/ 84 | target/ 85 | 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # IPython 90 | profile_default/ 91 | ipython_config.py 92 | 93 | # pyenv 94 | # For a library or package, you might want to ignore these files since the code is 95 | # intended to run in multiple environments; otherwise, check them in: 96 | # .python-version 97 | 98 | # pipenv 99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 102 | # install all needed dependencies. 103 | #Pipfile.lock 104 | 105 | # poetry 106 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 107 | # This is especially recommended for binary packages to ensure reproducibility, and is more 108 | # commonly ignored for libraries. 109 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 110 | #poetry.lock 111 | 112 | # pdm 113 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 114 | #pdm.lock 115 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 116 | # in version control. 117 | # https://pdm.fming.dev/#use-with-ide 118 | .pdm.toml 119 | .pdm-python 120 | 121 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 122 | __pypackages__/ 123 | 124 | # Celery stuff 125 | celerybeat-schedule 126 | celerybeat.pid 127 | 128 | # SageMath parsed files 129 | *.sage.py 130 | 131 | # Environments 132 | .env/ 133 | .venv/ 134 | env/ 135 | venv/ 136 | ENV/ 137 | env.bak/ 138 | venv.bak/ 139 | 140 | # Spyder project settings 141 | .spyderproject 142 | .spyproject 143 | 144 | # Rope project settings 145 | .ropeproject 146 | 147 | # mkdocs documentation 148 | /site 149 | 150 | # mypy 151 | .mypy_cache/ 152 | .dmypy.json 153 | dmypy.json 154 | 155 | # Pyre type checker 156 | .pyre/ 157 | 158 | # pytype static type analyzer 159 | .pytype/ 160 | 161 | # Cython debug symbols 162 | cython_debug/ 163 | assets/tests 164 | 165 | prompt_output/ 166 | .idea/ 167 | 168 | # PyCharm 169 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 170 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 171 | # and can be added to the global gitignore or merged into this file. For a more nuclear 172 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
173 | #.idea/ 174 | 175 | # VS Code 176 | .vscode/ 177 | 178 | # RepoAgent 179 | log.txt 180 | .chroma_db 181 | config.yml 182 | config.toml 183 | .project_doc_record 184 | 185 | # Gradio 186 | .gradio -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README_CN.md: -------------------------------------------------------------------------------- 1 |

RepoAgent: An LLM-Powered Framework for Repository-level Code Documentation Generation

2 | 3 |

4 | PyPI - 下载量 5 | 6 | PyPI - 版本 7 | 8 | 9 | PyPI - Python版本 10 | 11 | GitHub授权许可 12 | GitHub仓库星标 13 | GitHub问题 14 | 15 | arXiv 16 | 17 |

18 | 19 |

20 | RepoAgent 21 |

22 | 23 |

24 | English README 25 | • 26 | 简体中文说明 27 |

## 👾 Background

In the field of computer programming, the importance of comprehensive project documentation — including a detailed explanation of every Python file — is hard to overstate. Such documentation is the cornerstone of understanding, maintaining, and enhancing a codebase: it provides the necessary context and rationale for the code, making it easier for current and future developers to grasp the software's purpose, functionality, and structure. It also keeps the project accessible and modifiable over time, greatly flattening the learning curve for new team members.

Traditionally, creating and maintaining software documentation demanded significant human effort and expertise — a challenge for small teams without dedicated staff. The introduction of large language models (LLMs) such as GPT has changed this, allowing AI to handle most of the documentation process. This shift lets human developers focus on verification and fine-tuning, greatly reducing the manual burden of documentation.

**🏆 Our goal is to create an intelligent documentation assistant that helps people read and understand repositories and generate documentation, ultimately helping people improve efficiency and save time.**

## ✨ Features

- **🤖 Automatically detects changes in Git repositories, tracking additions, deletions, and modifications of files.**
- **📝 Independently analyzes the code structure through AST, generating documentation for individual objects.**
- **🔍 Accurately identifies bidirectional invocation relationships between objects, enriching the documentation with a global perspective.**
- **📚 Seamlessly replaces Markdown content based on changes, keeping the documentation consistent.**
- **🕙 Executes multi-threaded concurrent operations, improving the efficiency of documentation generation.**
- **👭 Offers a sustainable, automated documentation update workflow for team collaboration.**
- **😍 Displays code documentation in an amazing way (a documentation book powered by GitBook for each project).**

## 🚀 Getting Started

### Installation

#### Using pip (recommended for most users)

Install the `repoagent` package directly with pip:

```bash
pip install repoagent
```

#### Development setup with PDM

If you want to contribute or set up a development environment:

- **Install PDM**: If you haven't already, [install PDM](https://pdm-project.org/latest/#installation).
- **Use CodeSpace, or clone the repository**:

  - **Use CodeSpace**
    The easiest way to get a RepoAgent environment. Click the link below to use GitHub Codespaces, then proceed to the next step.

    [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/LOGIC-10/RepoAgent?quickstart=1)

  - **Clone the repository**

    ```bash
    git clone https://github.com/LOGIC-10/RepoAgent.git
    cd RepoAgent
    ```

  - **Set up with PDM**

    - Initialize a Python virtual environment. Make sure to run the command below in the `/RepoAgent` directory:

      ```bash
      pdm venv create --name repoagent
      ```

    - [Activate the virtual environment](https://pdm-project.org/latest/usage/venv/#activate-a-virtualenv)

    - Install dependencies with PDM

      ```bash
      pdm install
      ```

### Configuring RepoAgent

Before configuring RepoAgent's specific parameters, make sure the OpenAI API key is configured as an environment variable in your shell:

```sh
export OPENAI_API_KEY=YOUR_API_KEY # on Linux/Mac

set OPENAI_API_KEY=YOUR_API_KEY # on Windows
$Env:OPENAI_API_KEY = "YOUR_API_KEY" # on Windows (PowerShell)
```

## Running RepoAgent

Enter the RepoAgent root directory and try the following commands in your terminal:
```sh
repoagent run # this command generates the documentation or updates it automatically (the pre-commit hook invokes it for you)
repoagent --print-hierarchy # this command prints the target repository as parsed by repoagent
```

The run command supports the following optional flags (which, if set, override configuration defaults); an example invocation follows the list:

- `-m`, `--model` TEXT: Specifies the model to use for completion. Default: `gpt-3.5-turbo`
- `-t`, `--temperature` FLOAT: Sets the generation temperature for the model. Lower values make the model more deterministic. Default: `0.2`
- `-r`, `--request-timeout` INTEGER: Defines the timeout in seconds for API requests. Default: `60`
- `-b`, `--base-url` TEXT: The base URL for API calls. Default: `https://api.openai.com/v1`
- `-tp`, `--target-repo-path` PATH: The file-system path to the target repository, used as the root for documentation generation. Default: `path/to/your/target/repository`
- `-hp`, `--hierarchy-path` TEXT: The name or path of the project hierarchy file, used to organize the documentation structure. Default: `.project_doc_record`
- `-mdp`, `--markdown-docs-path` TEXT: The folder path where Markdown documentation will be stored or generated. Default: `markdown_docs`
- `-i`, `--ignore-list` TEXT: A comma-separated list of files or directories to ignore during documentation generation.
- `-l`, `--language` TEXT: The ISO 639 code or language name for the documentation. Default: `Chinese`
- `-ll`, `--log-level` [DEBUG|INFO|WARNING|ERROR|CRITICAL]: Sets the application's log level. Default: `INFO`
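For example, a typical invocation overriding several of these defaults might look like the following sketch (the repository path is a placeholder; adjust the values to your setup):

```bash
repoagent run \
  --model gpt-4-0125-preview \
  --temperature 0.2 \
  --target-repo-path /path/to/your/target/repository \
  --markdown-docs-path markdown_docs \
  --language English
```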
You can also try the following features:

```sh
repoagent clean # this command deletes the cache related to repoagent
repoagent diff # this command checks which documents would be updated/generated based on the current code changes
```

If this is the first time you generate documentation for the target repository, RepoAgent automatically creates a JSON file that maintains the global structure information and creates a folder in the root directory of the target repository to store the documents.
The paths of both the global structure information JSON file and the documentation folder can be configured in `config.yml`.

Once you have generated the global documentation for the target repository for the first time, or once the project you cloned already contains global documentation information, you can configure a **pre-commit** **hook** in the target repository and seamlessly maintain the project's internal documentation together with your team!

### Configuring the Target Repository

RepoAgent currently supports documentation generation and automatic maintenance for a project, which requires some configuration of the target repository.

First, make sure the target repository is a git repository and has been initialized.
```
git init
```
Install pre-commit in the target repository to detect changes in the git repository.
```
pip install pre-commit
```
In the root directory of the target repository, create a file named `.pre-commit-config.yaml`, for example:
```
repos:
  - repo: local
    hooks:
    - id: repo-agent
      name: RepoAgent
      entry: repoagent
      language: system
      pass_filenames: false # prevent pre-commit from passing filenames as arguments
      # you can specify the file types that trigger the hook, but currently only python is supported
      types: [python]
```
For the specific configuration of hooks, please refer to [pre-commit](https://pre-commit.com/#plugins).
After configuring the yaml file, run the following command to install the hook:
```
pre-commit install
```
This way, every git commit triggers the RepoAgent hook, which automatically detects changes in the target repository and generates the corresponding documents.
Next, you can make some changes to the target repository, for example adding a new file or modifying an existing one.
You only need to follow the normal git workflow: `git add`, `git commit -m "your commit message"`, `git push`.
The RepoAgent hook is triggered automatically on `git commit`, detects the files you staged with `git add` in the previous step, and generates the corresponding documents.

After execution, RepoAgent automatically amends the staged files in the target repository and completes the commit; when it finishes, a green "Passed" is shown, as in the figure below:
![Execution Result](https://raw.githubusercontent.com/OpenBMB/RepoAgent/main/assets/images/ExecutionResult.png)

The generated documents are stored in the specified folder in the root directory of the target repository, and they are rendered as shown below:
![Documentation](https://raw.githubusercontent.com/OpenBMB/RepoAgent/main/assets/images/Doc_example.png)
![Documentation](https://raw.githubusercontent.com/OpenBMB/RepoAgent/main/assets/images/8_documents.png)


We used the default model **gpt-3.5-turbo** to generate documentation for [**XAgent**](https://github.com/OpenBMB/XAgent), a medium-to-large project of roughly **270,000 lines** of code. You can check the result in the Markdown_Docs directory of the XAgent project. For better documentation quality, we recommend more advanced models such as **gpt-4-1106** or **gpt-4-0125-preview**.

**Finally, you can flexibly adjust the output format, templates, and other aspects of the documentation by customizing the prompts. We are glad to see you explore more scientific approaches to automated technical-writing prompts and contribute to the community.**

### Exploring chat with repo

We regard chatting with a repository as the unified gateway to all downstream applications — the interface connecting RepoAgent with human users and other AI agents. Our future research will explore adapting this interface to various downstream applications and shaping it to their distinctive characteristics and practical requirements.

Here we show an early prototype of one of our downstream tasks: automatic issue answering and code explanation. You can start the service by running the following in your terminal:

```sh
pip install repoagent[chat-with-repo]
repoagent chat-with-repo
```

# ✅ Future Work

- [x] Support installing and configuring the project as a package via `pip install repoagent`
- [ ] Automatically generate the repository's README.md from the global documentation information
- [ ] **Multi-language support**: support more programming languages, such as Java, C, or C++
- [ ] Local model support, e.g., Llama, chatGLM, Qianwen


# 🥰 Featured Cases

Here are featured open-source projects that use RepoAgent.

- [MiniCPM](https://github.com/OpenBMB/MiniCPM): An on-device LLM of 2B parameters whose performance is comparable to 7B models.
- [ChatDev](https://github.com/OpenBMB/ChatDev): Collaborative AI agents for software development.
- [XAgent](https://github.com/OpenBMB/XAgent): An autonomous LLM agent for solving complex tasks.
- [EasyRL4Rec](https://github.com/chongminggao/EasyRL4Rec): A user-friendly reinforcement-learning library for recommender systems.

# 📊 Citing Us
```bibtex
@misc{luo2024repoagent,
      title={RepoAgent: An LLM-Powered Open-Source Framework for Repository-level Code Documentation Generation},
      author={Qinyu Luo and Yining Ye and Shihao Liang and Zhong Zhang and Yujia Qin and Yaxi Lu and Yesai Wu and Xin Cong and Yankai Lin and Yingli Zhang and Xiaoyin Che and Zhiyuan Liu and Maosong Sun},
      year={2024},
      eprint={2402.16667},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
}
```
--------------------------------------------------------------------------------
/assets/images/8_documents.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenBMB/RepoAgent/825d988127d7bfd757237d9c4e8678d9104030f0/assets/images/8_documents.png
-------------------------------------------------------------------------------- /assets/images/Doc_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/RepoAgent/825d988127d7bfd757237d9c4e8678d9104030f0/assets/images/Doc_example.png -------------------------------------------------------------------------------- /assets/images/ExecutionResult.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/RepoAgent/825d988127d7bfd757237d9c4e8678d9104030f0/assets/images/ExecutionResult.png -------------------------------------------------------------------------------- /assets/images/RepoAgent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/RepoAgent/825d988127d7bfd757237d9c4e8678d9104030f0/assets/images/RepoAgent.png -------------------------------------------------------------------------------- /display/Makefile: -------------------------------------------------------------------------------- 1 | # get repo path from ../config.yml 2 | REPO_PATH := $(shell grep 'repo_path:' ../config.yml | awk '{ print $$2 }') 3 | 4 | Markdown_Docs_folder := $(shell grep 'Markdown_Docs_folder:' ../config.yml | awk '{ print $$2 }') 5 | 6 | # get book name from REPO_PATH 7 | BOOK_NAME := $(notdir $(REPO_PATH)) 8 | $(info BOOK_NAME is $(BOOK_NAME)) 9 | MARKDOWN_DOCS_FOLDER := $(Markdown_Docs_folder) 10 | $(info MARKDOWN_DOCS_FOLDER is $(MARKDOWN_DOCS_FOLDER)) 11 | GITBOOK_PORT := 4000 12 | GITBOOK_LRPORT := 30000 13 | 14 | # info colors 15 | GREEN := $(shell tput -Txterm setaf 2) 16 | YELLOW := $(shell tput -Txterm setaf 3) 17 | WHITE := $(shell tput -Txterm setaf 7) 18 | RESET := $(shell tput -Txterm sgr0) 19 | 20 | 21 | .PHONY: help 22 | .PHONY: init_env env_install 23 | .PHONY: init npm_install clear_book copy_book_json install 24 | .PHONY: generate generate_repo_agent_books generate_summary 25 | 26 | 27 | ################################################################################ 28 | # We need nodejs 10.x to run gitbook, this target will install nodejs 10.x 29 | ################################################################################ 30 | env_install: 31 | chmod +x ./scripts/install_nodejs.sh 32 | ./scripts/install_nodejs.sh 33 | 34 | ## init nodejs 10.x env 35 | init_env: env_install 36 | echo "You have prepared nodejs 10.x environment." 37 | 38 | 39 | 40 | ################################################################################ 41 | # The following targets are used to init the gitbook environment 42 | ################################################################################ 43 | ### Install gitbook-cli 44 | npm_install: 45 | npm install gitbook-cli -g 46 | 47 | ## clear repo generated book 48 | clear_book: 49 | -rm -rf ./books/$(BOOK_NAME) 50 | 51 | ### copy book.json 52 | copy_book_json: clear_book 53 | mkdir -p ./books/$(BOOK_NAME) 54 | cp ./book_template/book.json ./books/$(BOOK_NAME)/book.json 55 | 56 | ### gitbook install plugins 57 | install: 58 | echo "You need to make sure you have installed nodejs 10.x." 
	cd ./books/$(BOOK_NAME) && gitbook install

## gitbook init to install plugins
init: npm_install clear_book copy_book_json install
	@echo Complete init docs book


################################################################################
# The following targets are used to generate the book and book.json for the gitbook
################################################################################
clear_src:
	-rm -rf ./books/$(BOOK_NAME)/src

generate_repo_agent_books:
	@echo "Generating Repo Agent books..."
	@python ./book_tools/generate_repoagent_books.py $(MARKDOWN_DOCS_FOLDER) $(BOOK_NAME) $(REPO_PATH)

generate_summary:
	@echo "Generating summary..."
	@python ./book_tools/generate_summary_from_book.py $(BOOK_NAME)

## generate repo book
generate: clear_src generate_repo_agent_books generate_summary
	@echo complete repo book: $(BOOK_NAME) generated

## serve gitbook
serve: generate
	gitbook --port $(GITBOOK_PORT) --lrport $(GITBOOK_LRPORT) serve ./books/$(BOOK_NAME)


TASK_MAX_CHAR_NUM=30
## make help info
help:
	@echo ''
	@echo 'Usage:'
	@echo '  ${YELLOW}make${RESET} ${GREEN}<target>${RESET}'
	@echo ''
	@echo 'Tasks:'
	@awk '/^[a-zA-Z\-\_0-9]+:/ { \
		helpInfo = match(lastLine, /^## (.*)/); \
		if (helpInfo) { \
			helpCommand = substr($$1, 0, index($$1, ":")-1); \
			helpInfo = substr(lastLine, RSTART + 3, RLENGTH); \
			printf "  ${YELLOW}%-$(TASK_MAX_CHAR_NUM)s${RESET} ${GREEN}%s${RESET}\n", helpCommand, helpInfo; \
		} \
	} \
	{ lastLine = $$0 }' $(MAKEFILE_LIST)
--------------------------------------------------------------------------------
/display/README_DISPLAY.md:
--------------------------------------------------------------------------------
## Prerequisites

After generating documentation for the corresponding repo with RepoAgent, enter the display folder.

```bash
cd display
```

You need a **nodejs 10** environment, which you can install with nvm.

## One-Command Deployment Scripts

You can type `make help` on the command line to see our automated deployment targets.

```bash
(RepoAgent) yesai@yesaideMacBook-Pro:RepoAgent/display ‹wys*›$ make help
BOOK_NAME is XAgent-Dev
MARKDOWN_DOCS_FOLDER is Markdown_Docs

Usage:
  make <target>

Tasks:
  init_env   init nodejs 10.x env
  clear_book clear repo generated book
  init       gitbook init to install plugins
  generate   generate repo book
  serve      serve gitbook
  help       make help info

```

You can use `make init_env` to install nvm and nodejs 10 directly, or install nodejs 10 yourself, depending on your system.
On Windows, open a command line with administrator privileges and then enter the commands.

Then run `make init` to initialize the gitbook runtime environment (running `make init` once is enough).

Once the environment is ready, you can run `make serve` as many times as you like; after changing the relevant configuration or `book.json`, simply re-run `make serve` to redeploy.
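In short, a typical first-time flow looks like this (assuming `make init_env` can prepare Node.js 10 on your system — otherwise install it manually first):

```bash
make init_env   # one-time: install nvm and Node.js 10
make init       # one-time: install gitbook-cli and the book plugins
make serve      # generate the book and serve it on http://localhost:4000
```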
After success, the command-line output looks like this:

```bash
init!
finish!
info: >> generation finished with success in 16.7s !

Starting server ...
Serving book on http://localhost:4000
```

After that, you can view your visualized gitbook repo documentation at http://localhost:4000/.


## Future TODO List:

[✅] One-command automatic environment creation

[ ] (if creating the environment locally proves difficult) one-command docker deployment of gitbook, plus upload

[ ] Automatic one-command deployment to the corresponding GitHub or Gitee repo pages, so everyone can access the documentation directly through the repo's URL

--------------------------------------------------------------------------------
/display/book_template/book.json:
--------------------------------------------------------------------------------
{

    "title": "RepoAgent所生成文档GitBook",
    "description": "RepoAgent根据项目repo所生成文档GitBook",
    "generator": "site",
    "author": "RepoAgent ",
    "language": "zh-hans",
    "gitbook": "3.2.3",
    "root": "./src",

    "links": {
        "sidebar": {
            "XAgent": "https://x-agent.net",
            "RepoAgent": "https://github.com/LOGIC-10/RepoAgent.git"
        }
    },

    "pluginsConfig": {
        "github-buttons": {
            "buttons": [
                {
                    "repo": "RepoAgent",
                    "user": "LOGIC-10",
                    "type": "star",
                    "count": true,
                    "size": "small"
                }
            ]
        },

        "image-captions": {
            "caption": "Image _PAGE_LEVEL_._PAGE_IMAGE_NUMBER_ - _CAPTION_"
        },
        "autotheme": {
            "white": [9, 10, 11, 12, 13, 14, 15, 16],
            "sepia": [6, 7, 8, 17, 18, 19],
            "night": [20, 21, 22, 23, 0, 1, 2, 3, 4, 5]
        },
        "callouts": {
            "showTypeInHeader": false
        },
        "theme-default": {
            "showLevel": false
        },
        "disqus": {
            "shortName": ""
        },
        "prism": {
            "css": [
                "prism-themes/themes/prism-atom-dark.css"
            ]
        },
        "sharing": {
            "douban": false,
            "facebook": true,
            "google": false,
            "hatenaBookmark": false,
            "instapaper": false,
            "line": false,
            "linkedin": false,
            "messenger": false,
            "pocket": false,
            "qq": true,
            "qzone": false,
            "stumbleupon": false,
            "twitter": true,
            "viber": false,
            "vk": false,
            "weibo": true,
            "whatsapp": false,
            "all": [
                "douban",
                "facebook",
                "google",
                "instapaper",
                "line",
                "linkedin",
                "messenger",
                "pocket",
                "qq",
                "qzone",
                "stumbleupon",
                "twitter",
                "viber",
                "vk",
                "weibo",
                "whatsapp"
            ]
        },
        "tbfed-pagefooter": {
            "copyright": "https://x-agent.net,RepoAgent 发布",
            "modify_label": "最后更新:",
            "modify_format": "YYYY-MM-DD HH:mm:ss"
        }
    },

    "plugins": [
        "theme-comscore",
        "anchors",
        "-lunr",
        "-search",
        "search-plus",
        "disqus",
        "-highlight",
        "prism",
        "prism-themes",
        "github-buttons",
        "splitter",
        "-sharing",
        "sharing-plus",
        "tbfed-pagefooter",
        "expandable-chapters-small",
        "copy-code-button",
        "callouts",
        "image-captions",
        "autotheme"
    ]

}
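As README_DISPLAY.md notes, you can edit this template and re-run `make serve` to redeploy. For orientation, a stripped-down variant keeping only the core fields might look like the sketch below (illustrative values, not part of the repo; the plugin configuration above is optional):

```json
{
  "title": "RepoAgent generated GitBook",
  "language": "zh-hans",
  "gitbook": "3.2.3",
  "root": "./src"
}
```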
--------------------------------------------------------------------------------
/display/book_tools/generate_repoagent_books.py:
--------------------------------------------------------------------------------
import os
import shutil
import sys


def main():
    markdown_docs_folder = sys.argv[1]
    book_name = sys.argv[2]
    repo_path = sys.argv[3]

    # mkdir the book folder
    dst_dir = os.path.join('./books', book_name, 'src')
    docs_dir = os.path.join(repo_path, markdown_docs_folder)

    # check the dst_dir
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)
        print("mkdir %s" % dst_dir)

    # cp the Markdown_Docs_folder to dst_dir
    for item in os.listdir(docs_dir):
        src_path = os.path.join(docs_dir, item)
        dst_path = os.path.join(dst_dir, item)

        # check the src_path
        if os.path.isdir(src_path):
            # if the src_path is a folder, use shutil.copytree to copy
            shutil.copytree(src_path, dst_path)
            print("copytree %s to %s" % (src_path, dst_path))
        else:
            # if the src_path is a file, use shutil.copy2 to copy
            shutil.copy2(src_path, dst_path)
            print("copy2 %s to %s" % (src_path, dst_path))

    def create_book_readme_if_not_exist(dire):
        readme_path = os.path.join(dire, 'README.md')

        if not os.path.exists(readme_path):
            with open(readme_path, 'w') as readme_file:
                readme_file.write('# {}\n'.format(book_name))

    # create book README.md if not exist
    create_book_readme_if_not_exist(dst_dir)


if __name__ == '__main__':
    main()
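As the Makefile's `generate_repo_agent_books` target shows, this script expects three positional arguments — the Markdown docs folder, the book name, and the repo path, in that order. Run manually, an invocation might look like this (the values are illustrative):

```bash
python ./book_tools/generate_repoagent_books.py Markdown_Docs MyBook /path/to/MyBook
```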
--------------------------------------------------------------------------------
/display/book_tools/generate_summary_from_book.py:
--------------------------------------------------------------------------------
import os
import re
import sys


def create_readme_if_not_exist(dire):
    readme_path = os.path.join(dire, 'README.md')

    if not os.path.exists(readme_path):
        with open(readme_path, 'w') as readme_file:
            dirname = os.path.basename(dire)
            readme_file.write('# {}\n'.format(dirname))


def output_markdown(dire, base_dir, output_file, iter_depth=0):
    # First pass: make sure every subdirectory has a README.md
    for filename in os.listdir(dire):
        print('add readme ', filename)
        file_or_path = os.path.join(dire, filename)
        if os.path.isdir(file_or_path):
            create_readme_if_not_exist(file_or_path)

    # Second pass: emit SUMMARY.md entries
    for filename in os.listdir(dire):
        print('deal with ', filename)
        file_or_path = os.path.join(dire, filename)
        if os.path.isdir(file_or_path):
            # If the directory has a README.md, create a markdown link to it,
            # then recurse into the directory for nested entries
            readme_path = os.path.join(file_or_path, 'README.md')
            if os.path.exists(readme_path):
                relative_path = os.path.join(os.path.relpath(file_or_path, base_dir), 'README.md')
                output_file.write('  ' * iter_depth + '- [{}]({})\n'.format(filename, relative_path))
            output_markdown(file_or_path, base_dir, output_file, iter_depth + 1)
        else:
            if is_markdown_file(filename):
                # Skip SUMMARY.md and the top-level README.md; nested README.md
                # files are already linked when their directory is processed
                if filename not in ('SUMMARY.md', 'README.md') or (iter_depth != 0 and filename != 'README.md'):
                    relative_path = os.path.join(os.path.relpath(dire, base_dir), filename)
                    output_file.write('  ' * iter_depth + '- [{}]({})\n'.format(is_markdown_file(filename), relative_path))


def markdown_file_in_dir(dire):
    for root, dirs, files in os.walk(dire):
        for filename in files:
            if re.search(r'\.md$|\.markdown$', filename):
                return True
    return False


def is_markdown_file(filename):
    # Returns the filename without its extension for Markdown files,
    # and False for everything else
    match = re.search(r'\.md$|\.markdown$', filename)
    if not match:
        return False
    elif len(match.group()) == len('.md'):
        return filename[:-3]
    elif len(match.group()) == len('.markdown'):
        return filename[:-9]


def main():
    book_name = sys.argv[1]

    # Path of the book's src folder; create it if it does not exist yet
    dir_input = os.path.join('./books', book_name, 'src')
    if not os.path.exists(dir_input):
        os.makedirs(dir_input)

    # Write the SUMMARY.md skeleton, then fill it from the directory tree
    output_path = os.path.join(dir_input, 'SUMMARY.md')
    with open(output_path, 'w') as output:
        output.write('# Summary\n\n')
        output_markdown(dir_input, dir_input, output)

    print('GitBook auto summary finished:) ')
    return 0


if __name__ == '__main__':
    main()
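Both book_tools scripts are normally driven by the display Makefile, which greps exactly two keys — `repo_path:` and `Markdown_Docs_folder:` — out of the project's `config.yml`. A minimal sketch of that part of `config.yml` (the values here are placeholders):

```yaml
# Hypothetical minimal config.yml for the display Makefile
repo_path: /path/to/your/target/repository   # BOOK_NAME is derived from this path
Markdown_Docs_folder: Markdown_Docs          # folder holding the generated docs
```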
--------------------------------------------------------------------------------
/display/books/BOOKS.md:
--------------------------------------------------------------------------------
Supports creating multiple repo books.

A unified book template, fully customizable.

Since config.yml is already configured to generate the repo doc, there is no need to change config.yml — just run the make commands to generate and display the corresponding repo book.
--------------------------------------------------------------------------------
/display/scripts/install_nodejs.sh:
--------------------------------------------------------------------------------
#!/bin/bash

export NVM_DIR="$HOME/.nvm"
[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"  # This loads nvm
[ -s "$NVM_DIR/bash_completion" ] && \. "$NVM_DIR/bash_completion"  # This loads nvm bash_completion


# Check whether nvm is already installed
check_nvm_installed() {
    if [ -s "$NVM_DIR/nvm.sh" ]; then
        echo "nvm is already installed"
        return 0
    else
        echo "nvm is not installed"
        return 1
    fi
}

# Install nvm
install_nvm_linux() {
    curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.1/install.sh | bash
    source "$NVM_DIR/nvm.sh"
}

install_nvm_mac() {
    curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.1/install.sh | bash
    source "$NVM_DIR/nvm.sh"
}

install_nvm_windows() {
    echo "Downloading nvm for Windows..."
    curl -o nvm-setup.exe -L https://github.com/coreybutler/nvm/releases/download/1.1.12/nvm-setup.exe
    echo "Installing nvm for Windows..."
    ./nvm-setup.exe
    echo "nvm version:"
    nvm -v
    echo "nvm installation for Windows completed."
    rm -f nvm-setup.exe
}

# Install Node.js 10
install_nodejs() {
    nvm install 10
    nvm use 10
}

# Check whether Node.js was installed successfully
check_node() {
    node_version=$(node -v)
    echo "Installed Node.js version: $node_version"
    if [[ "$node_version" == v10* ]]; then
        echo "Node.js 10 is installed successfully."
    else
        echo "Node.js 10 is not installed."
        exit 1
    fi
}

# Detect the operating system and install nvm (if needed)
case "$OSTYPE" in
  linux-gnu*)
    if ! check_nvm_installed; then
      install_nvm_linux
    fi
    ;;
  darwin*)
    if ! check_nvm_installed; then
      install_nvm_mac
    fi
    ;;
  cygwin*|msys*|mingw*|bccwin*|wsl*)
    if ! check_nvm_installed; then
      install_nvm_windows
    fi
    ;;
  *)
    echo "Unsupported OS, you could install nvm manually"
    exit 1
    ;;
esac

# Install Node.js 10
install_nodejs

check_node
--------------------------------------------------------------------------------
/markdown_docs/display/book_tools/generate_repoagent_books.md:
--------------------------------------------------------------------------------
## FunctionDef main
**main**: The function of main is to create a directory structure for a book and copy Markdown documentation files into it.

**parameters**: The parameters of this Function.
· parameter1: markdown_docs_folder - The name of the folder containing Markdown documentation files to be copied.
· parameter2: book_name - The name of the book for which the directory structure is created.
· parameter3: repo_path - The path to the repository where the Markdown documentation folder is located.

**Code Description**: The main function begins by retrieving command-line arguments that specify the folder containing Markdown documentation, the desired book name, and the repository path. It constructs the destination directory path where the book's source files will be stored, specifically under './books/{book_name}/src'. It also constructs the source directory path for the Markdown documentation files based on the provided repository path and the specified folder name.

The function then checks if the destination directory exists. If it does not exist, it creates the directory and prints a confirmation message indicating that the directory has been created.

Next, the function iterates over each item in the source directory. For each item, it constructs the full source and destination paths. If the item is a directory, it uses `shutil.copytree` to recursively copy the entire directory to the destination. If the item is a file, it uses `shutil.copy2` to copy the file to the destination. For each copy operation, a message is printed to confirm the action taken.

Additionally, the function defines a nested function called `create_book_readme_if_not_exist`, which checks for the existence of a README.md file in the destination directory. If the README.md file does not exist, it creates one and writes the book name as the title in Markdown format.

Finally, the main function calls `create_book_readme_if_not_exist` to ensure that a README.md file is created for the book if it is not already present.

**Note**: It is important to ensure that the specified paths and folder names are valid and accessible. The function relies on the presence of the `shutil` and `os` modules, which must be imported for the code to execute successfully.
Additionally, the function assumes that the command-line arguments are provided in the correct order and format. 20 | ### FunctionDef create_book_readme_if_not_exist(dire) 21 | **create_book_readme_if_not_exist**: The function of create_book_readme_if_not_exist is to create a README.md file in a specified directory if it does not already exist. 22 | 23 | **parameters**: The parameters of this Function. 24 | · dire: A string representing the directory path where the README.md file should be created. 25 | 26 | **Code Description**: The create_book_readme_if_not_exist function is designed to check for the existence of a README.md file in a specified directory. It takes one parameter, 'dire', which is the path to the directory where the README.md file is intended to be created. 27 | 28 | The function first constructs the full path to the README.md file by joining the provided directory path with the filename 'README.md' using the os.path.join method. It then checks if the file already exists at that path using os.path.exists. If the file does not exist, the function proceeds to create it. 29 | 30 | Within a context manager (using the 'with' statement), the function opens the README.md file in write mode ('w'). This ensures that if the file is created, it will be properly closed after writing. The function writes a header line to the file, formatted as '# {book_name}', where 'book_name' is expected to be a variable that holds the name of the book. However, it is important to note that 'book_name' must be defined in the scope where this function is called, as it is not passed as a parameter to the function itself. 31 | 32 | **Note**: It is essential to ensure that the variable 'book_name' is defined before calling this function, as it is used in the content written to the README.md file. Additionally, the function does not handle exceptions that may arise from file operations, so it is advisable to implement error handling if necessary. 33 | *** 34 | -------------------------------------------------------------------------------- /markdown_docs/display/book_tools/generate_summary_from_book.md: -------------------------------------------------------------------------------- 1 | ## FunctionDef create_readme_if_not_exist(dire) 2 | **create_readme_if_not_exist**: The function of create_readme_if_not_exist is to create a README.md file in a specified directory if it does not already exist. 3 | 4 | **parameters**: The parameters of this Function. 5 | · dire: The directory path where the README.md file should be created. 6 | 7 | **Code Description**: The create_readme_if_not_exist function checks if a README.md file exists in the specified directory (denoted by the parameter 'dire'). If the file does not exist, the function creates it and writes a header containing the name of the directory as the title. The path for the README.md file is constructed using the os.path.join method, which combines the directory path with the file name 'README.md'. The function uses os.path.exists to verify the existence of the README.md file. If the file is absent, it opens the file in write mode and writes a formatted string that includes the base name of the directory, which is obtained using os.path.basename. 8 | 9 | This function is called by the output_markdown function, which iterates through the contents of a specified directory. 
During its execution, output_markdown checks each item in the directory; if an item is a subdirectory, it invokes create_readme_if_not_exist to ensure that a README.md file is present in that subdirectory. This relationship indicates that create_readme_if_not_exist is a utility function designed to support the documentation generation process by ensuring that each directory has a README.md file, which can be useful for providing context or information about the contents of the directory. 10 | 11 | **Note**: It is important to ensure that the directory path provided to the create_readme_if_not_exist function is valid and accessible. Additionally, the function will only create the README.md file if it does not already exist, preventing overwriting any existing documentation. 12 | ## FunctionDef output_markdown(dire, base_dir, output_file, iter_depth) 13 | **output_markdown**: The function of output_markdown is to generate a Markdown-formatted summary of files and directories, including links to README.md files and relevant Markdown files. 14 | 15 | **parameters**: The parameters of this Function. 16 | · dire: A string representing the directory path to be processed for Markdown files and subdirectories. 17 | · base_dir: A string representing the base directory path used to create relative paths for the output. 18 | · output_file: A file object where the generated Markdown summary will be written. 19 | · iter_depth: An integer indicating the current depth of recursion, used for formatting the output. 20 | 21 | **Code Description**: The output_markdown function is designed to traverse a specified directory (denoted by the parameter 'dire') and its subdirectories, generating a structured Markdown summary of the contents. The function begins by iterating through the files and directories within 'dire'. For each item, it checks if it is a directory. If it is, the function calls create_readme_if_not_exist to ensure that a README.md file exists in that directory. This utility function is crucial for maintaining documentation consistency across directories. 22 | 23 | After ensuring that README.md files are present, the function continues to process each item in the directory. If an item is a directory and contains a README.md file, the function creates a relative Markdown link to that file in the output. The relative path is constructed using os.path.relpath to ensure that the link is correctly formatted based on the base directory. 24 | 25 | For files that are not directories, the function utilizes is_markdown_file to determine if the file is a Markdown file. If the file is identified as a Markdown file and is not excluded by specific conditions (such as being named 'SUMMARY.md' or 'README.md' at the top level), the function writes a relative link to that file in the output. 26 | 27 | The output_markdown function is called by the main function, which serves as the entry point of the program. In main, the function is invoked after creating the necessary directory structure and opening the output file for writing. This relationship indicates that output_markdown is a critical component of the documentation generation process, responsible for compiling the contents of the specified directory into a cohesive Markdown summary. 28 | 29 | **Note**: It is important to ensure that the directory path provided to output_markdown is valid and accessible. The function assumes that the output_file is opened in write mode before being passed to it. 
Additionally, care should be taken to manage the depth of recursion, as excessive nesting may lead to performance issues or stack overflow errors. 30 | ## FunctionDef markdown_file_in_dir(dire) 31 | **markdown_file_in_dir**: The function of markdown_file_in_dir is to check whether any Markdown file (with .md or .markdown extension) exists in a specified directory or its subdirectories. 32 | 33 | **parameters**: 34 | - parameter1: dire (str) - The directory path to be searched for Markdown files. 35 | 36 | **Code Description**: 37 | The function `markdown_file_in_dir` is designed to traverse a specified directory (`dire`) and its subdirectories to check for the existence of files with `.md` or `.markdown` extensions. It utilizes Python's `os.walk` function to walk through the directory tree, where `root` is the current directory path, `dirs` is a list of subdirectories, and `files` is a list of filenames in the current directory. 38 | 39 | For each file in the list `files`, the function checks whether the filename matches the regular expression pattern `'.md$|.markdown$'`, which identifies files with the `.md` or `.markdown` extensions. If such a file is found, the function immediately returns `True`, indicating that at least one Markdown file exists within the directory or its subdirectories. 40 | 41 | If no Markdown files are found during the entire directory traversal, the function returns `False`. 42 | 43 | **Note**: 44 | - The function stops as soon as a Markdown file is found and returns `True`, which means it does not continue searching further once the condition is met. 45 | - The function uses regular expressions to identify files with `.md` or `.markdown` extensions. Be aware that this check is case-sensitive by default, meaning it will only match lowercase `.md` or `.markdown`. If case-insensitive matching is needed, the regular expression pattern can be modified accordingly. 46 | - This function only returns a Boolean value (True or False). It does not provide any information about the specific files found, just the presence or absence of such files. 47 | 48 | **Output Example**: 49 | - If there is at least one `.md` or `.markdown` file in the directory, the return value would be: 50 | `True` 51 | - If there are no `.md` or `.markdown` files in the directory, the return value would be: 52 | `False` 53 | ## FunctionDef is_markdown_file(filename) 54 | **is_markdown_file**: The function of is_markdown_file is to determine if a given filename corresponds to a Markdown file and return the filename without its extension if it does. 55 | 56 | **parameters**: The parameters of this Function. 57 | · filename: A string representing the name of the file to be checked. 58 | 59 | **Code Description**: The is_markdown_file function uses a regular expression to check if the provided filename ends with either '.md' or '.markdown'. If the filename does not match either of these patterns, the function returns False, indicating that the file is not a Markdown file. If the filename matches '.md', the function returns the filename without the last three characters (the '.md' extension). If the filename matches '.markdown', it returns the filename without the last nine characters (the '.markdown' extension). 60 | 61 | This function is called within the output_markdown function, which is responsible for generating a Markdown-formatted summary of files and directories. In output_markdown, the is_markdown_file function is used to filter out files that are Markdown files. 
Specifically, it checks each file in the specified directory and its subdirectories. If a file is identified as a Markdown file (and is not 'SUMMARY.md' or 'README.md' under certain conditions), its relative path is formatted and written to the output file. This relationship highlights the utility of is_markdown_file in ensuring that only relevant Markdown files are included in the generated summary. 62 | 63 | **Note**: It is important to ensure that the filename passed to the function is a valid string. The function does not handle exceptions for invalid inputs, so care should be taken to validate the input before calling this function. 64 | 65 | **Output Example**: 66 | - If the input is 'example.md', the output will be 'example'. 67 | - If the input is 'document.markdown', the output will be 'document'. 68 | - If the input is 'image.png', the output will be False. 69 | ## FunctionDef main 70 | **main**: The function of main is to generate a Markdown summary file for a specified book by creating the necessary directory structure and invoking the output_markdown function. 71 | 72 | **parameters**: The parameters of this Function. 73 | · book_name: A string representing the name of the book, which is passed as a command-line argument. 74 | 75 | **Code Description**: The main function serves as the entry point for the script, responsible for orchestrating the creation of a Markdown summary file for a book. It begins by retrieving the book name from the command-line arguments using `sys.argv[1]`. This book name is then used to construct the path for the source directory where the summary will be generated, specifically `./books/{book_name}/src`. 76 | 77 | The function checks if the specified directory exists using `os.path.exists(dir_input)`. If the directory does not exist, it creates the directory structure using `os.makedirs(dir_input)`. This ensures that the environment is prepared for the subsequent operations. 78 | 79 | Once the directory is confirmed to exist, the function proceeds to create the summary file named 'SUMMARY.md' within the specified directory. It opens this file in write mode using `open(output_path, 'w')` and writes a header '# Summary\n\n' to initialize the content. 80 | 81 | The core functionality of generating the summary is delegated to the `output_markdown` function. This function is called with the parameters `dir_input`, `dir_input` (as the base directory), and the opened output file. The `output_markdown` function is responsible for traversing the directory structure, identifying Markdown files, and generating the appropriate links in the summary file. 82 | 83 | After the summary generation process is completed, the function prints a confirmation message indicating that the GitBook auto summary has finished. The function concludes by returning 0, signaling successful execution. 84 | 85 | The relationship with the `output_markdown` function is crucial, as it handles the detailed processing of the directory contents and the creation of the Markdown links, making it an integral part of the summary generation workflow. 86 | 87 | **Note**: It is important to ensure that the book name provided as a command-line argument is valid and corresponds to an existing book directory structure. The function assumes that the necessary permissions are in place for creating directories and files in the specified path. 
88 | 89 | **Output Example**: 90 | When executed with a valid book name, the function will create a directory structure like: 91 | ``` 92 | ./books/ 93 | └── example_book/ 94 | └── src/ 95 | └── SUMMARY.md 96 | ``` 97 | The content of 'SUMMARY.md' might look like: 98 | ``` 99 | # Summary 100 | 101 | - [Chapter 1](./chapter1.md) 102 | - [Chapter 2](./chapter2.md) 103 | - [Subdirectory](./subdirectory/README.md) 104 | ``` 105 | -------------------------------------------------------------------------------- /markdown_docs/repo_agent/chat_engine.md: -------------------------------------------------------------------------------- 1 | ## ClassDef ChatEngine 2 | Doc is waiting to be generated... 3 | ### FunctionDef __init__(self, project_manager) 4 | **__init__**: The function of __init__ is to initialize an instance of the ChatEngine class with the necessary configuration settings for the OpenAI API. 5 | 6 | **parameters**: The parameters of this Function. 7 | · project_manager: An instance of the ProjectManager class that is responsible for managing the overall project workflow and interactions. 8 | 9 | **Code Description**: The __init__ method of the ChatEngine class is designed to set up the initial state of the ChatEngine instance by configuring it with the appropriate settings for the OpenAI API. Upon instantiation, the method first retrieves the current configuration settings by calling the `get_setting` method from the SettingsManager class. This method ensures that the settings are accessed in a consistent manner throughout the application, adhering to the Singleton design pattern. 10 | 11 | The retrieved settings include critical parameters such as the OpenAI API key, the base URL for API requests, the timeout duration for requests, the model to be used for chat completions, and the temperature setting that influences the randomness of the generated responses. These parameters are essential for the ChatEngine to function correctly and interact with the OpenAI API effectively. 12 | 13 | The OpenAI instance is then created using these settings, allowing the ChatEngine to perform chat-related functionalities, such as generating responses based on user input. The integration of the SettingsManager ensures that the ChatEngine is always configured with the latest settings, promoting maintainability and reducing the risk of errors due to misconfiguration. 14 | 15 | From a functional perspective, the ChatEngine class relies on the SettingsManager to provide the necessary configuration settings, which are crucial for its operation. This relationship exemplifies the design principle of separation of concerns, where the SettingsManager handles the management of configuration settings, while the ChatEngine focuses on its primary functionality of facilitating chat interactions. 16 | 17 | **Note**: It is important to ensure that the SettingsManager is properly configured and that the Setting class contains valid attributes before instantiating the ChatEngine. Any misconfiguration may lead to runtime errors or unexpected behavior when the ChatEngine attempts to utilize the OpenAI API settings. 18 | *** 19 | ### FunctionDef build_prompt(self, doc_item) 20 | Doc is waiting to be generated... 21 | #### FunctionDef get_referenced_prompt(doc_item) 22 | **get_referenced_prompt**: The function of get_referenced_prompt is to generate a formatted string that summarizes the references made by a given DocItem, including details about the referenced objects and their documentation. 
23 | 24 | **parameters**: The parameters of this Function. 25 | · doc_item: An instance of the DocItem class, which contains information about the documentation item and its references. 26 | 27 | **Code Description**: The get_referenced_prompt function is designed to create a prompt that outlines the references associated with a specific DocItem. It first checks if the provided doc_item has any references by evaluating the length of the reference_who attribute, which is a list of DocItem instances that reference the current item. If there are no references, the function returns an empty string. 28 | 29 | If references are present, the function initializes a list called prompt with a predefined introductory string. It then iterates over each reference_item in the doc_item.reference_who list. For each reference_item, the function constructs a detailed string (instance_prompt) that includes the full name of the referenced object, its corresponding documentation content, and the raw code associated with it. The get_full_name method of the reference_item is called to retrieve its full hierarchical name, ensuring clarity in the context of the documentation. 30 | 31 | The instance_prompt is formatted to include the object's name, its documentation (if available), and the raw code, all separated by a visual divider. Each instance_prompt is appended to the prompt list. Finally, the function joins all elements of the prompt list into a single string, separated by newline characters, and returns this string. 32 | 33 | This function is particularly useful in the context of generating documentation, as it provides a clear overview of how different documentation items are interconnected through references. It aids in understanding the relationships between various code elements, which is essential for maintaining comprehensive and accurate documentation. 34 | 35 | **Note**: When using the get_referenced_prompt function, ensure that the doc_item passed to it has been properly initialized and contains valid references. This will guarantee that the generated prompt accurately reflects the relationships and documentation of the referenced items. 36 | 37 | **Output Example**: An example output of the get_referenced_prompt function for a DocItem with references might look like this: 38 | ``` 39 | As you can see, the code calls the following objects, their code and docs are as following: 40 | obj: repo_agent/doc_meta_info.py/DocItem 41 | Document: 42 | **DocItem**: The function of DocItem is to represent individual documentation items within a project, encapsulating their metadata and relationships. 43 | Raw code:``` 44 | class DocItem: 45 | ... 46 | ``` 47 | obj: repo_agent/another_file.py/AnotherClass 48 | Document: 49 | **AnotherClass**: This class serves a different purpose within the project. 50 | Raw code:``` 51 | class AnotherClass: 52 | ... 53 | ``` 54 | ``` 55 | *** 56 | #### FunctionDef get_referencer_prompt(doc_item) 57 | **get_referencer_prompt**: The function of get_referencer_prompt is to generate a prompt string that lists all the objects that reference a given documentation item, along with their associated documentation and code. 58 | 59 | **parameters**: The parameters of this Function. 60 | · doc_item: An instance of the DocItem class, which represents the documentation item for which the referencing objects are being retrieved. 
61 | 62 | **Code Description**: The get_referencer_prompt function is designed to create a formatted string that provides information about the objects that reference a specific documentation item. It begins by checking if the provided doc_item has any references in its who_reference_me attribute, which is a list of DocItem instances that reference the current item. If this list is empty, the function returns an empty string, indicating that there are no references to display. 63 | 64 | If there are references, the function initializes a prompt list with a header string that introduces the subsequent information. It then iterates over each DocItem in the who_reference_me list. For each referencing item, it constructs a detailed string that includes the full name of the referencing object (obtained by calling the get_full_name method on the referencer_item), the last version of its markdown content (if available), and its raw code content (if present). Each of these details is formatted in a readable manner, separated by line breaks and a visual divider. 65 | 66 | Finally, the function joins all the strings in the prompt list into a single string, separated by newline characters, and returns this formatted string. This output serves as a comprehensive reference for developers, allowing them to quickly understand which objects are related to the given documentation item and to access their associated documentation and code. 67 | 68 | The get_referencer_prompt function is particularly useful in the context of documentation generation and management, as it helps to clarify the relationships between different code elements. By providing a clear overview of the references, it aids developers in navigating the documentation and understanding the dependencies within the codebase. 69 | 70 | **Note**: When using this function, ensure that the doc_item parameter is a properly initialized instance of the DocItem class with an established hierarchy and references. This will ensure accurate and meaningful output. 71 | 72 | **Output Example**: An example output of the get_referencer_prompt function might look like this: 73 | ``` 74 | Also, the code has been called by the following objects, their code and docs are as following: 75 | obj: repo_agent/doc_meta_info.py/DocItem 76 | Document: 77 | This is a documentation item that describes a specific code element. 78 | Raw code:``` 79 | class DocItem: 80 | ... 81 | ``` 82 | ========== 83 | obj: repo_agent/another_file.py/AnotherClass 84 | Document: 85 | This class interacts with the DocItem and provides additional functionality. 86 | Raw code:``` 87 | class AnotherClass: 88 | ... 89 | ``` 90 | ``` 91 | *** 92 | #### FunctionDef get_relationship_description(referencer_content, reference_letter) 93 | **get_relationship_description**: The function of get_relationship_description is to generate a descriptive string regarding the relationship of a referencer with its callers and callees based on the provided inputs. 94 | 95 | **parameters**: The parameters of this Function. 96 | · referencer_content: A boolean indicating whether there is content related to the referencer. 97 | · reference_letter: A boolean indicating whether there is a reference letter available. 98 | 99 | **Code Description**: The get_relationship_description function evaluates the presence of two boolean parameters: referencer_content and reference_letter. It constructs and returns a specific string based on the combination of these parameters. 
100 | 101 | - If both referencer_content and reference_letter are true, the function returns a string that requests the inclusion of the reference relationship with both callers and callees from a functional perspective. 102 | - If only referencer_content is true, it returns a string that requests the inclusion of the relationship with callers from a functional perspective. 103 | - If only reference_letter is true, it returns a string that requests the inclusion of the relationship with callees from a functional perspective. 104 | - If neither parameter is true, the function returns an empty string. 105 | 106 | This design allows for flexible output based on the available information regarding the referencer, ensuring that the user receives relevant instructions based on the context provided. 107 | 108 | **Note**: It is important to ensure that the parameters are boolean values, as the function logic relies on their truthiness to determine the appropriate output. Providing non-boolean values may lead to unexpected results. 109 | 110 | **Output Example**: 111 | - If both parameters are true: "And please include the reference relationship with its callers and callees in the project from a functional perspective." 112 | - If only referencer_content is true: "And please include the relationship with its callers in the project from a functional perspective." 113 | - If only reference_letter is true: "And please include the relationship with its callees in the project from a functional perspective." 114 | - If neither parameter is true: "" (an empty string). 115 | *** 116 | *** 117 | ### FunctionDef generate_doc(self, doc_item) 118 | Doc is waiting to be generated... 119 | *** 120 | -------------------------------------------------------------------------------- /markdown_docs/repo_agent/log.md: -------------------------------------------------------------------------------- 1 | ## ClassDef InterceptHandler 2 | **InterceptHandler**: The function of InterceptHandler is to redirect standard logging output to the Loguru logging system. 3 | 4 | **attributes**: The attributes of this Class. 5 | · record: logging.LogRecord - This parameter represents the log record containing all the information pertinent to the event being logged. 6 | 7 | **Code Description**: The InterceptHandler class extends the logging.Handler class to facilitate the integration of Python's standard logging module with the Loguru logging system. The primary method of this class is `emit`, which is responsible for processing log records. 8 | 9 | When a log record is received, the `emit` method first attempts to map the standard logging level (e.g., DEBUG, INFO, WARNING) to the corresponding Loguru level. If the mapping fails, it defaults to using the numeric level of the log record. This ensures that all log messages are appropriately categorized according to their severity. 10 | 11 | Next, the method identifies the caller of the log message by traversing the call stack. It uses the `inspect.currentframe()` function to obtain the current stack frame and iterates through the frames to find the origin of the log message. This is particularly useful for debugging, as it provides context about where the log message was generated. 12 | 13 | Finally, the method logs the message using Loguru's logging capabilities, including any exception information if present. The `logger.opt()` method is utilized to set the depth of the stack trace and to include exception details, ensuring that the log output is informative and relevant. 
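The behavior described above corresponds closely to the standard Loguru interception recipe; a sketch consistent with this description (the type annotations are illustrative) might look like this:

```python
import inspect
import logging

from loguru import logger


class InterceptHandler(logging.Handler):
    def emit(self, record: logging.LogRecord) -> None:
        # Map the standard logging level name to the corresponding Loguru level,
        # falling back to the numeric level if no mapping exists.
        try:
            level = logger.level(record.levelname).name
        except ValueError:
            level = record.levelno

        # Walk the call stack to find the frame where the logging call originated.
        frame, depth = inspect.currentframe(), 0
        while frame and (depth == 0 or frame.f_code.co_filename == logging.__file__):
            frame = frame.f_back
            depth += 1

        # Relay the message to Loguru, preserving any exception information.
        logger.opt(depth=depth, exception=record.exc_info).log(level, record.getMessage())
```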
14 | 
15 | The InterceptHandler is specifically invoked within the `set_logger_level_from_config` function. This function configures the Loguru logger with a specified log level and integrates it with the standard logging module. By calling `logging.basicConfig()` with an instance of InterceptHandler, it effectively redirects all standard logging output to Loguru, allowing for a unified logging approach across the application. This integration is crucial for maintaining consistent logging behavior, especially in applications that utilize both standard logging and Loguru.
16 | 
17 | **Note**: When using the InterceptHandler, it is important to ensure that the logging configuration is set up correctly to avoid conflicts between standard logging and Loguru. Additionally, developers should be aware of the performance implications of logging, particularly in multi-threaded environments, where the `enqueue=True` option in Loguru can help manage log messages safely.
18 | ### FunctionDef emit(self, record)
19 | **emit**: The function of emit is to forward a standard logging record to the Loguru logging system.
20 | 
21 | **parameters**: The parameters of this Function.
22 | · record: logging.LogRecord - The object containing the information of the log record.
23 | 
24 | **Code Description**: The emit function first tries to obtain the Loguru level corresponding to the level of the incoming log record. If this succeeds, that level is used; if it fails, the record's numeric level number is used instead. Next, the function uses the inspect module to obtain the frames of the current call stack in order to determine where the log message originated. It walks the call stack until it finds a frame that does not belong to the logging module, which establishes the depth of the log message. Finally, it records the message through Loguru's logger object, combining the depth and any exception information.
25 | 
26 | The concrete steps are as follows:
27 | 1. Use the logger.level method to obtain the Loguru level name corresponding to record.levelname. If that level does not exist, use record.levelno as the level.
28 | 2. Obtain the current frame via inspect.currentframe() and initialize the depth to 0. Then walk the call stack in a while loop until a frame outside the logging module is found.
29 | 3. Record the message with the logger.opt method, passing in the depth and exception information, and call record.getMessage() to obtain the content of the log message.
30 | 
31 | **Note**: When using this function, make sure the record object passed in is a valid logging.LogRecord instance to avoid potential errors. Also make sure the Loguru library is configured correctly so that it can process the log records.
32 | ***
33 | ## FunctionDef set_logger_level_from_config(log_level)
34 | **set_logger_level_from_config**: The function of set_logger_level_from_config is to configure the loguru logger with a specified log level and integrate it with the standard logging module.
35 | 
36 | **parameters**: The parameters of this Function.
37 | · log_level: str - The log level to set for loguru (e.g., "DEBUG", "INFO", "WARNING").
38 | 
39 | **Code Description**: The set_logger_level_from_config function is designed to set the logging level for the loguru logger based on the provided log_level argument. It begins by removing any existing loguru handlers to ensure that there are no conflicts or duplications in logging output. Following this, it adds a new handler to the loguru logger that directs output to stderr at the specified log level. The parameters `enqueue=True`, `backtrace=False`, and `diagnose=False` are used to ensure that logging is thread-safe, minimizes detailed traceback information, and suppresses additional diagnostic information, respectively.
40 | 
41 | Additionally, the function redirects the standard logging output to the loguru logger by utilizing the InterceptHandler class. This integration allows loguru to handle all logging consistently across the application, which is particularly useful in scenarios where both standard logging and loguru are used. The function concludes by logging a success message indicating that the log level has been set.
42 | 
43 | The set_logger_level_from_config function is called within the run function located in the repo_agent/main.py file. In this context, it retrieves the logging configuration from the SettingsManager and applies it by calling set_logger_level_from_config with the appropriate log level. This ensures that the logging configuration is established before any tasks are executed, allowing for consistent logging behavior throughout the application.
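A minimal sketch of this configuration function, assuming the success message wording (the actual text may differ):

```python
import logging
import sys

from loguru import logger

def set_logger_level_from_config(log_level: str) -> None:
    # Replace any existing Loguru handlers with a stderr sink at the requested level.
    logger.remove()
    logger.add(sys.stderr, level=log_level, enqueue=True, backtrace=False, diagnose=False)

    # Route all standard logging records through Loguru via the InterceptHandler.
    logging.basicConfig(handlers=[InterceptHandler()], level=0, force=True)

    logger.success(f"Log level set to {log_level}!")
```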
44 | 
45 | **Note**: When using the set_logger_level_from_config function, it is essential to ensure that the logging configuration is correctly set up to avoid conflicts between standard logging and loguru. Developers should also consider the implications of logging performance, especially in multi-threaded environments, where the `enqueue=True` option can help manage log messages safely.
46 | 
-------------------------------------------------------------------------------- /markdown_docs/repo_agent/main.md: --------------------------------------------------------------------------------
1 | ## FunctionDef cli
2 | **cli**: The function of cli is to provide repository-level code documentation generation for an LLM-based framework.
3 | 
4 | **parameters**: This function takes no parameters.
5 | 
6 | **Code Description**: The cli function is an empty function that currently does not implement any concrete behavior. According to its docstring, the purpose of cli is to provide repository-level code documentation generation for a framework based on large language models (LLMs). This indicates that the function is likely a foundation for future extension, intended to handle tasks related to code documentation generation.
7 | 
8 | Within the project structure, the cli function is invoked from the repo_agent/__main__.py file. Although __main__.py does not show the concrete invocation code, such a file is normally the entry point of a Python program, so cli is likely called at program startup to initialize or configure the documentation generation features.
9 | 
10 | **Note**: Since the cli function does not implement any behavior yet, developers should be aware that it is unfinished and may require further development before it can deliver the intended documentation generation results.
11 | ## FunctionDef handle_setting_error(e)
12 | **handle_setting_error**: The function of handle_setting_error is to handle configuration errors in the settings.
13 | 
14 | **parameters**: The parameters of this function.
15 | · e: ValidationError - The exception object representing the validation error, containing detailed information about the configuration problems.
16 | 
17 | **Code Description**: The handle_setting_error function handles configuration errors encountered while the program is running. When the program tries to load its settings and a ValidationError is raised, this function is called. It first prints a generic error message via the click library, prompting the user to check their settings. It then iterates over the error entries in the ValidationError object and prints a more detailed message for each missing or invalid field, using different colors to distinguish them.
18 | 
19 | If the error type is "missing", the function tells the user that a required field is absent and suggests setting the corresponding environment variable; for other error types, it prints the error message directly. Finally, the function terminates the program gracefully by raising click.ClickException with an error message indicating that the program is aborting.
20 | 
21 | Within the project, handle_setting_error is called by several functions, including run, print_hierarchy, and diff. When these functions encounter a ValidationError while trying to load the settings, they call handle_setting_error to process the error and print the relevant information, ensuring that users learn about configuration problems promptly and can correct them.
22 | 
23 | **Note**: When using this function, make sure the argument passed in is a ValidationError exception object so that the error information can be handled and reported correctly.
24 | ## FunctionDef run
25 | Doc is waiting to be generated...
26 | ## FunctionDef clean
27 | **clean**: The function of clean is to remove the fake files generated by the documentation process.
28 | 
29 | **parameters**: The parameters of this Function.
30 | · No parameters are required for this function.
31 | 
32 | **Code Description**: The clean function is designed to facilitate the cleanup of temporary files, referred to as "fake files," that are created during the documentation generation process. This function achieves its purpose by invoking the delete_fake_files function, which is responsible for identifying and removing these temporary files.
33 | 
34 | When the clean function is called, it executes the delete_fake_files function, which performs a thorough search through the project's directory structure to locate and delete any files that match specific criteria indicative of temporary files. Upon successful completion of the deletion process, the clean function logs a success message indicating that the fake files have been cleaned up.
35 | 
36 | The delete_fake_files function operates by first retrieving the project settings through the SettingsManager's get_setting method. It then utilizes a nested helper function, gci, to recursively traverse the specified directory. The gci function checks each file and directory, identifying those that are temporary based on their naming conventions. If a temporary file is found, it either deletes it if it is empty or renames it back to its original name if it contains content.
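Sketched as code, the clean command described here might be wired up roughly as follows; the click decorators and the exact success message are assumptions rather than the verbatim source:

```python
import click
from loguru import logger

@click.group()
def cli():
    """An LLM-Powered Framework for Repository-level Code Documentation Generation."""

@cli.command()
def clean():
    """Clean the fake files generated by the documentation process."""
    delete_fake_files()  # documented in markdown_docs/repo_agent/utils/meta_info_utils.md
    logger.success("Fake files have been cleaned up.")
```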
37 | 38 | The clean function is crucial in ensuring that the workspace remains free of unnecessary files after documentation tasks are completed. It is typically called at the end of the documentation process to maintain an organized project structure. 39 | 40 | **Note**: It is important to ensure that the project settings are correctly configured and that the target repository is accessible before invoking the clean function. Any issues related to file permissions or incorrect paths may lead to errors during the cleanup process. 41 | ## FunctionDef print_hierarchy 42 | Doc is waiting to be generated... 43 | ## FunctionDef diff 44 | Doc is waiting to be generated... 45 | -------------------------------------------------------------------------------- /markdown_docs/repo_agent/project_manager.md: -------------------------------------------------------------------------------- 1 | ## ClassDef ProjectManager 2 | **ProjectManager**: The function of ProjectManager is to manage and retrieve the structure of a project repository. 3 | 4 | **attributes**: The attributes of this Class. 5 | · repo_path: The file path to the project repository. 6 | · project: An instance of the Jedi Project class, initialized with the repo_path. 7 | · project_hierarchy: The file path to the project hierarchy JSON file, constructed from the repo_path and project_hierarchy parameter. 8 | 9 | **Code Description**: The ProjectManager class is designed to facilitate the management of a project repository by providing methods to retrieve the project's directory structure and build a reference path tree. Upon initialization, the class requires two parameters: `repo_path`, which specifies the location of the project repository, and `project_hierarchy`, which indicates the name of the hierarchy to be used. The class constructs the path to the project hierarchy JSON file by combining the repo_path with the project_hierarchy name. 10 | 11 | The `get_project_structure` method is responsible for returning the structure of the project by recursively traversing the directory tree starting from the repo_path. It constructs a string representation of the project structure, including all directories and Python files, while ignoring hidden files and directories. This method utilizes a nested function `walk_dir` to perform the recursive traversal. 12 | 13 | The `build_path_tree` method creates a hierarchical tree structure based on two lists of paths: `who_reference_me` and `reference_who`, as well as a specific `doc_item_path`. It constructs a nested dictionary using `defaultdict` to represent the tree structure. The method modifies the last part of the `doc_item_path` to indicate a specific item with a star symbol. Finally, it converts the tree structure into a string format for easier visualization. 14 | 15 | The ProjectManager class is instantiated within the Runner class, where it is initialized with the target repository and hierarchy name obtained from the SettingsManager. This integration allows the Runner to leverage the ProjectManager's capabilities to manage and retrieve project structure information, which is essential for the overall functionality of the application. 16 | 17 | **Note**: When using the ProjectManager class, ensure that the provided repo_path is valid and accessible. The project_hierarchy should correspond to an existing hierarchy name to avoid file path errors. 
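A minimal usage sketch of the class (the paths and hierarchy name shown here are illustrative, not defaults confirmed by this page):

```python
from repo_agent.project_manager import ProjectManager

# Hypothetical arguments for illustration only.
project_manager = ProjectManager(
    repo_path="/path/to/repo",
    project_hierarchy=".project_doc_record",
)
print(project_manager.get_project_structure())
```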
18 | 
19 | **Output Example**: A possible output of the `get_project_structure` method might look like this:
20 | ```
21 | project_root
22 |   src
23 |     main.py
24 |     utils.py
25 |   tests
26 |     test_main.py
27 | ```
28 | ### FunctionDef __init__(self, repo_path, project_hierarchy)
29 | **__init__**: The function of __init__ is to initialize an instance of the ProjectManager class.
30 | 
31 | **parameters**: The parameters of this function are as follows:
32 | · parameter1: repo_path - The repository path of the project.
33 | · parameter2: project_hierarchy - The path of the project hierarchy.
34 | 
35 | **Code Description**: The __init__ function initializes an instance of the ProjectManager class. Inside the function, the repo_path argument is first assigned to the instance variable self.repo_path so that it can be used by the other methods of the class. Next, a new Project object from the jedi library is created with repo_path as its argument and assigned to self.project, which lets the ProjectManager use the features provided by jedi for tasks such as code analysis and autocompletion. Finally, the function builds the full path of the project hierarchy with os.path.join and assigns it to self.project_hierarchy. This path is composed of repo_path, the project_hierarchy parameter, and the file name "project_hierarchy.json", making it convenient to access the project's hierarchy data.
36 | 
37 | **Note**: When using this code, make sure the repo_path passed in is a valid file path and that a "project_hierarchy.json" file exists in the directory indicated by the project_hierarchy parameter, to avoid errors during instantiation.
38 | ***
39 | ### FunctionDef get_project_structure(self)
40 | **get_project_structure**: The function of get_project_structure is to return the structure of the project by recursively walking through the directory tree.
41 | 
42 | **parameters**: The parameters of this Function.
43 | · There are no parameters for this function.
44 | 
45 | **Code Description**: The get_project_structure function is designed to generate a string representation of the project's directory structure. It does this by defining an inner function called walk_dir, which takes two arguments: root (the current directory being processed) and prefix (a string used to format the output). The function initializes an empty list called structure to hold the formatted directory and file names.
46 | 
47 | The walk_dir function begins by appending the base name of the current directory (root) to the structure list, prefixed by the provided prefix. It then creates a new prefix by adding two spaces to the existing prefix to indicate a deeper level in the directory hierarchy. The function proceeds to iterate over the sorted list of items in the current directory, skipping any hidden files or directories (those starting with a dot).
48 | 
49 | For each item, it constructs the full path and checks if it is a directory or a Python file (ending with ".py"). If it is a directory, the function calls itself recursively with the new prefix. If it is a Python file, it appends the file name to the structure list with the new prefix.
50 | 
51 | Finally, after the walk_dir function has processed all directories and files, the get_project_structure function joins the elements of the structure list into a single string, separated by newline characters, and returns this string.
52 | 
53 | **Note**: It is important to ensure that the repo_path attribute of the class instance is correctly set to the root directory of the project before calling this function. The function will only include Python files in the output, ignoring other file types.
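Based on the description above, the method can be sketched as follows (a reconstruction, not the verbatim source):

```python
import os

def get_project_structure(self) -> str:
    structure = []

    def walk_dir(root, prefix=""):
        # Record the current directory, then descend one indentation level deeper.
        structure.append(prefix + os.path.basename(root))
        new_prefix = prefix + "  "
        for name in sorted(os.listdir(root)):
            if name.startswith("."):  # skip hidden files and directories
                continue
            path = os.path.join(root, name)
            if os.path.isdir(path):
                walk_dir(path, new_prefix)
            elif os.path.isfile(path) and name.endswith(".py"):
                structure.append(new_prefix + name)

    walk_dir(self.repo_path)
    return "\n".join(structure)
```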
54 | 
55 | **Output Example**:
56 | ```
57 | project_name
58 |   module1
59 |     file1.py
60 |     file2.py
61 |   module2
62 |     file3.py
63 |   main.py
64 | ```
65 | #### FunctionDef walk_dir(root, prefix)
66 | **walk_dir**: The function of walk_dir is to traverse a specified directory and its subdirectories and collect the structure information of all Python files.
67 | 
68 | **parameters**: The parameters of this function are as follows:
69 | · parameter1: root - The path of the root directory to traverse.
70 | · parameter2: prefix - The prefix string used to format the output; defaults to an empty string.
71 | 
72 | **Code Description**:
73 | The walk_dir function recursively traverses the given directory (root) and all of its subdirectories. It first appends the name of the current directory (obtained via os.path.basename(root)) to the structure list and adds spaces after the prefix string (prefix) for formatting. Next, the function lists all files and subdirectories in the current directory with os.listdir(root) and sorts these names.
74 | 
75 | While iterating over each name, the function checks whether the name starts with a dot (.) in order to ignore hidden files and directories. If the name is not hidden, the function builds the full path (path) for it. If that path is a directory, the function calls walk_dir recursively, passing in the new prefix (new_prefix). If the path is a file whose name ends with ".py", the function appends the file name, preceded by the new prefix, to the structure list.
76 | 
77 | This design allows the function to efficiently collect the structure information of all Python files under the specified directory and present it hierarchically.
78 | 
79 | **Note**: When using this code, make sure the root directory path passed in is valid and readable. Also note that the function ignores all files and directories whose names start with a dot; if those need to be processed, the related logic must be adjusted.
80 | ***
81 | ***
82 | ### FunctionDef build_path_tree(self, who_reference_me, reference_who, doc_item_path)
83 | **build_path_tree**: The function of build_path_tree is to construct a hierarchical representation of file paths based on two reference lists and a specific document item path.
84 | 
85 | **parameters**: The parameters of this Function.
86 | · who_reference_me: A list of file paths that reference the current object.
87 | · reference_who: A list of file paths that are referenced by the current object.
88 | · doc_item_path: A specific file path that needs to be highlighted in the tree structure.
89 | 
90 | **Code Description**: The build_path_tree function creates a nested dictionary structure representing a tree of file paths. It utilizes the `defaultdict` from the `collections` module to facilitate the creation of this tree. The function begins by defining an inner function, `tree`, which initializes a new `defaultdict` that can recursively create nested dictionaries.
91 | 
92 | The function then processes the two input lists, `who_reference_me` and `reference_who`. For each path in these lists, it splits the path into its components using the operating system's path separator (`os.sep`). It traverses the tree structure, creating a new node for each part of the path.
93 | 
94 | Next, the function processes the `doc_item_path`. It splits this path into components as well, but modifies the last component by prefixing it with a star symbol (✳️) to indicate that it is the item of interest. This modified path is then added to the tree in the same manner as the previous paths.
95 | 
96 | Finally, the function defines another inner function, `tree_to_string`, which converts the nested dictionary structure into a formatted string representation. This function recursively traverses the tree, indenting each level of the hierarchy for clarity. The resulting string is returned as the output of the build_path_tree function.
97 | 
98 | **Note**: It is important to ensure that the paths provided in `who_reference_me` and `reference_who` are valid and correctly formatted. The function assumes that the paths are well-structured and uses the operating system's path separator for splitting.
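A reconstruction of the method from the description above (the star prefix and four-space indentation match the output example below, but the exact formatting in the source may differ):

```python
import os
from collections import defaultdict

def build_path_tree(self, who_reference_me, reference_who, doc_item_path):
    def tree():
        return defaultdict(tree)

    path_tree = tree()

    # Insert every referencing and referenced path into the nested structure.
    for path_list in (who_reference_me, reference_who):
        for path in path_list:
            node = path_tree
            for part in path.split(os.sep):
                node = node[part]

    # Mark the documented item itself before inserting its path.
    obj_parts = doc_item_path.split(os.sep)
    obj_parts[-1] = "✳️" + obj_parts[-1]
    node = path_tree
    for part in obj_parts:
        node = node[part]

    def tree_to_string(tree, indent=0):
        s = ""
        for key, value in sorted(tree.items()):
            s += "    " * indent + key + "\n"
            if isinstance(value, dict):
                s += tree_to_string(value, indent + 1)
        return s

    return tree_to_string(path_tree)
```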
99 | 
100 | **Output Example**:
101 | Given the following inputs:
102 | - who_reference_me: ["folder1/fileA.txt", "folder1/folder2/fileB.txt"]
103 | - reference_who: ["folder3/fileC.txt"]
104 | - doc_item_path: "folder1/folder2/fileB.txt"
105 | 
106 | The output of the function might look like this:
107 | ```
108 | folder1
109 |     fileA.txt
110 |     folder2
111 |         ✳️fileB.txt
112 | folder3
113 |     fileC.txt
114 | ```
115 | #### FunctionDef tree
116 | **tree**: The function of tree is to return a defaultdict whose default value is a new tree.
117 | 
118 | **parameters**: This function takes no parameters.
119 | 
120 | **Code Description**: The tree function uses Python's defaultdict class. defaultdict is a dictionary subclass from the collections module that provides a default value: when a missing key is accessed, a specified factory function is called automatically to produce that default value. In this function, tree itself is used as the factory function, which means that whenever a missing key is accessed, the defaultdict creates a new tree object. This recursive structure lets the returned dictionary be used to build tree structures in which every node can have multiple children, with the number and content of the children generated dynamically.
121 | 
122 | **Note**: When using this function, be careful to avoid unbounded recursion. Because tree returns a defaultdict whose default value is the tree function itself, accessing undefined keys keeps creating new defaultdicts, which may lead to excessive memory consumption.
123 | 
124 | **Output Example**: A possible return value after calling the tree function is:
125 | ```
126 | defaultdict(<function tree at 0x...>, {})
127 | ```
128 | This return value represents an empty defaultdict whose default value is the tree function itself. If a missing key is accessed, for example `my_tree['a']`, a new defaultdict is created as the value of 'a'.
129 | ***
130 | #### FunctionDef tree_to_string(tree, indent)
131 | **tree_to_string**: The function of tree_to_string is to convert a tree structure into a string format for easy visualization.
132 | 
133 | **parameters**: The parameters of this function are as follows:
134 | · parameter1: tree - A dictionary-based tree structure in which the keys represent nodes and the values may be dictionaries of child nodes or other types.
135 | · parameter2: indent - An integer indicating the indentation level of the current node; defaults to 0.
136 | 
137 | **Code Description**: The tree_to_string function converts a tree structure into a string recursively. First, the function initializes an empty string s. It then sorts the incoming tree dictionary and iterates over every key-value pair. During the iteration, the function appends the current key (node) to the string s, adding a number of spaces according to the indent parameter to achieve the indentation. If the current value is a dictionary, meaning the node has children, the function calls tree_to_string recursively to convert the children into a string and appends it to s. Finally, the function returns the constructed string s.
138 | 
139 | **Note**: When using this function, make sure the tree argument passed in is a valid dictionary structure, which may contain nested dictionaries. The indent parameter controls the output format and normally does not need to be set manually, unless the indentation needs to be adjusted in special cases.
140 | 
141 | **Output Example**: Suppose the input tree has the following structure:
142 | {
143 |     "root_node": {
144 |         "child1": {},
145 |         "child2": {
146 |             "grandchild1": {}
147 |         }
148 |     }
149 | }
150 | Calling tree_to_string(tree) will return:
151 | root_node
152 |     child1
153 |     child2
154 |         grandchild1
155 | ***
156 | ***
157 | 
-------------------------------------------------------------------------------- /markdown_docs/repo_agent/runner.md: --------------------------------------------------------------------------------
1 | ## ClassDef Runner
2 | Doc is waiting to be generated...
3 | ### FunctionDef __init__(self)
4 | Doc is waiting to be generated...
5 | ***
6 | ### FunctionDef get_all_pys(self, directory)
7 | **get_all_pys**: The function of get_all_pys is to get all Python files under a specified directory.
8 | 
9 | **parameters**: The parameters of this function.
10 | · directory: The directory to search, as a string.
11 | 
12 | **Code Description**: The get_all_pys function traverses the given directory and finds and returns the paths of all Python files in that directory and its subdirectories. The function first initializes an empty list, python_files, to store the paths of the Python files it finds. It then walks the specified directory recursively with os.walk(directory). os.walk returns a generator, each of whose items is a three-tuple (root, dirs, files), where root is the path of the directory currently being visited, dirs is the list of subdirectories in that directory, and files is the list of files in that directory. The function then checks each file: if the file name ends with ".py", the full path of the file (built with os.path.join(root, file)) is appended to the python_files list. Finally, the function returns the list containing the paths of all Python files found.
13 | 
14 | **Note**: When using this function, make sure the directory argument passed in is a valid directory path. Also make sure the os module has been imported before calling the function, to avoid runtime errors.
15 | 
16 | **Output Example**: Suppose the following Python files are found under the specified directory:
17 | - /path/to/directory/script1.py
18 | - /path/to/directory/subdirectory/script2.py
19 | 
20 | Then the return value of the function will be:
21 | ```python
22 | [
23 |     '/path/to/directory/script1.py',
24 |     '/path/to/directory/subdirectory/script2.py'
25 | ]
26 | ```
27 | ***
28 | ### FunctionDef generate_doc_for_a_single_item(self, doc_item)
29 | Doc is waiting to be generated...
30 | ***
31 | ### FunctionDef first_generate(self)
32 | Doc is waiting to be generated...
33 | *** 34 | ### FunctionDef markdown_refresh(self) 35 | **markdown_refresh**: The function of markdown_refresh is to write the latest document information into a markdown format folder, regardless of whether the markdown content has changed. 36 | 37 | **parameters**: The parameters of this Function. 38 | · None 39 | 40 | **Code Description**: The markdown_refresh function is responsible for generating and updating markdown documentation for the project. It begins by acquiring a lock to ensure thread safety during the execution of the function. The first step is to delete any existing content in the markdown folder specified by the project settings. This is achieved using the shutil.rmtree method, which removes the directory and all its contents, followed by the creation of a new markdown folder. 41 | 42 | Next, the function retrieves a list of all file items from the documentation hierarchy using the get_all_files method from the MetaInfo class. It iterates through each file item, checking whether it contains any documentation content using a recursive helper function named recursive_check. This function inspects the DocItem objects to determine if they have any markdown content or if their children contain markdown content. 43 | 44 | If a file item does not contain any documentation, it is skipped. For file items that do contain documentation, the function constructs the markdown content using another helper function called to_markdown. This function generates the markdown representation of the DocItem and its children, formatting the output according to the hierarchical structure of the documentation. 45 | 46 | Once the markdown content is generated, it is written to a .md file in the markdown folder. The file path is constructed by replacing the .py extension of the file item with .md. The function ensures that the necessary directories are created before writing the markdown content to the file. 47 | 48 | Finally, the function logs an informational message indicating that the markdown documents have been refreshed successfully. 49 | 50 | The markdown_refresh function is called within the first_generate method and the run method of the Runner class. In first_generate, it is used to refresh the markdown documentation after generating all documents for the first time. In the run method, it is invoked after processing changes to ensure that the markdown documentation is up to date with the latest changes in the project. 51 | 52 | **Note**: When using this function, ensure that the project settings are correctly configured, and that the target repository is accessible. The function assumes that the markdown folder is specified in the project settings and that the necessary permissions are in place for file operations. 53 | 54 | **Output Example**: A possible output of the markdown_refresh function could be a markdown file structured as follows: 55 | 56 | # Class Example 57 | This is the documentation for the Example class. 58 | 59 | ## Method example_method 60 | This method does something important. 61 | 62 | ### Parameters 63 | - param1: Description of parameter 1. 64 | - param2: Description of parameter 2. 65 | 66 | *** 67 | 68 | This structure would be repeated for each documented item, providing a clear and organized representation of the project's documentation in markdown format. 
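A condensed sketch of the flow described above. The attribute names (`runner_lock`, `setting`, `markdown_docs_name`) and the exact path handling are assumptions based on the description; `recursive_check` and `to_markdown` are the nested helpers documented below:

```python
import shutil
from pathlib import Path

from loguru import logger

def markdown_refresh(self):
    with self.runner_lock:  # hypothetical lock name; the doc only says a lock is acquired
        markdown_folder = (
            Path(self.setting.project.target_repo) / self.setting.project.markdown_docs_name
        )

        # Delete any existing markdown output and start from a clean folder.
        if markdown_folder.exists():
            shutil.rmtree(markdown_folder)
        markdown_folder.mkdir(parents=True)

        for file_item in self.meta_info.get_all_files():
            if not recursive_check(file_item):
                continue  # skip files that contain no generated documentation

            markdown = "".join(
                to_markdown(child, 2) for child in file_item.children.values()
            )

            # Mirror the source layout, replacing the .py extension with .md.
            out_path = markdown_folder / file_item.get_full_name().replace(".py", ".md")
            out_path.parent.mkdir(parents=True, exist_ok=True)
            out_path.write_text(markdown, encoding="utf-8")

    logger.info("Markdown documents have been refreshed.")
```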
69 | #### FunctionDef recursive_check(doc_item) 70 | **recursive_check**: The function of recursive_check is to determine whether a given documentation item contains any Markdown content or if any of its child items contain Markdown content. 71 | 72 | **parameters**: The parameters of this Function. 73 | · doc_item: An instance of the DocItem class, representing the documentation item to be checked. 74 | 75 | **Code Description**: The recursive_check function operates by first checking if the provided DocItem instance, referred to as doc_item, has any Markdown content stored in its md_content attribute. If this attribute is not empty (i.e., it contains one or more Markdown entries), the function immediately returns True, indicating that the documentation item has associated content. 76 | 77 | If the md_content attribute is empty, the function proceeds to iterate through the children of the doc_item. The children are stored in the children attribute, which is a dictionary mapping child object names to their corresponding DocItem instances. For each child DocItem, the recursive_check function is called recursively. If any child returns True, indicating that it contains Markdown content, the parent function will also return True. 78 | 79 | If neither the doc_item nor any of its children contain Markdown content, the function ultimately returns False. This recursive approach allows the function to traverse the entire hierarchy of documentation items, ensuring that all levels are checked for content. 80 | 81 | The recursive_check function is closely related to the DocItem class, which encapsulates the metadata and relationships of documentation items within a project. The function leverages the hierarchical structure established by the DocItem instances to perform its checks effectively. 82 | 83 | **Note**: It is important to ensure that the doc_item passed to the recursive_check function is a valid instance of the DocItem class, as the function relies on the attributes defined within this class to perform its checks accurately. 84 | 85 | **Output Example**: If a DocItem instance has Markdown content, the function would return True. Conversely, if it and all its children lack Markdown content, the function would return False. For instance, if doc_item.md_content is an empty list and all children also have empty md_content, the output would be: 86 | False 87 | *** 88 | #### FunctionDef to_markdown(item, now_level) 89 | **to_markdown**: The function of to_markdown is to generate a Markdown representation of a documentation item and its children. 90 | 91 | **parameters**: The parameters of this Function. 92 | · item: An instance of DocItem, representing the documentation item to be converted to Markdown. 93 | · now_level: An integer indicating the current level of the documentation item in the hierarchy, which affects the Markdown heading level. 94 | 95 | **Code Description**: The to_markdown function constructs a Markdown string that represents a given documentation item (DocItem) and its hierarchical children. It begins by initializing an empty string called markdown_content. The function then appends a header to this string, which consists of a number of hash symbols corresponding to the now_level parameter, followed by the string representation of the item's type (obtained by calling the to_str method on item.item_type) and the object's name (item.obj_name). 
96 | 
97 | If the item contains parameters (checked by verifying the presence of "params" in item.content and ensuring it has a length greater than zero), these parameters are formatted and appended to the markdown_content string in parentheses. Following this, the function adds the last entry from item.md_content to the markdown_content, or a placeholder message if md_content is empty.
98 | 
99 | The function then iterates over the children of the current item (item.children), recursively calling to_markdown for each child with an incremented now_level. Each child's Markdown output is appended to the markdown_content, separated by a line of asterisks for clarity.
100 | 
101 | Finally, the complete markdown_content string is returned, providing a structured Markdown representation of the documentation item and its children.
102 | 
103 | This function relies on the DocItem class, which encapsulates the metadata and relationships of documentation items, and the DocItemType class, which provides the to_str method to convert item types into string representations. The to_markdown function is essential for generating readable documentation in Markdown format, facilitating better understanding and accessibility of the project's documentation structure.
104 | 
105 | **Note**: When using this function, ensure that the DocItem instances are properly structured and that their content is accurately populated to avoid incomplete or misleading documentation output.
106 | 
107 | **Output Example**: An example output of the to_markdown function for a DocItem representing a function might look like this:
108 | ```
109 | ## FunctionDef my_function_name (param1, param2)
110 | This function does something important...
111 | ***
112 | ### FunctionDef my_child_function_name
113 | This child function does something else...
114 | ***
115 | ```
116 | ***
117 | ***
118 | ### FunctionDef git_commit(self, commit_message)
119 | **git_commit**: The function of git_commit is to perform a Git commit operation with a given commit message.
120 | 
121 | **parameters**: The parameters of this function.
122 | · commit_message: The commit message describing the content of this commit.
123 | 
124 | **Code Description**: The git_commit function performs a commit operation in the Git version control system. It accepts one argument, commit_message, which describes the commit. Internally, the function uses the subprocess module to invoke the system command line and run the `git commit` command. Specifically, it runs the command with the `subprocess.check_call` method; the command arguments include the `--no-verify` option, which skips hook verification during the commit, and the `-m` option followed by the commit message. If an error occurs during execution, the function catches the subprocess.CalledProcessError exception and prints an error message telling the user why the commit operation failed.
125 | 
126 | **Note**: When using this function, make sure you are in the correct Git repository directory and that there are uncommitted changes. Also note that commit_message should be a valid string that clearly describes the content of the commit.
127 | ***
128 | ### FunctionDef run(self)
129 | Doc is waiting to be generated...
130 | ***
131 | ### FunctionDef add_new_item(self, file_handler, json_data)
132 | Doc is waiting to be generated...
133 | ***
134 | ### FunctionDef process_file_changes(self, repo_path, file_path, is_new_file)
135 | Doc is waiting to be generated...
136 | ***
137 | ### FunctionDef update_existing_item(self, file_dict, file_handler, changes_in_pyfile)
138 | Doc is waiting to be generated...
139 | ***
140 | ### FunctionDef update_object(self, file_dict, file_handler, obj_name, obj_referencer_list)
141 | Doc is waiting to be generated...
142 | ***
143 | ### FunctionDef get_new_objects(self, file_handler)
144 | **get_new_objects**: The function of get_new_objects is to identify and return the newly added and deleted objects by comparing the current and previous versions of a Python file.
145 | 
146 | **parameters**: The parameters of this Function.
147 | · file_handler: An instance of the FileHandler class, responsible for managing file operations and retrieving file versions.
148 | 149 | **Code Description**: The get_new_objects function is designed to analyze the differences between the current and previous versions of a Python file. It utilizes the file_handler parameter to access the modified file versions and extract the functions and classes defined in both versions. 150 | 151 | The function begins by calling the method get_modified_file_versions on the file_handler object, which returns the current and previous versions of the file. It then retrieves the functions and classes from both versions using the get_functions_and_classes method. If there is no previous version, it initializes parse_previous_py as an empty list. 152 | 153 | Next, the function constructs two sets: current_obj and previous_obj, which contain the names of the objects (functions and classes) from the current and previous versions, respectively. By performing set operations, it calculates the newly added objects (new_obj) and the deleted objects (del_obj). The function returns these two lists as a tuple. 154 | 155 | This function is called by the update_existing_item method within the same class. The update_existing_item method is responsible for updating the file structure information based on changes detected in the Python file. It utilizes the output of get_new_objects to determine which objects have been added or deleted, allowing it to update the file_dict accordingly. Specifically, it removes any deleted objects from the file_dict and updates the information of existing objects based on the current version of the file. 156 | 157 | **Note**: It is important to ensure that the file_handler object passed to this function is properly initialized and contains the necessary methods for retrieving file versions and parsing the file content. 158 | 159 | **Output Example**: A possible return value of the function could be: 160 | new_obj: ['add_context_stack', '__init__'] 161 | del_obj: [] 162 | *** 163 | -------------------------------------------------------------------------------- /markdown_docs/repo_agent/utils/meta_info_utils.md: -------------------------------------------------------------------------------- 1 | ## FunctionDef make_fake_files 2 | **make_fake_files**: The function of make_fake_files is to analyze the git status of a repository and create temporary files that reflect the current state of the working directory, specifically for untracked and unstaged changes. 3 | 4 | **parameters**: The parameters of this Function. 5 | · No parameters are required for this function. 6 | 7 | **Code Description**: The make_fake_files function is designed to interact with a Git repository to detect changes in the working directory that have not been staged for commit. It performs the following key operations: 8 | 9 | 1. **Delete Existing Fake Files**: The function begins by calling delete_fake_files to ensure that any previously created temporary files are removed before generating new ones. 10 | 11 | 2. **Retrieve Project Settings**: It retrieves the current project settings using the SettingsManager's get_setting method, which ensures consistent access to configuration settings throughout the application. 12 | 13 | 3. **Initialize Git Repository**: The function initializes a Git repository object using the target repository path specified in the project settings. 14 | 15 | 4. **Detect Unstaged Changes**: It identifies unstaged changes in the repository using the index.diff method, which returns a list of modified files that have not been added to the staging area. 
Additionally, it collects untracked files that exist in the file system but are not tracked by Git.
16 | 
17 | 5. **Skip Untracked Python Files**: The function iterates through the list of untracked files and skips any that have a ".py" extension, logging a message for each skipped file.
18 | 
19 | 6. **Handle New and Modified Files**: For files that have been modified (but not staged), the function checks if they end with a specific substring (latest_verison_substring). If they do, an error is logged, and the function exits. Otherwise, it renames the original file to include the latest version substring and creates a new file with the original name, writing the original content back into it.
20 | 
21 | 7. **Return Values**: Finally, the function returns a dictionary mapping the original file paths to their corresponding fake file paths, along with a list of files that were skipped during processing.
22 | 
23 | The make_fake_files function is called within the diff function in the main.py file. This function is responsible for checking for changes in the repository and determining which documents need to be updated or generated. By calling make_fake_files, the diff function ensures that the current state of the repository is accurately reflected in the documentation process.
24 | 
25 | **Note**: It is crucial to ensure that the target repository is properly configured and that the latest_verison_substring does not conflict with existing file names. Any misconfiguration may lead to runtime errors or unexpected behavior during the execution of this function.
26 | 
27 | **Output Example**: A possible appearance of the code's return value when calling make_fake_files could be:
28 | ```
29 | ({
30 |     'original_file_path.py': 'original_file_path.latest_version',
31 |     'another_file.py': 'another_file.latest_version'
32 | }, ['skipped_file.py'])
33 | ```
34 | ## FunctionDef delete_fake_files
35 | **delete_fake_files**: The function of delete_fake_files is to remove temporary files generated during the documentation process after the task execution is completed.
36 | 
37 | **parameters**: The parameters of this Function.
38 | · No parameters are required for this function.
39 | 
40 | **Code Description**: The delete_fake_files function is responsible for cleaning up temporary files, referred to as "fake files," that are created during the documentation generation process. This function utilizes a nested helper function, gci, which performs a recursive traversal of the directory specified by the project settings to identify and delete or rename files based on specific criteria.
41 | 
42 | The function begins by retrieving the project settings through the SettingsManager's get_setting method, which ensures that the configuration settings are consistently accessed throughout the application. The gci function is then called with the target repository path, which is obtained from the settings.
43 | 
44 | Within the gci function, the following operations are performed:
45 | 1. The function lists all files and directories within the specified filepath.
46 | 2. For each entry, it checks if it is a directory and recursively calls itself if it is.
47 | 3. If a file ends with a specific substring (latest_verison_substring), it indicates that it is a temporary file. The function then constructs the original file name by replacing the substring with ".py".
48 | 4. The fake file carrying the original ".py" name is removed. If the temporary file is empty, it is deleted as well, and a message is printed indicating that the temporary file has been deleted.
49 | 5. If the temporary file is not empty, it is renamed back to the original file name, and a message is printed indicating that the latest version has been recovered.
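Reconstructed as code, the cleanup routine might look like the following sketch; the printed messages are assumptions, and `SettingsManager` and `latest_verison_substring` are module-level imports in the real file:

```python
import os

def delete_fake_files():
    setting = SettingsManager.get_setting()

    def gci(filepath):
        for name in os.listdir(filepath):
            fi_d = os.path.join(filepath, name)
            if os.path.isdir(fi_d):
                gci(fi_d)  # recurse into subdirectories
            elif fi_d.endswith(latest_verison_substring):
                origin_name = fi_d.replace(latest_verison_substring, ".py")
                if os.path.exists(origin_name):
                    os.remove(origin_name)  # drop the fake .py file
                if os.path.getsize(fi_d) == 0:
                    print(f"deleting empty temp file: {fi_d}")
                    os.remove(fi_d)
                else:
                    print(f"recovering latest version: {origin_name}")
                    os.rename(fi_d, origin_name)

    gci(setting.project.target_repo)
```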
50 | 
51 | The delete_fake_files function is called in various parts of the project, including the clean function in main.py, which explicitly invokes delete_fake_files to ensure that all temporary files are removed after the documentation process. Additionally, it is called within the diff function to clean up any fake files before checking for changes in the repository. The run method of the Runner class also calls delete_fake_files after completing the document update process, ensuring that any temporary files created during the run are cleaned up.
52 | 
53 | **Note**: It is important to ensure that the target repository is correctly configured and accessible before invoking delete_fake_files. Any issues with file permissions or incorrect paths may lead to runtime errors during the deletion or renaming processes.
54 | ### FunctionDef gci(filepath)
55 | **gci**: The function of gci is to traverse a specified directory and its subdirectories to delete or rename files based on specific criteria.
56 | 
57 | **parameters**: The parameters of this Function.
58 | · filepath: A string representing the path of the directory to be traversed.
59 | 
60 | **Code Description**: The gci function begins by listing all files and directories within the specified filepath. It iterates through each item found in the directory. If an item is a directory, the function calls itself recursively to traverse that subdirectory. For files, it checks if the filename ends with a specific substring defined as `latest_verison_substring`. If this condition is met, the function constructs an original filename by replacing the substring with ".py".
61 | 
62 | The function then checks the size of the file. If the file size is zero, it indicates that the file is empty, and the function proceeds to delete both the empty file and its corresponding original file. A message is printed to the console indicating the deletion of the temporary file. Conversely, if the file is not empty, the function renames the temporary file back to its original name and prints a message indicating that the latest version is being recovered.
63 | 
64 | This function effectively manages temporary files by either deleting them if they are empty or restoring the original file if they contain data, ensuring that the directory remains clean and organized.
65 | 
66 | **Note**: It is important to ensure that the `latest_verison_substring` variable is defined in the scope where this function is used, as it is crucial for determining which files to process. Additionally, the function relies on the presence of the `setting.project.target_repo` variable to format the output messages correctly.
67 | ***
68 | 
-------------------------------------------------------------------------------- /markdown_docs/tests/test_change_detector.md: --------------------------------------------------------------------------------
1 | ## ClassDef TestChangeDetector
2 | **TestChangeDetector**: The function of TestChangeDetector is to perform unit tests on the ChangeDetector class, specifically focusing on the detection and management of staged and unstaged files in a Git repository.
3 | 
4 | **attributes**: The attributes of this Class.
5 | · test_repo_path: The file path to the test repository created for the unit tests.
6 | · repo: The initialized Git repository object used for testing.
7 | 
8 | **Code Description**: The TestChangeDetector class is a unit test case that inherits from unittest.TestCase, providing a framework for testing the functionality of the ChangeDetector class. The class includes setup and teardown methods to prepare and clean up the test environment, specifically a Git repository used for testing file changes.
9 | 
10 | The setUpClass method is a class method that initializes the test environment before any tests are run. It defines the path for the test repository, creates the directory if it does not exist, initializes a new Git repository, and configures user information for Git operations. It also creates two test files: a Python file and a Markdown file, and performs an initial commit to the repository.
11 | 
12 | The class contains three test methods:
13 | 1. test_get_staged_pys: This method tests the ChangeDetector's ability to identify staged Python files. It creates a new Python file, stages it, and asserts that the file is included in the list of staged files returned by the ChangeDetector.
14 | 
15 | 2. test_get_unstaged_mds: This method tests the ChangeDetector's ability to identify unstaged Markdown files. It modifies an existing Markdown file without staging it and asserts that the modified file is included in the list of unstaged files returned by the ChangeDetector.
16 | 
17 | 3. test_add_unstaged_mds: This method ensures that there are unstaged Markdown files and then uses the ChangeDetector to stage them. It checks that after the staging operation, there are no remaining unstaged Markdown files, asserting that the operation was successful.
18 | 
19 | The tearDownClass method is a class method that cleans up the test environment after all tests have been executed. It closes the Git repository and removes the test repository directory to ensure no residual files remain.
20 | 
21 | **Note**: It is important to ensure that the ChangeDetector class is properly implemented and available in the testing environment for these tests to execute successfully. Additionally, the tests rely on the presence of the Git command-line tools and the appropriate permissions to create and manipulate files and directories.
22 | ### FunctionDef setUpClass(cls)
23 | **setUpClass**: The function of setUpClass is to prepare a Git repository and related files for the tests.
24 | 
25 | **parameters**: This function takes no parameters.
26 | 
27 | **Code Description**:
28 | setUpClass is a class method used to set up the test environment before the test class runs. The method first defines the path of the test repository, setting it to the 'test_repo' folder under the directory of the current file. If that folder does not exist, the method creates it. It then uses the GitPython library to initialize a new Git repository associated with the specified path.
29 | 
30 | After initializing the Git repository, the method configures the Git user information, including the user's email address and name, for use in subsequent Git operations. Next, the method creates two test files: a Python file, 'test_file.py', containing a single print statement, and a Markdown file, 'test_file.md', containing a Markdown heading.
31 | 
32 | Finally, the method simulates Git operations by adding all files to the staging area and committing an initial commit, completing the setup of the test environment. These operations ensure that the test environment is clean and controllable when the tests execute.
33 | 
34 | **Note**: When using this method, make sure setUpClass is called in the test class so that the test environment is set up correctly before all test cases execute. Also make sure the GitPython library is installed to support the Git operations.
35 | ***
36 | ### FunctionDef test_get_staged_pys(self)
37 | **test_get_staged_pys**: The function of test_get_staged_pys is to verify that a newly created Python file is correctly identified as staged in the Git repository.
38 | 
39 | **parameters**: The parameters of this Function.
40 | · None
41 | 
42 | **Code Description**: The test_get_staged_pys function is a unit test designed to validate the functionality of the ChangeDetector class, specifically its ability to detect staged Python files within a Git repository. The function begins by creating a new Python file named 'new_test_file.py' in a specified test repository path.
This file contains a simple print statement. Once the file is created, it is added to the staging area of the Git repository using the Git command `git add`. 43 | 44 | Following the staging of the new file, an instance of the ChangeDetector class is instantiated with the test repository path. The method get_staged_pys of the ChangeDetector instance is then called to retrieve a list of Python files that are currently staged for commit. This method is responsible for checking the differences between the staging area and the last commit (HEAD) to identify which files have been added or modified. 45 | 46 | The test then asserts that 'new_test_file.py' is included in the list of staged files returned by get_staged_pys. This assertion confirms that the ChangeDetector class is functioning as expected, accurately tracking the newly staged Python file. Additionally, the function prints the list of staged Python files for verification purposes. 47 | 48 | This test is crucial for ensuring that the ChangeDetector class operates correctly in identifying changes within a Git repository, particularly for Python files. It serves as a safeguard against potential regressions in the functionality of the change detection mechanism. 49 | 50 | **Note**: It is important to ensure that the test environment is properly set up, including the availability of a valid Git repository and the necessary permissions to create and stage files. The GitPython library must also be correctly configured to facilitate interaction with the Git repository. 51 | *** 52 | ### FunctionDef test_get_unstaged_mds(self) 53 | **test_get_unstaged_mds**: The function of test_get_unstaged_mds is to verify that a modified Markdown file, which has not been staged, is correctly identified as an unstaged file by the ChangeDetector class. 54 | 55 | **parameters**: The parameters of this Function. 56 | · No parameters are required for this function. 57 | 58 | **Code Description**: The test_get_unstaged_mds function is a unit test designed to validate the functionality of the ChangeDetector class, specifically its ability to identify unstaged Markdown files in a Git repository. The function performs the following operations: 59 | 60 | 1. It begins by defining the path to a Markdown file named 'test_file.md' within a test repository directory specified by `self.test_repo_path`. 61 | 2. The function opens this Markdown file in append mode and writes additional content to it, simulating a modification that has not yet been staged. 62 | 3. An instance of the ChangeDetector class is then created, initialized with the path to the test repository. 63 | 4. The method `get_to_be_staged_files` of the ChangeDetector instance is called to retrieve a list of files that have been modified but not staged. 64 | 5. The function asserts that 'test_file.md' is included in the list of unstaged files by checking if its basename is present in the returned list. 65 | 6. Finally, it prints the list of unstaged Markdown files for verification. 66 | 67 | This function is called within the test_add_unstaged_mds function, which ensures that there is at least one unstaged Markdown file before attempting to add unstaged files to the staging area. The test_add_unstaged_mds function relies on the successful execution of test_get_unstaged_mds to confirm that the ChangeDetector can accurately identify unstaged files, thereby establishing a dependency between these two test functions. 
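The flow reduces to a few lines; the sketch below is a minimal illustration (it assumes the test_repo_path prepared in setUpClass and follows the method names given above, not the verbatim test body):

```python
import os

from repo_agent.change_detector import ChangeDetector

# Append to the Markdown file without staging the change.
md_path = os.path.join(test_repo_path, "test_file.md")
with open(md_path, "a", encoding="utf-8") as f:
    f.write("\n## A new, unstaged section\n")

# The detector should now list the file among those still to be staged.
detector = ChangeDetector(test_repo_path)
unstaged = detector.get_to_be_staged_files()
assert "test_file.md" in [os.path.basename(p) for p in unstaged]
```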
68 | 69 | **Note**: It is essential to ensure that the test repository is correctly set up and that the necessary files exist before running this test. The test environment should be clean to avoid false positives or negatives in the test results. 70 | *** 71 | ### FunctionDef test_add_unstaged_mds(self) 72 | **test_add_unstaged_mds**: The function of test_add_unstaged_mds is to verify that the ChangeDetector class correctly stages unstaged Markdown files in a Git repository. 73 | 74 | **parameters**: The parameters of this Function. 75 | · No parameters are required for this function. 76 | 77 | **Code Description**: The test_add_unstaged_mds function is a unit test designed to validate the functionality of the ChangeDetector class, specifically its ability to add unstaged Markdown files to the staging area of a Git repository. The function performs the following operations: 78 | 79 | 1. It first ensures that there is at least one unstaged Markdown file by invoking the test_get_unstaged_mds function. This function modifies a Markdown file in the test repository, ensuring that it is recognized as unstaged. 80 | 81 | 2. An instance of the ChangeDetector class is created, initialized with the path to the test repository specified by `self.test_repo_path`. This instance will be used to manage the staging of files. 82 | 83 | 3. The add_unstaged_files method of the ChangeDetector instance is called. This method identifies all unstaged files that meet specific criteria and stages them in the Git repository. 84 | 85 | 4. After attempting to stage the files, the function retrieves the list of files that are still unstaged by calling the get_to_be_staged_files method. This method checks for any files that remain unstaged after the add operation. 86 | 87 | 5. The function asserts that the length of the list of unstaged files after the add operation is zero, indicating that all unstaged Markdown files have been successfully staged. 88 | 89 | 6. Finally, it prints the number of remaining unstaged Markdown files, which should be zero if the test passes. 90 | 91 | This function is dependent on the successful execution of the test_get_unstaged_mds function, which ensures that there is at least one unstaged Markdown file before the add operation is attempted. The relationship between these two functions is crucial, as test_add_unstaged_mds relies on the outcome of test_get_unstaged_mds to validate the staging functionality of the ChangeDetector class. 92 | 93 | **Note**: It is essential to ensure that the test repository is correctly set up and that the necessary files exist before running this test. The test environment should be clean to avoid false positives or negatives in the test results. 
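A condensed sketch of the staging round trip described above (names follow this documentation; treat it as an illustration rather than the exact test body):

```python
from repo_agent.change_detector import ChangeDetector

detector = ChangeDetector(test_repo_path)  # test_repo_path as prepared in setUpClass

# Stage every unstaged file that meets the detector's criteria...
detector.add_unstaged_files()

# ...after which nothing should remain to be staged.
assert len(detector.get_to_be_staged_files()) == 0
```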
94 | *** 95 | ### FunctionDef tearDownClass(cls) 96 | **tearDownClass**: The function of tearDownClass is to clean up the test repository. 97 | 98 | **parameters**: This function has no parameters. 99 | 100 | **Code Description**: 101 | tearDownClass is a class method used to perform cleanup after all test cases in the test class have finished running. The method first calls cls.repo.close() to release the resources associated with the test repository, ensuring that no connections or file handles are left open. It then removes the test repository folder and its contents with the command os.system('rm -rf ' + cls.test_repo_path), where cls.test_repo_path is a class attribute pointing to the path of the test repository. In this way, tearDownClass keeps the test environment tidy and prevents earlier tests from affecting later ones. 102 | 103 | **Note**: Make sure this function is called after the test cases have finished, to avoid resource leaks or file conflicts. Also take care when deleting files with os.system, so that other important files are not removed by mistake. 104 | *** 105 | -------------------------------------------------------------------------------- /markdown_docs/tests/test_json_handler.md: -------------------------------------------------------------------------------- 1 | ## ClassDef TestJsonFileProcessor 2 | **TestJsonFileProcessor**: The function of TestJsonFileProcessor is to test the functionalities of the JsonFileProcessor class, specifically its methods for reading and extracting data from JSON files. 3 | 4 | **attributes**: The attributes of this Class. 5 | · processor: An instance of the JsonFileProcessor class initialized with the filename "test.json". 6 | 7 | **Code Description**: The TestJsonFileProcessor class is a unit test case that inherits from unittest.TestCase. It is designed to validate the behavior of the JsonFileProcessor class, which is responsible for handling JSON file operations. The class contains several test methods that utilize the unittest framework's features, such as setup methods and mocking. 8 | 9 | The setUp method initializes an instance of JsonFileProcessor with a test JSON file named "test.json". This setup is executed before each test method runs, ensuring that each test has a fresh instance of the processor. 10 | 11 | The test_read_json_file method tests the read_json_file method of the JsonFileProcessor class. It uses the @patch decorator to mock the built-in open function, simulating the reading of a JSON file containing a specific structure. The test asserts that the data returned by read_json_file matches the expected dictionary structure and verifies that the open function was called with the correct parameters. 12 | 13 | The test_extract_md_contents method tests the extract_md_contents method of the JsonFileProcessor class. It mocks the read_json_file method to return a predefined JSON structure. The test checks that the extracted markdown content includes the expected value "content1". 14 | 15 | The test_search_in_json_nested method tests the search_in_json_nested method of the JsonFileProcessor class. Similar to the previous tests, it mocks the open function to provide a different JSON structure. The test asserts that the result of the search matches the expected dictionary for the specified file name and verifies the correct invocation of the open function. 16 | 17 | **Note**: It is important to ensure that the JsonFileProcessor class is implemented correctly for these tests to pass. The tests rely on the structure of the JSON data being consistent with the expectations set in the test cases.
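The mocking pattern these tests rely on can be sketched as follows (a minimal, self-contained illustration of the same technique, not the verbatim test file; the class name here is hypothetical):

```python
import json
import unittest
from unittest.mock import mock_open, patch

from repo_agent.chat_with_repo.json_handler import JsonFileProcessor

MOCK_JSON = {"files": [{"objects": [{"md_content": "content1"}]}]}


class TestJsonFileProcessorSketch(unittest.TestCase):
    def setUp(self):
        self.processor = JsonFileProcessor("test.json")

    @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(MOCK_JSON))
    def test_read_json_file(self, mocked_open):
        # read_json_file should parse the mocked file and open it with the right arguments.
        data = self.processor.read_json_file()
        self.assertEqual(data, MOCK_JSON)
        mocked_open.assert_called_with("test.json", "r", encoding="utf-8")
```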
18 | 19 | **Output Example**: 20 | For the test_read_json_file method, the expected output when read_json_file is called would be: 21 | {"files": [{"objects": [{"md_content": "content1"}]}]} 22 | 23 | For the test_extract_md_contents method, the expected output for md_contents would include: 24 | ["content1"] 25 | 26 | For the test_search_in_json_nested method, the expected output when searching for "file1" would be: 27 | {"name": "file1"} 28 | ### FunctionDef setUp(self) 29 | **setUp**: The function of setUp is to initialize the test environment. 30 | 31 | **parameters**: This function has no parameters. 32 | 33 | **Code Description**: The setUp function is a test-preparation function, commonly used in unit testing frameworks. In this function, an instance named processor is created, of type JsonFileProcessor, with the string argument "test.json". This instantiation means that a fresh JsonFileProcessor object is created before each test case runs, ensuring that every test case starts from a clean state. The specific functionality and implementation details of the JsonFileProcessor class are not provided in this code snippet, but it can be inferred that it is related to processing JSON files. 34 | 35 | **Note**: When using the setUp function, make sure the JsonFileProcessor class is implemented correctly and that the "test.json" file exists at the expected path, to avoid file-not-found errors during test execution. 36 | *** 37 | ### FunctionDef test_read_json_file(self, mock_file) 38 | **test_read_json_file**: The function of test_read_json_file is to test the functionality of the read_json_file method. 39 | 40 | **parameters**: The parameters of this Function. 41 | · mock_file: A mock file object used to test the file-reading operation. 42 | 43 | **Code Description**: This function tests the correctness of the `read_json_file` method. First, it calls `self.processor.read_json_file()` to read the data of the JSON file. It then uses `self.assertEqual` to verify that the data read matches the expected dictionary structure, namely `{"files": [{"objects": [{"md_content": "content1"}]}]}`. Finally, `mock_file.assert_called_with("test.json", "r", encoding="utf-8")` confirms that the mocked file object was called with the correct arguments when reading the file: the file name "test.json", read-only mode ("r"), and UTF-8 encoding. 44 | 45 | **Note**: When using this code, make sure the mock file object is set up correctly so that the file-reading functionality can be tested accurately. Also make sure the `read_json_file` method can handle the expected file format and content. 46 | *** 47 | ### FunctionDef test_extract_md_contents(self, mock_read_json) 48 | **test_extract_md_contents**: The function of test_extract_md_contents is to test the functionality of the extract_md_contents method. 49 | 50 | **parameters**: The parameters of this Function. 51 | · mock_read_json: A mocked function used in place of the actual JSON reading operation. 52 | 53 | **Code Description**: 54 | This function mainly tests the correctness of the `extract_md_contents` method. First, `mock_read_json` is used to mock the JSON file reading operation, returning a dictionary of file information that contains a list of objects, each of which has an `md_content` field. Specifically, the mocked JSON data structure is: 55 | ```json 56 | { 57 | "files": [ 58 | { 59 | "objects": [ 60 | { 61 | "md_content": "content1" 62 | } 63 | ] 64 | } 65 | ] 66 | } 67 | ``` 68 | Next, `self.processor.extract_md_contents()` is called; the purpose of this method is to extract all `md_content` values. Finally, the assertion `self.assertIn("content1", md_contents)` verifies that the extracted contents include "content1". If they do, the test passes, showing that the `extract_md_contents` method can correctly extract the Markdown content from the JSON data. 69 | 70 | **Note**: When using this code, make sure the `extract_md_contents` method can handle the mocked JSON data structure and that `mock_read_json` is configured correctly in the test environment. 71 | 72 | **Output Example**: The return value of this function may look like the following structure: 73 | ```python 74 | ["content1"] 75 | ``` 76 | *** 77 | ### FunctionDef test_search_in_json_nested(self, mock_file) 78 | **test_search_in_json_nested**: The function of test_search_in_json_nested is to test the functionality of the search_in_json_nested method. 79 | 80 | **parameters**: The parameters of this Function. 81 | · parameter1: mock_file - A mock file object used to test the file operations. 82 | 83 | **Code Description**: This function tests the functionality of the `search_in_json_nested` method. First, it calls `self.processor.search_in_json_nested`, passing two arguments: the file name `"test.json"` and the keyword to search for, `"file1"`. The expected result of this method is the dictionary `{"name": "file1"}`, indicating that an entry matching the keyword was successfully found in the JSON file. Next, `self.assertEqual` verifies that the returned result matches the expected result; if it does, the test passes. Finally, `mock_file.assert_called_with` verifies that the file-open call was made with the correct arguments during the test, ensuring that the file `"test.json"` was opened in read-only mode ("r") with UTF-8 encoding. 84 | 85 | **Note**: When using this code, make sure `mock_file` is properly configured to mock the file operations, so that actual file reads and writes do not affect the test results. Also make sure the implementation of `search_in_json_nested` handles nested JSON data correctly, so that the expected result is returned. 86 | *** 87 | --------------------------------------------------------------------------------
/markdown_docs/tests/test_structure_tree.md: -------------------------------------------------------------------------------- 1 | ## FunctionDef build_path_tree(who_reference_me, reference_who, doc_item_path) 2 | **build_path_tree**: The function of build_path_tree is to create a hierarchical representation of file paths based on provided references and a specific document item path. 3 | 4 | **parameters**: The parameters of this Function. 5 | · parameter1: who_reference_me - A list of file paths that reference the current entity. 6 | · parameter2: reference_who - A list of file paths that reference another entity. 7 | · parameter3: doc_item_path - A specific file path that needs to be highlighted in the output. 8 | 9 | **Code Description**: The build_path_tree function constructs a nested dictionary structure representing a tree of file paths. It begins by defining an inner function, tree, which initializes a defaultdict that allows for the creation of nested dictionaries automatically. The variable path_tree is then assigned the result of calling this inner function. 10 | 11 | The function processes two lists of paths: who_reference_me and reference_who. For each path in these lists, it splits the path into its components using the operating system's path separator (os.sep). It then traverses the path_tree structure, creating nested dictionaries for each part of the path. 12 | 13 | After processing the reference paths, the function handles the doc_item_path. It splits this path into components as well, but modifies the last component by prefixing it with a star symbol (✳️) to indicate it as a special item. The function again traverses the path_tree to include this modified path. 14 | 15 | Finally, the function defines another inner function, tree_to_string, which recursively converts the tree structure into a string representation. This function sorts the keys at each level and adds indentation based on the depth of the tree. The resulting string representation of the path_tree is returned as the output of the build_path_tree function. 16 | 17 | **Note**: It is important to ensure that the input paths are formatted correctly and that the os module is imported for the path separator to function properly. The output string will visually represent the hierarchy of paths, with the doc_item_path clearly marked. 
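The mechanics above amount to only a few lines of code; the sketch below shows the same idea (a recursive defaultdict tree plus indentation-based rendering, written from this description rather than copied from the source; the four-space indent width is an assumption):

```python
import os
from collections import defaultdict

def tree():
    # Accessing a missing key silently creates a new nested tree.
    return defaultdict(tree)

def build_path_tree(who_reference_me, reference_who, doc_item_path):
    path_tree = tree()

    # Insert every reference path, component by component.
    for path in list(who_reference_me) + list(reference_who):
        node = path_tree
        for part in path.split(os.sep):
            node = node[part]

    # Insert the document item itself, starring its last component.
    parts = doc_item_path.split(os.sep)
    parts[-1] = "✳️" + parts[-1]
    node = path_tree
    for part in parts:
        node = node[part]

    def tree_to_string(tree, indent=0):
        s = ""
        for key, value in sorted(tree.items()):
            s += "    " * indent + key + "\n"
            if isinstance(value, dict):
                s += tree_to_string(value, indent + 1)
        return s

    return tree_to_string(path_tree)
```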
18 | 19 | **Output Example**: 20 | Assuming the following inputs: 21 | who_reference_me = ["folder1/fileA.txt", "folder1/folder2/fileB.txt"] 22 | reference_who = ["folder3/fileC.txt"] 23 | doc_item_path = "folder1/folder2/fileB.txt" 24 | 25 | The output of the function could look like this: 26 | folder1 27 |     fileA.txt 28 |     folder2 29 |         ✳️fileB.txt 30 | folder3 31 |     fileC.txt 32 | ### FunctionDef tree 33 | **tree**: The function of tree is to return a default dictionary whose default value is a new tree. 34 | 35 | **parameters**: This function has no parameters. 36 | 37 | **Code Description**: The tree function uses defaultdict from Python's collections module. defaultdict is a dict subclass that supplies a default value: when a missing key is accessed, a new value is created automatically. In this implementation, the tree function returns a defaultdict whose default factory is the tree function itself. This means that whenever a missing key is accessed, the defaultdict automatically creates a new defaultdict. This recursive structure can be used to build tree-shaped data structures in which each node can have any number of children, with the number and names of the children generated dynamically. 38 | 39 | **Note**: When using this code, take care to avoid unbounded recursion. Because the default factory of the defaultdict returned by tree is the tree function itself, accessing missing keys keeps creating new defaultdicts until some condition or limit is reached. 40 | 41 | **Output Example**: After calling the tree function, a structure like the following may be obtained: 42 | ``` 43 | defaultdict(<function tree at 0x...>, { 44 | 'key1': defaultdict(<function tree at 0x...>, { 45 | 'subkey1': defaultdict(<function tree at 0x...>, {}), 46 | 'subkey2': defaultdict(<function tree at 0x...>, {}) 47 | }), 48 | 'key2': defaultdict(<function tree at 0x...>, {}) 49 | }) 50 | ``` 51 | In this example, 'key1' and 'key2' are top-level keys, while 'subkey1' and 'subkey2' are sub-keys under 'key1'. 52 | *** 53 | ### FunctionDef tree_to_string(tree, indent) 54 | **tree_to_string**: The function of tree_to_string is to convert a tree structure into a string format for easy visualization. 55 | 56 | **parameters**: The parameters of this function are as follows: 57 | · parameter1: tree - A dictionary-based tree structure containing key-value pairs, where the keys are node names and the values are child nodes (which can be dictionaries or other types). 58 | · parameter2: indent - An integer indicating the indentation level of the current node, 0 by default. 59 | 60 | **Code Description**: The tree_to_string function recursively traverses the given tree structure and formats it as a string. The function first initializes an empty string s to store the final result. It then sorts the key-value pairs in the tree and processes each key in turn. For each key, the function adds the appropriate number of spaces to the string (controlled by the indent parameter), then adds the key's name followed by a newline. If the value for that key is a dictionary, the function calls itself recursively with an increased indentation level (indent + 1) to process the subtree. Finally, the function returns the constructed string, which shows the hierarchy of the tree structure. 61 | 62 | **Note**: When using this function, make sure the tree argument is a dictionary whose values may be dictionaries or other types. The indent parameter should be a non-negative integer so that the output is formatted correctly. 63 | 64 | **Output Example**: Assuming the input tree structure is: 65 | { 66 | "root": { 67 | "child1": {}, 68 | "child2": { 69 | "grandchild1": {} 70 | } 71 | }, 72 | "another_root": {} 73 | } 74 | After calling tree_to_string, the returned string might look like this: 75 | root 76 |     child1 77 |     child2 78 |         grandchild1 79 | another_root 80 | *** 81 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | authors = [ 3 | {name = "Qinyu Luo", email = "qinyuluo123@gmail.com"}, 4 | ] 5 | maintainers = [ 6 | {name = "Edwards Arno", email = "Edwards.Arno@outlook.com"}, 7 | ] 8 | license = {text = "Apache-2.0"} 9 | requires-python = ">=3.11,<4.0" 10 | dependencies = [ 11 | "loguru>=0.7.2", 12 | "jedi>=0.19.1", 13 | "GitPython>=3.1.41", 14 | "prettytable>=3.9.0", 15 | "python-iso639>=2024.2.7", 16 | "pydantic-settings>=2.2.1", 17 | "click>=8.1.7", 18 | "python-iso639>=2024.10.22", 19 | "colorama>=0.4.6", 20 | "llama-index-llms-openai-like>=0.3.3", 21 | ] 22 | name = "repoagent" 23 | version = "0.2.0" 24 | description = "An LLM-Powered Framework for Repository-level Code Documentation Generation."
25 | readme = "README.md" 26 | classifiers = [ 27 | "Programming Language :: Python :: 3", 28 | "Programming Language :: Python :: 3.11", 29 | "Programming Language :: Python :: 3.12", 30 | "Topic :: Scientific/Engineering :: Artificial Intelligence" 31 | ] 32 | 33 | [project.urls] 34 | repository = "https://github.com/LOGIC-10/RepoAgent" 35 | 36 | [project.scripts] 37 | repoagent = "repo_agent.main:cli" 38 | 39 | [project.optional-dependencies] 40 | chat_with_repo = [ 41 | "markdown>=3.7", 42 | "llama-index-embeddings-openai>=0.2.5", 43 | "llama-index-vector-stores-chroma>=0.3.0", 44 | "gradio>=5.6.0", 45 | ] 46 | 47 | [build-system] 48 | requires = ["pdm-backend"] 49 | build-backend = "pdm.backend" 50 | 51 | [tool.pyright] 52 | reportCallIssue="none" 53 | 54 | [tool.ruff] 55 | # General ruff settings can stay here. 56 | 57 | [tool.ruff.lint] 58 | select = ["I001"] 59 | 60 | [tool.pdm] 61 | [tool.pdm.dev-dependencies] 62 | test = [ 63 | "pytest<8.0.0,>=7.4.4", 64 | "pytest-mock<4.0.0,>=3.12.0", 65 | ] 66 | lint = [ 67 | "ruff>=0.7.4", 68 | ] 69 | 70 | [tool.pdm.build] 71 | includes = [ 72 | "repo_agent", 73 | ] -------------------------------------------------------------------------------- /repo_agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/RepoAgent/825d988127d7bfd757237d9c4e8678d9104030f0/repo_agent/__init__.py -------------------------------------------------------------------------------- /repo_agent/__main__.py: -------------------------------------------------------------------------------- 1 | from .main import cli 2 | 3 | if __name__ == "__main__": 4 | cli() 5 | -------------------------------------------------------------------------------- /repo_agent/chat_engine.py: -------------------------------------------------------------------------------- 1 | from llama_index.llms.openai_like import OpenAILike 2 | 3 | from repo_agent.doc_meta_info import DocItem 4 | from repo_agent.log import logger 5 | from repo_agent.prompt import chat_template 6 | from repo_agent.settings import SettingsManager 7 | 8 | 9 | class ChatEngine: 10 | """ 11 | ChatEngine is used to generate the doc of functions or classes. 
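It drives an OpenAI-compatible model through llama-index's OpenAILike client, configured from the values held by SettingsManager (see __init__ below).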
12 | """ 13 | 14 | def __init__(self, project_manager): 15 | setting = SettingsManager.get_setting() 16 | 17 | self.llm = OpenAILike( 18 | api_key=setting.chat_completion.openai_api_key.get_secret_value(), 19 | api_base=setting.chat_completion.openai_base_url, 20 | timeout=setting.chat_completion.request_timeout, 21 | model=setting.chat_completion.model, 22 | temperature=setting.chat_completion.temperature, 23 | max_retries=1, 24 | is_chat_model=True, 25 | ) 26 | 27 | def build_prompt(self, doc_item: DocItem): 28 | """Builds and returns the system and user prompts based on the DocItem.""" 29 | setting = SettingsManager.get_setting() 30 | 31 | code_info = doc_item.content 32 | referenced = len(doc_item.who_reference_me) > 0 33 | 34 | code_type = code_info["type"] 35 | code_name = code_info["name"] 36 | code_content = code_info["code_content"] 37 | have_return = code_info["have_return"] 38 | file_path = doc_item.get_full_name() 39 | 40 | def get_referenced_prompt(doc_item: DocItem) -> str: 41 | if len(doc_item.reference_who) == 0: 42 | return "" 43 | prompt = [ 44 | """As you can see, the code calls the following objects, their code and docs are as following:""" 45 | ] 46 | for reference_item in doc_item.reference_who: 47 | instance_prompt = ( 48 | f"""obj: {reference_item.get_full_name()}\nDocument: \n{reference_item.md_content[-1] if len(reference_item.md_content) > 0 else 'None'}\nRaw code:```\n{reference_item.content['code_content'] if 'code_content' in reference_item.content.keys() else ''}\n```""" 49 | + "=" * 10 50 | ) 51 | prompt.append(instance_prompt) 52 | return "\n".join(prompt) 53 | 54 | def get_referencer_prompt(doc_item: DocItem) -> str: 55 | if len(doc_item.who_reference_me) == 0: 56 | return "" 57 | prompt = [ 58 | """Also, the code has been called by the following objects, their code and docs are as following:""" 59 | ] 60 | for referencer_item in doc_item.who_reference_me: 61 | instance_prompt = ( 62 | f"""obj: {referencer_item.get_full_name()}\nDocument: \n{referencer_item.md_content[-1] if len(referencer_item.md_content) > 0 else 'None'}\nRaw code:```\n{referencer_item.content['code_content'] if 'code_content' in referencer_item.content.keys() else 'None'}\n```""" 63 | + "=" * 10 64 | ) 65 | prompt.append(instance_prompt) 66 | return "\n".join(prompt) 67 | 68 | def get_relationship_description(referencer_content, reference_letter): 69 | if referencer_content and reference_letter: 70 | return "And please include the reference relationship with its callers and callees in the project from a functional perspective" 71 | elif referencer_content: 72 | return "And please include the relationship with its callers in the project from a functional perspective." 73 | elif reference_letter: 74 | return "And please include the relationship with its callees in the project from a functional perspective." 75 | else: 76 | return "" 77 | 78 | code_type_tell = "Class" if code_type == "ClassDef" else "Function" 79 | parameters_or_attribute = ( 80 | "attributes" if code_type == "ClassDef" else "parameters" 81 | ) 82 | have_return_tell = ( 83 | "**Output Example**: Mock up a possible appearance of the code's return value." 
84 | if have_return 85 | else "" 86 | ) 87 | combine_ref_situation = ( 88 | "and combine it with its calling situation in the project," 89 | if referenced 90 | else "" 91 | ) 92 | 93 | referencer_content = get_referencer_prompt(doc_item) 94 | reference_letter = get_referenced_prompt(doc_item) 95 | has_relationship = get_relationship_description( 96 | referencer_content, reference_letter 97 | ) 98 | 99 | project_structure_prefix = ", and the related hierarchical structure of this project is as follows (The current object is marked with an *):" 100 | 101 | return chat_template.format_messages( 102 | combine_ref_situation=combine_ref_situation, 103 | file_path=file_path, 104 | project_structure_prefix=project_structure_prefix, 105 | code_type_tell=code_type_tell, 106 | code_name=code_name, 107 | code_content=code_content, 108 | have_return_tell=have_return_tell, 109 | has_relationship=has_relationship, 110 | reference_letter=reference_letter, 111 | referencer_content=referencer_content, 112 | parameters_or_attribute=parameters_or_attribute, 113 | language=setting.project.language, 114 | ) 115 | 116 | def generate_doc(self, doc_item: DocItem): 117 | """Generates documentation for a given DocItem.""" 118 | messages = self.build_prompt(doc_item) 119 | 120 | try: 121 | response = self.llm.chat(messages) 122 | logger.debug(f"LLM Prompt Tokens: {response.raw.usage.prompt_tokens}") # type: ignore 123 | logger.debug( 124 | f"LLM Completion Tokens: {response.raw.usage.completion_tokens}" # type: ignore 125 | ) 126 | logger.debug( 127 | f"Total LLM Token Count: {response.raw.usage.total_tokens}" # type: ignore 128 | ) 129 | return response.message.content 130 | except Exception as e: 131 | logger.error(f"Error in llamaindex chat call: {e}") 132 | raise 133 | -------------------------------------------------------------------------------- /repo_agent/chat_with_repo/__init__.py: -------------------------------------------------------------------------------- 1 | # repo_agent/chat_with_repo/__init__.py 2 | 3 | from .main import main 4 | -------------------------------------------------------------------------------- /repo_agent/chat_with_repo/__main__.py: -------------------------------------------------------------------------------- 1 | from .main import main 2 | 3 | main() 4 | -------------------------------------------------------------------------------- /repo_agent/chat_with_repo/gradio_interface.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | import markdown 3 | 4 | from repo_agent.log import logger 5 | 6 | 7 | class GradioInterface: 8 | def __init__(self, respond_function): 9 | self.respond = respond_function 10 | self.cssa = """ 11 | 35 |
36 | 37 | """ 38 | self.cssb = """ 39 |
40 | 41 | 42 | """ 43 | self.setup_gradio_interface() 44 | 45 | def wrapper_respond(self, msg_input, system_input): 46 | # Call the original respond function 47 | msg, output1, output2, output3, code, codex = self.respond( 48 | msg_input, system_input 49 | ) 50 | output1 = markdown.markdown(str(output1)) 51 | output2 = markdown.markdown(str(output2)) 52 | code = markdown.markdown(str(code)) 53 | output1 = ( 54 | self.cssa 55 | + """ 56 |
Response
57 |
58 |
59 | """ 60 | + str(output1) 61 | + """ 62 |
63 |
64 | 65 | """ 66 | ) 67 | output2 = ( 68 | self.cssa 69 | + """ 70 |
Embedding Recall
71 |
72 |
73 | """ 74 | + str(output2) 75 | + self.cssb 76 | ) 77 | code = ( 78 | self.cssa 79 | + """ 80 |
Code
81 |
82 |
83 | """ 84 | + str(code) 85 | + self.cssb 86 | ) 87 | 88 | return msg, output1, output2, output3, code, codex 89 | 90 | def clean(self): 91 | msg = "" 92 | output1 = gr.HTML( 93 | self.cssa 94 | + """ 95 |
Response
96 |
97 |
98 | 99 | """ 100 | + self.cssb 101 | ) 102 | output2 = gr.HTML( 103 | self.cssa 104 | + """ 105 |
Embedding Recall
106 |
107 |
108 | 109 | """ 110 | + self.cssb 111 | ) 112 | output3 = "" 113 | code = gr.HTML( 114 | self.cssa 115 | + """ 116 |
Code
117 |
118 |
119 | 120 | """ 121 | + self.cssb 122 | ) 123 | codex = "" 124 | return msg, output1, output2, output3, code, codex 125 | 126 | def setup_gradio_interface(self): 127 | with gr.Blocks() as demo: 128 | gr.Markdown(""" 129 | # RepoAgent: Chat with doc 130 | """) 131 | with gr.Tab("main chat"): 132 | with gr.Row(): 133 | with gr.Column(): 134 | msg = gr.Textbox(label="Question Input", lines=4) 135 | system = gr.Textbox( 136 | label="(Optional) instruction editing", lines=4 137 | ) 138 | btn = gr.Button("Submit") 139 | btnc = gr.ClearButton() 140 | btnr = gr.Button("record") 141 | 142 | output1 = gr.HTML( 143 | self.cssa 144 | + """ 145 |
Response
146 |
147 |
148 | 149 | """ 150 | + self.cssb 151 | ) 152 | with gr.Row(): 153 | with gr.Column(): 154 | # output2 = gr.Textbox(label = "Embedding recall") 155 | output2 = gr.HTML( 156 | self.cssa 157 | + """ 158 |
Embedding Recall
159 |
160 |
161 | 162 | """ 163 | + self.cssb 164 | ) 165 | code = gr.HTML( 166 | self.cssa 167 | + """ 168 |
Code
169 |
170 |
171 | 172 | """ 173 | + self.cssb 174 | ) 175 | with gr.Row(): 176 | with gr.Column(): 177 | output3 = gr.Textbox(label="key words", lines=2) 178 | output4 = gr.Textbox(label="key words code", lines=14) 179 | 180 | btn.click( 181 | self.wrapper_respond, 182 | inputs=[msg, system], 183 | outputs=[msg, output1, output2, output3, code, output4], 184 | ) 185 | btnc.click( 186 | self.clean, outputs=[msg, output1, output2, output3, code, output4] 187 | ) 188 | msg.submit( 189 | self.wrapper_respond, 190 | inputs=[msg, system], 191 | outputs=[msg, output1, output2, output3, code, output4], 192 | ) # Press enter to submit 193 | 194 | gr.close_all() 195 | demo.queue().launch(share=False, height=800) 196 | 197 | 198 | # 使用方法 199 | if __name__ == "__main__": 200 | 201 | def respond_function(msg, system): 202 | RAG = """ 203 | 204 | 205 | """ 206 | return msg, RAG, "Embedding_recall_output", "Key_words_output", "Code_output" 207 | 208 | gradio_interface = GradioInterface(respond_function) 209 | -------------------------------------------------------------------------------- /repo_agent/chat_with_repo/json_handler.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | 4 | from repo_agent.log import logger 5 | 6 | 7 | class JsonFileProcessor: 8 | def __init__(self, file_path): 9 | self.file_path = file_path 10 | 11 | def read_json_file(self): 12 | try: 13 | with open(self.file_path, "r", encoding="utf-8") as file: 14 | data = json.load(file) 15 | return data 16 | except FileNotFoundError: 17 | logger.exception(f"File not found: {self.file_path}") 18 | sys.exit(1) 19 | 20 | def extract_data(self): 21 | # Load JSON data from a file 22 | json_data = self.read_json_file() 23 | md_contents = [] 24 | extracted_contents = [] 25 | # Iterate through each file in the JSON data 26 | for file, items in json_data.items(): 27 | # Check if the value is a list (new format) 28 | if isinstance(items, list): 29 | # Iterate through each item in the list 30 | for item in items: 31 | # Check if 'md_content' exists and is not empty 32 | if "md_content" in item and item["md_content"]: 33 | # Append the first element of 'md_content' to the result list 34 | md_contents.append(item["md_content"][0]) 35 | # Build a dictionary containing the required information 36 | item_dict = { 37 | "type": item.get("type", "UnknownType"), 38 | "name": item.get("name", "Unnamed"), 39 | "code_start_line": item.get("code_start_line", -1), 40 | "code_end_line": item.get("code_end_line", -1), 41 | "have_return": item.get("have_return", False), 42 | "code_content": item.get("code_content", "NoContent"), 43 | "name_column": item.get("name_column", 0), 44 | "item_status": item.get("item_status", "UnknownStatus"), 45 | # Adapt or remove fields based on new structure requirements 46 | } 47 | extracted_contents.append(item_dict) 48 | return md_contents, extracted_contents 49 | 50 | def recursive_search(self, data_item, search_text, code_results, md_results): 51 | if isinstance(data_item, dict): 52 | # Direct comparison is removed as there's no direct key==search_text in the new format 53 | for key, value in data_item.items(): 54 | # Recursively search through dictionary values and lists 55 | if isinstance(value, (dict, list)): 56 | self.recursive_search(value, search_text, code_results, md_results) 57 | elif isinstance(data_item, list): 58 | for item in data_item: 59 | # Now we check for the 'name' key in each item of the list 60 | if isinstance(item, dict) and item.get("name") == 
search_text: 61 | # If 'code_content' exists, append it to results 62 | if "code_content" in item: 63 | code_results.append(item["code_content"]) 64 | md_results.append(item["md_content"]) 65 | # Recursive call in case of nested lists or dicts 66 | self.recursive_search(item, search_text, code_results, md_results) 67 | 68 | def search_code_contents_by_name(self, file_path, search_text): 69 | # Attempt to retrieve code from the JSON file 70 | try: 71 | with open(file_path, "r", encoding="utf-8") as file: 72 | data = json.load(file) 73 | code_results = [] 74 | md_results = [] # List to store matching items' code_content and md_content 75 | self.recursive_search(data, search_text, code_results, md_results) 76 | # Make sure two values are returned regardless of the outcome 77 | if code_results or md_results: 78 | return code_results, md_results 79 | else: 80 | return ["No matching item found."], ["No matching item found."] 81 | except FileNotFoundError: 82 | return ["File not found."], ["File not found."] 83 | except json.JSONDecodeError: 84 | return ["Invalid JSON file."], ["Invalid JSON file."] 85 | except Exception as e: 86 | return [f"An error occurred: {e}"], [f"An error occurred: {e}"] 87 | 88 | 89 | if __name__ == "__main__": 90 | processor = JsonFileProcessor("database.json") 91 | md_contents, extracted_contents = processor.extract_data() 92 | -------------------------------------------------------------------------------- /repo_agent/chat_with_repo/main.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from repo_agent.chat_with_repo.gradio_interface import GradioInterface 4 | from repo_agent.chat_with_repo.rag import RepoAssistant 5 | from repo_agent.log import logger 6 | from repo_agent.settings import SettingsManager 7 | 8 | 9 | def main(): 10 | logger.info("Initializing the RepoAgent chat with doc module.") 11 | 12 | # Load settings 13 | setting = SettingsManager.get_setting() 14 | 15 | api_key = setting.chat_completion.openai_api_key.get_secret_value() 16 | api_base = str(setting.chat_completion.openai_base_url) 17 | db_path = ( 18 | setting.project.target_repo 19 | / setting.project.hierarchy_name 20 | / "project_hierarchy.json" 21 | ) 22 | 23 | # Initialize RepoAssistant 24 | assistant = RepoAssistant(api_key, api_base, db_path) 25 | 26 | # Extract data 27 | md_contents, meta_data = assistant.json_data.extract_data() 28 | 29 | # Create vector store and measure runtime 30 | logger.info("Starting vector store creation...") 31 | start_time = time.time() 32 | assistant.vector_store_manager.create_vector_store( 33 | md_contents, meta_data, api_key, api_base 34 | ) 35 | elapsed_time = time.time() - start_time 36 | logger.info(f"Vector store created successfully in {elapsed_time:.2f} seconds.") 37 | 38 | # Launch Gradio interface 39 | GradioInterface(assistant.respond) 40 | 41 | 42 | if __name__ == "__main__": 43 | main() 44 | -------------------------------------------------------------------------------- /repo_agent/chat_with_repo/prompt.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import ChatPromptTemplate, PromptTemplate 2 | from llama_index.core.llms import ChatMessage, MessageRole 3 | 4 | # Query Generation Prompt 5 | query_generation_prompt_str = ( 6 | "You are a helpful assistant that generates multiple search queries based on a " 7 | "single input query.
Generate {num_queries} search queries, one on each line, " 8 | "related to the following input query:\n" 9 | "Query: {query}\n" 10 | "Queries:\n" 11 | ) 12 | query_generation_template = PromptTemplate(query_generation_prompt_str) 13 | 14 | # Relevance Ranking Prompt 15 | relevance_ranking_instruction = ( 16 | "You are an expert relevance ranker. Given a list of documents and a query, your job is to determine how relevant each document is for answering the query. " 17 | "Your output is JSON, which is a list of documents. Each document has two fields, content and relevance_score. relevance_score is from 0.0 to 100.0. " 18 | "Higher relevance means higher score." 19 | ) 20 | relevance_ranking_guideline = "Query: {query} Docs: {docs}" 21 | 22 | relevance_ranking_message_template = [ 23 | ChatMessage(content=relevance_ranking_instruction, role=MessageRole.SYSTEM), 24 | ChatMessage( 25 | content=relevance_ranking_guideline, 26 | role=MessageRole.USER, 27 | ), 28 | ] 29 | relevance_ranking_chat_template = ChatPromptTemplate( 30 | message_templates=relevance_ranking_message_template 31 | ) 32 | 33 | # RAG (Retrieve and Generate) Prompt 34 | rag_prompt_str = ( 35 | "You are a helpful assistant in repository Q&A. Users will ask questions about something contained in a repository. " 36 | "You will be shown the user's question, and the relevant information from the repository. Answer the user's question only with information given.\n\n" 37 | "Question: {query}.\n\n" 38 | "Information: {information}" 39 | ) 40 | rag_template = PromptTemplate(rag_prompt_str) 41 | 42 | # RAG_AR (Advanced RAG) Prompt 43 | rag_ar_prompt_str = ( 44 | "You are a helpful Repository-Level Software Q&A assistant. Your task is to answer users' questions based on the given information about a software repository, " 45 | "including related code and documents.\n\n" 46 | "Currently, you're in the {project_name} project. The user's question is:\n" 47 | "{query}\n\n" 48 | "Now, you are given related code and documents as follows:\n\n" 49 | "-------------------Code-------------------\n" 50 | "Some most likely related code snippets recalled by the retriever are:\n" 51 | "{related_code}\n\n" 52 | "-------------------Document-------------------\n" 53 | "Some most relevant documents recalled by the retriever are:\n" 54 | "{embedding_recall}\n\n" 55 | "Please note: \n" 56 | "1. All the provided recall results are related to the current project {project_name}. Please filter useful information according to the user's question and provide corresponding answers or solutions.\n" 57 | "2. Ensure that your responses are accurate and detailed. Present specific answers in a professional manner and tone.\n" 58 | "3. The user's question may be asked in any language. You must respond **in the same language** as the user's question, even if the input language is not English.\n" 59 | "4. If you find the user's question completely unrelated to the provided information or if you believe you cannot provide an accurate answer, kindly decline. Note: DO NOT fabricate any non-existent information.\n\n" 60 | "Now, focus on the user's query, incorporate the given information, and offer a specific, detailed, and professional answer IN THE SAME LANGUAGE AS the user's question."
61 | ) 62 | 63 | 64 | rag_ar_template = PromptTemplate(rag_ar_prompt_str) 65 | -------------------------------------------------------------------------------- /repo_agent/chat_with_repo/rag.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from llama_index.llms.openai import OpenAI 4 | 5 | from repo_agent.chat_with_repo.json_handler import JsonFileProcessor 6 | from repo_agent.chat_with_repo.prompt import ( 7 | query_generation_template, 8 | rag_ar_template, 9 | rag_template, 10 | relevance_ranking_chat_template, 11 | ) 12 | from repo_agent.chat_with_repo.text_analysis_tool import TextAnalysisTool 13 | from repo_agent.chat_with_repo.vector_store_manager import VectorStoreManager 14 | from repo_agent.log import logger 15 | 16 | 17 | class RepoAssistant: 18 | def __init__(self, api_key, api_base, db_path): 19 | self.db_path = db_path 20 | self.md_contents = [] 21 | 22 | self.weak_model = OpenAI( 23 | api_key=api_key, 24 | api_base=api_base, 25 | model="gpt-4o-mini", 26 | ) 27 | self.strong_model = OpenAI( 28 | api_key=api_key, 29 | api_base=api_base, 30 | model="gpt-4o", 31 | ) 32 | self.textanslys = TextAnalysisTool(self.weak_model, db_path) 33 | self.json_data = JsonFileProcessor(db_path) 34 | self.vector_store_manager = VectorStoreManager(top_k=5, llm=self.weak_model) 35 | 36 | def generate_queries(self, query_str: str, num_queries: int = 4): 37 | fmt_prompt = query_generation_template.format( 38 | num_queries=num_queries - 1, query=query_str 39 | ) 40 | response = self.weak_model.complete(fmt_prompt) 41 | queries = response.text.split("\n") 42 | return queries 43 | 44 | def rerank(self, query, docs):  # Guard against malformed response formats here 45 | response = self.weak_model.chat( 46 | response_format={"type": "json_object"}, 47 | temperature=0, 48 | messages=relevance_ranking_chat_template.format_messages( 49 | query=query, docs=docs 50 | ), 51 | ) 52 | scores = json.loads(response.message.content)["documents"] # type: ignore 53 | logger.debug(f"scores: {scores}") 54 | sorted_data = sorted(scores, key=lambda x: x["relevance_score"], reverse=True) 55 | top_5_contents = [doc["content"] for doc in sorted_data[:5]] 56 | return top_5_contents 57 | 58 | def rag(self, query, retrieved_documents): 59 | rag_prompt = rag_template.format( 60 | query=query, information="\n\n".join(retrieved_documents) 61 | ) 62 | response = self.weak_model.complete(rag_prompt) 63 | return response.text 64 | 65 | def list_to_markdown(self, list_items): 66 | markdown_content = "" 67 | 68 | # Add a numbered list item for each element in the list 69 | for index, item in enumerate(list_items, start=1): 70 | markdown_content += f"{index}. {item}\n" 71 | 72 | return markdown_content 73 | 74 | def rag_ar(self, query, related_code, embedding_recall, project_name): 75 | rag_ar_prompt = rag_ar_template.format_messages( 76 | query=query, 77 | related_code=related_code, 78 | embedding_recall=embedding_recall, 79 | project_name=project_name, 80 | ) 81 | response = self.strong_model.chat(rag_ar_prompt) 82 | return response.message.content 83 | 84 | def respond(self, message, instruction): 85 | """ 86 | Respond to a user query by processing input, querying the vector store, 87 | reranking results, and generating a final response.
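Returns a 6-tuple (message, bot_message, chunkrecall, questions, unique_code_md, codex_md): the original message, the final answer, the reranked retrieval results as a Markdown list, the extracted keywords, and the matched code blocks rendered as Markdown.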
88 | """ 89 | logger.debug("Starting response generation.") 90 | 91 | # Step 1: Format the chat prompt 92 | prompt = self.textanslys.format_chat_prompt(message, instruction) 93 | logger.debug(f"Formatted prompt: {prompt}") 94 | 95 | questions = self.textanslys.keyword(prompt) 96 | logger.debug(f"Generated keywords from prompt: {questions}") 97 | 98 | # Step 2: Generate additional queries 99 | prompt_queries = self.generate_queries(prompt, 3) 100 | logger.debug(f"Generated queries: {prompt_queries}") 101 | 102 | all_results = [] 103 | all_documents = [] 104 | 105 | # Step 3: Query the VectorStoreManager for each query 106 | for query in prompt_queries: 107 | logger.debug(f"Querying vector store with: {query}") 108 | query_results = self.vector_store_manager.query_store(query) 109 | logger.debug(f"Results for query '{query}': {query_results}") 110 | all_results.extend(query_results) 111 | 112 | # Step 4: Deduplicate results by content 113 | unique_results = {result["text"]: result for result in all_results}.values() 114 | unique_documents = [result["text"] for result in unique_results] 115 | logger.debug(f"Unique documents: {unique_documents}") 116 | 117 | unique_code = [ 118 | result.get("metadata", {}).get("code_content") for result in unique_results 119 | ] 120 | logger.debug(f"Unique code content: {unique_code}") 121 | 122 | # Step 5: Rerank documents based on relevance 123 | retrieved_documents = self.rerank(message, unique_documents) 124 | logger.debug(f"Reranked documents: {retrieved_documents}") 125 | 126 | # Step 6: Generate a response using RAG (Retrieve and Generate) 127 | response = self.rag(prompt, retrieved_documents) 128 | chunkrecall = self.list_to_markdown(retrieved_documents) 129 | logger.debug(f"RAG-generated response: {response}") 130 | logger.debug(f"Markdown chunk recall: {chunkrecall}") 131 | 132 | bot_message = str(response) 133 | logger.debug(f"Initial bot_message: {bot_message}") 134 | 135 | # Step 7: Perform NER and queryblock processing 136 | keyword = str(self.textanslys.nerquery(bot_message)) 137 | keywords = str(self.textanslys.nerquery(str(prompt) + str(questions))) 138 | logger.debug(f"Extracted keywords: {keyword}, {keywords}") 139 | 140 | codez, mdz = self.textanslys.queryblock(keyword) 141 | codey, mdy = self.textanslys.queryblock(keywords) 142 | 143 | # Ensure all returned items are lists 144 | codez = codez if isinstance(codez, list) else [codez] 145 | mdz = mdz if isinstance(mdz, list) else [mdz] 146 | codey = codey if isinstance(codey, list) else [codey] 147 | mdy = mdy if isinstance(mdy, list) else [mdy] 148 | 149 | # Step 8: Merge and deduplicate results 150 | codex = list(dict.fromkeys(codez + codey)) 151 | md = list(dict.fromkeys(mdz + mdy)) 152 | unique_mdx = list(set([item for sublist in md for item in sublist])) 153 | uni_codex = list(dict.fromkeys(codex)) 154 | uni_md = list(dict.fromkeys(unique_mdx)) 155 | 156 | # Convert to Markdown format 157 | codex_md = self.textanslys.list_to_markdown(uni_codex) 158 | retrieved_documents = list(dict.fromkeys(retrieved_documents + uni_md)) 159 | 160 | # Final rerank and response generation 161 | retrieved_documents = self.rerank(message, retrieved_documents[:6]) 162 | logger.debug(f"Final retrieved documents after rerank: {retrieved_documents}") 163 | 164 | uni_code = self.rerank( 165 | message, list(dict.fromkeys(uni_codex + unique_code))[:6] 166 | ) 167 | logger.debug(f"Final unique code after rerank: {uni_code}") 168 | 169 | unique_code_md = self.textanslys.list_to_markdown(unique_code) 170 | 
logger.debug(f"Unique code in Markdown: {unique_code_md}") 171 | 172 | # Generate final response using RAG_AR 173 | bot_message = self.rag_ar(prompt, uni_code, retrieved_documents, "test") 174 | logger.debug(f"Final bot_message after RAG_AR: {bot_message}") 175 | 176 | return message, bot_message, chunkrecall, questions, unique_code_md, codex_md 177 | -------------------------------------------------------------------------------- /repo_agent/chat_with_repo/text_analysis_tool.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.llms.function_calling import FunctionCallingLLM 2 | from llama_index.llms.openai import OpenAI 3 | 4 | from repo_agent.chat_with_repo.json_handler import JsonFileProcessor 5 | 6 | 7 | class TextAnalysisTool: 8 | def __init__(self, llm: FunctionCallingLLM, db_path): 9 | self.jsonsearch = JsonFileProcessor(db_path) 10 | self.llm = llm 11 | self.db_path = db_path 12 | 13 | def keyword(self, query): 14 | prompt = f"Please provide a list of Code keywords according to the following query, please output no more than 3 keywords, Input: {query}, Output:" 15 | response = self.llm.complete(prompt) 16 | return response 17 | 18 | def tree(self, query): 19 | prompt = f"Please analyze the following text and generate a tree structure based on its hierarchy:\n\n{query}" 20 | response = self.llm.complete(prompt) 21 | return response 22 | 23 | def format_chat_prompt(self, message, instruction): 24 | prompt = f"System:{instruction}\nUser: {message}\nAssistant:" 25 | return prompt 26 | 27 | def queryblock(self, message): 28 | search_result, md = self.jsonsearch.search_code_contents_by_name( 29 | self.db_path, message 30 | ) 31 | return search_result, md 32 | 33 | def list_to_markdown(self, search_result): 34 | markdown_str = "" 35 | # 遍历列表,将每个元素转换为Markdown格式的项 36 | for index, content in enumerate(search_result, start=1): 37 | # 添加到Markdown字符串中,每个项后跟一个换行符 38 | markdown_str += f"{index}. {content}\n\n" 39 | 40 | return markdown_str 41 | 42 | def nerquery(self, message): 43 | instrcution = """ 44 | Extract the most relevant class or function base on the following instrcution: 45 | 46 | The output must strictly be a pure function name or class name, without any additional characters. 47 | For example: 48 | Pure function names: calculateSum, processData 49 | Pure class names: MyClass, DataProcessor 50 | The output function name or class name should be only one. 
51 | """ 52 | query = f"{instrcution}\n\nThe input is shown as bellow:\n{message}\n\nAnd now directly give your Output:" 53 | response = self.llm.complete(query) 54 | # logger.debug(f"Input: {message}, Output: {response}") 55 | return response 56 | 57 | 58 | if __name__ == "__main__": 59 | api_base = "https://api.openai.com/v1" 60 | api_key = "your_api_key" 61 | log_file = "your_logfile_path" 62 | llm = OpenAI(api_key=api_key, api_base=api_base) 63 | db_path = "your_database_path" 64 | test = TextAnalysisTool(llm, db_path) 65 | -------------------------------------------------------------------------------- /repo_agent/chat_with_repo/vector_store_manager.py: -------------------------------------------------------------------------------- 1 | import chromadb 2 | from llama_index.core import ( 3 | Document, 4 | StorageContext, 5 | VectorStoreIndex, 6 | get_response_synthesizer, 7 | ) 8 | from llama_index.core.node_parser import ( 9 | SemanticSplitterNodeParser, 10 | SentenceSplitter, 11 | ) 12 | from llama_index.core.query_engine import RetrieverQueryEngine 13 | from llama_index.core.retrievers import VectorIndexRetriever 14 | from llama_index.embeddings.openai import OpenAIEmbedding 15 | from llama_index.vector_stores.chroma import ChromaVectorStore 16 | 17 | from repo_agent.log import logger 18 | 19 | 20 | class VectorStoreManager: 21 | def __init__(self, top_k, llm): 22 | """ 23 | Initialize the VectorStoreManager. 24 | """ 25 | self.query_engine = None # Initialize as None 26 | self.chroma_db_path = "./chroma_db" # Path to Chroma database 27 | self.collection_name = "test" # Default collection name 28 | self.similarity_top_k = top_k 29 | self.llm = llm 30 | 31 | def create_vector_store(self, md_contents, meta_data, api_key, api_base): 32 | """ 33 | Add markdown content and metadata to the index. 34 | """ 35 | if not md_contents or not meta_data: 36 | logger.warning("No content or metadata provided. 
Skipping.") 37 | return 38 | 39 | # Ensure lengths match 40 | min_length = min(len(md_contents), len(meta_data)) 41 | md_contents = md_contents[:min_length] 42 | meta_data = meta_data[:min_length] 43 | 44 | logger.debug(f"Number of markdown contents: {len(md_contents)}") 45 | logger.debug(f"Number of metadata entries: {len(meta_data)}") 46 | 47 | # Initialize Chroma client and collection 48 | db = chromadb.PersistentClient(path=self.chroma_db_path) 49 | chroma_collection = db.get_or_create_collection(self.collection_name) 50 | 51 | # Define embedding model 52 | embed_model = OpenAIEmbedding( 53 | model_name="text-embedding-3-large", 54 | api_key=api_key, 55 | api_base=api_base, 56 | ) 57 | 58 | # Initialize semantic chunker (SimpleNodeParser) 59 | logger.debug("Initializing semantic chunker (SimpleNodeParser).") 60 | splitter = SemanticSplitterNodeParser( 61 | buffer_size=1, breakpoint_percentile_threshold=95, embed_model=embed_model 62 | ) 63 | base_splitter = SentenceSplitter(chunk_size=1024) 64 | 65 | documents = [ 66 | Document(text=content, extra_info=meta) 67 | for content, meta in zip(md_contents, meta_data) 68 | ] 69 | 70 | all_nodes = [] 71 | for i, doc in enumerate(documents): 72 | logger.debug( 73 | f"Processing document {i+1}: Content length={len(doc.get_text())}" 74 | ) 75 | 76 | try: 77 | # Try semantic splitting first 78 | nodes = splitter.get_nodes_from_documents([doc]) 79 | logger.debug(f"Document {i+1} split into {len(nodes)} semantic chunks.") 80 | 81 | except Exception as e: 82 | # Fallback to baseline sentence splitting 83 | logger.warning( 84 | f"Semantic splitting failed for document {i+1}, falling back to SentenceSplitter. Error: {e}" 85 | ) 86 | nodes = base_splitter.get_nodes_from_documents([doc]) 87 | logger.debug(f"Document {i+1} split into {len(nodes)} sentence chunks.") 88 | 89 | all_nodes.extend(nodes) 90 | 91 | if not all_nodes: 92 | logger.warning("No valid nodes to add to the index after chunking.") 93 | return 94 | 95 | logger.debug(f"Number of valid chunks: {len(all_nodes)}") 96 | 97 | # Set up ChromaVectorStore and load data 98 | vector_store = ChromaVectorStore(chroma_collection=chroma_collection) 99 | storage_context = StorageContext.from_defaults(vector_store=vector_store) 100 | index = VectorStoreIndex( 101 | all_nodes, storage_context=storage_context, embed_model=embed_model 102 | ) 103 | retriever = VectorIndexRetriever( 104 | index=index, similarity_top_k=self.similarity_top_k, embed_model=embed_model 105 | ) 106 | 107 | response_synthesizer = get_response_synthesizer(llm=self.llm) 108 | 109 | # Set the query engine 110 | self.query_engine = RetrieverQueryEngine( 111 | retriever=retriever, 112 | response_synthesizer=response_synthesizer, 113 | ) 114 | 115 | logger.info(f"Vector store created and loaded with {len(documents)} documents.") 116 | 117 | def query_store(self, query): 118 | """ 119 | Query the vector store for relevant documents. 120 | """ 121 | if not self.query_engine: 122 | logger.error( 123 | "Query engine is not initialized. Please create a vector store first." 
124 | ) 125 | return [] 126 | 127 | # Query the vector store 128 | logger.debug(f"Querying vector store with: {query}") 129 | results = self.query_engine.query(query) 130 | 131 | # Extract relevant information from results 132 | return [{"text": results.response, "metadata": results.metadata}] 133 | -------------------------------------------------------------------------------- /repo_agent/log.py: -------------------------------------------------------------------------------- 1 | # repo_agent/log.py 2 | import inspect 3 | import logging 4 | import sys 5 | 6 | from loguru import logger 7 | 8 | logger = logger.opt(colors=True) 9 | """ 10 | The RepoAgent logger object. 11 | 12 | Defaults: 13 | - Format: `[%(asctime)s %(name)s] %(levelname)s: %(message)s` 14 | - Level: `INFO`, adjusted according to the `CONFIG["log_level"]` setting 15 | - Output: written to stderr 16 | 17 | Usage example: 18 | ```python 19 | from repo_agent.log import logger 20 | 21 | # Basic message logging 22 | logger.info("<green>It works!</green>") # with color markup 23 | 24 | # Logging exception information 25 | try: 26 | 1 / 0 27 | except ZeroDivisionError: 28 | # `logger.exception` automatically attaches the exception's stack trace to the logged message. 29 | logger.exception("ZeroDivisionError occurred") 30 | 31 | # Logging debug information 32 | logger.debug(f"Debugging info: {some_debug_variable}") 33 | 34 | # Logging a warning 35 | logger.warning("This is a warning message") 36 | 37 | # Logging an error 38 | logger.error("An error occurred") 39 | ``` 40 | 41 | """ 42 | 43 | 44 | class InterceptHandler(logging.Handler): 45 | def emit(self, record: logging.LogRecord) -> None: 46 | # Get corresponding Loguru level if it exists. 47 | level: str | int 48 | try: 49 | level = logger.level(record.levelname).name 50 | except ValueError: 51 | level = record.levelno 52 | 53 | # Find caller from where originated the logged message. 54 | frame, depth = inspect.currentframe(), 0 55 | while frame and (depth == 0 or frame.f_code.co_filename == logging.__file__): 56 | frame = frame.f_back 57 | depth += 1 58 | 59 | logger.opt(depth=depth, exception=record.exc_info).log( 60 | level, record.getMessage() 61 | ) 62 | 63 | 64 | def set_logger_level_from_config(log_level): 65 | """ 66 | Configures the loguru logger with specified log level and integrates it with the standard logging module. 67 | 68 | Args: 69 | log_level (str): The log level to set for loguru (e.g., "DEBUG", "INFO", "WARNING"). 70 | 71 | This function: 72 | - Removes any existing loguru handlers to ensure a clean slate. 73 | - Adds a new handler to loguru, directing output to stderr with the specified level. 74 | - `enqueue=True` ensures thread-safe logging by using a queue, helpful in multi-threaded contexts. 75 | - `backtrace=False` minimizes detailed traceback to prevent overly verbose output. 76 | - `diagnose=False` suppresses additional loguru diagnostic information for more concise logs. 77 | - Redirects the standard logging output to loguru using the InterceptHandler, allowing loguru to handle 78 | all logs consistently across the application.
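Typical usage: the CLI entry point calls set_logger_level_from_config(log_level=log_level) once settings have been validated (see repo_agent/main.py).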
79 | """ 80 | logger.remove() 81 | logger.add( 82 | sys.stderr, level=log_level, enqueue=True, backtrace=False, diagnose=False 83 | ) 84 | 85 | # Intercept standard logging 86 | logging.basicConfig(handlers=[InterceptHandler()], level=0, force=True) 87 | 88 | logger.success(f"Log level set to {log_level}!") 89 | -------------------------------------------------------------------------------- /repo_agent/main.py: -------------------------------------------------------------------------------- 1 | from importlib import metadata 2 | 3 | import click 4 | from pydantic import ValidationError 5 | 6 | from repo_agent.doc_meta_info import DocItem, MetaInfo 7 | from repo_agent.log import logger, set_logger_level_from_config 8 | from repo_agent.runner import Runner, delete_fake_files 9 | from repo_agent.settings import SettingsManager, LogLevel 10 | from repo_agent.utils.meta_info_utils import delete_fake_files, make_fake_files 11 | 12 | try: 13 | version_number = metadata.version("repoagent") 14 | except metadata.PackageNotFoundError: 15 | version_number = "0.0.0" 16 | 17 | 18 | @click.group() 19 | @click.version_option(version_number) 20 | def cli(): 21 | """An LLM-Powered Framework for Repository-level Code Documentation Generation.""" 22 | pass 23 | 24 | 25 | def handle_setting_error(e: ValidationError): 26 | """Handle configuration errors for settings.""" 27 | # 输出更详细的字段缺失信息,使用颜色区分 28 | for error in e.errors(): 29 | field = error["loc"][-1] 30 | if error["type"] == "missing": 31 | message = click.style( 32 | f"Missing required field `{field}`. Please set the `{field}` environment variable.", 33 | fg="yellow", 34 | ) 35 | else: 36 | message = click.style(error["msg"], fg="yellow") 37 | click.echo(message, err=True, color=True) 38 | 39 | # 使用 ClickException 优雅地退出程序 40 | raise click.ClickException( 41 | click.style( 42 | "Program terminated due to configuration errors.", fg="red", bold=True 43 | ) 44 | ) 45 | 46 | 47 | @cli.command() 48 | @click.option( 49 | "--model", 50 | "-m", 51 | default="gpt-4o-mini", 52 | show_default=True, 53 | help="Specifies the model to use for completion.", 54 | type=str, 55 | ) 56 | @click.option( 57 | "--temperature", 58 | "-t", 59 | default=0.2, 60 | show_default=True, 61 | help="Sets the generation temperature for the model. Lower values make the model more deterministic.", 62 | type=float, 63 | ) 64 | @click.option( 65 | "--request-timeout", 66 | "-r", 67 | default=60, 68 | show_default=True, 69 | help="Defines the timeout in seconds for the API request.", 70 | type=int, 71 | ) 72 | @click.option( 73 | "--base-url", 74 | "-b", 75 | default="https://api.openai.com/v1", 76 | show_default=True, 77 | help="The base URL for the API calls.", 78 | type=str, 79 | ) 80 | @click.option( 81 | "--target-repo-path", 82 | "-tp", 83 | default="", 84 | show_default=True, 85 | help="The file system path to the target repository. 
86 |     type=click.Path(file_okay=False),
87 | )
88 | @click.option(
89 |     "--hierarchy-path",
90 |     "-hp",
91 |     default=".project_doc_record",
92 |     show_default=True,
93 |     help="The name or path for the project hierarchy file, used to organize documentation structure.",
94 |     type=str,
95 | )
96 | @click.option(
97 |     "--markdown-docs-path",
98 |     "-mdp",
99 |     default="markdown_docs",
100 |     show_default=True,
101 |     help="The folder path where Markdown documentation will be stored or generated.",
102 |     type=str,
103 | )
104 | @click.option(
105 |     "--ignore-list",
106 |     "-i",
107 |     default="",
108 |     help="A comma-separated list of files or directories to ignore during documentation generation.",
109 | )
110 | @click.option(
111 |     "--language",
112 |     "-l",
113 |     default="English",
114 |     show_default=True,
115 |     help="The ISO 639 code or language name for the documentation.",
116 |     type=str,
117 | )
118 | @click.option(
119 |     "--max-thread-count",
120 |     "-mtc",
121 |     default=4,
122 |     show_default=True,
123 | )
124 | @click.option(
125 |     "--log-level",
126 |     "-ll",
127 |     default="INFO",
128 |     show_default=True,
129 |     help="Sets the logging level (e.g., DEBUG, INFO, WARNING, ERROR, CRITICAL) for the application.",
130 |     type=click.Choice([level.value for level in LogLevel], case_sensitive=False),
131 | )
132 | @click.option(
133 |     "--print-hierarchy",
134 |     "-pr",
135 |     is_flag=True,
136 |     show_default=True,
137 |     default=False,
138 |     help="If set, prints the hierarchy of the target repository when the main task finishes.",
139 | )
140 | def run(
141 |     model,
142 |     temperature,
143 |     request_timeout,
144 |     base_url,
145 |     target_repo_path,
146 |     hierarchy_path,
147 |     markdown_docs_path,
148 |     ignore_list,
149 |     language,
150 |     max_thread_count,
151 |     log_level,
152 |     print_hierarchy,
153 | ):
154 |     """Run the program with the specified parameters."""
155 |     try:
156 |         # Fetch and validate the settings using the SettingsManager
157 |         setting = SettingsManager.initialize_with_params(
158 |             target_repo=target_repo_path,
159 |             hierarchy_name=hierarchy_path,
160 |             markdown_docs_name=markdown_docs_path,
161 |             ignore_list=[item.strip() for item in ignore_list.split(",") if item],
162 |             language=language,
163 |             log_level=log_level,
164 |             model=model,
165 |             temperature=temperature,
166 |             request_timeout=request_timeout,
167 |             openai_base_url=base_url,
168 |             max_thread_count=max_thread_count,
169 |         )
170 |         set_logger_level_from_config(log_level=log_level)
171 |     except ValidationError as e:
172 |         handle_setting_error(e)
173 |         return
174 | 
175 |     # Settings validated successfully; run the documentation task
176 |     runner = Runner()
177 |     runner.run()
178 |     logger.success("Documentation task completed.")
179 |     if print_hierarchy:
180 |         runner.meta_info.target_repo_hierarchical_tree.print_recursive()
181 |         logger.success("Hierarchy printed.")
182 | 
183 | 
184 | @cli.command()
185 | def clean():
186 |     """Clean the fake files generated by the documentation process."""
187 |     delete_fake_files()
188 |     logger.success("Fake files have been cleaned up.")
189 | 
190 | 
191 | @cli.command()
192 | def diff():
193 |     """Check for changes and print which documents will be updated or generated."""
194 |     try:
195 |         # Fetch and validate the settings using the SettingsManager
196 |         setting = SettingsManager.get_setting()
197 |     except ValidationError as e:
198 |         handle_setting_error(e)
199 |         return
200 | 
201 |     runner = Runner()
202 |     if runner.meta_info.in_generation_process:  # change detection only starts when no generation pass is in progress
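        # A True value means an unfinished generation pass is still recorded in
        # the metadata; the pre-check below assumes a clean baseline, so abort
        # rather than report a misleading diff.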
203 |         click.echo("This command only supports pre-check")
204 |         raise click.Abort()
205 | 
206 |     file_path_reflections, jump_files = make_fake_files()
207 |     new_meta_info = MetaInfo.init_meta_info(file_path_reflections, jump_files)
208 |     new_meta_info.load_doc_from_older_meta(runner.meta_info)
209 |     delete_fake_files()
210 | 
211 |     DocItem.check_has_task(
212 |         new_meta_info.target_repo_hierarchical_tree,
213 |         ignore_list=setting.project.ignore_list,
214 |     )
215 |     if new_meta_info.target_repo_hierarchical_tree.has_task:
216 |         click.echo("The following docs will be generated/updated:")
217 |         new_meta_info.target_repo_hierarchical_tree.print_recursive(
218 |             diff_status=True, ignore_list=setting.project.ignore_list
219 |         )
220 |     else:
221 |         click.echo("No docs will be generated/updated; check your source-code updates")
222 | 
223 | 
224 | @cli.command()
225 | def chat_with_repo():
226 |     """
227 |     Start an interactive chat session with the repository.
228 |     """
229 |     try:
230 |         # Fetch and validate the settings using the SettingsManager
231 |         setting = SettingsManager.get_setting()
232 |     except ValidationError as e:
233 |         # Handle configuration errors if the settings are invalid
234 |         handle_setting_error(e)
235 |         return
236 | 
237 |     from repo_agent.chat_with_repo import main
238 | 
239 |     main()
240 | 
241 | 
242 | if __name__ == "__main__":
243 |     cli()
244 | 
--------------------------------------------------------------------------------
/repo_agent/multi_task_dispatch.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | 
3 | import random
4 | import threading
5 | import time
6 | from typing import Any, Callable, Dict, List
7 | 
8 | from colorama import Fore, Style
9 | 
10 | 
11 | class Task:
12 |     def __init__(self, task_id: int, dependencies: List[Task], extra_info: Any = None):
13 |         self.task_id = task_id
14 |         self.extra_info = extra_info
15 |         self.dependencies = dependencies
16 |         self.status = 0  # Task status: 0 = not started, 1 = in progress, 2 = completed, 3 = error
17 | 
18 | 
19 | class TaskManager:
20 |     def __init__(self):
21 |         """
22 |         Initialize a TaskManager object.
23 | 
24 |         This method sets up the bookkeeping needed to dispatch tasks to worker threads.
25 | 
26 |         Attributes:
27 |         - task_dict (Dict[int, Task]): A dictionary that maps task IDs to Task objects.
28 |         - task_lock (threading.Lock): A lock used for thread synchronization when accessing the task_dict.
29 |         - now_id (int): The next task ID to be assigned.
30 |         - query_id (int): A counter incremented each time a worker queries for a task.
31 |         - Completed tasks are removed from task_dict, so an empty dict means all tasks finished.
32 | 
33 |         """
34 |         self.task_dict: Dict[int, Task] = {}
35 |         self.task_lock = threading.Lock()
36 |         self.now_id = 0
37 |         self.query_id = 0
38 | 
39 |     @property
40 |     def all_success(self) -> bool:
41 |         return len(self.task_dict) == 0
42 | 
43 |     def add_task(self, dependency_task_id: List[int], extra=None) -> int:
44 |         """
45 |         Adds a new task to the task dictionary.
46 | 
47 |         Args:
48 |             dependency_task_id (List[int]): List of task IDs that the new task depends on.
49 |             extra (Any, optional): Extra information associated with the task. Defaults to None.
50 | 
51 |         Returns:
52 |             int: The ID of the newly added task.
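
        Example (sketch):

            tm = TaskManager()
            first = tm.add_task([])        # no dependencies
            second = tm.add_task([first])  # becomes ready once `first` completes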
53 |         """
54 |         with self.task_lock:
55 |             depend_tasks = [self.task_dict[task_id] for task_id in dependency_task_id]
56 |             self.task_dict[self.now_id] = Task(
57 |                 task_id=self.now_id, dependencies=depend_tasks, extra_info=extra
58 |             )
59 |             self.now_id += 1
60 |             return self.now_id - 1
61 | 
62 |     def get_next_task(self, process_id: int):
63 |         """
64 |         Get the next task for a given process ID.
65 | 
66 |         Args:
67 |             process_id (int): The ID of the process.
68 | 
69 |         Returns:
70 |             tuple: A tuple containing the next task object and its ID.
71 |                 If there are no available tasks, returns (None, -1).
72 |         """
73 |         with self.task_lock:
74 |             self.query_id += 1
75 |             for task_id in self.task_dict.keys():
76 |                 ready = (
77 |                     len(self.task_dict[task_id].dependencies) == 0
78 |                 ) and self.task_dict[task_id].status == 0
79 |                 if ready:
80 |                     self.task_dict[task_id].status = 1
81 |                     print(
82 |                         f"{Fore.RED}[process {process_id}]{Style.RESET_ALL}: get task({task_id}), remain({len(self.task_dict)})"
83 |                     )
84 |                     return self.task_dict[task_id], task_id
85 |             return None, -1
86 | 
87 |     def mark_completed(self, task_id: int):
88 |         """
89 |         Marks a task as completed and removes it from the task dictionary.
90 | 
91 |         Args:
92 |             task_id (int): The ID of the task to mark as completed.
93 | 
94 |         """
95 |         with self.task_lock:
96 |             target_task = self.task_dict[task_id]
97 |             for task in self.task_dict.values():
98 |                 if target_task in task.dependencies:
99 |                     task.dependencies.remove(target_task)
100 |             self.task_dict.pop(task_id)  # remove it from the task dictionary
101 | 
102 | 
103 | def worker(task_manager, process_id: int, handler: Callable):
104 |     """
105 |     Worker function that performs tasks assigned by the task manager.
106 | 
107 |     Args:
108 |         task_manager: The task manager object that assigns tasks to workers.
109 |         process_id (int): The ID of the current worker process.
110 |         handler (Callable): The function that handles the tasks.
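
    Each worker loops: it polls `get_next_task`, sleeps 0.5 s when no task is
    ready, otherwise runs `handler(task.extra_info)` and marks the task
    completed; the loop exits once `all_success` reports an empty task dict.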
111 | 
112 |     Returns:
113 |         None
114 |     """
115 |     while True:
116 |         if task_manager.all_success:
117 |             return
118 |         task, task_id = task_manager.get_next_task(process_id)
119 |         if task is None:
120 |             time.sleep(0.5)
121 |             continue
122 |         # print(f"will perform task: {task_id}")
123 |         handler(task.extra_info)
124 |         task_manager.mark_completed(task.task_id)
125 |         # print(f"task complete: {task_id}")
126 | 
127 | 
128 | if __name__ == "__main__":
129 |     task_manager = TaskManager()
130 | 
131 |     def some_function(extra=None):  # sleep for a random interval
132 |         time.sleep(random.random() * 3)
133 | 
134 |     # Add tasks with dependencies, for example:
135 |     i1 = task_manager.add_task([])
136 |     i2 = task_manager.add_task([])
137 |     i3 = task_manager.add_task([i1])
138 |     i4 = task_manager.add_task([i2, i3])
139 |     i5 = task_manager.add_task([i2, i3])
140 |     i6 = task_manager.add_task([i1])
141 | 
142 |     threads = [threading.Thread(target=worker, args=(task_manager, pid, some_function)) for pid in range(4)]
143 |     for thread in threads:
144 |         thread.start()
145 |     for thread in threads:
146 |         thread.join()
147 | 
--------------------------------------------------------------------------------
/repo_agent/project_manager.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | import jedi
4 | 
5 | 
6 | class ProjectManager:
7 |     def __init__(self, repo_path, project_hierarchy):
8 |         self.repo_path = repo_path
9 |         self.project = jedi.Project(self.repo_path)
10 |         self.project_hierarchy = os.path.join(
11 |             self.repo_path, project_hierarchy, "project_hierarchy.json"
12 |         )
13 | 
14 |     def get_project_structure(self):
15 |         """
16 |         Returns the structure of the project by recursively walking through the directory tree.
17 | 
18 |         Returns:
19 |             str: The project structure as a string.
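
        Example (sketch): for a repository laid out as `my_repo/repo_agent/main.py`,
        the returned string resembles the following, with one extra leading space
        per directory level:

            my_repo
             repo_agent
              main.py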
20 |         """
21 | 
22 |         def walk_dir(root, prefix=""):
23 |             structure.append(prefix + os.path.basename(root))
24 |             new_prefix = prefix + " "
25 |             for name in sorted(os.listdir(root)):
26 |                 if name.startswith("."):  # skip hidden files and directories
27 |                     continue
28 |                 path = os.path.join(root, name)
29 |                 if os.path.isdir(path):
30 |                     walk_dir(path, new_prefix)
31 |                 elif os.path.isfile(path) and name.endswith(".py"):
32 |                     structure.append(new_prefix + name)
33 | 
34 |         structure = []
35 |         walk_dir(self.repo_path)
36 |         return "\n".join(structure)
37 | 
38 |     def build_path_tree(self, who_reference_me, reference_who, doc_item_path):
39 |         from collections import defaultdict
40 | 
41 |         def tree():
42 |             return defaultdict(tree)
43 | 
44 |         path_tree = tree()
45 | 
46 |         # Build the tree from both who_reference_me and reference_who
47 |         for path_list in [who_reference_me, reference_who]:
48 |             for path in path_list:
49 |                 parts = path.split(os.sep)
50 |                 node = path_tree
51 |                 for part in parts:
52 |                     node = node[part]
53 | 
54 |         # Handle doc_item_path
55 |         parts = doc_item_path.split(os.sep)
56 |         parts[-1] = "✳️" + parts[-1]  # mark the final object with a star
57 |         node = path_tree
58 |         for part in parts:
59 |             node = node[part]
60 | 
61 |         def tree_to_string(tree, indent=0):
62 |             s = ""
63 |             for key, value in sorted(tree.items()):
64 |                 s += " " * indent + key + "\n"
65 |                 if isinstance(value, dict):
66 |                     s += tree_to_string(value, indent + 1)
67 |             return s
68 | 
69 |         return tree_to_string(path_tree)
70 | 
71 | 
72 | if __name__ == "__main__":
73 |     project_manager = ProjectManager(repo_path="", project_hierarchy="")
74 |     print(project_manager.get_project_structure())
75 | 
--------------------------------------------------------------------------------
/repo_agent/prompt.py:
--------------------------------------------------------------------------------
1 | from llama_index.core import ChatPromptTemplate
2 | from llama_index.core.llms import ChatMessage, MessageRole
3 | 
4 | doc_generation_instruction = (
5 |     "You are an AI documentation assistant, and your task is to generate documentation based on the given code of an object. "
6 |     "The purpose of the documentation is to help developers and beginners understand the function and specific usage of the code.\n\n"
7 |     "Currently, you are in a project{project_structure_prefix}\n"
8 |     "{project_structure}\n\n"
9 |     "The path of the document you need to generate in this project is {file_path}.\n"
10 |     'Now you need to generate a document for a {code_type_tell}, whose name is "{code_name}".\n\n'
11 |     "The content of the code is as follows:\n"
12 |     "{code_content}\n\n"
13 |     "{reference_letter}\n"
14 |     "{referencer_content}\n\n"
15 |     "Please generate a detailed explanation document for this object based on the code of the target object itself {combine_ref_situation}.\n\n"
16 |     "Please write out the function of this {code_type_tell} in bold plain text, followed by a detailed analysis in plain text "
17 |     "(including all details), in language {language} to serve as the documentation for this part of the code.\n\n"
18 |     "The standard format is as follows:\n\n"
19 |     "**{code_name}**: The function of {code_name} is XXX.
(Only code name and one sentence function description are required)\n" 20 | "**{parameters_or_attribute}**: The {parameters_or_attribute} of this {code_type_tell}.\n" 21 | "· parameter1: XXX\n" 22 | "· parameter2: XXX\n" 23 | "· ...\n" 24 | "**Code Description**: The description of this {code_type_tell}.\n" 25 | "(Detailed and CERTAIN code analysis and description...{has_relationship})\n" 26 | "**Note**: Points to note about the use of the code\n" 27 | "{have_return_tell}\n\n" 28 | "Please note:\n" 29 | "- Any part of the content you generate SHOULD NOT CONTAIN Markdown hierarchical heading and divider syntax.\n" 30 | "- Write mainly in the desired language. If necessary, you can write with some English words in the analysis and description " 31 | "to enhance the document's readability because you do not need to translate the function name or variable name into the target language.\n" 32 | ) 33 | 34 | documentation_guideline = ( 35 | "Keep in mind that your audience is document readers, so use a deterministic tone to generate precise content and don't let them know " 36 | "you're provided with code snippet and documents. AVOID ANY SPECULATION and inaccurate descriptions! Now, provide the documentation " 37 | "for the target object in {language} in a professional way." 38 | ) 39 | 40 | 41 | message_templates = [ 42 | ChatMessage(content=doc_generation_instruction, role=MessageRole.SYSTEM), 43 | ChatMessage( 44 | content=documentation_guideline, 45 | role=MessageRole.USER, 46 | ), 47 | ] 48 | 49 | chat_template = ChatPromptTemplate(message_templates=message_templates) 50 | -------------------------------------------------------------------------------- /repo_agent/settings.py: -------------------------------------------------------------------------------- 1 | from enum import StrEnum 2 | from typing import Optional 3 | 4 | from iso639 import Language, LanguageNotFoundError 5 | from pydantic import ( 6 | DirectoryPath, 7 | Field, 8 | HttpUrl, 9 | PositiveFloat, 10 | PositiveInt, 11 | SecretStr, 12 | field_validator, 13 | ) 14 | from pydantic_settings import BaseSettings 15 | from pathlib import Path 16 | 17 | 18 | class LogLevel(StrEnum): 19 | DEBUG = "DEBUG" 20 | INFO = "INFO" 21 | WARNING = "WARNING" 22 | ERROR = "ERROR" 23 | CRITICAL = "CRITICAL" 24 | 25 | 26 | class ProjectSettings(BaseSettings): 27 | target_repo: DirectoryPath = "" # type: ignore 28 | hierarchy_name: str = ".project_doc_record" 29 | markdown_docs_name: str = "markdown_docs" 30 | ignore_list: list[str] = [] 31 | language: str = "English" 32 | max_thread_count: PositiveInt = 4 33 | log_level: LogLevel = LogLevel.INFO 34 | 35 | @field_validator("language") 36 | @classmethod 37 | def validate_language_code(cls, v: str) -> str: 38 | try: 39 | language_name = Language.match(v).name 40 | return language_name # Returning the resolved language name 41 | except LanguageNotFoundError: 42 | raise ValueError( 43 | "Invalid language input. Please enter a valid ISO 639 code or language name." 
44 | ) 45 | 46 | @field_validator("log_level", mode="before") 47 | @classmethod 48 | def set_log_level(cls, v: str) -> LogLevel: 49 | if isinstance(v, str): 50 | v = v.upper() # Convert input to uppercase 51 | if ( 52 | v in LogLevel._value2member_map_ 53 | ): # Check if the converted value is in enum members 54 | return LogLevel(v) 55 | raise ValueError(f"Invalid log level: {v}") 56 | 57 | 58 | class ChatCompletionSettings(BaseSettings): 59 | model: str = "gpt-4o-mini" # NOTE: No model restrictions for user flexibility, but it's recommended to use models with a larger context window. 60 | temperature: PositiveFloat = 0.2 61 | request_timeout: PositiveInt = 60 62 | openai_base_url: str = "https://api.openai.com/v1" 63 | openai_api_key: SecretStr = Field(..., exclude=True) 64 | 65 | @field_validator("openai_base_url", mode="before") 66 | @classmethod 67 | def convert_base_url_to_str(cls, openai_base_url: HttpUrl) -> str: 68 | return str(openai_base_url) 69 | 70 | 71 | class Setting(BaseSettings): 72 | project: ProjectSettings = {} # type: ignore 73 | chat_completion: ChatCompletionSettings = {} # type: ignore 74 | 75 | 76 | class SettingsManager: 77 | _setting_instance: Optional[Setting] = ( 78 | None # Private class attribute, initially None 79 | ) 80 | 81 | @classmethod 82 | def get_setting(cls): 83 | if cls._setting_instance is None: 84 | cls._setting_instance = Setting() 85 | return cls._setting_instance 86 | 87 | @classmethod 88 | def initialize_with_params( 89 | cls, 90 | target_repo: Path, 91 | markdown_docs_name: str, 92 | hierarchy_name: str, 93 | ignore_list: list[str], 94 | language: str, 95 | max_thread_count: int, 96 | log_level: str, 97 | model: str, 98 | temperature: float, 99 | request_timeout: int, 100 | openai_base_url: str, 101 | ): 102 | project_settings = ProjectSettings( 103 | target_repo=target_repo, 104 | hierarchy_name=hierarchy_name, 105 | markdown_docs_name=markdown_docs_name, 106 | ignore_list=ignore_list, 107 | language=language, 108 | max_thread_count=max_thread_count, 109 | log_level=LogLevel(log_level), 110 | ) 111 | 112 | chat_completion_settings = ChatCompletionSettings( 113 | model=model, 114 | temperature=temperature, 115 | request_timeout=request_timeout, 116 | openai_base_url=openai_base_url, 117 | ) 118 | 119 | cls._setting_instance = Setting( 120 | project=project_settings, 121 | chat_completion=chat_completion_settings, 122 | ) 123 | 124 | 125 | if __name__ == "__main__": 126 | setting = SettingsManager.get_setting() 127 | print(setting.model_dump()) 128 | -------------------------------------------------------------------------------- /repo_agent/utils/gitignore_checker.py: -------------------------------------------------------------------------------- 1 | import fnmatch 2 | import os 3 | 4 | 5 | class GitignoreChecker: 6 | def __init__(self, directory: str, gitignore_path: str): 7 | """ 8 | Initialize the GitignoreChecker with a specific directory and the path to a .gitignore file. 9 | 10 | Args: 11 | directory (str): The directory to be checked. 12 | gitignore_path (str): The path to the .gitignore file. 13 | """ 14 | self.directory = directory 15 | self.gitignore_path = gitignore_path 16 | self.folder_patterns, self.file_patterns = self._load_gitignore_patterns() 17 | 18 | def _load_gitignore_patterns(self) -> tuple: 19 | """ 20 | Load and parse the .gitignore file, then split the patterns into folder and file patterns. 21 | 22 | If the specified .gitignore file is not found, fall back to the default path. 
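        (The default resolves to the `.gitignore` two directories above this
        module, i.e. the one shipped at the root of the RepoAgent repository.)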
23 | 24 | Returns: 25 | tuple: A tuple containing two lists - one for folder patterns and one for file patterns. 26 | """ 27 | try: 28 | with open(self.gitignore_path, "r", encoding="utf-8") as file: 29 | gitignore_content = file.read() 30 | except FileNotFoundError: 31 | # Fallback to the default .gitignore path if the specified file is not found 32 | default_path = os.path.join( 33 | os.path.dirname(__file__), "..", "..", ".gitignore" 34 | ) 35 | with open(default_path, "r", encoding="utf-8") as file: 36 | gitignore_content = file.read() 37 | 38 | patterns = self._parse_gitignore(gitignore_content) 39 | return self._split_gitignore_patterns(patterns) 40 | 41 | @staticmethod 42 | def _parse_gitignore(gitignore_content: str) -> list: 43 | """ 44 | Parse the .gitignore content and return patterns as a list. 45 | 46 | Args: 47 | gitignore_content (str): The content of the .gitignore file. 48 | 49 | Returns: 50 | list: A list of patterns extracted from the .gitignore content. 51 | """ 52 | patterns = [] 53 | for line in gitignore_content.splitlines(): 54 | line = line.strip() 55 | if line and not line.startswith("#"): 56 | patterns.append(line) 57 | return patterns 58 | 59 | @staticmethod 60 | def _split_gitignore_patterns(gitignore_patterns: list) -> tuple: 61 | """ 62 | Split the .gitignore patterns into folder patterns and file patterns. 63 | 64 | Args: 65 | gitignore_patterns (list): A list of patterns from the .gitignore file. 66 | 67 | Returns: 68 | tuple: Two lists, one for folder patterns and one for file patterns. 69 | """ 70 | folder_patterns = [] 71 | file_patterns = [] 72 | for pattern in gitignore_patterns: 73 | if pattern.endswith("/"): 74 | folder_patterns.append(pattern.rstrip("/")) 75 | else: 76 | file_patterns.append(pattern) 77 | return folder_patterns, file_patterns 78 | 79 | @staticmethod 80 | def _is_ignored(path: str, patterns: list, is_dir: bool = False) -> bool: 81 | """ 82 | Check if the given path matches any of the patterns. 83 | 84 | Args: 85 | path (str): The path to check. 86 | patterns (list): A list of patterns to check against. 87 | is_dir (bool): True if the path is a directory, False otherwise. 88 | 89 | Returns: 90 | bool: True if the path matches any pattern, False otherwise. 91 | """ 92 | for pattern in patterns: 93 | if fnmatch.fnmatch(path, pattern): 94 | return True 95 | if is_dir and pattern.endswith("/") and fnmatch.fnmatch(path, pattern[:-1]): 96 | return True 97 | return False 98 | 99 | def check_files_and_folders(self) -> list: 100 | """ 101 | Check all files and folders in the given directory against the split gitignore patterns. 102 | Return a list of files that are not ignored and have the '.py' extension. 103 | The returned file paths are relative to the self.directory. 104 | 105 | Returns: 106 | list: A list of paths to files that are not ignored and have the '.py' extension. 
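
        Note: patterns are matched with `fnmatch` against bare file and
        directory names rather than full relative paths, so path-qualified
        patterns such as `docs/*.py` never match — a simplification of full
        .gitignore semantics.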
107 |         """
108 |         not_ignored_files = []
109 |         for root, dirs, files in os.walk(self.directory):
110 |             dirs[:] = [
111 |                 d
112 |                 for d in dirs
113 |                 if not self._is_ignored(d, self.folder_patterns, is_dir=True)
114 |             ]
115 | 
116 |             for file in files:
117 |                 file_path = os.path.join(root, file)
118 |                 relative_path = os.path.relpath(file_path, self.directory)
119 |                 if not self._is_ignored(
120 |                     file, self.file_patterns
121 |                 ) and file_path.endswith(".py"):
122 |                     not_ignored_files.append(relative_path)
123 | 
124 |         return not_ignored_files
125 | 
126 | 
127 | # Example usage:
128 | # gitignore_checker = GitignoreChecker('path_to_directory', 'path_to_gitignore_file')
129 | # not_ignored_files = gitignore_checker.check_files_and_folders()
130 | # print(not_ignored_files)
131 | 
--------------------------------------------------------------------------------
/repo_agent/utils/meta_info_utils.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | import os
3 | 
4 | import git
5 | from colorama import Fore, Style
6 | 
7 | from repo_agent.log import logger
8 | from repo_agent.settings import SettingsManager
9 | 
10 | latest_verison_substring = "_latest_version.py"
11 | 
12 | 
13 | def make_fake_files():
14 |     """Inspect the staging area via git status. For each file:
15 |     1. Newly added but not staged: ignored.
16 |     2. Content modified but not staged: the original file is renamed to a fake file, and a file with the original name is recreated from the content git has recorded.
17 |     3. Deleted but not staged: the original file is renamed to a fake file, and a file with the original name is recreated from the content git has recorded.
18 |     Note: files in the target repository must not end with latest_verison_substring.
19 |     """
20 |     delete_fake_files()
21 |     setting = SettingsManager.get_setting()
22 | 
23 |     repo = git.Repo(setting.project.target_repo)
24 |     unstaged_changes = repo.index.diff(None)  # tracked by git, with modifications not yet staged
25 |     untracked_files = repo.untracked_files  # present on disk but unknown to git
26 | 
27 |     jump_files = []  # these files are neither parsed nor documented, and reference relations are not computed for them
28 |     for file_name in untracked_files:
29 |         if file_name.endswith(".py"):
30 |             print(
31 |                 f"{Fore.LIGHTMAGENTA_EX}[SKIP untracked files]: {Style.RESET_ALL}{file_name}"
32 |             )
33 |             jump_files.append(file_name)
34 |     for diff_file in unstaged_changes.iter_change_type(
35 |         "A"
36 |     ):  # newly added files that were never staged are skipped entirely
37 |         if diff_file.a_path.endswith(latest_verison_substring):
38 |             logger.error(
39 |                 "FAKE_FILE_IN_GIT_STATUS detected! Consider running `delete_fake_files` and regenerating the documents."
40 |             )
41 |             exit()
42 |         jump_files.append(diff_file.a_path)
43 | 
44 |     file_path_reflections = {}
45 |     for diff_file in itertools.chain(
46 |         unstaged_changes.iter_change_type("M"), unstaged_changes.iter_change_type("D")
47 |     ):  # collect files that were modified or deleted
48 |         if diff_file.a_path.endswith(latest_verison_substring):
49 |             logger.error(
50 |                 "FAKE_FILE_IN_GIT_STATUS detected! Consider running `delete_fake_files` and regenerating the documents."
51 |             )
52 |             exit()
53 |         now_file_path = diff_file.a_path  # path relative to the repo root
54 |         if now_file_path.endswith(".py"):
55 |             raw_file_content = diff_file.a_blob.data_stream.read().decode("utf-8")
56 |             latest_file_path = now_file_path[:-3] + latest_verison_substring
57 |             if os.path.exists(os.path.join(setting.project.target_repo, now_file_path)):
58 |                 os.rename(
59 |                     os.path.join(setting.project.target_repo, now_file_path),
60 |                     os.path.join(setting.project.target_repo, latest_file_path),
61 |                 )
62 | 
63 |                 print(
64 |                     f"{Fore.LIGHTMAGENTA_EX}[Save Latest Version of Code]: {Style.RESET_ALL}{now_file_path} -> {latest_file_path}"
65 |                 )
66 |             else:
67 |                 print(
68 |                     f"{Fore.LIGHTMAGENTA_EX}[Create Temp-File for Deleted(But not Staged) Files]: {Style.RESET_ALL}{now_file_path} -> {latest_file_path}"
69 |                 )
70 |                 with open(
71 |                     os.path.join(setting.project.target_repo, latest_file_path), "w"
72 |                 ) as writer:
73 |                     pass
74 |             with open(
75 |                 os.path.join(setting.project.target_repo, now_file_path), "w"
76 |             ) as writer:
77 |                 writer.write(raw_file_content)
78 |             file_path_reflections[now_file_path] = latest_file_path  # map the real path to its fake counterpart
79 |     return file_path_reflections, jump_files
80 | 
81 | 
82 | def delete_fake_files():
83 |     """Delete all fake files once the documentation task has finished."""
84 |     setting = SettingsManager.get_setting()
85 | 
86 |     def gci(filepath):
87 |         # Recursively walk every file under filepath, including subdirectories
88 |         files = os.listdir(filepath)
89 |         for fi in files:
90 |             fi_d = os.path.join(filepath, fi)
91 |             if os.path.isdir(fi_d):
92 |                 gci(fi_d)
93 |             elif fi_d.endswith(latest_verison_substring):
94 |                 origin_name = fi_d.replace(latest_verison_substring, ".py")
95 |                 os.remove(origin_name)
96 |                 if os.path.getsize(fi_d) == 0:
97 |                     print(
98 |                         f"{Fore.LIGHTRED_EX}[Deleting Temp File]: {Style.RESET_ALL}{fi_d[len(str(setting.project.target_repo)):]}, {origin_name[len(str(setting.project.target_repo)):]}"
99 |                     )  # type: ignore
100 |                     os.remove(fi_d)
101 |                 else:
102 |                     print(
103 |                         f"{Fore.LIGHTRED_EX}[Recovering Latest Version]: {Style.RESET_ALL}{origin_name[len(str(setting.project.target_repo)):]} <- {fi_d[len(str(setting.project.target_repo)):]}"
104 |                     )  # type: ignore
105 |                     os.rename(fi_d, origin_name)
106 | 
107 |     gci(setting.project.target_repo)
108 | 
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenBMB/RepoAgent/825d988127d7bfd757237d9c4e8678d9104030f0/tests/__init__.py
--------------------------------------------------------------------------------
/tests/test_change_detector.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import unittest
4 | 
5 | from git import Repo
6 | 
7 | from repo_agent.change_detector import ChangeDetector
8 | 
9 | 
10 | class TestChangeDetector(unittest.TestCase):
11 |     @classmethod
12 |     def setUpClass(cls):
13 |         # Path of the test repository
14 |         cls.test_repo_path = os.path.join(os.path.dirname(__file__), 'test_repo')
15 | 
16 |         # Create the test repository folder if it does not exist
17 |         if not os.path.exists(cls.test_repo_path):
18 |             os.makedirs(cls.test_repo_path)
19 | 
20 |         # Initialize the Git repository
21 |         cls.repo = Repo.init(cls.test_repo_path)
22 | 
23 |         # Configure Git user information
24 |         cls.repo.git.config('user.email', 'ci@example.com')
25 |         cls.repo.git.config('user.name', 'CI User')
26 | 
27 |         # Create some test files
28 |         with open(os.path.join(cls.test_repo_path, 'test_file.py'), 'w') as f:
29 |             f.write('print("Hello, Python")')
30 | 
31 |         with open(os.path.join(cls.test_repo_path, 'test_file.md'), 'w') as f:
32 |             f.write('# Hello, Markdown')
33 | 
34 |         # Simulate git operations: stage and commit the files
35 |         cls.repo.git.add(A=True)
36 |         cls.repo.git.commit('-m', 'Initial commit')
37 | 
38 |     def test_get_staged_pys(self):
39 |         # Create a new Python file and stage it
40 |         new_py_file = os.path.join(self.test_repo_path, 'new_test_file.py')
41 |         with open(new_py_file, 'w') as f:
42 |             f.write('print("New Python File")')
43 |         self.repo.git.add(new_py_file)
44 | 
45 |         # Use ChangeDetector to inspect the staged files
46 |         change_detector = ChangeDetector(self.test_repo_path)
47 |         staged_files = change_detector.get_staged_pys()
48 | 
49 |         # Assert that the new file is in the list of staged files
50 |         self.assertIn('new_test_file.py', [os.path.basename(path) for path in staged_files])
51 | 
52 |         print(f"\ntest_get_staged_pys: Staged Python files: {staged_files}")
53 | 
54 | 
55 |     def test_get_unstaged_mds(self):
56 |         # Modify a Markdown file without staging it
57 |         md_file = os.path.join(self.test_repo_path, 'test_file.md')
58 |         with open(md_file, 'a') as f:
59 |             f.write('\nAdditional Markdown content')
60 | 
61 |         # Use ChangeDetector to collect the unstaged Markdown files
62 |         change_detector = ChangeDetector(self.test_repo_path)
63 |         unstaged_files = change_detector.get_to_be_staged_files()
64 | 
65 |         # Assert that the modified file is in the list of unstaged files
66 |         self.assertIn('test_file.md', [os.path.basename(path) for path in unstaged_files])
67 | 
68 |         print(f"\ntest_get_unstaged_mds: Unstaged Markdown files: {unstaged_files}")
69 | 
70 | 
71 |     def test_add_unstaged_mds(self):
72 |         # Make sure there is an unstaged Markdown file
73 |         self.test_get_unstaged_mds()
74 | 
75 |         # Use ChangeDetector to stage the unstaged Markdown files
76 |         change_detector = ChangeDetector(self.test_repo_path)
77 |         change_detector.add_unstaged_files()
78 | 
79 |         # Check whether the files were staged
80 |         unstaged_files_after_add = change_detector.get_to_be_staged_files()
81 | 
82 |         # Assert that no unstaged Markdown files remain after staging
83 |         self.assertEqual(len(unstaged_files_after_add), 0)
84 | 
85 |         remaining_unstaged_files = len(unstaged_files_after_add)
86 |         print(f"\ntest_add_unstaged_mds: Number of remaining unstaged Markdown files after add: {remaining_unstaged_files}")
87 | 
88 | 
89 |     @classmethod
90 |     def tearDownClass(cls):
91 |         # Clean up the test repository (portable, unlike shelling out to `rm -rf`)
92 |         cls.repo.close()
93 |         shutil.rmtree(cls.test_repo_path)
94 | 
95 | if __name__ == '__main__':
96 |     unittest.main()
97 | 
--------------------------------------------------------------------------------
/tests/test_json_handler.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from unittest.mock import mock_open, patch
3 | 
4 | from repo_agent.chat_with_repo.json_handler import (
5 |     JsonFileProcessor,  # Adjust the import according to your project structure
6 | )
7 | 
8 | 
9 | class TestJsonFileProcessor(unittest.TestCase):
10 | 
11 |     def setUp(self):
12 |         self.processor = JsonFileProcessor("test.json")
13 | 
14 |     @patch("builtins.open", new_callable=mock_open, read_data='{"files": [{"objects": [{"md_content": "content1"}]}]}')
15 |     def test_read_json_file(self, mock_file):
16 |         # Test the read_json_file method
17 |         data = self.processor.read_json_file()
18 |         self.assertEqual(data, {"files": [{"objects": [{"md_content": "content1"}]}]})
19 |         mock_file.assert_called_with("test.json", "r", encoding="utf-8")
20 | 
21 |     @patch.object(JsonFileProcessor, 'read_json_file')
22 |     def test_extract_md_contents(self, mock_read_json):
23 |         # Test the extract_md_contents method
24 |         mock_read_json.return_value = {"files": [{"objects": [{"md_content": "content1"}]}]}
25 |         md_contents = self.processor.extract_md_contents()
26 |         self.assertIn("content1", md_contents)
27 | 
| @patch("builtins.open", new_callable=mock_open, read_data='{"name": "test", "files": [{"name": "file1"}]}') 29 | def test_search_in_json_nested(self, mock_file): 30 | # Test search_in_json_nested method 31 | result = self.processor.search_in_json_nested("test.json", "file1") 32 | self.assertEqual(result, {"name": "file1"}) 33 | mock_file.assert_called_with("test.json", "r", encoding="utf-8") 34 | 35 | # Additional tests for error handling (FileNotFoundError, JSONDecodeError, etc.) can be added here 36 | 37 | if __name__ == '__main__': 38 | unittest.main() 39 | -------------------------------------------------------------------------------- /tests/test_structure_tree.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import defaultdict 3 | 4 | 5 | def build_path_tree(who_reference_me, reference_who, doc_item_path): 6 | def tree(): 7 | return defaultdict(tree) 8 | path_tree = tree() 9 | 10 | for path_list in [who_reference_me, reference_who]: 11 | for path in path_list: 12 | parts = path.split(os.sep) 13 | node = path_tree 14 | for part in parts: 15 | node = node[part] 16 | 17 | # 处理 doc_item_path 18 | parts = doc_item_path.split(os.sep) 19 | parts[-1] = '✳️' + parts[-1] # 在最后一个对象前面加上星号 20 | node = path_tree 21 | for part in parts: 22 | node = node[part] 23 | 24 | def tree_to_string(tree, indent=0): 25 | s = '' 26 | for key, value in sorted(tree.items()): 27 | s += ' ' * indent + key + '\n' 28 | if isinstance(value, dict): 29 | s += tree_to_string(value, indent + 1) 30 | return s 31 | 32 | return tree_to_string(path_tree) 33 | 34 | 35 | if "__name__ == main": 36 | who_reference_me = [ 37 | "repo_agent/file_handler.py/FileHandler/__init__", 38 | "repo_agent/runner.py/need_to_generate" 39 | ] 40 | reference_who = [ 41 | "repo_agent/file_handler.py/FileHandler/__init__", 42 | "repo_agent/runner.py/need_to_generate", 43 | ] 44 | 45 | doc_item_path = 'tests/test_change_detector.py/TestChangeDetector' 46 | 47 | result = build_path_tree(who_reference_me,reference_who,doc_item_path) 48 | print(result) 49 | --------------------------------------------------------------------------------