├── .streamlit ├── config.toml └── secrets.toml ├── LICENSE ├── README.md ├── __pycache__ └── app.cpython-311.pyc ├── app.py ├── image ├── index.png ├── md.png ├── py.png ├── qa.png ├── qichen.jpg └── streamlit.png ├── packages.txt ├── requirements.txt └── work ├── __pycache__ └── work.cpython-311.pyc └── work.py /.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [runner] 2 | 3 | # Default: true 4 | fastReruns = false -------------------------------------------------------------------------------- /.streamlit/secrets.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/.streamlit/secrets.toml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Project Overview
2 | 
3 | ## code_using_GPT
4 | 
5 | ### 1. Introduction
6 | 
7 | This project uses ChatGPT to generate comments for code, produce a README-style explanatory document, and answer questions about an uploaded code file with a large language model, so that the code is easier to understand.
8 | 
9 | ### 2. One-Click Deployment on Streamlit
10 | 
11 | 
12 | * Fork this repository, then click `Streamlit 部署` (Streamlit Deploy) and select NEW APP
13 | 
14 | 

15 | 16 | 17 | 18 |
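The deployed app asks for an OpenAI API key in the sidebar at runtime. If you would rather not paste the key in on every visit, Streamlit Community Cloud can supply it through the app's Secrets instead. The snippet below is an optional tweak to `app.py`, not code that ships with this repository; it assumes a secret named `openai_api_key` (the same name used in a commented-out `st.secrets` line at the bottom of `work/work.py`) has actually been configured in the app's Settings under Secrets:

```python
# Hypothetical change to app.py: prefer a configured secret, fall back to manual input.
openai_api_key = st.secrets.get("openai_api_key", "")
if not openai_api_key:
    openai_api_key = st.text_input("OpenAI API Key", key="chatbot_api_key", type="password")
```

Everything downstream stays the same: `app.py` still hands the key to `work.load_env`, which exports it for the OpenAI and Chroma clients.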

19 | 
20 | 
21 | ![App interface](./image/streamlit.png)
22 | * Repository: select your forked repository
23 | * Branch: usually main by default
24 | * Main file path: app.py
25 | * App URL (Optional): custom subdomain
26 | * Click Deploy! and you are done
27 | 
28 | ### Local Installation
29 | 1. Install Git:
30 | 
31 | If you do not have Git yet, install it first. You can download the version for your operating system from the official Git site: https://git-scm.com/downloads
32 | 
33 | 2. Clone the GitHub repository:
34 | 
35 | Open a terminal, change to the directory you want to clone the project into, and run the following command to clone the repository to your machine:
36 | ```bash
37 | git clone https://github.com/llmadd/code_using_GPT.git
38 | ```
39 | Replace the URL above with the actual URL of the GitHub repository you want to clone.
40 | 
41 | 3. Enter the project directory:
42 | 
43 | Use cd to move into the cloned repository:
44 | ```bash
45 | cd code_using_GPT
46 | ```
47 | 
48 | 4. Create a virtual environment (optional but recommended):
49 | 
50 | To isolate the project's dependencies, you can create a virtual environment. Run the following commands to create and activate it:
51 | 
52 | ```bash
53 | python -m venv venv
54 | source venv/bin/activate  # on Windows use venv\Scripts\activate
55 | ```
56 | 
57 | 5. Install the project dependencies:
58 | 
59 | Use pip to install everything listed in requirements.txt:
60 | 
61 | ```bash
62 | pip install -r requirements.txt
63 | ```
64 | 
65 | This installs all Python packages the project needs.
66 | 
67 | 6. Run the Streamlit application:
68 | Start the Streamlit app from the command line:
69 | 
70 | ```bash
71 | streamlit run app.py
72 | ```
73 | 
74 | This launches the Streamlit application and opens its interface in your default browser.
75 | 
76 | ### 3. Usage
77 | After uploading a file, the interface looks like this:
78 | ![App interface](./image/index.png)
79 | 
80 | * Click the comment button to generate comments for the current file and download the result
81 | ![Code comments](./image/py.png)
82 | 
83 | * Click the documentation button to generate an explanatory document for the code and download it
84 | ![Generated document](./image/md.png)
85 | * You can also chat with the model: ask a question and it will answer based on the code relevant to that question
86 | ![Q&A over code](./image/qa.png)
87 | 
88 | ### 4. Tech Stack
89 | 
90 | #### langchain + Streamlit + OpenAI + Chroma
91 | 
92 | * langchain builds the chains for each workflow
93 | 
94 | * streamlit builds the front-end interface
95 | 
96 | * OpenAI GPT-3.5 models handle generation
97 | 
98 | * chromadb serves as the vector database
99 | 
100 | ### 5. Planned Features
101 | - [x] Multi-language support (C, Java, etc.)
102 | 
103 | - [x] One-click deployment and easier OpenAI key configuration
104 | 
105 | - [x] User-selectable models
106 | 
107 | - [ ] One-click code formatting
108 | 
109 | - [ ] Contact me for further requests...
110 | 
111 | ### 6. 
联系我 112 | 113 | ![微信](./image/qichen.jpg) 114 | 115 | 116 | 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /__pycache__/app.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/__pycache__/app.cpython-311.pyc -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import work.work as work 3 | from typing import Any, Dict, List, Optional, Sequence, Tuple, Union 4 | 5 | 6 | def doc_code(code_str,chat)->str: 7 | spliter = work.get_file_type(st.session_state.file_type) 8 | code_split_ = work.code_splite(code_str,spliter) 9 | result = "" 10 | results = work.doc_futures_run(code_split_,chat) 11 | for i in results: 12 | result += i 13 | return result 14 | 15 | def code_with_comment(code_str,chat)->str: 16 | spliter = work.get_file_type(st.session_state.file_type) 17 | code_split_ = work.code_splite(code_str,spliter) 18 | result = "" 19 | results = work.comment_future_run(code_split_,chat) 20 | for i in results: 21 | result += i 22 | return result 23 | 24 | def qa_with_code(question:str,code_str,chat)->str: 25 | spliter = work.get_file_type(st.session_state.file_type) 26 | code_split_ = work.code_splite(code_str,spliter) 27 | db = work.get_code_embd_save(code_split_) 28 | result = work.qa_with_code_chain(db = db,question=question,chat = chat) 29 | return result 30 | 31 | 32 | 33 | st.title(":blue[Analyzing code using GPT 🤖]") 34 | 35 | 36 | if 'code' not in st.session_state: 37 | st.session_state.code = "" 38 | if 'doc_result' not in st.session_state: 39 | st.session_state.doc_result = "" 40 | if 'comment_result' not in st.session_state: 41 | st.session_state.comment_result = "" 42 | if 'file_type' not in st.session_state: 43 | st.session_state.file_type = "" 44 | 45 | with st.sidebar: 46 | st.title(":blue[ChatGPT 🤖]") 47 | openai_api_key = st.text_input("OpenAI API Key", key="chatbot_api_key", type="password") 48 | model_name = st.selectbox( 49 | '选择Openai模型', 50 | ('gpt-3.5-turbo', 'gpt-3.5-turbo-16k', 'gpt-3.5-turbo-0613','gpt-3.5-turbo-16k-0613','gpt-4','gpt-4-0613','gpt-4-32k','gpt-4-32k-0613')) 51 | temperature = st.slider('设置模型温度值', 0.0, 2.0, 0.5) 52 | 53 | if not openai_api_key: 54 | st.error("请输入OpenAI API Key") 55 | st.stop() 56 | else: 57 | st.session_state.chat = work.load_env(openai_api_key=openai_api_key,model_name=model_name,temperature=temperature) 58 | 59 | 60 | 61 | 62 | uploaded_file = st.file_uploader(label=":blue[上传代码文件]",type=[ 63 | 'cpp', 'cc', 'cxx', 'hpp', 'h', 'hxx', 64 | 'go', 'java', 'js', 'php', 'proto', 'py', 65 | 'rst', 'rb', 'rs', 'scala', 'swift', 'md', 66 | 'markdown', 'tex', 'html', 'sol'], help=":blue[仅支持所有主流代码文件]",key = "up_file") 67 | 68 | if uploaded_file is not None: 69 | with st.sidebar: 70 | st.title(f"{uploaded_file.name}文件源码:") 71 | code_str = uploaded_file.getvalue().decode("utf-8") 72 | st.session_state.file_type = uploaded_file.name 73 | st.session_state.code = code_str 74 | st.code(code_str,language="python") 75 | col1, col2 = st.columns(spec= [0.5,0.5], gap = "large") 76 | with col1: 77 | comment_bt = st.button("插入注释") 78 | if comment_bt: 79 | with st.spinner("正在插入注释..."): 80 | result = code_with_comment(st.session_state.code,st.session_state.chat) 81 | st.session_state.comment_result = result 82 | 
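# Streamlit reruns this script on every widget interaction, so the generated text is cached in st.session_state and re-rendered by the expanders further down.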
st.success("注释插入完成") 83 | with col2: 84 | doc_code_bt = st.button("生成文档") 85 | if doc_code_bt: 86 | with st.spinner("正在生成文档..."): 87 | result = doc_code(st.session_state.code,st.session_state.chat) 88 | st.session_state.doc_result = result 89 | st.success("文档生成完成") 90 | if st.session_state.comment_result != "": 91 | with st.expander("注释结果"): 92 | st.code(st.session_state.comment_result,language="python") 93 | st.download_button( 94 | label="下载注释后文件", 95 | data=st.session_state.comment_result, 96 | file_name='llmadd.py',) 97 | if st.session_state.doc_result != "": 98 | with st.expander("文档内容"): 99 | st.markdown(st.session_state.doc_result) 100 | st.download_button( 101 | label="下载文档", 102 | data=st.session_state.doc_result, 103 | file_name='doc.md',) 104 | 105 | # chat 106 | if "messages" not in st.session_state: 107 | st.session_state["messages"] = [{"role": "ai", "avatar":"🤖", "content": "我是强大的人工智能助手,请上传你的代码文件,我将帮助你更好了解代码!"}] 108 | for msg in st.session_state.messages: 109 | st.chat_message(name=msg["role"],avatar=msg["avatar"]).markdown(msg["content"]) 110 | 111 | if st.session_state.code == "": 112 | st.error("请上传代码文件") 113 | st.stop() 114 | prompt = st.chat_input(placeholder="咨询与代码文件有关问题",max_chars = 4000,key="prompt") 115 | if prompt: 116 | 117 | st.session_state.messages.append({"role": "human", "avatar":"🧑", "content": prompt}) 118 | 119 | st.chat_message(name="human",avatar="🧑").markdown(prompt) 120 | 121 | with st.chat_message(name="ai",avatar="🤖"): 122 | with st.spinner("正在生成答案..."): 123 | response = qa_with_code(prompt,st.session_state.code,st.session_state.chat) 124 | 125 | st.session_state.messages.append({"role": "ai", "avatar":"🤖", "content": response}) 126 | 127 | st.markdown(response) 128 | 129 | -------------------------------------------------------------------------------- /image/index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/image/index.png -------------------------------------------------------------------------------- /image/md.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/image/md.png -------------------------------------------------------------------------------- /image/py.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/image/py.png -------------------------------------------------------------------------------- /image/qa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/image/qa.png -------------------------------------------------------------------------------- /image/qichen.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/image/qichen.jpg -------------------------------------------------------------------------------- /image/streamlit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/image/streamlit.png 
--------------------------------------------------------------------------------
/packages.txt:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | chromadb==0.4.8
2 | langchain==0.0.279
3 | openai==0.28.0
4 | python-dotenv==1.0.0
5 | streamlit==1.26.0
6 | pysqlite3-binary
7 | tiktoken==0.4.0
8 | 
--------------------------------------------------------------------------------
/work/__pycache__/work.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/work/__pycache__/work.cpython-311.pyc
--------------------------------------------------------------------------------
/work/work.py:
--------------------------------------------------------------------------------
1 | import os
2 | from dotenv import load_dotenv
3 | from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
4 | import concurrent.futures
5 | from langchain.chains.base import Chain
6 | from langchain.prompts import PromptTemplate
7 | from langchain.chat_models import ChatOpenAI
8 | from langchain.chains import LLMChain
9 | from langchain.prompts.chat import (
10 |     ChatPromptTemplate,
11 |     HumanMessagePromptTemplate,
12 | )
13 | from langchain.text_splitter import (
14 |     RecursiveCharacterTextSplitter,
15 |     Language,
16 | )
17 | from langchain.vectorstores import Chroma
18 | from langchain.embeddings import OpenAIEmbeddings
19 | 
20 | CHUNK_SIZE = 3000
21 | 
22 | def load_env(openai_api_key:str,model_name:str,temperature:float)->ChatOpenAI:
23 |     os.environ["OPENAI_API_KEY"] = openai_api_key
24 |     chat = ChatOpenAI(model = model_name,temperature = temperature,streaming=True)
25 |     return chat
26 | 
27 | code_with_comment_chain_systemtemplate = """
28 | 你是强大的人工智能ChatGPT。
29 | 
30 | 你的任务是为代码增加中文注释。禁止修改代码!
31 | 
32 | 只允许输出增加注释后的代码。禁止输出任何其他内容!
33 | """
34 | 
35 | doc_code_chain_systemtemplate = """
36 | 你是强大的人工智能ChatGPT。
37 | 
38 | 你的任务是为代码生成一篇README.md文档。
39 | 
40 | 文档中介绍代码用到的技术栈,代码的功能,代码的使用方法,代码的运行环境等等。
41 | 
42 | 用markdown格式输出README.md文档。
43 | """
44 | 
45 | qa_with_code_chain_systemtemplate = """
46 | 你是强大的人工智能ChatGPT。
47 | 
48 | 你需要根据代码内容和你自身的知识尽可能地回答用户的问题。
49 | 
50 | 要尽可能详细地回答用户问题。
51 | """
52 | 
53 | def get_file_type(file_name):
54 |     extension = "."+file_name.split('.')[-1]
55 |     if extension in ['.cpp', '.cc', '.cxx', '.hpp', '.h', '.hxx']:
56 |         splitter = RecursiveCharacterTextSplitter.from_language(
57 |             language=Language.CPP,chunk_size=CHUNK_SIZE, chunk_overlap=0
58 |         )
59 |         return splitter
60 |     elif extension == '.go':
61 |         splitter = RecursiveCharacterTextSplitter.from_language(
62 |             language=Language.GO,chunk_size=CHUNK_SIZE, chunk_overlap=0
63 |         )
64 |         return splitter
65 |     elif extension == '.java':
66 |         splitter = RecursiveCharacterTextSplitter.from_language(
67 |             language=Language.JAVA,chunk_size=CHUNK_SIZE, chunk_overlap=0
68 |         )
69 |         return splitter
70 |     elif extension == '.js':
71 |         splitter = RecursiveCharacterTextSplitter.from_language(
72 |             language=Language.JS,chunk_size=CHUNK_SIZE, chunk_overlap=0
73 |         )
74 |         return splitter
75 |     elif extension == '.php':
76 |         splitter = RecursiveCharacterTextSplitter.from_language(
77 |             language=Language.PHP,chunk_size=CHUNK_SIZE, chunk_overlap=0
78 |         )
79 |         return splitter
80 |     elif extension == '.proto':
81 |         splitter = RecursiveCharacterTextSplitter.from_language(
82 |             language=Language.PROTO,chunk_size=CHUNK_SIZE, chunk_overlap=0
83 |         )
84 |         return splitter
85 |     elif extension == '.py':
86 |         splitter = RecursiveCharacterTextSplitter.from_language(
87 |             language=Language.PYTHON,chunk_size=CHUNK_SIZE, chunk_overlap=0
88 |         )
89 |         return splitter
90 |     elif extension == '.rst':
91 |         splitter = RecursiveCharacterTextSplitter.from_language(
92 |             language=Language.RST,chunk_size=CHUNK_SIZE, chunk_overlap=0
93 |         )
94 |         return splitter
95 |     elif extension == '.rb':
96 |         splitter = RecursiveCharacterTextSplitter.from_language(
97 |             language=Language.RUBY,chunk_size=CHUNK_SIZE, chunk_overlap=0
98 |         )
99 |         return splitter
100 |     elif extension == '.rs':
101 |         splitter = RecursiveCharacterTextSplitter.from_language(
102 |             language=Language.RUST,chunk_size=CHUNK_SIZE, chunk_overlap=0
103 |         )
104 |         return splitter
105 |     elif extension == '.scala':
106 |         splitter = RecursiveCharacterTextSplitter.from_language(
107 |             language=Language.SCALA,chunk_size=CHUNK_SIZE, chunk_overlap=0
108 |         )
109 |         return splitter
110 |     elif extension == '.swift':
111 |         splitter = RecursiveCharacterTextSplitter.from_language(
112 |             language=Language.SWIFT,chunk_size=CHUNK_SIZE, chunk_overlap=0
113 |         )
114 |         return splitter
115 |     elif extension in ['.md', '.markdown']:
116 |         splitter = RecursiveCharacterTextSplitter.from_language(
117 |             language=Language.MARKDOWN,chunk_size=CHUNK_SIZE, chunk_overlap=0
118 |         )
119 |         return splitter
120 |     elif extension == '.tex':
121 |         splitter = RecursiveCharacterTextSplitter.from_language(
122 |             language=Language.LATEX,chunk_size=CHUNK_SIZE, chunk_overlap=0
123 |         )
124 |         return splitter
125 |     elif extension in ['.html', '.htm']:
126 |         splitter = RecursiveCharacterTextSplitter.from_language(
127 |             language=Language.HTML,chunk_size=CHUNK_SIZE, chunk_overlap=0
128 |         )
129 |         return splitter
130 |     elif extension == '.sol':
131 |         splitter = RecursiveCharacterTextSplitter.from_language(
132 |             language=Language.SOL,chunk_size=CHUNK_SIZE, chunk_overlap=0
133 |         )
134 |         return splitter
135 |     else:
136 |         raise ValueError(f"Unsupported file type: {extension}")
137 | 
138 | def get_code_embd_save(code_split:List[str])->Chroma:
139 |     # Embed the code chunks and index them in an in-memory Chroma collection.
140 |     embeddings = OpenAIEmbeddings()
141 |     db = Chroma.from_texts(texts=code_split,embedding=embeddings)
142 |     return db
143 | 
144 | 
145 | def qa_with_code_chain(db:Chroma,question:str,chat)->str:
146 |     # Retrieve the four most relevant chunks and stuff them into the prompt.
147 |     retrievers_re = ""
148 |     retrievers = db.as_retriever(search_kwargs={'k': 4,})
149 |     doc_re = retrievers.get_relevant_documents(question)
150 |     for i in doc_re:
151 |         retrievers_re += i.page_content
152 |     human_prompt = """
153 | 根据下面代码内容回答问题:
154 | --------------------
155 | {retrievers_re}
156 | --------------------
157 | 问题:{question}
158 | """
159 |     human_message_prompt = HumanMessagePromptTemplate(
160 |         prompt=PromptTemplate(
161 |             template=human_prompt,
162 |             input_variables=["question"],
163 |             partial_variables={"retrievers_re": retrievers_re}
164 |         )
165 |     )
166 |     chat_prompt_template = ChatPromptTemplate.from_messages([
167 |         ("system", qa_with_code_chain_systemtemplate),
168 |         human_message_prompt
169 |     ])
170 |     chain = LLMChain(llm=chat, prompt=chat_prompt_template)
171 |     answer = chain.run(question)
172 |     return answer
173 | 
174 | def code_splite(code:str,splitter:RecursiveCharacterTextSplitter)->List[str]:
175 |     splite_code = splitter.split_text(text=code)
176 | 
177 |     return splite_code
178 | 
179 | def code_with_comment_chain(code:str,chat)->str:
180 |     chat_prompt_template = ChatPromptTemplate.from_messages([
181 |         ("system", code_with_comment_chain_systemtemplate),
182 |         ("human","{text}")
183 |     ])
184 |     chain = LLMChain(llm=chat, prompt=chat_prompt_template)
185 |     result = chain.run(code)
186 |     return result
187 | 
188 | def code_doc_chain(code: str,chat) -> str:
189 |     chat_prompt_template = ChatPromptTemplate.from_messages([
190 |         ("system", doc_code_chain_systemtemplate),
191 |         ("human", "{text}")
192 |     ])
193 |     chain = LLMChain(llm=chat, prompt=chat_prompt_template)
194 |     result = chain.run(code)
195 |     return result
196 | 
197 | def doc_futures_run(code_list:List[str],chat)->List[str]:
198 | 
199 |     results = []
200 | 
201 |     with concurrent.futures.ThreadPoolExecutor() as executor:
202 | 
203 |         futures = [executor.submit(code_doc_chain, _i,chat) for _i in code_list]
204 | 
205 |         # Collect results in the original chunk order (as_completed would return them out of order).
206 |         for future in futures:
207 |             results.append(future.result())
208 |     return results
209 | 
210 | def comment_future_run(code_list:List[str],chat)->List[str]:
211 |     results = []
212 | 
213 |     with concurrent.futures.ThreadPoolExecutor() as executor:
214 | 
215 |         futures = [executor.submit(code_with_comment_chain, _i,chat) for _i in code_list]
216 | 
217 |         # Collect results in the original chunk order (as_completed would return them out of order).
218 |         for future in futures:
219 |             results.append(future.result())
220 |     return results
221 | 
222 | 
223 | 
224 | # print(st.secrets["openai_api_key"])
225 | 
--------------------------------------------------------------------------------
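For reference, the helpers above are only ever wired together through the Streamlit callbacks in `app.py`. The sketch below shows how the same flow could be driven outside Streamlit; it is illustrative only, not part of the repository, and it assumes a valid OpenAI API key, network access, and that it is run from the project root (the file name, key placeholder, and question text are made up for the example).

```python
# Illustrative only: exercising work/work.py without the Streamlit UI.
import work.work as work

# Assumption: replace the placeholder with a real OpenAI API key.
chat = work.load_env(openai_api_key="sk-...", model_name="gpt-3.5-turbo", temperature=0.5)

with open("app.py", encoding="utf-8") as f:     # any supported source file
    code_str = f.read()

splitter = work.get_file_type("app.py")          # language-aware splitter picked by extension
chunks = work.code_splite(code_str, splitter)    # ~3000-character chunks (CHUNK_SIZE)
db = work.get_code_embd_save(chunks)             # embed the chunks into a Chroma collection

print(work.qa_with_code_chain(db=db, question="What does the sidebar of this app do?", chat=chat))
```

The comment and documentation flows follow the same pattern: split the file, fan the chunks out through `comment_future_run` or `doc_futures_run`, and join the returned strings, which is what the button handlers in `app.py` do.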