├── .streamlit ├── config.toml └── secrets.toml ├── LICENSE ├── README.md ├── __pycache__ └── app.cpython-311.pyc ├── app.py ├── image ├── index.png ├── md.png ├── py.png ├── qa.png ├── qichen.jpg └── streamlit.png ├── packages.txt ├── requirements.txt └── work ├── __pycache__ └── work.cpython-311.pyc └── work.py /.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [runner] 2 | 3 | # Default: true 4 | fastReruns = false -------------------------------------------------------------------------------- /.streamlit/secrets.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/.streamlit/secrets.toml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Project Overview
2 | 
3 | ## code_using_GPT
4 | 
5 | ### 1. Introduction
6 | 
7 | This project uses ChatGPT to generate comments for code, produce a README-style explanatory document, and answer questions about an uploaded code file with a large language model, so that the code is easier to understand.
8 | 
9 | ### 2. One-Click Deployment on Streamlit
10 | 
11 | 
12 | * Fork this repository, then click `Streamlit 部署` (Streamlit Deploy) and select NEW APP
13 | 
14 | 

15 | 16 | 17 | 18 |
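The deployed app asks for an OpenAI API key in the sidebar at runtime. If you would rather not paste the key in on every visit, Streamlit Community Cloud can supply it through the app's Secrets instead. The snippet below is an optional tweak to `app.py`, not code that ships with this repository; it assumes a secret named `openai_api_key` (the same name used in a commented-out `st.secrets` line at the bottom of `work/work.py`) has actually been configured in the app's Settings under Secrets:

```python
# Hypothetical change to app.py: prefer a configured secret, fall back to manual input.
openai_api_key = st.secrets.get("openai_api_key", "")
if not openai_api_key:
    openai_api_key = st.text_input("OpenAI API Key", key="chatbot_api_key", type="password")
```

Everything downstream stays the same: `app.py` still hands the key to `work.load_env`, which exports it for the OpenAI and Chroma clients.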

19 | 
20 | 
21 | ![App interface](./image/streamlit.png)
22 | * Repository: select your forked repository
23 | * Branch: usually main by default
24 | * Main file path: app.py
25 | * App URL (Optional): custom subdomain
26 | * Click Deploy! and you are done
27 | 
28 | ### Local Installation
29 | 1. Install Git:
30 | 
31 | If you do not have Git yet, install it first. You can download the version for your operating system from the official Git site: https://git-scm.com/downloads
32 | 
33 | 2. Clone the GitHub repository:
34 | 
35 | Open a terminal, change to the directory you want to clone the project into, and run the following command to clone the repository to your machine:
36 | ```bash
37 | git clone https://github.com/llmadd/code_using_GPT.git
38 | ```
39 | Replace the URL above with the actual URL of the GitHub repository you want to clone.
40 | 
41 | 3. Enter the project directory:
42 | 
43 | Use cd to move into the cloned repository:
44 | ```bash
45 | cd code_using_GPT
46 | ```
47 | 
48 | 4. Create a virtual environment (optional but recommended):
49 | 
50 | To isolate the project's dependencies, you can create a virtual environment. Run the following commands to create and activate it:
51 | 
52 | ```bash
53 | python -m venv venv
54 | source venv/bin/activate  # on Windows use venv\Scripts\activate
55 | ```
56 | 
57 | 5. Install the project dependencies:
58 | 
59 | Use pip to install everything listed in requirements.txt:
60 | 
61 | ```bash
62 | pip install -r requirements.txt
63 | ```
64 | 
65 | This installs all Python packages the project needs.
66 | 
67 | 6. Run the Streamlit application:
68 | Start the Streamlit app from the command line:
69 | 
70 | ```bash
71 | streamlit run app.py
72 | ```
73 | 
74 | This launches the Streamlit application and opens its interface in your default browser.
75 | 
76 | ### 3. Usage
77 | After uploading a file, the interface looks like this:
78 | ![App interface](./image/index.png)
79 | 
80 | * Click the comment button to generate comments for the current file and download the result
81 | ![Code comments](./image/py.png)
82 | 
83 | * Click the documentation button to generate an explanatory document for the code and download it
84 | ![Generated document](./image/md.png)
85 | * You can also chat with the model: ask a question and it will answer based on the code relevant to that question
86 | ![Q&A over code](./image/qa.png)
87 | 
88 | ### 4. Tech Stack
89 | 
90 | #### langchain + Streamlit + OpenAI + Chroma
91 | 
92 | * langchain builds the chains for each workflow
93 | 
94 | * streamlit builds the front-end interface
95 | 
96 | * OpenAI GPT-3.5 models handle generation
97 | 
98 | * chromadb serves as the vector database
99 | 
100 | ### 5. Planned Features
101 | - [x] Multi-language support (C, Java, etc.)
102 | 
103 | - [x] One-click deployment and easier OpenAI key configuration
104 | 
105 | - [x] User-selectable models
106 | 
107 | - [ ] One-click code formatting
108 | 
109 | - [ ] Contact me for further requests...
110 | 
111 | ### 6. 
联系我 112 | 113 | ![微信](./image/qichen.jpg) 114 | 115 | 116 | 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /__pycache__/app.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/__pycache__/app.cpython-311.pyc -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import work.work as work 3 | from typing import Any, Dict, List, Optional, Sequence, Tuple, Union 4 | 5 | 6 | def doc_code(code_str,chat)->str: 7 | spliter = work.get_file_type(st.session_state.file_type) 8 | code_split_ = work.code_splite(code_str,spliter) 9 | result = "" 10 | results = work.doc_futures_run(code_split_,chat) 11 | for i in results: 12 | result += i 13 | return result 14 | 15 | def code_with_comment(code_str,chat)->str: 16 | spliter = work.get_file_type(st.session_state.file_type) 17 | code_split_ = work.code_splite(code_str,spliter) 18 | result = "" 19 | results = work.comment_future_run(code_split_,chat) 20 | for i in results: 21 | result += i 22 | return result 23 | 24 | def qa_with_code(question:str,code_str,chat)->str: 25 | spliter = work.get_file_type(st.session_state.file_type) 26 | code_split_ = work.code_splite(code_str,spliter) 27 | db = work.get_code_embd_save(code_split_) 28 | result = work.qa_with_code_chain(db = db,question=question,chat = chat) 29 | return result 30 | 31 | 32 | 33 | st.title(":blue[Analyzing code using GPT 🤖]") 34 | 35 | 36 | if 'code' not in st.session_state: 37 | st.session_state.code = "" 38 | if 'doc_result' not in st.session_state: 39 | st.session_state.doc_result = "" 40 | if 'comment_result' not in st.session_state: 41 | st.session_state.comment_result = "" 42 | if 'file_type' not in st.session_state: 43 | st.session_state.file_type = "" 44 | 45 | with st.sidebar: 46 | st.title(":blue[ChatGPT 🤖]") 47 | openai_api_key = st.text_input("OpenAI API Key", key="chatbot_api_key", type="password") 48 | model_name = st.selectbox( 49 | '选择Openai模型', 50 | ('gpt-3.5-turbo', 'gpt-3.5-turbo-16k', 'gpt-3.5-turbo-0613','gpt-3.5-turbo-16k-0613','gpt-4','gpt-4-0613','gpt-4-32k','gpt-4-32k-0613')) 51 | temperature = st.slider('设置模型温度值', 0.0, 2.0, 0.5) 52 | 53 | if not openai_api_key: 54 | st.error("请输入OpenAI API Key") 55 | st.stop() 56 | else: 57 | st.session_state.chat = work.load_env(openai_api_key=openai_api_key,model_name=model_name,temperature=temperature) 58 | 59 | 60 | 61 | 62 | uploaded_file = st.file_uploader(label=":blue[上传代码文件]",type=[ 63 | 'cpp', 'cc', 'cxx', 'hpp', 'h', 'hxx', 64 | 'go', 'java', 'js', 'php', 'proto', 'py', 65 | 'rst', 'rb', 'rs', 'scala', 'swift', 'md', 66 | 'markdown', 'tex', 'html', 'sol'], help=":blue[仅支持所有主流代码文件]",key = "up_file") 67 | 68 | if uploaded_file is not None: 69 | with st.sidebar: 70 | st.title(f"{uploaded_file.name}文件源码:") 71 | code_str = uploaded_file.getvalue().decode("utf-8") 72 | st.session_state.file_type = uploaded_file.name 73 | st.session_state.code = code_str 74 | st.code(code_str,language="python") 75 | col1, col2 = st.columns(spec= [0.5,0.5], gap = "large") 76 | with col1: 77 | comment_bt = st.button("插入注释") 78 | if comment_bt: 79 | with st.spinner("正在插入注释..."): 80 | result = code_with_comment(st.session_state.code,st.session_state.chat) 81 | st.session_state.comment_result = result 82 | 
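# Streamlit reruns this script on every widget interaction, so the generated text is cached in st.session_state and re-rendered by the expanders further down.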
st.success("注释插入完成") 83 | with col2: 84 | doc_code_bt = st.button("生成文档") 85 | if doc_code_bt: 86 | with st.spinner("正在生成文档..."): 87 | result = doc_code(st.session_state.code,st.session_state.chat) 88 | st.session_state.doc_result = result 89 | st.success("文档生成完成") 90 | if st.session_state.comment_result != "": 91 | with st.expander("注释结果"): 92 | st.code(st.session_state.comment_result,language="python") 93 | st.download_button( 94 | label="下载注释后文件", 95 | data=st.session_state.comment_result, 96 | file_name='llmadd.py',) 97 | if st.session_state.doc_result != "": 98 | with st.expander("文档内容"): 99 | st.markdown(st.session_state.doc_result) 100 | st.download_button( 101 | label="下载文档", 102 | data=st.session_state.doc_result, 103 | file_name='doc.md',) 104 | 105 | # chat 106 | if "messages" not in st.session_state: 107 | st.session_state["messages"] = [{"role": "ai", "avatar":"🤖", "content": "我是强大的人工智能助手,请上传你的代码文件,我将帮助你更好了解代码!"}] 108 | for msg in st.session_state.messages: 109 | st.chat_message(name=msg["role"],avatar=msg["avatar"]).markdown(msg["content"]) 110 | 111 | if st.session_state.code == "": 112 | st.error("请上传代码文件") 113 | st.stop() 114 | prompt = st.chat_input(placeholder="咨询与代码文件有关问题",max_chars = 4000,key="prompt") 115 | if prompt: 116 | 117 | st.session_state.messages.append({"role": "human", "avatar":"🧑", "content": prompt}) 118 | 119 | st.chat_message(name="human",avatar="🧑").markdown(prompt) 120 | 121 | with st.chat_message(name="ai",avatar="🤖"): 122 | with st.spinner("正在生成答案..."): 123 | response = qa_with_code(prompt,st.session_state.code,st.session_state.chat) 124 | 125 | st.session_state.messages.append({"role": "ai", "avatar":"🤖", "content": response}) 126 | 127 | st.markdown(response) 128 | 129 | -------------------------------------------------------------------------------- /image/index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/image/index.png -------------------------------------------------------------------------------- /image/md.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/image/md.png -------------------------------------------------------------------------------- /image/py.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/image/py.png -------------------------------------------------------------------------------- /image/qa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/image/qa.png -------------------------------------------------------------------------------- /image/qichen.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/image/qichen.jpg -------------------------------------------------------------------------------- /image/streamlit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/image/streamlit.png 
--------------------------------------------------------------------------------
/packages.txt:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | chromadb==0.4.8
2 | langchain==0.0.279
3 | openai==0.28.0
4 | python-dotenv==1.0.0
5 | streamlit==1.26.0
6 | pysqlite3-binary
7 | tiktoken==0.4.0
8 | 
--------------------------------------------------------------------------------
/work/__pycache__/work.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/llmadd/code_using_GPT/f1791fda625a84aad7508d00e5dc62d0ebc3b89b/work/__pycache__/work.cpython-311.pyc
--------------------------------------------------------------------------------
/work/work.py:
--------------------------------------------------------------------------------
1 | import os
2 | from dotenv import load_dotenv
3 | from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
4 | import concurrent.futures
5 | from langchain.chains.base import Chain
6 | from langchain.prompts import PromptTemplate
7 | from langchain.chat_models import ChatOpenAI
8 | from langchain.chains import LLMChain
9 | from langchain.prompts.chat import (
10 |     ChatPromptTemplate,
11 |     HumanMessagePromptTemplate,
12 | )
13 | from langchain.text_splitter import (
14 |     RecursiveCharacterTextSplitter,
15 |     Language,
16 | )
17 | from langchain.vectorstores import Chroma
18 | from langchain.embeddings import OpenAIEmbeddings
19 | 
20 | CHUNK_SIZE = 3000
21 | 
22 | def load_env(openai_api_key:str,model_name:str,temperature:float)->ChatOpenAI:
23 |     os.environ["OPENAI_API_KEY"] = openai_api_key
24 |     chat = ChatOpenAI(model = model_name,temperature = temperature,streaming=True)
25 |     return chat
26 | 
27 | code_with_comment_chain_systemtemplate = """
28 | 你是强大的人工智能ChatGPT。
29 | 
30 | 你的任务是为代码增加中文注释。禁止修改代码!
31 | 
32 | 只允许输出增加注释后的代码。禁止输出任何其他内容!
33 | """
34 | 
35 | doc_code_chain_systemtemplate = """
36 | 你是强大的人工智能ChatGPT。
37 | 
38 | 你的任务是为代码生成一篇README.md文档。
39 | 
40 | 文档中介绍代码用到的技术栈,代码的功能,代码的使用方法,代码的运行环境等等。
41 | 
42 | 用markdown格式输出README.md文档。
43 | """
44 | 
45 | qa_with_code_chain_systemtemplate = """
46 | 你是强大的人工智能ChatGPT。
47 | 
48 | 你需要根据代码内容和你自身的知识尽可能地回答用户的问题。
49 | 
50 | 要尽可能详细地回答用户问题。
51 | """
52 | 
53 | def get_file_type(file_name):
54 |     extension = "."+file_name.split('.')[-1]
55 |     if extension in ['.cpp', '.cc', '.cxx', '.hpp', '.h', '.hxx']:
56 |         splitter = RecursiveCharacterTextSplitter.from_language(
57 |             language=Language.CPP,chunk_size=CHUNK_SIZE, chunk_overlap=0
58 |         )
59 |         return splitter
60 |     elif extension == '.go':
61 |         splitter = RecursiveCharacterTextSplitter.from_language(
62 |             language=Language.GO,chunk_size=CHUNK_SIZE, chunk_overlap=0
63 |         )
64 |         return splitter
65 |     elif extension == '.java':
66 |         splitter = RecursiveCharacterTextSplitter.from_language(
67 |             language=Language.JAVA,chunk_size=CHUNK_SIZE, chunk_overlap=0
68 |         )
69 |         return splitter
70 |     elif extension == '.js':
71 |         splitter = RecursiveCharacterTextSplitter.from_language(
72 |             language=Language.JS,chunk_size=CHUNK_SIZE, chunk_overlap=0
73 |         )
74 |         return splitter
75 |     elif extension == '.php':
76 |         splitter = RecursiveCharacterTextSplitter.from_language(
77 |             language=Language.PHP,chunk_size=CHUNK_SIZE, chunk_overlap=0
78 |         )
79 |         return splitter
80 |     elif extension == '.proto':
81 |         splitter = RecursiveCharacterTextSplitter.from_language(
82 |             language=Language.PROTO,chunk_size=CHUNK_SIZE, chunk_overlap=0
83 |         )
84 |         return splitter
85 |     elif extension == '.py':
86 |         splitter = RecursiveCharacterTextSplitter.from_language(
87 |             language=Language.PYTHON,chunk_size=CHUNK_SIZE, chunk_overlap=0
88 |         )
89 |         return splitter
90 |     elif extension == '.rst':
91 |         splitter = RecursiveCharacterTextSplitter.from_language(
92 |             language=Language.RST,chunk_size=CHUNK_SIZE, chunk_overlap=0
93 |         )
94 |         return splitter
95 |     elif extension == '.rb':
96 |         splitter = RecursiveCharacterTextSplitter.from_language(
97 |             language=Language.RUBY,chunk_size=CHUNK_SIZE, chunk_overlap=0
98 |         )
99 |         return splitter
100 |     elif extension == '.rs':
101 |         splitter = RecursiveCharacterTextSplitter.from_language(
102 |             language=Language.RUST,chunk_size=CHUNK_SIZE, chunk_overlap=0
103 |         )
104 |         return splitter
105 |     elif extension == '.scala':
106 |         splitter = RecursiveCharacterTextSplitter.from_language(
107 |             language=Language.SCALA,chunk_size=CHUNK_SIZE, chunk_overlap=0
108 |         )
109 |         return splitter
110 |     elif extension == '.swift':
111 |         splitter = RecursiveCharacterTextSplitter.from_language(
112 |             language=Language.SWIFT,chunk_size=CHUNK_SIZE, chunk_overlap=0
113 |         )
114 |         return splitter
115 |     elif extension in ['.md', '.markdown']:
116 |         splitter = RecursiveCharacterTextSplitter.from_language(
117 |             language=Language.MARKDOWN,chunk_size=CHUNK_SIZE, chunk_overlap=0
118 |         )
119 |         return splitter
120 |     elif extension == '.tex':
121 |         splitter = RecursiveCharacterTextSplitter.from_language(
122 |             language=Language.LATEX,chunk_size=CHUNK_SIZE, chunk_overlap=0
123 |         )
124 |         return splitter
125 |     elif extension in ['.html', '.htm']:
126 |         splitter = RecursiveCharacterTextSplitter.from_language(
127 |             language=Language.HTML,chunk_size=CHUNK_SIZE, chunk_overlap=0
128 |         )
129 |         return splitter
130 |     elif extension == '.sol':
131 |         splitter = RecursiveCharacterTextSplitter.from_language(
132 |             language=Language.SOL,chunk_size=CHUNK_SIZE, chunk_overlap=0
133 |         )
134 |         return splitter
135 |     else:
136 |         raise ValueError(f"Unsupported file type: {extension}")
137 | 
138 | def get_code_embd_save(code_split:List[str])->Chroma:
139 |     # Embed the code chunks and index them in an in-memory Chroma collection.
140 |     embeddings = OpenAIEmbeddings()
141 |     db = Chroma.from_texts(texts=code_split,embedding=embeddings)
142 |     return db
143 | 
144 | 
145 | def qa_with_code_chain(db:Chroma,question:str,chat)->str:
146 |     # Retrieve the four most relevant chunks and stuff them into the prompt.
147 |     retrievers_re = ""
148 |     retrievers = db.as_retriever(search_kwargs={'k': 4,})
149 |     doc_re = retrievers.get_relevant_documents(question)
150 |     for i in doc_re:
151 |         retrievers_re += i.page_content
152 |     human_prompt = """
153 | 根据下面代码内容回答问题:
154 | --------------------
155 | {retrievers_re}
156 | --------------------
157 | 问题:{question}
158 | """
159 |     human_message_prompt = HumanMessagePromptTemplate(
160 |         prompt=PromptTemplate(
161 |             template=human_prompt,
162 |             input_variables=["question"],
163 |             partial_variables={"retrievers_re": retrievers_re}
164 |         )
165 |     )
166 |     chat_prompt_template = ChatPromptTemplate.from_messages([
167 |         ("system", qa_with_code_chain_systemtemplate),
168 |         human_message_prompt
169 |     ])
170 |     chain = LLMChain(llm=chat, prompt=chat_prompt_template)
171 |     answer = chain.run(question)
172 |     return answer
173 | 
174 | def code_splite(code:str,splitter:RecursiveCharacterTextSplitter)->List[str]:
175 |     splite_code = splitter.split_text(text=code)
176 | 
177 |     return splite_code
178 | 
179 | def code_with_comment_chain(code:str,chat)->str:
180 |     chat_prompt_template = ChatPromptTemplate.from_messages([
181 |         ("system", code_with_comment_chain_systemtemplate),
182 |         ("human","{text}")
183 |     ])
184 |     chain = LLMChain(llm=chat, prompt=chat_prompt_template)
185 |     result = chain.run(code)
186 |     return result
187 | 
188 | def code_doc_chain(code: str,chat) -> str:
189 |     chat_prompt_template = ChatPromptTemplate.from_messages([
190 |         ("system", doc_code_chain_systemtemplate),
191 |         ("human", "{text}")
192 |     ])
193 |     chain = LLMChain(llm=chat, prompt=chat_prompt_template)
194 |     result = chain.run(code)
195 |     return result
196 | 
197 | def doc_futures_run(code_list:List[str],chat)->List[str]:
198 | 
199 |     results = []
200 | 
201 |     with concurrent.futures.ThreadPoolExecutor() as executor:
202 | 
203 |         futures = [executor.submit(code_doc_chain, _i,chat) for _i in code_list]
204 | 
205 |         # Collect results in the original chunk order (as_completed would return them out of order).
206 |         for future in futures:
207 |             results.append(future.result())
208 |     return results
209 | 
210 | def comment_future_run(code_list:List[str],chat)->List[str]:
211 |     results = []
212 | 
213 |     with concurrent.futures.ThreadPoolExecutor() as executor:
214 | 
215 |         futures = [executor.submit(code_with_comment_chain, _i,chat) for _i in code_list]
216 | 
217 |         # Collect results in the original chunk order (as_completed would return them out of order).
218 |         for future in futures:
219 |             results.append(future.result())
220 |     return results
221 | 
222 | 
223 | 
224 | # print(st.secrets["openai_api_key"])
225 | 
--------------------------------------------------------------------------------
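For reference, the helpers above are only ever wired together through the Streamlit callbacks in `app.py`. The sketch below shows how the same flow could be driven outside Streamlit; it is illustrative only, not part of the repository, and it assumes a valid OpenAI API key, network access, and that it is run from the project root (the file name, key placeholder, and question text are made up for the example).

```python
# Illustrative only: exercising work/work.py without the Streamlit UI.
import work.work as work

# Assumption: replace the placeholder with a real OpenAI API key.
chat = work.load_env(openai_api_key="sk-...", model_name="gpt-3.5-turbo", temperature=0.5)

with open("app.py", encoding="utf-8") as f:     # any supported source file
    code_str = f.read()

splitter = work.get_file_type("app.py")          # language-aware splitter picked by extension
chunks = work.code_splite(code_str, splitter)    # ~3000-character chunks (CHUNK_SIZE)
db = work.get_code_embd_save(chunks)             # embed the chunks into a Chroma collection

print(work.qa_with_code_chain(db=db, question="What does the sidebar of this app do?", chat=chat))
```

The comment and documentation flows follow the same pattern: split the file, fan the chunks out through `comment_future_run` or `doc_futures_run`, and join the returned strings, which is what the button handlers in `app.py` do.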